In [1]:
import os
os.environ['CUDA_VISIBLE_DEVICES'] = "1"
import numpy as np
import pandas as pd
import torch
import torch.optim.lr_scheduler as lr_scheduler

from pyod.utils.utility import precision_n_scores
from sklearn.metrics import roc_auc_score, average_precision_score
from tensorboardX import SummaryWriter
from torch.utils.data import DataLoader

import utils.helper as helper
from utils.losses import VaeLoss
from models.lmf import LMF
from utils.muad_dataset import MuadDataset
from utils.parser import train_parser

import warnings
warnings.filterwarnings('ignore')

### Load Training dataset to get threshold

In [2]:
dataset = MuadDataset("data/CrisisMMD_v2.0/CrisisMMD_v2.0",
                          "data/CrisisMMD_v2.0/train_cleaned.xlsx")
torch.multiprocessing.set_start_method('spawn')
dataloader = DataLoader(dataset, batch_size=1, shuffle=True, num_workers=0)

### Setup model

In [3]:
model = LMF(rank=16, latent_dim=32)
model.cuda()

loss_func = VaeLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3, weight_decay=0.01)
scheduler = lr_scheduler.ExponentialLR(optimizer, gamma=0.999)

### load model from folder

In [4]:
saved_path = 'logs/LMF_2021_01_11_16_06_20'
init_epoch, model = helper.load_saved_model(saved_path, model)

resuming by loading epoch 396


In [20]:
loss_list = []
label_list = []

### Get all losses

In [None]:
model.eval()
for i, sample_batched in enumerate(dataloader):
    print(i)
    image_features, text_features, label = sample_batched['image_features'], sample_batched['text_features'], \
                                                   sample_batched['label']
    image_features = image_features.cuda()
    text_features = text_features.cuda()
    label = label.cuda()
    
    output = model(image_features, text_features)
    loss = loss_func(output)
    
    loss_list.append(loss['total_loss'].cpu().detach().numpy())
    label_list.append(label.cpu().detach().numpy())

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121


### Find thresholding

In [7]:
labels = np.array(label_list)
losses = np.array(loss_list)
error_df = pd.DataFrame(data = {'error':losses,'true':np.squeeze(labels, 1)})

In [8]:
error_df.groupby('true')['error'].describe().reset_index()

Unnamed: 0,true,count,mean,std,min,25%,50%,75%,max
0,0,5514.0,554.0354,141.962936,175.923309,461.311562,550.487946,643.187195,1174.256714
1,1,612.0,567.998962,117.852051,289.573242,485.853104,563.84613,639.08725,1013.422729


In [16]:
temp_df = error_df[error_df['true'] == 0]
threshold = error_df['error'].mean() + temp_df['error'].std()

In [17]:
y_pred = np.array([1 if e > threshold else 0 for e in error_df.error.values])
y = error_df.true.values
print('{clf_name} ROC:{roc}, ap:{ap}, precision @ rank n:{prn}'.format(
        clf_name='LMF',
        roc=np.round(roc_auc_score(y, y_pred), decimals=4),
        prn=np.round(precision_n_scores(y, y_pred), decimals=5),
        ap=np.round(average_precision_score(y, y_pred), decimals=5)))

LMF ROC:0.4683, ap:0.08048, precision @ rank n:0.0


### Load testdataset

In [11]:
dataset = MuadDataset("data/CrisisMMD_v2.0/CrisisMMD_v2.0",
                          "data/CrisisMMD_v2.0/test_cleaned.xlsx")
dataloader = DataLoader(dataset, batch_size=1, shuffle=True, num_workers=0)

In [12]:
loss_list = []
label_list = []

In [13]:
for i, sample_batched in enumerate(dataloader):
    image_features, text_features, label = sample_batched['image_features'], sample_batched['text_features'], \
                                                   sample_batched['label']
    image_features = image_features.cuda()
    text_features = text_features.cuda()
    label = label.cuda()
    
    output = model(image_features, text_features)
    loss = loss_func(output)
    
    loss_list.append(loss['reconstruction_loss'].cpu().detach().numpy())
    label_list.append(label.cpu().detach().numpy())

In [14]:
labels = np.array(label_list)
losses = np.array(loss_list)
error_df = pd.DataFrame(data = {'error':losses,'true':np.squeeze(labels, 1)})

### Evaluate method

In [15]:
y_pred = np.array([1 if e > threshold else 0 for e in error_df.error.values])
y = error_df.true.values
print('{clf_name} ROC:{roc}, ap:{ap}, precision @ rank n:{prn}'.format(
        clf_name='LMF',
        roc=np.round(roc_auc_score(y, y_pred), decimals=4),
        prn=np.round(precision_n_scores(y, y_pred), decimals=5),
        ap=np.round(average_precision_score(y, y_pred), decimals=5)))

LMF ROC:0.4669, ap:0.08149, precision @ rank n:0.0
