In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy
from sklearn.metrics import roc_auc_score
from sklearn import mixture, preprocessing, datasets

from importlib import reload
import matplotlib
import matplotlib.pyplot as plt
from matplotlib import patches

import torch.utils.data as data_utils

import utils.models as models
import utils.plotting as plotting
import utils.dataloaders as dl
import utils.traintest as tt
import utils.adversarial as adv
import utils.eval as ev
import model_params as params
import utils.resnet_orig as resnet
import utils.gmm_helpers as gmm_helpers

import model_paths

In [2]:
def test_metrics(model, device, in_loader, out_loader):
    with torch.no_grad():
        model.eval()
        conf_in = []
        conf_out = []

        for data_in, _ in in_loader:
            data_in = data_in.to(device)
            output_in = model(data_in).max(1)[0].exp()
            conf_in.append(output_in)
            
        for data_out, _ in out_loader:    
            data_out = data_out.to(device)
            noise, _ = adv.gen_adv_noise(model, device, data_out, epsilon=0.1, 
                                         restarts=3, steps=200, step_size=.1)
            output_out = model(noise).max(1)[0].exp()
            conf_out.append(output_out)
            
        conf_in = torch.cat(conf_in)
        conf_out = torch.cat(conf_out)
        
        y_true = torch.cat([torch.ones_like(conf_in), 
                            torch.zeros_like(conf_out)]).cpu().numpy()
        y_scores = torch.cat([conf_in, 
                              conf_out]).cpu().numpy()
        
        mmc = conf_out.mean().item()
        auroc = roc_auc_score(y_true, y_scores)
        fp95 = ((conf_out > 0.95).float().mean().item())
        return mmc, auroc, fp95

In [3]:
def evaluate_model(model, device, base_loader, loaders):
    """Tabulate MMC and AUC (both in percent) for each named loader.

    Args:
        model: network handed through to ``test_metrics``.
        device: device handed through to ``test_metrics``.
        base_loader: loader passed as the in-distribution side of every run.
        loaders: iterable of ``(name, data_loader)`` pairs; each loader is
            passed as the out-distribution side of one ``test_metrics`` call.

    Returns:
        DataFrame indexed by ``'DataSet'`` with columns ``'MMC'`` and ``'AUC'``,
        one row per entry of ``loaders``.
    """
    def as_row(name, out_loader):
        # test_metrics also returns a third value (fp95); it is not reported.
        mean_conf, auc, _ = test_metrics(model, device, base_loader, out_loader)
        return [name, 100*mean_conf, 100*auc]

    rows = [as_row(name, out_loader) for (name, out_loader) in loaders]
    table = pd.DataFrame(rows, columns=['DataSet', 'MMC', 'AUC'])
    return table.set_index('DataSet')

In [4]:
# For every dataset, evaluate each stored model on the adversarial-noise
# benchmark and print one LaTeX table (one MMC/AUC column pair per model).
ds_list = ['MNIST', 'FMNIST', 'SVHN', 'CIFAR10', 'CIFAR100']
device = torch.device('cuda:1')

for dataset in ds_list:
    model_params = params.params_dict[dataset]()
    model_path = model_paths.model_dict[dataset]()
    # NOTE: torch.load unpickles arbitrary Python objects -- only point this
    # at checkpoints from a trusted source.
    model_list = [torch.load(file).cpu().eval() for file in model_path.file_dict.values()]

    # Loop-invariant: the same out-distribution loader is used for every model.
    # NOTE(review): presumably the last entry of model_params.loaders is the
    # noise dataset used to seed the attack -- confirm in model_params.
    adv_loaders = [('Adv', model_params.loaders[-1][1])]

    df_list = []

    for i, net in enumerate(model_list):
        print(i)
        df = evaluate_model(net.to(device), device, model_params.test_loader,
                            adv_loaders)
        df_list.append(df)
        # Module.to() moves the model in place; send it back to the CPU so
        # already-evaluated models do not stay on the GPU.
        net.cpu()

    df = pd.concat(df_list, axis=1, keys=list(model_path.file_dict.keys()))

    print(df.round(1).to_latex())

0
1
2
3
4
5
\begin{tabular}{lrrrrrrrrrrrr}
\toprule
{} & \multicolumn{2}{l}{Base} & \multicolumn{2}{l}{CEDA} & \multicolumn{2}{l}{ACET} & \multicolumn{2}{l}{ODIN} & \multicolumn{2}{l}{CCUb} & \multicolumn{2}{l}{CCU} \\
{} &    MMC &   AUC &   MMC &   AUC &   MMC &    AUC &   MMC &   AUC &   MMC &   AUC &   MMC &    AUC \\
DataSet &        &       &       &       &       &        &       &       &       &       &       &        \\
\midrule
Adv     &  100.0 &  16.4 &  54.5 &  99.6 &  10.0 &  100.0 &  10.3 &  25.0 &  99.9 &  71.7 &  10.0 &  100.0 \\
\bottomrule
\end{tabular}

0
1
2
3
4
5
\begin{tabular}{lrrrrrrrrrrrr}
\toprule
{} & \multicolumn{2}{l}{Base} & \multicolumn{2}{l}{CEDA} & \multicolumn{2}{l}{ACET} & \multicolumn{2}{l}{ODIN} & \multicolumn{2}{l}{CCUb} & \multicolumn{2}{l}{CCU} \\
{} &    MMC &  AUC &   MMC &   AUC &   MMC &    AUC &   MMC &  AUC &    MMC &  AUC &   MMC &    AUC \\
DataSet &        &      &       &       &       &        &       &      &        &      &       & 