# Anomaly Score comparison

In [1]:
import os
import sys

module_path = os.path.abspath(os.path.join(".."))
sys.path.append(module_path)
import torch
from torchvision import datasets

from anomaly_scores.energy import energy_anomaly_score
from anomaly_scores.max_logit import max_logit_anomaly_score
from anomaly_scores.softmax import max_softmax_anomaly_score
from anomaly_scores.vim_scores import VIM
from energy_ood.CIFAR.models.wrn import WideResNet
from energy_ood.utils.svhn_loader import SVHN
from util import TEST_TRANSFORM
from util.display_results import compare_all_results
from util.get_ood_score import get_ood_score_for_multiple_datasets
from vim_training.test import test

## The data
Let's start with replicating the results from the paper. First, with the SVHN data set. 

In [2]:
loaders = []

id_data = datasets.CIFAR10("../data/cifar10", train=False, transform=TEST_TRANSFORM)
id_loader = torch.utils.data.DataLoader(
    id_data, batch_size=200, shuffle=False, num_workers=2, pin_memory=True
)
loaders.append(("CIFAR10", id_loader))


ood_data = SVHN(
    root="../data/svhn/",
    split="test",
    transform=TEST_TRANSFORM,
    download=False,
)
ood_loader = torch.utils.data.DataLoader(
    ood_data, batch_size=200, shuffle=True, num_workers=2, pin_memory=True
)
ood_num_examples = len(loaders[0][1].dataset) // 5
loaders.append(("SVHN", ood_loader))


data = datasets.CIFAR100("../data/cifar-100", train=False, transform=TEST_TRANSFORM)
loader = torch.utils.data.DataLoader(
    data, batch_size=200, shuffle=True, num_workers=2, pin_memory=True
)
loaders.append(("CIFAR100", loader))

## Models

We are using the Wide ResNet as in the paper.

In [3]:
models = []

model_folder = "../snapshots/pretrained/"

for filename in  next(os.walk(model_folder), (None, None, []))[2]:
    model_name = filename.split(".")[0].replace("_"," ")
    print(model_name)
    model = WideResNet(depth=40, num_classes=10, widen_factor=2, dropRate=0.3)
    model.load_state_dict(
        torch.load(
            model_folder + filename
        )
    )
    model.eval()
    _ = model.cuda()
    models.append((model_name, model))

WRN Hendrycks Calib Seed1
WRN Ours Seed1
WRN Hendrycks Seed1
WRN Hendrycks Seed64
WRN Hendrycks Seed42


# Anomaly Scores
Let's compare the scores.

In [5]:
import numpy as np
aurocs_results = {}
auprs_results = {}
for model_name, model in models:
    print(model_name)
    aurocs_results[model_name] = {}
    auprs_results[model_name] = {}
    vim = VIM(id_loader, model)

    scores = [
        ("MaxLogit", max_logit_anomaly_score),
        ("MaxSoftmax", max_softmax_anomaly_score),
        ("Energy", energy_anomaly_score),
        ("VIM", vim.compute_anomaly_score),
    ]

    _, test_accuracy = test(model, loaders[0][1])
    aurocs_results[model_name]["test_acc"] = test_accuracy
    auprs_results[model_name]["test_acc"] = test_accuracy

    for name, score in scores:
        print("  ", name)
        results = get_ood_score_for_multiple_datasets(
            loaders,
            model,
            score,
            is_using="last" if not name == "VIM" else "last_penultimate",
        )
        aurocs = [np.mean(aurocs) for aurocs, _, _ in results]
        aurocs.append(np.mean(aurocs))
        aurocs_results[model_name][name] = aurocs
        auprs = [np.mean(auprs) for _, auprs, _ in results]
        auprs.append(np.mean(auprs))
        auprs_results[model_name][name] = auprs

WRN Hendrycks Calib Seed1
   MaxLogit
   MaxSoftmax
   Energy
   VIM
WRN Ours Seed1
   MaxLogit
   MaxSoftmax
   Energy
   VIM
WRN Hendrycks Seed1
   MaxLogit
   MaxSoftmax
   Energy
   VIM
WRN Hendrycks Seed64
   MaxLogit
   MaxSoftmax
   Energy
   VIM
WRN Hendrycks Seed42
   MaxLogit
   MaxSoftmax
   Energy
   VIM


In [6]:
compare_all_results(aurocs_results, loaders)

WRN Hendrycks Calib Seed1 (5.58%) |     SVHN     |   CIFAR100   |     AVG     
                 MaxLogit |    86.54%    |    84.77%    |    85.66%   
               MaxSoftmax |    87.64%    |   *86.67%    |    87.16%   
                   Energy |    86.37%    |    85.78%    |    86.07%   
                      VIM |   *89.90%    |    85.40%    |   *87.65%   

   WRN Ours Seed1 (5.18%) |     SVHN     |   CIFAR100   |     AVG     
                 MaxLogit |    82.53%    |    86.49%    |    84.51%   
               MaxSoftmax |    86.53%    |   *87.70%    |    87.11%   
                   Energy |    81.91%    |    86.15%    |    84.03%   
                      VIM |   *96.32%    |    86.85%    |   *91.58%   

WRN Hendrycks Seed1 (5.15%) |     SVHN     |   CIFAR100   |     AVG     
                 MaxLogit |    91.03%    |    87.58%    |    89.31%   
               MaxSoftmax |    92.21%    |   *88.68%    |    90.45%   
                   Energy |    90.75%    |    87.98%    |    89.3

In [7]:
compare_all_results(auprs_results, loaders)

WRN Hendrycks Calib Seed1 (5.58%) |     SVHN     |   CIFAR100   |     AVG     
                 MaxLogit |    85.16%    |    85.33%    |    85.25%   
               MaxSoftmax |    84.52%    |    84.61%    |    84.57%   
                   Energy |    84.81%    |   *85.67%    |    85.24%   
                      VIM |   *87.82%    |    85.36%    |   *86.59%   

   WRN Ours Seed1 (5.18%) |     SVHN     |   CIFAR100   |     AVG     
                 MaxLogit |    83.56%    |    86.01%    |    84.79%   
               MaxSoftmax |    84.58%    |    84.73%    |    84.66%   
                   Energy |    82.76%    |    85.76%    |    84.26%   
                      VIM |   *95.26%    |   *86.20%    |   *90.73%   

WRN Hendrycks Seed1 (5.15%) |     SVHN     |   CIFAR100   |     AVG     
                 MaxLogit |    91.07%    |    87.24%    |    89.16%   
               MaxSoftmax |    90.49%    |    86.47%    |    88.48%   
                   Energy |    90.82%    |   *87.38%    |    89.1