In [None]:
# Load IPython's autoreload extension and use mode 2, which re-imports edited
# modules before each cell executes.
%load_ext autoreload
%autoreload 2

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import torchvision
import torch
import torch.nn as nn
from torch import optim
from torch.utils.tensorboard import SummaryWriter
from torch.utils.data import DataLoader
import torch.distributions as D
from tqdm import tqdm

import image_dataset
from hmc_vae import HMCVAE
import utils
# Use the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

### Training Data

In [None]:
# Keep the first value returned by image_dataset.get and discard the second.
# NOTE(review): presumably the return value is (train, test) TensorDatasets — confirm in image_dataset.py.
train_dataset_in, _ = image_dataset.get('FashionMNIST', 'data')
hidden_channels = 32
# The channel count is read from axis 1 of the first tensor.
# NOTE(review): this assumes an (N, C, H, W) layout — TODO confirm.
in_channels = train_dataset_in.tensors[0].shape[1]
# shuffle=False keeps the batches in dataset order.
train_dataloader_in = DataLoader(train_dataset_in, batch_size=100, shuffle=False)

### Testing Data

In [None]:
# torchvision FakeData: 5000 randomly generated images of size (in_channels, 32, 32),
# converted to tensors by the ToTensor transform.
noise_dataset = torchvision.datasets.FakeData(size=5000, image_size=(in_channels, 32, 32), transform=torchvision.transforms.ToTensor())
noise_dataloader = DataLoader(noise_dataset, batch_size=100, shuffle=False)
# For both datasets below, the first value returned by image_dataset.get is discarded
# and the second is kept.
# NOTE(review): the third argument (5000) presumably limits the number of samples — confirm in image_dataset.get.
_, FashionMNIST_dataset = image_dataset.get('FashionMNIST', 'data', 5000)
FashionMNIST_dataloader = DataLoader(FashionMNIST_dataset, batch_size=100, shuffle=False)
_, MNIST_dataset = image_dataset.get('MNIST', 'data', 5000)
MNIST_dataloader = DataLoader(MNIST_dataset, batch_size=100, shuffle=False)

### Model

In [None]:
# Build the HMCVAE and move its parameters to `device`.
# NOTE(review): T and L are presumably HMC hyper-parameters (e.g. number of steps and
# leapfrog steps) — confirm in hmc_vae.HMCVAE.
model = HMCVAE(in_channels, latent_dim=100, hidden_channels=hidden_channels, T=10, L=5).to(device)

In [None]:
# Restore model weights from a saved checkpoint.
# NOTE(review): the path is hard-coded to one specific run directory.
model_path = "logs/FashionMNIST_17h_21m_17_Jul_2022_seed42/hmc_epoch10"
# map_location remaps the stored tensors onto `device`.
# NOTE(review): torch.load unpickles the file, so only load checkpoints from a trusted source.
checkpoint = torch.load(model_path, map_location=device)
# The checkpoint is indexed like a dict; the weights live under 'model_state_dict'.
model.load_state_dict(checkpoint['model_state_dict'])

### Utilities

In [None]:
from sklearn import metrics
def plot(x1, x2=None):
    """Show a histogram of ``x1`` and, optionally, overlay a histogram of ``x2``.

    Args:
        x1: Values to plot; a ``torch.Tensor`` or anything ``plt.hist`` accepts.
        x2: Optional second set of values drawn on the same axes.
    """
    def _to_numpy(values):
        # Convert each argument on its own so that x2 may be None or a
        # different type than x1 (the previous code called x2.cpu() even
        # when x2 was None).
        if isinstance(values, torch.Tensor):
            return values.detach().cpu().numpy()
        return values

    x1 = _to_numpy(x1)
    x2 = _to_numpy(x2)
    plt.hist(x1, bins=50, alpha=0.4)
    if x2 is not None:
        plt.hist(x2, bins=50, alpha=0.4)
    plt.show()

def aucroc(score_in, score_out):
    """Plot the ROC curve and return the area under it.

    Samples in ``score_in`` are labelled 0 and samples in ``score_out`` are
    labelled 1, so a higher score is treated as evidence for the ``score_out``
    group.

    Args:
        score_in: 1-D scores for the label-0 group (tensor or array-like).
        score_out: 1-D scores for the label-1 group (tensor or array-like).

    Returns:
        The area under the ROC curve as computed by ``metrics.auc``.
    """
    def _to_numpy(scores):
        # Convert each argument independently; the previous code only
        # inspected score_in and then called .cpu() on both.
        if isinstance(scores, torch.Tensor):
            return scores.detach().cpu().numpy()
        return np.asarray(scores)

    score_in = _to_numpy(score_in)
    score_out = _to_numpy(score_out)
    ytrue = np.concatenate([
        np.zeros(len(score_in), dtype=int),
        np.ones(len(score_out), dtype=int),
    ])
    yscore = np.concatenate([score_in, score_out])
    fpr, tpr, _ = metrics.roc_curve(ytrue, yscore)
    plt.plot(fpr, tpr)
    return metrics.auc(fpr, tpr)

### Euclidean

In [None]:
def L2(test_X, train_X, k=20):
    """Mean Euclidean distance from each test row to its ``k`` nearest training rows.

    Args:
        test_X: 2-D tensor; each row is one test sample.
        train_X: 2-D tensor with the same number of columns as ``test_X``;
            each row is one training sample.
        k: Number of nearest neighbours to average over. Defaults to 20 (the
            previously hard-coded value) and is clamped to the number of
            training rows so small training sets no longer make ``topk`` fail.

    Returns:
        1-D tensor with one mean k-NN distance per row of ``test_X``.

    Raises:
        ValueError: If ``k`` is smaller than 1 or ``train_X`` has no rows.
    """
    if k < 1:
        raise ValueError("k must be at least 1")
    if train_X.shape[0] == 0:
        raise ValueError("train_X must contain at least one row")
    k = min(k, train_X.shape[0])

    mean_dist = []
    for x in tqdm(test_X):
        # Distance from this test row to every training row.
        dist = torch.norm(train_X - x, dim=-1)
        # largest=False selects the k smallest distances.
        knn = dist.topk(k, dim=-1, largest=False)[0]
        mean_dist.append(knn.mean())
    return torch.tensor(mean_dist)

In [None]:
# Score the first 100 test images of each dataset against the first 5000 training
# images; flatten(1) turns every image into a single vector.
# d1: FashionMNIST test images, d2: MNIST test images.
d1 = L2(FashionMNIST_dataset.tensors[0].flatten(1)[:100], train_dataset_in.tensors[0].flatten(1)[:5000])
d2 = L2(MNIST_dataset.tensors[0].flatten(1)[:100], train_dataset_in.tensors[0].flatten(1)[:5000])

In [None]:
# Draw the ROC curve and show the AUC, with d1 as the label-0 scores and d2 as the label-1 scores.
aucroc(d1, d2)

### Cosine

In [None]:
def cosine(test_X, train_X, k=20):
    """Mean cosine distance from each test row to its ``k`` nearest training rows.

    Cosine distance is ``1 - cosine_similarity``.

    Args:
        test_X: 2-D tensor; each row is one test sample.
        train_X: 2-D tensor with the same number of columns as ``test_X``;
            each row is one training sample.
        k: Number of nearest neighbours to average over. Defaults to 20 (the
            previously hard-coded value) and is clamped to the number of
            training rows so small training sets no longer make ``topk`` fail.

    Returns:
        1-D tensor with one mean k-NN cosine distance per row of ``test_X``.

    Raises:
        ValueError: If ``k`` is smaller than 1 or ``train_X`` has no rows.
    """
    if k < 1:
        raise ValueError("k must be at least 1")
    if train_X.shape[0] == 0:
        raise ValueError("train_X must contain at least one row")
    k = min(k, train_X.shape[0])

    mean_dist = []
    cos = nn.CosineSimilarity(dim=1, eps=1e-6)
    for x in tqdm(test_X):
        # unsqueeze(0) makes the (1, d) vs (n_train, d) broadcast explicit.
        dist = 1 - cos(train_X, x.unsqueeze(0))
        # largest=False selects the k smallest distances.
        knn = dist.topk(k, dim=-1, largest=False)[0]
        mean_dist.append(knn.mean())
    return torch.tensor(mean_dist)

In [None]:
# Same comparison as for L2 but with cosine distance: the first 100 flattened test
# images of each dataset are scored against the first 5000 flattened training images.
# d1: FashionMNIST test images, d2: MNIST test images.
d1 = cosine(FashionMNIST_dataset.tensors[0].flatten(1)[:100], train_dataset_in.tensors[0].flatten(1)[:5000])
d2 = cosine(MNIST_dataset.tensors[0].flatten(1)[:100], train_dataset_in.tensors[0].flatten(1)[:5000])

### Mahalanobis

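Doesn't work on raw image inputs: some pixel dimensions are constant across the training set, so the covariance matrix is singular (positive semi-definite but not positive definite) and cannot be inverted reliably.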

In [None]:
def Mahalanobis(test_X, train_X, eps=0.0):
    """Mahalanobis distance of each test row from the training distribution.

    The mean and covariance are estimated from ``train_X``; the distance of a
    row ``x`` is ``sqrt((x - mean)^T cov^{-1} (x - mean))``.

    Args:
        test_X: 2-D tensor; each row is one test sample.
        train_X: 2-D tensor with the same number of columns as ``test_X``;
            each row is one training sample.
        eps: Optional ridge term added to the diagonal of the covariance
            before inversion. The default 0.0 keeps the plain covariance; a
            small positive value makes a singular covariance (e.g. from
            constant feature dimensions) invertible.

    Returns:
        1-D tensor with one distance per row of ``test_X``.

    Raises:
        AssertionError: If the covariance is not symmetric with non-negative
            eigenvalues, or if a squared distance comes out negative.
    """
    # Compute the covariance once and reuse it for both the inverse and the check.
    cov = torch.cov(train_X.t())
    if eps:
        cov = cov + eps * torch.eye(cov.shape[0], dtype=cov.dtype, device=cov.device)
    inv_cov = torch.inverse(cov)
    # torch.linalg.eigvalsh replaces torch.eig, which is no longer available in
    # current PyTorch; it returns the real eigenvalues of a symmetric matrix.
    assert bool((cov == cov.T).all() and (torch.linalg.eigvalsh(cov) >= 0).all()), \
        "covariance must be symmetric positive semi-definite"
    mean = torch.mean(train_X, dim=0)
    centered = test_X - mean
    # Row-wise quadratic form; equals the diagonal of
    # centered @ inv_cov @ centered.T without building the full matrix.
    sq_dist = ((centered @ inv_cov) * centered).sum(dim=1)
    assert torch.all(sq_dist >= 0), "squared Mahalanobis distances must be non-negative"
    return torch.sqrt(sq_dist)

In [None]:
# Same comparison as above but with the Mahalanobis distance on flattened raw pixels.
# NOTE(review): the markdown note preceding the function definition reports that this
# does not work on raw image inputs.
d1 = Mahalanobis(FashionMNIST_dataset.tensors[0].flatten(1)[:100], train_dataset_in.tensors[0].flatten(1)[:5000])
d2 = Mahalanobis(MNIST_dataset.tensors[0].flatten(1)[:100], train_dataset_in.tensors[0].flatten(1)[:5000])