In [1]:
import pandas as pd
import numpy as np
import os
import seaborn as sns
import torch
import pickle
import json
from torchvision.datasets import CIFAR10, CIFAR100, SVHN, ImageFolder
from torch.utils.data import DataLoader
from tqdm import tqdm
from pathlib import Path
from PIL import Image
from matplotlib import pyplot as plt
from collections import Counter
from helpers import *
from explore import *

pd.set_option('display.max_rows', 1000)

In [9]:
class CIFAR10Instance(CIFAR10):
    """CIFAR10 dataset whose items are ``(image, index)`` pairs.

    The class label is not returned; the second element is the position
    of the sample inside the dataset.
    """

    def __getitem__(self, index):
        """Return the (optionally transformed) image at ``index`` and ``index`` itself."""
        img = Image.fromarray(self.data[index])
        transform = self.transform
        if transform is None:
            return img, index
        return transform(img), index


class CIFAR100Instance(CIFAR100):
    """CIFAR100 dataset whose items are ``(image, index)`` pairs.

    The class label is not returned; the second element is the position
    of the sample inside the dataset.
    """

    def __getitem__(self, index):
        """Return the (optionally transformed) image at ``index`` and ``index`` itself."""
        img = Image.fromarray(self.data[index])
        transform = self.transform
        if transform is None:
            return img, index
        return transform(img), index
    

class SVHNInstance(SVHN):
    """SVHN dataset whose items are ``(image, index)`` pairs.

    The class label is not returned; the second element is the position
    of the sample inside the dataset.
    """

    def __getitem__(self, index):
        """Return the (optionally transformed) image at ``index`` and ``index`` itself."""
        raw = self.data[index]
        # Axes are reordered (1, 2, 0) before the PIL conversion, i.e. the
        # stored array is presumably channels-first -- confirm against torchvision.
        img = Image.fromarray(raw.transpose(1, 2, 0))
        transform = self.transform
        if transform is None:
            return img, index
        return transform(img), index
    
    
class LSUNResize(ImageFolder):
    """Image-folder dataset whose items are ``(image, index)`` pairs.

    The folder-derived class label is not returned; the second element is
    the position of the sample inside the dataset.
    """

    def __getitem__(self, index):
        """Open the image file at ``index`` and return it with ``index``."""
        path = self.samples[index][0]
        # The file is opened directly with PIL; no colour-mode conversion is applied here.
        img = Image.open(path)
        transform = self.transform
        if transform is None:
            return img, index
        return transform(img), index
    
    
class Rot90:
    """Rotate an image by 90 degrees when its height exceeds its width."""

    def __call__(self, y):
        """Return ``y`` rotated if ``y.size[0] < y.size[1]``, otherwise ``y`` unchanged."""
        if y.size[0] < y.size[1]:
            return y.transpose(Image.ROTATE_90)
        return y


class ImageNet30(ImageFolder):
    """ImageFolder over an ImageNet30 split with the listed grayscale images removed.

    The exclusion list is read from a pickle chosen by the last path
    component of ``root``: ``"train"`` selects the train list, anything
    else selects the validation list.

    Keyword Args:
        root: dataset directory; must be passed by keyword.
        **kwargs: forwarded unchanged to ``ImageFolder``.

    Attributes:
        gs_list: object loaded from the grayscale pickle; membership is
            tested against each sample path.
        samples / imgs: ``(path, class_index)`` pairs after filtering.
        targets: class indices aligned with ``samples``.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

        # normpath drops a trailing separator, so "…/train/" is still
        # recognised as the train split; fspath also accepts Path objects.
        data_type = os.path.basename(os.path.normpath(os.fspath(kwargs['root'])))
        # NOTE: read_pickle unpickles arbitrary objects -- only use with trusted files.
        if data_type == "train":
            self.gs_list = pd.read_pickle("/data/datasets/imgnet30_train_grayscale.pickle")
        else:
            self.gs_list = pd.read_pickle("/data/datasets/imgnet30_val_grayscale.pickle")

        # ImageFolder serves items and reports its length from ``self.samples``,
        # so the filter has to be applied there; updating only ``self.imgs``
        # would leave the excluded images in the dataset and desynchronise
        # ``targets`` from the item indices.
        self.samples = [entry for entry in self.samples if entry[0] not in self.gs_list]
        self.imgs = self.samples
        self.targets = [target for _, target in self.samples]
    

class CUB(ImageFolder):
    """Plain ``ImageFolder`` alias for the CUB image directory; adds no behaviour."""


def eval_augmentation(x, y=224, z=True, r_and_z=True):
    """Build the evaluation-time transform pipeline for dataset ``x``.

    Args:
        x: dataset key used to look up normalisation statistics in
            ``means`` / ``stds``; ``"Imagenet30"`` additionally prepends ``Rot90``.
        y: target size for ``CenterCrop`` (crop-only mode) or ``Resize``
            (no-crop mode).
        z: when true, a center crop is used; otherwise only a resize.
        r_and_z: with ``z`` true, resize to 256 and center-crop to 224.

    Returns:
        A ``transforms.Compose`` of the selected steps.

    NOTE(review): in the resize-and-crop mode the sizes are fixed at
    256/224 and ``y`` is not consulted -- confirm this is intended.
    """
    if not z:
        print("no center crop")
        geometry = [transforms.Resize(y)]
    else:
        print("using center crop")
        if r_and_z:
            print("using resize and center crop")
            geometry = [transforms.Resize(256), transforms.CenterCrop(224)]
        else:
            geometry = [transforms.CenterCrop(y)]

    # Every mode ends with tensor conversion and per-dataset normalisation.
    pipeline = geometry + [
        transforms.ToTensor(),
        transforms.Normalize(mean=means[x], std=stds[x]),
    ]

    if x == "Imagenet30":
        print("imagenet30 mode")
        pipeline.insert(0, Rot90())

    return transforms.Compose(pipeline)


def _build_eval_datasets(data_set, id_dataset, img_size):
    """Create the train/test datasets and the transform used for ``data_set``.

    Returns ``(training_data, test_data, selected_data, transform)``;
    ``training_data`` is ``None`` for CUB. Raises ``ValueError`` for an
    unknown ``data_set`` before anything is loaded.
    """
    known = ("CIFAR10", "CIFAR100", "SVHN", "LSUNResize", "Imagenet30", "CUB")
    if data_set not in known:
        raise ValueError(f"Unknown dataset {data_set!r}; expected one of {known}")

    if data_set in ("Imagenet30", "CUB"):
        # These two datasets always use the Imagenet30 statistics with a
        # plain center crop; ``id_dataset`` is not consulted for them.
        transform = eval_augmentation("Imagenet30", img_size, True, False)
    else:
        transform = eval_augmentation(id_dataset, img_size)

    if data_set in ("CIFAR10", "CIFAR100"):
        if data_set == "CIFAR10":
            selected_data, root = CIFAR10Instance, "/data/datasets/CIFAR10"
        else:
            selected_data, root = CIFAR100Instance, "/data/datasets/CIFAR100"
        training_data = selected_data(root=root, train=True, download=False, transform=transform)
        test_data = selected_data(root=root, train=False, download=False, transform=transform)

    elif data_set == "SVHN":
        selected_data = SVHNInstance
        root = "/data/datasets/svhn-data"
        training_data = selected_data(root=root, split="train", download=False, transform=transform)
        test_data = selected_data(root=root, split='test', download=False, transform=transform)

    elif data_set == "LSUNResize":
        selected_data = LSUNResize
        root = "/data/datasets/LSUN_datasets/LSUN_resize"
        # Train and test both point at the same folder.
        training_data = selected_data(root=root, transform=transform)
        test_data = selected_data(root=root, transform=transform)

    elif data_set == "Imagenet30":
        datadir = "/data/datasets"
        training_data = ImageNet30(root=os.path.join(datadir, 'ImageNet30', 'train'),
                                   transform=transform)
        test_data = ImageNet30(root=os.path.join(datadir, 'ImageNet30', 'val'),
                               transform=transform)
        selected_data = training_data

    else:  # "CUB"
        training_data = None
        test_data = CUB(root="/data/datasets/cub200/CUB_200_2011/images",
                        transform=transform)
        selected_data = test_data

    return training_data, test_data, selected_data, transform


def _dataset_targets(dataset):
    """Return the dataset's labels as an array, or ``None`` if it exposes none."""
    for attr in ("targets", "labels"):
        values = getattr(dataset, attr, None)
        if values is not None:
            return np.array(values)
    return None


def _extract_features(model, dataset, batch_size, gpu):
    """Run ``model`` over ``dataset`` in order and return per-batch numpy outputs."""
    loader = DataLoader(dataset, batch_size, shuffle=False)
    feats = []
    with torch.no_grad():
        for x, _ in tqdm(loader):
            # Move each batch off the GPU immediately so device memory does
            # not grow with the size of the dataset.
            feats.append(model(x.to(gpu)).detach().cpu().numpy())
    return feats


def get_feats(data_set, id_dataset, model, gpu, img_size):
    """Compute model outputs for the train and test splits of ``data_set``.

    Args:
        data_set: one of "CIFAR10", "CIFAR100", "SVHN", "LSUNResize",
            "Imagenet30", "CUB".
        id_dataset: dataset key whose normalisation statistics are used
            (ignored for Imagenet30 and CUB).
        model: module already switched to eval mode.
        gpu: device argument passed to ``Tensor.to``.
        img_size: size forwarded to ``eval_augmentation``.

    Returns:
        ``(train_feats, test_feats, train_targets, test_targets)``.
        ``train_feats`` is an empty list when there is no training split.
        Targets are label arrays for CIFAR10, CIFAR100 and SVHN and
        ``None`` for the folder-based datasets.

    Raises:
        ValueError: if the model is in training mode or ``data_set`` is unknown.
    """
    if model.training:
        raise ValueError("Model not in eval mode")
    print("model in eval mode")

    training_data, test_data, selected_data, transform = _build_eval_datasets(
        data_set, id_dataset, img_size)

    train_targets = None
    test_targets = None
    if data_set in ("CIFAR10", "CIFAR100", "SVHN"):
        train_targets = _dataset_targets(training_data)
        test_targets = _dataset_targets(test_data)

    batch_size = 128 if data_set in ("Imagenet30", "CUB") else 256 * 4

    print(f"selected data: {str(selected_data)}")
    print(f"dataset : {data_set}")

    train_feats = []
    if training_data is not None and len(training_data) > 0:
        train_feats = np.vstack(_extract_features(model, training_data, batch_size, gpu))
    test_feats = np.vstack(_extract_features(model, test_data, batch_size, gpu))

    # Log the transform that was actually applied to the images.
    print(f"data augmentation: {str(transform)}")

    return train_feats, test_feats, train_targets, test_targets


def run_clustering(data, num_cluster):
    """Cluster ``data`` with ``run_kmeans`` and unpack its result.

    Returns:
        ``(im2cluster, prototypes, density)`` where ``im2cluster`` and
        ``density`` are flattened arrays and ``prototypes`` is the first
        entry of the result's ``'centroids'``.
    """
    result = run_kmeans(data, num_cluster)
    return (
        np.array(result['im2cluster']).flatten(),
        np.array(result['centroids'][0]),
        np.array(result['density']).flatten(),
    )


def softmax_t(logits, temp=1):
    """Softmax over the last axis after dividing ``logits`` by ``temp``.

    The per-row maximum is subtracted before exponentiation so large
    logits do not overflow.
    """
    scaled = logits / temp
    shifted = scaled - np.max(scaled, axis=-1, keepdims=True)
    weights = np.exp(shifted)
    return weights / np.sum(weights, axis=-1, keepdims=True)


def cluster_purity(kmeans_targets, in_targets):
    """Purity of every cluster: share of its members that carry its most common label.

    Args:
        kmeans_targets: cluster id of each sample.
        in_targets: array of labels, indexable by a mask over ``kmeans_targets``.

    Returns:
        List with one purity value (rounded to 5 decimals) per distinct
        cluster id, ordered by ascending cluster id.
    """
    purity_list = []
    for cluster_id in np.unique(kmeans_targets).astype(int):
        members = in_targets[np.equal(cluster_id, kmeans_targets)]
        _, label_counts = np.unique(members, return_counts=True)
        purity_list.append(np.round(label_counts.max() / len(members), 5))
    return purity_list

## Load Model

In [11]:
# Experiment configuration shared by the cells below.
# NOTE(review): absolute, machine-specific checkpoint directory.
model_path = "/data/temiloluwa.adeoti/fourth_experiments/CIFAR100_clus_1024_neg_768/exp_1/checkpoints"
ckpt = 199  # checkpoint number handed to load_model
encoder_type = "key"  # also part of the cache-file prefix
arch = "resnet50"
num_classes = 128  # forwarded to load_model
output_layer = "avg_pool"  # also part of the cache-file prefix
img_size = 224  # image size forwarded to get_feats
gpu = 2  # device index used for model.cuda and for moving batches
cal_feats = True  # when False the model is not loaded at all
if cal_feats:
    model = load_model(model_path, ckpt, arch, num_classes, output_layer, encoder_type)
    model.cuda(gpu)
    # get_feats refuses to run unless the model is in eval mode;
    # assigning to `_` keeps the model repr out of the cell output.
    _ = model.eval()

loaded contrastive model @ ckpt: /data/temiloluwa.adeoti/fourth_experiments/CIFAR100_clus_1024_neg_768/exp_1/checkpoints/checkpoint_0199.pth.tar
Outputing contrastive model from Avgpool Layer


## Load in-distribution (ID) features

In [30]:
# load id
# In-distribution dataset and the cache-file prefix derived from it.
iid = "CIFAR10"
save_feats = True
prefix = "{}_{}_{}".format(iid, encoder_type, output_layer)
prefix

'CIFAR10_key_avg_pool'

In [31]:
#_dat = CIFAR100("/data/datasets/CIFAR100", train=False).targets
#save_features("../cache", f"{prefix}_id_test_targ.npy", id_test_targ)

In [32]:
# Either compute the in-distribution features (optionally caching them) or
# read them back from the cache. Saving only happens for freshly computed
# features, and freshly computed features are never replaced by cached ones.
if cal_feats:
    id_train, id_test, id_train_targ, id_test_targ = get_feats(iid, iid, model, gpu, img_size)

    if save_feats:
        save_features("../cache", f"{prefix}_id_train.npy", id_train)
        save_features("../cache", f"{prefix}_id_test.npy", id_test)
        save_features("../cache", f"{prefix}_id_train_targ.npy", id_train_targ)
        save_features("../cache", f"{prefix}_id_test_targ.npy", id_test_targ)
else:
    id_train = load_features("../cache", f"{prefix}_id_train.npy")
    id_test = load_features("../cache", f"{prefix}_id_test.npy")
    id_train_targ = load_features("../cache", f"{prefix}_id_train_targ.npy")
    id_test_targ = load_features("../cache", f"{prefix}_id_test_targ.npy")

model in eval mode
using center crop
using resize and center crop
using center crop
using resize and center crop


  0%|          | 0/49 [00:00<?, ?it/s]

selected data: <class '__main__.CIFAR10Instance'>
dataset : CIFAR10


100%|██████████| 49/49 [00:54<00:00,  1.11s/it]
100%|██████████| 10/10 [00:11<00:00,  1.10s/it]


using center crop
using resize and center crop
data augmentation: Compose(
    Resize(size=256, interpolation=bilinear)
    CenterCrop(size=(224, 224))
    ToTensor()
    Normalize(mean=[0.491, 0.482, 0.447], std=[0.247, 0.243, 0.262])
)


## Load OOD

In [33]:
# load ood
# Compute (or load) features for every out-of-distribution dataset.
oods = ["CIFAR100", "SVHN",  "LSUNResize"]
save_feats = True
# Test features of every OOD dataset, keyed by dataset name, so that earlier
# datasets are not lost when the loop moves on. `ood_train` / `ood_test`
# still hold the last dataset after the loop, as before.
ood_test_by_name = {}
for ood in oods:
    prefix = f"{ood}_{encoder_type}_{output_layer}"

    if cal_feats:
        ood_train, ood_test = get_feats(ood, iid, model, gpu, img_size)[:2]

        # Only freshly computed features are written to the cache.
        if save_feats:
            save_features("../cache", f"{prefix}_ood_train.npy", ood_train)
            save_features("../cache", f"{prefix}_ood_test.npy", ood_test)
    else:
        ood_train = load_features("../cache", f"{prefix}_ood_train.npy")
        ood_test = load_features("../cache",  f"{prefix}_ood_test.npy")

    ood_test_by_name[ood] = ood_test

model in eval mode
using center crop
using resize and center crop
using center crop
using resize and center crop


  0%|          | 0/49 [00:00<?, ?it/s]

selected data: <class '__main__.CIFAR100Instance'>
dataset : CIFAR100


100%|██████████| 49/49 [00:54<00:00,  1.11s/it]
100%|██████████| 10/10 [00:11<00:00,  1.10s/it]


using center crop
using resize and center crop
data augmentation: Compose(
    Resize(size=256, interpolation=bilinear)
    CenterCrop(size=(224, 224))
    ToTensor()
    Normalize(mean=[0.491, 0.482, 0.447], std=[0.247, 0.243, 0.262])
)
model in eval mode
using center crop
using resize and center crop
using center crop
using resize and center crop


  0%|          | 0/72 [00:00<?, ?it/s]

selected data: <class '__main__.SVHNInstance'>
dataset : SVHN


100%|██████████| 72/72 [01:19<00:00,  1.11s/it]
100%|██████████| 26/26 [00:28<00:00,  1.08s/it]


using center crop
using resize and center crop
data augmentation: Compose(
    Resize(size=256, interpolation=bilinear)
    CenterCrop(size=(224, 224))
    ToTensor()
    Normalize(mean=[0.491, 0.482, 0.447], std=[0.247, 0.243, 0.262])
)


  0%|          | 0/10 [00:00<?, ?it/s]

model in eval mode
using center crop
using resize and center crop
using center crop
using resize and center crop
selected data: <class '__main__.LSUNResize'>
dataset : LSUNResize


100%|██████████| 10/10 [00:12<00:00,  1.23s/it]
100%|██████████| 10/10 [00:12<00:00,  1.23s/it]


using center crop
using resize and center crop
data augmentation: Compose(
    Resize(size=256, interpolation=bilinear)
    CenterCrop(size=(224, 224))
    ToTensor()
    Normalize(mean=[0.491, 0.482, 0.447], std=[0.247, 0.243, 0.262])
)


## Train Features

In [None]:
#train_linear_model(id_train, id_train_targ, id_test, id_test_targ, 3)

## Get Prototypes

In [None]:
# Switch for the clustering cell below; set to False to skip computing prototypes.
get_prototypes = True

In [None]:
# Cluster the in-distribution training features into `num_cluster` groups.
# The three outputs are only defined when `get_prototypes` is True.
# norm_feats comes from outside this notebook -- presumably feature
# normalisation; confirm in its defining module.
if get_prototypes:
    num_cluster = 768
    id_im2cluster, id_prototypes, id_density = run_clustering(norm_feats(id_train), num_cluster)

## Perform OOD detection

In [None]:
# Build the evaluator from the in-distribution train/test features and labels.
# OodEvaluator is defined outside this notebook; the keyword meanings below
# are not visible here -- check its definition before changing them.
oe = OodEvaluator(norm_feats(id_train), norm_feats(id_test), id_train_targ, id_test_targ,
                num_clusters = 768,
                pca_com = 10,
                cluster_method = "kmeans",
                means = None,
                im2cluster = None,
                clip = 0.5,
                clip_metric = "cosine")

# NOTE(review): `ood_test` holds only the last dataset processed by the OOD
# loading loop, so a single OOD set is scored here -- confirm this is intended.
oe(norm_feats(ood_test), "cosine")
oe.get_scores()
# The result is stored but not displayed by this cell.
res_df = oe.get_auroc()