## Evaluate models based on classifier layer

In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from tqdm import tqdm

import matplotlib.pyplot as plt

import seaborn as sns
sns.set_theme(color_codes=True)
import os
import sys

import transformers
from transformers import AutoModelForImageClassification, AutoConfig, AutoFeatureExtractor
from transformers.utils import logging
from transformers import DefaultDataCollator

# Only let transformers report errors, and hide its progress bars.
# `logging` here is transformers.utils.logging (imported above), not the stdlib module.
logging.set_verbosity(logging.ERROR)
logging.disable_progress_bar()

# Register the parent of the working directory on the import path, at index 1.
# NOTE(review): presumably this is what makes the `src.*` imports below resolve -- confirm.
p = os.path.abspath(os.pardir)
sys.path.insert(1, p)

import torchvision.transforms as transforms
from torchvision.transforms import RandomResizedCrop, Compose, Normalize, ToTensor
import evaluate
from src.utils.utils import *
from src.wordnet_ontology.wordnet_ontology import WordnetOntology

import math
import copy
from collections import defaultdict

import random
import torchvision
from datasets import Image
from datasets import load_dataset 
from evaluate import evaluator

# Experiment constants.
seed = 7631
# 5% of 556, truncated to an int (27).
# NOTE(review): 556 is presumably the total number of classes -- confirm.
n_excluded_classes = int(556 * 0.05)
N_EXAMPLES = 32

# Seed every random number generator used in this notebook with the same value,
# in the order: Python `random`, NumPy, PyTorch (CPU), PyTorch (current CUDA device).
for _seed_rng in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed):
    _seed_rng(seed)

# Ask cuDNN to use deterministic algorithms.
torch.backends.cudnn.deterministic = True

%load_ext autoreload
%autoreload 2

# Run on the GPU when PyTorch can see one, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [2]:
# Build the WordNet ontology helper from the ImageNet synset mapping file.
mapping_filename = './data/external/imagenet/LOC_synset_mapping.txt'
wn = WordnetOntology(mapping_filename)

# ImageNet-Sketch ships a single 'train' split; it is cached under ./cache/.
sketch = load_dataset("imagenet_sketch", split='train', cache_dir='./cache/')

# NOTE(review): torch.load unpickles the file, which can execute arbitrary code;
# only load vocab files from a trusted source.
vocab = torch.load('./models/vocab.pt')
NUM_CLASSES = len(vocab)


def _relabel_with_hypernym(example):
    """Replace the dataset label with the vocab index of its hypernym.

    Looks up the class for the original label index, takes its hypernym via
    the ontology helper, and maps that hypernym through `vocab`.
    """
    original_class = wn.class_for_index[example['label']]
    hypernym = wn.hypernym(original_class)
    return {'label': vocab[hypernym]}


sketch = sketch.map(_relabel_with_hypernym)

Found cached dataset imagenet_sketch (/mnt/HDD/kevinds/sketch/./cache/imagenet_sketch/default/0.0.0/9bbda26372327ae1daa792112c8bbd2545a91b9f397ea6f285576add0a70ab6e)


  0%|          | 0/50889 [00:00<?, ?ex/s]

In [3]:
# Distinct label ids present in the relabelled dataset.
_classes = list(set(sketch['label']))

# Draw `n_excluded_classes` labels to hold out, one random.choice call per draw.
# NOTE(review): random.choice samples with replacement, so the same label can be
# drawn twice and fewer than `n_excluded_classes` distinct classes may be excluded.
# Left as-is because changing the draw would change which classes are held out --
# confirm against the split used when the checkpoint was trained.
excluded_classes = []
for _draw in range(n_excluded_classes):
    excluded_classes.append(random.choice(_classes))

# Split the dataset, passing the drawn labels as `excluded_labels`.
dt = train_test_split(sketch, excluded_labels=excluded_classes)
train = dt['train']
test = dt['test']

In [7]:
# Load the feature-extractor config (image size, normalisation statistics) and
# the fine-tuned classifier checkpoint for this seed.
torch.hub.set_dir('../cache')
feature_extractor = AutoFeatureExtractor.from_pretrained("google/vit-base-patch16-224-in21k")
model = AutoModelForImageClassification.from_pretrained(
    f"./models/contrastive-classifier-{seed}/last-checkpoint",
    num_labels=NUM_CLASSES,
    label2id=vocab.get_stoi(),
    id2label=dict(enumerate(vocab.get_itos()))
)

# `feature_extractor.size` is a plain int in older transformers releases and a
# {"height": ..., "width": ...} dict in newer ones. Accept both so that Resize
# always receives an (h, w) tuple of ints.
_extractor_size = feature_extractor.size
if isinstance(_extractor_size, dict):
    _resize_hw = (_extractor_size["height"], _extractor_size["width"])
else:
    _resize_hw = (_extractor_size, _extractor_size)

# Evaluation-time preprocessing: fixed resize (no random crop), conversion to a
# float tensor, then normalisation with the extractor's mean/std.
test_transforms = Compose([
    transforms.Resize(_resize_hw),
    transforms.PILToTensor(),
    transforms.ConvertImageDtype(torch.float),
    Normalize(mean=feature_extractor.image_mean, std=feature_extractor.image_std),
])

# Put the model in eval mode and move it to the selected device.
model.eval()
model.to(device)

# Partition the test examples into `seen` / `unseen` subsets relative to `train`.
seen, unseen = get_seen_unseen_split(train, test, label_col='label')

# Results are collected here, keyed by split name, by the cells below.
score = {}

  0%|          | 0/14 [00:00<?, ?ba/s]

  0%|          | 0/14 [00:00<?, ?ba/s]

In [None]:
# Evaluate the classifier on the whole `test` split.
# Two separate metric instances are loaded because add_batch accumulates state:
# one receives the top-1 prediction, the other the top-5 predictions.
# NOTE(review): "accuracyk" is presumably an accuracy@k metric -- confirm.
accuracy_1 = evaluate.load("KevinSpaghetti/accuracyk")
accuracy_5 = evaluate.load("KevinSpaghetti/accuracyk")

# Inference only: disable autograd so no graph is built for each forward pass.
with torch.no_grad():
    for row in tqdm(test):
        model_inputs = test_transforms(row['image'].convert('RGB')).to(device)
        # [None, ...] adds a leading batch axis of size 1.
        model_predictions = model(pixel_values=model_inputs[None, ...])
        # No .detach() needed: tensors produced under no_grad carry no graph.
        logits = model_predictions.get('logits').cpu()
        top1_pred = np.argmax(logits, axis=-1, keepdims=True)
        # Indices of the 5 largest logits (unordered within the top 5).
        top5_pred = np.argpartition(logits, -5, axis=-1)[:, -5:]
        accuracy_1.add_batch(predictions=top1_pred, references=[row['label']])
        accuracy_5.add_batch(predictions=top5_pred, references=[row['label']])

score['complete'] = {
    'top1': accuracy_1.compute()['accuracy'],
    'top5': accuracy_5.compute()['accuracy']
}

In [9]:
# Evaluate the classifier on the `seen` subset of the test split.
# Two separate metric instances are loaded because add_batch accumulates state:
# one receives the top-1 prediction, the other the top-5 predictions.
# NOTE(review): "accuracyk" is presumably an accuracy@k metric -- confirm.
accuracy_1 = evaluate.load("KevinSpaghetti/accuracyk")
accuracy_5 = evaluate.load("KevinSpaghetti/accuracyk")

# Inference only: disable autograd so no graph is built for each forward pass.
with torch.no_grad():
    for row in tqdm(seen):
        model_inputs = test_transforms(row['image'].convert('RGB')).to(device)
        # [None, ...] adds a leading batch axis of size 1.
        model_predictions = model(pixel_values=model_inputs[None, ...])
        # No .detach() needed: tensors produced under no_grad carry no graph.
        logits = model_predictions.get('logits').cpu()
        top1_pred = np.argmax(logits, axis=-1, keepdims=True)
        # Indices of the 5 largest logits (unordered within the top 5).
        top5_pred = np.argpartition(logits, -5, axis=-1)[:, -5:]
        accuracy_1.add_batch(predictions=top1_pred, references=[row['label']])
        accuracy_5.add_batch(predictions=top5_pred, references=[row['label']])

score['seen'] = {
    'top1': accuracy_1.compute()['accuracy'],
    'top5': accuracy_5.compute()['accuracy']
}

100%|█████████████████████████████████████| 10177/10177 [03:20<00:00, 50.70it/s]


In [10]:
# Evaluate the classifier on the `unseen` subset of the test split.
# Two separate metric instances are loaded because add_batch accumulates state:
# one receives the top-1 prediction, the other the top-5 predictions.
# NOTE(review): "accuracyk" is presumably an accuracy@k metric -- confirm.
accuracy_1 = evaluate.load("KevinSpaghetti/accuracyk")
accuracy_5 = evaluate.load("KevinSpaghetti/accuracyk")

# Inference only: disable autograd so no graph is built for each forward pass.
with torch.no_grad():
    for row in tqdm(unseen):
        model_inputs = test_transforms(row['image'].convert('RGB')).to(device)
        # [None, ...] adds a leading batch axis of size 1.
        model_predictions = model(pixel_values=model_inputs[None, ...])
        # No .detach() needed: tensors produced under no_grad carry no graph.
        logits = model_predictions.get('logits').cpu()
        top1_pred = np.argmax(logits, axis=-1, keepdims=True)
        # Indices of the 5 largest logits (unordered within the top 5).
        top5_pred = np.argpartition(logits, -5, axis=-1)[:, -5:]
        accuracy_1.add_batch(predictions=top1_pred, references=[row['label']])
        accuracy_5.add_batch(predictions=top5_pred, references=[row['label']])

score['unseen'] = {
    'top1': accuracy_1.compute()['accuracy'],
    'top5': accuracy_5.compute()['accuracy']
}

100%|███████████████████████████████████████| 2897/2897 [01:00<00:00, 48.09it/s]


In [11]:
# Display the collected top-1 / top-5 accuracies, keyed by split name.
score

{'complete': {'top1': 0.5797766559583907, 'top5': 0.7540156034878385},
 'seen': {'top1': 0.6753463692640267, 'top5': 0.8358062297337133},
 'unseen': {'top1': 0.24404556437694166, 'top5': 0.46668967897825336}}