In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import open_clip
from torchvision import datasets, transforms
import torch
from PIL import Image
import open_clip
from tqdm import tqdm
from torch.utils.data import DataLoader
import numpy as np
from sklearn import calibration
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import torch.nn as nn
import torch.optim as optim
from sklearn.tree import DecisionTreeRegressor as DT
from sklearn.isotonic import IsotonicRegression as IR
from scipy.stats import norm
from tqdm import tqdm
import pandas as pd

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
from util import *

In [4]:
device = torch.device('cuda:6' if torch.cuda.is_available() else 'cpu')
batch_size = 128

In [5]:
model_name = 'ViT-B-16'
pretrained_dset = 'laion400m_e31'

model, _, preprocess = open_clip.create_model_and_transforms(model_name,
    pretrained=pretrained_dset,
    device=device)
tokenizer = open_clip.get_tokenizer(model_name)

In [6]:
imagenet_test = datasets.ImageFolder(f'/home/ubuntu/data/Imagenet/ILSVRC/Data/CLS-LOC/train/', transform=preprocess)

In [7]:
text_template = 'a photo of a {}.'
np.random.seed(0)

subset_in_classes = []
for c in tqdm(range(len(imagenet_test.classes))):
    appending = np.random.choice(np.where(np.array(imagenet_test.targets) == c)[0], 50, replace=False)
    subset_in_classes.extend(appending.tolist())
subset = torch.utils.data.Subset(imagenet_test, subset_in_classes)
subset.classes = imagenet_test.classes
imagenet_test = subset
len(imagenet_test)

100%|██████████| 1000/1000 [01:18<00:00, 12.74it/s]


50000

In [5]:
models_to_consider = [
    ('ViT-B-16' , 'laion2b_s34b_b88k'),
    ('ViT-L-14' , 'laion2b_s32b_b82k'),
    ('ViT-B-32' , 'laion2b_s34b_b79k'),

    ('ViT-B-16' , 'openai'),
    ('ViT-L-14' , 'openai'),
    ('ViT-B-32' , 'openai'),

    ('ViT-B-16' , 'laion400m_e31'),
    ('ViT-L-14' , 'laion400m_e31'),
    ('ViT-B-32' , 'laion400m_e31'),

    ('RN50', 'openai'),
    ('RN50', 'yfcc15m'),
    ('RN50', 'cc12m'),

    ('ViT-H-14', 'laion2b_s32b_b79k')
]

In [27]:
#imagenet_mapping = map_imagenet_to_readable_label()
#imagenet_test.classes = [imagenet_mapping[x] for x in imagenet_test.classes ]

all_temps = {}
for model_name, pretrained_dset in tqdm(models_to_consider):
    model, _, preprocess = open_clip.create_model_and_transforms(model_name,
        pretrained=pretrained_dset,
        device=device)
    tokenizer = open_clip.get_tokenizer(model_name)
    imagenet_test.dataset.transform = preprocess
    
    image_features, actual = get_image_features(model, imagenet_test, batch_size=128,
        device = device)

    actual = torch.IntTensor(actual).to(device).long()

    text = tokenizer([text_template.replace('{}',x) for x in imagenet_test.classes])
    with torch.no_grad(), torch.cuda.amp.autocast():
        text_features = model.encode_text(text.to(device))
        text_features /= text_features.norm(dim=-1, keepdim=True)
        
    text_probs = (100.0 * image_features @ text_features.T)

    ## Setup LBGFS
    temperature = nn.Parameter((torch.ones(1)).to(device))
    args = {'temperature': temperature}
    criterion = nn.CrossEntropyLoss()

    # Removing strong_wolfe line search results in jump after 50 epochs
    optimizer = optim.LBFGS([temperature], lr=0.001, max_iter=1000, line_search_fn='strong_wolfe')

    temps = []
    losses = []
    def _eval():
        loss = criterion(T_scaling(text_probs, args), actual)
        loss.backward()
        temps.append(temperature.item())
        losses.append(loss)
        return loss
    optimizer.step(_eval)
    all_temps[model_name+';' + pretrained_dset] = temperature.item()
all_temps

100%|██████████| 13/13 [29:06<00:00, 134.32s/it]


{'ViT-B-16;laion2b_s34b_b88k': 1.3878337144851685,
 'ViT-L-14;laion2b_s32b_b82k': 1.4129570722579956,
 'ViT-B-32;laion2b_s34b_b79k': 1.4092495441436768,
 'ViT-B-16;openai': 0.9825258851051331,
 'ViT-L-14;openai': 1.0273551940917969,
 'ViT-B-32;openai': 0.9789162874221802,
 'ViT-B-16;laion400m_e31': 1.6131701469421387,
 'ViT-L-14;laion400m_e31': 1.7537566423416138,
 'ViT-B-32;laion400m_e31': 1.37242591381073,
 'RN50;openai': 0.9742085933685303,
 'RN50;yfcc15m': 2.5651497840881348,
 'RN50;cc12m': 2.662893295288086,
 'ViT-H-14;laion2b_s32b_b79k': 1.5119352340698242}

In [17]:
dataset_eces = {}
dataset_accs = {}
for dataset_name in ['CIFAR100', 'CIFAR10', 'Food101', 'SUN397']:
    dset,_ = get_test_set(dataset_name, None)
    model_eces = {}
    model_accs = {}
    for model_legend, temp in tqdm(all_temps.items()):
        model_name, pretrained_dset = model_legend.split(';')
        model, _, preprocess = open_clip.create_model_and_transforms(model_name,
            pretrained=pretrained_dset,
            device=device)
        tokenizer = open_clip.get_tokenizer(model_name)
        dset.transform = preprocess
        _ , templates = get_openai_prompts(dataset_name)

        template_eces = []
        template_accs = []
        image_features, actual = get_image_features(
            model,  dset,  batch_size=batch_size, device=device
        )
        for text_template in templates:
            #predictions, actual, probs = get_preds(model, tokenizer, dset, 
            #    text_template=text_template, temp_scaling=temp, device=device)

            predictions, probs = get_preds_from_img_features(model, tokenizer, dset, image_features, text_template=text_template, temp_scaling=temp,
                device = device)

            ECE, _, acc = get_metrics(predictions, actual, probs)
            template_eces.append(ECE)
            template_accs.append(acc)
        
        model_eces[model_legend] = template_eces
        model_accs[model_legend] = template_accs
    dataset_eces[dataset_name] = model_eces
    dataset_accs[dataset_name] = model_eces

Files already downloaded and verified


100%|██████████| 13/13 [06:40<00:00, 30.84s/it]


Files already downloaded and verified


100%|██████████| 13/13 [06:38<00:00, 30.66s/it]
100%|██████████| 13/13 [14:19<00:00, 66.15s/it]


In [7]:
def run_supervised_fromimagefeatures(model, tokenizer, text_template, image_features, actual, device):
    actual = torch.IntTensor(actual).to(device).long()

    text = tokenizer([text_template.replace('{}',x) for x in dset.classes])
    with torch.no_grad(), torch.cuda.amp.autocast():
        text_features = model.encode_text(text.to(device))
        text_features /= text_features.norm(dim=-1, keepdim=True)
        
    text_probs = (100.0 * image_features @ text_features.T)

    ## Setup LBGFS
    temperature = nn.Parameter((torch.ones(1)).to(device))
    args = {'temperature': temperature}
    criterion = nn.CrossEntropyLoss()

    # Removing strong_wolfe line search results in jump after 50 epochs
    optimizer = optim.LBFGS([temperature], lr=0.001, max_iter=1000, line_search_fn='strong_wolfe')

    temps = []
    losses = []
    def _eval():
        loss = criterion(T_scaling(text_probs, args), actual)
        loss.backward()
        temps.append(temperature.item())
        losses.append(loss)
        return loss
    optimizer.step(_eval)
    return temperature.item()

In [32]:
#dataset_eces_supervised = {}
#dataset_temps_supervised = {}
for dataset_name in ['DTD', 'SUN397',]:#['CIFAR100', 'CIFAR10', 'Food101', 'SUN397']:
    #if dataset_name in dataset_eces_supervised:
    #    continue
    dset,_ = get_test_set(dataset_name, None)
    classes , templates = get_openai_prompts(dataset_name)
    val_dset = get_val_set(dataset_name, classes, None)

    model_eces = {}
    model_temps = {}
    for model_legend, _ in tqdm(all_temps.items()):
        model_name, pretrained_dset = model_legend.split(';')
        model, _, preprocess = open_clip.create_model_and_transforms(model_name,
            pretrained=pretrained_dset,
            device=device)
        tokenizer = open_clip.get_tokenizer(model_name)
        if isinstance(dset, torch.utils.data.dataset.Subset):
            dset.dataset.transform = preprocess
        else:
            dset.transform = preprocess

        if isinstance(val_dset, torch.utils.data.dataset.Subset):
            val_dset.dataset.transform = preprocess
        else:
            val_dset.transform = preprocess
        

        template_eces = []
        template_temps = []
        image_features, actual = get_image_features(
            model,  dset,  batch_size=batch_size, device=device
        )
        val_image_features, actual_val = get_image_features(
            model,  val_dset,  batch_size=batch_size, device=device
        )
        for text_template in templates:
            #predictions, actual, probs = get_preds(model, tokenizer, dset, 
            #    text_template=text_template, temp_scaling=temp, device=device)
            sup_temp = run_supervised_fromimagefeatures(model, tokenizer, text_template, val_image_features, actual_val, device)
            predictions, probs = get_preds_from_img_features(model, tokenizer, dset, image_features, text_template=text_template, temp_scaling=sup_temp,
                device = device)

            ECE, _, acc = get_metrics(predictions, actual, probs)
            template_eces.append(ECE)
            template_temps.append(sup_temp)
        
        model_eces[model_legend] = template_eces
        model_temps[model_legend] = template_temps
    dataset_eces_supervised[dataset_name] = model_eces
    print(pd.DataFrame(model_eces))
    dataset_temps_supervised[dataset_name] = model_temps

100%|██████████| 13/13 [09:20<00:00, 43.11s/it]


   ViT-B-16;laion2b_s34b_b88k  ViT-L-14;laion2b_s32b_b82k  \
0                    0.052960                    0.044266   
1                    0.015577                    0.027554   
2                    0.046688                    0.040429   
3                    0.034539                    0.067685   
4                    0.033247                    0.079353   
5                    0.025430                    0.067133   
6                    0.048645                    0.044957   
7                    0.025491                    0.067092   

   ViT-B-32;laion2b_s34b_b79k  ViT-B-16;openai  ViT-L-14;openai  \
0                    0.050882         0.036334         0.026013   
1                    0.023034         0.021782         0.019164   
2                    0.034367         0.032281         0.023873   
3                    0.042729         0.053331         0.018901   
4                    0.056648         0.036224         0.043715   
5                    0.032343         0.021942  

100%|██████████| 13/13 [30:43<00:00, 141.82s/it]

   ViT-B-16;laion2b_s34b_b88k  ViT-L-14;laion2b_s32b_b82k  \
0                    0.009581                    0.012874   
1                    0.012493                    0.013935   

   ViT-B-32;laion2b_s34b_b79k  ViT-B-16;openai  ViT-L-14;openai  \
0                    0.013423         0.038983         0.043229   
1                    0.013218         0.024134         0.034394   

   ViT-B-32;openai  ViT-B-16;laion400m_e31  ViT-L-14;laion400m_e31  \
0         0.036582                0.008826                0.015309   
1         0.032753                0.011028                0.013897   

   ViT-B-32;laion400m_e31  RN50;openai  RN50;yfcc15m  RN50;cc12m  \
0                0.011505     0.028224      0.019826    0.020423   
1                0.013701     0.025124      0.021942    0.028202   

   ViT-H-14;laion2b_s32b_b79k  
0                    0.012386  
1                    0.017874  





In [35]:
pd.DataFrame(dataset_eces_supervised['SUN397'])

Unnamed: 0,ViT-B-16;laion2b_s34b_b88k,ViT-L-14;laion2b_s32b_b82k,ViT-B-32;laion2b_s34b_b79k,ViT-B-16;openai,ViT-L-14;openai,ViT-B-32;openai,ViT-B-16;laion400m_e31,ViT-L-14;laion400m_e31,ViT-B-32;laion400m_e31,RN50;openai,RN50;yfcc15m,RN50;cc12m,ViT-H-14;laion2b_s32b_b79k
0,0.009581,0.012874,0.013423,0.038983,0.043229,0.036582,0.008826,0.015309,0.011505,0.028224,0.019826,0.020423,0.012386
1,0.012493,0.013935,0.013218,0.024134,0.034394,0.032753,0.011028,0.013897,0.013701,0.025124,0.021942,0.028202,0.017874


In [32]:
dataset_eces_uncalib = {}
for dataset_name in ['CIFAR100', 'CIFAR10', 'Food101', 'SUN397']:
    dset,_ = get_test_set(dataset_name, None)
    model_eces = {}
    for model_legend, temp in tqdm(all_temps.items()):
        model_name, pretrained_dset = model_legend.split(';')
        model, _, preprocess = open_clip.create_model_and_transforms(model_name,
            pretrained=pretrained_dset,
            device=device)
        tokenizer = open_clip.get_tokenizer(model_name)
        if isinstance(dset, torch.utils.data.dataset.Subset):
            dset.dataset.transform = preprocess
        else:
            dset.transform = preprocess
        _ , templates = get_openai_prompts(dataset_name)

        template_eces = []
        image_features, actual = get_image_features(
            model,  dset,  batch_size=batch_size, device=device
        )
        for text_template in templates:
            #predictions, actual, probs = get_preds(model, tokenizer, dset, 
            #    text_template=text_template, temp_scaling=temp, device=device)

            predictions, probs = get_preds_from_img_features(model, tokenizer, dset, image_features, text_template=text_template, 
                #temp_scaling=temp,
                device = device)

            ECE, _, acc = get_metrics(predictions, actual, probs)
            template_eces.append(ECE)
        
        model_eces[model_legend] = template_eces
    dataset_eces_uncalib[dataset_name] = model_eces

Files already downloaded and verified


100%|██████████| 13/13 [06:53<00:00, 31.78s/it]


Files already downloaded and verified


100%|██████████| 13/13 [06:41<00:00, 30.90s/it]
100%|██████████| 13/13 [14:21<00:00, 66.28s/it]
  0%|          | 0/13 [02:44<?, ?it/s]


KeyboardInterrupt: 

In [35]:
for dataset_name in ['SUN397']:
    dset,_ = get_test_set(dataset_name, None)
    model_eces = {}
    for model_legend, temp in tqdm(all_temps.items()):
        model_name, pretrained_dset = model_legend.split(';')
        model, _, preprocess = open_clip.create_model_and_transforms(model_name,
            pretrained=pretrained_dset,
            device=device)
        tokenizer = open_clip.get_tokenizer(model_name)
        if isinstance(dset, torch.utils.data.dataset.Subset):
            dset.dataset.transform = preprocess
        else:
            dset.transform = preprocess
        _ , templates = get_openai_prompts(dataset_name)

        template_eces = []
        image_features, actual = get_image_features(
            model,  dset,  batch_size=batch_size, device=device
        )
        for text_template in templates:
            #predictions, actual, probs = get_preds(model, tokenizer, dset, 
            #    text_template=text_template, temp_scaling=temp, device=device)

            predictions, probs = get_preds_from_img_features(model, tokenizer, dset, image_features, text_template=text_template, 
                #temp_scaling=temp,
                device = device)

            ECE, _, acc = get_metrics(predictions, actual, probs)
            template_eces.append(ECE)
        
        model_eces[model_legend] = template_eces
    dataset_eces_uncalib[dataset_name] = model_eces

  8%|▊         | 1/13 [02:47<33:27, 167.30s/it]

In [8]:
image_features, actual = get_image_features(model, imagenet_test, batch_size=128,
        device = device)

In [9]:
actual = torch.IntTensor(actual).to(device).long()
actual.shape

torch.Size([50000])

In [10]:
imagenet_mapping = map_imagenet_to_readable_label()
imagenet_test.classes = [imagenet_mapping[x] for x in imagenet_test.classes ]

In [11]:
text = tokenizer([text_template.replace('{}',x) for x in imagenet_test.classes])
with torch.no_grad(), torch.cuda.amp.autocast():
    text_features = model.encode_text(text.to(device))
    text_features /= text_features.norm(dim=-1, keepdim=True)
    
text_probs = (100.0 * image_features @ text_features.T)

In [12]:
## Setup LBGFS
temperature = nn.Parameter((torch.ones(1)).to(device))
args = {'temperature': temperature}
criterion = nn.CrossEntropyLoss()

# Removing strong_wolfe line search results in jump after 50 epochs
optimizer = optim.LBFGS([temperature], lr=0.001, max_iter=1000, line_search_fn='strong_wolfe')

temps = []
losses = []
def _eval():
    loss = criterion(T_scaling(text_probs, args), actual)
    loss.backward()
    temps.append(temperature.item())
    losses.append(loss)
    return loss
optimizer.step(_eval)
temperature.item()

1.6131701469421387

In [13]:
predictions, probs = get_preds_from_img_features(
    model, tokenizer, imagenet_test, image_features, text_template=text_template, temp_scaling=temperature.item(), device = device
)
get_metrics(predictions, actual, probs)

(0.620787089606212, 0.958984375, 0.0)

In [14]:
cifar_test, num_classes = get_test_set('CIFAR100', preprocess)
predictions, actual, probs = get_preds(model, tokenizer, cifar_test, 
    text_template=text_template, temp_scaling=temperature.item(), device=device)
get_metrics(predictions, actual, probs)

Files already downloaded and verified


(0.025181555526703616, 0.057354552355015076, 0.7139)

In [15]:
cifar_test, num_classes = get_test_set('CIFAR10', preprocess)
predictions, actual, probs = get_preds(model, tokenizer, cifar_test, 
    text_template=text_template, temp_scaling=temperature.item(), device=device)
get_metrics(predictions, actual, probs)

Files already downloaded and verified


(0.007352347984910052, 0.18847142159938812, 0.9174)