In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import open_clip
from torchvision import datasets, transforms
import torch
from PIL import Image
import open_clip
from tqdm import tqdm
from torch.utils.data import DataLoader
import numpy as np
from sklearn import calibration
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import torch.nn as nn
import torch.optim as optim
from sklearn.tree import DecisionTreeRegressor as DT
from sklearn.isotonic import IsotonicRegression as IR
from scipy.stats import norm
from tqdm import tqdm
import pandas as pd

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
from util import *

In [4]:
device = torch.device('cuda:2' if torch.cuda.is_available() else 'cpu')
batch_size = 128

In [7]:
model_name = 'ViT-B-16'
pretrained_dset = 'laion400m_e31'

model, _, preprocess = open_clip.create_model_and_transforms(model_name,
    pretrained=pretrained_dset,
    device=device)
tokenizer = open_clip.get_tokenizer(model_name)

In [6]:
imagenet_test = datasets.ImageFolder(f'/home/ubuntu/data/Imagenet/ILSVRC/Data/CLS-LOC/train/', transform=preprocess)

In [7]:
text_template = 'a photo of a {}.'
np.random.seed(0)

subset_in_classes = []
for c in tqdm(range(len(imagenet_test.classes))):
    appending = np.random.choice(np.where(np.array(imagenet_test.targets) == c)[0], 50, replace=False)
    subset_in_classes.extend(appending.tolist())
subset = torch.utils.data.Subset(imagenet_test, subset_in_classes)
subset.classes = imagenet_test.classes
imagenet_test = subset
len(imagenet_test)

100%|██████████| 1000/1000 [01:18<00:00, 12.74it/s]


50000

In [5]:
models_to_consider = [
    ('ViT-B-16' , 'laion2b_s34b_b88k'),
    ('ViT-L-14' , 'laion2b_s32b_b82k'),
    ('ViT-B-32' , 'laion2b_s34b_b79k'),

    ('ViT-B-16' , 'openai'),
    ('ViT-L-14' , 'openai'),
    ('ViT-B-32' , 'openai'),

    ('ViT-B-16' , 'laion400m_e31'),
    ('ViT-L-14' , 'laion400m_e31'),
    ('ViT-B-32' , 'laion400m_e31'),

    ('RN50', 'openai'),
    ('RN50', 'yfcc15m'),
    ('RN50', 'cc12m'),

    ('ViT-H-14', 'laion2b_s32b_b79k')
]

In [9]:
#imagenet_mapping = map_imagenet_to_readable_label()
#imagenet_test.classes = [imagenet_mapping[x] for x in imagenet_test.classes ]

cifar_val = datasets.CIFAR100('/home/ubuntu/data/', train = True, transform = None, download=True)
classes , templates = get_openai_prompts('CIFAR100')
text_template = templates[5]
all_temps = {}
for model_name, pretrained_dset in tqdm(models_to_consider):
    model, _, preprocess = open_clip.create_model_and_transforms(model_name,
        pretrained=pretrained_dset,
        device=device)
    tokenizer = open_clip.get_tokenizer(model_name)
    cifar_val.transform = preprocess
    cifar_val.classes = classes

    image_features, actual = get_image_features(model, cifar_val, batch_size=128,
        device = device)

    actual = torch.IntTensor(actual).to(device).long()

    text = tokenizer([text_template.replace('{}',x) for x in cifar_val.classes])
    with torch.no_grad(), torch.cuda.amp.autocast():
        text_features = model.encode_text(text.to(device))
        text_features /= text_features.norm(dim=-1, keepdim=True)
        
    text_probs = (100.0 * image_features @ text_features.T)

    ## Setup LBGFS
    temperature = nn.Parameter((torch.ones(1)).to(device))
    args = {'temperature': temperature}
    criterion = nn.CrossEntropyLoss()

    # Removing strong_wolfe line search results in jump after 50 epochs
    optimizer = optim.LBFGS([temperature], lr=0.001, max_iter=1000, line_search_fn='strong_wolfe')

    temps = []
    losses = []
    def _eval():
        loss = criterion(T_scaling(text_probs, args), actual)
        loss.backward()
        temps.append(temperature.item())
        losses.append(loss)
        return loss
    optimizer.step(_eval)
    all_temps[model_name+';' + pretrained_dset] = temperature.item()
all_temps

Files already downloaded and verified


100%|██████████| 13/13 [32:24<00:00, 149.58s/it]


{'ViT-B-16;laion2b_s34b_b88k': 1.2636255025863647,
 'ViT-L-14;laion2b_s32b_b82k': 1.269670009613037,
 'ViT-B-32;laion2b_s34b_b79k': 1.2540593147277832,
 'ViT-B-16;openai': 0.7175883650779724,
 'ViT-L-14;openai': 0.7447569966316223,
 'ViT-B-32;openai': 0.6858727335929871,
 'ViT-B-16;laion400m_e31': 1.5263614654541016,
 'ViT-L-14;laion400m_e31': 1.6955595016479492,
 'ViT-B-32;laion400m_e31': 1.2037781476974487,
 'RN50;openai': 0.7391236424446106,
 'RN50;yfcc15m': 2.5977625846862793,
 'RN50;cc12m': 2.781331777572632,
 'ViT-H-14;laion2b_s32b_b79k': 1.3318419456481934}

In [14]:
a = {'ViT-B-16;laion2b_s34b_b88k': 1.3878337144851685,
 'ViT-L-14;laion2b_s32b_b82k': 1.4129570722579956,
 'ViT-B-32;laion2b_s34b_b79k': 1.4092495441436768,
 'ViT-B-16;openai': 0.9825258851051331,
 'ViT-L-14;openai': 1.0273551940917969,
 'ViT-B-32;openai': 0.9789162874221802,
 'ViT-B-16;laion400m_e31': 1.6131701469421387,
 'ViT-L-14;laion400m_e31': 1.7537566423416138,
 'ViT-B-32;laion400m_e31': 1.37242591381073,
 'RN50;openai': 0.9742085933685303,
 'RN50;yfcc15m': 2.5651497840881348,
 'RN50;cc12m': 2.662893295288086,
 'ViT-H-14;laion2b_s32b_b79k': 1.5119352340698242}
b = {'ViT-B-16;laion2b_s34b_b88k': 1.253799319267273,
 'ViT-L-14;laion2b_s32b_b82k': 1.2685033082962036,
 'ViT-B-32;laion2b_s34b_b79k': 1.3079637289047241,
 'ViT-B-16;openai': 0.7161993384361267,
 'ViT-L-14;openai': 0.7225357890129089,
 'ViT-B-32;openai': 0.7408358454704285,
 'ViT-B-16;laion400m_e31': 1.5010323524475098,
 'ViT-L-14;laion400m_e31': 1.6865270137786865,
 'ViT-B-32;laion400m_e31': 1.23966383934021,
 'RN50;openai': 0.7067190408706665,
 'RN50;yfcc15m': 2.858851432800293,
 'RN50;cc12m': 2.6683664321899414,
 'ViT-H-14;laion2b_s32b_b79k': 1.3310267925262451}

from collections import defaultdict
res = defaultdict(list)
for k in a:
    res[k] = [a[k], b[k], all_temps[k]]

pd.DataFrame(res).transpose().rename(columns={0:'imagenet', 1:'cifar100',2: f"cifaro100 {text_template}"})

Unnamed: 0,imagenet,cifar100,cifaro100 a bad photo of a {}.
ViT-B-16;laion2b_s34b_b88k,1.387834,1.253799,1.263626
ViT-L-14;laion2b_s32b_b82k,1.412957,1.268503,1.26967
ViT-B-32;laion2b_s34b_b79k,1.40925,1.307964,1.254059
ViT-B-16;openai,0.982526,0.716199,0.717588
ViT-L-14;openai,1.027355,0.722536,0.744757
ViT-B-32;openai,0.978916,0.740836,0.685873
ViT-B-16;laion400m_e31,1.61317,1.501032,1.526361
ViT-L-14;laion400m_e31,1.753757,1.686527,1.69556
ViT-B-32;laion400m_e31,1.372426,1.239664,1.203778
RN50;openai,0.974209,0.706719,0.739124


In [12]:
cifar100, _ = get_test_set('CIFAR100', preprocess)
cifar10, _ = get_test_set('CIFAR10', preprocess)
#sun_dset = datasets.LSUN(root=f'/home/ubuntu/data/LSUN', classes='test', transform=preprocess)
food_dset = datasets.Food101('/home/ubuntu/data/Food101/', split='test', transform=preprocess,download=True)
#sun_dset = datasets.SUN397()

Files already downloaded and verified
Files already downloaded and verified


In [17]:
dataset_eces = {}
dataset_accs = {}
for dataset_name in ['CIFAR100', 'CIFAR10', 'Food101', 'SUN397']:
    dset,_ = get_test_set(dataset_name, None)
    model_eces = {}
    model_accs = {}
    for model_legend, temp in tqdm(all_temps.items()):
        model_name, pretrained_dset = model_legend.split(';')
        model, _, preprocess = open_clip.create_model_and_transforms(model_name,
            pretrained=pretrained_dset,
            device=device)
        tokenizer = open_clip.get_tokenizer(model_name)
        dset.transform = preprocess
        _ , templates = get_openai_prompts(dataset_name)

        template_eces = []
        template_accs = []
        image_features, actual = get_image_features(
            model,  dset,  batch_size=batch_size, device=device
        )
        for text_template in templates:
            #predictions, actual, probs = get_preds(model, tokenizer, dset, 
            #    text_template=text_template, temp_scaling=temp, device=device)

            predictions, probs = get_preds_from_img_features(model, tokenizer, dset, image_features, text_template=text_template, temp_scaling=temp,
                device = device)

            ECE, _, acc = get_metrics(predictions, actual, probs)
            template_eces.append(ECE)
            template_accs.append(acc)
        
        model_eces[model_legend] = template_eces
        model_accs[model_legend] = template_accs
    dataset_eces[dataset_name] = model_eces
    dataset_accs[dataset_name] = model_eces

Files already downloaded and verified


100%|██████████| 13/13 [06:40<00:00, 30.84s/it]


Files already downloaded and verified


100%|██████████| 13/13 [06:38<00:00, 30.66s/it]
100%|██████████| 13/13 [14:19<00:00, 66.15s/it]


In [31]:
dataset_eces_supervised = {}
dataset_accs_supervised = {}
for dataset_name in ['CIFAR100', 'CIFAR10', 'Food101', 'SUN397']:
    dset,_ = get_test_set(dataset_name, None)
    model_eces = {}
    model_accs = {}
    for model_legend, temp in tqdm(all_temps.items()):
        model_name, pretrained_dset = model_legend.split(';')
        model, _, preprocess = open_clip.create_model_and_transforms(model_name,
            pretrained=pretrained_dset,
            device=device)
        tokenizer = open_clip.get_tokenizer(model_name)
        if isinstance(dset, torch.utils.data.dataset.Subset):
            dset.dataset.transform = preprocess
        else:
            dset.transform = preprocess
        _ , templates = get_openai_prompts(dataset_name)

        template_eces = []
        template_accs = []
        image_features, actual = get_image_features(
            model,  dset,  batch_size=batch_size, device=device
        )
        for text_template in templates:
            #predictions, actual, probs = get_preds(model, tokenizer, dset, 
            #    text_template=text_template, temp_scaling=temp, device=device)

            predictions, probs = get_preds_from_img_features(model, tokenizer, dset, image_features, text_template=text_template, temp_scaling=temp,
                device = device)

            ECE, _, acc = get_metrics(predictions, actual, probs)
            template_eces.append(ECE)
            template_accs.append(acc)
        
        model_eces[model_legend] = template_eces
        model_accs[model_legend] = template_accs
    dataset_eces_supervised[dataset_name] = model_eces
    dataset_accs_supervised[dataset_name] = model_eces

Unnamed: 0,ViT-B-16;laion2b_s34b_b88k,ViT-L-14;laion2b_s32b_b82k,ViT-B-32;laion2b_s34b_b79k,ViT-B-16;openai,ViT-L-14;openai,ViT-B-32;openai,ViT-B-16;laion400m_e31,ViT-L-14;laion400m_e31,ViT-B-32;laion400m_e31,RN50;openai,RN50;yfcc15m,RN50;cc12m,ViT-H-14;laion2b_s32b_b79k
0,0.028102,0.014133,0.015249,0.031657,0.041401,0.025413,0.024479,0.010244,0.027594,0.034527,0.135337,0.100776,0.013398
1,0.046665,0.029012,0.049906,0.057445,0.039721,0.051354,0.04041,0.015074,0.063761,0.067796,0.135968,0.08982,0.027745


In [41]:
model_legend =  'ViT-B-16;laion400m_e31'
model_name, pretrained_dset = model_legend.split(';')
model, _, preprocess = open_clip.create_model_and_transforms(model_name,
            pretrained=pretrained_dset,
            device=device)
tokenizer = open_clip.get_tokenizer(model_name)
dset,_ = get_test_set('CIFAR10', preprocess)
_ , templates = get_openai_prompts('CIFAR10')
print(templates[0])
image_features, actual = get_image_features(
        model,  dset,  batch_size=batch_size, device=device
    )
predictions, probs = get_preds_from_img_features(model, tokenizer, dset, image_features, text_template=templates[0], 
        #temp_scaling=temp,
        device = device)
predictions_, actual_, probs_ = get_preds(model, tokenizer, dset, text_template=templates[0], device=device)
get_metrics(predictions, actual, probs), get_metrics(predictions_, actual_, probs_)

Files already downloaded and verified
a photo of a {}.


((0.06008501325334824, 0.19970703125, 0.9174),
 (0.034525964367322824, 0.19967076182365417, 0.9175))

In [79]:
#dataset_eces_uncalib = {}
for dataset_name in ['CIFAR100', 'CIFAR10', 'Food101', 'SUN397']:
    dset,_ = get_test_set(dataset_name, None)
    model_eces = {}
    for model_legend, _ in tqdm(all_temps.items()):
        model_name, pretrained_dset = model_legend.split(';')
        model, _, preprocess = open_clip.create_model_and_transforms(model_name,
            pretrained=pretrained_dset,
            device=device)
        tokenizer = open_clip.get_tokenizer(model_name)
        if isinstance(dset, torch.utils.data.dataset.Subset):
            dset.dataset.transform = preprocess
        else:
            dset.transform = preprocess
        _ , templates = get_openai_prompts(dataset_name)

        template_eces = []
        image_features, actual = get_image_features(
            model,  dset,  batch_size=batch_size, device=device
        )
        for text_template in templates:
            #predictions, actual, probs = get_preds(model, tokenizer, dset, 
            #    text_template=text_template, temp_scaling=temp, device=device)

            predictions, probs = get_preds_from_img_features(model, tokenizer, dset, image_features, text_template=text_template, 
                #temp_scaling=temp,
                device = device)

            ECE, _, acc = get_metrics(predictions, actual, probs)
            template_eces.append(ECE)
        
        model_eces[model_legend] = template_eces
    dataset_eces_uncalib[dataset_name] = model_eces
    print(pd.DataFrame(model_eces))

Files already downloaded and verified


100%|██████████| 13/13 [06:52<00:00, 31.77s/it]


    ViT-B-16;laion2b_s34b_b88k  ViT-L-14;laion2b_s32b_b82k  \
0                     0.058503                    0.045934   
1                     0.045820                    0.038335   
2                     0.060482                    0.039551   
3                     0.059873                    0.049759   
4                     0.066750                    0.043859   
5                     0.062486                    0.050213   
6                     0.062093                    0.053934   
7                     0.053304                    0.045275   
8                     0.069938                    0.057849   
9                     0.048214                    0.046141   
10                    0.040250                    0.036563   
11                    0.053200                    0.037751   
12                    0.061325                    0.051173   
13                    0.063761                    0.046218   
14                    0.054451                    0.048778   
15      

100%|██████████| 13/13 [06:47<00:00, 31.35s/it]


    ViT-B-16;laion2b_s34b_b88k  ViT-L-14;laion2b_s32b_b82k  \
0                     0.010584                    0.007821   
1                     0.003846                    0.004197   
2                     0.007205                    0.003622   
3                     0.011140                    0.007182   
4                     0.009263                    0.006535   
5                     0.012927                    0.006660   
6                     0.009915                    0.006664   
7                     0.015371                    0.009377   
8                     0.013392                    0.028946   
9                     0.008501                    0.007654   
10                    0.002923                    0.004935   
11                    0.006695                    0.005682   
12                    0.011906                    0.004308   
13                    0.009057                    0.004868   
14                    0.010353                    0.006806   
15      

100%|██████████| 13/13 [14:27<00:00, 66.76s/it]


   ViT-B-16;laion2b_s34b_b88k  ViT-L-14;laion2b_s32b_b82k  \
0                    0.017865                    0.015185   

   ViT-B-32;laion2b_s34b_b79k  ViT-B-16;openai  ViT-L-14;openai  \
0                    0.028567         0.029396         0.015401   

   ViT-B-32;openai  ViT-B-16;laion400m_e31  ViT-L-14;laion400m_e31  \
0         0.029977                0.044579                0.033489   

   ViT-B-32;laion400m_e31  RN50;openai  RN50;yfcc15m  RN50;cc12m  \
0                0.025616     0.047133      0.228004    0.268225   

   ViT-H-14;laion2b_s32b_b79k  
0                    0.015839  


  0%|          | 0/13 [01:59<?, ?it/s]


KeyboardInterrupt: 

In [82]:
pd.DataFrame(dataset_eces_uncalib['Food101'])

Unnamed: 0,ViT-B-16;laion2b_s34b_b88k,ViT-L-14;laion2b_s32b_b82k,ViT-B-32;laion2b_s34b_b79k,ViT-B-16;openai,ViT-L-14;openai,ViT-B-32;openai,ViT-B-16;laion400m_e31,ViT-L-14;laion400m_e31,ViT-B-32;laion400m_e31,RN50;openai,RN50;yfcc15m,RN50;cc12m,ViT-H-14;laion2b_s32b_b79k
0,0.017865,0.015185,0.028567,0.029396,0.015401,0.029977,0.044579,0.033489,0.025616,0.047133,0.228004,0.268225,0.015839


In [83]:
for dataset_name in ['SUN397']:
    dset,_ = get_test_set(dataset_name, None)
    model_eces = {}
    for model_legend, temp in tqdm(all_temps.items()):
        model_name, pretrained_dset = model_legend.split(';')
        model, _, preprocess = open_clip.create_model_and_transforms(model_name,
            pretrained=pretrained_dset,
            device=device)
        tokenizer = open_clip.get_tokenizer(model_name)
        if isinstance(dset, torch.utils.data.dataset.Subset):
            dset.dataset.transform = preprocess
        else:
            dset.transform = preprocess
        _ , templates = get_openai_prompts(dataset_name)

        template_eces = []
        image_features, actual = get_image_features(
            model,  dset,  batch_size=batch_size, device=device
        )
        for text_template in templates:
            #predictions, actual, probs = get_preds(model, tokenizer, dset, 
            #    text_template=text_template, temp_scaling=temp, device=device)

            predictions, probs = get_preds_from_img_features(model, tokenizer, dset, image_features, text_template=text_template, 
                #temp_scaling=temp,
                device = device)

            ECE, _, acc = get_metrics(predictions, actual, probs)
            template_eces.append(ECE)
        
        model_eces[model_legend] = template_eces
    dataset_eces_uncalib[dataset_name] = model_eces

 46%|████▌     | 6/13 [28:28<32:35, 279.33s/it]  

In [37]:
pd.DataFrame(dataset_eces_uncalib['CIFAR10'])

Unnamed: 0,ViT-B-16;laion2b_s34b_b88k,ViT-L-14;laion2b_s32b_b82k,ViT-B-32;laion2b_s34b_b79k,ViT-B-16;openai,ViT-L-14;openai,ViT-B-32;openai,ViT-B-16;laion400m_e31,ViT-L-14;laion400m_e31,ViT-B-32;laion400m_e31,RN50;openai,RN50;yfcc15m,RN50;cc12m,ViT-H-14;laion2b_s32b_b79k
0,0.016477,0.01579,0.023797,0.052869,0.048692,0.049168,0.060085,0.074716,0.041315,0.076222,0.301586,0.244496,0.022556
1,0.005005,0.006279,0.014653,0.053697,0.05344,0.040927,0.052569,0.046396,0.041712,0.089666,0.180283,0.243485,0.008576
2,0.009754,0.005568,0.019888,0.029596,0.039544,0.021597,0.045846,0.04712,0.028131,0.0291,0.280472,0.25214,0.007252
3,0.01764,0.013397,0.017957,0.049189,0.037274,0.041498,0.059882,0.057221,0.045282,0.052157,0.183357,0.176376,0.017618
4,0.013293,0.011741,0.019585,0.040568,0.034073,0.01295,0.048032,0.062678,0.035936,0.041095,0.230471,0.139998,0.014786
5,0.020227,0.01128,0.0229,0.058637,0.05568,0.071162,0.067231,0.06697,0.036518,0.081492,0.196379,0.222628,0.014313
6,0.0148,0.012401,0.023917,0.069136,0.052788,0.057587,0.064864,0.08347,0.037807,0.04848,0.224352,0.227845,0.020242
7,0.023362,0.015036,0.035868,0.045805,0.058565,0.044232,0.072784,0.08171,0.044215,0.041976,0.249727,0.250848,0.022378
8,0.016674,0.043433,0.035388,0.087863,0.101702,0.088415,0.052829,0.066712,0.047753,0.097319,0.280751,0.302008,0.0107
9,0.010787,0.01121,0.021534,0.063763,0.076167,0.078766,0.054817,0.088222,0.042147,0.076091,0.34165,0.294014,0.016334


In [8]:
image_features, actual = get_image_features(model, imagenet_test, batch_size=128,
        device = device)

In [9]:
actual = torch.IntTensor(actual).to(device).long()
actual.shape

torch.Size([50000])

In [10]:
imagenet_mapping = map_imagenet_to_readable_label()
imagenet_test.classes = [imagenet_mapping[x] for x in imagenet_test.classes ]

In [11]:
text = tokenizer([text_template.replace('{}',x) for x in imagenet_test.classes])
with torch.no_grad(), torch.cuda.amp.autocast():
    text_features = model.encode_text(text.to(device))
    text_features /= text_features.norm(dim=-1, keepdim=True)
    
text_probs = (100.0 * image_features @ text_features.T)

In [12]:
## Setup LBGFS
temperature = nn.Parameter((torch.ones(1)).to(device))
args = {'temperature': temperature}
criterion = nn.CrossEntropyLoss()

# Removing strong_wolfe line search results in jump after 50 epochs
optimizer = optim.LBFGS([temperature], lr=0.001, max_iter=1000, line_search_fn='strong_wolfe')

temps = []
losses = []
def _eval():
    loss = criterion(T_scaling(text_probs, args), actual)
    loss.backward()
    temps.append(temperature.item())
    losses.append(loss)
    return loss
optimizer.step(_eval)
temperature.item()

1.6131701469421387

In [13]:
predictions, probs = get_preds_from_img_features(
    model, tokenizer, imagenet_test, image_features, text_template=text_template, temp_scaling=temperature.item(), device = device
)
get_metrics(predictions, actual, probs)

(0.620787089606212, 0.958984375, 0.0)

In [14]:
cifar_test, num_classes = get_test_set('CIFAR100', preprocess)
predictions, actual, probs = get_preds(model, tokenizer, cifar_test, 
    text_template=text_template, temp_scaling=temperature.item(), device=device)
get_metrics(predictions, actual, probs)

Files already downloaded and verified


(0.025181555526703616, 0.057354552355015076, 0.7139)

In [15]:
cifar_test, num_classes = get_test_set('CIFAR10', preprocess)
predictions, actual, probs = get_preds(model, tokenizer, cifar_test, 
    text_template=text_template, temp_scaling=temperature.item(), device=device)
get_metrics(predictions, actual, probs)

Files already downloaded and verified


(0.007352347984910052, 0.18847142159938812, 0.9174)