In [1]:
import torch
import numpy as np
import os
import torchvision
import sys
sys.path.append('../')
from voc import *
from coco import *
from backbones.config import config
import torchvision.transforms as transforms
from torchvision.models import resnet152, resnet101, resnet18, resnet34, resnet50
from tqdm import tqdm
import json
import timm
from timm.data import resolve_data_config
from timm.data.transforms_factory import create_transform
# from timm.models.mvitv2.mvitv2_base


  from .autonotebook import tqdm as notebook_tqdm


In [3]:

# Names of all timm models that ship pretrained weights.
avail_pretrained_models = timm.list_models(pretrained=True)

# Pretrained torchvision ResNets, shallowest to deepest.
m_li = [resnet18(pretrained=True), resnet34(pretrained=True), resnet50(pretrained=True), resnet101(pretrained=True), resnet152(pretrained=True)]

# The ViT gets its own name: it used to be bound to `model` and was then
# immediately overwritten by the BiT ResNet created on the next line.
vit_model = timm.create_model('vit_base_patch16_224', pretrained=True, num_classes=0)
model = timm.create_model('resnetv2_50x3_bitm_in21k', pretrained=True)
model.reset_classifier(0)

# Deliberately NOT called `config`: that name is the backbone-name mapping
# imported from backbones.config, which later cells iterate with
# `config.items()`. Rebinding it here made those cells silently do nothing
# after a Restart & Run All.
data_config = resolve_data_config({}, model=model)
transform = create_transform(**data_config)

m_li.append(model)

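Create the model architecture file: write the printed module structure of the selected backbone to `../backbones/<name>.txt`.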

In [2]:
# Display the current value of `config` (the name imported from backbones.config in the first cell).
config

{'resnet50': 'resnetv2_50x3_bitm_in21k',
 'vit': 'mvitv2_base_in21k',
 'swin': 'swin_base_patch4_window7_224_in22k',
 'swin_large': 'swin_large_patch4_window7_224_in22k',
 'convnext': 'convnext_base_in22k',
 'mlpmixer': 'mixer_b16_224_in21k',
 'resnet101': 'resnetv2_101x1_bitm_in21k'}

In [3]:

# Write the printed architecture of the selected backbone to ../backbones/<key>.txt.
for k, v in config.items():
  # Only the 'resnet101' entry is processed; every other key is skipped.
  if k != 'resnet101':
    continue
  print(k)
  model = timm.create_model(v, pretrained=True, num_classes=80)
  # Freeze the model: switch gradients off on every parameter.
  for n, p in model.named_parameters():
    p.requires_grad_(False)
  with open('../backbones/{}.txt'.format(k), 'w') as f:
    f.write(str(model))

resnet101


In [10]:
# Print the name of each direct child module of `model`, then display its `head` attribute.
for name, child in model.named_children():
  print(name)
model.head

patch_embed
pos_drop
blocks
norm
fc_norm
head


Linear(in_features=768, out_features=20, bias=True)

In [4]:
def get_feat_extr_resnet10t(model):
    """Build truncated feature extractors at each stage boundary of ``model``.

    Iterates ``model.named_children()`` in order, printing every child name.
    Each time a child named ``layer1``, ``layer2``, ``layer3``, ``layer4`` or
    ``global_pool`` is reached, a ``torch.nn.Sequential`` is emitted that
    contains all children seen *before* it, followed by ``model.global_pool``.

    Returns:
        list of ``torch.nn.Sequential``, one per matching child, in visit order.
    """
    cut_points = ('layer1', 'layer2', 'layer3', 'layer4', 'global_pool')
    extractors = []
    prefix = []
    for child_name, child in model.named_children():
        print(child_name)
        if child_name in cut_points:
            # Snapshot of everything before this child, pooled at the end.
            extractors.append(torch.nn.Sequential(*prefix, model.global_pool))
        prefix.append(child)
    return extractors
# One extractor per depth: the first i+1 entries of `model.blocks`, placed
# between patch_embed/pos_drop and norm/fc_norm/head.
feat_extractors = list()
for i in range(12):
    vit = [
        model.patch_embed,
        model.pos_drop,
        model.blocks[:i+1],
        model.norm,
        model.fc_norm,
        model.head,
    ]
    feat_extractors += [torch.nn.Sequential(*vit)]
print(feat_extractors)

[Sequential(
  (0): PatchEmbed(
    (proj): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16))
    (norm): Identity()
  )
  (1): Dropout(p=0.0, inplace=False)
  (2): Sequential(
    (0): Block(
      (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
      (attn): Attention(
        (qkv): Linear(in_features=768, out_features=2304, bias=True)
        (attn_drop): Dropout(p=0.0, inplace=False)
        (proj): Linear(in_features=768, out_features=768, bias=True)
        (proj_drop): Dropout(p=0.0, inplace=False)
      )
      (ls1): Identity()
      (drop_path1): Identity()
      (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
      (mlp): Mlp(
        (fc1): Linear(in_features=768, out_features=3072, bias=True)
        (act): GELU()
        (drop1): Dropout(p=0.0, inplace=False)
        (fc2): Linear(in_features=3072, out_features=768, bias=True)
        (drop2): Dropout(p=0.0, inplace=False)
      )
      (ls2): Identity()
      (drop_path2): Identity()
  

In [4]:

# Dataset: VOC2007 'trainval' with LT=True (the COCO2014 alternative is kept below, disabled).
train_dataset = Voc2007Classification(
    'data/voc',
    'trainval',
    inp_name='data/voc/voc_glove_word2vec.pkl',
    LT=True,
)
# train_dataset = COCO2014('data/coco', phase='train', inp_name='data/coco/coco_glove_word2vec.pkl')

# Per-channel normalisation applied after ToTensor().
normalize = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225],
)

# Preprocessing pipeline attached to the dataset after construction.
train_dataset.transform = transforms.Compose([
    MultiScaleCrop(224, scales=(1.0, 0.875, 0.75, 0.66, 0.5), max_distort=2),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    normalize,
])

# Fixed iteration order (shuffle=False), 32 samples per batch, 4 worker processes.
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=32,
    shuffle=False,
    num_workers=4,
)

[dataset] read data/voc/files/VOC2007/classification_trainval_LT.csv
[dataset] VOC 2007 classification set=trainval number of classes=20  number of images=1058


In [5]:
## get feat representation from pretrained layers
def get_Z(train_loader, g):
  """Run every batch of ``train_loader`` through ``g`` and stack the results.

  Each batch is processed exactly once. The previous implementation seeded
  the accumulators with batch 0 and then concatenated batch 0 again, so the
  first batch appeared twice in both returned arrays.

  Args:
    train_loader: iterable yielding ``(input, target)`` pairs. ``input[0]`` is
      what gets fed to ``g`` (presumably the image batch -- TODO confirm
      against the dataset class). ``target`` holds the per-class labels.
    g: callable feature extractor applied to ``input[0].float()``.

  Returns:
    Tuple ``(Z, labels)`` of numpy arrays, concatenated over batches along
    axis 0. Labels are remapped so that 0 becomes 1 and -1 becomes 0.

  Raises:
    ValueError: if ``train_loader`` yields no batches.
  """
  feat_chunks = []
  label_chunks = []
  # Inference only: no autograd graph is needed for feature extraction.
  with torch.no_grad():
    for i, (input, target) in tqdm(enumerate(train_loader)):
      # Work on a copy so the caller's label tensor is left untouched.
      target = torch.as_tensor(target).clone()
      # Order matters: 0 -> 1 first, then -1 -> 0.
      target[target == 0] = 1
      target[target == -1] = 0

      batch_feat = g(input[0].float()).detach().cpu().numpy()
      if i == 0:
        # Report the per-batch feature shape once.
        print(batch_feat.shape)
      feat_chunks.append(batch_feat)
      label_chunks.append(target.cpu().numpy())

  if not feat_chunks:
    raise ValueError("train_loader yielded no batches")

  # Single concatenation at the end instead of re-allocating every iteration.
  Z = np.concatenate(feat_chunks, axis=0)
  labels = np.concatenate(label_chunks, axis=0)
  return Z, labels

In [6]:
## calculate transferability score
## Z: Feature from single forward pass of target sample (tensor)
def get_transferability(Z, labels, class_num=(), eps=1e-4, medium_max=99, tail_max=17):
  """Return ``first_term - second_term`` of the log-det based score.

  ``first_term``  = 0.5 * logdet(I + Z^T Z / (n * 1e-3))
  ``second_term`` = sum_c n_c / (2 * sum(class_num)) * logdet(I + Z_c^T Z_c / (n_c * eps_c))

  where ``Z_c`` are the rows of ``Z`` whose label in column ``c`` equals 1 and
  ``eps_c`` is chosen per class from its count ``n_c``:

    * ``n_c <= tail_max``    -> ``1 / sqrt(n_c)``
    * ``n_c <= medium_max``  -> ``1e-3``
    * otherwise              -> ``eps``

  ``eps_c`` is now a per-class local. Previously ``eps`` itself was
  overwritten inside the loop, so a class above ``medium_max`` inherited
  whatever value the preceding class had set and the score depended on the
  order of ``class_num``.

  Args:
    Z: 2-D torch tensor of shape ``(n, d)``.
    labels: 2-D array-like; column ``i`` is compared against 1 to select the
      rows belonging to class ``i``.
    class_num: per-class counts; ``class_num[i]`` must refer to the same class
      as ``labels[:, i]``.
    eps: distortion used for classes with more than ``medium_max`` samples.
    medium_max: largest count treated as a medium class (default 99).
    tail_max: largest count treated as a tail class (default 17).

  Returns:
    0-dim torch tensor holding the score.
  """
  n, d = Z.shape #1090, 1000

  first_term = 0.5 * _logdet_eye_plus_gram(Z, 1.0 / (n * (1e-3)))
  print("first term: ", first_term)

  second_term = 0
  total = sum(class_num)  # loop-invariant normaliser
  label_arr = np.asarray(labels)
  for i, n_c in enumerate(class_num):
    if n_c <= 0:
      # An empty class contributes nothing and would otherwise divide by zero.
      continue
    ind = torch.as_tensor(np.argwhere(label_arr[:, i] == 1).flatten(), dtype=torch.long)
    Z_c = Z[ind, :]
    if n_c <= tail_max:
      eps_c = 1 / np.sqrt(n_c)
    elif n_c <= medium_max:
      eps_c = 1e-3
    else:
      eps_c = eps
    second_term += n_c / (2 * total) * _logdet_eye_plus_gram(Z_c, float(1.0 / (n_c * eps_c)))

  print("second_term: ", second_term)

  return first_term - second_term


def _logdet_eye_plus_gram(Z, scale):
  """Return ``logdet(I + scale * Z^T Z)`` via the smaller of the two Gram matrices.

  Sylvester's determinant identity gives
  ``det(I_d + a Z^T Z) == det(I_n + a Z Z^T)``, so for a ``(rows, cols)``
  matrix only a ``min(rows, cols)``-sized square matrix needs to be formed.
  """
  rows, cols = Z.shape
  if rows == 0:
    # I + 0 has determinant 1, hence log-determinant 0.
    return Z.new_zeros(())
  if rows < cols:
    gram = torch.matmul(Z, torch.t(Z))
  else:
    gram = torch.matmul(torch.t(Z), Z)
  eye = torch.eye(gram.shape[0], dtype=Z.dtype, device=Z.device)
  return torch.logdet(eye + scale * gram)

In [7]:
def get_feat_extr_resnet(model):
    """Return the part of ``model`` that precedes its ``avgpool`` child.

    Iterates ``model.named_children()`` in order, printing every child name.
    When a child named ``avgpool`` is reached, all children seen before it are
    wrapped in a ``torch.nn.Sequential`` and collected.

    Returns:
        list of ``torch.nn.Sequential`` (one entry per ``avgpool`` child found).
    """
    extractors = []
    seen = []
    for child_name, child in model.named_children():
        print(child_name)
        if child_name == 'avgpool':
            # Everything up to, but not including, the pooling layer.
            extractors.append(torch.nn.Sequential(*seen))
        seen.append(child)
    return extractors

In [8]:
## get class num list
import pandas as pd
trainval_csv = 'data/voc/files/VOC2007/classification_trainval_LT.csv'
hi = pd.read_csv(trainval_csv)
print(hi.columns)
print(hi.shape)
## generate gt pkl file
# Every 0 entry is turned into 1, leaving only the values 1 and -1 in the label columns.
hi = hi.replace(0,1)
gt_labels = hi.iloc[:,1:].to_numpy()
print(gt_labels.shape)
img_id2idx = dict()
idx2img_id = dict()

# Two-way lookup between the 'name' column and the row index.
for i, row in hi.iterrows():
  img_id2idx[row['name']] = i
  idx2img_id[i] = row['name']

# print(len(img_id2idx), len(idx2img_id))

# Per-class value histogram, e.g. {'aeroplane': {-1: ..., 1: ...}, ...}.
# DataFrame.items() replaces iteritems(), which was removed in pandas 2.0.
di = dict()
for col_name, val in hi.items():
  # print(col_name)
  if col_name =='name':
    continue
  di[col_name] = val.value_counts().to_dict()

print(di)
# li[i] is the number of positives of the i-th class column. It is later
# passed to get_transferability, which pairs li[i] with label column i, so
# li must stay in column order.
li = []
for k,v in di.items():
    li.append(v[1])
# li = list(np.load('coco_class_num.npy'))
print(len(li), max(li), min(li))
print(sum(li))
# Rank a *copy*: the previous in-place li.sort(reverse=True) reordered li and
# broke the count/column alignment described above.
li_sorted = sorted(li, reverse=True)
medium = li_sorted[6]
tail = li_sorted[12]

print(medium, tail)


Index(['name', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car',
       'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike',
       'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor'],
      dtype='object')
(1058, 21)
(1058, 20)
{'aeroplane': {-1: 1054, 1: 4}, 'bicycle': {-1: 1028, 1: 30}, 'bird': {-1: 1052, 1: 6}, 'boat': {-1: 1046, 1: 12}, 'bottle': {-1: 1000, 1: 58}, 'bus': {-1: 1045, 1: 13}, 'car': {-1: 811, 1: 247}, 'cat': {-1: 1031, 1: 27}, 'chair': {-1: 630, 1: 428}, 'cow': {-1: 1050, 1: 8}, 'diningtable': {-1: 851, 1: 207}, 'dog': {-1: 1018, 1: 40}, 'horse': {-1: 1040, 1: 18}, 'motorbike': {-1: 1041, 1: 17}, 'person': {1: 627, -1: 431}, 'pottedplant': {-1: 923, 1: 135}, 'sheep': {-1: 1055, 1: 3}, 'sofa': {-1: 925, 1: 133}, 'train': {-1: 1052, 1: 6}, 'tvmonitor': {-1: 959, 1: 99}}
20 627 3
2118
99 17


In [9]:
# for model in m_li:
# Score each truncated feature extractor of `model` on the training set.
feat_extractors= get_feat_extr_resnet(model)
for g in feat_extractors:
  Z, labels = get_Z(train_loader, g)
  Z = torch.as_tensor(Z)
  # Collapse all non-batch dimensions so Z is (num_samples, num_features).
  Z = torch.flatten(Z, start_dim=1)
  ## centralize Z
  # NOTE(review): this subtracts the scalar mean over ALL entries, not the
  # per-feature mean (torch.mean(Z, dim=0)) -- confirm which one is intended.
  Z = Z - torch.mean(Z)
  print(torch.mean(Z))

  ##scale down to trace
  # trace(Z Z^T) is the squared Frobenius norm of Z, so dividing by the norm
  # is equivalent and avoids building the n x n product just for its trace.
  Z = Z / torch.linalg.norm(Z)

  score = get_transferability(Z, labels, li)
  print(score)
    

conv1
bn1
relu
maxpool
layer1
layer2
layer3
layer4
avgpool
fc


0it [00:00, ?it/s]

(32, 2048, 7, 7)


34it [00:42,  1.26s/it]


tensor(-2.8355e-08)


In [16]:

import pickle
# NOTE: pickle.load can execute arbitrary code; only load trusted project files.
# Context managers ensure both file handles are closed (they were leaked before).
with open('data/voc/voc_adj.pkl', 'rb') as f:
  result = pickle.load(f)

print(result['adj'],type( result['nums']))
print(result['nums'])

# `result` is rebound here to the contents of the class-split pickle.
with open('data/voc/class_split.pkl', 'rb') as f:
  result = pickle.load(f)


[[  0   0   0   2   0   0   4   0   1   0   0   0   0   0  25   0   1   0
    0   0]
 [  0   0   0   1   2   8  18   1   8   1   0   1   0   7 161   5   0   3
    0   3]
 [  0   0   0   4   0   0   3   0   1   2   0   1   0   0  22   1   0   0
    0   0]
 [  2   1   4   0   0   0   6   0   0   2   0   2   0   1  43   1   0   0
    0   0]
 [  0   2   0   0   0   0   2   4  28   0  46   4   0   0 143  12   2   8
    0  19]
 [  0   8   0   0   0   0  63   0   0   0   0   1   0   4  71   0   0   0
    0   0]
 [  4  18   3   6   2  63   0   2   5   2   0   6  11  32 215   7   1   7
   12   0]
 [  0   1   0   0   4   0   2   0  23   0   2   8   0   0  26  14   0   5
    0  10]
 [  1   8   1   0  28   0   5  23   0   0  99  16   1   2 167  53   0  72
    0  76]
 [  0   1   2   2   0   0   2   0   0   0   0   1   5   2  33   0   1   0
    0   0]
 [  0   0   0   0  46   0   0   2  99   0   0   1   0   0  94  27   0  11
    0  13]
 [  0   1   1   2   4   1   6   8  16   1   1   0   8   2 116   9

In [12]:
# Read the whole image-id file into one string and print it.
# NOTE(review): the name `id` shadows the Python builtin for the rest of the session.
with open('data/voc/img_id.txt') as f:
  id = f.read()
print(id)

5
20
23
44
47
48
63
73
81
83
89
112
125
142
147
159
169
177
203
210
218
220
222
229
233
249
251
263
285
303
306
308
311
320
322
331
334
338
343
349
355
359
373
381
387
391
394
406
428
433
446
448
459
460
461
468
474
492
496
500
526
545
550
555
590
591
592
597
599
613
632
653
661
663
677
694
707
709
710
717
729
752
753
771
780
787
804
805
806
830
834
847
848
860
865
874
885
888
889
895
903
904
906
915
921
926
929
937
943
948
950
971
972
977
996
1001
1014
1024
1027
1028
1042
1052
1057
1066
1079
1091
1097
1106
1112
1113
1119
1147
1158
1164
1170
1174
1175
1206
1234
1240
1248
1254
1263
1265
1309
1325
1327
1330
1343
1345
1350
1352
1364
1390
1393
1430
1432
1439
1443
1451
1457
1466
1472
1480
1494
1510
1521
1531
1532
1544
1554
1557
1563
1577
1604
1608
1633
1650
1677
1678
1683
1689
1711
1717
1725
1729
1730
1746
1756
1771
1777
1784
1785
1793
1795
1797
1801
1806
1828
1834
1837
1845
1849
1860
1862
1881
1882
1887
1898
1901
1932
1938
1954
1958
1963
1978
1980
1999
2002
2015
2023
2025
2045
2051
2061
20