In [1]:
from unlabeled_extrapolation.models import bit_resnet, vit_model, timm_model, clip_model, imnet_resnet, suriya_bit_resnet
from unlabeled_extrapolation.utils import utils
import importlib
import timm
import torch
from torch import nn
import numpy as np
importlib.reload(bit_resnet)
importlib.reload(vit_model)
importlib.reload(utils)
importlib.reload(timm_model)
importlib.reload(clip_model)
importlib.reload(imnet_resnet)

ModuleNotFoundError: No module named 'unlabeled_extrapolation'

In [None]:
import sys
print(sys.path)

In [2]:
def get_layers_freeze_test(model):
    print('num params before freezing: ', utils.count_parameters(model, trainable=True))
    print(model.get_layers()[1])
    print(len(model.get_layers()))
    for k in [1, 2, len(model.get_layers())]:
        model.freeze_bottom_k(k=k)
        print(f'num params after freezing {k}: {utils.count_parameters(model, trainable=True)}')


# Get image

# Analyzing layer norm

In [3]:
data = np.array([0.1, 0.4, 0.5], dtype=np.float32)+0.6
x = torch.tensor(data)
layer_norm = nn.LayerNorm(len(data), elementwise_affine=False)
A = torch.eye(len(data), requires_grad=True)
output = layer_norm(torch.matmul(A, x))
loss = torch.dot(torch.tensor([0.1, 0.2, 0.5]), output) # torch.sum(torch.square(output))
loss.backward()
print(loss)
print(A.grad)

NameError: name 'np' is not defined

In [4]:
data = np.array([0.1, 0.4, 0.5], dtype=np.float32) - 100.0
x = torch.tensor(data)
A = torch.eye(len(data), requires_grad=True)
z = torch.matmul(A, x)
z.retain_grad()
mu = torch.mean(z)
mu.retain_grad()
sigma = torch.sqrt(torch.std(z, unbiased=False) ** 2)
sigma.retain_grad()
o = (z - mu) / sigma
o.retain_grad()
l = torch.dot(torch.tensor([0.1, 0.2, 0.5]), o)
# l.backward()
# print(A.grad)
l.backward()
print(A.grad)

NameError: name 'np' is not defined

# Get norms and gradients for different models

In [None]:
model = clip_model.ClipModel('ViT-L/14')


In [None]:
model = clip_model.ClipModel('ViT-B/16')
model.new_last_layer(2)

In [None]:
# print(list(model.get_layers()[0][1].parameters())[0].requires_grad)
# inputs = torch.zeros((1, 3, 224, 224))

def get_clip_mean_input():
    inputs = torch.tensor([0.48145466, 0.4578275, 0.40821073])
    inputs = inputs.unsqueeze(0).unsqueeze(-1).unsqueeze(-1)
    inputs = inputs.tile([1, 1, 224, 224])
    return inputs

def get_clip_dist_input():
    inputs = torch.tensor([0.48145466, 0.4578275, 0.40821073])
    inputs = inputs.unsqueeze(0).unsqueeze(-1).unsqueeze(-1)
    inputs = inputs.tile([1, 1, 224, 224])
    return inputs

inputs = torch.tensor([0.48145466, 0.4578275, 0.40821073])
inputs = inputs.unsqueeze(0).unsqueeze(-1).unsqueeze(-1)
inputs = inputs.tile([1, 1, 224, 224])
print(inputs.shape)
inputs = inputs.cuda()
model.cuda()
outputs = model(inputs)
loss = torch.sum(torch.square(outputs))
loss.backward()

In [None]:

def get_grad_layer(cur_layer):
    grads = [p.grad.detach().cpu().numpy() for p in cur_layer.parameters()]
    grad_norms_squared = [np.linalg.norm(g) ** 2 for g in grads]
    grad_norm = np.sqrt(np.sum(grad_norms_squared))
    return grad_norm

def get_norm_layer(cur_layer):
    norms_squared = [np.linalg.norm(p.data.detach().cpu().numpy()) ** 2 for p in cur_layer.parameters()]
    norm = np.sqrt(np.sum(norms_squared))
    return norm

def get_layer_grads(model):
    named_layers = model.get_layers()
    names, layers = zip(*named_layers)
    norms = [get_norm_layer(l) for l in layers]
    grad_norms = [get_grad_layer(l) for l in layers]
    return norms, grad_norms

norms, grad_norms = get_layer_grads(model)
print(grad_norms)
    

# Test get layers for different models

In [None]:
# Bit-resnet (get layers) for ResNet-50 and ResNet-101

resnet50_checkpoint_path = "/u/scr/ananya/simclr_weights/BiT-M-R50x1.npz"
resnet50 = bit_resnet.BitResNet(model_name='BiT-M-R50x1', checkpoint_path=resnet50_checkpoint_path)
get_layers_freeze_test(resnet50)

resnet101_checkpoint_path = "/u/scr/ananya/simclr_weights/BiT-M-R101x1.npz"
resnet101 = bit_resnet.BitResNet(model_name='BiT-M-R101x1', checkpoint_path=resnet101_checkpoint_path)
get_layers_freeze_test(resnet101)

In [None]:
# DINO
model = vit_model.VitModel(model_name='dino_vitb16')
model.new_last_layer(10)
get_layers_freeze_test(model)


In [None]:
# Timm ViT-S
model = vit_model.VitModel(model_name='timm.vit_small_patch16_224')
get_layers_freeze_test(model)


In [None]:
# Get model grads and norms for Timm ViT-S
layers = model.get_layers()

for name, layer in layers:
    norms_squared = [np.linalg.norm(p.data.detach().cpu().numpy()) ** 2 for p in layer.parameters()]
    norm = np.sqrt(np.sum(norms_squared))
    print(name, norm)

In [None]:
# Conv-next
model = timm_model.TimmModel('convnext_base_in22k')
get_layers_freeze_test(model)

In [None]:
x = torch.zeros((8,3,224,224))
x = x.cuda()
model(x)

In [None]:
print(model._model.head)

# Supervised ResNet

In [None]:
model = imnet_resnet.ResNet50(pretrained=True)

In [None]:
model.get_layers()

# Load Suriya's models

In [3]:
checkpoint_path='/u/scr/ananya/simclr_weights/imagenet_BiT_R50x1_b256x16m_opt_adamw_aug_sa_cg_1_ep_90_lr_0.001_norm_gn_se_1857_wd_0.1/lastepoch.pt'
model = suriya_bit_resnet.BitResNet50Wrapper(checkpoint_path)