In [9]:
from datasets import dog_dataset, cub_dataset, food_dataset
from models.models_to_finetune import deit_small_patch16_224, myresnetv2_task1, myresnetv2_task2, myresnetv2_for_c_loss
import PIL
import numpy as np
from tqdm import tqdm
import torch
import torch.optim as optim
from torchvision import transforms
import config
import sys
import math
from run import train_model
from vit.vit_pytorch.nest import NesT
import timm

We created the **datasets.py** file, in which we build custom dataloaders for each dataset. Calling a dataset function, e.g. **cub_dataset()**, returns `train_loader, val_loader, test_loader`. The validation data is split off from the training data $(90:10)$. The test set is only used at the end, after hyperparameter tuning. The training loop is implemented so that we get the train and validation loss after each iteration, as well as the top-1 and top-k accuracy for each epoch.\
**Kindly check the Excel sheet provided in the submission to look through all the experiments done for Task 1.**

In [10]:
# Per-channel (R, G, B) normalisation statistics (the standard ImageNet values),
# passed to transforms.Normalize by the transform pipelines in the cells below.
mean, std = (0.485, 0.456, 0.406), (0.229, 0.224, 0.225)

In [11]:
# Run-wide settings: `epochs` is forwarded to train_model and `batch_size`
# to the dataset loaders in the cells below.
epochs, batch_size = 50, 128
# Seed PyTorch's global RNG. Kept as the last expression so the cell still
# echoes the returned Generator object.
torch.manual_seed(42)

<torch._C.Generator at 0x7f92f73dff18>

All of the models can be trained using the two **main.py** files in the submission folder. For the sake of clarity, this notebook contains just enough code to run inference on the selected models.

Here we show the validation and test accuracy of each model with its best-performing augmentation.

## Using DeiT transformer as the backbone

In [6]:
# Evaluation pipeline: resize to the 224x224 model input and normalise.
# Uses the shared `mean`/`std` constants instead of repeating the literals.
test_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std),
])

# Training-time augmentation: resize, random rotation of up to 20 degrees,
# Gaussian blur (kernel 3, sigma sampled from [0.1, 2.0]), then normalise.
data_transform4 = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomRotation(20),
    transforms.GaussianBlur(3, sigma=(0.1, 2.0)),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std),
])

# Load data
train_loader, val_loader, test_loader = cub_dataset(
    bs=batch_size, data_transform=data_transform4, test_transform=test_transform
)
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

model = deit_small_patch16_224(pretrained=True, use_top_n_heads=12, use_patch_outputs=False)
# Replace the classification head: the CUB dataset has 200 classes.
model.head = torch.nn.Linear(in_features=model.head.in_features, out_features=200)
model.head.apply(model._init_weights)
model.to(device)

# Restore the fine-tuned weights. map_location makes the load work on the CPU
# fallback as well, even if the checkpoint was saved from a CUDA device.
path = "/home/hashmat.malik/Fall 2021/CV703 Lab/Week5/datasets/Task1:cub_dataset_weights/Exp1/modeldiet4_best.pth.tar"
checkpoint = torch.load(path, map_location=device)
model.load_state_dict(checkpoint['state_dict'])

criterion = torch.nn.CrossEntropyLoss()
# The optimizer is built because train_model takes it as an argument;
# presumably it is unused when is_train=False -- confirm in run.train_model.
optimizer = optim.Adam(model.parameters(), lr=0.001, betas=(0.5, 0.999))


_IncompatibleKeys(missing_keys=['head.weight', 'head.bias'], unexpected_keys=[])


In [7]:
# Evaluation-only run (is_train=False) of the DeiT model loaded above.
# NOTE(review): the name tag is 'resnet' although the model is DeiT; presumably
# it only labels the run/checkpoint -- confirm in run.train_model.
train_model(
    epochs,
    train_loader,
    val_loader,
    test_loader,
    optimizer,
    criterion,
    model,
    'resnet',
    is_train=False,
)

Test: [0/4]	Time  1.670 ( 1.670)	Loss 9.8001e-01 (9.8001e-01)	Acc@1  72.66 ( 72.66)	Acc@5  92.19 ( 92.19)
 * Acc@1 74.023 Acc@5 92.773
Test: [ 0/46]	Time  1.131 ( 1.131)	Loss 1.0286e+00 (1.0286e+00)	Acc@1  71.88 ( 71.88)	Acc@5  91.41 ( 91.41)
Test: [ 5/46]	Time  0.996 ( 1.094)	Loss 8.2085e-01 (9.1773e-01)	Acc@1  78.12 ( 73.96)	Acc@5  92.97 ( 93.62)
Test: [10/46]	Time  1.018 ( 1.075)	Loss 8.7018e-01 (9.0017e-01)	Acc@1  78.91 ( 74.79)	Acc@5  94.53 ( 93.47)
Test: [15/46]	Time  1.115 ( 1.088)	Loss 9.0634e-01 (8.8681e-01)	Acc@1  76.56 ( 75.10)	Acc@5  93.75 ( 93.75)
Test: [20/46]	Time  1.101 ( 1.092)	Loss 1.0219e+00 (8.8342e-01)	Acc@1  71.88 ( 75.11)	Acc@5  92.19 ( 93.75)
Test: [25/46]	Time  1.171 ( 1.099)	Loss 9.8335e-01 (8.9213e-01)	Acc@1  76.56 ( 75.15)	Acc@5  89.84 ( 93.54)
Test: [30/46]	Time  1.122 ( 1.101)	Loss 1.0551e+00 (8.9644e-01)	Acc@1  75.00 ( 75.28)	Acc@5  94.53 ( 93.72)
Test: [35/46]	Time  1.141 ( 1.102)	Loss 7.5823e-01 (9.0330e-01)	Acc@1  79.69 ( 75.46)	Acc@5  93.75 ( 93.49)
T

Achieved top-1 accuracy of *$74.023\%$* on the validation set and *$75.492\%$* on the test set. \
A batch size of 256 was used during training.

## Using CaiT transformer as the backbone

In [8]:
# Evaluation pipeline: resize to the 224x224 model input and normalise.
# Uses the shared `mean`/`std` constants instead of repeating the literals.
test_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std),
])

# Training-time augmentation: resize, random rotation of up to 20 degrees,
# Gaussian blur (kernel 3, sigma sampled from [0.1, 2.0]), then normalise.
data_transform4 = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomRotation(20),
    transforms.GaussianBlur(3, sigma=(0.1, 2.0)),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std),
])

# Load Data
train_loader, val_loader, test_loader = cub_dataset(
    bs=batch_size, data_transform=data_transform4, test_transform=test_transform
)
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

model = timm.create_model("cait_xxs24_224", pretrained=True)
# Replace the classification head: the CUB dataset has 200 classes.
model.head = torch.nn.Linear(in_features=model.head.in_features, out_features=200)
model.head.apply(model._init_weights)
model.to(device)

# Restore the fine-tuned weights. map_location makes the load work on the CPU
# fallback as well, even if the checkpoint was saved from a CUDA device.
path = "/home/hashmat.malik/Fall 2021/CV703 Lab/Week5/datasets/Task1:cub_dataset_weights/Exp2/modelcait4_best.pth.tar"
checkpoint = torch.load(path, map_location=device)
model.load_state_dict(checkpoint['state_dict'])

criterion = torch.nn.CrossEntropyLoss()
# The optimizer is built because train_model takes it as an argument;
# presumably it is unused when is_train=False -- confirm in run.train_model.
optimizer = optim.Adam(model.parameters(), lr=0.001, betas=(0.5, 0.999))

In [9]:
# Evaluation-only run (is_train=False) of the CaiT-224 model loaded above.
# NOTE(review): the name tag is 'resnet' although the model is CaiT; presumably
# it only labels the run/checkpoint -- confirm in run.train_model.
train_model(
    epochs,
    train_loader,
    val_loader,
    test_loader,
    optimizer,
    criterion,
    model,
    'resnet',
    is_train=False,
)

Test: [0/4]	Time  1.703 ( 1.703)	Loss 1.2184e+00 (1.2184e+00)	Acc@1  69.53 ( 69.53)	Acc@5  87.50 ( 87.50)
 * Acc@1 72.656 Acc@5 92.188
Test: [ 0/46]	Time  1.108 ( 1.108)	Loss 8.0442e-01 (8.0442e-01)	Acc@1  78.91 ( 78.91)	Acc@5  96.88 ( 96.88)
Test: [ 5/46]	Time  1.179 ( 1.193)	Loss 1.1397e+00 (9.8487e-01)	Acc@1  70.31 ( 73.83)	Acc@5  94.53 ( 94.66)
Test: [10/46]	Time  1.063 ( 1.185)	Loss 1.0279e+00 (9.8428e-01)	Acc@1  75.00 ( 74.79)	Acc@5  92.97 ( 93.96)
Test: [15/46]	Time  1.257 ( 1.181)	Loss 9.3267e-01 (9.6391e-01)	Acc@1  75.00 ( 75.34)	Acc@5  94.53 ( 94.14)
Test: [20/46]	Time  1.180 ( 1.174)	Loss 1.2021e+00 (9.7285e-01)	Acc@1  71.88 ( 75.37)	Acc@5  90.62 ( 93.75)
Test: [25/46]	Time  1.137 ( 1.169)	Loss 8.0677e-01 (9.7281e-01)	Acc@1  85.16 ( 75.78)	Acc@5  92.97 ( 93.57)
Test: [30/46]	Time  1.091 ( 1.164)	Loss 8.7290e-01 (9.7399e-01)	Acc@1  78.12 ( 75.53)	Acc@5  94.53 ( 93.65)
Test: [35/46]	Time  1.159 ( 1.156)	Loss 1.0205e+00 (9.7324e-01)	Acc@1  72.66 ( 75.35)	Acc@5  92.97 ( 93.71)
T

Achieved top-1 accuracy of $72.656\%$ on the validation set and $74.974\%$ on the test set. A batch size of 128 was used during training.

To further improve the accuracy, we trained the CaiT model at a larger resolution of $384 \times 384$. This, however, increased the training time, and we were only able to use a batch size of 24 during training.

In [10]:
# Evaluation pipeline: resize to the 384x384 model input and normalise.
# Uses the shared `mean`/`std` constants instead of repeating the literals.
test_transform = transforms.Compose([
    transforms.Resize((384, 384)),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std),
])

# Training-time augmentation: 384x384 center crop (no resize), random rotation
# of up to 20 degrees, random horizontal flip, Gaussian blur, then normalise.
data_transform5 = transforms.Compose([
    transforms.CenterCrop(384),
    transforms.RandomRotation(20),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.GaussianBlur(3, sigma=(0.1, 2.0)),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std),
])

# Load Data
# The loaders use the notebook-level batch_size here, which only affects
# evaluation throughput in this inference-only cell.
train_loader, val_loader, test_loader = cub_dataset(
    bs=batch_size, data_transform=data_transform5, test_transform=test_transform
)
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

model = timm.create_model("cait_xxs24_384", pretrained=True)
# Replace the classification head: the CUB dataset has 200 classes.
model.head = torch.nn.Linear(in_features=model.head.in_features, out_features=200)
model.head.apply(model._init_weights)
model.to(device)

# Restore the fine-tuned weights. map_location makes the load work on the CPU
# fallback as well, even if the checkpoint was saved from a CUDA device.
path = "/home/hashmat.malik/Fall 2021/CV703 Lab/Week5/datasets/Task1:cub_dataset_weights/Exp3/modelcait_xxs24_3845_best.pth.tar"
checkpoint = torch.load(path, map_location=device)
model.load_state_dict(checkpoint['state_dict'])

criterion = torch.nn.CrossEntropyLoss()
# The optimizer is built because train_model takes it as an argument;
# presumably it is unused when is_train=False -- confirm in run.train_model.
optimizer = optim.Adam(model.parameters(), lr=0.001, betas=(0.5, 0.999))

In [11]:
# Evaluation-only run (is_train=False) of the CaiT-384 model loaded above.
# NOTE(review): the name tag is 'resnet' although the model is CaiT; presumably
# it only labels the run/checkpoint -- confirm in run.train_model.
train_model(
    epochs,
    train_loader,
    val_loader,
    test_loader,
    optimizer,
    criterion,
    model,
    'resnet',
    is_train=False,
)

Test: [0/4]	Time  3.976 ( 3.976)	Loss 7.5370e-01 (7.5370e-01)	Acc@1  80.47 ( 80.47)	Acc@5  94.53 ( 94.53)
 * Acc@1 83.203 Acc@5 97.461
Test: [ 0/46]	Time  2.648 ( 2.648)	Loss 6.6868e-01 (6.6868e-01)	Acc@1  83.59 ( 83.59)	Acc@5  97.66 ( 97.66)
Test: [ 5/46]	Time  2.709 ( 2.644)	Loss 6.9248e-01 (7.4004e-01)	Acc@1  83.59 ( 80.08)	Acc@5  96.09 ( 96.35)
Test: [10/46]	Time  2.632 ( 2.646)	Loss 6.2364e-01 (7.2949e-01)	Acc@1  85.94 ( 81.32)	Acc@5  97.66 ( 96.31)
Test: [15/46]	Time  2.474 ( 2.629)	Loss 7.7368e-01 (7.4513e-01)	Acc@1  80.47 ( 80.96)	Acc@5  95.31 ( 96.04)
Test: [20/46]	Time  2.609 ( 2.633)	Loss 7.4663e-01 (7.5035e-01)	Acc@1  81.25 ( 80.77)	Acc@5  95.31 ( 95.91)
Test: [25/46]	Time  2.685 ( 2.639)	Loss 7.1762e-01 (7.4134e-01)	Acc@1  80.47 ( 80.92)	Acc@5  95.31 ( 95.94)
Test: [30/46]	Time  2.647 ( 2.638)	Loss 7.2082e-01 (7.4222e-01)	Acc@1  82.03 ( 80.87)	Acc@5  96.09 ( 95.89)
Test: [35/46]	Time  2.652 ( 2.639)	Loss 6.5973e-01 (7.3819e-01)	Acc@1  86.72 ( 81.16)	Acc@5  95.31 ( 95.81)
T

Achieved top-1 accuracy of $83.203\%$ on the validation set and $81.025\%$ on the test set. Using center-crop during training gave the best validation accuracy, so we tried using center-crop during test evaluation as well.

In [12]:
# Evaluation pipeline: 384x384 center crop (instead of a resize), so test images
# are cropped the same way as the training images, then normalise.
# Uses the shared `mean`/`std` constants instead of repeating the literals.
test_transform = transforms.Compose([
    transforms.CenterCrop(384),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std),
])

# Training-time augmentation: 384x384 center crop, random rotation of up to
# 20 degrees, random horizontal flip, Gaussian blur, then normalise.
data_transform5 = transforms.Compose([
    transforms.CenterCrop(384),
    transforms.RandomRotation(20),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.GaussianBlur(3, sigma=(0.1, 2.0)),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std),
])

# Load Data
train_loader, val_loader, test_loader = cub_dataset(
    bs=batch_size, data_transform=data_transform5, test_transform=test_transform
)
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

model = timm.create_model("cait_xxs24_384", pretrained=True)
# Replace the classification head: the CUB dataset has 200 classes.
model.head = torch.nn.Linear(in_features=model.head.in_features, out_features=200)
model.head.apply(model._init_weights)
model.to(device)

# Restore the fine-tuned weights. map_location makes the load work on the CPU
# fallback as well, even if the checkpoint was saved from a CUDA device.
path = "/home/hashmat.malik/Fall 2021/CV703 Lab/Week5/datasets/Task1:cub_dataset_weights/Exp3/modelcait_xxs24_3845_best.pth.tar"
checkpoint = torch.load(path, map_location=device)
model.load_state_dict(checkpoint['state_dict'])

criterion = torch.nn.CrossEntropyLoss()
# The optimizer is built because train_model takes it as an argument;
# presumably it is unused when is_train=False -- confirm in run.train_model.
optimizer = optim.Adam(model.parameters(), lr=0.001, betas=(0.5, 0.999))

In [13]:
# Evaluation-only run (is_train=False) of the CaiT-384 model, this time with the
# center-crop test transform defined in the previous cell.
# NOTE(review): the name tag is 'resnet' although the model is CaiT; presumably
# it only labels the run/checkpoint -- confirm in run.train_model.
train_model(
    epochs,
    train_loader,
    val_loader,
    test_loader,
    optimizer,
    criterion,
    model,
    'resnet',
    is_train=False,
)

Test: [0/4]	Time  3.767 ( 3.767)	Loss 7.5370e-01 (7.5370e-01)	Acc@1  80.47 ( 80.47)	Acc@5  94.53 ( 94.53)
 * Acc@1 83.203 Acc@5 97.461
Test: [ 0/46]	Time  2.339 ( 2.339)	Loss 6.7336e-01 (6.7336e-01)	Acc@1  80.47 ( 80.47)	Acc@5  97.66 ( 97.66)
Test: [ 5/46]	Time  2.380 ( 2.339)	Loss 6.7446e-01 (7.2066e-01)	Acc@1  83.59 ( 81.51)	Acc@5  96.88 ( 97.01)
Test: [10/46]	Time  2.447 ( 2.375)	Loss 5.8640e-01 (6.9900e-01)	Acc@1  86.72 ( 82.95)	Acc@5  97.66 ( 96.59)
Test: [15/46]	Time  2.436 ( 2.374)	Loss 7.1913e-01 (7.0423e-01)	Acc@1  85.16 ( 83.54)	Acc@5  95.31 ( 96.19)
Test: [20/46]	Time  2.409 ( 2.375)	Loss 7.8519e-01 (7.1380e-01)	Acc@1  85.16 ( 83.22)	Acc@5  94.53 ( 96.09)
Test: [25/46]	Time  2.378 ( 2.379)	Loss 7.3424e-01 (7.0880e-01)	Acc@1  78.12 ( 82.96)	Acc@5  98.44 ( 96.27)
Test: [30/46]	Time  2.379 ( 2.373)	Loss 7.4703e-01 (7.1449e-01)	Acc@1  81.25 ( 82.74)	Acc@5  96.88 ( 96.17)
Test: [35/46]	Time  2.382 ( 2.383)	Loss 6.3819e-01 (7.0734e-01)	Acc@1  86.72 ( 82.75)	Acc@5  95.31 ( 96.18)
T

Using center-crop at test time improved the test accuracy to $82.620\%$!

## Resnetv2-448

In [13]:
from datasets import cub_dataset

# These assignments overwrite the notebook-level epochs/batch_size, so any cell
# run after this one sees batch_size = 48.
epochs = 50
batch_size = 48

# Evaluation pipeline at 448x448.
# NOTE(review): RandomHorizontalFlip in the *test* transform makes evaluation
# non-deterministic (results vary from run to run). It is kept so the recorded
# numbers are not silently changed; consider dropping it.
test_transform = transforms.Compose([  # original author's note: Accuracy:85.524%
    transforms.Resize((448, 448)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std),
])

# Training-time pipeline: resize to 448x448, random horizontal flip, normalise.
transform = transforms.Compose([  # original author's note: Accuracy:85.524%
    transforms.Resize((448, 448)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std),
])

# Load Data
train_loader, val_loader, test_loader = cub_dataset(
    bs=batch_size, data_transform=transform, test_transform=test_transform
)
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

model = myresnetv2_task1(num_classes=200)
model = model.to(device)

# Restore the fine-tuned weights. map_location makes the load work on the CPU
# fallback as well, even if the checkpoint was saved from a CUDA device.
path = "/home/hashmat.malik/Fall 2021/CV703 Lab/Week5/datasets/modelresnetv2448_submission_task1_exp5_best.pth.tar"
checkpoint = torch.load(path, map_location=device)
model.load_state_dict(checkpoint['state_dict'])

# Split the parameters into the classifier head and the backbone so they can
# get different learning rates. This was assigned to `omy_list` by mistake,
# which raised NameError on the two lines that read `my_list`.
my_list = ['head.weight', 'head.bias']
named_parameters = list(model.named_parameters())
params = [kv for kv in named_parameters if kv[0] in my_list]
base_params = [kv for kv in named_parameters if kv[0] not in my_list]

# Two parameter groups: head at lr=1e-4, backbone at lr=1e-5.
optimizer = optim.Adam([
    {'params': [kv[1] for kv in params], 'lr': 0.0001, 'betas': (0.5, 0.999)},
    {'params': [kv[1] for kv in base_params], 'lr': 0.00001, 'betas': (0.5, 0.999)},
])

criterion = torch.nn.CrossEntropyLoss()

# Evaluation-only run (is_train=False).
train_model(epochs, train_loader, val_loader, test_loader, optimizer, criterion, model, 'resnetv2448_submission_task1_exp5', is_train=False)



Test: [ 0/12]	Time  0.716 ( 0.716)	Loss 5.4752e-01 (5.4752e-01)	Acc@1  79.17 ( 79.17)	Acc@5  97.92 ( 97.92)
Test: [ 5/12]	Time  0.716 ( 0.724)	Loss 8.7025e-01 (6.5456e-01)	Acc@1  77.08 ( 81.94)	Acc@5  93.75 ( 96.18)
Test: [10/12]	Time  0.703 ( 0.728)	Loss 6.5187e-01 (6.1897e-01)	Acc@1  83.33 ( 83.90)	Acc@5  97.92 ( 96.59)
 * Acc@1 84.201 Acc@5 96.701
Test: [  0/121]	Time  0.766 ( 0.766)	Loss 3.7180e-01 (3.7180e-01)	Acc@1  93.75 ( 93.75)	Acc@5  97.92 ( 97.92)
Test: [  5/121]	Time  0.715 ( 0.733)	Loss 6.2226e-01 (4.4676e-01)	Acc@1  89.58 ( 89.24)	Acc@5  97.92 ( 97.92)
Test: [ 10/121]	Time  0.707 ( 0.723)	Loss 6.2391e-01 (5.2920e-01)	Acc@1  87.50 ( 87.69)	Acc@5  97.92 ( 97.73)
Test: [ 15/121]	Time  0.679 ( 0.713)	Loss 8.2642e-01 (5.8505e-01)	Acc@1  83.33 ( 86.20)	Acc@5  93.75 ( 97.40)
Test: [ 20/121]	Time  0.671 ( 0.705)	Loss 6.1384e-01 (6.0829e-01)	Acc@1  89.58 ( 85.91)	Acc@5  97.92 ( 96.83)
Test: [ 25/121]	Time  0.675 ( 0.700)	Loss 3.3187e-01 (5.8100e-01)	Acc@1  91.67 ( 86.06)	Acc@5  97

Achieved top-1 accuracy of $84.201\%$ on the validation set and $84.829\%$ on the test set.