In [4]:
from datasets import dog_dataset, cub_dataset, food_dataset
from models.models_to_finetune import deit_small_patch16_224, myresnetv2_task1, myresnetv2_task2, myresnetv2_for_c_loss
import PIL
import numpy as np
from tqdm import tqdm
import torch
import torch.optim as optim
from torchvision import transforms
import config
import sys
import math
from run import train_model
from vit.vit_pytorch.nest import NesT
import timm

In [19]:
# Per-channel statistics handed to transforms.Normalize in the pipelines of this notebook.
mean, std = (0.485, 0.456, 0.406), (0.229, 0.224, 0.225)

In [None]:
def _build_pipeline(*image_ops):
    """Chain ``image_ops`` and finish with tensor conversion and normalisation.

    Every pipeline defined in this cell ends with the same two steps:
    ``ToTensor`` followed by ``Normalize`` using the notebook-level ``mean``
    and ``std``.  Only the leading PIL-image operations differ.
    """
    shared_tail = [transforms.ToTensor(), transforms.Normalize(mean=mean, std=std)]
    return transforms.Compose([*image_ops, *shared_tail])


# Plain resize, no augmentation.  Recorded note: Accuracy:87.622%
data_transform = _build_pipeline(
    transforms.Resize((224, 224)),
)

# Centre crop (no resize) plus colour jitter.  Recorded note: Accuracy:83.263%
data_transform1 = _build_pipeline(
    transforms.CenterCrop(224),
    transforms.ColorJitter(brightness=0.2, contrast=0.3, saturation=0.1, hue=0.1),
)

# Resize plus random horizontal flip.  Recorded note: Accuracy:85.524%
data_transform2 = _build_pipeline(
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(p=0.5),
)

# Resize, random rotation of up to 20 degrees, Gaussian blur.
data_transform3 = _build_pipeline(
    transforms.Resize((224, 224)),
    transforms.RandomRotation(20),
    transforms.GaussianBlur(3, sigma=(0.1, 2.0)),
)

# Same recipe as data_transform3, kept under its own name as a separate object.
data_transform4 = _build_pipeline(
    transforms.Resize((224, 224)),
    transforms.RandomRotation(20),
    transforms.GaussianBlur(3, sigma=(0.1, 2.0)),
)

# 384x384 centre crop with rotation, flip and blur.
data_transform5 = _build_pipeline(
    transforms.CenterCrop(384),
    transforms.RandomRotation(20),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.GaussianBlur(3, sigma=(0.1, 2.0)),
)

In [7]:
# --- Run configuration -------------------------------------------------------
# Prefer the first GPU; fall back to the CPU when CUDA is unavailable.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# Fix the torch RNG seed before the data loaders are created.
torch.manual_seed(42)

batch_size = 128
epochs = 50

# The preprocessing pipeline is taken from the project's config module.
transform = config.data_transform4

# --- Data --------------------------------------------------------------------
# cub_dataset returns the train / validation / test loaders, in that order.
train_loader, val_loader, test_loader = cub_dataset(
    bs=batch_size,
    data_transform=transform,
)

## Using DeiT transformer as the backbone

In [13]:
# Build the DeiT-small backbone and restore the fine-tuned weights for evaluation.
model = deit_small_patch16_224(pretrained=True, use_top_n_heads=12, use_patch_outputs=False)

# Swap the classification head for a 200-way linear layer: the loaders in this
# notebook come from cub_dataset, and the head must match the checkpoint below.
model.head = torch.nn.Linear(in_features=model.head.in_features, out_features=200)
model.head.apply(model._init_weights)
model.to(device)

path = "/home/hashmat.malik/Fall 2021/CV703 Lab/Week5/datasets/Task1:cub_dataset_weights/Exp1/modeldiet4_best.pth.tar"
# map_location remaps the stored tensors onto the selected device, so a
# checkpoint written on a GPU still loads when this cell runs on the CPU.
checkpoint = torch.load(path, map_location=device)
model.load_state_dict(checkpoint['state_dict'])

# train_model expects a loss and an optimizer even for an evaluation-only run.
criterion = torch.nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001, betas=(0.5, 0.999))


_IncompatibleKeys(missing_keys=['head.weight', 'head.bias'], unexpected_keys=[])


In [14]:
# Evaluation-only pass (is_train=False) over the loaders with the restored model.
# NOTE(review): the tag 'resnet' is passed although the model is DeiT; confirm
# how run.train_model uses this argument.
train_model(
    epochs,
    train_loader,
    val_loader,
    test_loader,
    optimizer,
    criterion,
    model,
    'resnet',
    is_train=False,
)

Test: [0/4]	Time  1.651 ( 1.651)	Loss 8.7371e-01 (8.7371e-01)	Acc@1  75.78 ( 75.78)	Acc@5  92.97 ( 92.97)
 * Acc@1 74.023 Acc@5 92.578
Test: [ 0/46]	Time  1.186 ( 1.186)	Loss 1.1993e+00 (1.1993e+00)	Acc@1  68.75 ( 68.75)	Acc@5  89.06 ( 89.06)
Test: [ 5/46]	Time  1.134 ( 1.136)	Loss 9.6994e-01 (9.8503e-01)	Acc@1  77.34 ( 74.87)	Acc@5  92.97 ( 92.84)
Test: [10/46]	Time  1.107 ( 1.119)	Loss 6.5334e-01 (9.2885e-01)	Acc@1  81.25 ( 76.14)	Acc@5  97.66 ( 93.39)
Test: [15/46]	Time  1.116 ( 1.116)	Loss 1.0024e+00 (9.3397e-01)	Acc@1  75.78 ( 75.44)	Acc@5  90.62 ( 93.12)
Test: [20/46]	Time  1.135 ( 1.119)	Loss 8.3974e-01 (9.2190e-01)	Acc@1  74.22 ( 75.41)	Acc@5  96.09 ( 93.38)
Test: [25/46]	Time  1.139 ( 1.124)	Loss 1.0838e+00 (9.2694e-01)	Acc@1  74.22 ( 75.39)	Acc@5  90.62 ( 93.12)
Test: [30/46]	Time  1.091 ( 1.124)	Loss 7.0929e-01 (9.2233e-01)	Acc@1  78.91 ( 75.33)	Acc@5  96.88 ( 93.20)
Test: [35/46]	Time  1.130 ( 1.120)	Loss 8.4988e-01 (9.3025e-01)	Acc@1  71.88 ( 75.13)	Acc@5  93.75 ( 93.01)
T

Achieved top-1 accuracy of 74.023% on the validation set and 75.492% on the test set.

## Using CaiT transformer as the backbone

In [15]:
# Evaluate the fine-tuned CaiT-XXS24 (224x224) checkpoint.
transform = config.data_transform4

# Load Data
train_loader, val_loader, test_loader = cub_dataset(bs=batch_size, data_transform=transform)
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

model = timm.create_model("cait_xxs24_224", pretrained=True)
# Swap the classification head for a 200-way linear layer: the loaders come
# from cub_dataset, and the head must match the checkpoint below.
model.head = torch.nn.Linear(in_features=model.head.in_features, out_features=200)
model.head.apply(model._init_weights)
model.to(device)

path = "/home/hashmat.malik/Fall 2021/CV703 Lab/Week5/datasets/Task1:cub_dataset_weights/Exp2/modelcait4_best.pth.tar"
# map_location remaps the stored tensors onto the selected device, so a
# checkpoint written on a GPU still loads when this cell runs on the CPU.
checkpoint = torch.load(path, map_location=device)
model.load_state_dict(checkpoint['state_dict'])

# train_model expects a loss and an optimizer even for an evaluation-only run.
criterion = torch.nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001, betas=(0.5, 0.999))

In [16]:
# Evaluation-only pass (is_train=False) over the loaders with the restored model.
# NOTE(review): the tag 'resnet' is passed although the model is CaiT; confirm
# how run.train_model uses this argument.
train_model(
    epochs,
    train_loader,
    val_loader,
    test_loader,
    optimizer,
    criterion,
    model,
    'resnet',
    is_train=False,
)

Test: [0/4]	Time  1.734 ( 1.734)	Loss 1.2184e+00 (1.2184e+00)	Acc@1  69.53 ( 69.53)	Acc@5  87.50 ( 87.50)
 * Acc@1 72.656 Acc@5 92.188
Test: [ 0/46]	Time  1.194 ( 1.194)	Loss 8.0442e-01 (8.0442e-01)	Acc@1  78.91 ( 78.91)	Acc@5  96.88 ( 96.88)
Test: [ 5/46]	Time  1.125 ( 1.172)	Loss 1.1397e+00 (9.8487e-01)	Acc@1  70.31 ( 73.83)	Acc@5  94.53 ( 94.66)
Test: [10/46]	Time  1.187 ( 1.162)	Loss 1.0279e+00 (9.8428e-01)	Acc@1  75.00 ( 74.79)	Acc@5  92.97 ( 93.96)
Test: [15/46]	Time  1.208 ( 1.156)	Loss 9.3267e-01 (9.6391e-01)	Acc@1  75.00 ( 75.34)	Acc@5  94.53 ( 94.14)
Test: [20/46]	Time  1.085 ( 1.138)	Loss 1.2021e+00 (9.7285e-01)	Acc@1  71.88 ( 75.37)	Acc@5  90.62 ( 93.75)
Test: [25/46]	Time  1.104 ( 1.127)	Loss 8.0677e-01 (9.7281e-01)	Acc@1  85.16 ( 75.78)	Acc@5  92.97 ( 93.57)
Test: [30/46]	Time  1.078 ( 1.130)	Loss 8.7290e-01 (9.7399e-01)	Acc@1  78.12 ( 75.53)	Acc@5  94.53 ( 93.65)
Test: [35/46]	Time  1.097 ( 1.121)	Loss 1.0205e+00 (9.7324e-01)	Acc@1  72.66 ( 75.35)	Acc@5  92.97 ( 93.71)
T

Achieved top-1 accuracy of 72.656% on the validation set and 74.974% on the test set.

In [20]:
# Evaluate the fine-tuned CaiT-XXS24 (384x384) checkpoint.
# The 384x384 pipeline is declared in this cell so the cell can run on its own.
data_transform5 = transforms.Compose([
        transforms.CenterCrop(384),
        transforms.RandomRotation(20),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.GaussianBlur(3, sigma=(0.1, 2.0)),
        transforms.ToTensor(),
        transforms.Normalize(mean=mean, std=std)
    ])

# Load Data
# NOTE(review): the recorded run of the next cell fails on the test pass with
# "Input image size (224*224) doesn't match model (384*384)". Presumably
# cub_dataset does not apply data_transform to the test split; verify there.
train_loader, val_loader, test_loader = cub_dataset(bs=batch_size, data_transform=data_transform5)
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

model = timm.create_model("cait_xxs24_384", pretrained=True)
# Swap the classification head for a 200-way linear layer: the loaders come
# from cub_dataset, and the head must match the checkpoint below.
model.head = torch.nn.Linear(in_features=model.head.in_features, out_features=200)
model.head.apply(model._init_weights)
model.to(device)

path = "/home/hashmat.malik/Fall 2021/CV703 Lab/Week5/datasets/Task1:cub_dataset_weights/Exp3/modelcait_xxs24_3845_best.pth.tar"
# map_location remaps the stored tensors onto the selected device, so a
# checkpoint written on a GPU still loads when this cell runs on the CPU.
checkpoint = torch.load(path, map_location=device)
model.load_state_dict(checkpoint['state_dict'])

# train_model expects a loss and an optimizer even for an evaluation-only run.
criterion = torch.nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001, betas=(0.5, 0.999))

In [22]:
# Evaluation-only pass (is_train=False) over the loaders with the restored model.
# NOTE(review): the tag 'resnet' is passed although the model is CaiT; confirm
# how run.train_model uses this argument.
train_model(
    epochs,
    train_loader,
    val_loader,
    test_loader,
    optimizer,
    criterion,
    model,
    'resnet',
    is_train=False,
)

Test: [0/4]	Time  3.909 ( 3.909)	Loss 6.9312e-01 (6.9312e-01)	Acc@1  84.38 ( 84.38)	Acc@5  96.88 ( 96.88)
 * Acc@1 82.617 Acc@5 96.680


AssertionError: Input image size (224*224) doesn't match model (384*384).

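Achieved top-1 accuracy of 82.617% on the validation set. The test-set evaluation did not complete because of the AssertionError above (the model expects 384×384 inputs but received 224×224 images), so no test accuracy is available for this experiment.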