# Comparison of different transfer learning strategies


In [None]:
import matplotlib.pyplot as plt
import numpy as np
import os
import time

import torch
import torch.optim as optim
from torch import nn

import torchvision as tv
import torchvision.models as models
from torchvision import transforms, datasets

import warnings
warnings.filterwarnings('ignore')

In [None]:
def evaluate_accuracy(data_iter, net):
    """Return the fraction of samples in ``data_iter`` that ``net`` classifies correctly.

    Args:
        data_iter: Iterable yielding ``(X, y)`` batches of inputs and integer
            class labels.
        net: The model to evaluate. ``net(X)`` is reduced with ``argmax`` over
            dimension 1, so it is expected to return one score per class for
            every sample.

    Returns:
        float: ``correct / total`` over all batches, or ``0.0`` when
        ``data_iter`` yields no samples.

    The model is switched to eval mode for the duration of the call and then
    put back into whatever mode it was in, so calling this between training
    epochs does not silently disable dropout / batch-norm updates afterwards.
    """
    # Feed the batches to the device the model already lives on instead of
    # relying on a module-level global; parameter-free models run on the CPU.
    first_param = next(net.parameters(), None)
    target = first_param.device if first_param is not None else torch.device("cpu")

    was_training = net.training
    net.eval()
    correct, total = 0, 0
    try:
        # No gradients are needed for evaluation; skipping autograd saves
        # memory and time.
        with torch.no_grad():
            for X, y in data_iter:
                X, y = X.to(target), y.to(target)
                correct += (net(X).argmax(dim=1) == y).sum().item()
                total += y.shape[0]
    finally:
        # Restore the caller's mode even if a batch raised.
        net.train(was_training)

    return correct / total if total else 0.0

In [None]:
def train(net, train_iter, test_iter, trainer, num_epochs):
    """Train ``net`` and print loss/accuracy after every epoch.

    Args:
        net: Model to train; it is moved to the module-level ``device``.
        train_iter: Iterable of ``(X, y)`` training batches.
        test_iter: Iterable of ``(X, y)`` batches passed to
            ``evaluate_accuracy`` at the end of each epoch.
        trainer: Optimizer exposing ``zero_grad()`` and ``step()``.
        num_epochs: Number of passes over ``train_iter``.

    Returns:
        list[float]: One entry per epoch holding the loss *summed* over all
        training samples of that epoch (the printed loss is this value
        divided by the number of samples).
    """
    net.to(device)
    # reduction='sum' so that dividing the accumulated loss by the sample
    # count gives a true per-sample average even if the last batch is short.
    loss = nn.CrossEntropyLoss(reduction='sum')

    loss_hist = []

    for epoch in range(num_epochs):
        # The evaluation pass at the end of an epoch may leave the model in
        # eval mode, so training mode has to be re-enabled for every epoch,
        # not just once before the loop.
        net.train()
        train_l_sum, train_acc_sum, n, start = 0.0, 0.0, 0, time.time()

        for X, y in train_iter:
            X, y = X.to(device), y.to(device)
            trainer.zero_grad()

            y_hat = net(X)
            l = loss(y_hat, y)
            l.backward()
            trainer.step()

            train_l_sum += l.item()
            train_acc_sum += (y_hat.argmax(dim=1) == y).sum().item()
            n += y.shape[0]

        test_acc = evaluate_accuracy(test_iter, net)
        print('-' * 20)
        print(f'epoch {epoch + 1}, loss {train_l_sum / n:.4f}, train acc {train_acc_sum / n:.3f}'
              f', test acc {test_acc:.3f}, time {time.time() - start:.1f} sec')

        loss_hist.append(train_l_sum)

    return loss_hist

## Data preparation

In [None]:
# Dataset root; the '/train' and '/val' sub-folders are appended where the
# datasets are created.
data_dir = '/kaggle/input/hymenoptera-data/hymenoptera_data'
# Number of images per mini-batch for every data loader in this notebook.
BATCH_SIZE = 4

In [None]:
# Preprocessing for the 224x224 models: convert to grayscale replicated over
# three channels, resize, then convert the image to a tensor.
data_transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=3),
    transforms.Resize(size=(224, 224)),
    transforms.ToTensor(),
])

In [None]:
# Training and validation splits share the same preprocessing.
train_dataset = datasets.ImageFolder(data_dir + '/train', data_transform)
test_dataset = datasets.ImageFolder(data_dir + '/val', data_transform)

train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
# The test loader must read the held-out split (it previously wrapped the
# training set); shuffling is pointless for evaluation.
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [None]:
# Shuffle the training batches every epoch: ImageFolder lists its samples
# class by class, so an unshuffled pass feeds all images of one class before
# any image of the next and the classifier drifts towards the class seen last.
train_iter = torch.utils.data.DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
# Evaluation order does not affect accuracy, so the test split stays unshuffled.
test_iter = torch.utils.data.DataLoader(test_dataset, batch_size=BATCH_SIZE)

In [None]:
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
device

'cuda'

## ResNet18



In [None]:
# ResNet-18 with pretrained weights as the starting point.
model = models.resnet18(pretrained=True)

In [None]:
# Freeze every pretrained parameter so no gradients are computed for them.
for param in model.parameters():
    param.requires_grad_(False)

In [None]:
# Replace the classification head with a fresh 2-class layer. The input width
# is read from the layer being replaced instead of being hard-coded, and the
# new layer's parameters are trainable by default.
model.fc = nn.Linear(in_features=model.fc.in_features, out_features=2)

In [None]:
# Collect (and list) only the parameters that still require gradients;
# these are the ones handed to the optimizer.
print("Params to learn:")
params_to_update = []
for name, param in model.named_parameters():
    if not param.requires_grad:
        continue
    print("\t", name)
    params_to_update.append(param)

Params to learn:
	 fc.weight
	 fc.bias


In [None]:
# Adam over the trainable parameters only.
trainer = optim.Adam(params_to_update, lr=1e-3)

In [None]:
# Train the ResNet-18 head for 50 epochs and keep the per-epoch summed loss.
loss_rn18 = train(model, train_iter, test_iter, trainer, num_epochs=50)

--------------------
epoch 1, loss 0.9706, train acc 0.824, test acc 0.542, time 4.3 sec
--------------------
epoch 2, loss 3.5243, train acc 0.508, test acc 0.542, time 4.5 sec
--------------------
epoch 3, loss 1.9035, train acc 0.590, test acc 0.542, time 4.3 sec
--------------------
epoch 4, loss 1.0680, train acc 0.664, test acc 0.569, time 4.4 sec
--------------------
epoch 5, loss 0.7256, train acc 0.730, test acc 0.608, time 4.3 sec
--------------------
epoch 6, loss 0.5508, train acc 0.762, test acc 0.641, time 4.3 sec
--------------------
epoch 7, loss 0.4586, train acc 0.799, test acc 0.647, time 4.7 sec
--------------------
epoch 8, loss 0.4023, train acc 0.811, test acc 0.673, time 4.4 sec
--------------------
epoch 9, loss 0.3627, train acc 0.828, test acc 0.699, time 4.4 sec
--------------------
epoch 10, loss 0.3321, train acc 0.832, test acc 0.725, time 4.3 sec
--------------------
epoch 11, loss 0.3067, train acc 0.844, test acc 0.732, time 4.3 sec
-------------------

## VGG16


In [None]:
# VGG-16 with pretrained weights as the starting point.
model2 = models.vgg16(pretrained=True)

In [None]:
# Freeze every pretrained parameter so no gradients are computed for them.
for param in model2.parameters():
    param.requires_grad_(False)

In [None]:
# Swap the last classifier layer for a fresh 2-class layer. Index the
# Sequential container directly rather than writing into its private
# `_modules` dict, and take the input width from the layer being replaced.
model2.classifier[6] = nn.Linear(in_features=model2.classifier[6].in_features, out_features=2)

In [None]:
# Collect (and list) only the parameters that still require gradients;
# these are the ones handed to the optimizer.
print("Params to learn:")
params_to_update2 = []
for name, param in model2.named_parameters():
    if not param.requires_grad:
        continue
    print("\t", name)
    params_to_update2.append(param)

Params to learn:
	 classifier.6.weight
	 classifier.6.bias


In [None]:
# Adam over the trainable parameters only.
trainer2 = optim.Adam(params_to_update2, lr=1e-3)

In [None]:
# Train the VGG-16 head for 50 epochs and keep the per-epoch summed loss.
loss_vgg16 = train(model2, train_iter, test_iter, trainer2, num_epochs=50)

--------------------
epoch 1, loss 1.4314, train acc 0.816, test acc 0.542, time 5.3 sec
--------------------
epoch 2, loss 2.7290, train acc 0.594, test acc 0.699, time 5.2 sec
--------------------
epoch 3, loss 0.8074, train acc 0.787, test acc 0.752, time 5.3 sec
--------------------
epoch 4, loss 0.4061, train acc 0.869, test acc 0.784, time 5.2 sec
--------------------
epoch 5, loss 0.2582, train acc 0.914, test acc 0.804, time 5.3 sec
--------------------
epoch 6, loss 0.1881, train acc 0.951, test acc 0.817, time 5.3 sec
--------------------
epoch 7, loss 0.1458, train acc 0.963, test acc 0.824, time 5.2 sec
--------------------
epoch 8, loss 0.1163, train acc 0.971, test acc 0.837, time 5.1 sec
--------------------
epoch 9, loss 0.0946, train acc 0.975, test acc 0.850, time 5.2 sec
--------------------
epoch 10, loss 0.0785, train acc 0.988, test acc 0.863, time 5.1 sec
--------------------
epoch 11, loss 0.0664, train acc 0.988, test acc 0.869, time 5.2 sec
-------------------

## Inception_v3



In [None]:
# Inception v3 with pretrained weights as the starting point.
model3 = models.inception_v3(pretrained=True)

In [None]:
# Freeze every pretrained parameter so no gradients are computed for them.
for param in model3.parameters():
    param.requires_grad_(False)

In [None]:
# Replace the classification head with a fresh 2-class layer. The input width
# is read from the layer being replaced instead of being hard-coded, and the
# new layer's parameters are trainable by default.
model3.fc = nn.Linear(in_features=model3.fc.in_features, out_features=2)

In [None]:
# Turn off the auxiliary-classifier flag on the Inception model.
# NOTE(review): presumably needed because train() feeds net(X) straight into
# the loss and argmax, which expect a single logits tensor rather than a
# (logits, aux_logits) pair — confirm against torchvision's Inception3.
model3.aux_logits = False 

In [None]:
# Collect (and list) only the parameters that still require gradients;
# these are the ones handed to the optimizer.
print("Params to learn:")
params_to_update3 = []
for name, param in model3.named_parameters():
    if not param.requires_grad:
        continue
    print("\t", name)
    params_to_update3.append(param)

Params to learn:
	 fc.weight
	 fc.bias


In [None]:
# Adam over the trainable parameters only.
trainer3 = optim.Adam(params_to_update3, lr=1e-3)

In [None]:
# Preprocessing for Inception v3: same steps as `data_transform`, but the
# images are resized to 299x299.
data_transform_iv3 = transforms.Compose([
    transforms.Grayscale(num_output_channels=3),
    transforms.Resize(size=(299, 299)),  # only difference from other data transform
    transforms.ToTensor(),
])

In [None]:
# Training and validation splits with the 299x299 Inception preprocessing.
train_dataset_iv3 = datasets.ImageFolder(data_dir + '/train', data_transform_iv3)
test_dataset_iv3 = datasets.ImageFolder(data_dir + '/val', data_transform_iv3)

train_loader_iv3 = torch.utils.data.DataLoader(train_dataset_iv3, batch_size=BATCH_SIZE, shuffle=True)
# The test loader must read the held-out split (it previously wrapped the
# training set); shuffling is pointless for evaluation.
test_loader_iv3 = torch.utils.data.DataLoader(test_dataset_iv3, batch_size=BATCH_SIZE, shuffle=False)

In [None]:
# Shuffle the training batches every epoch: ImageFolder lists its samples
# class by class, so an unshuffled pass feeds all images of one class before
# any image of the next and the classifier drifts towards the class seen last.
train_iter_iv3 = torch.utils.data.DataLoader(train_dataset_iv3, batch_size=BATCH_SIZE, shuffle=True)
# Evaluation order does not affect accuracy, so the test split stays unshuffled.
test_iter_iv3 = torch.utils.data.DataLoader(test_dataset_iv3, batch_size=BATCH_SIZE)

In [None]:
# Train the Inception v3 head for 50 epochs on the 299x299 iterators and keep
# the per-epoch summed loss.
loss_iv3 = train(model3, train_iter_iv3, test_iter_iv3, trainer3, num_epochs=50)

--------------------
epoch 1, loss 1.1302, train acc 0.816, test acc 0.542, time 6.5 sec
--------------------
epoch 2, loss 3.1919, train acc 0.529, test acc 0.542, time 6.0 sec
--------------------
epoch 3, loss 1.9671, train acc 0.611, test acc 0.542, time 6.2 sec
--------------------
epoch 4, loss 1.2330, train acc 0.656, test acc 0.575, time 6.3 sec
--------------------
epoch 5, loss 0.8387, train acc 0.734, test acc 0.608, time 6.1 sec
--------------------
epoch 6, loss 0.5553, train acc 0.787, test acc 0.647, time 6.1 sec
--------------------
epoch 7, loss 0.3922, train acc 0.832, test acc 0.699, time 6.1 sec
--------------------
epoch 8, loss 0.3082, train acc 0.865, test acc 0.739, time 6.0 sec
--------------------
epoch 9, loss 0.2568, train acc 0.893, test acc 0.752, time 6.8 sec
--------------------
epoch 10, loss 0.2194, train acc 0.898, test acc 0.778, time 6.2 sec
--------------------
epoch 11, loss 0.1908, train acc 0.914, test acc 0.784, time 6.2 sec
-------------------

## DenseNet161



In [None]:
# DenseNet-161 with pretrained weights as the starting point.
model4 = models.densenet161(pretrained=True)

In [None]:
# Freeze every pretrained parameter so no gradients are computed for them.
for param in model4.parameters():
    param.requires_grad_(False)

In [None]:
# Replace the classification head with a fresh 2-class layer. The input width
# is read from the layer being replaced instead of being hard-coded, and the
# new layer's parameters are trainable by default.
model4.classifier = nn.Linear(in_features=model4.classifier.in_features, out_features=2)

In [None]:
# Collect (and list) only the parameters that still require gradients;
# these are the ones handed to the optimizer.
print("Params to learn:")
params_to_update4 = []
for name, param in model4.named_parameters():
    if not param.requires_grad:
        continue
    print("\t", name)
    params_to_update4.append(param)

Params to learn:
	 classifier.weight
	 classifier.bias


In [None]:
# Adam over the trainable parameters only.
trainer4 = optim.Adam(params_to_update4, lr=1e-3)

In [None]:
# Train the DenseNet-161 head for 50 epochs and keep the per-epoch summed loss.
loss_dn = train(model4, train_iter, test_iter, trainer4, num_epochs=50)

--------------------
epoch 1, loss 1.0182, train acc 0.832, test acc 0.542, time 7.9 sec
--------------------
epoch 2, loss 3.9099, train acc 0.520, test acc 0.542, time 7.0 sec
--------------------
epoch 3, loss 1.8495, train acc 0.623, test acc 0.542, time 7.1 sec
--------------------
epoch 4, loss 0.9577, train acc 0.697, test acc 0.582, time 7.0 sec
--------------------
epoch 5, loss 0.5069, train acc 0.779, test acc 0.686, time 7.3 sec
--------------------
epoch 6, loss 0.3213, train acc 0.865, test acc 0.771, time 7.1 sec
--------------------
epoch 7, loss 0.2490, train acc 0.885, test acc 0.817, time 7.1 sec
--------------------
epoch 8, loss 0.2119, train acc 0.910, test acc 0.824, time 7.0 sec
--------------------
epoch 9, loss 0.1856, train acc 0.926, test acc 0.830, time 7.5 sec
--------------------
epoch 10, loss 0.1647, train acc 0.943, test acc 0.856, time 6.9 sec
--------------------
epoch 11, loss 0.1474, train acc 0.947, test acc 0.850, time 7.0 sec
-------------------

## Loss comparison



In [None]:
import pandas as pd

In [None]:
# One loss history per model, in the same order as the labels below.
data = [loss_rn18, loss_vgg16, loss_iv3, loss_dn]
# Despite its name, `columns` holds the row labels: each model becomes one
# row and each epoch one column of the table.
columns = ['Rn18', 'VGG16', 'IV3', 'DN161']
df = pd.DataFrame(data=data, index=columns)
print(df)

               0           1           2           3           4           5   \
Rn18   236.831179  859.926310  464.465327  260.580550  177.052047  134.392575   
VGG16  349.255499  665.863910  196.999362   99.089470   63.002495   45.901000   
IV3    275.767786  778.816193  479.983364  300.861796  204.648105  135.485961   
DN161  248.437378  954.006353  451.287361  233.682808  123.688434   78.395890   

               6          7          8          9   ...         40         41  \
Rn18   111.895554  98.162074  88.509186  81.025123  ...  13.429202  12.513911   
VGG16   35.583642  28.383970  23.085154  19.160355  ...   0.843555   0.808153   
IV3     95.688781  75.205246  62.660937  53.531170  ...   3.050430   2.863726   
DN161   60.748723  51.702898  45.278765  40.177705  ...   2.037760   1.950984   

              42         43        44        45        46        47        48  \
Rn18   11.631009  10.778053  9.953800  9.158625  8.394855  7.666903  6.981118   
VGG16   0.774688   0.74301