<a href="https://colab.research.google.com/github/GirishShanmugam/transfer-learning/blob/master/Transfer_Learning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# AlexNet Model architecture playground 

In [178]:
# get AlexNet architecture since AlexNet has only 5 convolutional layers
import torch
# Fetch the AlexNet definition from the torchvision repo (tag v0.6.0) through torch.hub,
# asking for the pretrained weights.
model = torch.hub.load('pytorch/vision:v0.6.0', 'alexnet', pretrained=True)
# Put the network in evaluation mode; as the last expression of the cell this also
# displays the module structure.
model.eval()

Using cache found in /root/.cache/torch/hub/pytorch_vision_v0.6.0


AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
 

In [0]:
# List the names of the tensors stored in the model's state dict.
model.state_dict().keys()

odict_keys(['features.0.weight', 'features.0.bias', 'features.3.weight', 'features.3.bias', 'features.6.weight', 'features.6.bias', 'features.8.weight', 'features.8.bias', 'features.10.weight', 'features.10.bias', 'classifier.1.weight', 'classifier.1.bias', 'classifier.4.weight', 'classifier.4.bias', 'classifier.6.weight', 'classifier.6.bias'])

In [0]:
# Shape of the first conv layer's (features.0) weight tensor.
model.state_dict()['features.0.weight'].shape

torch.Size([64, 3, 11, 11])

In [0]:
# Shape of the first conv layer's (features.0) bias tensor.
model.state_dict()['features.0.bias'].shape

torch.Size([64])

In [0]:
# Weight shape of features.0 again, this time written to stdout with print().
print(model.state_dict()['features.0.weight'].shape)

torch.Size([64, 3, 11, 11])


### Helper function to retain layers from AlexNet model and reinitialise other layers to random weights

In [179]:
# First 10 bias values of the 1st conv layer (features.0) before modifying
model.state_dict()['features.0.bias'][:10]

tensor([-0.9705, -2.8070, -0.0371, -0.0795, -0.1159,  0.0252, -0.0752, -1.4181,
         1.6454, -0.0990])

In [180]:
# First 10 bias values of the 4th conv layer (features.8) before modifying
model.state_dict()['features.8.bias'][:10]

tensor([-0.0629,  0.1260,  0.2991,  0.1123,  0.2853,  0.1280,  0.1828, -0.0310,
         0.5452,  0.1565])

In [0]:
def retain_layers(model, num_layers_retain):
  """Keep the first ``num_layers_retain`` AlexNet conv layers and randomise the rest.

  The weight and bias tensors of every later conv layer are overwritten in place
  with samples drawn by ``Tensor.normal_()`` (mean 0, std 1).

  Args:
    model: module whose ``state_dict()`` exposes the AlexNet conv parameter names
      ``features.{0,3,6,8,10}.{weight,bias}``.
    num_layers_retain: number of leading conv layers to leave untouched.
      Values of 5 or more leave every conv layer unchanged.

  Raises:
    ValueError: if ``num_layers_retain`` is negative. A negative value would
      otherwise wrap around through negative indexing and touch unintended tensors.
  """
  # (weight, bias) state-dict names of the 5 conv layers, in network order.
  conv_param_names = [
      ('features.0.weight', 'features.0.bias'),
      ('features.3.weight', 'features.3.bias'),
      ('features.6.weight', 'features.6.bias'),
      ('features.8.weight', 'features.8.bias'),
      ('features.10.weight', 'features.10.bias'),
  ]
  if num_layers_retain < 0:
    raise ValueError(
        'num_layers_retain must be >= 0, got {}'.format(num_layers_retain))

  # Build the state dict once; its tensors alias the model's parameters, so the
  # in-place normal_() calls below change the model itself.
  sd = model.state_dict()
  for weight_name, bias_name in conv_param_names[num_layers_retain:]:
    sd[weight_name].normal_()
    sd[bias_name].normal_()

In [0]:
# Retain the first three conv layers; retain_layers overwrites the weights and
# biases of the remaining conv layers (4th and 5th) with random values, in place.
retain_layers(model, 3)

In [0]:
# First 10 bias values of the 1st conv layer (features.0) after modifying --
# this layer was retained, so the values should match the "before" cell.
model.state_dict()['features.0.bias'][:10]

tensor([-0.9705, -2.8070, -0.0371, -0.0795, -0.1159,  0.0252, -0.0752, -1.4181,
         1.6454, -0.0990])

In [0]:
# First 10 bias values of the 4th conv layer (features.8) after modifying --
# this layer was re-randomised, so the values should differ from the "before" cell.
model.state_dict()['features.8.bias'][:10]

tensor([-0.4857,  1.9247, -0.4161,  0.6972, -0.4460,  1.0776, -0.4933,  0.6414,
         0.6040,  1.1095])

# Load cats Vs Dogs dataset
https://www.pluralsight.com/guides/image-classification-with-pytorch


In [0]:
# Mount Google Drive into the Colab runtime at /content/drive
# (prompts for an authorization code when run interactively).
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


In [0]:
!unzip -uq "/content/drive/My Drive/PetImages.zip" -d "/content/PetImages"

In [0]:
import os

import matplotlib.image as img
import matplotlib.pyplot as plt
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader

%matplotlib inline

In [0]:
# Table of training samples read from train.csv; its rows are later unpacked as
# (image file name, label) pairs by the dataset class.
labels = pd.read_csv('/content/PetImages/PetImages/train.csv')

# Directories holding the training and test images.
train_path = '/content/PetImages/PetImages/train/'
test_path = '/content/PetImages/PetImages/test/'

In [0]:
class CatsDogsDataset(Dataset):
    """Dataset that reads images from a directory using a table of (file name, label) rows."""

    def __init__(self, data, path , transform = None):
        """Store the sample table, the image directory and the optional transform.

        Args:
            data: object exposing ``.values`` (e.g. a DataFrame); each row is
                unpacked as ``(image file name, label)``.
            path: directory that contains the image files.
            transform: optional callable applied to each loaded image.
        """
        super().__init__()
        self.data = data.values
        self.path = path
        self.transform = transform

    def __len__(self):
        """Return the number of rows in the sample table."""
        return len(self.data)

    def __getitem__(self,index):
        """Load the image for row ``index`` and return ``(image, label)``."""
        file_name, target = self.data[index]
        picture = img.imread(os.path.join(self.path, file_name))
        if self.transform is None:
            # No transform configured: hand back the image exactly as read.
            return picture, target
        return self.transform(picture), target

In [0]:
size=224

# Per-channel normalisation shared by both pipelines (the mean/std values commonly
# used with torchvision's pretrained models).
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

# Training pipeline: includes a random horizontal flip as data augmentation.
train_transform = transforms.Compose([transforms.ToPILImage(),
                                      transforms.Grayscale(num_output_channels=3),
                                      transforms.CenterCrop(size),
                                      transforms.RandomHorizontalFlip(),
                                      transforms.ToTensor(),
                                      normalize])

# Validation pipeline: same preprocessing but WITHOUT random augmentation, so the
# validation loss/accuracy are deterministic for a given model.
valid_transform = transforms.Compose([transforms.ToPILImage(),
                                      transforms.Grayscale(num_output_channels=3),
                                      transforms.CenterCrop(size),
                                      transforms.ToTensor(),
                                      normalize])

# Stratified 80/20 split of the label table (stratified on the column named '0').
train, valid = train_test_split(labels, stratify=labels['0'], test_size=0.2)

# Both splits read their images from the training directory.
train_data = CatsDogsDataset(train, train_path, train_transform )
valid_data = CatsDogsDataset(valid, train_path, valid_transform )

# Hyper parameters
num_epochs = 35
num_classes = 2
batch_size = 25
learning_rate = 0.001

# CPU or GPU
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

train_loader = DataLoader(dataset = train_data, batch_size = batch_size, shuffle=True, num_workers=0)
# Evaluation does not benefit from shuffling; keep a fixed order.
valid_loader = DataLoader(dataset = valid_data, batch_size = batch_size, shuffle=False, num_workers=0)

In [0]:
# Pull a single batch from the training loader to sanity-check the input tensor shape.
inputs, classes=next(iter(train_loader))
inputs.shape

torch.Size([25, 3, 224, 224])

# Helper functions


In [0]:
%%time
import os

# Function to train and evaluate the performance of the model
def run_and_evaluate_model(device, model, train_loader, valid_loader,
                           num_epochs=35, learning_rate=0.001):
    """Train ``model`` with Adam + cross-entropy, then report validation accuracy.

    For every epoch the per-sample average training and validation losses are
    printed. After the last epoch the accuracy over ``valid_loader`` is printed.

    Args:
        device: torch device the model and the batches are moved to.
        model: network to train; its parameters are updated in place.
        train_loader: iterable of ``(data, target)`` batches used for training.
        valid_loader: iterable of ``(data, target)`` batches used for validation
            and for the final accuracy figure.
        num_epochs: number of passes over ``train_loader`` (default 35).
        learning_rate: Adam learning rate (default 0.001).

    Returns:
        None. Results are reported through ``print``.
    """
    model = model.to(device)
    # Referenced through ``torch.nn`` so the function does not depend on an
    # ``nn`` alias being imported by some other cell.
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    # keeping-track-of-losses
    train_losses = []
    valid_losses = []

    for epoch in range(1, num_epochs + 1):
        # keep-track-of-training-and-validation-loss
        train_loss = 0.0
        valid_loss = 0.0

        # training-the-model
        model.train()
        for data, target in train_loader:
            # move-tensors-to-the-selected-device
            data = data.to(device)
            target = target.to(device)

            # clear-the-gradients-of-all-optimized-variables
            optimizer.zero_grad()
            # forward-pass: compute-predicted-outputs-by-passing-inputs-to-the-model
            output = model(data)
            # calculate-the-batch-loss
            loss = criterion(output, target)
            # backward-pass: compute-gradient-of-the-loss-wrt-model-parameters
            loss.backward()
            # perform-a-single-optimization-step (parameter-update)
            optimizer.step()
            # update-training-loss (batch mean * batch size = summed loss)
            train_loss += loss.item() * data.size(0)

        # validate-the-model; no gradients are needed here, so skip building
        # autograd graphs to save memory and time
        model.eval()
        with torch.no_grad():
            for data, target in valid_loader:
                data = data.to(device)
                target = target.to(device)

                output = model(data)

                loss = criterion(output, target)

                # update-validation-loss
                valid_loss += loss.item() * data.size(0)

        # calculate-average-losses
        train_loss = train_loss / len(train_loader.sampler)
        valid_loss = valid_loss / len(valid_loader.sampler)
        train_losses.append(train_loss)
        valid_losses.append(valid_loss)

        # print-training/validation-statistics
        print('Epoch: {} \tTraining Loss: {:.6f} \tValidation Loss: {:.6f}'.format(
            epoch, train_loss, valid_loss))

    # test-the-model
    model.eval()  # it-disables-dropout
    with torch.no_grad():
        correct = 0
        total = 0
        for images, labels in valid_loader:
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            # index of the largest logit is the predicted class
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        print('Test Accuracy of the model: {} %'.format(100 * correct / total))


# Helper function to retain layers from AlexNet model and reinitialise other layers to random weights
def retain_layers(model, num_layers_retain):
  """Keep the first ``num_layers_retain`` AlexNet conv layers and randomise the rest.

  The weight and bias tensors of every later conv layer are overwritten in place
  with samples drawn by ``Tensor.normal_()`` (mean 0, std 1).

  Args:
    model: module whose ``state_dict()`` exposes the AlexNet conv parameter names
      ``features.{0,3,6,8,10}.{weight,bias}``.
    num_layers_retain: number of leading conv layers to leave untouched.
      Values of 5 or more leave every conv layer unchanged.

  Raises:
    ValueError: if ``num_layers_retain`` is negative. A negative value would
      otherwise wrap around through negative indexing and touch unintended tensors.
  """
  # (weight, bias) state-dict names of the 5 conv layers, in network order.
  conv_param_names = [
      ('features.0.weight', 'features.0.bias'),
      ('features.3.weight', 'features.3.bias'),
      ('features.6.weight', 'features.6.bias'),
      ('features.8.weight', 'features.8.bias'),
      ('features.10.weight', 'features.10.bias'),
  ]
  if num_layers_retain < 0:
    raise ValueError(
        'num_layers_retain must be >= 0, got {}'.format(num_layers_retain))

  # Build the state dict once; its tensors alias the model's parameters, so the
  # in-place normal_() calls below change the model itself.
  sd = model.state_dict()
  for weight_name, bias_name in conv_param_names[num_layers_retain:]:
    sd[weight_name].normal_()
    sd[bias_name].normal_()

# freeze or fine tune weights of n layers
# https://discuss.pytorch.org/t/how-the-pytorch-freeze-network-in-some-layers-only-the-rest-of-the-training/7088
def process_weights(model, start, end, grad_bool):
  """Set ``requires_grad`` on the parameter tensors of layers ``start`` .. ``end - 1``.

  Layers are counted in pairs of consecutive entries of ``model.parameters()``
  (two tensors per layer), so layer ``k`` maps to positions ``2k`` and ``2k + 1``.

  Args:
    model: module whose parameters are updated.
    start: index of the first layer to touch (inclusive).
    end: index one past the last layer to touch (exclusive).
    grad_bool: value assigned to ``requires_grad`` (False freezes, True unfreezes).
  """
  first_position = start*2
  stop_position = end*2
  for position, tensor in enumerate(model.parameters()):
    if first_position <= position < stop_position:
      tensor.requires_grad = grad_bool

CPU times: user 6 µs, sys: 0 ns, total: 6 µs
Wall time: 9.78 µs


# Experiment 1: Train on similar pairs

- Dataset A: ImageNet. Use the pretrained weights of an AlexNet model trained on its 1000 classes, which also include members of the cat family.
- Dataset B - cats vs dogs Kaggle dataset



## baseA
AlexNet model trained on 1000 classes of ImageNet dataset (use pretrained weights)



In [0]:
# get AlexNet architecture since AlexNet has only 5 convolutional layers
import torch
# baseA: AlexNet loaded through torch.hub with its pretrained weights.
baseA = torch.hub.load('pytorch/vision:v0.6.0', 'alexnet', pretrained=True)

Downloading: "https://github.com/pytorch/vision/archive/v0.6.0.zip" to /root/.cache/torch/hub/v0.6.0.zip
Downloading: "https://download.pytorch.org/models/alexnet-owt-4df8aa71.pth" to /root/.cache/torch/hub/checkpoints/alexnet-owt-4df8aa71.pth


HBox(children=(FloatProgress(value=0.0, max=244418560.0), HTML(value='')))




## baseB 
AlexNet architecture trained on the Cats vs Dogs dataset (starting from random weights).



In [0]:
# baseB: same AlexNet architecture, but requested without pretrained weights.
baseB = torch.hub.load('pytorch/vision:v0.6.0', 'alexnet', pretrained=False)

Using cache found in /root/.cache/torch/hub/pytorch_vision_v0.6.0


In [0]:
import torch.nn as nn
# Swap the last classifier layer for a new Linear(4096, 2) head (two outputs: cats vs dogs).
baseB.classifier[6] = nn.Linear(4096,2)
# Evaluation mode; as the last expression of the cell this also displays the module structure.
baseB.eval()

AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
 

In [0]:
 # fine tune weights of all layers
# Unfreeze the parameters of baseB -- the network that is actually trained below
# (not the playground `model` from the top of the notebook).
for param in baseB.parameters():
    param.requires_grad = True

run_and_evaluate_model(device, baseB, train_loader, valid_loader)

  " Skipping tag %s" % (size, len(data), tag)
  " Skipping tag %s" % (size, len(data), tag)
  " Skipping tag %s" % (size, len(data), tag)
  " Skipping tag %s" % (size, len(data), tag)
  " Skipping tag %s" % (size, len(data), tag)
  " Skipping tag %s" % (size, len(data), tag)
  " Skipping tag %s" % (size, len(data), tag)


Epoch: 1 	Training Loss: 0.693365 	Validation Loss: 0.693156
Epoch: 2 	Training Loss: 0.693278 	Validation Loss: 0.693152
Epoch: 3 	Training Loss: 0.693177 	Validation Loss: 0.693152


ValueError: ignored

In [0]:
# Persist baseB's weights so the BnB / BnB+ experiments below can reload them.
torch.save(baseB.state_dict(), '/content/baseB.pt')

## A selffer network BnB: 
Copy the first n layers from baseB and freeze them. Initialize the remaining 5-n layers with random weights and train on dataset B.


In [193]:
# Fall back to the CPU when no GPU is available instead of hard-coding "cuda".
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
bnb = torch.hub.load('pytorch/vision:v0.6.0', 'alexnet', pretrained=False)
bnb.classifier[6] = nn.Linear(4096,2)
# load pretrained model baseB (kept around so every run can start from it again)
baseB_weights = torch.load('/content/baseB.pt', map_location=device)
bnb.load_state_dict(baseB_weights)
bnb.to(device)

# retain n layers and reinitialise weights of other layers randomly
for layer in range(1,6):
  print('Starting for BnB model retaining {} layers'.format(layer))
  num_layers_retain=layer
  # Restore the baseB weights first: the previous iteration re-randomised and
  # trained this network, so without this the "retained" layers would no longer
  # be baseB's layers.
  bnb.load_state_dict(baseB_weights)
  retain_layers(bnb, num_layers_retain)
  # freeze weights of the first n layers (layer indices 0 .. n-1)
  process_weights(bnb, 0, num_layers_retain, False)
  # fine tune weights of the rest of the layers: indices n .. 7, i.e. the
  # remaining conv layers plus the 3 linear layers (8 parameterised layers total)
  process_weights(bnb, num_layers_retain, 8, True)
  # run and evaluate the model
  run_and_evaluate_model(device, bnb, train_loader, valid_loader)
  print('Done for BnB model retaining {} layers'.format(layer))

Using cache found in /root/.cache/torch/hub/pytorch_vision_v0.6.0


## A selffer network BnB+ 
Copy the first n layers from baseB. Initialize the remaining 5-n layers with random weights and train on dataset B, fine-tuning all layers (like BnB, except that the first n layers are also fine-tuned rather than frozen).

In [0]:
# Fall back to the CPU when no GPU is available instead of hard-coding "cuda".
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
bnb_plus = torch.hub.load('pytorch/vision:v0.6.0', 'alexnet', pretrained=False)
# Give bnb_plus (not bnb) the 2-class head so its shapes match the saved baseB weights.
bnb_plus.classifier[6] = nn.Linear(4096,2)
# load pretrained model baseB (kept around so every run can start from it again)
baseB_weights = torch.load('/content/baseB.pt', map_location=device)
bnb_plus.load_state_dict(baseB_weights)
bnb_plus.to(device)

# retain n layers and reinitialise weights of other layers randomly
for layer in range(1,6):
  print('Starting for BnB+ model retaining {} layers'.format(layer))
  num_layers_retain=layer
  # Restore the baseB weights first: the previous iteration re-randomised and
  # trained this network, so without this the "retained" layers would no longer
  # be baseB's layers.
  bnb_plus.load_state_dict(baseB_weights)
  retain_layers(bnb_plus, num_layers_retain)
  # fine tune weights of all the layers: indices 0 .. 7 cover the 5 conv layers
  # and the 3 linear layers (8 parameterised layers total)
  process_weights(bnb_plus, 0, 8, True)
  # run and evaluate the model
  run_and_evaluate_model(device, bnb_plus, train_loader, valid_loader)
  print('Done for BnB+ model retaining {} layers'.format(layer))