<a href="https://colab.research.google.com/github/GirishShanmugam/transfer-learning/blob/master/Transfer_Learning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Load AlexNet Model and view the architecture

In [143]:
# Load AlexNet with pretrained weights from torch.hub; AlexNet is used because it has only 5 convolutional layers
import torch
model = torch.hub.load('pytorch/vision:v0.6.0', 'alexnet', pretrained=True)
# switch to evaluation mode; as the last expression of the cell this also displays the architecture
model.eval()

Using cache found in /root/.cache/torch/hub/pytorch_vision_v0.6.0


AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
 

In [0]:
# names of the stored parameter tensors: a weight and a bias for each conv / linear layer
model.state_dict().keys()

odict_keys(['features.0.weight', 'features.0.bias', 'features.3.weight', 'features.3.bias', 'features.6.weight', 'features.6.bias', 'features.8.weight', 'features.8.bias', 'features.10.weight', 'features.10.bias', 'classifier.1.weight', 'classifier.1.bias', 'classifier.4.weight', 'classifier.4.bias', 'classifier.6.weight', 'classifier.6.bias'])

In [0]:
# weight of the first conv layer: 64 output channels, 3 input channels, 11x11 kernel
model.state_dict()['features.0.weight'].shape

torch.Size([64, 3, 11, 11])

In [0]:
# bias of the first conv layer: one value per output channel
model.state_dict()['features.0.bias'].shape

torch.Size([64])

In [0]:
# shape of the first conv layer's weight tensor, printed explicitly
print(model.state_dict()['features.0.weight'].shape)

torch.Size([64, 3, 11, 11])


### Helper function to retain layers from AlexNet model and reinitialise other layers to random weights

In [0]:
# first 10 bias values of the 1st conv layer (features.0) before modifying
model.state_dict()['features.0.bias'][:10]

tensor([-0.9705, -2.8070, -0.0371, -0.0795, -0.1159,  0.0252, -0.0752, -1.4181,
         1.6454, -0.0990])

In [0]:
# first 10 bias values of the 4th conv layer (features.8) before modifying
# (conv layers sit at features.0, .3, .6, .8 and .10, so features.8 is the fourth)
model.state_dict()['features.8.bias'][:10]

tensor([-0.0629,  0.1260,  0.2991,  0.1123,  0.2853,  0.1280,  0.1828, -0.0310,
         0.5452,  0.1565])

In [0]:
def retain_layers(model, num_layers_retain):
  """Keep the first conv layers of an AlexNet model and randomise the remaining ones.

  The weights and biases of every convolutional layer after the first
  ``num_layers_retain`` are overwritten in place with samples from a standard
  normal distribution. The classifier (linear) layers are never touched.

  Args:
    model: AlexNet-style module whose ``state_dict()`` contains the keys
      ``features.{0,3,6,8,10}.{weight,bias}``.
    num_layers_retain: number of leading conv layers to leave untouched.
      Values of 5 or more leave the whole model unchanged.

  Raises:
    ValueError: if ``num_layers_retain`` is negative. A negative value used to
      index the parameter list from the end and silently re-randomised the
      classifier layers.
  """
  # state_dict key prefixes of the five conv layers, in network order
  conv_layers = ('features.0', 'features.3', 'features.6', 'features.8', 'features.10')
  if num_layers_retain < 0:
    raise ValueError(
        'num_layers_retain must be >= 0, got {}'.format(num_layers_retain))

  # Build the state dict once; its tensors share storage with the live
  # parameters, so the in-place normal_() calls below update the model itself.
  sd = model.state_dict()
  for prefix in conv_layers[num_layers_retain:]:
    sd[prefix + '.weight'].normal_()
    sd[prefix + '.bias'].normal_()

In [0]:
# retain the first three conv layers (features.0, .3, .6); features.8 and features.10 get random weights
retain_layers(model, 3)

In [0]:
# first 10 bias values of the 1st conv layer (features.0) after modifying: unchanged, the layer was retained
model.state_dict()['features.0.bias'][:10]

tensor([-0.9705, -2.8070, -0.0371, -0.0795, -0.1159,  0.0252, -0.0752, -1.4181,
         1.6454, -0.0990])

In [0]:
# first 10 bias values of the 4th conv layer (features.8) after modifying: now random, the layer was reinitialised
model.state_dict()['features.8.bias'][:10]

tensor([-0.4857,  1.9247, -0.4161,  0.6972, -0.4460,  1.0776, -0.4933,  0.6414,
         0.6040,  1.1095])

## Load Cats vs Dogs dataset
Reference: https://www.pluralsight.com/guides/image-classification-with-pytorch


In [73]:
# Colab-only step: prompts for a file upload from the local machine (here the dataset zip)
from google.colab import files
uploaded = files.upload()

Saving kagglecatsanddogs_sample1.zip to kagglecatsanddogs_sample1.zip


In [0]:
# extract the uploaded archive into /content/sample1 (-u: only new/updated files, -q: quiet)
!unzip -uq "/content/kagglecatsanddogs_sample1.zip" -d "/content/sample1"

In [0]:
import os

import matplotlib.image as img
import matplotlib.pyplot as plt
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader

%matplotlib inline

In [0]:
# root folder of the extracted sample dataset
data_root = '/content/sample1/kagglecatsanddogs_sample1'

# image folders (trailing slash kept)
train_path = data_root + '/train/'
test_path = data_root + '/test/'

# table of training image file names and their class labels
labels = pd.read_csv(data_root + '/train.csv')

In [0]:
class CatsDogsDataset(Dataset):
    """Dataset of ``(image, label)`` pairs backed by a folder of image files.

    Args:
        data: table-like object exposing ``.values`` whose rows unpack into
            ``(image file name, label)``, e.g. a two-column DataFrame.
        path: directory that contains the image files.
        transform: optional callable applied to every loaded image.
    """

    def __init__(self, data, path , transform = None):
        super().__init__()
        self.data = data.values
        self.path = path
        self.transform = transform

    def __len__(self):
        """Return the number of samples, i.e. the number of rows in ``data``."""
        return len(self.data)

    def __getitem__(self,index):
        """Load the image of row ``index`` and return ``(image, label)``.

        The image is read as an array and coerced to three channels before the
        optional ``transform`` is applied.
        """
        img_name,label = self.data[index]
        img_path = os.path.join(self.path, img_name)
        image = img.imread(img_path)
        # imread yields (H, W) arrays for grayscale files and (H, W, 4) for RGBA
        # files; the transforms used with this dataset normalise exactly three
        # channels, so bring every image to (H, W, 3) first.
        if image.ndim == 2:
            image = image[:, :, None].repeat(3, axis=2)
        elif image.ndim == 3 and image.shape[2] == 4:
            image = image[:, :, :3].copy()  # drop the alpha channel
        if self.transform is not None:
            image = self.transform(image)
        return image, label

In [0]:
size=224
# per-channel normalisation constants (the values documented for torchvision's ImageNet-pretrained models)
norm_mean = [0.485, 0.456, 0.406]
norm_std = [0.229, 0.224, 0.225]

# training pipeline: a random horizontal flip is the only augmentation
train_transform = transforms.Compose([transforms.ToPILImage(),
                                      transforms.CenterCrop(size),
                                      transforms.RandomHorizontalFlip(),
                                      transforms.ToTensor(),
                                      transforms.Normalize(mean=norm_mean, std=norm_std)])

# validation pipeline: no random flip, so the validation loss/accuracy are
# computed on the same inputs every epoch
valid_transform = transforms.Compose([transforms.ToPILImage(),
                                      transforms.CenterCrop(size),
                                      transforms.ToTensor(),
                                      transforms.Normalize(mean=norm_mean, std=norm_std)])

# stratified 80/20 split of the labelled rows; random_state pins the split so a
# re-run of the notebook trains and validates on the same images
train, valid = train_test_split(labels, stratify=labels['class'], test_size=0.2, random_state=42)

# both splits read from train_path because both come from the labelled training CSV
train_data = CatsDogsDataset(train, train_path, train_transform )
valid_data = CatsDogsDataset(valid, train_path, valid_transform )

# Hyper parameters
num_epochs = 35
num_classes = 2
batch_size = 25
learning_rate = 0.001

# CPU or GPU
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

train_loader = DataLoader(dataset = train_data, batch_size = batch_size, shuffle=True, num_workers=0)
# evaluation does not benefit from shuffling; keep the validation order fixed
valid_loader = DataLoader(dataset = valid_data, batch_size = batch_size, shuffle=False, num_workers=0)

## Experiment 1: Train on similar pairs

- Dataset A: use the pretrained weights of an AlexNet model trained on 1000 classes, which also include members of the cat family.
- Dataset B: the Cats vs Dogs Kaggle dataset.

### baseA - AlexNet model trained on 1000 classes of ImageNet dataset
(use pretrained weights)



In [0]:
# baseA: AlexNet with pretrained weights, loaded from the same torch.hub entry as `model` at the top of the notebook
# (AlexNet is used because it has only 5 convolutional layers)
import torch
baseA = torch.hub.load('pytorch/vision:v0.6.0', 'alexnet', pretrained=True)

Using cache found in /root/.cache/torch/hub/pytorch_vision_v0.6.0


### baseB - AlexNet architecture trained on Cats Vs Dogs dataset
(start with random weights)



In [149]:
# baseB: same AlexNet architecture but with pretrained=False, i.e. starting from random weights
baseB = torch.hub.load('pytorch/vision:v0.6.0', 'alexnet', pretrained=False)

Using cache found in /root/.cache/torch/hub/pytorch_vision_v0.6.0


In [150]:
# Replace the final classifier layer with one sized for this task. The input
# width is read from the layer being replaced and the output width from
# num_classes, so neither is hard-coded and re-running the cell is harmless.
baseB.classifier[6] = nn.Linear(baseB.classifier[6].in_features, num_classes)
# last expression of the cell: puts the model in evaluation mode and displays the architecture
baseB.eval()

AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
 

In [152]:
%%time
import os

epochs = 35
batch_size = 25
learning_rate = 0.001

# fine tune weights of all layers
for param in baseB.parameters():
    param.requires_grad = True

model = baseB.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(),lr = learning_rate)

# keeping-track-of-losses 
train_losses = []
valid_losses = []

for epoch in range(1, num_epochs + 1):
    # keep-track-of-training-and-validation-loss
    train_loss = 0.0
    valid_loss = 0.0
    
    # training-the-model
    model.train()
    for data, target in train_loader:
        # move-tensors-to-GPU 
        data = data.to(device)
        target = target.to(device)
        
        # clear-the-gradients-of-all-optimized-variables
        optimizer.zero_grad()
        # forward-pass: compute-predicted-outputs-by-passing-inputs-to-the-model
        output = model(data)
        # calculate-the-batch-loss
        loss = criterion(output, target)
        # backward-pass: compute-gradient-of-the-loss-wrt-model-parameters
        loss.backward()
        # perform-a-ingle-optimization-step (parameter-update)
        optimizer.step()
        # update-training-loss
        train_loss += loss.item() * data.size(0)
        
    # validate-the-model
    model.eval()
    for data, target in valid_loader:
        
        data = data.to(device)
        target = target.to(device)
        
        output = model(data)
        
        loss = criterion(output, target)
        
        # update-average-validation-loss 
        valid_loss += loss.item() * data.size(0)
    
    # calculate-average-losses
    train_loss = train_loss/len(train_loader.sampler)
    valid_loss = valid_loss/len(valid_loader.sampler)
    train_losses.append(train_loss)
    valid_losses.append(valid_loss)
        
    # print-training/validation-statistics 
    print('Epoch: {} \tTraining Loss: {:.6f} \tValidation Loss: {:.6f}'.format(
        epoch, train_loss, valid_loss))

Epoch: 1 	Training Loss: 1.544361 	Validation Loss: 0.714792
Epoch: 2 	Training Loss: 0.696303 	Validation Loss: 0.706148
Epoch: 3 	Training Loss: 0.746961 	Validation Loss: 0.705147
Epoch: 4 	Training Loss: 0.710836 	Validation Loss: 0.692208
Epoch: 5 	Training Loss: 0.696291 	Validation Loss: 0.693552
Epoch: 6 	Training Loss: 0.692535 	Validation Loss: 0.693116
Epoch: 7 	Training Loss: 0.693953 	Validation Loss: 0.692564
Epoch: 8 	Training Loss: 0.693434 	Validation Loss: 0.692572
Epoch: 9 	Training Loss: 0.693734 	Validation Loss: 0.692662
Epoch: 10 	Training Loss: 0.693811 	Validation Loss: 0.692739
Epoch: 11 	Training Loss: 0.693225 	Validation Loss: 0.692713
Epoch: 12 	Training Loss: 0.692773 	Validation Loss: 0.692842
Epoch: 13 	Training Loss: 0.693120 	Validation Loss: 0.693092
Epoch: 14 	Training Loss: 0.692638 	Validation Loss: 0.692062
Epoch: 15 	Training Loss: 0.693602 	Validation Loss: 0.690677
Epoch: 16 	Training Loss: 0.692028 	Validation Loss: 0.690300
Epoch: 17 	Traini

ValueError: ignored

In [142]:
# Evaluate classification accuracy. This cell iterates over valid_loader, so the
# number reported below is accuracy on the validation split.
model.eval()  # it-disables-dropout
with torch.no_grad():
    correct = 0
    total = 0
    # The loop variable is named `targets`, not `labels`, so that the `labels`
    # DataFrame used by the train/validation split cell is not overwritten.
    for images, targets in valid_loader:
        images = images.to(device)
        targets = targets.to(device)
        outputs = model(images)
        # index of the highest score per sample = predicted class
        _, predicted = torch.max(outputs, 1)
        total += targets.size(0)
        correct += (predicted == targets).sum().item()

    print('Test Accuracy of the model: {} %'.format(100 * correct / total))


Test Accuracy of the model: 47.61904761904762 %


In [0]:
# https://discuss.pytorch.org/t/how-the-pytorch-freeze-network-in-some-layers-only-the-rest-of-the-training/7088
# print requires_grad for every parameter tensor of the model (True = the tensor will be updated during training)
for param in model.parameters():
  print(param.requires_grad)

True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True


In [0]:
# sanity check: 16 parameter tensors = (5 conv layers + 3 linear layers) x (weight, bias)
len(['features.0.weight', 'features.0.bias', 'features.3.weight', 'features.3.bias', 'features.6.weight', 'features.6.bias', 'features.8.weight', 'features.8.bias', 'features.10.weight', 'features.10.bias', 'classifier.1.weight', 'classifier.1.bias', 'classifier.4.weight', 'classifier.4.bias', 'classifier.6.weight', 'classifier.6.bias'])

16

KeyboardInterrupt: ignored