# Transfer Learning

In this notebook, I will use transfer learning to classify images of dogs and cats. There are two methods of transfer learning:

**Feature Extraction**: This method involves getting a pretrained network, freezing the weights of the convolutional layers and then replacing the fully connected layer with a new one.

**Fine-Tuning**: Similar to feature extraction, except that most of the convolutional layers are unfrozen so their weights can be updated.

The model is a **VGG-16** neural network with weights pretrained on the ImageNet dataset.

In [None]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt

# Data Preparation

In [None]:
# Per-split preprocessing pipelines.
# 'train' adds random crop/flip augmentation; 'val' uses a fixed resize + centre crop.
# Both splits end with the same per-channel normalisation.
data_transforms = dict(
    train=transforms.Compose([
        transforms.RandomResizedCrop(size=224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ]),
    val=transforms.Compose([
        transforms.Resize(size=256),
        transforms.CenterCrop(size=224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ]),
)

In [None]:
data_dir = '../input/cat-and-dog/'

# One ImageFolder per split, each paired with the transform pipeline of the same name.
image_datasets = {
    split: datasets.ImageFolder(root=data_dir + subdir,
                                transform=data_transforms[split])
    for split, subdir in (('train', 'training_set/training_set'),
                          ('val', 'test_set/test_set'))
}

# Batches of 32, loaded in the main process; only the training split is shuffled.
dataloaders = {
    split: torch.utils.data.DataLoader(dataset=image_datasets[split],
                                       batch_size=32,
                                       shuffle=(split == 'train'),
                                       num_workers=0)
    for split in ('train', 'val')
}

# Use the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [None]:
# Class labels exposed by the training ImageFolder dataset
class_names = image_datasets['train'].classes
# Bare expression: the notebook shows its value as the cell output
class_names

# Helper Functions

In [None]:
#Training Loop
def traineval(model, num_epochs, criterion, optimizer, loaders=None, compute_device=None):
    """Train ``model`` and evaluate it on the validation data after every epoch.

    Accuracy counters are reset at the start of each epoch, so every reported
    accuracy describes that epoch only. Validation runs under
    ``torch.no_grad()`` so no autograd graph is built for it.

    Args:
        model: Network to optimise; switched to train mode for the training
            pass and to eval mode for the validation pass.
        num_epochs: Number of full passes over the training data.
        criterion: Callable ``criterion(output, target)`` returning a scalar
            loss tensor. Each batch loss is weighted by the batch size, which
            gives a per-sample average when the criterion returns a batch mean.
        optimizer: Optimizer stepped once per training batch.
        loaders: Optional mapping with ``'train'`` and ``'val'`` entries, each
            an iterable of ``(data, target)`` batches. Defaults to the
            module-level ``dataloaders``.
        compute_device: Optional device the batches are moved to. Defaults to
            the module-level ``device``.

    Returns:
        Tuple ``(losses, accs)``. Both are dicts with keys ``'train'`` and
        ``'valid'`` holding one float per epoch (accuracies are percentages).
    """
    # Fall back to the notebook-level globals when the caller passes nothing.
    if loaders is None:
        loaders = dataloaders
    if compute_device is None:
        compute_device = device

    losses = {'train': [], 'valid': []}
    accs = {'train': [], 'valid': []}

    for epoch in range(1, num_epochs + 1):
        # Training pass: gradients enabled, weights updated per batch.
        model.train()
        train_loss, train_acc = _run_epoch(
            model, loaders['train'], criterion, compute_device, optimizer)

        # Validation pass: no weight updates and no gradient tracking.
        model.eval()
        with torch.no_grad():
            valid_loss, val_acc = _run_epoch(
                model, loaders['val'], criterion, compute_device)

        losses['train'].append(train_loss)
        losses['valid'].append(valid_loss)
        accs['train'].append(train_acc)
        accs['valid'].append(val_acc)

        print('Epoch: {} Training Loss: {:.6f} Validation Loss: {:.6f} Training accuracy: {:.6f} Valid accuracy: {:.6f}'.format(
        epoch, train_loss, valid_loss, train_acc, val_acc))

    return losses, accs


def _run_epoch(model, loader, criterion, compute_device, optimizer=None):
    """Run one pass over ``loader`` and return ``(mean_loss, accuracy_percent)``.

    Weights are updated only when ``optimizer`` is given. Both results are
    averaged over the samples actually seen; an empty loader yields
    ``(0.0, 0.0)`` instead of raising.
    """
    loss_sum = 0.0
    n_correct = 0
    n_seen = 0

    for data, target in loader:
        data = data.to(compute_device)
        target = target.to(compute_device)

        if optimizer is not None:
            # Clear gradients left over from the previous batch.
            optimizer.zero_grad()

        output = model(data)
        loss = criterion(output, target)

        if optimizer is not None:
            loss.backward()
            optimizer.step()

        # Predicted class = index of the largest score along dim 1.
        _, predicted = torch.max(output, 1)
        batch_size = target.size(0)
        n_seen += batch_size
        n_correct += (predicted == target).sum().item()
        # Weight the batch loss by its size so the epoch average is per sample.
        loss_sum += loss.item() * batch_size

    if n_seen == 0:
        return 0.0, 0.0
    return loss_sum / n_seen, 100.0 * n_correct / n_seen


In [None]:
#Plot Loss & Accuracy Graph

def Graph(losses, accs, num_epochs):
    """Draw the loss and accuracy histories side by side.

    Args:
        losses: dict with 'train' and 'valid' sequences, one value per epoch.
        accs: dict with 'train' and 'valid' sequences, one value per epoch.
        num_epochs: number of epochs; the x-axis runs from 1 to num_epochs.
    """
    epochs = list(range(1, num_epochs + 1))

    plt.figure(figsize=(10, 5))

    # (subplot index, title, history dict, y-axis label, train label, valid label)
    panels = (
        (1, "Loss", losses, "Loss", 'Training loss', 'Validation loss'),
        (2, "Accuracy", accs, "Acc", 'Training Accuracy', 'Validation Accuracy'),
    )

    for position, title, history, y_label, train_label, valid_label in panels:
        plt.subplot(1, 2, position)
        plt.title(title)
        plt.plot(epochs, history['train'], label=train_label)
        plt.plot(epochs, history['valid'], label=valid_label)
        plt.xlabel("Epochs")
        plt.ylabel(y_label)
        plt.legend(frameon=False)

# Feature Extraction


In [None]:
# Build the VGG-16 architecture; its weights are read from a local checkpoint below.
model =  models.vgg16(pretrained=False)

# Freeze every parameter of the network so none of them receives gradient updates.
for param in model.parameters():
    param.requires_grad = False

# Load the weights. map_location='cpu' keeps the load working on a machine
# without CUDA even if the checkpoint was saved from GPU tensors;
# load_state_dict copies the values into the existing parameters.
model.load_state_dict(torch.load("../input/vgg16/vgg16.pth", map_location='cpu'))

print("Original VGG16 Model:\n", model)

In [None]:
# Swap the classifier for a small head ending in two outputs.
# 25088 is the head's input width (presumably 512 x 7 x 7 flattened VGG-16 features - confirm).
model.classifier = nn.Sequential(
    nn.Linear(in_features=25088, out_features=256),
    nn.ReLU(),
    nn.Dropout(p=0.5),
    nn.Linear(in_features=256, out_features=2),
)

print("New VGG16 Model:\n", model)

In [None]:
# Training configuration for the feature-extraction run.
num_epochs = 25

# Move the network to the selected device before creating the optimizer.
model = model.to(device)

criterion = nn.CrossEntropyLoss()

optimizer = optim.SGD(params=model.parameters(), lr=0.001, momentum=0.9)

In [None]:
%%time
# Train the feature-extraction model; traineval returns per-epoch loss and accuracy histories.
losses, accs = traineval(model,num_epochs,criterion, optimizer)

In [None]:
# Plot the loss and accuracy curves recorded during the run above.
Graph(losses, accs, num_epochs)

In [None]:
# Save only the model's state dict (not the whole module object) to disk.
PATH = './ExtractedCatDog.pth'
torch.save(obj=model.state_dict(), f=PATH)

# Fine-Tuning

In [None]:
# Build the VGG-16 architecture; its weights are read from a local checkpoint below.
fine_model =  models.vgg16(pretrained=False)

# Parameters are visited in registration order. Tensors 0-19 stay frozen
# (the first four conv blocks); everything from index 20 on (the fifth conv
# block and the layers after it) stays trainable.
for i, param in enumerate(fine_model.parameters()):
    param.requires_grad = i >= 20

# Load the weights. map_location='cpu' keeps the load working on a machine
# without CUDA even if the checkpoint was saved from GPU tensors;
# load_state_dict copies the values into the existing parameters.
fine_model.load_state_dict(torch.load("../input/vgg16/vgg16.pth", map_location='cpu'))

In [None]:
# Swap the classifier for a small head ending in two outputs.
# 25088 is the head's input width (presumably 512 x 7 x 7 flattened VGG-16 features - confirm).
fine_model.classifier = nn.Sequential(
    nn.Linear(in_features=25088, out_features=256),
    nn.ReLU(),
    nn.Dropout(p=0.5),
    nn.Linear(in_features=256, out_features=2),
)

In [None]:
# Print the name of every parameter that will receive gradient updates.
for name, param in fine_model.named_parameters():
    if not param.requires_grad:
        continue
    print(name)

In [None]:
# Training configuration for the fine-tuning run.
num_epochs = 25

# Move the network to the selected device before creating the optimizer.
fine_model = fine_model.to(device)

criterion = nn.CrossEntropyLoss()

optimizer = optim.SGD(params=fine_model.parameters(), lr=0.001, momentum=0.9)

In [None]:
%%time
# Train the fine-tuning model; traineval returns per-epoch loss and accuracy histories.
losses, accs = traineval(fine_model,num_epochs,criterion, optimizer)

In [None]:
# Plot the loss and accuracy curves recorded during the fine-tuning run.
Graph(losses, accs, num_epochs)