In [111]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [112]:
import os
import numpy as np
import pandas as pd
from PIL import Image
import matplotlib.pyplot as plt
%matplotlib inline

In [113]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
from torch.optim import Adam
from torchvision import datasets, transforms, models
from torchvision.io import read_image
from torch.utils.data import DataLoader, Dataset, random_split
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [114]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

In [115]:
# to store class name
classes_dict = {}

def create_class_name_dict():
    classes_file = '/content/drive/MyDrive/Colab Notebooks/Data/CUB_200_2011/classes.txt'
    try:
        with open(classes_file, 'r') as file:
            lines = file.read().splitlines()

        for i, line in enumerate(lines):
            class_label = line.split('.')[1]
            classes_dict[i] = class_label

    except FileNotFoundError:
        print('File does not exist.\n')
        return None

# create class dict
create_class_name_dict()

### Data Loading and Augmentation

In [116]:
from pyparsing.common import traceback

def denormalize(tensor):
    tensor = tensor*std + mean
    return tensor

def show_img(img):
    # arrange channels
    img = img.numpy().transpose((1,2,0))

    # use mean and std values
    img = denormalize(img)

    # clip values and view image
    img = np.clip(img,0,1)
    plt.imshow(img)


# returns data loader objects, resizing_factor is a size tuple
def get_data_loader(img_dir_path, batch_size=64, shuffle=False, transform=None):

    # create custom dataset object
    dataset = torchvision.datasets.ImageFolder(img_dir_path, transform=transform)

    # create dataloader objects
    data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)

    return data_loader


### Define Custom Model Class

In [117]:
class ConvNet(nn.Module):
    def __init__(self, pretrained_model_name, pretrained_model_path, num_classes):
        super(ConvNet, self).__init__()

        self.pretrained_model_name =  pretrained_model_name
        self.pretrained_model_path = pretrained_model_path
        self.num_classes = num_classes
        self.in_feat = None
        self.model = None

        # check for GPU availability
        use_gpu = torch.cuda.is_available()

        # load model architectures without weight
        if use_gpu:
            self.model = getattr(models, self.pretrained_model_name)().cuda()
        else:
            self.model = getattr(models, self.pretrained_model_name)()

        # load pre-trained weights
        self.model.load_state_dict(torch.load(self.pretrained_model_path))

        # get input dimension of the fc layer to be replaced and index of the last fc layer
        self.in_feat = self.model.classifier[-1].in_features
        fc_idx = len(self.model.classifier) - 1

        custom_fc = nn.Sequential(nn.Linear(self.in_feat, 512),
                    nn.ReLU(),
                    nn.Dropout(0.5),
                    nn.Linear(512, self.num_classes),
                    nn.ReLU(),
                    nn.Dropout(0.5),
                    nn.LogSoftmax(dim=1))

        # add custom fc layers to model
        self.model.classifier[fc_idx] = custom_fc

    def forward(self, x):
        x = self.model(x)
        return x

### Helper Functions

In [156]:
def unfreeze_last_n_layers(model, layer_type, n):

    # if n == -1 don't unfreeze any layers
    if n == -1:
        return 0

    n = n*2 # since weights and bias are included as separate

    # for classifier layer
    if layer_type == 'classifier':
        total_layers = len(list(model.model.classifier.parameters()))

        # invalid n
        if n > total_layers:
            print(f"Warning: There are only {total_layers} layers in the model. Cannot unfreeze {n} layers.")

        # if n == 0 unfreeze all layers
        elif n == 0:
            for param in model.model.classifier.parameters():
                param.requires_grad = True
        else:
            for i, param in enumerate(model.model.classifier.parameters()):
                if i >= total_layers - n:
                    param.requires_grad = True
                else:
                    param.requires_grad = False

    # conv layers
    elif layer_type == 'features':
        total_layers = len(list(model.model.features.parameters()))

        # invalid n
        if n > total_layers:
            print(f"Warning: There are only {total_layers} layers in the model. Cannot unfreeze {n} layers.")

        # if n == 0 unfreeze all layers
        elif n == 0:
            for param in model.model.features.parameters():
                param.requires_grad = True
        else:
            for i, param in enumerate(model.model.features.parameters()):
                if i >= total_layers - n:
                    param.requires_grad = True
                else:
                    pass

# freezes all layers in the model
def freeze_all_layers(model):
    for param in model.model.parameters():
        param.requires_grad = False


# returns an optimizer dict for the model given the number of layers being trained
def get_optimizer_dict(model, features_n, classifier_n, lr = 3e-4):

    # list of dictionaries to store parameter values
    params_list = []
    fc_layer_indices = []
    conv_layer_indices = []

    # dividing factor
    f_fc = 2
    f_conv = 3

    if classifier_n != -1:
        if classifier_n == 0:
            named_params = list(name for name, _ in model.model.classifier.named_parameters())
            layer_indices = list(set([int(name.split('.')[0]) for name in named_params]))

        else:
            # get indices of the last 'n' layers in the model
            named_params = list(name for name, _ in model.model.classifier.named_parameters())
            layer_indices = list(set([int(name.split('.')[0]) for name in named_params[-classifier_n*2:]]))

        for i, index_val in enumerate(layer_indices):
            params_list.append({'params':model.model.classifier[index_val].parameters(), 'lr': lr*(f_fc**i)})
            fc_layer_indices.append((index_val, lr*(f_fc**i)))


    if features_n != -1:
        if features_n == 0:
            features_n = list(name for name, _ in model.model.features.named_parameters())
            layer_indices = list(set([int(name.split('.')[0]) for name in named_params]))

        else:
            # get indices of the last 'n' layers in the model
            named_params = list(name for name, _ in model.model.features.named_parameters())
            layer_indices = list(set([int(name.split('.')[0]) for name in named_params[-features_n*2:]]))

        for i, index_val in enumerate(layer_indices):
            params_list.append({'params':model.model.features[index_val].parameters(), 'lr': lr*(f_conv**i)})
            conv_layer_indices.append((index_val, lr*(f_conv**i)))

    optimizer = Adam(params_list, lr = lr)
    return fc_layer_indices, conv_layer_indices, optimizer

# plot history
def plot_history(history):
    train_loss = history['train_loss']
    val_loss = history['val_loss']
    train_acc = history['train_acc']
    val_acc = history['val_acc']

    # Plot train_loss vs. val_loss
    plt.figure(figsize=(12, 4))
    plt.subplot(2, 1, 1)
    plt.plot(train_loss, label='Train Loss', color='blue')
    plt.plot(val_loss, label='Validation Loss', color='red')
    plt.title('Training Vs Validation Loss')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()

    # Plot train_acc vs. val_acc
    plt.subplot(2, 1, 2)
    plt.plot(train_acc, label='Train Accuracy', color='blue')
    plt.plot(val_acc, label='Validation Accuracy', color='red')
    plt.title('Training Vs Validation Accuracy')
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')
    plt.legend()

    # Adjust spacing between subplots
    plt.tight_layout()

### Train and Validation Functions For Single Epoch

In [119]:
from pyparsing.core import ParseExpression
# training for single epoch
def train(model, dataloader, criterion, optimizer, max_batch, device = 'cpu'):

    # initalize variables to store loss and acc
    tr_epoch_loss = 0.0
    tr_epoch_corr = 0
    total = 0

    # send model to device and set to training model
    model.to(device)
    model.train()

    for batch_no, (images, labels) in enumerate(dataloader):
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        y_pred = model(images)
        y_pred_labels = torch.argmax(torch.exp(y_pred), 1)
        loss = criterion(y_pred, labels)
        loss.backward()
        optimizer.step()

        tr_epoch_loss += loss.item()
        total += labels.size(0)
        tr_epoch_corr += (y_pred_labels == labels).sum().item()

        if max_batch is not None:
            if batch_no == max_batch:
                break;

    # return epoch loss and accuracy
    return tr_epoch_loss, tr_epoch_corr/total

def validate(model, dataloader, criterion, device = 'cpu'):

    # initialize variables to store validation loss and acc
    val_loss = 0.0
    val_corr = 0
    total = 0

    model.to(device)
    model.eval()

    with torch.no_grad():
        for images, labels in dataloader:
            images, labels = images.to(device), labels.to(device)
            y_pred = model(images)
            y_pred_labels = torch.argmax(torch.exp(y_pred), 1)
            loss = criterion(y_pred, labels)

            val_loss += loss.item()
            val_corr += (y_pred_labels == labels).sum().item()
            total += labels.size(0)

    # return validation loss and accuracy
    return val_loss, val_corr/total


### Training Iterations

In [120]:
'''
This function can be used to both train and fine-tune the model.
unfreeze_n_fc = number of fc layers to unfreeze from last fc layer (0 = all layers, -1 = none)
unfreeze_n_conv = number of conv layers to unfreeze from last conv layer (0 = all layers, -1 = none)

By controlling these two parameters, the model can be trained in a step-wise manner:
1. unfreeze only the fc layers and train the model
2. unfreeze only the last two conv layers and train the model
3. fine-tuning: unfreeze the last two conv layers and the entire fc block and train the model
'''

def train_model_layers(model, train_data, validation_data, epochs, criterion,
                       max_batch, unfreeze_n_fc, unfreeze_n_conv):

    # dict to store training progress
    history = {'train_loss': [],
               'val_loss': [],
               'train_acc':[],
               'val_acc':[]
               }

    # set device
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # turn off gradients for all layers except the fc layers
    freeze_all_layers(model)
    unfreeze_last_n_layers(model, 'classifier', unfreeze_n_fc)
    unfreeze_last_n_layers(model, 'features', unfreeze_n_conv)
    optimizer = get_optimizer_dict(model, unfreeze_n_fc, unfreeze_n_conv)

    for epoch in range(epochs):

        # variables to store loss and acc values per epoch
        tr_epoch_loss = 0
        tr_epoch_acc = 0
        val_epoch_loss = 0
        val_epoch_acc = 0

        # train model
        tr_epoch_loss, tr_epoch_acc = train(model, train_data, criterion, optimizer, max_batch, device)
        history['train_loss'].append(tr_epoch_loss)
        history['train_acc'].append(tr_epoch_acc)

        # evaluate model
        val_epoch_loss, val_epoch_acc = validate(model, validation_data, criterion, device = 'cpu')
        history['val_loss'].append(val_epoch_loss)
        history['val_acc'].append(val_epoch_acc)

    # return model and history
    return model, history


In [121]:
# define parameters that will remain constant
num_classes = 200
resizing_factor = (224, 224)  # specific to VGG

# normalization paramteters for imagenet
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

# define transformers
train_transform = transforms.Compose([
        transforms.Resize(resizing_factor),
        transforms.RandomHorizontalFlip(0.5),
        transforms.RandomRotation(15),
        transforms.RandomAffine(degrees = 10,
                                translate = (0.2, 0.2), shear = 10),
        transforms.ToTensor(),
        transforms.Normalize(mean, std)])

test_transform = transforms.Compose([transforms.Resize(resizing_factor),
                                    transforms.ToTensor(),
                                    transforms.Normalize(mean, std)])

# define path to data
train_path = '/content/drive/MyDrive/Colab Notebooks/Data/CUB_200_2011/train_test_cropped/train'
test_path = '/content/drive/MyDrive/Colab Notebooks/Data/CUB_200_2011/train_test_cropped/test'
train_loader = get_data_loader(train_path, 64, True, train_transform)
test_loader = get_data_loader(test_path, 64, False, test_transform)

### Transfer Learning using VGG pre-trained weights

In [122]:
vgg16_weights_path = '/content/drive/MyDrive/Colab Notebooks/pretrained_models/vgg16.pth'

In [161]:
# create model
vgg16_custom_model = ConvNet('vgg16', vgg16_weights_path, num_classes)

In [148]:
vgg16_custom_model.model

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [162]:
fc_layer_idx, conv_layer_idx, _ = get_optimizer_dict(vgg16_custom_model, 0, 0, 0.00003)

In [163]:
fc_layer_idx

[(0, 3e-05), (3, 6e-05), (6, 0.00012)]

In [164]:
conv_layer_idx

[(0, 3e-05), (3, 9e-05), (6, 0.00027)]