In [1]:
# Mount Google Drive; the pretrained weights and the log directory used
# further down are addressed through paths under this mount point.
from google.colab import drive

drive.mount(mountpoint='/content/drive')

Mounted at /content/drive


In [2]:
import os
import numpy as np
import pandas as pd
from PIL import Image
import matplotlib.pyplot as plt
%matplotlib inline

In [3]:
!pip install lightning

Collecting lightning
  Downloading lightning-2.0.9.post0-py3-none-any.whl (1.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.9/1.9 MB[0m [31m10.1 MB/s[0m eta [36m0:00:00[0m
Collecting arrow<3.0,>=1.2.0 (from lightning)
  Downloading arrow-1.3.0-py3-none-any.whl (66 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m66.4/66.4 kB[0m [31m8.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting backoff<4.0,>=2.2.1 (from lightning)
  Downloading backoff-2.2.1-py3-none-any.whl (15 kB)
Collecting croniter<1.5.0,>=1.3.0 (from lightning)
  Downloading croniter-1.4.1-py2.py3-none-any.whl (19 kB)
Collecting dateutils<2.0 (from lightning)
  Downloading dateutils-0.6.12-py2.py3-none-any.whl (5.7 kB)
Collecting deepdiff<8.0,>=5.7.0 (from lightning)
  Downloading deepdiff-6.6.0-py3-none-any.whl (73 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m73.0/73.0 kB[0m [31m8.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting fastapi<2.0,>=0.92.0 (

In [4]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
from torch.optim import Adam
from collections import OrderedDict
import pytorch_lightning as pl
from pytorch_lightning.loggers import TensorBoardLogger
from torchvision import datasets, transforms, models
from torchvision.io import read_image
from torch.utils.data import DataLoader, Dataset, random_split
from IPython.core.display import set_matplotlib_formats

### Custom Model Class

In [5]:
class CIFAR10Classifier(pl.LightningModule):
    """Transfer-learning classifier for CIFAR-10 on top of a torchvision backbone.

    The backbone is built with ``getattr(models, pretrained_model_name)()``, its
    weights are read from ``pretrained_model_path``, and the last entry of
    ``model.classifier`` is replaced by a small head that ends in ``LogSoftmax``
    (paired with ``NLLLoss``). The code relies on the backbone exposing
    ``features`` and an indexable ``classifier``.

    Args:
        pretrained_model_name: Name of a model constructor in ``torchvision.models``.
        pretrained_model_path: Path of the state dict loaded into the backbone.
        num_classes: Number of outputs of the replacement head.
        batch_size: Batch size used by every dataloader of this module.
        resizing_factor: Size handed to ``transforms.Resize``.

    Attributes:
        history: Per-epoch mean ``train_loss`` / ``train_acc`` / ``val_loss`` /
            ``val_acc``. It is never reset, so it accumulates across successive
            ``Trainer.fit`` calls on the same instance.
    """

    def __init__(self, pretrained_model_name, pretrained_model_path, num_classes, batch_size, resizing_factor):
        super().__init__()
        self.pretrained_model_name = pretrained_model_name
        self.pretrained_model_path = pretrained_model_path
        self.num_classes = num_classes
        self.loss_fn = nn.NLLLoss()
        self.batch_size = batch_size
        self.resizing_factor = resizing_factor
        self.history = {'train_loss': [], 'train_acc':[], 'val_loss': [], 'val_acc':[]}
        self.training_step_outputs = []
        self.validation_step_outputs = []
        self.test_step_outputs = []

        self.classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

        # Normalisation constants applied to every image
        # (presumably the ImageNet channel statistics the backbone was trained with).
        self.mean = (0.485, 0.456, 0.406)
        self.std = (0.229, 0.224, 0.225)

        # Last requested unfreeze depths; -1 means "nothing unfrozen".
        # `classifiers_n` is the historical spelling used by __init__ and is kept
        # as an alias of `classifier_n`, which set_transfer_learning_params writes.
        self.classifier_n = -1
        self.classifiers_n = -1
        self.features_n = -1

        # Build the bare architecture on the available device, then load the weights.
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.model = getattr(models, self.pretrained_model_name)().to(device)
        self.model.load_state_dict(torch.load(self.pretrained_model_path, map_location=device))

        # Input width of the fc layer being replaced and its index in the classifier.
        self.in_feat = self.model.classifier[-1].in_features
        fc_idx = len(self.model.classifier) - 1

        # The final Linear feeds LogSoftmax directly: a ReLU/Dropout on the class
        # scores would clamp and randomly zero the logits that NLLLoss is trained on.
        # Parameter-bearing layers keep indices 0 and 3, so state-dict keys are unchanged.
        custom_fc = nn.Sequential(
            nn.Linear(self.in_feat, 512),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(512, self.num_classes),
            nn.LogSoftmax(dim=1),
        )

        # Swap the original last fc layer for the custom head.
        self.model.classifier[fc_idx] = custom_fc

    def forward(self, x):
        """Return the log-probabilities produced by the wrapped model."""
        return self.model(x)

    def freeze_all_layers(self):
        """Disable gradients for every parameter of the wrapped model."""
        for param in self.model.parameters():
            param.requires_grad = False

    def _unfreeze_last_n(self, block, n, freeze_rest):
        """Enable gradients for the last ``n`` layers of ``block``.

        A "layer" is counted as two parameter tensors (weight and bias).

        Args:
            block: Sub-module whose parameters are inspected.
            n: ``-1`` leaves everything untouched, ``0`` unfreezes all of
                ``block``, a positive value unfreezes that many trailing layers.
            freeze_rest: When true, parameters before the unfrozen tail are
                explicitly frozen; otherwise they are left as they are.

        Returns:
            ``0`` when ``n == -1``, otherwise ``None``.
        """
        if n == -1:
            return 0

        params = list(block.parameters())
        total = len(params)
        wanted = n * 2  # weight and bias are separate parameter tensors

        if wanted > total:
            # Report in layers, the unit the caller used, not in parameter tensors.
            print(f"Warning: There are only {total // 2} layers in the model. Cannot unfreeze {n} layers.")
        elif wanted == 0:
            for param in params:
                param.requires_grad = True
        else:
            cut = total - wanted
            for param in params[cut:]:
                param.requires_grad = True
            if freeze_rest:
                for param in params[:cut]:
                    param.requires_grad = False
        return None

    def unfreeze_last_n_fc_layers(self, n):
        """Unfreeze the last ``n`` fully connected layers and freeze the earlier ones.

        ``n == -1`` is a no-op (returns ``0``); ``n == 0`` unfreezes the whole
        classifier block.
        """
        return self._unfreeze_last_n(self.model.classifier, n, freeze_rest=True)

    def unfreeze_last_n_conv_layers(self, n):
        """Unfreeze the last ``n`` layers of ``model.features``.

        ``n == -1`` is a no-op (returns ``0``); ``n == 0`` unfreezes the whole
        feature block. Earlier layers are left in their current state.
        """
        return self._unfreeze_last_n(self.model.features, n, freeze_rest=False)

    def set_transfer_learning_params(self, unfreeze_n_fc, unfreeze_n_conv):
        """Freeze the whole model, then unfreeze the requested fc and conv tails."""
        self.classifier_n = unfreeze_n_fc
        self.classifiers_n = unfreeze_n_fc  # keep the legacy alias in sync
        self.features_n = unfreeze_n_conv
        self.freeze_all_layers()
        self.unfreeze_last_n_fc_layers(unfreeze_n_fc)
        self.unfreeze_last_n_conv_layers(unfreeze_n_conv)

    def _shared_step(self, batch):
        """Run one batch through the model and return ``(loss, accuracy)``.

        Accuracy is divided by the number of labels actually present in the
        batch, so a smaller final batch is not under-reported.
        """
        x, y = batch
        log_probs = self.model(x)
        loss = self.loss_fn(log_probs, y)
        # argmax over log-probabilities equals argmax over probabilities.
        y_pred = torch.argmax(log_probs, dim=1)
        acc = (y_pred == y).sum().item() / y.size(0)
        return loss, acc

    def _record_epoch(self, outputs, loss_key, acc_key):
        """Append the mean loss/accuracy of ``outputs`` to ``history`` and clear it."""
        if outputs:  # nothing collected -> nothing to average (avoids division by zero)
            losses, accs = zip(*outputs)
            self.history[loss_key].append(sum(losses) / len(losses))
            self.history[acc_key].append(sum(accs) / len(accs))
        outputs.clear()

    def training_step(self, batch, batch_idx):
        """Compute, log and return the training loss for one batch."""
        loss, acc = self._shared_step(batch)
        self.training_step_outputs.append((loss.item(), acc))
        self.log('train_loss', loss, on_step=False, on_epoch=True, prog_bar=True)
        self.log('train_acc', acc, on_step=False, on_epoch=True, prog_bar=True)
        return loss

    def on_train_epoch_end(self):
        """Store the epoch's mean training loss and accuracy in ``history``."""
        self._record_epoch(self.training_step_outputs, 'train_loss', 'train_acc')

    def validation_step(self, batch, batch_idx):
        """Compute and log the validation loss and accuracy for one batch."""
        loss, acc = self._shared_step(batch)
        self.validation_step_outputs.append((loss.item(), acc))
        self.log('val_loss', loss, on_step=False, on_epoch=True, prog_bar=True)
        self.log('val_acc', acc, on_step=False, on_epoch=True, prog_bar=True)

    def on_validation_epoch_end(self):
        """Store the epoch's mean validation loss and accuracy in ``history``.

        The trainer's pre-training sanity check also triggers this hook; those
        batches are discarded so the validation curves stay aligned with the
        training curves.
        """
        if self.trainer.sanity_checking:
            self.validation_step_outputs.clear()
            return
        self._record_epoch(self.validation_step_outputs, 'val_loss', 'val_acc')

    def configure_optimizers(self):
        """Return an Adam optimizer over the currently trainable parameters only."""
        trainable = filter(lambda p: p.requires_grad, self.model.parameters())
        return Adam(trainable, lr=0.001)

    def _make_transform(self, augment=False):
        """Build the resize -> (optional flip) -> tensor -> normalise pipeline."""
        steps = [transforms.Resize(self.resizing_factor)]
        if augment:
            steps.append(transforms.RandomHorizontalFlip())
        steps.append(transforms.ToTensor())
        steps.append(transforms.Normalize(self.mean, self.std))
        return transforms.Compose(steps)

    def train_dataloader(self):
        """Shuffled loader over the CIFAR-10 training split with flip augmentation."""
        cifar10_train = datasets.CIFAR10(root='./data', train=True,
                                         transform=self._make_transform(augment=True), download=True)
        return DataLoader(cifar10_train, batch_size=self.batch_size, shuffle=True, num_workers=4)

    def val_dataloader(self):
        """Loader over the CIFAR-10 ``train=False`` split.

        NOTE(review): this is the same split that ``test_dataloader`` serves, so
        validation metrics are computed on the test data.
        """
        cifar10_val = datasets.CIFAR10(root='./data', train=False,
                                       transform=self._make_transform(), download=True)
        return DataLoader(cifar10_val, batch_size=self.batch_size)

    def test_dataloader(self):
        """Loader over the CIFAR-10 ``train=False`` split."""
        cifar10_test = datasets.CIFAR10(root='./data', train=False,
                                        transform=self._make_transform(), download=True)
        return DataLoader(cifar10_test, batch_size=self.batch_size)

    def get_history(self):
        """Return a snapshot of ``history``.

        A copy is returned so that a caller keeping the result of one training
        stage does not see it grow when the model is trained further.
        """
        return {key: list(values) for key, values in self.history.items()}

### Helper Functions

In [6]:
# plot history
def plot_history(history):
    """Draw loss and accuracy curves (train vs. validation) in one figure.

    Args:
        history: Mapping with the keys ``'train_loss'``, ``'val_loss'``,
            ``'train_acc'`` and ``'val_acc'``, each holding one value per epoch.
    """
    # (subplot position, metric label, training series, validation series)
    panels = [
        (1, 'Loss', history['train_loss'], history['val_loss']),
        (2, 'Accuracy', history['train_acc'], history['val_acc']),
    ]

    plt.figure(figsize=(8, 6))
    for position, metric, train_series, val_series in panels:
        plt.subplot(2, 1, position)
        plt.plot(train_series, label=f'Train {metric}', color='blue')
        plt.plot(val_series, label=f'Validation {metric}', color='red')
        plt.title(f'Training Vs Validation {metric}')
        plt.xlabel('Epochs')
        plt.ylabel(metric)
        plt.legend()

    # Keep titles and axis labels of the stacked panels from overlapping.
    plt.tight_layout()

### Transfer Learning Steps

In [13]:
def _run_training_stage(model, epochs, unfreeze_n_fc, unfreeze_n_conv, training_type,
                        save_dir='/content/drive/MyDrive/Colab Notebooks/logs/'):
    """Run one transfer-learning stage: set trainable layers, fit, log and plot.

    Args:
        model: Module providing ``set_transfer_learning_params``,
            ``pretrained_model_name``, ``batch_size`` and ``get_history``.
        epochs: ``max_epochs`` passed to the trainer.
        unfreeze_n_fc: Fully connected layers to unfreeze (forwarded to the model).
        unfreeze_n_conv: Convolutional layers to unfreeze (forwarded to the model).
        training_type: Stage tag used as a sub-directory of the log path.
        save_dir: Root directory of the TensorBoard logs.

    Returns:
        The value of ``model.get_history()`` after training.
    """
    model.set_transfer_learning_params(unfreeze_n_fc, unfreeze_n_conv)

    # <save_dir>/<model name>/<stage>/batchsz<batch size>
    model_name = str(model.pretrained_model_name)
    log_dir = os.path.join(save_dir, model_name, training_type, 'batchsz' + str(model.batch_size))

    # Best effort: report a failure to create the directory and carry on.
    try:
        os.makedirs(log_dir, exist_ok=True)
    except OSError as e:
        print(f"Error creating directory {log_dir}: {e}")

    logger = TensorBoardLogger(log_dir, name=model_name)
    logger.log_hyperparams({'epochs': epochs,
                            'batch_size': model.batch_size,
                            'name': model_name})

    trainer = pl.Trainer(max_epochs=epochs, logger=logger)
    trainer.fit(model)
    print(f'Training Complete. Results logged at {log_dir}')

    history = model.get_history()
    plot_history(history)
    return history


def train_custom_fc_layers(model, epochs, unfreeze_n_fc):
    """Train only the last ``unfreeze_n_fc`` fully connected layers (stage tag ``'fc'``)."""
    return _run_training_stage(model, epochs, unfreeze_n_fc, -1, 'fc')


def train_entire_fc_block(model, epochs, unfreeze_n_fc = 0):
    """Train the fully connected block; the default ``0`` unfreezes all of it (stage tag ``'compfc'``)."""
    return _run_training_stage(model, epochs, unfreeze_n_fc, -1, 'compfc')


def train_conv_layers(model, epochs, unfreeze_n_conv):
    """Train only the last ``unfreeze_n_conv`` convolutional layers (stage tag ``'conv'``)."""
    return _run_training_stage(model, epochs, -1, unfreeze_n_conv, 'conv')


def fine_tune_model(model, epochs, unfreeze_n_fc = 0, unfreeze_n_conv = 2):
    """Train the requested fc and conv layers together (stage tag ``'finetuning'``)."""
    return _run_training_stage(model, epochs, unfreeze_n_fc, unfreeze_n_conv, 'finetuning')

### Model-Specific Transfer Learning

In [11]:
def TransferLearningVGG(n_fc, n_compfc, n_conv, n_ft_fc, n_ft_conv, epochs_fc, epochs_compfc,
                                  epochs_conv, epochs_finetune, batch_size):
    """Run the four-stage transfer-learning pipeline on a VGG16 backbone.

    The stages run in order on the same model instance: added fc layers,
    entire fc block, convolutional layers, then joint fine-tuning. Each stage
    function plots its own training history.

    Args:
        n_fc: Fully connected layers to unfreeze (counted from the end) in stage 1.
        n_compfc: Fully connected layers to unfreeze in stage 2; 0 means all.
        n_conv: Convolutional layers to unfreeze (counted from the end) in stage 3.
        n_ft_fc: Fully connected layers to unfreeze in stage 4; 0 means all.
        n_ft_conv: Convolutional layers to unfreeze in stage 4.
        epochs_fc: Epochs for stage 1.
        epochs_compfc: Epochs for stage 2.
        epochs_conv: Epochs for stage 3.
        epochs_finetune: Epochs for stage 4.
        batch_size: Batch size handed to the classifier.

    Returns:
        The trained ``CIFAR10Classifier`` instance.
    """
    # define variables
    pretrained_model_name = 'vgg16'
    pretrained_model_path = '/content/drive/MyDrive/Colab Notebooks/pretrained_models/vgg16.pth'
    num_classes_CIFAR10 = 10
    resizing_factor_VGG = (224, 224)

    # initialize model
    custom_model = CIFAR10Classifier(pretrained_model_name, pretrained_model_path, num_classes_CIFAR10,
                                     batch_size, resizing_factor_VGG)

    # print model architecture
    print(custom_model.model)

    # The unfreeze counts come from the caller; they are no longer overwritten
    # with literals here, and the histories are not plotted a second time.

    # 1. Train added fc layers
    train_custom_fc_layers(custom_model, epochs_fc, n_fc)

    # 2. Train all fc layers
    train_entire_fc_block(custom_model, epochs_compfc, n_compfc)

    # 3. Train convolutional layers
    train_conv_layers(custom_model, epochs_conv, n_conv)

    # 4. Fine tune model
    fine_tune_model(custom_model, epochs_finetune, n_ft_fc, n_ft_conv)

    return custom_model

## Training

In [None]:
# Transfer-learning configuration: one (unfreeze count, epochs) group per stage.

# Stage 1 - newly added fully connected layers (counted from the end)
n_fc, epochs_fc = 2, 8

# Stage 2 - entire fully connected block (0 corresponds to all fc layers)
n_compfc, epochs_compfc = 0, 8

# Stage 3 - convolutional layers (counted from the end)
n_conv, epochs_conv = 2, 10

# Stage 4 - fine-tuning: fc layers and conv layers unfrozen together
n_ft_fc, n_ft_conv, epochs_finetune = 0, 2, 10

batch_size = 64

# Start the training pipeline
final_model = TransferLearningVGG(
    n_fc=n_fc,
    n_compfc=n_compfc,
    n_conv=n_conv,
    n_ft_fc=n_ft_fc,
    n_ft_conv=n_ft_conv,
    epochs_fc=epochs_fc,
    epochs_compfc=epochs_compfc,
    epochs_conv=epochs_conv,
    epochs_finetune=epochs_finetune,
    batch_size=batch_size,
)

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name    | Type    | Params
------------------------------------
0 | loss_fn | NLLLoss | 0     
1 | model   | VGG     | 136 M 
------------------------------------
2.1 M     Trainable params
134 M     Non-trainable params
136 M     Total params
545.453   Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz



  0%|          | 0/170498071 [00:00<?, ?it/s][A
  0%|          | 65536/170498071 [00:00<04:55, 577657.84it/s][A
  0%|          | 229376/170498071 [00:00<02:41, 1051497.24it/s][A
  1%|          | 917504/170498071 [00:00<00:52, 3209150.59it/s][A
  2%|▏         | 3702784/170498071 [00:00<00:15, 11118667.19it/s][A
  6%|▌         | 9502720/170498071 [00:00<00:06, 24315836.05it/s][A
  9%|▉         | 16089088/170498071 [00:00<00:04, 33546431.64it/s][A
 13%|█▎        | 22839296/170498071 [00:00<00:03, 39323418.70it/s][A
 17%|█▋        | 29720576/170498071 [00:01<00:03, 43047067.40it/s][A
 21%|██▏       | 36569088/170498071 [00:01<00:02, 49600177.52it/s][A
 24%|██▍       | 41648128/170498071 [00:01<00:02, 47662251.86it/s][A
 27%|██▋       | 46497792/170498071 [00:01<00:02, 46003174.91it/s][A
 31%|███       | 52166656/170498071 [00:01<00:02, 46109069.74it/s][A
 34%|███▍      | 58818560/170498071 [00:01<00:02, 47441889.96it/s][A
 38%|███▊      | 65568768/170498071 [00:01<00:02, 484

Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified




Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]