In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
import os
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
%matplotlib inline

In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchsummary
from torch.optim import Adam
from torchvision import datasets, transforms, models
from torchvision.io import read_image
from torch.utils.data import DataLoader, Dataset, random_split
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [4]:
!pip install lightning

Collecting lightning
  Downloading lightning-2.1.2-py3-none-any.whl (2.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m29.9 MB/s[0m eta [36m0:00:00[0m
Collecting lightning-utilities<2.0,>=0.8.0 (from lightning)
  Downloading lightning_utilities-0.9.0-py3-none-any.whl (23 kB)
Collecting torchmetrics<3.0,>=0.7.0 (from lightning)
  Downloading torchmetrics-1.2.0-py3-none-any.whl (805 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m805.2/805.2 kB[0m [31m57.9 MB/s[0m eta [36m0:00:00[0m
Collecting pytorch-lightning (from lightning)
  Downloading pytorch_lightning-2.1.2-py3-none-any.whl (776 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m776.9/776.9 kB[0m [31m52.4 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: lightning-utilities, torchmetrics, pytorch-lightning, lightning
Successfully installed lightning-2.1.2 lightning-utilities-0.9.0 pytorch-lightning-2.1.2 torchmetrics-1.2.0


In [5]:
import pytorch_lightning as pl

In [6]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

In [7]:
# these are the mean and std of the data per channel
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std)
    ])

def denormalize(tensor):
    tensor = tensor*std + mean
    return tensor

# function for viewling image
def show_img(img):
    # arrange channels
    img = img.numpy().transpose((1,2,0))

    # use mean and std values
    img = denormalize(img)

    # clip values and view image
    rgb_img = np.clip(img,0,1)

    return np.float32(rgb_img)

In [8]:
# load and return model
def return_model(name, path):
    use_gpu = torch.cuda.is_available()

    # load model architectures without weight
    if use_gpu:
        model = getattr(models, name)().cuda()
    else:
        model = getattr(models, name)()

    # load pre-trained weights
    model.load_state_dict(torch.load(path, map_location=torch.device('cpu')))

    return model

# Initialize model class

In [9]:
# provide model name and path
name = 'vgg16'
path = '/content/drive/MyDrive/Colab Notebooks/pretrained_models/vgg16.pth'

In [12]:
# load test dataset
test_data = datasets.CIFAR10(root='./data', train=False, download=True,
                                              transform=transform)
test_dataloader = DataLoader(test_data, batch_size=1, num_workers=2)

Files already downloaded and verified


In [19]:
class CIFAR10Classifier(pl.LightningModule):
    def __init__(self, pretrained_model):
        super(CIFAR10Classifier, self).__init__()
        self.model = pretrained_model
        self.batch_size = 64
        self.resizing_factor = 224
        self.num_classes = 10
        self.val_ratio = 0.3
        self.loss_fn = nn.NLLLoss()
        self.train_data, self.val_data, self.test_data = None, None, None
        self.model_dest_folder = '/content/drive/MyDrive/Colab Notebooks/modified_model'

        # transfer learning parameters
        self.mean = (0.485, 0.456, 0.406)
        self.std = (0.229, 0.224, 0.225)
        self.classifiers_n = -1
        self.features_n = -1

        # lists to store outputs from each train/val step
        self.training_step_outputs = []
        self.validation_step_outputs = []
        self.test_step_outputs = []

        self.history = {'train_loss': [], 'train_acc': [],
                    'val_loss': [], 'val_acc': []}

        # modify model classifier
        self.model.classifier[-1] = nn.Sequential(nn.Linear(4096, 512),
                                nn.ReLU(),
                                nn.Dropout(0.5),
                                nn.Linear(512, self.num_classes),
                                nn.LogSoftmax(dim=1))


    def forward(self, x):
        return self.model(x)

    def configure_optimizers(self, mode = None):
        optimizer = Adam(filter(lambda p: p.requires_grad,
                            self.model.parameters()), lr=0.001)
        return optimizer

    # freezes all layers in the model
    def freeze_all_layers(self):
        for param in self.model.parameters():
            param.requires_grad = False

    # unfreeze last 'n' fully connected layers
    def unfreeze_last_n_fc_layers(self, n):

        # if n == -1 don't unfreeze any layers
        if n == -1:
            return 0

        n = n*2  # since weights and bias are included as separate
        total_layers = len(list(self.model.classifier.parameters()))

        # invalid n
        if n > total_layers:
            print(f"Warning: There are only {total_layers} layers in the model. Cannot unfreeze {n} layers.")

        # if n == 0 unfreeze all layers
        elif n == 0:
            for param in self.model.classifier.parameters():
                param.requires_grad = True
        else:
            for i, param in enumerate(self.model.classifier.parameters()):
                if i >= (total_layers - n):
                    param.requires_grad = True
                else:
                    param.requires_grad = False

    # unfreeze last 'n' fully connected layers
    def unfreeze_last_n_conv_layers(self, n):

        # if n == -1 don't unfreeze any layers
        if n == -1:
            return 0

        n = n*2  # since weights and bias are included as separate
        total_layers = len(list(self.model.features.parameters()))

        # invalid n
        if n > total_layers:
            print(
                f"Warning: There are only {total_layers} layers in the model. Cannot unfreeze {n} layers.")
        # if n == 0 unfreeze all layers
        elif n == 0:
            for param in self.model.features.parameters():
                param.requires_grad = True
        else:
            for i, param in enumerate(self.model.features.parameters()):
                if i >= total_layers - n:
                    param.requires_grad = True
                else:
                    pass

    # set parameters for transfer learning
    def set_transfer_learning_params(self, unfreeze_n_fc, unfreeze_n_conv):
        self.classifiers_n = unfreeze_n_fc
        self.features_n = unfreeze_n_conv
        self.freeze_all_layers()
        self.unfreeze_last_n_fc_layers(unfreeze_n_fc)
        self.unfreeze_last_n_conv_layers(unfreeze_n_conv)

    def training_step(self, batch, batch_idx):
        x, y = batch
        logits = self.model(x)
        loss = self.loss_fn(logits, y)
        y_pred = torch.argmax(torch.exp(logits), 1)
        acc = (y_pred == y).sum().item()/self.batch_size
        self.training_step_outputs.append((loss.item(), acc))
        return loss

    def on_train_epoch_end(self):
        num_items = len(self.training_step_outputs)
        cum_loss = 0
        cum_acc = 0
        for loss, acc in self.training_step_outputs:
            cum_loss += loss
            cum_acc += acc

        avg_epoch_loss = cum_loss/num_items
        avg_epoch_acc = cum_acc/num_items
        self.history['train_loss'].append(avg_epoch_loss)
        self.history['train_acc'].append(avg_epoch_acc)
        print(f'\nTraining Epoch({self.current_epoch}): loss: {avg_epoch_loss}, acc:{avg_epoch_acc}')
        self.training_step_outputs.clear()

    def validation_step(self, batch, batch_idx):
        x, y = batch
        logits = self.model(x)
        loss = self.loss_fn(logits, y)
        y_pred = torch.argmax(torch.exp(logits), 1)
        acc = (y_pred == y).sum().item()/self.batch_size
        self.validation_step_outputs.append((loss.item(), acc))
        return loss

    def on_validation_epoch_end(self):
        num_items = len(self.validation_step_outputs)
        cum_loss = 0
        cum_acc = 0
        for loss, acc in self.validation_step_outputs:
            cum_loss += loss
            cum_acc += acc

        avg_epoch_loss = cum_loss/num_items
        avg_epoch_acc = cum_acc/num_items
        self.history['val_loss'].append(avg_epoch_loss)
        self.history['val_acc'].append(avg_epoch_acc)
        print(f'\nValidation Epoch({self.current_epoch}): loss: {avg_epoch_loss}, acc:{avg_epoch_acc}')
        self.validation_step_outputs.clear()

    def test_step(self, batch, batch_idx):
        x, y = batch
        logits = self.model(x)
        loss = self.loss_fn(logits, y)
        y_pred = torch.argmax(torch.exp(logits), 1)
        acc = (y_pred == y).sum().item() / self.batch_size
        self.test_step_outputs.append((loss.item(), acc))
        return loss

    def on_test_epoch_end(self):
        num_items = len(self.test_step_outputs)
        cum_loss = 0
        cum_acc = 0
        for loss, acc in self.test_step_outputs:
            cum_loss += loss
            cum_acc += acc

        avg_epoch_loss = cum_loss / num_items
        avg_epoch_acc = cum_acc / num_items
        print(f'Test Epoch loss: {avg_epoch_loss} Test epoch Acc: {avg_epoch_acc}')
        self.test_step_outputs.clear()

    def prepare_data(self):
        # Download CIFAR-10 dataset
        datasets.CIFAR10(root='./data', train=True, download=True)
        datasets.CIFAR10(root='./data', train=False, download=True)

    def setup(self, stage = None):
        transform = transforms.Compose([
                transforms.Resize((self.resizing_factor, self.resizing_factor)),
                transforms.ToTensor(),
                transforms.Normalize(self.mean, self.std)
            ])

        if stage == 'fit' or stage == 'validate' or stage is None:


            # load train and validation datasets
            train = datasets.CIFAR10(root='./data', train=True, transform=transform)
            val_size = int(self.val_ratio * len(train))
            train_size = len(train) - val_size
            self.train_data, self.val_data = random_split(train, [train_size, val_size])

    def train_dataloader(self):
        return DataLoader(self.train_data, batch_size=self.batch_size, shuffle=True, num_workers=2)

    def val_dataloader(self):
        return DataLoader(self.val_data, batch_size=self.batch_size, num_workers=2)

    def get_history(self):
        # remove the first validation epoch data
        self.history['val_loss'].pop(0)
        self.history['val_acc'].pop(0)
        return self.history

    def clear_history(self):
        for key in self.history:
            self.history[key] = []

    def save_model(self):
        # save the entire model
        arc_final_path = os.path.join(self.model_dest_folder, 'vgg16_arc.pth')
        weights_final_path = os.path.join(self.model_dest_folder, 'vgg16_weights.pth')
        torch.save(self.model, arc_final_path)
        torch.save(self.model.state_dict(), weights_final_path)


# Initialize Transfer Learning Pipeline

In [14]:
class TransferLearningPipiline:
    def __init__(self, model):
        self.model = model

        # funny connected layers to unfreeze from last
        self.n_fc = 2

        # number of total fully connected layers to unfreeze
        self.n_compfc = 0

        self.epochs = dict()
        self.epochs['fc'] = 2
        self.epochs['compfc'] = 2
        self.trainer = None

    def initalize_trainer(self, mode):
        self.trainer = pl.Trainer(accelerator="gpu", devices=1,  max_epochs=self.epochs[mode],
                                 enable_progress_bar=False, limit_val_batches=10,
                                 enable_checkpointing=True, logger=False)

    def train_custom_fc_layers(self):

        # freeze all layers except the last two fc layers
        self.model.set_transfer_learning_params(self.n_fc, -1)
        self.model.configure_optimizers('lr_fc')
        self.model.clear_history()

        # train model
        self.initalize_trainer('fc')
        self.trainer.fit(self.model)


    def train_all_fc_layers(self):

        # freeze all layers except the last two fc layers
        self.model.set_transfer_learning_params(self.n_compfc, -1)
        self.model.configure_optimizers('lr_compfc')
        self.model.clear_history()

        # train model
        self.initalize_trainer('compfc')
        self.trainer.fit(self.model)

    # complete transfer learning pipeline
    def train_model(self):
        self.train_custom_fc_layers()
        self.train_all_fc_layers()

    def save_model(self):
        self.model.save_model()



In [15]:
pretrained_model = return_model(name, path)

In [20]:
custom_model = CIFAR10Classifier(pretrained_model)

In [21]:
# initalize trainer
trainer = TransferLearningPipiline(custom_model)

In [22]:
# fit model
trainer.train_model()

INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs


Files already downloaded and verified
Files already downloaded and verified


INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name    | Type    | Params
------------------------------------
0 | model   | VGG     | 136 M 
1 | loss_fn | NLLLoss | 0     
------------------------------------
2.1 M     Trainable params
134 M     Non-trainable params
136 M     Total params
545.453   Total estimated model params size (MB)



Validation Epoch(0): loss: 2.3530668020248413, acc:0.03125

Validation Epoch(0): loss: 0.9280479192733765, acc:0.703125

Training Epoch(0): loss: 1.2147839065007797, acc:0.5536448811700183

Validation Epoch(1): loss: 0.9605607450008392, acc:0.6796875

Training Epoch(1): loss: 1.132152301301904, acc:0.5868372943327239


INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=2` reached.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs


Files already downloaded and verified
Files already downloaded and verified


/usr/local/lib/python3.10/dist-packages/pytorch_lightning/callbacks/model_checkpoint.py:639: Checkpoint directory /content/checkpoints exists and is not empty.
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name    | Type    | Params
------------------------------------
0 | model   | VGG     | 136 M 
1 | loss_fn | NLLLoss | 0     
------------------------------------
121 M     Trainable params
14.7 M    Non-trainable params
136 M     Total params
545.453   Total estimated model params size (MB)



Validation Epoch(0): loss: 0.9534895420074463, acc:0.7265625

Validation Epoch(0): loss: 0.6159978538751603, acc:0.821875

Training Epoch(0): loss: 0.8382723775489674, acc:0.735974634369287

Validation Epoch(1): loss: 0.5258054852485656, acc:0.846875

Training Epoch(1): loss: 0.5536500936343421, acc:0.8330952925045704


INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=2` reached.


In [23]:
trainer.save_model()