In [None]:
# The modules below do `import lightning as pl`, which is provided by the
# `lightning` distribution (not `pytorch-lightning`). `%pip` installs into the
# environment of the running kernel.
%pip install lightning wandb torchmetrics

In [3]:
%%writefile ModelPreTrained_Base.py
import torch
import torchvision
import torch.nn as nn
import lightning as pl
import re
import torchmetrics
import torch.optim as optim
from torchvision.models import GoogLeNet_Weights

"""Base class to perform fine tuning on a pre-trained model.
This class initializes the pre-trained GoogLeNet model, freezes layers, sets up the feature extractor and attaches a new output layer to match the number of classes."""
class FineTuningModelBase(pl.LightningModule):
    """Fine-tunes an ImageNet pre-trained GoogLeNet on a new classification task.

    The pre-trained network, minus its last child module, is reused as a
    feature extractor, and a freshly initialised linear layer maps the
    extracted features to ``numClasses`` logits.
    """

    def __init__(self, numClasses, numOfFreezedLayers, learning_rate, aux_logits=True):
        """
        Parameters:
            numClasses: Number of classes predicted by the new output layer.
            numOfFreezedLayers: Freezing threshold. A parameter is frozen when the
                first integer found in its top-level submodule name is smaller
                than this value (see the loop below).
            learning_rate: Learning rate passed to the Adam optimizer.
            aux_logits: Kept for backward compatibility; not used by this class.
        Function:
            Loads the pre-trained model, freezes layers, builds the feature
            extractor, attaches the new output layer and sets up loss and metrics.
        """
        super().__init__()

        # Use the explicit weights enum (already imported by this module)
        # instead of the deprecated `pretrained=True` flag.
        self.model = torchvision.models.googlenet(weights=GoogLeNet_Weights.IMAGENET1K_V1)
        self.numOfFreezedLayers = numOfFreezedLayers
        self.learning_rate = learning_rate

        # Freeze by name: take the top-level submodule name of every parameter,
        # pull out its first run of digits and compare it with the threshold.
        # Parameters whose submodule name has no digit are never frozen.
        for name, param in self.model.named_parameters():
            match = re.search(r'\d+', name.split('.')[0])
            if match and int(match.group()) < self.numOfFreezedLayers:
                param.requires_grad = False

        # Every child except the last one (expected to be the final
        # fully-connected layer) becomes the feature extractor. The modules are
        # shared with `self.model`, not copied.
        backbone_layers = list(self.model.children())[:-1]
        self.feature_extractor = nn.Sequential(*backbone_layers)
        # NOTE(review): this puts the extractor in eval mode at construction
        # time only; presumably the Lightning Trainer switches the whole module
        # back to train mode when fitting - confirm this is intended.
        self.feature_extractor.eval()

        # New output layer sized for the target dataset.
        inFeatures = self.model.fc.in_features
        self.outputLayer = nn.Linear(inFeatures, numClasses)

        # Cross-entropy loss on the raw logits.
        self.criterion = nn.CrossEntropyLoss()

        # Metrics for training and validation; `test_accuracy` is a plain
        # number that subclasses accumulate into.
        self.training_accuracy = torchmetrics.Accuracy(task="multiclass", num_classes=numClasses)
        self.validation_accuracy = torchmetrics.Accuracy(task="multiclass", num_classes=numClasses)
        self.test_accuracy = 0

    def forward(self, x):
        """
        Parameters:
            x: Input tensor for forward propagation.
        Returns:
            Output of the new output layer applied to the flattened features.
        Function:
            Runs the feature extractor, flattens everything after the first
            dimension and applies the output layer.
        """
        flattened = self.feature_extractor(x).flatten(1)
        return self.outputLayer(flattened)

    def configure_optimizers(self):
        """
        Parameters:
            None
        Returns:
            optimizer: Adam optimizer over the trainable parameters used in forward().
        Function:
            Collects the parameters of the feature extractor and of the new
            output layer that still require gradients. Previously only
            `self.model.parameters()` was optimised, which left `outputLayer`
            out of the optimizer so the new head was never trained.
        """
        forward_params = list(self.feature_extractor.parameters()) + list(self.outputLayer.parameters())
        trainable_params = [p for p in forward_params if p.requires_grad]
        return optim.Adam(trainable_params, lr=self.learning_rate)

Writing ModelPreTrained_Base.py


In [2]:
!pip install lightning

Collecting lightning
  Downloading lightning-2.5.1-py3-none-any.whl.metadata (39 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch<4.0,>=2.1.0->lightning)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch<4.0,>=2.1.0->lightning)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch<4.0,>=2.1.0->lightning)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch<4.0,>=2.1.0->lightning)
  Downloading nvidia_curand_cu12-10.3.5.147-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cusolver-cu12==11.6.1.9 (from torch<4.0,>=2.1.0->lightning)
  Downloading nvidia_cusolver_cu12-11.6.1.9-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cusparse-cu12==12.3.1.170 (from torch<4.0,>

In [4]:
%%writefile ModelPreTrained_test.py
import torch
from ModelPreTrained_train_val import FineTuningModelTrainVal

"""Extending FineTuningModelTrainVal by adding the test step method"""
class FineTuningModel(FineTuningModelTrainVal):
    """Inherits from FineTuningModelTrainVal and implements the test hooks."""

    def on_test_epoch_start(self):
        """
        Parameters:
            None
        Returns:
            None
        Function:
            Clears the accumulated test accuracy so that running the test loop
            more than once on the same model does not add the runs together.
        """
        self.test_accuracy = 0

    def test_step(self, batch, batch_idx):
        """
        Parameters:
            batch: A batch of test data (input tensor and labels).
            batch_idx: Batch index.
        Returns:
            accuracy: Fraction of correctly classified samples in this batch.
        Function:
            Runs the model on the batch, takes the arg-max over dimension 1 as
            the predicted class, computes the batch accuracy and adds it to
            `self.test_accuracy`. The accumulated value is a sum of per-batch
            accuracies; callers divide it by the number of batches.
        """
        x, y = batch
        y_hat = self(x)
        predicted = torch.argmax(y_hat, dim=1)
        correct_points = (predicted == y).sum().item()
        total_points = len(y)
        accuracy = correct_points / total_points
        self.test_accuracy += accuracy
        return accuracy

Writing ModelPreTrained_test.py


In [5]:
%%writefile LoadDataset.py
import torch
import torchvision
from torchvision import transforms
import os
from torch.utils.data import random_split

'''class to load the dataset'''
class DatasetLoader:
    '''Loads the image-folder dataset under `root` and builds train/val/test datasets.'''

    def __init__(self, root, batch_size, train_fraction=0.8, seed=None):
        '''
        Parameters:
            root: path of the dataset; must contain "train" and "val" sub-folders
            batch_size: batch size stored for the data loaders
            train_fraction: share of the "train" folder used for training, the
                rest becomes the validation set (default 0.8)
            seed: optional integer seed for the train/validation split; None
                keeps the previous behaviour of using torch's default generator
        Raises:
            ValueError: if train_fraction is not strictly between 0 and 1
        Function:
            Stores the settings, builds the transform and loads the datasets
        '''
        # Checked first so that a bad value fails before any disk access.
        if not 0.0 < train_fraction < 1.0:
            raise ValueError("train_fraction must be strictly between 0 and 1")

        self.root = root
        self.batch_size = batch_size
        self.train_fraction = train_fraction
        self.seed = seed
        # Resize to 224x224, convert to tensor, then normalise per channel.
        # NOTE(review): the mean/std constants are presumably statistics of this
        # dataset's training images - confirm their origin.
        self.transform = transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.4747786223888397, 0.4644955098628998, 0.3964916169643402],std=[0.2389, 0.2289, 0.2422]),
        ])
        self.train_dataset, self.val_dataset, self.test_dataset = self.load_and_split_datasets()

    def load_and_split_datasets(self):
        '''
        Parameters:
            None
        Function:
            Loads the "train" folder and splits it into train and validation
            parts; loads the "val" folder as the test set
        Returns:
            train_dataset: dataset for training
            val_dataset: dataset for validation
            test_dataset: dataset for testing
        '''
        train_path = os.path.join(self.root, "train")
        # The folder named "val" on disk is used as the held-out test set.
        test_path = os.path.join(self.root, "val")
        train_val_dataset = torchvision.datasets.ImageFolder(root=train_path, transform=self.transform)

        train_size = int(self.train_fraction * len(train_val_dataset))
        val_size = len(train_val_dataset) - train_size
        if self.seed is None:
            train_dataset, val_dataset = random_split(train_val_dataset, [train_size, val_size])
        else:
            # A dedicated generator makes the split repeatable without
            # touching the global random state.
            split_generator = torch.Generator().manual_seed(self.seed)
            train_dataset, val_dataset = random_split(
                train_val_dataset, [train_size, val_size], generator=split_generator
            )

        test_dataset = torchvision.datasets.ImageFolder(root=test_path, transform=self.transform)
        return train_dataset, val_dataset, test_dataset

Writing LoadDataset.py


In [6]:
%%writefile Create_DataLoader.py
from LoadDataset import DatasetLoader
from torch.utils.data import DataLoader

'''This class inherits from DatasetLoader and adds the data_loaders method.'''
class DatasetLoader_create(DatasetLoader):
    '''Extends DatasetLoader with a method that wraps the datasets in DataLoaders.'''

    def data_loaders(self):
        '''
        Parameters:
            None
        Function:
            Wraps the training, validation and test datasets in DataLoader
            objects using the stored batch size. Only the training loader
            shuffles its data.
        Returns:
            train_loader : DataLoader over the training dataset (shuffled)
            val_loader : DataLoader over the validation dataset
            test_loader : DataLoader over the test dataset
        '''
        # (dataset, shuffle flag) in the order the loaders are returned.
        split_specs = (
            (self.train_dataset, True),
            (self.val_dataset, False),
            (self.test_dataset, False),
        )
        train_loader, val_loader, test_loader = [
            DataLoader(split, batch_size=self.batch_size, shuffle=do_shuffle)
            for split, do_shuffle in split_specs
        ]
        return train_loader, val_loader, test_loader

Writing Create_DataLoader.py


In [7]:
%%writefile ModelPreTrained_train_val.py
from ModelPreTrained_Base import FineTuningModelBase

"""Extending FineTuningModelBase by adding the training and validation methods."""
class FineTuningModelTrainVal(FineTuningModelBase):
    """Adds the training and validation hooks on top of FineTuningModelBase."""

    def _loss_and_track(self, batch, tracker):
        """
        Parameters:
            batch: A pair of (input tensor, labels).
            tracker: Metric object to update with the predictions and labels.
        Returns:
            Loss of the model output on this batch.
        Function:
            Shared body of the training and validation steps: forward pass,
            loss computation, then metric update.
        """
        inputs, targets = batch
        logits = self(inputs)
        batch_loss = self.criterion(logits, targets)
        tracker(logits, targets)
        return batch_loss

    def training_step(self, batch, batch_idx):
        """
        Parameters:
            batch: A batch of training data (input tensor and labels).
            batch_idx: Batch index.
        Returns:
            The loss computed on the batch.
        Function:
            Computes the loss, updates the training accuracy metric and logs
            the loss aggregated per epoch.
        """
        batch_loss = self._loss_and_track(batch, self.training_accuracy)
        self.log("training_loss", batch_loss, on_step=False, on_epoch=True, prog_bar=True, logger=True)
        return batch_loss

    def on_train_epoch_end(self):
        """
        Parameters:
            None
        Returns:
            Whatever the metric's reset() call returns.
        Function:
            Logs the accuracy accumulated over the training epoch and then
            resets the training accuracy metric.
        """
        self.log('training_accuracy', self.training_accuracy.compute(), prog_bar=True, logger=True, sync_dist=True)
        return self.training_accuracy.reset()

    def validation_step(self, batch, batch_idx):
        """
        Parameters:
            batch: A batch of validation data (input tensor and labels).
            batch_idx: Batch index.
        Returns:
            The loss computed on the batch.
        Function:
            Computes the loss, updates the validation accuracy metric and logs
            the loss aggregated per epoch.
        """
        batch_loss = self._loss_and_track(batch, self.validation_accuracy)
        self.log("validation_loss", batch_loss, on_step=False, on_epoch=True, prog_bar=True, logger=True)
        return batch_loss

    def on_validation_epoch_end(self):
        """
        Parameters:
            None
        Returns:
            Whatever the metric's reset() call returns.
        Function:
            Logs the accuracy accumulated over the validation epoch and then
            resets the validation accuracy metric.
        """
        self.log('validation_accuracy', self.validation_accuracy.compute(), prog_bar=True, logger=True, sync_dist=True)
        return self.validation_accuracy.reset()

Writing ModelPreTrained_train_val.py


In [8]:
%%writefile Question3.py
import Create_DataLoader
import ModelPreTrained_test
import lightning as pl

'''function to the pre trained model'''
def main():
    '''
        Fine-tunes the pre-trained model with a fixed set of hyperparameters
        (chosen according to the best model obtained so far), then evaluates
        it on the test loader and prints the test accuracy.
    '''
    settings = {
        "root": '/kaggle/input/inaturalist-12k/nature_12K_dataset/inaturalist_12K',
        "batch_size": 16,
        "freezed_layers": 5,
        "learning_rate": 1e-4,
        "num_classes": 10,
        "epochs": 20,
    }

    # Build the three data loaders.
    loader_factory = Create_DataLoader.DatasetLoader_create(
        root=settings["root"],
        batch_size=settings["batch_size"],
    )
    train_dl, val_dl, test_dl = loader_factory.data_loaders()

    # Model takes (number of classes, freezing threshold, learning rate).
    model = ModelPreTrained_test.FineTuningModel(
        settings["num_classes"],
        settings["freezed_layers"],
        settings["learning_rate"],
    )

    trainer = pl.Trainer(
        max_epochs=settings["epochs"],
        strategy="ddp_find_unused_parameters_true",
        devices=1,
        enable_progress_bar=True,
    )

    # Training and validation.
    trainer.fit(model, train_dl, val_dl)

    # Test run; the model accumulates one accuracy value per batch, so the
    # reported figure is the mean over batches.
    trainer.test(model, test_dl)
    mean_batch_accuracy = model.test_accuracy / len(test_dl)
    print("Test Accuracy : ", mean_batch_accuracy)

if __name__ == '__main__':
    main()

Writing Question3.py


In [16]:
!python Question3.py

Downloading: "https://download.pytorch.org/models/googlenet-1378be20.pth" to /root/.cache/torch/hub/checkpoints/googlenet-1378be20.pth
100%|███████████████████████████████████████| 49.7M/49.7M [00:00<00:00, 206MB/s]
INFO: You are using the plain ModelCheckpoint callback. Consider using LitModelCheckpoint which with seamless uploading to Model registry.
INFO: GPU available: True (cuda), used: True
INFO: TPU available: False, using: 0 TPU cores
INFO: HPU available: False, using: 0 HPUs
INFO: Initializing distributed: GLOBAL_RANK: 0, MEMBER: 1/1
INFO: ----------------------------------------------------------------------------------------------------
distributed_backend=nccl
All distributed processes registered. Starting with 1 processes
----------------------------------------------------------------------------------------------------

2025-04-17 11:40:43.279956: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register facto

In [2]:
# zip unzip
#!unzip -q /kaggle/input/inaturalist-12k.zip -d /kaggle/working/inaturalist-12k

unzip:  cannot find or open /kaggle/input/inaturalist-12k.zip, /kaggle/input/inaturalist-12k.zip.zip or /kaggle/input/inaturalist-12k.zip.ZIP.


In [None]:
# train file below

In [9]:
%%writefile train_arguments_partb.py
import argparse

def arguments(argv=None):
    '''
    Parameters:
        argv: optional list of argument strings to parse; when None (the
              default) the arguments are read from sys.argv, as before
    Returns:
        The parsed arguments (argparse.Namespace). Every option that was not
        supplied has the value None.
    Function:
        Does command line argument parsing and returns the arguments passed
    '''
    parser = argparse.ArgumentParser(description='Model Parameters')
    parser.add_argument('-r','--root', help="Absolute path of the dataset")
    parser.add_argument('-e','--epochs', type=int, help="Number of epochs to train neural network")
    parser.add_argument('-b','--batch', type=int, help="Batch size to divide the dataset")
    parser.add_argument('-lr','--learning', type=float, help="Learning rate to train the model")
    parser.add_argument('-fr','--freezed', type=int, help="Number of layers freezed from the beginning")
    parser.add_argument('-t','--test', type=int, choices=[0,1], help="choices: [0,1]")

    # parse_args(None) falls back to sys.argv[1:], so existing callers are unaffected.
    return parser.parse_args(argv)

Writing train_arguments_partb.py


In [16]:
%%writefile train_partb.py
import torch
from Create_DataLoader import DatasetLoader_create
import ModelPreTrained_test
import lightning as pl
import wandb
import warnings
from train_arguments_partb import arguments

# Suppress every Python warning for the remainder of the script.
warnings.filterwarnings("ignore")

'''login to wandb to generate plot'''
# Executed at module level, so the login happens as soon as this file is run
# or imported.
wandb.login()

'''setting the device to gpu if avaiable'''
# Selects the first CUDA device when one is available, otherwise the CPU.
# NOTE(review): `device` is not referenced anywhere else in this script as
# shown - confirm whether it is still needed.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

'''helper class to execute fine-tuning and optional testing'''
class PreTrained:
    '''Groups the fine-tuning entry point; `run` needs no instance state.'''

    # Declared static because the function takes no `self`: without the
    # decorator, calling it on an instance would pass the instance as `root`.
    # Calls through the class (PreTrained.run(...)) behave exactly as before.
    @staticmethod
    def run(root, epochs, batch_size, learning_rate, freezed, test):
        '''
        Parameters:
            root : absolute path of the dataset
            epochs : number of epochs to run
            batch_size : batch size to split the dataset
            learning_rate : learning rate used to train the model
            freezed : number of layers freezed starting from the input layer
            test : 1 to run the test step after training, any other value skips it
        Returns :
            None
        Function:
            Executes the Fine Tuning on the model and optionally reports the
            test accuracy
        '''

        # Load the dataset and build the three data loaders.
        dataLoader = DatasetLoader_create(root=root, batch_size=batch_size)
        trainLoader, valLoader, testLoader = dataLoader.data_loaders()

        # The model is built for 10 output classes.
        numOfOutputClasses = 10

        preTrainedModel = ModelPreTrained_test.FineTuningModel(numOfOutputClasses, freezed, learning_rate)

        trainer = pl.Trainer(
            max_epochs=epochs,
            strategy='ddp_find_unused_parameters_true',
            devices=1,
            enable_progress_bar=True
        )

        # Training and validation.
        trainer.fit(preTrainedModel, trainLoader, valLoader)

        # Optional test run; the model accumulates one accuracy value per
        # batch, so the printed figure is the mean over batches.
        if test == 1:
            trainer.test(preTrainedModel, testLoader)
            print("Test Accuracy : ", preTrainedModel.test_accuracy / len(testLoader))

'''main driver function'''
def main():
    '''Runs fine-tuning with default hyperparameters, each of which can be overridden from the command line.'''
    # Defaults, keyed by the attribute name the argument parser uses.
    settings = {
        "epochs": 20,
        "batch": 32,
        "learning": 1e-4,
        "freezed": 5,
        "test": 0,
        "root": '/kaggle/input/inaturalist-12k/nature_12K_dataset/inaturalist_12K',
    }

    cli_args = arguments()

    # An option left out on the command line is None and keeps its default.
    for option in settings:
        supplied = getattr(cli_args, option)
        if supplied is not None:
            settings[option] = supplied

    PreTrained.run(
        settings["root"],
        settings["epochs"],
        settings["batch"],
        settings["learning"],
        settings["freezed"],
        settings["test"],
    )
    wandb.finish()

if __name__ == '__main__':
    main()

Overwriting train_partb.py


In [17]:
!python train_partb.py

[34m[1mwandb[0m: Currently logged in as: [33mcs24m015[0m ([33mcs24m015-indian-institute-of-technology-madras[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin
INFO: You are using the plain ModelCheckpoint callback. Consider using LitModelCheckpoint which with seamless uploading to Model registry.
INFO: GPU available: True (cuda), used: True
INFO: TPU available: False, using: 0 TPU cores
INFO: HPU available: False, using: 0 HPUs
INFO: Initializing distributed: GLOBAL_RANK: 0, MEMBER: 1/1
INFO: ----------------------------------------------------------------------------------------------------
distributed_backend=nccl
All distributed processes registered. Starting with 1 processes
----------------------------------------------------------------------------------------------------

2025-04-18 17:51:38.500143: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for pl