In [9]:
pip install wandb

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [10]:
import torch
import torch.nn as nn
from torchvision import datasets, transforms
from math import ceil as ceil
import matplotlib.pyplot as plt
from tqdm.auto import tqdm
import wandb

# Download the dataset.
#!wget https://storage.googleapis.com/wandb_datasets/nature_12K.zip

# unzip the datqaset.
#!unzip -q nature_12K.zip

The Following class prepData prepares the data with the option to augment or not
- it returns training data Loader, validation Data Loader, test Data Loader and the class Names dictionary

- it takes a boolean(augment) as it's input which decides whether to augment the data or not

In [11]:
def prepData(augment:bool):
    ''' Function to prepare the data using torch libraries for the purpose of training torch
        neural networks with relative ease.
        
        Using torch dataLoaders helps in memory management as well
        
        args : augment bool ---> True would enable data augmentation, False would disable.
        
        return : 
            TrainDataLoader --> torch data loader wrapper for training dataset.
            ValDataLoader ----> torch data loader wrapper for validation data set.
            TestDataLoader ---> torch data loader wrapper for test data set.'''
    
    if augment == True:
        preProcess = transforms.Compose([
            transforms.Resize(size = (128,128)),
            transforms.RandomHorizontalFlip(p=0.5),
            transforms.RandomVerticalFlip(p=0.2),
            transforms.ToTensor()
        ])
    else:
        preProcess = transforms.Compose([
            transforms.Resize(size = (128,128)),
            transforms.ToTensor()
        ])
    print("loading the data into tensors ==============================")
    trainData = datasets.ImageFolder(root = "inaturalist_12K/train",
                                    transform = preProcess,
                                    target_transform = None)
    classLabels = trainData.classes
    
    testData  = datasets.ImageFolder(root = "inaturalist_12K/val",
                                    transform = preProcess)

    print(f"train data : {trainData} and test data : {testData}")

    print("splitting into train and val ================================")
    trainSplit = ceil(0.8*len(trainData))
    trainData, valData = torch.utils.data.random_split(trainData, [trainSplit, len(trainData) - trainSplit])

    print("wrapping into train loader ==================================")

    trainDataLoader = torch.utils.data.DataLoader(trainData,
                                                shuffle=True,
                                                batch_size=32)

    valDataLoader = torch.utils.data.DataLoader(valData,
                                                shuffle=True,
                                                batch_size=32)

    testDataLoader = torch.utils.data.DataLoader(testData,
                                                shuffle=False,
                                                batch_size=32)
    
    print("loaders created for faster loading ===========================")



    return trainDataLoader, valDataLoader, testDataLoader, classLabels

The class for the model made from scratch
- named CNN Model, inherits nn.Module.
- must have a forward function for completion and successful training.
- Flexibility with respect to layer size, kernel size, and batchNormalization(add/not add)

In [12]:
class CNNModel(nn.Module):
    ''' CNN Model for classifying the images
    
        __init__ : creates a blueprint for the model
        forward  : forward propagation facilitated by Torch Layers.'''

    def __init__(self, activation, kernels, inputShape: int, hiddenUnit: int, outputSize: int, dropOut: float, batchNorm: bool, factor: int):
        
        ''' initialize the model == inherit from nn.Module
            
            args : activation --> activation Function torch.nn.$SomeValidActivationFunction$
                   kernels ---> list conatining 5 kernel sizes that may be taken as input.
                   inputShape --> Number of Channels in input data.
                   hiddenUnit --> Filter size.
                   outputSize --> number of output channels.
                   batchNorm ---> boolean var to indicate whether to add batch normalization or not.
                   factor ------> int value used as multiplier for subsequent layers.
        '''
        super().__init__()
        self.hiddenUnit = hiddenUnit
        self.factor = factor
        self.batchNorm = batchNorm

        self.conv_blocks = nn.ModuleList()    # ModuleList (Torch container) used to record specific layers.  
        self.batch_norms = nn.ModuleList()    # Using ModuleList shortens boiler plate code.

        layerSize = [inputShape] + [self.hiddenUnit] + [self.factor * self.hiddenUnit] * 4
        for i in range(5):
            self.conv_blocks.append(nn.Conv2d(layerSize[i], layerSize[i+1], kernel_size=kernels[i], padding=2))  # add conv layers
            if self.batchNorm:
                self.batch_norms.append(nn.BatchNorm2d(layerSize[i+1])) # add Batch normalization only if specified.

        self.activate = activation  # add activation function (taken as input).
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)  # max pool layer (as and when to be used).
        self.drop = nn.Dropout(p=dropOut)  # drop out layer for reducing over fitting.
        
        # DenseBlock containing a flattening layer, dense layer.
        self.DenseBlock = nn.Sequential(
            nn.Flatten(),
            nn.LazyLinear(out_features=1024, bias=True, device=None, dtype=None), # LazyLinear used for calculation of in_features.
            nn.Dropout(p=dropOut),
            nn.Linear(in_features=1024, out_features=outputSize)
        )

    def forward(self, x):
        ''' Forward Propagation
            x ---> tensor denoting input value.
            return x ---> prediction value.'''
        for i in range(5):
            x = self.conv_blocks[i](x)
            if self.batchNorm and i < 5:
                x = self.batch_norms[i](x)
            x = self.activate(x)
            x = self.pool(x)

        x = self.drop(x)
        x = self.DenseBlock(x)

        return x


In [13]:
def accuracy(y_true, y_pred):
    ''' accuracy Function for calculating the percentage of y_true[i] == y_pred[i]
        args : y_true ---> int actual value/ label(s) of for the input(s).
        return : accuracy ---> float [0,100] The accuracy of the batch.
    '''
    correct = torch.eq(y_true,y_pred)
    correct = correct.sum()
    val = correct.item()
    accuracy = 0.0
    accuracy = val/(len(y_true))*100
    return accuracy

In [14]:
def fit(model, trainDataLoader, valDataLoader, epochs, device, loss_fn, optimizer):
    ''' Function for training the model on the data set.
        args --->
            model -> CNNModule object 
            trainDataLoader --> torch dataLoader wrapper containing training set.
            valDataLoader --> torch dataLoader wrapper containing validation set.
            epochs --> int, number of epochs.
            device --> whether cpu or cuda.
            loss_fn ---> loss Function used.
            optimizer --> optimizer function used.
            
        return model --> CNN Module object with updated weights.
    '''
    for epoch in tqdm(range(epochs)):
        train_loss = 0
        train_acc = 0
        for batch, (X,y) in enumerate(trainDataLoader):
            X,y = X.to(device), y.to(device)
            model.train()
            y_pred = model(X)
            loss = loss_fn(y_pred, y)
            train_loss += loss
            train_acc += accuracy(y_true=y, y_pred=y_pred.argmax(dim=1))
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if batch%50 == 0:
                print(f"went through {batch}/{len(trainDataLoader)} samples")
            torch.cuda.empty_cache()

        train_loss /= len(trainDataLoader)
        train_acc /= len(trainDataLoader)
    
        val_loss = 0.0
        val_acc = 0
        model.eval()
        with torch.inference_mode():
            for X,y in valDataLoader:
                X,y = X.to(device), y.to(device)
                val_pred = model(X)
                val_loss += loss_fn(val_pred, y)
                val_acc += accuracy(y_true=y, y_pred=val_pred.argmax(dim=1))
            val_acc /= len(valDataLoader)
            val_loss /= len(valDataLoader)
        
        #wandb.log({"TrainingLoss" : train_loss, "ValidationLoss" : val_loss, "TrainingAccuracy" : train_acc, "ValidationAccuracy" : val_acc, "epoch": epoch})

        print(f"Train loss: {train_loss}, Train accuracy: {train_acc}, validation loss: {val_loss}, validation accuracy: {val_acc}\n")
    torch.save(model, "/kaggle/working/vit_16_model.pth")

    return model

def eval(model, testDataLoader, device, loss_fn):
    ''' Function for evaluating the training on unseen test Dataset.
        args --> testDataLoader torch DataLoader object for easy loading/unloading.
    '''
    device = "cuda" if torch.cuda.is_available() else "cpu"
    test_loss = 0.0
    test_acc = 0
    model.eval()
    with torch.inference_mode():
        for X,y in testDataLoader:
            X,y = X.to(device), y.to(device)
            test_pred = model(X)
            test_loss += loss_fn(test_pred, y)
            test_acc += accuracy(y_true=y, y_pred=test_pred.argmax(dim=1))
        test_acc /= len(testDataLoader)
        test_loss /= len(testDataLoader)
    print(f"test accuracy is {test_acc} and test loss os {test_loss}")


In [15]:
def masterTrainer(trainDataLoader, valDataLoader, testDataLoader, learningRate, kernels, layerSize, dropOut, batchNorm, activation, factor, epochs):
    ''' function to start the training and facilitate wandb logging.
        args ->
            trainDataLoader --> torch dataLoader wrapper containing training set.
            valDataLoader --> torch dataLoader wrapper containing validation set.
            testDataLoader --> torch dataLoader wrapper containing test dataset.
            
            learningRate ---> int, learning rate,
            kernels --> list of kernel sizes, one for each convolutional layer.
            layerSize --> filter size of first convolutional layer
            factor --> multiplier to number of filters for subsequent training.
            epochs --> int, number of epochs.
    '''
    activations = {
    "relu" : torch.nn.ReLU(),
    "gelu" : torch.nn.GELU(),
    "silu" : torch.nn.SiLU(),
    "mish" : torch.nn.Mish()
    }

    #import wandb    
    #wandb.init(project="DLAssignment2", entity="cs22m028")
    #wandb.run.name = "config_"+str(optimizer)+"_"+str(layerSize)+"_"+str(decay)+"_"+str(opt)+"_"+str(batchNorm)+"_"+str(dropOut)+"_"+str(activation)    
    activate= activations[activation]

    model_0 = CNNModel(activate, kernels, inputShape=3, hiddenUnit=layerSize,outputSize=10, dropOut = dropOut, batchNorm=True, factor = factor)

    device = "cuda" if torch.cuda.is_available() else "cpu"
    model_0.to(device)
    #from helper_functions import accuracy_fn as accuracy # Note: could also use torchmetrics.Accuracy(task = 'multiclass', num_classes=len(class_names)).to(device)
    loss_fn = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(params = model_0.parameters(),lr=learningRate)
    print(model_0)
    model_0 = fit(model_0, trainDataLoader, valDataLoader, epochs, device, loss_fn, optimizer)
    
    eval(model_0, testDataLoader, device, loss_fn)
    randomImageModeler(model_0, testDataLoader, classNames)



In [16]:
def randomImageModeler(model, testDataLoader, classNames):
    device = "cuda" if torch.cuda.is_available() else "cpu"
    wandb.init(project="DLAssignment2", entity="cs22m028")
    model.to("cpu")
    model.eval()
    count = 0
    images = []
    with torch.inference_mode():
        for X,y in testDataLoader:
            #X,y = X.to(device), y.to(device)
            prediction = model(X)
            # That is it...
            # print the image.
            for i in range(30):
                img = X[i].permute(1, 2, 0).numpy()
                image = wandb.Image(img, caption= classNames[prediction[i].argmax(dim=0)])
                images.append(image)
            break
        wandb.log({"Some predictions....": images})
    # It is done.

In [17]:
def testIt():
    ''' wandb trainer for initializing runs, loading data and running the entire training/ testing process.'''
    wandb.init(project="DLAssignment2", entity="cs22m028")
    wandb.run.name="config_activation="+str(wandb.config.activation)+"_epochs="+str(wandb.config.epochs)+"_dropOut="+str(wandb.config.dropOut)+"_batchSize="+str(wandb.config.batchSize) + "_filterSize="+str(wandb.config.filterSize)+"_batchNorm="+str(wandb.config.batchNorm)+"_augment="+str(wandb.config.augmentation)+"_learningRate="+str(wandb.config.learningRate)
    trainDataLoader, valDataLoader, testDataLoader, classLabels = prepData(augment=wandb.config.augmentation)
    #trainDataLoader, valDataLoader, testDataLoader, classLabels = prepData(augment=True)
    #masterTrainer(trainDataLoader, valDataLoader, testDataLoader, 0.001, 0.00001, [5,5,3,3,3], 32, "adam", "cross", 0.2, True, "gelu", 1)
    masterTrainer(trainDataLoader, valDataLoader, testDataLoader, wandb.config.learningRate, wandb.config.kernels, wandb.config.filterSize, wandb.config.dropOut, wandb.config.batchNorm, wandb.config.activation, wandb.config.factor, wandb.config.epochs)
    #masterTrainer(trainDataLoader, valDataLoader, testDataLoader, config.wandb.learningRate, config.wandb.decay, config.wandb.kernels, config.wandb.layerSize, config.wandb.dropOut, config.wandb.batchNorm, config.wandb.activation, config.wandb.factor)

Here, the possible parameters for the sweeps have been swapped by the best parameters only, and therefore, only one run will happen and the images will get logged.


In [None]:
if __name__ == '__main__':
    wandb.login()
    sweep_config = {
        'method': 'bayes'
        }

    metric = {
        'name': 'val_acc',
        'goal': 'maximize'   
        }

    sweep_config['metric'] = metric


    # initialize the dictionaty with only the best parameters.

    parameters_dict = {
        'epochs' : {
            'values':[20]
        },
        'factor':{
            'values':[2]},
        'filterSize': {
            'values': [32]
            },
        'dropOut' : {
            'values' : [0.2]
            },
        'batchSize' : {
            'values' : [32]
            },
        'activation' : {
            'values' : ['mish']
            },
        'augmentation' : {
            'values':[True]
        },
        'batchNorm' : {
            'values' : [True]
            },
        'kernels' : {
            'values' : [[5, 5, 3, 3, 3]]
            },
        'learningRate' : {
            'values' : [0.0001]
         }
        }

    sweep_config['parameters'] = parameters_dict

    # intitialize sweep id.
    sweep_id = wandb.sweep(sweep_config, project= "DLAssignment2")
    wandb.agent(sweep_id,project= "DLAssignment2" , function = testIt)

[34m[1mwandb[0m: Currently logged in as: [33mcs22m028[0m. Use [1m`wandb login --relogin`[0m to force relogin


Create sweep with ID: enbpitlx
Sweep URL: https://wandb.ai/cs22m028/DLAssignment2/sweeps/enbpitlx


[34m[1mwandb[0m: Agent Starting Run: c5wvtxjp with config:
[34m[1mwandb[0m: 	activation: mish
[34m[1mwandb[0m: 	augmentation: True
[34m[1mwandb[0m: 	batchNorm: True
[34m[1mwandb[0m: 	batchSize: 32
[34m[1mwandb[0m: 	dropOut: 0.2
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	factor: 2
[34m[1mwandb[0m: 	filterSize: 32
[34m[1mwandb[0m: 	kernels: [5, 5, 3, 3, 3]
[34m[1mwandb[0m: 	learningRate: 0.0001


train data : Dataset ImageFolder
    Number of datapoints: 9999
    Root location: inaturalist_12K/train
    StandardTransform
Transform: Compose(
               Resize(size=(128, 128), interpolation=bilinear, max_size=None, antialias=warn)
               RandomHorizontalFlip(p=0.5)
               RandomVerticalFlip(p=0.2)
               ToTensor()
           ) and test data : Dataset ImageFolder
    Number of datapoints: 2000
    Root location: inaturalist_12K/val
    StandardTransform
Transform: Compose(
               Resize(size=(128, 128), interpolation=bilinear, max_size=None, antialias=warn)
               RandomHorizontalFlip(p=0.5)
               RandomVerticalFlip(p=0.2)
               ToTensor()
           )




CNNModel(
  (conv_blocks): ModuleList(
    (0): Conv2d(3, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): Conv2d(32, 64, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (2-4): 3 x Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2))
  )
  (batch_norms): ModuleList(
    (0): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (1-4): 4 x BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (activate): Mish()
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (drop): Dropout(p=0.2, inplace=False)
  (DenseBlock): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): LazyLinear(in_features=0, out_features=1024, bias=True)
    (2): Dropout(p=0.2, inplace=False)
    (3): Linear(in_features=1024, out_features=10, bias=True)
  )
)


  0%|          | 0/20 [00:00<?, ?it/s]

went through 0/250 samples
went through 50/250 samples
went through 100/250 samples
went through 150/250 samples
went through 200/250 samples
Train loss: 2.147568702697754, Train accuracy: 23.7875, validation loss: 2.1115660667419434, validation accuracy: 25.479497354497354

went through 0/250 samples
went through 50/250 samples
went through 100/250 samples
went through 150/250 samples
went through 200/250 samples
