In [None]:
import torch
import torch.nn as nn
from torchvision import datasets, transforms
from math import ceil as ceil
import matplotlib.pyplot as plt
from tqdm.auto import tqdm
# Fetch the nature_12K archive; wget saves it into the current working directory.
!wget https://storage.googleapis.com/wandb_datasets/nature_12K.zip
# Unpack quietly (-q). The absolute path assumes the notebook's working
# directory is /kaggle/working, where the download above landed — TODO confirm.
!unzip -q /kaggle/working/nature_12K.zip
#checking github

In [None]:
def prepData(augment: bool, batchSize: int = 32):
    """Build the train / validation / test DataLoaders for the nature_12K images.

    Augmentation (random flips) is applied to the training subset only; the
    validation subset and the test set always use the deterministic
    resize + ToTensor pipeline, so evaluation numbers are comparable between
    augmented and non-augmented runs.

    Args:
        augment: when truthy, training images get random horizontal (p=0.5)
            and vertical (p=0.2) flips.
        batchSize: mini-batch size used by all three loaders (default 32).

    Returns:
        (trainDataLoader, valDataLoader, testDataLoader, classLabels), where
        classLabels is the list of class names found in the train folder.
    """
    trainRoot = "/kaggle/working/inaturalist_12K/train"
    testRoot = "/kaggle/working/inaturalist_12K/val"

    evalTransform = transforms.Compose([
        transforms.Resize(size = (128,128)),
        transforms.ToTensor()
    ])
    if augment:
        trainTransform = transforms.Compose([
            transforms.Resize(size = (128,128)),
            transforms.RandomHorizontalFlip(p=0.5),
            transforms.RandomVerticalFlip(p=0.2),
            transforms.ToTensor()
        ])
    else:
        trainTransform = evalTransform

    print("loading the data into tensors ==============================")
    trainFull = datasets.ImageFolder(root = trainRoot,
                                     transform = trainTransform,
                                     target_transform = None)
    classLabels = trainFull.classes

    # Second view over the same folder, but without augmentation. ImageFolder
    # lists files in sorted order, so index i refers to the same image in both
    # views and one permutation can be shared between them.
    valFull = datasets.ImageFolder(root = trainRoot,
                                   transform = evalTransform)

    testData = datasets.ImageFolder(root = testRoot,
                                    transform = evalTransform)

    print(f"train data : {trainFull} and test data : {testData}")

    print("splitting into train and val ================================")
    trainSplit = ceil(0.8*len(trainFull))
    shuffledIndices = torch.randperm(len(trainFull)).tolist()
    trainData = torch.utils.data.Subset(trainFull, shuffledIndices[:trainSplit])
    valData = torch.utils.data.Subset(valFull, shuffledIndices[trainSplit:])

    print("wrapping into train loader ==================================")

    trainDataLoader = torch.utils.data.DataLoader(trainData,
                                                  shuffle=True,
                                                  batch_size=batchSize)

    # Evaluation loaders do not need shuffling.
    valDataLoader = torch.utils.data.DataLoader(valData,
                                                shuffle=False,
                                                batch_size=batchSize)

    testDataLoader = torch.utils.data.DataLoader(testData,
                                                 shuffle=False,
                                                 batch_size=batchSize)

    print("loaders created for faster loading ===========================")

    return trainDataLoader, valDataLoader, testDataLoader, classLabels

In [None]:
class CNNModel(nn.Module):
    """Five-stage convolutional classifier with a two-layer dense head.

    Every stage is conv -> (optional) batch-norm -> activation -> 2x2 max-pool.
    The last stage additionally applies dropout right before its activation.
    The channel count of each stage is the previous one multiplied by
    ``factor`` (truncated to an int, never below 1).

    Args:
        activation: non-linearity shared by all stages. Accepts an
            ``nn.Module`` instance, an ``nn.Module`` subclass, one of the
            names "relu" / "gelu" / "silu" / "mish", any other callable, or
            ``None`` (falls back to GELU).
        kernels: sequence of five kernel sizes, one per conv stage.
        inputShape: number of input channels.
        hiddenUnit: number of filters in the first conv stage.
        outputSize: number of output classes.
        dropOut: dropout probability (conv features and dense head).
        batchNorm: whether batch-norm is applied in ``forward``.
        factor: per-stage channel multiplier; may be fractional (e.g. 0.5).

    After construction ``self.hiddenUnit`` holds the channel count of the
    last conv stage.

    NOTE(review): padding is fixed at 2 for every kernel size, and the dense
    head has no non-linearity between its two linear layers; both are kept
    as in the original architecture.
    """

    _ACTIVATIONS_BY_NAME = {
        "relu": nn.ReLU,
        "gelu": nn.GELU,
        "silu": nn.SiLU,
        "mish": nn.Mish,
    }

    def __init__(self, activation, kernels, inputShape: int, hiddenUnit: int, outputSize: int, dropOut: float, batchNorm: bool, factor: float):
        # Module bookkeeping must exist before any sub-module is assigned.
        super().__init__()
        self.factor = factor
        self.batchNorm = batchNorm

        # Channel width of each of the five stages; int() keeps Conv2d happy
        # when factor is fractional, max(1, ...) avoids zero-channel layers.
        widths = [hiddenUnit]
        for _ in range(4):
            widths.append(max(1, int(factor * widths[-1])))

        self.convBlockStart = nn.Conv2d(in_channels = inputShape,
                                        out_channels = widths[0],
                                        kernel_size = kernels[0],
                                        padding = 2)
        # Batch-norm layers are always created (so the parameter set does not
        # depend on the flag); forward() decides whether they are used.
        self.batch1 = nn.BatchNorm2d(widths[0])

        # Stateless layers: a single instance is shared by all stages.
        self.activate = self._resolveActivation(activation)
        self.pool = nn.MaxPool2d(kernel_size = 2, stride = 2)

        self.convBlock1 = nn.Conv2d(in_channels = widths[0],
                                    out_channels = widths[1],
                                    kernel_size = kernels[1],
                                    padding = 2)
        self.batch2 = nn.BatchNorm2d(widths[1])

        self.convBlock2 = nn.Conv2d(in_channels = widths[1],
                                    out_channels = widths[2],
                                    kernel_size = kernels[2],
                                    padding = 2)
        self.batch3 = nn.BatchNorm2d(widths[2])

        self.convBlock3 = nn.Conv2d(in_channels = widths[2],
                                    out_channels = widths[3],
                                    kernel_size = kernels[3],
                                    padding = 2)
        self.batch4 = nn.BatchNorm2d(widths[3])

        self.convBlock4 = nn.Conv2d(in_channels = widths[3],
                                    out_channels = widths[4],
                                    kernel_size = kernels[4],
                                    padding = 2)
        self.batch5 = nn.BatchNorm2d(widths[4])

        self.hiddenUnit = widths[-1]

        self.drop = nn.Dropout(p=dropOut)
        self.DenseBlock = nn.Sequential(
            nn.Flatten(),
            # in_features is inferred from the first batch that passes through.
            nn.LazyLinear(out_features = 1024, bias = True, device = None, dtype = None),
            nn.Dropout(p=dropOut),
            nn.Linear(in_features = 1024,
                      out_features = outputSize)
        )

    @classmethod
    def _resolveActivation(cls, activation):
        """Turn the ``activation`` constructor argument into a callable layer."""
        if activation is None:
            return nn.GELU()
        if isinstance(activation, nn.Module):
            return activation
        if isinstance(activation, type) and issubclass(activation, nn.Module):
            return activation()
        if isinstance(activation, str):
            try:
                return cls._ACTIVATIONS_BY_NAME[activation.lower()]()
            except KeyError:
                raise ValueError(
                    f"unknown activation {activation!r}; expected one of {sorted(cls._ACTIVATIONS_BY_NAME)}"
                ) from None
        if callable(activation):
            return activation
        raise TypeError(f"activation must be a module, a name or a callable, got {type(activation).__name__}")

    def forward(self, x):
        """Run the five conv stages and the dense head; returns raw class logits."""
        stages = (
            (self.convBlockStart, self.batch1),
            (self.convBlock1, self.batch2),
            (self.convBlock2, self.batch3),
            (self.convBlock3, self.batch4),
            (self.convBlock4, self.batch5),
        )
        lastStage = len(stages) - 1
        for index, (conv, norm) in enumerate(stages):
            x = conv(x)
            if self.batchNorm:
                x = norm(x)
            if index == lastStage:
                # Only the final conv stage applies dropout (before activation).
                x = self.drop(x)
            x = self.activate(x)
            x = self.pool(x)
        return self.DenseBlock(x)

In [None]:
def accuracy(y_true, y_pred):
    """Return the percentage (0-100) of positions where ``y_pred`` equals ``y_true``."""
    matches = torch.eq(y_true, y_pred)
    hitCount = matches.sum().item()
    return hitCount / len(y_true) * 100

In [None]:
def _evaluate(model, dataLoader, device, loss_fn):
    """Return (mean per-batch loss, mean per-batch accuracy in percent) over ``dataLoader``.

    The model is switched to eval mode and no gradients are tracked.
    Works for any loader (validation or test).
    """
    totalLoss = 0.0
    totalAcc = 0.0
    model.eval()
    with torch.inference_mode():
        for X, y in dataLoader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            totalLoss += loss_fn(pred, y).item()
            totalAcc += accuracy(y_true=y, y_pred=pred.argmax(dim=1))
    numBatches = max(len(dataLoader), 1)  # avoid dividing by zero on an empty loader
    return totalLoss / numBatches, totalAcc / numBatches


def fit(model, trainDataLoader, valDataLoader, epochs, device, loss_fn, optimizer):
    """Train ``model`` for ``epochs`` epochs, validating after each one.

    Args:
        model: network to optimise (already moved to ``device``).
        trainDataLoader: loader yielding (inputs, labels) training batches.
        valDataLoader: loader yielding (inputs, labels) validation batches.
        epochs: number of passes over ``trainDataLoader``.
        device: device the batches are moved to.
        loss_fn: callable ``loss_fn(predictions, labels)`` returning a scalar tensor.
        optimizer: optimizer stepping ``model``'s parameters.

    Returns:
        A list with one metrics dict per epoch (keys "TrainingLoss",
        "ValidationLoss", "TrainingAccuracy", "ValidationAccuracy", "epoch").
        The same dict is logged to wandb when a run is active.
    """
    history = []
    numTrainBatches = max(len(trainDataLoader), 1)

    for epoch in tqdm(range(epochs)):
        train_loss = 0.0
        train_acc = 0.0
        # Validation leaves the model in eval mode, so re-enable training
        # behaviour (dropout, batch-norm updates) once per epoch.
        model.train()
        for batch, (X,y) in enumerate(trainDataLoader):
            X,y = X.to(device), y.to(device)
            y_pred = model(X)
            loss = loss_fn(y_pred, y)
            # .item() detaches the value: summing the tensor itself would keep
            # every batch's autograd graph alive until the end of the epoch.
            train_loss += loss.item()
            train_acc += accuracy(y_true=y, y_pred=y_pred.argmax(dim=1))
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if batch%50 == 0:
                print(f"went through {batch}/{len(trainDataLoader)} batches")

        train_loss /= numTrainBatches
        train_acc /= numTrainBatches

        val_loss, val_acc = _evaluate(model, valDataLoader, device, loss_fn)

        metrics = {"TrainingLoss" : train_loss, "ValidationLoss" : val_loss, "TrainingAccuracy" : train_acc, "ValidationAccuracy" : val_acc, "epoch": epoch}
        history.append(metrics)
        # Only log when a wandb run exists, so fit() also works outside a sweep.
        if wandb.run is not None:
            wandb.log(metrics)

        print(f"Train loss: {train_loss}, Train accuracy: {train_acc}, validation loss: {val_loss}, validation accuracy: {val_acc}\n")

    return history


In [None]:
def masterTrainer(trainDataLoader, valDataLoader, testDataLoader, learninRate, kernels, layerSize, dropOut, batchNorm, activation, factor, epochs):
    """Build a CNNModel from the given hyper-parameters and train it with Adam.

    Args:
        trainDataLoader: loader of training batches.
        valDataLoader: loader of validation batches.
        testDataLoader: loader of test batches (accepted for interface
            compatibility; not used by this function).
        learninRate: learning rate handed to Adam.
        kernels: five kernel sizes, one per conv stage.
        layerSize: number of filters in the first conv stage.
        dropOut: dropout probability.
        batchNorm: whether the model applies batch-norm.
        activation: one of "relu", "gelu", "silu", "mish".
        factor: per-stage channel multiplier forwarded to CNNModel; the
            resulting channel counts must be usable by nn.Conv2d.
        epochs: number of training epochs.

    Returns:
        The trained model.

    Raises:
        KeyError: if ``activation`` is not one of the supported names.
    """
    activations = {
        "relu" : torch.nn.ReLU,
        "gelu" : torch.nn.GELU,
        "silu" : torch.nn.SiLU,
        "mish" : torch.nn.Mish
    }
    activate = activations[activation]()

    # Forward the caller's hyper-parameters instead of fixed values.
    model_0 = CNNModel(activate, kernels, inputShape=3, hiddenUnit=layerSize, outputSize=10, dropOut=dropOut, batchNorm=batchNorm, factor=factor)

    device = "cuda" if torch.cuda.is_available() else "cpu"
    model_0.to(device)

    # The model contains a LazyLinear layer. PyTorch recommends a dry run so
    # lazy parameters are materialised before an optimizer is attached.
    # eval() + no_grad() keep the dry run from touching batch-norm statistics.
    sampleBatch = next(iter(trainDataLoader), None)
    if sampleBatch is not None:
        model_0.eval()
        with torch.no_grad():
            model_0(sampleBatch[0][:1].to(device))

    loss_fn = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(params = model_0.parameters(), lr = learninRate)
    print(model_0)
    fit(model_0, trainDataLoader, valDataLoader, epochs, device, loss_fn, optimizer)
    return model_0


In [None]:
import wandb

In [None]:
def wandbTrainer():
    """Sweep entry point: start a wandb run, name it after its config, load data and train.

    Reads every hyper-parameter from ``wandb.config`` of the run it starts.
    """
    wandb.init(project="DLAssignment2", entity="cs22m028")
    cfg = wandb.config

    # (label shown in the run name, configured value), in display order.
    # NOTE(review): batchSize appears in the run name, but prepData is called
    # below with only `augment`, so it is not forwarded to the loaders here.
    labelled = (
        ("activation", cfg.activation),
        ("epochs", cfg.epochs),
        ("dropOut", cfg.dropOut),
        ("batchSize", cfg.batchSize),
        ("filterSize", cfg.filterSize),
        ("batchNorm", cfg.batchNorm),
        ("augment", cfg.augmentation),
        ("learningRate", cfg.learningRate),
    )
    wandb.run.name = "config_" + "_".join(label + "=" + str(value) for label, value in labelled)

    loaders = prepData(augment=cfg.augmentation)
    trainDataLoader, valDataLoader, testDataLoader, classLabels = loaders

    masterTrainer(
        trainDataLoader,
        valDataLoader,
        testDataLoader,
        cfg.learningRate,
        cfg.kernels,
        cfg.filterSize,
        cfg.dropOut,
        cfg.batchNorm,
        cfg.activation,
        cfg.factor,
        cfg.epochs,
    )

In [None]:
import wandb

# Never embed the API key in the notebook. With no explicit key, wandb.login()
# uses the WANDB_API_KEY environment variable / stored credentials, or prompts
# interactively. Any key that was previously committed in this cell should be
# revoked in the W&B account settings.
wandb.login()

# The metric name must match a key passed to wandb.log during training;
# the training loop logs validation accuracy as "ValidationAccuracy".
metric = {
    'name': 'ValidationAccuracy',
    'goal': 'maximize'
    }

# Search space: every entry is a discrete list of candidate values.
parameters_dict = {
    'epochs' : {
        'values':[10,15,20]
    },
    'factor':{
        'values':[2, 1, 0.5]},
    'filterSize': {
        'values': [32, 64, 128]
        },
    'dropOut' : {
        'values' : [0.2, 0.3, 0.42, 0.5]
        },
    'batchSize' : {
        'values' : [16, 32, 64]
        },
    'activation' : {
        'values' : ['mish', 'gelu', 'relu', 'silu']
        },
    'augmentation' : {
        'values':[True, False]
    },
    'batchNorm' : {
        'values' : [True, False]
        },
    'kernels' : {
        'values' : [[3, 3, 3, 3, 3,], [5, 5, 3, 3, 3], [7, 5, 3, 3, 3]]
        },
    'learningRate' : {
        'values' : [0.001, 0.0005, 0.00075, 0.0001, 0.0002]
     }
    }

# Bayesian search over the space above, maximising the validation accuracy.
sweep_config = {
    'method': 'bayes',
    'metric': metric,
    'parameters': parameters_dict,
    }

sweep_id = wandb.sweep(sweep_config, project= "DLAssignment2")
wandb.agent(sweep_id,project= "DLAssignment2" , function = wandbTrainer)