# Part-A

## Question-1

Source1- https://blog.paperspace.com/writing-cnns-from-scratch-in-pytorch/ 

source2- https://pyimagesearch.com/2021/07/19/pytorch-training-your-first-convolutional-neural-network-cnn/

### load Relevant libraries

In [3]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import numpy as np
import matplotlib.pyplot as plt

In [4]:
from torch.nn import Module
from torch.nn import Conv2d
from torch.nn import Linear
from torch.nn import MaxPool2d
from torch.nn import ReLU, GELU, SELU, Mish
from torch.nn import LogSoftmax
from torch import flatten

In [5]:
# Device will determine whether to run the training on GPU or CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [6]:
print(device)

cuda


### Data Pre-Processing

In [18]:
# Use transforms.compose method to reformat images for modeling,
# and save to variable all_transforms for later use, find the size, mean and std for each channel of our dataset
def data_pre_processing(batch_size=16, data_augmentation=False):
    
    all_transforms = transforms.Compose([transforms.Resize((150,150)),
                                         transforms.ToTensor(),        #0-255 to 0-1 & numpy to tensor
                                         transforms.Normalize(mean=[0.4713, 0.4600, 0.3897],  #0-1 to [-1,1]
                                                              std=[0.2373, 0.2266, 0.2374])                                        
                                         ])

    # path for training and testing dataset directory
    train_path = r"C:\Users\HICLIPS-ASK\nature_12K\inaturalist_12K\train"
    test_path = r"C:\Users\HICLIPS-ASK\nature_12K\inaturalist_12K\val"

    train_dataset = torchvision.datasets.ImageFolder(root = train_path, transform = all_transforms)
    
    # converting train dataset into train and validation for hyperparameter tuning
    train_size = int(0.8 * len(train_dataset))
    val_size = len(train_dataset) - train_size
    
    train_dataset, val_dataset = torch.utils.data.random_split(train_dataset, [train_size, val_size])
    
    # data augmentation
    if data_augmentation == True:
        augment_transforms = transforms.Compose([transforms.Resize((150,150)),                                         
                                                 transforms.RandomHorizontalFlip(p=0.5),
                                                 transforms.RandomRotation((-60,60)),
                                                 transforms.ToTensor(),
                                                 transforms.Normalize(mean=[0.4713, 0.4600, 0.3897], 
                                                              std=[0.2373, 0.2266, 0.2374])
                                                 
                                         ])
        
        
        # uploading train dataset to take a portion and augment it then concate with train dataset
        aug_dataset = torchvision.datasets.ImageFolder(root = train_path, transform = augment_transforms)
        discrad_size = int(0.8 * len(aug_dataset))
        aug_size = len(aug_dataset) - discrad_size
        
        _ , transformed_dataset = torch.utils.data.random_split(aug_dataset, [discrad_size, aug_size])
        train_dataset = torch.utils.data.ConcatDataset([transformed_dataset, train_dataset])

    test_dataset = torchvision.datasets.ImageFolder(root = test_path, transform = all_transforms)

    # Instantiate loader objects to facilitate processing
    # shuffle= True, will ensure data of each class present in each batch
    train_loader = torch.utils.data.DataLoader(dataset = train_dataset,
                                               batch_size = batch_size,
                                               shuffle = True)


    test_loader = torch.utils.data.DataLoader(dataset = test_dataset,
                                               batch_size = batch_size,
                                               shuffle = True)
    
    val_loader = torch.utils.data.DataLoader(dataset = val_dataset,
                                               batch_size = batch_size,
                                               shuffle = True)
    
    return train_loader, test_loader, val_loader, train_dataset,test_dataset

In [8]:
train_loader, test_loader, val_loader, train_dataset, test_dataset = data_pre_processing(batch_size=64,
                                                                                        data_augmentation=False)

### Finding the mean and std of our dataset

In [None]:
def get_mean_and_std(dataloader):
    channels_sum, channels_squared_sum, num_batches = 0, 0, 0
    for data, _ in dataloader:
        # Mean over batch, height and width, but not over the channels
        channels_sum += torch.mean(data, dim=[0,2,3])
        channels_squared_sum += torch.mean(data**2, dim=[0,2,3])
        num_batches += 1
    
    mean = channels_sum / num_batches

    # std = sqrt(E[X^2] - (E[X])^2)
    std = (channels_squared_sum / num_batches - mean ** 2) ** 0.5

    return mean, std

In [None]:
#get_mean_and_std(train_loader)
#[0.4713, 0.4600, 0.3897], [0.2373, 0.2266, 0.2374] for iNaturalist dataset at resize=[150,150]

### Show image of samples

In [None]:
def show_images(dataset):
    loader = torch.utils.data.DataLoader(dataset, batch_size = 1, shuffle = True)
    batch = next(iter(loader))
    images, labels = batch
    print(images.shape)
    
    grid = torchvision.utils.make_grid(images, nrow = 3)
    plt.figure(figsize= (11,11))
    plt.imshow(np.transpose(grid, (1,2,0)))
    print('labels', labels)                                   


In [None]:
#show_images(train_dataset)

### CNN Model

In [11]:
class ConvNeuNet(Module):
    
    def __init__(self, size_kernel=[(3,3),(3,3),(3,3),(3,3),(3,3)], num_stride=1, act_fu='selu', size_denseLayer=200,
                 data_augmentation=True, batch_normalisation=True, input_channels=3,
                 classes=10, padding=1, kernel_org=0.5, dropout_rate=0.2, num_filters=[16,16,16,16,16]):
        
        # call the parent constructor
        super(ConvNeuNet, self).__init__()
        
        self.batch_norm = batch_normalisation
        self.data_aug = data_augmentation
        width = 150
        height = 150
        
        #(batch_size = 64, input_channels=3, width=150, height=150)
        # initialize first set of CONV => RELU => POOL layers
        self.conv1 = Conv2d(in_channels=input_channels, out_channels=num_filters[0],
                    kernel_size=size_kernel[0], stride=num_stride, padding=padding)
        width = int((width- size_kernel[0][0] + 2*padding)/num_stride) + 1
        height = int((height- size_kernel[0][0] + 2*padding)/num_stride) + 1
        self.bn1 = nn.BatchNorm2d(num_features=num_filters[0])
        self.af1 = ReLU() if act_fu=='relu' else GELU() if act_fu=='gelu' else SELU() if act_fu=='selu' else Mish()
        self.dropout1 = nn.Dropout(p=dropout_rate)
        self.maxpool1 = MaxPool2d(kernel_size=size_kernel[0], stride=num_stride, padding=padding)
        
        # updating width and height of the next layer after maxpool
        width = int((width- size_kernel[0][0] + 2*padding)/num_stride) + 1
        height = int((height- size_kernel[0][0] + 2*padding)/num_stride) + 1
        
        # initialize second set of CONV => RELU => POOL layers
        
        self.conv2 = Conv2d(in_channels=num_filters[0], out_channels=num_filters[1],
                     kernel_size=size_kernel[1], stride=num_stride, padding=padding)
        width = int((width- size_kernel[1][0] + 2*padding)/num_stride) + 1
        height = int((height- size_kernel[1][0] + 2*padding)/num_stride) + 1
        self.bn2 = nn.BatchNorm2d(num_features=num_filters[1])
        self.af2 = ReLU() if act_fu=='relu' else GELU() if act_fu=='gelu' else SELU() if act_fu=='selu' else Mish()
        self.dropout2 = nn.Dropout(p=dropout_rate)
        self.maxpool2 = MaxPool2d(kernel_size=size_kernel[1], stride=num_stride, padding=padding)
        
        # updating width and height of the next layer after maxpool
        width = int((width- size_kernel[1][0] + 2*padding)/num_stride) + 1
        height = int((height- size_kernel[1][0] + 2*padding)/num_stride) + 1
        
        # initialize third set of CONV => RELU => POOL layers
        self.conv3 = Conv2d(in_channels=num_filters[1], out_channels=num_filters[2],
                     kernel_size=size_kernel[2], stride=num_stride, padding=padding)
        width = int((width- size_kernel[2][0] + 2*padding)/num_stride) + 1
        height = int((height- size_kernel[2][0] + 2*padding)/num_stride) + 1
        self.bn3 = nn.BatchNorm2d(num_features=num_filters[2])
        self.af3 = ReLU() if act_fu=='relu' else GELU() if act_fu=='gelu' else SELU() if act_fu=='selu' else Mish()
        self.dropout3 = nn.Dropout(p=dropout_rate)
        self.maxpool3 = MaxPool2d(kernel_size=size_kernel[2], stride=num_stride, padding=padding)
        
         # updating width and height of the next layer after maxpool
        width = int((width- size_kernel[2][0] + 2*padding)/num_stride) + 1
        height = int((height- size_kernel[2][0] + 2*padding)/num_stride) + 1
        
        # initialize fourth set of CONV => RELU => POOL layers
        self.conv4 = Conv2d(in_channels=num_filters[2], out_channels=num_filters[3],
                     kernel_size=size_kernel[3], stride=num_stride, padding=padding)
        width = int((width- size_kernel[3][0] + 2*padding)/num_stride) + 1
        height = int((height- size_kernel[3][0] + 2*padding)/num_stride) + 1
        self.bn4 = nn.BatchNorm2d(num_features=num_filters[3])
        self.af4 = ReLU() if act_fu=='relu' else GELU() if act_fu=='gelu' else SELU() if act_fu=='selu' else Mish()
        self.dropout4 = nn.Dropout(p=dropout_rate)
        self.maxpool4 = MaxPool2d(kernel_size=size_kernel[3], stride=num_stride, padding=padding)
        
        # updating width and height of the next layer after maxpool
        width = int((width- size_kernel[3][0] + 2*padding)/num_stride) + 1
        height = int((height- size_kernel[3][0] + 2*padding)/num_stride) + 1
        
        # initialize fifth set of CONV => RELU => POOL layers
        self.conv5 = Conv2d(in_channels=num_filters[3], out_channels=num_filters[4],
                     kernel_size=size_kernel[4], stride=num_stride, padding=padding)
        width = int((width- size_kernel[4][0] + 2*padding)/num_stride) + 1
        height = int((height- size_kernel[4][0] + 2*padding)/num_stride) + 1
        self.bn5 = nn.BatchNorm2d(num_features=num_filters[4])
        self.af5 = ReLU() if act_fu=='relu' else GELU() if act_fu=='gelu' else SELU() if act_fu=='selu' else Mish()
        self.dropout5 = nn.Dropout(p=dropout_rate)
        self.maxpool5 = MaxPool2d(kernel_size=size_kernel[4], stride=num_stride, padding=padding)
        
        # updating width and height of the next layer after maxpool
        width = int((width- size_kernel[4][0] + 2*padding)/num_stride) + 1
        height = int((height- size_kernel[4][0] + 2*padding)/num_stride) + 1
        #initialize first (and only) set of FC => RELU layers
        self.fc1 = Linear(in_features=int(num_filters[4]*width*height), out_features=size_denseLayer)
        self.af6 = ReLU() if act_fu=='relu' else GELU() if act_fu=='gelu' else SELU() if act_fu=='selu' else Mish()
        self.dropout6 = nn.Dropout(p=dropout_rate)
        
        # initialize our softmax classifier
        self.fc2 = Linear(in_features=size_denseLayer, out_features=classes)
        self.logSoftmax = LogSoftmax(dim=1)
        
    def forward(self, x):
        
        # pass the input through our first set of CONV => Batch_norm => RELU =>
        # POOL layers
        x = self.conv1(x)
        if self.batch_norm:
            x = self.bn1(x)
        x = self.af1(x)
        x = self.dropout1(x)
        x = self.maxpool1(x)
       
        # pass the output from the previous layer through the second
        # set of CONV => Batch_norm => RELU => layers
        x = self.conv2(x)
        if self.batch_norm:
            x = self.bn2(x)
        x = self.af2(x)
        x = self.dropout2(x)
        x = self.maxpool2(x)
        # pass the output from the previous layer through the third
        # set of CONV => Batch_norm => RELU => POOL layers
        x = self.conv3(x)
        if self.batch_norm:
            x = self.bn3(x)
        x = self.af3(x)
        x = self.dropout3(x)
        x = self.maxpool3(x)
        # pass the output from the previous layer through the fourth
        # set of CONV => Batch_norm => RELU => POOL layers
        x = self.conv4(x)
        if self.batch_norm:
            x = self.bn4(x)
        x = self.af4(x)
        x = self.dropout4(x)
        x = self.maxpool4(x)
        # pass the output from the previous layer through the fifth
        # set of CONV => Batch_norm => RELU => POOL layers
        x = self.conv5(x)
        if self.batch_norm:
            x = self.bn5(x)
        x = self.af5(x)
        x = self.dropout5(x)
        x = self.maxpool5(x)
        # flatten the output from the previous layer and pass it
        # through our only set of FC => RELU layers
        x = flatten(x, 1)
        x = self.fc1(x)
        x = self.af6(x)
        x = self.dropout6(x)
        # pass the output to our softmax classifier to get our output
        # predictions
        x = self.fc2(x)
        output = self.logSoftmax(x)
        # return the output predictions
        return output

In [12]:
model = ConvNeuNet().to(device)

In [13]:
#Optimizer and loss function
optimizer=torch.optim.Adam(model.parameters(),lr=0.0001,weight_decay=0.0001)
loss_function=nn.CrossEntropyLoss()

In [14]:
num_epochs=3

In [15]:
# Define evaluation function
def evaluate(model, dataloader):
    model.eval() # Set model to evaluation mode
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in dataloader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    accuracy = 100 * correct / total
    return accuracy


In [None]:
# !pip install wandb
# import wandb

In [16]:
#Model training and saving best model

def train_CNN():
    

#     # default values
#     config_defaults = {
#         'num_filters': [16,16,16,16,16],
#         'act_fu': 'selu',
#         'size_kernel': [(3,3),(3,3),(3,3),(3,3),(3,3)],
#         'data_augmentation': True,
#         'batch_normalisation': True,
#         'dropout_rate': 0.2
#          }
    
#     #loading the dataloaders and dataset
#     train_loader, test_loader, val_loader, train_dataset, test_dataset = data_pre_processing(batch_size=64,
#                                                                                         data_augmentation=True)

#     # initialize wandb
#     wandb.init(config=config_defaults)

#     # config is a data structure that holds hyperparameters and inputs
#     config = wandb.config

#     # Local variables, values obtained from wandb config
#     num_filters = config.num_filters
#     act_fu = config.act_fu
#     size_kernel = config.size_kernel
#     data_augmentation = config.data_augmentation
#     batch_normalisation = config.batch_normalisation
#     dropout_rate = config.dropout_rate

#     wandb.run.name  = "filSize_{}_af_{}_NF_{}_DA_{}_BN_{}_Drp_{}".format(size_kernel,
#                                                                           act_fu,
#                                                                           num_filters,
#                                                                           data_augmentation,
#                                                                           batch_normalisation,
#                                                                           dropout_rate)
                                                                             



#     print(wandb.run.name )

    best_accuracy=0.0    
    # Training on training dataset
    model.train()
    for epoch in range(num_epochs):
        running_loss = 0.0
        for i, data in enumerate(train_loader):
            inputs, labels = data[0].to(device), data[1].to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = loss_function(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            if i % 100 == 99:
                train_loss=running_loss/100   
                running_loss = 0.0

        # Evaluate training set accuracy
        train_accuracy = evaluate(model, train_loader)

        # Evaluate test set accuracy
        val_accuracy = evaluate(model, val_loader)


        print("Epoch: "+str(epoch+1)+ ' Train Loss:'+ str(train_loss) +' Train Accuracy:'+
              str(train_accuracy) + ' Validation Accuracy: '+ str(val_accuracy)) 


        # save the best model
        if val_accuracy>best_accuracy:
            torch.save(model.state_dict(),'best_checkpoint.model')
            best_accuracy=val_accuracy
            
#         wandb.log({"validation accuracy": val_accuracy, "train accuracy": train_accuracy, 
#                     "train loss": train_loss, 'epoch': epoch+1})
    
#         wandb.run.name 
#         wandb.run.save()
#         wandb.run.finish()
            
       

In [17]:
train_CNN()

Epoch: 1 Train Loss:2.922202019691467 Train Accuracy:14.40180022502813 Validation Accuracy: 12.95
Epoch: 2 Train Loss:2.238641633987427 Train Accuracy:19.839979997499686 Validation Accuracy: 18.2
Epoch: 3 Train Loss:2.1790716528892515 Train Accuracy:23.990498812351543 Validation Accuracy: 22.2


# Running the wandb sweep

In [None]:
sweep_config = {"name": "cs6910_assignment2", "method": "bayes"}   
sweep_config["metric"] = {"name": "val_accuracy", "goal": "maximize"}

parameters_dict = {
              "num_filters": {"values": [[16,16,16,16,16],[4,8,16,32,64],[64,32,16,8,4]]},
              "act_fu": {"values": ["gelu","selu","mish"]},
              "size_kernel": {"values": [[(3,3),(3,3),(3,3),(3,3),(3,3)], [(3,3),(5,5),(5,5),(7,7),(7,7)], 
                                         [(7,7),(7,7),(5,5),(5,5),(3,3)]]}, 
                "data_augmentation": {"values": [True, False]} ,
                "batch_normalisation": {"values": [True, False]} ,
                "dropout_rate": {"values": [0.2, 0.3]}, 
                }
sweep_config["parameters"] = parameters_dict

sweep_id = wandb.sweep(sweep_config, entity="am22s020", project="cs6910_assignment2")
wandb.agent(sweep_id, train_CNN, count=150)