## ASSIGNMENT-2

Learn how to use CNNs: train one from scratch, fine-tune a pretrained model, and use a pretrained model as is.


**Installs**

In [1]:
# Notebook dependencies, installed through IPython's `!` shell escape.
# The opencv-python-headless pin below is what lets `import albumentations as A` succeed here.
!pip install -U albumentations
!pip install "opencv-python-headless<4.3" #for import albumentations as A
!pip install wandb #To install wandb and evaluate models

**Imports**

In [2]:
import os
import copy
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import models,datasets,transforms
import torchvision

from torch.utils.data import Dataset, DataLoader, ConcatDataset
import albumentations as A
from albumentations.pytorch import ToTensorV2
import cv2
import glob
import numpy as np
import random
import wandb
import gc
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

import matplotlib.pyplot as plt
from itertools import chain
enable_GPU = 0

**Enabling GPU**

In [3]:
# Pick the compute device once; later cells move the model and batches to `Device`
# whenever `enable_GPU` is 1.
Device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
if torch.cuda.is_available():
    print(torch.cuda.get_device_name(0))
    enable_GPU = 1
else:
    # Asking for the name of device 0 raises when no CUDA device exists, and the
    # flag must stay 0 so the rest of the notebook keeps everything on the CPU.
    print("CUDA is not available - running on CPU")
    enable_GPU = 0

**Download iNaturalist-12K dataset**

In [4]:
# Fetch the nature_12K archive and extract it into the current working directory.
!wget https://storage.googleapis.com/wandb_datasets/nature_12K.zip
!unzip nature_12K.zip

In [5]:
# The archive is removed once it has been extracted by the previous cell.
!rm -r nature_12K.zip
# Dataset root used by the rest of the notebook; its "train" and "val" subfolders are read later.
actual_data_path = "./inaturalist_12K"

**Dataset Creation**

In [6]:
# Collects every image path under <actual_data_path>/train and splits it into train / validation lists
def CreateTrainDataset(actual_data_path, train_fraction=0.9, seed=None):
    """Return ``(train_image_paths, valid_image_paths)`` for the training folder.

    Every entry of ``<actual_data_path>/train/*/*`` is collected, shuffled and
    split so that the first ``train_fraction`` of the shuffled list is used for
    training and the remainder for validation.

    Args:
        actual_data_path: dataset root that contains the ``train`` folder.
        train_fraction: share of the paths assigned to the training list
            (default 0.9, i.e. the original 90/10 split).
        seed: optional seed for a private random generator. With ``None`` the
            global ``random`` module state is used, as before.

    Returns:
        Tuple of two lists of path strings: (training paths, validation paths).

    Raises:
        ValueError: if ``train_fraction`` is outside ``[0, 1]``.
    """
    if not 0 <= train_fraction <= 1:
        raise ValueError("train_fraction must be between 0 and 1, got {0}".format(train_fraction))

    train_data_path = os.path.join(actual_data_path, "train")
    # one glob per class folder, flattened into a single list of image paths
    train_image_paths = list(chain.from_iterable(
        glob.glob(data_path + '/*') for data_path in glob.glob(train_data_path + "/*")))

    # glob order depends on the filesystem; sorting first makes a seeded shuffle repeatable
    train_image_paths.sort()
    shuffler = random if seed is None else random.Random(seed)
    shuffler.shuffle(train_image_paths)

    split_at = int(train_fraction * len(train_image_paths))
    return train_image_paths[:split_at], train_image_paths[split_at:]

# Builds the list of test image paths (the dataset's "val" folder is used as the test set)
def CreateTestDataset(actual_data_path):
    """Return a flat list with every path matching ``<actual_data_path>/val/*/*``."""
    test_root = os.path.join(actual_data_path, "val")
    collected_paths = []
    for class_folder in glob.glob(test_root + '/*'):
        # extend keeps the list flat, one entry per file inside the class folder
        collected_paths.extend(glob.glob(class_folder + '/*'))
    return collected_paths

In [7]:
#Create dictionaries mapping class index <-> class name (one class per folder under train/)
train_data_path = os.path.join(actual_data_path, "train")
# glob returns entries in arbitrary, filesystem-dependent order. Sorting keeps the
# index assigned to each class identical across runs and machines. Non-directory
# entries (stray files next to the class folders) are skipped so they do not
# become classes.
classes = sorted(
    os.path.basename(os.path.normpath(data_path))
    for data_path in glob.glob(train_data_path + "/*")
    if os.path.isdir(data_path)
)
idx_to_class = dict(enumerate(classes)) #index to class map
class_to_idx = {name: index for index, name in idx_to_class.items()} #class to index map

**Dataset Class**

In [8]:
#Dataset that returns images and their corresponding labels after applying the transforms
class iNaturalist_12KDataset(Dataset):
    """Map-style dataset over a list of image file paths.

    Each item is ``(image, label)``: the image is read with OpenCV and converted
    from BGR to RGB; the label is the index that the module-level
    ``class_to_idx`` mapping assigns to the name of the image's parent folder.

    Args:
        image_paths: sequence of image file paths.
        transform: optional callable invoked as ``transform(image=img)["image"]``.
            ``None`` (or the legacy default ``False``) means "no transform".
    """

    def __init__(self, image_paths, transform=None):
        self.image_paths = image_paths
        # The previous default was False, which then slipped past the `is not None`
        # check in __getitem__ and was called like a function. Normalise it here.
        self.transform = None if (transform is None or transform is False) else transform

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        image_filepath = self.image_paths[idx]
        image = cv2.imread(image_filepath)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising
            raise OSError("cv2.imread could not read image: {0}".format(image_filepath))
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # the parent directory name is the class name
        label = os.path.basename(os.path.dirname(image_filepath))
        label = class_to_idx[label]
        if self.transform is not None:
            image = self.transform(image=image)["image"]
        return image, label

**Building the Model**

In [9]:
#Optimization Function
def OptimizerFunction(model, learning_rate,weight_decay, optimizer_name):
    """Build the optimizer for ``model``.

    Args:
        model: module whose ``parameters()`` are optimized.
        learning_rate: value passed as ``lr``.
        weight_decay: value passed as ``weight_decay``.
        optimizer_name: ``"SGD"`` or ``"Adam"``.

    Returns:
        A ``torch.optim.SGD`` or ``torch.optim.Adam`` instance.

    Raises:
        ValueError: for any other ``optimizer_name`` (previously this fell
            through to ``return opt`` and failed with UnboundLocalError).
    """
    # `del model` plus gc/empty_cache used to live here; deleting the local name
    # frees nothing because the caller still holds the model, so it was dropped.
    if optimizer_name == "SGD":
        return torch.optim.SGD(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
    if optimizer_name == "Adam":
        return torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
    raise ValueError("Unsupported optimizer_name {0!r}; expected 'SGD' or 'Adam'".format(optimizer_name))
    

In [10]:
#Loss Function
def LossFunction():
    """Return a new cross-entropy criterion (``nn.CrossEntropyLoss`` with default arguments)."""
    criterion = nn.CrossEntropyLoss()
    return criterion

In [11]:
#Activation Function
#To support another activation, add a (name, function) pair to the table below
def ActivationFunction(activation_name):
    """Look up the ``torch.nn.functional`` activation registered under ``activation_name``.

    Recognised names are 'relu', 'elu', 'sigmoid' and 'gelu'. Any other value
    yields ``None``.
    """
    supported = (
        ('relu', F.relu),
        ('elu', F.elu),
        ('sigmoid', F.sigmoid),
        ('gelu', F.gelu),
    )
    for known_name, activation in supported:
        if activation_name == known_name:
            return activation
    return None

In [12]:
class CnnModel(nn.Module):
    """Five conv -> (batch-norm) -> activation -> max-pool blocks followed by a two-layer dense head.

    Args:
        conv_attributes: list of five dicts with keys ``in_channels``,
            ``out_channels`` and ``kernel_size``; optional ``stride``,
            ``padding`` and ``dilation`` (defaults 1, 0, 1) are forwarded to
            ``nn.Conv2d`` so the layers match what ``LinearInFeatureCalculate``
            computes from the same dicts.
        pool_attributes: list of five dicts with keys ``kernel_size`` and ``stride``.
        in_feature: number of flattened features entering the first dense layer.
        activation_name: name understood by ``ActivationFunction``.
        batch_normalization: when truthy, batch-norm is applied after every convolution.
        dropout: drop probability applied after the first dense layer.
        dense_layer_size: width of the first dense layer.
        num_classes: number of output classes (default 10).

    ``forward`` returns raw, unnormalized class scores (logits). This is the
    input ``nn.CrossEntropyLoss`` expects; the arg-max over the scores is the
    same as the arg-max over their softmax.
    """

    NUM_BLOCKS = 5  # the layer attributes conv1..conv5 / bn1..bn5 / pool1..pool5 are fixed by this

    def __init__(self, conv_attributes, pool_attributes,in_feature,activation_name,batch_normalization,dropout,dense_layer_size,num_classes=10):
        super(CnnModel, self).__init__()

        self.batch_normalization = batch_normalization #batch normalization
        self.dropout = dropout # dropout probability

        # Attribute names (conv1, bn1, act1, pool1, ...) and creation order match the
        # hand-written version, so state_dict keys and parameter initialisation are unchanged.
        for block in range(1, self.NUM_BLOCKS + 1):
            conv_cfg = conv_attributes[block - 1]
            pool_cfg = pool_attributes[block - 1]
            setattr(self, "conv{0}".format(block), nn.Conv2d(
                conv_cfg["in_channels"], conv_cfg["out_channels"], conv_cfg["kernel_size"],
                stride=conv_cfg.get("stride", 1),
                padding=conv_cfg.get("padding", 0),
                dilation=conv_cfg.get("dilation", 1)))
            # batch-norm layers are always created (even if unused) to keep the state_dict layout stable
            setattr(self, "bn{0}".format(block), nn.BatchNorm2d(conv_cfg["out_channels"]))
            setattr(self, "act{0}".format(block), ActivationFunction(activation_name))
            setattr(self, "pool{0}".format(block), nn.MaxPool2d(pool_cfg["kernel_size"], pool_cfg["stride"]))

        #Dense head
        self.fc1 = nn.Linear(in_feature, dense_layer_size)
        self.fc1_act = ActivationFunction(activation_name)
        # Registered as a sub-module so model.train()/model.eval() toggles it. A Dropout
        # built inside forward() is always in training mode and keeps dropping units
        # during validation.
        self.dropout_layer = nn.Dropout(dropout)
        self.fc2 = nn.Linear(dense_layer_size, num_classes)

    def forward(self,x):
        for block in range(1, self.NUM_BLOCKS + 1):
            x = getattr(self, "conv{0}".format(block))(x)
            if self.batch_normalization:
                x = getattr(self, "bn{0}".format(block))(x)
            x = getattr(self, "act{0}".format(block))(x)
            x = getattr(self, "pool{0}".format(block))(x)

        x = torch.flatten(x, 1) # flatten all dimensions except batch
        x = self.fc1_act(self.fc1(x))
        x = self.dropout_layer(x)
        # No softmax here: nn.CrossEntropyLoss applies log-softmax itself, and feeding it
        # probabilities squashes the gradients.
        return self.fc2(x)

In [13]:
#Training Function
def TrainNetwork(model,num_epochs, batch_size,learning_rate,optimizer_name,weight_decay,resized_shape,actual_data_path,dataset_augmentation,wandb_fn):
    """Train ``model`` on the training split and validate it after every epoch.

    Args:
        model: network to train; expected to already be on ``Device`` when ``enable_GPU`` is 1.
        num_epochs: number of passes over the training data.
        batch_size: batch size for the training and validation loaders.
        learning_rate, optimizer_name, weight_decay: forwarded to ``OptimizerFunction``.
        resized_shape: images are resized to ``resized_shape x resized_shape``.
        actual_data_path: dataset root handed to ``CreateTrainDataset``.
        dataset_augmentation: when truthy, an augmented copy of the training
            images is concatenated with the plain training set.
        wandb_fn: when truthy, per-epoch metrics are sent with ``wandb.log``
            (an active wandb run is required). "Validation Accuracy" is the
            metric name the sweep configuration optimizes.

    Raises:
        ValueError: if no training images are found.

    After the last epoch the prediction grid and the conv1 filter visualizations are produced.
    """
    loss_funt = LossFunction() #Loss function is called
    optimizer = OptimizerFunction(model, learning_rate, weight_decay, optimizer_name) #Optimization function is called
    #Calling Compose returns a transform function that performs image transformation.
    train_transforms = A.Compose([A.Resize(resized_shape,resized_shape),A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),ToTensorV2()])

    #Function to create train, validation dataset and returns the train and validation image paths
    train_image_paths, valid_image_paths=CreateTrainDataset(actual_data_path)
    if not train_image_paths:
        raise ValueError("No training images found under {0!r}".format(actual_data_path))

    #Training Dataset created with train_transforms
    train_dataset = iNaturalist_12KDataset(train_image_paths,train_transforms)
    if dataset_augmentation:
        # Normalize is the last step before ToTensorV2, so the colour-space
        # augmentations operate on the original uint8 pixels and not on
        # mean/std-normalized floats.
        augmented_transforms = A.Compose([A.SmallestMaxSize(max_size=350),
              A.Resize(resized_shape,resized_shape),
              A.ShiftScaleRotate(shift_limit=0.05, scale_limit=0.05, rotate_limit=360, p=0.5),
              A.RGBShift(r_shift_limit=15, g_shift_limit=15, b_shift_limit=15, p=0.5),
              A.RandomBrightnessContrast(p=0.5),
              A.MultiplicativeNoise(multiplier=[0.5,2], per_channel=True, p=0.2),
              A.HueSaturationValue(hue_shift_limit=0.2, sat_shift_limit=0.2, val_shift_limit=0.2, p=0.5),
              A.RandomBrightnessContrast(brightness_limit=(-0.1,0.1), contrast_limit=(-0.1, 0.1), p=0.5),
              A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
              ToTensorV2()])
        transformed_dataset = iNaturalist_12KDataset(train_image_paths,augmented_transforms)   #Transformed Dataset created with augmented_transforms
        train_dataset = torch.utils.data.ConcatDataset([transformed_dataset,train_dataset])
    #Validation Dataset created
    valid_dataset = iNaturalist_12KDataset(valid_image_paths,train_transforms) #train transforms are applied
    #Dataloader loads train dataset
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    #Dataloader loads validation dataset
    valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False)

    n_total_steps = len(train_loader)
    total_data_size = len(train_loader.dataset)
    total_validation_data = len(valid_loader.dataset)

    #Training the network
    for epoch in range(num_epochs):
        cumulative_loss = 0
        correct_training = 0
        model.train(True) # For training
        for i, (images, labels) in enumerate(train_loader):
            if enable_GPU == 1 :
                images = images.to(Device)
                labels = labels.to(Device)

            # Forward pass
            outputs = model(images)
            loss = loss_funt(outputs, labels)

            # Backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            correct_training += (outputs.argmax(dim=1) == labels).sum().item()
            cumulative_loss += loss.item()
            if (i+1) % 30 == 0:
                print (f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{n_total_steps}], Loss: {loss.item():.4f}')

        print('Finished Training---------------------')

        #Validating the trained model
        model.train(False)
        correct_validation = 0
        with torch.no_grad():
            for images, labels in valid_loader:
                if enable_GPU == 1 :
                    images = images.to(Device)
                    labels = labels.to(Device)
                outputs = model(images)
                correct_validation += (outputs.argmax(dim=1) == labels).sum().item()

        train_accuracy = 100*(correct_training/total_data_size)
        # a very small dataset can leave the validation split empty
        val_accuracy = 100*(correct_validation/total_validation_data) if total_validation_data else 0.0
        # each batch loss is already a mean over its samples, so average over batches
        epoch_loss = cumulative_loss/n_total_steps

        print("Train Accuracy in Epoch {0}/{1} = {2} : ".format(epoch+1 , num_epochs , train_accuracy))
        print("Val Accuracy in Epoch {0}/{1} = {2} : ".format(epoch+1 , num_epochs , val_accuracy))
        print("Loss in Epoch {0}/{1} = {2} : ".format(epoch+1 , num_epochs , epoch_loss))
        print()

        if wandb_fn:
            # the sweep's optimization target is looked up by the name "Validation Accuracy"
            wandb.log({"epoch": epoch+1,
                       "Training Accuracy": train_accuracy,
                       "Validation Accuracy": val_accuracy,
                       "Training Loss": epoch_loss})

    PlotGridOfImages(model,batch_size)
    visualize_filters(model,batch_size)
    #Dropping the local reference and releasing cached memory after use
    del model
    gc.collect()
    torch.cuda.empty_cache()

In [14]:
##Computes the spatial size (height == width) that reaches the dense layer
def LinearInFeatureCalculate(initial_dim,conv_attributes,pool_attributes):
    """Propagate a square input size through five conv + pool stages.

    For each of the first five entries, the convolution output size is computed
    from ``padding``, ``dilation``, ``kernel_size`` and ``stride``, and then the
    pooling output size from the pool ``kernel_size`` and ``stride`` (both with
    floor division).

    Returns:
        The side length after the fifth pooling stage.
    """
    side = initial_dim
    for stage in range(5):
        conv_cfg = conv_attributes[stage]
        pool_cfg = pool_attributes[stage]
        # effective kernel extent once dilation is taken into account
        kernel_extent = conv_cfg["dilation"] * (conv_cfg["kernel_size"] - 1) + 1
        after_conv = (side + 2 * conv_cfg["padding"] - kernel_extent) // conv_cfg["stride"] + 1
        side = (after_conv - pool_cfg["kernel_size"]) // pool_cfg["stride"] + 1
    return side


In [15]:
#Helper for the plots: undo the mean/std normalization and return a PIL image
def de_normalize(img):
    """Invert the per-channel normalization of a 3-channel image tensor.

    The tensor is multiplied by the per-channel std and the per-channel mean is
    added (the same constants used by the Normalize transforms in this
    notebook), then the result is converted with ``ToPILImage(mode='RGB')``.
    """
    channel_mean = torch.tensor((0.485, 0.456, 0.406)).view(3, 1, 1)
    channel_std = torch.tensor((0.229, 0.224, 0.225)).view(3, 1, 1)
    restored = img * channel_std
    restored = restored + channel_mean
    to_pil = transforms.ToPILImage(mode='RGB')
    return to_pil(restored)

In [16]:
#Visualizing filters of Conv1 layer
def visualize_filters(model,batch_size, resized_shape=256):
    """Plot the conv1 kernels of ``model`` and the conv1 feature maps of one test image.

    Two figures are drawn and saved: ``filtersconv.png`` (the conv1 weights as a
    grid) and ``filtersimg_test.png`` (one grid cell per conv1 output channel
    for a single randomly drawn test image).

    Args:
        model: network exposing a ``conv1`` layer. It is moved to the CPU in place.
        batch_size: batch size of the test loader the image is drawn from.
        resized_shape: side length the test image is resized to (default 256).

    Raises:
        ValueError: if no test images are found.
    """
    test_transforms = A.Compose([A.Resize(resized_shape,resized_shape),A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),ToTensorV2()])

    #Function to create test dataset and returns the test image paths
    test_image_paths=CreateTestDataset(actual_data_path)
    if not test_image_paths:
        raise ValueError("No test images found under {0!r}".format(actual_data_path))

    #Test Dataset created
    test_dataset = iNaturalist_12KDataset(test_image_paths,test_transforms)

    #Dataloader loads test dataset
    test_loader = DataLoader(
    test_dataset, batch_size=batch_size, shuffle=True)

    # Only one image is visualized, so a single (shuffled) batch is enough.
    # Iterating the whole loader would decode every test image for nothing.
    img, _ = next(iter(test_loader))

    model = model.cpu()  # nn.Module.cpu() moves the module in place and returns it
    with torch.no_grad():
        w = model.conv1.weight.data
        # run conv1 on the first image only; the result has shape (1, C, H, W)
        im_test = model.conv1(img[:1])
    # one single-channel "image" per conv1 output channel: (C, 1, H, W)
    im_test=im_test.permute(1,0,2,3)
    gc.collect()
    torch.cuda.empty_cache()
    grid_w = torchvision.utils.make_grid(w, nrow=16, normalize=True, scale_each=True, )
    grid_im = torchvision.utils.make_grid(im_test, nrow=16, normalize=True, scale_each=True, )

    plt.figure(figsize=(30, 30))
    plt.title("Visualizationg of Filters of Conv1")
    plt.xticks([])
    plt.yticks([])
    plt.imshow(grid_w.permute(1, 2, 0))
    plt.savefig("filtersconv.png")


    plt.figure(figsize=(30, 30))
    plt.title("Visualizationg of image after using conv1 filters")
    plt.xticks([])
    plt.yticks([])
    plt.imshow(grid_im.permute(1, 2, 0))
    plt.savefig("filtersimg_test.png")
    

In [17]:
#Plotting visualizations of 30 predictions
def PlotGridOfImages(model,batch_size):
    """Plot up to 30 random test images with their true and predicted class names.

    The grid (10 rows x 3 columns) is saved to ``Plots.png``.

    Args:
        model: trained network; expected to be on ``Device`` when ``enable_GPU`` is 1.
        batch_size: accepted for backward compatibility only. Exactly one batch
            of 30 images is drawn regardless of this value, because the grid
            needs 30 images (the previous code overwrote the argument with 32).

    Raises:
        ValueError: if no test images are found.
    """
    num_images = 30
    resized_shape=256
    test_transforms = A.Compose([A.Resize(resized_shape,resized_shape),A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),ToTensorV2()])

    #Function to create test dataset and returns the test image paths
    test_image_paths=CreateTestDataset(actual_data_path)
    if not test_image_paths:
        raise ValueError("No test images found under {0!r}".format(actual_data_path))

    #Test Dataset created
    test_dataset = iNaturalist_12KDataset(test_image_paths,test_transforms)

    #Dataloader loads test dataset
    test_loader = DataLoader(
    test_dataset, batch_size=num_images, shuffle=True)

    images, labels = next(iter(test_loader))
    if enable_GPU == 1:
        images = images.to(Device)
        labels = labels.to(Device)

    # Predict in eval mode and without autograd, then restore the caller's mode.
    was_training = model.training
    model.train(False)
    with torch.no_grad():
        predicted = model(images).argmax(dim=1)
    model.train(was_training)
    gc.collect()
    torch.cuda.empty_cache()

    #Plotting model predictions
    fig = plt.figure(figsize=(10,30))
    # the test set may hold fewer than 30 images
    for x in range(min(num_images, images.shape[0])):
        ax = fig.add_subplot(10,3,x+1)
        ax.imshow(de_normalize(images[x].cpu()))
        ax.set_xlabel("True: "+ idx_to_class[labels[x].item()])
        ax.set_ylabel("Pred: "+ idx_to_class[predicted[x].item()])
        ax.set_xticks([])
        ax.set_yticks([])
    fig.tight_layout()
    fig.savefig("Plots.png")
    

**Sweep Config**

In [18]:
#This is the config file for performing sweep in wandb
# Every hyperparameter below lists exactly one candidate value, so each run of this
# sweep uses the same fixed configuration.
sweep_config = {
  'name': 'Assignment2_PartA_Q2',
  'method': 'bayes',
  # NOTE(review): the sweep looks this metric up by name - confirm the training code
  # logs a value under exactly 'Validation Accuracy'.
  'metric': {
      'name': 'Validation Accuracy',
      'goal': 'maximize'   
    },
  'parameters': {
      'epochs': {
            'values': [30]
        },
        # out_channels of the five convolution layers, in order (read by train_wandb)
        'conv_attributes_channels': {
            'values': [[256,128,64,32,16]]
        },
        # kernel_size of the five convolution layers, in order
        'conv_attributes_kernel_size': {
            'values': [[3,3,5,7,9]]
        },
        # kernel_size of the five max-pool layers, in order
        'pool_attributes_kernel_size': {
            'values': [[3,3,3,2,2]]
        },
        # stride of the five max-pool layers, in order
        'pool_attributes_stride': {
            'values': [[2,2,2,2,1]]
        },
        'dense_layer_size': {
            'values': [128]
        },
        'learning_rate': {
            'values': [0.0001]
        },
        # must be a name handled by ActivationFunction
        'activation': {
            'values': ['gelu']
        },
        'dropout': {
            'values': [0.3]
        },
        'batch_normalization': {
            'values': [True]
        },
        'batch_size': {
            'values': [16]
        },
        'weight_decay': {
            'values': [0.00001]
        },
        'dataset_augmentation':{
              'values': [True]
        },
        # must be a name handled by OptimizerFunction ("SGD" or "Adam")
        'optimizer_name':{
              'values': ['Adam']
        }
    }
}
# Registers the sweep under the given entity/project; the returned id is passed to wandb.agent below.
sweep_id = wandb.sweep(sweep_config,entity="cs21s045_cs21s011",project="Assignment2_PartA")

In [19]:
#Entry point handed to the sweep agent: builds the model from the sweep config and trains it
def train_wandb():
    """Run one sweep trial.

    Starts a wandb run, reads the hyperparameters from its config, assembles
    the per-layer convolution / pooling attribute dicts, sizes the dense layer
    input, builds ``CnnModel`` (on ``Device`` when ``enable_GPU`` is 1) and
    calls ``TrainNetwork``.
    """
    run = wandb.init()
    config = run.config
    resized_shape = 256
    layer_count = 5

    # Convolution layers: the first one consumes the 3 image channels, every later
    # one consumes the previous layer's output channels. stride/padding/dilation
    # stay at 1/0/1 for all layers.
    conv_attributes = []
    incoming_channels = 3
    for layer in range(layer_count):
        produced_channels = config.conv_attributes_channels[layer]
        conv_attributes.append({"in_channels": incoming_channels,
                                "out_channels": produced_channels,
                                "kernel_size": config.conv_attributes_kernel_size[layer],
                                "stride": 1, "padding": 0, "dilation": 1})
        incoming_channels = produced_channels

    # Pooling layers: kernel size and stride come straight from the sweep config
    pool_attributes = [{"kernel_size": config.pool_attributes_kernel_size[layer],
                        "stride": config.pool_attributes_stride[layer]}
                       for layer in range(layer_count)]

    ##Input size of the dense layer: (final height * final width) * channels of the last conv layer
    final_dim = LinearInFeatureCalculate(resized_shape, conv_attributes, pool_attributes)
    in_feature = (final_dim ** 2) * conv_attributes[4]["out_channels"]

    model = CnnModel(conv_attributes, pool_attributes, in_feature, config.activation,
                     config.batch_normalization, config.dropout, config.dense_layer_size)
    #If the enable_GPU flag is on then the run will use GPU
    if enable_GPU == 1:
        model = model.to(Device)

    #Train with the hyperparameters chosen by the sweep
    TrainNetwork(model, config.epochs, config.batch_size, config.learning_rate,
                 config.optimizer_name, config.weight_decay, resized_shape,
                 actual_data_path, config.dataset_augmentation, True)

    #Deleting the model after use
    del model
    gc.collect()
    torch.cuda.empty_cache()
  

In [None]:
#Run this cell to start sweep
# The agent calls train_wandb for the sweep registered above; count=1 limits it to a single run.
wandb.agent(sweep_id, train_wandb , project="Assignment2_PartA",count=1)

# Close the wandb run once the agent has returned.
wandb.finish()