##ASSIGNMENT-2

Learn how to use CNNs: train from scratch, finetune a pretrained model, use a pre-trained model as it is.


**Installs**

In [1]:
!pip install -U albumentations
!pip install "opencv-python-headless<4.3" #for import albumentations as A

Collecting albumentations
  Downloading albumentations-1.1.0-py3-none-any.whl (102 kB)
[?25l[K     |███▏                            | 10 kB 33.6 MB/s eta 0:00:01[K     |██████▍                         | 20 kB 38.2 MB/s eta 0:00:01[K     |█████████▋                      | 30 kB 36.7 MB/s eta 0:00:01[K     |████████████▉                   | 40 kB 28.3 MB/s eta 0:00:01[K     |████████████████                | 51 kB 29.0 MB/s eta 0:00:01[K     |███████████████████▏            | 61 kB 32.3 MB/s eta 0:00:01[K     |██████████████████████▍         | 71 kB 25.5 MB/s eta 0:00:01[K     |█████████████████████████▋      | 81 kB 27.4 MB/s eta 0:00:01[K     |████████████████████████████▉   | 92 kB 29.7 MB/s eta 0:00:01[K     |████████████████████████████████| 102 kB 31.6 MB/s eta 0:00:01[K     |████████████████████████████████| 102 kB 31.6 MB/s 
Collecting qudida>=0.0.4
  Downloading qudida-0.0.4-py3-none-any.whl (3.5 kB)
Collecting opencv-python-headless>=4.1.1
  Downloading o

**Imports**

In [2]:
import os
import copy
import torch
import torch.nn as nn
import torch.nn.functional as F

from torch.utils.data import Dataset, DataLoader
import albumentations as A
from albumentations.pytorch import ToTensorV2
import cv2
import glob
import numpy as np
import random

%matplotlib inline
%config InlineBackend.figure_format = 'retina'

import matplotlib.pyplot as plt
from itertools import chain
enable_GPU = 0

**Enabling GPU**

In [3]:
Device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(torch.cuda.get_device_name(0))
enable_GPU = 1

Tesla T4


**Download iNaturalist-12K dataset**

In [4]:
from google.colab import drive
drive.mount('/content/drive/')

Mounted at /content/drive/


**Dataset Creating**

In [5]:
# get all the paths from train_data_path and returns image paths for train and validation set
def CreateTrainDataset(actual_data_path):
  train_data_path = os.path.join(actual_data_path, "train")
  train_image_paths = [] #to store image paths in list
  classes = [] #to store class values
  for data_path in glob.glob(train_data_path + "/*"):
    classes.append(data_path.split('/')[-1]) 
    train_image_paths.append(glob.glob(data_path + '/*'))
  train_image_paths = list(chain.from_iterable(train_image_paths))
  random.shuffle(train_image_paths)

  # split train valid from train paths (90,10)
  train_image_paths, valid_image_paths = train_image_paths[:int(0.9*len(train_image_paths))], train_image_paths[int(0.9*len(train_image_paths)):] 
  return train_image_paths, valid_image_paths

# create the test_image_paths
def CreateTestDataset(actual_data_path):
  test_data_path = os.path.join(actual_data_path, "val")
  test_image_paths = []
  for data_path in glob.glob(test_data_path + '/*'):
      test_image_paths.append(glob.glob(data_path + '/*'))
  test_image_paths = list(chain.from_iterable(test_image_paths))
  return test_image_paths

In [6]:
#Create dictionary for class indexes
actual_data_path = "/content/drive/MyDrive/inaturalist_12K"
train_data_path = os.path.join(actual_data_path, "train")
classes = [] #to store class values
for data_path in glob.glob(train_data_path + "/*"):
  classes.append(data_path.split('/')[-1])
idx_to_class = {i:j for i, j in enumerate(classes)}
class_to_idx = {value:key for key,value in idx_to_class.items()}

In [None]:
class_to_idx

**Dataset Class**

In [7]:
class iNaturalist_12KDataset(Dataset):
    def __init__(self, image_paths, transform=False):
        self.image_paths = image_paths
        self.transform = transform
        
    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        image_filepath = self.image_paths[idx]
        image = cv2.imread(image_filepath)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        
        label = image_filepath.split('/')[-2]
        label = class_to_idx[label]
        if self.transform is not None:
            image = self.transform(image=image)["image"]
        return image, label

**Building the Model**

In [17]:
class CnnModel(nn.Module):
  def __init__(self, conv_attributes, pool_attributes,in_feature):
    super(CnnModel, self).__init__()
    self.conv1= nn.Conv2d(conv_attributes[0]["in_channels"], conv_attributes[0]["out_channels"], conv_attributes[0]["kernel_size"])
    self.pool1= nn.MaxPool2d(pool_attributes[0]["kernel_size"], pool_attributes[0]["stride"])

    self.conv2= nn.Conv2d(conv_attributes[1]["in_channels"], conv_attributes[1]["out_channels"], conv_attributes[1]["kernel_size"])
    self.pool2= nn.MaxPool2d(pool_attributes[1]["kernel_size"], pool_attributes[1]["stride"])

    self.conv3= nn.Conv2d(conv_attributes[2]["in_channels"], conv_attributes[2]["out_channels"], conv_attributes[2]["kernel_size"])
    self.pool3= nn.MaxPool2d(pool_attributes[2]["kernel_size"], pool_attributes[2]["stride"])

    self.conv4= nn.Conv2d(conv_attributes[3]["in_channels"], conv_attributes[3]["out_channels"], conv_attributes[3]["kernel_size"])
    self.pool4= nn.MaxPool2d(pool_attributes[3]["kernel_size"], pool_attributes[3]["stride"])

    self.conv5= nn.Conv2d(conv_attributes[4]["in_channels"], conv_attributes[4]["out_channels"], conv_attributes[4]["kernel_size"])
    self.pool5= nn.MaxPool2d(pool_attributes[4]["kernel_size"], pool_attributes[4]["stride"])

    self.fc1 = nn.Linear(in_feature, 10)
   
  def forward(self,x):
    # print("FORWARD CHECK")
    x = self.pool1(F.relu(self.conv1(x)))
    x = self.pool2(F.relu(self.conv2(x)))
    x = self.pool3(F.relu(self.conv3(x)))
    x = self.pool4(F.relu(self.conv4(x)))
    x = self.pool5(F.relu(self.conv5(x)))

    x = torch.flatten(x, 1) # flatten all dimensions except batch
    x = self.fc1(x)                       
    return x

In [18]:
def OptimizerFunction(model, learning_rate, optimizer_name):
  if optimizer_name == "SGD":
    return torch.optim.SGD(model.parameters(), learning_rate)
  elif optimizer_name == "Adam":
    return torch.optim.Adam(model.parameters(), learning_rate)

In [19]:
def LossFunction():
  return nn.CrossEntropyLoss()

In [20]:
def TrainNetwork(model,num_epochs, batch_size,learning_rate,optimizer_name,resized_shape,actual_data_path):
  # print("TRAINING---------------")
  loss_funt = LossFunction()
  optimizer = OptimizerFunction(model, learning_rate, optimizer_name)

  #Function for image augmentation.Calling Compose returns a transform function that performs image augmentation.
  train_transforms = A.Compose([# A.SmallestMaxSize(max_size=350),
            A.Resize(resized_shape,resized_shape),
            # A.ShiftScaleRotate(shift_limit=0.05, scale_limit=0.05, rotate_limit=360, p=0.5),
            # A.RGBShift(r_shift_limit=15, g_shift_limit=15, b_shift_limit=15, p=0.5),
            A.RandomBrightnessContrast(p=0.5),
            # A.MultiplicativeNoise(multiplier=[0.5,2], per_channel=True, p=0.2),
            A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
            # A.HueSaturationValue(hue_shift_limit=0.2, sat_shift_limit=0.2, val_shift_limit=0.2, p=0.5),
            # A.RandomBrightnessContrast(brightness_limit=(-0.1,0.1), contrast_limit=(-0.1, 0.1), p=0.5),
            ToTensorV2(),])

  #Function to create train, validation dataset and returns the train and validation image paths
  train_image_paths, valid_image_paths=CreateTrainDataset(actual_data_path)

  #Training Dataset created
  train_dataset = iNaturalist_12KDataset(train_image_paths,train_transforms)

  #Dataloader loads train dataset
  train_loader = DataLoader(
      train_dataset, batch_size=batch_size, shuffle=True
  )
  n_total_steps = len(train_loader)
  for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
      if enable_GPU == 1 :
        images = images.to(Device)
        labels = labels.to(Device)

      # Forward pass
      # print(i)
      outputs = model(images)
      # print(outputs)
      loss = loss_funt(outputs, labels)

      # Backward and optimize
      optimizer.zero_grad()
      loss.backward()
      optimizer.step()

      if (i+1) % 200 == 0:
        print (f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{n_total_steps}], Loss: {loss.item():.4f}')
  print('Finished Training---------------------')

In [21]:
def SaveModel(model):
  PATH = '/content/drive/MyDrive/cnn.pth'
  torch.save(model.state_dict(), PATH)

In [22]:
def TestNetwork(model,num_epochs, batch_size,learning_rate,resized_shape,actual_data_path):
  with torch.no_grad():
    n_correct = 0
    n_samples = 0
    n_class_correct = [0 for i in range(10)]
    n_class_samples = [0 for i in range(10)]

    #Function for image augmentation.Calling Compose returns a transform function that performs image augmentation.
    test_transforms = A.Compose([# A.SmallestMaxSize(max_size=350),
          # A.CenterCrop(height=256, width=256),
          A.Resize(resized_shape,resized_shape),
          A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
          ToTensorV2()])
    
    #Function to create test dataset and returns the test image paths
    test_image_paths=CreateTestDataset(actual_data_path)
    #Validation Dataset created
    # valid_dataset = iNaturalist_12KDataset(valid_image_paths,test_transforms) #test transforms are applied
    #Test Dataset created
    test_dataset = iNaturalist_12KDataset(test_image_paths,test_transforms)

    #Dataloader loads test dataset
    test_loader = DataLoader(
    test_dataset, batch_size=batch_size, shuffle=False)
    
    #Dataloader loads validation dataset
    # valid_loader = DataLoader(
    #     valid_dataset, batch_size=batch_size, shuffle=True)
    
    for images, labels in test_loader:
      if enable_GPU == 1:
        images = images.to(Device)
        labels = labels.to(Device)
      outputs = model(images)
      # max returns (value ,index)
      _, predicted = torch.max(outputs, 1)
      n_samples += labels.size(0)
      n_correct += (predicted == labels).sum().item()
      for i in range(predicted.size()[0]):
        label = labels[i]
        pred = predicted[i]
        if (label == pred):
            n_class_correct[label] += 1
        n_class_samples[label] += 1
    acc = 100.0 * n_correct / n_samples
    print(f'Accuracy of the network: {acc} %')

    for i in range(10):
        acc = 100.0 * n_class_correct[i] / n_class_samples[i]
        print(f'Accuracy of {classes[i]}: {acc} %')

In [23]:
##Calculates the input feature for the dense linear layer
def LinearInFeatureCalculate(initial_dim,conv_attributes,pool_attributes):
  for i in range(5):
    D = (initial_dim + 2*conv_attributes[i]["padding"] - conv_attributes[i]["dilation"]*(conv_attributes[i]["kernel_size"]-1) - 1)//(conv_attributes[i]["stride"]) + 1
    D = D//pool_attributes[i]["stride"]
    initial_dim = D
  return D


**Main function**

In [24]:
def main():
  print("Hello")
  resized_shape = 256

  ##Hyper-parameters of the model training like number of epochs, batch size, learning rate
  num_epochs=1
  batch_size=64
  learning_rate=0.001
  optimizer_name = "Adam"
  actual_data_path = "/content/drive/MyDrive/inaturalist_12K"

  conv_attributes = [{"in_channels":0,"out_channels":0,"kernel_size":0, "stride":1, "padding":0, "dilation":1},
                     {"in_channels":0,"out_channels":0,"kernel_size":0, "stride":1, "padding":0, "dilation":1},
                     {"in_channels":0,"out_channels":0,"kernel_size":0, "stride":1, "padding":0, "dilation":1},
                     {"in_channels":0,"out_channels":0,"kernel_size":0, "stride":1, "padding":0, "dilation":1},
                     {"in_channels":0,"out_channels":0,"kernel_size":0, "stride":1, "padding":0, "dilation":1}]
  
  
  ##Attributes for 1st Convolution Layer
  conv_attributes[0]["in_channels"]=3
  conv_attributes[0]["out_channels"]=6
  conv_attributes[0]["kernel_size"]=3

  ##Attributes for 2nd Convolution Layer
  conv_attributes[1]["in_channels"]=6
  conv_attributes[1]["out_channels"]=12
  conv_attributes[1]["kernel_size"]=3

  ##Attributes for 3rd Convolution Layer
  conv_attributes[2]["in_channels"]=12
  conv_attributes[2]["out_channels"]=16
  conv_attributes[2]["kernel_size"]=5

  ##Attributes for 4th Convolution Layer
  conv_attributes[3]["in_channels"]=16
  conv_attributes[3]["out_channels"]=32
  conv_attributes[3]["kernel_size"]=5

  ##Attributes for 5th Convolution Layer
  conv_attributes[4]["in_channels"]=32
  conv_attributes[4]["out_channels"]=32
  conv_attributes[4]["kernel_size"]=7

  pool_attributes = [{"kernel_size":1, "stride": 1},
                     {"kernel_size":1, "stride": 1},
                     {"kernel_size":1, "stride": 1},
                     {"kernel_size":1, "stride": 1},
                     {"kernel_size":1, "stride": 1}]

  ##Attributes for 1st Pooling Layer
  pool_attributes[0]["kernel_size"]=2
  pool_attributes[0]["stride"]=2

  ##Attributes for 2nd Pooling Layer
  pool_attributes[1]["kernel_size"]=2
  pool_attributes[1]["stride"]=2
  
  ##Attributes for 3rd Pooling Layer
  pool_attributes[2]["kernel_size"]=2
  pool_attributes[2]["stride"]=2

  ##Attributes for 4th Pooling Layer
  pool_attributes[3]["kernel_size"]=2
  pool_attributes[3]["stride"]=2

  ##Attributes for 5th Pooling Layer
  pool_attributes[4]["kernel_size"]=2
  pool_attributes[4]["stride"]=2

 ##Calculating the input dimension for the Dense Linear layer
  final_dim=LinearInFeatureCalculate(256,conv_attributes,pool_attributes) #height,width of the dense layer
  in_feature = (final_dim ** 2) * conv_attributes[4]["out_channels"] #number of input nodes in the dense layer
  print(in_feature)

  #If the enable_GPU flag is on then the run will use GPU
  if enable_GPU == 1:
    model = CnnModel(conv_attributes, pool_attributes,in_feature).to(Device)
  else :
    model = CnnModel(conv_attributes, pool_attributes,in_feature)
  print(model)

  #Function for training the model with parameters model,num_epochs, batch_size,learning_rate,optimizer_name
  TrainNetwork(model,num_epochs, batch_size,learning_rate,optimizer_name,resized_shape,actual_data_path)
  #Function for testing the model accuracy on the test data with parameters model,num_epochs, batch_size,learning_rate
  TestNetwork(model,num_epochs, batch_size,learning_rate,resized_shape,actual_data_path)

In [25]:
if  __name__ =="__main__":
  main()

Hello
288
CnnModel(
  (conv1): Conv2d(3, 6, kernel_size=(3, 3), stride=(1, 1))
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(6, 12, kernel_size=(3, 3), stride=(1, 1))
  (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv3): Conv2d(12, 16, kernel_size=(5, 5), stride=(1, 1))
  (pool3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv4): Conv2d(16, 32, kernel_size=(5, 5), stride=(1, 1))
  (pool4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv5): Conv2d(32, 32, kernel_size=(7, 7), stride=(1, 1))
  (pool5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=288, out_features=10, bias=True)
)
Finished Training---------------------
Accuracy of the network: 20.5 %
Accuracy of Arachnida: 3.5 %
Accuracy of Amphibia: 18.5 %
Accuracy of Fungi: 10.5 %
Accuracy of Animalia: 4.0 %
Ac

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline

#######################################################
#                  Visualize Dataset
#         Images are plotted after augmentation
#######################################################

def visualize_augmentations(dataset, idx=0, samples=10, cols=5, random_img = False):
    
    dataset = copy.deepcopy(dataset)
    #we remove the normalize and tensor conversion from our augmentation pipeline
    dataset.transform = A.Compose([t for t in dataset.transform if not isinstance(t, (A.Normalize, ToTensorV2))])
    rows = samples // cols
    
        
    figure, ax = plt.subplots(nrows=rows, ncols=cols, figsize=(12, 8))
    for i in range(samples):
        if random_img:
            idx = np.random.randint(1,len(train_image_paths))
        image, lab = dataset[idx]
        ax.ravel()[i].imshow(image)
        ax.ravel()[i].set_axis_off()
        ax.ravel()[i].set_title(idx_to_class[lab])
    plt.tight_layout(pad=1)
    plt.show()    

visualize_augmentations(train_dataset,np.random.randint(1,len(train_image_paths)), random_img = True)
