# Task-3 Image Classification (iNaturalist)
## Bhanu Pratap Singh (PH20C010)

Building an AlexNet model with 5 convolutional layers and 3 fully connected layers (about 60M parameters).

# Imports & Setup

In [None]:
from IPython.display import clear_output

# Download the iNaturalist 12K archive into the current directory and unpack it.
# NOTE(review): the unzip path is absolute, so this assumes the notebook's
# working directory is /content (presumably Google Colab) -- confirm.
!wget https://storage.googleapis.com/wandb_datasets/nature_12K.zip
!unzip /content/nature_12K.zip

# Drop the long download/unzip log from the cell output.
clear_output()

In [None]:
from PIL import Image

import os
from glob import glob
import time
import numpy as np

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
from torchsummary import summary

# Dataloader

- Dataset Class for Setting up the data loading process
- Sections to fill in this script: `_init_transform()`

In [None]:
class inaturalist(Dataset):
    """Image-folder dataset for the iNaturalist 12K ``train``/``val`` splits.

    Images are read from ``<root_dir>/<mode>/<class_folder>/*.jpg``.  The
    label of an image is the position of its class folder in the sorted
    listing of ``<root_dir>/train``; that same listing is used for both
    modes so train and val labels agree.

    NOTE(review): every entry of ``<root_dir>/train`` consumes a label
    index, including stray non-class entries (for example a hidden file
    that sorts first).  The training code in this notebook subtracts 1 from
    each label, which suggests such an entry exists in the real data --
    confirm before changing the numbering.

    Args:
        root_dir: Directory containing the ``train`` and ``val`` folders.
        mode: Either ``'train'`` or ``'val'``.
        transform: When true, images are resized to 227x227, converted to
            tensors and normalised; when false the PIL image is returned
            unchanged.

    Raises:
        ValueError: If ``mode`` is not ``'train'`` or ``'val'``.
    """

    _MODES = ('train', 'val')

    def __init__(self, root_dir, mode, transform=True):
        self.data_dir = root_dir
        self.mode = mode
        # ``transforms`` is the on/off flag; ``transform`` holds the pipeline.
        self.transforms = transform
        self.transform = None
        self._init_dataset()
        if transform:
            self._init_transform()

    def _init_dataset(self):
        """Collect image paths and integer labels for the selected split."""
        if self.mode not in self._MODES:
            # Fail loudly instead of silently building an empty dataset.
            raise ValueError(
                "No Such Dataset Mode: {!r} (expected 'train' or 'val')".format(self.mode)
            )

        self.files = []
        self.labels = []
        # Class order always comes from the train folder so that both splits
        # map a given class folder to the same label.
        class_dirs = sorted(os.listdir(os.path.join(self.data_dir, 'train')))
        for label, class_dir in enumerate(class_dirs):
            pattern = os.path.join(self.data_dir, self.mode, class_dir, '*.jpg')
            files = sorted(glob(pattern))
            self.labels += [label] * len(files)
            self.files += files

    def _init_transform(self):
        """Build the preprocessing pipeline applied to every image."""
        # Useful link for this part: https://pytorch.org/vision/stable/transforms.html
        self.transform = transforms.Compose([
            # 227x227 is the input size the classifier's first linear layer
            # (9216 features) is dimensioned for.
            transforms.Resize((227, 227)),
            transforms.ToTensor(),
            transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
        ])

    def __getitem__(self, index):
        """Return ``(image, label)`` for ``index``; label is a ``torch.long`` scalar."""
        # The context manager closes the underlying file as soon as the
        # pixels have been copied into the converted image.
        with Image.open(self.files[index]) as handle:
            img = handle.convert('RGB')

        if self.transforms:
            img = self.transform(img)

        label = torch.tensor(self.labels[index], dtype=torch.long)
        return img, label

    def __len__(self):
        """Number of images found for the selected split."""
        return len(self.files)

# Model

- Class to define the model which we will use for training
- Stuff to fill in: The Architecture of your model, the `forward` function to define the forward pass

NOTE!: You are NOT allowed to use pretrained models for this task

In [None]:

        # Useful Link: https://pytorch.org/docs/stable/nn.html
        #------------ENTER YOUR MODEL HERE----------------#  
class Classifier(nn.Module):
    """AlexNet-style CNN: five convolutional stages, then three linear layers.

    Every convolution is followed by batch normalisation and ReLU; stages
    1, 2 and 5 end with a 3x3/stride-2 max-pool.  The first linear layer
    takes 9216 features (256 channels x 6 x 6), which is what the conv
    stack produces for a 3x227x227 input.

    Args:
        num_classes: Size of the output layer.
    """

    def __init__(self, num_classes=10):
        super().__init__()
        # Stages are created in the same order as they are applied.
        self.layer1 = self._conv_stage(3, 96, kernel=11, stride=4, pad=0, pool=True)
        self.layer2 = self._conv_stage(96, 256, kernel=5, stride=1, pad=2, pool=True)
        self.layer3 = self._conv_stage(256, 384, kernel=3, stride=1, pad=1, pool=False)
        self.layer4 = self._conv_stage(384, 384, kernel=3, stride=1, pad=1, pool=False)
        self.layer5 = self._conv_stage(384, 256, kernel=3, stride=1, pad=1, pool=True)
        self.fc = self._dense_stage(9216, 4096)
        self.fc1 = self._dense_stage(4096, 4096)
        # Final projection to class scores; no activation is applied here.
        self.fc2 = nn.Sequential(nn.Linear(4096, num_classes))

    @staticmethod
    def _conv_stage(n_in, n_out, kernel, stride, pad, pool):
        """Conv -> BatchNorm -> ReLU, optionally followed by a max-pool."""
        modules = [
            nn.Conv2d(n_in, n_out, kernel_size=kernel, stride=stride, padding=pad),
            nn.BatchNorm2d(n_out),
            nn.ReLU(),
        ]
        if pool:
            modules.append(nn.MaxPool2d(kernel_size=3, stride=2))
        return nn.Sequential(*modules)

    @staticmethod
    def _dense_stage(n_in, n_out):
        """Dropout(0.5) -> Linear -> ReLU."""
        return nn.Sequential(nn.Dropout(0.5), nn.Linear(n_in, n_out), nn.ReLU())

    def forward(self, x):
        """Map a batch of images to a ``(batch, num_classes)`` tensor of raw scores."""
        features = x
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4, self.layer5):
            features = stage(features)
        # Collapse channel and spatial dimensions into one feature vector per sample.
        flat = features.reshape(features.size(0), -1)
        return self.fc2(self.fc1(self.fc(flat)))

In [None]:
# Stand-alone instance used for printing the architecture; the model that is
# actually trained is the separate `anet` created in the optimizer cell.
Alex = Classifier()

In [None]:
# Print the module tree of the model to inspect its layer structure.
print(Alex)

Classifier(
  (layer1): Sequential(
    (0): Conv2d(3, 96, kernel_size=(11, 11), stride=(4, 4))
    (1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (layer2): Sequential(
    (0): Conv2d(96, 256, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (layer3): Sequential(
    (0): Conv2d(256, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
  )
  (layer4): Sequential(
    (0): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()

In [None]:
#Tried the VGG19 model but takes a lot of time.
#VGG_19= [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M']
#class Classifier(nn.Module):
 #   def __init__(self, in_channels = 3, n_classes=10):
  #      super(Classifier, self).__init__()
   #     self.in_channels = in_channels
    #    self.n_classes = n_classes
     #   self.conv_layers = self.create_conv_layers(VGG_19)
      #  self.avg_pool = nn.AdaptiveAvgPool2d((1, 1))
       # self.fully_connected_layers = nn.Sequential(
        #                                        nn.Linear(in_features = 7*7*512 , out_features = 4096, bias = True),
         #                                       nn.ReLU(inplace = True),
          #                                      nn.Dropout(p = 0.5),
           #                                     nn.Linear(in_features = 4096, out_features = 4096, bias = True),
#                                                nn.ReLU(inplace = True),
#                                                nn.Dropout(p = 0.5),
#                                                nn.Linear(in_features = 4096, out_features = self.n_classes),
#                                              )
#    def create_conv_layers(self, arch):
#      layers = []
#      in_channels = self.in_channels
#      for layer in arch:
#        if(type(layer) == int):
#          out_channels = layer
#
#          layers += [nn.Conv2d(
#                            in_channels = in_channels,
#                            out_channels = out_channels,
#                            kernel_size = 3,
#                            stride = 1,
#                            padding = 1
#                          ),
#                  nn.BatchNorm2d(layer),
#                  nn.ReLU(inplace = True)
#                 ]
#          in_channels = layer
#        elif(layer == 'M'):
#          layers += [nn.MaxPool2d(kernel_size = 2, stride = 2)]
#
#      return nn.Sequential(*layers)
#
#
#        # Useful Link: https://pytorch.org/docs/stable/nn.html
#        #------------ENTER YOUR MODEL HERE----------------#        
#
#    def forward(self, x):
#        #---------Assuming x to be the input to the model, define the forward pass-----------#
#        x = self.conv_layers(x)
#        x = self.avg_pool(x)
#        x = torch.flatten(x, start_dim = 1, end_dim = -1)
#        x = self.fully_connected_layers(x)
#        return F.softmax(x,dim=1)

# Training

- Sections to Fill: Define `loss` function, `optimizer` and model, `train` and `eval` functions and the training loop


## Hyperparameters

Feel free to change these hyperparameters based on your machine's capacity.

In [None]:
# Training configuration shared by the cells below.
batch_size = 32        # mini-batch size
epochs = 10            # full passes over the training split
learning_rate = 1e-3   # optimizer step size
# Use the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

## Dataloader

In [None]:
# Both splits are read from the same extracted archive and use the same
# preprocessing (transform=True).
trainset = inaturalist(root_dir='/content/inaturalist_12K', mode='train', transform=True)
valset = inaturalist(root_dir='/content/inaturalist_12K', mode='val', transform=True)

# Use the `batch_size` hyperparameter rather than a hard-coded 32 so that
# changing it in the hyperparameter cell actually takes effect.
# Only the training split is shuffled; validation order does not matter.
trainloader = DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=2)
valloader = DataLoader(valset, batch_size=batch_size, shuffle=False, num_workers=2)

## Loss Function and Optimizer

In [None]:
# USEFUL LINK: https://pytorch.org/docs/stable/nn.html#loss-functions

#---Define the loss function to use, model object and the optimizer for training---#
# Classification loss applied to the model's raw output scores.
criterion = nn.CrossEntropyLoss()
# The model that is trained below, moved to the selected device.
anet= Classifier().to(device)
optimizer = torch.optim.Adam(anet.parameters(), lr = learning_rate) # Adam is used instead of SGD because, per the author, it saves time


## Checkpoints

To save your model weights

In [None]:
# Directory that model weights are written to.
checkpoint_dir = 'checkpoints'
# exist_ok=True makes this safe to re-run and avoids the check-then-create race.
os.makedirs(checkpoint_dir, exist_ok=True)

## Utility Functions

In [None]:
def get_model_summary(model, input_tensor_shape):
    """Run ``torchsummary.summary`` on ``model``.

    Args:
        model: The network to summarise.
        input_tensor_shape: Input size forwarded unchanged to ``summary``
            (presumably without the batch dimension, e.g. ``(3, 227, 227)``
            -- confirm against the torchsummary documentation).
    """
    summary(model, input_tensor_shape)

def accuracy(y_pred, y):
    """Return the fraction of rows in ``y_pred`` whose top-scoring column equals ``y``.

    Args:
        y_pred: Tensor of scores, one row per sample and one column per class.
        y: Tensor of integer class indices, one per sample.

    Returns:
        ``correct / total`` as a Python float.
    """
    # Index 1 of torch.max(..., dim) holds the arg-max positions.
    predicted = torch.max(y_pred.data, 1)[1]
    n_correct = (predicted == y).sum().item()
    n_total = y.size(0)
    return n_correct / n_total

def epoch_time(start_time, end_time):
    """Split the interval between two timestamps (in seconds) into whole minutes and seconds.

    Returns:
        ``(minutes, seconds)`` as integers, both truncated toward zero.
    """
    duration = end_time - start_time
    minutes = int(duration / 60)
    # Whatever is left after removing the whole minutes.
    seconds = int(duration - minutes * 60)
    return minutes, seconds

## Train

In [None]:
def train(model, trainloader, optimizer, criterion, device):
    """Train ``model`` for one pass over ``trainloader``.

    Args:
        model: Network to optimise; it is put into training mode here so that
            dropout and batch-norm behave correctly even if a previous
            evaluation left it in eval mode.
        trainloader: Iterable yielding ``(image_batch, label_batch)`` pairs.
        optimizer: Optimizer bound to ``model``'s parameters.
        criterion: Loss called as ``criterion(output, label)``.
        device: Device the batches are moved to before the forward pass.

    Returns:
        ``(mean_loss, accuracy)``: the batch-size-weighted average of the
        per-batch loss values (the per-sample mean when the criterion
        averages over the batch) and the fraction of correctly classified
        samples.  Both are ``nan`` if the loader yields no samples.
    """
    model.train()

    loss_sum = 0.0
    n_correct = 0
    n_seen = 0
    for image, label in trainloader:
        image = image.to(device)
        # Labels are shifted down by one to bring them into [0, 9] (original
        # author's note).  NOTE(review): this presumes the dataset yields
        # 1-based labels -- verify against the dataset class.
        label = label.to(device) - 1

        optimizer.zero_grad()
        output = model(image)
        loss = criterion(output, label)
        loss.backward()
        optimizer.step()

        n_batch = label.size(0)
        loss_sum += loss.item() * n_batch
        n_correct += (output.argmax(dim=1) == label).sum().item()
        n_seen += n_batch

    if n_seen == 0:
        return float('nan'), float('nan')
    return loss_sum / n_seen, n_correct / n_seen
   

## Eval

In [None]:
def eval(model, dataset, criterion, device):
    """Evaluate ``model`` on ``dataset`` and print the accuracy in percent.

    The model is switched to eval mode for the duration of the call (so
    dropout is disabled and batch-norm uses its running statistics) and is
    put back into whatever mode it was in before, even if an error occurs.

    The name shadows the built-in ``eval``; it is kept because existing
    cells call it by this name.

    Args:
        model: Network to evaluate.
        dataset: Iterable yielding ``(image_batch, label_batch)`` pairs.
        criterion: Loss called as ``criterion(outputs, label)``.
        device: Device the batches are moved to before the forward pass.

    Returns:
        ``(mean_loss, accuracy)``: the batch-size-weighted average of the
        per-batch loss values and the fraction (0..1) of correctly
        classified samples.

    Raises:
        ValueError: If ``dataset`` yields no samples.
    """
    was_training = model.training
    model.eval()

    loss_sum = 0.0
    correct = 0
    total = 0
    try:
        with torch.no_grad():
            for image, label in dataset:
                image = image.to(device)
                # Labels are shifted down by one to bring them into [0, 9]
                # (original author's note).  NOTE(review): this presumes the
                # dataset yields 1-based labels -- verify against the
                # dataset class.
                label = label.to(device) - 1
                outputs = model(image)

                n_batch = label.size(0)
                loss_sum += criterion(outputs, label).item() * n_batch
                _, predicted = torch.max(outputs, 1)
                total += n_batch
                correct += (predicted == label).sum().item()
    finally:
        # Restore the caller's mode so a following training pass is unaffected.
        model.train(was_training)

    if total == 0:
        raise ValueError("eval() received no samples to evaluate")

    print('Accuracy (%)', 100*correct/total)
    return loss_sum / total, correct / total
    

    

## Training

In [None]:
# Placeholder from the assignment template; nothing in this cell updates it.
best_valid_loss = float('inf')

for epoch in range(epochs):

    start_time = time.monotonic()

    # One optimisation pass over the training split, then a validation pass
    # (eval prints the validation accuracy).
    train(anet, trainloader, optimizer, criterion, device)
    eval(anet, valloader, criterion, device)

    end_time = time.monotonic()
    epoch_mins, epoch_secs = epoch_time(start_time, end_time)

    print("\n\n\n TIME TAKEN FOR THE EPOCH: {} mins and {} seconds".format(epoch_mins, epoch_secs))

    # Save the latest weights in the checkpoint directory, as the assignment
    # asks.  A single file is overwritten each epoch to keep disk use bounded;
    # it is written after the timing so the reported epoch time is unaffected.
    torch.save(anet.state_dict(), os.path.join(checkpoint_dir, 'alexnet_last.pth'))

    # Early-stopping idea left by the author (not active; `acc` is not
    # defined in this cell):
    #if (acc>=0.80):
    #  break

print("OVERALL TRAINING COMPLETE")

Accuracy (%) 11.15



 TIME TAKEN FOR THE EPOCH: 29 mins and 14 seconds
Accuracy (%) 11.6



 TIME TAKEN FOR THE EPOCH: 29 mins and 14 seconds
Accuracy (%) 12.15



 TIME TAKEN FOR THE EPOCH: 33 mins and 39 seconds
Accuracy (%) 12.65



 TIME TAKEN FOR THE EPOCH: 33 mins and 6 seconds
Accuracy (%) 11.9



 TIME TAKEN FOR THE EPOCH: 33 mins and 0 seconds
Accuracy (%) 12.55



 TIME TAKEN FOR THE EPOCH: 33 mins and 0 seconds
Accuracy (%) 12.05



 TIME TAKEN FOR THE EPOCH: 32 mins and 55 seconds
Accuracy (%) 12.85



 TIME TAKEN FOR THE EPOCH: 33 mins and 20 seconds
Accuracy (%) 12.9



 TIME TAKEN FOR THE EPOCH: 33 mins and 11 seconds
Accuracy (%) 14.45



 TIME TAKEN FOR THE EPOCH: 33 mins and 6 seconds
OVERALL TRAINING COMPLETE
