In [1]:
#JV

I would develop the code for the assignment in this notebook as it is easy to quickly test (and even unit testing).

When a module/part is bug free I would add it to the .py file later.



In [2]:
import os
import shutil
import re

import numpy as np

import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import transforms

import matplotlib.pyplot as plt

In [3]:
def make_dir(dir,returnIfDirAlreadyExists=False):
    """
    Function to create a directory, if it doesn't exist
    """
    try:
        os.mkdir(dir)
    except Exception as e:
        if "File exists" in str(e):
            if returnIfDirAlreadyExists:
                return True
            pass
        else:
            print(e)

In [4]:
seed = 76 #setting this as seed wherever randomness comes

In [5]:
## Train data downloaded from the given source (https://storage.googleapis.com/wandb_datasets/nature_12K.zip)

"""
Now, the goal is to split 20% of train data, in "train" folder to get validation data.
"""



data_base_dir = 'inaturalist_12K/'

def train_validation_split(base_dir,seed = 76):
    """
    Function to split 20% of the train data into validation data, Uniformly At Random (UAR). Import os and shutil before using this method.

    Note  : Instead of taking 20% of samples randomly out of the entire train data; 20% of train data of each class is taken (UAR), 
    so that for training there is a balance between the number of samples per class.

    Params:

        base_dir : The path to the directory in which the "train/" and "test/" directories are present after unzipping. It is assumed that the given dir path string has a "/ at the end.

        seed : The seed use in the random number generator, default : 76.

    Returns :

        None.
    """

    base_data_dir = base_dir
    train_base_dir = base_data_dir+'train/'
    train_data_class_dirs = os.listdir(train_base_dir)
    
    ## remove dirs starting with "." from the list
    train_data_class_dirs = [i for i in train_data_class_dirs if i[0] != "." ]

    ## Test data is called as val, which is confusing, hence renaming it to test
    os.rename(data_base_dir+"val/",base_data_dir+"test/")
    
    
    ## validation dir
    val_base_dir = base_data_dir+'validation/'
    make_dir(val_base_dir)
    
    ## Iterate over each class and
    ## take 20% data of each class at random as validation data
    
    random_num_generator = np.random.RandomState(seed)
    
    for class_label in train_data_class_dirs:
    
        current_class_train_filenames = os.listdir(train_base_dir+class_label+"/")
    
        num_of_files = len(current_class_train_filenames)
        
        validation_indices = random_num_generator.choice(num_of_files,int(0.2*num_of_files),replace=False)
        train_indices = np.array(list(set(np.arange(num_of_files)).difference(set(validation_indices))))
    
        ##create class dir validation dir
        cur_validation_dir = val_base_dir + class_label +"/"
        make_dir(cur_validation_dir)
        
        for i in validation_indices:
            shutil.move(train_base_dir+class_label+"/"+current_class_train_filenames[i],cur_validation_dir+current_class_train_filenames[i])
        
        print(f"Validation Split for {class_label} is Done!")



In [6]:
"""
Careful perform this train-validation split only once in the entire lifetime, that too on the unzipped dataset.

"""

base_data_dir = "inaturalist_12K/"

#train_validation_split(base_data_dir)

In [7]:
""" Create loader for train, test and validation data """

## Train data
train_path = base_data_dir+"train/"
train_dataset = torchvision.datasets.ImageFolder(root=train_path,transform=torchvision.transforms.ToTensor())
train_loader = torch.utils.data.DataLoader(train_dataset,batch_size=16,shuffle=True,num_workers=0)

## Validation data
val_path = base_data_dir+"validation/"
val_dataset = torchvision.datasets.ImageFolder(root=val_path,transform=torchvision.transforms.ToTensor())
val_loader = torch.utils.data.DataLoader(val_dataset,batch_size=16,shuffle=True,num_workers=0)

## Test data
test_path = base_data_dir+"test/"
test_dataset = torchvision.datasets.ImageFolder(root=test_path,transform=torchvision.transforms.ToTensor())
test_loader = torch.utils.data.DataLoader(test_dataset,batch_size=16,shuffle=True,num_workers=0)

In [None]:
"""
Some important points:

1. Images are of different shapes, so they all have to be resized by looking at the image shapes and picking the maximum [or mean/mode ?] width and height.

2. Now they have to be normalized. To zero mean and unit variance. This has to be done within train, test and validation datasets separately. One normalization won't work for the other because the images may go out of scale (i.e beyond +/- 1)


"""

In [8]:
def viz_image_sizes(ds):
    """
    
    
    """
    x_vals = []
    y_vals = []
    
    images = []
    
    for i in range(len(ds.samples)):
        
        x,y = train_dataset[i][0].shape[1:]
    
        x_vals.append(x)
        y_vals.append(y)
        
        #images.append(train_dataset[i][0])

    print(f"X min : {np.min(x_vals)}\tX Max:{np.max(x_vals)}")    
    print(f"Y min : {np.min(y_vals)}\tY Max:{np.max(y_vals)}")

In [78]:
viz_image_sizes(train_dataset)


X min : 800	X Max:800
Y min : 800	Y Max:800


In [79]:
viz_image_sizes(val_dataset)
viz_image_sizes(test_dataset)

X min : 800	X Max:800
Y min : 800	Y Max:800
X min : 800	X Max:800
Y min : 800	Y Max:800


In [9]:
"""
As the maximum dimensions are 800,800 for train, validatio and test data

Resizing all images to 800x800 size.
"""

'\nAs the maximum dimensions are 800,800 for train, validatio and test data\n\nResizing all images to 800x800 size.\n'

In [15]:
## Train data

"""data_transforms = transforms.Compose(
    [
        transforms.Resize(size=(800,800)),
        transforms.Normalize(mean=[0,0,0],std=[1,1,1]),
        transforms.ToTensor(),
    ]
)"""

data_transforms = transforms.Compose(
    [
        transforms.Resize(size=(800,800)),
        transforms.ToTensor(),
    ]
)

train_path = base_data_dir+"train/"
train_dataset = torchvision.datasets.ImageFolder(root=train_path,transform=data_transforms)
train_loader = torch.utils.data.DataLoader(train_dataset,batch_size=16,shuffle=True,num_workers=0)

## Validation data
val_path = base_data_dir+"validation/"
val_dataset = torchvision.datasets.ImageFolder(root=val_path,transform=data_transforms)
val_loader = torch.utils.data.DataLoader(val_dataset,batch_size=16,shuffle=True,num_workers=0)

## Test data
test_path = base_data_dir+"test/"
test_dataset = torchvision.datasets.ImageFolder(root=test_path,transform=data_transforms)
test_loader = torch.utils.data.DataLoader(test_dataset,batch_size=16,shuffle=True,num_workers=0)

In [16]:
for i, data in enumerate(train_loader, 0):
    inputs, labels = data
    break

In [52]:
"""
Iterate over:

number_of_filters, filter_size, stride, padding, max_pool_dim in convolution_layer_specifications:

and given the input image size H and W, compute the output size after last convolution.

"""

convolution_layer_specifications = [

    ["conv",6,5,1,0],
    ["maxpool",2,2],
    ["conv",16,5,1,0],
    ["maxpool",2,2]
]


convolution_layer_specifications = [

    ["conv",96,11,4,0],
    ["maxpool",3,2],
    ["conv",256,5,1,0],
    ["maxpool",3,2],
    ["conv",384,3,1,0],
    ["conv",384,3,1,0],
    ["conv",256,3,1,0],
    ["maxpool",3,2]
]

W=227
H=227

W_cur = W
H_cur = H
depth_cur = 3

total_params = 0

print(f"Shape Initial shape : {W_cur}x{H_cur}x{depth_cur}")

for config in convolution_layer_specifications: ## assuming square filters

    #print(number_of_filters, filter_size, stride, padding, max_pool)
    type = config[0]

    if type == "conv":  ## its a convolutional layer

        
        _, number_of_filters, filter_size, stride, padding = config
        
        W_cur = (W_cur - filter_size + 2*padding)//stride + 1
        H_cur = (H_cur - filter_size + 2*padding)//stride + 1

        params = filter_size*filter_size*depth_cur*number_of_filters
        total_params += params
        
        print(f"Number of Parameters : {params}\n")
        depth_cur = number_of_filters
        
        print(f"Shape after conv : {W_cur}x{H_cur}x{depth_cur}")

    elif type == "maxpool":
        _,max_pool_filter_size,max_pool_stride = config

        W_cur = (W_cur-max_pool_filter_size)//max_pool_stride + 1
        H_cur = (H_cur-max_pool_filter_size)//max_pool_stride + 1
        print(f"Shape after MP : {W_cur}x{H_cur}x{depth_cur}")


print(f"\nFinal Shape : {W_cur}x{H_cur}x{depth_cur}")

print(f"Total Convolutional params : {total_params}")
    

Shape Initial shape : 227x227x3
Number of Parameters : 34848

Shape after conv : 55x55x96
Shape after MP : 27x27x96
Number of Parameters : 614400

Shape after conv : 23x23x256
Shape after MP : 11x11x256
Number of Parameters : 884736

Shape after conv : 9x9x384
Number of Parameters : 1327104

Shape after conv : 7x7x384
Number of Parameters : 884736

Shape after conv : 5x5x256
Shape after MP : 2x2x256

Final Shape : 2x2x256
Total Convolutional params : 3745824


In [80]:
class CNN(nn.Module):
    """
    A class (that inherits nn.Module), to create the CNN architecture as required and to define the forward pass.
    """

    def compute_output_dimension_post_convolutions(self,convolution_layer_specifications):
            
            """
            Method to compute the dimension of the output after all convolution,maxpooling layers.
            This helps to compute the input size of first fully connected layer.

            Params:
                convolution_layer_specifications :  A list of lists. There exists one list per conv or maxpool. 

                        The first element of the list is a string "conv" or "maxpooling". Indicating the layer type
                "conv" is followed by number of filters, filter sizes, stride, paddings. It is assumed that every convolutional layer is followed by an activation layer.
                "maxpool" is followed by filter size and stride.
            
            Returns:
                A tuple containing the dimension (Height,Width,depth) of the output after all convolutional operations.
            """

            W_cur = self.W
            H_cur = self.H
            depth_cur = self.input_channels
            
            for config in convolution_layer_specifications: ## assuming square filters
            
                type = config[0]
            
                if type == "conv":  ## its a convolutional layer
            
                    
                    _, number_of_filters, filter_size, stride, padding = config
                    
                    W_cur = (W_cur - filter_size + 2*padding)//stride + 1
                    H_cur = (H_cur - filter_size + 2*padding)//stride + 1
                    depth_cur = number_of_filters
                    
                elif type == "maxpool":
                    _,max_pool_filter_size,max_pool_stride = config
            
                    W_cur = (W_cur-max_pool_filter_size)//max_pool_stride + 1
                    H_cur = (H_cur-max_pool_filter_size)//max_pool_stride + 1
            

            return (H_cur,W_cur,depth_cur)

    def initalize_weights_biases(self,m):
        """
        
        Method to initialize weights given a torch module.

        Using "HE" (kaiming_normal) Initialization, as it goes well with ReLU in CNN.
        
        """
        if isinstance(m, nn.Linear):  ## it its a fully connected layer
            torch.nn.init.kaiming_normal_(m.weight)
            m.bias.data.fill_(0.01) ##a small non-zero value

        elif isinstance(m, nn.Conv2d): ## if its a Convolutional layer
            torch.nn.init.kaiming_normal_(m.weight)
            m.bias.data.fill_(0.01) ##a small non-zero value

        ## Maxpool and activation layers would not have any parameters, so no initialization for them
    
    def __init__(self,num_output_neurons, activation, convolution_layer_specifications, hidden_layer_specifications, output_activation, W=800, H=800, input_depth=3):

        """
        Default Constructor.

        Params:

            num_output_neurons: Number of neurons in the output layer.

            activation : A torch nn method to be used as activation function.
            
            convolution_layer_specifications: A list of lists. There exists one list per conv or maxpool. 

                        The first element of the list is a string "conv" or "maxpooling". Indicating the layer type
                "conv" is followed by number of filters, filter sizes, stride, paddings. It is assumed that every convolutional layer is followed by an activation layer.
                "maxpool" is followed by filter size and stride.
            
            hidden_layer_specifications: A list of ints. Number of elements correspond to number of hidden layers and each value gives the number of neurons in the corresponding hidden layer.
            
            output_activation: The torch nn activation method to be used for the output layer.

            H : Height of the input image. [should be fixed for the dataset] (default:800)

            W : Width of the input image.  [should be fixed for the dataset]  (default:800)

            input_depth : Depth of the input image (number of channels). [should be fixed for the dataset] (default:3)


        Returns:
        
            None.
        """

        super(CNN, self).__init__()

        self.H = H
        self.W = W
        self.input_channels = input_depth
        self.output_activation = output_activation
        
        self.convolutional_layers = nn.ModuleList() ## Create a module list to organize the convolutional layers
        
        
        ## iterate over the convolution_layer_specifications and create the convolutional layers accordingly

        cur_depth = self.input_channels
        for config in convolution_layer_specifications:

            ## Assuming filter is a square matrix, so filter_size is int.
            type = config[0]

            if type == "conv":
                _, number_of_filters, filter_size, stride, padding = config
                self.convolutional_layers.append(nn.Conv2d(cur_depth, number_of_filters, filter_size, stride, padding))
                self.convolutional_layers.append(activation)  # Add activation after each convolutional layer
            
            elif type == "maxpool":
                _,max_pool_filter_size,max_pool_stride = config
                self.convolutional_layers.append(nn.MaxPool2d(max_pool_filter_size,max_pool_stride))
            
            cur_depth = number_of_filters  # Update input depth for next layer

        self.convolutional_layers.apply(self.initalize_weights_biases)
        
        ## iterate over the hidden_layer_specifications and create CNN layers accordingly.
        self.hidden_layers = nn.ModuleList()

        conv_output_height,conv_output_height,conv_output_depth = self.compute_output_dimension_post_convolutions(convolution_layer_specifications)
        
        fan_in =  conv_output_height*conv_output_height*conv_output_depth ## interface betweeon maxpooling and dense layer
        
        for hidden_size in hidden_layer_specifications:
            self.hidden_layers.append(nn.Linear(fan_in, hidden_size))
            self.hidden_layers.append(nn.ReLU())  # Add ReLU activation after each dense layer
            fan_in = hidden_size  # Update number of input features for next layer

        self.hidden_layers.apply(self.initalize_weights_biases)

        self.output_layer = nn.Linear(hidden_layer_specifications[-1], num_output_neurons)

        self.output_layer.apply(self.initalize_weights_biases)


        def forward(self,x):

            ## pass through convolution, activation, pooling layer set
            for layer in self.convolutional_layers:
                x = layer(x)
            
            x = torch.flatten(x, 1)


            ## pass through hidden layers
            for layer in self.dense_layers:
                x = layer(x)

            ## compute and activate the output
            output = self.output_activation(self.output_layer(x))
            
            return output


In [82]:
num_classes =  len(train_dataset.classes)

convolution_layer_specifications = [

    ["conv",96,11,4,0],
    ["maxpool",3,2],
    ["conv",256,5,1,0],
    ["maxpool",3,2],
    ["conv",384,3,1,0],
    ["conv",384,3,1,0],
    ["conv",256,3,1,0],
    ["maxpool",3,2]
]

hidden_layer_specifications = [256]



model = CNN(num_output_neurons=num_classes, activation=nn.ReLU(), convolution_layer_specifications=convolution_layer_specifications, hidden_layer_specifications=hidden_layer_specifications, output_activation = nn.Softmax(), W=800, H=800, input_depth=3)

In [83]:
def print_network_architecture(model):
    print(model)
    print("Network Architecture:")
    for name, module in model.named_children():
        print("-" * 40)
        print(f"Layer Name: {name}")
        print(module)
        print("-" * 40)

In [84]:
print_network_architecture(model)

CNN(
  (output_activation): Softmax(dim=None)
  (convolutional_layers): ModuleList(
    (0): Conv2d(3, 96, kernel_size=(11, 11), stride=(4, 4))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(96, 256, kernel_size=(5, 5), stride=(1, 1))
    (4): ReLU()
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(256, 384, kernel_size=(3, 3), stride=(1, 1))
    (7): ReLU()
    (8): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1))
    (9): ReLU()
    (10): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1))
    (11): ReLU()
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (hidden_layers): ModuleList(
    (0): Linear(in_features=92416, out_features=256, bias=True)
    (1): ReLU()
  )
  (output_layer): Linear(in_features=256, out_features=10, bias=True)
)
Network Architecture:
----------------------------------------
Layer Name: output_activ

In [None]:
    ["conv",6,5,1,0],
    ["maxpool",2,2],
    ["conv",16,5,1,0],
    ["maxpool",2,2]

### References:

1. https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html
2. https://pytorch.org/tutorials/beginner/basics/data_tutorial.html#:~:text=PyTorch%20provides%20two%20data%20primitives,easy%20access%20to%20the%20samples.
3. https://stanford.edu/~shervine/blog/pytorch-how-to-generate-data-parallel
4. https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html
5. https://pytorch.org/vision/main/generated/torchvision.transforms.Compose.html
6. https://pytorch.org/vision/main/generated/torchvision.transforms.Resize.html
7. https://pytorch.org/vision/stable/transforms.html#torchvision.transforms.Normalize

In [None]:
"""def get_train_valid_loader(data_dir,
                           batch_size,
                           augment,
                           random_seed,
                           img_size,
                           valid_size=0.1,
                           shuffle=True,
                           show_sample=False,
                           num_workers=4,
                           pin_memory=False):
    If using CUDA, num_workers should be set to 1 and pin_memory to True.
    Params
    ------
    - data_dir: path directory to the dataset.
    - batch_size: how many samples per batch to load.
    - augment: whether to apply the data augmentation scheme
      mentioned in the paper. Only applied on the train split.
    - random_seed: fix seed for reproducibility.
    - valid_size: percentage split of the training set used for
      the validation set. Should be a float in the range [0, 1].
    - shuffle: whether to shuffle the train/validation indices.
    - show_sample: plot 9x9 sample grid of the dataset.
    - num_workers: number of subprocesses to use when loading the dataset.
    - pin_memory: whether to copy tensors into CUDA pinned memory. Set it to
      True if using GPU.
    Returns
    -------
    - train_loader: training set iterator.
    - valid_loader: validation set iterator.
    
    error_msg = "[!] valid_size should be in the range [0, 1]."
    assert ((valid_size >= 0) and (valid_size <= 1)), error_msg

    normalize = transforms.Normalize(
        mean=[0.4914, 0.4822, 0.4465],
        std=[0.2023, 0.1994, 0.2010],
    )

    # define transforms
    valid_transform = transforms.Compose([
            transforms.Resize(img_size),
            transforms.ToTensor(),
            normalize,
    ])
    if augment:
        train_transform = transforms.Compose([
            transforms.Resize(img_size),
            transforms.RandomHorizontalFlip(0.3),
            transforms.ToTensor(),
            normalize,
        ])
    else:
        train_transform = transforms.Compose([
            transforms.Resize(img_size),
            transforms.ToTensor(),
            normalize,
        ])

    # load the dataset
    train_dataset = datasets.ImageFolder(
        root=train_dir,
        transform=torchvision.transforms.Compose([
            transforms.Resize(img_size),
            transforms.RandomHorizontalFlip(0.3),
            transforms.ToTensor()])
    )

    valid_dataset = train_dataset = datasets.ImageFolder(
        root=train_dir,
        transform=valid_transform
    )

    num_train = len(train_dataset)
    indices = list(range(num_train))
    split = int(np.floor(valid_size * num_train))

    if shuffle:
        np.random.seed(random_seed)
        np.random.shuffle(indices)

    train_idx, valid_idx = indices[split:], indices[:split]
    train_sampler = SubsetRandomSampler(train_idx)
    valid_sampler = SubsetRandomSampler(valid_idx)

    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=batch_size, sampler=train_sampler,
        num_workers=num_workers, pin_memory=pin_memory,
    )
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=batch_size, sampler=valid_sampler,
        num_workers=num_workers, pin_memory=pin_memory,
    )

    # visualize some images
    if show_sample:
        sample_loader = torch.utils.data.DataLoader(
            train_dataset, batch_size=9, shuffle=shuffle,
            num_workers=num_workers, pin_memory=pin_memory,
        )
        data_iter = iter(sample_loader)
        images, labels = data_iter.next()
        X = images.numpy().transpose([0, 2, 3, 1])
        plot_images(X, labels)

    return (train_loader, valid_loader)"""