<a href="https://colab.research.google.com/github/PranavPhanindra/Deep-Learning-PyTorch/blob/main/Assignment_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Questions

# Part-1

1. Consider a neural network with 4 layers of 500 neurons each. Initialize the weights of each of these
layers with samples from a Gaussian with mean 0 and standard deviation 0.01. The input layer consists of
1000 features randomly sampled from the standard Gaussian. Perform the forward propagation of the input
only once, with tanh as the activation function in all the layers. At the end of forward propagation, plot
a layer-wise (including the input layer) histogram of activation values. What do you observe?
2. Repeat the above experiment for sigmoid activation. What do you observe?
3. Repeat the above experiment for ReLU activation. What do you observe?
4. Repeat the above experiment again for tanh, sigmoid and ReLU, but now with weights initialized to
samples from standard Gaussian. What do you observe?
5. Now repeat the above experiment with Xavier Weight Initialization on all the 3 activations. What do
you observe?
6. Now repeat the above experiment with He Weight Initialization on all the 3 activations. What do you
observe?


| Experiment | Weight Initialization | Activation | Observations |
|------------|-----------------------|------------|--------------|
| Experiment 1 | Gaussian (mean=0, std=0.01) | Tanh |  |
| Experiment 2 | Gaussian (mean=0, std=0.01) | Sigmoid |  |
| Experiment 3 | Gaussian (mean=0, std=0.01) | ReLU |  |
| Experiment 4 | Standard Gaussian | Tanh |  |
| Experiment 5 | Standard Gaussian | Sigmoid |  |
| Experiment 6 | Standard Gaussian | ReLU |  |
| Experiment 7 | Xavier Weight Initialization | Tanh |  |
| Experiment 8 | Xavier Weight Initialization | Sigmoid |  |
| Experiment 9 | Xavier Weight Initialization | ReLU |  |
| Experiment 10 | He Weight Initialization | Tanh |  |
| Experiment 11 | He Weight Initialization | Sigmoid |  |
| Experiment 12 | He Weight Initialization | ReLU |  |


## Imports

In [None]:
import matplotlib.pyplot as plt
import plotly.graph_objs as go
import torch
import os

from torch import nn
from torchsummary import summary
from plotly.subplots import make_subplots

## Model Definition & Function Definition

In [None]:
class ExperimentNet(nn.Module):
    """Four-layer fully connected network for the activation-statistics experiments.

    A 1000-feature input is passed through four ``nn.Linear`` layers of 500
    units each, with the same activation module applied after every layer.

    Args:
        activation: Key selecting the non-linearity: ``'tanh'``, ``'relu'`` or
            ``'sigmoid'``. An unknown key raises ``KeyError``.
        weight_init: Optional dict describing the weight initialisation, with
            the keys ``'name'`` (label stored on ``weight_init_used``),
            ``'function'`` (called as ``function(layer.weight, **parameters)``)
            and ``'parameters'`` (keyword arguments for that function).
            When ``None`` the layers keep the default ``nn.Linear``
            initialisation.

    Only the weight matrices are passed to the initialisation function; the
    biases keep whatever ``nn.Linear`` assigned them.
    """

    def __init__(self, activation='tanh', weight_init=None):
        super().__init__()

        input_size = 1000
        hidden_size = 500

        # Map activation names to module classes; a fresh instance is created
        # for every position in the stack.
        activation_functions = {
            'tanh': nn.Tanh,
            'relu': nn.ReLU,
            'sigmoid': nn.Sigmoid,
        }
        activation_cls = activation_functions[activation]

        # Always define the attribute so it can be read even when the default
        # initialisation is used (it is None in that case).
        self.weight_init_used = None if weight_init is None else weight_init['name']

        stack = []
        in_features = input_size
        for index in range(1, 5):
            # Create and initialise one layer at a time so the random number
            # generator is consumed in the same order as a layer-by-layer build.
            layer = nn.Linear(in_features, hidden_size)
            if weight_init is not None:
                weight_init['function'](layer.weight, **weight_init['parameters'])
            # Expose the layer as self.Layer1 ... self.Layer4.
            setattr(self, f'Layer{index}', layer)
            stack.extend([layer, activation_cls()])
            in_features = hidden_size

        # Sequential order: Layer1, act, Layer2, act, Layer3, act, Layer4, act.
        self.linear_activation_stack = nn.Sequential(*stack)

    def forward(self, x):
        """Return the output of the last activation for input ``x``."""
        return self.linear_activation_stack(x)

def getActivation(name,activationDictionary):
    """Build a forward hook that records a module's output under ``name``.

    The returned callable has the ``(module, inputs, output)`` signature used
    by ``register_forward_hook``; on every call it stores ``output.detach()``
    in ``activationDictionary[name]``, overwriting any previous entry.
    """
    def recordOutput(_module, _inputs, moduleOutput):
        detached = moduleOutput.detach()
        activationDictionary[name] = detached

    return recordOutput

def plotHistogram(activationDictionary, title, x_label, y_label, bins=None, title_position=(0.5, 0.9), template='plotly_dark'):
    """Show every entry of ``activationDictionary`` as a histogram on one figure.

    Args:
        activationDictionary: Mapping of trace name to the values to histogram.
        title: Figure title.
        x_label: Title of the x axis.
        y_label: Title of the y axis.
        bins: Passed to Plotly as ``nbinsx``; ``None`` leaves the choice to Plotly.
        title_position: ``(x, y)`` pair used for ``title_x`` / ``title_y``.
        template: Plotly layout template name.
    """
    figure = make_subplots(rows=1, cols=1)

    # One half-transparent histogram trace per dictionary entry.
    for traceName in activationDictionary:
        figure.add_trace(
            go.Histogram(
                x=activationDictionary[traceName],
                opacity=0.5,
                name=traceName,
                nbinsx=bins,
            )
        )

    layoutOptions = dict(
        title=title,
        xaxis_title=x_label,
        yaxis_title=y_label,
        title_x=title_position[0],
        title_y=title_position[1],
        template=template,
    )
    figure.update_layout(**layoutOptions)
    figure.show()

In [None]:
# Input tensor shared by the Part-1 experiments: 1000 features sampled from the standard Gaussian
inputTensor = torch.randn(1000)

## Exp-1
Gaussian (mean=0, std=0.01)
Tanh

In [None]:
# Experiment 1 configuration: tanh activation, weights drawn from a Gaussian with mean=0, std=0.01.
activation = 'tanh'
weight_init = dict(
    name='gaussian_0.01',
    function=nn.init.normal_,
    parameters=dict(mean=0, std=0.01),
)

Experiment1 = ExperimentNet(activation, weight_init)

In [None]:
# Collect the input plus the output of every layer's activation function.
Experiment1Activations = {'inputTensor': inputTensor}
# Hook the activation modules (odd positions of the Sequential stack). Hooking
# the nn.Linear layers would record pre-activation values, not activations.
for layerName, activationModule in zip(
        ('Layer1', 'Layer2', 'Layer3', 'Layer4'),
        Experiment1.linear_activation_stack[1::2]):
    activationModule.register_forward_hook(getActivation(layerName, Experiment1Activations))
Experiment1Output = Experiment1(inputTensor)

plotHistogram(Experiment1Activations,
              "Exp1--Gaussian (mean=0, std=0.01)--Tanh",
              'Values',
              'Frequency')

## Exp-2
Gaussian (mean=0, std=0.01)	Sigmoid

In [None]:
# Experiment 2 configuration: sigmoid activation, weights drawn from a Gaussian with mean=0, std=0.01.
activation = 'sigmoid'
weight_init = dict(
    name='gaussian_0.01',
    function=nn.init.normal_,
    parameters=dict(mean=0, std=0.01),
)

Experiment2 = ExperimentNet(activation, weight_init)

In [None]:
# Collect the input plus the output of every layer's activation function.
Experiment2Activations = {'inputTensor': inputTensor}
# Hook the activation modules (odd positions of the Sequential stack). Hooking
# the nn.Linear layers would record pre-activation values, not activations.
for layerName, activationModule in zip(
        ('Layer1', 'Layer2', 'Layer3', 'Layer4'),
        Experiment2.linear_activation_stack[1::2]):
    activationModule.register_forward_hook(getActivation(layerName, Experiment2Activations))
Experiment2Output = Experiment2(inputTensor)

plotHistogram(Experiment2Activations,
              "Exp2--Gaussian (mean=0, std=0.01)--Sigmoid",
              'Values',
              'Frequency')

## Exp-3
Gaussian (mean=0, std=0.01)	ReLU

In [None]:
# Experiment 3 configuration: ReLU activation, weights drawn from a Gaussian with mean=0, std=0.01.
activation = 'relu'
weight_init = dict(
    name='gaussian_0.01',
    function=nn.init.normal_,
    parameters=dict(mean=0, std=0.01),
)

Experiment3 = ExperimentNet(activation, weight_init)

In [None]:
# Collect the input plus the output of every layer's activation function.
Experiment3Activations = {'inputTensor': inputTensor}
# Hook the activation modules (odd positions of the Sequential stack). Hooking
# the nn.Linear layers would record pre-activation values, not activations.
for layerName, activationModule in zip(
        ('Layer1', 'Layer2', 'Layer3', 'Layer4'),
        Experiment3.linear_activation_stack[1::2]):
    activationModule.register_forward_hook(getActivation(layerName, Experiment3Activations))
Experiment3Output = Experiment3(inputTensor)

plotHistogram(Experiment3Activations,
              "Exp3--Gaussian (mean=0, std=0.01)--ReLU",
              'Values',
              'Frequency')

## Exp-4
Standard Gaussian	Tanh

In [None]:
# Experiment 4 configuration: tanh activation, weights drawn from a Gaussian with mean=0, std=1.
activation = 'tanh'
weight_init = dict(
    name='gaussian_1',
    function=nn.init.normal_,
    parameters=dict(mean=0, std=1),
)

Experiment4 = ExperimentNet(activation, weight_init)

In [None]:
# Collect the input plus the output of every layer's activation function.
Experiment4Activations = {'inputTensor': inputTensor}
# Hook the activation modules (odd positions of the Sequential stack). Hooking
# the nn.Linear layers would record pre-activation values, not activations.
for layerName, activationModule in zip(
        ('Layer1', 'Layer2', 'Layer3', 'Layer4'),
        Experiment4.linear_activation_stack[1::2]):
    activationModule.register_forward_hook(getActivation(layerName, Experiment4Activations))
Experiment4Output = Experiment4(inputTensor)

plotHistogram(Experiment4Activations,
              "Exp4--Standard Gaussian (mean=0, std=1)--tanh",
              'Values',
              'Frequency')

## Exp-5
Standard Gaussian	Sigmoid

In [None]:
# Experiment 5 configuration: sigmoid activation, weights drawn from a Gaussian with mean=0, std=1.
activation = 'sigmoid'
weight_init = dict(
    name='gaussian_1',
    function=nn.init.normal_,
    parameters=dict(mean=0, std=1),
)

Experiment5 = ExperimentNet(activation, weight_init)

In [None]:
# Collect the input plus the output of every layer's activation function.
Experiment5Activations = {'inputTensor': inputTensor}
# Hook the activation modules (odd positions of the Sequential stack). Hooking
# the nn.Linear layers would record pre-activation values, not activations.
for layerName, activationModule in zip(
        ('Layer1', 'Layer2', 'Layer3', 'Layer4'),
        Experiment5.linear_activation_stack[1::2]):
    activationModule.register_forward_hook(getActivation(layerName, Experiment5Activations))
Experiment5Output = Experiment5(inputTensor)

plotHistogram(Experiment5Activations,
              "Exp5--Standard Gaussian (mean=0, std=1)--sigmoid",
              'Values',
              'Frequency')

## Exp-6
Standard Gaussian	ReLU

In [None]:
# Experiment 6 configuration: ReLU activation, weights drawn from a Gaussian with mean=0, std=1.
activation = 'relu'
weight_init = dict(
    name='gaussian_1',
    function=nn.init.normal_,
    parameters=dict(mean=0, std=1),
)

Experiment6 = ExperimentNet(activation, weight_init)

In [None]:
# Collect the input plus the output of every layer's activation function.
Experiment6Activations = {'inputTensor': inputTensor}
# Hook the activation modules (odd positions of the Sequential stack). Hooking
# the nn.Linear layers would record pre-activation values, not activations.
for layerName, activationModule in zip(
        ('Layer1', 'Layer2', 'Layer3', 'Layer4'),
        Experiment6.linear_activation_stack[1::2]):
    activationModule.register_forward_hook(getActivation(layerName, Experiment6Activations))
Experiment6Output = Experiment6(inputTensor)

plotHistogram(Experiment6Activations,
              "Exp6--Standard Gaussian (mean=0, std=1)--ReLU",
              'Values',
              'Frequency')

## Exp-7
Xavier Tanh

In [None]:
# Experiment 7 configuration: tanh activation, Xavier (normal) weight initialisation.
activation = 'tanh'
weight_init = dict(
    name='xavier_normal',
    function=nn.init.xavier_normal_,
    parameters=dict(),
)

Experiment7 = ExperimentNet(activation, weight_init)

In [None]:
# Collect the input plus the output of every layer's activation function.
Experiment7Activations = {'inputTensor': inputTensor}
# Hook the activation modules (odd positions of the Sequential stack). Hooking
# the nn.Linear layers would record pre-activation values, not activations.
for layerName, activationModule in zip(
        ('Layer1', 'Layer2', 'Layer3', 'Layer4'),
        Experiment7.linear_activation_stack[1::2]):
    activationModule.register_forward_hook(getActivation(layerName, Experiment7Activations))
Experiment7Output = Experiment7(inputTensor)

plotHistogram(Experiment7Activations,
              "Exp7--Xavier--tanh",
              'Values',
              'Frequency')

## Exp-8
Xavier	Sigmoid

In [None]:
# Experiment 8 configuration: sigmoid activation, Xavier (normal) weight initialisation.
activation = 'sigmoid'
weight_init = dict(
    name='xavier_normal',
    function=nn.init.xavier_normal_,
    parameters=dict(),
)

Experiment8 = ExperimentNet(activation, weight_init)

In [None]:
# Collect the input plus the output of every layer's activation function.
Experiment8Activations = {'inputTensor': inputTensor}
# Hook the activation modules (odd positions of the Sequential stack). Hooking
# the nn.Linear layers would record pre-activation values, not activations.
for layerName, activationModule in zip(
        ('Layer1', 'Layer2', 'Layer3', 'Layer4'),
        Experiment8.linear_activation_stack[1::2]):
    activationModule.register_forward_hook(getActivation(layerName, Experiment8Activations))
Experiment8Output = Experiment8(inputTensor)

plotHistogram(Experiment8Activations,
              "Exp8--Xavier--sigmoid",
              'Values',
              'Frequency')

## Exp-9
Xavier ReLU

In [None]:
# Experiment 9 configuration: ReLU activation, Xavier (normal) weight initialisation.
activation = 'relu'
weight_init = dict(
    name='xavier_normal',
    function=nn.init.xavier_normal_,
    parameters=dict(),
)

Experiment9 = ExperimentNet(activation, weight_init)

In [None]:
# Collect the input plus the output of every layer's activation function.
Experiment9Activations = {'inputTensor': inputTensor}
# Hook the activation modules (odd positions of the Sequential stack). Hooking
# the nn.Linear layers would record pre-activation values, not activations.
for layerName, activationModule in zip(
        ('Layer1', 'Layer2', 'Layer3', 'Layer4'),
        Experiment9.linear_activation_stack[1::2]):
    activationModule.register_forward_hook(getActivation(layerName, Experiment9Activations))
Experiment9Output = Experiment9(inputTensor)

plotHistogram(Experiment9Activations,
              "Exp9--Xavier--ReLU",
              'Values',
              'Frequency')

## Exp-10
He Tanh

In [None]:
# Experiment 10 configuration: tanh activation, He (Kaiming normal) weight initialisation.
activation = 'tanh'
weight_init = dict(
    name='kaiming_normal',
    function=nn.init.kaiming_normal_,
    parameters=dict(mode='fan_in', nonlinearity='relu'),
)

Experiment10 = ExperimentNet(activation, weight_init)

In [None]:
# Collect the input plus the output of every layer's activation function.
Experiment10Activations = {'inputTensor': inputTensor}
# Hook the activation modules (odd positions of the Sequential stack). Hooking
# the nn.Linear layers would record pre-activation values, not activations.
for layerName, activationModule in zip(
        ('Layer1', 'Layer2', 'Layer3', 'Layer4'),
        Experiment10.linear_activation_stack[1::2]):
    activationModule.register_forward_hook(getActivation(layerName, Experiment10Activations))
Experiment10Output = Experiment10(inputTensor)

plotHistogram(Experiment10Activations,
              "Exp10--Kaiming--tanh",
              'Values',
              'Frequency')

## Exp-11
He Sigmoid

In [None]:
# Experiment 11 configuration: sigmoid activation, He (Kaiming normal) weight initialisation.
activation = 'sigmoid'
weight_init = dict(
    name='kaiming_normal',
    function=nn.init.kaiming_normal_,
    parameters=dict(mode='fan_in', nonlinearity='relu'),
)

Experiment11 = ExperimentNet(activation, weight_init)

In [None]:
# Collect the input plus the output of every layer's activation function.
Experiment11Activations = {'inputTensor': inputTensor}
# Hook the activation modules (odd positions of the Sequential stack). Hooking
# the nn.Linear layers would record pre-activation values, not activations.
for layerName, activationModule in zip(
        ('Layer1', 'Layer2', 'Layer3', 'Layer4'),
        Experiment11.linear_activation_stack[1::2]):
    activationModule.register_forward_hook(getActivation(layerName, Experiment11Activations))
Experiment11Output = Experiment11(inputTensor)

plotHistogram(Experiment11Activations,
              "Exp11--Kaiming--sigmoid",
              'Values',
              'Frequency')

## Exp-12
He ReLU

In [None]:
# Experiment 12 configuration: ReLU activation, He (Kaiming normal) weight initialisation.
activation = 'relu'
weight_init = dict(
    name='kaiming_normal',
    function=nn.init.kaiming_normal_,
    parameters=dict(mode='fan_in', nonlinearity='relu'),
)

Experiment12 = ExperimentNet(activation, weight_init)

In [None]:
# Collect the input plus the output of every layer's activation function.
Experiment12Activations = {'inputTensor': inputTensor}
# Hook the activation modules (odd positions of the Sequential stack). Hooking
# the nn.Linear layers would record pre-activation values, not activations.
for layerName, activationModule in zip(
        ('Layer1', 'Layer2', 'Layer3', 'Layer4'),
        Experiment12.linear_activation_stack[1::2]):
    activationModule.register_forward_hook(getActivation(layerName, Experiment12Activations))
Experiment12Output = Experiment12(inputTensor)

plotHistogram(Experiment12Activations,
              "Exp12--Kaiming--relu",
              'Values',
              'Frequency')

# Part-2


7. In the first lab assignment, use ReLU activation (in the hidden layers) and He weight initialization
and train the model. Does the model train faster? Does it perform better on valid/test set?
8. In the first lab assignment, use ReLU activation (in the hidden layers) but allow the weights to be
automatically initialized by pytorch linear module. Add batch normalization layer after every
application of ReLU and train the model. Does the model train faster? Does it perform better on
valid/test set? Compare the results with results from experiment 7.
9. In the first lab assignment, use ReLU activation (in the hidden layers) but allow the weights to be
automatically initialized by pytorch linear module. Deploy drop-out in every hidden layer. Retain/drop
probabilities may be chosen by trial and error. Does the model perform better?
10. Instead of drop-out, use L2 regularization. Regularization param may be chosen by trial and error.
Does the model perform better?


| Experiment | Weight Initialization | Activation | Additional Layers |
|------------|-----------------------|------------|-------------------|
| Experiment 2.1 | He initialization | ReLU | None |
| Experiment 2.2 | PyTorch linear module (Automatic) | ReLU | Batch Normalization after every ReLU |
| Experiment 2.3 | PyTorch linear module (Automatic) | ReLU | Dropout in every hidden layer |
| Experiment 2.4 | PyTorch linear module (Automatic) | ReLU | L2 regularization |


## Imports

In [None]:
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

import pandas as pd

## Data Loading & Transforms

In [None]:
# FashionMNIST train and test splits, stored under ./data (downloaded when
# missing) with every image converted by ToTensor.
training_data, test_data = (
    datasets.FashionMNIST(
        root="data",
        train=isTrainSplit,
        download=True,
        transform=transforms.ToTensor(),
    )
    for isTrainSplit in (True, False)
)

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to data/FashionMNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 26421880/26421880 [00:00<00:00, 117436102.90it/s]


Extracting data/FashionMNIST/raw/train-images-idx3-ubyte.gz to data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to data/FashionMNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 29515/29515 [00:00<00:00, 7568312.19it/s]

Extracting data/FashionMNIST/raw/train-labels-idx1-ubyte.gz to data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz



100%|██████████| 4422102/4422102 [00:00<00:00, 62712853.91it/s]


Extracting data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz to data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 5148/5148 [00:00<00:00, 8219366.96it/s]


Extracting data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz to data/FashionMNIST/raw



In [None]:
# The dataset encodes each class as an integer; map those indices to
# human-readable names (list position == class index).
labels_map = dict(enumerate([
    "T-Shirt",
    "Trouser",
    "Pullover",
    "Dress",
    "Coat",
    "Sandal",
    "Shirt",
    "Sneaker",
    "Bag",
    "Ankle Boot",
]))

## Model , Train & Test Loop

In [None]:
def plot_accuracy_comparison(dataframes, legends):
    """Plot train and test accuracy curves of several runs side by side.

    Args:
        dataframes: Iterable of DataFrames with the columns ``'Epoch'``,
            ``'TrainError'`` and ``'TestError'``. Despite their names, the two
            ``*Error`` columns are plotted as accuracy values.
        legends: Iterable of trace names, paired with ``dataframes`` by
            position (extra items on either side are ignored by ``zip``).
    """
    fig = make_subplots(rows=1, cols=2, subplot_titles=("Train Accuracy", "Test Accuracy"))

    for df, legend in zip(dataframes, legends):
        # Left subplot: accuracy on the training set.
        fig.add_trace(
            go.Scatter(x=df['Epoch'], y=df['TrainError'], mode='lines', name=legend, showlegend=True),
            row=1, col=1,
        )
        # Right subplot: accuracy on the test set.
        fig.add_trace(
            go.Scatter(x=df['Epoch'], y=df['TestError'], mode='lines', name=legend, showlegend=True),
            row=1, col=2,
        )

    fig.update_layout(
        title='Accuracy Comparison',
        template='plotly_dark'
    )

    # Layout-level xaxis_title / yaxis_title only label the first subplot;
    # update_xaxes / update_yaxes without a selector apply to every subplot.
    fig.update_xaxes(title_text='Epoch')
    fig.update_yaxes(title_text='Accuracy')

    fig.show()


In [None]:
class NeuralNetwork(nn.Module):
    """Configurable 784-64-128-10 classifier for the Part-2 experiments.

    The input is flattened and passed through
    ``Linear -> [BatchNorm] -> activation -> [Dropout]`` twice, followed by a
    final ``Linear`` layer that produces 10 logits.

    Args:
        activation: ``'tanh'``, ``'relu'`` or ``'sigmoid'``; any other key
            raises ``KeyError``.
        weight_init: Optional dict selecting the weight initialisation.
            ``'name'`` picks one of the presets (``'gaussian_0.01'``,
            ``'gaussian_1'``, ``'xavier_normal'``, ``'kaiming_normal'``).
            ``'parameters'`` optionally overrides the preset keyword arguments.
            For a name that is not a preset, a callable given under
            ``'function'`` is used instead. ``None`` keeps the default
            ``nn.Linear`` initialisation.
        batch_norm: When true, add ``nn.BatchNorm1d`` after each hidden linear layer.
        dropout: Optional dict of keyword arguments for ``nn.Dropout``
            (for example ``{'p': 0.5}``); ``None`` disables dropout.

    Raises:
        KeyError: If ``activation`` is unknown, or ``weight_init`` names no
            preset and supplies no ``'function'``.

    NOTE(review): the assignment text asks for batch normalization after every
    ReLU, while this stack applies it before the activation — confirm which
    ordering is intended.
    """

    def __init__(self, activation='tanh', weight_init=None, batch_norm=False, dropout=None):
        super().__init__()
        self.flatten = nn.Flatten()

        # Map activation names to module classes.
        activation_functions = {
            'tanh': nn.Tanh,
            'relu': nn.ReLU,
            'sigmoid': nn.Sigmoid,
        }
        activation_cls = activation_functions[activation]

        # Every preset carries both its function and its default parameters,
        # so all four names are usable (the Gaussian presets previously had no
        # 'function' entry and failed with KeyError).
        weight_init_functions = {
            'gaussian_0.01': {'function': nn.init.normal_, 'parameters': {'mean': 0, 'std': 0.01}},
            'gaussian_1': {'function': nn.init.normal_, 'parameters': {'mean': 0, 'std': 1}},
            'xavier_normal': {'function': nn.init.xavier_normal_, 'parameters': {}},
            'kaiming_normal': {'function': nn.init.kaiming_normal_,
                               'parameters': {'mode': 'fan_in', 'nonlinearity': 'relu'}},
        }

        init_func = None
        init_params = {}
        if weight_init is not None:
            preset = weight_init_functions.get(weight_init.get('name'), {})
            # A known preset name wins; otherwise fall back to a caller-supplied callable.
            init_func = preset.get('function') or weight_init.get('function')
            if init_func is None:
                raise KeyError(
                    f"Unknown weight initialisation {weight_init.get('name')!r}; "
                    f"expected one of {sorted(weight_init_functions)} or an explicit 'function'"
                )
            init_params = weight_init.get('parameters', preset.get('parameters', {}))

        def make_linear(in_features, out_features):
            # Create one linear layer and, if requested, re-initialise its weights.
            layer = nn.Linear(in_features, out_features)
            if init_func is not None:
                init_func(layer.weight, **init_params)
            return layer

        self.Layer1 = make_linear(28 * 28, 64)
        if batch_norm:
            self.batch_norm1 = nn.BatchNorm1d(64)
        if dropout is not None:
            self.dropout1 = nn.Dropout(**dropout)

        self.Layer2 = make_linear(64, 128)
        if batch_norm:
            self.batch_norm2 = nn.BatchNorm1d(128)
        if dropout is not None:
            self.dropout2 = nn.Dropout(**dropout)

        self.Layer3 = make_linear(128, 10)

        # Dropout follows the activation: zeroing a unit before a sigmoid would
        # turn it into 0.5 instead of removing it. nn.Identity placeholders
        # keep the Sequential indices of the parameterised modules fixed
        # (0, 1, 4, 5, 8) whether or not batch norm / dropout are enabled.
        self.linear_relu_stack = nn.Sequential(
            self.Layer1,
            self.batch_norm1 if batch_norm else nn.Identity(),
            activation_cls(),
            self.dropout1 if dropout is not None else nn.Identity(),
            self.Layer2,
            self.batch_norm2 if batch_norm else nn.Identity(),
            activation_cls(),
            self.dropout2 if dropout is not None else nn.Identity(),
            self.Layer3,
        )

    def forward(self, x):
        """Flatten ``x`` and return the 10 class logits."""
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits

In [None]:
def train_loop(dataloader, model, lossFn, optimizer1):
    """Run one pass over ``dataloader``, updating ``model`` after every batch.

    Args:
        dataloader: Iterable of ``(inputs, targets)`` batches exposing ``.dataset``.
        model: Module to train; it is switched to training mode first.
        lossFn: Callable invoked as ``lossFn(predictions, targets)``.
        optimizer1: Optimizer whose ``step`` / ``zero_grad`` drive the update.

    The loss and the running sample count are printed every 100 batches.
    """
    size = len(dataloader.dataset)
    model.train()

    for batchIndex, (inputs, targets) in enumerate(dataloader):
        # Forward pass and loss for this batch.
        batchLoss = lossFn(model(inputs), targets)

        # Backward pass, parameter update, then clear the gradients so they
        # do not accumulate into the next batch.
        batchLoss.backward()
        optimizer1.step()
        optimizer1.zero_grad()

        if batchIndex % 100 != 0:
            continue
        loss = batchLoss.item()
        current = (batchIndex + 1) * len(inputs)
        print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")

In [None]:
def test_loop(dataloader, model, loss_fn):
    #Set model in evaluation mode
    model.eval()

    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, correct = 0, 0

    # Evaluating the model with torch.no_grad() ensures that no gradients are computed during test mode
    # also serves to reduce unnecessary gradient computations and memory usage for tensors with requires_grad=True
    with torch.no_grad():
        for X, y in dataloader:
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()

    #Mean or average of loss
    test_loss /= num_batches
    correct /= size
    print(f"Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

    return 100*correct

In [None]:
# Training hyperparameters: SGD step size, mini-batch size, number of epochs
learningRate, batchSize, epochs = 1e-1, 128, 20

In [None]:
# Shuffle only the training batches. Evaluation needs no shuffling, and a
# fixed order keeps the batch-averaged test loss reproducible between runs
# (the last, smaller batch would otherwise hold different samples each time).
train_dataloader = DataLoader(training_data, batch_size=batchSize, shuffle=True)
test_dataloader = DataLoader(test_data, batch_size=batchSize, shuffle=False)

## Exp 2_1
Compare He initialization + ReLU against PyTorch's default initialization + ReLU

In [None]:
# Baseline for Exp 2.1: ReLU network that keeps the default nn.Linear
# initialisation (no He init, no batch norm, no dropout).
activation = 'relu'
exp2_1_1 = NeuralNetwork(activation, weight_init=None, batch_norm=False, dropout=None)

In [None]:
# Exp 2.1 variant: ReLU network whose weights use He (Kaiming normal) initialisation.
activation = 'relu'
weight_init = dict(
    name='kaiming_normal',
    parameters=dict(mode='fan_in', nonlinearity='relu'),
)

exp2_1_2 = NeuralNetwork(activation, weight_init, batch_norm=False, dropout=None)

In [None]:
# CrossEntropyLoss combines LogSoftmax and NLLLoss (negative log-likelihood)
lossFn = nn.CrossEntropyLoss()
# One plain SGD optimizer per model, both using the shared learning rate
optimizer2_1_1, optimizer2_1_2 = (
    torch.optim.SGD(net.parameters(), lr=learningRate)
    for net in (exp2_1_1, exp2_1_2)
)

In [None]:
# Per-epoch history for exp2_1_1. The 'TrainError' / 'TestError' columns hold
# the accuracy percentages returned by test_loop.
data2_1_1 = {'Epoch': [], 'TrainError': [], 'TestError': []}

for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")

    # One optimisation pass over the training set.
    train_loop(train_dataloader, exp2_1_1, lossFn, optimizer2_1_1)

    # Accuracy on the training set (bias check) and on the test set.
    print("Train Error : ",end = '\t')
    trainAccuracy = test_loop(train_dataloader, exp2_1_1, lossFn)
    print("Test Error : ",end = '\t')
    testAccuracy = test_loop(test_dataloader, exp2_1_1, lossFn)

    # Report overfitting when train accuracy leads test accuracy by more than 5 points.
    accuracyGap = trainAccuracy - testAccuracy
    print("Low bias , High Variance\n" if accuracyGap > 5 else "Decent\n")

    # Record this epoch.
    for column, value in zip(('Epoch', 'TrainError', 'TestError'),
                             (t+1, trainAccuracy, testAccuracy)):
        data2_1_1[column].append(value)

print("Done!")

# Tabulate the history; the bare expression displays it in the notebook
df2_1_1 = pd.DataFrame(data2_1_1)
df2_1_1

Epoch 1
-------------------------------
loss: 2.304456  [  128/60000]
loss: 0.903343  [12928/60000]
loss: 0.667891  [25728/60000]
loss: 0.684988  [38528/60000]
loss: 0.712634  [51328/60000]
Train Error : 	Accuracy: 80.6%, Avg loss: 0.549212 

Test Error : 	Accuracy: 79.8%, Avg loss: 0.571867 

Decent

Epoch 2
-------------------------------
loss: 0.432758  [  128/60000]
loss: 0.504087  [12928/60000]
loss: 0.581418  [25728/60000]
loss: 0.367854  [38528/60000]
loss: 0.628311  [51328/60000]
Train Error : 	Accuracy: 82.6%, Avg loss: 0.470935 

Test Error : 	Accuracy: 81.7%, Avg loss: 0.499380 

Decent

Epoch 3
-------------------------------
loss: 0.579694  [  128/60000]
loss: 0.462457  [12928/60000]
loss: 0.417098  [25728/60000]
loss: 0.453880  [38528/60000]
loss: 0.411846  [51328/60000]
Train Error : 	Accuracy: 84.9%, Avg loss: 0.414041 

Test Error : 	Accuracy: 83.5%, Avg loss: 0.453194 

Decent

Epoch 4
-------------------------------
loss: 0.372827  [  128/60000]
loss: 0.338078  [1292

Unnamed: 0,Epoch,TrainError,TestError
0,1,80.605,79.76
1,2,82.585,81.69
2,3,84.91,83.53
3,4,85.821667,84.17
4,5,83.345,81.71
5,6,86.608333,85.25
6,7,87.008333,85.27
7,8,88.195,86.3
8,9,86.146667,84.22
9,10,88.988333,87.11


In [None]:
# Per-epoch history for exp2_1_2. The 'TrainError' / 'TestError' columns hold
# the accuracy percentages returned by test_loop.
data2_1_2 = {'Epoch': [], 'TrainError': [], 'TestError': []}

for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")

    # One optimisation pass over the training set.
    train_loop(train_dataloader, exp2_1_2, lossFn, optimizer2_1_2)

    # Accuracy on the training set (bias check) and on the test set.
    print("Train Error : ",end = '\t')
    trainAccuracy = test_loop(train_dataloader, exp2_1_2, lossFn)
    print("Test Error : ",end = '\t')
    testAccuracy = test_loop(test_dataloader, exp2_1_2, lossFn)

    # Report overfitting when train accuracy leads test accuracy by more than 5 points.
    accuracyGap = trainAccuracy - testAccuracy
    print("Low bias , High Variance\n" if accuracyGap > 5 else "Decent\n")

    # Record this epoch.
    for column, value in zip(('Epoch', 'TrainError', 'TestError'),
                             (t+1, trainAccuracy, testAccuracy)):
        data2_1_2[column].append(value)

print("Done!")

# Tabulate the history; the bare expression displays it in the notebook
df2_1_2 = pd.DataFrame(data2_1_2)
df2_1_2

Epoch 1
-------------------------------
loss: 2.335837  [  128/60000]
loss: 0.880261  [12928/60000]
loss: 0.613563  [25728/60000]
loss: 0.565421  [38528/60000]
loss: 0.476342  [51328/60000]
Train Error : 	Accuracy: 83.9%, Avg loss: 0.454755 

Test Error : 	Accuracy: 82.7%, Avg loss: 0.486901 

Decent

Epoch 2
-------------------------------
loss: 0.452625  [  128/60000]
loss: 0.424365  [12928/60000]
loss: 0.441369  [25728/60000]
loss: 0.412467  [38528/60000]
loss: 0.475901  [51328/60000]
Train Error : 	Accuracy: 85.2%, Avg loss: 0.412967 

Test Error : 	Accuracy: 83.6%, Avg loss: 0.453180 

Decent

Epoch 3
-------------------------------
loss: 0.336964  [  128/60000]
loss: 0.443564  [12928/60000]
loss: 0.431024  [25728/60000]
loss: 0.507004  [38528/60000]
loss: 0.438297  [51328/60000]
Train Error : 	Accuracy: 86.9%, Avg loss: 0.363562 

Test Error : 	Accuracy: 85.3%, Avg loss: 0.411550 

Decent

Epoch 4
-------------------------------
loss: 0.353991  [  128/60000]
loss: 0.480280  [1292

Unnamed: 0,Epoch,TrainError,TestError
0,1,83.886667,82.7
1,2,85.25,83.59
2,3,86.863333,85.26
3,4,86.818333,85.21
4,5,87.998333,86.16
5,6,88.156667,86.04
6,7,88.808333,86.9
7,8,89.066667,86.92
8,9,88.866667,86.25
9,10,89.813333,87.15


In [None]:
# Compare the default-initialised run with the He-initialised run
dfs2_1 = [df2_1_1, df2_1_2]
plot_accuracy_comparison(dataframes=dfs2_1, legends=["RandomInit", "He+ReLU"])

## Exp 2_2
Default (PyTorch) init + ReLU + BatchNorm

In [None]:
class NeuralNetwork2(nn.Module):
    """Fully connected classifier with batch normalisation.

    The input is flattened and passed through
    Linear(784 -> 64) -> BatchNorm1d -> ReLU ->
    Linear(64 -> 128) -> BatchNorm1d -> ReLU -> Linear(128 -> 10).
    The final layer returns raw logits (no softmax is applied here).
    """

    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()

        # The linear layers are also exposed as attributes so they can be
        # reached by name in addition to their position in the stack.
        self.Layer1 = nn.Linear(in_features=28 * 28, out_features=64)
        self.Layer2 = nn.Linear(in_features=64, out_features=128)
        self.Layer3 = nn.Linear(in_features=128, out_features=10)

        # Each hidden block is Linear -> BatchNorm1d -> ReLU; the output
        # layer has neither normalisation nor activation.
        first_hidden = (self.Layer1, nn.BatchNorm1d(num_features=64), nn.ReLU())
        second_hidden = (self.Layer2, nn.BatchNorm1d(num_features=128), nn.ReLU())
        self.linear_relu_stack = nn.Sequential(
            *first_hidden,
            *second_hidden,
            self.Layer3,
        )

    def forward(self, x):
        """Flatten ``x`` and return the 10 logits per sample."""
        return self.linear_relu_stack(self.flatten(x))

In [None]:
def train_loop(dataloader, model, lossFn, optimizer1):
    """Train ``model`` for one pass over ``dataloader``.

    For every batch the loss is computed, back-propagated, and the optimizer
    takes one step; gradients are cleared afterwards so they do not
    accumulate into the next batch. The batch loss is printed every
    100 batches together with a running sample counter.

    Args:
        dataloader: iterable of ``(X, y)`` batches exposing ``.dataset``.
        model: module to optimise; it is switched to training mode.
        lossFn: callable ``lossFn(pred, y)`` returning a scalar loss tensor.
        optimizer1: optimizer providing ``step()`` and ``zero_grad()``.
    """
    total_samples = len(dataloader.dataset)
    model.train()

    for step, (inputs, targets) in enumerate(dataloader):
        # Forward pass and loss for this batch
        batch_loss = lossFn(model(inputs), targets)

        # Backward pass, parameter update, then reset the gradients
        batch_loss.backward()
        optimizer1.step()
        optimizer1.zero_grad()

        if step % 100 != 0:
            continue

        # Progress report: the counter is (batches so far) * (this batch's size)
        loss_value = batch_loss.item()
        seen = (step + 1) * len(inputs)
        print(f"loss: {loss_value:>7f}  [{seen:>5d}/{total_samples:>5d}]")

In [None]:
def test_loop(dataloader, model, loss_fn):
    #Set model in evaluation mode
    model.eval()

    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, correct = 0, 0

    # Evaluating the model with torch.no_grad() ensures that no gradients are computed during test mode
    # also serves to reduce unnecessary gradient computations and memory usage for tensors with requires_grad=True
    with torch.no_grad():
        for X, y in dataloader:
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()

    #Mean or average of loss
    test_loss /= num_batches
    correct /= size
    print(f"Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

    return 100*correct

In [None]:
# Hyperparameters for this experiment
learningRate, batchSize, epochs = 1e-1, 128, 20

# Mini-batch loaders over the training and test splits (both shuffled)
train_dataloader = DataLoader(dataset=training_data, batch_size=batchSize, shuffle=True)
test_dataloader = DataLoader(dataset=test_data, batch_size=batchSize, shuffle=True)

In [None]:
# Exp 2_2 is the Random+ReLU+BatchNorm run, so instantiate the network that
# actually contains the BatchNorm1d layers (NeuralNetwork2) instead of the
# plain NeuralNetwork with default arguments.
exp2_2 = NeuralNetwork2()

In [None]:
# CrossEntropyLoss combines LogSoftmax and NLLLoss (negative log-likelihood)
lossFn = nn.CrossEntropyLoss()

# Plain SGD over the parameters of the Exp 2_2 model
optimizer1 = torch.optim.SGD(
    params=exp2_2.parameters(),
    lr=learningRate,
)

In [None]:
# Per-epoch history for Exp 2_2. The 'TrainError'/'TestError' columns hold
# the accuracy percentages returned by test_loop.
data2_2 = {'Epoch': [], 'TrainError': [], 'TestError': []}

for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")

    # One optimisation pass over the training split
    train_loop(train_dataloader, exp2_2, lossFn, optimizer1)

    # Score the updated model on the training split, then on the test split
    print("Train Error : ", end='\t')
    trainAccuracy = test_loop(train_dataloader, exp2_2, lossFn)
    print("Test Error : ", end='\t')
    testAccuracy = test_loop(test_dataloader, exp2_2, lossFn)

    # Training accuracy more than 5 points above test accuracy is reported
    # as overfitting
    print(
        "Low bias , High Variance\n"
        if trainAccuracy - testAccuracy > 5
        else "Decent\n"
    )

    # Record this epoch
    data2_2['TestError'].append(testAccuracy)
    data2_2['TrainError'].append(trainAccuracy)
    data2_2['Epoch'].append(t + 1)

print("Done!")

# Tabulate the history; the bare name displays it as the cell output
df2_2 = pd.DataFrame(data2_2)
df2_2

Epoch 1
-------------------------------
loss: 0.291756  [  128/60000]
loss: 0.426501  [12928/60000]
loss: 0.313133  [25728/60000]
loss: 0.313550  [38528/60000]
loss: 0.303722  [51328/60000]
Train Error : 	Accuracy: 87.6%, Avg loss: 0.335936 

Test Error : 	Accuracy: 84.9%, Avg loss: 0.414450 

Decent

Epoch 2
-------------------------------
loss: 0.277063  [  128/60000]
loss: 0.342437  [12928/60000]
loss: 0.426689  [25728/60000]
loss: 0.250914  [38528/60000]
loss: 0.294195  [51328/60000]
Train Error : 	Accuracy: 88.6%, Avg loss: 0.310638 

Test Error : 	Accuracy: 85.9%, Avg loss: 0.391184 

Decent

Epoch 3
-------------------------------
loss: 0.396803  [  128/60000]
loss: 0.283741  [12928/60000]
loss: 0.311824  [25728/60000]
loss: 0.332489  [38528/60000]
loss: 0.361928  [51328/60000]
Train Error : 	Accuracy: 88.9%, Avg loss: 0.302364 

Test Error : 	Accuracy: 86.1%, Avg loss: 0.390103 

Decent

Epoch 4
-------------------------------
loss: 0.296696  [  128/60000]
loss: 0.244129  [1292

Unnamed: 0,Epoch,TrainError,TestError
0,1,87.588333,84.93
1,2,88.616667,85.88
2,3,88.88,86.06
3,4,87.92,84.99
4,5,88.921667,86.08
5,6,89.533333,86.9
6,7,89.045,85.87
7,8,87.411667,84.26
8,9,88.941667,86.04
9,10,88.645,85.55


In [None]:
# Plot the accuracy histories of the two runs side by side, using the
# legend labels "RandomInit" and "Random+ReLU+BatchNorm"
dfs2_2 = [
    df2_1_1,
    df2_2,
]
plot_accuracy_comparison(
    dfs2_2,
    ["RandomInit", "Random+ReLU+BatchNorm"],
)

## Exp 2_3

Random+ReLU+DropOut(0.3)

Earlier experiments showed that Dropout gave good results with a probability of 0.3 and, similarly, that L2 regularisation worked well with a weight decay of 0.001. In both cases the batch size was 128, the learning rate was 1e-1, and the model was trained for 20 epochs.

In [None]:
# Exp 2_3: ReLU network with dropout as the only addition
# (weight_init=None, batch_norm=False).
# The dropout probability is 0.3 so that the model matches this experiment's
# title and the "DropOut(0.3)" label used when plotting its results.
activation = 'relu'

exp2_3 = NeuralNetwork(
    activation=activation,
    weight_init=None,
    batch_norm=False,
    dropout={'p': 0.3},
)


In [None]:
# Hyperparameters for this experiment
epochs = 20
batchSize = 128
learningRate = 1e-1

# Shuffled mini-batch loaders over the training and test splits
train_dataloader, test_dataloader = (
    DataLoader(split, batch_size=batchSize, shuffle=True)
    for split in (training_data, test_data)
)

In [None]:
# CrossEntropyLoss combines LogSoftmax and NLLLoss (negative log-likelihood)
lossFn = nn.CrossEntropyLoss()

# Plain SGD over the parameters of the Exp 2_3 model
optimizer2_3 = torch.optim.SGD(
    params=exp2_3.parameters(),
    lr=learningRate,
)

In [None]:
# Per-epoch history for Exp 2_3. The 'TrainError'/'TestError' columns hold
# the accuracy values returned by test_loop.
data2_3 = {'Epoch': [], 'TrainError': [], 'TestError': []}

for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")

    # One optimisation pass over the training split
    train_loop(train_dataloader, exp2_3, lossFn, optimizer2_3)

    # Score the updated model on the training split, then on the test split
    print("Train Error : ", end='\t')
    trainAccuracy = test_loop(train_dataloader, exp2_3, lossFn)
    print("Test Error : ", end='\t')
    testAccuracy = test_loop(test_dataloader, exp2_3, lossFn)

    # Training accuracy more than 5 points above test accuracy is reported
    # as overfitting
    print(
        "Low bias , High Variance\n"
        if trainAccuracy - testAccuracy > 5
        else "Decent\n"
    )

    # Record this epoch
    data2_3['TestError'].append(testAccuracy)
    data2_3['TrainError'].append(trainAccuracy)
    data2_3['Epoch'].append(t + 1)

print("Done!")

# Tabulate the history; the bare name displays it as the cell output
df2_3 = pd.DataFrame(data2_3)
df2_3

Epoch 1
-------------------------------
loss: 2.311474  [  128/60000]
loss: 1.091189  [12928/60000]
loss: 0.923344  [25728/60000]
loss: 0.750902  [38528/60000]
loss: 0.631929  [51328/60000]
Train Error : 	Accuracy: 79.7%, Avg loss: 0.556932 

Test Error : 	Accuracy: 78.8%, Avg loss: 0.579291 

Decent

Epoch 2
-------------------------------
loss: 0.646215  [  128/60000]
loss: 0.572156  [12928/60000]
loss: 0.519485  [25728/60000]
loss: 0.424963  [38528/60000]
loss: 0.389021  [51328/60000]
Train Error : 	Accuracy: 83.1%, Avg loss: 0.466615 

Test Error : 	Accuracy: 81.8%, Avg loss: 0.492685 

Decent

Epoch 3
-------------------------------
loss: 0.551143  [  128/60000]
loss: 0.481865  [12928/60000]
loss: 0.607014  [25728/60000]
loss: 0.601497  [38528/60000]
loss: 0.495616  [51328/60000]
Train Error : 	Accuracy: 84.5%, Avg loss: 0.421111 

Test Error : 	Accuracy: 83.2%, Avg loss: 0.456619 

Decent

Epoch 4
-------------------------------
loss: 0.437070  [  128/60000]
loss: 0.593724  [1292

In [None]:
# Plot the accuracy histories of the two runs side by side, using the
# legend labels "RandomInit" and "Random+ReLU+DropOut(0.3)"
dfs2_3 = [
    df2_1_1,
    df2_3,
]
plot_accuracy_comparison(
    dfs2_3,
    ["RandomInit", "Random+ReLU+DropOut(0.3)"],
)

## Exp 2_4

In [None]:
# Exp 2_4 model: ReLU activations with weight_init=None, no batch norm and
# no dropout.
# NOTE(review): weight_init=None presumably keeps the layers' default
# initialisation (not He) - confirm against the NeuralNetwork definition.
activation = 'relu'

exp2_4 = NeuralNetwork(
    activation=activation,
    weight_init=None,
    batch_norm=False,
    dropout=None,
)

In [None]:
# CrossEntropyLoss combines LogSoftmax and NLLLoss (negative log-likelihood)
lossFn = nn.CrossEntropyLoss()

# The optimizer must own exp2_4's parameters: it was previously built on
# `model.parameters()`, so stepping it never updated the network that the
# Exp 2_4 loop trains and evaluates. weight_decay adds the L2 penalty.
optimizer2_4 = torch.optim.SGD(
    exp2_4.parameters(),
    lr=learningRate,
    weight_decay=0.001,
)

In [None]:
# Per-epoch history for Exp 2_4. The 'TrainError'/'TestError' columns hold
# the accuracy values returned by test_loop.
data2_4 = {'Epoch': [], 'TrainError': [], 'TestError': []}

for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")

    # One optimisation pass over the training split
    train_loop(train_dataloader, exp2_4, lossFn, optimizer2_4)

    # Score the updated model on the training split, then on the test split
    print("Train Error : ", end='\t')
    trainAccuracy = test_loop(train_dataloader, exp2_4, lossFn)
    print("Test Error : ", end='\t')
    testAccuracy = test_loop(test_dataloader, exp2_4, lossFn)

    # Training accuracy more than 5 points above test accuracy is reported
    # as overfitting
    print(
        "Low bias , High Variance\n"
        if trainAccuracy - testAccuracy > 5
        else "Decent\n"
    )

    # Record this epoch
    data2_4['TestError'].append(testAccuracy)
    data2_4['TrainError'].append(trainAccuracy)
    data2_4['Epoch'].append(t + 1)

print("Done!")

# Tabulate the history; the bare name displays it as the cell output
df2_4 = pd.DataFrame(data2_4)
df2_4

In [None]:
# Plot the Exp 2_4 history (df2_4, trained with weight_decay=0.001) against
# the run labelled "RandomInit". This cell previously re-plotted the Exp 2_3
# dropout results under the dropout label.
dfs2_4 = [df2_1_1, df2_4]
plot_accuracy_comparison(dfs2_4, ["RandomInit", "Random+ReLU+L2(0.001)"])