In [24]:
# Import the libraries we need to use in this lab

# Using the following line code to install the torchvision library
# !mamba install -y torchvision

import torch 
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.datasets as dsets
import matplotlib.pylab as plt
import numpy as np

torch.manual_seed(0)

<torch._C.Generator at 0x229c71dc290>

In [25]:
# Create Xavier initialization neural network 

class Net_Xavier(nn.Module):
    # Constructor
    def __init__(self, Layers):
        super(Net_Xavier, self).__init__()
        self.hidden = nn.ModuleList()

        for input_size, output_size in zip(Layers, Layers[1:]):
            linear = nn.Linear(input_size, output_size)
            torch.nn.init.xavier_uniform_(linear.weight)
            self.hidden.append(linear)

    # Predictor 
    def forward(self, x):
        L = len(self.hidden)
        for (l, linear_transform) in zip(range(L), self.hidden):
            if l < L -1:
                x = torch.tanh(linear_transform(z))
            else:
                x = linear_transform(x)
        return x

In [26]:
# Define neural network with uniform initialization 

class Net_Uniform(nn.Module):
    # Constructor 
    def __init__(self, Layers):
        super(Net_Uniform, self).__init__()
        self.hidden = nn.ModuleList()

        for input_size, output_size in zip(Layers, Layers[:]):
            linear = nn.Linear(input_size, output_size)
            linear.weight.data.uniform(0,1)
            self.hidden.append(linear)

    # Predictor 
    def forward(self, x):
        L = len(self.hidden)
        for (l, linear_transform) in zip(range(L), self.hidden):
            if l < L -1 :
                x = torch.tanh(linear_transform(x))
            else:
                x = linear_transform(x)
        return x 

In [27]:
# Define neural network with default initialization 

class Net(nn.Module):
    # Constructor 
    def __init__(self, Layers):
        super(Net, self).__init__()
        self.hidden = nn.ModuleList()

        for input_size, output_size in zip(Layers, Layers[:]):
            linear = nn.Linear(input_size, output_size)
            linear.weight.uniform_(0,1)
            self.hidden.append(linear)

    # Predictor 
    def forward(self, x):
        L = len(self.hidden)
        for (l, linear_transform) in zip(range(L), self.hidden):
            if l < L -1 :
                x = torch.tanh(linear_transform(x))
            else:
                x = linear_transform(x)
        return x 

In [28]:
# Define the train model 

def train(model, criterion, train_loader, validation_loader, optimizer, epochs=100):
    i = 0
    loss_accuracy = {'training_loss':[], 'validation_accuracy':[]}

    for epoch in range(epochs):
        for i,(x,y) in enumerate(train_loader):
            optimizer.zero_grad()
            z = model(x.view(-1,28*28))
            loss = criterion(z,y)
            loss.backward()
            optimizer.step()
            loss_accuracy['training_loss'].append(loss.data.item())

        correct = 0
        for x,y in validation_loader:
            yhat = model(x.view(-1,28*28))
            _,label = torch.max(yhat, 1)
            correct += (label ==y).sum().item()
        accuracy = 100 * (correct/len(validation_dataset))
        loss_accuracy['validation_accuracy'].append(accuracy)

    return loss_accuracy

In [29]:
# Create the train data 
train_dataset = dsets.MNIST(root='./data', train=True, download=True, transform=transforms.ToTensor())

In [30]:
# Create the validation data 
validation_dataset = dsets.MNIST(root='./data', download=True, transform=transforms.ToTensor())

In [31]:
# CReate loader for train data 
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=2000, shuffle=True)

# CReate loader foralidation data 
validation_loader = torch.utils.data.DataLoader(dataset=validation_dataset, batch_size=2000, shuffle=True)

In [32]:
# Define parameter, criterion and oprimzier 
criterion = nn.CrossEntropyLoss()

# Set the parameter 
input_dim = 28*28
output_dim = 10
layers = [input_dim, 100,10,100,10,100, output_dim]
epochs = 15

In [33]:
# Test the initialization 

model = Net(layers)
learning_rate = 0.1 
optimizer= torch.optim.SGD(model.parameters(), lr=learning_rate)
training_results = train(model, criterion, train_loader, validation_loader, optimizer, epochs=epoch)

RuntimeError: a leaf Variable that requires grad is being used in an in-place operation.

In [None]:
# Train the model with Uniform initialization

model_Uniform = Net_Uniform(layers)
optimizer = torch.optim.SGD(model_Uniform.parameters(), lr=learning_rate)
training_results_Uniform = train(model_Uniform, criterion, train_loader, validation_loader, optimizer, epochs=epochs)

In [None]:
# Plot the loss

plt.plot(training_results_Xavier['training_loss'], label='Xavier')
plt.plot(training_results['training_loss'], label='Default')
plt.plot(training_results_Uniform['training_loss'], label='Uniform')
plt.ylabel('loss')
plt.xlabel('iteration ')  
plt.title('training loss iterations')
plt.legend()



In [None]:
# Plot the accuracy

plt.plot(training_results_Xavier['validation_accuracy'], label='Xavier')
plt.plot(training_results['validation_accuracy'], label='Default')
plt.plot(training_results_Uniform['validation_accuracy'], label='Uniform') 
plt.ylabel('validation accuracy')
plt.xlabel('epochs')   
plt.legend()