Imports needed 

In [116]:
import torch
import numpy as np
from torchvision import datasets
from torchvision.transforms import ToTensor
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, TensorDataset
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim import SGD

from sklearn.model_selection import train_test_split


Loading Dataset

In [117]:
#Reading from given archive file both the training and the test sets

dataset = datasets.MNIST(root="/archive", download=False, train=True, transform=ToTensor())

test_data = datasets.MNIST(root="/archive", download=False, train=False, transform=ToTensor())
test_loader = DataLoader(test_data, batch_size=32)


Splitting Data

In [118]:
#Specifying features and targets
x = dataset.data
y = dataset.targets
#Splitting into train set and validation set
x_train, x_valid, y_train, y_valid = train_test_split(x,y,test_size=0.2)


train_data = TensorDataset(x_train,y_train)

valid_data = TensorDataset(x_valid,y_valid)

#Dataloader was used to create batch sizes -->32


PreProcessing Data

In [119]:
x_train = x_train.reshape(-1,400).float()/255.0
x_valid = x_valid.reshape(-1,400).float()/255.0


Building Neural network Model Architecture (Class)


In [120]:
#class containing constructor and forward_prop function 
#Constructor --> defining input/hidden/output layers and the realtion between them
#Forward_prop --> defining the activation function for forward pass (relu)

class neural_net(nn.Module):
    def __init__(self):
        super().__init__()
        #defining architecture

        #Fully Connected layers
        #400-->280-->150-->80-->30-->10

        # In layer --> 400 neurons
        # hidden1 --> 280 neurons
        # hidden2 --> 150 neurons
        # hidden3 --> 80 neurons
        # hidden4 --> 30 neurons
        # out layer --> 10 neurons (representing 0-->9)
        self.fc1 = nn.Linear(400,280)
        self.fc2 = nn.Linear(280,150)
        self.fc3 = nn.Linear(150,80)
        self.fc4 = nn.Linear(80,30)
        self.out = nn.Linear(30,10)
        

    #forward propagation function    
    def forward(self,x):
        #using ReLu Activation function
        x = F.relu(self.fc1(x))
        #Bonus: dropout layer --> for regularization
        x = F.dropout(x, training=self.training)
        
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = F.relu(self.fc4(x))
        x = self.out(x)

        return F.softmax(x) #--> returns probability of each digit
    

#create model instance
model = neural_net()

Set Optimizer,Learning Rate,Loss Function definition

In [121]:
#Learning rate --> 0.01 (the smaller lr is the longer training time)
#Op --> SGD
optimizer = optim.SGD(model.parameters(), lr = 0.01, momentum = 0.5)

#Loss --> Cross Entropy
loss = nn.CrossEntropyLoss()

Loaders

In [122]:
#Array to track training
train_loss_array = []
train_correct_array = []

validation_loss_array =[]
validtaion_correct_array =[]


#Epochs Number (no. of runs on training set)
def loading(b):
    global train_loader
    train_loader= DataLoader(list(zip(x_train,y_train)),shuffle=True, batch_size=b)
    global valid_loader 
    valid_loader= DataLoader(list(zip(x_valid,y_valid)),shuffle=False, batch_size=b)

 
#If the loss doesn't decrease in output --> smaller learning rate


Training Function

In [123]:
def training(epoch,b):
    model.train()
    for i,(x_batch ,y_batch) in enumerate(train_loader,0):
        optimizer.zero_grad()
        y_pred = model(x_batch)
        trainingloss = loss(y_pred,y_batch)
        train_loss_array.append(trainingloss.item())
        
        trainingloss.backward()
        optimizer.step()
    print('Epoch: {}: Batch: {}, Training Loss: {}'.format(epoch + 1, b, np.mean(train_loss_array)))
   

Validation function

In [124]:
def validating(b):
    model.eval()
    for i,(x_batch, y_batch) in enumerate(valid_loader,0):

        y_pred = model(x_batch)
        validloss = loss(y_pred,y_batch)
        validation_loss_array.append(validloss.item())

        validloss.backward()
    print('          Batch: {}, Validation Loss: {}'.format(b, np.mean(validation_loss_array)))
    print('-------------------------------------------')


Training the model and evaluating 

In [125]:
batch_sizes = [32, 64, 128, 256, 512]
for batch_size in batch_sizes:
    loading(batch_size)
    for epoch in range(3):
        training(epoch,batch_size)
    validating(batch_size)

  return F.softmax(x) #--> returns probability of each digit


Epoch: 1: Batch: 32, Training Loss: 2.3023904250462848
Epoch: 2: Batch: 32, Training Loss: 2.3023361012935637
Epoch: 3: Batch: 32, Training Loss: 2.3022822767363653
          Batch: 32, Validation Loss: 2.302283129374186
-------------------------------------------
Epoch: 1: Batch: 64, Training Loss: 2.302256688390459
Epoch: 2: Batch: 64, Training Loss: 2.3022319426139197
Epoch: 3: Batch: 64, Training Loss: 2.302207581449438
          Batch: 64, Validation Loss: 2.302248960701653
-------------------------------------------
Epoch: 1: Batch: 128, Training Loss: 2.302195402580395
Epoch: 2: Batch: 128, Training Loss: 2.3021834071795144
Epoch: 3: Batch: 128, Training Loss: 2.3021716345529706
          Batch: 128, Validation Loss: 2.3022334829675914
-------------------------------------------
Epoch: 1: Batch: 256, Training Loss: 2.3021658556818827
Epoch: 2: Batch: 256, Training Loss: 2.3021601100586904
Epoch: 3: Batch: 256, Training Loss: 2.3021544271883405
          Batch: 256, Validation Lo

Plotting Training and validation loss and accuracy

Testing 

In [126]:
#Test And count how many was correct

Saving Model