Imports needed 

In [28]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, TensorDataset
from torchvision import datasets
from torchvision.transforms import ToTensor


Loading Dataset

In [29]:
# Read the MNIST training and test sets from the local archive.
# download=False means the files must already exist under MNIST_ROOT.
MNIST_ROOT = "/archive"
BATCH_SIZE = 32  # samples per mini-batch, shared by both loaders

dataset = datasets.MNIST(root=MNIST_ROOT, download=False, train=True, transform=ToTensor())
# Shuffle the training batches on every pass so SGD does not see the samples
# in the same order each epoch.
data_loaded = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)

# The test loader keeps the on-disk order; evaluation does not need shuffling.
test_data = datasets.MNIST(root=MNIST_ROOT, download=False, train=False, transform=ToTensor())
test_dataloaded = DataLoader(test_data, batch_size=BATCH_SIZE)


PreProcessing Data

Splitting Data

In [43]:
def _to_image_dataset(pixels, labels):
    """Wrap raw pixel and label arrays in a TensorDataset the CNN can consume.

    `dataset.data` is read directly and therefore bypasses the ToTensor()
    transform, so the same preparation is applied here: add a channel
    dimension and convert to float scaled by 1/255.
    NOTE(review): assumes pixels are uint8 with shape (N, 28, 28), as
    torchvision stores MNIST -- confirm if the data source changes.
    """
    images = torch.as_tensor(pixels).unsqueeze(1).float() / 255.0
    return TensorDataset(images, torch.as_tensor(labels))


# Features and targets as stored on the dataset object (untransformed).
x = dataset.data
y = dataset.targets

# Hold out 20% of the training set for validation; the fixed seed makes the
# split reproducible across kernel restarts.
x_train, x_valid, y_train, y_valid = train_test_split(x, y, test_size=0.2, random_state=42)

# Tensors have no `.join`; pair features with labels through TensorDataset so
# the result can be passed straight to a DataLoader.
train_data = _to_image_dataset(x_train, y_train)
valid_data = _to_image_dataset(x_valid, y_valid)

Building Neural network Model Architecture (Class)


In [25]:
#Class containing the constructor and the forward_prop function
#Constructor --> defines the input/hidden/output layers and the relation between them
#Forward_prop --> defines the activation function used in the forward pass (ReLU)

#Using a CNN because some layers don't need to be fully connected --> locally connected only
#Using a CNN for image processing so it won't take as long to process as a regular (fully connected) neural network
#Pooling decreases the size of the convolution output

#Image --> Convolution --> Pooling --> Flatten --> Fully connected

class neural_net(nn.Module):
    """Small CNN digit classifier: two conv/pool stages followed by four linear layers.

    For a 1x28x28 input the feature map shrinks 28 -> 24 -> 12 -> 8 -> 4, so the
    second pooling stage yields 20 channels of 4x4 = 320 features, which is the
    input width of the first fully connected layer.

    `forward` returns raw class scores (logits); `forward_prop` returns
    per-class probabilities.
    """

    def __init__(self):
        super().__init__()

        # 1) Convolution layers (5x5 kernels, no padding)
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)

        # Channel-wise dropout for regularisation; does not change the data shape
        self.dropout2 = nn.Dropout2d()

        # Fully connected layers: 320 --> 150 --> 80 --> 30 --> 10
        # The 10 outputs represent the digits 0-9.
        self.fc1 = nn.Linear(320, 150)
        self.fc2 = nn.Linear(150, 80)
        self.fc3 = nn.Linear(80, 30)
        self.out = nn.Linear(30, 10)

    def forward(self, x):
        """Run the forward pass and return raw class scores (logits).

        Defining `forward` is what makes `model(x)` work. The logits are
        returned unnormalised because nn.CrossEntropyLoss applies log-softmax
        itself.

        Args:
            x: image batch; the layer sizes assume 1x28x28 inputs.

        Returns:
            Tensor of shape (batch, 10) with one score per digit.
        """
        # 2) Convolution --> pooling --> ReLU
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        x = F.relu(F.max_pool2d(self.dropout2(self.conv2(x)), 2))

        # 3) Flatten for the linear layers; -1 lets the batch size vary.
        # view() returns a new tensor, so the result must be assigned back.
        x = x.view(-1, 320)

        # 4) Fully connected layers
        x = F.relu(self.fc1(x))
        # Second dropout for regularisation; only active in training mode
        x = F.dropout(x, training=self.training)
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        return self.out(x)

    def forward_prop(self, x):
        """Return the probability of each digit for every image in the batch.

        Kept for existing callers. Use `model(x)` / `forward` when computing a
        cross-entropy loss.
        """
        # dim=1 normalises across the 10 class scores of each sample
        return F.softmax(self.forward(x), dim=1)


#create model instance
model = neural_net()

neural_net(
  (conv1): Conv2d(1, 10, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(10, 20, kernel_size=(5, 5), stride=(1, 1))
  (dropout2): Dropout2d(p=0.5, inplace=False)
  (fc1): Linear(in_features=320, out_features=150, bias=True)
  (fc2): Linear(in_features=150, out_features=80, bias=True)
  (fc3): Linear(in_features=80, out_features=30, bias=True)
  (out): Linear(in_features=30, out_features=10, bias=True)
)

Set Optimizer,Learning Rate,Loss Function definition

In [None]:
# Optimizer --> SGD with momentum.
# Learning rate --> 0.01 (the smaller the learning rate, the longer training takes).
# The class is `optim.SGD`; lowercase `optim.sgd` is the submodule and is not callable.
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)

# Loss --> Cross Entropy.
# nn.CrossEntropyLoss expects raw, unnormalised class scores (logits) as input.
loss = nn.CrossEntropyLoss()

Training the Model (Loop)

In [None]:
# Per-epoch history for the training set
train_loss = []
train_correct = []

# Per-epoch history for the validation set
validation_loss = []
validation_correct = []
# Original (misspelled) name kept as an alias to the same list so that any
# existing cell that appends to it keeps working.
validtaion_correct = validation_correct


#Epochs Number (no. of runs on training set)

#Training Function

#Update the parameters
#If the loss doesn't decrease in output --> smaller learning rate


Plotting Training and validation loss and accuracy

Calling the training and validation Function

In [None]:
#Change learning rates, batch sizes


Testing 

In [None]:
#Test and count how many predictions were correct

Saving Model