In [1]:
%matplotlib inline
import torch
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import matplotlib.pyplot as plt
from torchvision import transforms, datasets

In [2]:

BatchSize = 8  # Batch size may be increased for faster training

# Shared preprocessing: downscale every digit image to 8x8 pixels, then convert it to a tensor
preprocess = transforms.Compose([
    transforms.Resize((8, 8)),
    transforms.ToTensor(),
])

# Training split, reshuffled every epoch
trainset = datasets.MNIST(root='./MNIST', train=True, download=True, transform=preprocess)
trainLoader = torch.utils.data.DataLoader(
    trainset, batch_size=BatchSize, shuffle=True, num_workers=4)

# Test split: same preprocessing as the training split (no rotation or other
# augmentation is applied), iterated in a fixed order
testset = datasets.MNIST(root='./MNIST', train=False, download=True, transform=preprocess)
testLoader = torch.utils.data.DataLoader(
    testset, batch_size=BatchSize, shuffle=False, num_workers=4)

In [3]:

# Report how many samples each split contains
n_train_samples = len(trainLoader.dataset)
n_test_samples = len(testLoader.dataset)
print(f'No. of samples in train set: {n_train_samples}')
print(f'No. of samples in test set: {n_test_samples}')

No. of samples in train set: 60000
No. of samples in test set: 10000


In [4]:

class DNN(nn.Module):
    """Fully connected classifier for flattened 8x8 images (64 input features).

    Layer widths are 64 -> 50 -> 50 -> 25 -> 25 -> 10, with a ReLU after each
    of the four hidden layers. The output is a log-probability vector over the
    10 classes.
    """

    def __init__(self):
        super().__init__()
        # Layers are created in this order so parameter initialisation order is fixed
        self.fc1 = nn.Linear(64, 50)
        self.fc2 = nn.Linear(50, 50)
        self.fc3 = nn.Linear(50, 25)
        self.fc4 = nn.Linear(25, 25)
        self.fc5 = nn.Linear(25, 10)

    def forward(self, x):
        """Map a (batch, 64) tensor to (batch, 10) log-probabilities."""
        # Hidden stack: linear layer followed by ReLU, four times
        for hidden_layer in (self.fc1, self.fc2, self.fc3, self.fc4):
            x = F.relu(hidden_layer(x))
        # Output layer, normalised across the class dimension
        return F.log_softmax(self.fc5(x), dim=1)
    
# Build the network and show its layer structure
net = DNN()
print(net)

DNN(
  (fc1): Linear(in_features=64, out_features=50, bias=True)
  (fc2): Linear(in_features=50, out_features=50, bias=True)
  (fc3): Linear(in_features=50, out_features=25, bias=True)
  (fc4): Linear(in_features=25, out_features=25, bias=True)
  (fc5): Linear(in_features=25, out_features=10, bias=True)
)


#parameters = 7985


In [5]:
from torch import optim

# Training hyper-parameters
learning_rate = 0.020
num_epochs = 80

# DNN.forward already returns log-probabilities (F.log_softmax), so the matching
# loss is NLLLoss. CrossEntropyLoss expects raw logits and would apply
# log_softmax a second time; the loss value is the same, the extra pass is not needed.
criterion = nn.NLLLoss()
optimizer = optim.SGD(net.parameters(), lr=learning_rate)  # stochastic gradient descent

In [6]:
# Per-epoch history, kept for plotting after training
train_loss = []
train_acc = []

num_train_samples = len(trainLoader.dataset)
num_train_batches = len(trainLoader)

net.train()
for epoch in range(num_epochs):
    running_loss = 0.0  # sum of per-batch losses (Python float, no autograd graph attached)
    running_corr = 0    # number of correctly classified training samples

    for inputs, labels in trainLoader:
        # Clear gradients left over from the previous batch
        optimizer.zero_grad()
        # Flatten each image to a vector and feed it through the network
        outputs = net(inputs.view(inputs.shape[0], -1))
        loss = criterion(outputs, labels)
        loss.backward()
        # optimizer.step() is the only parameter update; applying the gradients
        # by hand afterwards would reuse the last batch's gradient a second time.
        optimizer.step()

        # .item() detaches the value so the running sum does not keep graphs alive
        running_loss += loss.item()
        # Predicted class is the one with the highest score
        preds = torch.argmax(outputs, dim=1)
        running_corr += torch.sum(preds == labels).item()

    epoch_loss = running_loss / num_train_batches  # mean batch loss for this epoch
    epoch_acc = running_corr / num_train_samples   # fraction of samples classified correctly

    train_loss.append(epoch_loss)
    train_acc.append(epoch_acc)

    print('Epoch {:.0f}/{:.0f} : Training loss: {:.4f} | Training Accuracy: {:.4f}'.format(epoch+1,num_epochs,epoch_loss,epoch_acc*100))




Epoch 1/80 : Training loss: 1.4750 | Training Accuracy: 45.2900
Epoch 2/80 : Training loss: 0.4064 | Training Accuracy: 87.3450
Epoch 3/80 : Training loss: 0.2747 | Training Accuracy: 91.6167
Epoch 4/80 : Training loss: 0.2278 | Training Accuracy: 93.0133
Epoch 5/80 : Training loss: 0.1990 | Training Accuracy: 93.9350
Epoch 6/80 : Training loss: 0.1762 | Training Accuracy: 94.5517
Epoch 7/80 : Training loss: 0.1602 | Training Accuracy: 95.0733
Epoch 8/80 : Training loss: 0.1486 | Training Accuracy: 95.3983
Epoch 9/80 : Training loss: 0.1400 | Training Accuracy: 95.5933
Epoch 10/80 : Training loss: 0.1306 | Training Accuracy: 95.9133
Epoch 11/80 : Training loss: 0.1240 | Training Accuracy: 96.1000
Epoch 12/80 : Training loss: 0.1189 | Training Accuracy: 96.2500
Epoch 13/80 : Training loss: 0.1139 | Training Accuracy: 96.4400
Epoch 14/80 : Training loss: 0.1082 | Training Accuracy: 96.5983
Epoch 15/80 : Training loss: 0.1039 | Training Accuracy: 96.6883
Epoch 16/80 : Training loss: 0.101

In [7]:
# Evaluate on the test split: no gradients are needed, and the network is put in
# evaluation mode so any mode-dependent layers behave deterministically.
net.eval()
correct_pred = 0
with torch.no_grad():
    for inputs, labels in testLoader:
        # Feed the flattened test batch through the model
        output = net(inputs.view(inputs.shape[0], -1))
        # Predicted class is the one with the highest score
        preds = torch.argmax(output, dim=1)
        correct_pred += torch.sum(preds == labels)

# Divide by the actual dataset size rather than a hard-coded sample count
test_accuracy = correct_pred.item() / len(testLoader.dataset)
print('Testing accuracy = ',test_accuracy*100)

Testing accuracy =  97.39


The model was able to reach only this level of accuracy because of the limit on the number of parameters.