In [1]:
import os
from pathlib import Path
import torch
from torch.utils.data import TensorDataset ,DataLoader
from torch import nn,optim
import torch.nn.functional as F
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

In [2]:
from torchvision import datasets, transforms

# Define a transform to normalize the data
transform=transforms.Compose([transforms.ToTensor(),
                             transforms.Normalize([0.5],[0.5])])
trainset = datasets.MNIST('MNIST_data/', download=True, train=True, transform=transform)
testset = datasets.MNIST('MNIST_data/', download=False, train=True, transform=transform)

In [3]:
batchsize=64
train_loader=torch.utils.data.DataLoader(trainset,batch_size=batchsize,shuffle=True)
test_loader=torch.utils.data.DataLoader(testset,batch_size=batchsize,shuffle=True)

### Model A: 1 Hidden Layer Feedforward Neural Network (Sigmoid Activation)

In [40]:
class Network(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, output_dim)
        

        # Dropout module with 0.2 drop probability
        self.dropout = nn.Dropout(p=0.2)

    def forward(self, x):
        # make sure input tensor is flattened
        x = x.view(x.shape[0], -1)

        # Now with dropout
        x = self.dropout(F.sigmoid(self.fc1(x)))
       

        # output so no dropout here
        x = F.log_softmax(self.fc2(x), dim=1)

        return x
        


In [41]:
input_dim = 784
hidden_dim = 64
output_dim = 10
learning_rate = 0.1
model=Network(input_dim, hidden_dim, output_dim)
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate) 
criterion=nn.NLLLoss()

In [42]:
print(model.parameters())
print(len(list(model.parameters())))
# FC 1 Parameters 
print(list(model.parameters())[0].size())
# FC 1 Bias Parameters
print(list(model.parameters())[1].size())
# FC 2 Parameters
print(list(model.parameters())[2].size())
# FC 2 Bias Parameters
print(list(model.parameters())[3].size())
print("No.of parameters", sum([p.numel() for p in model.parameters()]))

<generator object Module.parameters at 0x000000000720D5E8>
4
torch.Size([64, 784])
torch.Size([64])
torch.Size([10, 64])
torch.Size([10])
No.of parameters 50890


In [12]:
from torch.autograd import Variable
epochs=10
train_losses,test_losses=[],[]

for e in range(epochs):
    running_loss=0
    
    for images, labels in train_loader:
        images = Variable(images.view(-1, 28*28))         # Convert torch tensor to Variable: change image from a vector of size 784 to a matrix of 28 x 28
        labels = Variable(labels)
        optimizer.zero_grad()
        log_ps = model(images)
        loss=criterion(log_ps,labels)
        loss.backward()
        optimizer.step()
        running_loss+=loss.item()
        
    else:
        test_loss=0
        accuracy=0
        
        with torch.no_grad():
            model.eval()
            for images,labels in test_loader:
                images = Variable(images.view(-1, 28*28))         # Convert torch tensor to Variable: change image from a vector of size 784 to a matrix of 28 x 28
                labels = Variable(labels)
                log_ps=model(images)
                test_loss+=criterion(log_ps,labels)
                ps=torch.exp(log_ps)
                top_p,top_class=ps.topk(1,dim=1)
                equals=top_class==labels.view(*top_class.shape)
                accuracy+=torch.mean(equals.type(torch.FloatTensor))
        model.train()
        train_losses.append(running_loss/len(train_loader))
        test_losses.append(test_loss/len(test_loader))

        print("Epoch: {}/{}.. ".format(e+1, epochs),
              "Training Loss: {:.3f}.. ".format(running_loss/len(train_loader)),
              "Test Loss: {:.3f}.. ".format(test_loss/len(test_loader)),
              "Test Accuracy: {:.3f}".format(accuracy/len(test_loader)))

Epoch: 1/10..  Training Loss: 0.718..  Test Loss: 0.383..  Test Accuracy: 0.893
Epoch: 2/10..  Training Loss: 0.364..  Test Loss: 0.293..  Test Accuracy: 0.915
Epoch: 3/10..  Training Loss: 0.309..  Test Loss: 0.256..  Test Accuracy: 0.923
Epoch: 4/10..  Training Loss: 0.275..  Test Loss: 0.230..  Test Accuracy: 0.932
Epoch: 5/10..  Training Loss: 0.252..  Test Loss: 0.204..  Test Accuracy: 0.943
Epoch: 6/10..  Training Loss: 0.233..  Test Loss: 0.185..  Test Accuracy: 0.946
Epoch: 7/10..  Training Loss: 0.217..  Test Loss: 0.169..  Test Accuracy: 0.951
Epoch: 8/10..  Training Loss: 0.205..  Test Loss: 0.159..  Test Accuracy: 0.953
Epoch: 9/10..  Training Loss: 0.196..  Test Loss: 0.152..  Test Accuracy: 0.955
Epoch: 10/10..  Training Loss: 0.186..  Test Loss: 0.140..  Test Accuracy: 0.960


###  Model B:  1 Hidden Layer Feedforward Neural Network (Tanh Activation)

In [13]:
class NetworkT1(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, output_dim)
        

        # Dropout module with 0.2 drop probability
        self.dropout = nn.Dropout(p=0.2)

    def forward(self, x):
        # make sure input tensor is flattened
        x = x.view(x.shape[0], -1)

        # Now with dropout
        x = self.dropout(F.tanh(self.fc1(x)))
       

        # output so no dropout here
        x = F.log_softmax(self.fc2(x), dim=1)

        return x

In [14]:
input_dim = 784
hidden_dim = 64
output_dim = 10
learning_rate = 0.1
model=NetworkT1(input_dim, hidden_dim, output_dim)
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate) 
criterion=nn.NLLLoss()

In [15]:
print(model.parameters())
print(len(list(model.parameters())))
# FC 1 Parameters 
print(list(model.parameters())[0].size())
# FC 1 Bias Parameters
print(list(model.parameters())[1].size())
# FC 2 Parameters
print(list(model.parameters())[2].size())
# FC 2 Bias Parameters
print(list(model.parameters())[3].size())

<generator object Module.parameters at 0x0000000007191048>
4
torch.Size([64, 784])
torch.Size([64])
torch.Size([10, 64])
torch.Size([10])


In [16]:
from torch.autograd import Variable
epochs=10
train_losses,test_losses=[],[]

for e in range(epochs):
    running_loss=0
    
    for images, labels in train_loader:
        images = Variable(images.view(-1, 28*28))         # Convert torch tensor to Variable: change image from a vector of size 784 to a matrix of 28 x 28
        labels = Variable(labels)
        optimizer.zero_grad()
        log_ps = model(images)
        loss=criterion(log_ps,labels)
        loss.backward()
        optimizer.step()
        running_loss+=loss.item()
        
    else:
        test_loss=0
        accuracy=0
        
        with torch.no_grad():
            model.eval()
            for images,labels in test_loader:
                images = Variable(images.view(-1, 28*28))         # Convert torch tensor to Variable: change image from a vector of size 784 to a matrix of 28 x 28
                labels = Variable(labels)
                log_ps=model(images)
                test_loss+=criterion(log_ps,labels)
                ps=torch.exp(log_ps)
                top_p,top_class=ps.topk(1,dim=1)
                equals=top_class==labels.view(*top_class.shape)
                accuracy+=torch.mean(equals.type(torch.FloatTensor))
        model.train()
        train_losses.append(running_loss/len(train_loader))
        test_losses.append(test_loss/len(test_loader))

        print("Epoch: {}/{}.. ".format(e+1, epochs),
              "Training Loss: {:.3f}.. ".format(running_loss/len(train_loader)),
              "Test Loss: {:.3f}.. ".format(test_loss/len(test_loader)),
              "Test Accuracy: {:.3f}".format(accuracy/len(test_loader)))



Epoch: 1/10..  Training Loss: 0.434..  Test Loss: 0.278..  Test Accuracy: 0.914
Epoch: 2/10..  Training Loss: 0.260..  Test Loss: 0.194..  Test Accuracy: 0.943
Epoch: 3/10..  Training Loss: 0.220..  Test Loss: 0.207..  Test Accuracy: 0.936
Epoch: 4/10..  Training Loss: 0.197..  Test Loss: 0.159..  Test Accuracy: 0.952
Epoch: 5/10..  Training Loss: 0.183..  Test Loss: 0.142..  Test Accuracy: 0.958
Epoch: 6/10..  Training Loss: 0.171..  Test Loss: 0.120..  Test Accuracy: 0.965
Epoch: 7/10..  Training Loss: 0.160..  Test Loss: 0.121..  Test Accuracy: 0.963
Epoch: 8/10..  Training Loss: 0.153..  Test Loss: 0.107..  Test Accuracy: 0.968
Epoch: 9/10..  Training Loss: 0.149..  Test Loss: 0.106..  Test Accuracy: 0.968
Epoch: 10/10..  Training Loss: 0.143..  Test Loss: 0.095..  Test Accuracy: 0.971


### ModelC: 1 Hidden Layer Feedforward Neural Network (ReLU Activation)

In [37]:
class NetworkR1(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, output_dim)
        

        # Dropout module with 0.2 drop probability
        self.dropout = nn.Dropout(p=0.2)

    def forward(self, x):
        # make sure input tensor is flattened
        x = x.view(x.shape[0], -1)

        # Now with dropout
        x = self.dropout(F.relu(self.fc1(x)))
       

        # output so no dropout here
        x = F.log_softmax(self.fc2(x), dim=1)

        return x

In [21]:
input_dim = 784
hidden_dim = 64
output_dim = 10
learning_rate = 0.1
model=NetworkR1(input_dim, hidden_dim, output_dim)
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate) 
criterion=nn.NLLLoss()

In [22]:
from torch.autograd import Variable
epochs=10
train_losses,test_losses=[],[]

for e in range(epochs):
    running_loss=0
    
    for images, labels in train_loader:
        images = Variable(images.view(-1, 28*28))         # Convert torch tensor to Variable: change image from a vector of size 784 to a matrix of 28 x 28
        labels = Variable(labels)
        optimizer.zero_grad()
        log_ps = model(images)
        loss=criterion(log_ps,labels)
        loss.backward()
        optimizer.step()
        running_loss+=loss.item()
        
    else:
        test_loss=0
        accuracy=0
        
        with torch.no_grad():
            model.eval()
            for images,labels in test_loader:
                images = Variable(images.view(-1, 28*28))         # Convert torch tensor to Variable: change image from a vector of size 784 to a matrix of 28 x 28
                labels = Variable(labels)
                log_ps=model(images)
                test_loss+=criterion(log_ps,labels)
                ps=torch.exp(log_ps)
                top_p,top_class=ps.topk(1,dim=1)
                equals=top_class==labels.view(*top_class.shape)
                accuracy+=torch.mean(equals.type(torch.FloatTensor))
        model.train()
        train_losses.append(running_loss/len(train_loader))
        test_losses.append(test_loss/len(test_loader))

        print("Epoch: {}/{}.. ".format(e+1, epochs),
              "Training Loss: {:.3f}.. ".format(running_loss/len(train_loader)),
              "Test Loss: {:.3f}.. ".format(test_loss/len(test_loader)),
              "Test Accuracy: {:.3f}".format(accuracy/len(test_loader)))

Epoch: 1/10..  Training Loss: 0.491..  Test Loss: 0.379..  Test Accuracy: 0.871
Epoch: 2/10..  Training Loss: 0.275..  Test Loss: 0.248..  Test Accuracy: 0.917
Epoch: 3/10..  Training Loss: 0.231..  Test Loss: 0.149..  Test Accuracy: 0.955
Epoch: 4/10..  Training Loss: 0.204..  Test Loss: 0.133..  Test Accuracy: 0.959
Epoch: 5/10..  Training Loss: 0.191..  Test Loss: 0.157..  Test Accuracy: 0.950
Epoch: 6/10..  Training Loss: 0.178..  Test Loss: 0.109..  Test Accuracy: 0.967
Epoch: 7/10..  Training Loss: 0.168..  Test Loss: 0.100..  Test Accuracy: 0.970
Epoch: 8/10..  Training Loss: 0.161..  Test Loss: 0.100..  Test Accuracy: 0.970
Epoch: 9/10..  Training Loss: 0.157..  Test Loss: 0.102..  Test Accuracy: 0.969
Epoch: 10/10..  Training Loss: 0.151..  Test Loss: 0.099..  Test Accuracy: 0.969


### Model D: 2 Hidden Layer Feedforward Neural Network (ReLU Activation)

In [52]:
class NetworkR2(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(784, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 10)
        

        # Dropout module with 0.2 drop probability
        self.dropout = nn.Dropout(p=0.2)

    def forward(self, x):
        # make sure input tensor is flattened
        x = x.view(x.shape[0], -1)

        # Now with dropout
        x = self.dropout(F.relu(self.fc1(x)))
        x = self.dropout(F.relu(self.fc2(x)))
        

        # output so no dropout here
        x = F.log_softmax(self.fc3(x), dim=1)

        return x
        


In [53]:

learning_rate = 0.1
model=NetworkR2()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate) 
criterion=nn.NLLLoss()

In [54]:
print(model.parameters())
print(len(list(model.parameters())))
# FC 1 Parameters 
print(list(model.parameters())[0].size())
# FC 1 Bias Parameters
print(list(model.parameters())[1].size())
# FC 2 Parameters
print(list(model.parameters())[2].size())
# FC 2 Bias Parameters
print(list(model.parameters())[3].size())

<generator object Module.parameters at 0x000000000721A930>
6
torch.Size([128, 784])
torch.Size([128])
torch.Size([64, 128])
torch.Size([64])


In [None]:
epochs=10
train_losses,test_losses=[],[]

for e in range(epochs):
    running_loss=0
    
    for images, labels in train_loader:
        #images = Variable(images.view(-1, 28*28))         # Convert torch tensor to Variable: change image from a vector of size 784 to a matrix of 28 x 28
        #labels = Variable(labels)
        optimizer.zero_grad()
        
        log_ps=model(images)
        loss=criterion(log_ps,labels)
        loss.backward()
        optimizer.step()
        running_loss+=loss.item()
        
    else:
        test_loss=0
        accuracy=0
        
        with torch.no_grad():
            model.eval()
            for images,labels in test_loader:
                log_ps=model(images)
                test_loss+=criterion(log_ps,labels)
                ps=torch.exp(log_ps)
                top_p,top_class=ps.topk(1,dim=1)
                equals=top_class==labels.view(*top_class.shape)
                accuracy+=torch.mean(equals.type(torch.FloatTensor))
        model.train()
        train_losses.append(running_loss/len(train_loader))
        test_losses.append(test_loss/len(test_loader))

        print("Epoch: {}/{}.. ".format(e+1, epochs),
              "Training Loss: {:.3f}.. ".format(running_loss/len(train_loader)),
              "Test Loss: {:.3f}.. ".format(test_loss/len(test_loader)),
              "Test Accuracy: {:.3f}".format(accuracy/len(test_loader)))

Epoch: 1/10..  Training Loss: 2.298..  Test Loss: 2.306..  Test Accuracy: 0.104
Epoch: 2/10..  Training Loss: 2.296..  Test Loss: 2.304..  Test Accuracy: 0.099
Epoch: 3/10..  Training Loss: 2.300..  Test Loss: 2.304..  Test Accuracy: 0.112
Epoch: 4/10..  Training Loss: 2.296..  Test Loss: 2.306..  Test Accuracy: 0.112
Epoch: 5/10..  Training Loss: 2.289..  Test Loss: 2.308..  Test Accuracy: 0.098
Epoch: 6/10..  Training Loss: 2.293..  Test Loss: 2.303..  Test Accuracy: 0.112
Epoch: 7/10..  Training Loss: 2.291..  Test Loss: 2.304.. 

In [60]:
print("No.of parameters", sum([p.numel() for p in model.parameters()]))

No.of parameters 109386


### Model E: 3 Hidden Layer Feedforward Neural Network (ReLU Activation)

In [None]:
class NetworkR3(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(784, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 64)
        self.fc4 = nn.Linear(64, 10)

        # Dropout module with 0.2 drop probability
        self.dropout = nn.Dropout(p=0.2)

    def forward(self, x):
        # make sure input tensor is flattened
        x = x.view(x.shape[0], -1)

        # Now with dropout
        x = self.dropout(F.relu(self.fc1(x)))
        x = self.dropout(F.relu(self.fc2(x)))
        x = self.dropout(F.relu(self.fc3(x)))

        # output so no dropout here
        x = F.log_softmax(self.fc4(x), dim=1)

        return x


In [None]:

learning_rate = 0.1
model=NetworkR3()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate) 
criterion=nn.NLLLoss()

In [None]:
print("No.of parameters", sum([p.numel() for p in model.parameters()]))

In [29]:
print(model.parameters())
print(len(list(model.parameters())))
# FC 1 Parameters 
print(list(model.parameters())[0].size())
# FC 1 Bias Parameters
print(list(model.parameters())[1].size())
# FC 2 Parameters
print(list(model.parameters())[2].size())
# FC 2 Bias Parameters
print(list(model.parameters())[3].size())

<generator object Module.parameters at 0x000000000720D6D8>
8
torch.Size([256, 784])
torch.Size([256])
torch.Size([128, 256])
torch.Size([128])


In [30]:
from torch.autograd import Variable
epochs=10
train_losses,test_losses=[],[]

for e in range(epochs):
    running_loss=0
    
    for images, labels in train_loader:
        images = Variable(images.view(-1, 28*28))         # Convert torch tensor to Variable: change image from a vector of size 784 to a matrix of 28 x 28
        labels = Variable(labels)
        optimizer.zero_grad()
        log_ps = model(images)
        loss=criterion(log_ps,labels)
        loss.backward()
        optimizer.step()
        running_loss+=loss.item()
        
    else:
        test_loss=0
        accuracy=0
        
        with torch.no_grad():
            model.eval()
            for images,labels in test_loader:
                images = Variable(images.view(-1, 28*28))         # Convert torch tensor to Variable: change image from a vector of size 784 to a matrix of 28 x 28
                labels = Variable(labels)
                log_ps=model(images)
                test_loss+=criterion(log_ps,labels)
                ps=torch.exp(log_ps)
                top_p,top_class=ps.topk(1,dim=1)
                equals=top_class==labels.view(*top_class.shape)
                accuracy+=torch.mean(equals.type(torch.FloatTensor))
        model.train()
        train_losses.append(running_loss/len(train_loader))
        test_losses.append(test_loss/len(test_loader))

        print("Epoch: {}/{}.. ".format(e+1, epochs),
              "Training Loss: {:.3f}.. ".format(running_loss/len(train_loader)),
              "Test Loss: {:.3f}.. ".format(test_loss/len(test_loader)),
              "Test Accuracy: {:.3f}".format(accuracy/len(test_loader)))

Epoch: 1/10..  Training Loss: 0.643..  Test Loss: 0.235..  Test Accuracy: 0.928
Epoch: 2/10..  Training Loss: 0.251..  Test Loss: 0.168..  Test Accuracy: 0.949
Epoch: 3/10..  Training Loss: 0.193..  Test Loss: 0.141..  Test Accuracy: 0.956
Epoch: 4/10..  Training Loss: 0.161..  Test Loss: 0.109..  Test Accuracy: 0.966
Epoch: 5/10..  Training Loss: 0.140..  Test Loss: 0.113..  Test Accuracy: 0.966
Epoch: 6/10..  Training Loss: 0.125..  Test Loss: 0.096..  Test Accuracy: 0.969
Epoch: 7/10..  Training Loss: 0.113..  Test Loss: 0.072..  Test Accuracy: 0.977
Epoch: 8/10..  Training Loss: 0.108..  Test Loss: 0.056..  Test Accuracy: 0.983
Epoch: 9/10..  Training Loss: 0.096..  Test Loss: 0.056..  Test Accuracy: 0.983
Epoch: 10/10..  Training Loss: 0.092..  Test Loss: 0.048..  Test Accuracy: 0.985


### Model F: 4 Hidden Layer Feedforward Neural Network (ReLU Activation)

In [33]:
class NetworkR4(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(784, 384)
        self.fc2 = nn.Linear(384, 256)
        self.fc3 = nn.Linear(256, 128)
        self.fc4 = nn.Linear(128, 64)
        self.fc5 = nn.Linear(64, 10)

        # Dropout module with 0.2 drop probability
        self.dropout = nn.Dropout(p=0.2)

    def forward(self, x):
        # make sure input tensor is flattened
        x = x.view(x.shape[0], -1)

        # Now with dropout
        x = self.dropout(F.relu(self.fc1(x)))
        x = self.dropout(F.relu(self.fc2(x)))
        x = self.dropout(F.relu(self.fc3(x)))
        x = self.dropout(F.relu(self.fc4(x)))

        # output so no dropout here
        x = F.log_softmax(self.fc5(x), dim=1)

        return x


In [34]:

learning_rate = 0.1
model=NetworkR4()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate) 
criterion=nn.NLLLoss()

In [35]:
print(model.parameters())
print(len(list(model.parameters())))
# FC 1 Parameters 
print(list(model.parameters())[0].size())
# FC 1 Bias Parameters
print(list(model.parameters())[1].size())
# FC 2 Parameters
print(list(model.parameters())[2].size())
# FC 2 Bias Parameters
print(list(model.parameters())[3].size())

<generator object Module.parameters at 0x000000000720D5E8>
10
torch.Size([384, 784])
torch.Size([384])
torch.Size([256, 384])
torch.Size([256])


In [36]:
from torch.autograd import Variable
epochs=10
train_losses,test_losses=[],[]

for e in range(epochs):
    running_loss=0
    
    for images, labels in train_loader:
        images = Variable(images.view(-1, 28*28))         # Convert torch tensor to Variable: change image from a vector of size 784 to a matrix of 28 x 28
        labels = Variable(labels)
        optimizer.zero_grad()
        log_ps = model(images)
        loss=criterion(log_ps,labels)
        loss.backward()
        optimizer.step()
        running_loss+=loss.item()
        
    else:
        test_loss=0
        accuracy=0
        
        with torch.no_grad():
            model.eval()
            for images,labels in test_loader:
                images = Variable(images.view(-1, 28*28))         # Convert torch tensor to Variable: change image from a vector of size 784 to a matrix of 28 x 28
                labels = Variable(labels)
                log_ps=model(images)
                test_loss+=criterion(log_ps,labels)
                ps=torch.exp(log_ps)
                top_p,top_class=ps.topk(1,dim=1)
                equals=top_class==labels.view(*top_class.shape)
                accuracy+=torch.mean(equals.type(torch.FloatTensor))
        model.train()
        train_losses.append(running_loss/len(train_loader))
        test_losses.append(test_loss/len(test_loader))

        print("Epoch: {}/{}.. ".format(e+1, epochs),
              "Training Loss: {:.3f}.. ".format(running_loss/len(train_loader)),
              "Test Loss: {:.3f}.. ".format(test_loss/len(test_loader)),
              "Test Accuracy: {:.3f}".format(accuracy/len(test_loader)))

Epoch: 1/10..  Training Loss: 0.810..  Test Loss: 0.253..  Test Accuracy: 0.927
Epoch: 2/10..  Training Loss: 0.269..  Test Loss: 0.166..  Test Accuracy: 0.949
Epoch: 3/10..  Training Loss: 0.195..  Test Loss: 0.159..  Test Accuracy: 0.952
Epoch: 4/10..  Training Loss: 0.163..  Test Loss: 0.150..  Test Accuracy: 0.954
Epoch: 5/10..  Training Loss: 0.136..  Test Loss: 0.114..  Test Accuracy: 0.965
Epoch: 6/10..  Training Loss: 0.122..  Test Loss: 0.089..  Test Accuracy: 0.972
Epoch: 7/10..  Training Loss: 0.109..  Test Loss: 0.075..  Test Accuracy: 0.977
Epoch: 8/10..  Training Loss: 0.100..  Test Loss: 0.054..  Test Accuracy: 0.984
Epoch: 9/10..  Training Loss: 0.092..  Test Loss: 0.057..  Test Accuracy: 0.983
Epoch: 10/10..  Training Loss: 0.087..  Test Loss: 0.053..  Test Accuracy: 0.984


In [39]:
print("No.of parameters", sum([p.numel() for p in model.parameters()]))

No.of parameters 441802


### Model G: 2 Hidden Layer Feedforward Neural Network (ReLU Activation)-learning rate = 0.01

In [61]:
class NetworkR21(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(784, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 10)
        

        # Dropout module with 0.2 drop probability
        self.dropout = nn.Dropout(p=0.2)

    def forward(self, x):
        # make sure input tensor is flattened
        x = x.view(x.shape[0], -1)

        # Now with dropout
        x = self.dropout(F.relu(self.fc1(x)))
        x = self.dropout(F.relu(self.fc2(x)))
        

        # output so no dropout here
        x = F.log_softmax(self.fc3(x), dim=1)

        return x
        


In [62]:

learning_rate = 0.01
model=NetworkR21()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate) 
criterion=nn.NLLLoss()

In [63]:
print(model.parameters())
print(len(list(model.parameters())))
# FC 1 Parameters 
print(list(model.parameters())[0].size())
# FC 1 Bias Parameters
print(list(model.parameters())[1].size())
# FC 2 Parameters
print(list(model.parameters())[2].size())
# FC 2 Bias Parameters
print(list(model.parameters())[3].size())

<generator object Module.parameters at 0x000000000721A9A8>
6
torch.Size([128, 784])
torch.Size([128])
torch.Size([64, 128])
torch.Size([64])


In [66]:
epochs=10
train_losses,test_losses=[],[]

for e in range(epochs):
    running_loss=0
    
    for images, labels in train_loader:
        #images = Variable(images.view(-1, 28*28))         # Convert torch tensor to Variable: change image from a vector of size 784 to a matrix of 28 x 28
        #labels = Variable(labels)
        optimizer.zero_grad()
        
        log_ps=model(images)
        loss=criterion(log_ps,labels)
        loss.backward()
        optimizer.step()
        running_loss+=loss.item()
        
    else:
        test_loss=0
        accuracy=0
        
        with torch.no_grad():
            model.eval()
            for images,labels in test_loader:
                log_ps=model(images)
                test_loss+=criterion(log_ps,labels)
                ps=torch.exp(log_ps)
                top_p,top_class=ps.topk(1,dim=1)
                equals=top_class==labels.view(*top_class.shape)
                accuracy+=torch.mean(equals.type(torch.FloatTensor))
        model.train()
        train_losses.append(running_loss/len(train_loader))
        test_losses.append(test_loss/len(test_loader))

        print("Epoch: {}/{}.. ".format(e+1, epochs),
              "Training Loss: {:.3f}.. ".format(running_loss/len(train_loader)),
              "Test Loss: {:.3f}.. ".format(test_loss/len(test_loader)),
              "Test Accuracy: {:.3f}".format(accuracy/len(test_loader)))

Epoch: 1/10..  Training Loss: 0.204..  Test Loss: 0.145..  Test Accuracy: 0.956
Epoch: 2/10..  Training Loss: 0.192..  Test Loss: 0.135..  Test Accuracy: 0.959
Epoch: 3/10..  Training Loss: 0.184..  Test Loss: 0.126..  Test Accuracy: 0.962
Epoch: 4/10..  Training Loss: 0.175..  Test Loss: 0.118..  Test Accuracy: 0.964
Epoch: 5/10..  Training Loss: 0.171..  Test Loss: 0.113..  Test Accuracy: 0.966
Epoch: 6/10..  Training Loss: 0.162..  Test Loss: 0.107..  Test Accuracy: 0.968
Epoch: 7/10..  Training Loss: 0.157..  Test Loss: 0.106..  Test Accuracy: 0.968
Epoch: 8/10..  Training Loss: 0.151..  Test Loss: 0.104..  Test Accuracy: 0.969
Epoch: 9/10..  Training Loss: 0.149..  Test Loss: 0.095..  Test Accuracy: 0.972
Epoch: 10/10..  Training Loss: 0.143..  Test Loss: 0.090..  Test Accuracy: 0.974


In [65]:
print("No.of parameters", sum([p.numel() for p in model.parameters()]))

No.of parameters 109386


### Model H: 2 Hidden Layer Feedforward Neural Network (ReLU Activation)-learning rate = 0.05

In [75]:
class NetworkR2(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(784, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 10)
        

        # Dropout module with 0.2 drop probability
        self.dropout = nn.Dropout(p=0.2)

    def forward(self, x):
        # make sure input tensor is flattened
        x = x.view(x.shape[0], -1)

        # Now with dropout
        x = self.dropout(F.relu(self.fc1(x)))
        x = self.dropout(F.relu(self.fc2(x)))
        

        # output so no dropout here
        x = F.log_softmax(self.fc3(x), dim=1)

        return x

In [76]:
learning_rate = 0.05
model=NetworkR21()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate) 
criterion=nn.NLLLoss()

In [78]:
epochs=10
train_losses,test_losses=[],[]

for e in range(epochs):
    running_loss=0
    
    for images, labels in train_loader:
        #images = Variable(images.view(-1, 28*28))         # Convert torch tensor to Variable: change image from a vector of size 784 to a matrix of 28 x 28
        #labels = Variable(labels)
        optimizer.zero_grad()
        
        log_ps=model(images)
        loss=criterion(log_ps,labels)
        loss.backward()
        optimizer.step()
        running_loss+=loss.item()
        
    else:
        test_loss=0
        accuracy=0
        
        with torch.no_grad():
            model.eval()
            for images,labels in test_loader:
                log_ps=model(images)
                test_loss+=criterion(log_ps,labels)
                ps=torch.exp(log_ps)
                top_p,top_class=ps.topk(1,dim=1)
                equals=top_class==labels.view(*top_class.shape)
                accuracy+=torch.mean(equals.type(torch.FloatTensor))
        model.train()
        train_losses.append(running_loss/len(train_loader))
        test_losses.append(test_loss/len(test_loader))

        print("Epoch: {}/{}.. ".format(e+1, epochs),
              "Training Loss: {:.3f}.. ".format(running_loss/len(train_loader)),
              "Test Loss: {:.3f}.. ".format(test_loss/len(test_loader)),
              "Test Accuracy: {:.3f}".format(accuracy/len(test_loader)))

Epoch: 1/10..  Training Loss: 0.617..  Test Loss: 0.333..  Test Accuracy: 0.903
Epoch: 2/10..  Training Loss: 0.294..  Test Loss: 0.202..  Test Accuracy: 0.940
Epoch: 3/10..  Training Loss: 0.232..  Test Loss: 0.177..  Test Accuracy: 0.944
Epoch: 4/10..  Training Loss: 0.198..  Test Loss: 0.142..  Test Accuracy: 0.957
Epoch: 5/10..  Training Loss: 0.172..  Test Loss: 0.113..  Test Accuracy: 0.965
Epoch: 6/10..  Training Loss: 0.155..  Test Loss: 0.125..  Test Accuracy: 0.959
Epoch: 7/10..  Training Loss: 0.139..  Test Loss: 0.090..  Test Accuracy: 0.972
Epoch: 8/10..  Training Loss: 0.129..  Test Loss: 0.080..  Test Accuracy: 0.975
Epoch: 9/10..  Training Loss: 0.123..  Test Loss: 0.077..  Test Accuracy: 0.976
Epoch: 10/10..  Training Loss: 0.115..  Test Loss: 0.066..  Test Accuracy: 0.980


In [77]:
print("No.of parameters", sum([p.numel() for p in model.parameters()]))

No.of parameters 109386


### Model I: 2 Hidden Layer Feedforward Neural Network (ReLU Activation)-learning rate = 0.05, momentum = 0.9

In [79]:
class NetworkR2(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(784, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 10)
        

        # Dropout module with 0.2 drop probability
        self.dropout = nn.Dropout(p=0.2)

    def forward(self, x):
        # make sure input tensor is flattened
        x = x.view(x.shape[0], -1)

        # Now with dropout
        x = self.dropout(F.relu(self.fc1(x)))
        x = self.dropout(F.relu(self.fc2(x)))
        

        # output so no dropout here
        x = F.log_softmax(self.fc3(x), dim=1)

        return x

In [80]:
learning_rate = 0.05
model=NetworkR21()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, momentum = 0.9) 
criterion=nn.NLLLoss()

In [82]:
epochs=10
train_losses,test_losses=[],[]

for e in range(epochs):
    running_loss=0
    
    for images, labels in train_loader:
        #images = Variable(images.view(-1, 28*28))         # Convert torch tensor to Variable: change image from a vector of size 784 to a matrix of 28 x 28
        #labels = Variable(labels)
        optimizer.zero_grad()
        
        log_ps=model(images)
        loss=criterion(log_ps,labels)
        loss.backward()
        optimizer.step()
        running_loss+=loss.item()
        
    else:
        test_loss=0
        accuracy=0
        
        with torch.no_grad():
            model.eval()
            for images,labels in test_loader:
                log_ps=model(images)
                test_loss+=criterion(log_ps,labels)
                ps=torch.exp(log_ps)
                top_p,top_class=ps.topk(1,dim=1)
                equals=top_class==labels.view(*top_class.shape)
                accuracy+=torch.mean(equals.type(torch.FloatTensor))
        model.train()
        train_losses.append(running_loss/len(train_loader))
        test_losses.append(test_loss/len(test_loader))

        print("Epoch: {}/{}.. ".format(e+1, epochs),
              "Training Loss: {:.3f}.. ".format(running_loss/len(train_loader)),
              "Test Loss: {:.3f}.. ".format(test_loss/len(test_loader)),
              "Test Accuracy: {:.3f}".format(accuracy/len(test_loader)))

Epoch: 1/10..  Training Loss: 0.632..  Test Loss: 0.285..  Test Accuracy: 0.916
Epoch: 2/10..  Training Loss: 0.446..  Test Loss: 0.288..  Test Accuracy: 0.920
Epoch: 3/10..  Training Loss: 0.400..  Test Loss: 0.255..  Test Accuracy: 0.926
Epoch: 4/10..  Training Loss: 0.374..  Test Loss: 0.291..  Test Accuracy: 0.911
Epoch: 5/10..  Training Loss: 0.346..  Test Loss: 0.235..  Test Accuracy: 0.932
Epoch: 6/10..  Training Loss: 0.327..  Test Loss: 0.228..  Test Accuracy: 0.935
Epoch: 7/10..  Training Loss: 0.314..  Test Loss: 0.203..  Test Accuracy: 0.941
Epoch: 8/10..  Training Loss: 0.306..  Test Loss: 0.225..  Test Accuracy: 0.936
Epoch: 9/10..  Training Loss: 0.297..  Test Loss: 0.181..  Test Accuracy: 0.948
Epoch: 10/10..  Training Loss: 0.290..  Test Loss: 0.157..  Test Accuracy: 0.953


In [81]:
print("No.of parameters", sum([p.numel() for p in model.parameters()]))

No.of parameters 109386


### Model J: 2 Hidden Layer Feedforward Neural Network (ReLU Activation)-learning rate = 0.1, momentum = 0.9

In [83]:
class NetworkR2(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(784, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 10)
        

        # Dropout module with 0.2 drop probability
        self.dropout = nn.Dropout(p=0.2)

    def forward(self, x):
        # make sure input tensor is flattened
        x = x.view(x.shape[0], -1)

        # Now with dropout
        x = self.dropout(F.relu(self.fc1(x)))
        x = self.dropout(F.relu(self.fc2(x)))
        

        # output so no dropout here
        x = F.log_softmax(self.fc3(x), dim=1)

        return x

In [84]:
learning_rate = 0.1
model=NetworkR21()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, momentum = 0.9) 
criterion=nn.NLLLoss()

In [85]:
epochs=10
train_losses,test_losses=[],[]

for e in range(epochs):
    running_loss=0
    
    for images, labels in train_loader:
        #images = Variable(images.view(-1, 28*28))         # Convert torch tensor to Variable: change image from a vector of size 784 to a matrix of 28 x 28
        #labels = Variable(labels)
        optimizer.zero_grad()
        
        log_ps=model(images)
        loss=criterion(log_ps,labels)
        loss.backward()
        optimizer.step()
        running_loss+=loss.item()
        
    else:
        test_loss=0
        accuracy=0
        
        with torch.no_grad():
            model.eval()
            for images,labels in test_loader:
                log_ps=model(images)
                test_loss+=criterion(log_ps,labels)
                ps=torch.exp(log_ps)
                top_p,top_class=ps.topk(1,dim=1)
                equals=top_class==labels.view(*top_class.shape)
                accuracy+=torch.mean(equals.type(torch.FloatTensor))
        model.train()
        train_losses.append(running_loss/len(train_loader))
        test_losses.append(test_loss/len(test_loader))

        print("Epoch: {}/{}.. ".format(e+1, epochs),
              "Training Loss: {:.3f}.. ".format(running_loss/len(train_loader)),
              "Test Loss: {:.3f}.. ".format(test_loss/len(test_loader)),
              "Test Accuracy: {:.3f}".format(accuracy/len(test_loader)))

Epoch: 1/10..  Training Loss: 1.449..  Test Loss: 1.289..  Test Accuracy: 0.502
Epoch: 2/10..  Training Loss: 1.745..  Test Loss: 1.454..  Test Accuracy: 0.440
Epoch: 3/10..  Training Loss: 2.044..  Test Loss: 2.309..  Test Accuracy: 0.112
Epoch: 4/10..  Training Loss: 2.269..  Test Loss: 2.305..  Test Accuracy: 0.112
Epoch: 5/10..  Training Loss: 2.265..  Test Loss: 2.306..  Test Accuracy: 0.099
Epoch: 6/10..  Training Loss: 2.298..  Test Loss: 2.304..  Test Accuracy: 0.112
Epoch: 7/10..  Training Loss: 2.296..  Test Loss: 2.305..  Test Accuracy: 0.112
Epoch: 8/10..  Training Loss: 2.296..  Test Loss: 2.305..  Test Accuracy: 0.099
Epoch: 9/10..  Training Loss: 2.297..  Test Loss: 2.306..  Test Accuracy: 0.099
Epoch: 10/10..  Training Loss: 2.297..  Test Loss: 2.305..  Test Accuracy: 0.104
