# Package and Data Loading

Inspiration:
https://arxiv.org/pdf/1512.03385.pdf
ResNet architecture - split convolutional layers into blocks based on size of neurons

In [1]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import pandas as pd

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')



In [2]:
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize(( 0.5), ( 0.5))])

batch_size = 128

trainset = torchvision.datasets.MNIST(root='./data', train=True,
                                        download=True,transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                          shuffle=True, num_workers=4, pin_memory=True)

testset = torchvision.datasets.MNIST(root='./data', train=False,
                                       download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
                                         shuffle=False, num_workers=4, pin_memory=True)

#classes = ('plane', 'car', 'bird', 'cat',
#           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

In [3]:

results = pd.DataFrame()

# Base CNN (2 Layers) 

In [4]:
# define network

class Net_2(nn.Module):
    def __init__(self):
        super().__init__()
        self.network = nn.Sequential(
            
            #block 1
            nn.Conv2d(1, 64, kernel_size=3, padding=1), #1 #### change to 64?
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            nn.MaxPool2d(2, 2), # output: 64 x 16 x 16
            nn.BatchNorm2d(64),

            
            # -----------------------------------------------
            
            nn.Flatten(),
            nn.Linear(12544, 1024), #FC1    
            nn.ReLU(),
            nn.Linear(1024, 512), #FC2
            nn.ReLU(),
            nn.Linear(512, 10)) #FC3
    
        
    def forward(self, x):
        return self.network(x)


net = Net_2()
net.to(device)

Net_2(
  (network): Sequential(
    (0): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): Flatten(start_dim=1, end_dim=-1)
    (7): Linear(in_features=12544, out_features=1024, bias=True)
    (8): ReLU()
    (9): Linear(in_features=1024, out_features=512, bias=True)
    (10): ReLU()
    (11): Linear(in_features=512, out_features=10, bias=True)
  )
)

In [5]:
# criterion + optimizer

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9, weight_decay=0.001)

In [6]:
epochs = 50

running_loss_history = []
running_corrects_history = []
test_acc_history=[]

for e in range(epochs): # training our model, put input according to every batch.
  
    running_loss = 0.0
    running_corrects = 0.0


    for inputs, labels in trainloader:
        
        inputs = inputs.to(device) # input to device as our model is running in mentioned device.
        labels = labels.to(device)
        outputs = net(inputs) # every batch of 100 images are put as an input.
        #print(labels)
        loss = criterion(outputs, labels) # Calc loss after each batch i/p by comparing it to actual labels. 

        optimizer.zero_grad() #setting the initial gradient to 0
        loss.backward() # backpropagating the loss
        optimizer.step() # updating the weights and bias values for every single step.

        _, preds = torch.max(outputs, 1) # taking the highest value of prediction.
        running_loss += loss.item()
        running_corrects += torch.sum(preds == labels.data) # calculating te accuracy by taking the sum of all the correct predictions in a batch.

        
    epoch_loss = running_loss/len(trainset) # loss per epoch
    epoch_acc = 100*(running_corrects.float()/ len(trainset)) # accuracy per epoch
    running_loss_history.append(epoch_loss) # appending for displaying 
    running_corrects_history.append(epoch_acc)
                
    print('epoch :', (e+1))
    print('training loss: {:.4f}, acc {:.4f} '.format(epoch_loss, epoch_acc.item()))
    
    # testing

    correct = 0
    total = 0

    # since we're not training, we don't need to calculate the gradients for our outputs
    with torch.no_grad():
        for data in testloader:
            images, labels = data[0].to(device), data[1].to(device)
            # calculate outputs by running images through the network
            outputs = net(images)
            # the class with the highest energy is what we choose as prediction
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    
    test_acc_history.append(100 * correct / total)
    print(f'Accuracy of the network on the 10000 test images: {100 * correct / total} %')


epoch : 1
training loss: 0.0032, acc 90.3050 
Accuracy of the network on the 10000 test images: 97.09 %
epoch : 2
training loss: 0.0007, acc 97.7317 
Accuracy of the network on the 10000 test images: 98.27 %
epoch : 3
training loss: 0.0004, acc 98.4567 
Accuracy of the network on the 10000 test images: 98.51 %
epoch : 4
training loss: 0.0004, acc 98.8250 
Accuracy of the network on the 10000 test images: 98.67 %
epoch : 5
training loss: 0.0003, acc 98.9717 
Accuracy of the network on the 10000 test images: 98.69 %
epoch : 6
training loss: 0.0003, acc 99.1650 
Accuracy of the network on the 10000 test images: 98.89 %
epoch : 7
training loss: 0.0002, acc 99.2900 
Accuracy of the network on the 10000 test images: 98.83 %
epoch : 8
training loss: 0.0002, acc 99.3883 
Accuracy of the network on the 10000 test images: 99.01 %
epoch : 9
training loss: 0.0002, acc 99.4867 
Accuracy of the network on the 10000 test images: 98.91 %
epoch : 10
training loss: 0.0001, acc 99.5867 
Accuracy of the n

In [8]:
running_loss_history = torch.tensor(running_loss_history)
running_loss_history.numpy()
running_corrects_history = torch.tensor(running_corrects_history)
running_corrects_history.numpy()
test_acc_history = torch.tensor(test_acc_history)
test_acc_history.numpy()




results['net1 - training loss'] = running_loss_history
results['net1 - training accuracy'] = running_corrects_history
results['net1 - testing accuracy'] = test_acc_history

results.to_csv('net_results_mnist.csv')

  running_loss_history = torch.tensor(running_loss_history)
  running_corrects_history = torch.tensor(running_corrects_history)
  test_acc_history = torch.tensor(test_acc_history)


# 4 Layers

In [9]:
# define network

class Net_4(nn.Module):
    def __init__(self):
        super().__init__()
        self.network = nn.Sequential(
            
            #block 1
            nn.Conv2d(1, 64, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            nn.MaxPool2d(2, 2), # output: 64 x 16 x 16
            nn.BatchNorm2d(64),
            
            #block 2
            nn.Conv2d(64, 64, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            #nn.MaxPool2d(2, 2), max pooling operators removed after first block
            nn.BatchNorm2d(128),
            
            # -----------------------------------------------
            
            nn.Flatten(),
            nn.Linear(25088, 1024), #FC1    
            nn.ReLU(),
            nn.Linear(1024, 512), #FC2
            nn.ReLU(),
            nn.Linear(512, 10)) #FC3
    
        
    def forward(self, x):
        return self.network(x)


net = Net_4()
net.to(device)

Net_4(
  (network): Sequential(
    (0): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU()
    (8): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU()
    (10): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (11): Flatten(start_dim=1, end_dim=-1)
    (12): Linear(in_features=25088, out_features=1024, bias=True)
    (13): ReLU()
    (14): Linear(in_features=1024, out_features=512, bias=True)
    (15): ReLU()
    (16): Linear(in_features=512, out_features=10, bias=True)
  )
)

In [10]:
# criterion + optimizer

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9, weight_decay=0.001)

In [11]:
epochs = 50

running_loss_history = []
running_corrects_history = []
test_acc_history=[]

for e in range(epochs): # training our model, put input according to every batch.
  
    running_loss = 0.0
    running_corrects = 0.0


    for inputs, labels in trainloader:
        
        inputs = inputs.to(device) # input to device as our model is running in mentioned device.
        labels = labels.to(device)
        outputs = net(inputs) # every batch of 100 images are put as an input.
        loss = criterion(outputs, labels) # Calc loss after each batch i/p by comparing it to actual labels. 

        optimizer.zero_grad() #setting the initial gradient to 0
        loss.backward() # backpropagating the loss
        optimizer.step() # updating the weights and bias values for every single step.

        _, preds = torch.max(outputs, 1) # taking the highest value of prediction.
        running_loss += loss.item()
        running_corrects += torch.sum(preds == labels.data) # calculating te accuracy by taking the sum of all the correct predictions in a batch.

        
    epoch_loss = running_loss/len(trainset) # loss per epoch
    epoch_acc = 100*(running_corrects.float()/ len(trainset)) # accuracy per epoch
    running_loss_history.append(epoch_loss) # appending for displaying 
    running_corrects_history.append(epoch_acc)
                
    print('epoch :', (e+1))
    print('training loss: {:.4f}, acc {:.4f} '.format(epoch_loss, epoch_acc.item()))
    
    # testing

    correct = 0
    total = 0

    # since we're not training, we don't need to calculate the gradients for our outputs
    with torch.no_grad():
        for data in testloader:
            images, labels = data[0].to(device), data[1].to(device)
            # calculate outputs by running images through the network
            outputs = net(images)
            # the class with the highest energy is what we choose as prediction
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    
    test_acc_history.append(100 * correct / total)
    print(f'Accuracy of the network on the 10000 test images: {100 * correct / total} %')


epoch : 1
training loss: 0.0024, acc 93.0517 
Accuracy of the network on the 10000 test images: 98.12 %
epoch : 2
training loss: 0.0004, acc 98.4933 
Accuracy of the network on the 10000 test images: 98.74 %
epoch : 3
training loss: 0.0003, acc 98.9767 
Accuracy of the network on the 10000 test images: 99.01 %
epoch : 4
training loss: 0.0002, acc 99.2567 
Accuracy of the network on the 10000 test images: 99.06 %
epoch : 5
training loss: 0.0002, acc 99.4183 
Accuracy of the network on the 10000 test images: 99.14 %
epoch : 6
training loss: 0.0001, acc 99.5567 
Accuracy of the network on the 10000 test images: 99.21 %
epoch : 7
training loss: 0.0001, acc 99.6733 
Accuracy of the network on the 10000 test images: 99.32 %
epoch : 8
training loss: 0.0001, acc 99.7517 
Accuracy of the network on the 10000 test images: 99.17 %
epoch : 9
training loss: 0.0001, acc 99.8067 
Accuracy of the network on the 10000 test images: 99.22 %
epoch : 10
training loss: 0.0001, acc 99.8817 
Accuracy of the n

In [12]:
running_loss_history = torch.tensor(running_loss_history)
running_loss_history.numpy()
running_corrects_history = torch.tensor(running_corrects_history)
running_corrects_history.numpy()
test_acc_history = torch.tensor(test_acc_history)
test_acc_history.numpy()




results['net2 - training loss'] = running_loss_history
results['net2 - training accuracy'] = running_corrects_history
results['net2 - testing accuracy'] = test_acc_history

results.to_csv('net_results_mnist.csv')

# 6 Layers

In [13]:
# define network

class Net_6(nn.Module):
    def __init__(self):
        super().__init__()
        self.network = nn.Sequential(
            
            #block 1
            nn.Conv2d(1, 64, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            nn.MaxPool2d(2, 2), # output: 64 x 16 x 16
            nn.BatchNorm2d(64),
            
            #block 2
            nn.Conv2d(64, 64, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            #nn.MaxPool2d(2, 2), max pooling operators removed after first block
            nn.BatchNorm2d(128),
            
            #block 3
            nn.Conv2d(128, 128, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            #nn.MaxPool2d(2, 2), max pooling operators removed after first block
            nn.BatchNorm2d(256),
            
            # -----------------------------------------------
            
            nn.Flatten(),
            nn.Linear(50176, 1024), #FC1    
            nn.ReLU(),
            nn.Linear(1024, 512), #FC2
            nn.ReLU(),
            nn.Linear(512, 10)) #FC3
    
        
    def forward(self, x):
        return self.network(x)


net = Net_6()
net.to(device)

Net_6(
  (network): Sequential(
    (0): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU()
    (8): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU()
    (10): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (11): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (12): ReLU()
    (13): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (14): ReLU()
    (15): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (16): Flatten(start_dim=1, end_dim=-1)
    (17): Linear

In [14]:
# criterion + optimizer

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9, weight_decay=0.001)

In [15]:
epochs = 50

running_loss_history = []
running_corrects_history = []
test_acc_history=[]

for e in range(epochs): # training our model, put input according to every batch.
  
    running_loss = 0.0
    running_corrects = 0.0


    for inputs, labels in trainloader:
        
        inputs = inputs.to(device) # input to device as our model is running in mentioned device.
        labels = labels.to(device)
        outputs = net(inputs) # every batch of 100 images are put as an input.
        loss = criterion(outputs, labels) # Calc loss after each batch i/p by comparing it to actual labels. 

        optimizer.zero_grad() #setting the initial gradient to 0
        loss.backward() # backpropagating the loss
        optimizer.step() # updating the weights and bias values for every single step.

        _, preds = torch.max(outputs, 1) # taking the highest value of prediction.
        running_loss += loss.item()
        running_corrects += torch.sum(preds == labels.data) # calculating te accuracy by taking the sum of all the correct predictions in a batch.

        
    epoch_loss = running_loss/len(trainset) # loss per epoch
    epoch_acc = 100*(running_corrects.float()/ len(trainset)) # accuracy per epoch
    running_loss_history.append(epoch_loss) # appending for displaying 
    running_corrects_history.append(epoch_acc)
                
    print('epoch :', (e+1))
    print('training loss: {:.4f}, acc {:.4f} '.format(epoch_loss, epoch_acc.item()))
    
    # testing

    correct = 0
    total = 0

    # since we're not training, we don't need to calculate the gradients for our outputs
    with torch.no_grad():
        for data in testloader:
            images, labels = data[0].to(device), data[1].to(device)
            # calculate outputs by running images through the network
            outputs = net(images)
            # the class with the highest energy is what we choose as prediction
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    
    test_acc_history.append(100 * correct / total)
    print(f'Accuracy of the network on the 10000 test images: {100 * correct / total} %')

epoch : 1
training loss: 0.0017, acc 94.6467 
Accuracy of the network on the 10000 test images: 98.64 %
epoch : 2
training loss: 0.0003, acc 98.9000 
Accuracy of the network on the 10000 test images: 98.97 %
epoch : 3
training loss: 0.0002, acc 99.3000 
Accuracy of the network on the 10000 test images: 99.18 %
epoch : 4
training loss: 0.0001, acc 99.5083 
Accuracy of the network on the 10000 test images: 99.26 %
epoch : 5
training loss: 0.0001, acc 99.7067 
Accuracy of the network on the 10000 test images: 99.29 %
epoch : 6
training loss: 0.0001, acc 99.7883 
Accuracy of the network on the 10000 test images: 99.29 %
epoch : 7
training loss: 0.0001, acc 99.8700 
Accuracy of the network on the 10000 test images: 99.3 %
epoch : 8
training loss: 0.0000, acc 99.9100 
Accuracy of the network on the 10000 test images: 99.27 %
epoch : 9
training loss: 0.0000, acc 99.9533 
Accuracy of the network on the 10000 test images: 99.27 %
epoch : 10
training loss: 0.0000, acc 99.9750 
Accuracy of the ne

In [16]:
running_loss_history = torch.tensor(running_loss_history)
running_loss_history.numpy()
running_corrects_history = torch.tensor(running_corrects_history)
running_corrects_history.numpy()
test_acc_history = torch.tensor(test_acc_history)
test_acc_history.numpy()




results['net3 - training loss'] = running_loss_history
results['net3 - training accuracy'] = running_corrects_history
results['net3 - testing accuracy'] = test_acc_history

results.to_csv('net_results_mnist.csv')

# 8 Layers

In [17]:
# define network

class Net_8(nn.Module):
    def __init__(self):
        super().__init__()
        self.network = nn.Sequential(
            
            #block 1
            nn.Conv2d(1, 64, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            nn.MaxPool2d(2, 2), # output: 64 x 16 x 16
            nn.BatchNorm2d(64),
            
            #block 2
            nn.Conv2d(64, 64, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            #nn.MaxPool2d(2, 2), max pooling operators removed after first block
            nn.BatchNorm2d(128),
            
            #block 3
            nn.Conv2d(128, 128, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            #nn.MaxPool2d(2, 2), max pooling operators removed after first block
            nn.BatchNorm2d(256),
             
            #block 4
            nn.Conv2d(256, 256, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            #nn.MaxPool2d(2, 2), max pooling operators removed after first block
            nn.BatchNorm2d(512),
            
            # -----------------------------------------------
            
            nn.Flatten(),
            nn.Linear(100352, 1024), #FC1    
            nn.ReLU(),
            nn.Linear(1024, 512), #FC2
            nn.ReLU(),
            nn.Linear(512, 10)) #FC3
    
        
    def forward(self, x):
        return self.network(x)


net = Net_8()
net.to(device)

Net_8(
  (network): Sequential(
    (0): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU()
    (8): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU()
    (10): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (11): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (12): ReLU()
    (13): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (14): ReLU()
    (15): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (16): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1

In [18]:
# criterion + optimizer

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9, weight_decay=0.001)

In [19]:
epochs = 50

running_loss_history = []
running_corrects_history = []
test_acc_history=[]

for e in range(epochs): # training our model, put input according to every batch.
  
    running_loss = 0.0
    running_corrects = 0.0


    for inputs, labels in trainloader:
        
        inputs = inputs.to(device) # input to device as our model is running in mentioned device.
        labels = labels.to(device)
        outputs = net(inputs) # every batch of 100 images are put as an input.
        loss = criterion(outputs, labels) # Calc loss after each batch i/p by comparing it to actual labels. 

        optimizer.zero_grad() #setting the initial gradient to 0
        loss.backward() # backpropagating the loss
        optimizer.step() # updating the weights and bias values for every single step.

        _, preds = torch.max(outputs, 1) # taking the highest value of prediction.
        running_loss += loss.item()
        running_corrects += torch.sum(preds == labels.data) # calculating te accuracy by taking the sum of all the correct predictions in a batch.

        
    epoch_loss = running_loss/len(trainset) # loss per epoch
    epoch_acc = 100*(running_corrects.float()/ len(trainset)) # accuracy per epoch
    running_loss_history.append(epoch_loss) # appending for displaying 
    running_corrects_history.append(epoch_acc)
                
    print('epoch :', (e+1))
    print('training loss: {:.4f}, acc {:.4f} '.format(epoch_loss, epoch_acc.item()))
    
    # testing

    correct = 0
    total = 0

    # since we're not training, we don't need to calculate the gradients for our outputs
    with torch.no_grad():
        for data in testloader:
            images, labels = data[0].to(device), data[1].to(device)
            # calculate outputs by running images through the network
            outputs = net(images)
            # the class with the highest energy is what we choose as prediction
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    
    test_acc_history.append(100 * correct / total)
    print(f'Accuracy of the network on the 10000 test images: {100 * correct / total} %')

epoch : 1
training loss: 0.0014, acc 95.1683 
Accuracy of the network on the 10000 test images: 98.83 %
epoch : 2
training loss: 0.0002, acc 99.1300 
Accuracy of the network on the 10000 test images: 99.08 %
epoch : 3
training loss: 0.0001, acc 99.5117 
Accuracy of the network on the 10000 test images: 99.16 %
epoch : 4
training loss: 0.0001, acc 99.7300 
Accuracy of the network on the 10000 test images: 99.13 %
epoch : 5
training loss: 0.0001, acc 99.8900 
Accuracy of the network on the 10000 test images: 99.36 %
epoch : 6
training loss: 0.0000, acc 99.9567 
Accuracy of the network on the 10000 test images: 99.35 %
epoch : 7
training loss: 0.0000, acc 99.9767 
Accuracy of the network on the 10000 test images: 99.36 %
epoch : 8
training loss: 0.0000, acc 99.9967 
Accuracy of the network on the 10000 test images: 99.38 %
epoch : 9
training loss: 0.0000, acc 99.9967 
Accuracy of the network on the 10000 test images: 99.39 %
epoch : 10
training loss: 0.0000, acc 99.9983 
Accuracy of the n

In [20]:
running_loss_history = torch.tensor(running_loss_history)
running_loss_history.numpy()
running_corrects_history = torch.tensor(running_corrects_history)
running_corrects_history.numpy()
test_acc_history = torch.tensor(test_acc_history)
test_acc_history.numpy()




results['net4 - training loss'] = running_loss_history
results['net4 - training accuracy'] = running_corrects_history
results['net4 - testing accuracy'] = test_acc_history

results.to_csv('net_results_mnist.csv')

# 10 Layers

In [21]:
# define network

class Net_10(nn.Module):
    def __init__(self):
        super().__init__()
        self.network = nn.Sequential(
            
            #block 1
            nn.Conv2d(1, 64, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            nn.MaxPool2d(2, 2), # output: 64 x 16 x 16
            nn.BatchNorm2d(64),
            
            #block 2
            nn.Conv2d(64, 64, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            #nn.MaxPool2d(2, 2), max pooling operators removed after first block
            nn.BatchNorm2d(128),
            
            #block 3
            nn.Conv2d(128, 128, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            #nn.MaxPool2d(2, 2), max pooling operators removed after first block
            nn.BatchNorm2d(256),
             
            #block 4
            nn.Conv2d(256, 256, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            #nn.MaxPool2d(2, 2), max pooling operators removed after first block
            nn.BatchNorm2d(512),
            
            #block 5
            nn.Conv2d(512, 1024, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(1024, 1024, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            #nn.MaxPool2d(2, 2), max pooling operators removed after first block
            nn.BatchNorm2d(1024),
        
            
            # -----------------------------------------------
            
            nn.Flatten(),
            nn.Linear(200704, 1024), #FC1    
            nn.ReLU(),
            nn.Linear(1024, 512), #FC2
            nn.ReLU(),
            nn.Linear(512, 10)) #FC3
    
        
    def forward(self, x):
        return self.network(x)


net = Net_10()
net.to(device)

Net_10(
  (network): Sequential(
    (0): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU()
    (8): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU()
    (10): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (11): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (12): ReLU()
    (13): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (14): ReLU()
    (15): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (16): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 

In [22]:
# criterion + optimizer

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9, weight_decay=0.001)

In [23]:
epochs = 50

running_loss_history = []
running_corrects_history = []
test_acc_history=[]

for e in range(epochs): # training our model, put input according to every batch.
  
    running_loss = 0.0
    running_corrects = 0.0


    for inputs, labels in trainloader:
        
        inputs = inputs.to(device) # input to device as our model is running in mentioned device.
        labels = labels.to(device)
        outputs = net(inputs) # every batch of 100 images are put as an input.
        loss = criterion(outputs, labels) # Calc loss after each batch i/p by comparing it to actual labels. 

        optimizer.zero_grad() #setting the initial gradient to 0
        loss.backward() # backpropagating the loss
        optimizer.step() # updating the weights and bias values for every single step.

        _, preds = torch.max(outputs, 1) # taking the highest value of prediction.
        running_loss += loss.item()
        running_corrects += torch.sum(preds == labels.data) # calculating te accuracy by taking the sum of all the correct predictions in a batch.

        
    epoch_loss = running_loss/len(trainset) # loss per epoch
    epoch_acc = 100*(running_corrects.float()/ len(trainset)) # accuracy per epoch
    running_loss_history.append(epoch_loss) # appending for displaying 
    running_corrects_history.append(epoch_acc)
                
    print('epoch :', (e+1))
    print('training loss: {:.4f}, acc {:.4f} '.format(epoch_loss, epoch_acc.item()))
    
    # testing

    correct = 0
    total = 0

    # since we're not training, we don't need to calculate the gradients for our outputs
    with torch.no_grad():
        for data in testloader:
            images, labels = data[0].to(device), data[1].to(device)
            # calculate outputs by running images through the network
            outputs = net(images)
            # the class with the highest energy is what we choose as prediction
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    
    test_acc_history.append(100 * correct / total)
    print(f'Accuracy of the network on the 10000 test images: {100 * correct / total} %')

epoch : 1
training loss: 0.0012, acc 95.5667 
Accuracy of the network on the 10000 test images: 99.08 %
epoch : 2
training loss: 0.0002, acc 99.3767 
Accuracy of the network on the 10000 test images: 99.08 %
epoch : 3
training loss: 0.0001, acc 99.7950 
Accuracy of the network on the 10000 test images: 99.32 %
epoch : 4
training loss: 0.0000, acc 99.9550 
Accuracy of the network on the 10000 test images: 99.37 %
epoch : 5
training loss: 0.0000, acc 99.9850 
Accuracy of the network on the 10000 test images: 99.37 %
epoch : 6
training loss: 0.0000, acc 99.9983 
Accuracy of the network on the 10000 test images: 99.44 %
epoch : 7
training loss: 0.0000, acc 99.9983 
Accuracy of the network on the 10000 test images: 99.42 %
epoch : 8
training loss: 0.0000, acc 99.9983 
Accuracy of the network on the 10000 test images: 99.44 %
epoch : 9
training loss: 0.0000, acc 99.9983 
Accuracy of the network on the 10000 test images: 99.44 %
epoch : 10
training loss: 0.0000, acc 99.9983 
Accuracy of the n

In [24]:
running_loss_history = torch.tensor(running_loss_history)
running_loss_history.numpy()
running_corrects_history = torch.tensor(running_corrects_history)
running_corrects_history.numpy()
test_acc_history = torch.tensor(test_acc_history)
test_acc_history.numpy()




results['net5 - training loss'] = running_loss_history
results['net5 - training accuracy'] = running_corrects_history
results['net5 - testing accuracy'] = test_acc_history

results.to_csv('net_results_mnist.csv')

# ---------------- Changing Neurons in Each Network ----------------------

## 2 Layers

In [25]:
# define network

class Net_2_2(nn.Module):
    def __init__(self):
        super().__init__()
        self.network = nn.Sequential(
            
            #block 1
            nn.Conv2d(1, 32, kernel_size=3, padding=1), 
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            nn.MaxPool2d(2, 2), # output: 64 x 16 x 16
            nn.BatchNorm2d(32),

            
            # -----------------------------------------------
            
            nn.Flatten(),
            nn.Linear(6272, 1024), #FC1    
            nn.ReLU(),
            nn.Linear(1024, 512), #FC2
            nn.ReLU(),
            nn.Linear(512, 10)) #FC3
    
        
    def forward(self, x):
        return self.network(x)


net = Net_2_2()
net.to(device)

Net_2_2(
  (network): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): Flatten(start_dim=1, end_dim=-1)
    (7): Linear(in_features=6272, out_features=1024, bias=True)
    (8): ReLU()
    (9): Linear(in_features=1024, out_features=512, bias=True)
    (10): ReLU()
    (11): Linear(in_features=512, out_features=10, bias=True)
  )
)

In [26]:
# criterion + optimizer

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9, weight_decay=0.001)

In [27]:
epochs = 50

running_loss_history = []
running_corrects_history = []
test_acc_history=[]

for e in range(epochs): # training our model, put input according to every batch.
  
    running_loss = 0.0
    running_corrects = 0.0


    for inputs, labels in trainloader:
        
        inputs = inputs.to(device) # input to device as our model is running in mentioned device.
        labels = labels.to(device)
        outputs = net(inputs) # every batch of 100 images are put as an input.
        loss = criterion(outputs, labels) # Calc loss after each batch i/p by comparing it to actual labels. 

        optimizer.zero_grad() #setting the initial gradient to 0
        loss.backward() # backpropagating the loss
        optimizer.step() # updating the weights and bias values for every single step.

        _, preds = torch.max(outputs, 1) # taking the highest value of prediction.
        running_loss += loss.item()
        running_corrects += torch.sum(preds == labels.data) # calculating te accuracy by taking the sum of all the correct predictions in a batch.

        
    epoch_loss = running_loss/len(trainset) # loss per epoch
    epoch_acc = 100*(running_corrects.float()/ len(trainset)) # accuracy per epoch
    running_loss_history.append(epoch_loss) # appending for displaying 
    running_corrects_history.append(epoch_acc)
                
    print('epoch :', (e+1))
    print('training loss: {:.4f}, acc {:.4f} '.format(epoch_loss, epoch_acc.item()))
    
    # testing

    correct = 0
    total = 0

    # since we're not training, we don't need to calculate the gradients for our outputs
    with torch.no_grad():
        for data in testloader:
            images, labels = data[0].to(device), data[1].to(device)
            # calculate outputs by running images through the network
            outputs = net(images)
            # the class with the highest energy is what we choose as prediction
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    
    test_acc_history.append(100 * correct / total)
    print(f'Accuracy of the network on the 10000 test images: {100 * correct / total} %')

epoch : 1
training loss: 0.0040, acc 88.4967 
Accuracy of the network on the 10000 test images: 96.69 %
epoch : 2
training loss: 0.0008, acc 97.2167 
Accuracy of the network on the 10000 test images: 97.81 %
epoch : 3
training loss: 0.0006, acc 98.0517 
Accuracy of the network on the 10000 test images: 98.15 %
epoch : 4
training loss: 0.0004, acc 98.4700 
Accuracy of the network on the 10000 test images: 98.49 %
epoch : 5
training loss: 0.0004, acc 98.7083 
Accuracy of the network on the 10000 test images: 98.75 %
epoch : 6
training loss: 0.0003, acc 98.8967 
Accuracy of the network on the 10000 test images: 98.78 %
epoch : 7
training loss: 0.0003, acc 99.0383 
Accuracy of the network on the 10000 test images: 98.86 %
epoch : 8
training loss: 0.0002, acc 99.1500 
Accuracy of the network on the 10000 test images: 98.9 %
epoch : 9
training loss: 0.0002, acc 99.2467 
Accuracy of the network on the 10000 test images: 98.84 %
epoch : 10
training loss: 0.0002, acc 99.3333 
Accuracy of the ne

In [28]:
running_loss_history = torch.tensor(running_loss_history)
running_loss_history.numpy()
running_corrects_history = torch.tensor(running_corrects_history)
running_corrects_history.numpy()
test_acc_history = torch.tensor(test_acc_history)
test_acc_history.numpy()




results['net6 - training loss'] = running_loss_history
results['net6 - training accuracy'] = running_corrects_history
results['net6 - testing accuracy'] = test_acc_history

results.to_csv('net_results_mnist.csv')

## 4 Layers

In [29]:
# define network

class Net_4_2(nn.Module):
    def __init__(self):
        super().__init__()
        self.network = nn.Sequential(
            
            #block 1
            nn.Conv2d(1, 32, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            nn.MaxPool2d(2, 2), 
            nn.BatchNorm2d(32),
            
            #block 2
            nn.Conv2d(32, 32, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            #nn.MaxPool2d(2, 2), max pooling operators removed after first block
            nn.BatchNorm2d(64),
            
            # -----------------------------------------------
            
            nn.Flatten(),
            nn.Linear(12544, 1024), #FC1    
            nn.ReLU(),
            nn.Linear(1024, 512), #FC2
            nn.ReLU(),
            nn.Linear(512, 10)) #FC3
    
        
    def forward(self, x):
        return self.network(x)


net = Net_4_2()
net.to(device)

Net_4_2(
  (network): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU()
    (8): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU()
    (10): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (11): Flatten(start_dim=1, end_dim=-1)
    (12): Linear(in_features=12544, out_features=1024, bias=True)
    (13): ReLU()
    (14): Linear(in_features=1024, out_features=512, bias=True)
    (15): ReLU()
    (16): Linear(in_features=512, out_features=10, bias=True)
  )
)

In [30]:
# criterion + optimizer

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9, weight_decay=0.001)

In [31]:
epochs = 50

running_loss_history = []
running_corrects_history = []
test_acc_history=[]

for e in range(epochs): # training our model, put input according to every batch.
  
    running_loss = 0.0
    running_corrects = 0.0


    for inputs, labels in trainloader:
        
        inputs = inputs.to(device) # input to device as our model is running in mentioned device.
        labels = labels.to(device)
        outputs = net(inputs) # every batch of 100 images are put as an input.
        loss = criterion(outputs, labels) # Calc loss after each batch i/p by comparing it to actual labels. 

        optimizer.zero_grad() #setting the initial gradient to 0
        loss.backward() # backpropagating the loss
        optimizer.step() # updating the weights and bias values for every single step.

        _, preds = torch.max(outputs, 1) # taking the highest value of prediction.
        running_loss += loss.item()
        running_corrects += torch.sum(preds == labels.data) # calculating te accuracy by taking the sum of all the correct predictions in a batch.

        
    epoch_loss = running_loss/len(trainset) # loss per epoch
    epoch_acc = 100*(running_corrects.float()/ len(trainset)) # accuracy per epoch
    running_loss_history.append(epoch_loss) # appending for displaying 
    running_corrects_history.append(epoch_acc)
                
    print('epoch :', (e+1))
    print('training loss: {:.4f}, acc {:.4f} '.format(epoch_loss, epoch_acc.item()))
    
    # testing

    correct = 0
    total = 0

    # since we're not training, we don't need to calculate the gradients for our outputs
    with torch.no_grad():
        for data in testloader:
            images, labels = data[0].to(device), data[1].to(device)
            # calculate outputs by running images through the network
            outputs = net(images)
            # the class with the highest energy is what we choose as prediction
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    
    test_acc_history.append(100 * correct / total)
    print(f'Accuracy of the network on the 10000 test images: {100 * correct / total} %')

epoch : 1
training loss: 0.0030, acc 90.5750 
Accuracy of the network on the 10000 test images: 97.77 %
epoch : 2
training loss: 0.0005, acc 98.0800 
Accuracy of the network on the 10000 test images: 98.49 %
epoch : 3
training loss: 0.0004, acc 98.6900 
Accuracy of the network on the 10000 test images: 98.74 %
epoch : 4
training loss: 0.0003, acc 98.9083 
Accuracy of the network on the 10000 test images: 98.83 %
epoch : 5
training loss: 0.0002, acc 99.1667 
Accuracy of the network on the 10000 test images: 99.0 %
epoch : 6
training loss: 0.0002, acc 99.2700 
Accuracy of the network on the 10000 test images: 99.09 %
epoch : 7
training loss: 0.0002, acc 99.4000 
Accuracy of the network on the 10000 test images: 99.0 %
epoch : 8
training loss: 0.0002, acc 99.4950 
Accuracy of the network on the 10000 test images: 99.0 %
epoch : 9
training loss: 0.0001, acc 99.5683 
Accuracy of the network on the 10000 test images: 99.04 %
epoch : 10
training loss: 0.0001, acc 99.6217 
Accuracy of the netw

In [32]:
running_loss_history = torch.tensor(running_loss_history)
running_loss_history.numpy()
running_corrects_history = torch.tensor(running_corrects_history)
running_corrects_history.numpy()
test_acc_history = torch.tensor(test_acc_history)
test_acc_history.numpy()




results['net7 - training loss'] = running_loss_history
results['net7 - training accuracy'] = running_corrects_history
results['net7 - testing accuracy'] = test_acc_history

results.to_csv('net_results_mnist.csv')

## 6 Layers

In [33]:
# define network

class Net_6_2(nn.Module):
    def __init__(self):
        super().__init__()
        self.network = nn.Sequential(
            
            #block 1
            nn.Conv2d(1, 32, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.BatchNorm2d(32),
            
            #block 2
            nn.Conv2d(32, 32, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            #nn.MaxPool2d(2, 2), max pooling operators removed after first block
            nn.BatchNorm2d(64),
            
            #block 3
            nn.Conv2d(64, 64, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            #nn.MaxPool2d(2, 2), max pooling operators removed after first block
            nn.BatchNorm2d(128),
            
            # -----------------------------------------------
            
            nn.Flatten(),
            nn.Linear(25088, 1024), #FC1    
            nn.ReLU(),
            nn.Linear(1024, 512), #FC2
            nn.ReLU(),
            nn.Linear(512, 10)) #FC3
    
        
    def forward(self, x):
        return self.network(x)


net = Net_6_2()
net.to(device)

Net_6_2(
  (network): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU()
    (8): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU()
    (10): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (11): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (12): ReLU()
    (13): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (14): ReLU()
    (15): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (16): Flatten(start_dim=1, end_dim=-1)
    (17): Linear(in

In [34]:
# criterion + optimizer

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9, weight_decay=0.001)

In [35]:
epochs = 50

running_loss_history = []
running_corrects_history = []
test_acc_history=[]

for e in range(epochs): # training our model, put input according to every batch.
  
    running_loss = 0.0
    running_corrects = 0.0


    for inputs, labels in trainloader:
        
        inputs = inputs.to(device) # input to device as our model is running in mentioned device.
        labels = labels.to(device)
        outputs = net(inputs) # every batch of 100 images are put as an input.
        loss = criterion(outputs, labels) # Calc loss after each batch i/p by comparing it to actual labels. 

        optimizer.zero_grad() #setting the initial gradient to 0
        loss.backward() # backpropagating the loss
        optimizer.step() # updating the weights and bias values for every single step.

        _, preds = torch.max(outputs, 1) # taking the highest value of prediction.
        running_loss += loss.item()
        running_corrects += torch.sum(preds == labels.data) # calculating te accuracy by taking the sum of all the correct predictions in a batch.

        
    epoch_loss = running_loss/len(trainset) # loss per epoch
    epoch_acc = 100*(running_corrects.float()/ len(trainset)) # accuracy per epoch
    running_loss_history.append(epoch_loss) # appending for displaying 
    running_corrects_history.append(epoch_acc)
                
    print('epoch :', (e+1))
    print('training loss: {:.4f}, acc {:.4f} '.format(epoch_loss, epoch_acc.item()))
    
    # testing

    correct = 0
    total = 0

    # since we're not training, we don't need to calculate the gradients for our outputs
    with torch.no_grad():
        for data in testloader:
            images, labels = data[0].to(device), data[1].to(device)
            # calculate outputs by running images through the network
            outputs = net(images)
            # the class with the highest energy is what we choose as prediction
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    
    test_acc_history.append(100 * correct / total)
    print(f'Accuracy of the network on the 10000 test images: {100 * correct / total} %')

epoch : 1
training loss: 0.0024, acc 92.5867 
Accuracy of the network on the 10000 test images: 98.35 %
epoch : 2
training loss: 0.0004, acc 98.5567 
Accuracy of the network on the 10000 test images: 98.72 %
epoch : 3
training loss: 0.0003, acc 99.0033 
Accuracy of the network on the 10000 test images: 98.94 %
epoch : 4
training loss: 0.0002, acc 99.2967 
Accuracy of the network on the 10000 test images: 99.14 %
epoch : 5
training loss: 0.0002, acc 99.4900 
Accuracy of the network on the 10000 test images: 99.25 %
epoch : 6
training loss: 0.0001, acc 99.6050 
Accuracy of the network on the 10000 test images: 99.32 %
epoch : 7
training loss: 0.0001, acc 99.7017 
Accuracy of the network on the 10000 test images: 99.22 %
epoch : 8
training loss: 0.0001, acc 99.7850 
Accuracy of the network on the 10000 test images: 99.27 %
epoch : 9
training loss: 0.0001, acc 99.8433 
Accuracy of the network on the 10000 test images: 99.26 %
epoch : 10
training loss: 0.0001, acc 99.8883 
Accuracy of the n

In [36]:
running_loss_history = torch.tensor(running_loss_history)
running_loss_history.numpy()
running_corrects_history = torch.tensor(running_corrects_history)
running_corrects_history.numpy()
test_acc_history = torch.tensor(test_acc_history)
test_acc_history.numpy()




results['net8 - training loss'] = running_loss_history
results['net8 - training accuracy'] = running_corrects_history
results['net8 - testing accuracy'] = test_acc_history

results.to_csv('net_results_mnist.csv')

## 8 Layers

In [37]:
# define network

class Net_8_2(nn.Module):
    def __init__(self):
        super().__init__()
        self.network = nn.Sequential(
            
            #block 1
            nn.Conv2d(1, 32, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            nn.MaxPool2d(2, 2), 
            nn.BatchNorm2d(32),
            
            #block 2
            nn.Conv2d(32, 32, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            #nn.MaxPool2d(2, 2), max pooling operators removed after first block
            nn.BatchNorm2d(64),
            
            #block 3
            nn.Conv2d(64, 64, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            #nn.MaxPool2d(2, 2), max pooling operators removed after first block
            nn.BatchNorm2d(128),
             
            #block 4
            nn.Conv2d(128, 128, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            #nn.MaxPool2d(2, 2), max pooling operators removed after first block
            nn.BatchNorm2d(256),
            
            # -----------------------------------------------
            
            nn.Flatten(),
            nn.Linear(50176, 1024), #FC1    
            nn.ReLU(),
            nn.Linear(1024, 512), #FC2
            nn.ReLU(),
            nn.Linear(512, 10)) #FC3
    
        
    def forward(self, x):
        return self.network(x)


net = Net_8_2()
net.to(device)

Net_8_2(
  (network): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU()
    (8): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU()
    (10): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (11): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (12): ReLU()
    (13): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (14): ReLU()
    (15): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (16): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), 

In [38]:
# criterion + optimizer

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9, weight_decay=0.001)

In [39]:
epochs = 50

running_loss_history = []
running_corrects_history = []
test_acc_history=[]

for e in range(epochs): # training our model, put input according to every batch.
  
    running_loss = 0.0
    running_corrects = 0.0


    for inputs, labels in trainloader:
        
        inputs = inputs.to(device) # input to device as our model is running in mentioned device.
        labels = labels.to(device)
        outputs = net(inputs) # every batch of 100 images are put as an input.
        loss = criterion(outputs, labels) # Calc loss after each batch i/p by comparing it to actual labels. 

        optimizer.zero_grad() #setting the initial gradient to 0
        loss.backward() # backpropagating the loss
        optimizer.step() # updating the weights and bias values for every single step.

        _, preds = torch.max(outputs, 1) # taking the highest value of prediction.
        running_loss += loss.item()
        running_corrects += torch.sum(preds == labels.data) # calculating te accuracy by taking the sum of all the correct predictions in a batch.

        
    epoch_loss = running_loss/len(trainset) # loss per epoch
    epoch_acc = 100*(running_corrects.float()/ len(trainset)) # accuracy per epoch
    running_loss_history.append(epoch_loss) # appending for displaying 
    running_corrects_history.append(epoch_acc)
                
    print('epoch :', (e+1))
    print('training loss: {:.4f}, acc {:.4f} '.format(epoch_loss, epoch_acc.item()))
    
    # testing

    correct = 0
    total = 0

    # since we're not training, we don't need to calculate the gradients for our outputs
    with torch.no_grad():
        for data in testloader:
            images, labels = data[0].to(device), data[1].to(device)
            # calculate outputs by running images through the network
            outputs = net(images)
            # the class with the highest energy is what we choose as prediction
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    
    test_acc_history.append(100 * correct / total)
    print(f'Accuracy of the network on the 10000 test images: {100 * correct / total} %')

epoch : 1
training loss: 0.0019, acc 93.4833 
Accuracy of the network on the 10000 test images: 98.48 %
epoch : 2
training loss: 0.0003, acc 98.7983 
Accuracy of the network on the 10000 test images: 99.06 %
epoch : 3
training loss: 0.0002, acc 99.2500 
Accuracy of the network on the 10000 test images: 99.16 %
epoch : 4
training loss: 0.0001, acc 99.4650 
Accuracy of the network on the 10000 test images: 99.23 %
epoch : 5
training loss: 0.0001, acc 99.6717 
Accuracy of the network on the 10000 test images: 99.18 %
epoch : 6
training loss: 0.0001, acc 99.7967 
Accuracy of the network on the 10000 test images: 99.18 %
epoch : 7
training loss: 0.0001, acc 99.8617 
Accuracy of the network on the 10000 test images: 99.26 %
epoch : 8
training loss: 0.0000, acc 99.9317 
Accuracy of the network on the 10000 test images: 99.3 %
epoch : 9
training loss: 0.0000, acc 99.9683 
Accuracy of the network on the 10000 test images: 99.28 %
epoch : 10
training loss: 0.0000, acc 99.9817 
Accuracy of the ne

In [40]:
running_loss_history = torch.tensor(running_loss_history)
running_loss_history.numpy()
running_corrects_history = torch.tensor(running_corrects_history)
running_corrects_history.numpy()
test_acc_history = torch.tensor(test_acc_history)
test_acc_history.numpy()




results['net9 - training loss'] = running_loss_history
results['net9 - training accuracy'] = running_corrects_history
results['net9 - testing accuracy'] = test_acc_history

results.to_csv('net_results_mnist.csv')

## 10 Layers 

In [41]:
# define network

class Net_10_2(nn.Module):
    def __init__(self):
        super().__init__()
        self.network = nn.Sequential(
            
            #block 1
            nn.Conv2d(1, 32, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            nn.MaxPool2d(2, 2), 
            nn.BatchNorm2d(32),
            
            #block 2
            nn.Conv2d(32, 32, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            #nn.MaxPool2d(2, 2), max pooling operators removed after first block
            nn.BatchNorm2d(64),
            
            #block 3
            nn.Conv2d(64, 64, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            #nn.MaxPool2d(2, 2), max pooling operators removed after first block
            nn.BatchNorm2d(128),
             
            #block 4
            nn.Conv2d(128, 128, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            #nn.MaxPool2d(2, 2), max pooling operators removed after first block
            nn.BatchNorm2d(256),
            
            #block 5
            nn.Conv2d(256, 256, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            #nn.MaxPool2d(2, 2), max pooling operators removed after first block
            nn.BatchNorm2d(512),
        
            
            # -----------------------------------------------
            
            nn.Flatten(),
            nn.Linear(100352, 1024), #FC1    
            nn.ReLU(),
            nn.Linear(1024, 512), #FC2
            nn.ReLU(),
            nn.Linear(512, 10)) #FC3
    
        
    def forward(self, x):
        return self.network(x)


net = Net_10_2()
net.to(device)

Net_10_2(
  (network): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU()
    (8): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU()
    (10): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (11): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (12): ReLU()
    (13): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (14): ReLU()
    (15): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (16): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1),

In [42]:
# criterion + optimizer

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9, weight_decay=0.001)

In [43]:
epochs = 50

running_loss_history = []
running_corrects_history = []
test_acc_history=[]

for e in range(epochs): # training our model, put input according to every batch.
  
    running_loss = 0.0
    running_corrects = 0.0


    for inputs, labels in trainloader:
        
        inputs = inputs.to(device) # input to device as our model is running in mentioned device.
        labels = labels.to(device)
        outputs = net(inputs) # every batch of 100 images are put as an input.
        loss = criterion(outputs, labels) # Calc loss after each batch i/p by comparing it to actual labels. 

        optimizer.zero_grad() #setting the initial gradient to 0
        loss.backward() # backpropagating the loss
        optimizer.step() # updating the weights and bias values for every single step.

        _, preds = torch.max(outputs, 1) # taking the highest value of prediction.
        running_loss += loss.item()
        running_corrects += torch.sum(preds == labels.data) # calculating te accuracy by taking the sum of all the correct predictions in a batch.

        
    epoch_loss = running_loss/len(trainset) # loss per epoch
    epoch_acc = 100*(running_corrects.float()/ len(trainset)) # accuracy per epoch
    running_loss_history.append(epoch_loss) # appending for displaying 
    running_corrects_history.append(epoch_acc)
                
    print('epoch :', (e+1))
    print('training loss: {:.4f}, acc {:.4f} '.format(epoch_loss, epoch_acc.item()))
    
    # testing

    correct = 0
    total = 0

    # since we're not training, we don't need to calculate the gradients for our outputs
    with torch.no_grad():
        for data in testloader:
            images, labels = data[0].to(device), data[1].to(device)
            # calculate outputs by running images through the network
            outputs = net(images)
            # the class with the highest energy is what we choose as prediction
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    
    test_acc_history.append(100 * correct / total)
    print(f'Accuracy of the network on the 10000 test images: {100 * correct / total} %')

epoch : 1
training loss: 0.0016, acc 94.6050 
Accuracy of the network on the 10000 test images: 98.86 %
epoch : 2
training loss: 0.0002, acc 99.1383 
Accuracy of the network on the 10000 test images: 99.11 %
epoch : 3
training loss: 0.0001, acc 99.5383 
Accuracy of the network on the 10000 test images: 99.09 %
epoch : 4
training loss: 0.0001, acc 99.7917 
Accuracy of the network on the 10000 test images: 99.33 %
epoch : 5
training loss: 0.0000, acc 99.9217 
Accuracy of the network on the 10000 test images: 99.38 %
epoch : 6
training loss: 0.0000, acc 99.9650 
Accuracy of the network on the 10000 test images: 99.27 %
epoch : 7
training loss: 0.0000, acc 99.9933 
Accuracy of the network on the 10000 test images: 99.39 %
epoch : 8
training loss: 0.0000, acc 99.9967 
Accuracy of the network on the 10000 test images: 99.27 %
epoch : 9
training loss: 0.0000, acc 99.9983 
Accuracy of the network on the 10000 test images: 99.34 %
epoch : 10
training loss: 0.0000, acc 99.9983 
Accuracy of the n

In [44]:
running_loss_history = torch.tensor(running_loss_history)
running_loss_history.numpy()
running_corrects_history = torch.tensor(running_corrects_history)
running_corrects_history.numpy()
test_acc_history = torch.tensor(test_acc_history)
test_acc_history.numpy()




results['net10 - training loss'] = running_loss_history
results['net10 - training accuracy'] = running_corrects_history
results['net10 - testing accuracy'] = test_acc_history

results.to_csv('net_results_mnist.csv')

# ------------------- Added Convolutional Layer per Block -------------------------

## 2 + 1

In [45]:
# define network

class Net_2_3(nn.Module):
    def __init__(self):
        super().__init__()
        self.network = nn.Sequential(
            
            #block 1
            nn.Conv2d(1, 32, kernel_size=3, padding=1), 
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1), # extra
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.BatchNorm2d(32),

            
            # -----------------------------------------------
            
            nn.Flatten(),
            nn.Linear(6272, 1024), #FC1    
            nn.ReLU(),
            nn.Linear(1024, 512), #FC2
            nn.ReLU(),
            nn.Linear(512, 10)) #FC3
    
        
    def forward(self, x):
        return self.network(x)


net = Net_2_3()
net.to(device)

Net_2_3(
  (network): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (5): ReLU()
    (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (7): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (8): Flatten(start_dim=1, end_dim=-1)
    (9): Linear(in_features=6272, out_features=1024, bias=True)
    (10): ReLU()
    (11): Linear(in_features=1024, out_features=512, bias=True)
    (12): ReLU()
    (13): Linear(in_features=512, out_features=10, bias=True)
  )
)

In [46]:
# criterion + optimizer

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9, weight_decay=0.001)

In [47]:
epochs = 50

running_loss_history = []
running_corrects_history = []
test_acc_history=[]

for e in range(epochs): # training our model, put input according to every batch.
  
    running_loss = 0.0
    running_corrects = 0.0


    for inputs, labels in trainloader:
        
        inputs = inputs.to(device) # input to device as our model is running in mentioned device.
        labels = labels.to(device)
        outputs = net(inputs) # every batch of 100 images are put as an input.
        loss = criterion(outputs, labels) # Calc loss after each batch i/p by comparing it to actual labels. 

        optimizer.zero_grad() #setting the initial gradient to 0
        loss.backward() # backpropagating the loss
        optimizer.step() # updating the weights and bias values for every single step.

        _, preds = torch.max(outputs, 1) # taking the highest value of prediction.
        running_loss += loss.item()
        running_corrects += torch.sum(preds == labels.data) # calculating te accuracy by taking the sum of all the correct predictions in a batch.

        
    epoch_loss = running_loss/len(trainset) # loss per epoch
    epoch_acc = 100*(running_corrects.float()/ len(trainset)) # accuracy per epoch
    running_loss_history.append(epoch_loss) # appending for displaying 
    running_corrects_history.append(epoch_acc)
                
    print('epoch :', (e+1))
    print('training loss: {:.4f}, acc {:.4f} '.format(epoch_loss, epoch_acc.item()))
    
    # testing

    correct = 0
    total = 0

    # since we're not training, we don't need to calculate the gradients for our outputs
    with torch.no_grad():
        for data in testloader:
            images, labels = data[0].to(device), data[1].to(device)
            # calculate outputs by running images through the network
            outputs = net(images)
            # the class with the highest energy is what we choose as prediction
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    
    test_acc_history.append(100 * correct / total)
    print(f'Accuracy of the network on the 10000 test images: {100 * correct / total} %')

epoch : 1
training loss: 0.0040, acc 89.1467 
Accuracy of the network on the 10000 test images: 96.88 %
epoch : 2
training loss: 0.0008, acc 97.3683 
Accuracy of the network on the 10000 test images: 98.06 %
epoch : 3
training loss: 0.0005, acc 98.2217 
Accuracy of the network on the 10000 test images: 98.51 %
epoch : 4
training loss: 0.0004, acc 98.5733 
Accuracy of the network on the 10000 test images: 98.7 %
epoch : 5
training loss: 0.0003, acc 98.8083 
Accuracy of the network on the 10000 test images: 98.79 %
epoch : 6
training loss: 0.0003, acc 98.9967 
Accuracy of the network on the 10000 test images: 98.9 %
epoch : 7
training loss: 0.0003, acc 99.1450 
Accuracy of the network on the 10000 test images: 98.86 %
epoch : 8
training loss: 0.0002, acc 99.2200 
Accuracy of the network on the 10000 test images: 98.96 %
epoch : 9
training loss: 0.0002, acc 99.2683 
Accuracy of the network on the 10000 test images: 98.96 %
epoch : 10
training loss: 0.0002, acc 99.3867 
Accuracy of the net

In [48]:
running_loss_history = torch.tensor(running_loss_history)
running_loss_history.numpy()
running_corrects_history = torch.tensor(running_corrects_history)
running_corrects_history.numpy()
test_acc_history = torch.tensor(test_acc_history)
test_acc_history.numpy()




results['net11 - training loss'] = running_loss_history
results['net11 - training accuracy'] = running_corrects_history
results['net11 - testing accuracy'] = test_acc_history

results.to_csv('net_results_mnist.csv')

## 4 + 2

In [49]:
# define network

class Net_4_3(nn.Module):
    def __init__(self):
        super().__init__()
        self.network = nn.Sequential(
            
            #block 1
            nn.Conv2d(1, 32, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1), # extra
            nn.ReLU(),
            nn.MaxPool2d(2, 2), 
            nn.BatchNorm2d(32),
            
            #block 2
            nn.Conv2d(32, 32, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=3, padding=1), # extra
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            #nn.MaxPool2d(2, 2), max pooling operators removed after first block
            nn.BatchNorm2d(64),
            
            # -----------------------------------------------
            
            nn.Flatten(),
            nn.Linear(12544, 1024), #FC1    
            nn.ReLU(),
            nn.Linear(1024, 512), #FC2
            nn.ReLU(),
            nn.Linear(512, 10)) #FC3
    
        
    def forward(self, x):
        return self.network(x)


net = Net_4_3()
net.to(device)

Net_4_3(
  (network): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (5): ReLU()
    (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (7): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (8): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU()
    (10): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU()
    (12): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU()
    (14): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (15): Flatten(start_dim=1, end_dim=-1)
    (16): Linear(in_features=12544, out_features=1024, bias=True)
    (17): ReLU()
    (18): Linear(in_features

In [50]:
# criterion + optimizer

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9, weight_decay=0.001)

In [51]:
epochs = 50

running_loss_history = []
running_corrects_history = []
test_acc_history=[]

for e in range(epochs): # training our model, put input according to every batch.
  
    running_loss = 0.0
    running_corrects = 0.0


    for inputs, labels in trainloader:
        
        inputs = inputs.to(device) # input to device as our model is running in mentioned device.
        labels = labels.to(device)
        outputs = net(inputs) # every batch of 100 images are put as an input.
        loss = criterion(outputs, labels) # Calc loss after each batch i/p by comparing it to actual labels. 

        optimizer.zero_grad() #setting the initial gradient to 0
        loss.backward() # backpropagating the loss
        optimizer.step() # updating the weights and bias values for every single step.

        _, preds = torch.max(outputs, 1) # taking the highest value of prediction.
        running_loss += loss.item()
        running_corrects += torch.sum(preds == labels.data) # calculating te accuracy by taking the sum of all the correct predictions in a batch.

        
    epoch_loss = running_loss/len(trainset) # loss per epoch
    epoch_acc = 100*(running_corrects.float()/ len(trainset)) # accuracy per epoch
    running_loss_history.append(epoch_loss) # appending for displaying 
    running_corrects_history.append(epoch_acc)
                
    print('epoch :', (e+1))
    print('training loss: {:.4f}, acc {:.4f} '.format(epoch_loss, epoch_acc.item()))
    
    # testing

    correct = 0
    total = 0

    # since we're not training, we don't need to calculate the gradients for our outputs
    with torch.no_grad():
        for data in testloader:
            images, labels = data[0].to(device), data[1].to(device)
            # calculate outputs by running images through the network
            outputs = net(images)
            # the class with the highest energy is what we choose as prediction
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    
    test_acc_history.append(100 * correct / total)
    print(f'Accuracy of the network on the 10000 test images: {100 * correct / total} %')

epoch : 1
training loss: 0.0029, acc 91.6683 
Accuracy of the network on the 10000 test images: 98.31 %
epoch : 2
training loss: 0.0005, acc 98.2750 
Accuracy of the network on the 10000 test images: 98.85 %
epoch : 3
training loss: 0.0003, acc 98.8400 
Accuracy of the network on the 10000 test images: 99.09 %
epoch : 4
training loss: 0.0003, acc 99.0200 
Accuracy of the network on the 10000 test images: 99.18 %
epoch : 5
training loss: 0.0002, acc 99.2783 
Accuracy of the network on the 10000 test images: 99.08 %
epoch : 6
training loss: 0.0002, acc 99.2917 
Accuracy of the network on the 10000 test images: 99.22 %
epoch : 7
training loss: 0.0002, acc 99.4350 
Accuracy of the network on the 10000 test images: 99.24 %
epoch : 8
training loss: 0.0001, acc 99.5067 
Accuracy of the network on the 10000 test images: 99.22 %
epoch : 9
training loss: 0.0001, acc 99.5983 
Accuracy of the network on the 10000 test images: 99.36 %
epoch : 10
training loss: 0.0001, acc 99.6717 
Accuracy of the n

In [52]:
running_loss_history = torch.tensor(running_loss_history)
running_loss_history.numpy()
running_corrects_history = torch.tensor(running_corrects_history)
running_corrects_history.numpy()
test_acc_history = torch.tensor(test_acc_history)
test_acc_history.numpy()




results['net12 - training loss'] = running_loss_history
results['net12 - training accuracy'] = running_corrects_history
results['net12 - testing accuracy'] = test_acc_history

results.to_csv('net_results_mnist.csv')

## 6 + 3

In [53]:
# define network

class Net_6_3(nn.Module):
    def __init__(self):
        super().__init__()
        self.network = nn.Sequential(
            
            #block 1
            nn.Conv2d(1, 32, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.BatchNorm2d(32),
            
            #block 2
            nn.Conv2d(32, 32, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            #nn.MaxPool2d(2, 2), max pooling operators removed after first block
            nn.BatchNorm2d(64),
            
            #block 3
            nn.Conv2d(64, 64, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            #nn.MaxPool2d(2, 2), max pooling operators removed after first block
            nn.BatchNorm2d(128),
            
            # -----------------------------------------------
            
            nn.Flatten(),
            nn.Linear(25088, 1024), #FC1    
            nn.ReLU(),
            nn.Linear(1024, 512), #FC2
            nn.ReLU(),
            nn.Linear(512, 10)) #FC3
    
        
    def forward(self, x):
        return self.network(x)


net = Net_6_3()
net.to(device)

Net_6_3(
  (network): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (5): ReLU()
    (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (7): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (8): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU()
    (10): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU()
    (12): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU()
    (14): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (15): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (16): ReLU()
    (17): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), p

In [54]:
# criterion + optimizer

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9, weight_decay=0.001)

In [55]:
epochs = 50

running_loss_history = []
running_corrects_history = []
test_acc_history=[]

for e in range(epochs): # training our model, put input according to every batch.
  
    running_loss = 0.0
    running_corrects = 0.0


    for inputs, labels in trainloader:
        
        inputs = inputs.to(device) # input to device as our model is running in mentioned device.
        labels = labels.to(device)
        outputs = net(inputs) # every batch of 100 images are put as an input.
        loss = criterion(outputs, labels) # Calc loss after each batch i/p by comparing it to actual labels. 

        optimizer.zero_grad() #setting the initial gradient to 0
        loss.backward() # backpropagating the loss
        optimizer.step() # updating the weights and bias values for every single step.

        _, preds = torch.max(outputs, 1) # taking the highest value of prediction.
        running_loss += loss.item()
        running_corrects += torch.sum(preds == labels.data) # calculating te accuracy by taking the sum of all the correct predictions in a batch.

        
    epoch_loss = running_loss/len(trainset) # loss per epoch
    epoch_acc = 100*(running_corrects.float()/ len(trainset)) # accuracy per epoch
    running_loss_history.append(epoch_loss) # appending for displaying 
    running_corrects_history.append(epoch_acc)
                
    print('epoch :', (e+1))
    print('training loss: {:.4f}, acc {:.4f} '.format(epoch_loss, epoch_acc.item()))
    
    # testing

    correct = 0
    total = 0

    # since we're not training, we don't need to calculate the gradients for our outputs
    with torch.no_grad():
        for data in testloader:
            images, labels = data[0].to(device), data[1].to(device)
            # calculate outputs by running images through the network
            outputs = net(images)
            # the class with the highest energy is what we choose as prediction
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    
    test_acc_history.append(100 * correct / total)
    print(f'Accuracy of the network on the 10000 test images: {100 * correct / total} %')

epoch : 1
training loss: 0.0021, acc 93.5967 
Accuracy of the network on the 10000 test images: 98.6 %
epoch : 2
training loss: 0.0004, acc 98.6967 
Accuracy of the network on the 10000 test images: 98.93 %
epoch : 3
training loss: 0.0003, acc 99.0733 
Accuracy of the network on the 10000 test images: 99.09 %
epoch : 4
training loss: 0.0002, acc 99.2783 
Accuracy of the network on the 10000 test images: 99.14 %
epoch : 5
training loss: 0.0002, acc 99.4433 
Accuracy of the network on the 10000 test images: 99.18 %
epoch : 6
training loss: 0.0001, acc 99.5217 
Accuracy of the network on the 10000 test images: 99.17 %
epoch : 7
training loss: 0.0001, acc 99.6683 
Accuracy of the network on the 10000 test images: 99.3 %
epoch : 8
training loss: 0.0001, acc 99.7350 
Accuracy of the network on the 10000 test images: 99.44 %
epoch : 9
training loss: 0.0001, acc 99.7933 
Accuracy of the network on the 10000 test images: 99.47 %
epoch : 10
training loss: 0.0001, acc 99.8817 
Accuracy of the net

In [56]:
running_loss_history = torch.tensor(running_loss_history)
running_loss_history.numpy()
running_corrects_history = torch.tensor(running_corrects_history)
running_corrects_history.numpy()
test_acc_history = torch.tensor(test_acc_history)
test_acc_history.numpy()




results['net13 - training loss'] = running_loss_history
results['net13 - training accuracy'] = running_corrects_history
results['net13 - testing accuracy'] = test_acc_history

results.to_csv('net_results_mnist.csv')

## 8 + 4

In [57]:
# define network

class Net_8_3(nn.Module):
    def __init__(self):
        super().__init__()
        self.network = nn.Sequential(
            
            #block 1
            nn.Conv2d(1, 32, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            nn.MaxPool2d(2, 2), 
            nn.BatchNorm2d(32),
            
            #block 2
            nn.Conv2d(32, 32, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            #nn.MaxPool2d(2, 2), max pooling operators removed after first block
            nn.BatchNorm2d(64),
            
            #block 3
            nn.Conv2d(64, 64, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            #nn.MaxPool2d(2, 2), max pooling operators removed after first block
            nn.BatchNorm2d(128),
             
            #block 4
            nn.Conv2d(128, 128, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(128, 128, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            #nn.MaxPool2d(2, 2), max pooling operators removed after first block
            nn.BatchNorm2d(256),
            
            # -----------------------------------------------
            
            nn.Flatten(),
            nn.Linear(50176, 1024), #FC1    
            nn.ReLU(),
            nn.Linear(1024, 512), #FC2
            nn.ReLU(),
            nn.Linear(512, 10)) #FC3
    
        
    def forward(self, x):
        return self.network(x)


net = Net_8_3()
net.to(device)

Net_8_3(
  (network): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (5): ReLU()
    (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (7): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (8): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU()
    (10): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU()
    (12): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU()
    (14): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (15): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (16): ReLU()
    (17): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), p

In [58]:
# criterion + optimizer

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9, weight_decay=0.001)

In [59]:
epochs = 50

running_loss_history = []
running_corrects_history = []
test_acc_history=[]

for e in range(epochs): # training our model, put input according to every batch.
  
    running_loss = 0.0
    running_corrects = 0.0


    for inputs, labels in trainloader:
        
        inputs = inputs.to(device) # input to device as our model is running in mentioned device.
        labels = labels.to(device)
        outputs = net(inputs) # every batch of 100 images are put as an input.
        loss = criterion(outputs, labels) # Calc loss after each batch i/p by comparing it to actual labels. 

        optimizer.zero_grad() #setting the initial gradient to 0
        loss.backward() # backpropagating the loss
        optimizer.step() # updating the weights and bias values for every single step.

        _, preds = torch.max(outputs, 1) # taking the highest value of prediction.
        running_loss += loss.item()
        running_corrects += torch.sum(preds == labels.data) # calculating te accuracy by taking the sum of all the correct predictions in a batch.

        
    epoch_loss = running_loss/len(trainset) # loss per epoch
    epoch_acc = 100*(running_corrects.float()/ len(trainset)) # accuracy per epoch
    running_loss_history.append(epoch_loss) # appending for displaying 
    running_corrects_history.append(epoch_acc)
                
    print('epoch :', (e+1))
    print('training loss: {:.4f}, acc {:.4f} '.format(epoch_loss, epoch_acc.item()))
    
    # testing

    correct = 0
    total = 0

    # since we're not training, we don't need to calculate the gradients for our outputs
    with torch.no_grad():
        for data in testloader:
            images, labels = data[0].to(device), data[1].to(device)
            # calculate outputs by running images through the network
            outputs = net(images)
            # the class with the highest energy is what we choose as prediction
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    
    test_acc_history.append(100 * correct / total)
    print(f'Accuracy of the network on the 10000 test images: {100 * correct / total} %')

epoch : 1
training loss: 0.0020, acc 93.2017 
Accuracy of the network on the 10000 test images: 98.67 %
epoch : 2
training loss: 0.0003, acc 98.8700 
Accuracy of the network on the 10000 test images: 99.13 %
epoch : 3
training loss: 0.0002, acc 99.2267 
Accuracy of the network on the 10000 test images: 99.25 %
epoch : 4
training loss: 0.0001, acc 99.4867 
Accuracy of the network on the 10000 test images: 99.19 %
epoch : 5
training loss: 0.0001, acc 99.6533 
Accuracy of the network on the 10000 test images: 99.35 %
epoch : 6
training loss: 0.0001, acc 99.7333 
Accuracy of the network on the 10000 test images: 99.36 %
epoch : 7
training loss: 0.0001, acc 99.8333 
Accuracy of the network on the 10000 test images: 99.24 %
epoch : 8
training loss: 0.0000, acc 99.9033 
Accuracy of the network on the 10000 test images: 99.31 %
epoch : 9
training loss: 0.0000, acc 99.9633 
Accuracy of the network on the 10000 test images: 99.43 %
epoch : 10
training loss: 0.0000, acc 99.9567 
Accuracy of the n

In [60]:
running_loss_history = torch.tensor(running_loss_history)
running_loss_history.numpy()
running_corrects_history = torch.tensor(running_corrects_history)
running_corrects_history.numpy()
test_acc_history = torch.tensor(test_acc_history)
test_acc_history.numpy()




results['net14 - training loss'] = running_loss_history
results['net14 - training accuracy'] = running_corrects_history
results['net14 - testing accuracy'] = test_acc_history

results.to_csv('net_results_mnist.csv')

## 10 + 5

In [4]:
# define network

class Net_10_3(nn.Module):
    def __init__(self):
        super().__init__()
        self.network = nn.Sequential(
            
            #block 1
            nn.Conv2d(1, 32, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            nn.MaxPool2d(2, 2), 
            nn.BatchNorm2d(32),
            
            #block 2
            nn.Conv2d(32, 32, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            #nn.MaxPool2d(2, 2), max pooling operators removed after first block
            nn.BatchNorm2d(64),
            
            #block 3
            nn.Conv2d(64, 64, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            #nn.MaxPool2d(2, 2), max pooling operators removed after first block
            nn.BatchNorm2d(128),
             
            #block 4
            nn.Conv2d(128, 128, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(128, 128, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            #nn.MaxPool2d(2, 2), max pooling operators removed after first block
            nn.BatchNorm2d(256),
            
            #block 5
            nn.Conv2d(256, 256, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(256, 256, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            #nn.MaxPool2d(2, 2), max pooling operators removed after first block
            nn.BatchNorm2d(512),
        
            
            # -----------------------------------------------
            
            nn.Flatten(),
            nn.Linear(100352, 1024), #FC1    
            nn.ReLU(),
            nn.Linear(1024, 512), #FC2
            nn.ReLU(),
            nn.Linear(512, 10)) #FC3
    
        
    def forward(self, x):
        return self.network(x)


net = Net_10_3()
net.to(device)

Net_10_3(
  (network): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (5): ReLU()
    (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (7): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (8): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU()
    (10): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU()
    (12): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU()
    (14): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (15): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (16): ReLU()
    (17): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), 

In [5]:
# criterion + optimizer

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9, weight_decay=0.001)

In [7]:
epochs = 50

running_loss_history = []
running_corrects_history = []
test_acc_history=[]

for e in range(epochs): # training our model, put input according to every batch.
  
    running_loss = 0.0
    running_corrects = 0.0


    for inputs, labels in trainloader:
        
        inputs = inputs.to(device) # input to device as our model is running in mentioned device.
        labels = labels.to(device)
        outputs = net(inputs) # every batch of 100 images are put as an input.
        loss = criterion(outputs, labels) # Calc loss after each batch i/p by comparing it to actual labels. 

        optimizer.zero_grad() #setting the initial gradient to 0
        loss.backward() # backpropagating the loss
        optimizer.step() # updating the weights and bias values for every single step.

        _, preds = torch.max(outputs, 1) # taking the highest value of prediction.
        running_loss += loss.item()
        running_corrects += torch.sum(preds == labels.data) # calculating te accuracy by taking the sum of all the correct predictions in a batch.

        
    epoch_loss = running_loss/len(trainset) # loss per epoch
    epoch_acc = 100*(running_corrects.float()/ len(trainset)) # accuracy per epoch
    running_loss_history.append(epoch_loss) # appending for displaying 
    running_corrects_history.append(epoch_acc)
                
    print('epoch :', (e+1))
    print('training loss: {:.4f}, acc {:.4f} '.format(epoch_loss, epoch_acc.item()))
    
    # testing

    correct = 0
    total = 0

    # since we're not training, we don't need to calculate the gradients for our outputs
    with torch.no_grad():
        for data in testloader:
            images, labels = data[0].to(device), data[1].to(device)
            # calculate outputs by running images through the network
            outputs = net(images)
            # the class with the highest energy is what we choose as prediction
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    
    test_acc_history.append(100 * correct / total)
    print(f'Accuracy of the network on the 10000 test images: {100 * correct / total} %')

epoch : 1
training loss: 0.0017, acc 93.5667 
Accuracy of the network on the 10000 test images: 98.99 %
epoch : 2
training loss: 0.0003, acc 99.0183 
Accuracy of the network on the 10000 test images: 99.15 %
epoch : 3
training loss: 0.0001, acc 99.4583 
Accuracy of the network on the 10000 test images: 99.29 %
epoch : 4
training loss: 0.0001, acc 99.7117 
Accuracy of the network on the 10000 test images: 98.98 %
epoch : 5
training loss: 0.0001, acc 99.8317 
Accuracy of the network on the 10000 test images: 99.4 %
epoch : 6
training loss: 0.0000, acc 99.9450 
Accuracy of the network on the 10000 test images: 99.31 %
epoch : 7
training loss: 0.0000, acc 99.9733 
Accuracy of the network on the 10000 test images: 99.34 %
epoch : 8
training loss: 0.0000, acc 99.9983 
Accuracy of the network on the 10000 test images: 99.39 %
epoch : 9
training loss: 0.0000, acc 99.9983 
Accuracy of the network on the 10000 test images: 99.43 %
epoch : 10
training loss: 0.0000, acc 99.9983 
Accuracy of the ne

In [8]:
running_loss_history = torch.tensor(running_loss_history)
running_loss_history.numpy()
running_corrects_history = torch.tensor(running_corrects_history)
running_corrects_history.numpy()
test_acc_history = torch.tensor(test_acc_history)
test_acc_history.numpy()




results['net15 - training loss'] = running_loss_history
results['net15 - training accuracy'] = running_corrects_history
results['net15 - testing accuracy'] = test_acc_history

results.to_csv('net_results_mnist.csv')