# Package and Data Loading

Inspiration:
https://arxiv.org/pdf/1512.03385.pdf
ResNet architecture - split convolutional layers into blocks based on size of neurons

In [1]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import pandas as pd

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')



In [2]:
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

batch_size = 128

trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                          shuffle=True, num_workers=4, pin_memory=True)

testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
                                         shuffle=False, num_workers=4, pin_memory=True)

classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

Files already downloaded and verified
Files already downloaded and verified


In [8]:

results = pd.DataFrame()

# Base CNN (2 Layers) 

In [4]:
# define network

class Net_2(nn.Module):
    def __init__(self):
        super().__init__()
        self.network = nn.Sequential(
            
            #block 1
            nn.Conv2d(3, 64, kernel_size=3, padding=1), #1 #### change to 64?
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            nn.MaxPool2d(2, 2), # output: 64 x 16 x 16
            nn.BatchNorm2d(64),

            
            # -----------------------------------------------
            
            nn.Flatten(),
            nn.Linear(64*16*16, 1024), #FC1    
            nn.ReLU(),
            nn.Linear(1024, 512), #FC2
            nn.ReLU(),
            nn.Linear(512, 10)) #FC3
    
        
    def forward(self, x):
        return self.network(x)


net = Net_2()
net.to(device)

Net_2(
  (network): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): Flatten(start_dim=1, end_dim=-1)
    (7): Linear(in_features=16384, out_features=1024, bias=True)
    (8): ReLU()
    (9): Linear(in_features=1024, out_features=512, bias=True)
    (10): ReLU()
    (11): Linear(in_features=512, out_features=10, bias=True)
  )
)

In [5]:
# criterion + optimizer

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9, weight_decay=0.001)

In [6]:
epochs = 50

running_loss_history = []
running_corrects_history = []
test_acc_history=[]

for e in range(epochs): # training our model, put input according to every batch.
  
    running_loss = 0.0
    running_corrects = 0.0


    for inputs, labels in trainloader:
        
        inputs = inputs.to(device) # input to device as our model is running in mentioned device.
        labels = labels.to(device)
        outputs = net(inputs) # every batch of 100 images are put as an input.
        loss = criterion(outputs, labels) # Calc loss after each batch i/p by comparing it to actual labels. 

        optimizer.zero_grad() #setting the initial gradient to 0
        loss.backward() # backpropagating the loss
        optimizer.step() # updating the weights and bias values for every single step.

        _, preds = torch.max(outputs, 1) # taking the highest value of prediction.
        running_loss += loss.item()
        running_corrects += torch.sum(preds == labels.data) # calculating te accuracy by taking the sum of all the correct predictions in a batch.

        
    epoch_loss = running_loss/len(trainset) # loss per epoch
    epoch_acc = 100*(running_corrects.float()/ len(trainset)) # accuracy per epoch
    running_loss_history.append(epoch_loss) # appending for displaying 
    running_corrects_history.append(epoch_acc)
                
    print('epoch :', (e+1))
    print('training loss: {:.4f}, acc {:.4f} '.format(epoch_loss, epoch_acc.item()))
    
    # testing

    correct = 0
    total = 0

    # since we're not training, we don't need to calculate the gradients for our outputs
    with torch.no_grad():
        for data in testloader:
            images, labels = data[0].to(device), data[1].to(device)
            # calculate outputs by running images through the network
            outputs = net(images)
            # the class with the highest energy is what we choose as prediction
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    
    test_acc_history.append(100 * correct / total)
    print(f'Accuracy of the network on the 10000 test images: {100 * correct / total} %')


epoch : 1
training loss: 0.0127, acc 42.4120 
Accuracy of the network on the 10000 test images: 54.41 %
epoch : 2
training loss: 0.0088, acc 59.7600 
Accuracy of the network on the 10000 test images: 61.8 %
epoch : 3
training loss: 0.0071, acc 68.2540 
Accuracy of the network on the 10000 test images: 66.86 %
epoch : 4
training loss: 0.0060, acc 73.6020 
Accuracy of the network on the 10000 test images: 70.23 %
epoch : 5
training loss: 0.0051, acc 77.8820 
Accuracy of the network on the 10000 test images: 70.84 %
epoch : 6
training loss: 0.0042, acc 81.9360 
Accuracy of the network on the 10000 test images: 71.12 %
epoch : 7
training loss: 0.0034, acc 86.1220 
Accuracy of the network on the 10000 test images: 71.53 %
epoch : 8
training loss: 0.0026, acc 90.0220 
Accuracy of the network on the 10000 test images: 71.99 %
epoch : 9
training loss: 0.0019, acc 93.6380 
Accuracy of the network on the 10000 test images: 72.12 %
epoch : 10
training loss: 0.0013, acc 96.5140 
Accuracy of the ne

In [7]:
running_loss_history = torch.tensor(running_loss_history)
running_loss_history.numpy()
running_corrects_history = torch.tensor(running_corrects_history)
running_corrects_history.numpy()
test_acc_history = torch.tensor(test_acc_history)
test_acc_history.numpy()




results['net1 - training loss'] = running_loss_history
results['net1 - training accuracy'] = running_corrects_history
results['net1 - testing accuracy'] = test_acc_history

results.to_csv('net_results_initial.csv')

# 4 Layers

In [8]:
# define network

class Net_4(nn.Module):
    def __init__(self):
        super().__init__()
        self.network = nn.Sequential(
            
            #block 1
            nn.Conv2d(3, 64, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            nn.MaxPool2d(2, 2), # output: 64 x 16 x 16
            nn.BatchNorm2d(64),
            
            #block 2
            nn.Conv2d(64, 64, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            #nn.MaxPool2d(2, 2), max pooling operators removed after first block
            nn.BatchNorm2d(128),
            
            # -----------------------------------------------
            
            nn.Flatten(),
            nn.Linear(128*16*16, 1024), #FC1    
            nn.ReLU(),
            nn.Linear(1024, 512), #FC2
            nn.ReLU(),
            nn.Linear(512, 10)) #FC3
    
        
    def forward(self, x):
        return self.network(x)


net = Net_4()
net.to(device)

Net_4(
  (network): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU()
    (8): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU()
    (10): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (11): Flatten(start_dim=1, end_dim=-1)
    (12): Linear(in_features=32768, out_features=1024, bias=True)
    (13): ReLU()
    (14): Linear(in_features=1024, out_features=512, bias=True)
    (15): ReLU()
    (16): Linear(in_features=512, out_features=10, bias=True)
  )
)

In [9]:
# criterion + optimizer

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9, weight_decay=0.001)

In [10]:
epochs = 50

running_loss_history = []
running_corrects_history = []
test_acc_history=[]

for e in range(epochs): # training our model, put input according to every batch.
  
    running_loss = 0.0
    running_corrects = 0.0


    for inputs, labels in trainloader:
        
        inputs = inputs.to(device) # input to device as our model is running in mentioned device.
        labels = labels.to(device)
        outputs = net(inputs) # every batch of 100 images are put as an input.
        loss = criterion(outputs, labels) # Calc loss after each batch i/p by comparing it to actual labels. 

        optimizer.zero_grad() #setting the initial gradient to 0
        loss.backward() # backpropagating the loss
        optimizer.step() # updating the weights and bias values for every single step.

        _, preds = torch.max(outputs, 1) # taking the highest value of prediction.
        running_loss += loss.item()
        running_corrects += torch.sum(preds == labels.data) # calculating te accuracy by taking the sum of all the correct predictions in a batch.

        
    epoch_loss = running_loss/len(trainset) # loss per epoch
    epoch_acc = 100*(running_corrects.float()/ len(trainset)) # accuracy per epoch
    running_loss_history.append(epoch_loss) # appending for displaying 
    running_corrects_history.append(epoch_acc)
                
    print('epoch :', (e+1))
    print('training loss: {:.4f}, acc {:.4f} '.format(epoch_loss, epoch_acc.item()))
    
    # testing

    correct = 0
    total = 0

    # since we're not training, we don't need to calculate the gradients for our outputs
    with torch.no_grad():
        for data in testloader:
            images, labels = data[0].to(device), data[1].to(device)
            # calculate outputs by running images through the network
            outputs = net(images)
            # the class with the highest energy is what we choose as prediction
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    
    test_acc_history.append(100 * correct / total)
    print(f'Accuracy of the network on the 10000 test images: {100 * correct / total} %')


epoch : 1
training loss: 0.0114, acc 48.5180 
Accuracy of the network on the 10000 test images: 59.77 %
epoch : 2
training loss: 0.0074, acc 66.6260 
Accuracy of the network on the 10000 test images: 68.75 %
epoch : 3
training loss: 0.0057, acc 74.8220 
Accuracy of the network on the 10000 test images: 72.24 %
epoch : 4
training loss: 0.0044, acc 80.6320 
Accuracy of the network on the 10000 test images: 73.45 %
epoch : 5
training loss: 0.0034, acc 86.1020 
Accuracy of the network on the 10000 test images: 74.7 %
epoch : 6
training loss: 0.0023, acc 91.1780 
Accuracy of the network on the 10000 test images: 75.26 %
epoch : 7
training loss: 0.0014, acc 95.5940 
Accuracy of the network on the 10000 test images: 75.0 %
epoch : 8
training loss: 0.0008, acc 98.4500 
Accuracy of the network on the 10000 test images: 75.28 %
epoch : 9
training loss: 0.0004, acc 99.5840 
Accuracy of the network on the 10000 test images: 75.67 %
epoch : 10
training loss: 0.0002, acc 99.9040 
Accuracy of the net

In [11]:
running_loss_history = torch.tensor(running_loss_history)
running_loss_history.numpy()
running_corrects_history = torch.tensor(running_corrects_history)
running_corrects_history.numpy()
test_acc_history = torch.tensor(test_acc_history)
test_acc_history.numpy()




results['net2 - training loss'] = running_loss_history
results['net2 - training accuracy'] = running_corrects_history
results['net2 - testing accuracy'] = test_acc_history

results.to_csv('net_results_initial.csv')

# 6 Layers

In [12]:
# define network

class Net_6(nn.Module):
    def __init__(self):
        super().__init__()
        self.network = nn.Sequential(
            
            #block 1
            nn.Conv2d(3, 64, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            nn.MaxPool2d(2, 2), # output: 64 x 16 x 16
            nn.BatchNorm2d(64),
            
            #block 2
            nn.Conv2d(64, 64, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            #nn.MaxPool2d(2, 2), max pooling operators removed after first block
            nn.BatchNorm2d(128),
            
            #block 3
            nn.Conv2d(128, 128, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            #nn.MaxPool2d(2, 2), max pooling operators removed after first block
            nn.BatchNorm2d(256),
            
            # -----------------------------------------------
            
            nn.Flatten(),
            nn.Linear(256*16*16, 1024), #FC1    
            nn.ReLU(),
            nn.Linear(1024, 512), #FC2
            nn.ReLU(),
            nn.Linear(512, 10)) #FC3
    
        
    def forward(self, x):
        return self.network(x)


net = Net_6()
net.to(device)

Net_6(
  (network): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU()
    (8): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU()
    (10): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (11): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (12): ReLU()
    (13): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (14): ReLU()
    (15): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (16): Flatten(start_dim=1, end_dim=-1)
    (17): Linear

In [13]:
# criterion + optimizer

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9, weight_decay=0.001)

In [14]:
epochs = 50

running_loss_history = []
running_corrects_history = []
test_acc_history=[]

for e in range(epochs): # training our model, put input according to every batch.
  
    running_loss = 0.0
    running_corrects = 0.0


    for inputs, labels in trainloader:
        
        inputs = inputs.to(device) # input to device as our model is running in mentioned device.
        labels = labels.to(device)
        outputs = net(inputs) # every batch of 100 images are put as an input.
        loss = criterion(outputs, labels) # Calc loss after each batch i/p by comparing it to actual labels. 

        optimizer.zero_grad() #setting the initial gradient to 0
        loss.backward() # backpropagating the loss
        optimizer.step() # updating the weights and bias values for every single step.

        _, preds = torch.max(outputs, 1) # taking the highest value of prediction.
        running_loss += loss.item()
        running_corrects += torch.sum(preds == labels.data) # calculating te accuracy by taking the sum of all the correct predictions in a batch.

        
    epoch_loss = running_loss/len(trainset) # loss per epoch
    epoch_acc = 100*(running_corrects.float()/ len(trainset)) # accuracy per epoch
    running_loss_history.append(epoch_loss) # appending for displaying 
    running_corrects_history.append(epoch_acc)
                
    print('epoch :', (e+1))
    print('training loss: {:.4f}, acc {:.4f} '.format(epoch_loss, epoch_acc.item()))
    
    # testing

    correct = 0
    total = 0

    # since we're not training, we don't need to calculate the gradients for our outputs
    with torch.no_grad():
        for data in testloader:
            images, labels = data[0].to(device), data[1].to(device)
            # calculate outputs by running images through the network
            outputs = net(images)
            # the class with the highest energy is what we choose as prediction
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    
    test_acc_history.append(100 * correct / total)
    print(f'Accuracy of the network on the 10000 test images: {100 * correct / total} %')

epoch : 1
training loss: 0.0108, acc 50.8220 
Accuracy of the network on the 10000 test images: 61.73 %
epoch : 2
training loss: 0.0068, acc 69.1860 
Accuracy of the network on the 10000 test images: 68.59 %
epoch : 3
training loss: 0.0050, acc 78.1040 
Accuracy of the network on the 10000 test images: 72.57 %
epoch : 4
training loss: 0.0034, acc 85.6640 
Accuracy of the network on the 10000 test images: 74.36 %
epoch : 5
training loss: 0.0019, acc 92.9460 
Accuracy of the network on the 10000 test images: 73.94 %
epoch : 6
training loss: 0.0008, acc 97.9880 
Accuracy of the network on the 10000 test images: 74.81 %
epoch : 7
training loss: 0.0003, acc 99.7640 
Accuracy of the network on the 10000 test images: 75.38 %
epoch : 8
training loss: 0.0001, acc 99.9500 
Accuracy of the network on the 10000 test images: 76.51 %
epoch : 9
training loss: 0.0001, acc 99.9940 
Accuracy of the network on the 10000 test images: 76.41 %
epoch : 10
training loss: 0.0001, acc 100.0000 
Accuracy of the 

In [15]:
running_loss_history = torch.tensor(running_loss_history)
running_loss_history.numpy()
running_corrects_history = torch.tensor(running_corrects_history)
running_corrects_history.numpy()
test_acc_history = torch.tensor(test_acc_history)
test_acc_history.numpy()




results['net3 - training loss'] = running_loss_history
results['net3 - training accuracy'] = running_corrects_history
results['net3 - testing accuracy'] = test_acc_history

results.to_csv('net_results_initial.csv')

# 8 Layers

In [16]:
# define network

class Net_8(nn.Module):
    def __init__(self):
        super().__init__()
        self.network = nn.Sequential(
            
            #block 1
            nn.Conv2d(3, 64, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            nn.MaxPool2d(2, 2), # output: 64 x 16 x 16
            nn.BatchNorm2d(64),
            
            #block 2
            nn.Conv2d(64, 64, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            #nn.MaxPool2d(2, 2), max pooling operators removed after first block
            nn.BatchNorm2d(128),
            
            #block 3
            nn.Conv2d(128, 128, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            #nn.MaxPool2d(2, 2), max pooling operators removed after first block
            nn.BatchNorm2d(256),
             
            #block 4
            nn.Conv2d(256, 256, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            #nn.MaxPool2d(2, 2), max pooling operators removed after first block
            nn.BatchNorm2d(512),
            
            # -----------------------------------------------
            
            nn.Flatten(),
            nn.Linear(512*16*16, 1024), #FC1    
            nn.ReLU(),
            nn.Linear(1024, 512), #FC2
            nn.ReLU(),
            nn.Linear(512, 10)) #FC3
    
        
    def forward(self, x):
        return self.network(x)


net = Net_8()
net.to(device)

Net_8(
  (network): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU()
    (8): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU()
    (10): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (11): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (12): ReLU()
    (13): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (14): ReLU()
    (15): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (16): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1

In [17]:
# criterion + optimizer

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9, weight_decay=0.001)

In [18]:
epochs = 50

running_loss_history = []
running_corrects_history = []
test_acc_history=[]

for e in range(epochs): # training our model, put input according to every batch.
  
    running_loss = 0.0
    running_corrects = 0.0


    for inputs, labels in trainloader:
        
        inputs = inputs.to(device) # input to device as our model is running in mentioned device.
        labels = labels.to(device)
        outputs = net(inputs) # every batch of 100 images are put as an input.
        loss = criterion(outputs, labels) # Calc loss after each batch i/p by comparing it to actual labels. 

        optimizer.zero_grad() #setting the initial gradient to 0
        loss.backward() # backpropagating the loss
        optimizer.step() # updating the weights and bias values for every single step.

        _, preds = torch.max(outputs, 1) # taking the highest value of prediction.
        running_loss += loss.item()
        running_corrects += torch.sum(preds == labels.data) # calculating te accuracy by taking the sum of all the correct predictions in a batch.

        
    epoch_loss = running_loss/len(trainset) # loss per epoch
    epoch_acc = 100*(running_corrects.float()/ len(trainset)) # accuracy per epoch
    running_loss_history.append(epoch_loss) # appending for displaying 
    running_corrects_history.append(epoch_acc)
                
    print('epoch :', (e+1))
    print('training loss: {:.4f}, acc {:.4f} '.format(epoch_loss, epoch_acc.item()))
    
    # testing

    correct = 0
    total = 0

    # since we're not training, we don't need to calculate the gradients for our outputs
    with torch.no_grad():
        for data in testloader:
            images, labels = data[0].to(device), data[1].to(device)
            # calculate outputs by running images through the network
            outputs = net(images)
            # the class with the highest energy is what we choose as prediction
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    
    test_acc_history.append(100 * correct / total)
    print(f'Accuracy of the network on the 10000 test images: {100 * correct / total} %')

epoch : 1
training loss: 0.0102, acc 53.2280 
Accuracy of the network on the 10000 test images: 66.09 %
epoch : 2
training loss: 0.0060, acc 73.1140 
Accuracy of the network on the 10000 test images: 72.04 %
epoch : 3
training loss: 0.0038, acc 83.7520 
Accuracy of the network on the 10000 test images: 74.57 %
epoch : 4
training loss: 0.0019, acc 93.2400 
Accuracy of the network on the 10000 test images: 76.02 %
epoch : 5
training loss: 0.0006, acc 98.7380 
Accuracy of the network on the 10000 test images: 76.41 %
epoch : 6
training loss: 0.0001, acc 99.9440 
Accuracy of the network on the 10000 test images: 78.2 %
epoch : 7
training loss: 0.0000, acc 99.9920 
Accuracy of the network on the 10000 test images: 78.4 %
epoch : 8
training loss: 0.0000, acc 100.0000 
Accuracy of the network on the 10000 test images: 78.54 %
epoch : 9
training loss: 0.0000, acc 100.0000 
Accuracy of the network on the 10000 test images: 78.52 %
epoch : 10
training loss: 0.0000, acc 100.0000 
Accuracy of the 

In [19]:
running_loss_history = torch.tensor(running_loss_history)
running_loss_history.numpy()
running_corrects_history = torch.tensor(running_corrects_history)
running_corrects_history.numpy()
test_acc_history = torch.tensor(test_acc_history)
test_acc_history.numpy()




results['net4 - training loss'] = running_loss_history
results['net4 - training accuracy'] = running_corrects_history
results['net4 - testing accuracy'] = test_acc_history

results.to_csv('net_results_initial.csv')

# 10 Layers

In [4]:
# define network

class Net_10(nn.Module):
    def __init__(self):
        super().__init__()
        self.network = nn.Sequential(
            
            #block 1
            nn.Conv2d(3, 64, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            nn.MaxPool2d(2, 2), # output: 64 x 16 x 16
            nn.BatchNorm2d(64),
            
            #block 2
            nn.Conv2d(64, 64, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            #nn.MaxPool2d(2, 2), max pooling operators removed after first block
            nn.BatchNorm2d(128),
            
            #block 3
            nn.Conv2d(128, 128, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            #nn.MaxPool2d(2, 2), max pooling operators removed after first block
            nn.BatchNorm2d(256),
             
            #block 4
            nn.Conv2d(256, 256, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            #nn.MaxPool2d(2, 2), max pooling operators removed after first block
            nn.BatchNorm2d(512),
            
            #block 5
            nn.Conv2d(512, 1024, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(1024, 1024, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            #nn.MaxPool2d(2, 2), max pooling operators removed after first block
            nn.BatchNorm2d(1024),
        
            
            # -----------------------------------------------
            
            nn.Flatten(),
            nn.Linear(1024*16*16, 1024), #FC1    
            nn.ReLU(),
            nn.Linear(1024, 512), #FC2
            nn.ReLU(),
            nn.Linear(512, 10)) #FC3
    
        
    def forward(self, x):
        return self.network(x)


net = Net_10()
net.to(device)

Net_10(
  (network): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU()
    (8): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU()
    (10): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (11): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (12): ReLU()
    (13): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (14): ReLU()
    (15): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (16): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 

In [5]:
# criterion + optimizer

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9, weight_decay=0.001)

In [6]:
epochs = 50

running_loss_history = []
running_corrects_history = []
test_acc_history=[]

for e in range(epochs): # training our model, put input according to every batch.
  
    running_loss = 0.0
    running_corrects = 0.0


    for inputs, labels in trainloader:
        
        inputs = inputs.to(device) # input to device as our model is running in mentioned device.
        labels = labels.to(device)
        outputs = net(inputs) # every batch of 100 images are put as an input.
        loss = criterion(outputs, labels) # Calc loss after each batch i/p by comparing it to actual labels. 

        optimizer.zero_grad() #setting the initial gradient to 0
        loss.backward() # backpropagating the loss
        optimizer.step() # updating the weights and bias values for every single step.

        _, preds = torch.max(outputs, 1) # taking the highest value of prediction.
        running_loss += loss.item()
        running_corrects += torch.sum(preds == labels.data) # calculating te accuracy by taking the sum of all the correct predictions in a batch.

        
    epoch_loss = running_loss/len(trainset) # loss per epoch
    epoch_acc = 100*(running_corrects.float()/ len(trainset)) # accuracy per epoch
    running_loss_history.append(epoch_loss) # appending for displaying 
    running_corrects_history.append(epoch_acc)
                
    print('epoch :', (e+1))
    print('training loss: {:.4f}, acc {:.4f} '.format(epoch_loss, epoch_acc.item()))
    
    # testing

    correct = 0
    total = 0

    # since we're not training, we don't need to calculate the gradients for our outputs
    with torch.no_grad():
        for data in testloader:
            images, labels = data[0].to(device), data[1].to(device)
            # calculate outputs by running images through the network
            outputs = net(images)
            # the class with the highest energy is what we choose as prediction
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    
    test_acc_history.append(100 * correct / total)
    print(f'Accuracy of the network on the 10000 test images: {100 * correct / total} %')

epoch : 1
training loss: 0.0103, acc 52.4480 
Accuracy of the network on the 10000 test images: 64.85 %
epoch : 2
training loss: 0.0058, acc 73.7400 
Accuracy of the network on the 10000 test images: 72.32 %
epoch : 3
training loss: 0.0032, acc 86.6280 
Accuracy of the network on the 10000 test images: 73.49 %
epoch : 4
training loss: 0.0010, acc 96.8480 
Accuracy of the network on the 10000 test images: 74.84 %
epoch : 5
training loss: 0.0002, acc 99.7620 
Accuracy of the network on the 10000 test images: 76.77 %
epoch : 6
training loss: 0.0000, acc 99.9900 
Accuracy of the network on the 10000 test images: 77.68 %
epoch : 7
training loss: 0.0000, acc 100.0000 
Accuracy of the network on the 10000 test images: 77.61 %
epoch : 8
training loss: 0.0000, acc 100.0000 
Accuracy of the network on the 10000 test images: 77.78 %
epoch : 9
training loss: 0.0000, acc 100.0000 
Accuracy of the network on the 10000 test images: 77.88 %
epoch : 10
training loss: 0.0000, acc 100.0000 
Accuracy of t

In [10]:
running_loss_history = torch.tensor(running_loss_history)
running_loss_history.numpy()
running_corrects_history = torch.tensor(running_corrects_history)
running_corrects_history.numpy()
test_acc_history = torch.tensor(test_acc_history)
test_acc_history.numpy()




results['net5 - training loss'] = running_loss_history
results['net5 - training accuracy'] = running_corrects_history
results['net5 - testing accuracy'] = test_acc_history

results.to_csv('net_results_initial.csv')

  running_loss_history = torch.tensor(running_loss_history)
  running_corrects_history = torch.tensor(running_corrects_history)
  test_acc_history = torch.tensor(test_acc_history)


# ---------------- Changing Neurons in Each Network ----------------------

## 2 Layers

In [11]:
# define network

class Net_2_2(nn.Module):
    def __init__(self):
        super().__init__()
        self.network = nn.Sequential(
            
            #block 1
            nn.Conv2d(3, 32, kernel_size=3, padding=1), 
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            nn.MaxPool2d(2, 2), # output: 64 x 16 x 16
            nn.BatchNorm2d(32),

            
            # -----------------------------------------------
            
            nn.Flatten(),
            nn.Linear(32*16*16, 1024), #FC1    
            nn.ReLU(),
            nn.Linear(1024, 512), #FC2
            nn.ReLU(),
            nn.Linear(512, 10)) #FC3
    
        
    def forward(self, x):
        return self.network(x)


net = Net_2_2()
net.to(device)

Net_2_2(
  (network): Sequential(
    (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): Flatten(start_dim=1, end_dim=-1)
    (7): Linear(in_features=8192, out_features=1024, bias=True)
    (8): ReLU()
    (9): Linear(in_features=1024, out_features=512, bias=True)
    (10): ReLU()
    (11): Linear(in_features=512, out_features=10, bias=True)
  )
)

In [12]:
# criterion + optimizer

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9, weight_decay=0.001)

In [13]:
epochs = 50

running_loss_history = []
running_corrects_history = []
test_acc_history=[]

for e in range(epochs): # training our model, put input according to every batch.
  
    running_loss = 0.0
    running_corrects = 0.0


    for inputs, labels in trainloader:
        
        inputs = inputs.to(device) # input to device as our model is running in mentioned device.
        labels = labels.to(device)
        outputs = net(inputs) # every batch of 100 images are put as an input.
        loss = criterion(outputs, labels) # Calc loss after each batch i/p by comparing it to actual labels. 

        optimizer.zero_grad() #setting the initial gradient to 0
        loss.backward() # backpropagating the loss
        optimizer.step() # updating the weights and bias values for every single step.

        _, preds = torch.max(outputs, 1) # taking the highest value of prediction.
        running_loss += loss.item()
        running_corrects += torch.sum(preds == labels.data) # calculating te accuracy by taking the sum of all the correct predictions in a batch.

        
    epoch_loss = running_loss/len(trainset) # loss per epoch
    epoch_acc = 100*(running_corrects.float()/ len(trainset)) # accuracy per epoch
    running_loss_history.append(epoch_loss) # appending for displaying 
    running_corrects_history.append(epoch_acc)
                
    print('epoch :', (e+1))
    print('training loss: {:.4f}, acc {:.4f} '.format(epoch_loss, epoch_acc.item()))
    
    # testing

    correct = 0
    total = 0

    # since we're not training, we don't need to calculate the gradients for our outputs
    with torch.no_grad():
        for data in testloader:
            images, labels = data[0].to(device), data[1].to(device)
            # calculate outputs by running images through the network
            outputs = net(images)
            # the class with the highest energy is what we choose as prediction
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    
    test_acc_history.append(100 * correct / total)
    print(f'Accuracy of the network on the 10000 test images: {100 * correct / total} %')

epoch : 1
training loss: 0.0138, acc 38.6780 
Accuracy of the network on the 10000 test images: 49.9 %
epoch : 2
training loss: 0.0099, acc 55.1600 
Accuracy of the network on the 10000 test images: 59.08 %
epoch : 3
training loss: 0.0082, acc 63.3060 
Accuracy of the network on the 10000 test images: 64.47 %
epoch : 4
training loss: 0.0070, acc 68.8340 
Accuracy of the network on the 10000 test images: 67.85 %
epoch : 5
training loss: 0.0062, acc 72.6760 
Accuracy of the network on the 10000 test images: 68.5 %
epoch : 6
training loss: 0.0055, acc 75.7360 
Accuracy of the network on the 10000 test images: 68.91 %
epoch : 7
training loss: 0.0049, acc 78.4800 
Accuracy of the network on the 10000 test images: 69.3 %
epoch : 8
training loss: 0.0043, acc 81.5360 
Accuracy of the network on the 10000 test images: 70.66 %
epoch : 9
training loss: 0.0037, acc 84.4400 
Accuracy of the network on the 10000 test images: 70.83 %
epoch : 10
training loss: 0.0031, acc 87.3500 
Accuracy of the netw

In [14]:
running_loss_history = torch.tensor(running_loss_history)
running_loss_history.numpy()
running_corrects_history = torch.tensor(running_corrects_history)
running_corrects_history.numpy()
test_acc_history = torch.tensor(test_acc_history)
test_acc_history.numpy()




results['net6 - training loss'] = running_loss_history
results['net6 - training accuracy'] = running_corrects_history
results['net6 - testing accuracy'] = test_acc_history

results.to_csv('net_results_initial.csv')

## 4 Layers

In [15]:
# define network

class Net_4_2(nn.Module):
    def __init__(self):
        super().__init__()
        self.network = nn.Sequential(
            
            #block 1
            nn.Conv2d(3, 32, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            nn.MaxPool2d(2, 2), 
            nn.BatchNorm2d(32),
            
            #block 2
            nn.Conv2d(32, 32, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            #nn.MaxPool2d(2, 2), max pooling operators removed after first block
            nn.BatchNorm2d(64),
            
            # -----------------------------------------------
            
            nn.Flatten(),
            nn.Linear(64*16*16, 1024), #FC1    
            nn.ReLU(),
            nn.Linear(1024, 512), #FC2
            nn.ReLU(),
            nn.Linear(512, 10)) #FC3
    
        
    def forward(self, x):
        return self.network(x)


net = Net_4_2()
net.to(device)

Net_4_2(
  (network): Sequential(
    (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU()
    (8): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU()
    (10): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (11): Flatten(start_dim=1, end_dim=-1)
    (12): Linear(in_features=16384, out_features=1024, bias=True)
    (13): ReLU()
    (14): Linear(in_features=1024, out_features=512, bias=True)
    (15): ReLU()
    (16): Linear(in_features=512, out_features=10, bias=True)
  )
)

In [16]:
# criterion + optimizer

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9, weight_decay=0.001)

In [17]:
epochs = 50

running_loss_history = []
running_corrects_history = []
test_acc_history=[]

for e in range(epochs): # training our model, put input according to every batch.
  
    running_loss = 0.0
    running_corrects = 0.0


    for inputs, labels in trainloader:
        
        inputs = inputs.to(device) # input to device as our model is running in mentioned device.
        labels = labels.to(device)
        outputs = net(inputs) # every batch of 100 images are put as an input.
        loss = criterion(outputs, labels) # Calc loss after each batch i/p by comparing it to actual labels. 

        optimizer.zero_grad() #setting the initial gradient to 0
        loss.backward() # backpropagating the loss
        optimizer.step() # updating the weights and bias values for every single step.

        _, preds = torch.max(outputs, 1) # taking the highest value of prediction.
        running_loss += loss.item()
        running_corrects += torch.sum(preds == labels.data) # calculating te accuracy by taking the sum of all the correct predictions in a batch.

        
    epoch_loss = running_loss/len(trainset) # loss per epoch
    epoch_acc = 100*(running_corrects.float()/ len(trainset)) # accuracy per epoch
    running_loss_history.append(epoch_loss) # appending for displaying 
    running_corrects_history.append(epoch_acc)
                
    print('epoch :', (e+1))
    print('training loss: {:.4f}, acc {:.4f} '.format(epoch_loss, epoch_acc.item()))
    
    # testing

    correct = 0
    total = 0

    # since we're not training, we don't need to calculate the gradients for our outputs
    with torch.no_grad():
        for data in testloader:
            images, labels = data[0].to(device), data[1].to(device)
            # calculate outputs by running images through the network
            outputs = net(images)
            # the class with the highest energy is what we choose as prediction
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    
    test_acc_history.append(100 * correct / total)
    print(f'Accuracy of the network on the 10000 test images: {100 * correct / total} %')

epoch : 1
training loss: 0.0124, acc 43.5500 
Accuracy of the network on the 10000 test images: 55.86 %
epoch : 2
training loss: 0.0085, acc 61.5120 
Accuracy of the network on the 10000 test images: 64.55 %
epoch : 3
training loss: 0.0068, acc 69.3980 
Accuracy of the network on the 10000 test images: 68.92 %
epoch : 4
training loss: 0.0058, acc 74.2160 
Accuracy of the network on the 10000 test images: 71.55 %
epoch : 5
training loss: 0.0049, acc 78.3460 
Accuracy of the network on the 10000 test images: 72.67 %
epoch : 6
training loss: 0.0042, acc 81.9900 
Accuracy of the network on the 10000 test images: 73.2 %
epoch : 7
training loss: 0.0034, acc 85.9200 
Accuracy of the network on the 10000 test images: 74.19 %
epoch : 8
training loss: 0.0026, acc 89.6260 
Accuracy of the network on the 10000 test images: 74.49 %
epoch : 9
training loss: 0.0019, acc 93.1560 
Accuracy of the network on the 10000 test images: 74.47 %
epoch : 10
training loss: 0.0013, acc 96.1460 
Accuracy of the ne

In [18]:
running_loss_history = torch.tensor(running_loss_history)
running_loss_history.numpy()
running_corrects_history = torch.tensor(running_corrects_history)
running_corrects_history.numpy()
test_acc_history = torch.tensor(test_acc_history)
test_acc_history.numpy()




results['net7 - training loss'] = running_loss_history
results['net7 - training accuracy'] = running_corrects_history
results['net7 - testing accuracy'] = test_acc_history

results.to_csv('net_results_initial.csv')

## 6 Layers

In [19]:
# define network

class Net_6_2(nn.Module):
    def __init__(self):
        super().__init__()
        self.network = nn.Sequential(
            
            #block 1
            nn.Conv2d(3, 32, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.BatchNorm2d(32),
            
            #block 2
            nn.Conv2d(32, 32, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            #nn.MaxPool2d(2, 2), max pooling operators removed after first block
            nn.BatchNorm2d(64),
            
            #block 3
            nn.Conv2d(64, 64, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            #nn.MaxPool2d(2, 2), max pooling operators removed after first block
            nn.BatchNorm2d(128),
            
            # -----------------------------------------------
            
            nn.Flatten(),
            nn.Linear(128*16*16, 1024), #FC1    
            nn.ReLU(),
            nn.Linear(1024, 512), #FC2
            nn.ReLU(),
            nn.Linear(512, 10)) #FC3
    
        
    def forward(self, x):
        return self.network(x)


net = Net_6_2()
net.to(device)

Net_6_2(
  (network): Sequential(
    (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU()
    (8): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU()
    (10): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (11): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (12): ReLU()
    (13): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (14): ReLU()
    (15): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (16): Flatten(start_dim=1, end_dim=-1)
    (17): Linear(in

In [20]:
# criterion + optimizer

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9, weight_decay=0.001)

In [21]:
epochs = 50

running_loss_history = []
running_corrects_history = []
test_acc_history=[]

for e in range(epochs): # training our model, put input according to every batch.
  
    running_loss = 0.0
    running_corrects = 0.0


    for inputs, labels in trainloader:
        
        inputs = inputs.to(device) # input to device as our model is running in mentioned device.
        labels = labels.to(device)
        outputs = net(inputs) # every batch of 100 images are put as an input.
        loss = criterion(outputs, labels) # Calc loss after each batch i/p by comparing it to actual labels. 

        optimizer.zero_grad() #setting the initial gradient to 0
        loss.backward() # backpropagating the loss
        optimizer.step() # updating the weights and bias values for every single step.

        _, preds = torch.max(outputs, 1) # taking the highest value of prediction.
        running_loss += loss.item()
        running_corrects += torch.sum(preds == labels.data) # calculating te accuracy by taking the sum of all the correct predictions in a batch.

        
    epoch_loss = running_loss/len(trainset) # loss per epoch
    epoch_acc = 100*(running_corrects.float()/ len(trainset)) # accuracy per epoch
    running_loss_history.append(epoch_loss) # appending for displaying 
    running_corrects_history.append(epoch_acc)
                
    print('epoch :', (e+1))
    print('training loss: {:.4f}, acc {:.4f} '.format(epoch_loss, epoch_acc.item()))
    
    # testing

    correct = 0
    total = 0

    # since we're not training, we don't need to calculate the gradients for our outputs
    with torch.no_grad():
        for data in testloader:
            images, labels = data[0].to(device), data[1].to(device)
            # calculate outputs by running images through the network
            outputs = net(images)
            # the class with the highest energy is what we choose as prediction
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    
    test_acc_history.append(100 * correct / total)
    print(f'Accuracy of the network on the 10000 test images: {100 * correct / total} %')

epoch : 1
training loss: 0.0121, acc 44.4940 
Accuracy of the network on the 10000 test images: 56.59 %
epoch : 2
training loss: 0.0082, acc 62.9260 
Accuracy of the network on the 10000 test images: 64.48 %
epoch : 3
training loss: 0.0064, acc 71.2960 
Accuracy of the network on the 10000 test images: 69.5 %
epoch : 4
training loss: 0.0051, acc 77.7640 
Accuracy of the network on the 10000 test images: 72.25 %
epoch : 5
training loss: 0.0039, acc 83.5940 
Accuracy of the network on the 10000 test images: 72.8 %
epoch : 6
training loss: 0.0028, acc 88.9240 
Accuracy of the network on the 10000 test images: 72.95 %
epoch : 7
training loss: 0.0017, acc 94.0980 
Accuracy of the network on the 10000 test images: 73.27 %
epoch : 8
training loss: 0.0010, acc 97.5720 
Accuracy of the network on the 10000 test images: 73.51 %
epoch : 9
training loss: 0.0005, acc 99.4020 
Accuracy of the network on the 10000 test images: 73.67 %
epoch : 10
training loss: 0.0002, acc 99.9120 
Accuracy of the net

In [22]:
running_loss_history = torch.tensor(running_loss_history)
running_loss_history.numpy()
running_corrects_history = torch.tensor(running_corrects_history)
running_corrects_history.numpy()
test_acc_history = torch.tensor(test_acc_history)
test_acc_history.numpy()




results['net8 - training loss'] = running_loss_history
results['net8 - training accuracy'] = running_corrects_history
results['net8 - testing accuracy'] = test_acc_history

results.to_csv('net_results_initial.csv')

## 8 Layers

In [23]:
# define network

class Net_8_2(nn.Module):
    def __init__(self):
        super().__init__()
        self.network = nn.Sequential(
            
            #block 1
            nn.Conv2d(3, 32, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            nn.MaxPool2d(2, 2), 
            nn.BatchNorm2d(32),
            
            #block 2
            nn.Conv2d(32, 32, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            #nn.MaxPool2d(2, 2), max pooling operators removed after first block
            nn.BatchNorm2d(64),
            
            #block 3
            nn.Conv2d(64, 64, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            #nn.MaxPool2d(2, 2), max pooling operators removed after first block
            nn.BatchNorm2d(128),
             
            #block 4
            nn.Conv2d(128, 128, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            #nn.MaxPool2d(2, 2), max pooling operators removed after first block
            nn.BatchNorm2d(256),
            
            # -----------------------------------------------
            
            nn.Flatten(),
            nn.Linear(256*16*16, 1024), #FC1    
            nn.ReLU(),
            nn.Linear(1024, 512), #FC2
            nn.ReLU(),
            nn.Linear(512, 10)) #FC3
    
        
    def forward(self, x):
        return self.network(x)


net = Net_8_2()
net.to(device)

Net_8_2(
  (network): Sequential(
    (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU()
    (8): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU()
    (10): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (11): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (12): ReLU()
    (13): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (14): ReLU()
    (15): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (16): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), 

In [24]:
# criterion + optimizer

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9, weight_decay=0.001)

In [25]:
epochs = 50

running_loss_history = []
running_corrects_history = []
test_acc_history=[]

for e in range(epochs): # training our model, put input according to every batch.
  
    running_loss = 0.0
    running_corrects = 0.0


    for inputs, labels in trainloader:
        
        inputs = inputs.to(device) # input to device as our model is running in mentioned device.
        labels = labels.to(device)
        outputs = net(inputs) # every batch of 100 images are put as an input.
        loss = criterion(outputs, labels) # Calc loss after each batch i/p by comparing it to actual labels. 

        optimizer.zero_grad() #setting the initial gradient to 0
        loss.backward() # backpropagating the loss
        optimizer.step() # updating the weights and bias values for every single step.

        _, preds = torch.max(outputs, 1) # taking the highest value of prediction.
        running_loss += loss.item()
        running_corrects += torch.sum(preds == labels.data) # calculating te accuracy by taking the sum of all the correct predictions in a batch.

        
    epoch_loss = running_loss/len(trainset) # loss per epoch
    epoch_acc = 100*(running_corrects.float()/ len(trainset)) # accuracy per epoch
    running_loss_history.append(epoch_loss) # appending for displaying 
    running_corrects_history.append(epoch_acc)
                
    print('epoch :', (e+1))
    print('training loss: {:.4f}, acc {:.4f} '.format(epoch_loss, epoch_acc.item()))
    
    # testing

    correct = 0
    total = 0

    # since we're not training, we don't need to calculate the gradients for our outputs
    with torch.no_grad():
        for data in testloader:
            images, labels = data[0].to(device), data[1].to(device)
            # calculate outputs by running images through the network
            outputs = net(images)
            # the class with the highest energy is what we choose as prediction
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    
    test_acc_history.append(100 * correct / total)
    print(f'Accuracy of the network on the 10000 test images: {100 * correct / total} %')

epoch : 1
training loss: 0.0113, acc 47.8100 
Accuracy of the network on the 10000 test images: 60.22 %
epoch : 2
training loss: 0.0072, acc 67.4440 
Accuracy of the network on the 10000 test images: 68.49 %
epoch : 3
training loss: 0.0053, acc 76.4100 
Accuracy of the network on the 10000 test images: 72.04 %
epoch : 4
training loss: 0.0038, acc 83.8280 
Accuracy of the network on the 10000 test images: 73.71 %
epoch : 5
training loss: 0.0023, acc 91.0380 
Accuracy of the network on the 10000 test images: 73.23 %
epoch : 6
training loss: 0.0011, acc 96.7300 
Accuracy of the network on the 10000 test images: 74.57 %
epoch : 7
training loss: 0.0004, acc 99.3940 
Accuracy of the network on the 10000 test images: 74.76 %
epoch : 8
training loss: 0.0001, acc 99.9260 
Accuracy of the network on the 10000 test images: 74.83 %
epoch : 9
training loss: 0.0001, acc 99.9860 
Accuracy of the network on the 10000 test images: 75.24 %
epoch : 10
training loss: 0.0000, acc 100.0000 
Accuracy of the 

In [26]:
running_loss_history = torch.tensor(running_loss_history)
running_loss_history.numpy()
running_corrects_history = torch.tensor(running_corrects_history)
running_corrects_history.numpy()
test_acc_history = torch.tensor(test_acc_history)
test_acc_history.numpy()




results['net9 - training loss'] = running_loss_history
results['net9 - training accuracy'] = running_corrects_history
results['net9 - testing accuracy'] = test_acc_history

results.to_csv('net_results_initial.csv')

## 10 Layers 

In [27]:
# define network

class Net_10_2(nn.Module):
    def __init__(self):
        super().__init__()
        self.network = nn.Sequential(
            
            #block 1
            nn.Conv2d(3, 32, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            nn.MaxPool2d(2, 2), 
            nn.BatchNorm2d(32),
            
            #block 2
            nn.Conv2d(32, 32, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            #nn.MaxPool2d(2, 2), max pooling operators removed after first block
            nn.BatchNorm2d(64),
            
            #block 3
            nn.Conv2d(64, 64, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            #nn.MaxPool2d(2, 2), max pooling operators removed after first block
            nn.BatchNorm2d(128),
             
            #block 4
            nn.Conv2d(128, 128, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            #nn.MaxPool2d(2, 2), max pooling operators removed after first block
            nn.BatchNorm2d(256),
            
            #block 5
            nn.Conv2d(256, 256, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            #nn.MaxPool2d(2, 2), max pooling operators removed after first block
            nn.BatchNorm2d(512),
        
            
            # -----------------------------------------------
            
            nn.Flatten(),
            nn.Linear(512*16*16, 1024), #FC1    
            nn.ReLU(),
            nn.Linear(1024, 512), #FC2
            nn.ReLU(),
            nn.Linear(512, 10)) #FC3
    
        
    def forward(self, x):
        return self.network(x)


net = Net_10_2()
net.to(device)

Net_10_2(
  (network): Sequential(
    (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU()
    (8): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU()
    (10): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (11): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (12): ReLU()
    (13): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (14): ReLU()
    (15): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (16): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1),

In [28]:
# criterion + optimizer

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9, weight_decay=0.001)

In [29]:
epochs = 50

running_loss_history = []
running_corrects_history = []
test_acc_history=[]

for e in range(epochs): # training our model, put input according to every batch.
  
    running_loss = 0.0
    running_corrects = 0.0


    for inputs, labels in trainloader:
        
        inputs = inputs.to(device) # input to device as our model is running in mentioned device.
        labels = labels.to(device)
        outputs = net(inputs) # every batch of 100 images are put as an input.
        loss = criterion(outputs, labels) # Calc loss after each batch i/p by comparing it to actual labels. 

        optimizer.zero_grad() #setting the initial gradient to 0
        loss.backward() # backpropagating the loss
        optimizer.step() # updating the weights and bias values for every single step.

        _, preds = torch.max(outputs, 1) # taking the highest value of prediction.
        running_loss += loss.item()
        running_corrects += torch.sum(preds == labels.data) # calculating te accuracy by taking the sum of all the correct predictions in a batch.

        
    epoch_loss = running_loss/len(trainset) # loss per epoch
    epoch_acc = 100*(running_corrects.float()/ len(trainset)) # accuracy per epoch
    running_loss_history.append(epoch_loss) # appending for displaying 
    running_corrects_history.append(epoch_acc)
                
    print('epoch :', (e+1))
    print('training loss: {:.4f}, acc {:.4f} '.format(epoch_loss, epoch_acc.item()))
    
    # testing

    correct = 0
    total = 0

    # since we're not training, we don't need to calculate the gradients for our outputs
    with torch.no_grad():
        for data in testloader:
            images, labels = data[0].to(device), data[1].to(device)
            # calculate outputs by running images through the network
            outputs = net(images)
            # the class with the highest energy is what we choose as prediction
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    
    test_acc_history.append(100 * correct / total)
    print(f'Accuracy of the network on the 10000 test images: {100 * correct / total} %')

epoch : 1
training loss: 0.0111, acc 48.9840 
Accuracy of the network on the 10000 test images: 60.34 %
epoch : 2
training loss: 0.0070, acc 68.5720 
Accuracy of the network on the 10000 test images: 68.31 %
epoch : 3
training loss: 0.0048, acc 78.9500 
Accuracy of the network on the 10000 test images: 71.26 %
epoch : 4
training loss: 0.0028, acc 88.6920 
Accuracy of the network on the 10000 test images: 72.02 %
epoch : 5
training loss: 0.0011, acc 96.8080 
Accuracy of the network on the 10000 test images: 72.67 %
epoch : 6
training loss: 0.0003, acc 99.5680 
Accuracy of the network on the 10000 test images: 73.62 %
epoch : 7
training loss: 0.0001, acc 99.9840 
Accuracy of the network on the 10000 test images: 74.72 %
epoch : 8
training loss: 0.0000, acc 99.9980 
Accuracy of the network on the 10000 test images: 75.09 %
epoch : 9
training loss: 0.0000, acc 100.0000 
Accuracy of the network on the 10000 test images: 75.35 %
epoch : 10
training loss: 0.0000, acc 100.0000 
Accuracy of the

In [30]:
running_loss_history = torch.tensor(running_loss_history)
running_loss_history.numpy()
running_corrects_history = torch.tensor(running_corrects_history)
running_corrects_history.numpy()
test_acc_history = torch.tensor(test_acc_history)
test_acc_history.numpy()




results['net10 - training loss'] = running_loss_history
results['net10 - training accuracy'] = running_corrects_history
results['net10 - testing accuracy'] = test_acc_history

results.to_csv('net_results_initial.csv')

# ------------------- Added Convolutional Layer per Block -------------------------

## 2 + 1

In [31]:
# define network

class Net_2_3(nn.Module):
    def __init__(self):
        super().__init__()
        self.network = nn.Sequential(
            
            #block 1
            nn.Conv2d(3, 32, kernel_size=3, padding=1), 
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1), # extra
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.BatchNorm2d(32),

            
            # -----------------------------------------------
            
            nn.Flatten(),
            nn.Linear(32*16*16, 1024), #FC1    
            nn.ReLU(),
            nn.Linear(1024, 512), #FC2
            nn.ReLU(),
            nn.Linear(512, 10)) #FC3
    
        
    def forward(self, x):
        return self.network(x)


net = Net_2_3()
net.to(device)

Net_2_3(
  (network): Sequential(
    (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (5): ReLU()
    (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (7): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (8): Flatten(start_dim=1, end_dim=-1)
    (9): Linear(in_features=8192, out_features=1024, bias=True)
    (10): ReLU()
    (11): Linear(in_features=1024, out_features=512, bias=True)
    (12): ReLU()
    (13): Linear(in_features=512, out_features=10, bias=True)
  )
)

In [32]:
# criterion + optimizer

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9, weight_decay=0.001)

In [33]:
epochs = 50

running_loss_history = []
running_corrects_history = []
test_acc_history=[]

for e in range(epochs): # training our model, put input according to every batch.
  
    running_loss = 0.0
    running_corrects = 0.0


    for inputs, labels in trainloader:
        
        inputs = inputs.to(device) # input to device as our model is running in mentioned device.
        labels = labels.to(device)
        outputs = net(inputs) # every batch of 100 images are put as an input.
        loss = criterion(outputs, labels) # Calc loss after each batch i/p by comparing it to actual labels. 

        optimizer.zero_grad() #setting the initial gradient to 0
        loss.backward() # backpropagating the loss
        optimizer.step() # updating the weights and bias values for every single step.

        _, preds = torch.max(outputs, 1) # taking the highest value of prediction.
        running_loss += loss.item()
        running_corrects += torch.sum(preds == labels.data) # calculating te accuracy by taking the sum of all the correct predictions in a batch.

        
    epoch_loss = running_loss/len(trainset) # loss per epoch
    epoch_acc = 100*(running_corrects.float()/ len(trainset)) # accuracy per epoch
    running_loss_history.append(epoch_loss) # appending for displaying 
    running_corrects_history.append(epoch_acc)
                
    print('epoch :', (e+1))
    print('training loss: {:.4f}, acc {:.4f} '.format(epoch_loss, epoch_acc.item()))
    
    # testing

    correct = 0
    total = 0

    # since we're not training, we don't need to calculate the gradients for our outputs
    with torch.no_grad():
        for data in testloader:
            images, labels = data[0].to(device), data[1].to(device)
            # calculate outputs by running images through the network
            outputs = net(images)
            # the class with the highest energy is what we choose as prediction
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    
    test_acc_history.append(100 * correct / total)
    print(f'Accuracy of the network on the 10000 test images: {100 * correct / total} %')

epoch : 1
training loss: 0.0135, acc 39.0920 
Accuracy of the network on the 10000 test images: 50.31 %
epoch : 2
training loss: 0.0100, acc 54.1160 
Accuracy of the network on the 10000 test images: 57.81 %
epoch : 3
training loss: 0.0085, acc 61.7300 
Accuracy of the network on the 10000 test images: 63.45 %
epoch : 4
training loss: 0.0073, acc 67.4460 
Accuracy of the network on the 10000 test images: 66.66 %
epoch : 5
training loss: 0.0064, acc 71.6060 
Accuracy of the network on the 10000 test images: 68.45 %
epoch : 6
training loss: 0.0057, acc 74.9980 
Accuracy of the network on the 10000 test images: 70.29 %
epoch : 7
training loss: 0.0051, acc 77.9280 
Accuracy of the network on the 10000 test images: 70.6 %
epoch : 8
training loss: 0.0045, acc 80.9720 
Accuracy of the network on the 10000 test images: 70.72 %
epoch : 9
training loss: 0.0039, acc 83.6180 
Accuracy of the network on the 10000 test images: 71.33 %
epoch : 10
training loss: 0.0033, acc 86.7660 
Accuracy of the ne

In [34]:
running_loss_history = torch.tensor(running_loss_history)
running_loss_history.numpy()
running_corrects_history = torch.tensor(running_corrects_history)
running_corrects_history.numpy()
test_acc_history = torch.tensor(test_acc_history)
test_acc_history.numpy()




results['net11 - training loss'] = running_loss_history
results['net11 - training accuracy'] = running_corrects_history
results['net11 - testing accuracy'] = test_acc_history

results.to_csv('net_results_initial.csv')

## 4 + 2

In [35]:
# define network

class Net_4_3(nn.Module):
    def __init__(self):
        super().__init__()
        self.network = nn.Sequential(
            
            #block 1
            nn.Conv2d(3, 32, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1), # extra
            nn.ReLU(),
            nn.MaxPool2d(2, 2), 
            nn.BatchNorm2d(32),
            
            #block 2
            nn.Conv2d(32, 32, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=3, padding=1), # extra
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            #nn.MaxPool2d(2, 2), max pooling operators removed after first block
            nn.BatchNorm2d(64),
            
            # -----------------------------------------------
            
            nn.Flatten(),
            nn.Linear(64*16*16, 1024), #FC1    
            nn.ReLU(),
            nn.Linear(1024, 512), #FC2
            nn.ReLU(),
            nn.Linear(512, 10)) #FC3
    
        
    def forward(self, x):
        return self.network(x)


net = Net_4_3()
net.to(device)

Net_4_3(
  (network): Sequential(
    (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (5): ReLU()
    (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (7): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (8): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU()
    (10): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU()
    (12): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU()
    (14): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (15): Flatten(start_dim=1, end_dim=-1)
    (16): Linear(in_features=16384, out_features=1024, bias=True)
    (17): ReLU()
    (18): Linear(in_features

In [36]:
# criterion + optimizer

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9, weight_decay=0.001)

In [37]:
epochs = 50

running_loss_history = []
running_corrects_history = []
test_acc_history=[]

for e in range(epochs): # training our model, put input according to every batch.
  
    running_loss = 0.0
    running_corrects = 0.0


    for inputs, labels in trainloader:
        
        inputs = inputs.to(device) # input to device as our model is running in mentioned device.
        labels = labels.to(device)
        outputs = net(inputs) # every batch of 100 images are put as an input.
        loss = criterion(outputs, labels) # Calc loss after each batch i/p by comparing it to actual labels. 

        optimizer.zero_grad() #setting the initial gradient to 0
        loss.backward() # backpropagating the loss
        optimizer.step() # updating the weights and bias values for every single step.

        _, preds = torch.max(outputs, 1) # taking the highest value of prediction.
        running_loss += loss.item()
        running_corrects += torch.sum(preds == labels.data) # calculating te accuracy by taking the sum of all the correct predictions in a batch.

        
    epoch_loss = running_loss/len(trainset) # loss per epoch
    epoch_acc = 100*(running_corrects.float()/ len(trainset)) # accuracy per epoch
    running_loss_history.append(epoch_loss) # appending for displaying 
    running_corrects_history.append(epoch_acc)
                
    print('epoch :', (e+1))
    print('training loss: {:.4f}, acc {:.4f} '.format(epoch_loss, epoch_acc.item()))
    
    # testing

    correct = 0
    total = 0

    # since we're not training, we don't need to calculate the gradients for our outputs
    with torch.no_grad():
        for data in testloader:
            images, labels = data[0].to(device), data[1].to(device)
            # calculate outputs by running images through the network
            outputs = net(images)
            # the class with the highest energy is what we choose as prediction
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    
    test_acc_history.append(100 * correct / total)
    print(f'Accuracy of the network on the 10000 test images: {100 * correct / total} %')

epoch : 1
training loss: 0.0124, acc 42.8600 
Accuracy of the network on the 10000 test images: 54.12 %
epoch : 2
training loss: 0.0088, acc 60.0320 
Accuracy of the network on the 10000 test images: 63.21 %
epoch : 3
training loss: 0.0071, acc 67.9780 
Accuracy of the network on the 10000 test images: 69.44 %
epoch : 4
training loss: 0.0061, acc 72.8540 
Accuracy of the network on the 10000 test images: 70.57 %
epoch : 5
training loss: 0.0053, acc 76.6900 
Accuracy of the network on the 10000 test images: 73.21 %
epoch : 6
training loss: 0.0046, acc 79.9300 
Accuracy of the network on the 10000 test images: 74.2 %
epoch : 7
training loss: 0.0039, acc 83.0880 
Accuracy of the network on the 10000 test images: 74.6 %
epoch : 8
training loss: 0.0032, acc 86.4560 
Accuracy of the network on the 10000 test images: 75.11 %
epoch : 9
training loss: 0.0026, acc 89.8540 
Accuracy of the network on the 10000 test images: 75.28 %
epoch : 10
training loss: 0.0019, acc 92.9060 
Accuracy of the net

In [38]:
running_loss_history = torch.tensor(running_loss_history)
running_loss_history.numpy()
running_corrects_history = torch.tensor(running_corrects_history)
running_corrects_history.numpy()
test_acc_history = torch.tensor(test_acc_history)
test_acc_history.numpy()




results['net12 - training loss'] = running_loss_history
results['net12 - training accuracy'] = running_corrects_history
results['net12 - testing accuracy'] = test_acc_history

results.to_csv('net_results_initial.csv')

## 6 + 3

In [39]:
# define network

class Net_6_3(nn.Module):
    def __init__(self):
        super().__init__()
        self.network = nn.Sequential(
            
            #block 1
            nn.Conv2d(3, 32, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.BatchNorm2d(32),
            
            #block 2
            nn.Conv2d(32, 32, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            #nn.MaxPool2d(2, 2), max pooling operators removed after first block
            nn.BatchNorm2d(64),
            
            #block 3
            nn.Conv2d(64, 64, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            #nn.MaxPool2d(2, 2), max pooling operators removed after first block
            nn.BatchNorm2d(128),
            
            # -----------------------------------------------
            
            nn.Flatten(),
            nn.Linear(128*16*16, 1024), #FC1    
            nn.ReLU(),
            nn.Linear(1024, 512), #FC2
            nn.ReLU(),
            nn.Linear(512, 10)) #FC3
    
        
    def forward(self, x):
        return self.network(x)


net = Net_6_3()
net.to(device)

Net_6_3(
  (network): Sequential(
    (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (5): ReLU()
    (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (7): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (8): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU()
    (10): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU()
    (12): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU()
    (14): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (15): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (16): ReLU()
    (17): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), p

In [40]:
# criterion + optimizer

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9, weight_decay=0.001)

In [41]:
epochs = 50

running_loss_history = []
running_corrects_history = []
test_acc_history=[]

for e in range(epochs): # training our model, put input according to every batch.
  
    running_loss = 0.0
    running_corrects = 0.0


    for inputs, labels in trainloader:
        
        inputs = inputs.to(device) # input to device as our model is running in mentioned device.
        labels = labels.to(device)
        outputs = net(inputs) # every batch of 100 images are put as an input.
        loss = criterion(outputs, labels) # Calc loss after each batch i/p by comparing it to actual labels. 

        optimizer.zero_grad() #setting the initial gradient to 0
        loss.backward() # backpropagating the loss
        optimizer.step() # updating the weights and bias values for every single step.

        _, preds = torch.max(outputs, 1) # taking the highest value of prediction.
        running_loss += loss.item()
        running_corrects += torch.sum(preds == labels.data) # calculating te accuracy by taking the sum of all the correct predictions in a batch.

        
    epoch_loss = running_loss/len(trainset) # loss per epoch
    epoch_acc = 100*(running_corrects.float()/ len(trainset)) # accuracy per epoch
    running_loss_history.append(epoch_loss) # appending for displaying 
    running_corrects_history.append(epoch_acc)
                
    print('epoch :', (e+1))
    print('training loss: {:.4f}, acc {:.4f} '.format(epoch_loss, epoch_acc.item()))
    
    # testing

    correct = 0
    total = 0

    # since we're not training, we don't need to calculate the gradients for our outputs
    with torch.no_grad():
        for data in testloader:
            images, labels = data[0].to(device), data[1].to(device)
            # calculate outputs by running images through the network
            outputs = net(images)
            # the class with the highest energy is what we choose as prediction
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    
    test_acc_history.append(100 * correct / total)
    print(f'Accuracy of the network on the 10000 test images: {100 * correct / total} %')

epoch : 1
training loss: 0.0119, acc 44.5060 
Accuracy of the network on the 10000 test images: 55.5 %
epoch : 2
training loss: 0.0081, acc 62.9220 
Accuracy of the network on the 10000 test images: 66.01 %
epoch : 3
training loss: 0.0064, acc 71.0520 
Accuracy of the network on the 10000 test images: 71.41 %
epoch : 4
training loss: 0.0053, acc 76.3320 
Accuracy of the network on the 10000 test images: 73.8 %
epoch : 5
training loss: 0.0044, acc 80.5840 
Accuracy of the network on the 10000 test images: 75.14 %
epoch : 6
training loss: 0.0036, acc 84.7740 
Accuracy of the network on the 10000 test images: 75.98 %
epoch : 7
training loss: 0.0027, acc 88.4800 
Accuracy of the network on the 10000 test images: 76.06 %
epoch : 8
training loss: 0.0020, acc 92.4360 
Accuracy of the network on the 10000 test images: 75.85 %
epoch : 9
training loss: 0.0012, acc 95.9640 
Accuracy of the network on the 10000 test images: 76.19 %
epoch : 10
training loss: 0.0007, acc 98.4600 
Accuracy of the net

In [42]:
running_loss_history = torch.tensor(running_loss_history)
running_loss_history.numpy()
running_corrects_history = torch.tensor(running_corrects_history)
running_corrects_history.numpy()
test_acc_history = torch.tensor(test_acc_history)
test_acc_history.numpy()




results['net13 - training loss'] = running_loss_history
results['net13 - training accuracy'] = running_corrects_history
results['net13 - testing accuracy'] = test_acc_history

results.to_csv('net_results_initial.csv')

## 8 + 4

In [43]:
# define network

class Net_8_3(nn.Module):
    def __init__(self):
        super().__init__()
        self.network = nn.Sequential(
            
            #block 1
            nn.Conv2d(3, 32, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            nn.MaxPool2d(2, 2), 
            nn.BatchNorm2d(32),
            
            #block 2
            nn.Conv2d(32, 32, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            #nn.MaxPool2d(2, 2), max pooling operators removed after first block
            nn.BatchNorm2d(64),
            
            #block 3
            nn.Conv2d(64, 64, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            #nn.MaxPool2d(2, 2), max pooling operators removed after first block
            nn.BatchNorm2d(128),
             
            #block 4
            nn.Conv2d(128, 128, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(128, 128, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            #nn.MaxPool2d(2, 2), max pooling operators removed after first block
            nn.BatchNorm2d(256),
            
            # -----------------------------------------------
            
            nn.Flatten(),
            nn.Linear(256*16*16, 1024), #FC1    
            nn.ReLU(),
            nn.Linear(1024, 512), #FC2
            nn.ReLU(),
            nn.Linear(512, 10)) #FC3
    
        
    def forward(self, x):
        return self.network(x)


net = Net_8_3()
net.to(device)

Net_8_3(
  (network): Sequential(
    (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (5): ReLU()
    (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (7): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (8): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU()
    (10): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU()
    (12): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU()
    (14): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (15): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (16): ReLU()
    (17): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), p

In [44]:
# criterion + optimizer

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9, weight_decay=0.001)

In [45]:
epochs = 50

running_loss_history = []
running_corrects_history = []
test_acc_history=[]

for e in range(epochs): # training our model, put input according to every batch.
  
    running_loss = 0.0
    running_corrects = 0.0


    for inputs, labels in trainloader:
        
        inputs = inputs.to(device) # input to device as our model is running in mentioned device.
        labels = labels.to(device)
        outputs = net(inputs) # every batch of 100 images are put as an input.
        loss = criterion(outputs, labels) # Calc loss after each batch i/p by comparing it to actual labels. 

        optimizer.zero_grad() #setting the initial gradient to 0
        loss.backward() # backpropagating the loss
        optimizer.step() # updating the weights and bias values for every single step.

        _, preds = torch.max(outputs, 1) # taking the highest value of prediction.
        running_loss += loss.item()
        running_corrects += torch.sum(preds == labels.data) # calculating te accuracy by taking the sum of all the correct predictions in a batch.

        
    epoch_loss = running_loss/len(trainset) # loss per epoch
    epoch_acc = 100*(running_corrects.float()/ len(trainset)) # accuracy per epoch
    running_loss_history.append(epoch_loss) # appending for displaying 
    running_corrects_history.append(epoch_acc)
                
    print('epoch :', (e+1))
    print('training loss: {:.4f}, acc {:.4f} '.format(epoch_loss, epoch_acc.item()))
    
    # testing

    correct = 0
    total = 0

    # since we're not training, we don't need to calculate the gradients for our outputs
    with torch.no_grad():
        for data in testloader:
            images, labels = data[0].to(device), data[1].to(device)
            # calculate outputs by running images through the network
            outputs = net(images)
            # the class with the highest energy is what we choose as prediction
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    
    test_acc_history.append(100 * correct / total)
    print(f'Accuracy of the network on the 10000 test images: {100 * correct / total} %')

epoch : 1
training loss: 0.0117, acc 45.2900 
Accuracy of the network on the 10000 test images: 59.48 %
epoch : 2
training loss: 0.0078, acc 64.5020 
Accuracy of the network on the 10000 test images: 66.33 %
epoch : 3
training loss: 0.0060, acc 72.9580 
Accuracy of the network on the 10000 test images: 71.66 %
epoch : 4
training loss: 0.0047, acc 78.9400 
Accuracy of the network on the 10000 test images: 73.89 %
epoch : 5
training loss: 0.0037, acc 83.9980 
Accuracy of the network on the 10000 test images: 75.71 %
epoch : 6
training loss: 0.0026, acc 89.1800 
Accuracy of the network on the 10000 test images: 75.73 %
epoch : 7
training loss: 0.0016, acc 94.0960 
Accuracy of the network on the 10000 test images: 75.93 %
epoch : 8
training loss: 0.0008, acc 97.7520 
Accuracy of the network on the 10000 test images: 76.41 %
epoch : 9
training loss: 0.0003, acc 99.5080 
Accuracy of the network on the 10000 test images: 77.3 %
epoch : 10
training loss: 0.0001, acc 99.9600 
Accuracy of the ne

In [46]:
running_loss_history = torch.tensor(running_loss_history)
running_loss_history.numpy()
running_corrects_history = torch.tensor(running_corrects_history)
running_corrects_history.numpy()
test_acc_history = torch.tensor(test_acc_history)
test_acc_history.numpy()




results['net14 - training loss'] = running_loss_history
results['net14 - training accuracy'] = running_corrects_history
results['net14 - testing accuracy'] = test_acc_history

results.to_csv('net_results_initial.csv')

## 10 + 5

In [3]:
# define network

class Net_10_3(nn.Module):
    def __init__(self):
        super().__init__()
        self.network = nn.Sequential(
            
            #block 1
            nn.Conv2d(3, 32, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            nn.MaxPool2d(2, 2), 
            nn.BatchNorm2d(32),
            
            #block 2
            nn.Conv2d(32, 32, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            #nn.MaxPool2d(2, 2), max pooling operators removed after first block
            nn.BatchNorm2d(64),
            
            #block 3
            nn.Conv2d(64, 64, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            #nn.MaxPool2d(2, 2), max pooling operators removed after first block
            nn.BatchNorm2d(128),
             
            #block 4
            nn.Conv2d(128, 128, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(128, 128, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            #nn.MaxPool2d(2, 2), max pooling operators removed after first block
            nn.BatchNorm2d(256),
            
            #block 5
            nn.Conv2d(256, 256, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(256, 256, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            #nn.MaxPool2d(2, 2), max pooling operators removed after first block
            nn.BatchNorm2d(512),
        
            
            # -----------------------------------------------
            
            nn.Flatten(),
            nn.Linear(512*16*16, 1024), #FC1    
            nn.ReLU(),
            nn.Linear(1024, 512), #FC2
            nn.ReLU(),
            nn.Linear(512, 10)) #FC3
    
        
    def forward(self, x):
        return self.network(x)


net = Net_10_3()
net.to(device)

Net_10_3(
  (network): Sequential(
    (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (5): ReLU()
    (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (7): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (8): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU()
    (10): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU()
    (12): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU()
    (14): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (15): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (16): ReLU()
    (17): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), 

In [4]:
# criterion + optimizer

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9, weight_decay=0.001)

In [5]:
epochs = 50

running_loss_history = []
running_corrects_history = []
test_acc_history=[]

for e in range(epochs): # training our model, put input according to every batch.
  
    running_loss = 0.0
    running_corrects = 0.0


    for inputs, labels in trainloader:
        
        inputs = inputs.to(device) # input to device as our model is running in mentioned device.
        labels = labels.to(device)
        outputs = net(inputs) # every batch of 100 images are put as an input.
        loss = criterion(outputs, labels) # Calc loss after each batch i/p by comparing it to actual labels. 

        optimizer.zero_grad() #setting the initial gradient to 0
        loss.backward() # backpropagating the loss
        optimizer.step() # updating the weights and bias values for every single step.

        _, preds = torch.max(outputs, 1) # taking the highest value of prediction.
        running_loss += loss.item()
        running_corrects += torch.sum(preds == labels.data) # calculating te accuracy by taking the sum of all the correct predictions in a batch.

        
    epoch_loss = running_loss/len(trainset) # loss per epoch
    epoch_acc = 100*(running_corrects.float()/ len(trainset)) # accuracy per epoch
    running_loss_history.append(epoch_loss) # appending for displaying 
    running_corrects_history.append(epoch_acc)
                
    print('epoch :', (e+1))
    print('training loss: {:.4f}, acc {:.4f} '.format(epoch_loss, epoch_acc.item()))
    
    # testing

    correct = 0
    total = 0

    # since we're not training, we don't need to calculate the gradients for our outputs
    with torch.no_grad():
        for data in testloader:
            images, labels = data[0].to(device), data[1].to(device)
            # calculate outputs by running images through the network
            outputs = net(images)
            # the class with the highest energy is what we choose as prediction
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    
    test_acc_history.append(100 * correct / total)
    print(f'Accuracy of the network on the 10000 test images: {100 * correct / total} %')

epoch : 1
training loss: 0.0117, acc 44.9540 
Accuracy of the network on the 10000 test images: 57.77 %
epoch : 2
training loss: 0.0077, acc 65.0560 
Accuracy of the network on the 10000 test images: 68.03 %
epoch : 3
training loss: 0.0057, acc 74.4900 
Accuracy of the network on the 10000 test images: 72.72 %
epoch : 4
training loss: 0.0043, acc 81.1420 
Accuracy of the network on the 10000 test images: 74.81 %
epoch : 5
training loss: 0.0030, acc 86.9520 
Accuracy of the network on the 10000 test images: 75.42 %
epoch : 6
training loss: 0.0017, acc 93.3800 
Accuracy of the network on the 10000 test images: 76.1 %
epoch : 7
training loss: 0.0008, acc 97.5640 
Accuracy of the network on the 10000 test images: 76.68 %
epoch : 8
training loss: 0.0003, acc 99.5400 
Accuracy of the network on the 10000 test images: 77.53 %
epoch : 9
training loss: 0.0001, acc 99.9560 
Accuracy of the network on the 10000 test images: 78.54 %
epoch : 10
training loss: 0.0000, acc 100.0000 
Accuracy of the n

In [9]:
running_loss_history = torch.tensor(running_loss_history)
running_loss_history.numpy()
running_corrects_history = torch.tensor(running_corrects_history)
running_corrects_history.numpy()
test_acc_history = torch.tensor(test_acc_history)
test_acc_history.numpy()




results['net15 - training loss'] = running_loss_history
results['net15 - training accuracy'] = running_corrects_history
results['net15 - testing accuracy'] = test_acc_history

results.to_csv('net_results_initial.csv')

  running_loss_history = torch.tensor(running_loss_history)
  running_corrects_history = torch.tensor(running_corrects_history)
  test_acc_history = torch.tensor(test_acc_history)
