# Package and Data Loading

Inspiration:
https://arxiv.org/pdf/1512.03385.pdf
ResNet architecture - split convolutional layers into blocks based on size of neurons

In [1]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import pandas as pd

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')



In [2]:
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize(( 0.5), ( 0.5))])

batch_size = 128

trainset = torchvision.datasets.EMNIST(root='./data', train=True,
                                        download=True, split='balanced',transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                          shuffle=True, num_workers=4, pin_memory=True)

testset = torchvision.datasets.EMNIST(root='./data', train=False,
                                       download=True,split='balanced', transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
                                         shuffle=False, num_workers=4, pin_memory=True)

#classes = ('plane', 'car', 'bird', 'cat',
#           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

In [3]:

results = pd.DataFrame()

# Base CNN (2 Layers) 

In [5]:
# define network

class Net_2(nn.Module):
    def __init__(self):
        super().__init__()
        self.network = nn.Sequential(
            
            #block 1
            nn.Conv2d(1, 64, kernel_size=3, padding=1), #1 #### change to 64?
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            nn.MaxPool2d(2, 2), # output: 64 x 16 x 16
            nn.BatchNorm2d(64),

            
            # -----------------------------------------------
            
            nn.Flatten(),
            nn.Linear(12544, 1024), #FC1    
            nn.ReLU(),
            nn.Linear(1024, 512), #FC2
            nn.ReLU(),
            nn.Linear(512, 47)) #FC3
    
        
    def forward(self, x):
        return self.network(x)


net = Net_2()
net.to(device)

Net_2(
  (network): Sequential(
    (0): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): Flatten(start_dim=1, end_dim=-1)
    (7): Linear(in_features=12544, out_features=1024, bias=True)
    (8): ReLU()
    (9): Linear(in_features=1024, out_features=512, bias=True)
    (10): ReLU()
    (11): Linear(in_features=512, out_features=47, bias=True)
  )
)

In [7]:
# criterion + optimizer

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9, weight_decay=0.001)

In [8]:
epochs = 50

running_loss_history = []
running_corrects_history = []
test_acc_history=[]

for e in range(epochs): # training our model, put input according to every batch.
  
    running_loss = 0.0
    running_corrects = 0.0


    for inputs, labels in trainloader:
        
        inputs = inputs.to(device) # input to device as our model is running in mentioned device.
        labels = labels.to(device)
        outputs = net(inputs) # every batch of 100 images are put as an input.
        #print(labels)
        loss = criterion(outputs, labels) # Calc loss after each batch i/p by comparing it to actual labels. 

        optimizer.zero_grad() #setting the initial gradient to 0
        loss.backward() # backpropagating the loss
        optimizer.step() # updating the weights and bias values for every single step.

        _, preds = torch.max(outputs, 1) # taking the highest value of prediction.
        running_loss += loss.item()
        running_corrects += torch.sum(preds == labels.data) # calculating te accuracy by taking the sum of all the correct predictions in a batch.

        
    epoch_loss = running_loss/len(trainset) # loss per epoch
    epoch_acc = 100*(running_corrects.float()/ len(trainset)) # accuracy per epoch
    running_loss_history.append(epoch_loss) # appending for displaying 
    running_corrects_history.append(epoch_acc)
                
    print('epoch :', (e+1))
    print('training loss: {:.4f}, acc {:.4f} '.format(epoch_loss, epoch_acc.item()))
    
    # testing

    correct = 0
    total = 0

    # since we're not training, we don't need to calculate the gradients for our outputs
    with torch.no_grad():
        for data in testloader:
            images, labels = data[0].to(device), data[1].to(device)
            # calculate outputs by running images through the network
            outputs = net(images)
            # the class with the highest energy is what we choose as prediction
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    
    test_acc_history.append(100 * correct / total)
    print(f'Accuracy of the network on the 10000 test images: {100 * correct / total} %')


epoch : 1
training loss: 0.0102, acc 66.4929 
Accuracy of the network on the 10000 test images: 80.77659574468085 %
epoch : 2
training loss: 0.0040, acc 83.5284 
Accuracy of the network on the 10000 test images: 83.78191489361703 %
epoch : 3
training loss: 0.0034, acc 85.7890 
Accuracy of the network on the 10000 test images: 84.94148936170212 %
epoch : 4
training loss: 0.0030, acc 86.9761 
Accuracy of the network on the 10000 test images: 85.68085106382979 %
epoch : 5
training loss: 0.0028, acc 87.8590 
Accuracy of the network on the 10000 test images: 85.86702127659575 %
epoch : 6
training loss: 0.0026, acc 88.6082 
Accuracy of the network on the 10000 test images: 86.76063829787235 %
epoch : 7
training loss: 0.0024, acc 89.1950 
Accuracy of the network on the 10000 test images: 87.1436170212766 %
epoch : 8
training loss: 0.0023, acc 89.7145 
Accuracy of the network on the 10000 test images: 87.42553191489361 %
epoch : 9
training loss: 0.0022, acc 90.2314 
Accuracy of the network on 

In [9]:
running_loss_history = torch.tensor(running_loss_history)
running_loss_history.numpy()
running_corrects_history = torch.tensor(running_corrects_history)
running_corrects_history.numpy()
test_acc_history = torch.tensor(test_acc_history)
test_acc_history.numpy()




results['net1 - training loss'] = running_loss_history
results['net1 - training accuracy'] = running_corrects_history
results['net1 - testing accuracy'] = test_acc_history

results.to_csv('net_results_emnist.csv')

# 4 Layers

In [10]:
# define network

class Net_4(nn.Module):
    def __init__(self):
        super().__init__()
        self.network = nn.Sequential(
            
            #block 1
            nn.Conv2d(1, 64, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            nn.MaxPool2d(2, 2), # output: 64 x 16 x 16
            nn.BatchNorm2d(64),
            
            #block 2
            nn.Conv2d(64, 64, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            #nn.MaxPool2d(2, 2), max pooling operators removed after first block
            nn.BatchNorm2d(128),
            
            # -----------------------------------------------
            
            nn.Flatten(),
            nn.Linear(25088, 1024), #FC1    
            nn.ReLU(),
            nn.Linear(1024, 512), #FC2
            nn.ReLU(),
            nn.Linear(512, 47)) #FC3
    
        
    def forward(self, x):
        return self.network(x)


net = Net_4()
net.to(device)

Net_4(
  (network): Sequential(
    (0): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU()
    (8): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU()
    (10): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (11): Flatten(start_dim=1, end_dim=-1)
    (12): Linear(in_features=25088, out_features=1024, bias=True)
    (13): ReLU()
    (14): Linear(in_features=1024, out_features=512, bias=True)
    (15): ReLU()
    (16): Linear(in_features=512, out_features=47, bias=True)
  )
)

In [11]:
# criterion + optimizer

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9, weight_decay=0.001)

In [12]:
epochs = 50

running_loss_history = []
running_corrects_history = []
test_acc_history=[]

for e in range(epochs): # training our model, put input according to every batch.
  
    running_loss = 0.0
    running_corrects = 0.0


    for inputs, labels in trainloader:
        
        inputs = inputs.to(device) # input to device as our model is running in mentioned device.
        labels = labels.to(device)
        outputs = net(inputs) # every batch of 100 images are put as an input.
        loss = criterion(outputs, labels) # Calc loss after each batch i/p by comparing it to actual labels. 

        optimizer.zero_grad() #setting the initial gradient to 0
        loss.backward() # backpropagating the loss
        optimizer.step() # updating the weights and bias values for every single step.

        _, preds = torch.max(outputs, 1) # taking the highest value of prediction.
        running_loss += loss.item()
        running_corrects += torch.sum(preds == labels.data) # calculating te accuracy by taking the sum of all the correct predictions in a batch.

        
    epoch_loss = running_loss/len(trainset) # loss per epoch
    epoch_acc = 100*(running_corrects.float()/ len(trainset)) # accuracy per epoch
    running_loss_history.append(epoch_loss) # appending for displaying 
    running_corrects_history.append(epoch_acc)
                
    print('epoch :', (e+1))
    print('training loss: {:.4f}, acc {:.4f} '.format(epoch_loss, epoch_acc.item()))
    
    # testing

    correct = 0
    total = 0

    # since we're not training, we don't need to calculate the gradients for our outputs
    with torch.no_grad():
        for data in testloader:
            images, labels = data[0].to(device), data[1].to(device)
            # calculate outputs by running images through the network
            outputs = net(images)
            # the class with the highest energy is what we choose as prediction
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    
    test_acc_history.append(100 * correct / total)
    print(f'Accuracy of the network on the 10000 test images: {100 * correct / total} %')


epoch : 1
training loss: 0.0076, acc 74.3741 
Accuracy of the network on the 10000 test images: 83.14893617021276 %
epoch : 2
training loss: 0.0032, acc 86.3014 
Accuracy of the network on the 10000 test images: 85.90425531914893 %
epoch : 3
training loss: 0.0027, acc 88.0771 
Accuracy of the network on the 10000 test images: 87.33510638297872 %
epoch : 4
training loss: 0.0024, acc 89.0186 
Accuracy of the network on the 10000 test images: 87.92553191489361 %
epoch : 5
training loss: 0.0022, acc 89.7810 
Accuracy of the network on the 10000 test images: 87.45212765957447 %
epoch : 6
training loss: 0.0021, acc 90.4885 
Accuracy of the network on the 10000 test images: 88.63829787234043 %
epoch : 7
training loss: 0.0020, acc 90.9920 
Accuracy of the network on the 10000 test images: 88.7872340425532 %
epoch : 8
training loss: 0.0018, acc 91.4832 
Accuracy of the network on the 10000 test images: 88.55851063829788 %
epoch : 9
training loss: 0.0017, acc 92.0816 
Accuracy of the network on 

In [13]:
running_loss_history = torch.tensor(running_loss_history)
running_loss_history.numpy()
running_corrects_history = torch.tensor(running_corrects_history)
running_corrects_history.numpy()
test_acc_history = torch.tensor(test_acc_history)
test_acc_history.numpy()




results['net2 - training loss'] = running_loss_history
results['net2 - training accuracy'] = running_corrects_history
results['net2 - testing accuracy'] = test_acc_history

results.to_csv('net_results_emnist.csv')

# 6 Layers

In [14]:
# define network

class Net_6(nn.Module):
    def __init__(self):
        super().__init__()
        self.network = nn.Sequential(
            
            #block 1
            nn.Conv2d(1, 64, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            nn.MaxPool2d(2, 2), # output: 64 x 16 x 16
            nn.BatchNorm2d(64),
            
            #block 2
            nn.Conv2d(64, 64, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            #nn.MaxPool2d(2, 2), max pooling operators removed after first block
            nn.BatchNorm2d(128),
            
            #block 3
            nn.Conv2d(128, 128, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            #nn.MaxPool2d(2, 2), max pooling operators removed after first block
            nn.BatchNorm2d(256),
            
            # -----------------------------------------------
            
            nn.Flatten(),
            nn.Linear(50176, 1024), #FC1    
            nn.ReLU(),
            nn.Linear(1024, 512), #FC2
            nn.ReLU(),
            nn.Linear(512, 47)) #FC3
    
        
    def forward(self, x):
        return self.network(x)


net = Net_6()
net.to(device)

Net_6(
  (network): Sequential(
    (0): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU()
    (8): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU()
    (10): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (11): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (12): ReLU()
    (13): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (14): ReLU()
    (15): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (16): Flatten(start_dim=1, end_dim=-1)
    (17): Linear

In [15]:
# criterion + optimizer

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9, weight_decay=0.001)

In [16]:
epochs = 50

running_loss_history = []
running_corrects_history = []
test_acc_history=[]

for e in range(epochs): # training our model, put input according to every batch.
  
    running_loss = 0.0
    running_corrects = 0.0


    for inputs, labels in trainloader:
        
        inputs = inputs.to(device) # input to device as our model is running in mentioned device.
        labels = labels.to(device)
        outputs = net(inputs) # every batch of 100 images are put as an input.
        loss = criterion(outputs, labels) # Calc loss after each batch i/p by comparing it to actual labels. 

        optimizer.zero_grad() #setting the initial gradient to 0
        loss.backward() # backpropagating the loss
        optimizer.step() # updating the weights and bias values for every single step.

        _, preds = torch.max(outputs, 1) # taking the highest value of prediction.
        running_loss += loss.item()
        running_corrects += torch.sum(preds == labels.data) # calculating te accuracy by taking the sum of all the correct predictions in a batch.

        
    epoch_loss = running_loss/len(trainset) # loss per epoch
    epoch_acc = 100*(running_corrects.float()/ len(trainset)) # accuracy per epoch
    running_loss_history.append(epoch_loss) # appending for displaying 
    running_corrects_history.append(epoch_acc)
                
    print('epoch :', (e+1))
    print('training loss: {:.4f}, acc {:.4f} '.format(epoch_loss, epoch_acc.item()))
    
    # testing

    correct = 0
    total = 0

    # since we're not training, we don't need to calculate the gradients for our outputs
    with torch.no_grad():
        for data in testloader:
            images, labels = data[0].to(device), data[1].to(device)
            # calculate outputs by running images through the network
            outputs = net(images)
            # the class with the highest energy is what we choose as prediction
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    
    test_acc_history.append(100 * correct / total)
    print(f'Accuracy of the network on the 10000 test images: {100 * correct / total} %')

epoch : 1
training loss: 0.0062, acc 77.7456 
Accuracy of the network on the 10000 test images: 86.03191489361703 %
epoch : 2
training loss: 0.0027, acc 87.9699 
Accuracy of the network on the 10000 test images: 88.04255319148936 %
epoch : 3
training loss: 0.0023, acc 89.4176 
Accuracy of the network on the 10000 test images: 88.88297872340425 %
epoch : 4
training loss: 0.0021, acc 90.3218 
Accuracy of the network on the 10000 test images: 88.85106382978724 %
epoch : 5
training loss: 0.0019, acc 91.1179 
Accuracy of the network on the 10000 test images: 88.38829787234043 %
epoch : 6
training loss: 0.0017, acc 91.7979 
Accuracy of the network on the 10000 test images: 89.33510638297872 %
epoch : 7
training loss: 0.0016, acc 92.6614 
Accuracy of the network on the 10000 test images: 88.07978723404256 %
epoch : 8
training loss: 0.0014, acc 93.3661 
Accuracy of the network on the 10000 test images: 89.22340425531915 %
epoch : 9
training loss: 0.0013, acc 94.0612 
Accuracy of the network on

In [17]:
running_loss_history = torch.tensor(running_loss_history)
running_loss_history.numpy()
running_corrects_history = torch.tensor(running_corrects_history)
running_corrects_history.numpy()
test_acc_history = torch.tensor(test_acc_history)
test_acc_history.numpy()




results['net3 - training loss'] = running_loss_history
results['net3 - training accuracy'] = running_corrects_history
results['net3 - testing accuracy'] = test_acc_history

results.to_csv('net_results_emnist.csv')

# 8 Layers

In [18]:
# define network

class Net_8(nn.Module):
    def __init__(self):
        super().__init__()
        self.network = nn.Sequential(
            
            #block 1
            nn.Conv2d(1, 64, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            nn.MaxPool2d(2, 2), # output: 64 x 16 x 16
            nn.BatchNorm2d(64),
            
            #block 2
            nn.Conv2d(64, 64, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            #nn.MaxPool2d(2, 2), max pooling operators removed after first block
            nn.BatchNorm2d(128),
            
            #block 3
            nn.Conv2d(128, 128, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            #nn.MaxPool2d(2, 2), max pooling operators removed after first block
            nn.BatchNorm2d(256),
             
            #block 4
            nn.Conv2d(256, 256, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            #nn.MaxPool2d(2, 2), max pooling operators removed after first block
            nn.BatchNorm2d(512),
            
            # -----------------------------------------------
            
            nn.Flatten(),
            nn.Linear(100352, 1024), #FC1    
            nn.ReLU(),
            nn.Linear(1024, 512), #FC2
            nn.ReLU(),
            nn.Linear(512, 47)) #FC3
    
        
    def forward(self, x):
        return self.network(x)


net = Net_8()
net.to(device)

Net_8(
  (network): Sequential(
    (0): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU()
    (8): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU()
    (10): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (11): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (12): ReLU()
    (13): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (14): ReLU()
    (15): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (16): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1

In [19]:
# criterion + optimizer

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9, weight_decay=0.001)

In [20]:
epochs = 50

running_loss_history = []
running_corrects_history = []
test_acc_history=[]

for e in range(epochs): # training our model, put input according to every batch.
  
    running_loss = 0.0
    running_corrects = 0.0


    for inputs, labels in trainloader:
        
        inputs = inputs.to(device) # input to device as our model is running in mentioned device.
        labels = labels.to(device)
        outputs = net(inputs) # every batch of 100 images are put as an input.
        loss = criterion(outputs, labels) # Calc loss after each batch i/p by comparing it to actual labels. 

        optimizer.zero_grad() #setting the initial gradient to 0
        loss.backward() # backpropagating the loss
        optimizer.step() # updating the weights and bias values for every single step.

        _, preds = torch.max(outputs, 1) # taking the highest value of prediction.
        running_loss += loss.item()
        running_corrects += torch.sum(preds == labels.data) # calculating te accuracy by taking the sum of all the correct predictions in a batch.

        
    epoch_loss = running_loss/len(trainset) # loss per epoch
    epoch_acc = 100*(running_corrects.float()/ len(trainset)) # accuracy per epoch
    running_loss_history.append(epoch_loss) # appending for displaying 
    running_corrects_history.append(epoch_acc)
                
    print('epoch :', (e+1))
    print('training loss: {:.4f}, acc {:.4f} '.format(epoch_loss, epoch_acc.item()))
    
    # testing

    correct = 0
    total = 0

    # since we're not training, we don't need to calculate the gradients for our outputs
    with torch.no_grad():
        for data in testloader:
            images, labels = data[0].to(device), data[1].to(device)
            # calculate outputs by running images through the network
            outputs = net(images)
            # the class with the highest energy is what we choose as prediction
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    
    test_acc_history.append(100 * correct / total)
    print(f'Accuracy of the network on the 10000 test images: {100 * correct / total} %')

epoch : 1
training loss: 0.0054, acc 79.6383 
Accuracy of the network on the 10000 test images: 86.65425531914893 %
epoch : 2
training loss: 0.0025, acc 88.7252 
Accuracy of the network on the 10000 test images: 88.05319148936171 %
epoch : 3
training loss: 0.0021, acc 90.2934 
Accuracy of the network on the 10000 test images: 88.69680851063829 %
epoch : 4
training loss: 0.0018, acc 91.3395 
Accuracy of the network on the 10000 test images: 88.95744680851064 %
epoch : 5
training loss: 0.0016, acc 92.5771 
Accuracy of the network on the 10000 test images: 89.02127659574468 %
epoch : 6
training loss: 0.0014, acc 93.5505 
Accuracy of the network on the 10000 test images: 88.72340425531915 %
epoch : 7
training loss: 0.0012, acc 94.6020 
Accuracy of the network on the 10000 test images: 89.18617021276596 %
epoch : 8
training loss: 0.0010, acc 95.6312 
Accuracy of the network on the 10000 test images: 89.28191489361703 %
epoch : 9
training loss: 0.0008, acc 96.6206 
Accuracy of the network on

In [21]:
running_loss_history = torch.tensor(running_loss_history)
running_loss_history.numpy()
running_corrects_history = torch.tensor(running_corrects_history)
running_corrects_history.numpy()
test_acc_history = torch.tensor(test_acc_history)
test_acc_history.numpy()




results['net4 - training loss'] = running_loss_history
results['net4 - training accuracy'] = running_corrects_history
results['net4 - testing accuracy'] = test_acc_history

results.to_csv('net_results_emnist.csv')

# 10 Layers

In [22]:
# define network

class Net_10(nn.Module):
    def __init__(self):
        super().__init__()
        self.network = nn.Sequential(
            
            #block 1
            nn.Conv2d(1, 64, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            nn.MaxPool2d(2, 2), # output: 64 x 16 x 16
            nn.BatchNorm2d(64),
            
            #block 2
            nn.Conv2d(64, 64, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            #nn.MaxPool2d(2, 2), max pooling operators removed after first block
            nn.BatchNorm2d(128),
            
            #block 3
            nn.Conv2d(128, 128, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            #nn.MaxPool2d(2, 2), max pooling operators removed after first block
            nn.BatchNorm2d(256),
             
            #block 4
            nn.Conv2d(256, 256, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            #nn.MaxPool2d(2, 2), max pooling operators removed after first block
            nn.BatchNorm2d(512),
            
            #block 5
            nn.Conv2d(512, 1024, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(1024, 1024, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            #nn.MaxPool2d(2, 2), max pooling operators removed after first block
            nn.BatchNorm2d(1024),
        
            
            # -----------------------------------------------
            
            nn.Flatten(),
            nn.Linear(200704, 1024), #FC1    
            nn.ReLU(),
            nn.Linear(1024, 512), #FC2
            nn.ReLU(),
            nn.Linear(512, 47)) #FC3
    
        
    def forward(self, x):
        return self.network(x)


net = Net_10()
net.to(device)

Net_10(
  (network): Sequential(
    (0): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU()
    (8): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU()
    (10): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (11): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (12): ReLU()
    (13): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (14): ReLU()
    (15): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (16): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 

In [23]:
# criterion + optimizer

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9, weight_decay=0.001)

In [24]:
epochs = 50

running_loss_history = []
running_corrects_history = []
test_acc_history=[]

for e in range(epochs): # training our model, put input according to every batch.
  
    running_loss = 0.0
    running_corrects = 0.0


    for inputs, labels in trainloader:
        
        inputs = inputs.to(device) # input to device as our model is running in mentioned device.
        labels = labels.to(device)
        outputs = net(inputs) # every batch of 100 images are put as an input.
        loss = criterion(outputs, labels) # Calc loss after each batch i/p by comparing it to actual labels. 

        optimizer.zero_grad() #setting the initial gradient to 0
        loss.backward() # backpropagating the loss
        optimizer.step() # updating the weights and bias values for every single step.

        _, preds = torch.max(outputs, 1) # taking the highest value of prediction.
        running_loss += loss.item()
        running_corrects += torch.sum(preds == labels.data) # calculating te accuracy by taking the sum of all the correct predictions in a batch.

        
    epoch_loss = running_loss/len(trainset) # loss per epoch
    epoch_acc = 100*(running_corrects.float()/ len(trainset)) # accuracy per epoch
    running_loss_history.append(epoch_loss) # appending for displaying 
    running_corrects_history.append(epoch_acc)
                
    print('epoch :', (e+1))
    print('training loss: {:.4f}, acc {:.4f} '.format(epoch_loss, epoch_acc.item()))
    
    # testing

    correct = 0
    total = 0

    # since we're not training, we don't need to calculate the gradients for our outputs
    with torch.no_grad():
        for data in testloader:
            images, labels = data[0].to(device), data[1].to(device)
            # calculate outputs by running images through the network
            outputs = net(images)
            # the class with the highest energy is what we choose as prediction
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    
    test_acc_history.append(100 * correct / total)
    print(f'Accuracy of the network on the 10000 test images: {100 * correct / total} %')

epoch : 1
training loss: 0.0049, acc 80.9681 
Accuracy of the network on the 10000 test images: 87.39893617021276 %
epoch : 2
training loss: 0.0023, acc 89.3715 
Accuracy of the network on the 10000 test images: 88.37765957446808 %
epoch : 3
training loss: 0.0018, acc 91.2145 
Accuracy of the network on the 10000 test images: 89.13829787234043 %
epoch : 4
training loss: 0.0014, acc 93.0709 
Accuracy of the network on the 10000 test images: 89.30851063829788 %
epoch : 5
training loss: 0.0011, acc 94.5355 
Accuracy of the network on the 10000 test images: 88.59042553191489 %
epoch : 6
training loss: 0.0009, acc 96.1170 
Accuracy of the network on the 10000 test images: 88.68085106382979 %
epoch : 7
training loss: 0.0006, acc 97.2376 
Accuracy of the network on the 10000 test images: 89.03191489361703 %
epoch : 8
training loss: 0.0005, acc 98.0275 
Accuracy of the network on the 10000 test images: 88.9627659574468 %
epoch : 9
training loss: 0.0004, acc 98.5825 
Accuracy of the network on 

In [25]:
running_loss_history = torch.tensor(running_loss_history)
running_loss_history.numpy()
running_corrects_history = torch.tensor(running_corrects_history)
running_corrects_history.numpy()
test_acc_history = torch.tensor(test_acc_history)
test_acc_history.numpy()




results['net5 - training loss'] = running_loss_history
results['net5 - training accuracy'] = running_corrects_history
results['net5 - testing accuracy'] = test_acc_history

results.to_csv('net_results_emnist.csv')

# ---------------- Changing Neurons in Each Network ----------------------

## 2 Layers

In [26]:
# define network

class Net_2_2(nn.Module):
    def __init__(self):
        super().__init__()
        self.network = nn.Sequential(
            
            #block 1
            nn.Conv2d(1, 32, kernel_size=3, padding=1), 
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            nn.MaxPool2d(2, 2), # output: 64 x 16 x 16
            nn.BatchNorm2d(32),

            
            # -----------------------------------------------
            
            nn.Flatten(),
            nn.Linear(6272, 1024), #FC1    
            nn.ReLU(),
            nn.Linear(1024, 512), #FC2
            nn.ReLU(),
            nn.Linear(512, 47)) #FC3
    
        
    def forward(self, x):
        return self.network(x)


net = Net_2_2()
net.to(device)

Net_2_2(
  (network): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): Flatten(start_dim=1, end_dim=-1)
    (7): Linear(in_features=6272, out_features=1024, bias=True)
    (8): ReLU()
    (9): Linear(in_features=1024, out_features=512, bias=True)
    (10): ReLU()
    (11): Linear(in_features=512, out_features=47, bias=True)
  )
)

In [27]:
# criterion + optimizer

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9, weight_decay=0.001)

In [28]:
epochs = 50

running_loss_history = []
running_corrects_history = []
test_acc_history=[]

for e in range(epochs): # training our model, put input according to every batch.
  
    running_loss = 0.0
    running_corrects = 0.0


    for inputs, labels in trainloader:
        
        inputs = inputs.to(device) # input to device as our model is running in mentioned device.
        labels = labels.to(device)
        outputs = net(inputs) # every batch of 100 images are put as an input.
        loss = criterion(outputs, labels) # Calc loss after each batch i/p by comparing it to actual labels. 

        optimizer.zero_grad() #setting the initial gradient to 0
        loss.backward() # backpropagating the loss
        optimizer.step() # updating the weights and bias values for every single step.

        _, preds = torch.max(outputs, 1) # taking the highest value of prediction.
        running_loss += loss.item()
        running_corrects += torch.sum(preds == labels.data) # calculating te accuracy by taking the sum of all the correct predictions in a batch.

        
    epoch_loss = running_loss/len(trainset) # loss per epoch
    epoch_acc = 100*(running_corrects.float()/ len(trainset)) # accuracy per epoch
    running_loss_history.append(epoch_loss) # appending for displaying 
    running_corrects_history.append(epoch_acc)
                
    print('epoch :', (e+1))
    print('training loss: {:.4f}, acc {:.4f} '.format(epoch_loss, epoch_acc.item()))
    
    # testing

    correct = 0
    total = 0

    # since we're not training, we don't need to calculate the gradients for our outputs
    with torch.no_grad():
        for data in testloader:
            images, labels = data[0].to(device), data[1].to(device)
            # calculate outputs by running images through the network
            outputs = net(images)
            # the class with the highest energy is what we choose as prediction
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    
    test_acc_history.append(100 * correct / total)
    print(f'Accuracy of the network on the 10000 test images: {100 * correct / total} %')

epoch : 1
training loss: 0.0121, acc 61.9973 
Accuracy of the network on the 10000 test images: 78.7127659574468 %
epoch : 2
training loss: 0.0045, acc 81.6817 
Accuracy of the network on the 10000 test images: 83.04255319148936 %
epoch : 3
training loss: 0.0037, acc 84.6747 
Accuracy of the network on the 10000 test images: 84.19680851063829 %
epoch : 4
training loss: 0.0033, acc 86.1170 
Accuracy of the network on the 10000 test images: 85.38829787234043 %
epoch : 5
training loss: 0.0030, acc 86.9778 
Accuracy of the network on the 10000 test images: 85.9627659574468 %
epoch : 6
training loss: 0.0028, acc 87.6649 
Accuracy of the network on the 10000 test images: 86.11170212765957 %
epoch : 7
training loss: 0.0027, acc 88.3555 
Accuracy of the network on the 10000 test images: 86.62765957446808 %
epoch : 8
training loss: 0.0025, acc 88.7651 
Accuracy of the network on the 10000 test images: 86.87234042553192 %
epoch : 9
training loss: 0.0024, acc 89.3254 
Accuracy of the network on t

In [29]:
running_loss_history = torch.tensor(running_loss_history)
running_loss_history.numpy()
running_corrects_history = torch.tensor(running_corrects_history)
running_corrects_history.numpy()
test_acc_history = torch.tensor(test_acc_history)
test_acc_history.numpy()




results['net6 - training loss'] = running_loss_history
results['net6 - training accuracy'] = running_corrects_history
results['net6 - testing accuracy'] = test_acc_history

results.to_csv('net_results_emnist.csv')

## 4 Layers

In [30]:
# define network

class Net_4_2(nn.Module):
    def __init__(self):
        super().__init__()
        self.network = nn.Sequential(
            
            #block 1
            nn.Conv2d(1, 32, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            nn.MaxPool2d(2, 2), 
            nn.BatchNorm2d(32),
            
            #block 2
            nn.Conv2d(32, 32, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            #nn.MaxPool2d(2, 2), max pooling operators removed after first block
            nn.BatchNorm2d(64),
            
            # -----------------------------------------------
            
            nn.Flatten(),
            nn.Linear(12544, 1024), #FC1    
            nn.ReLU(),
            nn.Linear(1024, 512), #FC2
            nn.ReLU(),
            nn.Linear(512, 47)) #FC3
    
        
    def forward(self, x):
        return self.network(x)


net = Net_4_2()
net.to(device)

Net_4_2(
  (network): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU()
    (8): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU()
    (10): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (11): Flatten(start_dim=1, end_dim=-1)
    (12): Linear(in_features=12544, out_features=1024, bias=True)
    (13): ReLU()
    (14): Linear(in_features=1024, out_features=512, bias=True)
    (15): ReLU()
    (16): Linear(in_features=512, out_features=47, bias=True)
  )
)

In [31]:
# criterion + optimizer

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9, weight_decay=0.001)

In [32]:
epochs = 50

running_loss_history = []
running_corrects_history = []
test_acc_history=[]

for e in range(epochs): # training our model, put input according to every batch.
  
    running_loss = 0.0
    running_corrects = 0.0


    for inputs, labels in trainloader:
        
        inputs = inputs.to(device) # input to device as our model is running in mentioned device.
        labels = labels.to(device)
        outputs = net(inputs) # every batch of 100 images are put as an input.
        loss = criterion(outputs, labels) # Calc loss after each batch i/p by comparing it to actual labels. 

        optimizer.zero_grad() #setting the initial gradient to 0
        loss.backward() # backpropagating the loss
        optimizer.step() # updating the weights and bias values for every single step.

        _, preds = torch.max(outputs, 1) # taking the highest value of prediction.
        running_loss += loss.item()
        running_corrects += torch.sum(preds == labels.data) # calculating te accuracy by taking the sum of all the correct predictions in a batch.

        
    epoch_loss = running_loss/len(trainset) # loss per epoch
    epoch_acc = 100*(running_corrects.float()/ len(trainset)) # accuracy per epoch
    running_loss_history.append(epoch_loss) # appending for displaying 
    running_corrects_history.append(epoch_acc)
                
    print('epoch :', (e+1))
    print('training loss: {:.4f}, acc {:.4f} '.format(epoch_loss, epoch_acc.item()))
    
    # testing

    correct = 0
    total = 0

    # since we're not training, we don't need to calculate the gradients for our outputs
    with torch.no_grad():
        for data in testloader:
            images, labels = data[0].to(device), data[1].to(device)
            # calculate outputs by running images through the network
            outputs = net(images)
            # the class with the highest energy is what we choose as prediction
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    
    test_acc_history.append(100 * correct / total)
    print(f'Accuracy of the network on the 10000 test images: {100 * correct / total} %')

epoch : 1
training loss: 0.0095, acc 69.0621 
Accuracy of the network on the 10000 test images: 83.07446808510639 %
epoch : 2
training loss: 0.0035, acc 85.0160 
Accuracy of the network on the 10000 test images: 85.37234042553192 %
epoch : 3
training loss: 0.0030, acc 87.0284 
Accuracy of the network on the 10000 test images: 86.30319148936171 %
epoch : 4
training loss: 0.0027, acc 88.0479 
Accuracy of the network on the 10000 test images: 87.27127659574468 %
epoch : 5
training loss: 0.0025, acc 88.8537 
Accuracy of the network on the 10000 test images: 87.90425531914893 %
epoch : 6
training loss: 0.0023, acc 89.4353 
Accuracy of the network on the 10000 test images: 88.22872340425532 %
epoch : 7
training loss: 0.0022, acc 89.9140 
Accuracy of the network on the 10000 test images: 88.55851063829788 %
epoch : 8
training loss: 0.0021, acc 90.1897 
Accuracy of the network on the 10000 test images: 88.26063829787235 %
epoch : 9
training loss: 0.0020, acc 90.7278 
Accuracy of the network on

In [33]:
running_loss_history = torch.tensor(running_loss_history)
running_loss_history.numpy()
running_corrects_history = torch.tensor(running_corrects_history)
running_corrects_history.numpy()
test_acc_history = torch.tensor(test_acc_history)
test_acc_history.numpy()




results['net7 - training loss'] = running_loss_history
results['net7 - training accuracy'] = running_corrects_history
results['net7 - testing accuracy'] = test_acc_history

results.to_csv('net_results_emnist.csv')

## 6 Layers

In [34]:
# define network

class Net_6_2(nn.Module):
    def __init__(self):
        super().__init__()
        self.network = nn.Sequential(
            
            #block 1
            nn.Conv2d(1, 32, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.BatchNorm2d(32),
            
            #block 2
            nn.Conv2d(32, 32, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            #nn.MaxPool2d(2, 2), max pooling operators removed after first block
            nn.BatchNorm2d(64),
            
            #block 3
            nn.Conv2d(64, 64, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            #nn.MaxPool2d(2, 2), max pooling operators removed after first block
            nn.BatchNorm2d(128),
            
            # -----------------------------------------------
            
            nn.Flatten(),
            nn.Linear(25088, 1024), #FC1    
            nn.ReLU(),
            nn.Linear(1024, 512), #FC2
            nn.ReLU(),
            nn.Linear(512, 47)) #FC3
    
        
    def forward(self, x):
        return self.network(x)


net = Net_6_2()
net.to(device)

Net_6_2(
  (network): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU()
    (8): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU()
    (10): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (11): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (12): ReLU()
    (13): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (14): ReLU()
    (15): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (16): Flatten(start_dim=1, end_dim=-1)
    (17): Linear(in

In [35]:
# criterion + optimizer

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9, weight_decay=0.001)

In [36]:
epochs = 50

running_loss_history = []
running_corrects_history = []
test_acc_history=[]

for e in range(epochs): # training our model, put input according to every batch.
  
    running_loss = 0.0
    running_corrects = 0.0


    for inputs, labels in trainloader:
        
        inputs = inputs.to(device) # input to device as our model is running in mentioned device.
        labels = labels.to(device)
        outputs = net(inputs) # every batch of 100 images are put as an input.
        loss = criterion(outputs, labels) # Calc loss after each batch i/p by comparing it to actual labels. 

        optimizer.zero_grad() #setting the initial gradient to 0
        loss.backward() # backpropagating the loss
        optimizer.step() # updating the weights and bias values for every single step.

        _, preds = torch.max(outputs, 1) # taking the highest value of prediction.
        running_loss += loss.item()
        running_corrects += torch.sum(preds == labels.data) # calculating te accuracy by taking the sum of all the correct predictions in a batch.

        
    epoch_loss = running_loss/len(trainset) # loss per epoch
    epoch_acc = 100*(running_corrects.float()/ len(trainset)) # accuracy per epoch
    running_loss_history.append(epoch_loss) # appending for displaying 
    running_corrects_history.append(epoch_acc)
                
    print('epoch :', (e+1))
    print('training loss: {:.4f}, acc {:.4f} '.format(epoch_loss, epoch_acc.item()))
    
    # testing

    correct = 0
    total = 0

    # since we're not training, we don't need to calculate the gradients for our outputs
    with torch.no_grad():
        for data in testloader:
            images, labels = data[0].to(device), data[1].to(device)
            # calculate outputs by running images through the network
            outputs = net(images)
            # the class with the highest energy is what we choose as prediction
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    
    test_acc_history.append(100 * correct / total)
    print(f'Accuracy of the network on the 10000 test images: {100 * correct / total} %')

epoch : 1
training loss: 0.0074, acc 74.6746 
Accuracy of the network on the 10000 test images: 85.19148936170212 %
epoch : 2
training loss: 0.0030, acc 86.8245 
Accuracy of the network on the 10000 test images: 87.2127659574468 %
epoch : 3
training loss: 0.0026, acc 88.4601 
Accuracy of the network on the 10000 test images: 87.59042553191489 %
epoch : 4
training loss: 0.0023, acc 89.2908 
Accuracy of the network on the 10000 test images: 88.15425531914893 %
epoch : 5
training loss: 0.0022, acc 89.8812 
Accuracy of the network on the 10000 test images: 88.69148936170212 %
epoch : 6
training loss: 0.0020, acc 90.6002 
Accuracy of the network on the 10000 test images: 88.97872340425532 %
epoch : 7
training loss: 0.0019, acc 91.1108 
Accuracy of the network on the 10000 test images: 88.7872340425532 %
epoch : 8
training loss: 0.0018, acc 91.5674 
Accuracy of the network on the 10000 test images: 89.16489361702128 %
epoch : 9
training loss: 0.0017, acc 92.1028 
Accuracy of the network on t

In [37]:
running_loss_history = torch.tensor(running_loss_history)
running_loss_history.numpy()
running_corrects_history = torch.tensor(running_corrects_history)
running_corrects_history.numpy()
test_acc_history = torch.tensor(test_acc_history)
test_acc_history.numpy()




results['net8 - training loss'] = running_loss_history
results['net8 - training accuracy'] = running_corrects_history
results['net8 - testing accuracy'] = test_acc_history

results.to_csv('net_results_emnist.csv')

## 8 Layers

In [38]:
# define network

class Net_8_2(nn.Module):
    def __init__(self):
        super().__init__()
        self.network = nn.Sequential(
            
            #block 1
            nn.Conv2d(1, 32, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            nn.MaxPool2d(2, 2), 
            nn.BatchNorm2d(32),
            
            #block 2
            nn.Conv2d(32, 32, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            #nn.MaxPool2d(2, 2), max pooling operators removed after first block
            nn.BatchNorm2d(64),
            
            #block 3
            nn.Conv2d(64, 64, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            #nn.MaxPool2d(2, 2), max pooling operators removed after first block
            nn.BatchNorm2d(128),
             
            #block 4
            nn.Conv2d(128, 128, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            #nn.MaxPool2d(2, 2), max pooling operators removed after first block
            nn.BatchNorm2d(256),
            
            # -----------------------------------------------
            
            nn.Flatten(),
            nn.Linear(50176, 1024), #FC1    
            nn.ReLU(),
            nn.Linear(1024, 512), #FC2
            nn.ReLU(),
            nn.Linear(512, 47)) #FC3
    
        
    def forward(self, x):
        return self.network(x)


net = Net_8_2()
net.to(device)

Net_8_2(
  (network): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU()
    (8): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU()
    (10): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (11): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (12): ReLU()
    (13): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (14): ReLU()
    (15): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (16): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), 

In [39]:
# criterion + optimizer

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9, weight_decay=0.001)

In [40]:
epochs = 50

running_loss_history = []
running_corrects_history = []
test_acc_history=[]

for e in range(epochs): # training our model, put input according to every batch.
  
    running_loss = 0.0
    running_corrects = 0.0


    for inputs, labels in trainloader:
        
        inputs = inputs.to(device) # input to device as our model is running in mentioned device.
        labels = labels.to(device)
        outputs = net(inputs) # every batch of 100 images are put as an input.
        loss = criterion(outputs, labels) # Calc loss after each batch i/p by comparing it to actual labels. 

        optimizer.zero_grad() #setting the initial gradient to 0
        loss.backward() # backpropagating the loss
        optimizer.step() # updating the weights and bias values for every single step.

        _, preds = torch.max(outputs, 1) # taking the highest value of prediction.
        running_loss += loss.item()
        running_corrects += torch.sum(preds == labels.data) # calculating te accuracy by taking the sum of all the correct predictions in a batch.

        
    epoch_loss = running_loss/len(trainset) # loss per epoch
    epoch_acc = 100*(running_corrects.float()/ len(trainset)) # accuracy per epoch
    running_loss_history.append(epoch_loss) # appending for displaying 
    running_corrects_history.append(epoch_acc)
                
    print('epoch :', (e+1))
    print('training loss: {:.4f}, acc {:.4f} '.format(epoch_loss, epoch_acc.item()))
    
    # testing

    correct = 0
    total = 0

    # since we're not training, we don't need to calculate the gradients for our outputs
    with torch.no_grad():
        for data in testloader:
            images, labels = data[0].to(device), data[1].to(device)
            # calculate outputs by running images through the network
            outputs = net(images)
            # the class with the highest energy is what we choose as prediction
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    
    test_acc_history.append(100 * correct / total)
    print(f'Accuracy of the network on the 10000 test images: {100 * correct / total} %')

epoch : 1
training loss: 0.0064, acc 77.1667 
Accuracy of the network on the 10000 test images: 86.29255319148936 %
epoch : 2
training loss: 0.0027, acc 87.7358 
Accuracy of the network on the 10000 test images: 87.9627659574468 %
epoch : 3
training loss: 0.0023, acc 89.3050 
Accuracy of the network on the 10000 test images: 88.36702127659575 %
epoch : 4
training loss: 0.0021, acc 90.2376 
Accuracy of the network on the 10000 test images: 88.65425531914893 %
epoch : 5
training loss: 0.0019, acc 91.0301 
Accuracy of the network on the 10000 test images: 88.90425531914893 %
epoch : 6
training loss: 0.0017, acc 91.8493 
Accuracy of the network on the 10000 test images: 89.14893617021276 %
epoch : 7
training loss: 0.0016, acc 92.5656 
Accuracy of the network on the 10000 test images: 89.28191489361703 %
epoch : 8
training loss: 0.0014, acc 93.3174 
Accuracy of the network on the 10000 test images: 89.16489361702128 %
epoch : 9
training loss: 0.0013, acc 94.1259 
Accuracy of the network on 

In [41]:
running_loss_history = torch.tensor(running_loss_history)
running_loss_history.numpy()
running_corrects_history = torch.tensor(running_corrects_history)
running_corrects_history.numpy()
test_acc_history = torch.tensor(test_acc_history)
test_acc_history.numpy()




results['net9 - training loss'] = running_loss_history
results['net9 - training accuracy'] = running_corrects_history
results['net9 - testing accuracy'] = test_acc_history

results.to_csv('net_results_emnist.csv')

## 10 Layers 

In [42]:
# define network

class Net_10_2(nn.Module):
    def __init__(self):
        super().__init__()
        self.network = nn.Sequential(
            
            #block 1
            nn.Conv2d(1, 32, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            nn.MaxPool2d(2, 2), 
            nn.BatchNorm2d(32),
            
            #block 2
            nn.Conv2d(32, 32, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            #nn.MaxPool2d(2, 2), max pooling operators removed after first block
            nn.BatchNorm2d(64),
            
            #block 3
            nn.Conv2d(64, 64, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            #nn.MaxPool2d(2, 2), max pooling operators removed after first block
            nn.BatchNorm2d(128),
             
            #block 4
            nn.Conv2d(128, 128, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            #nn.MaxPool2d(2, 2), max pooling operators removed after first block
            nn.BatchNorm2d(256),
            
            #block 5
            nn.Conv2d(256, 256, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            #nn.MaxPool2d(2, 2), max pooling operators removed after first block
            nn.BatchNorm2d(512),
        
            
            # -----------------------------------------------
            
            nn.Flatten(),
            nn.Linear(100352, 1024), #FC1    
            nn.ReLU(),
            nn.Linear(1024, 512), #FC2
            nn.ReLU(),
            nn.Linear(512, 47)) #FC3
    
        
    def forward(self, x):
        return self.network(x)


net = Net_10_2()
net.to(device)

Net_10_2(
  (network): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU()
    (8): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU()
    (10): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (11): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (12): ReLU()
    (13): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (14): ReLU()
    (15): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (16): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1),

In [43]:
# criterion + optimizer

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9, weight_decay=0.001)

In [44]:
epochs = 50

running_loss_history = []
running_corrects_history = []
test_acc_history=[]

for e in range(epochs): # training our model, put input according to every batch.
  
    running_loss = 0.0
    running_corrects = 0.0


    for inputs, labels in trainloader:
        
        inputs = inputs.to(device) # input to device as our model is running in mentioned device.
        labels = labels.to(device)
        outputs = net(inputs) # every batch of 100 images are put as an input.
        loss = criterion(outputs, labels) # Calc loss after each batch i/p by comparing it to actual labels. 

        optimizer.zero_grad() #setting the initial gradient to 0
        loss.backward() # backpropagating the loss
        optimizer.step() # updating the weights and bias values for every single step.

        _, preds = torch.max(outputs, 1) # taking the highest value of prediction.
        running_loss += loss.item()
        running_corrects += torch.sum(preds == labels.data) # calculating te accuracy by taking the sum of all the correct predictions in a batch.

        
    epoch_loss = running_loss/len(trainset) # loss per epoch
    epoch_acc = 100*(running_corrects.float()/ len(trainset)) # accuracy per epoch
    running_loss_history.append(epoch_loss) # appending for displaying 
    running_corrects_history.append(epoch_acc)
                
    print('epoch :', (e+1))
    print('training loss: {:.4f}, acc {:.4f} '.format(epoch_loss, epoch_acc.item()))
    
    # testing

    correct = 0
    total = 0

    # since we're not training, we don't need to calculate the gradients for our outputs
    with torch.no_grad():
        for data in testloader:
            images, labels = data[0].to(device), data[1].to(device)
            # calculate outputs by running images through the network
            outputs = net(images)
            # the class with the highest energy is what we choose as prediction
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    
    test_acc_history.append(100 * correct / total)
    print(f'Accuracy of the network on the 10000 test images: {100 * correct / total} %')

epoch : 1
training loss: 0.0058, acc 78.2385 
Accuracy of the network on the 10000 test images: 86.52659574468085 %
epoch : 2
training loss: 0.0025, acc 88.4264 
Accuracy of the network on the 10000 test images: 87.83510638297872 %
epoch : 3
training loss: 0.0021, acc 90.0426 
Accuracy of the network on the 10000 test images: 88.37765957446808 %
epoch : 4
training loss: 0.0018, acc 91.2553 
Accuracy of the network on the 10000 test images: 88.96808510638297 %
epoch : 5
training loss: 0.0016, acc 92.2748 
Accuracy of the network on the 10000 test images: 88.97340425531915 %
epoch : 6
training loss: 0.0014, acc 93.3839 
Accuracy of the network on the 10000 test images: 88.79255319148936 %
epoch : 7
training loss: 0.0012, acc 94.5904 
Accuracy of the network on the 10000 test images: 89.1063829787234 %
epoch : 8
training loss: 0.0010, acc 95.7207 
Accuracy of the network on the 10000 test images: 88.73404255319149 %
epoch : 9
training loss: 0.0008, acc 96.6747 
Accuracy of the network on 

In [45]:
running_loss_history = torch.tensor(running_loss_history)
running_loss_history.numpy()
running_corrects_history = torch.tensor(running_corrects_history)
running_corrects_history.numpy()
test_acc_history = torch.tensor(test_acc_history)
test_acc_history.numpy()




results['net10 - training loss'] = running_loss_history
results['net10 - training accuracy'] = running_corrects_history
results['net10 - testing accuracy'] = test_acc_history

results.to_csv('net_results_emnist.csv')

# ------------------- Added Convolutional Layer per Block -------------------------

## 2 + 1

In [46]:
# define network

class Net_2_3(nn.Module):
    def __init__(self):
        super().__init__()
        self.network = nn.Sequential(
            
            #block 1
            nn.Conv2d(1, 32, kernel_size=3, padding=1), 
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1), # extra
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.BatchNorm2d(32),

            
            # -----------------------------------------------
            
            nn.Flatten(),
            nn.Linear(6272, 1024), #FC1    
            nn.ReLU(),
            nn.Linear(1024, 512), #FC2
            nn.ReLU(),
            nn.Linear(512, 47)) #FC3
    
        
    def forward(self, x):
        return self.network(x)


net = Net_2_3()
net.to(device)

Net_2_3(
  (network): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (5): ReLU()
    (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (7): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (8): Flatten(start_dim=1, end_dim=-1)
    (9): Linear(in_features=6272, out_features=1024, bias=True)
    (10): ReLU()
    (11): Linear(in_features=1024, out_features=512, bias=True)
    (12): ReLU()
    (13): Linear(in_features=512, out_features=47, bias=True)
  )
)

In [47]:
# criterion + optimizer

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9, weight_decay=0.001)

In [48]:
epochs = 50

running_loss_history = []
running_corrects_history = []
test_acc_history=[]

for e in range(epochs): # training our model, put input according to every batch.
  
    running_loss = 0.0
    running_corrects = 0.0


    for inputs, labels in trainloader:
        
        inputs = inputs.to(device) # input to device as our model is running in mentioned device.
        labels = labels.to(device)
        outputs = net(inputs) # every batch of 100 images are put as an input.
        loss = criterion(outputs, labels) # Calc loss after each batch i/p by comparing it to actual labels. 

        optimizer.zero_grad() #setting the initial gradient to 0
        loss.backward() # backpropagating the loss
        optimizer.step() # updating the weights and bias values for every single step.

        _, preds = torch.max(outputs, 1) # taking the highest value of prediction.
        running_loss += loss.item()
        running_corrects += torch.sum(preds == labels.data) # calculating te accuracy by taking the sum of all the correct predictions in a batch.

        
    epoch_loss = running_loss/len(trainset) # loss per epoch
    epoch_acc = 100*(running_corrects.float()/ len(trainset)) # accuracy per epoch
    running_loss_history.append(epoch_loss) # appending for displaying 
    running_corrects_history.append(epoch_acc)
                
    print('epoch :', (e+1))
    print('training loss: {:.4f}, acc {:.4f} '.format(epoch_loss, epoch_acc.item()))
    
    # testing

    correct = 0
    total = 0

    # since we're not training, we don't need to calculate the gradients for our outputs
    with torch.no_grad():
        for data in testloader:
            images, labels = data[0].to(device), data[1].to(device)
            # calculate outputs by running images through the network
            outputs = net(images)
            # the class with the highest energy is what we choose as prediction
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    
    test_acc_history.append(100 * correct / total)
    print(f'Accuracy of the network on the 10000 test images: {100 * correct / total} %')

epoch : 1
training loss: 0.0118, acc 63.0381 
Accuracy of the network on the 10000 test images: 79.38297872340425 %
epoch : 2
training loss: 0.0043, acc 82.2447 
Accuracy of the network on the 10000 test images: 83.0372340425532 %
epoch : 3
training loss: 0.0036, acc 84.9229 
Accuracy of the network on the 10000 test images: 84.90425531914893 %
epoch : 4
training loss: 0.0032, acc 86.1543 
Accuracy of the network on the 10000 test images: 85.94680851063829 %
epoch : 5
training loss: 0.0030, acc 87.1791 
Accuracy of the network on the 10000 test images: 86.31914893617021 %
epoch : 6
training loss: 0.0028, acc 87.8555 
Accuracy of the network on the 10000 test images: 86.47872340425532 %
epoch : 7
training loss: 0.0026, acc 88.3546 
Accuracy of the network on the 10000 test images: 87.31914893617021 %
epoch : 8
training loss: 0.0025, acc 88.8679 
Accuracy of the network on the 10000 test images: 87.68085106382979 %
epoch : 9
training loss: 0.0024, acc 89.2145 
Accuracy of the network on 

In [49]:
running_loss_history = torch.tensor(running_loss_history)
running_loss_history.numpy()
running_corrects_history = torch.tensor(running_corrects_history)
running_corrects_history.numpy()
test_acc_history = torch.tensor(test_acc_history)
test_acc_history.numpy()




results['net11 - training loss'] = running_loss_history
results['net11 - training accuracy'] = running_corrects_history
results['net11 - testing accuracy'] = test_acc_history

results.to_csv('net_results_emnist.csv')

## 4 + 2

In [50]:
# define network

class Net_4_3(nn.Module):
    def __init__(self):
        super().__init__()
        self.network = nn.Sequential(
            
            #block 1
            nn.Conv2d(1, 32, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1), # extra
            nn.ReLU(),
            nn.MaxPool2d(2, 2), 
            nn.BatchNorm2d(32),
            
            #block 2
            nn.Conv2d(32, 32, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=3, padding=1), # extra
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            #nn.MaxPool2d(2, 2), max pooling operators removed after first block
            nn.BatchNorm2d(64),
            
            # -----------------------------------------------
            
            nn.Flatten(),
            nn.Linear(12544, 1024), #FC1    
            nn.ReLU(),
            nn.Linear(1024, 512), #FC2
            nn.ReLU(),
            nn.Linear(512, 47)) #FC3
    
        
    def forward(self, x):
        return self.network(x)


net = Net_4_3()
net.to(device)

Net_4_3(
  (network): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (5): ReLU()
    (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (7): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (8): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU()
    (10): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU()
    (12): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU()
    (14): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (15): Flatten(start_dim=1, end_dim=-1)
    (16): Linear(in_features=12544, out_features=1024, bias=True)
    (17): ReLU()
    (18): Linear(in_features

In [51]:
# criterion + optimizer

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9, weight_decay=0.001)

In [52]:
epochs = 50

running_loss_history = []
running_corrects_history = []
test_acc_history=[]

for e in range(epochs): # training our model, put input according to every batch.
  
    running_loss = 0.0
    running_corrects = 0.0


    for inputs, labels in trainloader:
        
        inputs = inputs.to(device) # input to device as our model is running in mentioned device.
        labels = labels.to(device)
        outputs = net(inputs) # every batch of 100 images are put as an input.
        loss = criterion(outputs, labels) # Calc loss after each batch i/p by comparing it to actual labels. 

        optimizer.zero_grad() #setting the initial gradient to 0
        loss.backward() # backpropagating the loss
        optimizer.step() # updating the weights and bias values for every single step.

        _, preds = torch.max(outputs, 1) # taking the highest value of prediction.
        running_loss += loss.item()
        running_corrects += torch.sum(preds == labels.data) # calculating te accuracy by taking the sum of all the correct predictions in a batch.

        
    epoch_loss = running_loss/len(trainset) # loss per epoch
    epoch_acc = 100*(running_corrects.float()/ len(trainset)) # accuracy per epoch
    running_loss_history.append(epoch_loss) # appending for displaying 
    running_corrects_history.append(epoch_acc)
                
    print('epoch :', (e+1))
    print('training loss: {:.4f}, acc {:.4f} '.format(epoch_loss, epoch_acc.item()))
    
    # testing

    correct = 0
    total = 0

    # since we're not training, we don't need to calculate the gradients for our outputs
    with torch.no_grad():
        for data in testloader:
            images, labels = data[0].to(device), data[1].to(device)
            # calculate outputs by running images through the network
            outputs = net(images)
            # the class with the highest energy is what we choose as prediction
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    
    test_acc_history.append(100 * correct / total)
    print(f'Accuracy of the network on the 10000 test images: {100 * correct / total} %')

epoch : 1
training loss: 0.0092, acc 69.5612 
Accuracy of the network on the 10000 test images: 83.28191489361703 %
epoch : 2
training loss: 0.0034, acc 85.3041 
Accuracy of the network on the 10000 test images: 86.18085106382979 %
epoch : 3
training loss: 0.0029, acc 87.1684 
Accuracy of the network on the 10000 test images: 87.08510638297872 %
epoch : 4
training loss: 0.0026, acc 88.0612 
Accuracy of the network on the 10000 test images: 87.25531914893617 %
epoch : 5
training loss: 0.0024, acc 89.0310 
Accuracy of the network on the 10000 test images: 87.63297872340425 %
epoch : 6
training loss: 0.0023, acc 89.3821 
Accuracy of the network on the 10000 test images: 88.38829787234043 %
epoch : 7
training loss: 0.0022, acc 89.7988 
Accuracy of the network on the 10000 test images: 88.56382978723404 %
epoch : 8
training loss: 0.0021, acc 90.3316 
Accuracy of the network on the 10000 test images: 88.42553191489361 %
epoch : 9
training loss: 0.0020, acc 90.5745 
Accuracy of the network on

In [53]:
running_loss_history = torch.tensor(running_loss_history)
running_loss_history.numpy()
running_corrects_history = torch.tensor(running_corrects_history)
running_corrects_history.numpy()
test_acc_history = torch.tensor(test_acc_history)
test_acc_history.numpy()




results['net12 - training loss'] = running_loss_history
results['net12 - training accuracy'] = running_corrects_history
results['net12 - testing accuracy'] = test_acc_history

results.to_csv('net_results_emnist.csv')

## 6 + 3

In [54]:
# define network

class Net_6_3(nn.Module):
    def __init__(self):
        super().__init__()
        self.network = nn.Sequential(
            
            #block 1
            nn.Conv2d(1, 32, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.BatchNorm2d(32),
            
            #block 2
            nn.Conv2d(32, 32, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            #nn.MaxPool2d(2, 2), max pooling operators removed after first block
            nn.BatchNorm2d(64),
            
            #block 3
            nn.Conv2d(64, 64, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            #nn.MaxPool2d(2, 2), max pooling operators removed after first block
            nn.BatchNorm2d(128),
            
            # -----------------------------------------------
            
            nn.Flatten(),
            nn.Linear(25088, 1024), #FC1    
            nn.ReLU(),
            nn.Linear(1024, 512), #FC2
            nn.ReLU(),
            nn.Linear(512, 47)) #FC3
    
        
    def forward(self, x):
        return self.network(x)


net = Net_6_3()
net.to(device)

Net_6_3(
  (network): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (5): ReLU()
    (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (7): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (8): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU()
    (10): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU()
    (12): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU()
    (14): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (15): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (16): ReLU()
    (17): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), p

In [55]:
# criterion + optimizer

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9, weight_decay=0.001)

In [56]:
epochs = 50

running_loss_history = []
running_corrects_history = []
test_acc_history=[]

for e in range(epochs): # training our model, put input according to every batch.
  
    running_loss = 0.0
    running_corrects = 0.0


    for inputs, labels in trainloader:
        
        inputs = inputs.to(device) # input to device as our model is running in mentioned device.
        labels = labels.to(device)
        outputs = net(inputs) # every batch of 100 images are put as an input.
        loss = criterion(outputs, labels) # Calc loss after each batch i/p by comparing it to actual labels. 

        optimizer.zero_grad() #setting the initial gradient to 0
        loss.backward() # backpropagating the loss
        optimizer.step() # updating the weights and bias values for every single step.

        _, preds = torch.max(outputs, 1) # taking the highest value of prediction.
        running_loss += loss.item()
        running_corrects += torch.sum(preds == labels.data) # calculating te accuracy by taking the sum of all the correct predictions in a batch.

        
    epoch_loss = running_loss/len(trainset) # loss per epoch
    epoch_acc = 100*(running_corrects.float()/ len(trainset)) # accuracy per epoch
    running_loss_history.append(epoch_loss) # appending for displaying 
    running_corrects_history.append(epoch_acc)
                
    print('epoch :', (e+1))
    print('training loss: {:.4f}, acc {:.4f} '.format(epoch_loss, epoch_acc.item()))
    
    # testing

    correct = 0
    total = 0

    # since we're not training, we don't need to calculate the gradients for our outputs
    with torch.no_grad():
        for data in testloader:
            images, labels = data[0].to(device), data[1].to(device)
            # calculate outputs by running images through the network
            outputs = net(images)
            # the class with the highest energy is what we choose as prediction
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    
    test_acc_history.append(100 * correct / total)
    print(f'Accuracy of the network on the 10000 test images: {100 * correct / total} %')

epoch : 1
training loss: 0.0073, acc 74.6862 
Accuracy of the network on the 10000 test images: 84.14893617021276 %
epoch : 2
training loss: 0.0029, acc 87.0231 
Accuracy of the network on the 10000 test images: 86.69680851063829 %
epoch : 3
training loss: 0.0025, acc 88.5523 
Accuracy of the network on the 10000 test images: 87.8563829787234 %
epoch : 4
training loss: 0.0023, acc 89.2855 
Accuracy of the network on the 10000 test images: 88.31914893617021 %
epoch : 5
training loss: 0.0022, acc 89.9087 
Accuracy of the network on the 10000 test images: 88.88297872340425 %
epoch : 6
training loss: 0.0020, acc 90.4255 
Accuracy of the network on the 10000 test images: 89.08510638297872 %
epoch : 7
training loss: 0.0019, acc 90.8378 
Accuracy of the network on the 10000 test images: 89.10106382978724 %
epoch : 8
training loss: 0.0018, acc 91.3200 
Accuracy of the network on the 10000 test images: 88.9627659574468 %
epoch : 9
training loss: 0.0017, acc 91.5390 
Accuracy of the network on t

In [57]:
running_loss_history = torch.tensor(running_loss_history)
running_loss_history.numpy()
running_corrects_history = torch.tensor(running_corrects_history)
running_corrects_history.numpy()
test_acc_history = torch.tensor(test_acc_history)
test_acc_history.numpy()




results['net13 - training loss'] = running_loss_history
results['net13 - training accuracy'] = running_corrects_history
results['net13 - testing accuracy'] = test_acc_history

results.to_csv('net_results_emnist.csv')

## 8 + 4

In [58]:
# define network

class Net_8_3(nn.Module):
    def __init__(self):
        super().__init__()
        self.network = nn.Sequential(
            
            #block 1
            nn.Conv2d(1, 32, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            nn.MaxPool2d(2, 2), 
            nn.BatchNorm2d(32),
            
            #block 2
            nn.Conv2d(32, 32, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            #nn.MaxPool2d(2, 2), max pooling operators removed after first block
            nn.BatchNorm2d(64),
            
            #block 3
            nn.Conv2d(64, 64, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            #nn.MaxPool2d(2, 2), max pooling operators removed after first block
            nn.BatchNorm2d(128),
             
            #block 4
            nn.Conv2d(128, 128, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(128, 128, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            #nn.MaxPool2d(2, 2), max pooling operators removed after first block
            nn.BatchNorm2d(256),
            
            # -----------------------------------------------
            
            nn.Flatten(),
            nn.Linear(50176, 1024), #FC1    
            nn.ReLU(),
            nn.Linear(1024, 512), #FC2
            nn.ReLU(),
            nn.Linear(512, 47)) #FC3
    
        
    def forward(self, x):
        return self.network(x)


net = Net_8_3()
net.to(device)

Net_8_3(
  (network): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (5): ReLU()
    (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (7): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (8): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU()
    (10): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU()
    (12): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU()
    (14): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (15): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (16): ReLU()
    (17): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), p

In [59]:
# criterion + optimizer

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9, weight_decay=0.001)

In [60]:
epochs = 50

running_loss_history = []
running_corrects_history = []
test_acc_history=[]

for e in range(epochs): # training our model, put input according to every batch.
  
    running_loss = 0.0
    running_corrects = 0.0


    for inputs, labels in trainloader:
        
        inputs = inputs.to(device) # input to device as our model is running in mentioned device.
        labels = labels.to(device)
        outputs = net(inputs) # every batch of 100 images are put as an input.
        loss = criterion(outputs, labels) # Calc loss after each batch i/p by comparing it to actual labels. 

        optimizer.zero_grad() #setting the initial gradient to 0
        loss.backward() # backpropagating the loss
        optimizer.step() # updating the weights and bias values for every single step.

        _, preds = torch.max(outputs, 1) # taking the highest value of prediction.
        running_loss += loss.item()
        running_corrects += torch.sum(preds == labels.data) # calculating te accuracy by taking the sum of all the correct predictions in a batch.

        
    epoch_loss = running_loss/len(trainset) # loss per epoch
    epoch_acc = 100*(running_corrects.float()/ len(trainset)) # accuracy per epoch
    running_loss_history.append(epoch_loss) # appending for displaying 
    running_corrects_history.append(epoch_acc)
                
    print('epoch :', (e+1))
    print('training loss: {:.4f}, acc {:.4f} '.format(epoch_loss, epoch_acc.item()))
    
    # testing

    correct = 0
    total = 0

    # since we're not training, we don't need to calculate the gradients for our outputs
    with torch.no_grad():
        for data in testloader:
            images, labels = data[0].to(device), data[1].to(device)
            # calculate outputs by running images through the network
            outputs = net(images)
            # the class with the highest energy is what we choose as prediction
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    
    test_acc_history.append(100 * correct / total)
    print(f'Accuracy of the network on the 10000 test images: {100 * correct / total} %')

epoch : 1
training loss: 0.0064, acc 76.5895 
Accuracy of the network on the 10000 test images: 86.01595744680851 %
epoch : 2
training loss: 0.0027, acc 87.6383 
Accuracy of the network on the 10000 test images: 88.0372340425532 %
epoch : 3
training loss: 0.0023, acc 89.0541 
Accuracy of the network on the 10000 test images: 88.32978723404256 %
epoch : 4
training loss: 0.0021, acc 90.0098 
Accuracy of the network on the 10000 test images: 89.1063829787234 %
epoch : 5
training loss: 0.0020, acc 90.6383 
Accuracy of the network on the 10000 test images: 88.91489361702128 %
epoch : 6
training loss: 0.0018, acc 91.2908 
Accuracy of the network on the 10000 test images: 89.5 %
epoch : 7
training loss: 0.0017, acc 91.8041 
Accuracy of the network on the 10000 test images: 89.12234042553192 %
epoch : 8
training loss: 0.0016, acc 92.3271 
Accuracy of the network on the 10000 test images: 89.70212765957447 %
epoch : 9
training loss: 0.0015, acc 92.9592 
Accuracy of the network on the 10000 test

In [61]:
running_loss_history = torch.tensor(running_loss_history)
running_loss_history.numpy()
running_corrects_history = torch.tensor(running_corrects_history)
running_corrects_history.numpy()
test_acc_history = torch.tensor(test_acc_history)
test_acc_history.numpy()




results['net14 - training loss'] = running_loss_history
results['net14 - training accuracy'] = running_corrects_history
results['net14 - testing accuracy'] = test_acc_history

results.to_csv('net_results_emnist.csv')

## 10 + 5

In [4]:
# define network

class Net_10_3(nn.Module):
    def __init__(self):
        super().__init__()
        self.network = nn.Sequential(
            
            #block 1
            nn.Conv2d(1, 32, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            nn.MaxPool2d(2, 2), 
            nn.BatchNorm2d(32),
            
            #block 2
            nn.Conv2d(32, 32, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            #nn.MaxPool2d(2, 2), max pooling operators removed after first block
            nn.BatchNorm2d(64),
            
            #block 3
            nn.Conv2d(64, 64, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            #nn.MaxPool2d(2, 2), max pooling operators removed after first block
            nn.BatchNorm2d(128),
             
            #block 4
            nn.Conv2d(128, 128, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(128, 128, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            #nn.MaxPool2d(2, 2), max pooling operators removed after first block
            nn.BatchNorm2d(256),
            
            #block 5
            nn.Conv2d(256, 256, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(256, 256, kernel_size=3, padding=1), #1
            nn.ReLU(),
            nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1), #2
            nn.ReLU(),
            #nn.MaxPool2d(2, 2), max pooling operators removed after first block
            nn.BatchNorm2d(512),
        
            
            # -----------------------------------------------
            
            nn.Flatten(),
            nn.Linear(100352, 1024), #FC1    
            nn.ReLU(),
            nn.Linear(1024, 512), #FC2
            nn.ReLU(),
            nn.Linear(512, 47)) #FC3
    
        
    def forward(self, x):
        return self.network(x)


net = Net_10_3()
net.to(device)

Net_10_3(
  (network): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (5): ReLU()
    (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (7): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (8): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU()
    (10): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU()
    (12): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU()
    (14): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (15): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (16): ReLU()
    (17): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), 

In [5]:
# criterion + optimizer

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9, weight_decay=0.001)

In [7]:
epochs = 50

running_loss_history = []
running_corrects_history = []
test_acc_history=[]

for e in range(epochs): # training our model, put input according to every batch.
  
    running_loss = 0.0
    running_corrects = 0.0


    for inputs, labels in trainloader:
        
        inputs = inputs.to(device) # input to device as our model is running in mentioned device.
        labels = labels.to(device)
        outputs = net(inputs) # every batch of 100 images are put as an input.
        loss = criterion(outputs, labels) # Calc loss after each batch i/p by comparing it to actual labels. 

        optimizer.zero_grad() #setting the initial gradient to 0
        loss.backward() # backpropagating the loss
        optimizer.step() # updating the weights and bias values for every single step.

        _, preds = torch.max(outputs, 1) # taking the highest value of prediction.
        running_loss += loss.item()
        running_corrects += torch.sum(preds == labels.data) # calculating te accuracy by taking the sum of all the correct predictions in a batch.

        
    epoch_loss = running_loss/len(trainset) # loss per epoch
    epoch_acc = 100*(running_corrects.float()/ len(trainset)) # accuracy per epoch
    running_loss_history.append(epoch_loss) # appending for displaying 
    running_corrects_history.append(epoch_acc)
                
    print('epoch :', (e+1))
    print('training loss: {:.4f}, acc {:.4f} '.format(epoch_loss, epoch_acc.item()))
    
    # testing

    correct = 0
    total = 0

    # since we're not training, we don't need to calculate the gradients for our outputs
    with torch.no_grad():
        for data in testloader:
            images, labels = data[0].to(device), data[1].to(device)
            # calculate outputs by running images through the network
            outputs = net(images)
            # the class with the highest energy is what we choose as prediction
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    
    test_acc_history.append(100 * correct / total)
    print(f'Accuracy of the network on the 10000 test images: {100 * correct / total} %')

epoch : 1
training loss: 0.0033, acc 85.3741 
Accuracy of the network on the 10000 test images: 86.65957446808511 %
epoch : 2
training loss: 0.0024, acc 88.5922 
Accuracy of the network on the 10000 test images: 88.00531914893617 %
epoch : 3
training loss: 0.0021, acc 89.8599 
Accuracy of the network on the 10000 test images: 89.04255319148936 %
epoch : 4
training loss: 0.0019, acc 90.6888 
Accuracy of the network on the 10000 test images: 89.15425531914893 %
epoch : 5
training loss: 0.0017, acc 91.5825 
Accuracy of the network on the 10000 test images: 89.13829787234043 %
epoch : 6
training loss: 0.0016, acc 92.2979 
Accuracy of the network on the 10000 test images: 89.34574468085107 %
epoch : 7
training loss: 0.0014, acc 93.1002 
Accuracy of the network on the 10000 test images: 89.06382978723404 %
epoch : 8
training loss: 0.0012, acc 93.9131 
Accuracy of the network on the 10000 test images: 88.83510638297872 %
epoch : 9
training loss: 0.0011, acc 94.7270 
Accuracy of the network on

In [8]:
running_loss_history = torch.tensor(running_loss_history)
running_loss_history.numpy()
running_corrects_history = torch.tensor(running_corrects_history)
running_corrects_history.numpy()
test_acc_history = torch.tensor(test_acc_history)
test_acc_history.numpy()




results['net15 - training loss'] = running_loss_history
results['net15 - training accuracy'] = running_corrects_history
results['net15 - testing accuracy'] = test_acc_history

results.to_csv('net_results_emnist.csv')