In [79]:
import pandas as pd
import numpy as np
from torch import nn, optim, tensor
import torch
from torchvision import datasets, transforms
import warnings
warnings.filterwarnings('ignore')

In [80]:
# Preprocessing applied to every MNIST sample: convert to a tensor, then
# normalise the single channel with mean 0.5 and std 0.5.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean = (0.5,), std = (0.5,)),
    ]
)

In [81]:
# Official MNIST train and test partitions, stored under ./data and passed
# through the same `transform`. The generator yields the train=True dataset
# first, then the train=False one.
train, test = (
    datasets.MNIST('data', train = is_train, download = True, transform = transform)
    for is_train in (True, False)
)

In [82]:
# Pool both official partitions into one dataset.
dataset = torch.utils.data.ConcatDataset(
    (train, test)
)

In [83]:
# 67 % of the pooled samples go to training, the remainder to testing.
# A seeded generator makes the partition (and every metric derived from it)
# reproducible across kernel restarts.
# NOTE: `train` and `test` are rebound here from the MNIST partitions to the
# random subsets of `dataset`.
SPLIT_SEED = 42
split = int(0.67 * len(dataset))
train, test = torch.utils.data.random_split(
    dataset,
    [split, len(dataset) - split],
    generator = torch.Generator().manual_seed(SPLIT_SEED),
)

In [84]:
def _as_tensors(subset):
    """Materialise a dataset of ``(image, label)`` samples as two tensors.

    The subset is iterated exactly once and unzipped, rather than once for the
    images and a second time for the labels.

    Args:
        subset: iterable yielding ``(image_tensor, int_label)`` pairs.

    Returns:
        ``(x, y)`` where ``x`` is the stacked images reshaped to ``(-1, 784)``
        and ``y`` is a 1-D tensor of the labels.
    """
    images, labels = zip(*subset)
    return torch.stack(images).reshape(-1, 784), torch.tensor(labels)


x_train, y_train = _as_tensors(train)
x_test, y_test = _as_tensors(test)

### Models with 3 hidden layers (784 → h1 → h2 → h3 → 10)

In [85]:
# Nine classifiers: each activation (ReLU, Sigmoid, Tanh) crossed with three
# hidden-width layouts. Every network is 784 -> hidden widths -> 10, with the
# activation after each hidden Linear and LogSoftmax over dim 1 at the end.
# Activation is the outer loop and width layout the inner loop, which fixes
# the numbering model1..model9.
(model1, model2, model3,
 model4, model5, model6,
 model7, model8, model9) = (
    nn.Sequential(
        *[
            layer
            for fan_in, fan_out in zip((784, *hidden), hidden)
            for layer in (nn.Linear(fan_in, fan_out), activation())
        ],
        nn.Linear(hidden[-1], 10),
        nn.LogSoftmax(dim = 1),
    )
    for activation in (nn.ReLU, nn.Sigmoid, nn.Tanh)
    for hidden in ((150, 100, 100), (100, 100, 100), (150, 150, 100))
)

### Models with 4 hidden layers (784 → h1 → h2 → h3 → h4 → 10)

In [86]:
# Nine deeper classifiers: each activation (ReLU, Sigmoid, Tanh) crossed with
# three layouts of four hidden widths. Every network is
# 784 -> hidden widths -> 10, with the activation after each hidden Linear and
# LogSoftmax over dim 1 at the end. Activation is the outer loop and width
# layout the inner loop, which fixes the numbering model10..model18.
(model10, model11, model12,
 model13, model14, model15,
 model16, model17, model18) = (
    nn.Sequential(
        *[
            layer
            for fan_in, fan_out in zip((784, *hidden), hidden)
            for layer in (nn.Linear(fan_in, fan_out), activation())
        ],
        nn.Linear(hidden[-1], 10),
        nn.LogSoftmax(dim = 1),
    )
    for activation in (nn.ReLU, nn.Sigmoid, nn.Tanh)
    for hidden in ((150, 150, 100, 100), (150, 100, 100, 100), (150, 150, 150, 100))
)

### List of all models

In [87]:
# Every network in numbering order: position k holds model{k + 1}.
allModels = [
    model1, model2, model3, model4, model5, model6,
    model7, model8, model9, model10, model11, model12,
    model13, model14, model15, model16, model17, model18,
]

### Function to train a model; returns a DataFrame of the average loss per epoch

In [88]:
def training(model, optimizerType, trainloader, iter):
    """Train ``model`` in place and record the mean batch loss of every epoch.

    Args:
        model: network whose parameters are optimised; it is called on each
            batch of inputs and must return log-probabilities for ``NLLLoss``.
        optimizerType: ``"SGD"`` selects SGD (lr 0.01, momentum 0.9); any
            other value selects Adam (lr 0.01).
        trainloader: sized iterable of ``(inputs, labels)`` batches.
        iter: number of passes over ``trainloader``.

    Returns:
        DataFrame with one row per epoch and the columns ``"Iteration"``
        (1-based epoch number) and ``"Negative Log Likelihood Loss"``
        (sum of batch losses divided by ``len(trainloader)``).
    """
    criterion = nn.NLLLoss()
    opt = (
        optim.SGD(model.parameters(), lr = 0.01, momentum = 0.9)
        if optimizerType == "SGD"
        else optim.Adam(model.parameters(), lr = 0.01)
    )

    history = pd.DataFrame(columns = ["Iteration", "Negative Log Likelihood Loss"])

    for epoch in range(1, iter + 1):
        model.train()
        running = 0
        for inputs, targets in trainloader:
            opt.zero_grad()
            batch_loss = criterion(model(inputs), targets)
            batch_loss.backward()
            opt.step()
            running += batch_loss.item()
        # Append a row at the next integer label, i.e. the current row count.
        history.loc[len(history)] = [epoch, running / len(trainloader)]

    return history

In [89]:
# Collects one loss-history DataFrame per trained model.
allLossVsIter = list()

In [90]:
# TensorDataset pairs row i of the inputs with label i without building a
# Python list of per-sample tuples; batches still unpack as (img, label).
trainloader = torch.utils.data.DataLoader(
    torch.utils.data.TensorDataset(x_train, y_train),
    shuffle = True,
    batch_size = 100,
)
# Evaluation does not depend on sample order, so the test loader is not
# shuffled; this keeps evaluation deterministic.
testloader = torch.utils.data.DataLoader(
    torch.utils.data.TensorDataset(x_test, y_test),
    shuffle = False,
    batch_size = 100,
)

In [91]:
# Train every model with SGD for 20 epochs and keep its loss history.
# The list is rebuilt from scratch so that re-running this cell cannot append
# a second set of histories behind the first one.
allLossVsIter = [training(model, "SGD", trainloader, 20) for model in allModels]

In [92]:
def testing(model, testloader):
    match = 0
    for img, label in testloader:   
        for i in range(len(label)):
            image = img[i].reshape([1, 784])
            with torch.no_grad():
                out = model(image)

            probability = list(out.numpy()[0])
            prediction = probability.index(max(probability))
            y = label.numpy()[i]

            if (y == prediction):
                match += 1

    return (match / len(testloader))

In [93]:
# One row per network: its 1-based position in allModels and the value
# returned by testing() on the held-out loader.
accuracies = pd.DataFrame(columns = ["Model", "Accuracy"])
for model_number, net in enumerate(allModels, start = 1):
    accuracies.loc[len(accuracies)] = [model_number, testing(net, testloader)]

In [94]:
# Per-epoch training loss of model1 (ReLU; hidden widths 150-100-100).
allLossVsIter[0]

Unnamed: 0,Iteration,Negative Log Likelihood Loss
0,1.0,0.701983
1,2.0,0.244411
2,3.0,0.175538
3,4.0,0.135699
4,5.0,0.113434
5,6.0,0.096817
6,7.0,0.086974
7,8.0,0.07372
8,9.0,0.062665
9,10.0,0.055443


In [95]:
# Per-epoch training loss of model2 (ReLU; hidden widths 100-100-100).
allLossVsIter[1]

Unnamed: 0,Iteration,Negative Log Likelihood Loss
0,1.0,0.733618
1,2.0,0.243296
2,3.0,0.174719
3,4.0,0.140065
4,5.0,0.120199
5,6.0,0.101807
6,7.0,0.088099
7,8.0,0.077079
8,9.0,0.065444
9,10.0,0.060505


In [96]:
# Per-epoch training loss of model3 (ReLU; hidden widths 150-150-100).
allLossVsIter[2]

Unnamed: 0,Iteration,Negative Log Likelihood Loss
0,1.0,0.693284
1,2.0,0.234234
2,3.0,0.165478
3,4.0,0.131159
4,5.0,0.108738
5,6.0,0.091738
6,7.0,0.079644
7,8.0,0.067504
8,9.0,0.061997
9,10.0,0.055218


In [97]:
# Per-epoch training loss of model4 (Sigmoid; hidden widths 150-100-100).
allLossVsIter[3]

Unnamed: 0,Iteration,Negative Log Likelihood Loss
0,1.0,2.306147
1,2.0,2.301633
2,3.0,2.275365
3,4.0,1.881549
4,5.0,1.25999
5,6.0,0.768969
6,7.0,0.601185
7,8.0,0.509433
8,9.0,0.443863
9,10.0,0.390677


In [98]:
# Per-epoch training loss of model5 (Sigmoid; hidden widths 100-100-100).
allLossVsIter[4]

Unnamed: 0,Iteration,Negative Log Likelihood Loss
0,1.0,2.307224
1,2.0,2.302216
2,3.0,2.282231
3,4.0,1.908104
4,5.0,1.228642
5,6.0,0.833883
6,7.0,0.633639
7,8.0,0.519764
8,9.0,0.446019
9,10.0,0.392399


In [99]:
# Per-epoch training loss of model6 (Sigmoid; hidden widths 150-150-100).
allLossVsIter[5]

Unnamed: 0,Iteration,Negative Log Likelihood Loss
0,1.0,2.306634
1,2.0,2.302369
2,3.0,2.280969
3,4.0,1.89755
4,5.0,1.231232
5,6.0,0.777906
6,7.0,0.593959
7,8.0,0.49855
8,9.0,0.435207
9,10.0,0.383505


In [100]:
# Per-epoch training loss of model7 (Tanh; hidden widths 150-100-100).
allLossVsIter[6]

Unnamed: 0,Iteration,Negative Log Likelihood Loss
0,1.0,0.601047
1,2.0,0.232934
2,3.0,0.168543
3,4.0,0.133545
4,5.0,0.105476
5,6.0,0.092025
6,7.0,0.078791
7,8.0,0.066261
8,9.0,0.059037
9,10.0,0.052788


In [101]:
# Per-epoch training loss of model8 (Tanh; hidden widths 100-100-100).
allLossVsIter[7]

Unnamed: 0,Iteration,Negative Log Likelihood Loss
0,1.0,0.618738
1,2.0,0.235913
2,3.0,0.173729
3,4.0,0.136048
4,5.0,0.112802
5,6.0,0.094754
6,7.0,0.082061
7,8.0,0.072838
8,9.0,0.062933
9,10.0,0.057259


In [102]:
# Per-epoch training loss of model9 (Tanh; hidden widths 150-150-100).
allLossVsIter[8]

Unnamed: 0,Iteration,Negative Log Likelihood Loss
0,1.0,0.607743
1,2.0,0.233661
2,3.0,0.167137
3,4.0,0.131676
4,5.0,0.107918
5,6.0,0.093018
6,7.0,0.079014
7,8.0,0.068043
8,9.0,0.060846
9,10.0,0.054197


In [103]:
# Per-epoch training loss of model10 (ReLU; hidden widths 150-150-100-100).
allLossVsIter[9]

Unnamed: 0,Iteration,Negative Log Likelihood Loss
0,1.0,1.014209
1,2.0,0.261531
2,3.0,0.180325
3,4.0,0.139918
4,5.0,0.111713
5,6.0,0.094337
6,7.0,0.084918
7,8.0,0.074158
8,9.0,0.06457
9,10.0,0.056032


In [104]:
# Per-epoch training loss of model11 (ReLU; hidden widths 150-100-100-100).
allLossVsIter[10]

Unnamed: 0,Iteration,Negative Log Likelihood Loss
0,1.0,0.925491
1,2.0,0.260933
2,3.0,0.176717
3,4.0,0.134588
4,5.0,0.117659
5,6.0,0.096529
6,7.0,0.083917
7,8.0,0.074572
8,9.0,0.063963
9,10.0,0.058739


In [105]:
# Per-epoch training loss of model12 (ReLU; hidden widths 150-150-150-100).
allLossVsIter[11]

Unnamed: 0,Iteration,Negative Log Likelihood Loss
0,1.0,0.980251
1,2.0,0.259008
2,3.0,0.176264
3,4.0,0.138011
4,5.0,0.113878
5,6.0,0.101028
6,7.0,0.087794
7,8.0,0.07867
8,9.0,0.066612
9,10.0,0.060438


In [106]:
# Per-epoch training loss of model13 (Sigmoid; hidden widths 150-150-100-100).
# NOTE(review): the recorded output for this cell stays at about 2.30, close to
# ln(10), the loss of a uniform guess over 10 classes, so this network did not
# learn in that run. Possibly vanishing gradients through four sigmoid
# layers; TODO confirm.
allLossVsIter[12]

Unnamed: 0,Iteration,Negative Log Likelihood Loss
0,1.0,2.30737
1,2.0,2.307005
2,3.0,2.306645
3,4.0,2.306646
4,5.0,2.306044
5,6.0,2.305286
6,7.0,2.305121
7,8.0,2.304186
8,9.0,2.304131
9,10.0,2.303926


In [107]:
# Per-epoch training loss of model14 (Sigmoid; hidden widths 150-100-100-100).
# NOTE(review): the recorded output for this cell stays at about 2.30, close to
# ln(10), the loss of a uniform guess over 10 classes, so this network did not
# learn in that run. Possibly vanishing gradients through four sigmoid
# layers; TODO confirm.
allLossVsIter[13]

Unnamed: 0,Iteration,Negative Log Likelihood Loss
0,1.0,2.307594
1,2.0,2.307347
2,3.0,2.306294
3,4.0,2.30636
4,5.0,2.305104
5,6.0,2.305538
6,7.0,2.30537
7,8.0,2.304853
8,9.0,2.304554
9,10.0,2.30325


In [108]:
# Per-epoch training loss of model15 (Sigmoid; hidden widths 150-150-150-100).
# NOTE(review): the recorded output for this cell stays at about 2.30, close to
# ln(10), the loss of a uniform guess over 10 classes, so this network did not
# learn in that run. Possibly vanishing gradients through four sigmoid
# layers; TODO confirm.
allLossVsIter[14]

Unnamed: 0,Iteration,Negative Log Likelihood Loss
0,1.0,2.307244
1,2.0,2.306847
2,3.0,2.305408
3,4.0,2.305702
4,5.0,2.305258
5,6.0,2.305032
6,7.0,2.304012
7,8.0,2.3045
8,9.0,2.303472
9,10.0,2.303056


In [109]:
# Per-epoch training loss of model16 (Tanh; hidden widths 150-150-100-100).
allLossVsIter[15]

Unnamed: 0,Iteration,Negative Log Likelihood Loss
0,1.0,0.697604
1,2.0,0.239903
2,3.0,0.169977
3,4.0,0.133778
4,5.0,0.110914
5,6.0,0.09396
6,7.0,0.078572
7,8.0,0.070809
8,9.0,0.062585
9,10.0,0.056225


In [110]:
# Per-epoch training loss of model17 (Tanh; hidden widths 150-100-100-100).
allLossVsIter[16]

Unnamed: 0,Iteration,Negative Log Likelihood Loss
0,1.0,0.711497
1,2.0,0.241057
2,3.0,0.166188
3,4.0,0.130264
4,5.0,0.104215
5,6.0,0.090223
6,7.0,0.079424
7,8.0,0.070017
8,9.0,0.061738
9,10.0,0.049415


In [111]:
# Per-epoch training loss of model18 (Tanh; hidden widths 150-150-150-100).
allLossVsIter[17]

Unnamed: 0,Iteration,Negative Log Likelihood Loss
0,1.0,0.693189
1,2.0,0.236701
2,3.0,0.166864
3,4.0,0.13331
4,5.0,0.107125
5,6.0,0.094346
6,7.0,0.08167
7,8.0,0.072037
8,9.0,0.059057
9,10.0,0.054082


In [112]:
# Test-set score of every network as returned by testing(); the "Model"
# column is the 1-based position in allModels.
accuracies

Unnamed: 0,Model,Accuracy
0,1.0,97.658009
1,2.0,97.5671
2,3.0,97.662338
3,4.0,95.298701
4,5.0,94.848485
5,6.0,95.147186
6,7.0,97.580087
7,8.0,97.463203
8,9.0,97.705628
9,10.0,97.419913


In [113]:
# optimizer = optim.SGD(model.parameters(), lr = 0.01, momentum = 0.9)
# criteria = nn.NLLLoss()
# trainloader = torch.utils.data.DataLoader(list(zip(x_train, y_train)), shuffle=True, batch_size=100)

# for e in range(30):
#     running_loss = 0
#     model.train()
#     for img, label in trainloader:

#         optimizer.zero_grad()
#         output = model(img)
#         loss = criteria(output, label)
#         loss.backward()
#         optimizer.step()
        
#         running_loss += loss.item()
#     print("Epoch {} - Training loss: {}".format(e, running_loss/len(trainloader)))

In [114]:
# correct_count, all_count = 0, 0
# testloader = torch.utils.data.DataLoader(list(zip(x_test, y_test)), shuffle=True, batch_size=100)
# for images, labels in testloader:   
#     for i in range(len(labels)):
#         img = images[i].reshape([1, 784])
#         with torch.no_grad():
#             logps = model(img)
    
#         ps = torch.exp(logps)
#         probab = list(ps.numpy()[0])
#         pred_label = probab.index(max(probab))
#         true_label = labels.numpy()[i]
#         if(true_label == pred_label):
#             correct_count += 1
#         all_count += 1

# print("Number Of Images Tested =", all_count)
# print("\nModel Accuracy =", (correct_count/all_count))