# IMPORTS

In [None]:
import os
import numpy as np

import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms
import torch.optim as optim
from torch.utils.data import random_split

import matplotlib.pyplot as plt
%matplotlib inline

# SETTINGS

In [None]:
# Mini-batch size handed to the training DataLoader; the evaluation loaders
# are built from multiples of this value.
batch_size = 264
# Number of epochs to train for.
epochs = 15

# LOAD DATA

In [None]:
# ToTensor is the only preprocessing step; no normalisation or augmentation
# is applied.
transform = transforms.Compose(
    [transforms.ToTensor()])


# MNIST training split, divided below into 50,000 training and 10,000
# validation samples.
trainvalset = torchvision.datasets.MNIST(root='./data', train=True,
                                        download=True, transform=transform)
# Seed the split explicitly: with the default (unseeded) generator every
# kernel restart draws a different validation set, so losses and accuracies
# from separate sessions would not be comparable.
trainset, valset = random_split(trainvalset, (50000, 10000),
                                generator=torch.Generator().manual_seed(0))
testset = torchvision.datasets.MNIST(root='./data', train=False,
                                       download=True, transform=transform)

# Only the training loader shuffles; evaluation order does not matter.
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                          shuffle=True, num_workers=2)
# Evaluation loaders use a 4x larger batch (presumably affordable because no
# gradients are kept while evaluating).
valloader = torch.utils.data.DataLoader(valset, batch_size=batch_size*4,
                                          shuffle=False, num_workers=2)
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size*4,
                                         shuffle=False, num_workers=2)

# From here on only the loaders are used; drop the dataset names so they do
# not linger in the notebook namespace.
del trainset, valset, testset, trainvalset

# NN ARCHITECTURE

In [None]:
class Net(nn.Module):
    """Small CNN classifier: two conv + max-pool stages and one linear layer.

    Both convolutions use 3x3 kernels with stride 1 and padding 1, so they keep
    the spatial size; each 2x2 max-pool halves it. The linear layer expects 196
    input features, i.e. 4 channels of 7x7, which corresponds to 28x28
    single-channel inputs. The forward pass returns per-class log-probabilities
    (log-softmax over 10 classes).
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=16,
                               kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=4,
                               kernel_size=3, stride=1, padding=1)
        self.fc = nn.Linear(in_features=196, out_features=10)

    def forward(self, x):
        """Map a batch of images to log-probabilities of shape (batch, 10)."""
        # Each stage: convolution -> ReLU -> 2x2 max-pool.
        for conv in (self.conv1, self.conv2):
            x = F.max_pool2d(F.relu(conv(x)), 2)
        # Keep the batch dimension, flatten everything else for the classifier.
        logits = self.fc(x.flatten(start_dim=1))
        return F.log_softmax(logits, dim=1)

# CREATE AN NN

In [None]:
def get_nn(learning_rate=0.001):
    """Build a freshly initialised network together with its training tools.

    Args:
        learning_rate: step size passed to the Adam optimizer.

    Returns:
        Tuple ``(net, criterion, optimizer, device)``: a new ``Net`` moved to
        ``device``, an ``nn.NLLLoss`` criterion, an Adam optimizer over the
        network's parameters, and the device itself (the first CUDA device
        when CUDA is available, otherwise the CPU).
    """
    if torch.cuda.is_available():
        device = torch.device("cuda:0")
    else:
        device = torch.device("cpu")

    # Module.to() moves the parameters in place and returns the module itself.
    net = Net().to(device)

    # NLLLoss pairs with the log-softmax output produced by Net.forward.
    criterion = nn.NLLLoss()
    optimizer = optim.Adam(net.parameters(), lr=learning_rate)

    return net, criterion, optimizer, device

# TRAINING

In [None]:
def _mean_batch_loss(net, criterion, loader, device):
    """Return the mean of the per-batch losses of ``net`` over ``loader`` as a float."""
    total = 0.0
    for data in loader:
        inputs, labels = data[0].to(device), data[1].to(device)
        # .item() turns the 0-dim loss tensor into a plain Python float.
        total += criterion(net(inputs), labels).item()
    return total / len(loader)


def train(net, criterion, optimizer, device, epochs=5,
          train_loader=None, val_loader=None):
    """Train ``net`` and record the loss after every epoch.

    After each epoch the mean per-batch loss is re-measured (without
    gradients) over the whole training loader and, when a validation loader
    is available, over the validation loader as well.

    Args:
        net: model to optimise; updated in place.
        criterion: loss function called as ``criterion(outputs, labels)``.
        optimizer: optimizer bound to ``net``'s parameters.
        device: device every batch is moved to before the forward pass.
        epochs: number of passes over the training loader.
        train_loader: iterable of ``(inputs, labels)`` batches supporting
            ``len()``. Defaults to the notebook-level ``trainloader``.
        val_loader: optional validation loader of the same form. Defaults to
            the notebook-level ``valloader`` if one is currently defined.

    Returns:
        ``(trainloss, valloss)``, two lists of floats with one entry per
        epoch, when a validation loader is available; otherwise only
        ``trainloss``.
    """
    if train_loader is None:
        train_loader = trainloader
    if val_loader is None:
        # The loaders are module-level names. dir() called without arguments
        # inside a function lists only the *local* names, so it can never see
        # `valloader`; the global namespace has to be queried instead.
        val_loader = globals().get('valloader')
    has_val = val_loader is not None

    trainloss = []
    valloss = []

    for epoch in range(epochs):

        running_loss = 0.0
        for i, data in enumerate(train_loader, 0):
            inputs, labels = data[0].to(device), data[1].to(device)
            optimizer.zero_grad()

            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            # Progress line every 2000 mini-batches; with fewer batches per
            # epoch this never prints.
            if i % 2000 == 1999:
                print('[%d, %5d] loss: %.3f' %
                          (epoch + 1, i + 1, running_loss / 2000))
                running_loss = 0.0

        # Re-evaluate with the end-of-epoch weights so the training and
        # validation figures are measured on the same model state.
        with torch.no_grad():
            trainloss.append(_mean_batch_loss(net, criterion, train_loader, device))
            if has_val:
                valloss.append(_mean_batch_loss(net, criterion, val_loader, device))

        print(f'EPOCH {epoch} ||')
        print(f'\tTRAIN: {trainloss[-1]} ||')

        if has_val:
            print(f'VAL: {valloss[-1]}')

    if has_val:
        return trainloss, valloss
    return trainloss

In [None]:
# Learning-rate sweep, lr = 0.005: train several freshly initialised networks
# and keep one row of per-epoch losses per run.
n_runs = 5
trainresult = np.zeros((n_runs, epochs))
valresult = np.zeros((n_runs, epochs))

for i in range(n_runs):
    print(f'ITERATION {i}')
    net, criterion, optimizer, device = get_nn(learning_rate=0.005)

    trainloss, valloss = train(net, criterion, optimizer, device, epochs=epochs)

    # float() also unwraps 0-dim tensors (including CUDA ones), which NumPy
    # cannot ingest directly when assigning into an array row.
    trainresult[i] = [float(v) for v in trainloss]
    valresult[i] = [float(v) for v in valloss]

In [None]:
# Learning-rate sweep, lr = 0.001: train several freshly initialised networks
# and keep one row of per-epoch losses per run.
n_runs = 5
trainresult2 = np.zeros((n_runs, epochs))
valresult2 = np.zeros((n_runs, epochs))

for i in range(n_runs):
    print(f'ITERATION {i}')
    net, criterion, optimizer, device = get_nn(learning_rate=0.001)

    trainloss, valloss = train(net, criterion, optimizer, device, epochs=epochs)

    # Store into this sweep's own arrays; writing to trainresult/valresult
    # would overwrite the lr=0.005 results and leave these arrays all zero.
    # float() also unwraps 0-dim tensors (including CUDA ones), which NumPy
    # cannot ingest directly when assigning into an array row.
    trainresult2[i] = [float(v) for v in trainloss]
    valresult2[i] = [float(v) for v in valloss]

In [None]:
# Learning-rate sweep, lr = 0.0005: train several freshly initialised networks
# and keep one row of per-epoch losses per run.
n_runs = 5
trainresult3 = np.zeros((n_runs, epochs))
valresult3 = np.zeros((n_runs, epochs))

for i in range(n_runs):
    print(f'ITERATION {i}')
    net, criterion, optimizer, device = get_nn(learning_rate=0.0005)

    trainloss, valloss = train(net, criterion, optimizer, device, epochs=epochs)

    # Store into this sweep's own arrays; writing to trainresult/valresult
    # would overwrite the lr=0.005 results and leave these arrays all zero.
    # float() also unwraps 0-dim tensors (including CUDA ones), which NumPy
    # cannot ingest directly when assigning into an array row.
    trainresult3[i] = [float(v) for v in trainloss]
    valresult3[i] = [float(v) for v in valloss]

# EVALUATION

In [None]:
##### batch_size = 264, epochs = 15 #####

# Hard-coded loss values with the same (5, 15) layout as the result arrays of
# the sweep cells: 5 rows of 15 values each (one row per run, one column per
# epoch). Presumably pasted from an earlier execution of those cells - verify
# before reusing them.
# Tr001 / Val001 are plotted as the 'Train' / 'Val' curves of the panel titled
# 'LR: 0.001'.
Tr001 = np.array([[0.31474945, 0.20616516, 0.16378577, 0.14059964, 0.12357964,
        0.11633095, 0.10979732, 0.09746411, 0.09751423, 0.09484882,
        0.08633662, 0.08969431, 0.07861999, 0.07764962, 0.07902008],
       [0.30680719, 0.21827681, 0.17699517, 0.15303053, 0.13843828,
        0.12726253, 0.11727257, 0.11087177, 0.09999228, 0.09569807,
        0.09076178, 0.08479355, 0.08525715, 0.07666479, 0.07749713],
       [0.35068539, 0.23424751, 0.18476433, 0.16082682, 0.13696234,
        0.1251477 , 0.11646529, 0.1109904 , 0.10379943, 0.09925129,
        0.09304589, 0.09013391, 0.08281144, 0.0798907 , 0.07755478],
       [0.43918547, 0.35232794, 0.30566332, 0.26773351, 0.21882735,
        0.18889731, 0.15960349, 0.13807294, 0.12559281, 0.11705201,
        0.10862825, 0.10354333, 0.10043009, 0.09534948, 0.09378526],
       [0.3797583 , 0.27990371, 0.21176998, 0.16085176, 0.13076909,
        0.11333763, 0.10199824, 0.09475242, 0.08620581, 0.07944243,
        0.07450339, 0.07208928, 0.07061998, 0.06544194, 0.06160825]])
Val001 = np.array([[0.33555955, 0.22373414, 0.17996122, 0.15734513, 0.13930024,
        0.13315164, 0.12922783, 0.11691816, 0.11848835, 0.11626675,
        0.10838666, 0.11573636, 0.10037606, 0.10144424, 0.10258617],
       [0.32957476, 0.23399308, 0.18967538, 0.16703968, 0.15278476,
        0.14267381, 0.12976891, 0.12577443, 0.11646718, 0.11120118,
        0.11080954, 0.10309409, 0.10516   , 0.09699281, 0.09979912],
       [0.38150427, 0.25652882, 0.2036778 , 0.18024346, 0.15516524,
        0.14551182, 0.13858835, 0.13329849, 0.12443324, 0.12278662,
        0.11786441, 0.11587249, 0.10735756, 0.10616543, 0.10267975],
       [0.46268398, 0.37751541, 0.32522815, 0.28846043, 0.23507552,
        0.20678769, 0.17327757, 0.15363982, 0.14362997, 0.13482277,
        0.12823687, 0.12201282, 0.12001085, 0.11563803, 0.11680868],
       [0.40373942, 0.29773885, 0.22769074, 0.1774826 , 0.14512101,
        0.12798278, 0.1180227 , 0.11338321, 0.10481732, 0.09793787,
        0.0937638 , 0.09392189, 0.09297325, 0.08732148, 0.08236488]])

# Tr005 / Val005: 5 rows of 15 hard-coded loss values each, plotted as the
# 'Train' / 'Val' curves of the panel titled 'LR: 0.005'.
Tr005 = np.array([[0.25970817, 0.13165502, 0.10036188, 0.08979715, 0.08640306,
        0.07377179, 0.06935777, 0.06419412, 0.06486621, 0.05846115,
        0.05549387, 0.05594687, 0.05956002, 0.05067385, 0.04939837],
       [0.2525284 , 0.15795287, 0.13500471, 0.11101262, 0.11214416,
        0.09952364, 0.11118082, 0.09459263, 0.1034699 , 0.09916721,
        0.0898119 , 0.08271453, 0.08261592, 0.08062547, 0.0807986 ],
       [0.15459928, 0.11166361, 0.08678886, 0.10857711, 0.07431044,
        0.07632867, 0.06157049, 0.05625704, 0.05701309, 0.05581316,
        0.05359763, 0.0552398 , 0.0429869 , 0.04245877, 0.04247933],
       [0.18069653, 0.11656231, 0.09150515, 0.08905601, 0.08199372,
        0.07366855, 0.07797817, 0.0675329 , 0.07800166, 0.06270258,
        0.05830973, 0.0591176 , 0.05984771, 0.05561578, 0.05534544],
       [0.15283312, 0.1105253 , 0.08695336, 0.07273209, 0.06869704,
        0.05919097, 0.05834841, 0.04996386, 0.05464161, 0.05541461,
        0.05116295, 0.04775501, 0.04409708, 0.05527179, 0.0397753 ]])
Val005 = np.array([[0.2757031 , 0.14913861, 0.11667317, 0.10698155, 0.10946987,
        0.09324982, 0.09440534, 0.0866102 , 0.09049191, 0.08421662,
        0.08717527, 0.08405174, 0.08869913, 0.08271113, 0.08021093],
       [0.27275094, 0.18098673, 0.15854602, 0.13505512, 0.14066471,
        0.12846607, 0.14617699, 0.12639982, 0.13972548, 0.12624553,
        0.1228248 , 0.11594468, 0.11571361, 0.11537092, 0.11328931],
       [0.16845165, 0.12939985, 0.1095859 , 0.13002427, 0.09923001,
        0.10129827, 0.08868746, 0.0818914 , 0.08553504, 0.08721352,
        0.08069589, 0.08657378, 0.08010703, 0.07615194, 0.07777274],
       [0.19308451, 0.13635036, 0.11478456, 0.11746486, 0.11088292,
        0.1023929 , 0.11174098, 0.09485953, 0.1077436 , 0.09503523,
        0.09275126, 0.09268952, 0.09083874, 0.09044095, 0.0888153 ],
       [0.17318176, 0.12557732, 0.10619797, 0.0968906 , 0.09284385,
        0.08342581, 0.08737677, 0.07737605, 0.08639332, 0.08644295,
        0.07997695, 0.08025545, 0.08115023, 0.0886455 , 0.07684118]])

# Tr0005 / Val0005: 5 rows of 15 hard-coded loss values each, plotted as the
# 'Train' / 'Val' curves of the panel titled 'LR: 0.0005'.
Tr0005 = np.array([[0.54404891, 0.3742241 , 0.32069299, 0.28194693, 0.24865218,
        0.21242993, 0.18681173, 0.16628873, 0.15389916, 0.13868897,
        0.12813438, 0.11933328, 0.11245593, 0.10683271, 0.10305476],
       [0.54143584, 0.38072559, 0.32601529, 0.29482228, 0.26310447,
        0.23966195, 0.21785745, 0.19808467, 0.1821164 , 0.16733468,
        0.15699449, 0.14665689, 0.1383245 , 0.1395949 , 0.12660789],
       [0.51873469, 0.36141801, 0.3065913 , 0.26510701, 0.229754  ,
        0.20194298, 0.17383268, 0.15894645, 0.14309184, 0.12950891,
        0.12211503, 0.11536687, 0.10715348, 0.10145348, 0.09773394],
       [0.4536694 , 0.29532668, 0.24630019, 0.21774861, 0.19747753,
        0.18121175, 0.17105512, 0.16151299, 0.15270406, 0.14883561,
        0.14223936, 0.13812864, 0.13216646, 0.12886891, 0.12677656],
       [0.5090754 , 0.36697951, 0.30763257, 0.27095798, 0.24148753,
        0.22337331, 0.19389677, 0.17812392, 0.16715343, 0.15391363,
        0.14001179, 0.13133219, 0.12238302, 0.11827273, 0.11173678]])
Val0005 = np.array([[0.56371456, 0.39919326, 0.34435099, 0.304436  , 0.26631185,
        0.23099618, 0.20328303, 0.18292068, 0.16978583, 0.15367405,
        0.14461555, 0.13576211, 0.12829004, 0.1231632 , 0.11930799],
       [0.55869371, 0.40093383, 0.34724513, 0.31373376, 0.28069019,
        0.25570101, 0.23414214, 0.21497826, 0.19633742, 0.18155685,
        0.17195155, 0.16187638, 0.15481564, 0.15380354, 0.14201556],
       [0.5361256 , 0.38282689, 0.32527199, 0.28049859, 0.24196605,
        0.21402383, 0.18566085, 0.17257425, 0.15611392, 0.14318419,
        0.13591801, 0.13009866, 0.12409488, 0.11858173, 0.11509367],
       [0.47310925, 0.31585214, 0.26405978, 0.23629363, 0.21481743,
        0.1985812 , 0.18842463, 0.17883441, 0.16964333, 0.16689895,
        0.16170117, 0.15715431, 0.15173787, 0.15018415, 0.14763325],
       [0.52899235, 0.38816255, 0.32814357, 0.28632629, 0.25608152,
        0.23640506, 0.20689945, 0.18967098, 0.17897335, 0.16758189,
        0.15351024, 0.14587049, 0.13714264, 0.13190079, 0.12694463]])



In [None]:
def _draw_panel(ax, epoch_axis, train_runs, val_runs, title):
    """Draw mean train/val curves with a +/- one standard deviation band on ``ax``."""
    curves = ((train_runs, 'Train', 'blue'), (val_runs, 'Val', 'red'))
    # Mean curves first, shaded bands afterwards (train before val each time).
    for runs, label, colour in curves:
        ax.plot(epoch_axis, runs.mean(axis=0), lw=2, label=label, color=colour)
    for runs, _, colour in curves:
        centre = runs.mean(axis=0)
        spread = runs.std(axis=0)
        ax.fill_between(epoch_axis, centre + spread, centre - spread,
                        facecolor=colour, alpha=0.5)
    ax.set_title(title)


# Epoch numbers 1..15 on the x axis; the three panels share both axes.
t = np.arange(1, 16)
fig, (ax0, ax1, ax2) = plt.subplots(nrows=1, ncols=3, sharex=True, sharey=True, figsize=(12, 6))

_draw_panel(ax0, t, Tr005, Val005, 'LR: 0.005')
_draw_panel(ax1, t, Tr001, Val001, 'LR: 0.001')
_draw_panel(ax2, t, Tr0005, Val0005, 'LR: 0.0005')

# One legend (right panel), one x label (middle panel), one y label (left panel).
ax2.legend(loc='upper right')
ax1.set_xlabel('Epochs')
ax0.set_ylabel('Negative Log Likelihood')
ax0.grid()
ax1.grid()
ax2.grid()

fig.suptitle('Effect of Learning Rate on Training and Validation Loss')
plt.savefig('Question 5')
plt.show()

In [None]:
def evaluate_acc(loader, net=None, device=None):
    """Print the overall and per-class accuracy of a classifier on ``loader``.

    Args:
        loader: iterable of batches whose first two items are the inputs and
            the integer class labels (labels are expected in ``0..9``).
        net: model to evaluate. Defaults to the notebook-level ``net``.
        device: device the batches are moved to. Defaults to the
            notebook-level ``device``.

    Returns:
        None. The accuracies are printed as whole percentages (fractions are
        truncated); classes without any sample, and an empty loader, are
        reported as ``n/a`` instead of raising ZeroDivisionError.
    """
    # Fall back to the notebook globals when no explicit model/device is given.
    if net is None:
        net = globals()['net']
    if device is None:
        device = globals()['device']

    num_classes = 10
    class_correct = torch.zeros(num_classes, dtype=torch.long)
    class_total = torch.zeros(num_classes, dtype=torch.long)

    with torch.no_grad():
        for data in loader:
            images, labels = data[0].to(device), data[1].to(device)
            outputs = net(images)
            _, predicted = torch.max(outputs, 1)
            hits = predicted == labels
            # Count per class with bincount instead of indexing a squeezed
            # mask sample by sample: squeezing a batch of size 1 yields a
            # 0-dim tensor that cannot be indexed.
            class_total += torch.bincount(labels, minlength=num_classes).cpu()
            class_correct += torch.bincount(labels[hits], minlength=num_classes).cpu()

    total = int(class_total.sum())
    correct = int(class_correct.sum())

    if total:
        print('Accuracy of the network: %d %%' % (
            100 * correct / total))
    else:
        print('Accuracy of the network: n/a (no samples)')

    for i in range(num_classes):
        seen = int(class_total[i])
        if seen:
            print('Accuracy of %5s : %2d %%' % (
                i, 100 * int(class_correct[i]) / seen))
        else:
            print('Accuracy of %5s : n/a (no samples)' % i)

In [None]:
# Train one fresh model with lr = 0.005, then print its accuracy on the
# training and validation loaders.
net, criterion, optimizer, device = get_nn(learning_rate=0.005)
trainloss, valloss = train(net=net, criterion=criterion, optimizer=optimizer,
                           device=device, epochs=15)
evaluate_acc(loader=trainloader)
evaluate_acc(loader=valloader)

In [None]:
# Train one fresh model with lr = 0.001, then print its accuracy on the
# training and validation loaders.
net, criterion, optimizer, device = get_nn(learning_rate=0.001)
trainloss, valloss = train(net=net, criterion=criterion, optimizer=optimizer,
                           device=device, epochs=15)
evaluate_acc(loader=trainloader)
evaluate_acc(loader=valloader)

In [None]:
# Train one fresh model with lr = 0.0005, then print its accuracy on the
# training and validation loaders.
net, criterion, optimizer, device = get_nn(learning_rate=0.0005)
trainloss, valloss = train(net=net, criterion=criterion, optimizer=optimizer,
                           device=device, epochs=15)
evaluate_acc(loader=trainloader)
evaluate_acc(loader=valloader)

# FINAL MODEL

In [None]:
# The final model is trained on the complete MNIST training split, so the
# earlier validation loader no longer describes held-out data. Remove the
# name (idempotently, so this cell can be re-run): train() looks for a
# `valloader` to decide whether to track a validation loss, and the final
# training call below expects only the training losses back.
globals().pop('valloader', None)

transform = transforms.Compose(
    [transforms.ToTensor()])


trainset = torchvision.datasets.MNIST(root='./data', train=True,
                                        download=True, transform=transform)
testset = torchvision.datasets.MNIST(root='./data', train=False,
                                       download=True, transform=transform)

# These assignments replace the loaders created in the LOAD DATA section.
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                          shuffle=True, num_workers=2)

testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
                                         shuffle=False, num_workers=2)

# Only the loaders are needed from here on.
del trainset, testset

In [None]:
# Final model: a fresh network trained with lr = 0.001 for 15 epochs on the
# loaders defined in the previous cell.
net, criterion, optimizer, device = get_nn(learning_rate=0.001)
trainloss = train(net=net, criterion=criterion, optimizer=optimizer,
                  device=device, epochs=15)

In [None]:
# Report the final model's accuracy on the training data and on the test set.
evaluate_acc(loader=trainloader)
evaluate_acc(loader=testloader)