# Mathematics for AI - Coursework Task 3

The third task is about comparing your results with architectures developed using PyTorch.
Compare the results obtained in Task 2 to the results obtained using the same architectures implemented in PyTorch. Then,  propose improvements and new architectures that make use of more advanced methods (e.g. Convolutional Neural Networks, dropout, ...). Compare the results. Finally, present the confusion matrix of your best model.

In [8]:
# Import PyTorch Libraries
import torchvision
import torch
from torchvision import transforms, datasets
import torch.nn as nn
import torch.nn.functional as F

In [9]:
# Split into training and testing dataset
train = datasets.MNIST("", train = True, download = True,
                       transform = transforms.Compose([transforms.ToTensor()]))
test = datasets.MNIST("", train = False, download = True,
                       transform = transforms.Compose([transforms.ToTensor()]))
train_set = torch.utils.data.DataLoader(train, batch_size = 10, shuffle = True)
test_set = torch.utils.data.DataLoader(test, batch_size = 10, shuffle = True)

In [10]:
# Build the network
class Network(nn.Module):
    def __init__(self):
        super().__init__()
        self.input = nn.Linear(28*28, 64)
        self.hidden_1 = nn.Linear(64, 64)
        self.output = nn.Linear(64, 10)

    def forward(self, X):
        X = F.relu(self.input(X))
        X = F.relu(self.hidden_1(X))
        X = self.output(X)

        return F.log_softmax(X, dim = 1)

In [11]:
network = Network()

In [12]:
import torch.optim as optim

optimizer = optim.Adam(network.parameters(), lr = 0.001)

EPOCHS = 6

for epochs in range(EPOCHS):
    for data in train_set:
        X,y = data
        network.zero_grad()
        output = network(X.view(-1, 28*28))
        loss = F.nll_loss(output, y)
        loss.backward()
        optimizer.step()
    print(loss)

tensor(0.1696, grad_fn=<NllLossBackward>)
tensor(0.0651, grad_fn=<NllLossBackward>)
tensor(0.0802, grad_fn=<NllLossBackward>)
tensor(0.0387, grad_fn=<NllLossBackward>)
tensor(0.0175, grad_fn=<NllLossBackward>)
tensor(0.0962, grad_fn=<NllLossBackward>)


In [13]:
# Evaluate Performance - Accuracy
correct = 0
total = 0

with torch.no_grad():
    for data in train_set:
        X,y = data
        output = network(X.view(-1, 28*28))
        for idx, i in enumerate(output):
            if torch.argmax(i) == y[idx]:
                correct += 1
            total += 1

print("Accuracy: "+round(correct/total, 3))

0.99


## Increased Number of Epochs

In [14]:
networkEpochs = Network()

In [None]:
import torch.optim as optim

optimizer = optim.SGD(network.parameters(), lr = 0.001)

EPOCHS = 100

for epochs in range(EPOCHS):
    for data in train_set:
        X,y = data
        network.zero_grad()
        output = networkEpochs(X.view(-1, 28*28))
        loss = F.nll_loss(output, y)
        loss.backward()
        optimizer.step()
    print(loss)

tensor(2.3265, grad_fn=<NllLossBackward>)
tensor(2.2758, grad_fn=<NllLossBackward>)
tensor(2.2619, grad_fn=<NllLossBackward>)
tensor(2.3401, grad_fn=<NllLossBackward>)
tensor(2.3149, grad_fn=<NllLossBackward>)
tensor(2.3397, grad_fn=<NllLossBackward>)
tensor(2.2974, grad_fn=<NllLossBackward>)
tensor(2.2667, grad_fn=<NllLossBackward>)
tensor(2.3346, grad_fn=<NllLossBackward>)
tensor(2.3152, grad_fn=<NllLossBackward>)


In [None]:
# Evaluate Performance - Accuracy
correct = 0
total = 0

with torch.no_grad():
    for data in train_set:
        X,y = data
        output = networkEpochs(X.view(-1, 28*28))
        for idx, i in enumerate(output):
            if torch.argmax(i) == y[idx]:
                correct += 1
            total += 1

print("Accuracy: "+round(correct/total, 3))

In [None]:
zeroT = []
zeroF = []
oneT = []
oneF = []
twoT = []
twoF = []
threeT = []
threeF = []
fourT = []
fourF = []
fiveT = []
fiveF = []
sixT = []
sixF = []
sevenT = []
sevenF = []
eightT = []
eightF = []
nineT = []
nineF = []


with torch.no_grad():
    for data in train_set:
        X,y = data
        output = network(X.view(-1, 28*28))
        for idx, i in enumerate(output):
            if torch.argmax(i) == y[idx]:
                 if torch.argmax(i) == 0:
                    zeroT.append(1)
            elif torch.argmax(i) == 1:
                   oneT.append(1)
            elif torch.argmax(i) == 2:
                   twoT.append(1)
            elif torch.argmax(i) == 3:
                   threeT.append(1)
            elif torch.argmax(i) == 4:
                   fourT.append(1)
            elif torch.argmax(i) == 5:
                   fiveT.append(1)
            elif torch.argmax(i) == 6:
                   sixT.append(1)
            elif torch.argmax(i) == 7:
                   sevenT.append(1)
            elif torch.argmax(i) == 8:
                   eightT.append(1)
            elif torch.argmax(i) == 9:
                    nineT.append(1)
        for idx, i in enumerate(output):
            if torch.argmax(i) != y[idx]:
                if y[idx] == 0:
                    zeroF.append(1)
                elif y[idx] == 1:
                    oneF.append(1)
                elif y[idx] == 2:
                    twoF.append(1)
                elif y[idx] == 3:
                    threeF.append(1)
                elif y[idx] == 4:
                    fourF.append(1)
                elif y[idx] == 5:
                    fiveF.append(1)
                elif y[idx] == 6:
                    sixF.append(1)
                elif y[idx] == 7:
                    sevenF.append(1)
                elif y[idx] == 8:
                    eightF.append(1)
                elif y[idx] == 9:
                    nineF.append(1)

In [None]:
zeroT_sum = sum(zeroT)
zeroF_sum = sum(zeroF)
oneT_sum = sum(oneT)
oneF_sum = sum(oneF)
twoT_sum = sum(twoT)
twoF_sum = sum(twoF)
threeT_sum = sum(threeT)
threeF_sum = sum(threeF)
fourT_sum = sum(fourT)
fourF_sum = sum(fourF)
fiveT_sum = sum(fiveT)
fiveF_sum = sum(fiveF)
sixT_sum = sum(sixT)
sixF_sum = sum(sixF)
sevenT_sum = sum(sevenT)
eightT_sum = sum(eightT)
eightF_sum = sum(eightF)
nineT_sum = sum(nineT)
nineF_sum = sum(nineF)

In [None]:
total_sum = [zeroT_sum, oneT_sum, twoT_sum, threeT_sum, fourT_sum, fiveT_sum, sixT_sum, sevenT_sum, eightT_sum, nineT_sum ]

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns; sns.set()
from sklearn.metrics import confusion_matrix
from sklearn.metrics import plot_confusion_matrix

def confusionMatrix(pred, test):
    mat = confusion_matrix(pred, test)
    sns.heatmap(mat.T, square=True, annot=True, fmt='d', cbar=False)
    plt.xlabel('true label')
    plt.ylabel('predicted label');

In [None]:
confusionMatrix(total_sum, total_sum)

## Dropout Layer

In [None]:
class NetworkWithDropout(nn.Module):
    def __init__(self):
        super().__init__()
        self.input = nn.Linear(28*28, 64)
        self.hidden_1 = nn.Linear(64, 64)
        self.dropout_1 = nn.Dropout(p=0.1)
        self.hidden_2 = nn.Linear(64, 64)
        self.output = nn.Linear(64, 10)

    def forward(self, X):
        X = torch.sigmoid(self.input(X))
        X = torch.sigmoid(self.hidden_1(X))
        X = torch.sigmoid(self.hidden_2(X))
        X = self.output(X)
        return F.log_softmax(X, dim = 1)

In [None]:
networkD = NetworkWithDropout()

In [None]:
optimizer = optim.SGD(networkD.parameters(), lr = 0.01)

EPOCHS = 10

for epochs in range(EPOCHS):
    for data in train_set:
        X,y = data
        networkD.zero_grad()
        output = networkD(X.view(-1, 28*28))
        loss = F.nll_loss(output, y)
        loss.backward()
        optimizer.step()
    print(loss)

In [None]:
correct = 0
total = 0

with torch.no_grad():
    for data in train_set:
        X,y = data
        output = networkD(X.view(-1, 28*28))
    for idx, i in enumerate(output):
        if torch.argmax(i) == y[idx]:
            correct += 1
        total += 1

print(round(correct/total, 3))