In [10]:
import numpy as np
import torch
from torchvision import datasets
from torchvision.transforms import ToTensor
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

import warnings
# NOTE(review): this silences every warning category for the rest of the
# session, including deprecation warnings raised by the libraries imported
# above. Consider narrowing the filter to specific categories/modules.
warnings.filterwarnings('ignore')

In [11]:
# MNIST training split, stored under ./data (downloaded if requested files are
# missing); each image is converted with ToTensor().
trainData = datasets.MNIST(
    root='data',
    train=True,
    download=True,
    transform=ToTensor(),
)

In [12]:
# MNIST held-out split (train=False), stored under ./data; each image is
# converted with ToTensor().
testData = datasets.MNIST(
    root='data',
    train=False,
    download=True,
    transform=ToTensor(),
)

In [13]:
# Print the textual summary of both splits, test split first.
print(" \n\n ".join(map(str, (testData, trainData))))

Dataset MNIST
    Number of datapoints: 10000
    Root location: data
    Split: Test
    StandardTransform
Transform: ToTensor() 

 Dataset MNIST
    Number of datapoints: 60000
    Root location: data
    Split: Train
    StandardTransform
Transform: ToTensor()


In [14]:
# Training batches of 64 samples, reshuffled on every pass over the data.
trainLoader = torch.utils.data.DataLoader(trainData, batch_size=64, shuffle=True)
# Evaluation batches of 100 samples. The metrics computed over this loader are
# sums over the whole split, so shuffling adds nothing; a fixed order keeps the
# evaluation pass deterministic.
testLoader = torch.utils.data.DataLoader(testData, batch_size=100, shuffle=False)

In [19]:
class CNN(nn.Module):
    """Small convolutional classifier producing 10 class log-probabilities.

    Architecture: two 5x5 convolutions (1 -> 10 -> 20 channels), each followed
    by 2x2 max-pooling and ReLU, then two fully connected layers (320 -> 50 ->
    10). The 320 input features of ``fc1`` correspond to 20 channels of 4x4
    maps, which is what 1x28x28 inputs produce after the two conv/pool stages.
    """

    def __init__(self):
        super().__init__()
        self.convLayer1 = nn.Conv2d(in_channels=1, out_channels=10, kernel_size=5)
        self.convLayer2 = nn.Conv2d(in_channels=10, out_channels=20, kernel_size=5)
        # Channel-wise dropout applied to the output of the second convolution.
        self.conv2Drop = nn.Dropout2d()
        self.fc1 = nn.Linear(320, 50)
        self.fc2 = nn.Linear(50, 10)

    def forward(self, x):
        """Return log-probabilities of shape ``(N, 10)`` for input batch ``x``.

        Args:
            x: image tensor accepted by ``convLayer1`` (one input channel);
                for 28x28 images the flattened feature size is 320.

        Returns:
            Tensor with 10 columns holding ``log_softmax`` over the classes,
            i.e. suitable for ``nll_loss`` / ``NLLLoss``.
        """
        x = F.relu(F.max_pool2d(self.convLayer1(x), 2))
        x = F.relu(F.max_pool2d(self.conv2Drop(self.convLayer2(x)), 2))
        # Flatten the 20 feature maps into one 320-element vector per sample.
        x = x.view(-1, 320)
        x = F.relu(self.fc1(x))
        # Functional dropout must be told the mode explicitly; tie it to the
        # module's train/eval state so it is disabled during evaluation.
        x = F.dropout(x, training=self.training)
        x = self.fc2(x)

        # Normalise over the class dimension explicitly; relying on the
        # implicit dimension is deprecated and emits a warning.
        return F.log_softmax(x, dim=1)

In [20]:

# Use Apple's Metal backend (MPS) when PyTorch reports it as available,
# otherwise run on the CPU.
if torch.backends.mps.is_available():
    device = torch.device('mps')
else:
    device = torch.device('cpu')
print(f"Currently used device: {device}")

Currently used device: mps


In [21]:
# Instantiate the network on the selected device and optimise all of its
# parameters with Adam (learning rate 1e-4).
model = CNN().to(device)
optimizer = optim.Adam(model.parameters(), lr=0.0001)
# CNN.forward ends with log_softmax, so its output is already log-probabilities.
# NLLLoss is the criterion that consumes log-probabilities directly;
# CrossEntropyLoss expects raw logits and would apply log_softmax a second time.
calcLoss = nn.NLLLoss()

In [9]:
def train(epoch):
    """Run one optimisation pass over every batch yielded by ``trainLoader``.

    Operates on the notebook-level ``model``, ``optimizer``, ``calcLoss`` and
    ``device``. The ``epoch`` argument is accepted but not used in the body.
    """
    model.train()  # switch the module to training mode
    for images, labels in trainLoader:
        images = images.to(device)
        labels = labels.to(device)

        # Clear gradients left over from the previous step before backprop.
        optimizer.zero_grad()
        batchLoss = calcLoss(model(images), labels)
        batchLoss.backward()
        optimizer.step()

In [11]:
def test():
    """Evaluate the notebook-level ``model`` on ``testLoader`` and print a summary.

    Accumulates the summed negative log-likelihood and the number of correct
    top-1 predictions over all batches, then prints the per-sample average loss
    and the accuracy. Uses the notebook-level ``model``, ``testLoader`` and
    ``device``; returns nothing.
    """
    model.eval()  # evaluation mode
    testLoss = 0.0
    correctVal = 0

    with torch.no_grad():  # no gradients are needed for evaluation
        for data, target in testLoader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            # Sum (not mean) per batch so that dividing by the dataset size
            # below yields a true per-sample average. `reduction='sum'`
            # replaces the deprecated `size_average=False`.
            testLoss += F.nll_loss(output, target, reduction='sum').item()
            # Index of the largest log-probability is the predicted class.
            prediction = output.argmax(dim=1)
            # Keep the running count as a plain Python int.
            correctVal += (prediction == target).sum().item()

    numSamples = len(testLoader.dataset)
    testLoss /= numSamples
    print(f"Average loss: {testLoss:.4f}, accuracy {correctVal}/{numSamples} ({100. * correctVal / numSamples:.0f}%)\n")
            

In [None]:
# Alternate one training pass and one evaluation pass for epochs 1 through 9
# (range(1, 10) stops before 10, so this is nine epochs).
# NOTE(review): if ten epochs were intended the upper bound should be 11 — confirm.
for epoch in range(1, 10):
    train(epoch)
    test()

In [29]:
import matplotlib.pyplot as plt
import itertools
def plotConfusionMatrix(confusion_matrix, cmap=plt.cm.Reds, classes=None):
    """Draw a confusion matrix as an annotated heat map on a new 8x8 figure.

    Args:
        confusion_matrix: 2-D array-like supporting ``.shape``, ``.max()`` and
            ``[i, j]`` indexing. Rows are drawn along the y axis ("True label")
            and columns along the x axis ("Predicted label").
        cmap: matplotlib colormap used for the cells.
        classes: optional sequence of tick labels, used for both axes. When
            omitted, ``range(confusion_matrix.shape[0])`` is used, which for a
            10x10 matrix gives the labels 0..9.
    """
    numRows, numCols = confusion_matrix.shape[0], confusion_matrix.shape[1]
    if classes is None:
        # Derive the labels from the matrix instead of assuming ten classes.
        classes = range(numRows)

    plt.figure(figsize=(8, 8))
    plt.imshow(confusion_matrix,
               interpolation='nearest',
               cmap=cmap)
    plt.title('Confusion matrix')
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    # Cells darker than half of the maximum get white text for contrast.
    thresh = confusion_matrix.max() / 2.
    for i, j in itertools.product(range(numRows), range(numCols)):
        # imshow puts column j on the x axis and row i on the y axis.
        plt.text(j, i, confusion_matrix[i, j],
                 horizontalalignment="center",
                 color="white" if confusion_matrix[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    # Run the layout pass last so it accounts for the axis labels as well.
    plt.tight_layout()