## Import PyTorch and ...

In [35]:
import torch
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

## Load MNIST dataset

In [36]:
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

transform_set = transforms.Compose([
    transforms.ToTensor(), 
    transforms.Normalize((0.5,), (0.5,)),
])

trainset = datasets.MNIST('MNIST_data/', download=True, train=True, transform=transform_set)
testset = datasets.MNIST('MNIST_data/', download=True, train=False, transform=transform_set)

batch_size = 64
trainloader = DataLoader(trainset, batch_size=batch_size, shuffle=True)
testloader = DataLoader(testset, batch_size=batch_size, shuffle=True)

## Defining the Model

In [37]:
from torch import nn
from torch.nn import functional as F

class Network(nn.Module):
    def __init__(self):
        super().__init__()

        self.hidden1 = nn.Linear(784, 128)
        self.hidden2 = nn.Linear(128, 64)
        self.output = nn.Linear(64, 10)

    def forward(self, x):
        x = F.relu(self.hidden1(x))
        x = F.relu(self.hidden2(x))
        x = F.log_softmax(self.output(x), dim=1)

        return x

## Training the Model and Validation

In [38]:
device = torch.device('mps' if torch.backends.mps.is_available() else 'cpu') # Set the device to train model

# If you have gpu and you do not have an apple arm64 processor, you should use the line below.
# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# For these simple datasets like MNIST, we don't need to use gpu, so I commented the parts that send model and data to gpu.

In [40]:
model = Network()
# model.to(device)

criterion = nn.NLLLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.003)

epochs = 5
for e in range(epochs):
    running_loss = 0
    for images, labels in trainloader:
        # images, labels = images.to(device), labels.to(device)
        images = images.view(images.shape[0], -1)

        optimizer.zero_grad()

        logps = model.forward(images)
        loss = criterion(logps, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss
    else:
        test_loss = 0
        accuracy = 0
        for images, labels in testloader:
            # images, labels = images.to(device), labels.to(device)
            images = images.view(images.shape[0], -1)

            with torch.no_grad():
                logps = model.forward(images)
                loss = criterion(logps, labels)
                ps = torch.exp(logps)
                top_p, top_class = ps.topk(1, dim=1)
                corrects = top_class == labels.view(*top_class.shape)
                accuracy += torch.mean(corrects.type(torch.FloatTensor))

        test_loss += loss

        print(f'Epoch {e+1}/{epochs}..')
        print(f'Training loss: {running_loss/len(trainloader)}..')
        print(f'Test loss: {test_loss/len(testloader)}..')
        print(f'Test accuracy: {accuracy/len(testloader)}\n')


Epoch 1/5..
Training loss: 0.3304550051689148..
Test loss: 0.001200884347781539..
Test accuracy: 0.9414808750152588

Epoch 2/5..
Training loss: 0.17088116705417633..
Test loss: 0.000170154104125686..
Test accuracy: 0.9519307613372803

Epoch 3/5..
Training loss: 0.13949739933013916..
Test loss: 0.00026801988133229315..
Test accuracy: 0.950238823890686

Epoch 4/5..
Training loss: 0.12199438363313675..
Test loss: 0.0029800916090607643..
Test accuracy: 0.9357085824012756

Epoch 5/5..
Training loss: 0.11426801234483719..
Test loss: 0.0005691692349500954..
Test accuracy: 0.9609872698783875

