In [8]:
from collections import OrderedDict
import numpy as np
import matplotlib.pyplot as plt
import torch
from torch import nn
from torch import optim
from torchvision import datasets
from torch.utils.data import DataLoader
from torchvision import transforms
import torch.multiprocessing as mp
from torchinfo import summary

# One fixed seed for every random source used in this notebook, so that
# "Restart Kernel -> Run All" repeats the same weight initialisation, dropout
# masks and DataLoader shuffling (as far as the compute backend itself is
# deterministic).
SEED = 42
rng = np.random.default_rng(SEED)
# Trailing semicolon keeps the returned Generator object out of the cell output.
torch.manual_seed(SEED);

In [9]:
def _load_emnist_digits(train):
    """Return the EMNIST "digits" split stored under the "Torch MNIST" folder.

    Args:
        train: True for the training portion, False for the test portion.

    The data is requested with download=True, and every image is passed
    through transforms.ToTensor().
    """
    return datasets.EMNIST(
        root="Torch MNIST",
        split="digits",
        train=train,
        download=True,
        transform=transforms.ToTensor(),
    )


training_data = _load_emnist_digits(train=True)
test_data = _load_emnist_digits(train=False)

In [10]:
# Samples per batch: 1 << 16 == 2**16 == 65,536.
batch_size = 1 << 16

# Both loaders use the same batch size and reshuffle on every pass.
# NOTE(review): shuffling the test loader is not needed for an accuracy
# figure; it is kept here to preserve the existing behaviour.
train_dataloader = DataLoader(
    dataset=training_data,
    batch_size=batch_size,
    shuffle=True,
)
test_dataloader = DataLoader(
    dataset=test_data,
    batch_size=batch_size,
    shuffle=True,
)

In [11]:
# Choose where tensors and the model will live, in order of preference:
# CUDA first, then the MPS backend, then plain CPU.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"

In [12]:
class NeuralNetwork(nn.Module):
    """Fully connected classifier operating on flattened inputs.

    With the default arguments the stack is::

        Flatten -> Dropout(0.1) -> Linear(784, 256) -> LeakyReLU
                -> Dropout(0.1) -> Linear(256, 64)  -> LeakyReLU
                -> Linear(64, 10)

    Args:
        in_features: Number of values per sample after flattening
            (default 28*28*1).
        hidden_sizes: Output width of each hidden Linear layer, in order.
            Every hidden layer is preceded by a Dropout and followed by a
            LeakyReLU. An empty sequence yields a single Linear layer.
        num_classes: Width of the final Linear layer (one score per class).
        dropout: Drop probability used by every Dropout layer.

    The layers are registered as ``flatten``, ``dropout<i>``, ``linear<i>``
    and ``act<i>`` (1-based), so the default configuration keeps the
    parameter names ``linear1``, ``linear2`` and ``linear3``.
    """

    def __init__(self, in_features=28 * 28 * 1, hidden_sizes=(256, 64),
                 num_classes=10, dropout=0.1):
        super().__init__()
        # Materialise once so generators are accepted and len() is available.
        hidden_sizes = tuple(hidden_sizes)

        layers = [("flatten", nn.Flatten())]
        width = in_features
        for index, hidden in enumerate(hidden_sizes, start=1):
            layers.append((f"dropout{index}", nn.Dropout(dropout)))
            layers.append((f"linear{index}", nn.Linear(width, hidden)))
            layers.append((f"act{index}", nn.LeakyReLU()))
            width = hidden
        # Output layer: no dropout or activation, it produces the raw scores.
        layers.append(
            (f"linear{len(hidden_sizes) + 1}", nn.Linear(width, num_classes))
        )

        self.linear_stack = nn.Sequential(OrderedDict(layers))

    def forward(self, x):
        """Run ``x`` through the layer stack and return its raw output scores."""
        return self.linear_stack(x)

In [13]:
# Build the network, then move its parameters onto the selected device
# (Module.to works in place and returns the same module).
model = NeuralNetwork()
model = model.to(device)

# Layer-by-layer summary for one full batch of 1x28x28 inputs. Kept as the
# last expression of the cell so that the table is displayed.
summary(
    model,
    input_size=(batch_size, 1, 28, 28),
)

Layer (type:depth-idx)                   Output Shape              Param #
NeuralNetwork                            [65536, 10]               --
├─Sequential: 1-1                        [65536, 10]               --
│    └─Flatten: 2-1                      [65536, 784]              --
│    └─Dropout: 2-2                      [65536, 784]              --
│    └─Linear: 2-3                       [65536, 256]              200,960
│    └─LeakyReLU: 2-4                    [65536, 256]              --
│    └─Dropout: 2-5                      [65536, 256]              --
│    └─Linear: 2-6                       [65536, 64]               16,448
│    └─LeakyReLU: 2-7                    [65536, 64]               --
│    └─Linear: 2-8                       [65536, 10]               650
Total params: 218,058
Trainable params: 218,058
Non-trainable params: 0
Total mult-adds (Units.GIGABYTES): 14.29
Input size (MB): 205.52
Forward/backward pass size (MB): 173.02
Params size (MB): 0.87
Estimated Total

In [14]:
# Train the model with cross-entropy loss and SGD with momentum.
loss_fn = nn.CrossEntropyLoss()
Optimiser = optim.SGD(model.parameters(), lr=0.1, momentum=0.9)
model.train(True)  # training mode: dropout layers are active

num_epochs = 100
# Print the mean loss over every `log_num` consecutive batches
# (1 = report every batch).
log_num = 1

for epoch in range(num_epochs):
    running_loss = 0.0
    for batch_num, (data, labels) in enumerate(train_dataloader):
        data = data.to(device)
        # CrossEntropyLoss accepts the integer class indices directly, so the
        # labels need no one-hot encoding or float conversion.
        labels = labels.to(device)

        Optimiser.zero_grad()
        prediction = model(data)
        loss = loss_fn(prediction, labels)
        loss.backward()
        Optimiser.step()

        running_loss += loss.item()
        if (batch_num + 1) % log_num == 0:
            print(f'[{epoch + 1}, {batch_num + 1:5d}] loss: {running_loss / log_num:.3f}')
            running_loss = 0.0

[1,     1] loss: 2.304
[1,     2] loss: 2.299
[1,     3] loss: 2.290
[1,     4] loss: 2.277
[2,     1] loss: 2.260
[2,     2] loss: 2.240
[2,     3] loss: 2.216
[2,     4] loss: 2.184
[3,     1] loss: 2.145
[3,     2] loss: 2.095
[3,     3] loss: 2.032
[3,     4] loss: 1.953
[4,     1] loss: 1.857
[4,     2] loss: 1.740
[4,     3] loss: 1.609
[4,     4] loss: 1.470
[5,     1] loss: 1.324
[5,     2] loss: 1.177
[5,     3] loss: 1.040
[5,     4] loss: 0.920
[6,     1] loss: 0.809
[6,     2] loss: 0.717
[6,     3] loss: 0.646
[6,     4] loss: 0.600
[7,     1] loss: 0.553
[7,     2] loss: 0.533
[7,     3] loss: 0.505
[7,     4] loss: 0.491
[8,     1] loss: 0.482
[8,     2] loss: 0.468
[8,     3] loss: 0.459
[8,     4] loss: 0.468
[9,     1] loss: 0.447
[9,     2] loss: 0.440
[9,     3] loss: 0.441
[9,     4] loss: 0.426
[10,     1] loss: 0.413
[10,     2] loss: 0.411
[10,     3] loss: 0.403
[10,     4] loss: 0.393
[11,     1] loss: 0.383
[11,     2] loss: 0.376
[11,     3] loss: 0.370
[11,

In [17]:
# Measure the classification error rate over the test loader.
total = 0
correct = 0
model.eval()  # evaluation mode: dropout layers are disabled

with torch.no_grad():  # no gradients are needed for scoring
    for data, labels in test_dataloader:
        data = data.to(device)
        labels = labels.to(device)

        output = model(data)
        # Predicted class = index of the largest score along dimension 1.
        predicted = torch.max(output, dim=1).indices

        total += labels.shape[0]
        correct += torch.eq(predicted, labels).sum().item()

# Fraction of test samples that were misclassified.
error_rate = 1 - (correct / total)
print(error_rate)

0.02057500000000001
