In [11]:
import torch
import torchvision
from torchvision import datasets
from torchvision import transforms
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import numpy as np
from tqdm.auto import tqdm

device = "cuda" if torch.cuda.is_available() else "cpu"
device

'cpu'

In [3]:
train_dataset = datasets.MNIST(root="root",
                               train=True,
                               download=True,
                               transform=transforms.ToTensor(),
                               target_transform=None)
test_dataset = datasets.MNIST(root="root",
                              train=False,
                              download=True,
                              transform=transforms.ToTensor(),
                              target_transform=None)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz to root/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 10204792.19it/s]


Extracting root/MNIST/raw/train-images-idx3-ubyte.gz to root/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz to root/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 1138279.40it/s]


Extracting root/MNIST/raw/train-labels-idx1-ubyte.gz to root/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to root/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:00<00:00, 9611946.18it/s]


Extracting root/MNIST/raw/t10k-images-idx3-ubyte.gz to root/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz to root/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 7344074.31it/s]

Extracting root/MNIST/raw/t10k-labels-idx1-ubyte.gz to root/MNIST/raw






In [4]:
class_names = train_dataset.classes
class_names

['0 - zero',
 '1 - one',
 '2 - two',
 '3 - three',
 '4 - four',
 '5 - five',
 '6 - six',
 '7 - seven',
 '8 - eight',
 '9 - nine']

In [7]:
test_batch = DataLoader(train_dataset, batch_size=32, shuffle=True)
train_batch = DataLoader(test_dataset, batch_size=32, shuffle=True)

In [8]:
from torch import nn
class MNISTModelV0(nn.Module):
  def __init__(self, input_shape, hidden_layers, output_shape):
    super().__init__()
    self.layer_stack_1 = nn.Sequential(
        nn.Conv2d(in_channels=input_shape,
                  out_channels=hidden_layers,
                  kernel_size=3,
                  stride=1,
                  padding=1),
        nn.ReLU(),
        nn.Conv2d(in_channels=hidden_layers,
                  out_channels=hidden_layers,
                  kernel_size=3,
                  stride=1,
                  padding=1),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=2)
    )
    self.layer_stack_2 = nn.Sequential(
        nn.Conv2d(in_channels=hidden_layers,
                  out_channels=hidden_layers,
                  kernel_size=3,
                  stride=1,
                  padding=1),
        nn.ReLU(),
        nn.Conv2d(in_channels=hidden_layers,
                  out_channels=hidden_layers,
                  kernel_size=3,
                  stride=1,
                  padding=1),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=2)
    )
    self.layer_stack_3 = nn.Sequential(
        nn.Flatten(),
        nn.Linear(in_features=hidden_layers*7*7,
                  out_features=output_shape)
    )

  def forward(self, x):
    x = self.layer_stack_1(x)
    # print(x.shape)
    x = self.layer_stack_2(x)
    # print(x.shape)
    x = self.layer_stack_3(x)
    # print(x.shape)
    return x

model_0 = MNISTModelV0(input_shape=1,
                       hidden_layers=10,
                       output_shape=len(class_names)).to(device)

In [9]:
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model_0.parameters(),
                         lr=0.1)

In [10]:
def accuracy_fn(y_true, y_pred):
    """Calculates accuracy between truth labels and predictions.

    Args:
        y_true (torch.Tensor): Truth labels for predictions.
        y_pred (torch.Tensor): Predictions to be compared to predictions.

    Returns:
        [torch.float]: Accuracy value between y_true and y_pred, e.g. 78.45
    """
    correct = torch.eq(y_true, y_pred).sum().item()
    acc = (correct / len(y_pred)) * 100
    return acc


In [13]:
epochs = 10

for epoch in tqdm(range(epochs)):
  model_0.to(device)
  train_loss, train_acc = 0, 0

  model_0.train()
  for batch, (X, y) in tqdm(enumerate(train_batch)):
    X, y = X.to(device), y.to(device)
    # print(f"X:{X.shape}")

    y_pred = model_0(X)
    # print(f"y: {y}\n")
    # print(f"y_pred: {y_pred}")
    loss = loss_fn(y_pred, y)
    train_loss += loss
    train_acc += accuracy_fn(y_true=y,
                           y_pred=y_pred.argmax(dim=1))
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()


  model_0.eval()
  with torch.inference_mode():
    test_loss, test_acc = 0, 0
    for batch, (X, y) in enumerate(test_batch):
      X, y = X.to(device), y.to(device)
      test_pred = model_0(X)
      loss = loss_fn(test_pred, y)
      test_loss += loss
      test_acc += accuracy_fn(y_true=y,
                            y_pred=test_pred.argmax(dim=1))

  print(f"Train loss = {train_loss/len(train_batch)} | Train acc = {train_acc/len(train_batch)} | Test loss = {test_loss/len(test_batch)} | Test acc = {test_acc/len(test_batch)}")


  0%|          | 0/10 [00:00<?, ?it/s]

0it [00:00, ?it/s]

Train loss = 0.13439415395259857 | Train acc = 95.7667731629393 | Test loss = 0.14557431638240814 | Test acc = 95.43166666666667


0it [00:00, ?it/s]

Train loss = 0.0931708812713623 | Train acc = 97.13458466453675 | Test loss = 0.13716445863246918 | Test acc = 95.77833333333334


0it [00:00, ?it/s]

Train loss = 0.07206112146377563 | Train acc = 97.7935303514377 | Test loss = 0.11333918571472168 | Test acc = 96.61666666666666


0it [00:00, ?it/s]

Train loss = 0.05974609777331352 | Train acc = 98.01317891373802 | Test loss = 0.10547298938035965 | Test acc = 96.85666666666667


0it [00:00, ?it/s]

Train loss = 0.048521507531404495 | Train acc = 98.30271565495208 | Test loss = 0.12509235739707947 | Test acc = 96.53666666666666


0it [00:00, ?it/s]

Train loss = 0.043489404022693634 | Train acc = 98.5423322683706 | Test loss = 0.12066573649644852 | Test acc = 96.82666666666667


0it [00:00, ?it/s]

Train loss = 0.034510351717472076 | Train acc = 98.79193290734824 | Test loss = 0.12702298164367676 | Test acc = 96.51833333333333


0it [00:00, ?it/s]

Train loss = 0.0325436107814312 | Train acc = 98.84185303514377 | Test loss = 0.1702665090560913 | Test acc = 95.95


0it [00:00, ?it/s]

Train loss = 0.029047777876257896 | Train acc = 99.05151757188499 | Test loss = 0.10607660561800003 | Test acc = 97.20166666666667


0it [00:00, ?it/s]

Train loss = 0.023632986471056938 | Train acc = 99.2611821086262 | Test loss = 0.1252860128879547 | Test acc = 97.02166666666666


In [27]:
torch.save(model_0.state_dict(), "model_0.pth")