In [1]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torch.optim import Adam

# Visualization tools
import torchvision
import torchvision.transforms.v2 as transforms
import torchvision.transforms.functional as F
import matplotlib.pyplot as plt

In [2]:
train_set = torchvision.datasets.MNIST("./data/", train=True, download=True)
valid_set = torchvision.datasets.MNIST("./data/", train=False, download=True)

In [3]:
train_set

Dataset MNIST
    Number of datapoints: 60000
    Root location: ./data/
    Split: Train

In [4]:
valid_set

Dataset MNIST
    Number of datapoints: 10000
    Root location: ./data/
    Split: Test

In [5]:
trans = transforms.Compose([transforms.ToTensor()])



In [6]:
train_set.transform = trans
valid_set.transform = trans

In [7]:
batch_size = 32

train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
valid_loader = DataLoader(valid_set, batch_size=batch_size)

In [8]:
n_classes = 10
input_size = 1 * 28 * 28 
layers = [
    nn.Flatten(),
    nn.Linear(input_size, 512),  # Input 512 neurons
    nn.ReLU(),  # Activation for input
    nn.Linear(512, 512),  # Hidden
    nn.ReLU(),  # Activation for hidden
    nn.Linear(512, n_classes)  # Output
]
layers

[Flatten(start_dim=1, end_dim=-1),
 Linear(in_features=784, out_features=512, bias=True),
 ReLU(),
 Linear(in_features=512, out_features=512, bias=True),
 ReLU(),
 Linear(in_features=512, out_features=10, bias=True)]

In [9]:
model = nn.Sequential(*layers)
model

Sequential(
  (0): Flatten(start_dim=1, end_dim=-1)
  (1): Linear(in_features=784, out_features=512, bias=True)
  (2): ReLU()
  (3): Linear(in_features=512, out_features=512, bias=True)
  (4): ReLU()
  (5): Linear(in_features=512, out_features=10, bias=True)
)

In [10]:
model = torch.compile(model)

In [11]:
loss_function = nn.CrossEntropyLoss()

In [12]:
optimizer = Adam(model.parameters())

In [13]:
train_N = len(train_loader.dataset)
valid_N = len(valid_loader.dataset)

In [14]:
def get_batch_accuracy(output, y, N):
    pred = output.argmax(dim=1, keepdim=True)
    correct = pred.eq(y.view_as(pred)).sum().item()
    return correct / N

In [15]:
def train():
    loss = 0
    accuracy = 0

    model.train()
    for x, y in train_loader:
        x, y = x.to(device), y.to(device)
        output = model(x)
        optimizer.zero_grad()
        batch_loss = loss_function(output, y)
        batch_loss.backward()
        optimizer.step()

        loss += batch_loss.item()
        accuracy += get_batch_accuracy(output, y, train_N)
    print('Train - Loss: {:.4f} Accuracy: {:.4f}'.format(loss, accuracy))

In [16]:
def validate():
    loss = 0
    accuracy = 0

    model.eval()
    with torch.no_grad():
        for x, y in valid_loader:
            x, y = x.to(device), y.to(device)
            output = model(x)

            loss += loss_function(output, y).item()
            accuracy += get_batch_accuracy(output, y, valid_N)
    print('Valid - Loss: {:.4f} Accuracy: {:.4f}'.format(loss, accuracy))

In [17]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
torch.cuda.is_available()


False

In [18]:
epochs = 20


for epoch in range(epochs):
    print('Epoch: {}'.format(epoch))
    train()
    validate()

Epoch: 0
Train - Loss: 383.2889 Accuracy: 0.9363
Valid - Loss: 40.2021 Accuracy: 0.9588
Epoch: 1
Train - Loss: 157.5357 Accuracy: 0.9736
Valid - Loss: 29.0261 Accuracy: 0.9699
Epoch: 2
Train - Loss: 107.4230 Accuracy: 0.9824
Valid - Loss: 36.7149 Accuracy: 0.9675
Epoch: 3
Train - Loss: 83.7541 Accuracy: 0.9859
Valid - Loss: 29.9132 Accuracy: 0.9739
Epoch: 4
Train - Loss: 64.3917 Accuracy: 0.9888
Valid - Loss: 25.2523 Accuracy: 0.9771
Epoch: 5
Train - Loss: 54.7731 Accuracy: 0.9902
Valid - Loss: 32.9801 Accuracy: 0.9754
Epoch: 6
Train - Loss: 49.5868 Accuracy: 0.9915
Valid - Loss: 30.3259 Accuracy: 0.9787
Epoch: 7
Train - Loss: 42.2626 Accuracy: 0.9931
Valid - Loss: 27.5130 Accuracy: 0.9795
Epoch: 8
Train - Loss: 38.1425 Accuracy: 0.9938
Valid - Loss: 33.2193 Accuracy: 0.9779
Epoch: 9
Train - Loss: 32.4775 Accuracy: 0.9944
Valid - Loss: 27.0272 Accuracy: 0.9810
Epoch: 10
Train - Loss: 34.4415 Accuracy: 0.9947
Valid - Loss: 30.4484 Accuracy: 0.9812
Epoch: 11
Train - Loss: 22.9364 Accurac

In [19]:
x0, y0 = train_set[7]

y0

tensor([[[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,

In [20]:

trans = transforms.Compose([transforms.ToTensor()])
x0tensor = trans(x0)


In [21]:

prediction = model(x0tensor)
prediction

tensor([[ -81.0914,  -26.9573,  -40.9045,   30.0106,  -62.0796,  -39.2270,
         -103.4740,  -22.2291,  -36.0049,  -34.7299]],
       grad_fn=<AddmmBackward0>)

In [22]:
prediction.argmax(dim=1, keepdim=True)

tensor([[3]])

In [23]:
x0, y0 = train_set[7]
y0

3