# Imports

In [114]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import datasets
from torchvision.transforms import v2

In [115]:
cuda_available = torch.cuda.is_available()
print(f"CUDA Available: {cuda_available}")

CUDA Available: True


In [116]:
torch.set_float32_matmul_precision('high')

# Hyperparameters


In [117]:
batch_size = 2048
epochs = 25
learning_rate = 0.01
layers = 4
dropout = 0.4
units = 512

# Data loading

In [118]:
transform = v2.Compose([
    v2.ToImage(),
    v2.ToDtype(torch.float32, scale=True)
])

# Load the training and test datasets
trainset = datasets.MNIST(root='./data', train=True, transform=transform)
testset = datasets.MNIST(root='./data', train=False, transform=transform)
trainset, testset

(Dataset MNIST
     Number of datapoints: 60000
     Root location: ./data
     Split: Train
     StandardTransform
 Transform: Compose(
                  ToImage()
                  ToDtype(scale=True)
            ),
 Dataset MNIST
     Number of datapoints: 10000
     Root location: ./data
     Split: Test
     StandardTransform
 Transform: Compose(
                  ToImage()
                  ToDtype(scale=True)
            ))

In [119]:
# Split trainset into train and validation
trainset, valset = torch.utils.data.random_split(trainset, [50000, 10000])
# Create data loaders
trainloader = torch.utils.data.DataLoader(
    trainset, 
    batch_size=batch_size, 
    pin_memory=True,
    num_workers=4,
    shuffle=True)
valloader = torch.utils.data.DataLoader(
    valset, 
    batch_size=batch_size, 
    pin_memory=True,
    num_workers=4,
    shuffle=False)
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False)

# Model definition

In [120]:
def model_builder(layers=3, units=512, dropout=0.3):
    model = nn.Sequential()
    model.add_module("flatten", nn.Flatten())
    model.add_module("input", nn.Linear(28*28, units))
    model.add_module("relu_input", nn.ReLU())
    model.add_module("dropout_input", nn.Dropout(dropout))
    for i in range(layers-1):
        model.add_module(f"linear_{i}", nn.Linear(units, units))
        model.add_module(f"relu_{i}", nn.ReLU())
        model.add_module(f"dropout_{i}", nn.Dropout(dropout))
    model.add_module("output", nn.Linear(units, 10))
    return model

In [121]:
def init_weights(m):
    if isinstance(m, nn.Linear):
        nn.init.kaiming_normal_(m.weight, nonlinearity='relu')
        nn.init.normal_(m.bias, 0, 0.001)



In [122]:
model = model_builder(layers, units, dropout)
if cuda_available:
    model.to("cuda")

model.apply(init_weights)
print(model)

Sequential(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (input): Linear(in_features=784, out_features=512, bias=True)
  (relu_input): ReLU()
  (dropout_input): Dropout(p=0.4, inplace=False)
  (linear_0): Linear(in_features=512, out_features=512, bias=True)
  (relu_0): ReLU()
  (dropout_0): Dropout(p=0.4, inplace=False)
  (linear_1): Linear(in_features=512, out_features=512, bias=True)
  (relu_1): ReLU()
  (dropout_1): Dropout(p=0.4, inplace=False)
  (linear_2): Linear(in_features=512, out_features=512, bias=True)
  (relu_2): ReLU()
  (dropout_2): Dropout(p=0.4, inplace=False)
  (output): Linear(in_features=512, out_features=10, bias=True)
)


# Train

In [123]:
def test(model, test):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for x, y in test:
            if cuda_available:
                x, y = x.cuda(), y.cuda()
            y_hat = model(x)
            _, predicted = torch.max(y_hat.data, 1)
            total += y.size(0)
            correct += (predicted == y).sum().item()
    return correct / total

In [124]:
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
accuracies = []

def train(model, trainloader, valloader, epochs, loss_fn, optimizer):
    for epoch in range(epochs):
        model.train()
        for x, y in trainloader:
            if cuda_available:
                x, y = x.cuda(), y.cuda()
            optimizer.zero_grad()
            y_pred = model(x)
            loss = loss_fn(y_pred, y)
            loss.backward()
            optimizer.step()
        accuracy = test(model, valloader)
        accuracies.append(accuracy)
        print(f"Epoch {epoch+1:>2}/{epochs}, Loss: {loss.item():.4f}, Accuracy: {accuracy}")

In [125]:
train(model, trainloader, valloader, epochs, loss_fn, optimizer)

Epoch  1/25, Loss: 0.6455, Accuracy: 0.8457
Epoch  2/25, Loss: 0.4501, Accuracy: 0.9335
Epoch  3/25, Loss: 0.3065, Accuracy: 0.9451
Epoch  4/25, Loss: 0.3371, Accuracy: 0.9508
Epoch  5/25, Loss: 0.2253, Accuracy: 0.9554
Epoch  6/25, Loss: 0.2313, Accuracy: 0.9599
Epoch  7/25, Loss: 0.2150, Accuracy: 0.9567
Epoch  8/25, Loss: 0.1788, Accuracy: 0.9617
Epoch  9/25, Loss: 0.1693, Accuracy: 0.9637
Epoch 10/25, Loss: 0.2417, Accuracy: 0.9625
Epoch 11/25, Loss: 0.1793, Accuracy: 0.9649
Epoch 12/25, Loss: 0.1653, Accuracy: 0.9644
Epoch 13/25, Loss: 0.1864, Accuracy: 0.9654
Epoch 14/25, Loss: 0.1492, Accuracy: 0.9668
Epoch 15/25, Loss: 0.1253, Accuracy: 0.9671
Epoch 16/25, Loss: 0.1479, Accuracy: 0.9688
Epoch 17/25, Loss: 0.1423, Accuracy: 0.9675
Epoch 18/25, Loss: 0.1919, Accuracy: 0.97
Epoch 19/25, Loss: 0.1247, Accuracy: 0.9672
Epoch 20/25, Loss: 0.1600, Accuracy: 0.9662
Epoch 21/25, Loss: 0.1622, Accuracy: 0.9696
Epoch 22/25, Loss: 0.1584, Accuracy: 0.9678
Epoch 23/25, Loss: 0.1504, Accurac

In [126]:
test_accuracy = test(model, testloader)
print(f"Test accuracy: {test_accuracy}")

Test accuracy: 0.9716
