# Imports

In [67]:
import lightning as L
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchmetrics import Accuracy
from torchvision import datasets
from torchvision.transforms import v2

In [68]:
cuda_available = torch.cuda.is_available()
print(f"CUDA Available: {cuda_available}")

CUDA Available: True


In [69]:
torch.set_float32_matmul_precision('high')

# Hyperparameters


In [70]:
batch_size = 256
epochs = 100
learning_rate = 0.002
layers = 4
dropout = 0.3
units = 512
batch_norm = True

# Data loading

In [71]:
transform = v2.Compose([
    v2.ToImage(),
    v2.ToDtype(torch.float32, scale=True)
])

# Load the training and test datasets
trainset = datasets.MNIST(root='./data', train=True, transform=transform)
testset = datasets.MNIST(root='./data', train=False, transform=transform)
trainset, testset

(Dataset MNIST
     Number of datapoints: 60000
     Root location: ./data
     Split: Train
     StandardTransform
 Transform: Compose(
                  ToImage()
                  ToDtype(scale=True)
            ),
 Dataset MNIST
     Number of datapoints: 10000
     Root location: ./data
     Split: Test
     StandardTransform
 Transform: Compose(
                  ToImage()
                  ToDtype(scale=True)
            ))

In [72]:
# Split trainset into train and validation
trainset, valset = torch.utils.data.random_split(trainset, [50000, 10000])
# Create data loaders
trainloader = torch.utils.data.DataLoader(
    trainset, 
    batch_size=batch_size, 
    pin_memory=True,
    num_workers=4,
    shuffle=True)
valloader = torch.utils.data.DataLoader(
    valset, 
    batch_size=batch_size, 
    pin_memory=True,
    num_workers=4,
    shuffle=False)
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False)

# Model definition

In [73]:
def model_builder(layers=3, units=512, dropout=0.3, batch_norm=False):
    model = nn.Sequential()
    model.add_module("flatten", nn.Flatten())
    model.add_module("input", nn.Linear(28*28, units))
    if batch_norm:
        model.add_module("batch_norm_input", nn.BatchNorm1d(units))
    model.add_module("relu_input", nn.ReLU())
    model.add_module("dropout_input", nn.Dropout(dropout))
    for i in range(layers-1):
        model.add_module(f"linear_{i}", nn.Linear(units, units))
        if batch_norm:
            model.add_module(f"batch_norm_{i}", nn.BatchNorm1d(units))
        model.add_module(f"relu_{i}", nn.ReLU())
        model.add_module(f"dropout_{i}", nn.Dropout(dropout))
    model.add_module("output", nn.Linear(units, 10))
    return model

In [74]:
def init_weights(m):
    if isinstance(m, nn.Linear):
        nn.init.kaiming_normal_(m.weight, nonlinearity='relu')
        nn.init.normal_(m.bias, 0, 0.001)

In [75]:
model = model_builder(layers, units, dropout, batch_norm)
if cuda_available:
    model.to("cuda")

model.apply(init_weights)
print(model)

Sequential(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (input): Linear(in_features=784, out_features=512, bias=True)
  (batch_norm_input): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu_input): ReLU()
  (dropout_input): Dropout(p=0.3, inplace=False)
  (linear_0): Linear(in_features=512, out_features=512, bias=True)
  (batch_norm_0): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu_0): ReLU()
  (dropout_0): Dropout(p=0.3, inplace=False)
  (linear_1): Linear(in_features=512, out_features=512, bias=True)
  (batch_norm_1): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu_1): ReLU()
  (dropout_1): Dropout(p=0.3, inplace=False)
  (linear_2): Linear(in_features=512, out_features=512, bias=True)
  (batch_norm_2): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu_2): ReLU()
  (dropout_2): Dropout(p=0.3, inplace=False)
  (output): L

# Lightning

In [76]:
class MNISTLightning(L.LightningModule):
    def __init__(self, model, learning_rate=0.01):
        super().__init__()
        self.model = model
        self.learning_rate = learning_rate
        self.accuracy = Accuracy(task='multiclass', num_classes=10)
    
    def forward(self, x):
        return self.model(x)
    
    def training_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self.model(x)
        loss = F.cross_entropy(y_hat, y)
        self.log("train_loss", loss)
        return loss
    
    def validation_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self.model(x)
        loss = F.cross_entropy(y_hat, y)
        self.log("val_loss", loss)
        self.log("val_acc", self.accuracy(y_hat, y), on_epoch=True, prog_bar=True)
        self.log('learning_rate', self.lr_schedulers().get_last_lr()[0], on_epoch=True, prog_bar=True)
    
    def test_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self.model(x)
        loss = F.cross_entropy(y_hat, y)
        self.log("test_loss", loss)
        self.log("test_acc", self.accuracy(y_hat, y))
    
    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.model.parameters(), lr=self.learning_rate)
        sch = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=5)
        return {
            "optimizer": optimizer,
            "lr_scheduler": {
                "scheduler": sch,
                "interval": "epoch",
                "monitor": "val_acc"
            }
        }

In [77]:
early_stop = L.pytorch.callbacks.EarlyStopping(monitor='val_acc', patience=10, mode='max')

trainer = L.Trainer(callbacks=[early_stop])
mnist_lightning = MNISTLightning(model, learning_rate)
trainer.fit(mnist_lightning, trainloader, valloader)
trainer.test(mnist_lightning, testloader)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
/home/chandon/miniconda3/envs/pyto/lib/python3.11/site-packages/lightning/pytorch/loops/utilities.py:73: `max_epochs` was not set. Setting it to 1000 epochs. To train without an epoch limit, set `max_epochs=-1`.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name     | Type               | Params | Mode 
--------------------------------------------------------
0 | model    | Sequential         | 1.2 M  | train
1 | accuracy | MulticlassAccuracy | 0      | train
--------------------------------------------------------
1.2 M     Trainable params
0         Non-trainable params
1.2 M     Total params
4.796     Total estimated model params size (MB)
20        Modules in train mode
0         Modules in eval mode


Epoch 45: 100%|██████████| 196/196 [00:02<00:00, 79.73it/s, v_num=0, val_acc=0.986, learning_rate=0.00025] 


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
/home/chandon/miniconda3/envs/pyto/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:425: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Testing DataLoader 0: 100%|██████████| 40/40 [00:01<00:00, 37.19it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        test_acc            0.9858999848365784
        test_loss           0.0683051273226738
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


[{'test_loss': 0.0683051273226738, 'test_acc': 0.9858999848365784}]