In [119]:
import torch
import torchvision

In [120]:
device = torch.device("cuda")

In [121]:
training_data = torchvision.datasets.MNIST('data', train=True, download=True)
test_data = torchvision.datasets.MNIST('data', train=False, download=True)

In [None]:
from torchvision import transforms

train_transform = transforms.Compose([
    # random +-10 degrees
    transforms.RandomRotation(10), 
    # random translation + scaling + rotation
    transforms.RandomAffine(20, translate=(0.15, 0.15), scale=(0.85, 1.15)),
    #convert to tensor
    transforms.ToTensor(),
])

test_transform = transforms.Compose([
    transforms.ToTensor(),
])


In [None]:
from torch.utils.data import Dataset
from torchvision.datasets import MNIST
import torch.nn.functional as F

class MNISTDataset(Dataset):
    def __init__(self, partition="train", transform=None):
        """
        A custom dataset for the MNIST dataset.
        """
        self.partition = partition
        self.transform = transform
        
        self.mnist_data = MNIST(
            root="./data", 
            train=(partition == "train"), 
            download=True
        )

    def __len__(self):
        return len(self.mnist_data)

    def __getitem__(self, idx):
        image, label = self.mnist_data[idx]

        if self.transform:
            image = self.transform(image)

        # reshape the image to a flat vector of size 784 (1x28x28 -> 784)
        image = image.view(-1)

        # one hot encode labels
        label = F.one_hot(torch.tensor(label), num_classes=10).float()

        return {"idx": idx, "img": image, "label": label}

# create the datasets
train_dataset = MNISTDataset(partition="train", transform=train_transform)
test_dataset = MNISTDataset(partition="test", transform=test_transform)


In [145]:
import torch
from torch.utils.data import DataLoader
from torch import nn

# DataLoader Settings
batch_size = 100
train_dataloader = DataLoader(
    dataset=train_dataset, 
    batch_size=batch_size, 
    shuffle=True, 
    num_workers=0
)

test_dataloader = DataLoader(
    dataset=test_dataset, 
    batch_size=batch_size, 
    shuffle=False, 
    num_workers=0
)

class Net(nn.Module):
    def __init__(self, sizes=None, criterion=None):
        super(Net, self).__init__()

        self.layers = nn.ModuleList()
        if sizes is None:
            sizes = [[784, 1024], [1024, 1024], [1024, 1024], [1024, 10]]

        # add intermediate layers
        for in_dim, out_dim in sizes[:-1]:
            self.layers.append(nn.Linear(in_dim, out_dim))
            self.layers.append(nn.BatchNorm1d(out_dim))
            self.layers.append(nn.ReLU())

        # ddd the classifier (final layer)
        in_dim, out_dim = sizes[-1]
        self.classifier = nn.Linear(in_dim, out_dim)

        self.criterion = criterion

    def forward(self, x, y=None):
        for layer in self.layers:
            x = layer(x)
        x = self.classifier(x)

        if y is not None:
            loss = self.criterion(x, y)
            return loss, x
        return x

# Training Settings
criterion = nn.CrossEntropyLoss()

# Instantiate the Network
num_classes = 10
net = Net(
    sizes=[[784, 1024], [1024, 1024], [1024, 1024], [1024, num_classes]],
    criterion=criterion
)
print(net)

def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

print("Trainable Parameters: ", count_parameters(net))


Net(
  (layers): ModuleList(
    (0): Linear(in_features=784, out_features=1024, bias=True)
    (1): BatchNorm1d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Linear(in_features=1024, out_features=1024, bias=True)
    (4): BatchNorm1d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU()
    (6): Linear(in_features=1024, out_features=1024, bias=True)
    (7): BatchNorm1d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (8): ReLU()
  )
  (classifier): Linear(in_features=1024, out_features=10, bias=True)
  (criterion): CrossEntropyLoss()
)
Trainable Parameters:  2919434


In [148]:
import torch.optim as optim
from tqdm import tqdm

# Optimizer and Scheduler
optimizer = optim.SGD(
    net.parameters(), lr=0.1, weight_decay=1e-6, momentum=0.9
)
scheduler = torch.optim.lr_scheduler.MultiStepLR(
    optimizer, milestones=[25, 50, 75], gamma=0.1
)

# Move the model to the target device
net = net.to(device)

# Training settings
epochs = 1
best_accuracy = -1
best_epoch = 0

print("\n---- Start Training ----")
for epoch in range(epochs):
    # Training Phase
    net.train()
    train_loss, train_correct = 0, 0

    with tqdm(train_dataloader, desc=f"Epoch {epoch + 1}/{epochs}", unit="batch") as tepoch:
        for batch in tepoch:
            # Load batch data
            images = batch["img"].to(device)
            labels = batch["label"].to(device)

            # Reset gradients
            optimizer.zero_grad()

            # Forward pass
            outputs = net(images)
            loss = criterion(outputs, labels)

            # Backward pass and optimization
            loss.backward()
            optimizer.step()

            # Update training metrics
            labels = torch.argmax(labels, dim=1)
            predictions = torch.argmax(outputs, dim=1)
            train_correct += predictions.eq(labels).sum().item()
            train_loss += loss.item()

    # Normalize training loss
    train_loss /= len(train_dataloader.dataset)

    # Testing Phase
    net.eval()
    test_loss, test_correct = 0, 0

    with torch.no_grad():
        with tqdm(test_dataloader, desc=f"Test {epoch + 1}/{epochs}", unit="batch") as tepoch:
            for batch in tepoch:
                # Load batch data
                images = batch["img"].to(device)
                labels = batch["label"].to(device)

                # Forward pass
                outputs = net(images)
                test_loss += criterion(outputs, labels).item()

                # Update testing metrics
                labels = torch.argmax(labels, dim=1)
                predictions = torch.argmax(outputs, dim=1)
                test_correct += predictions.eq(labels).sum().item()

    # Normalize testing loss and calculate accuracy
    test_loss /= len(test_dataloader.dataset)
    test_accuracy = 100.0 * test_correct / len(test_dataloader.dataset)
    train_accuracy = 100.0 * train_correct / len(train_dataloader.dataset)

    print(f"[Epoch {epoch + 1}] "
          f"Train Loss: {train_loss:.6f} - Test Loss: {test_loss:.6f} "
          f"Train Accuracy: {train_accuracy:.2f}% - Test Accuracy: {test_accuracy:.2f}%")

    # Save the best model
    if test_accuracy > best_accuracy:
        best_accuracy = test_accuracy
        best_epoch = epoch + 1
        torch.save(net.state_dict(), "best_model.pt")

    # Step the learning rate scheduler
    scheduler.step()

# Summary of best accuracy
print(f"\nBEST TEST ACCURACY: {best_accuracy:.2f}% at epoch {best_epoch}")



---- Start Training ----


Epoch 1/1: 100%|██████████| 600/600 [00:13<00:00, 45.74batch/s]
Test 1/1: 100%|██████████| 100/100 [00:00<00:00, 120.61batch/s]

[Epoch 1] Train Loss: 0.000639 - Test Loss: 0.000202 Train Accuracy: 98.01% - Test Accuracy: 99.36%

BEST TEST ACCURACY: 99.36% at epoch 1





In [150]:
# Load Best Model Weights
net.load_state_dict(torch.load("./models/best_model.pt"))
net.eval()

# Test the Model
test_loss, test_correct = 0, 0

print("\n---- Evaluating the Best Model ----")
with torch.no_grad():
    with tqdm(test_dataloader, desc="Testing Best Model", unit="batch") as tepoch:
        for batch in tepoch:
            # Load batch data
            images = batch["img"].to(device)
            labels = batch["label"].to(device)

            # Forward pass
            outputs = net(images)
            test_loss += criterion(outputs, labels).item()

            # Convert one-hot encoded labels to indices
            labels = torch.argmax(labels, dim=1)
            predictions = torch.argmax(outputs, dim=1)

            # Update correct prediction count
            test_correct += predictions.eq(labels).sum().item()

# Normalize the loss and calculate accuracy
test_loss /= len(test_dataloader.dataset)
test_accuracy = 100.0 * test_correct / len(test_dataloader.dataset)

print(f"Final Best Accuracy: {test_accuracy:.2f}%")


  net.load_state_dict(torch.load("./models/best_model.pt"))



---- Evaluating the Best Model ----


Testing Best Model: 100%|██████████| 100/100 [00:00<00:00, 107.76batch/s]

Final Best Accuracy: 99.52%



