In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms


In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cuda


In [3]:
batch_size = 128
epochs = 50
learning_rate = 0.001
dropout_prob = 0.5
patience = 5

In [4]:
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

In [5]:
train_dataset = datasets.MNIST(root='./data', train=True, download=True, transform=transform)
test_dataset = datasets.MNIST(root='./data', train=False, download=True, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size)

100%|██████████| 9.91M/9.91M [00:01<00:00, 5.10MB/s]
100%|██████████| 28.9k/28.9k [00:00<00:00, 134kB/s]
100%|██████████| 1.65M/1.65M [00:01<00:00, 1.27MB/s]
100%|██████████| 4.54k/4.54k [00:00<00:00, 6.03MB/s]


In [6]:
class MLP(nn.Module):
  def __init__(self):
    super(MLP,self).__init__()
    self.model = nn.Sequential(
        nn.Flatten(),
        nn.Linear(28*28,512),
        nn.ReLU(),
        nn.Dropout(dropout_prob),
        nn.Linear(512,256),
        nn.ReLU(),
        nn.Dropout(dropout_prob),
        nn.Linear(256,10)
    )

  def forward(self,x):
    return self.model(x)

model = MLP().to(device)

In [7]:
criterion  = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(),lr = learning_rate)

In [8]:
best_val_loss = float('inf')
epochs_no_imporve = 0

In [10]:
for epoch in range(epochs):
    model.train()
    running_loss = 0.0

    for inputs, targets in train_loader:
        inputs, targets = inputs.to(device), targets.to(device)

        outputs = model(inputs)
        loss = criterion(outputs, targets)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()


    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for inputs, targets in test_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            val_loss += loss.item()

    print(f"Epoch {epoch+1}/{epochs} | Train Loss: {running_loss:.4f} | Val Loss: {val_loss:.4f}")


    if val_loss < best_val_loss:
        best_val_loss = val_loss
        epochs_no_improve = 0
    else:
        epochs_no_improve += 1

    if epochs_no_improve >= patience:
        print(f"\n Early stopping at epoch {epoch+1} — no improvement for {patience} epochs.")
        break

Epoch 1/50 | Train Loss: 99.7988 | Val Loss: 9.0921
Epoch 2/50 | Train Loss: 94.8836 | Val Loss: 8.6187
Epoch 3/50 | Train Loss: 88.7592 | Val Loss: 8.2523
Epoch 4/50 | Train Loss: 84.6559 | Val Loss: 7.1568
Epoch 5/50 | Train Loss: 82.3125 | Val Loss: 8.2518
Epoch 6/50 | Train Loss: 77.6233 | Val Loss: 7.4128
Epoch 7/50 | Train Loss: 73.8530 | Val Loss: 7.7225
Epoch 8/50 | Train Loss: 75.6226 | Val Loss: 6.6923
Epoch 9/50 | Train Loss: 71.6139 | Val Loss: 6.6765
Epoch 10/50 | Train Loss: 70.4500 | Val Loss: 6.6638
Epoch 11/50 | Train Loss: 68.6212 | Val Loss: 6.6581
Epoch 12/50 | Train Loss: 66.6655 | Val Loss: 6.8949
Epoch 13/50 | Train Loss: 64.6412 | Val Loss: 6.1691
Epoch 14/50 | Train Loss: 64.5130 | Val Loss: 5.8755
Epoch 15/50 | Train Loss: 62.1938 | Val Loss: 6.7062
Epoch 16/50 | Train Loss: 62.7493 | Val Loss: 6.1211
Epoch 17/50 | Train Loss: 60.9950 | Val Loss: 6.0153
Epoch 18/50 | Train Loss: 59.0796 | Val Loss: 5.8997
Epoch 19/50 | Train Loss: 59.7197 | Val Loss: 6.3219

 

In [11]:
model.eval()
correct = 0
total = 0
with torch.no_grad():
  for inputs ,targets in test_loader:
    inputs , targets = inputs.to(device),targets.to(device)
    outputs = model(inputs)
    _ , predicted = torch.max(outputs,1)
    total += targets.size(0)
    correct += (predicted == targets).sum().item()

accuracy = correct / total * 100
print(f"Fianl MNIST Test Accuracy: {accuracy: .2f}%")

Fianl MNIST Test Accuracy:  97.48%
