In [2]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import torch.nn.functional as F

# ---------- Hyper-parameters ----------
batch_size = 10
epochs = 10
lr = 0.1                           
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# ---------- Data ----------
transform = transforms.Compose([
    transforms.ToTensor(),         # converts [0,255] → float32 in [0,1]
    transforms.Lambda(lambda x: x.view(-1))   # flatten 1×28×28 → 784
])

train_set = datasets.MNIST(root='data', train=True, download=True, transform=transform)
test_set  = datasets.MNIST(root='data', train=False, download=True, transform=transform)

train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
test_loader  = DataLoader(test_set,  batch_size=batch_size, shuffle=False)

# ---------- Model ----------
model = nn.Sequential(
    nn.Linear(784, 392),
    nn.ReLU(),
    nn.Linear(392, 196),
    nn.ReLU(),
    nn.Linear(196, 10),
    nn.ReLU()                   
).to(device)

print(model)                       # like model.summary()

# ---------- Loss & Optimizer ----------
criterion = nn.CrossEntropyLoss()           
optimizer = torch.optim.SGD(model.parameters(), lr=lr)

# ---------- Training loop ----------
for epoch in range(1, epochs + 1):
    model.train()
    epoch_loss, correct, total = 0.0, 0, 0
    for x, y in train_loader:
        x, y = x.to(device), y.to(device)
        y_onehot = F.one_hot(y, num_classes=10).float()  # to_categorical

        optimizer.zero_grad()
        outputs = model(x)
        loss = criterion(outputs, y_onehot)
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item() * x.size(0)
        preds = outputs.argmax(dim=1)
        correct += (preds == y).sum().item()
        total += x.size(0)

    print(f"Epoch {epoch:2d}/{epochs} "
          f"loss {epoch_loss/total:.4f}  "
          f"acc {correct/total:.4f}")

# ---------- Evaluation ----------
model.eval()
test_loss, correct, total = 0.0, 0, 0
with torch.no_grad():
    for x, y in test_loader:
        x, y = x.to(device), y.to(device)
        y_onehot = F.one_hot(y, num_classes=10).float()
        outputs = model(x)
        test_loss += criterion(outputs, y_onehot).item() * x.size(0)
        preds = outputs.argmax(dim=1)
        correct += (preds == y).sum().item()
        total += x.size(0)

print("\nTest loss:", test_loss/total)
print("Test accuracy:", correct/total)


Sequential(
  (0): Linear(in_features=784, out_features=392, bias=True)
  (1): ReLU()
  (2): Linear(in_features=392, out_features=196, bias=True)
  (3): ReLU()
  (4): Linear(in_features=196, out_features=10, bias=True)
  (5): ReLU()
)
Epoch  1/10 loss 0.2756  acc 0.9174
Epoch  2/10 loss 0.0965  acc 0.9701
Epoch  3/10 loss 0.0642  acc 0.9795
Epoch  4/10 loss 0.0476  acc 0.9846
Epoch  5/10 loss 0.0375  acc 0.9879
Epoch  6/10 loss 0.0282  acc 0.9909
Epoch  7/10 loss 0.0238  acc 0.9922
Epoch  8/10 loss 0.0204  acc 0.9934
Epoch  9/10 loss 0.0142  acc 0.9956
Epoch 10/10 loss 0.0115  acc 0.9962

Test loss: 0.0915462679719631
Test accuracy: 0.9776
