# MNIST (PyTorch) Training Notebook
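This notebook trains a small convolutional neural network (CNN) on the MNIST handwritten-digit dataset with PyTorch, reports test accuracy after each epoch, and saves the trained weights (the model's `state_dict`) to **mnist_cnn.pt**.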


In [1]:
# If you're in Colab, torch/torchvision are usually preinstalled.
# This is safe to run anyway.
!pip -q install torch torchvision


In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

# Seed PyTorch's global RNG so weight initialisation, dropout masks and the
# DataLoader shuffle order start from the same state on every fresh run.
# NOTE: some CUDA kernels are non-deterministic, so GPU runs may still differ
# slightly from one another.
SEED = 42
torch.manual_seed(SEED)

# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print("Device:", DEVICE)


Device: cuda


## Data loaders (train + test)

In [5]:
BATCH = 128

# Convert each image to a tensor, then standardise its single channel with the
# (mean, std) pair below.
# NOTE(review): 0.1307 / 0.3081 are presumably the MNIST training-set mean and
# standard deviation — confirm if the dataset is ever swapped.
tfm = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])

# Both splits are stored under ./data (downloaded if missing) and share the
# same transform.
train_ds = datasets.MNIST("./data", train=True, download=True, transform=tfm)
test_ds = datasets.MNIST("./data", train=False, download=True, transform=tfm)

# Pinned (page-locked) host memory only speeds up host-to-GPU copies. On a
# CPU-only runtime it brings no benefit and makes DataLoader emit a warning,
# so enable it only when the batches will actually be moved to CUDA.
loader_kwargs = dict(batch_size=BATCH, num_workers=2, pin_memory=(DEVICE == "cuda"))

# Only the training split is shuffled; evaluation order does not matter.
train_loader = DataLoader(train_ds, shuffle=True, **loader_kwargs)
test_loader = DataLoader(test_ds, shuffle=False, **loader_kwargs)

# Last expression of the cell: display the (train, test) sample counts.
len(train_ds), len(test_ds)


(60000, 10000)

## Model (CNN)

In [6]:
class MNIST_CNN(nn.Module):
    """Small CNN classifier: two conv/ReLU/max-pool stages, then a two-layer head.

    The first Linear layer takes 64 * 7 * 7 features, i.e. 64 channels of 7x7
    maps after two 2x poolings of a 28x28 single-channel input. The final
    layer emits 10 unnormalised scores per sample.
    """

    def __init__(self) -> None:
        super().__init__()
        feature_layers = [
            nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),  # 28 -> 14
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),  # 14 -> 7
        ]
        head_layers = [
            nn.Flatten(),
            nn.Linear(in_features=64 * 7 * 7, out_features=128),
            nn.ReLU(),
            nn.Dropout(p=0.2),
            nn.Linear(in_features=128, out_features=10),
        ]
        # Kept as one flat Sequential so sub-modules stay indexed net.0 .. net.10,
        # which is what the state_dict keys are derived from.
        self.net = nn.Sequential(*feature_layers, *head_layers)

    def forward(self, x):
        """Run ``x`` through the whole stack and return the raw class scores."""
        scores = self.net(x)
        return scores

# Build the network, move its parameters to the selected device, and end the
# cell on the module itself so the notebook displays its layer summary.
model = MNIST_CNN()
model = model.to(DEVICE)
model


MNIST_CNN(
  (net): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): ReLU()
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Flatten(start_dim=1, end_dim=-1)
    (7): Linear(in_features=3136, out_features=128, bias=True)
    (8): ReLU()
    (9): Dropout(p=0.2, inplace=False)
    (10): Linear(in_features=128, out_features=10, bias=True)
  )
)

## Train + evaluate + save

In [7]:
# Cross-entropy on the model's raw class scores, optimised with Adam.
LEARNING_RATE = 1e-3
loss_fn = nn.CrossEntropyLoss()
opt = optim.Adam(params=model.parameters(), lr=LEARNING_RATE)

@torch.no_grad()
def test_accuracy():
    """Return the fraction of ``test_loader`` samples that ``model`` gets right.

    Reads the notebook-level ``model``, ``test_loader`` and ``DEVICE``. The
    model is switched to eval mode and left there, so the caller has to call
    ``model.train()`` again before further training. Gradient tracking is
    disabled for the whole call by the decorator.

    Returns:
        float: number of correct predictions divided by number of samples seen.
    """
    model.eval()
    hits, seen = 0, 0
    for images, labels in test_loader:
        images = images.to(DEVICE)
        labels = labels.to(DEVICE)
        # Predicted class = index of the largest score along dim 1.
        predicted = model(images).argmax(dim=1)
        hits += (predicted == labels).sum().item()
        seen += labels.size(0)
    return hits / seen

def _train_one_epoch():
    """Run one optimisation pass over ``train_loader`` and return the sum of the per-batch losses."""
    model.train()  # back to training mode; test_accuracy() leaves the model in eval mode
    loss_sum = 0.0
    for images, labels in train_loader:
        images, labels = images.to(DEVICE), labels.to(DEVICE)

        opt.zero_grad(set_to_none=True)
        batch_loss = loss_fn(model(images), labels)
        batch_loss.backward()
        opt.step()
        loss_sum += batch_loss.item()
    return loss_sum


EPOCHS = 10
for epoch in range(1, EPOCHS + 1):
    running = _train_one_epoch()

    # Accuracy on the test split after this epoch, expressed as a percentage.
    acc = test_accuracy() * 100
    print(f"Epoch {epoch}/{EPOCHS} | avg train loss: {running/len(train_loader):.4f} | test acc: {acc:.2f}%")

# Persist only the learned parameters (the state_dict), not the module object.
torch.save(model.state_dict(), "mnist_cnn.pt")
print("Saved -> mnist_cnn.pt")


Epoch 1/10 | avg train loss: 0.1952 | test acc: 98.59%
Epoch 2/10 | avg train loss: 0.0574 | test acc: 98.87%
Epoch 3/10 | avg train loss: 0.0405 | test acc: 98.98%
Epoch 4/10 | avg train loss: 0.0318 | test acc: 99.06%
Epoch 5/10 | avg train loss: 0.0243 | test acc: 99.09%
Epoch 6/10 | avg train loss: 0.0211 | test acc: 99.02%
Epoch 7/10 | avg train loss: 0.0185 | test acc: 99.27%
Epoch 8/10 | avg train loss: 0.0162 | test acc: 99.30%
Epoch 9/10 | avg train loss: 0.0135 | test acc: 99.27%
Epoch 10/10 | avg train loss: 0.0113 | test acc: 99.26%
Saved -> mnist_cnn.pt


## (Colab) Download the weight file to your computer

In [None]:
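# If you're NOT in Colab, you can ignore this cell.
# NOTE(review): this cell contains no download code and its saved output is a
# SyntaxError. Either restore the download call (in Colab this is typically
# `from google.colab import files; files.download("mnist_cnn.pt")`) or remove the cell.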




SyntaxError: invalid syntax (ipython-input-4065229179.py, line 2)