In [3]:
from pathlib import Path
from uuid import uuid4

import torch
from torchvision.transforms import v2

from data.download import mnist_loader
from models.mlp import MLP

---

In [4]:
LR = 0.01
MOMENTUM = 0.9
EPOCHS = 1
BATCH_SIZE = 32
SAVE = True
GPU = True
ROOT = "./data"

In [5]:
TRANSFORMS = v2.Compose([
    v2.ToImage(),
    v2.ToDtype(torch.float32, scale=True),
    v2.Normalize((0.5,), (0.5,))  # need to provide iterable
])

---

In [6]:
model = MLP()
train_loader = mnist_loader(root=ROOT, train=True, transforms=TRANSFORMS)
test_loader = mnist_loader(root=ROOT, train=False, transforms=TRANSFORMS)

In [None]:
y_hat = model(next(iter(train_loader))[0])
y = torch.tensor([0, 8, 1, 1, 3, 0, 2, 2, 3, 1, 1, 4, 6, 0, 6, 3, 9, 1, 5, 8, 1, 3, 2, 5, 0, 5, 9, 2, 5, 8, 8, 3])
print(torch.nn.CrossEntropyLoss(y_hat.squeeze(axis=1), y))
print(y_hat.shape)

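- `y`: shape [32] (one class index per sample), with integer values from 0 to NUM_Y_HAT_COLS - 1 inclusive
- `y_hat`: shape [32, 10] after squeezing axis 1 (one row of raw class scores per sample)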

In [8]:
train_loader = mnist_loader(root=ROOT, train=True, transforms=TRANSFORMS)
model = MLP()
if GPU:
    model.to('cuda')
optimizer = torch.optim.SGD(model.parameters(), lr=LR, momentum=MOMENTUM)
criterion = torch.nn.CrossEntropyLoss()

In [None]:
for epoch in range(EPOCHS):
    for img, label in train_loader:
        if GPU:
            img, label = img.to('cuda'), label.to('cuda')
        out = model(img).squeeze(axis=1)
        loss = criterion(out, label)
        loss.backward()

        optimizer.step()
        optimizer.zero_grad()

    print(f"Epoch {epoch} loss={loss.item()}")

if SAVE:
    path = f"./models/saves/{uuid4()}-mlp"
    torch.save(model.state_dict(), path)
    print("Model saved under " + path)

- We squeeze the output tensor on axis 1, as the forward pass outputs shape [32, 1, 10].
- Cross-entropy loss expects class indices for the target variable. `y` has shape [32] and `y_hat` has shape [32, 10], where each of the 32 samples has the raw outputs (logits) from the output layer.