In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# Data pipeline: FashionMNIST train/test splits served as mini-batches of 64
transform = transforms.Compose([transforms.ToTensor()])  # turn each image into a tensor

# Both splits live under ./data; download=True asks torchvision to fetch them if needed
train_dataset = datasets.FashionMNIST(root='./data', train=True, download=True, transform=transform)
test_dataset = datasets.FashionMNIST(root='./data', train=False, download=True, transform=transform)

# Only the training batches are shuffled; the test split keeps its stored order
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# Define the model
class FashionMNISTModel(nn.Module):
    """Fully connected classifier with a single hidden layer.

    Each sample is flattened and passed through
    ``Linear -> ReLU -> Linear -> LogSoftmax``, so the network outputs
    per-class log-probabilities (the input format ``nn.NLLLoss`` works on),
    not raw logits.

    Args:
        input_size: Number of features per sample after flattening.
            Defaults to ``28 * 28``, one FashionMNIST image.
        hidden_size: Width of the hidden layer. Defaults to 128.
        num_classes: Number of output classes. Defaults to 10, the number
            of FashionMNIST classes.
    """

    def __init__(self, input_size=28 * 28, hidden_size=128, num_classes=10):
        super().__init__()
        self.flatten = nn.Flatten()
        # Attribute names and layer order are kept as-is so that state_dict
        # keys (and parameter initialisation order) stay the same.
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(input_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, num_classes),
            nn.LogSoftmax(dim=1),  # normalise over the class dimension
        )

    def forward(self, x):
        """Return log-probabilities of shape ``(batch, num_classes)`` for ``x``."""
        x = self.flatten(x)
        log_probs = self.linear_relu_stack(x)
        return log_probs

# Loss: negative log-likelihood, which operates on the log-probabilities
# produced by the model's final LogSoftmax layer
loss_function = nn.NLLLoss()

# Network to be trained
model = FashionMNISTModel()

# Adam with its library-default hyperparameters; it receives the model's
# parameters so that optimizer.step() can update them
optimizer = optim.Adam(model.parameters())

# Train the model
def train(dataloader, model, loss_fn, optimizer):
    """Run one pass over ``dataloader``, updating ``model`` in place.

    For every batch the loss is computed, gradients are reset and
    back-propagated, and the optimizer takes one step. On every 100th batch
    (starting with batch 0) a progress line is printed with that batch's loss
    and ``batch_index * batch_length`` out of the dataset size.

    Args:
        dataloader: Iterable of ``(inputs, targets)`` batches exposing a
            ``dataset`` attribute with a length.
        model: Module to train; it is switched to training mode.
        loss_fn: Callable mapping ``(predictions, targets)`` to a scalar loss.
        optimizer: Optimizer bound to the parameters of ``model``.

    Returns:
        None.
    """
    size = len(dataloader.dataset)
    # Training mode matters for layers that behave differently at train time
    # (e.g. dropout, batch normalisation)
    model.train()
    for step, (inputs, targets) in enumerate(dataloader):
        # Forward pass and loss for this batch
        batch_loss = loss_fn(model(inputs), targets)

        # Clear stale gradients, back-propagate, then update the parameters
        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()

        if step % 100 != 0:
            continue
        # Periodic progress report
        loss = batch_loss.item()
        current = step * len(inputs)
        print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")
        
# Run the training pass once per epoch, printing a header before each one
epochs = 5
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train(
        dataloader=train_loader,
        model=model,
        loss_fn=loss_function,
        optimizer=optimizer,
    )
print("Done!")

Epoch 1
-------------------------------
loss: 2.301527  [    0/60000]
loss: 0.751910  [ 6400/60000]
loss: 0.571789  [12800/60000]
loss: 0.601602  [19200/60000]
loss: 0.397758  [25600/60000]
loss: 0.469718  [32000/60000]
loss: 0.481952  [38400/60000]
loss: 0.572340  [44800/60000]
loss: 0.466431  [51200/60000]
loss: 0.562081  [57600/60000]
Epoch 2
-------------------------------
loss: 0.358575  [    0/60000]
loss: 0.267882  [ 6400/60000]
loss: 0.509594  [12800/60000]
loss: 0.563227  [19200/60000]
loss: 0.331341  [25600/60000]
loss: 0.426450  [32000/60000]
loss: 0.427761  [38400/60000]
loss: 0.385155  [44800/60000]
loss: 0.398908  [51200/60000]
loss: 0.419235  [57600/60000]
Epoch 3
-------------------------------
loss: 0.259246  [    0/60000]
loss: 0.368579  [ 6400/60000]
loss: 0.277375  [12800/60000]
loss: 0.313012  [19200/60000]
loss: 0.468999  [25600/60000]
loss: 0.260043  [32000/60000]
loss: 0.429482  [38400/60000]
loss: 0.346746  [44800/60000]
loss: 0.335817  [51200/60000]
loss: 0.25