In [2]:
import torch
import numpy as np
from torch import nn
from torch import optim
from torchvision import datasets
from torchvision.transforms import ToTensor

In [5]:
# Download MNIST (cached under ./data) and convert each image to a float tensor.
train_data = datasets.MNIST('data', train=True, download=True, transform=ToTensor())

# Hold out a fixed number of samples for validation; the training size is derived
# from the dataset length instead of hard-coding it.
BATCH_SIZE = 32
N_VAL = 5000
SPLIT_SEED = 42
# A seeded generator keeps the train/validation membership identical across
# kernel restarts, so validation losses from different runs are comparable.
train, val = torch.utils.data.random_split(
    train_data,
    [len(train_data) - N_VAL, N_VAL],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

# Reshuffle the training samples every epoch so SGD does not see the batches
# in the same order each time; validation order is irrelevant, so it stays fixed.
train_loader = torch.utils.data.DataLoader(train, batch_size=BATCH_SIZE, shuffle=True)
val_loader = torch.utils.data.DataLoader(val, batch_size=BATCH_SIZE)

In [10]:
# Fully connected classifier: flattened 28*28 input -> two hidden layers of 64
# units with ReLU -> 10 output scores (one per class).
model = nn.Sequential(
    nn.Linear(28*28,64),
    nn.ReLU(),
    nn.Linear(64,64),
    nn.ReLU(),
    nn.Linear(64,10)
)

# Pick the best available device instead of assuming Apple's MPS backend exists:
# requesting "mps" unconditionally makes model.to(device) fail on other machines.
if torch.backends.mps.is_available():
    device = torch.device("mps")
elif torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Using {} device".format(device))
# Last expression of the cell: moves the parameters and displays the model summary.
model.to(device)

Using mps device


Sequential(
  (0): Linear(in_features=784, out_features=64, bias=True)
  (1): ReLU()
  (2): Linear(in_features=64, out_features=64, bias=True)
  (3): ReLU()
  (4): Linear(in_features=64, out_features=10, bias=True)
)

In [11]:
# Cross-entropy criterion, applied to the model outputs in the training cell.
loss = nn.CrossEntropyLoss()
# Plain stochastic gradient descent over every model parameter, step size 0.01.
optimizer = optim.SGD(model.parameters(), lr=0.01)


In [12]:
# training loop
nb_epochs = 5
for epoch in range(nb_epochs):
    # Make the training mode explicit; a no-op for this architecture, but it
    # keeps the loop correct if dropout/batch-norm layers are added later.
    model.train()
    losses = []  # per-batch mean loss
    sizes = []   # matching batch sizes, used to weight the epoch average
    for batch in train_loader:
        x,y = batch
        b = x.size(0)
        # Flatten each image to a vector and move inputs/labels to the model's device.
        x = x.view(b,-1).to(device)
        y = y.to(device)

        # Forward pass: l holds the raw class scores, J the scalar batch loss.
        l = model(x)
        J = loss(l,y)

        # Clear stale gradients, backpropagate, then take one SGD step.
        optimizer.zero_grad()
        J.backward()
        optimizer.step()

        losses.append(J.item())
        sizes.append(b)

    # Each J is already a mean over its batch, so weight it by the batch size:
    # the last batch is smaller, and a plain mean of batch means would over-weight it.
    train_loss = torch.dot(torch.tensor(losses), torch.tensor(sizes, dtype=torch.float32)) / sum(sizes)
    print(f'Epoch: {epoch+1}, Train Loss: {train_loss:.2f}')

# validation loop (runs once, after all epochs)
model.eval()
losses = []
sizes = []
# No gradients are needed for evaluation; disabling them saves memory and time.
with torch.no_grad():
    for batch in val_loader:
        x,y = batch
        b = x.size(0)
        x = x.view(b,-1).to(device)

        l = model(x)
        J = loss(l,y.to(device))
        losses.append(J.item())
        sizes.append(b)
# Same sample-weighted average as for the training loss.
val_loss = torch.dot(torch.tensor(losses), torch.tensor(sizes, dtype=torch.float32)) / sum(sizes)
print(f'Validation Loss: {val_loss:.2f}')

Epoch: 1, Train Loss: 1.19
Epoch: 2, Train Loss: 0.40
Epoch: 3, Train Loss: 0.33
Epoch: 4, Train Loss: 0.29
Epoch: 5, Train Loss: 0.26
Validation Loss: 0.27
