In [34]:
import torch
from torch import nn
from tqdm.auto import tqdm
import torchvision
from torchvision import datasets
from torchvision import transforms
from torchvision.transforms import ToTensor
from torch.utils.data import DataLoader
# Show the installed PyTorch version string as this cell's output.
torch.__version__

'2.7.1+cu118'

In [2]:
# Report whether PyTorch can use a CUDA device on this machine.
torch.cuda.is_available()

True

In [3]:
# Choose the compute device once: "cuda" when CUDA is usable, otherwise "cpu".
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
device

'cuda'

In [4]:
# MNIST training split (train=True), stored under ./data and downloaded when
# requested via download=True. Each image goes through ToTensor; labels are
# left untouched (no target transform).
train_data = datasets.MNIST(root="data",
                            train=True,
                            download=True,
                            transform=ToTensor(),
                            target_transform=None)

In [5]:
# MNIST test split (train=False), stored under ./data and downloaded when
# requested via download=True. Each image goes through ToTensor; labels are
# left untouched (no target transform).
test_data = datasets.MNIST(root="data",
                           train=False,
                           download=True,
                           transform=ToTensor(),
                           target_transform=None)

In [6]:
# Sanity check: number of training samples, number of test samples, and the
# shape of the first training image tensor.
len(train_data), len(test_data), train_data[0][0].shape

(60000, 10000, torch.Size([1, 28, 28]))

In [55]:
# Pull the dataset's `classes` and `class_to_idx` attributes into
# notebook-level names.
class_names = train_data.classes
class_to_idx = train_data.class_to_idx

In [8]:
# Number of samples per mini-batch for both loaders.
BATCH_SIZE = 32

# Training batches are reshuffled every epoch so the model never sees the
# samples in one fixed order.
train_dataloader = DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True)

# Evaluation gains nothing from shuffling. A fixed order keeps the batch
# composition stable (including the smaller final batch), so metrics that are
# averaged per batch are identical from run to run.
test_dataloader = DataLoader(test_data, batch_size=BATCH_SIZE, shuffle=False)

In [46]:
class MNISTModel1(nn.Module):
    """Two-stage convolutional feature extractor followed by a linear classifier.

    Each stage is Conv2d -> ReLU -> 2x2 max-pool. The pooled feature maps are
    flattened and mapped to ``out_features`` logits by a single linear layer.

    Args:
        in_features: Number of channels in the input images (passed to the
            first convolution as ``in_channels``).
        hidden_units: Number of channels produced by both convolutions.
        out_features: Number of output logits (one per class).
    """

    def __init__(self, in_features: int, hidden_units: int, out_features: int):
        super().__init__()

        # Layers are created in the same order they are applied.
        stem_conv = nn.Conv2d(in_features, hidden_units,
                              kernel_size=5, stride=1, padding=1)
        inner_conv = nn.Conv2d(hidden_units, hidden_units,
                               kernel_size=3, stride=1, padding=1)

        self.block_1 = nn.Sequential(
            stem_conv,
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            inner_conv,
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )

        # For 28x28 inputs the spatial size goes 28 -> 26 -> 13 -> 13 -> 6,
        # which is why the linear layer expects hidden_units * 6 * 6 features.
        head = nn.Linear(hidden_units * 6 * 6, out_features)
        self.classifier = nn.Sequential(nn.Flatten(), head)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the class logits for a batch of images ``x``."""
        feature_maps = self.block_1(x)
        return self.classifier(feature_maps)

In [47]:
# Build the CNN for single-channel input with 64 hidden channels and 10 output
# logits, then move it onto the selected device.
model_1 = MNISTModel1(in_features=1, hidden_units=64, out_features=10)
model_1 = model_1.to(device)

# Cross-entropy loss on the model's logits, optimised with plain SGD.
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model_1.parameters(), lr=0.1)

# Display the architecture together with the shape of its first parameter tensor.
model_1, next(model_1.parameters()).shape

(MNISTModel1(
   (block_1): Sequential(
     (0): Conv2d(1, 64, kernel_size=(5, 5), stride=(1, 1), padding=(1, 1))
     (1): ReLU()
     (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
     (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
     (4): ReLU()
     (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
   )
   (classifier): Sequential(
     (0): Flatten(start_dim=1, end_dim=-1)
     (1): Linear(in_features=2304, out_features=10, bias=True)
   )
 ),
 torch.Size([64, 1, 5, 5]))

In [48]:
def accuracy_fn(y_true, y_pred):
    """Calculates accuracy between truth labels and predictions.

    Args:
        y_true (torch.Tensor): Truth labels.
        y_pred (torch.Tensor): Predicted labels to be compared to ``y_true``.

    Returns:
        float: Percentage of positions where ``y_true`` equals ``y_pred``,
            e.g. 78.45. Returns 0.0 when ``y_pred`` is empty.
    """
    n_samples = len(y_pred)
    # An empty batch has nothing to score; report 0.0 rather than dividing by zero.
    if n_samples == 0:
        return 0.0

    correct = torch.eq(y_true, y_pred).sum().item()
    return (correct / n_samples) * 100

In [62]:
def train_step(model: torch.nn.Module,
               data_loader: torch.utils.data.DataLoader,
               loss_fn,
               accuracy_fn,
               optimizer: torch.optim.Optimizer,
               device: torch.device = device):
    """Train ``model`` for one pass over ``data_loader`` and print the epoch metrics.

    Args:
        model: Model to optimise; it is moved to ``device`` and put in train mode.
        data_loader: Yields ``(X, y)`` batches.
        loss_fn: Callable ``loss_fn(logits, y)`` returning the batch loss.
        accuracy_fn: Callable ``accuracy_fn(y_true=..., y_pred=...)`` returning
            the batch accuracy.
        optimizer: Optimizer that updates ``model``'s parameters.
        device: Device the model and each batch are moved to.

    Returns:
        None. The loss and accuracy, each averaged over the number of batches,
        are printed.
    """
    train_loss, train_acc = 0.0, 0.0

    model.to(device)
    model.train()

    for X, y in data_loader:
        X, y = X.to(device), y.to(device)

        y_logits = model(X)
        loss = loss_fn(y_logits, y)

        # Accumulate a plain Python float. Adding the loss tensor itself would
        # keep every batch's autograd history alive for the whole epoch.
        train_loss += loss.item()
        train_acc += accuracy_fn(y_true=y, y_pred=y_logits.argmax(dim=1))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Average the per-batch values over the number of batches.
    train_loss /= len(data_loader)
    train_acc /= len(data_loader)
    print(f"Training loss: {train_loss} | Training accuracy: {train_acc}")

def test_step(model: torch.nn.Module,
              data_loader: torch.utils.data.DataLoader,
              loss_fn,
              accuracy_fn,
              device: torch.device = device):
    """Evaluate ``model`` on ``data_loader`` and print the averaged metrics.

    Args:
        model: Model to evaluate; it is switched to eval mode.
        data_loader: Yields ``(X, y)`` batches.
        loss_fn: Callable ``loss_fn(predictions, y)`` returning the batch loss.
        accuracy_fn: Callable ``accuracy_fn(y_true=..., y_pred=...)`` returning
            the batch accuracy.
        device: Device each batch is moved to before the forward pass.

    Returns:
        None. The loss and accuracy, each averaged over the number of batches,
        are printed.
    """
    model.eval()
    running_loss, running_acc = 0, 0

    with torch.inference_mode():
        for features, labels in data_loader:
            features, labels = features.to(device), labels.to(device)

            logits = model(features)

            running_loss += loss_fn(logits, labels)
            running_acc += accuracy_fn(y_true=labels, y_pred=logits.argmax(dim=1))

        # Turn the per-batch sums into per-batch means before reporting.
        n_batches = len(data_loader)
        mean_loss = running_loss / n_batches
        mean_acc = running_acc / n_batches
        print(f"Test loss: {mean_loss:.5f} | Test accuracy: {mean_acc:.2f}%\n")
    

In [66]:
# Number of full passes over the training data.
epochs = 10

# The loop variable is deliberately not named `epochs`: reusing that name would
# overwrite the configured epoch count with the last loop index.
for epoch in range(epochs):

    train_step(model=model_1,
               data_loader=train_dataloader,
               loss_fn=loss_fn,
               accuracy_fn=accuracy_fn,
               optimizer=optimizer,
               device=device)

    # Evaluate on the test set every third epoch (epoch 0, 3, 6, ...).
    if epoch % 3 == 0:
        test_step(model=model_1,
                  data_loader=test_dataloader,
                  loss_fn=loss_fn,
                  accuracy_fn=accuracy_fn,
                  device=device)
    
        

Training loss: 0.003768961876630783 | Training accuracy: 99.905
Test loss: 0.03179 | Test accuracy: 99.12%

Training loss: 0.003024549689143896 | Training accuracy: 99.915
Training loss: 0.0028212794568389654 | Training accuracy: 99.94
Training loss: 0.0025187775027006865 | Training accuracy: 99.93833333333333
Test loss: 0.03347 | Test accuracy: 99.17%

Training loss: 0.0016659379471093416 | Training accuracy: 99.96333333333334
Training loss: 0.0013387012295424938 | Training accuracy: 99.97
Training loss: 0.0009136534645222127 | Training accuracy: 99.98833333333333
Test loss: 0.03596 | Test accuracy: 99.10%

Training loss: 0.000660798919852823 | Training accuracy: 99.99166666666666
Training loss: 0.00069605209864676 | Training accuracy: 99.98833333333333
Training loss: 0.0005080957198515534 | Training accuracy: 99.995
Test loss: 0.03512 | Test accuracy: 99.23%



In [67]:
# Seed torch's global RNG so any torch-driven randomness in the evaluation
# below is repeatable.
torch.manual_seed(42)

def eval_model(model: torch.nn.Module,
               data_loader: torch.utils.data.DataLoader,
               loss_fn: torch.nn.Module,
               accuracy_fn,
               device: torch.device = device):
    """Run ``model`` over ``data_loader`` and summarise its loss and accuracy.

    Args:
        model (torch.nn.Module): Model to evaluate; it is switched to eval mode.
        data_loader (torch.utils.data.DataLoader): Yields ``(X, y)`` batches to
            predict on.
        loss_fn (torch.nn.Module): Loss applied to each batch of predictions.
        accuracy_fn: Callable ``accuracy_fn(y_true=..., y_pred=...)`` returning
            the batch accuracy.
        device: Device each batch is moved to. Defaults to the notebook-level
            ``device``.

    Returns:
        dict: ``"model_name"`` (the model's class name), ``"model_loss"`` and
        ``"model_acc"``, the latter two averaged over the number of batches.
    """
    model.eval()
    running_loss, running_acc = 0, 0

    with torch.inference_mode():
        for inputs, targets in tqdm(data_loader):
            inputs, targets = inputs.to(device), targets.to(device)

            logits = model(inputs)
            running_loss += loss_fn(logits, targets)
            running_acc += accuracy_fn(y_true=targets, y_pred=logits.argmax(dim=1))

        # Turn the per-batch sums into per-batch means.
        n_batches = len(data_loader)
        running_loss /= n_batches
        running_acc /= n_batches

    return {
        # The class name is only informative for models defined as a class.
        "model_name": model.__class__.__name__,
        "model_loss": running_loss.item(),
        "model_acc": running_acc,
    }


# Score model_1 on the test dataloader and keep the summary dict.
model_1_results = eval_model(model=model_1,
                             data_loader=test_dataloader,
                             loss_fn=loss_fn,
                             accuracy_fn=accuracy_fn,
                             device=device)

# Display the summary as the cell output.
model_1_results

100%|█████████████████████████████████████████████████████████████████████████████| 313/313 [00:01<00:00, 172.85it/s]


{'model_name': 'MNISTModel1',
 'model_loss': 0.03511681780219078,
 'model_acc': 99.23123003194888}