In [109]:
import numpy as np
import matplotlib.pyplot as plt

from datetime import timedelta
from time import time

import torch
from torch import nn, optim
from torch.nn import functional as F
from torchvision import datasets, transforms
from torch.utils.data import random_split, DataLoader

import importlib
import models.convnets as convnets

# Re-import the local convnets module so edits made after kernel start are picked up
importlib.reload(convnets)

# Fixed seed for reproducibility, applied to both the CPU and CUDA generators
SEED = 123
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)

# Compute device: use CUDA when it is available, otherwise the CPU
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"

ModuleNotFoundError: No module named 'convnets'

In [95]:
# --- Filesystem layout used by this notebook ---

# Where the downloaded datasets are kept
DOWNLOAD_DIRECTORY = "./datasets"

# Where the optimal, chosen models are kept
MODEL_DIRECTORY = "./models"

# Where every trained model is kept
TRAINED_DIRECTORY = "./models/trained"

### Import and load the datasets.

From [data_exploration.ipynb](./data_exploration.ipynb), we have gathered that the given training data has a mean of ~0.1307 and a standard deviation of ~0.3081. We use this information to normalize the data on import. Feature normalization is an important step in machine learning, but we have to be consistent and apply the same normalization values to all of the data (training, validation and test).

In [96]:
# Preprocessing applied to each image on load: convert to a tensor, then
# normalize with mean 0.1307 and standard deviation 0.3081
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=0.1307, std=0.3081),
])

# Build both MNIST datasets (train=True first, then train=False); they share
# the same root directory, download flag and transform
dataset_train_val, dataset_test = (
    datasets.MNIST(root=DOWNLOAD_DIRECTORY, train=is_train, download=True, transform=transform)
    for is_train in (True, False)
)

In [97]:
# Split dataset into training and validation sets (5/6 training, 1/6 validation)
train_val_split = 5.0/6.0
train_size = int(train_val_split * len(dataset_train_val))
# The validation set takes whatever is left, so the two sizes always sum to the full length
val_size = len(dataset_train_val) - train_size
dataset_train, dataset_val = random_split(dataset_train_val, [train_size, val_size])

# Only the training loader shuffles; validation and test batches keep a fixed order
batch_size = 512
train_loader = DataLoader(dataset_train, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(dataset_val, batch_size=batch_size, shuffle=False)
# Loader over the held-out test set (this line used to re-create val_loader a second time)
test_loader = DataLoader(dataset_test, batch_size=batch_size, shuffle=False)

### Define a training function

In [98]:
def train(model, n_epochs, optimizer, loss_fn, train_loader, val_loader=None, device="cpu"):
    """Train a model and record the average loss of every epoch.

    Args:
        model: Module to train. It is moved to `device` and is in training
            mode when the function returns.
        n_epochs: Number of passes over `train_loader`.
        optimizer: Optimizer that updates the model's parameters.
        loss_fn: Callable invoked as `loss_fn(output, labels)`; its result is
            reduced with `.mean()` before the backward pass.
        train_loader: Iterable of `(imgs, labels)` batches that supports `len()`.
        val_loader: Optional iterable of `(imgs, labels)` batches that supports
            `len()`. When it is `None` or empty, validation is skipped.
        device: Device that the model and every batch are sent to.

    Returns:
        A tuple `(train_losses, val_losses)`. Each is a list with one entry per
        epoch holding the mean of the per-batch losses; `val_losses` is `None`
        when no validation was performed.
    """
    # Store intermediary training and validation losses
    train_losses, val_losses = [], []
    start_time = time()

    # A missing or empty validation loader disables validation entirely
    # (avoids calling len() on None and dividing by zero batches).
    has_val = val_loader is not None and len(val_loader) > 0

    # Keep the model on the same device the batches are sent to
    model.to(device)
    model.train()
    optimizer.zero_grad(set_to_none=True)

    print(" ======== Training", model._get_name(), "======= ")

    for i in range(1, n_epochs + 1):
        train_loss = 0
        for imgs, labels in train_loader:
            output = model(imgs.to(device=device))

            # Calculate mean batch loss and perform backward pass
            loss = loss_fn(output, labels.to(device=device)).mean()
            loss.backward()

            # Update parameters and zero out the gradients
            optimizer.step()
            optimizer.zero_grad()

            # Add loss to stored training loss
            train_loss += loss.item()

        # Average over the number of batches and store this epoch's training loss
        train_loss /= len(train_loader)
        train_losses.append(train_loss)

        # If we have a validation loader, store validation loss as well
        val_loss = None
        if has_val:
            # Set to eval mode
            model.eval()

            # No parameter update happens here, so skip gradient tracking
            val_loss = 0
            with torch.no_grad():
                for imgs, labels in val_loader:
                    output = model(imgs.to(device=device))
                    loss = loss_fn(output, labels.to(device=device)).mean()

                    val_loss += loss.item()

            val_loss /= len(val_loader)
            val_losses.append(val_loss)

            # Set back to training mode
            model.train()

        # Print epoch loss first 3 epochs and every 5 afterwards
        if i <= 3 or i % 5 == 0:
            print(timedelta(seconds=round(time() - start_time)), "| Epoch", i, "| Training loss %.5f" %train_loss, ("| Validation loss %.5f" %val_loss) if has_val else "")

    return train_losses, (val_losses if has_val else None)

### Initialize and train different model architectures capable of doing image classification.

In [110]:
# Training budget and objective shared by every candidate model
n_epochs = 50
loss_fn = nn.CrossEntropyLoss()

# Adam hyperparameters: each learning rate is tried without and with weight decay
optimizer_hyperparameters = [
    {"lr": lr, **extra}
    for lr in (0.001, 0.005, 0.01)
    for extra in ({}, {"weight_decay": 0.0001})
]

# Candidate model architectures
architechtures = [convnets.ConvNet1, convnets.ConvNet2, convnets.ConvNet3]

# Model selection: train every (hyperparameters, architecture) pair, keeping
# the per-epoch losses and the trained model of each run in matching order
losses, models = [], []
for hyperparameters, architecture in (
    (params, arch) for params in optimizer_hyperparameters for arch in architechtures
):
    # Fresh model instance and an Adam optimizer bound to its parameters
    model = architecture()
    optimizer = optim.Adam(model.parameters(), **hyperparameters)

    # Fit on the training loader while tracking the validation loss
    train_losses, val_losses = train(
        model, n_epochs, optimizer, loss_fn, train_loader, val_loader, device=DEVICE
    )

    # Record the results of this run
    losses.append((train_losses, val_losses))
    models.append(model)

0:00:36 | Epoch 1 | Training loss 0.88218 | Validation loss 0.04949
0:01:08 | Epoch 2 | Training loss 0.18944 | Validation loss 0.03042
0:01:38 | Epoch 3 | Training loss 0.13193 | Validation loss 0.02295


KeyboardInterrupt: 