In [None]:
!pip install --quiet datasets

In [None]:
from datasets import load_dataset
from torchvision.transforms import ToTensor
from torch.utils.data import DataLoader
import torch
import torch.nn as nn
import matplotlib.pyplot as plt
from tqdm.auto import tqdm

## Data Loading and Preprocessing

Load the Fashion-MNIST dataset from the Hugging Face Hub.

In [None]:
# Load Fashion-MNIST; the result is indexed by split name ('train' / 'test') in the cells below.
dataset = load_dataset("fashion_mnist")

See what's inside the dataset.

In [None]:
# Bare last expression: the notebook displays the object's repr.
dataset

The images in the dataset are in the PIL format originally.

In [None]:
# Index the row first, then the column: only this one image is decoded.
# Column-first access (dataset['train']['image'][0]) can decode the whole
# image column, depending on the installed `datasets` version.
dataset['train'][0]['image']

Extract all the label names.

In [None]:
# Class names positioned by integer label (looked up as class_names[label] when plotting).
class_names = dataset['train'].features['label'].names
print(class_names)

Visualize more examples.

In [None]:
# Fix the seed so the same 16 random samples are drawn on every run.
torch.manual_seed(42)
fig = plt.figure(figsize=(9, 9))
rows, cols = 4, 4
train_split = dataset['train']
for i in range(1, rows * cols + 1):
    random_idx = torch.randint(0, len(train_split), size=[1]).item()
    # Fetch the row once and read both fields from it. Column-first access
    # (dataset['train']['image'][idx]) can decode the whole image column on
    # every iteration, depending on the installed `datasets` version.
    sample = train_split[random_idx]
    img, label = sample['image'], sample['label']
    # Draw on the explicit Axes instead of relying on pyplot's "current axes".
    ax = fig.add_subplot(rows, cols, i)
    ax.imshow(img, cmap="gray")
    ax.set_title(class_names[label])
    ax.axis("off")

We need to convert the images from the PIL format to PyTorch tensors for our model.

In [None]:
# Callable that converts one PIL image into a PyTorch tensor (applied per image below).
transform = ToTensor()

In [None]:
# Row-first indexing fetches just this image instead of touching the whole image column.
transform(dataset['train'][0]['image'])

Check the dimensions of our image. After `ToTensor` it should be 1x28x28 (channels x height x width).

In [None]:
# Row-first indexing fetches just this image instead of touching the whole image column.
transform(dataset['train'][0]['image']).shape

Apply the transformation to all the images in the dataset.

In [None]:
def map_pil_to_tensor(sample):
    """Convert the images and labels of a batch to PyTorch tensors.

    Args:
        sample: Mapping with an iterable of images under 'image' and the
            matching labels under 'label'. It is modified in place.

    Returns:
        The same mapping, with 'image' replaced by a list of tensors (one per
        image, produced by `transform`) and 'label' replaced by a single tensor.
    """
    sample['image'] = list(map(transform, sample['image']))
    sample['label'] = torch.tensor(sample['label'])
    return sample

# Attach the PIL -> tensor conversion to both splits.
train_dataset = dataset['train'].with_transform(map_pil_to_tensor)
test_dataset = dataset['test'].with_transform(map_pil_to_tensor)

Define the DataLoaders to iterate over the datasets in mini-batches.

In [None]:
# Number of samples per mini-batch.
BATCH_SIZE = 32

# Only the training loader shuffles; the test loader keeps the dataset order.
train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [None]:
# Check out what's inside the training dataloader:
# pull a single batch and display its 'image' and 'label' entries.
train_batch = next(iter(train_dataloader))
train_batch['image'], train_batch['label']

## Model

Create the `device` variable for the device-agnostic code.

In [None]:
# Prefer the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
device

Implement the TinyVGG architecture that looks like this:

```
TinyVGG(
  (block_1): Sequential(
    (0): Conv2d(1, 10, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(10, 10, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (block_2): Sequential(
    (0): Conv2d(10, 10, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(10, 10, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (classifier): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Linear(in_features=490, out_features=10, bias=True)
  )
)
```

You will need the following layers:

- [`nn.Conv2d`](https://pytorch.org/docs/stable/generated/torch.nn.Conv2d.html#conv2d)
- [`nn.MaxPool2d`](https://pytorch.org/docs/stable/generated/torch.nn.MaxPool2d.html#maxpool2d)
- [`nn.ReLU`](https://pytorch.org/docs/stable/generated/torch.nn.ReLU.html#relu)
- [`nn.Flatten`](https://pytorch.org/docs/stable/generated/torch.nn.Flatten.html#flatten)
- [`nn.Linear`](https://pytorch.org/docs/stable/generated/torch.nn.Linear.html#linear)
- [`nn.Sequential`](https://pytorch.org/docs/stable/generated/torch.nn.Sequential.html#sequential)

In [None]:
class TinyVGG(nn.Module):
    """
    Model architecture copying TinyVGG from:
    https://poloclub.github.io/cnn-explainer/

    Exercise scaffold: no layers are defined yet (see the TODOs below).

    Args:
        input_shape: Intended as the number of input channels
            (the notebook passes 1).
        hidden_units: Intended as the number of channels produced by each
            convolution (the notebook passes 10).
        output_shape: Intended as the number of output classes
            (the notebook passes len(class_names)).
    """
    def __init__(self, input_shape: int, hidden_units: int, output_shape: int) -> None:
        super().__init__()
        # TODO: define `block_1`, `block_2` and `classifier` to match the target
        # architecture printed in the markdown above this cell.
        # Hint: with 28x28 inputs and two 2x2 max-pools the feature map is 7x7,
        # so the linear layer takes hidden_units * 7 * 7 features (490 for 10 units).
        ...

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # TODO: pass `x` through block_1 -> block_2 -> classifier.
        # As written, the placeholder leaves `x` unchanged.
        ...
        return x

# Seed before construction so the initial weights are reproducible.
torch.manual_seed(42)
model = TinyVGG(
    input_shape=1,
    hidden_units=10,
    output_shape=len(class_names),
).to(device)
model

## Training

In [None]:
# Setup loss and optimizer
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(params=model.parameters(),
                             lr=0.1)

In [None]:
# Accuracy: percentage of predictions that equal the true labels
def accuracy_fn(y_true, y_pred):
    """Return the percentage of predictions that match the truth labels.

    Args:
        y_true (torch.Tensor): Truth labels.
        y_pred (torch.Tensor): Predicted labels to be compared to the truth labels.

    Returns:
        float: Accuracy in percent (0-100), e.g. 78.45
    """
    matches = torch.eq(y_true, y_pred)
    n_correct = matches.sum().item()
    n_total = len(y_pred)
    return (n_correct / n_total) * 100

Implement the training step and test step.

Remember, for training step:

1. Set the model to the training mode (`model.train()`)
2. Take a data mini-batch (transfer the data to the `device`)
3. Forward pass of the model
4. Calculate loss (and other metrics)
5. Zero the gradients
6. Perform backward pass (`loss.backward()`)
7. Do the optimizer step
8. Print out the information if necessary

For test step:

1. Set the model to the evaluation mode (`model.eval()`) and switch off the gradients (`with torch.no_grad()` or, as in the scaffold below, `with torch.inference_mode()`)
2. Take a data mini-batch (transfer the data to the `device`)
3. Forward pass of the model
4. Calculate loss (and other metrics)
5. Print out the information if necessary

In [None]:
def train_step(model: torch.nn.Module,
               data_loader: torch.utils.data.DataLoader,
               loss_fn: torch.nn.Module,
               optimizer: torch.optim.Optimizer,
               accuracy_fn,
               device: torch.device = device):
    """Run one training epoch over `data_loader` and print the mean loss and accuracy.

    Exercise scaffold: the `...` placeholders must be filled in before this runs.

    Args:
        model: Model to train; moved to `device` and put in training mode.
        data_loader: Iterable of training batches.
        loss_fn: Loss function applied to the model outputs and the labels.
        optimizer: Optimizer that updates the parameters of `model`.
        accuracy_fn: Callable taking `y_true` and `y_pred` label tensors and
            returning the accuracy in percent.
        device: Device to run on. Defaults to the notebook-level `device`.
    """
    train_loss, train_acc = 0, 0
    model.to(device)
    model.train()
    for batch, batch_data in enumerate(data_loader):
        # Send data to the target device
        # TODO: read the inputs and the labels `y` from `batch_data`
        # and move both to `device`.
        ...

        # 1. Forward pass
        # TODO: compute `y_pred` (the logits) from the inputs.
        ...

        # 2. Calculate loss
        loss = ...
        # Accumulate a plain Python float: adding the tensor itself would keep
        # every batch's autograd history referenced for the whole epoch.
        train_loss += loss.item()
        train_acc += accuracy_fn(y_true=y,
                                 y_pred=y_pred.argmax(dim=1)) # Go from logits -> pred labels

        # 3. Optimizer zero grad
        ...

        # 4. Loss backward
        ...

        # 5. Optimizer step
        ...

    # Calculate loss and accuracy per epoch and print out what's happening
    # (both are averaged over the number of batches)
    train_loss /= len(data_loader)
    train_acc /= len(data_loader)
    print(f"Train loss: {train_loss:.5f} | Train accuracy: {train_acc:.2f}%")

def test_step(data_loader: torch.utils.data.DataLoader,
              model: torch.nn.Module,
              loss_fn: torch.nn.Module,
              accuracy_fn,
              device: torch.device = device):
    """Evaluate `model` on every batch of `data_loader` and print the mean loss and accuracy.

    Exercise scaffold: the `...` placeholders must be filled in before this runs.

    Args:
        data_loader: Iterable of evaluation batches.
        model: Model to evaluate; moved to `device` and put in eval mode.
        loss_fn: Loss function applied to the model outputs and the labels.
        accuracy_fn: Callable taking `y_true` and `y_pred` label tensors and
            returning the accuracy in percent.
        device: Device to run on. Defaults to the notebook-level `device`.
    """
    test_loss, test_acc = 0, 0
    model.to(device)
    model.eval() # put model in eval mode
    # Turn on inference context manager
    with torch.inference_mode():
        for batch_data in data_loader:
            # Send data to the target device
            # TODO: read the inputs and the labels `y` from `batch_data`
            # and move both to `device`.
            ...

            # 1. Forward pass
            # TODO: compute `test_pred` (the logits) from the inputs.
            ...

            # 2. Calculate loss and accuracy
            test_loss += loss_fn(test_pred, y)
            test_acc += accuracy_fn(y_true=y,
                y_pred=test_pred.argmax(dim=1) # Go from logits -> pred labels
            )

        # Adjust metrics and print out (both are averaged over the number of batches)
        test_loss /= len(data_loader)
        test_acc /= len(data_loader)
        print(f"Test loss: {test_loss:.5f} | Test accuracy: {test_acc:.2f}%\n")

Time to train the model and look at the metrics (as an extra exercise, you can try adding TensorBoard logging here).

In [None]:
# Seed PyTorch's RNG before the training run.
torch.manual_seed(42)

# Measure time
from timeit import default_timer as timer
# Start timestamp; compared with the end timestamp taken after the training loop.
train_time_start_model = timer()

def print_train_time(start: float, end: float, device: torch.device = None):
    """Print and return the elapsed time between two timestamps.

    Args:
        start (float): Timestamp taken before the computation (e.g. from timeit's timer).
        end (float): Timestamp taken after the computation.
        device (torch.device, optional): Device the computation ran on; only
            used in the printed message. Defaults to None.

    Returns:
        float: `end - start`, in the same unit as the inputs (seconds for timeit timers).
    """
    total_time = end - start
    message = f"Train time on {device}: {total_time:.3f} seconds"
    print(message)
    return total_time

# Alternate one training pass and one evaluation pass per epoch
epochs = 3
for epoch in tqdm(range(epochs)):
    print(f"Epoch: {epoch}\n---------")
    train_step(
        model=model,
        data_loader=train_dataloader,
        loss_fn=loss_fn,
        optimizer=optimizer,
        accuracy_fn=accuracy_fn,
        device=device,
    )
    test_step(
        model=model,
        data_loader=test_dataloader,
        loss_fn=loss_fn,
        accuracy_fn=accuracy_fn,
        device=device,
    )

# Stop the clock and report the total time spent in the loop above
train_time_end_model = timer()
total_train_time_model = print_train_time(
    start=train_time_start_model,
    end=train_time_end_model,
    device=device,
)

## Saving the model

Implement the model saving here