# Deep Learning Lab #1 - My first Neural Network

Congratulations! You survived the first laboratory of the course. After seeing the basics of PyTorch, let us build and train a small neural network for digit recognition. We will use the MNIST and SVHN datasets and a simple MLP to do that.

As a first step, let's install and import the modules we need. The `torch` module contains all the tools we need to build and train the network, whereas `torchvision` contains several Computer Vision oriented utilities, such as shortcuts to standard benchmarks.

We are also importing some additional libraries to help us with visualization and debugging.

In [None]:
!pip install scikit-learn -q

In [None]:
%matplotlib inline

import torch
from torch.utils.tensorboard import SummaryWriter
import torchvision
import matplotlib.pyplot as plt
import random

## Step #1: Get the datasets
PyTorch provides useful utilities to efficiently load training, testing, and evaluation data, namely the `Dataset` and `DataLoader` classes. The former implements all the functionality needed to load the dataset in the desired format, while the latter provides the corresponding iteration utilities (batching, shuffling). `torchvision` ships ready-made dataset implementations for [MNIST](https://pytorch.org/vision/stable/generated/torchvision.datasets.MNIST.html#torchvision.datasets.MNIST) and [SVHN](https://pytorch.org/vision/stable/generated/torchvision.datasets.SVHN.html#torchvision.datasets.SVHN); for the DataLoader, we can use the [default implementation](https://pytorch.org/docs/stable/data.html?highlight=dataloader#torch.utils.data.DataLoader).

First, let's explore the dataset a bit. To do that, we will initialize an instance of the MNIST dataset and plot a few samples.

In [None]:
# Load the training split of MNIST (downloaded into ./data if not already there).
# No transform is applied, so each sample is returned as stored in the dataset.
dataset = torchvision.datasets.MNIST('./data', train=True, transform=None, download=True)

# Let's then gather some info about a sample
image, label = dataset[0]
print(f"Image has size: {image.size}")
print(f"Image tensor has shape: {torchvision.transforms.functional.to_tensor(image).shape}")
print(f"Label is {label}")

# Visualize some samples in a 3x3 matrix
fig, axs = plt.subplots(3, 3)
fig.suptitle("MNIST")
for ax in axs.flatten():
    # Let's take one image randomly.
    # random.randrange(n) draws from [0, n - 1]; random.randint(0, n) also
    # includes n itself, which is one past the last valid index.
    image, label = dataset[random.randrange(len(dataset))]

    ax.axis("off")
    im = ax.imshow(image, cmap="gray")
    ax.set_title(f"Class: {label}")
    plt.colorbar(im, ax=ax)

plt.tight_layout()
plt.show()

In [None]:
# Load the training split of SVHN (downloaded into ./data if not already there).
# No transform is applied, so each sample is returned as stored in the dataset.
dataset = torchvision.datasets.SVHN('./data', split="train", transform=None, download=True)

# Let's then gather some info about a sample
image, label = dataset[0]
print(f"Image has size: {image.size}")
print(f"Image tensor has shape: {torchvision.transforms.functional.to_tensor(image).shape}")
print(f"Label is {label}")

# Visualize some samples in a 3x3 matrix
fig, axs = plt.subplots(3, 3)
fig.suptitle("SVHN")
for ax in axs.flatten():
    # Let's take one image randomly.
    # random.randrange(n) draws from [0, n - 1]; random.randint(0, n) also
    # includes n itself, which is one past the last valid index.
    image, label = dataset[random.randrange(len(dataset))]

    ax.axis("off")
    im = ax.imshow(image, cmap="gray")
    ax.set_title(f"Class: {label}")
    plt.colorbar(im, ax=ax)

plt.tight_layout()
plt.show()

Let's now define a function that allows us to:
- (a) download the dataset
- (b) split the dataset into chunks, if needed
- (c) return the DataLoaders to feed the NN

In [None]:
def _build_loaders(full_training_data, test_data, batch_size, val_split):
    """Split the training data into train/validation parts and wrap everything in DataLoaders.

    Args:
        full_training_data: dataset to be split into training and validation parts.
        test_data: dataset used as-is for the test loader.
        batch_size: batch size shared by the three loaders.
        val_split: fraction of `full_training_data` reserved for validation, in [0, 1).

    Returns:
        The tuple (train_loader, val_loader, test_loader).

    Raises:
        ValueError: if `val_split` is outside [0, 1).
    """
    if not 0.0 <= val_split < 1.0:
        raise ValueError(f"val_split must be in [0, 1), got {val_split}")

    # Size the validation part first and give the remainder to training: the
    # two lengths always sum to the dataset size, an exact split such as 0.2 of
    # 60000 yields 48000/12000, and val_split=0 yields an empty validation set
    # instead of a negative length.
    num_samples = len(full_training_data)
    validation_samples = round(num_samples * val_split)
    training_samples = num_samples - validation_samples

    training_data, validation_data = torch.utils.data.random_split(
        full_training_data, [training_samples, validation_samples]
    )

    # Print some stats
    print(f"# of training samples: {len(training_data)}")
    print(f"# of validation samples: {len(validation_data)}")
    print(f"# of test samples: {len(test_data)}")

    # Only the training data is reshuffled; validation and test keep a fixed order
    train_loader = torch.utils.data.DataLoader(training_data, batch_size, shuffle=True)
    val_loader = torch.utils.data.DataLoader(validation_data, batch_size, shuffle=False)
    test_loader = torch.utils.data.DataLoader(test_data, batch_size, shuffle=False)

    return train_loader, val_loader, test_loader


def get_mnist(batch_size, transforms, val_split=0.2):
    """Download MNIST (if needed) and return its train/validation/test DataLoaders.

    Args:
        batch_size: batch size of the returned loaders.
        transforms: transform applied to every image of both splits.
        val_split: fraction of the training split held out for validation.

    Returns:
        The tuple (train_loader, val_loader, test_loader).
    """
    full_training_data = torchvision.datasets.MNIST('./data', train=True, transform=transforms, download=True)
    test_data = torchvision.datasets.MNIST('./data', train=False, transform=transforms, download=True)

    return _build_loaders(full_training_data, test_data, batch_size, val_split)


def get_svhn(batch_size, transforms, val_split=0.2):
    """Download SVHN (if needed) and return its train/validation/test DataLoaders.

    Args:
        batch_size: batch size of the returned loaders.
        transforms: transform applied to every image of both splits.
        val_split: fraction of the training split held out for validation.

    Returns:
        The tuple (train_loader, val_loader, test_loader).
    """
    full_training_data = torchvision.datasets.SVHN('./data', split="train", transform=transforms, download=True)
    test_data = torchvision.datasets.SVHN('./data', split="test", transform=transforms, download=True)

    return _build_loaders(full_training_data, test_data, batch_size, val_split)

## Step #2: Build the neural network
We now have to define the architecture of our MLP, which consists of two fully connected linear layers. Luckily, we don't need to implement them by hand like in the first lab, because PyTorch has already done it for us. We also need to include an activation function between the two layers, e.g., Sigmoid (`torch.nn.Sigmoid`).

In [None]:
class MyFirstNetwork(torch.nn.Module):
    """A small MLP: linear layer, sigmoid, linear layer.

    The forward pass returns the output of the second linear layer as-is;
    no activation is applied after it.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        """Create the two linear layers and the activation between them.

        Args:
            input_dim: number of features of a flattened input sample.
            hidden_dim: number of units of the hidden layer.
            output_dim: number of outputs of the network.
        """
        super().__init__()

        # Fully connected layers, declared in input-to-output order
        self.input_to_hidden = torch.nn.Linear(in_features=input_dim, out_features=hidden_dim)
        self.hidden_to_output = torch.nn.Linear(in_features=hidden_dim, out_features=output_dim)

        # Non-linearity applied to the hidden representation
        self.activation = torch.nn.Sigmoid()

    def forward(self, x):
        """Flatten every sample of the batch and run it through the MLP.

        The first dimension of `x` is kept as the batch dimension and all the
        remaining ones are collapsed into a single feature dimension.
        NOTE: flattening is specific to MLPs, e.g. CNNs take the 2D/3D pixel
        matrix directly.
        """
        batch_size = x.shape[0]
        flat = x.reshape(batch_size, -1)

        hidden = self.activation(self.input_to_hidden(flat))
        return self.hidden_to_output(hidden)

## Step #3: Define the optimization algorithm
The optimizer is the tool that actually carries out the optimization of the parameters with respect to the chosen loss function. There are a variety of implemented optimizers in the [`torch.optim`](https://pytorch.org/docs/stable/optim.html) module. Let's define our optimizer, giving as input the network parameters, the learning rate, the weight decay coefficient, and the momentum.

In [None]:
def get_optimizer(net, lr, wd, momentum):
    """Build the SGD optimizer that updates all the parameters of `net`.

    Args:
        net: network whose parameters are optimized.
        lr: learning rate.
        wd: weight decay coefficient.
        momentum: momentum factor.

    Returns:
        A `torch.optim.SGD` instance.
    """
    return torch.optim.SGD(
        params=net.parameters(),
        lr=lr,
        momentum=momentum,
        weight_decay=wd,
    )

## Step #4: Define the loss function
The loss/cost function expresses the value that you wish to minimize by optimizing the parameters of your network. In other words, it should efficiently express the prediction error. Given that we are addressing a multi-class classification task, a suitable choice is a cross-entropy with softmax. This is available, along with many alternatives, in the [`torch.nn`](https://pytorch.org/docs/stable/nn.html#loss-functions) module. Note that `torch.nn.CrossEntropyLoss` already applies the softmax function, i.e. we don't need to manually define it.

In [None]:
def get_loss_function():
    """Return the cost function used for training: a default `torch.nn.CrossEntropyLoss`."""
    return torch.nn.CrossEntropyLoss()

## Step #5: Define the training and test loops
We are ready to define our training and test loops. These should be two separate functions which:
1.   **iterate** over a given set of data
2.   **forward** the data through the neural network
3.   **compare** the network output with the ground truth labels to compute the loss and/or evaluation metrics

Additionally, inside the training loop, we need these steps to actually carry out the optimization
1.   perform the backward pass (`loss.backward()`) to **compute gradients**
2.   call the optimizer to consequently **update the weights** (`optimizer.step()`)
3.   **reset** the gradients so that they do not accumulate across iterations (`optimizer.zero_grad()`)

In [None]:
def training_step(net, data_loader, optimizer, cost_function, device="cuda"):
    """Train `net` for one pass over `data_loader`.

    Args:
        net: network to optimize; it is switched to training mode.
        data_loader: iterable yielding (inputs, targets) batches.
        optimizer: optimizer stepping the parameters of `net` after every batch.
        cost_function: callable mapping (outputs, targets) to a scalar loss.
        device: device the batches are moved to before the forward pass.

    Returns:
        The tuple (average loss per sample, accuracy in percent), both
        computed over all the samples seen during the pass.
    """
    # A loss built with reduction="sum" already returns the total over the
    # batch. Otherwise (e.g. the default "mean" of torch.nn.CrossEntropyLoss)
    # the value is a per-sample average and has to be weighted by the batch
    # size, so that dividing by the number of samples yields the true average
    # regardless of the batch size or of a smaller last batch.
    loss_is_batch_total = getattr(cost_function, "reduction", "mean") == "sum"

    samples = 0
    cumulative_loss = 0.0
    cumulative_accuracy = 0

    # Set the network to training mode
    net.train()

    # Iterate over the training set
    for inputs, targets in data_loader:
        # Load data into the chosen device
        inputs = inputs.to(device)
        targets = targets.to(device)

        # Forward pass
        outputs = net(inputs)

        # Loss computation
        loss = cost_function(outputs, targets)

        # Backward pass
        loss.backward()

        # Parameters update
        optimizer.step()

        # Gradients reset
        optimizer.zero_grad()

        # Accumulate the loss as a total over the samples of this batch
        batch_size = inputs.shape[0]
        samples += batch_size
        batch_loss = loss.item()  # .item() extracts the Python scalar from the tensor
        cumulative_loss += batch_loss if loss_is_batch_total else batch_loss * batch_size

        # Compute training accuracy: the prediction is the index of the largest output
        predicted = outputs.argmax(dim=1)
        cumulative_accuracy += predicted.eq(targets).sum().item()

    return cumulative_loss / samples, cumulative_accuracy / samples * 100

def test_step(net, data_loader, cost_function, device="cuda"):
    samples = 0.
    cumulative_loss = 0.
    cumulative_accuracy = 0.

    # Set the network to evaluation mode
    net.eval()

    # Disable gradient computation (we are only testing, we do not want our model to be modified in this step!)
    with torch.no_grad():
        # Iterate over the test set
        for batch_idx, (inputs, targets) in enumerate(data_loader):
            # Load data into GPU
            inputs = inputs.to(device)
            targets = targets.to(device)

            # Forward pass
            outputs = net(inputs)

            # Loss computation
            loss = cost_function(outputs, targets)

            # Fetch prediction and loss value
            samples += inputs.shape[0]
            cumulative_loss += loss.item() # Note: the .item() is needed to extract scalars from tensors
            _, predicted = outputs.max(1)

            # Compute accuracy
            cumulative_accuracy += predicted.eq(targets).sum().item()

    return cumulative_loss / samples, cumulative_accuracy / samples * 100

## Put it all together!
We need a compact procedure to apply all the components and functions defined so far into the actual optimization procedure. In particular, we want our model to iterate over the training step and test step for multiple epochs, tracking the partial results.

In [None]:
# Tensorboard logging utilities
def log_values(writer, step, loss, accuracy, prefix):
    """Record the loss and the accuracy of one evaluation through `writer`.

    Two scalars are written at `step`, tagged "<prefix>/loss" and
    "<prefix>/accuracy" respectively.
    """
    tagged_values = (
        (f"{prefix}/loss", loss),
        (f"{prefix}/accuracy", accuracy),
    )
    for tag, value in tagged_values:
        writer.add_scalar(tag, value, step)

# Main function
def main(
    batch_size=128,
    input_dim=28*28,
    hidden_dim=100,
    output_dim=10,
    device='cuda:0',
    learning_rate=0.0001,
    weight_decay=0.0000001,
    momentum=0.9,
    epochs=10,
    exp_name="exp1"
):
    """Train and evaluate the MLP on MNIST, logging the results to TensorBoard.

    Args:
        batch_size: batch size of the data loaders.
        input_dim: number of features of a flattened input image.
        hidden_dim: number of units of the hidden layer.
        output_dim: number of outputs of the network.
        device: device hosting both the network and the data batches.
        learning_rate: learning rate of the optimizer.
        weight_decay: weight decay coefficient of the optimizer.
        momentum: momentum of the optimizer.
        epochs: number of training epochs.
        exp_name: experiment name; logs are written to runs/<exp_name>.

    Returns:
        The trained network.
    """
    # Create a logger for the experiment
    writer = SummaryWriter(log_dir=f"runs/{exp_name}")

    # try/finally guarantees that the logger is closed even if something
    # fails halfway through the experiment
    try:
        # Create data transforms
        # Images in the dataset are stored as PIL objects. We need to convert it to a tensor.
        transforms = torchvision.transforms.Compose([
            torchvision.transforms.ToTensor()
        ])

        # Get dataloaders
        train_loader, val_loader, test_loader = get_mnist(batch_size, transforms)
        # train_loader, val_loader, test_loader = get_svhn(batch_size, transforms)

        # Instantiate the network and move it to the chosen device (GPU)
        net = MyFirstNetwork(input_dim, hidden_dim, output_dim).to(device)

        # Let's "print" the network to view all the modules
        print(net)

        # Instantiate the optimizer
        optimizer = get_optimizer(net, learning_rate, weight_decay, momentum)

        # Define the cost function
        loss_function = get_loss_function()

        def evaluate_all_splits(step):
            """Evaluate on the three splits, then log (at `step`) and print the results."""
            # `device` is forwarded explicitly so that the batches land on the
            # same device as the network, instead of the "cuda" default of test_step
            train_loss, train_accuracy = test_step(net, train_loader, loss_function, device=device)
            val_loss, val_accuracy = test_step(net, val_loader, loss_function, device=device)
            test_loss, test_accuracy = test_step(net, test_loader, loss_function, device=device)

            # Log to TensorBoard
            log_values(writer, step, train_loss, train_accuracy, "Train")
            log_values(writer, step, val_loss, val_accuracy, "Validation")
            log_values(writer, step, test_loss, test_accuracy, "Test")

            print(f"\tTraining loss {train_loss:.5f}, Training accuracy {train_accuracy:.2f}")
            print(f"\tValidation loss {val_loss:.5f}, Validation accuracy {val_accuracy:.2f}")
            print(f"\tTest loss {test_loss:.5f}, Test accuracy {test_accuracy:.2f}")
            print("-----------------------------------------------------")

        # Computes evaluation results before training
        print("Before training:")
        evaluate_all_splits(-1)

        # For each epoch, train the network and then compute evaluation results
        for e in range(epochs):
            train_loss, train_accuracy = training_step(net, train_loader, optimizer, loss_function, device=device)
            val_loss, val_accuracy = test_step(net, val_loader, loss_function, device=device)

            # Logs to TensorBoard
            log_values(writer, e, train_loss, train_accuracy, "Train")
            log_values(writer, e, val_loss, val_accuracy, "Validation")

            print(f"Epoch: {e + 1}")
            print(f"\tTraining loss {train_loss:.5f}, Training accuracy {train_accuracy:.2f}")
            print(f"\tValidation loss {val_loss:.5f}, Validation accuracy {val_accuracy:.2f}")
            print("-----------------------------------------------------")

        # Compute final evaluation results
        print("After training:")
        evaluate_all_splits(epochs + 1)
    finally:
        # Closes the logger
        writer.close()

    # Let's return the net
    return net

## Run it!
Let's run our model

In [None]:
!rm -r runs

In [None]:
# Train with the default hyper-parameters of main(); logs go to runs/hello_training.
# The returned network is kept for the visualization below.
net = main(exp_name="hello_training")

In [None]:
# Let's check the results on tensorboard:
# load the notebook extension, then open the dashboard on the "runs" log directory
%load_ext tensorboard
%tensorboard --logdir=runs

## Visualize predictions

Let us now take the trained model and visualize the predictions w.r.t. ground truth.

In [None]:
rows, cols = 3, 3
transforms = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor()
])
# Only the test loader is needed here; one batch fills the rows x cols grid
_, _, test_loader = get_mnist(rows * cols, transforms)

# Use the device the trained network already lives on, so that inputs and
# parameters always match
device = next(net.parameters()).device

# Load data
inputs, targets = next(iter(test_loader))
inputs = inputs.to(device)
targets = targets.to(device)

# Forward pass. This is inference only: evaluation mode, and no autograd graph is built
net.eval()
with torch.no_grad():
    outputs = net(inputs)
preds = outputs.argmax(dim=1)

# Visualize some samples in a matrix
fig, axs = plt.subplots(rows, cols)
fig.suptitle("MNIST")
for idx, ax in enumerate(axs.flatten()):
    image, label = inputs[idx], targets[idx]
    pred = preds[idx]

    ax.axis("off")
    im = ax.imshow(torchvision.transforms.functional.to_pil_image(image), cmap="gray")
    ax.set_title(f"Pred: {pred.item()}")
    # Green title for a correct prediction, red for a wrong one
    ax.title.set_color("green" if pred == label else "red")
    plt.colorbar(im, ax=ax)

plt.tight_layout()
plt.show()

---

# Are MLPs all we need?

The neural network we trained achieved good results on digit recognition, but is it actually good? Let's test it out! Let's create a new version of the MNIST dataset where the digits are slightly shifted from the center of the image. What do you think will happen?

In [None]:
# Let's create a function that pads the original input so that the digit is not centered in the image
def padding(x, pad_size=28):
    """Zero-pad the last two dimensions of `x`, placing the original content at a random offset.

    `pad_size` zeros in total are added along each of the last two
    dimensions. The share added before the content (left / top) is drawn
    uniformly from [0, pad_size - 1]; the rest goes after it (right / bottom).

    Args:
        x: tensor to pad.
        pad_size: total padding added along each of the last two dimensions.

    Returns:
        The padded tensor, whose last two dimensions are `pad_size` larger than those of `x`.

    Raises:
        ValueError: if `pad_size` is not positive.
    """
    if pad_size <= 0:
        raise ValueError(f"pad_size must be positive, got {pad_size}")

    # .item() turns the 1-element tensors into plain Python ints, which is what
    # the padding specification of torch.nn.functional.pad is made of
    left_padding = torch.randint(low=0, high=pad_size, size=(1,)).item()
    top_padding = torch.randint(low=0, high=pad_size, size=(1,)).item()

    # Padding is specified starting from the last dimension: (left, right, top, bottom)
    pad = (left_padding, pad_size - left_padding, top_padding, pad_size - top_padding)
    return torch.nn.functional.pad(x, pad, "constant", 0)

# Randomly shifted digits: the padding() function defined above is applied to every image tensor
transforms_padded = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Lambda(padding),
])
# Centered digits: the same 14 zeros are added on each of the four sides
transforms = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Lambda(
        lambda img: torch.nn.functional.pad(img, (14,) * 4, mode="constant", value=0)
    ),
])

# Let's create both normal and padded datasets, and view their differences
rows, cols = 3, 3
_, _, test_loader = get_mnist(rows * cols, transforms)
_, _, test_loader_padded = get_mnist(rows * cols, transforms_padded)

# One batch per loader: first the centered version, then the randomly shifted one
first_batches = (next(iter(test_loader)), next(iter(test_loader_padded)))

for is_padded, (inputs, targets) in enumerate(first_batches):
    fig, axs = plt.subplots(rows, cols)
    fig.suptitle(f"MNIST {'padded' if is_padded else ''}")

    for cell, ax in enumerate(axs.flatten()):
        picture = torchvision.transforms.functional.to_pil_image(inputs[cell])

        ax.axis("off")
        im = ax.imshow(picture, cmap="gray")
        plt.colorbar(im, ax=ax)

    plt.tight_layout()
    plt.show()

Let us now train the same MLP as before on the new dataset.

In [None]:
# Let's slightly change the main function above
def main_pad(
    batch_size=128,
    input_dim=56*56,
    hidden_dim=100,
    output_dim=10,
    device='cuda:0',
    learning_rate=0.0001,
    weight_decay=0.0000001,
    momentum=0.9,
    epochs=10,
    exp_name="exp1_pad"
):
    """Train and evaluate the MLP on randomly padded MNIST, logging the results to TensorBoard.

    Args:
        batch_size: batch size of the data loaders.
        input_dim: number of features of a flattened (padded) input image.
        hidden_dim: number of units of the hidden layer.
        output_dim: number of outputs of the network.
        device: device hosting both the network and the data batches.
        learning_rate: learning rate of the optimizer.
        weight_decay: weight decay coefficient of the optimizer.
        momentum: momentum of the optimizer.
        epochs: number of training epochs.
        exp_name: experiment name; logs are written to runs/<exp_name>.

    Returns:
        The trained network.
    """
    # Create a logger for the experiment
    writer = SummaryWriter(log_dir=f"runs/{exp_name}")

    # try/finally guarantees that the logger is closed even if something
    # fails halfway through the experiment
    try:
        # Create data transforms
        # Images in the dataset are stored as PIL objects. We need to convert it to a tensor.
        transforms_padded = torchvision.transforms.Compose([
            torchvision.transforms.ToTensor(),
            torchvision.transforms.Lambda(lambda x: padding(x)),
        ])

        # Get dataloaders
        train_loader, val_loader, test_loader = get_mnist(batch_size, transforms_padded)

        # Instantiate the network and move it to the chosen device (GPU)
        net = MyFirstNetwork(input_dim, hidden_dim, output_dim).to(device)

        # Let's "print" the network to view all the modules
        print(net)

        # Instantiate the optimizer
        optimizer = get_optimizer(net, learning_rate, weight_decay, momentum)

        # Define the cost function
        loss_function = get_loss_function()

        def evaluate_all_splits(step):
            """Evaluate on the three splits, then log (at `step`) and print the results."""
            # `device` is forwarded explicitly so that the batches land on the
            # same device as the network, instead of the "cuda" default of test_step
            train_loss, train_accuracy = test_step(net, train_loader, loss_function, device=device)
            val_loss, val_accuracy = test_step(net, val_loader, loss_function, device=device)
            test_loss, test_accuracy = test_step(net, test_loader, loss_function, device=device)

            # Log to TensorBoard
            log_values(writer, step, train_loss, train_accuracy, "Train")
            log_values(writer, step, val_loss, val_accuracy, "Validation")
            log_values(writer, step, test_loss, test_accuracy, "Test")

            print(f"\tTraining loss {train_loss:.5f}, Training accuracy {train_accuracy:.2f}")
            print(f"\tValidation loss {val_loss:.5f}, Validation accuracy {val_accuracy:.2f}")
            print(f"\tTest loss {test_loss:.5f}, Test accuracy {test_accuracy:.2f}")
            print("-----------------------------------------------------")

        # Computes evaluation results before training
        print("Before training:")
        evaluate_all_splits(-1)

        # For each epoch, train the network and then compute evaluation results
        for e in range(epochs):
            train_loss, train_accuracy = training_step(net, train_loader, optimizer, loss_function, device=device)
            val_loss, val_accuracy = test_step(net, val_loader, loss_function, device=device)

            # Logs to TensorBoard
            log_values(writer, e, train_loss, train_accuracy, "Train")
            log_values(writer, e, val_loss, val_accuracy, "Validation")

            print(f"Epoch: {e + 1}")
            print(f"\tTraining loss {train_loss:.5f}, Training accuracy {train_accuracy:.2f}")
            print(f"\tValidation loss {val_loss:.5f}, Validation accuracy {val_accuracy:.2f}")
            print("-----------------------------------------------------")

        # Compute final evaluation results
        print("After training:")
        evaluate_all_splits(epochs + 1)
    finally:
        # Closes the logger
        writer.close()

    # Let's return the net
    return net

In [None]:
# Train on the randomly padded images with the default hyper-parameters of main_pad();
# logs go to runs/hello_pad. The returned network is kept for the visualization below.
net_padded = main_pad(exp_name="hello_pad")

Finally, let's visualize the predictions of this network

In [None]:
rows, cols = 3, 3
transforms_padded = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Lambda(lambda x: padding(x)),
])
# Only the test loader is needed here; one batch fills the rows x cols grid
_, _, test_loader = get_mnist(rows * cols, transforms_padded)

# Use the device the trained network already lives on, so that inputs and
# parameters always match
device = next(net_padded.parameters()).device

# Load data
inputs, targets = next(iter(test_loader))
inputs = inputs.to(device)
targets = targets.to(device)

# Forward pass. This is inference only: evaluation mode, and no autograd graph is built
net_padded.eval()
with torch.no_grad():
    outputs = net_padded(inputs)
preds = outputs.argmax(dim=1)

# Visualize some samples in a matrix
fig, axs = plt.subplots(rows, cols)
fig.suptitle("MNIST")
for idx, ax in enumerate(axs.flatten()):
    image, label = inputs[idx], targets[idx]
    pred = preds[idx]

    ax.axis("off")
    im = ax.imshow(torchvision.transforms.functional.to_pil_image(image), cmap="gray")
    ax.set_title(f"Pred: {pred.item()}")
    # Green title for a correct prediction, red for a wrong one
    ax.title.set_color("green" if pred == label else "red")
    plt.colorbar(im, ax=ax)

plt.tight_layout()
plt.show()