In [0]:
import requests
from IPython.core.display import HTML
# Load the Bulma stylesheet into the notebook page; the task boxes in the
# markdown cells below use Bulma `message` classes.
# The string has no placeholders, so a plain literal is used instead of an f-string.
HTML("""
<style>
@import "https://cdn.jsdelivr.net/npm/bulma@0.9.4/css/bulma.min.css";
</style>
""")

# Model Tuning
This exercise is about investigating techniques to improve models and avoid overfitting by using:
- Dropout layer
- Early stopping
- Data augmentation

As mentioned in the [youtube video](https://youtu.be/njKP3FqW3Sk?t=2807)
 and the lecture, dropout layers and early stopping are useful methods to constrain the optimisation to counteract overfitting.
Like in the first exercise, the `PyTorchTrainer`
 class must be used to set up the training process for the architectures. The cell below loads the libraries and provides a function for training models.


In [0]:
from torch import optim
from trainers import PyTorchTrainer
from networks import *
from torchvision import transforms
from fashionmnist_utils.mnist_reader import load_mnist

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os

# Create the 'models' and 'runs' folders; folders that already exist are left untouched.
for _folder in ("models", "runs"):
    os.makedirs(_folder, exist_ok=True)

def train_pytorch_network(to_train, early_stopping=False, patience=5,
                          transform=None, epochs=10, max_epochs=30):
    """Train a network with ``PyTorchTrainer`` and save it afterwards.

    Args:
        to_train: The network to train. It is passed to the trainer as
            ``nn_module`` and its ``parameters()`` are given to the optimizer.
        early_stopping: If True, training runs through ``train_es``;
            otherwise it runs through ``train``.
        patience: Forwarded to ``train_es`` as ``patience``. Only used when
            ``early_stopping`` is True.
        transform: Transform handed to the trainer. Defaults to
            ``transforms.ToTensor()``, a conversion from PIL (image format)
            to torch tensors. Pass a composed transform to add augmentation.
        epochs: Value passed to ``train`` (presumably the number of epochs;
            verify against ``PyTorchTrainer``). Defaults to 10.
        max_epochs: First argument passed to ``train_es`` (presumably the
            upper bound on epochs; verify against ``PyTorchTrainer``).
            Defaults to 30.

    Returns:
        None. Progress messages are printed and the trainer's ``save()`` is
        called once training has finished.
    """
    if transform is None:
        # Simply a conversion from PIL (image format) to torch tensors.
        transform = transforms.ToTensor()

    network = to_train

    trainerNetwork = PyTorchTrainer(
        nn_module=network,
        transform=transform,
        optimizer=optim.SGD(network.parameters(), lr=1e-2, momentum=0.5),
        batch_size=128,
    )

    print("training the network started")
    if early_stopping:
        trainerNetwork.train_es(max_epochs, patience=patience)
    else:
        trainerNetwork.train(epochs)

    trainerNetwork.save()
    print("training and saving the network ended")

## Dropout Layer
In this task you will investigate the dropout layers of the `TopCNN`
 model defined in the cell below. PyTorch provides the layer [nn.Dropout2d](https://pytorch.org/docs/stable/generated/torch.nn.Dropout2d.html#torch.nn.Dropout2d)
 for convolutional layers and [nn.Dropout](https://pytorch.org/docs/stable/generated/torch.nn.Dropout.html#torch.nn.Dropout)
 for fully connected layers.
<article class="message task"><a class="anchor" id="dropout"></a>
    <div class="message-header">
        <span>Task 1: Dropout layer parameters</span>
        <span class="has-text-right">
          <i class="bi bi-code"></i><i class="bi bi-stoplights medium"></i>
        </span>
    </div>
<div class="message-body">


1. Run the cell below to train the model.
2. Use the code from the NN architectures exercise
 to plot and evaluate the model performance. 
3. Experiment with the probability parameter, `p`, of the dropout layers and compare the models to determine the best parameter setting for `p`. Start with 0.2, then 0.7.

<article class="message is-info">
  <div class="message-header">Info</div>
  <div class="message-body">

  Training of this model takes several minutes, depending on your computer.


  </div>
</article>


</div></article>



In [0]:
class TopCNN(nn.Module):
    """CNN with four batch-normalised convolutions, three dropout layers and two linear layers.

    Args:
        p: Dropout probability shared by all three dropout layers
            (``drop_1``, ``drop_2``, ``drop_3``). Defaults to 0.5, so
            ``TopCNN()`` builds the same model as before; pass e.g.
            ``TopCNN(p=0.2)`` to experiment with other settings.

    Note:
        ``linear_1`` expects ``5 ** 2 * 64`` flattened features, i.e. a 5x5
        feature map with 64 channels after the second pooling step. This is
        what a 1x28x28 input produces (assumed input size, confirm against
        the data loader).
    """

    def __init__(self, p=0.5):
        super().__init__()
        # First convolution pair: padding keeps the spatial size unchanged.
        self.conv_1 = nn.Conv2d(1, 32, kernel_size=9, padding=4)
        self.bn_1 = nn.BatchNorm2d(32)
        self.conv_2 = nn.Conv2d(32, 32, kernel_size=5, padding=2)
        self.bn_2 = nn.BatchNorm2d(32)

        self.drop_1 = nn.Dropout2d(p=p)

        # Second convolution pair: no padding, so each 3x3 convolution
        # shrinks the feature map by 2 pixels per dimension.
        self.conv_3 = nn.Conv2d(32, 64, kernel_size=3)
        self.bn_3 = nn.BatchNorm2d(64)
        self.conv_4 = nn.Conv2d(64, 64, kernel_size=3)
        self.bn_4 = nn.BatchNorm2d(64)

        self.drop_2 = nn.Dropout2d(p=p)

        self.linear_1 = nn.Linear(5 ** 2 * 64, 100)

        self.drop_3 = nn.Dropout(p=p)

        self.linear_2 = nn.Linear(100, 10)

    def forward(self, x):
        """Run a batch ``x`` through the network and return a tensor with 10 values per sample."""
        # Block 1: conv -> ReLU -> batch norm (twice), then halve the spatial size.
        x = F.relu(self.conv_1(x))
        x = self.bn_1(x)
        x = F.relu(self.conv_2(x))
        x = self.bn_2(x)
        x = F.max_pool2d(x, kernel_size=2, stride=2)

        x = self.drop_1(x)

        # Block 2: same pattern with 64 channels.
        x = F.relu(self.conv_3(x))
        x = self.bn_3(x)
        x = F.relu(self.conv_4(x))
        x = self.bn_4(x)
        x = F.max_pool2d(x, kernel_size=2, stride=2)

        x = self.drop_2(x)

        # Keep the batch dimension and flatten everything else for the linear layers.
        x = torch.flatten(x, start_dim=1)

        x = self.linear_1(x)
        x = F.relu(x)

        x = self.drop_3(x)

        x = self.linear_2(x)

        return x
    
# Train a freshly initialised TopCNN with the default settings (no early stopping).
train_pytorch_network(to_train=TopCNN())

<article class="message task"><a class="anchor" id="dropout2"></a>
    <div class="message-header">
        <span>Task 2: Adding a dropout layer</span>
        <span class="has-text-right">
          <i class="bi bi-code"></i><i class="bi bi-stoplights medium"></i>
        </span>
    </div>
<div class="message-body">


1. Add a dropout layer to the `CNN4Layer`
 model defined in the cell below. 
2. Compare model performance with and without the dropout layer.
3. Reflect on how the performance achieved above compares to the `TopCNN`
 model.



</div></article>



In [0]:
class CNN4Layer(nn.Module):
    """CNN with four 3x3 convolutions followed by two fully connected layers.

    The spatial sizes noted in ``forward`` assume 1x28x28 inputs (TODO confirm
    against the data loader); ``layer_1`` expects 16 * 32 = 512 flattened
    features, which matches 32 channels of size 4x4.
    """

    def __init__(self):
        super().__init__()
        # Convolution stack, channels 1 -> 8 -> 16 -> 32 -> 32, no padding.
        self.conv_1 = nn.Conv2d(1, 8, 3)
        self.conv_2 = nn.Conv2d(8, 16, 3)
        self.conv_3 = nn.Conv2d(16, 32, 3)
        self.conv_4 = nn.Conv2d(32, 32, 3)
        # Fully connected head: 512 -> 80 -> 10.
        self.layer_1 = nn.Linear(16 * 32, 80)
        self.layer_2 = nn.Linear(80, 10)

    def forward(self, picture):
        """Run a batch ``picture`` through the network and return 10 values per sample."""
        # 1 -> 8 channels; 28x28 -> 26x26 for the assumed input size.
        features = F.relu(self.conv_1(picture))
        # 8 -> 16 channels; 26x26 -> 24x24.
        features = F.relu(self.conv_2(features))
        # 2x2 pooling with stride 2 halves the spatial size: 24x24 -> 12x12.
        features = F.max_pool2d(features, kernel_size=2, stride=2)
        # 16 -> 32 channels; 12x12 -> 10x10.
        features = F.relu(self.conv_3(features))
        # 32 -> 32 channels; 10x10 -> 8x8.
        features = F.relu(self.conv_4(features))
        # Second pooling step: 8x8 -> 4x4.
        features = F.max_pool2d(features, kernel_size=2, stride=2)
        # Keep the batch dimension, flatten the rest (32 * 4 * 4 = 512 values).
        flat = torch.flatten(features, start_dim=1)
        hidden = F.relu(self.layer_1(flat))
        return self.layer_2(hidden)

# Train a freshly initialised CNN4Layer with the default settings (no early stopping).
train_pytorch_network(to_train=CNN4Layer())

# write your solution here

## Early stopping
This exercise is about early stopping using the `CNN4Layer`
 model. 
<article class="message task"><a class="anchor" id="early_stopping"></a>
    <div class="message-header">
        <span>Task 3: Early stopping</span>
        <span class="has-text-right">
          <i class="bi bi-code"></i><i class="bi bi-stoplights easy"></i>
        </span>
    </div>
<div class="message-body">


1. Execute the cell below to run the training process with early stopping. 
2. Change the `patience`
 parameter and observe how it changes the training time. Try 3 and 5 as the parameter values.
3. What is the influence of changing the patience parameter? Explain why.
4. Use the previous exercises to visualize the model results and compare its performance with the previous models. Explain how and why dropout and early stopping influence classification performance.



</div></article>



In [0]:
# Train with early stopping enabled. `CNN4Layer` is the model this task refers to
# and is defined in this notebook; the previously referenced `CNN4Layer_dropout`
# is not defined anywhere here, so the cell failed with a NameError on a fresh run.
# If you wrote a dropout variant of the model in Task 2, you can pass it here instead.
train_pytorch_network(CNN4Layer(), early_stopping=True, patience=1)

# write your solution here

## Data Augmentation
In this exercise you will implement data augmentation using PyTorch's `torchvision`
 library. Data augmentation involves subjecting the current training batch data to random transformations, effectively creating new training samples. 
The `train_pytorch_network`
 function (defined in the first cell) already contains the methods to transform the input images (in PIL format) to PyTorch tensors. Your task is to modify the function and add new augmentations to the transformation. 
The [torchvision.transforms.Compose](https://pytorch.org/vision/stable/transforms.html?highlight=compose#torchvision.transforms.Compose)
 class allows you to compose multiple transformations. The transform [torchvision.transforms.RandomAffine](https://pytorch.org/vision/stable/transforms.html#torchvision.transforms.RandomAffine)
 allows you to randomly subject the training data to affine transformations. Each composed transformation must end with the `transforms.ToTensor`
 transform. For example:
```python3
transform = transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.RandomAffine(45),
        transforms.RandomVerticalFlip(),
        transforms.ToTensor()
    ])


```
<article class="message task"><a class="anchor" id="augmentation"></a>
    <div class="message-header">
        <span>Task 4: Data augmentation <em>(optional)</em></span>
        <span class="has-text-right">
          <i class="bi bi-code"></i><i class="bi bi-stoplights medium"></i>
        </span>
    </div>
<div class="message-body">


1. Subject the training data to random affine transformations, then train the `TopCNN`
 and `CNN4Layer`
 models by following these steps:

- Compile a transformation object in `train_pytorch_network`
 using the `RandomAffine`
 transform.
- Train the `TopCNN`
 and `CNN4Layer`
 networks.
- Experiment with adding other types of data augmentation as well. 

2. Reflect on how data augmentation impacts the results and the training. Support your arguments with figures.



</div></article>



In [0]:
def train_pytorch_network(to_train, early_stopping=False, patience=5,
                          transform=None, epochs=10, max_epochs=30):
    """Train a network with ``PyTorchTrainer`` and save it afterwards.

    Args:
        to_train: The network to train. It is passed to the trainer as
            ``nn_module`` and its ``parameters()`` are given to the optimizer.
        early_stopping: If True, training runs through ``train_es``;
            otherwise it runs through ``train``.
        patience: Forwarded to ``train_es`` as ``patience``. Only used when
            ``early_stopping`` is True.
        transform: Transform handed to the trainer. Defaults to
            ``transforms.ToTensor()``, a conversion from PIL (image format)
            to torch tensors. For data augmentation, pass a
            ``transforms.Compose([...])`` pipeline that ends with
            ``transforms.ToTensor()``.
        epochs: Value passed to ``train`` (presumably the number of epochs;
            verify against ``PyTorchTrainer``). Defaults to 10.
        max_epochs: First argument passed to ``train_es`` (presumably the
            upper bound on epochs; verify against ``PyTorchTrainer``).
            Defaults to 30.

    Returns:
        None. Progress messages are printed and the trainer's ``save()`` is
        called once training has finished.
    """
    if transform is None:
        # Simply a conversion from PIL (image format) to torch tensors.
        transform = transforms.ToTensor()

    network = to_train

    trainerNetwork = PyTorchTrainer(
        nn_module=network,
        transform=transform,
        optimizer=optim.SGD(network.parameters(), lr=1e-2, momentum=0.5),
        batch_size=128,
    )

    print("training the network started")
    if early_stopping:
        trainerNetwork.train_es(max_epochs, patience=patience)
    else:
        trainerNetwork.train(epochs)

    trainerNetwork.save()
    print("training the network ended")
    
# write your solution here

In [0]:
# write reflections here