# Setting up a network model and starting a first training

In this exercise, we are going to practise how to set up a neural network model and perform a first training run with it. For simplicity and fast processing, we use the MNIST dataset, which can be obtained directly from the torchvision package.

In [None]:
# Imports
# import libraries for simple image plotting and numerical computing
import matplotlib.pyplot as plt
import numpy as np

import torchvision.transforms as transforms
import torchvision as torchvision

!pip install --upgrade -q gspread
#!pip uninstall --yes 'google-auth==1.7.2'
!pip install 'google-auth==1.16.1'
from google.colab import auth
auth.authenticate_user()
import gspread
from oauth2client.client import GoogleCredentials
from datetime import datetime
import pytz
import datetime
import random


tz = pytz.timezone('Europe/Berlin')

gc = gspread.authorize(GoogleCredentials.get_application_default())

The first time you run this there is probably an error. Just click on "RUNTIME" --> "RESTART RUNTIME" and run this cell again.

In [None]:
student_name = "yourName"
assert student_name != "yourName"

We are again using a form to monitor the progress for the different tasks:

In [None]:
#@title Result Form
gsheet = gc.open_by_url("https://docs.google.com/spreadsheets/d/1oNM74xlAKuSJESKm2ggO5sduKYdVab4xzTShiG0lB34/edit?usp=sharing")

def write_result(task_number, result=None):
  """Append a progress row for the current student to the results spreadsheet.

  The row is written to the worksheet named ``task<task_number>`` of
  ``gsheet`` and contains the student name, the current time and date in the
  ``tz`` timezone and, if given, the result. A confirmation is printed.

  Args:
    task_number: Number of the solved task; selects the worksheet.
    result: Optional result to store and print. Only ``None`` means
      "no result"; falsy values such as ``0`` or ``""`` are still reported.
  """
  worksheet = gsheet.worksheet("task{}".format(task_number))
  # Derive time and date from ONE timezone-aware timestamp. Taking the date
  # from the machine's local clock could disagree with the time taken in `tz`
  # (different day around midnight).
  now = datetime.datetime.now(tz)
  current_time = now.strftime("%X")
  current_date = str(now.date())
  if result is not None:
    worksheet.append_row([student_name, current_time, current_date, result])
    print("Task {} successfully solved by {} at {} with result: {}".format(task_number, student_name, current_time, result))
  else:
    worksheet.append_row([student_name, current_time, current_date])
    print("Task {} successfully solved by {} at {}".format(task_number, student_name, current_time))

print("Reporting enabled - write_result(number_of_task, result='your result') ")


Let us directly test the reporting:

In [None]:
# Confirm that you are ready to go:
write_result(0, 'Ready!!!')

If reporting is properly enabled, we can continue with the next steps.

## Defining the augmentations

As described in the last exercise, we now define the augmentations:

In [None]:
# Normalization constants: one mean and one std value, applied to every pixel.
# NOTE(review): these were labelled "Imagenet values"; 0.4914 / 0.2023 look like
# the commonly quoted CIFAR-10 statistics rather than MNIST ones - confirm.
# The parentheses below do not create tuples; both names are plain floats.
norm_mean = (0.4914)
norm_std = (0.2023)

# define the transformations an image goes through each time it is used for training
# includes augmentation AND normalization as described above
augmentation_train = transforms.Compose([
                                  # resize image to the network input size
                                  transforms.Resize((28,28)),
                                  # rotate the image by a randomly chosen angle; degrees=10 means the
                                  # angle is drawn from the range -10 to +10 degrees
                                  transforms.RandomRotation(degrees=10),
                                  # convert the image into a tensor so it can be processed by the GPU
                                  transforms.ToTensor(),
                                  # normalize the tensor with norm_mean and norm_std defined above
                                  transforms.Normalize(norm_mean, norm_std),
                                   ])

In [None]:
# no random augmentation for the validation/test data: only resizing, conversion
# to a tensor and the same normalization (norm_mean, norm_std) as for training
augmentation_test = transforms.Compose([
                    transforms.Resize((28,28)),
                    transforms.ToTensor(),
                    transforms.Normalize(norm_mean, norm_std),
                    ])


Now, we are creating an instance of our MNIST dataset as a torch class object. We can use an instance of the MNIST class in torchvision to easily load our data and control the training process. The dataset will be transferred into the dataloader objects later:

In [None]:
"""
Task 1: load the training part of the MNIST dataset that is provided by pytorch
Here's an overview of datasets
https://pytorch.org/docs/stable/torchvision/datasets.html

Hints:
- datasets can be found in torchvision.datasets
- root can just be your base folder e.g. "."
- pick the test part for validation
- don't forget to add the augmentations from above
- make sure to enable the download
"""

import torchvision
# folder the MNIST files are stored in
# NOTE(review): this path points into Google Drive, but no drive mount is visible
# in this notebook - confirm the drive is mounted or use a local folder such as "."
data_dir = "/content/drive/My Drive/"
# training part of MNIST (train=True) with download enabled; the training
# augmentations are passed as the transform of the dataset
dataset = torchvision.datasets.MNIST(root=data_dir, train=True, download=True, transform=augmentation_train)

In [None]:
import torch
from sklearn.model_selection import train_test_split

# get the total amount of images in the dataset
num_train = len(dataset)

# create a list of indices for the whole dataset
indices = list(range(num_train))

# get the class label of every image from the dataset object (digits 0-9)
class_labels = dataset.targets
classes = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

# fraction that is split off in EACH of the two splits below:
# 0.2 of all images become test data, then 0.2 of the remaining 0.8 (= 0.16 of
# all images) become validation data, which leaves 0.64 for training
split_size = 0.2

# first split (sklearn): separate the test indices from the training+validation
# indices; stratify keeps the class proportions equal in both parts
train_indices, test_indices, class_labels_train, class_labels_test = train_test_split(indices,
                                                                                       class_labels,
                                                                                       test_size=split_size,
                                                                                       shuffle=True,
                                                                                       stratify= class_labels,
                                                                                       random_state=42)

# second split (sklearn): divide the training+validation indices into training
# and validation indices, stratified by the labels returned by the first split
train_indices, val_indices = train_test_split(train_indices,
                                               test_size=split_size,
                                               shuffle=True,
                                               stratify= class_labels_train,
                                               random_state=42)

# Creating data samplers and loaders using the indices:
SubsetRandomSampler = torch.utils.data.sampler.SubsetRandomSampler

# create instances of a torch class for picking random samples from our dataset;
# each sampler only draws from its own list of indices
train_samples = SubsetRandomSampler(train_indices)
val_samples = SubsetRandomSampler(val_indices)
test_samples = SubsetRandomSampler(test_indices)

In the next step, we transfer our dataset into the dataloader. The dataloader is another class in pytorch, which allows the control of the loading process during training. It automatically generates batches of our training data and shuffles them. This way, we can perform effective training for multiple epochs:

In [None]:
# define the batch size for training, val and testing
batch_size, validation_batch_size, test_batch_size = 128, 128, 128

# create an instance of a dataloader for training
# shuffle stays False because a sampler is passed; the random order comes from
# the SubsetRandomSampler, which also restricts the loader to the training indices
train_data_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=False,num_workers=1, sampler= train_samples)

# overwrite the dataset instance with the test augmentation (this is not nice code)
# train_data_loader above was built from the previous dataset object; only the
# name `dataset` is re-bound here, to a second MNIST instance over the same
# training part (train=True) that uses augmentation_test as its transform
dataset = torchvision.datasets.MNIST(root=data_dir, train=True, download=True, transform=augmentation_test)
# create instances of dataloaders for validation and testing; both read from the
# dataset with the test augmentation and are limited to their own indices
validation_data_loader = torch.utils.data.DataLoader(dataset, batch_size=validation_batch_size, shuffle=False, sampler=val_samples)
test_data_loader = torch.utils.data.DataLoader(dataset, batch_size=test_batch_size, shuffle=False, sampler=test_samples)

# Define a Convolutional Neural Network

Pytorch makes it very easy to define a neural network. We have layers like Convolutions, ReLU non-linearity, Maxpooling etc. directly from torch library.

In this tutorial, we use the LeNet architecture introduced by LeCun et al. in their 1998 paper, [Gradient-Based Learning Applied to Document Recognition](http://yann.lecun.com/exdb/publis/pdf/lecun-01a.pdf). As the name of the paper suggests, the authors’ implementation of LeNet was used primarily for OCR and character recognition in documents.

The LeNet architecture is straightforward and small, (in terms of memory footprint), making it perfect for teaching the basics of CNNs.

In [None]:
from torch import nn
import torch.nn.functional as F
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

num_classes = len(classes)
class LeNet(nn.Module):
    """LeNet-style CNN: two conv/pool stages followed by three linear layers.

    For single-channel 28x28 inputs the second pooling stage produces 16
    feature maps of size 5x5, which is the input width ``fc1`` is built for.
    The width of the last layer is read from the module-level ``num_classes``.
    """

    def __init__(self):
        super().__init__()
        # convolutional feature extractor
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5, padding=2)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        # fully connected classifier
        self.fc1 = nn.Linear(in_features=16 * 5 * 5, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.fc3 = nn.Linear(in_features=84, out_features=num_classes)

    def forward(self, x):
        """Return the raw class scores (no softmax applied) for the batch ``x``."""
        # conv -> ReLU -> 2x2 max pooling, once per convolution
        for conv in (self.conv1, self.conv2):
            x = F.max_pool2d(F.relu(conv(x)), kernel_size=2)
        # flatten everything except the batch dimension
        x = x.view(-1, self.num_flat_features(x))
        # hidden linear layers use ReLU, the output layer stays linear
        for hidden in (self.fc1, self.fc2):
            x = F.relu(hidden(x))
        return self.fc3(x)

    def num_flat_features(self, x):
        """Return the product of all dimensions of ``x`` except the first one."""
        return x.size()[1:].numel()


## Task 1
Create a neural network capable of processing the image tensor 'img', which has only one channel. The network should contain at least one convolutional layer and one additional fully connected layer. Pay attention that within the fully connected layer the output dimension of the last convolutional layer has to fit the input dimension. Additionally, the output dimension of the fully connected layer before 'fc_fin' has to match the required input dimension of 'fc_fin'.

In [None]:
img = torch.rand((1, 1, 200, 200))

output_dim = 6

class LeNet2(nn.Module):
    """LeNet variant for single-channel 200x200 images.

    After both conv/pool stages a 200x200 input has become 16 feature maps of
    size 48x48, which is the input width of ``fc1``. The final layer
    ``fc_fin`` maps 64 features to the module-level ``output_dim``.
    """

    def __init__(self):
        super().__init__()
        # --------------------
        # layers added for the task
        self.conv1 = nn.Conv2d(1, 6, kernel_size=5, padding=2)
        self.conv2 = nn.Conv2d(6, 16, kernel_size=5)
        self.fc1 = nn.Linear(16 * 48 * 48, 120)
        self.fc2 = nn.Linear(120, 64)
        # --------------------
        self.fc_fin = nn.Linear(64, output_dim)

    def forward(self, x):
        """Return the output of ``fc_fin`` for the batch ``x``."""
        # --------------------
        # forward pass added for the task
        features = F.max_pool2d(F.relu(self.conv1(x)), 2)
        features = F.max_pool2d(F.relu(self.conv2(features)), 2)
        # one row per sample for the fully connected layers
        flat = features.view(-1, self.num_flat_features(features))
        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(hidden))
        # --------------------
        return self.fc_fin(hidden)

    def num_flat_features(self, x):
        """Return the product of all dimensions of ``x`` except the first one."""
        per_sample_shape = x.size()[1:]
        return per_sample_shape.numel()

model = LeNet2()

# Forward the sample image once. The task counts as solved when the second
# dimension of the output matches the configured `output_dim` (instead of a
# hard-coded copy of its value).
output = model(img)

task_done = output.size(1) == output_dim
if task_done:
  print('The correct output size has been generated.')


When your model processes the image correctly, submit your result by running the following cell:

In [None]:
if task_done:
  write_result(1, str(model))
else:
  print("You didnt solve the task yet")

# Define a Loss function

Let's use a Classification Cross-Entropy loss.

$H_{y'} (y) := - \sum_{i} y_{i}' \log (y_i)$

### Median Frequency Balancing
Fortunately, MNIST is a well-balanced dataset containing almost equal numbers of samples for every class. However, there are datasets, for example HAM10000, with a large imbalance in how often each label occurs. A model trained on such data would therefore be biased towards the more strongly represented classes. As a solution, we use **Median Frequency Balancing**: each class is weighted by the median class count divided by its own count.

In [None]:
# Median Frequency Balancing

# get the class labels of each image
class_labels = dataset.targets
# count the instances of each class in one vectorised call instead of a Python
# loop over every label; minlength keeps one slot per entry of `classes` even
# if a class does not occur
count_labels = np.bincount(np.asarray(class_labels), minlength=len(classes))

# get median count
median_freq = np.median(count_labels)

# calculate the weights: median count / count of the class, so classes rarer
# than the median get a weight above 1 and more frequent ones a weight below 1
class_weights = median_freq / count_labels

# print the weights
for class_name, weight in zip(classes, class_weights):
    print(class_name, ":", weight)

Now we define the loss function with the weights

In [None]:
class_weights = torch.FloatTensor(class_weights).to(device)
criterion = nn.CrossEntropyLoss(weight = class_weights)

# Define the Optimizer

The most common and effective Optimizer currently used is **Adam: Adaptive Moments**. You can look [here](https://arxiv.org/abs/1412.6980) for more information.


In [None]:
import torch.optim as optim

net = LeNet()
net = net.to(device)

optimizer = optim.Adam(net.parameters(), lr=1e-5)
print(net)

# Training

After everything has been set up, we can now start an actual training on our MNIST dataset. To save time, for the moment we will run only ten epochs. Within the training, our dataloader is used to load a batch from our MNIST dataset. This batch is forwarded to the model. The corresponding output is compared against its labels with the chosen loss function, here called 'criterion'. Then, the loss values are backpropagated through the whole model.

In [None]:
num_epochs = 10

def run_epoch():
  """Train ``net`` for one pass over ``train_data_loader``.

  Uses the module-level ``net``, ``optimizer``, ``criterion`` and ``device``.

  Returns:
    The sum of the per-batch loss values of this epoch as a float. Divide it
    by the number of batches to obtain the mean batch loss.
  """
  # make sure the model is in training mode
  net.train()
  running_loss = 0.0
  for inputs, labels in train_data_loader:
    # move the batch to the device the model was moved to
    inputs, labels = inputs.to(device), labels.to(device)
    # set the parameter gradients to zero
    optimizer.zero_grad()

    # forward + backward + optimize
    outputs = net(inputs)
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()

    running_loss += loss.item()
  return running_loss

for epoch in range(num_epochs):
    
    running_loss = run_epoch()

    running_loss /= len(train_data_loader)

    print('Epoch: {}'.format(epoch+1))
    print('Loss: {}' .format(running_loss))

print('Finished Training')

## Task 2
Rewrite the 'run_epoch' function, so that it can perform the training and validation in consecutive steps for each epoch. By using the corresponding dataloaders, the data can be easily provided to the model. The train() and eval() function change the model into the corresponding state. Please note that by running the previous code box the model is now already trained. We therefore re-initialize the model.

In [None]:
net = LeNet()
net = net.to(device)

optimizer = optim.Adam(net.parameters(), lr=1e-5)

num_epochs = 10

def run_epoch():
  """Run one training pass followed by one validation pass.

  Uses the module-level ``net``, ``optimizer``, ``criterion``, ``device``,
  ``train_data_loader`` and ``validation_data_loader``.

  Returns:
    A tuple ``(loss_dict, accuracy)``. ``loss_dict`` maps ``'train'`` and
    ``'val'`` to the mean batch loss of that phase; ``accuracy`` is the
    fraction of correctly classified validation samples.
  """
  loss_dict = {'train': 0.0,
               'val': 0.0}

  # specify which dataloader is used in which phase
  data_loader = {'train': train_data_loader,
                 'val': validation_data_loader}

  # counters for the validation accuracy
  num_correct = 0
  num_all = 0

  for phase in ['train', 'val']:
    is_training = phase == 'train'

    # the model mode only changes between phases, so set it once per phase
    if is_training:
      net.train()  # Set model to training mode
    else:
      net.eval()  # Set model to validation mode

    running_loss = 0.0

    for inputs, labels in data_loader[phase]:
      inputs, labels = inputs.to(device), labels.to(device)

      # calculate the network output and its loss; gradients are recorded in
      # the training phase only, validation needs none
      with torch.set_grad_enabled(is_training):
        outputs = net(inputs)
        loss = criterion(outputs, labels)

      if is_training:
        # backpropagation and optimization step, training phase only
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
      else:
        # accuracy is computed on the validation data only
        _, predicted = torch.max(outputs, 1)
        num_correct += torch.sum(predicted == labels).item()
        num_all += labels.size()[0]

      running_loss += loss.item()

    # mean batch loss of this phase
    loss_dict[phase] = running_loss / len(data_loader[phase])

  accuracy = num_correct/num_all
  return loss_dict, accuracy

# becomes True as soon as at least one epoch ran through without an error
run_through = False
for epoch in range(num_epochs):
    # one training pass plus one validation pass
    loss_dict, accuracy = run_epoch()

    print('Epoch: {}'.format(epoch+1))
    for phase in ['train', 'val']:
      print('Loss {}: {}'.format(phase, loss_dict[phase]))
    # format the percentage directly: rounding first and multiplying by 100
    # afterwards can print float artefacts (0.57 * 100 shows as 56.99999999999999)
    print('Validation Accuracy: {:.1f}%'.format(accuracy * 100))
    print()
    run_through = True

print('Finished Training')

After you have successfully performed the training, please submit your results:

In [None]:
if run_through:
  write_result(2, 'The last obtained validation loss is {}'.format(loss_dict['val']))
else:
  print("You didnt solve the task yet")

The following function is a useful tool to get information about your model:

In [None]:
from torchsummary import summary
summary(net, input_size=(1, 28, 28))

# Homework
After performing the training and validation of your system you are now ready to perform the inference on your test set. Implement the inference step for the MNIST dataset:

In [None]:
running_loss = 0.0
num_correct = 0
num_all = 0
data_loader = test_data_loader

# evaluation mode has to be set only once, not for every batch
net.eval()

# inference needs no gradients, so autograd does not have to record the
# forward passes
with torch.no_grad():
  for inputs, labels in data_loader:
    # get the inputs and move them to the device of the model
    inputs, labels = inputs.to(device), labels.to(device)

    # calculate the network output and its loss
    outputs = net(inputs)
    loss = criterion(outputs, labels)

    # the predicted class is the index of the largest output score
    _, predicted = torch.max(outputs, 1)
    num_correct += torch.sum(predicted == labels).item()
    num_all += labels.size()[0]
    running_loss += loss.item()

# mean batch loss over the test set
running_loss /= len(data_loader)

print('Loss for test set is {}'.format(running_loss))
# format the percentage directly to avoid float artefacts from round(...)*100
print('Test accuracy of the network: {:.1f}%'.format(100 * num_correct / num_all))


After you completed the inference, submit your test loss result here:

In [None]:
if not running_loss == 0.0:
  write_result(3, 'The obtained test loss is {}'.format(running_loss))
else:
  print("You didnt solve the task yet")