# MNIST (Modified National Institute of Standards and Technology)

Due to time constraints, this week we do everything on the MNIST database:
https://en.wikipedia.org/wiki/MNIST_database

![MNIST](https://upload.wikimedia.org/wikipedia/commons/2/27/MnistExamples.png)


In [None]:
# Imports
# install the Google Sheets client and pin google-auth before anything is imported
!pip install --upgrade -q gspread
!pip install 'google-auth==1.16.1'
from google.colab import auth
# authenticate the Colab user; the credentials are picked up by gspread.authorize() below
auth.authenticate_user()
import gspread
from oauth2client.client import GoogleCredentials
import datetime
import pytz

# timezone used for the timestamps written by write_result()
tz = pytz.timezone('Europe/Berlin')

# authorized gspread client used to open the submission spreadsheet
gc = gspread.authorize(GoogleCredentials.get_application_default())

The first time you run this, there will probably be an error. Just click on "RUNTIME" --> "RESTART RUNTIME" and run this cell again.

In [None]:
# replace the placeholder with your own name; write_result() stores it in every submitted row
student_name = "yourName"
# fails on purpose until the placeholder above has been changed
assert student_name != "yourName"

In [None]:
# init google sheet methods for task submitting
gsheet = gc.open_by_url("https://docs.google.com/spreadsheets/d/1MZcZXXADcBUHjrdFIRNSAhE0oChqx4sRJ2odUCJ6qJ4/edit?usp=sharing")

def write_result(task_number, result=None):
  """Append a submission row to the worksheet of a task and print a confirmation.

  Uses the module-level `gsheet`, `student_name` and `tz`.

  args:
    task_number = selects the worksheet: the string 'homework' selects the
                  "homework" sheet, anything else selects "task<task_number>"
    result = optional value written to a fourth column; left out when None
  """
  if task_number == 'homework':
    sheet_name = "homework"
  else:
    sheet_name = "task{}".format(task_number)
  worksheet = gsheet.worksheet(sheet_name)
  # take time AND date from one timezone-aware timestamp so both columns agree
  # (the server-local date can differ from the date in `tz` around midnight)
  now = datetime.datetime.now(tz)
  current_time = now.strftime("%X")
  current_date = str(now.date())
  # compare against None so that falsy results such as 0 or 0.0 are still reported
  if result is not None:
    worksheet.append_row([student_name, current_time, current_date, result])
    print("Task {} successfully solved by {} at {} with result: {}".format(task_number, student_name, current_time, result))
  else:
    worksheet.append_row([student_name, current_time, current_date])
    print("Task {} successfully solved by {} at {}".format(task_number, student_name, current_time))

print("Reporting enabled - write_result(number_of_task, result='your result') ")


In [None]:
# quick check everything is working for you

# TODO: fill in a truthy value, e.g. a non-empty string (the right-hand side is intentionally left blank)
my_favorite_animal =
assert my_favorite_animal, 'What is your favorite animal???!'

# submit the answer as task 0
write_result(0, my_favorite_animal)

In [None]:
# label names: the ten digits "0" .. "9" as strings
classes = list(map(str, range(10)))

In [None]:
import torchvision.transforms as transforms
# define the transformations the images go through each time they are used for training
# (random augmentation followed by tensor conversion; no normalization step is applied here)
augmentation_train = transforms.Compose([
                                  # randomly perform a horizontal flip of the image
                                  transforms.RandomHorizontalFlip(),
                                  # rotate the image by a random angle; a single number means the range (-60, +60) degrees
                                  transforms.RandomRotation(degrees=60),
                                  # convert the image into a tensor so it can be processed by the GPU
                                  transforms.ToTensor(),
                                   ])


# no augmentation for the test data, only conversion to tensor
augmentation_test = transforms.Compose([
                    transforms.ToTensor(),
                    ])

# Task 1: Load the MNIST dataset that is provided by PyTorch
Here's an overview of datasets
https://pytorch.org/docs/stable/torchvision/datasets.html

Hints:
- datasets can be found in `torchvision.datasets`
- root can just be your base folder e.g. "."
- pick the test part for validation
- don't forget to add the augmentations from above
- make sure to enable the download


In [None]:
import torchvision
# TODO (Task 1): assign the MNIST training split here (right-hand side intentionally left blank)
toy_dataset_train =
# TODO (Task 1): assign the MNIST split used for validation here (right-hand side intentionally left blank)
toy_dataset_val =

In [None]:
# summary of both dataset sizes, submitted as the result of task 1
text = f"\n\nSize of training dataset: {len(toy_dataset_train)} \nSize of validation set: {len(toy_dataset_val)}"
# both splits together are expected to contain 70000 images
assert (len(toy_dataset_train)+len(toy_dataset_val))==70000, 'Hmm? Is the MNIST dataset loaded?'
write_result(1, text)

In [None]:
# import of a couple more modules we need later for progress bars and plots
from tqdm import tnrange, tqdm_notebook
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Declare the network
import torch
from torch import nn
import torch.nn.functional as F
from torchsummary import summary
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print("Running on device:",device)
num_classes=10

class OurNet(nn.Module):
    """Small CNN for 1x28x28 images: two conv layers, one 2x2 max-pool, two linear layers.

    args:
      n_classes = number of output logits; when None (the default) the
                  module-level `num_classes` is used, exactly as before
    """
    def __init__(self, n_classes=None):
        super().__init__()
        if n_classes is None:
            n_classes = num_classes
        # 5x5 kernel with padding=2 keeps the spatial size (28x28 stays 28x28)
        self.conv1 = nn.Conv2d(1, 6, (5,5), padding=2)
        # unpadded 3x3 kernel shrinks every side by 2 (28 -> 26)
        self.conv2 = nn.Conv2d(6, 16, (3,3))
        # the 2x2 max-pool in forward() halves 26 -> 13, hence 16*13*13 inputs
        self.fc1   = nn.Linear(16*13*13, 120)
        self.fc2   = nn.Linear(120, n_classes)
    def forward(self, x):
        """Return the raw class scores (no softmax is applied) for a batch of images."""
        x = F.relu(self.conv1(x))
        x = F.max_pool2d(F.relu(self.conv2(x)), (2,2))
        # flatten everything except the batch dimension for the linear layers
        x = x.view(-1, self.num_flat_features(x))
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x
    def num_flat_features(self, x):
        """Return the number of elements per sample, i.e. the product of all dims but the first."""
        size = x.size()[1:]
        num_features = 1
        for s in size:
            num_features *= s
        return num_features

In [None]:
# make sure we are running on GPU
# compare the device type instead of its string form, so an indexed device such as "cuda:0" passes too
assert device.type == "cuda", 'If this fails maybe change the runtime: Runtime -> Change runtime type'
print('Nice! Running on the GPU')

In [None]:
# build the network, move it to the selected device and print a per-layer summary
net = OurNet().to(device)
summary(net, input_size=(1, 28, 28))

In [None]:
# Median Frequency Balancing (see last week):
# weight of a class = median of all class counts / count of that class

import numpy as np

# get the class labels of each image
class_labels = toy_dataset_train.targets
# count the instances of each class in one vectorized pass instead of a Python
# loop over every label; minlength keeps one slot per class even if a class is absent
count_labels = np.bincount(np.asarray(class_labels), minlength=len(classes)).astype(float)

# get median count
median_freq = np.median(count_labels)

# calculate the weights (element-wise division, one weight per class)
class_weights = median_freq / count_labels

# print the weights
for class_name, class_weight in zip(classes, class_weights):
    print(class_name, ":", class_weight)

Now we define the loss function with the weights

In [None]:
# put the weights on our device: convert them to a float tensor first, then move that tensor
class_weights = torch.FloatTensor(class_weights)
class_weights = class_weights.to(device)

# Task 2: Define the loss for multiclass classification

Overview of losses available: https://pytorch.org/docs/stable/nn.html#loss-functions

Hints:
- CrossEntropy-Loss is a good choice
- use the nn module we have already imported as nn.
- don't forget to add the weights we just put on our device for class imbalance

In [None]:
# loss function
# TODO (Task 2): assign the loss here (right-hand side intentionally left blank)
criterion =

In [None]:
# submit your results
# the loss must have been created with class weights, otherwise the check fails
assert criterion.weight is not None, 'Did you specify the weight argument?'
write_result(2, result=str(criterion))

# Evaluation functions

Here we write a function which calculates the loss and the accuracy of the model on the validation dataset.

In [None]:
# functions for evaluation
from sklearn.metrics import accuracy_score

def get_num_correct(predicted, labels):
    '''
    Count how many predictions of a batch agree with the ground truth.
    args:
      predicted = predicted class per sample
      labels = true class per sample
    returns:
      (number of samples in the batch, number of positions where both agree)
    '''
    # e.g. predicted = [1, 2, 1, 4], labels = [1, 0, 1, 4] -> (4, 3)
    matches = predicted == labels
    return labels.size(0), matches.sum().item()

# function for validation step
def evaluate(model, val_loader):
    '''
    Compute the loss and the accuracy of the model on the validation set.
    Uses the module-level `criterion` and `device`.
    args:
      model = the network we want to evaluate
      val_loader = the validation data loader, yielding (inputs, labels) batches
    returns:
      (mean of the per-batch losses, fraction of correctly classified samples)
    '''
    losses= 0
    num_samples_total=0
    correct_total=0
    # remember the current mode so that a training loop calling evaluate()
    # between epochs is not silently left in eval mode afterwards
    was_training = model.training
    model.eval()
    try:
        # validation needs no gradients; skipping the autograd graph saves memory and time
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                out = model(inputs)
                _, predicted = torch.max(out, 1)
                loss = criterion(out, labels)
                losses += loss.item()
                b_len, corr = get_num_correct(predicted, labels)
                num_samples_total +=b_len
                correct_total +=corr
    finally:
        model.train(was_training)
    accuracy = correct_total/num_samples_total
    losses = losses/len(val_loader)
    return losses, accuracy

# Training Loop

In [None]:
# define the batch size
batch_size, validation_batch_size = 4096, 4096

import datetime
import torch.optim as optim
start_time = datetime.datetime.now()
optimizer = optim.Adam(net.parameters(), lr=1e-2)

num_w = 0
# create an instance of a dataloader for training
train_data_loader = torch.utils.data.DataLoader(toy_dataset_train, batch_size=batch_size, shuffle=True,num_workers=num_w)
# validation metrics do not depend on the sample order, so shuffling is unnecessary
validation_data_loader = torch.utils.data.DataLoader(toy_dataset_val, batch_size=validation_batch_size, shuffle=False,  num_workers=num_w)

# number of loops over the dataset
num_epochs = 5
# per-epoch history of the metrics
accuracy = []
val_accuracy = []
losses = []
val_losses = []

print("Started Training")
# print one placeholder line with all-zero metrics before the first epoch has run
epoch = 0
running_loss = 0.0
correct_total= 0.0
num_samples_total=0.0
train_accuracy = 0.0
val_acc=0.0
val_loss = 0.0
print('Epoch: %d' %(epoch+1))
print('Train Loss: %.3f  Train Accuracy:%.3f' %(running_loss, train_accuracy))
print('Validation Loss: %.3f  Val Accuracy: %.3f' %(val_loss, val_acc))

for epoch in tnrange(num_epochs):
    # make sure the model is in training mode (evaluate() calls model.eval())
    net.train()
    running_loss = 0.0
    correct_total= 0.0
    num_samples_total=0.0

    print("...")
    # wrap the loader itself (not enumerate) so the progress bar knows the number of batches
    for i, data in enumerate(tqdm_notebook(train_data_loader)):
        if i % 2 == 0:
            print(f"Iteration: {i+1}/{len(train_data_loader)}")
        # get the inputs
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)
        # set the parameter gradients to zero
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # compute accuracy
        _, predicted = torch.max(outputs, 1)
        b_len, corr = get_num_correct(predicted, labels)
        num_samples_total +=b_len
        correct_total +=corr
        running_loss += loss.item()

    # training metrics are averaged over the batches seen during this epoch
    running_loss /= len(train_data_loader)
    train_accuracy = correct_total/num_samples_total
    val_loss, val_acc = evaluate(net, validation_data_loader)

    print('Epoch: %d' %(epoch+1))
    print('Train Loss: %.3f  Train Accuracy:%.3f' %(running_loss, train_accuracy))
    print('Validation Loss: %.3f  Val Accuracy: %.3f' %(val_loss, val_acc))

    losses.append(running_loss)
    val_losses.append(val_loss)
    accuracy.append(train_accuracy)
    val_accuracy.append(val_acc)
print('Finished Training')
end_time = datetime.datetime.now()
delta = end_time -start_time
# total_seconds() covers the whole duration; .seconds alone would drop full days
print("Time for training of {} Epochs is {}s".format(num_epochs,int(delta.total_seconds())))

# Task 3: Why are the validation metrics better than the training metrics???
Didn't we learn that this should be the other way around?!

In [None]:
# submit your results
# TODO (Task 3): assign your explanation here (right-hand side intentionally left blank)
my_answer = 
assert my_answer is not None, 'Give it a try'
write_result(3, result=my_answer)

# Train and Validation Curves

To better understand whether our network is actually learning something, we plot the training and validation curves.

There are two types of curves:
- Loss Curves: Plotting the trend of the loss per epoch.
- Accuracy Curves: Plotting accuracy, that is the performance of our model per epoch.

In [None]:
# plot curves
from tqdm import tnrange, tqdm_notebook
import matplotlib.pyplot as plt
%matplotlib inline 
num_epochs=5
print(num_epochs)
epoch = range(1, num_epochs+1)
fig = plt.figure(figsize=(10, 15))
plt.subplot(2,1,2)
plt.plot(epoch, losses, label='Training loss')
plt.plot(epoch, val_losses, label='Validation loss')
plt.title('Training and Validation Loss')
plt.xlabel('Epochs')
plt.legend()
#plt.figure()
plt.show()

fig = plt.figure(figsize=(10, 15))
plt.subplot(2,1,2)
plt.plot(epoch, accuracy, label='Training accuracy')
plt.plot(epoch, val_accuracy, label='Validation accuracy')
plt.title('Training and Validation Accuracy')
plt.xlabel('Epochs')
plt.legend()
#plt.figure()
plt.show()



# Common Training Errors

Let us look at some errors and how to detect them.



## Data Scarcity

We need enough data to learn a good model. Less data means less learning.

Let's see how the performance is affected when we have less data.


In [None]:
# as before, just bigger network
from torchsummary import summary
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print("Running on:",device)
class OurNetBig(nn.Module):
    """Bigger variant of the network: three conv layers followed by three linear layers."""
    def __init__(self):
        super().__init__()
        # conv stack: 1 -> 6 -> 16 -> 32 channels
        self.conv1 = nn.Conv2d(1, 6, (5,5), padding=2)
        self.conv2 = nn.Conv2d(6, 16, (3,3), padding=2)
        self.conv3 = nn.Conv2d(16, 32, (3,3))
        # classifier head: flattened features -> 1000 -> 120 -> 10 scores
        self.fc1   = nn.Linear(32*13*13, 1000)
        self.fc2   = nn.Linear(1000, 120)
        self.fc3   = nn.Linear(120, 10)
    def forward(self, x):
        """Return the raw scores of the 10 outputs for a batch of images."""
        feat = F.relu(self.conv1(x))
        feat = F.relu(self.conv2(feat))
        feat = F.max_pool2d(feat, (2,2))
        feat = F.relu(self.conv3(feat))
        # collapse everything but the batch dimension before the linear layers
        flat = feat.view(-1, self.num_flat_features(feat))
        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)
    def num_flat_features(self, x):
        """Return the product of all dimensions of x except the first one."""
        count = 1
        for dim in x.shape[1:]:
            count = count * dim
        return count
# build the bigger network on the selected device and print a per-layer summary
net_data = OurNetBig().to(device)
summary(net_data, input_size=(1,28,28))

In [None]:
# choose subset to imitate scarce data
from torch.utils.data import Subset
batch_size = 50
subset_size = 200

# indices 0 .. subset_size-1, i.e. the first `subset_size` training samples
length_dataset = list(range(subset_size))
small_toy_dataset_train = Subset(toy_dataset_train, length_dataset)
# loaders for the small training subset and for the full validation set
train_data_loader_small = torch.utils.data.DataLoader(
    small_toy_dataset_train, batch_size=batch_size, shuffle=True, num_workers=4)
validation_data_loader = torch.utils.data.DataLoader(
    toy_dataset_val, batch_size=batch_size, shuffle=True, num_workers=4)

print(f"small dataset size: {len(small_toy_dataset_train)} batch_size: {batch_size} number of batches in dataloader: {len(train_data_loader_small)} ")


In [None]:
# weight initialisation
def weight_init(m):
    """Re-initialise the weights of a Conv2d module with Xavier-uniform values; leave any other module untouched."""
    if not isinstance(m, nn.Conv2d):
        return
    nn.init.xavier_uniform_(m.weight.data)

net_data.apply(weight_init)

In [None]:
# as before, but trained on the small subset
import torch.optim as optim


num_epochs = 25
# per-epoch history of the metrics
accuracy = []
val_accuracy = []
losses = []
val_losses = []
# print one placeholder line with all-zero metrics before the first epoch has run
running_loss= 0.0
train_accuracy = 0.0
val_loss = 0.0
val_acc = 0.0
epoch=0

print('Epoch: %d' %(epoch+1))
print('Loss: %.3f  Accuracy:%.3f' %(running_loss, train_accuracy))
print('Validation Loss: %.3f  Val Accuracy: %.3f' %(val_loss, val_acc))

# loss and optimizer for this run; evaluate() reads the global `criterion`,
# so from here on validation uses this unweighted loss as well
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net_data.parameters(), lr=1e-2)
print(net_data)
print("Started Training")

for epoch in tnrange(num_epochs):
    # make sure the model is in training mode (evaluate() calls model.eval())
    net_data.train()
    running_loss = 0.0
    correct_total= 0.0
    num_samples_total=0.0
    # wrap the loader itself (not enumerate) so the progress bar knows the number of batches
    for i, data in enumerate(tqdm_notebook(train_data_loader_small)):
        # get the inputs
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)
        # set the parameter gradients to zero
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net_data(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # compute accuracy
        _, predicted = torch.max(outputs, 1)
        b_len, corr = get_num_correct(predicted, labels)
        num_samples_total +=b_len
        correct_total +=corr
        running_loss += loss.item()

    # training metrics are averaged over the batches seen during this epoch
    running_loss /= len(train_data_loader_small)
    train_accuracy = correct_total/num_samples_total
    val_loss, val_acc = evaluate(net_data, validation_data_loader)

    print('Epoch: %d' %(epoch+1))
    print('Loss: %.3f  Accuracy:%.3f' %(running_loss, train_accuracy))
    print('Validation Loss: %.3f  Val Accuracy: %.3f' %(val_loss, val_acc))

    losses.append(running_loss)
    val_losses.append(val_loss)
    accuracy.append(train_accuracy)
    val_accuracy.append(val_acc)
print('Finished Training')

In [None]:
# let's look at the plots

epoch = range(1, num_epochs+1)

# one figure per metric: (title, training curve, validation curve, legend suffix)
for title, train_curve, val_curve, metric in (
        ('Training and Validation Loss', losses, val_losses, 'loss'),
        ('Training and Validation Accuracy', accuracy, val_accuracy, 'accuracy')):
    fig = plt.figure(figsize=(10, 15))
    # the plot is drawn into the lower cell of a 2x1 grid, as before
    plt.subplot(2,1,2)
    plt.plot(epoch, train_curve, label='Training ' + metric)
    plt.plot(epoch, val_curve, label='Validation ' + metric)
    plt.title(title)
    plt.xlabel('Epochs')
    plt.legend()
    # no extra plt.figure() here: it would only open an additional, empty figure
    plt.show()


## Learning Rate


In [None]:
# fresh, untrained network for the learning-rate experiment, moved to the selected device
net_lr = OurNet().to(device)

# optimizer
import torch.optim as optim

# Adam over the new network's parameters with a learning rate of 10
optimizer_lr = optim.Adam(net_lr.parameters(), lr=10)

In [None]:
# number of loops over the dataset
num_epochs = 5
# per-epoch history of the metrics
accuracy = []
val_accuracy = []
losses = []
val_losses = []

for epoch in tnrange(num_epochs):
    # make sure the model is in training mode (evaluate() calls model.eval())
    net_lr.train()
    running_loss = 0.0
    correct_total= 0.0
    num_samples_total=0.0
    # wrap the loader itself (not enumerate) so the progress bar knows the number of batches
    for i, data in enumerate(tqdm_notebook(train_data_loader)):
        print(f"Iteration: {i+1}/{len(train_data_loader)}")
        # get the inputs
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)
        # set the parameter gradients to zero
        optimizer_lr.zero_grad()

        # forward + backward + optimize
        # (uses whatever `criterion` is currently bound at module level)
        outputs = net_lr(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer_lr.step()

        # compute accuracy
        _, predicted = torch.max(outputs, 1)
        b_len, corr = get_num_correct(predicted, labels)
        num_samples_total +=b_len
        correct_total +=corr
        running_loss += loss.item()

    # training metrics are averaged over the batches seen during this epoch
    running_loss /= len(train_data_loader)
    train_accuracy = correct_total/num_samples_total
    val_loss, val_acc = evaluate(net_lr, validation_data_loader)

    print('Epoch: %d' %(epoch+1))
    print('Loss: %.3f  Accuracy:%.3f' %(running_loss, train_accuracy))
    print('Validation Loss: %.3f  Val Accuracy: %.3f' %(val_loss, val_acc))

    losses.append(running_loss)
    val_losses.append(val_loss)
    accuracy.append(train_accuracy)
    val_accuracy.append(val_acc)
print('Finished Training')

In [None]:
# plot the graphs

epoch = range(1, num_epochs+1)
fig = plt.figure(figsize=(10, 15))
plt.subplot(2,1,2)
plt.plot(epoch, losses, label='Training loss')
plt.plot(epoch, val_losses, label='Validation loss')
plt.title('Training and Validation Loss')
plt.xlabel('Epochs')
plt.legend()
plt.figure()
plt.show()

fig = plt.figure(figsize=(10, 15))
plt.subplot(2,1,2)
plt.plot(epoch, accuracy, label='Training accuracy')
plt.plot(epoch, val_accuracy, label='Validation accuracy')
plt.title('Training and Validation Accuracy')
plt.xlabel('Epochs')
plt.legend()
plt.figure()
plt.show()


# Homework - Train a model which achieves at least 80% accuracy on HAM10000

In [None]:
# your code here



In [None]:
# share your results with us when you are finished
# short free-text description of your approach
describe_your_solution = ''
# your best accuracy; the default 0 is falsy, so the assert below fails until it is filled in
model_accuracy = 0
assert model_accuracy, "fill in your best accuracy"
# description and accuracy are submitted together as one text to the "homework" sheet
write_result('homework', f"{describe_your_solution}\n\n{model_accuracy}")