# Learning process management

### Mount drive

In [None]:
# Mount Google Drive so that the dataset, run logs and saved models
# referenced by the cells below are reachable under /content/drive.
from google.colab import drive

mount_point = '/content/drive'
drive.mount(mount_point)

### Import required libraries

In [None]:
pip install tensorboard

In [None]:
import torch
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision.datasets as datasets
from torch.utils.tensorboard import SummaryWriter
from datetime import datetime
from datetime import date
from itertools import product
import os

In [None]:
from platform import python_version
print(python_version())
print(torch.__version__)
print(torchvision.__version__)
!nvidia-smi -L
!nvidia-smi
!lscpu |grep 'Model name'

### Set hyperparameters

In [None]:
# Hyperparameter grid: each key maps to the list of values to try.
# The training cell iterates over the cartesian product of these lists and
# unpacks them positionally, so the key order here must not change.
parameters = {
    'learning_rate': [0.001],
    'batch_size': [100],
    'weight_decay': [0],
    'epoch_number': [20],
    'scheduler_step_size': [5],
    'scheduler_gamma': [1],
}
param_values = list(parameters.values())

# Reference of the training dataset (selects normalisation row and model variant)
trg_dataset_ref = 1
# Number of samples held out of the dataset for validation
valloader_size = 1000

### Prepare data for training

In [None]:
# Retrieve normalisation parameters
#
# The CSV holds one row per training dataset with the per-channel mean and
# standard deviation (columns meanR/meanG/meanB and stdR/stdG/stdB). The row
# whose "Dataset" value equals trg_dataset_ref is selected.

norm_param_df = pd.read_csv('/content/drive/MyDrive/KASHIKO/DATASET/TRG_DATASET_NORM_PARAM.csv')

# Compare as strings on both sides: pandas parses an all-numeric "Dataset"
# column as integers, in which case a direct comparison with a str never matches.
dataset_mask = norm_param_df["Dataset"].astype(str) == str(trg_dataset_ref)
dataset_rows = norm_param_df.loc[dataset_mask]

# Exactly one row is expected; fail with an explicit message otherwise
# (the original .item() call raised a less descriptive ValueError here).
if len(dataset_rows) != 1:
    raise ValueError(
        f'Expected exactly one normalisation row for dataset {trg_dataset_ref!r}, '
        f'found {len(dataset_rows)}'
    )

meanR = dataset_rows["meanR"].item()
meanG = dataset_rows["meanG"].item()
meanB = dataset_rows["meanB"].item()

stdR = dataset_rows["stdR"].item()
stdG = dataset_rows["stdG"].item()
stdB = dataset_rows["stdB"].item()

In [None]:
# Prepare normalised dataset
# Each image is converted to a tensor and then normalised per channel with
# the mean/std values retrieved for the selected training dataset.
# NOTE(review): the folder is hard-coded to TRG_1_FINAL while the normalisation
# parameters follow trg_dataset_ref - confirm this is intended for other datasets.
normalise_channels = transforms.Normalize(
    mean=(meanR, meanG, meanB),
    std=(stdR, stdG, stdB),
)
image_pipeline = transforms.Compose([transforms.ToTensor(), normalise_channels])

dataset = datasets.ImageFolder(
    root='/content/drive/MyDrive/KASHIKO/DATASET/TRG_1_FINAL/',
    transform=image_pipeline,
)

### Define model and loss function (the optimizer is created in the training cell)

In [None]:
# Model = 2 convolutional layers + 3 fully connected layers
#
# The architecture is the same for every training dataset; only the weight
# initialisation differs:
#   trg_dataset_ref == 1 -> PyTorch default initialisation
#   trg_dataset_ref == 2 -> Xavier uniform weights, biases set to 0.1
#   trg_dataset_ref == 3 -> Xavier normal weights, biases set to 0.1
_WEIGHT_INIT_BY_DATASET = {
    1: None,
    2: nn.init.xavier_uniform_,
    3: nn.init.xavier_normal_,
}

# Fail early instead of leaving Net undefined (or stale from a previous run)
# when an unsupported dataset reference is configured.
if trg_dataset_ref not in _WEIGHT_INIT_BY_DATASET:
    raise ValueError(
        f'Unsupported trg_dataset_ref {trg_dataset_ref!r}; '
        f'expected one of {sorted(_WEIGHT_INIT_BY_DATASET)}'
    )

# Size of the flattened feature map fed to the first fully connected layer:
# 24 channels of 53x53, which is what two (5x5 conv + 2x2 pool) stages produce
# from a 224x224 input.
_FLAT_FEATURES = 24 * 53 * 53


class Net(nn.Module):
    """Two conv/batch-norm/max-pool stages followed by three linear layers.

    Produces 2 output logits per image.

    Args:
        weight_init: in-place initialiser applied to the weight of every
            conv/linear layer (e.g. ``nn.init.xavier_uniform_``). When given,
            conv/linear biases are set to 0.1 and batch-norm weight/bias to
            1/0. ``None`` keeps PyTorch's default initialisation. Defaults to
            the scheme selected by ``trg_dataset_ref`` when the class is defined.
    """

    def __init__(self, weight_init=_WEIGHT_INIT_BY_DATASET[trg_dataset_ref]):
        super().__init__()

        def init_learnable(layer):
            # Re-initialise a conv/linear layer right after it is created.
            if weight_init is not None:
                weight_init(layer.weight)
                nn.init.constant_(layer.bias, 0.1)
            return layer

        def init_batch_norm(layer):
            # Explicitly set batch-norm to the identity scale/shift.
            if weight_init is not None:
                nn.init.constant_(layer.weight, 1)
                nn.init.constant_(layer.bias, 0)
            return layer

        self.conv1 = init_learnable(nn.Conv2d(3, 12, 5))
        self.bn1 = init_batch_norm(nn.BatchNorm2d(12))
        self.pool1 = nn.MaxPool2d(2, 2)
        self.conv2 = init_learnable(nn.Conv2d(12, 24, 5))
        self.bn2 = init_batch_norm(nn.BatchNorm2d(24))
        self.pool2 = nn.MaxPool2d(2, 2)
        self.fc1 = init_learnable(nn.Linear(_FLAT_FEATURES, 120))
        self.fc2 = init_learnable(nn.Linear(120, 84))
        self.fc3 = init_learnable(nn.Linear(84, 2))

    def forward(self, x):
        """Return the class logits for a batch of images."""
        x = self.pool1(F.relu(self.bn1(self.conv1(x))))
        x = self.pool2(F.relu(self.bn2(self.conv2(x))))
        # Flatten everything except the batch dimension; unlike view(-1, N)
        # this can never fold a wrong feature size into the batch dimension.
        x = x.flatten(1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x


net = Net()

In [None]:
# Define Loss function
# Cross-entropy over the raw class logits, averaged over the samples of a
# batch ('mean' is the library default, spelled out here for clarity).
criterion = nn.CrossEntropyLoss(reduction='mean')

### Train model

In [None]:
# Train one model per hyperparameter combination.
#
# For every combination: split the dataset, train for `epoch_number` epochs,
# evaluate on the validation split after each epoch, log metrics and layer
# histograms to TensorBoard, and save the weights whenever the validation
# accuracy improves. Class 0 is treated as the positive class.

def _safe_ratio(numerator, denominator, undefined=-1.0):
    """Return numerator / denominator, or `undefined` when the denominator is zero."""
    return numerator / denominator if denominator != 0.0 else undefined


for learning_rate, batch_size, weight_decay, epoch_number, scheduler_step_size, scheduler_gamma in product(*param_values):

    # Initialise a fresh model for this combination
    net = Net()

    # Split dataset into a training dataset and a validation dataset
    train_dataset, test_dataset = torch.utils.data.random_split(dataset, [len(dataset) - valloader_size, valloader_size])
    trainloader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=batch_size,
        shuffle=True,
        num_workers=4,
        drop_last=True)
    valloader = torch.utils.data.DataLoader(
        test_dataset,
        batch_size=1,
        shuffle=True,
        num_workers=4,
        drop_last=True)

    # Define Optimizer and scheduler
    optimizer = optim.Adam(net.parameters(), lr=learning_rate, weight_decay=weight_decay)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=scheduler_step_size, gamma=scheduler_gamma)

    # Initialize tensorboard SummaryWriter file/directory.
    # Date and time come from a single timestamp so they cannot disagree, and
    # the path uses 'MyDrive' like every other Drive path in this notebook.
    started_at = datetime.now()
    date_now = str(started_at.date())
    time_now = started_at.strftime("%H:%M:%S")
    log_dir_root = os.path.join('/content/drive/MyDrive/KASHIKO/RUNS', date_now + '_' + time_now + '_')
    comment = f' trg_dataset{trg_dataset_ref} batch_size={batch_size} learning_rate={learning_rate} scheduler_step_size={scheduler_step_size} scheduler_gamma={scheduler_gamma} weight_decay={weight_decay} epoch_number={epoch_number}'
    log_dir = log_dir_root + comment
    tb = SummaryWriter(log_dir)

    best_accuracy = 0.0

    try:
        for epoch in range(epoch_number):  # loop over the dataset multiple times

            # Training mode (batch-norm uses batch statistics). Parameters
            # require gradients by default, so nothing else needs enabling.
            net.train()
            trg_running_loss = 0.0
            trg_epoch_loss = 0.0

            for i, (inputs, labels) in enumerate(trainloader):
                # zero the parameter gradients
                optimizer.zero_grad()

                # forward + backward + optimize
                outputs = net(inputs)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()

                # print statistics
                batch_loss = loss.item()
                trg_running_loss += batch_loss
                trg_epoch_loss += batch_loss
                if i % 10 == 9:    # print every 10 mini-batches
                    print('[%d, %5d] loss: %.3f' %
                          (epoch + 1, i + 1, trg_running_loss / 10))
                    trg_running_loss = 0.0

            # At the end of each epoch, check the performance of the network using the validation dataset
            correct = 0.0
            total = 0.0
            TP = 0.0
            TN = 0.0
            FP = 0.0
            FN = 0.0
            val_loss = 0.0
            net.eval()  # evaluation mode: batch-norm uses its running statistics
            with torch.no_grad():
                for images, labels in valloader:
                    outputs = net(images)
                    _, predicted = torch.max(outputs, 1)
                    is_correct = predicted == labels
                    total += labels.size(0)
                    correct += is_correct.sum().item()
                    # Each comparison is parenthesised: '&' binds tighter than
                    # '=='/'!=', so the unparenthesised form compared the wrong operands.
                    TP += (is_correct & (labels == 0)).sum().item()
                    TN += (is_correct & (labels == 1)).sum().item()
                    FP += (~is_correct & (predicted == 0)).sum().item()
                    FN += (~is_correct & (predicted == 1)).sum().item()
                    # The criterion returns the batch mean; weight it by the batch
                    # size so the total stays a per-sample sum for any batch size.
                    val_loss += criterion(outputs, labels).item() * labels.size(0)

            # Ratios that are undefined (zero denominator) are reported as -1.0
            val_accuracy = 100 * correct / total
            val_sensitivity_TPR = _safe_ratio(TP, FN + TP)
            val_specificity_TNR = _safe_ratio(TN, TN + FP)
            val_FPR = _safe_ratio(FP, TN + FP)
            val_FNR = _safe_ratio(FN, FN + TP)
            val_precision = _safe_ratio(TP, TP + FP)
            val_recall = val_sensitivity_TPR
            # F1 = 2TP / (2TP + FP + FN): equal to the harmonic mean of precision
            # and recall, but well defined (0) when there are no true positives.
            val_F1_score = _safe_ratio(2 * TP, 2 * TP + FP + FN)
            print(f'Accuracy of the network on the {valloader_size} test images:{val_accuracy}')

            # Store metrics and other parameters in tensorboard SummaryWriter
            # Metrics
            # Both losses are per-sample means so the two curves are comparable:
            # training = mean of the per-batch mean losses, validation = sum / count.
            tb.add_scalar('Training Loss', trg_epoch_loss / len(trainloader), epoch+1)
            tb.add_scalar('Validation Loss', val_loss / total, epoch+1)
            tb.add_scalar('Accuracy', val_accuracy, epoch+1)
            tb.add_scalar('Sensitivity TPR', val_sensitivity_TPR, epoch+1)
            tb.add_scalar('Specificity TNR', val_specificity_TNR, epoch+1)
            tb.add_scalar('FPR', val_FPR, epoch+1)
            tb.add_scalar('FNR', val_FNR, epoch+1)
            tb.add_scalar('Precision', val_precision, epoch+1)
            tb.add_scalar('Recall', val_recall, epoch+1)
            tb.add_scalar('F1 Score', val_F1_score, epoch+1)
            # DEBUG
            tb.add_scalar('False Positive', FP, epoch+1)
            tb.add_scalar('False Negative', FN, epoch+1)
            tb.add_scalar('True Positive', TP, epoch+1)
            tb.add_scalar('True Negative', TN, epoch+1)
            # Training parameters
            tb.add_scalar('Learning rate (scheduler)', optimizer.param_groups[0]["lr"], epoch+1)
            # NN Layers parameters (gradients are those of the last training step of the epoch)
            for layer_name in ('conv1', 'bn1', 'conv2', 'bn2', 'fc1', 'fc2', 'fc3'):
                layer = getattr(net, layer_name)
                tb.add_histogram(f'{layer_name}.bias', layer.bias, epoch+1)
                tb.add_histogram(f'{layer_name}.weight', layer.weight, epoch+1)
                tb.add_histogram(f'{layer_name}.weight.grad', layer.weight.grad, epoch+1)

            # Save best models (the epoch in the file name is the zero-based loop index)
            if val_accuracy > best_accuracy:
                best_model_epoch = epoch
                comment = f' trg_dataset{trg_dataset_ref} batch_size={batch_size} learning_rate={learning_rate} scheduler_step_size={scheduler_step_size} scheduler_gamma={scheduler_gamma} weight_decay={weight_decay} epoch_number={best_model_epoch} accuracy={val_accuracy}'
                torch.save(net.state_dict(), '/content/drive/MyDrive/KASHIKO/MODELS/model_' + date_now + '_' + time_now + '_' + comment + '.pth')
                best_accuracy = val_accuracy

            # Update learning rate
            scheduler.step()
    finally:
        # Always close the writer so buffered events are flushed, even when
        # the run is interrupted or an epoch raises.
        tb.close()

    print('Training Completed')