## Contamination Classifier Training

written by Isobel Mawby (i.mawby1@lancaster.ac.uk)

<div class="alert alert-block alert-info" style="font-size: 18px;">
    Imports
</div>

In [None]:
import numpy as np
import torch  
import torch.nn as nn  
import torch.optim as optim  
from torch.utils.data import DataLoader
from tqdm.notebook import trange, tqdm
import matplotlib.pyplot as plt
import time
import sys

import Datasets
import TrainingMetrics
import Models

<div class="alert alert-block alert-info" style="font-size: 18px;">
    Set device
</div>

In [None]:
# Prefer the first CUDA GPU when one is visible to PyTorch, otherwise run on the CPU
device_name = "cuda:0" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)
print(device)

<div class="alert alert-block alert-info" style="font-size: 18px;">
    Config
</div>

In [None]:
# Value handed to Datasets.get_classification_datasets when building the train/test datasets
# (presumably the fraction of samples assigned to the training split - confirm in Datasets).
TRAINING_FRACTION = 0.75
# Number of samples per batch, used by both the training and the test DataLoader.
BATCH_SIZE = 64
# Initial learning rate given to the Adam optimiser.
LEARNING_RATE = 1e-4
# Number of iterations of the epoch loop.
N_EPOCHS = 1

<div class="alert alert-block alert-info" style="font-size: 18px;">
    File 
</div>

In [None]:
# Path prefix for saved models; the epoch number and file extension are appended at save time.
# NOTE(review): sys.path[0] is whatever entry the kernel placed first on the import path -
# confirm it is the notebook directory in your setup.
modelPath = ''.join((sys.path[0], '/files/ContaminationClassifierModel_UVW'))

<div class="alert alert-block alert-info" style="font-size: 18px;">
    Load the training and test datasets
</div>

In [None]:
# Build the training and test datasets for the configured device and training fraction
train_dataset, test_dataset = Datasets.get_classification_datasets(device, TRAINING_FRACTION)

# One tuple per printed line; the single-element tuple produces the blank separator line
shape_report = (
    ('Input(train):', train_dataset.input.shape),
    ('Truth(train):', train_dataset.labels.shape),
    ('',),
    ('Input(test):', test_dataset.input.shape),
    ('Truth(test):', test_dataset.labels.shape),
)
for report_line in shape_report:
    print(*report_line)

In [None]:
# Settings common to both loaders
shared_loader_kwargs = dict(batch_size=BATCH_SIZE, num_workers=0)

# Training batches: reshuffled on every pass, incomplete final batch dropped
train_dataloader = DataLoader(
    train_dataset,
    shuffle=True,
    drop_last=True,
    generator=torch.Generator(device='cpu'),
    **shared_loader_kwargs,
)

# Test batches: fixed order, every sample kept
test_dataloader = DataLoader(
    test_dataset,
    shuffle=False,
    drop_last=False,
    generator=torch.Generator(device='cpu'),
    **shared_loader_kwargs,
)

<div class="alert alert-block alert-info" style="font-size: 18px;">
    Class Weights
</div>

In [None]:
# Count the training labels in each class (0 = false, 1 = true, 2 = shower)
nFalse, nTrue, nShower = (
    torch.count_nonzero(train_dataset.labels == class_id).item()
    for class_id in (0, 1, 2)
)
maxLinks = max(nTrue, nFalse, nShower)

# Weight each class by (largest class count / its own count), so the most
# populated class gets weight 1 and rarer classes get proportionally more
class_weights = torch.tensor([maxLinks / class_count for class_count in (nFalse, nTrue, nShower)])

print('nShower:', nShower)
print('nTrue:', nTrue)
print('nFalse:', nFalse)
print('weights:', class_weights)

<div class="alert alert-block alert-info" style="font-size: 18px;">
    Setup training
</div>

In [None]:
# Size the network from the last of the three dimensions of the training input
input_shape = train_dataset.input.shape
_, _, n_features = input_shape
model = Models.ConvModel(device, num_features=n_features)

# Adam over every model parameter, starting from the configured learning rate
optimiser = torch.optim.Adam(params=model.parameters(), lr=LEARNING_RATE)

# Multiply the learning rate by a constant factor on each scheduler.step()
LR_DECAY_GAMMA = 0.97
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer=optimiser, gamma=LR_DECAY_GAMMA)

<div class="alert alert-block alert-info" style="font-size: 18px;">
    Let's start training!
</div>

In [None]:
def loss_function(pred, targets, class_weights):
    """Class-weighted cross-entropy averaged over the batch.

    Args:
        pred: raw (un-normalised) class scores, one row per sample.
        targets: integer class label per sample (0, 1 or 2).
        class_weights: indexable holding one multiplicative weight per class.

    Returns:
        Scalar tensor: the sum of the weighted per-sample losses divided by
        the number of samples (not by the sum of the weights).
    """
    # Unreduced loss so each sample can be scaled by its own class weight
    per_sample = torch.nn.functional.cross_entropy(pred, targets, reduction='none')

    for class_index in range(3):
        in_class = targets == class_index
        per_sample[in_class] = per_sample[in_class] * class_weights[class_index]

    n_samples = per_sample.shape[0]
    return per_sample.sum() / n_samples

In [None]:
def _ratio(numerator, denominator):
    """Return numerator / denominator, or NaN when the denominator is zero."""
    return numerator / denominator if denominator else float('nan')


def _evaluate(dataloader, description):
    """Run the current model over ``dataloader`` without gradient tracking.

    Args:
        dataloader: iterable yielding (input, label) batches.
        description: text shown on the progress bar.

    Returns:
        (loss_sum, n_correct, n_total) where ``loss_sum`` is the sum of the
        per-batch weighted losses and ``n_correct`` / ``n_total`` are
        three-element lists indexed by class label (0 = false, 1 = true,
        2 = shower) holding correctly-predicted and total sample counts.
    """
    loss_sum = 0.0
    n_correct = [0, 0, 0]
    n_total = [0, 0, 0]
    with torch.no_grad():
        for x_batch, label_batch in tqdm(dataloader, desc=description, leave=True):
            pred = model(x_batch)

            label_batch = label_batch.squeeze(1)
            loss_sum += loss_function(pred, label_batch, class_weights).item()

            # Softmax is monotonic, so the argmax of the raw scores is the
            # predicted class; no need to normalise first
            pred_class = torch.argmax(pred, dim=1)
            for class_id in range(3):
                is_class = label_batch == class_id
                n_correct[class_id] += (pred_class[is_class] == class_id).sum().item()
                n_total[class_id] += is_class.sum().item()
    return loss_sum, n_correct, n_total


# Initialize progress bar for tracking epochs
pbar = trange(0, N_EPOCHS, leave=True, desc="Epoch")

# Loggers for training and testing (one entry appended per epoch)
training_av_loss_logger = []
test_av_loss_logger = []
training_acc_logger = []   # overall accuracy as a fraction in [0, 1]
test_acc_logger = []       # overall accuracy as a fraction in [0, 1]
train_false = []           # per-class efficiencies in percent
train_true = []
train_shower = []
test_false = []
test_true = []
test_shower = []

# Loop over each epoch
for epoch in pbar:

    # ---- Training pass ----
    model.train()
    t0 = time.time()

    for x_train, label_train in tqdm(train_dataloader, desc="Training", leave=True):

        # Get the model predictions
        pred = model(x_train)

        # Compute the class-weighted cross-entropy loss
        label_train = label_train.squeeze(1)
        loss = loss_function(pred, label_train, class_weights)

        # Backpropagation and optimization step
        optimiser.zero_grad()
        loss.backward()
        optimiser.step()

    tt = time.time()
    print(f'Training Time: {tt-t0:.3f} s')

    # Update learning rate (scientific notation so the small decay step is visible)
    before_lr = optimiser.param_groups[0]["lr"]
    scheduler.step()
    after_lr = optimiser.param_groups[0]["lr"]
    print("Epoch %d: Adam lr %.3e -> %.3e" % (epoch, before_lr, after_lr))

    # ---- Evaluation pass over both datasets ----
    model.eval()

    (train_loss_count,
     (n_pred_false_train, n_pred_true_train, n_pred_shower_train),
     (n_false_train, n_true_train, n_shower_train)) = _evaluate(train_dataloader, "Validation(Train)")

    (test_loss_count,
     (n_pred_false_test, n_pred_true_test, n_pred_shower_test),
     (n_false_test, n_true_test, n_shower_test)) = _evaluate(test_dataloader, "Validation(Test)")

    tv = time.time()
    print(f'Validation Time: {tv-tt:.3f} s')

    # Per-class efficiency in percent (NaN if the class is absent from the dataset)
    false_eff_train = round(_ratio(n_pred_false_train, n_false_train) * 100.0, 2)
    false_eff_test = round(_ratio(n_pred_false_test, n_false_test) * 100.0, 2)
    true_eff_train = round(_ratio(n_pred_true_train, n_true_train) * 100.0, 2)
    true_eff_test = round(_ratio(n_pred_true_test, n_true_test) * 100.0, 2)
    shower_eff_train = round(_ratio(n_pred_shower_train, n_shower_train) * 100.0, 2)
    shower_eff_test = round(_ratio(n_pred_shower_test, n_shower_test) * 100.0, 2)

    # Record values
    train_false.append(false_eff_train)
    train_true.append(true_eff_train)
    train_shower.append(shower_eff_train)
    test_false.append(false_eff_test)
    test_true.append(true_eff_test)
    test_shower.append(shower_eff_test)

    # Average of the per-batch losses
    training_av_loss_logger.append(_ratio(train_loss_count, len(train_dataloader)))
    test_av_loss_logger.append(_ratio(test_loss_count, len(test_dataloader)))

    # Overall accuracy: correctly classified samples over all samples, as a fraction
    training_acc_logger.append(_ratio(
        n_pred_false_train + n_pred_true_train + n_pred_shower_train,
        n_false_train + n_true_train + n_shower_train))
    test_acc_logger.append(_ratio(
        n_pred_false_test + n_pred_true_test + n_pred_shower_test,
        n_false_test + n_true_test + n_shower_test))

    # Do prints for epoch
    print(f'False efficiency: {false_eff_train}% (train), {false_eff_test}% (test)')
    print(f'True efficiency: {true_eff_train}% (train), {true_eff_test}% (test)')
    print(f'Shower efficiency: {shower_eff_train}% (train), {shower_eff_test}% (test)')

    # Save model: a TorchScript archive (.pt) and the plain state dict (.pkl),
    # both written from a CPU copy, then move the model back for the next epoch
    model_cpu = model.to('cpu').eval()
    sm = torch.jit.script(model_cpu)
    sm.save(f"{modelPath}_{epoch}.pt")
    torch.save(model_cpu.state_dict(), f"{modelPath}_{epoch}.pkl")
    model = model.to(device)
    print(f"Saved model at epoch {epoch}")

<div class="alert alert-block alert-info" style="font-size: 18px;">
    Make some post-training performance plots
</div>

In [None]:
# Average loss per epoch for the training and test datasets.
# The x-axis is sized from each logger rather than N_EPOCHS so the cell still
# draws when fewer epochs were logged (interrupted run, N_EPOCHS edited later).
_ = plt.figure(figsize=(10, 5))
_ = plt.plot(range(1, len(training_av_loss_logger) + 1), training_av_loss_logger)
_ = plt.plot(range(1, len(test_av_loss_logger) + 1), test_av_loss_logger)
_ = plt.legend(["Train", "Test"])
_ = plt.title("Training Vs Test Av. Loss")
_ = plt.xlabel("Epochs")
_ = plt.ylabel("Loss")

In [None]:
# Accuracy per epoch for the training and test datasets.
# The x-axis is sized from each logger rather than N_EPOCHS so a logger with a
# different number of entries does not raise a length-mismatch error.
_ = plt.figure(figsize=(10, 5))
_ = plt.plot(range(1, len(training_acc_logger) + 1), training_acc_logger)
_ = plt.plot(range(1, len(test_acc_logger) + 1), test_acc_logger)

_ = plt.legend(["Train", "Test"])
_ = plt.title("Training Vs Test Accuracy")
_ = plt.xlabel("Epochs")
_ = plt.ylabel("Accuracy")

# np.max raises on an empty sequence, so only report a maximum when values exist
if len(test_acc_logger) > 0:
    print("Max Test Accuracy %.2f%%" % (np.max(test_acc_logger) * 100))
else:
    print("Max Test Accuracy unavailable: test_acc_logger is empty")