## Encoder Performance

Written by Isobel Mawby (i.mawby1@lancaster.ac.uk)

<div class="alert alert-block alert-info" style="font-size: 18px;">
    Imports
</div>

In [None]:
import numpy as np
import torch  
from torch.utils.data import DataLoader
import sys

import Datasets
import TrainingMetrics

<div class="alert alert-block alert-info" style="font-size: 18px;">
    Set device
</div>

In [None]:
# Device string handed to Datasets.get_classification_datasets when the datasets are built.
device = 'cpu'

<div class="alert alert-block alert-info" style="font-size: 18px;">
    Config
</div>

In [None]:
# Number of samples per batch, used by both the train and test DataLoaders.
BATCH_SIZE = 64

<div class="alert alert-block alert-info" style="font-size: 18px;">
    File 
</div>

In [None]:
# NOTE(review): absolute, user-specific path. `modelPath` is reassigned from
# sys.path[0] in the "Get our predictions" cell before this value is read,
# so this assignment currently has no effect - confirm which location is intended.
modelPath = '/home/imawby/Venusaurus/files/ContaminationClassifierModel_UVW'

<div class="alert alert-block alert-info" style="font-size: 18px;">
    Load the training and test datasets
</div>

In [None]:
# Build the train/test datasets and report the shapes of their inputs and labels.
# NOTE(review): TRAINING_FRACTION is not assigned in any cell visible in this
# notebook - confirm where it is meant to come from before a fresh-kernel run.
train_dataset, test_dataset = Datasets.get_classification_datasets(device, TRAINING_FRACTION)

# One tuple per printed line; the single-element tuple yields the blank separator line.
shape_report = [
    ('Input(train):', train_dataset.input.shape),
    ('Truth(train):', train_dataset.labels.shape),
    ('',),
    ('Input(test):', test_dataset.input.shape),
    ('Truth(test):', test_dataset.labels.shape),
]
for report_line in shape_report:
    print(*report_line)

In [None]:
# Options shared by both loaders: fixed batch size, loading in the main process.
loader_options = {'batch_size': BATCH_SIZE, 'num_workers': 0}

# Training split: shuffled, and a trailing partial batch is dropped.
train_dataloader = DataLoader(
    train_dataset,
    shuffle=True,
    drop_last=True,
    generator=torch.Generator(device='cpu'),
    **loader_options,
)

# Test split: dataset order is kept and every sample is served.
test_dataloader = DataLoader(
    test_dataset,
    shuffle=False,
    drop_last=False,
    generator=torch.Generator(device='cpu'),
    **loader_options,
)

<div class="alert alert-block alert-info" style="font-size: 18px;">
    Get our predictions
</div>

In [None]:
# Epoch whose saved TorchScript checkpoint is evaluated.
chosen_epoch = 1

# Checkpoint location: <sys.path[0]>/files/ContaminationClassifierModel_UVW_<epoch>.pt
modelPathStem = sys.path[0] + '/files/ContaminationClassifierModel_UVW'
modelPath = f"{modelPathStem}_{chosen_epoch}.pt"

# Load from the path built above (the previous `modelPath_classifier` name was never defined)
# and map the weights onto `device` so the model sits on the device chosen earlier.
model = torch.jit.load(modelPath, map_location=device)

In [None]:
def _collect_predictions(model, dataloader):
    """Run `model` over every batch of `dataloader` and gather scores and labels.

    Parameters
    ----------
    model : callable
        Called as `model(x)` on each batch; its output is passed through a softmax over dim=1.
    dataloader : iterable
        Yields `(x, label)` pairs.

    Returns
    -------
    tuple of np.ndarray
        `(scores, truth)`: the softmax scores of every batch stacked in iteration order,
        and the flattened labels in the same order.
    """
    scores = []
    truth = []

    with torch.no_grad():
        for x, label in dataloader:
            # Softmax (not sigmoid) over dim=1 normalises the model output across that dimension
            batch_scores = torch.softmax(model(x), dim=1)
            # .cpu() is a no-op for CPU tensors and keeps .numpy() valid otherwise
            scores.extend(batch_scores.cpu().numpy().tolist())
            truth.extend(label.flatten().cpu().numpy().tolist())

    # Turn into numpy arrays
    return np.array(scores), np.array(truth)


model.eval()

pred_final_train, truth_train = _collect_predictions(model, train_dataloader)
pred_final_test, truth_test = _collect_predictions(model, test_dataloader)

<div class="alert alert-block alert-info" style="font-size: 18px;">
    Make some post-training performance plots
</div>

In [None]:
# For three classes: false track, true track, shower
TrainingMetrics.draw_confusion(pred_final_train, truth_train, 0.95) # Threshold is for all classes

# One train-vs-test score plot per class index.
# Uses the test arrays built above; the previous `pred_final_val` / `truth_val` names were never defined.
for score_class_index in (0, 1, 2):
    TrainingMetrics.plot_scores_class(pred_final_train, pred_final_test, truth_train, truth_test, score_class_index)

In [None]:
def plot_scores_class(scores_train, scores_test, truth_train, truth_test, score_class_index) :
    """Histogram one score column, split by true class, for the train and test samples.

    Column `score_class_index` is taken from `scores_train` and `scores_test`. For each
    true class (0, 1, 2) the entries whose truth equals that class are drawn as a step
    histogram with 50 bins over [0, 1]; train is solid, test is dashed. Every histogram
    is weighted by 1/N (N = number of entries drawn) so its bin contents sum to one.
    The y axis is logarithmic and the figure is shown with `plt.show()`.

    Parameters
    ----------
    scores_train, scores_test : array-like
        Indexed as `scores[:, score_class_index]`.
    truth_train, truth_test : array-like
        Compared element-wise with the class index to build the selection masks.
    score_class_index : int
        Column of the score arrays to plot.

    Notes
    -----
    A class/split with no entries is skipped (previously it raised ZeroDivisionError).
    NOTE(review): `plt` is used here but is not imported in the notebook's import cell -
    presumably matplotlib.pyplot; confirm it is in scope.
    """
    # true class index -> (legend label, line colour)
    class_styles = {0: ('False', 'red'), 1: ('True', 'blue'), 2: ('Shower', 'green')}

    class_scores_train = scores_train[:, score_class_index]
    class_scores_test = scores_test[:, score_class_index]

    for class_index, (legend_string, graph_color) in class_styles.items():

        splits = (
            (class_scores_train[truth_train == class_index], ' train', 'solid'),
            (class_scores_test[truth_test == class_index], ' test', 'dashed'),
        )

        for this_scores, split_suffix, line_style in splits:
            n_entries = this_scores.shape[0]
            if n_entries == 0:
                # Nothing to draw, and 1/N would be undefined
                continue

            # Equal weights of 1/N turn raw counts into proportions
            plotting_weights = np.full(this_scores.shape, 1.0 / float(n_entries))

            plt.hist(this_scores, bins=50, range=(0, 1.0), color=graph_color, label=(legend_string + split_suffix), weights=plotting_weights, histtype='step', linestyle=line_style)

    plt.yscale("log")

    plt.xlabel(('Classification Score For Class: ' + str(score_class_index)))
    plt.ylabel('Proportion of Clusters')
    plt.legend(loc='best')
    plt.show()
    
# One train-vs-test score plot per class index.
# Uses the test arrays built above; the previous `pred_final_val` / `truth_val` names were never defined.
for score_class_index in (0, 1, 2):
    plot_scores_class(pred_final_train, pred_final_test, truth_train, truth_test, score_class_index)