## Encoder-Decoder Performance

written by Isobel Mawby (i.mawby1@lancaster.ac.uk)

<div class="alert alert-block alert-info" style="font-size: 18px;">
    Imports
</div>

In [None]:
import numpy as np
import torch  
from torch.utils.data import DataLoader
import sys

#sys.path.insert(0, '/home/imawby/LArMachineLearningData/scripts/deep_learning/clusterSplitting')

import Datasets
import TrainingMetrics

<div class="alert alert-block alert-info" style="font-size: 18px;">
    Set device
</div>

In [None]:
# Device string used for the rest of the notebook (passed to the dataset loader below).
device = "cpu"

<div class="alert alert-block alert-info" style="font-size: 18px;">
    Config
</div>

In [None]:
# Number of samples per DataLoader batch.
BATCH_SIZE = 64
# Fraction handed to the dataset loader to define the train/test split.
TRAINING_FRACTION = 0.75
# Loss scaling; also forms part of the saved model's file name.
ALPHA = 2.0

<div class="alert alert-block alert-info" style="font-size: 18px;">
    File 
</div>

In [None]:
# Base path (no alpha/epoch suffix, no extension) of the saved model files.
# NOTE(review): this relies on sys.path[0] being the directory that contains
# `models/` - confirm this holds for the kernel the notebook is run with.
modelPath = f"{sys.path[0]}/models/SplitPointModel_UVW"

<div class="alert alert-block alert-info" style="font-size: 18px;">
    Pull out things from file
</div>

In [None]:
# Build the train/test datasets for the configured device and split fraction.
train_dataset, test_dataset = Datasets.get_split_point_datasets(device, TRAINING_FRACTION)

# Report the tensor shapes of each split as a quick sanity check.
for caption, tensor in (
    ('Input(train):', train_dataset.input),
    ('Truth(train):', train_dataset.labels),
    ('Contaminated(train):', train_dataset.is_contaminated),
):
    print(caption, tensor.shape)
print('')
for caption, tensor in (
    ('Input(test):', test_dataset.input),
    ('Truth(test):', test_dataset.labels),
    ('Contaminated(test):', test_dataset.is_contaminated),
):
    print(caption, tensor.shape)

In [None]:
# Count the training entries carrying each is_contaminated flag value
# (0 -> background, 1 -> signal, 2 -> showers).
contamination_train = train_dataset.is_contaminated
for caption, flag in (('n_background:', 0), ('n_signal:', 1), ('n_showers:', 2)):
    print(caption, torch.count_nonzero(contamination_train == flag).item())

<div class="alert alert-block alert-info" style="font-size: 18px;">
     Remove showers; we didn't train on them
</div>

In [None]:
# Entries flagged 2 in is_contaminated are showers; keep everything else.
# NOTE: this cell rewrites the dataset tensors in place (extra axis on input/labels,
# is_contaminated reshaped to a column), so run it only once per kernel session.
mask_train = train_dataset.is_contaminated != 2
mask_test = test_dataset.is_contaminated != 2

for dataset, keep in ((train_dataset, mask_train), (test_dataset, mask_test)):
    # Insert a new axis at position 1, then select the non-shower entries.
    dataset.input = dataset.input.unsqueeze(1)[keep]
    dataset.labels = dataset.labels.unsqueeze(1)[keep]
    # Store the surviving flags as a single column.
    dataset.is_contaminated = dataset.is_contaminated[keep].reshape(-1, 1)

# Shapes after filtering, for comparison with the unfiltered shapes.
for caption, tensor in (
    ('Input(train):', train_dataset.input),
    ('Truth(train):', train_dataset.labels),
    ('Contaminated(train):', train_dataset.is_contaminated),
):
    print(caption, tensor.shape)
print('')
for caption, tensor in (
    ('Input(test):', test_dataset.input),
    ('Truth(test):', test_dataset.labels),
    ('Contaminated(test):', test_dataset.is_contaminated),
):
    print(caption, tensor.shape)

In [None]:
# Training loader: shuffled, and the final incomplete batch is dropped.
# NOTE(review): the generator is unseeded, so which training samples end up in the
# dropped batch can differ between runs - confirm this is acceptable for evaluation.
train_dataloader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    drop_last=True,
    num_workers=0,
    generator=torch.Generator(device='cpu'),
)

# Test loader: fixed order, every sample kept.
test_dataloader = DataLoader(
    test_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    drop_last=False,
    num_workers=0,
    generator=torch.Generator(device='cpu'),
)

<div class="alert alert-block alert-info" style="font-size: 18px;">
    Get our predictions
</div>

In [None]:
# Epoch whose saved model should be evaluated.
chosen_epoch = 0

# Build the full checkpoint path in its own variable instead of overwriting
# modelPath: re-running this cell would otherwise append the suffix a second
# time and point at a file that does not exist.
chosen_model_path = f"{modelPath}_alpha_{ALPHA}_epoch_{chosen_epoch}.pt"

# map_location remaps the stored tensors onto the device configured above, so a
# checkpoint saved from a different device can still be loaded here.
model = torch.jit.load(chosen_model_path, map_location=device)

In [None]:
def _collect_predictions(network, dataloader):
    """Run `network` over every batch of `dataloader` without tracking gradients.

    Each batch is expected to unpack into (input, label, extra); the third item
    is ignored. A sigmoid is applied to the raw network output, and both the
    scores and the labels are flattened to 1D.

    Returns:
        (scores, truth): two numpy arrays holding the flattened sigmoid scores
        and the flattened labels, in dataloader order.
    """
    scores = []
    truth = []

    with torch.no_grad():
        for inputs, labels, _ in dataloader:
            probabilities = torch.sigmoid(network(inputs))
            scores.extend(probabilities.reshape(-1).tolist())
            truth.extend(labels.reshape(-1).tolist())

    return np.array(scores), np.array(truth)


# Put the model into evaluation mode before running inference.
model.eval()

pred_final_train, truth_train = _collect_predictions(model, train_dataloader)
pred_final_test, truth_test = _collect_predictions(model, test_dataloader)

<div class="alert alert-block alert-info" style="font-size: 18px;">
    Make some post-training performance plots
</div>

In [None]:
# Threshold used to identify signal; anything that is not signal counts as background.
signal_threshold = 0.5

# Score plots for the train and test predictions against their truth labels.
TrainingMetrics.plot_scores(pred_final_train, pred_final_test, truth_train, truth_test)
# Confusion matrix for the test set at the chosen threshold.
TrainingMetrics.draw_confusion(pred_final_test, truth_test, signal_threshold)