## Venusarus - Windows

Create training files from ROOT trees obtained from DLThreeViewClusterSplittingAlgorithm when run in training mode

written by Isobel Mawby (i.mawby1@lancaster.ac.uk)

<div class="alert alert-block alert-info" style="font-size: 18px;">
    Imports
</div>

In [None]:
import uproot
import random
import numpy as np
import sys

#sys.path.insert(0, '/home/imawby/LArMachineLearningData/scripts/deep_learning/clusterSplitting')

import Utilities

<div class="alert alert-block alert-info" style="font-size: 18px;">
    Config
</div>

In [None]:
# Window length handed to Utilities.CreateWindows. Clusters whose 'Longitudinal'
# array has WINDOW_SIZE entries or fewer are discarded before windowing.
WINDOW_SIZE = 48
# Forwarded to Utilities.CreateWindows as its third argument.
NO_OVERLAP = True # True ensures that no windows overlap

<div class="alert alert-block alert-info" style="font-size: 18px;">
    Open File
</div>

In [None]:
# Name of the TTree to read from the input ROOT file.
treeName = 'tree'

# Input ROOT file and output .npz archive, both resolved relative to the
# first entry of sys.path.
# NOTE(review): sys.path[0] depends on how the interpreter/kernel was started
# (and on the commented-out sys.path.insert above) - confirm it points at the
# directory that holds 'files/'.
fileName = sys.path[0] + '/files/KalmanTraining_UVW_0.root'
outputFileName = sys.path[0] + '/files/KalmanTraining_UVW_0.npz'

<div class="alert alert-block alert-info" style="font-size: 18px;">
    Pull things out of file
</div>

In [None]:
# Branches read from the training tree: the per-cluster feature branches plus
# the truth branches used further down for masking and labelling.
branch_names = [
    'Longitudinal', 'Transverse', 'Energy', 'HitWidth',
    'Angle', 'SecVertex', 'GapSep', 'EventHitSep', 'ClusterHitSep',
    'VertexL', 'IsContaminated', 'NContaminants', 'BacktrackedPDG',
]

# Open the tree inside a context manager so the file is closed once the
# arrays have been read. The result is indexed by branch name below.
with uproot.open(f"{fileName}:{treeName}") as tree:
    file_reco = tree.arrays(branch_names, library="np")

<div class="alert alert-block alert-info" style="font-size: 18px;">
    Remove clusters that are a mess
</div>

In [None]:
# Keep only the clusters with fewer than five contaminants, and apply the
# same selection to every branch so they all stay aligned entry-by-entry.
mask = file_reco['NContaminants'] < 5
for branch, values in file_reco.items():
    # Re-assigning an existing key does not resize the dict, so this is safe
    # to do while iterating over it.
    file_reco[branch] = values[mask]

<div class="alert alert-block alert-info" style="font-size: 18px;">
    Remove clusters that are too short
</div>

In [None]:
# Keep only clusters whose 'Longitudinal' array has more than WINDOW_SIZE
# entries. The mask is built as an explicit boolean ndarray so it is always
# used as a boolean selection, even when no clusters are left.
# NOTE(review): a cluster with exactly WINDOW_SIZE entries is dropped by the
# strict '>' - confirm this matches what Utilities.CreateWindows expects.
mask = np.array(
    [len(longitudinal) > WINDOW_SIZE for longitudinal in file_reco['Longitudinal']],
    dtype=bool,
)
# Apply the same selection to every branch so they stay aligned.
for entry in file_reco:
    file_reco[entry] = file_reco[entry][mask]

<div class="alert alert-block alert-info" style="font-size: 18px;">
    Tag TRUE showers
</div>

In [None]:
# Flag clusters whose backtracked PDG code has absolute value 11 or 22
# (the "TRUE showers" of this cell's heading).
abs_pdg = np.abs(file_reco['BacktrackedPDG'])
shower_mask = (abs_pdg == 11) | (abs_pdg == 22)

<div class="alert alert-block alert-info" style="font-size: 18px;">
    Create windows
</div>

In [None]:
# For every cluster, list the positions 0..N-1 of its 'Longitudinal' entries.
indices = [
    list(range(len(longitudinal)))
    for longitudinal in file_reco['Longitudinal']
]

# Split each cluster's position list into windows of WINDOW_SIZE entries.
windows_indices = [
    Utilities.CreateWindows(positions, WINDOW_SIZE, NO_OVERLAP)
    for positions in indices
]

# Derived from the windows by Utilities.GetClusterIndices; saved to the output
# file as 'cluster_index'.
cluster_indices = Utilities.GetClusterIndices(windows_indices)

<div class="alert alert-block alert-info" style="font-size: 18px;">
    Process truth & features
</div>

In [None]:
# Process truth
print('Forming truth...')
contamination_truth, split_point_truth = Utilities.ProcessTruth(
    shower_mask,
    cluster_indices,
    file_reco['Longitudinal'],
    file_reco['VertexL'],
    windows_indices,
)


def _form_feature(label, branch):
    """Announce and build one windowed feature.

    Prints 'Forming <label>...' and returns the result of
    Utilities.ProcessFeature applied to branch `branch` of file_reco with the
    current windows_indices.
    """
    print(f'Forming {label}...')
    return Utilities.ProcessFeature(file_reco[branch], windows_indices)


# Process features (one call per input branch, in the order they are stacked
# into x later on)
transverse = _form_feature('transverse', 'Transverse')
energy = _form_feature('energy', 'Energy')
hitWidth = _form_feature('hitWidth', 'HitWidth')
theta_l = _form_feature('theta_l', 'Angle')
secVertex = _form_feature('secVertex', 'SecVertex')
gapSep = _form_feature('gapSep', 'GapSep')
eventHitSep = _form_feature('eventHitSep', 'EventHitSep')
clusterHitSep = _form_feature('clusterHitSep', 'ClusterHitSep')

<div class="alert alert-block alert-info" style="font-size: 18px;">
    Move into x and y
</div>

In [None]:
# Stack the eight feature arrays along the third axis (depth-wise) to form the
# network input.
# NOTE(review): presumably each feature is (n_windows, window_length), giving
# x the shape (n_windows, window_length, 8) - confirm against
# Utilities.ProcessFeature.
x = np.dstack([
    transverse,
    energy,
    hitWidth,
    theta_l,
    secVertex,
    gapSep,
    eventHitSep,
    clusterHitSep,
])

<div class="alert alert-block alert-info" style="font-size: 18px;">
    Have to do some reshaping
</div>

In [None]:
# One contamination flag per row: convert to an array and make it a column
# vector of shape (N, 1).
contamination_truth = np.array(contamination_truth).reshape(-1, 1)

# Append a trailing axis of length 1 to the split-point truth, keeping its
# first two dimensions as they are.
split_point_truth = np.array(split_point_truth)
split_point_truth = split_point_truth.reshape(
    (split_point_truth.shape[0], split_point_truth.shape[1], 1)
)

<div class="alert alert-block alert-info" style="font-size: 18px;">
    Save to file
</div>

In [None]:
# Write the features, the truth labels and the cluster indices into a single
# .npz archive, under the keys given by the keyword names.
np.savez(
    outputFileName,
    x_tokens=x,
    y_tokens=split_point_truth,
    is_contaminated=contamination_truth,
    cluster_index=cluster_indices,
)