## Write Primary Tier Training File

written by Isobel Mawby (i.mawby1@lancaster.ac.uk)

<div class="alert alert-block alert-info" style="font-size: 18px;">
    Imports
</div>

In [None]:
import sys
import os
sys.path.insert(0, os.getcwd()[0:len(os.getcwd()) - 11])

import math
import numpy as np
import sklearn 

import Utilities
import PrimaryTierFileHelper

<div class="alert alert-block alert-info" style="font-size: 18px;">
    Create a file for primary track training (isTrackMode == True) or primary shower training (isTrackMode == False)?
</div>

In [None]:
# Selects which primary-tier training file this notebook produces:
# True -> primary track training, False -> primary shower training.
isTrackMode = False

<div class="alert alert-block alert-info" style="font-size: 18px;">
    Please put the path to your training file (created from makeTrainingTrees.C) and set the output file name
</div>

In [None]:
# Input ROOT file holding the training trees; it is looked up relative to
# sys.path[0], the directory inserted at the front of sys.path by the imports cell.
fileName = sys.path[0] + '/files/hierarchy_TRAIN.root'

# Output .npz archive: one file name for track mode, another for shower mode.
trainVarFile = sys.path[0] + (
    '/files/hierarchy_TRAIN_track.npz'
    if isTrackMode
    else '/files/hierarchy_TRAIN_shower.npz'
)

# Echo both paths so they can be checked before the (slow) read step.
print('fileName:', fileName)
print('trainVarFile:', trainVarFile)

<div class="alert alert-block alert-info" style="font-size: 18px;">
    Convert file info to expected format
</div>

In [None]:
# Read the training tree for the selected mode, asking the helper for
# normalised variables. The helper returns five values:
#   nLinks            - used later to size the train/test split
#   variables         - indexed as a 2-D array (one column per variable) in the plotting cell
#   y                 - passed as the truth input to the grouped plots in track mode
#   isTruePrimaryLink - passed as the truth input to the single-variable plots
#   trainingCutDCA    - plotted separately to choose the training cut
(
    nLinks,
    variables,
    y,
    isTruePrimaryLink,
    trainingCutDCA,
) = PrimaryTierFileHelper.ReadTreeForTraining(
    isTrackMode,
    fileName,
    normalise=True,
)

<div class="alert alert-block alert-info" style="font-size: 18px;">
    Plot the primary variables: 
    
    0     - primaryNSpacepoints
    1, 9  - primaryNuVertexSeparation
    2, 10 - primaryStartRegionNHits
    3, 11 - primaryStartRegionNParticles
    4, 12 - primaryDCA
    5, 13 - primaryConnectionExtrapDistance 
    6, 14 - primaryIsPOIClosestToNu
    7, 15 - primaryClosestParentL
    8, 16 - primaryClosestParentT
</div>

In [None]:
# Plot titles for variable columns 1-8, listed in column order.
primaryVariableNames = [
    'primaryNuVertexSeparation',
    'primaryStartRegionNHits',
    'primaryStartRegionNParticles',
    'primaryDCA',
    'primaryConnectionExtrapDistance',
    'primaryIsPOIClosestToNu',
    'primaryClosestParentL',
    'primaryClosestParentT',
]

# Column 0 is plotted on its own in both modes.
Utilities.drawSignalBackground(variables[:, 0], isTruePrimaryLink, "nSpacepoints")

for column, variableName in enumerate(primaryVariableNames, start=1):
    if isTrackMode:
        # Track mode: each variable occupies two columns (column and column + 8),
        # which are drawn together as an (nEntries, 2) group against y.
        pairedColumns = np.concatenate(
            (
                variables[:, column].reshape(-1, 1),
                variables[:, column + 8].reshape(-1, 1),
            ),
            axis=1,
        )
        Utilities.drawSignalBackgroundGroup(pairedColumns, y, variableName)
    else:
        # Shower mode: a single column per variable, drawn against isTruePrimaryLink.
        Utilities.drawSignalBackground(variables[:, column], isTruePrimaryLink, variableName)

<div class="alert alert-block alert-info" style="font-size: 18px;">
    Plot the training cut distribution - use this to infer the training cut to be applied in training
</div>

In [None]:
# Draw the training-cut DCA distribution against the true-primary-link flag.
# trainingCutDCA is converted to a NumPy array before being handed to the plotter.
Utilities.drawSignalBackground(
    np.array(trainingCutDCA),
    isTruePrimaryLink,
    "trainingCutDCA",
)

<div class="alert alert-block alert-info" style="font-size: 18px;">
    Shuffle the training dataset
</div>

In [None]:
# Shuffle all four collections in a single call so that they stay aligned
# entry-by-entry. No random_state is given, so the order differs from run to run.
(
    variables,
    y,
    isTruePrimaryLink,
    trainingCutDCA,
) = sklearn.utils.shuffle(
    variables,
    y,
    isTruePrimaryLink,
    trainingCutDCA,
)

<div class="alert alert-block alert-info" style="font-size: 18px;">
    Write the file
</div>

In [None]:
# Split the shuffled dataset into a training sample (first 90% of the links)
# and a test sample (everything that remains), then store both in one .npz archive.
ntrain = math.floor(nLinks * 0.9)

# The test sample is whatever follows the first ntrain entries, so its size is
# derived from ntrain. (floor(nLinks * 0.1) can under-count it by one, e.g. for
# nLinks = 15.) Assumes nLinks equals the number of entries in `variables` -
# TODO confirm against PrimaryTierFileHelper.ReadTreeForTraining.
ntest = nLinks - ntrain

variables_train = variables[:ntrain]
variables_test = variables[ntrain:]

y_train = y[:ntrain]
y_test = y[ntrain:]

isTruePrimaryLink_train = isTruePrimaryLink[:ntrain]
isTruePrimaryLink_test = isTruePrimaryLink[ntrain:]

trainingCutDCA_train = trainingCutDCA[:ntrain]
trainingCutDCA_test = trainingCutDCA[ntrain:]

# Each keyword below becomes the name of an array inside the archive.
np.savez(trainVarFile,
         variables_train=variables_train, y_train=y_train, isTruePrimaryLink_train=isTruePrimaryLink_train, trainingCutDCA_train=trainingCutDCA_train,
         variables_test=variables_test, y_test=y_test, isTruePrimaryLink_test=isTruePrimaryLink_test, trainingCutDCA_test=trainingCutDCA_test)

# Report the shapes of what was written as a quick sanity check.
print('variables_train: ', variables_train.shape)
print('isTruePrimaryLink_train:', isTruePrimaryLink_train.shape)
print('y_train: ', y_train.shape)
print('variables_test: ', variables_test.shape)
print('y_test: ', y_test.shape)
print('isTruePrimaryLink_test:', isTruePrimaryLink_test.shape)