## Write Later Tier Training File

written by Isobel Mawby (i.mawby1@lancaster.ac.uk)

<div class="alert alert-block alert-info" style="font-size: 18px;">
    Imports
</div>

In [None]:
import sys
import os
# Prepend the current working directory, with its final 10 characters removed, to the
# module search path. sys.path[0] is also used further down as the base directory for
# the input/output file paths.
# NOTE(review): the hard-coded 10 presumably strips a trailing sub-directory name so that
# the path points at the directory holding the local helper modules - confirm, since
# running from a differently named working directory would yield a wrong path.
sys.path.insert(0, os.getcwd()[0:len(os.getcwd()) - 10])

import math
import numpy as np
import sklearn 
import matplotlib.pyplot as plt

import Utilities
import LaterTierFileHelper

<div class="alert alert-block alert-info" style="font-size: 18px;">
    Create a file for later tier track-track link training (isTrackMode == True) or later tier track-shower link training (isTrackMode == False)?
</div>

In [None]:
# Selects which later tier training file this notebook produces:
#   True  -> track-track links  (output written to ..._later_tier_track.npz)
#   False -> track-shower links (output written to ..._later_tier_shower.npz)
isTrackMode = True

<div class="alert alert-block alert-info" style="font-size: 18px;">
    Please put the path to your training file (created from makeTrainingTrees.C) and set the output file name
</div>

In [None]:
# Both the input tree and the output archive live in the 'files' directory under sys.path[0].
filesDir = sys.path[0] + '/files/'

# Input: ROOT training file.
fileName = filesDir + 'hierarchy_TRAIN.root'

# Output: the archive name depends on which link type is being prepared.
trainVarFile = filesDir + ('hierarchy_TRAIN_later_tier_track.npz' if isTrackMode
                           else 'hierarchy_TRAIN_later_tier_shower.npz')

for pathLabel, pathValue in (('fileName:', fileName), ('trainVarFile:', trainVarFile)):
    print(pathLabel, pathValue)

<div class="alert alert-block alert-info" style="font-size: 18px;">
    Convert file info to expected format
</div>

In [None]:
# Read the training tree (with normalise=True) and unpack the nine returned items:
# the link count, the variable matrix, the labels, the truth information and the
# four training-cut arrays.
treeContents = LaterTierFileHelper.ReadTreeForTraining(isTrackMode, fileName, normalise=True)

(nLinks, variables, y,
 trueParentChildLink, trueChildVisibleGeneration,
 trainingCutSep, trainingCutDoesConnect, trainingCutL, trainingCutT) = treeContents

<div class="alert alert-block alert-info" style="font-size: 18px;">
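    Plot the later-tier variables (numbers are column indices of the variable array; for the grouped variables all four columns are plotted in track mode and only the first two in shower mode):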
    
    0 - parentTrackScore
    1 - childTrackScore
    2 - parentNSpacepoints
    3 - childNSpacepoints
    4 - separation3D

    5, 26, 47, 68  - parentNuVertexSep
    6, 27, 48, 69  - childNuVertexSep
    7, 28, 49, 70  - parentEndRegionNHits
    8, 29, 50, 71  - parentEndRegionNParticles
    9, 30, 51, 72  - parentEndRegionRToWall
    10, 31, 52, 73 - vertexSeparation
    11, 32, 53, 74 - doesChildConnect
    12, 33, 54, 75 - overshootStartDCA
    13, 34, 55, 76 - overshootStartL
    14, 35, 56, 77 - overshootEndDCA
    15, 36, 57, 78 - overshootEndL
    16, 37, 58, 79 - childConnectionDCA
    17, 38, 59, 80 - childConnectionExtrapDistance
    18, 39, 60, 81 - childConnectionLRatio
    19, 40, 61, 82 - parentConnectionPointNUpstreamHits
    20, 41, 62, 83 - parentConnectionPointNDownstreamHits
    21, 42, 63, 84 - parentConnectionPointNHitRatio
    22, 43, 64, 85 - parentConnectionPointEigenValueRatio
    23, 44, 65, 86 - parentConnectionPointOpeningAngle
    24, 45, 66, 87 - parentIsPOIClosestToNu
    25, 46, 67, 88 - childIsPOIClosestToNu
</div>

In [None]:
# Variables that occupy a single column each: column i is drawn against trueParentChildLink.
singleVariableNames = [
    "parentTrackScore",
    "childTrackScore",
    "parentNSpacepoints",
    "childNSpacepoints",
    "separation3D",
]

for iSingle, singleName in enumerate(singleVariableNames):
    Utilities.drawSignalBackground(variables[:, iSingle], trueParentChildLink, singleName)

# Variables that are repeated in consecutive blocks of columns. The order here is the
# column order inside one block, so the first block covers columns 5-25, the second
# 26-46, and so on. Keeping a single table for both modes guarantees that the plot
# titles match the columns (the track-mode titles for columns 21 and 22 used to be wrong).
groupedVariableNames = [
    'parentNuVertexSep',
    'childNuVertexSep',
    'parentEndRegionNHits',
    'parentEndRegionNParticles',
    'parentEndRegionRToWall',
    'vertexSeparation',
    'doesChildConnect',
    'overshootStartDCA',
    'overshootStartL',
    'overshootEndDCA',
    'overshootEndL',
    'childConnectionDCA',
    'childConnectionExtrapDistance',
    'childConnectionLRatio',
    'parentConnectionPointNUpstreamHits',
    'parentConnectionPointNDownstreamHits',
    'parentConnectionPointNHitRatio',
    'parentConnectionPointEigenValueRatio',
    'parentConnectionPointOpeningAngle',
    'parentIsPOIClosestToNu',
    'childIsPOIClosestToNu',
]

firstGroupedColumn = len(singleVariableNames)  # grouped variables start at column 5
groupStride = len(groupedVariableNames)        # 21 columns per block
nGroups = 4 if isTrackMode else 2              # track mode plots four blocks, shower mode two

for iGrouped, groupedName in enumerate(groupedVariableNames):
    # Columns holding this variable in each block, e.g. [5, 26, 47, 68] for the first one.
    groupColumns = [firstGroupedColumn + iGrouped + (iGroup * groupStride) for iGroup in range(nGroups)]
    # Build an (nRows, nGroups) array with one column per block.
    groupedValues = np.concatenate([variables[:, iColumn].reshape(-1, 1) for iColumn in groupColumns], axis=1)
    Utilities.drawSignalBackgroundGroup(groupedValues, y, groupedName)

<div class="alert alert-block alert-info" style="font-size: 18px;">
    Plot the training cut distributions - this infers the training cuts to be applied in training
</div>

In [None]:
# Draw each training-cut distribution against trueParentChildLink,
# pairing every array with the title used for its plot.
trainingCutPlots = (
    (trainingCutSep, "trainingCutSep"),
    (trainingCutDoesConnect, "trainingCutDoesConnect"),
    (trainingCutL, "trainingCutL"),
    (trainingCutT, "trainingCutT"),
)

for cutValues, cutTitle in trainingCutPlots:
    Utilities.drawSignalBackground(cutValues, trueParentChildLink, cutTitle)

<div class="alert alert-block alert-info" style="font-size: 18px;">
    Shuffle the training dataset
</div>

In [None]:
# Shuffle all arrays together in a single call so that they stay row-aligned.
shuffledArrays = sklearn.utils.shuffle(
    variables, y, trueParentChildLink, trueChildVisibleGeneration,
    trainingCutSep, trainingCutDoesConnect, trainingCutL, trainingCutT)

(variables, y, trueParentChildLink, trueChildVisibleGeneration,
 trainingCutSep, trainingCutDoesConnect, trainingCutL, trainingCutT) = shuffledArrays

<div class="alert alert-block alert-info" style="font-size: 18px;">
    Write the file
</div>

In [None]:
# Split the shuffled links 90% / 10% into training and test partitions.
# The test partition takes whatever is left after the training partition: flooring both
# fractions independently (floor(0.9 * n) + floor(0.1 * n)) can add up to n - 1 and would
# silently drop one link (e.g. n = 15 gives 13 + 1).
ntrain = math.floor(nLinks * 0.9)
ntest = int(nLinks) - ntrain

trainSlice = slice(0, ntrain)
testSlice = slice(ntrain, ntrain + ntest)

variables_train, variables_test = variables[trainSlice], variables[testSlice]
y_train, y_test = y[trainSlice], y[testSlice]
trueParentChildLink_train, trueParentChildLink_test = trueParentChildLink[trainSlice], trueParentChildLink[testSlice]
trueChildVisibleGeneration_train, trueChildVisibleGeneration_test = \
    trueChildVisibleGeneration[trainSlice], trueChildVisibleGeneration[testSlice]
trainingCutSep_train, trainingCutSep_test = trainingCutSep[trainSlice], trainingCutSep[testSlice]
trainingCutDoesConnect_train, trainingCutDoesConnect_test = \
    trainingCutDoesConnect[trainSlice], trainingCutDoesConnect[testSlice]
trainingCutL_train, trainingCutL_test = trainingCutL[trainSlice], trainingCutL[testSlice]
trainingCutT_train, trainingCutT_test = trainingCutT[trainSlice], trainingCutT[testSlice]

# Write both partitions to a single .npz archive; the keyword names are the keys under
# which the arrays are stored.
np.savez(trainVarFile,
         variables_train=variables_train, y_train=y_train, trueParentChildLink_train=trueParentChildLink_train,
         trueChildVisibleGeneration_train=trueChildVisibleGeneration_train,
         trainingCutSep_train=trainingCutSep_train, trainingCutDoesConnect_train=trainingCutDoesConnect_train,
         trainingCutL_train=trainingCutL_train, trainingCutT_train=trainingCutT_train,
         variables_test=variables_test, y_test=y_test, trueParentChildLink_test=trueParentChildLink_test,
         trueChildVisibleGeneration_test=trueChildVisibleGeneration_test,
         trainingCutSep_test=trainingCutSep_test, trainingCutDoesConnect_test=trainingCutDoesConnect_test,
         trainingCutL_test=trainingCutL_test, trainingCutT_test=trainingCutT_test)

# Summarise the shape of every array that was written.
print('variables_train: ', variables_train.shape)
print('y_train: ', y_train.shape)
print('trueParentChildLink_train:', trueParentChildLink_train.shape)
print('trueChildVisibleGeneration_train:', trueChildVisibleGeneration_train.shape)
print('trainingCutSep_train:', trainingCutSep_train.shape)
print('trainingCutDoesConnect_train:', trainingCutDoesConnect_train.shape)
print('trainingCutL_train:', trainingCutL_train.shape)
print('trainingCutT_train:', trainingCutT_train.shape)
print('')
print('variables_test: ', variables_test.shape)
print('y_test: ', y_test.shape)
print('trueParentChildLink_test:', trueParentChildLink_test.shape)
print('trueChildVisibleGeneration_test:', trueChildVisibleGeneration_test.shape)
print('trainingCutSep_test:', trainingCutSep_test.shape)
print('trainingCutDoesConnect_test:', trainingCutDoesConnect_test.shape)
print('trainingCutL_test:', trainingCutL_test.shape)
print('trainingCutT_test:', trainingCutT_test.shape)