In [None]:
###########################################################
# Imports
###########################################################
import sys
import os
# Prepend the current working directory, with its last 10 characters removed, to the
# module search path. sys.path[0] is also used further down as the base directory of
# the '/files/...' input and output paths.
# NOTE(review): the hard-coded 10 presumably strips a trailing sub-directory name such
# as '/notebooks' (10 characters) so that LaterTierFileHelper can be imported - confirm;
# launching the kernel from any other directory would produce a different path.
sys.path.insert(0, os.getcwd()[0:len(os.getcwd()) - 10])

import math
import numpy as np
import sklearn 
import matplotlib.pyplot as plt

import LaterTierFileHelper

In [None]:
###########################################################
# Define mode
###########################################################
# Switch used by the cells below:
#   True  -> readTreeGroupLinks_track, four-orientation plots, '..._later_tier_track.npz' output
#   False -> readTreeGroupLinks_shower, two-orientation plots, '..._later_tier_shower.npz' output
isTrackMode = True

In [None]:
###########################################################
# Define file
###########################################################

# Input ROOT file; rooted at sys.path[0], the directory prepended in the imports cell.
fileName = sys.path[0] + '/files/hierarchy_TRAIN.root'

# Output archive; its name depends on the selected mode (track or shower).
trainVarFile = sys.path[0] + (
    '/files/hierarchy_TRAIN_later_tier_track.npz' if isTrackMode
    else '/files/hierarchy_TRAIN_later_tier_shower.npz'
)

print('fileName:', fileName)
print('trainVarFile:', trainVarFile)

In [None]:
###########################################################
# Read tree
###########################################################
# Pick the reader matching the selected mode and call it once; both readers are
# unpacked into the same ten outputs.
(nLinks, variables, y, trueParentChildLink, isLinkOrientationCorrect,
 trueChildVisibleGeneration, trainingCutSep, trainingCutDoesConnect,
 trainingCutL, trainingCutT) = (
    LaterTierFileHelper.readTreeGroupLinks_track if isTrackMode
    else LaterTierFileHelper.readTreeGroupLinks_shower
)(fileName, normalise=True)

In [None]:
##########################################################
# Graph drawing functions
###########################################################

def drawSignalBackgroundGroup_track(variable0, variable1, variable2, variable3, truth_link_orientation, truth_is_correct, graph_label) :
    """Overlay unit-normalised histograms of one variable for three link categories.

    The variable is supplied once per link orientation (four orientations). The
    per-orientation values are pooled into three samples:

    * 'signal'            - truth_is_correct is non-zero and orientation column i equals 1
    * 'wrong orientation' - truth_is_correct is non-zero and orientation column i equals 0
    * 'background'        - truth_is_correct equals 0 (every orientation's value contributes)

    Parameters
    ----------
    variable0, variable1, variable2, variable3 : np.ndarray
        Values of the variable for orientation 0, 1, 2 and 3; each is indexed with a
        flattened per-link boolean mask.
    truth_link_orientation : np.ndarray
        2-D array; column i is compared against 1 and 0 for orientation i.
    truth_is_correct : np.ndarray
        Per-link truth flag (non-zero for a true link, 0 for background).
    graph_label : str
        Used as both the figure title and the x-axis label.

    Returns
    -------
    None
        The figure is displayed with plt.show().

    Notes
    -----
    Each histogram is weighted by 1/N so that its bin contents sum to one. A category
    with no entries is skipped rather than raising ZeroDivisionError.
    """
    orientation_variables = (variable0, variable1, variable2, variable3)
    background_mask = (truth_is_correct == 0).reshape(-1)

    signal_parts = []
    wo_parts = []
    background_parts = []

    for orientation, variable in enumerate(orientation_variables) :
        orientation_flag = truth_link_orientation[:, orientation]
        signal_mask = np.logical_and(orientation_flag == 1, truth_is_correct).reshape(-1)
        wo_mask = np.logical_and(orientation_flag == 0, truth_is_correct).reshape(-1)

        signal_parts.append(variable[signal_mask].reshape(-1))
        wo_parts.append(variable[wo_mask].reshape(-1))
        # Background takes this orientation's own values; variable0 used to be
        # reused for all four orientations, which hid orientations 1-3.
        background_parts.append(variable[background_mask].reshape(-1))

    categories = (
        (signal_parts, 'blue', 'signal'),
        (wo_parts, 'orange', 'wrong orientation'),
        (background_parts, 'red', 'background'),
    )

    for parts, colour, label in categories :
        values = np.concatenate(parts)
        n_entries = values.shape[0]
        if n_entries == 0 :
            # Nothing to draw; 1/N would divide by zero.
            continue
        weights = np.full(n_entries, 1.0 / float(n_entries))
        plt.hist(values, bins=50, color=colour, weights=weights, label=label, fill=False, histtype='step')

    plt.title(graph_label)
    plt.xlabel(graph_label)
    plt.legend()
    plt.grid(True)
    plt.show()
    
def drawSignalBackgroundGroup_shower(variable0, variable1, truth_link_orientation, truth_is_correct, graph_label) :
    """Overlay unit-normalised histograms of one variable for three link categories.

    The variable is supplied once per link orientation (two orientations). The
    per-orientation values are pooled into three samples:

    * 'signal'            - truth_is_correct is non-zero and orientation column i equals 1
    * 'wrong orientation' - truth_is_correct is non-zero and orientation column i equals 0
    * 'background'        - truth_is_correct equals 0 (both orientations' values contribute)

    Parameters
    ----------
    variable0, variable1 : np.ndarray
        Values of the variable for orientation 0 and 1; each is indexed with a
        flattened per-link boolean mask.
    truth_link_orientation : np.ndarray
        2-D array; column i is compared against 1 and 0 for orientation i.
    truth_is_correct : np.ndarray
        Per-link truth flag (non-zero for a true link, 0 for background).
    graph_label : str
        Used as both the figure title and the x-axis label.

    Returns
    -------
    None
        The figure is displayed with plt.show().

    Notes
    -----
    Each histogram is weighted by 1/N so that its bin contents sum to one. A category
    with no entries is skipped rather than raising ZeroDivisionError.
    """
    orientation_variables = (variable0, variable1)
    background_mask = (truth_is_correct == 0).reshape(-1)

    signal_parts = []
    wo_parts = []
    background_parts = []

    for orientation, variable in enumerate(orientation_variables) :
        orientation_flag = truth_link_orientation[:, orientation]
        signal_mask = np.logical_and(orientation_flag == 1, truth_is_correct).reshape(-1)
        wo_mask = np.logical_and(orientation_flag == 0, truth_is_correct).reshape(-1)

        signal_parts.append(variable[signal_mask].reshape(-1))
        wo_parts.append(variable[wo_mask].reshape(-1))
        # Background takes this orientation's own values; variable0 used to be
        # reused for both orientations, which hid orientation 1.
        background_parts.append(variable[background_mask].reshape(-1))

    categories = (
        (signal_parts, 'blue', 'signal'),
        (wo_parts, 'orange', 'wrong orientation'),
        (background_parts, 'red', 'background'),
    )

    for parts, colour, label in categories :
        values = np.concatenate(parts)
        n_entries = values.shape[0]
        if n_entries == 0 :
            # Nothing to draw; 1/N would divide by zero.
            continue
        weights = np.full(n_entries, 1.0 / float(n_entries))
        plt.hist(values, bins=50, color=colour, weights=weights, label=label, fill=False, histtype='step')

    plt.title(graph_label)
    plt.xlabel(graph_label)
    plt.legend()
    plt.grid(True)
    plt.show()
    
def drawSignalBackground(variable, truth_labels, graph_label) :
    """Overlay unit-normalised histograms of a variable for signal and background.

    Parameters
    ----------
    variable : np.ndarray
        Values to histogram; indexed with a flattened boolean mask built from truth_labels.
    truth_labels : np.ndarray
        Truth flag per entry: entries equal to 1 are drawn as 'signal', entries equal
        to 0 as 'background'.
    graph_label : str
        Used as both the figure title and the x-axis label.

    Returns
    -------
    None
        The figure is displayed with plt.show().

    Notes
    -----
    Each histogram is weighted by 1/N so that its bin contents sum to one. A class
    with no entries is skipped rather than raising ZeroDivisionError.
    """
    categories = (
        ((truth_labels == 1).reshape(-1), 'blue', 'signal'),
        ((truth_labels == 0).reshape(-1), 'red', 'background'),
    )

    for mask, colour, label in categories :
        values = variable[mask].reshape(-1)
        n_entries = values.shape[0]
        if n_entries == 0 :
            # Nothing to draw; 1/N would divide by zero.
            continue
        weights = np.full(n_entries, 1.0 / float(n_entries))
        plt.hist(values, bins=50, color=colour, weights=weights, label=label, fill=False, histtype='step', linestyle='solid')

    plt.title(graph_label)
    plt.xlabel(graph_label)
    plt.legend()
    plt.grid(True)
    plt.show()

In [None]:
##########################################################
# Draw histograms
###########################################################

# 0 - parentTrackScore
# 1 - childTrackScore
# 2 - parentNSpacepoints
# 3 - childNSpacepoints
# 4 - separation3D

# 5, 26, 47, 68 parentNuVertexSep
# 6, 27, 48, 69 childNuVertexSep
# 7, 28, 49, 70 parentEndRegionNHits
# 8, 29, 50, 71 parentEndRegionNParticles,
# 9, 30, 51, 72 parentEndRegionRToWall
# 10, 31, 52, 73 vertexSeparation
# 11, 32, 53, 74 doesChildConnect
# 12, 33, 54, 75 overshootStartDCA
# 13, 34, 55, 76 overshootStartL
# 14, 35, 56, 77 overshootEndDCA
# 15, 36, 57, 78 overshootEndL
# 16, 37, 58, 79 childConnectionDCA
# 17, 38, 59, 80 childConnectionExtrapDistance
# 18, 39, 60, 81 childConnectionLRatio
# 19, 40, 61, 82 parentConnectionPointNUpstreamHits
# 20, 41, 62, 83 parentConnectionPointNDownstreamHits
# 21, 42, 63, 84 parentConnectionPointNHitRatio
# 22, 43, 64, 85 parentConnectionPointEigenValueRatio
# 23, 44, 65, 86 parentConnectionPointOpeningAngle
# 24, 45, 66, 87 parentIsPOIClosestToNu
# 25, 46, 67, 88 childIsPOIClosestToNu

# Columns 0-4 hold one value per link and are split by true / false link only.
linkVariableNames = (
    "parentTrackScore",
    "childTrackScore",
    "parentNSpacepoints",
    "childNSpacepoints",
    "separation3D",
)

for column, label in enumerate(linkVariableNames) :
    drawSignalBackground(variables[:, column], trueParentChildLink, label)

# From column 5 onwards the variables repeat once per orientation, in blocks of
# len(orientationVariableNames) == 21 columns (see the index table above).
orientationVariableNames = (
    'parentNuVertexSep',
    'childNuVertexSep',
    'parentEndRegionNHits',
    'parentEndRegionNParticles',
    'parentEndRegionRToWall',
    'vertexSeparation',
    'doesChildConnect',
    'overshootStartDCA',
    'overshootStartL',
    'overshootEndDCA',
    'overshootEndL',
    'childConnectionDCA',
    'childConnectionExtrapDistance',
    'childConnectionLRatio',
    'parentConnectionPointNUpstreamHits',
    'parentConnectionPointNDownstreamHits',
    'parentConnectionPointNHitRatio',
    'parentConnectionPointEigenValueRatio',
    'parentConnectionPointOpeningAngle',
    'parentIsPOIClosestToNu',
    'childIsPOIClosestToNu',
)
firstOrientationColumn = len(linkVariableNames)
orientationStride = len(orientationVariableNames)

for offset, label in enumerate(orientationVariableNames) :
    column = firstOrientationColumn + offset
    if (isTrackMode) :
        # Track mode: four orientations per link
        drawSignalBackgroundGroup_track(
            variables[:, column],
            variables[:, column + orientationStride],
            variables[:, column + 2 * orientationStride],
            variables[:, column + 3 * orientationStride],
            isLinkOrientationCorrect, trueParentChildLink, label)
    else :
        # Shower mode: two orientations per link
        drawSignalBackgroundGroup_shower(
            variables[:, column],
            variables[:, column + orientationStride],
            isLinkOrientationCorrect, trueParentChildLink, label)

In [None]:
###########################################################
# Draw training histograms
###########################################################

# One signal / background overlay per training-cut quantity returned by the reader.
for cutValues, cutLabel in (
    (trainingCutSep, "trainingCutSep"),
    (trainingCutDoesConnect, "trainingCutDoesConnect"),
    (trainingCutL, "trainingCutL"),
    (trainingCutT, "trainingCutT"),
) :
    drawSignalBackground(cutValues, trueParentChildLink, cutLabel)

In [None]:
###########################################################
# Shuffle all arrays with the same permutation so that their indices stay linked
###########################################################
# Fixed seed so that a fresh "Restart & Run All" reproduces the same train / test split.
shuffleSeed = 42

# sklearn.utils.shuffle applies one common permutation to every array passed in, so
# row i of each output still refers to the same link.
# NOTE: this cell overwrites its own inputs, so running it twice shuffles the arrays
# a second time; results are reproducible only when the notebook is run top to bottom.
(variables, y, trueParentChildLink, trueChildVisibleGeneration, isLinkOrientationCorrect,
 trainingCutSep, trainingCutDoesConnect, trainingCutL, trainingCutT) = sklearn.utils.shuffle(
    variables, y, trueParentChildLink, trueChildVisibleGeneration, isLinkOrientationCorrect,
    trainingCutSep, trainingCutDoesConnect, trainingCutL, trainingCutT,
    random_state=shuffleSeed)

In [None]:
###########################################################
# Write file
###########################################################

# Split sizes: 90% train and 10% test of nLinks, each rounded down.
ntrain = math.floor(nLinks * 0.9)
ntest = math.floor(nLinks * 0.1)

print('ntest: ', ntest)
print('ntrain: ', ntrain)

def _splitTrainTest(array) :
    """Return (the first ntrain rows, the following ntest rows) of array."""
    return array[:ntrain], array[ntrain:(ntrain + ntest)]

variables_train, variables_test = _splitTrainTest(variables)
y_train, y_test = _splitTrainTest(y)
trueParentChildLink_train, trueParentChildLink_test = _splitTrainTest(trueParentChildLink)
isLinkOrientationCorrect_train, isLinkOrientationCorrect_test = _splitTrainTest(isLinkOrientationCorrect)
trueChildVisibleGeneration_train, trueChildVisibleGeneration_test = _splitTrainTest(trueChildVisibleGeneration)
trainingCutSep_train, trainingCutSep_test = _splitTrainTest(trainingCutSep)
trainingCutDoesConnect_train, trainingCutDoesConnect_test = _splitTrainTest(trainingCutDoesConnect)
trainingCutL_train, trainingCutL_test = _splitTrainTest(trainingCutL)
trainingCutT_train, trainingCutT_test = _splitTrainTest(trainingCutT)

# Store every train array followed by every test array in a single .npz archive.
np.savez(
    trainVarFile,
    variables_train=variables_train,
    y_train=y_train,
    trueParentChildLink_train=trueParentChildLink_train,
    isLinkOrientationCorrect_train=isLinkOrientationCorrect_train,
    trueChildVisibleGeneration_train=trueChildVisibleGeneration_train,
    trainingCutSep_train=trainingCutSep_train,
    trainingCutDoesConnect_train=trainingCutDoesConnect_train,
    trainingCutL_train=trainingCutL_train,
    trainingCutT_train=trainingCutT_train,
    variables_test=variables_test,
    y_test=y_test,
    trueParentChildLink_test=trueParentChildLink_test,
    isLinkOrientationCorrect_test=isLinkOrientationCorrect_test,
    trueChildVisibleGeneration_test=trueChildVisibleGeneration_test,
    trainingCutSep_test=trainingCutSep_test,
    trainingCutDoesConnect_test=trainingCutDoesConnect_test,
    trainingCutL_test=trainingCutL_test,
    trainingCutT_test=trainingCutT_test,
)

In [None]:
# Report the shape of each stored train / test array as a quick sanity check.
for shapeLabel, storedArray in (
    ('variables_train: ', variables_train),
    ('variables_test: ', variables_test),
    ('y_train: ', y_train),
    ('y_test: ', y_test),
    ('trueChildVisibleGeneration_train:', trueChildVisibleGeneration_train),
    ('trueChildVisibleGeneration_test:', trueChildVisibleGeneration_test),
    ('trainingCutSep_train:', trainingCutSep_train),
    ('trainingCutSep_test:', trainingCutSep_test),
    ('trainingCutDoesConnect_train:', trainingCutDoesConnect_train),
    ('trainingCutDoesConnect_test:', trainingCutDoesConnect_test),
    ('trainingCutL_train:', trainingCutL_train),
    ('trainingCutL_test:', trainingCutL_test),
    ('trainingCutT_train:', trainingCutT_train),
    ('trainingCutT_test:', trainingCutT_test),
) :
    print(shapeLabel, storedArray.shape)
