## Tune Cut File

written by Isobel Mawby (i.mawby1@lancaster.ac.uk)

<div class="alert alert-block alert-info" style="font-size: 18px;">
    Imports
</div>

In [None]:
import sys
import os
sys.path.insert(0, os.getcwd()[0:len(os.getcwd()) - 8])

import numpy as np
import matplotlib.pyplot as plt
import copy
import Utilities
import math

<div class="alert alert-block alert-info" style="font-size: 18px;">
    Set global variables
</div>

In [None]:
# Sentinel meaning "not assigned yet": used below to initialise the per-event
# generation / parent-ID arrays and to test whether a particle is still unassigned
BOGUS_INT = -999
# Particles whose neutrino-vertex separation is below this value are never given a
# later-tier parent when the parent/child dictionaries are built
# (units are not visible in this notebook - presumably cm; TODO confirm)
PRIMARY_REGION = 15.0

<div class="alert alert-block alert-info" style="font-size: 18px;">
    Please put the name of the file created by AddScores.ipynb and set the output file name
</div>

In [None]:
# Both paths are built relative to sys.path[0], i.e. the directory inserted at the
# front of sys.path in the imports cell
inputFile = sys.path[0] + '/files/networkScores.npz'
outputFile = sys.path[0] + '/files/newHierarchy.npz'

<div class="alert alert-block alert-info" style="font-size: 18px;">
    Read file
</div>

In [None]:
# Load the .npz archive; every array below is indexed by event along its first axis
data = np.load(inputFile)

# Primary-link quantities (one padded row per event; *_link_mask selects the valid slots)
primary_link_mask_in = data['primary_link_mask_main']
primary_particleID_in = data['primary_particleID_main']
primary_trueGen_in = data['primary_trueGen_main']
primary_trueParentID_in = data['primary_trueParentID_main']
primary_truePDG_in = data['primary_truePDG_main']
primary_trackScore_in = data['primary_trackScore_main']
primary_score_in = data['primary_score_main']
# Later-tier (parent-child) link quantities, again masked by their own link mask
laterTier_link_mask_in = data['laterTier_link_mask_main']
laterTier_parentID_in = data['laterTier_parentID_main']
laterTier_childID_in = data['laterTier_childID_main']
laterTier_nuVertexSep_in = data['laterTier_nuVertexSep_main']
laterTier_separation3D_in = data['laterTier_separation3D_main']
laterTier_score_in = data['laterTier_score_main']

# Number of events (length of the first axis)
nEntries = len(primary_link_mask_in)

<div class="alert alert-block alert-info" style="font-size: 18px;">
    Create the parent-child and child-parent dictionaries (find the best parent for each child)
</div>

In [None]:
# One dictionary per event:
#   parentToChildDict_main[iEvent][parentID] -> list of child IDs whose best parent is parentID
#   childToParentDict_main[iEvent][childID]  -> [] or [best parent ID]
parentToChildDict_main = []
childToParentDict_main = []

for iEvent in range(nEntries):

    # Valid particles of this event
    primary_link_mask = np.array(primary_link_mask_in[iEvent])
    primary_particleID = np.array(primary_particleID_in[iEvent])[primary_link_mask]
    nParticles = np.count_nonzero(primary_link_mask)

    # Valid parent-child links of this event
    laterTier_link_mask = np.array(laterTier_link_mask_in[iEvent])
    laterTier_childID = np.array(laterTier_childID_in[iEvent], dtype='int')[laterTier_link_mask]
    laterTier_parentID = np.array(laterTier_parentID_in[iEvent], dtype='int')[laterTier_link_mask]
    laterTier_nuVertexSep = np.array(laterTier_nuVertexSep_in[iEvent])[laterTier_link_mask]
    laterTier_separation3D = np.array(laterTier_separation3D_in[iEvent])[laterTier_link_mask]
    laterTier_score = np.array(laterTier_score_in[iEvent])[laterTier_link_mask]

    # Every particle starts with no children and no parent
    parentToChildDict = {particleID: [] for particleID in primary_particleID}
    childToParentDict = {particleID: [] for particleID in primary_particleID}

    for this_particleID in primary_particleID[:nParticles]:

        # All links in which this particle is the child
        linkIndices = np.where(laterTier_childID == this_particleID)[0]

        if len(linkIndices) == 0:
            continue

        # Particles inside the primary region are not given a later-tier parent
        if laterTier_nuVertexSep[linkIndices[0]] < PRIMARY_REGION:
            continue

        foundParentID = -1
        lowestSeparation = 1.0e11
        highestLinkScore = -1.0

        for linkIndex in linkIndices:

            candidateParentID = laterTier_parentID[linkIndex]

            # Only parents that are themselves in the particle list are considered
            if candidateParentID not in primary_particleID:
                continue

            candidateScore = laterTier_score[linkIndex]
            candidateSeparation = laterTier_separation3D[linkIndex]

            # Highest score wins; (nearly) equal scores are separated by the smaller 3D separation
            if math.isclose(candidateScore, highestLinkScore):
                best = candidateSeparation < lowestSeparation
            else:
                best = candidateScore > highestLinkScore

            if not best:
                continue

            lowestSeparation = candidateSeparation
            highestLinkScore = candidateScore
            foundParentID = candidateParentID

        # Record the link in both directions if a parent was found
        if foundParentID >= 0:
            parentToChildDict[foundParentID].append(this_particleID)
            childToParentDict[this_particleID].append(foundParentID)

    parentToChildDict_main.append(parentToChildDict)
    childToParentDict_main.append(childToParentDict)

<div class="alert alert-block alert-info" style="font-size: 18px;">
    Investigate metrics as a function of the primary score threshold
</div>

In [None]:
# Thresholds scanned, in the order the metrics below are filled
primary_thresholds = []

# Each metric is a pair of lists indexed [track, shower]; one value is appended per threshold
primary_efficiency = [[], []]
primary_purity = [[], []]
primary_metric = [[], []]
primaries_as_primaries_frac = [[], []]
primaries_as_other_frac = [[], []]
other_as_primaries_frac = [[], []]
other_as_other_frac = [[], []]

# Loop through thresholds 0, 1/nThresholds, ..., (nThresholds - 1)/nThresholds
nThresholds = 20
for primary_threshold in range(0, nThresholds) :
    # Progress report: the integer step index, printed before it is turned into a score threshold.
    # (This must use this loop's own variable; 'higher_threshold' only exists after a later cell has run.)
    print('threshold:', str(primary_threshold) + '/' + str(nThresholds))
    primary_threshold /= float(nThresholds)
    primary_thresholds.append(primary_threshold)

    for i in [0, 1] :
        # i == 0: track-like (track score > 0.5), i == 1: shower-like (track score < 0.5)
        if i == 0 :
            trackShowerMask = np.logical_and(primary_link_mask_in, primary_trackScore_in > 0.5)
        else :
            trackShowerMask = np.logical_and(primary_link_mask_in, primary_trackScore_in < 0.5)

        # Slice once per category instead of once per metric
        selectedTrueGen = primary_trueGen_in[trackShowerMask]
        selectedScore = primary_score_in[trackShowerMask]

        # True generation 2 is treated as "true primary" throughout this notebook
        isTruePrimary = selectedTrueGen == 2
        isTrueOther = selectedTrueGen != 2
        aboveThreshold = selectedScore > primary_threshold
        belowThreshold = selectedScore < primary_threshold

        # Calc metrics
        n_true_primaries = np.count_nonzero(isTruePrimary)
        n_primaries_as_primaries = np.count_nonzero(np.logical_and(isTruePrimary, aboveThreshold))
        n_primaries_as_other = np.count_nonzero(np.logical_and(isTruePrimary, belowThreshold))
        n_true_other = np.count_nonzero(isTrueOther)
        n_other_as_primaries = np.count_nonzero(np.logical_and(isTrueOther, aboveThreshold))
        n_other_as_other = np.count_nonzero(np.logical_and(isTrueOther, belowThreshold))

        # Efficiency / purity / their product; empty denominators give 0 rather than dividing by zero
        selected_primaries = n_primaries_as_primaries + n_other_as_primaries
        primary_efficiency[i].append(0 if n_true_primaries == 0 else float(n_primaries_as_primaries)/float(n_true_primaries))
        primary_purity[i].append(0 if selected_primaries == 0 else float(n_primaries_as_primaries)/float(selected_primaries))
        primary_metric[i].append(primary_efficiency[i][-1] * primary_purity[i][-1])

        # Confusion-matrix style fractions, normalised to the true class size
        primaries_as_primaries_frac[i].append(0 if n_true_primaries == 0 else float(n_primaries_as_primaries) / float(n_true_primaries))
        primaries_as_other_frac[i].append(0 if n_true_primaries == 0 else float(n_primaries_as_other) / float(n_true_primaries))
        other_as_primaries_frac[i].append(0 if n_true_other == 0 else float(n_other_as_primaries) / float(n_true_other))
        other_as_other_frac[i].append(0 if n_true_other == 0 else float(n_other_as_other) / float(n_true_other))

<div class="alert alert-block alert-info" style="font-size: 18px;">
    Draw these metrics
</div>

In [None]:
# Candidate figure of merit per category [track, shower]:
# (true primaries kept as primaries) x (others kept as others)
potential_metric = [np.array(primaries_as_primaries_frac[iCategory]) * np.array(other_as_other_frac[iCategory])
                    for iCategory in (0, 1)]

# (values, colour, legend label, extra keyword arguments for the connecting line)
primaryCurves = (
    (primaries_as_primaries_frac, 'green', 'primaries_as_primaries', {}),
    (other_as_primaries_frac, 'red', 'other_as_primaries_frac', {'linestyle': 'dashed'}),
    (potential_metric, 'violet', 'potential_metric', {}),
)

for i in [0, 1]:
    plt.clf()

    # Markers carry the legend entry; the line simply joins them up
    for curveValues, curveColour, curveLabel, lineKwargs in primaryCurves:
        plt.scatter(primary_thresholds, curveValues[i], color=curveColour, label=curveLabel, s=10)
        plt.plot(primary_thresholds, curveValues[i], color=curveColour, **lineKwargs)

    if i == 0:
        plt.title('Track')
    else:
        plt.title('Shower')

    plt.xlabel('primary_thresholds')
    plt.ylabel('arbitary units')
    plt.legend()
    plt.show()

<div class="alert alert-block alert-info" style="font-size: 18px;">
    Set the primary score thresholds used in pass 1 of the hierarchy building algorithm
</div>

In [None]:
# Minimum primary score for a track-like (track score > 0.5) particle to be selected as a primary
PRIMARY_THRESHOLD_TRACK = 0.80
# Minimum primary score for a shower-like (track score < 0.5) particle to be selected as a primary
PRIMARY_THRESHOLD_SHOWER = 0.45

<div class="alert alert-block alert-info" style="font-size: 18px;">
    Now build the primary tier (this mimics the logic used in the Pandora algorithm)
</div>

In [None]:
# Per-event outputs (one entry per event):
#   new_gen[iEvent]      - assigned generation per padded particle slot (BOGUS_INT = unassigned, 2 = primary)
#   new_parentID[iEvent] - assigned parent ID per padded particle slot (BOGUS_INT = unassigned, -1 = primary)
#   primaries[iEvent]    - list of particle IDs selected as primaries
new_gen = []
new_parentID = []
primaries = []

for iEvent in range(nEntries) :

    # Get data for event (kept at padded length so the outputs line up with the input arrays)
    primary_link_mask = np.array(primary_link_mask_in[iEvent])
    trackShowerScore = np.array(primary_trackScore_in[iEvent])
    primaryScores = np.array(primary_score_in[iEvent])
    primaryParticleID = np.array(primary_particleID_in[iEvent], dtype='int')
    paddedLength = primary_link_mask.shape[0]

    # Create arrays for event, everything unassigned to start with
    this_parentID = np.full(paddedLength, BOGUS_INT, dtype='int')
    this_gen = np.full(paddedLength, BOGUS_INT, dtype='int')

    # Find selected primaries: the score threshold depends on whether the particle is track- or shower-like
    passesTrackCut = np.logical_and(trackShowerScore > 0.5, primaryScores > PRIMARY_THRESHOLD_TRACK)
    passesShowerCut = np.logical_and(trackShowerScore < 0.5, primaryScores > PRIMARY_THRESHOLD_SHOWER)

    # Only valid slots may be selected. Without the link mask a padded slot whose filler values
    # happened to pass a cut would be added to the primaries and then fail the particle-ID
    # lookup (done against the masked ID array) when the later tiers are built.
    selected = np.logical_and(primary_link_mask, np.logical_or(passesTrackCut, passesShowerCut))

    this_primaries = primaryParticleID[selected].tolist()
    this_gen[selected] = 2
    this_parentID[selected] = -1

    # Add event
    new_gen.append(this_gen)
    new_parentID.append(this_parentID)
    primaries.append(this_primaries)

<div class="alert alert-block alert-info" style="font-size: 18px;">
    Make copies to save
</div>

In [None]:
# Snapshot the state after primary selection: later cells take references to the
# per-event arrays in new_gen / new_parentID and write to them in place
new_gen_pass1 = copy.deepcopy(new_gen)
new_parentID_pass1 = copy.deepcopy(new_parentID)

<div class="alert alert-block alert-info" style="font-size: 18px;">
    Tune higher tiers (this one takes a few minutes to run)
</div>

In [None]:
# Thresholds scanned, in the order the metrics below are filled
higher_thresholds = []

# Each metric is a pair of lists indexed [track, shower]; one value is appended per threshold.
# For particles of a given true generation:
#   correct   - assigned parent ID equals the true parent ID and the assigned generation is right
#   no_parent - still unassigned (generation == BOGUS_INT) after the hierarchy build
#   incorrect - everything that is neither parent-matched, unassigned, nor labelled primary (generation 2)
secondary_correct_parent_frac = [[], []]
secondary_no_parent_frac = [[], []]
secondary_incorrect_parent_frac = [[], []]

tertiary_correct_parent_frac = [[], []]
tertiary_no_parent_frac = [[], []]
tertiary_incorrect_parent_frac = [[], []]

higher_correct_parent_frac = [[], []]
higher_no_parent_frac = [[], []]
higher_incorrect_parent_frac = [[], []]

nThresholds = 20

# Loop through thresholds
for higher_threshold in range(0, nThresholds) :
    # Progress report: integer step index, printed before it is converted to a score threshold
    print('threshold:', str(higher_threshold) + '/' + str(nThresholds))
    higher_threshold /= float(nThresholds)
    higher_thresholds.append(higher_threshold)

    # Work on copies so that every threshold starts from the primary-only state.
    # NOTE(review): np.array(...) presumably yields a 2D (event, slot) array because all
    # events share one padded length; the boolean-mask indexing further down relies on that.
    temp_parentID = np.array(copy.deepcopy(new_parentID))
    temp_gen = np.array(copy.deepcopy(new_gen))    
    
    for iEvent in range(nEntries) :

        # Get data for the event
        # The tier list is seeded with the selected primaries; each pass of the while loop adds one tier
        this_particle_tiers = [primaries[iEvent]]
        # Rows of the temp arrays: the writes below are intended to update temp_parentID / temp_gen
        this_parentID = temp_parentID[iEvent]
        this_gen = temp_gen[iEvent]
        parentToChildDict = parentToChildDict_main[iEvent]
        
        primary_link_mask = np.array(primary_link_mask_in[iEvent])    
        primary_particleID = np.array(primary_particleID_in[iEvent])[primary_link_mask]
        trackShowerScore = np.array(primary_trackScore_in[iEvent])[primary_link_mask]

        laterTier_link_mask = np.array(laterTier_link_mask_in[iEvent])
        laterTier_childID = np.array(laterTier_childID_in[iEvent], dtype='int')[laterTier_link_mask]
        laterTier_parentID = np.array(laterTier_parentID_in[iEvent], dtype='int')[laterTier_link_mask]
        laterTier_score = np.array(laterTier_score_in[iEvent])[laterTier_link_mask]
        
        # Build hierarchy
        # Stops as soon as a pass adds no new particles (the last tier is empty)
        while (len(this_particle_tiers[-1]) != 0) :
            
            tier = []
            
            for parentID in this_particle_tiers[-1]:
                
                # Not used afterwards in this cell; the lookup does raise if parentID is not a valid particle
                iParent = np.where(primary_particleID == parentID)[0][0]
                
                for childID in parentToChildDict[parentID] :
                    
                    # NOTE(review): iChild indexes the masked primary_particleID but is used below to
                    # index the padded this_gen / this_parentID - this assumes the valid slots are the
                    # leading ones of each padded row; confirm against the file producer.
                    iChild = np.where(primary_particleID == childID)[0][0]
                    
                    # If the child has already been assigned a parent
                    if (this_gen[iChild] != BOGUS_INT) :
                        continue
                        
                    # The (single) link joining this child to this parent
                    linkIndex = np.where(np.logical_and(laterTier_childID == childID, laterTier_parentID == parentID))[0][0]
                    
                    # Only referenced by the disabled per-tier cuts below
                    isTrack = trackShowerScore[iChild] > 0.5
                    
                    # One common threshold is applied to every tier in this scan
                    if (laterTier_score[linkIndex] < higher_threshold) : 
                        continue

                    # Disabled alternative: fixed 0.8 cut for secondaries, scanned threshold for higher tiers
#                     # Secondary
#                     if ((len(this_particle_tiers) == 1) and isTrack and (laterTier_score[linkIndex] < 0.8)) :
#                         continue

#                     if ((len(this_particle_tiers) == 1) and (not isTrack) and (laterTier_score[linkIndex] < 0.8)) :
#                         continue                     

#                     if ((len(this_particle_tiers) != 1) and (laterTier_score[linkIndex] < higher_threshold)) : 
#                         continue
                        
                    # Accept the link: children of primaries get generation 3, their children 4, and so on
                    this_parentID[iChild] = parentID
                    this_gen[iChild] = len(this_particle_tiers) + 2
                    tier.append(childID)

            this_particle_tiers.append(tier)
            
    # Evaluate the hierarchy built with this threshold, separately for tracks (i == 0) and showers (i == 1)
    for i in [0, 1] :
        if i == 0 :
            trackShowerMask = np.logical_and(primary_link_mask_in, primary_trackScore_in > 0.5)
        else :
            trackShowerMask = np.logical_and(primary_link_mask_in, primary_trackScore_in < 0.5)
            
        # Split by true generation: 3 = secondary, 4 = tertiary, > 4 = higher
        true_secondary_mask = np.logical_and(trackShowerMask, primary_trueGen_in == 3)
        true_tertiary_mask = np.logical_and(trackShowerMask, primary_trueGen_in == 4)
        true_higher_mask = np.logical_and(trackShowerMask, primary_trueGen_in > 4)

        # Secondary
        n_secondary = np.count_nonzero(true_secondary_mask)
        secondary_correct_parent = np.count_nonzero(np.logical_and(primary_trueParentID_in[true_secondary_mask] == temp_parentID[true_secondary_mask], \
                                                                   temp_gen[true_secondary_mask] == 3))
        secondary_no_parent = np.count_nonzero(temp_gen[true_secondary_mask] == BOGUS_INT)  
        secondary_incorrect_parent = np.count_nonzero(np.logical_not(np.logical_or(primary_trueParentID_in[true_secondary_mask] == temp_parentID[true_secondary_mask], \
                                                                     np.logical_or(temp_gen[true_secondary_mask] == BOGUS_INT, \
                                                                                   temp_gen[true_secondary_mask] == 2))))
        # Tertiary
        n_tertiary = np.count_nonzero(true_tertiary_mask)
        tertiary_correct_parent = np.count_nonzero(np.logical_and(primary_trueParentID_in[true_tertiary_mask] == temp_parentID[true_tertiary_mask], \
                                                                  temp_gen[true_tertiary_mask] == 4))
        tertiary_no_parent = np.count_nonzero(temp_gen[true_tertiary_mask] == BOGUS_INT)
        tertiary_incorrect_parent = np.count_nonzero(np.logical_not(np.logical_or(primary_trueParentID_in[true_tertiary_mask] == temp_parentID[true_tertiary_mask], \
                                                                     np.logical_or(temp_gen[true_tertiary_mask] == BOGUS_INT, \
                                                                                   temp_gen[true_tertiary_mask] == 2))))
        # Later tiers
        # Here "correct" requires the assigned generation to equal the true generation
        n_higher = np.count_nonzero(true_higher_mask)
        higher_correct_parent = np.count_nonzero(np.logical_and(primary_trueParentID_in[true_higher_mask] == temp_parentID[true_higher_mask], \
                                                                temp_gen[true_higher_mask] == primary_trueGen_in[true_higher_mask]))
        higher_no_parent = np.count_nonzero(temp_gen[true_higher_mask] == BOGUS_INT)
        higher_incorrect_parent = np.count_nonzero(np.logical_not(np.logical_or(primary_trueParentID_in[true_higher_mask] == temp_parentID[true_higher_mask], \
                                                                     np.logical_or(temp_gen[true_higher_mask] == BOGUS_INT, \
                                                                                   temp_gen[true_higher_mask] == 2))))
        
        # Store as fractions of the true population; an empty population gives 0 instead of a division by zero
        secondary_correct_parent_frac[i].append(0 if n_secondary == 0 else float(secondary_correct_parent)/float(n_secondary))
        secondary_no_parent_frac[i].append(0 if n_secondary == 0 else float(secondary_no_parent)/float(n_secondary))
        secondary_incorrect_parent_frac[i].append(0 if n_secondary == 0 else float(secondary_incorrect_parent)/float(n_secondary))

        tertiary_correct_parent_frac[i].append(0 if n_tertiary == 0 else float(tertiary_correct_parent)/float(n_tertiary))
        tertiary_no_parent_frac[i].append(0 if n_tertiary == 0 else float(tertiary_no_parent)/float(n_tertiary))
        tertiary_incorrect_parent_frac[i].append(0 if n_tertiary == 0 else float(tertiary_incorrect_parent)/float(n_tertiary))

        higher_correct_parent_frac[i].append(0 if n_higher == 0 else float(higher_correct_parent)/float(n_higher))
        higher_no_parent_frac[i].append(0 if n_higher == 0 else float(higher_no_parent)/float(n_higher))
        higher_incorrect_parent_frac[i].append(0 if n_higher == 0 else float(higher_incorrect_parent)/float(n_higher))

<div class="alert alert-block alert-info" style="font-size: 18px;">
    Draw these metrics
</div>

In [None]:
# Candidate figure of merit per tier, indexed [track, shower]:
# (correct-parent fraction) x (1 - incorrect-parent fraction)
potential_secondary_metric = [np.array(secondary_correct_parent_frac[iCategory]) * (1 - np.array(secondary_incorrect_parent_frac[iCategory]))
                              for iCategory in (0, 1)]
potential_tertiary_metric = [np.array(tertiary_correct_parent_frac[iCategory]) * (1 - np.array(tertiary_incorrect_parent_frac[iCategory]))
                             for iCategory in (0, 1)]
potential_higher_metric = [np.array(higher_correct_parent_frac[iCategory]) * (1 - np.array(higher_incorrect_parent_frac[iCategory]))
                           for iCategory in (0, 1)]

# One entry per tier: (legend prefix, correct, incorrect, no-parent, figure of merit).
# The three plots are identical apart from the data, so they share one drawing loop.
tierPlots = [
    ('secondary', secondary_correct_parent_frac, secondary_incorrect_parent_frac, secondary_no_parent_frac, potential_secondary_metric),
    ('tertiary', tertiary_correct_parent_frac, tertiary_incorrect_parent_frac, tertiary_no_parent_frac, potential_tertiary_metric),
    ('higher', higher_correct_parent_frac, higher_incorrect_parent_frac, higher_no_parent_frac, potential_higher_metric),
]

# i == 0: track-like particles, i == 1: shower-like particles
for i in [0, 1] :
    # Secondary plot, then tertiary plot, then higher plot
    for tierLabel, tierCorrect, tierIncorrect, tierNoParent, tierMetric in tierPlots :
        plt.clf()
        # Markers carry the legend entry; the line simply joins them up
        plt.scatter(higher_thresholds, tierCorrect[i], color='green', label=tierLabel + '_correct_parent_frac', s=10)
        plt.plot(higher_thresholds, tierCorrect[i], color='green')
        plt.scatter(higher_thresholds, tierIncorrect[i], color='red', label=tierLabel + '_incorrect_parent_frac', s=10)
        plt.plot(higher_thresholds, tierIncorrect[i], color='red')
        plt.scatter(higher_thresholds, tierNoParent[i], color='gray', label=tierLabel + '_no_parent_frac', s=10)
        plt.plot(higher_thresholds, tierNoParent[i], color='gray')
        plt.scatter(higher_thresholds, tierMetric[i], color='violet', label='potential_metric', s=10)
        plt.plot(higher_thresholds, tierMetric[i], color='violet')
        plt.title('Track' if i == 0 else 'Shower')
        # Every tier is plotted against the same scanned quantity, so all three plots share this label
        plt.xlabel('higher_thresholds')
        plt.ylabel('arbitary units')
        plt.legend()
        plt.show()

<div class="alert alert-block alert-info" style="font-size: 18px;">
    Set the later tier score thresholds used in pass 1 of the hierarchy building algorithm
</div>

In [None]:
# Minimum later-tier link score for attaching a child directly to a primary (first tier built),
# for track-like and shower-like children respectively
SECONDARY_THRESHOLD_TRACK_PASS1 = 0.80
SECONDARY_THRESHOLD_SHOWER_PASS1 = 0.80
# Minimum later-tier link score for attaching a child in any subsequent tier,
# for track-like and shower-like children respectively
HIGHER_THRESHOLD_TRACK_PASS1 = 0.80
HIGHER_THRESHOLD_SHOWER_PASS1 = 0.80

<div class="alert alert-block alert-info" style="font-size: 18px;">
    Now build the later tiers (this mimics the logic used in the Pandora algorithm)
</div>

In [None]:
for iEvent in range(nEntries):
    # Tier list seeded with the selected primaries; one tier is appended per pass of the while loop
    this_particle_tiers = [primaries[iEvent]]
    # References, not copies: the writes below update new_parentID / new_gen in place
    this_parentID = new_parentID[iEvent]
    this_gen = new_gen[iEvent]
    parentToChildDict = parentToChildDict_main[iEvent]

    # Valid particles of this event
    primary_link_mask = np.array(primary_link_mask_in[iEvent])
    primary_particleID = np.array(primary_particleID_in[iEvent])[primary_link_mask]
    trackShowerScore = np.array(primary_trackScore_in[iEvent])[primary_link_mask]

    # Valid parent-child links of this event
    laterTier_link_mask = np.array(laterTier_link_mask_in[iEvent])
    laterTier_childID = np.array(laterTier_childID_in[iEvent], dtype='int')[laterTier_link_mask]
    laterTier_parentID = np.array(laterTier_parentID_in[iEvent], dtype='int')[laterTier_link_mask]
    laterTier_score = np.array(laterTier_score_in[iEvent])[laterTier_link_mask]

    # Keep adding tiers until a pass attaches nothing new
    while len(this_particle_tiers[-1]) > 0:

        nTiersBuilt = len(this_particle_tiers)
        buildingSecondaries = (nTiersBuilt == 1)
        tier = []

        for parentID in this_particle_tiers[-1]:

            iParent = np.where(primary_particleID == parentID)[0][0]

            for childID in parentToChildDict[parentID]:

                iChild = np.where(primary_particleID == childID)[0][0]

                # Skip children that already sit somewhere in the hierarchy
                if this_gen[iChild] != BOGUS_INT:
                    continue

                isThisLink = np.logical_and(laterTier_childID == childID, laterTier_parentID == parentID)
                linkIndex = np.where(isThisLink)[0][0]
                isTrack = trackShowerScore[iChild] > 0.5

                # The cut depends on the tier being built and on the track/shower nature of the child
                if buildingSecondaries:
                    linkThreshold = SECONDARY_THRESHOLD_TRACK_PASS1 if isTrack else SECONDARY_THRESHOLD_SHOWER_PASS1
                else:
                    linkThreshold = HIGHER_THRESHOLD_TRACK_PASS1 if isTrack else HIGHER_THRESHOLD_SHOWER_PASS1

                if laterTier_score[linkIndex] < linkThreshold:
                    continue

                # Accept the link: children of primaries get generation 3, their children 4, and so on
                this_parentID[iChild] = parentID
                this_gen[iChild] = nTiersBuilt + 2
                tier.append(childID)

        this_particle_tiers.append(tier)

<div class="alert alert-block alert-info" style="font-size: 18px;">
    Make copies to save
</div>

In [None]:
# Snapshot the state after the later tiers have been built; new_gen / new_parentID
# keep being modified in place by the cells that follow
new_gen_pass2 = copy.deepcopy(new_gen)
new_parentID_pass2 = copy.deepcopy(new_parentID)

<div class="alert alert-block alert-info" style="font-size: 18px;">
    Can more be added to the primaries?
</div>

In [None]:
# Thresholds scanned, in the order the metrics below are filled
primary_thresholds = []

# Each metric is a [track, shower] pair of lists, one value appended per threshold
primary_efficiency = [[], []]
primary_purity = [[], []]
primary_metric = [[], []]
primaries_as_primaries_frac = [[], []]
primaries_as_other_frac = [[], []]
other_as_primaries_frac = [[], []]
other_as_other_frac = [[], []]

nThresholds = 20
for primary_threshold in range(0, nThresholds):
    print('threshold:', str(primary_threshold) + '/' + str(nThresholds))
    primary_threshold = primary_threshold / float(nThresholds)
    primary_thresholds.append(primary_threshold)

    # Trial copy of the current generations; only valid, still-unassigned particles are candidates
    temp_gen = np.array(copy.deepcopy(new_gen))
    unset_mask = np.logical_and(primary_link_mask_in, temp_gen == BOGUS_INT)

    for iEvent in range(nEntries):

        # Nothing left to decide in this event
        if np.count_nonzero(unset_mask[iEvent]) == 0:
            continue

        this_gen = temp_gen[iEvent]
        childToParentDict = childToParentDict_main[iEvent]

        # Valid particles and links of this event
        primary_link_mask = np.array(primary_link_mask_in[iEvent])
        primary_particleID = np.array(primary_particleID_in[iEvent])[primary_link_mask]
        laterTier_link_mask = np.array(laterTier_link_mask_in[iEvent])
        laterTier_childID = np.array(laterTier_childID_in[iEvent], dtype='int')[laterTier_link_mask]
        laterTier_parentID = np.array(laterTier_parentID_in[iEvent], dtype='int')[laterTier_link_mask]
        laterTier_score = np.array(laterTier_score_in[iEvent])[laterTier_link_mask]

        for iParticle in np.where(unset_mask[iEvent])[0]:

            particleID = primary_particleID[iParticle]
            bestParents = childToParentDict[particleID]

            if len(bestParents) == 0:
                # No candidate parent at all: becomes a primary
                promoteToPrimary = True
            else:
                # Becomes a primary only if the link to its best parent scores below the trial threshold
                parentID = bestParents[0]
                isThisLink = np.logical_and(laterTier_childID == particleID, laterTier_parentID == parentID)
                linkIndex = np.where(isThisLink)[0][0]
                promoteToPrimary = laterTier_score[linkIndex] < primary_threshold

            if promoteToPrimary:
                this_gen[iParticle] = 2

    for i in [0, 1]:
        # i == 0: track-like (track score > 0.5), i == 1: shower-like (track score < 0.5)
        categoryCut = (primary_trackScore_in > 0.5) if i == 0 else (primary_trackScore_in < 0.5)
        all_mask = np.logical_and(primary_link_mask_in, categoryCut)
        target_mask = np.logical_and(unset_mask, categoryCut)

        # Truth and trial labels of the candidate particles
        targetTrueIsPrimary = primary_trueGen_in[target_mask] == 2
        targetTrueIsOther = primary_trueGen_in[target_mask] != 2
        targetSetPrimary = temp_gen[target_mask] == 2
        targetSetOther = temp_gen[target_mask] != 2

        # Numerators count candidates only; denominators count every valid particle of the category
        n_true_primaries = np.count_nonzero(primary_trueGen_in[all_mask] == 2)
        n_true_other = np.count_nonzero(primary_trueGen_in[all_mask] != 2)
        n_primaries_as_primaries = np.count_nonzero(np.logical_and(targetTrueIsPrimary, targetSetPrimary))
        n_primaries_as_other = np.count_nonzero(np.logical_and(targetTrueIsPrimary, targetSetOther))
        n_other_as_primaries = np.count_nonzero(np.logical_and(targetTrueIsOther, targetSetPrimary))
        n_other_as_other = np.count_nonzero(np.logical_and(targetTrueIsOther, targetSetOther))

        selected_primaries = n_primaries_as_primaries + n_other_as_primaries

        # Empty denominators give 0 instead of a division by zero
        if n_true_primaries == 0:
            thisEfficiency = 0
            thisPrimariesAsOther = 0
        else:
            thisEfficiency = float(n_primaries_as_primaries) / float(n_true_primaries)
            thisPrimariesAsOther = float(n_primaries_as_other) / float(n_true_primaries)

        if selected_primaries == 0:
            thisPurity = 0
        else:
            thisPurity = float(n_primaries_as_primaries) / float(selected_primaries)

        if n_true_other == 0:
            thisOtherAsPrimaries = 0
            thisOtherAsOther = 0
        else:
            thisOtherAsPrimaries = float(n_other_as_primaries) / float(n_true_other)
            thisOtherAsOther = float(n_other_as_other) / float(n_true_other)

        primary_efficiency[i].append(thisEfficiency)
        primary_purity[i].append(thisPurity)
        primary_metric[i].append(thisEfficiency * thisPurity)

        primaries_as_primaries_frac[i].append(thisEfficiency)
        primaries_as_other_frac[i].append(thisPrimariesAsOther)
        other_as_primaries_frac[i].append(thisOtherAsPrimaries)
        other_as_other_frac[i].append(thisOtherAsOther)

<div class="alert alert-block alert-info" style="font-size: 18px;">
    Draw these metrics
</div>

In [None]:
# Candidate figure of merit per category [track, shower]:
# (true primaries kept as primaries) x (others kept as others)
potential_metric = [np.array(primaries_as_primaries_frac[iCategory]) * np.array(other_as_other_frac[iCategory])
                    for iCategory in (0, 1)]

# (values, colour, legend label, extra keyword arguments for the connecting line);
# for these four curves the legend entry is attached to the line, not to the markers
fractionCurves = (
    (primaries_as_primaries_frac, 'green', 'primaries_as_primaries', {}),
    (primaries_as_other_frac, 'red', 'primaries_as_other_frac', {}),
    (other_as_other_frac, 'green', 'other_as_other_frac', {'linestyle': 'dashed'}),
    (other_as_primaries_frac, 'red', 'other_as_primaries_frac', {'linestyle': 'dashed'}),
)

for i in [0, 1]:
    plt.clf()

    for curveValues, curveColour, curveLabel, lineKwargs in fractionCurves:
        plt.scatter(primary_thresholds, curveValues[i], color=curveColour, s=10)
        plt.plot(primary_thresholds, curveValues[i], color=curveColour, label=curveLabel, **lineKwargs)

    # The figure of merit keeps its legend entry on the markers
    plt.scatter(primary_thresholds, potential_metric[i], color='violet', label='potential_metric', s=10)
    plt.plot(primary_thresholds, potential_metric[i], color='violet')

    if i == 0:
        plt.title('Track')
    else:
        plt.title('Shower')

    plt.xlabel('primary_thresholds')
    plt.ylabel('arbitary units')
    plt.legend()
    plt.show()

<div class="alert alert-block alert-info" style="font-size: 18px;">
    Set the later tier score thresholds used in pass 2 of the primary tier building
</div>

In [None]:
# Pass 2: a still-unassigned particle stays unassigned only if the link to its best parent
# scores above this value; otherwise it is promoted to a primary (track-like / shower-like)
PRIMARY_THRESHOLD_TRACK_PASS2 = 0.90
PRIMARY_THRESHOLD_SHOWER_PASS2 = 0.90

<div class="alert alert-block alert-info" style="font-size: 18px;">
    Now do pass 2 of the primary tier building (this mimics the logic used in the Pandora algorithm)
</div>

In [None]:
# Valid particles that are still unassigned after the first hierarchy build
target_mask = np.logical_and(primary_link_mask_in, np.array(new_gen) == BOGUS_INT)

for iEvent in range(nEntries):

    # Skip events in which everything has been assigned already
    if np.count_nonzero(target_mask[iEvent]) == 0:
        continue

    # References, not copies: these are the objects being updated
    this_parentID = new_parentID[iEvent]
    this_gen = new_gen[iEvent]
    this_primaries = primaries[iEvent]

    # Valid particles and links of this event
    childToParentDict = childToParentDict_main[iEvent]
    primary_link_mask = np.array(primary_link_mask_in[iEvent])
    primary_particleID = np.array(primary_particleID_in[iEvent])[primary_link_mask]
    trackShowerScore = np.array(primary_trackScore_in[iEvent])[primary_link_mask]
    laterTier_link_mask = np.array(laterTier_link_mask_in[iEvent])
    laterTier_childID = np.array(laterTier_childID_in[iEvent], dtype='int')[laterTier_link_mask]
    laterTier_parentID = np.array(laterTier_parentID_in[iEvent], dtype='int')[laterTier_link_mask]
    laterTier_score = np.array(laterTier_score_in[iEvent])[laterTier_link_mask]

    for iParticle in np.where(target_mask[iEvent])[0]:

        particleID = primary_particleID[iParticle]
        bestParents = childToParentDict[particleID]

        if len(bestParents) != 0:
            parentID = bestParents[0]
            isTrack = trackShowerScore[iParticle] > 0.5
            isThisLink = np.logical_and(laterTier_childID == particleID, laterTier_parentID == parentID)
            linkIndex = np.where(isThisLink)[0][0]

            # A sufficiently confident parent link means the particle is left unassigned here
            linkThreshold = PRIMARY_THRESHOLD_TRACK_PASS2 if isTrack else PRIMARY_THRESHOLD_SHOWER_PASS2
            if laterTier_score[linkIndex] > linkThreshold:
                continue

        # No parent, or only a weak parent link: promote to primary
        this_primaries.append(particleID)
        this_gen[iParticle] = 2
        this_parentID[iParticle] = -1

<div class="alert alert-block alert-info" style="font-size: 18px;">
    Make copies to save
</div>

In [None]:
# Deep copies, so that the later cells which edit new_gen / new_parentID in place do not alter this snapshot
# NOTE(review): taken straight after the 'pass 2' cell but stored under the pass3 name - confirm the numbering
new_gen_pass3 = copy.deepcopy(new_gen)
new_parentID_pass3 = copy.deepcopy(new_parentID)

<div class="alert alert-block alert-info" style="font-size: 18px;">
    Assign any remaining particles to the hierarchy if their parents can be found
</div>

In [None]:
for iEvent in range(nEntries) :

    # The hierarchy is grown tier by tier, seeded with this event's primaries
    this_particle_tiers = [primaries[iEvent]]

    # Deliberately not copied - writes through these names update new_parentID / new_gen
    this_parentID = new_parentID[iEvent]
    this_gen = new_gen[iEvent]
    childToParentDict = childToParentDict_main[iEvent]

    # Per-event inputs, restricted to the entries selected by the link masks
    primary_link_mask = np.array(primary_link_mask_in[iEvent])
    primary_particleID = np.array(primary_particleID_in[iEvent])[primary_link_mask]
    nParticles = np.count_nonzero(primary_link_mask)
    # The later-tier arrays below are loaded but not read by the loop in this cell
    laterTier_link_mask = np.array(laterTier_link_mask_in[iEvent])
    laterTier_childID = np.array(laterTier_childID_in[iEvent], dtype='int')[laterTier_link_mask]
    laterTier_parentID = np.array(laterTier_parentID_in[iEvent], dtype='int')[laterTier_link_mask]
    laterTier_score = np.array(laterTier_score_in[iEvent])[laterTier_link_mask]

    # Keep adding tiers until one comes back empty
    while (len(this_particle_tiers[-1]) != 0) :

        previousTier = this_particle_tiers[-1]
        # The seed tier is generation 2, so the tier being built now is generation len + 2
        tierGen = len(this_particle_tiers) + 2
        tier = []

        for iParticle in range(nParticles) :

            particleID = primary_particleID[iParticle]

            # Already placed in this generation by an earlier pass
            if (this_gen[iParticle] == tierGen) :
                tier.append(particleID)
                continue

            # Placed in some other generation - leave alone
            if (this_gen[iParticle] != BOGUS_INT) :
                continue

            # Unassigned, but with no parent candidate to try
            if (len(childToParentDict[particleID]) == 0) :
                continue

            # Attach only if the first-listed parent sits in the tier directly above
            parentID = childToParentDict[particleID][0]

            if parentID not in previousTier :
                continue

            this_parentID[iParticle] = parentID
            this_gen[iParticle] = tierGen
            tier.append(particleID)

        this_particle_tiers.append(tier)

<div class="alert alert-block alert-info" style="font-size: 18px;">
    Make copies to save
</div>

In [None]:
# Snapshot of the hierarchy after the tier-building loop above; deep-copied so later edits to
# new_gen / new_parentID do not change it. Written to the output file under the same names.
new_gen_pass4 = copy.deepcopy(new_gen)
new_parentID_pass4 = copy.deepcopy(new_parentID)

<div class="alert alert-block alert-info" style="font-size: 18px;">
    Mark any remaining unassigned particles as primaries
</div>

In [None]:
# Entries inside the primary link mask that still have no generation could not be attached to
# the hierarchy by the earlier passes
target_mask = np.logical_and(primary_link_mask_in, np.array(new_gen) == BOGUS_INT)

# Write through to new_parentID / new_gen themselves. Indexing np.array(...) of them instead
# would assign into a temporary copy and leave the originals (and the pass5 snapshot) unchanged.
for iEvent, iParticle in zip(*np.nonzero(target_mask)) :
    new_parentID[iEvent][iParticle] = -1
    new_gen[iEvent][iParticle] = 2

<div class="alert alert-block alert-info" style="font-size: 18px;">
    Make copies to save
</div>

In [None]:
# Final snapshot of new_gen / new_parentID, deep-copied and written to the output file under the same names
new_gen_pass5 = copy.deepcopy(new_gen)
new_parentID_pass5 = copy.deepcopy(new_parentID)

<div class="alert alert-block alert-info" style="font-size: 18px;">
    Save!
</div>

In [None]:
# Everything to be written, keyed by the array name used inside the .npz file
arraysToSave = {
    # Inputs passed straight through from the loaded file
    'primary_link_mask' : primary_link_mask_in,
    'trueVisibleGeneration' : primary_trueGen_in,
    'trueVisibleParentID' : primary_trueParentID_in,
    'truePDG' : primary_truePDG_in,
    'trackShowerScore' : primary_trackScore_in,
    # Generation / parent ID snapshots taken after each pass
    'new_gen_pass1' : new_gen_pass1,
    'new_parentID_pass1' : new_parentID_pass1,
    'new_gen_pass2' : new_gen_pass2,
    'new_parentID_pass2' : new_parentID_pass2,
    'new_gen_pass3' : new_gen_pass3,
    'new_parentID_pass3' : new_parentID_pass3,
    'new_gen_pass4' : new_gen_pass4,
    'new_parentID_pass4' : new_parentID_pass4,
    'new_gen_pass5' : new_gen_pass5,
    'new_parentID_pass5' : new_parentID_pass5,
}

np.savez(outputFile, **arraysToSave)