# Comparison of Track Reconstruction Methods: L2IT vs. ExaTrkX

In [1]:
%load_ext autoreload
%autoreload 2

# System imports
import os
import sys
import yaml
import logging

# External imports
import numpy as np
import pandas as pd
from tqdm import tqdm
import warnings

# Silence every Python warning for the rest of the session.
# NOTE(review): this also hides deprecation / dtype warnings from pandas and numpy.
warnings.filterwarnings("ignore")
# Extend the module search path so that project-local packages can be imported.
# NOTE(review): the second entry is an absolute, user-specific path; the notebook
# will not import its L2IT helpers on another account without editing it.
sys.path.append("../../..")
sys.path.append("/global/homes/d/danieltm/ExaTrkX/tracks_reconstruction")

# Show INFO-level log records (and above) from the root logger.
logging.basicConfig(level=logging.INFO)

## Explore L2IT Method

In [2]:
from effTrkReco import effAllEvts
from tools.selection import GetInterestingParticlesWithHits, SelectionFid

Welcome to JupyROOT 6.26/02


In [3]:
# Root directory onto which the track file and per-event CSV paths below are joined.
input_dir = "/global/cfs/cdirs/m3443/data/ITk-upgrade/tracking_comparison"

In [4]:
# Read the reconstructed tracks of one event from the HDF5 track file, once per
# path-building method, and report how many tracks each method produced.
recTrkFile = os.path.join(input_dir, "tracks_reco/trackReco_allPaths-bestPaths_events_421-520_thr-0.80.h5")
methods = ['allPaths', 'bestPaths']
iEvt = 421

# Keep the tracks of every method instead of letting each loop iteration
# overwrite the result of the previous one.
trks_by_method = {}
with pd.HDFStore(recTrkFile, mode='r') as reader:
    for m in methods:
        dataname = "/event{0}/{1}/reco_tracks".format(iEvt, m)
        df_trks = reader.get(dataname)
        # Each row is one track; -1 entries are placeholders for empty hit slots
        # and are dropped, so tracks end up with different lengths.
        trks_by_method[m] = [[hit for hit in trk if hit != -1] for trk in df_trks.values]
        print(m, len(trks_by_method[m]))

# Later cells work on `trks`. Bind it explicitly to the last method in `methods`,
# which is the value the loop used to leave behind implicitly.
trks = trks_by_method[methods[-1]]


allPaths 2438
bestPaths 2245


In [12]:
# Give every track an integer id and repeat that id once per hit of the track,
# producing one id per entry of the hit lists in `trks`.
track_lengths = np.array(list(map(len, trks)))
track_id = np.arange(len(trks))
repeating_ids = np.repeat(track_id, track_lengths)

In [14]:
# Chain all trks lists together
from itertools import chain
trks_flat = list(chain.from_iterable(trks))

In [15]:
# Sanity check: the flattened hit list and the per-hit track ids must have the same length.
len(trks_flat), len(repeating_ids)

(23832, 23832)

In [22]:
# Build the truth-level fiducial selection from the efficiency YAML configuration.
config_dir = "/global/homes/d/danieltm/ExaTrkX/tracks_reconstruction/configs"
config_path = os.path.join(config_dir, "conf_effTrkReco.yml")
with open(config_path, "r") as f:
    config = yaml.load(f, Loader=yaml.FullLoader)

cnfSelT = config['selectionTruth']
# SelectionFid receives the cuts positionally, in exactly this key order.
fid_keys = ('pt', 'eta', 'nSP', 'noSecondary', 'noElectron')
selFid = SelectionFid(*[cnfSelT[key] for key in fid_keys])

In [24]:
# Load the particle and truth-hit tables of event `iEvt`, then hand both to
# GetInterestingParticlesWithHits together with the fiducial selection `selFid`.
event_dir = os.path.join(input_dir, "GNN/ITk_ttbar_pu200_wout_SPoverlap_links/")
# Zero-pad the event number to nine digits instead of gluing it onto a fixed
# "000000" prefix, which only yields the right file name for three-digit events.
event_prefix = f"event{iEvt:09d}"
particles = pd.read_csv(os.path.join(event_dir, f"{event_prefix}-particles.csv"))
hits = pd.read_csv(os.path.join(event_dir, f"{event_prefix}-truth.csv"))
interesting_particles, interesting_hits = GetInterestingParticlesWithHits(particles, hits, selFid)

There are 1214 particles to reconstruct.


In [27]:
# Rebuild a fiducial mask by hand: pt at or above the threshold and |eta| inside
# the acceptance, optionally restricted to barcodes below 200000 when
# `selFid.noSec` is set.
passes_pt = particles.pt >= selFid.pt
passes_eta = particles.eta.abs() < selFid.eta
fidCut = passes_pt & passes_eta

if selFid.noSec:
    has_low_barcode = particles.barcode < 200000
    fidCut = fidCut & has_low_barcode

interestingParticles = particles[fidCut]


In [28]:
# Display the particles that pass the hand-built fiducial mask `fidCut`.
interestingParticles

Unnamed: 0,particle_id,subevent,barcode,px,py,pz,pt,eta,vx,vy,vz,radius,status,charge,pdgId,pass,vProdNIn,vProdNOut,vProdStatus,vProdBarcode
0,5,0,5,87968.900,-128765.000,-128765.000,155946.00,-2.348820,0.004713,-0.000789,13.8283,0.004779,22,0.666667,6,NO,2,3,0,-3
1,6,0,6,-79972.500,131769.000,131769.000,154138.00,0.883755,0.004713,-0.000789,13.8283,0.004779,22,-0.666667,-6,NO,2,3,0,-3
2,7,0,7,-7996.360,-3003.630,-3003.630,8541.87,-1.853680,0.004713,-0.000789,13.8283,0.004779,23,1.000000,21,NO,2,3,0,-3
3,10,0,10,2377.670,6519.720,6519.720,6939.75,-2.882440,0.004713,-0.000789,13.8283,0.004779,33,1.000000,21,NO,2,2,0,-6
4,14,0,14,88246.500,-123258.000,-123258.000,151592.00,-2.379160,0.004713,-0.000789,13.8283,0.004779,44,0.666667,6,NO,1,1,0,-9
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
88673,57010000575,5701,575,-212.857,1042.910,1042.910,1064.41,2.258170,0.003031,0.014943,20.8952,0.015248,1,1.000000,321,YES,1,2,0,-352
88674,57010000576,5701,576,-328.075,1171.280,1171.280,1216.36,1.731250,0.003031,0.014943,20.8952,0.015248,1,-1.000000,-211,YES,1,2,0,-352
88676,57010000581,5701,581,-883.655,1137.650,1137.650,1440.52,2.202190,0.003031,0.014943,20.8952,0.015248,1,1.000000,321,YES,1,2,0,-355
88695,57010000668,5701,668,-1002.790,-933.812,-933.812,1370.25,2.117340,0.003031,0.014943,20.8952,0.015248,1,1.000000,211,YES,1,3,0,-398


In [30]:
# Count, per particle_id, the hits that sit on distinct modules: rows sharing the
# same particle and module identifiers are first collapsed to a single row (the
# last one), then the remaining hit_ids are counted for each particle.
module_keys = ['particle_id', 'hardware', 'barrel_endcap', 'layer_disk', 'eta_module', 'phi_module']
hits_one_per_module = hits.drop_duplicates(subset=module_keys, keep='last')
nhits = (
    hits_one_per_module
    .groupby("particle_id")["hit_id"]
    .count()
    .rename("nhit_diffModule")
    .reset_index()
)


In [37]:
# Display the PIXEL hits whose particle_id is non-zero.
# NOTE(review): presumably particle_id 0 marks hits not linked to a truth particle — confirm.
hits[(hits.hardware == "PIXEL") & (hits.particle_id != 0)]

Unnamed: 0,hit_id,x,y,z,cluster_index_1,cluster_index_2,particle_id,hardware,cluster_x_1,cluster_y_1,...,norm_x,norm_y,norm_z_1,cluster_x_2,cluster_y_2,cluster_z_2,eta_angle_2,phi_angle_2,norm_z_2,ID
0,0,-36.8012,-5.09665,-263.0,0,-1,56890001056,PIXEL,-36.8012,-5.09665,...,0.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,158329674399744
2,2,-36.2655,-10.19740,-263.0,2,-1,56740000679,PIXEL,-36.2655,-10.19740,...,0.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,158329674399744
3,3,-34.1268,-18.34360,-263.0,3,-1,55290001508,PIXEL,-34.1268,-18.34360,...,0.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,158329674399744
5,5,-38.5751,-7.56565,-263.0,5,-1,55870000320,PIXEL,-38.5751,-7.56565,...,0.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,158329674399744
6,6,-36.5809,-15.64480,-263.0,6,-1,56220000413,PIXEL,-36.5809,-15.64480,...,0.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,158329674399744
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
237440,236896,33.4556,293.58300,2525.0,236896,-1,55820200003,PIXEL,33.4556,293.58300,...,0.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,749506290329321472
237441,236897,37.6597,293.77800,2525.0,236897,-1,55550200261,PIXEL,37.6597,293.77800,...,0.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,749506290329321472
237444,236900,41.5816,300.15100,2525.0,236900,-1,55800000334,PIXEL,41.5816,300.15100,...,0.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,749506290329321472
237445,236901,43.6099,300.05600,2525.0,236901,-1,55270200177,PIXEL,43.6099,300.05600,...,0.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,749506290329321472


In [None]:
# PIXEL hits with a non-zero particle_id, reduced to one row (the last) per
# combination of particle and module identifiers.
pixel_particle_hits = hits[(hits.hardware == "PIXEL") & (hits.particle_id != 0)]
pixel_particle_hits.drop_duplicates(
    subset=['particle_id', 'hardware', 'barrel_endcap', 'layer_disk', 'eta_module', 'phi_module'],
    keep='last',
)

Unnamed: 0,hit_id,x,y,z,cluster_index_1,cluster_index_2,particle_id,hardware,cluster_x_1,cluster_y_1,...,norm_x,norm_y,norm_z_1,cluster_x_2,cluster_y_2,cluster_z_2,eta_angle_2,phi_angle_2,norm_z_2,ID
0,0,-36.8012,-5.09665,-263.0,0,-1,56890001056,PIXEL,-36.8012,-5.09665,...,0.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,158329674399744
2,2,-36.2655,-10.19740,-263.0,2,-1,56740000679,PIXEL,-36.2655,-10.19740,...,0.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,158329674399744
3,3,-34.1268,-18.34360,-263.0,3,-1,55290001508,PIXEL,-34.1268,-18.34360,...,0.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,158329674399744
5,5,-38.5751,-7.56565,-263.0,5,-1,55870000320,PIXEL,-38.5751,-7.56565,...,0.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,158329674399744
6,6,-36.5809,-15.64480,-263.0,6,-1,56220000413,PIXEL,-36.5809,-15.64480,...,0.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,158329674399744
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
237440,236896,33.4556,293.58300,2525.0,236896,-1,55820200003,PIXEL,33.4556,293.58300,...,0.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,749506290329321472
237441,236897,37.6597,293.77800,2525.0,236897,-1,55550200261,PIXEL,37.6597,293.77800,...,0.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,749506290329321472
237444,236900,41.5816,300.15100,2525.0,236900,-1,55800000334,PIXEL,41.5816,300.15100,...,0.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,749506290329321472
237445,236901,43.6099,300.05600,2525.0,236901,-1,55270200177,PIXEL,43.6099,300.05600,...,0.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,749506290329321472


In [44]:
# Number of hits with a non-zero particle_id whose particle appears in `interestingParticles`.
particle_hits = hits[hits.particle_id != 0]
particle_hits.particle_id.isin(interestingParticles.particle_id).sum()

15264

In [45]:
# Same count of hits belonging to `interestingParticles`, taken after keeping only
# one row (the last) per combination of particle and module identifiers.
module_keys = ['particle_id', 'hardware', 'barrel_endcap', 'layer_disk', 'eta_module', 'phi_module']
deduped_particle_hits = hits[hits.particle_id != 0].drop_duplicates(subset=module_keys, keep='last')
deduped_particle_hits.particle_id.isin(interestingParticles.particle_id).sum()

15143

In [32]:
# Display the per-particle count of hits on distinct modules.
# NOTE(review): the first row is particle_id 0, which the other cells exclude with `!= 0`.
nhits

Unnamed: 0,particle_id,nhit_diffModule
0,0,16691
1,551,15
2,555,11
3,617,4
4,618,12
...,...,...
16139,57010200771,1
16140,57010200772,9
16141,57010200773,9
16142,57010200775,5


In [47]:
# Number of selected particles whose `split_clusters` value is non-zero.
(interesting_particles.split_clusters != 0).sum()

113

### L2IT Efficiency

1. Get efficiency of one event
2. Convert candidates to hit ID df
3. Write function to add candidates to OneTrack
4. Add candidates
5. Get efficiency
6. Compare differences & debug

In [3]:
from tools.selection import SelectionRec, SelectionFid

In [4]:
# --- Input and output locations ----------------------------------------------
input_dir = "/global/cfs/cdirs/m3443/data/ITk-upgrade/tracking_comparison"
inCsvDir = os.path.join(input_dir, "GNN/ITk_ttbar_pu200_wout_SPoverlap_links/")
inTrkFile = os.path.join(input_dir, "tracks_reco/trackReco_allPaths-bestPaths_events_421-520_thr-0.80.h5")
outDir = "reco_output"

# --- Event range and reconstruction options ----------------------------------
firstEvt, nEvent = 421, 1
methods = ['allPaths', 'bestPaths']
onSolution = False
singleThr = 0.8
nWorker = 1

# --- Truth and reco selections -----------------------------------------------
# The positional arguments follow the order used where SelectionFid is built from
# the YAML configuration: pt, eta, nSP, noSecondary, noElectron.
selFid = SelectionFid(1000, 4, 3, True, True)
selRec = SelectionRec(4)

# Run the L2IT efficiency computation for the configured event range.
effAllEvts(
    inCsvDir, inTrkFile, outDir,
    firstEvt, nEvent,
    methods, onSolution, singleThr,
    selFid, selRec,
    nWorker,
)


Processing event #421
Getting particle list
There are 1214 particles to reconstruct.
Interesting particles retrieved
--------------------------------------------------
Getting reco tracks from file
--------------------------------------------------
Filling histograms
#selected particles   : 1214
#reconstructed tracks (allPaths) : 2186
#reconstructed tracks (bestPaths) : 1993


100%|██████████| 1214/1214 [04:17<00:00,  4.71it/s]


# True: 1214.0 140227630346176 truth_pt
*** Method allPaths ***
# Reco tracks (event 421): 2186
# VLoose Matched for efficiency: 1193.0
# Loose Matched for efficiency: 1192.0
# Tight Matched for efficiency: 1013.0
# Perfect Matched for efficiency: 936.0
*** Method bestPaths ***
# Reco tracks (event 421): 1993
# VLoose Matched for efficiency: 1193.0
# Loose Matched for efficiency: 1192.0
# Tight Matched for efficiency: 991.0
# Perfect Matched for efficiency: 891.0
--------------------------------------------------
Making efficiency
results saved in reco_output/trackEff_allPaths-bestPaths_first-421_nEvt-1_thr-0.80_noElecs.root.

******************** Results summary ********************
# True tracks: 1214.0
--- Method allPaths ---
# Reco tracks: 2186
VLoose  : 1193 Best Match, 1547 Matched [ cand. per particle : 1.3 +\- 0.7 ], 354 Duplicated (0.16),2186 Unmatched (1.00)
Loose   : 1192 Best Match, 1529 Matched [ cand. per particle : 1.3 +\- 0.7 ], 337 Duplicated (0.15),639 Unmatched (0.29

# Exatrkx Script

1. Read in events
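2. Add the reconstructed track candidates from the HDF5 track file
3. Evaluate the candidates with the matching method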

In [2]:
# NOTE(review): the wildcard import hides which names the following cells take
# from notebooks.ITk.utils; prefer importing the needed names explicitly.
from notebooks.ITk.utils import *
from onetrack import TrackingData
from onetrack.file_utils import list_files

In [3]:
# Input locations for this section: the per-event CSV directory and the HDF5 file
# holding the reconstructed tracks (the same paths as in the L2IT efficiency section).
input_dir = "/global/cfs/cdirs/m3443/data/ITk-upgrade/tracking_comparison"
inCsvDir = os.path.join(input_dir, "GNN/ITk_ttbar_pu200_wout_SPoverlap_links/")
inTrkFile = os.path.join(input_dir, "tracks_reco/trackReco_allPaths-bestPaths_events_421-520_thr-0.80.h5")

In [34]:
# Select the input file of event 421 by name instead of by its position in the
# directory listing, so the choice does not depend on listing order or on how
# many files the directory contains.
event_file_name = "event000000421-particles.csv"
files = [path for path in list_files(inCsvDir) if os.path.basename(path) == event_file_name]

In [35]:
# Display the selected file list to confirm that it holds the intended event.
files

['/global/cfs/cdirs/m3443/data/ITk-upgrade/tracking_comparison/GNN/ITk_ttbar_pu200_wout_SPoverlap_links/event000000421-particles.csv']

In [36]:
# Instantiate onetrack's TrackingData with its default arguments.
tracking_data = TrackingData()

In [37]:
# Load the selected event file(s) into `tracking_data`, declaring them as CSV input.
tracking_data.load_file_list(files, file_type="csv")

INFO:root:Loading files
INFO:root:Building events


  0%|          | 0/1 [00:00<?, ?it/s]

In [38]:
# Attach track candidates read from the HDF5 track file.
# NOTE(review): build_method="AP" presumably selects the "allPaths" tracks — confirm against onetrack.
tracking_data.add_candidates(inTrkFile, file_type="hdf5", build_method="AP")

In [39]:
# Display the candidates of the first loaded event as a DataFrame.
tracking_data[0].candidates.get_df()

Unnamed: 0,hit_id,track_id
0,139,0
1,187,0
2,242,0
3,302,0
4,20114,0
...,...,...
26004,233848,2435
26005,302856,2436
26006,306976,2436
26007,250667,2437


In [49]:
# Keyword arguments forwarded to the candidate evaluation: minimum hit counts on
# the truth and reco sides, and the matched fractions on the reco and truth sides.
matching_config = dict(
    min_hits_truth=7,
    min_hits_reco=7,
    frac_reco_matched=0.5,
    frac_truth_matched=0.5,
)

In [50]:
# Evaluate the attached candidates with the "matching" method, forwarding the
# thresholds in `matching_config` as keyword arguments.
tracking_data.evaluate_candidates(evaluation_method="matching", **matching_config)

INFO:root:Evaluating candidates


  0%|          | 0/1 [00:00<?, ?it/s]

{'building_method': 'AP', 'evaluation_method': 'matching', 'eff': 0.9778156996587031, 'single_eff': 0.9795221843003413, 'fr': 0.0025497195308515908, 'dup': 0.17134115247322795}
n_true_tracks: 1172, n_reco_tracks: 1961, n_matched_particles: 1146, n_matched_tracks: 1956, n_duplicated_tracks: 336
