In [1]:
import awkward as ak
import numpy as np
import uproot as uproot
import matplotlib
import matplotlib.pyplot as plt
import networkx as nx
from utils import mkdir_p
import glob
from numba import jit
import pickle
import os, errno
import time 

In [2]:
@jit
def findNearestNeighbour(i, barycenters_x, barycenters_y, barycenters_z):
    """Find the trackster whose barycenter is closest to that of trackster ``i``.

    Parameters
    ----------
    i : int
        Index of the reference trackster.
    barycenters_x, barycenters_y, barycenters_z : indexable sequences of float
        Barycenter coordinates, all addressed with the same trackster index.
        ``len(barycenters_x)`` defines how many tracksters are scanned.

    Returns
    -------
    (int, float)
        Index of the nearest *other* trackster and the Euclidean distance to
        it. Only neighbours strictly closer than the initial search radius of
        1000 (in the units of the inputs) are considered; if there is none
        (e.g. a single-trackster event) the result is ``(-1, 1000.)``.
    """
    x_i = barycenters_x[i]
    y_i = barycenters_y[i]
    z_i = barycenters_z[i]

    # Sentinel index: previously this name was left unbound whenever no
    # candidate passed the distance test, making the return statement fail.
    i_least = -1
    d_least = 1000.
    for k in range(len(barycenters_x)):
        if k == i:
            continue
        # Plain scalar differences avoid allocating two small arrays per step.
        dx = barycenters_x[k] - x_i
        dy = barycenters_y[k] - y_i
        dz = barycenters_z[k] - z_i
        d = np.sqrt(dx*dx + dy*dy + dz*dz)
        # Strict comparison: on ties the lowest index wins.
        if d < d_least:
            d_least = d
            i_least = k
    return i_least, d_least
    

def _best_sim_match(ass_data, ts_idx):
    '''Return the sim-trackster entry with the lowest reco-to-sim score for reco trackster ``ts_idx``.'''
    scores = ass_data.tsCLUE3D_recoToSim_CP_score[ts_idx]
    return ass_data.tsCLUE3D_recoToSim_CP[ts_idx][ak.argmin(scores)]


def computeEdgeAndLabels(trk_data, ass_data, gra_data, nodes, edges, edges_labels, best_simTs_match):
    '''Compute the truth graph of one event.

    The four output lists are filled in place; nothing is returned.

    Parameters
    ----------
    trk_data : record providing ``NTracksters`` and ``barycenter_x/y/z``.
    ass_data : record providing ``tsCLUE3D_recoToSim_CP`` and
        ``tsCLUE3D_recoToSim_CP_score``, both indexed by reco trackster.
    gra_data : record providing ``linked_inners`` and ``linked_outers``,
        both indexed by reco trackster.
    nodes : list, receives one entry (the trackster index) per trackster.
    edges : list, receives ``[j, i]`` for every inner link ``j`` of ``i`` and
        ``[i, nearest]`` for tracksters that have no link at all.
    edges_labels : list, receives 1 when both ends of the edge share the same
        best sim-trackster match and 0 otherwise (parallel to ``edges``).
    best_simTs_match : list, receives the best sim-trackster match of every
        trackster. The match is the entry with the minimum score; no score
        threshold is applied.
    '''
    n_tracksters = trk_data.NTracksters
    # Barycenters as numpy arrays; converted lazily and at most once per event
    # instead of once per isolated trackster.
    barycenters = None

    for i in range(n_tracksters):
        nodes.append(i)
        best_sts_i = _best_sim_match(ass_data, i)
        best_simTs_match.append(best_sts_i)

        inners = gra_data.linked_inners[i]
        for j in inners:
            edges.append([j, i])
            edges_labels.append(1 if best_sts_i == _best_sim_match(ass_data, j) else 0)

        if len(inners) == 0 and len(gra_data.linked_outers[i]) == 0:
            # This trackster does not have any neighbours in the graph:
            # connect it to its nearest neighbour.
            if n_tracksters < 2:
                # Nothing to connect to; leave the node without an edge.
                continue
            if barycenters is None:
                barycenters = (ak.to_numpy(trk_data.barycenter_x),
                               ak.to_numpy(trk_data.barycenter_y),
                               ak.to_numpy(trk_data.barycenter_z))
            nearest_id, _ = findNearestNeighbour(i, *barycenters)
            if nearest_id < 0:
                # A negative index is treated as "no neighbour found".
                continue
            edges.append([i, nearest_id])
            edges_labels.append(1 if best_sts_i == _best_sim_match(ass_data, nearest_id) else 0)
            

In [7]:
# Build the pickled graph dataset: for every event of every ntuple, collect the
# per-trackster node features, the graph edges and their truth labels, and
# write them to disk in small batches.

#input_folder = "/eos/cms/store/group/dpg_hgcal/comm_hgcal/hackathon/samples/wredjeb/multi_particle/ntuples_10_600/"
#input_folder = "/eos/cms/store/group/dpg_hgcal/comm_hgcal/hackathon/samples/wredjeb/multiparticles_10/ntuples"
input_folder = "/eos/cms/store/group/dpg_hgcal/comm_hgcal/hackathon/samples/close_by_double_pion/production/new_new_ntuples/"
# glob returns files in arbitrary order; sort so that `offset` always selects
# the same files from one run to the next.
files = sorted(glob.glob(f"{input_folder}/*ntuples_*.root"))


X = [ ]
Edges = [ ]
Edges_labels = [ ]
# Accumulates the regressed energy of every sim trackster read below.
# Initialised here so the cell also runs on a fresh kernel.
simts_energy = [ ]
outputPath  = './dataset_closeByDoublePion/'
mkdir_p(outputPath)


cum_events = 0

N = 10000000   # stop as soon as the file index reaches N
offset = 24    # number of leading files to skip
for i_file, file in enumerate(files[offset:], start=offset):
    if i_file >= N: break
    try:
        with uproot.open(file) as f:
            t =  f["ticlNtuplizer/tracksters"]
            calo = f["ticlNtuplizer/simtrackstersCP"]
            cand = f["ticlNtuplizer/candidates"]
            ass = f["ticlNtuplizer/associations"]
            gra = f["ticlNtuplizer/graph"]

            trk_data = t.arrays(["NTracksters", "raw_energy","raw_em_energy","barycenter_x","barycenter_y","barycenter_z","eVector0_x", "eVector0_y","eVector0_z","EV1","EV2","EV3","vertices_indexes", "sigmaPCA1", "sigmaPCA2", "sigmaPCA3"])
            gra_data = gra.arrays(['linked_inners', 'linked_outers'])
            cand_data = cand.arrays(["tracksters_in_candidate"])
            ass_data = ass.arrays([ "tsCLUE3D_recoToSim_CP", "tsCLUE3D_recoToSim_CP_score"])
            simts_data = calo.arrays(["stsCP_regressed_energy"])

    # Only skip files that fail to load; a bare `except:` would also swallow
    # KeyboardInterrupt and make the loop impossible to stop cleanly.
    except Exception as err:
        print("error ", file, repr(err))
        continue
    print('\nProcessing file {} '.format(file), end="")

    if(cum_events%1000 == 0):
        print(f"\nEvents {cum_events}")

    n_events = len(gra_data)
    start = time.time()
    for ev in range(n_events):
        cum_events += 1
        print(".", end="")

        trk_ev = trk_data[ev]

        simts_energy.extend(simts_data[ev].stsCP_regressed_energy)

        # Number of vertices in each trackster.
        trackster_sizes = [ak.size(vertices) for vertices in trk_ev.vertices_indexes]

        # Candidate index each trackster belongs to (-1 if in none).
        in_candidate = [-1 for _ in range(trk_ev.NTracksters)]
        for indx, candidate in enumerate(cand_data[ev].tracksters_in_candidate):
            for ts in candidate:
                in_candidate[ts] = indx

        # Build the truth graph of this event.
        best_simTs_match = []
        nodes = []
        edges = []
        edges_labels = []

        computeEdgeAndLabels(trk_ev, ass_data[ev], gra_data[ev], nodes, edges, edges_labels, best_simTs_match)

        # Node features, one record per trackster.
        x_ev = ak.zip({  "barycenter_x": trk_ev.barycenter_x,
                         "barycenter_y": trk_ev.barycenter_y,
                         "barycenter_z": trk_ev.barycenter_z,
                         "eVector0_x": trk_ev.eVector0_x,
                         "eVector0_y": trk_ev.eVector0_y,
                         "eVector0_z": trk_ev.eVector0_z,
                         "EV1": trk_ev.EV1,
                         "EV2": trk_ev.EV2,
                         "EV3": trk_ev.EV3,
                         "sigmaPCA1": trk_ev.sigmaPCA1,
                         "sigmaPCA2": trk_ev.sigmaPCA2,
                         "sigmaPCA3": trk_ev.sigmaPCA3,
                         "size": trackster_sizes,
                         "raw_en": trk_ev.raw_energy,
                         'raw_em_energy': trk_ev.raw_em_energy,
                         'best_st': best_simTs_match,
                         "ts_candidate_labels": in_candidate
                      })

        X.append(x_ev)
        Edges.append(np.array(edges).T)   # shape (2, n_edges)
        Edges_labels.append(edges_labels)

        # Save to disk every 10 events and at the end of each file.
        # `ev` stops at n_events - 1, so the last event must be compared with
        # that value; comparing with n_events never triggered the flush and
        # left the tail of a file to be written under the next file's index.
        is_last_event = (ev == n_events - 1)
        if (cum_events % 10 == 0 and ev != 0) or is_last_event:
            stop = time.time()
            print(f"t = {stop-start}s")
            print("Saving now the pickle data")

            prefix = os.path.join(outputPath, "{}_{}".format(str(i_file), str(cum_events)))
            with open(prefix + "_node_features.pkl", "wb") as fp:   #Pickling
                pickle.dump(X, fp)
            with open(prefix + "_edges.pkl", "wb") as fp:   #Pickling
                pickle.dump(Edges, fp)
            with open(prefix + "_edges_labels.pkl", "wb") as fp:   #Pickling
                pickle.dump(Edges_labels, fp)
            # Start a new batch.
            X = []
            Edges = []
            Edges_labels = []
            start = time.time()


Processing file /eos/cms/store/group/dpg_hgcal/comm_hgcal/hackathon/samples/wredjeb/multiparticles_10/ntuples/new_ntuples_14752358_1019.root 
Events 0
..........t = 3.7947893142700195s
Saving now the pickle data
..........t = 1.3002328872680664s
Saving now the pickle data

Processing file /eos/cms/store/group/dpg_hgcal/comm_hgcal/hackathon/samples/wredjeb/multiparticles_10/ntuples/new_ntuples_14752358_102.root ..........t = 2.6060919761657715s
Saving now the pickle data
..........t = 2.0724194049835205s
Saving now the pickle data

Processing file /eos/cms/store/group/dpg_hgcal/comm_hgcal/hackathon/samples/wredjeb/multiparticles_10/ntuples/new_ntuples_14752358_1020.root ..........t = 2.5380587577819824s
Saving now the pickle data
..........t = 1.8332850933074951s
Saving now the pickle data

Processing file /eos/cms/store/group/dpg_hgcal/comm_hgcal/hackathon/samples/wredjeb/multiparticles_10/ntuples/new_ntuples_14752358_1021.root ..........t = 2.2405009269714355s
Saving now the pickle d

KeyboardInterrupt: 