In [1]:
import uproot
import numpy as np
import matplotlib.pyplot as plt
import awkward as ak

In [2]:
with uproot.open("input/dataset.root:fastjet") as f:
    jet_pt = f["jet_pt"].array()
    jet_eta = f["jet_eta"].array()
    jet_phi = f["jet_phi"].array()
    trk_pt = f["trk_pT"].array()
    trk_eta = f["trk_eta"].array()
    trk_phi = f["trk_phi"].array()
    trk_q = f["trk_q"].array()
    trk_d0 = f["trk_d0"].array()
    trk_z0 = f["trk_z0"].array()
    trk_label = f["trk_label"].array()
    jet_trk_IDX = f["jet_track_index"].array()
    jet_pufr_truth = f["jet_pufr_truth"].array()

In [3]:
%%time
num_events = len(jet_pt)
trk_feats = []
for event in range(num_events):
    if event%5==0:
        print("Processing: ", event, " / ", num_events, end="\r")
    idx_list = list(jet_trk_IDX[event])
    idx_list.append(len(trk_pt[event]))
    
    jet_trk_feats = []
    for i in range(len(idx_list)-1):
        start_idx = idx_list[i]
        end_idx = idx_list[i+1]-1 
        trk_pt_tmp = np.array(trk_pt[event][start_idx:end_idx])
        trk_eta_tmp = np.array(trk_eta[event][start_idx:end_idx])
        trk_phi_tmp = np.array(trk_phi[event][start_idx:end_idx])
        trk_q_tmp = np.array(trk_q[event][start_idx:end_idx])
        trk_d0_tmp = np.array(trk_d0[event][start_idx:end_idx])
        trk_z0_tmp = np.array(trk_z0[event][start_idx:end_idx])
        trk_label_tmp = np.array(trk_label[event][start_idx:end_idx])

        feats = [trk_pt_tmp, trk_eta_tmp, trk_phi_tmp, trk_q_tmp,
                trk_d0_tmp, trk_z0_tmp, trk_label_tmp]
        feats = np.stack(feats, axis=-1)
        jet_trk_feats.append(feats)
    
    trk_feats.append(jet_trk_feats)
    
trk_feats = ak.Array(trk_feats)

print("Processing: ", num_events, " / ", num_events)
print("Num Events: ", len(trk_feats))
print("Num Jets in first event: ", len(trk_feats[0]))
print("Num Tracks in first event first jet: ", len(trk_feats[0][0]))
print("Num Tracks features: ", len(trk_feats[0][0][0]))

Processing:  1000  /  1000
Num Events:  1000
Num Jets in first event:  26
Num Tracks in first event first jet:  96
Num Tracks features:  7
CPU times: user 22.9 s, sys: 163 ms, total: 23 s
Wall time: 22.9 s


In [4]:
%%time
num_events = len(jet_pt)
jet_feats = []
for event in range(num_events):
    jet_pt_tmp = np.array(jet_pt[event])
    jet_eta_tmp = np.array(jet_eta[event])
    jet_phi_tmp = np.array(jet_phi[event])
    jet_pufr_truth_tmp = np.array(jet_pufr_truth[event])

    feats = [jet_pt_tmp, jet_eta_tmp, jet_phi_tmp, jet_pufr_truth_tmp]
    feats = np.stack(feats, axis=-1)
    
    jet_feats.append(feats)
    
jet_feats = ak.Array(jet_feats)

print("Num Events: ", len(jet_feats))
print("Num Jets in first event: ", len(jet_feats[0]))
print("Num Jet Features: ", len(jet_feats[0][0]))

Num Events:  1000
Num Jets in first event:  26
Num Jet Features:  4
CPU times: user 655 ms, sys: 6.87 ms, total: 662 ms
Wall time: 653 ms


In [43]:
jet_mask = abs(jet_feats[:,:,1])<4
selected_jets = jet_feats[jet_mask]
selected_tracks = trk_feats[jet_mask]

trk_q_cut = selected_tracks[:,:,:,3]!=0          # Skip neutral particles
trk_eta_cut = abs(selected_tracks[:,:,:,1])<4    # Skip forward region
trk_pt_cut = selected_tracks[:,:,:,0]>0.4        # 400MeV Cut

mask = trk_q_cut & trk_eta_cut & trk_pt_cut

refined_tracks = selected_tracks[mask]

all_tracks = ak.flatten(refined_tracks, axis=2)

print("Jet Shape:\t", selected_jets.type)
print("Trk_Jet  Shape:\t", refined_tracks.type)
#print("Trk_All Shape:\t", all_tracks.type)

Jet Shape:	 1000 * var * var * float64
Trk_Jet  Shape:	 1000 * var * var * var * float64


In [34]:
# Shuffle the tracks and jets
"""
num_events = len(selected_jets)

for event in range(1):
    num_jets = len(selected_jets[event])
    for jet in range(num_jets):
        p = np.random.permutation(len(refined_tracks[event][jet]))
        print(refined_tracks[event][jet][p])


for event in range(num_events):
    p = np.random.permutation(len(selected_jets[event]))
    selected_jets[event] = selected_jets[event][p]
    refined_tracks[event] = refined_tracks[event][p]
"""

'\nnum_events = len(selected_jets)\n\nfor event in range(1):\n    num_jets = len(selected_jets[event])\n    for jet in range(num_jets):\n        p = np.random.permutation(len(refined_tracks[event][jet]))\n        print(refined_tracks[event][jet][p])\n\n\nfor event in range(num_events):\n    p = np.random.permutation(len(selected_jets[event]))\n    selected_jets[event] = selected_jets[event][p]\n    refined_tracks[event] = refined_tracks[event][p]\n'

In [46]:
# Pad num tracks per jet on event basis
num_events = len(selected_jets)
print(num_events)
for event in range(1):
    len_list = []
    num_jets = len(selected_jets[event])
    for jet in range(num_jets):
        len_list.append(len(refined_tracks[event][jet]))  
    max_num_trks = max(len_list)
    
    for jet in range(num_jets):
        pad = ak.pad_none(refined_tracks[event][jet], max_num_trks, axis=1)
        print(pad)
    print("Max: ", max(len_list))
    print("Min: ", min(len_list))
    
    

1000
[[2.31, -1.2, 1.68, 1, 0.311, -58.3, ..., None, None, None, None, None], ...]
[[1.2, 1.44, -1.23, 1, 0.0824, -83.9, ..., None, None, None, None, None], ...]
[[0.991, 0.54, -0.973, 1, -0.0324, ..., None, None, None, None, None], ...]
[[0.731, -0.76, 1.61, 1, 0.45, -58.6, ..., None, None, None, None, None], ...]
[[0.778, 0.702, 0.215, -1, -0.492, ..., None, None, None, None, None], ...]
[[2.08, 1.1, -2.64, -1, -0.252, -6.6, ..., None, None, None, None, None], ...]
[[1.23, 2.52, -2.17, 1, -0.238, -7.99, ..., None, None, None, None, None], ...]
[[1.2, -2.31, -2.92, -1, -0.354, -4.83, ..., None, None, None, None, None], ...]
[[0.901, 0.223, -2.76, -1, -0.0186, ..., None, None, None, None, None], ...]
[[0.903, -2.92, -0.255, -1, 4.09, -142, ..., None, None, None, None, None], ...]
[[0.682, -3.77, 0.691, 1, -0.0144, ..., None, None, None, None, None], ...]
[[1.18, -0.307, 1.76, -1, -0.0731, ..., None, None, None, None, None], ...]
[[0.618, -0.429, 2.74, -1, -0.13, ..., None, None, None, 

In [18]:
# Split dataset into train, val, test
num_events = len(selected_jets)
train_split = int(0.7*num_events)  # 70% train
test_split = int(0.75*num_events)  #  5% val
                                   # 25% test

num_jet_feats = len(selected_jets[0][0])-1
num_trk_feats = len(refined_tracks[0][0])-1

X_train_raw = data[0:int(0.7*num_events)]
y_train = labels[0:int(0.7*num_events)].reshape(-1,1)
X_val_raw = data[int(0.7*num_events):int(0.75*num_events)]
y_val = labels[int(0.7*num_events):int(0.75*num_events)].reshape(-1,1)
X_test_raw = data[int(0.75*num_events):]
y_test = labels[int(0.75*num_events):].reshape(-1,1)

In [None]:
# Shuffle
# Split train, val, test
# Calc train mean and std
# Norm train, val, test
# Split batches
# Pad jets and tracks in batches