In [1]:
import awkward as ak
import numpy as np
import uproot
import vector
vector.register_awkward()

In [2]:
# Report the installed version of each library this notebook depends on.
for lib in (uproot, ak, vector):
    print(f"{lib.__name__} version: {lib.__version__}")

uproot version: 5.1.2
awkward version: 2.4.6
vector version: 1.1.1.post1


In [3]:
# ROOT file to read; a relative path, so it is resolved against the working directory.
filename = "unweighted_events.root"

In [4]:
# Open the ROOT file. The handle is kept in `f` because later cells read from it.
f = uproot.open(filename)

In [5]:
# List every branch name available in the 'LHEF' tree.
f['LHEF'].keys()

['Event',
 'Event/Event.fUniqueID',
 'Event/Event.fBits',
 'Event/Event.Number',
 'Event/Event.Nparticles',
 'Event/Event.ProcessID',
 'Event/Event.Weight',
 'Event/Event.ScalePDF',
 'Event/Event.CouplingQED',
 'Event/Event.CouplingQCD',
 'Event_size',
 'Rwgt',
 'Rwgt/Rwgt.fUniqueID',
 'Rwgt/Rwgt.fBits',
 'Rwgt/Rwgt.Weight',
 'Rwgt_size',
 'Particle',
 'Particle/Particle.fUniqueID',
 'Particle/Particle.fBits',
 'Particle/Particle.PID',
 'Particle/Particle.Status',
 'Particle/Particle.Mother1',
 'Particle/Particle.Mother2',
 'Particle/Particle.ColorLine1',
 'Particle/Particle.ColorLine2',
 'Particle/Particle.Px',
 'Particle/Particle.Py',
 'Particle/Particle.Pz',
 'Particle/Particle.E',
 'Particle/Particle.M',
 'Particle/Particle.PT',
 'Particle/Particle.Eta',
 'Particle/Particle.Phi',
 'Particle/Particle.Rapidity',
 'Particle/Particle.LifeTime',
 'Particle/Particle.Spin',
 'Particle_size']

In [6]:
# Show the signature and documentation of the tree's `arrays` method (exploration aid).
help(f['LHEF'].arrays)

Help on method arrays in module uproot.behaviors.TBranch:

arrays(expressions=None, cut=None, *, filter_name=<function no_filter at 0x7f4340458040>, filter_typename=<function no_filter at 0x7f4340458040>, filter_branch=<function no_filter at 0x7f4340458040>, aliases=None, language=uproot.language.python.PythonLanguage(), entry_start=None, entry_stop=None, decompression_executor=None, interpretation_executor=None, array_cache='inherit', library='ak', ak_add_doc=False, how=None) method of uproot.models.TTree.Model_TTree_v20 instance
    Args:
        expressions (None, str, or list of str): Names of ``TBranches`` or
            aliases to convert to arrays or mathematical expressions of them.
            Uses the ``language`` to evaluate. If None, all ``TBranches``
            selected by the filters are included.
        cut (None or str): If not None, this expression filters all of the
            ``expressions``.
        filter_name (None, glob string, regex string in ``"/pattern/i"``

In [7]:
# Short alias -> branch path for the per-particle quantities used in the selections.
aliases = dict(
    part_pt="Particle/Particle.PT",
    part_eta="Particle/Particle.Eta",
    part_phi="Particle/Particle.Phi",
    part_PID="Particle/Particle.PID",
    part_E="Particle/Particle.E",
    part_mother1="Particle/Particle.Mother1",
    part_mother2="Particle/Particle.Mother2",
)
# Read only the aliased expressions from the 'LHEF' tree; the result has one field per alias.
part_features = f['LHEF'].arrays(expressions=list(aliases.keys()), aliases=aliases)

In [38]:
# Distinct per-event particle counts present in the file.
# (numpy is imported with the other dependencies in the first cell.)
np.unique(ak.num(part_features['part_PID']))

## First selection

- For each event, select the highest-energy (E) particle with PID = 25; events without such a particle are zero-padded.

In [8]:
# First selection: per event, keep the highest-energy particle with PID == 25.
pad_size = 1  # number of candidate slots kept per event
clip = True   # truncate events that hold more than `pad_size` candidates
pad_val = 0   # filler for events without any PID == 25 particle

is_higgs = part_features['part_PID'] == 25
higgs_candidates = part_features[is_higgs]

# Order each event's candidates by decreasing energy so that slot 0 is the most energetic.
energy_order = ak.argsort(higgs_candidates['part_E'], ascending=False, axis=-1)
higgs_by_energy = higgs_candidates[energy_order]

# Pad/clip every event to `pad_size` entries, fill the gaps, then take slot 0.
# NOTE(review): the filler is a bare scalar while the entries are records -- confirm
# that downstream code accepts the resulting type for events without a candidate.
higgs_padded = ak.pad_none(higgs_by_energy, pad_size, clip=clip)
higgs_features = ak.fill_none(higgs_padded, pad_val)[:, 0]
higgs_features

## Second selection

- Compute the invariant mass of the two particles whose `Mother1` is the particle with PID = 99925 (only events containing exactly one such particle are used).

In [9]:
# Flag every particle with PID == 99925 (the "S") and count the flags per event.
S_mask = part_features['part_PID'] == 99925
n_S_per_event = ak.sum(S_mask, axis=-1)
# Keep only the events that contain exactly one S.
proper_part_features = part_features[n_S_per_event == 1]

In [10]:
# Display the events that passed the exactly-one-S requirement.
proper_part_features

In [32]:
# Second selection: four-vectors of the two direct children of the S (PID == 99925).
# `proper_part_features` only holds events with exactly one S, so the masked position
# list below has exactly one entry per event and `[:, 0]` is always valid.
part_pid = proper_part_features['part_PID']

# Position of the S inside each event. This is computed on the real particles rather
# than on a zero-padded copy: padded rows would carry part_mother1 == 0 and would be
# selected as fake children whenever the S sits at position 0.
S_idx = ak.local_index(part_pid, axis=-1)[part_pid == 99925][:, 0]

# Children are the particles whose first-mother entry equals the S position.
# NOTE(review): assumes part_mother1 uses the same 0-based position convention as the
# array index -- confirm against the producer of the file.
S_children_all = proper_part_features[proper_part_features['part_mother1'] == S_idx]

# Keep only the events in which the S has exactly two such children (the two h of S).
S_child_features = S_children_all[ak.num(S_children_all['part_pt']) == 2]

child_p4 = ak.zip({
    "pt"  : S_child_features['part_pt'],
    "eta" : S_child_features['part_eta'],
    "phi" : S_child_features['part_phi'],
    "E"   : S_child_features['part_E'],
}, with_name="Momentum4D")

In [104]:
# Combine the children of each event into one four-vector by reducing over the particle axis.
# NOTE(review): this depends on how ak.sum treats Momentum4D records in the installed
# awkward/vector versions -- confirm it adds four-momenta rather than summing the
# pt/eta/phi fields one by one.
hh_p4 = ak.sum(child_p4, axis=-1)
hh_p4

In [105]:
# Read `.m` from the summed four-vector (presumably vector's mass property, i.e. the
# invariant mass named in the section heading -- confirm).
m_hh = hh_p4.m
m_hh

In [106]:
# Bundle both selections into a single record:
#   hh.mhh       <- m_hh (second selection)
#   highest_E_h  <- higgs_features (first selection)
hh_features = ak.Array({'mhh': m_hh})
final_features = dict(hh=hh_features, highest_E_h=higgs_features)
array = ak.Record(final_features)

In [107]:
# Display the combined record before writing it out.
array

In [108]:
# Write the combined record to a Parquet file in the working directory.
ak.to_parquet(array, 'my_very_nice_data.parquet')

<pyarrow._parquet.FileMetaData object at 0x7fa304daa2f0>
  created_by: parquet-cpp-arrow version 13.0.0
  num_columns: 8
  num_rows: 1
  num_row_groups: 1
  format_version: 2.6
  serialized_size: 0

In [109]:
# Read the Parquet file back to check the round trip.
my_nice_array = ak.from_parquet('my_very_nice_data.parquet')

In [110]:
# Display what was loaded from the Parquet file.
my_nice_array

In [111]:
# Access the nested 'mhh' field of the reloaded record.
my_nice_array['hh']['mhh']

In [112]:
# Convert the reloaded 'mhh' values to a plain NumPy array.
ak.to_numpy(my_nice_array['hh']['mhh'])

array([456.11447558, 551.03679558, 548.67696864, ..., 548.23456484,
       550.77992548, 548.23755057])

## Selection 3

In [None]:
# events with 6 particles