In [1]:
from coffea.nanoevents import NanoEventsFactory, NanoAODSchema
import awkward as ak
import matplotlib.pyplot as plt
import numba
from numba import njit
import numpy as np
from pocket_coffea.lib.parton_provenance import reverse_index_array

In [2]:
# Remote (XRootD) location of the GluGlutoHHto4B NanoAOD file. It is kept under
# its own name for reference; it is NOT the file opened below.
filename_remote = "root://storage01.lcg.cscs.ch:1096//pnfs/lcg.cscs.ch/cms/trivcat//store/mc/Run3Summer22EENanoAODv12/GluGlutoHHto4B_kl-1p00_kt-1p00_c2-0p00_TuneCP5_13p6TeV_powheg-pythia8/NANOAODSIM/Poisson60KeepRAW_130X_mcRun3_2022_realistic_postEE_v6-v2/50000/fc603037-ef65-4bbf-9cef-934ecec40bbe.root"
# File that is actually read. Previously `filename` was assigned twice in a row,
# so the remote URL was silently discarded.
filename = "/pnfs/psi.ch/cms/trivcat/store/user/mmalucch/hh4b_12.root"

# Only the first 20 entries are loaded to keep the exploration fast.
events = NanoEventsFactory.from_root(
    filename, schemaclass=NanoAODSchema, entry_stop=20
).events()
print("Events read:", len(events))

Events read: 20


In [3]:
# Tag every generator particle with its position inside its own event, so that
# the position is still available after the collection is masked or reordered.
genpart = ak.with_field(
    events.GenPart,
    ak.local_index(events.GenPart, axis=1),
    "index",
)
print(genpart.index[0])
print(genpart.index[1])

# Per-particle boolean masks (particle species and generator status flags).
isB = abs(genpart.pdgId) == 5
isLast = genpart.hasFlags(["isLastCopy"])
isFirst = genpart.hasFlags(["isFirstCopy"])
isHard = genpart.hasFlags(["fromHardProcess"])
isHiggs = genpart.pdgId == 25

# Higgs bosons: last copy, from the hard process, with exactly two children,
# ordered by decreasing pt within each event.
higgs_last_hard = genpart[isHiggs & isLast & isHard]
has_two_children = ak.num(higgs_last_hard.childrenIdxG, axis=2) == 2
higgs_two_body = higgs_last_hard[has_two_children]
pt_descending = ak.argsort(higgs_two_body.pt, ascending=False)
higgs = higgs_two_body[pt_descending]
print(higgs.index, higgs.childrenIdxG)

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, ... 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70]
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, ... 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65]


[[15, 16], [15, 14], [15, 16], [14, 13], ... [18, 17], [11, 10], [23, 24], [20, 19]] [[[17, 18], [19, 20]], [[89, 90], ... [1170, 1171]], [[1230, 1231], [1228, 1229]]]


In [4]:
# b quarks (either charge) flagged as coming from the hard process. No copy flag
# is required here, so several copies of the same quark can be present.
# (An earlier `isB & isHard & isFirst` selection was immediately overwritten by
# this one and has been dropped as a dead assignment.)
bquarks = genpart[isB & isHard]

# Look up each b quark's mother through its per-event mother index.
mother_bquarks = genpart[bquarks.genPartIdxMother]
print(bquarks.index, mother_bquarks.index,mother_bquarks.childrenIdxG, mother_bquarks.pdgId)

# Compute the "mother is a Higgs" mask once and reuse it.
mother_is_higgs = mother_bquarks.pdgId == 25
print(mother_is_higgs)

# A negative mother index would wrap around to the last particle of the event in
# the lookup above, so such entries are excluded explicitly.
# NOTE(review): presumably -1 marks "no mother" in NanoAOD - confirm.
has_mother = bquarks.genPartIdxMother >= 0
bquarks_from_higgs = bquarks[mother_is_higgs & has_mother]
print(bquarks_from_higgs.index, bquarks_from_higgs.genPartIdxMother)
print("bquarks_from_higgs", bquarks_from_higgs.index, bquarks_from_higgs.pdgId)

[[17, 18, 19, 20, 33, 37, 40, 42], [16, ... 43], [21, 22, 23, 24, 33, 34, 37, 38]] [[15, 15, 16, 16, 17, 18, 19, 20], [14, ... 29], [19, 19, 20, 20, 21, 22, 23, 24]] [[[17, 18], [17, 18], [19, 20], ... 1241, 1242, 1243], [1244], [1245, 1246, 1247]]] [[25, 25, 25, 25, 5, -5, 5, -5], [25, 25, ... 5, -5], [25, 25, 25, 25, 5, -5, 5, -5]]
[[True, True, True, True, False, False, ... True, True, False, False, False, False]]
[[17, 18, 19, 20], [16, 17, 18, 19], [17, ... [26, 27, 28, 29], [21, 22, 23, 24]] [[15, 15, 16, 16], [14, 14, 15, 15], [15, ... [23, 23, 24, 24], [19, 19, 20, 20]]
bquarks_from_higgs [[17, 18, 19, 20], [16, 17, 18, 19], [17, ... [26, 27, 28, 29], [21, 22, 23, 24]] [[5, -5, 5, -5], [5, -5, 5, -5], [5, -5, ... 5, -5], [5, -5, 5, -5], [5, -5, 5, -5]]


In [5]:
# bquarks = ak.flatten(higgs.children, axis=2)
# # print(higgs.children.index, higgs.children.genPartIdxMother)
# print(higgs.index, higgs.genPartIdxMother)
# print(bquarks.genPartIdxMother)

In [6]:
def _strip_and_flatten(array):
    """Drop parameters/behavior from ``array`` and flatten it along axis 1."""
    return ak.flatten(ak.without_parameters(array, behavior={}), axis=1)


# Children indices: keep the per-event version and a version flattened over events.
children_idxG = ak.without_parameters(genpart.childrenIdxG, behavior={})
children_idxG_flat = ak.flatten(children_idxG, axis=1)

# Per-particle quantities flattened over events.
genpart_pdgId_flat = _strip_and_flatten(genpart.pdgId)
genpart_LastCopy_flat = _strip_and_flatten(genpart.hasFlags(["isLastCopy"]))
genpart_pt_flat = _strip_and_flatten(genpart.pt)
genparts_flat = ak.flatten(genpart)

# Cumulative particle counts with a leading 0: entry i is the position of the
# first particle of event i in the flattened arrays.
particles_per_event = ak.to_numpy(ak.num(genpart, axis=1), allow_missing=True)
genpart_offsets = np.concatenate([[0], np.cumsum(particles_per_event)])

local_index_all = ak.local_index(genpart, axis=1)
local_index_b = ak.local_index(bquarks_from_higgs, axis=1)

# Shift the per-event b-quark indices by the event offsets to obtain positions
# in the flattened arrays. The conversion to NumPy needs a rectangular array.
event_starts = genpart_offsets[:-1]
b_quark_idx = ak.to_numpy(
    bquarks_from_higgs.index + event_starts, allow_missing=False
)
b_quarks_pdgId = ak.to_numpy(bquarks_from_higgs.pdgId, allow_missing=False)
nevents = b_quark_idx.shape[0]

# genPartIdxMotherG of the first child of each event's first particle; passed on
# to `reverse_index_array` by the kernel.
# NOTE(review): presumably the global index of the first particle per event - confirm.
first_particle_children = genpart[:, 0].children
firstgenpart_idxG = ak.firsts(first_particle_children).genPartIdxMotherG
firstgenpart_idxG_numpy = ak.to_numpy(firstgenpart_idxG, allow_missing=False)

In [23]:
@njit
def get_quark_last_copy(
    b_quarks_idx,
    b_quarks_pdgId,
    children_idxG_flat,
    genpart_pdgId_flat,
    genpart_offsets,
    genpart_LastCopy_flat,
    genpart_pt_flat,
    nevents,
    firstgenpart_idxG_numpy,
    prints=True,
    max_depth=5,
):
    """Follow each quark down its decay chain until its last copy is reached.

    Starting from every quark in ``b_quarks_idx``, the children with the same
    pdgId as the starting quark are inspected and the one with the highest pt
    is followed. This is repeated until a particle flagged as last copy is
    found or ``max_depth`` steps have been taken.

    Parameters
    ----------
    b_quarks_idx : 2D integer array (event, quark)
        Positions of the starting quarks in the flattened per-particle arrays.
    b_quarks_pdgId : 2D integer array (event, quark)
        pdgId of each starting quark; only children with this pdgId are followed.
    children_idxG_flat : jagged integer array
        Children indices of every particle, indexed by flattened position.
        They are passed through ``reverse_index_array`` before being used.
        NOTE(review): presumably that helper converts them into positions in
        the flattened arrays - confirm against pocket_coffea.
    genpart_pdgId_flat, genpart_LastCopy_flat, genpart_pt_flat : 1D arrays
        pdgId, last-copy flag and pt of every particle, flattened over events.
    genpart_offsets : 1D integer array
        Forwarded to ``reverse_index_array``; entry ``iev`` is also subtracted
        from the result in the debug printout.
    nevents, firstgenpart_idxG_numpy
        Forwarded unchanged to ``reverse_index_array``.
    prints : bool, optional
        Emit verbose debug output. Defaults to True, the previously hard-coded
        behaviour.
    max_depth : int, optional
        Maximum number of decay steps followed per quark. Defaults to 5, the
        previously hard-coded limit.

    Returns
    -------
    2D int64 array with the shape of ``b_quarks_idx``
        Flattened position of the last copy of each quark, or -1 when no last
        copy was found (no same-pdgId child, or ``max_depth`` exhausted).
    """
    if prints:
        print("b_quarks_idx", b_quarks_idx)
        print("b_quarks_pdgId", b_quarks_pdgId)
        print("children_idxG_flat", children_idxG_flat)
        print("genpart_pdgId_flat", genpart_pdgId_flat)
        print("genpart_offsets", genpart_offsets)
        print("genpart_LastCopy_flat", genpart_LastCopy_flat)
        print("nevents", nevents)

    # -1 marks "last copy not found"
    out = np.full(b_quarks_idx.shape, -1, np.int64)

    for iev in range(b_quarks_idx.shape[0]):
        if prints:
            print("\nEvent", iev)
        for ipart in range(b_quarks_idx.shape[1]):
            p_id = b_quarks_idx[iev, ipart]
            quark_pdgId = b_quarks_pdgId[iev, ipart]
            if prints:
                print("Parton", ipart)

            # The starting quark is already its own last copy.
            if genpart_LastCopy_flat[p_id]:
                out[iev, ipart] = p_id
                if prints:
                    print("out", out[iev, ipart], out[iev, ipart] - genpart_offsets[iev])
                continue

            for _ in range(max_depth):
                children_idxs = reverse_index_array(
                    children_idxG_flat[p_id],
                    firstgenpart_idxG_numpy,
                    genpart_offsets,
                    nevents,
                )
                if prints:
                    print(children_idxs)

                # Among the children with the same pdgId as the starting quark,
                # pick the one with the highest pt.
                max_pt = -1.0
                max_pt_idx = -1
                for child_idx in children_idxs:
                    if prints:
                        print("Child", child_idx)
                        print(quark_pdgId)
                        print(genpart_pdgId_flat[child_idx])
                    if genpart_pdgId_flat[child_idx] != quark_pdgId:
                        continue
                    child_pt = genpart_pt_flat[child_idx]
                    if prints:
                        print(child_pt)
                    if child_pt > max_pt:
                        max_pt_idx = child_idx
                        max_pt = child_pt

                # No child carries the same pdgId: stop here. Indexing the flat
                # arrays with -1 would silently read the last particle of the
                # whole sample and continue the walk from there.
                if max_pt_idx < 0:
                    if prints:
                        print("No child with the same pdgId for", p_id)
                    break

                if prints:
                    print("genpart_LastCopy_flat", genpart_LastCopy_flat[max_pt_idx])
                if genpart_LastCopy_flat[max_pt_idx]:
                    if prints:
                        print("Found child")
                    out[iev, ipart] = max_pt_idx
                if prints:
                    print(p_id, max_pt_idx)
                p_id = max_pt_idx
                if prints:
                    print(p_id, max_pt_idx)
                if out[iev, ipart] != -1:
                    break

            if prints:
                print("out", out[iev, ipart], out[iev, ipart] - genpart_offsets[iev])

    return out

In [24]:
# Positional inputs of the kernel, in the order of its signature.
kernel_inputs = (
    b_quark_idx,
    b_quarks_pdgId,
    children_idxG_flat,
    genpart_pdgId_flat,
    genpart_offsets,
    genpart_LastCopy_flat,
    genpart_pt_flat,
    nevents,
    firstgenpart_idxG_numpy,
)
b_quark_last_idx = get_quark_last_copy(*kernel_inputs)
print(b_quark_last_idx)
# Start position of each event in the flattened arrays, for comparison.
print(genpart_offsets[:-1])

b_quarks_idx [[  17   18   19   20]
 [  87   88   89   90]
 [ 154  155  156  157]
 [ 217  218  219  220]
 [ 293  294  295  296]
 [ 346  347  348  349]
 [ 409  410  411  412]
 [ 501  502  503  504]
 [ 557  558  559  560]
 [ 634  635  636  637]
 [ 682  683  684  685]
 [ 758  759  760  761]
 [ 816  817  818  819]
 [ 864  865  867  868]
 [ 907  908  909  910]
 [ 984  985  986  987]
 [1054 1055 1056 1057]
 [1098 1099 1100 1101]
 [1168 1169 1170 1171]
 [1228 1229 1230 1231]]
b_quarks_pdgId [[ 5 -5  5 -5]
 [ 5 -5  5 -5]
 [ 5 -5  5 -5]
 [ 5 -5  5 -5]
 [ 5 -5  5 -5]
 [ 5 -5  5 -5]
 [ 5 -5  5 -5]
 [ 5 -5  5 -5]
 [ 5 -5  5 -5]
 [ 5 -5  5 -5]
 [ 5 -5  5 -5]
 [ 5 -5  5 -5]
 [ 5 -5  5 -5]
 [ 5 -5  5 -5]
 [ 5 -5  5 -5]
 [ 5 -5  5 -5]
 [ 5 -5  5 -5]
 [ 5 -5  5 -5]
 [ 5 -5  5 -5]
 [ 5 -5  5 -5]]
children_idxG_flat [[2, 3, 4], [], [5], [6], [11, 12, 22, ... [1278, 1279], [], [], [], [], [], []]
genpart_pdgId_flat [21, 21, 25, 25, 21, 25, 25, 25, 25, ... 4122, -421, -16, -11, 12, -421, 13, -14]
genpart_o

In [25]:
# `get_quark_last_copy` leaves -1 where no last copy was found. Used as an index,
# -1 would silently select the last flattened particle, so check before the lookup.
assert (b_quark_last_idx >= 0).all(), "some b quarks have no resolved last copy (-1)"
bq = genparts_flat[b_quark_last_idx]
print(bq.index)

[[33, 37, 40, 42], [27, 21, 32, 33], [31, ... [26, 27, 42, 43], [33, 34, 37, 38]]
