In [1]:
from pocket_coffea.lib.categorization import *
from pocket_coffea.lib.cut_functions import *
from pocket_coffea.parameters.cuts.preselection_cuts import *
from pocket_coffea.lib.cut_definition import Cut

In [2]:
from coffea.nanoevents import NanoEventsFactory, NanoAODSchema
import awkward as ak

# Remote NanoAOD file (RunIISummer20UL18 TTToSemiLeptonic MC) addressed through an XRootD URL.
filename = "root://xrootd-cms.infn.it///store/mc/RunIISummer20UL18NanoAODv9/TTToSemiLeptonic_TuneCP5_13TeV-powheg-pythia8/NANOAODSIM/106X_upgrade2018_realistic_v16_L1v1-v1/120000/87DEE912-70CF-A549-B10B-1A229B256E88.root"
# Build the events array with the NanoAOD schema; entry_stop=1000 limits the read to the first 1000 entries.
events = NanoEventsFactory.from_root(filename, schemaclass=NanoAODSchema, entry_stop=1000).events()
# Display the array as the cell output.
events

<NanoEventsArray [<event 1:40419:40418002>, ... ] type='1000 * event'>

In [3]:
# Cut functions with the (events, params, **kwargs) signature; they are passed to Cut(...) in later cells.
# jet_cut: per-jet comparison, True for each jet whose pt is above params["pt"].
jet_cut =  lambda events, params, **kwargs: events.Jet.pt > params["pt"]
# two_jets: per-event comparison, True when more than 3 jets in the event have pt > 50 (params is unused).
# NOTE(review): the name (and the "ev_2_jet" label it is used with) suggests a 2-jet requirement,
# but `> 3` demands at least 4 jets — confirm which threshold is intended.
two_jets =  lambda events, params, **kwargs: ak.sum(events.Jet.pt>50, axis=1) > 3

In [4]:
# Selection with two named categories:
#   "passthrough" -> the `passthrough` cut only
#   "jet_pt"      -> a per-jet cut (jet_cut with pt threshold 50, collection "Jet")
#                    listed together with the event-level `two_jets` cut
catstd = StandardSelection(
    {
        "passthrough": [passthrough],
        "jet_pt": [
            Cut("jet_pt_large", {"pt": 50}, jet_cut, collection="Jet"),
            Cut("ev_2_jet", {}, two_jets),
        ],
    }
)

In [5]:
# Prepare the selection on the loaded events; the positional arguments are presumably
# the year ("2018") and the sample label ("ttHTobb") — confirm against StandardSelection.prepare.
# NOTE(review): the input file above is a TTToSemiLeptonic sample while the label here is "ttHTobb".
catstd.prepare(events, "2018","ttHTobb", isMC=True)

In [6]:
# Inspect the mask storage filled by prepare(); its repr lists the stored mask ids.
catstd.storage

MaskStorage(dim=2, masks=['passthrough__-904557648417540927', 'ev_2_jet__-7532592594126884410', 'jet_pt_large__-1520821223928223114'])

In [7]:
# Show the "jet_pt" category mask for the first 10 events as nested Python lists (one boolean per jet).
catstd.get_mask("jet_pt")[0:10].tolist()

[[True, True, True, True, True, False, False],
 [False, False, False, False, False],
 [False, False, False, False],
 [False, False, False, False, False, False, False, False],
 [False, False, False, False, False, False, False, False, False, False, False],
 [False, False, False, False, False, False],
 [False, False, False, False, False, False, False, False, False],
 [True, True, True, True, False, False, False],
 [False, False, False, False, False, False, False],
 [False, False, False, False, False, False, False, False]]

In [8]:
# Combine the per-jet "jet_pt" mask with an event-level requirement of at least
# one electron with pt > 20.
# - The comparison is parenthesised because `&` binds tighter than `>=`; without
#   the parentheses the expression is evaluated as `(mask & sum) >= 1`.
# - axis=1 makes the electron count per event (as in the jet-count cuts of this
#   notebook) instead of a single total over all events.
catstd.get_mask("jet_pt") & (ak.sum(events.Electron.pt > 20, axis=1) >= 1)

<Array [[False, False, ... False, False]] type='1000 * var * bool'>

In [9]:
# Dimension reported by the mask storage (2 in the recorded output, matching the per-jet masks shown above).
catstd.storage.dim

2

In [10]:
# List the category names defined in the selection.
catstd.keys()

['passthrough', 'jet_pt']

# Check product of dim=2 cuts

In [11]:
# Per-jet boolean masks (one entry per jet in every event).
jets_pt = events.Jet.pt > 40
jets_eta = events.Jet.eta > 2
# Per-event boolean mask: at least two jets with pt > 40.
njets = ak.sum(jets_pt, axis=1) >= 2

In [12]:
# Collect the two per-jet masks that will be combined below.
masks = [jets_pt, jets_eta]

In [13]:
# Display the list of masks.
masks

[<Array [[True, True, True, ... False, False]] type='1000 * var * bool'>,
 <Array [[False, False, ... False, False]] type='1000 * var * bool'>]

In [14]:
# Combine the masks element by element: each mask is passed through ak.singletons
# (expected to wrap every value in a length-1 list), the results are joined along
# the last axis, and the product over that axis is required to equal 1, i.e. all
# masks must be True for that jet.
ak.prod(
    ak.concatenate(list(map(ak.singletons, masks)), axis=-1),
    axis=-1,
) == 1

<Array [[False, False, ... False, False]] type='1000 * var * bool'>

# Multicut

In [15]:
# Cartesian selection built from two MultiCuts:
#   "Njets"  -> exactly 4, exactly 5, or at least 6 "Jet" objects (second argument 15.)
#   "jet_pt" -> per-jet pt thresholds of 20, 30, 40 and 50 using jet_cut
# The categories of `catstd` are passed as common categories.
cat = CartesianSelection(
    multicuts=[
        MultiCut(
            name="Njets",
            cuts=[
                get_nObj_eq(4, 15., "Jet"),
                get_nObj_eq(5, 15., "Jet"),
                get_nObj_min(6, 15., "Jet"),
            ],
            cuts_names=["4j", "5j", "6j"],
        ),
        MultiCut(
            name="jet_pt",
            # One Cut per threshold; all share the name "jet_pt_large" and differ only in params.
            cuts=[
                Cut("jet_pt_large", {"pt": pt_min}, jet_cut, collection="Jet")
                for pt_min in (20, 30, 40, 50)
            ],
            cuts_names=["pt20", "pt30", "pt40", "pt50"],
        ),
    ],
    common_cats=catstd,
)

In [16]:
# List all category names: the common categories followed by every Njets x jet_pt combination.
cat.categories

['passthrough',
 'jet_pt',
 '4j_pt20',
 '4j_pt30',
 '4j_pt40',
 '4j_pt50',
 '5j_pt20',
 '5j_pt30',
 '5j_pt40',
 '5j_pt50',
 '6j_pt20',
 '6j_pt30',
 '6j_pt40',
 '6j_pt50']

In [17]:
# Prepare the cartesian selection on the loaded events; the positional arguments are presumably
# the year and the sample label — confirm against CartesianSelection.prepare.
cat.prepare(events, "2018","ttHTobb", isMC=True)

In [18]:
# Inspect the mask storage of the common categories (the `catstd` selection passed as common_cats).
cat.common_cats.storage

MaskStorage(dim=2, masks=['passthrough__-904557648417540927', 'ev_2_jet__-7532592594126884410', 'jet_pt_large__-1520821223928223114'])

In [19]:
# Print every category name followed, on the next line, by its mask.
for c, m in cat.get_masks():
    print(c, m, sep="\n")

passthrough
[[True, True, True, True, True, True, True, ... True, True, True, True, True, True]]
jet_pt
[[True, True, True, True, True, False, ... False, False, False, False, False, False]]
4j_pt20
[[False, False, False, False, False, False, ... False, False, False, False, False]]
4j_pt30
[[False, False, False, False, False, False, ... False, False, False, False, False]]
4j_pt40
[[False, False, False, False, False, False, ... False, False, False, False, False]]
4j_pt50
[[False, False, False, False, False, False, ... False, False, False, False, False]]
5j_pt20
[[False, False, False, False, False, False, ... False, False, False, False, False]]
5j_pt30
[[False, False, False, False, False, False, ... False, False, False, False, False]]
5j_pt40
[[False, False, False, False, False, False, ... False, False, False, False, False]]
5j_pt50
[[False, False, False, False, False, False, ... False, False, False, False, False]]
6j_pt20
[[True, True, True, True, True, True, True, ... True, True, True, 

In [23]:
# Show the "6j_pt30" category mask for the first 10 events as nested lists (one boolean per jet).
cat.get_mask("6j_pt30")[0:10].tolist()

[[True, True, True, True, True, True, False],
 [False, False, False, False, False],
 [False, False, False, False],
 [True, True, True, True, True, True, False, False],
 [True, True, True, True, True, False, False, False, False, False, False],
 [True, True, True, True, False, False],
 [True, True, True, True, False, False, False, False, False],
 [True, True, True, True, True, True, False],
 [True, True, True, True, False, False, False],
 [True, True, True, True, False, False, False, False]]

In [22]:
# Reduce the per-jet "6j_pt30" mask to one boolean per event (True if any jet passes); first 10 events.
ak.any(cat.get_mask("6j_pt30"), axis=1)[0:10].tolist()

[True, False, False, True, True, True, True, True, True, True]

# Check output

In [20]:
from coffea.util import load

In [59]:
# Load a previously produced output file. The path is relative to the notebook's
# working directory, so the file must already exist there for this cell to run.
# NOTE(review): the categories found in this output (e.g. "4jets_40pt", "4j_jeta0.5")
# are not the ones defined earlier in this notebook — it presumably comes from a
# separate run/configuration; confirm and document its provenance.
df = load("../output/test_categorization_v12/output_all.coffea")

In [62]:
# Cutflow stored in the output: counts per selection step / category and per sample.
df["cutflow"]

{'initial': {'ttHTobb': 149000, 'TTToSemiLeptonic': 3960000},
 'skim': {'ttHTobb': 35003, 'TTToSemiLeptonic': 1512412},
 'presel': {'ttHTobb': 12062, 'TTToSemiLeptonic': 32385},
 'inclusive': {'ttHTobb': 12062.0, 'TTToSemiLeptonic': 32385.0},
 '4jets_40pt': {'ttHTobb': 10238.0, 'TTToSemiLeptonic': 21288.0},
 '4j_jeta0.5': {'ttHTobb': 1962.0, 'TTToSemiLeptonic': 10588.0},
 '4j_jeta1': {'ttHTobb': 2439.0, 'TTToSemiLeptonic': 13322.0},
 '4j_jeta1.5': {'ttHTobb': 2545.0, 'TTToSemiLeptonic': 13899.0},
 '5j_jeta0.5': {'ttHTobb': 3248.0, 'TTToSemiLeptonic': 8501.0},
 '5j_jeta1': {'ttHTobb': 3783.0, 'TTToSemiLeptonic': 10189.0},
 '5j_jeta1.5': {'ttHTobb': 3881.0, 'TTToSemiLeptonic': 10401.0},
 '6j_jeta0.5': {'ttHTobb': 5111.0, 'TTToSemiLeptonic': 7072.0},
 '6j_jeta1': {'ttHTobb': 5575.0, 'TTToSemiLeptonic': 7942.0},
 '6j_jeta1.5': {'ttHTobb': 5615.0, 'TTToSemiLeptonic': 8015.0}}

In [63]:
# Histograms stored in the output, keyed by variable name and then by sample.
df["variables"]

{'JetGood_eta': {'TTToSemiLeptonic': Hist(
    StrCategory(['4j_jeta0.5', '4j_jeta1', '4j_jeta1.5', '4jets_40pt', '5j_jeta0.5', '5j_jeta1', '5j_jeta1.5', '6j_jeta0.5', '6j_jeta1', '6j_jeta1.5', 'inclusive'], name='cat', label='Category'),
    StrCategory(['nominal', 'pileupDown', 'pileupUp', 'sf_ele_idDown', 'sf_ele_idUp', 'sf_ele_recoDown', 'sf_ele_recoUp', 'sf_mu_idDown', 'sf_mu_idUp', 'sf_mu_isoDown', 'sf_mu_isoUp'], name='variation', label='Variation'),
    StrCategory(['2018'], name='year', label='Year'),
    Regular(100, -3, 3, name='JetGood.eta', label='$\\eta_{j}$'),
    storage=Weight()) # Sum: WeightedSum(value=3.22111e+07, variance=1.81104e+08),
  'ttHTobb': Hist(
    StrCategory(['4j_jeta0.5', '4j_jeta1', '4j_jeta1.5', '4jets_40pt', '5j_jeta0.5', '5j_jeta1', '5j_jeta1.5', '6j_jeta0.5', '6j_jeta1', '6j_jeta1.5', 'inclusive'], name='cat', label='Category'),
    StrCategory(['nominal', 'pileupDown', 'pileupUp', 'sf_ele_idDown', 'sf_ele_idUp', 'sf_ele_recoDown', 'sf_ele_recoUp'

In [69]:
# Pick the "nJets" histogram of the "ttHTobb" sample.
H = df["variables"]["nJets"]["ttHTobb"]


In [70]:
# Display the histogram (axes: cat, variation, year, and the variable axis).
H

Hist(
  StrCategory(['4j_jeta0.5', '4j_jeta1', '4j_jeta1.5', '4jets_40pt', '5j_jeta0.5', '5j_jeta1', '5j_jeta1.5', '6j_jeta0.5', '6j_jeta1', '6j_jeta1.5', 'inclusive'], name='cat', label='Category'),
  StrCategory(['nominal', 'pileupDown', 'pileupUp', 'sf_ele_idDown', 'sf_ele_idUp', 'sf_ele_recoDown', 'sf_ele_recoUp', 'sf_mu_idDown', 'sf_mu_idUp', 'sf_mu_isoDown', 'sf_mu_isoUp'], name='variation', label='Variation'),
  StrCategory(['2018'], name='year', label='Year'),
  Regular(10, 4, 14, name='events.nJetGood', label='$N_{JetGood}$'),
  storage=Weight()) # Sum: WeightedSum(value=71683.1, variance=8912.53) (WeightedSum(value=71690.3, variance=8913.5) with flow)

In [71]:
# Select category "inclusive", variation "nominal", year "2018"; keep the full variable axis.
H["inclusive","nominal","2018",:]

In [72]:
# Sum of the selected slice over the variable axis (a WeightedSum with value and variance).
H["inclusive","nominal","2018",:].sum()

WeightedSum(value=1389.42, variance=169.553)

In [67]:
# Show the cutflow again, for comparison with the histogram sums.
df["cutflow"]

{'initial': {'ttHTobb': 149000, 'TTToSemiLeptonic': 3960000},
 'skim': {'ttHTobb': 35003, 'TTToSemiLeptonic': 1512412},
 'presel': {'ttHTobb': 12062, 'TTToSemiLeptonic': 32385},
 'inclusive': {'ttHTobb': 12062.0, 'TTToSemiLeptonic': 32385.0},
 '4jets_40pt': {'ttHTobb': 10238.0, 'TTToSemiLeptonic': 21288.0},
 '4j_jeta0.5': {'ttHTobb': 1962.0, 'TTToSemiLeptonic': 10588.0},
 '4j_jeta1': {'ttHTobb': 2439.0, 'TTToSemiLeptonic': 13322.0},
 '4j_jeta1.5': {'ttHTobb': 2545.0, 'TTToSemiLeptonic': 13899.0},
 '5j_jeta0.5': {'ttHTobb': 3248.0, 'TTToSemiLeptonic': 8501.0},
 '5j_jeta1': {'ttHTobb': 3783.0, 'TTToSemiLeptonic': 10189.0},
 '5j_jeta1.5': {'ttHTobb': 3881.0, 'TTToSemiLeptonic': 10401.0},
 '6j_jeta0.5': {'ttHTobb': 5111.0, 'TTToSemiLeptonic': 7072.0},
 '6j_jeta1': {'ttHTobb': 5575.0, 'TTToSemiLeptonic': 7942.0},
 '6j_jeta1.5': {'ttHTobb': 5615.0, 'TTToSemiLeptonic': 8015.0}}

In [75]:
# Rebind H to the "JetGood_pt_1" histogram of the "ttHTobb" sample (replaces the previous H)
# and take the same inclusive / nominal / 2018 slice.
H = df["variables"]["JetGood_pt_1"]["ttHTobb"]
H["inclusive","nominal","2018",:]

In [76]:
# Sum of the inclusive / nominal / 2018 slice of the current H.
H["inclusive","nominal","2018",:].sum()

WeightedSum(value=1389.43, variance=169.556)

In [77]:
# Rebind H to the "JetGood_pt" histogram of the "ttHTobb" sample (replaces the previous H)
# and take the same inclusive / nominal / 2018 slice.
H = df["variables"]["JetGood_pt"]["ttHTobb"]
H["inclusive","nominal","2018",:]

In [78]:
# Sum of the inclusive / nominal / 2018 slice of the current H.
# NOTE(review): the recorded value is larger than for "JetGood_pt_1" — presumably because this
# histogram is filled once per jet rather than once per event; confirm.
H["inclusive","nominal","2018",:].sum()

WeightedSum(value=7734.08, variance=944.033)