## Trigger SFs

In [2]:
from __future__ import annotations

from os import listdir
from os.path import exists

import matplotlib.pyplot as plt
import mplhep as hep
import numpy as np
import pandas as pd
from hist import Hist
from hist.intervals import clopper_pearson_interval

plt.rcParams.update({"font.size": 11})
plt.style.use(hep.style.CMS)

In [3]:
def check_selector(sample: str, selector: str | list[str]):
    """Tell whether ``sample`` is picked up by ``selector``.

    ``selector`` is either a single pattern or a list/tuple of patterns; the
    sample is accepted as soon as one pattern matches.

    Pattern rules:
      * a pattern starting with ``*`` matches when the text after the ``*``
        occurs anywhere inside ``sample``;
      * any other pattern matches when ``sample`` starts with it.

    Returns:
        ``True`` if at least one pattern matches, ``False`` otherwise
        (including for an empty list of patterns).
    """
    # Treat the scalar case as a one-element collection so both cases share
    # the same matching code.
    patterns = selector if isinstance(selector, (list, tuple)) else (selector,)

    def _matches(pattern):
        if pattern.startswith("*"):
            return pattern[1:] in sample
        return sample.startswith(pattern)

    return any(_matches(pattern) for pattern in patterns)

In [4]:
# Data-taking period and the directory holding the per-sample outputs.
year = "2022EE"
data_dir = "/eos/uscms/store/user/cmantill/bbbb/trigger_boosted/Aug15/"

# label -> selector(s) passed to check_selector to choose sample directories.
samples = {
    "data": ["Run2022E", "Run2022F", "Run2022G"],
    "ttbar": ["TTtoLNu2Q"],
}

# label -> tag used when building per-sample titles further down.
to_loop = {
    "data": "2022EE",
    "ttbar": "TTToLNuQQ",
}

full_samples_list = listdir(f"{data_dir}/{year}")

# label -> one DataFrame with every matching, non-empty sample concatenated.
# Labels for which nothing was loaded get no entry at all.
events_dict = {}
for label, selector in samples.items():
    frames = []
    for sample in full_samples_list:
        if not check_selector(sample, selector):
            continue

        parquet_path = f"{data_dir}/{year}/{sample}/parquet"
        if not exists(parquet_path):
            print(f"No parquet file for {sample}")
            continue

        events = pd.read_parquet(parquet_path, columns=None)
        if len(events) > 0:
            frames.append(events)

        print(f"Loaded {sample: <50}: {len(events)} entries")

    if frames:
        events_dict[label] = pd.concat(frames)

Loaded Run2022E                                          : 205074 entries
Loaded Run2022F                                          : 916121 entries
Loaded Run2022G                                          : 176783 entries
Loaded TTtoLNu2Q                                         : 6880444 entries


In [5]:
# Variable-width bin edges for the two histogram axes.
pt_bins = [250, 275, 300, 325, 350, 375, 400, 425, 450, 475, 500, 550, 600, 800, 1000]
xbb_bins = [0.0, 0.8, 0.9, 0.95, 0.98, 1.0]

# Empty 2D template (jet pT x Xbb score); every numerator and denominator
# histogram below starts as a copy of it.
h0 = (
    Hist.new.Var(pt_bins, name="jet0pt", label="fj$^0$ $p_T$ (GeV)")
    .Var(xbb_bins, name="jet0txbb", label="fj$^0$ $T_{Xbb}$ Score")
    .Double()
)

# trigger-set key -> (list of HLT columns that are OR-ed, human-readable label)
trigger_dict = {
    "combined_nodijet": (
        [
            "AK8PFJet250_SoftDropMass40_PFAK8ParticleNetBB0p35",
            "AK8PFJet425_SoftDropMass40",
        ],
        "AK8PFJet250_SoftDropMass40_PFAK8ParticleNetBB0p35 |\nAK8PFJet425_SoftDropMass40",
    ),
}

# sample label -> trigger-set key -> list of (numerator, denominator) histograms
trigger_info = {}
for key, ev_label in to_loop.items():
    events = events_dict[key]

    # Entry 0 of each fat-jet branch (presumably the leading jet -- confirm
    # against the ntuple schema).
    xbb_0 = events["ak8FatJetPNetXbb"][0]
    pt_0 = events["ak8FatJetPt"][0]
    msd_0 = events["ak8FatJetMsd"][0]

    # Denominator requirement on that jet.
    one_jet = (pt_0 > 200) & (msd_0 > 60)

    per_trigger = {}
    for trigger_title, (triggers, trigger_label) in trigger_dict.items():
        title = f"{ev_label}_{trigger_title}"

        selection = one_jet

        # OR of all trigger bits in this set, accumulated in place.
        trigger_selection = np.zeros_like(selection)
        for hlt in triggers:
            trigger_selection |= (events[hlt].values == 1).squeeze()

        # Numerator: passes the jet requirement and fired at least one trigger.
        num_selection = selection & trigger_selection

        den = h0.copy().fill(jet0pt=pt_0[selection], jet0txbb=xbb_0[selection])
        num = h0.copy().fill(jet0pt=pt_0[num_selection], jet0txbb=xbb_0[num_selection])

        per_trigger[trigger_title] = [(num, den)]

    trigger_info[key] = per_trigger

In [8]:
from correctionlib import schemav2


def get_corr(eff, eff_unc_up, eff_unc_dn, label, trigger_label, edges):
    """Build a correctionlib ``Correction`` holding a binned trigger efficiency.

    The correction takes three inputs -- ``systematic`` (string), ``pt`` and
    ``xbb`` (real) -- and returns the table value for the requested
    systematic key.

    Args:
        eff: Array of nominal values, stored under the ``"nominal"`` key.
        eff_unc_up: Array stored under the ``"stat_up"`` key.
        eff_unc_dn: Array stored under the ``"stat_dn"`` key.
        label: Short tag (the callers use ``"MC"`` / ``"data"``) that goes into
            the correction name and descriptions.
        trigger_label: Text describing the trigger set; used in the
            description only.
        edges: Two lists of bin edges, for ``pt`` and ``xbb`` in that order.

    All three arrays are flattened in C (row-major) order, so they must be
    laid out as ``[pt_bin, xbb_bin]`` to line up with ``edges``.

    Returns:
        A ``schemav2.Correction`` named ``fatjet_triggereff_{year}_{label}``,
        where ``year`` is read from the module-level variable.
    """

    def multibinning(sf):
        return schemav2.MultiBinning(
            nodetype="multibinning",
            inputs=["pt", "xbb"],
            edges=edges,
            # Serialise the array that was passed in. Referencing `eff` here
            # made every systematic key return the nominal table.
            content=np.asarray(sf).ravel().tolist(),
            # Constant returned for inputs that fall outside the binning.
            flow=1.0,
        )

    corr = schemav2.Correction(
        name=f"fatjet_triggereff_{year}_{label}",
        description=f"{label} efficiency for trigger soup: {trigger_label}",
        version=1,
        inputs=[
            schemav2.Variable(
                name="systematic",
                type="string",
                description="Systematic variation",
            ),
            schemav2.Variable(
                name="pt",
                type="real",
                description="Jet transverse momentum (NanoAODv11 nominal value)",
            ),
            schemav2.Variable(
                name="xbb",
                type="real",
                description="Jet Xbb (xbb/xbb+qcd) (NanoAODv11 nominal value)",
            ),
        ],
        output=schemav2.Variable(
            name="weight", type="real", description=f"Jet {label} trigger efficiency"
        ),
        data=schemav2.Category(
            nodetype="category",
            input="systematic",
            content=[
                {"key": "nominal", "value": multibinning(eff)},
                {"key": "stat_up", "value": multibinning(eff_unc_up)},
                {"key": "stat_dn", "value": multibinning(eff_unc_dn)},
            ],
        ),
    )
    return corr


# For every trigger set, turn the (numerator, denominator) histogram pairs of
# data and ttbar MC into efficiency tables with Clopper-Pearson bounds and
# write both corrections into one correctionlib JSON file.
for trigger_label in trigger_info["data"]:
    data_pairs = trigger_info["data"][trigger_label]
    mc_pairs = trigger_info["ttbar"][trigger_label]

    for i, (numerator_data, denominator_data) in enumerate(data_pairs):
        numerator_mc, denominator_mc = mc_pairs[i]

        # clopper_pearson_interval returns the pair (lower, upper), so index 0
        # is the "down" bound and index 1 the "up" bound.
        rdata = (numerator_data / denominator_data).view()
        rdata_unc = clopper_pearson_interval(numerator_data.view(), denominator_data.view())
        rdata_unc_dn = rdata_unc[0]
        rdata_unc_up = rdata_unc[1]

        rmc = (numerator_mc / denominator_mc).view()
        # Bins with zero MC efficiency are stored as 1.0.
        rmc = np.where(rmc == 0, 1.0, rmc)
        rmc_unc = clopper_pearson_interval(numerator_mc.view(), denominator_mc.view())
        # Same (lower, upper) -> (dn, up) mapping as for data; the two ends
        # used to be swapped here. A lower bound of 0 is kept as is rather
        # than replaced by inf, which is not a meaningful efficiency bound.
        rmc_unc_dn = rmc_unc[0]
        rmc_unc_up = np.where(rmc_unc[1] == 0, 1.0, rmc_unc[1])
        # NOTE(review): bins whose nominal value was forced to 1.0 above keep
        # their original interval bounds -- confirm that this is intended.

        # Bin edges of both axes, in axis order, for the correction binning.
        edges = [list(ax.edges) for ax in numerator_data.axes]

        print(edges)

        corr_mc = get_corr(rmc, rmc_unc_up, rmc_unc_dn, "MC", trigger_label, edges)
        corr_data = get_corr(rdata, rdata_unc_up, rdata_unc_dn, "data", trigger_label, edges)

        cset = schemav2.CorrectionSet(schema_version=2, corrections=[corr_mc, corr_data])
        with open(f"data/fatjet_triggereff_{year}_{trigger_label}.json", "w") as fout:
            fout.write(cset.json(exclude_unset=True))

[[250.0, 275.0, 300.0, 325.0, 350.0, 375.0, 400.0, 425.0, 450.0, 475.0, 500.0, 550.0, 600.0, 800.0, 1000.0], [0.0, 0.8, 0.9, 0.95, 0.98, 1.0]]


In [7]:
import rich

# Pretty-print the CorrectionSet most recently assigned to `cset` so its
# contents can be inspected by eye.
rich.print(cset)

In [11]:
import correctionlib

# Read the "combined_nodijet" JSON for the current `year` back from disk as a
# CorrectionSet, so its corrections can be looked up by name and evaluated.
jet_triggerSF = correctionlib.CorrectionSet.from_file(
    f"data/fatjet_triggereff_{year}_combined_nodijet.json"
)

In [16]:
# Cross-check: evaluate the stored MC efficiency correction on the data events.
events = events_dict["data"]

# Entry 0 of each fat-jet branch (presumably the leading jet -- confirm
# against the ntuple schema).
xbb_0 = events["ak8FatJetPNetXbb"][0]
pt_0 = events["ak8FatJetPt"][0]

# "nominal" value of the fatjet_triggereff_{year}_MC correction for each event;
# `nom` itself is not printed in this cell.
nom = jet_triggerSF[f"fatjet_triggereff_{year}_MC"].evaluate("nominal", pt_0, xbb_0)

print(xbb_0)
print(pt_0)

0         0.622157
1         0.000786
2         0.376344
3         0.228831
4         0.002541
            ...   
176778    0.031593
176779    0.702336
176780    0.014381
176781    0.623170
176782    0.009204
Name: 0, Length: 1297978, dtype: float64
0         302.750
1         211.875
2         440.750
3         203.375
4         269.500
           ...   
176778    203.250
176779    280.000
176780    211.500
176781    312.000
176782    306.750
Name: 0, Length: 1297978, dtype: float64
