In [1]:
import math
from os import listdir, makedirs
from os.path import exists
from typing import Union, List

import coffea
import matplotlib.pyplot as plt
import mplhep as hep
import numpy as np
import pandas as pd
from hist import Hist
from hist.intervals import clopper_pearson_interval

# Global matplotlib configuration for every figure produced in this notebook.
# NOTE(review): the CMS style sheet is applied *after* the font-size update, so if
# hep.style.CMS defines its own "font.size" it overrides the value of 11 set on the
# previous line — confirm which size is intended and swap the two lines if needed.
plt.rcParams.update({"font.size": 11})
plt.style.use(hep.style.CMS)

In [2]:
# How efficient is the resolved trigger for boosted jets as a function of HT?

In [3]:
def plot_2d(ax, numerator, denominator, label, title, datalabel=None):
    """Draw a 2D efficiency map (``numerator / denominator``) on ``ax``.

    Each bin with a defined efficiency is annotated with its value rounded to
    two decimals.

    Parameters
    ----------
    ax : matplotlib axes to draw on.
    numerator, denominator : 2D histograms with identical binning; their
        bin-by-bin ratio is the plotted efficiency.
    label : second line of the plot title.
    title : first line of the plot title.
    datalabel : optional luminosity value; when truthy, a CMS data label with
        this luminosity is added to ``ax``.
    """
    # Bins with an empty denominator evaluate to 0/0 = NaN. That is expected, so
    # silence the NumPy warning instead of letting it clutter the cell output.
    with np.errstate(divide="ignore", invalid="ignore"):
        efficiency = numerator / denominator
    eff, bins_x, bins_y = efficiency.to_numpy()

    mesh = hep.hist2dplot(
        efficiency,
        ax=ax,
        cmin=0.1,
        cmax=1,
    )
    mesh.cbar.set_label(r'Efficiency',size=18)
    mesh.cbar.ax.get_yaxis().labelpad = 15

    # Print the efficiency at the centre of every bin that has one.
    for i in range(len(bins_x) - 1):
        x_center = (bins_x[i] + bins_x[i + 1]) / 2
        for j in range(len(bins_y) - 1):
            if math.isnan(eff[i, j]):
                continue
            ax.text(
                x_center,
                (bins_y[j] + bins_y[j + 1]) / 2,
                eff[i, j].round(2),
                color="black",
                ha="center",
                va="center",
                fontsize=12,
            )

    if datalabel:
        # Pass ax explicitly so the label lands on the axes we were given and
        # not on whatever pyplot currently considers the active axes.
        hep.cms.label(r"", data=True, lumi=datalabel, year=2022, fontsize=14, ax=ax)
    ax.set_title(f"{title}\n{label}", fontsize = 10, y=1.0, pad=25)


In [4]:
def check_selector(sample: str, selector: Union[str, List[str]]):
    """Return True when ``sample`` matches ``selector``.

    ``selector`` is either a single pattern or a list/tuple of patterns; the
    sample matches as soon as any one pattern matches. A pattern starting with
    ``*`` matches when the remainder of the pattern occurs anywhere in
    ``sample``; any other pattern must be a prefix of ``sample``.
    """
    # Treat the single-pattern case as a one-element collection so that both
    # input forms share the same matching loop.
    patterns = selector if isinstance(selector, (list, tuple)) else [selector]

    for pattern in patterns:
        if pattern.startswith("*"):
            matched = pattern[1:] in sample
        else:
            matched = sample.startswith(pattern)
        if matched:
            return True

    return False

In [5]:
# Load the parquet output of every sample directory that matches one of the
# selectors below and concatenate them into one DataFrame per label.
year = "2022EE"
# NOTE(review): absolute, site-specific path — the notebook only runs where this
# location is mounted.
data_dir = "/eos/uscms/store/user/cmantill/bbbb/trigger_boosted/Aug15/"
# label -> selector(s) understood by check_selector()
samples = {
    "data": ["Run2022E", "Run2022F", "Run2022G"],
    "ttbar": ["TTtoLNu2Q"],
    "signal": ["GluGlutoHHto4B_kl-1p00_kt-1p00_c2-0p00_TuneCP5_13p6TeV_TSG"],
}

# listdir() returns entries in arbitrary order; sort them so that the row order
# of the concatenated frames is the same on every run.
full_samples_list = sorted(listdir(f"{data_dir}/{year}"))
events_dict = {}
for label, selector in samples.items():
    frames = []
    for sample in full_samples_list:
        if not check_selector(sample, selector):
            continue

        parquet_path = f"{data_dir}/{year}/{sample}/parquet"
        if not exists(parquet_path):
            print(f"No parquet file for {sample}")
            continue

        events = pd.read_parquet(parquet_path)
        # Empty frames carry no information, so keep them out of the concat.
        if len(events) > 0:
            frames.append(events)

        print(f"Loaded {sample: <50}: {len(events)} entries")

    if frames:
        events_dict[label] = pd.concat(frames)
    else:
        # Labels without any events are left out of events_dict; say so, because
        # later cells index events_dict by label and would raise a KeyError.
        print(f"No events loaded for '{label}'; it is not added to events_dict")

Loaded Run2022E                                          : 205074 entries
Loaded Run2022F                                          : 916121 entries
Loaded Run2022G                                          : 176783 entries
Loaded TTtoLNu2Q                                         : 6880444 entries
Loaded GluGlutoHHto4B_kl-1p00_kt-1p00_c2-0p00_TuneCP5_13p6TeV_TSG: 139 entries


In [6]:
# Inspect the available columns of the data frame. The column index has two
# levels, so every entry is a (name, index) tuple; later cells rely on this when
# selecting e.g. events["ak8FatJetPt"][0].
events_dict["data"].columns.tolist()

[('QuadPFJet70_50_40_30', 0),
 ('QuadPFJet70_50_40_30_PFBTagParticleNet_2BTagSum0p65', 0),
 ('QuadPFJet70_50_40_35_PFBTagParticleNet_2BTagSum0p65', 0),
 ('QuadPFJet70_50_45_35_PFBTagParticleNet_2BTagSum0p65', 0),
 ('PFHT1050', 0),
 ('AK8PFJet230_SoftDropMass40_PFAK8ParticleNetBB0p35', 0),
 ('AK8PFJet250_SoftDropMass40_PFAK8ParticleNetBB0p35', 0),
 ('AK8PFJet275_SoftDropMass40_PFAK8ParticleNetBB0p35', 0),
 ('AK8PFJet400_SoftDropMass40', 0),
 ('AK8PFJet425_SoftDropMass40', 0),
 ('AK8PFJet450_SoftDropMass40', 0),
 ('AK8DiPFJet250_250_MassSD30', 0),
 ('AK8DiPFJet250_250_MassSD50', 0),
 ('AK8DiPFJet260_260_MassSD30', 0),
 ('AK8DiPFJet270_270_MassSD30', 0),
 ('run', 0),
 ('ak8FatJetEta', 0),
 ('ak8FatJetEta', 1),
 ('ak8FatJetPhi', 0),
 ('ak8FatJetPhi', 1),
 ('ak8FatJetMass', 0),
 ('ak8FatJetMass', 1),
 ('ak8FatJetPt', 0),
 ('ak8FatJetPt', 1),
 ('ak8FatJetMsd', 0),
 ('ak8FatJetMsd', 1),
 ('ak8FatJetPNetXbb', 0),
 ('ak8FatJetPNetXbb', 1),
 ('ak8FatJetPNetXjj', 0),
 ('ak8FatJetPNetXjj', 1),
 ('

In [7]:
# Histogram binning shared by the cells below.
# 3-tuples are unpacked into Hist.new.Reg(*bins) (presumably (n_bins, start, stop));
# lists are explicit bin edges passed to Hist.new.Var(bins).
mreg_bins_fine = (15, 45, 300)  # only referenced by a commented-out axis in the cells shown here
mreg_bins = [45, 60, 75, 90, 105, 120, 135, 150, 165, 180]
pt_bins_fine = (25, 200, 1000)
pt_bins = [250, 275, 300, 325, 350, 375, 400, 425, 450, 475, 500, 550, 600, 1000]
xbb_bins = [0.0, 0.8, 0.9, 0.95, 0.98, 1.0]  # not referenced in the cells shown here
xbb_bins_fine = (20, 0, 1)
arctanh_xbb_bins_fine = (20, 0, 6)

In [8]:
# Datasets to process: key into events_dict -> label used in legends and in the
# output file names. The plotting cells test for "2022EE" in the title to decide
# whether to draw the luminosity label.
to_loop = {
    "data": "2022EE",
    "ttbar": "TTToLNuQQ",
}

# Empty histogram templates; the filling loop below works on .copy() of these.
# h0: coarse (pT, regressed mass) binning of the first fat jet, used for the 2D maps.
h0 = (
    Hist.new.Var(pt_bins, name="jet0pt", label="fj$^0$ $p_T$ (GeV)")
    .Var(mreg_bins, name="jet0mreg", label="fj$^0$ $m_{reg}$ (GeV)")
    .Double()
)
# h0_finebins: fine binning in Xbb score, pT and atanh(Xbb score); the 1D
# efficiency curves are projections of this histogram onto one of its axes.
h0_finebins = (
    Hist.new.Reg(*xbb_bins_fine, name="jet0txbb", label="fj$^0$ $T_{Xbb}$ Score")
    .Reg(*pt_bins_fine, name="jet0pt", label="fj$^0$ $p_T$ (GeV)")
    #.Reg(*mreg_bins_fine, name="jet0mreg", label="fj$^0$ $m_{reg}$ (GeV)")
    .Reg(*arctanh_xbb_bins_fine, name="jet0txbbtan", label="fj$^0$ atanh($T_{Xbb}$ Score)")
    .Double()
)
# h10: pT of the first fat jet vs pT of the second one.
# NOTE(review): not filled anywhere in the cells shown here — confirm it is still needed.
h10 = (
    Hist.new.Var(pt_bins, name="jet0pt", label="fj$^0$ $p_T$ (GeV)")
    .Var(pt_bins, name="jet1pt", label="fj$^1$ $p_T$ (GeV)")
    .Double()
)

# Trigger combinations to evaluate.
# short name -> (list of HLT column names, display label)
#   * the short name becomes part of the output file names;
#   * an event passes the combination if ANY of the listed HLT columns equals 1
#     (the columns are OR-ed in the filling loop below);
#   * the display label is used as the dictionary key of trigger_info /
#     trigger_info_2d and as plot title / legend title, so it must stay unique.
trigger_dict = {
    "BoostedJet": (
        [
            "AK8PFJet425_SoftDropMass40",
            "AK8DiPFJet250_250_MassSD50",
            "AK8DiPFJet260_260_MassSD30",
        ],
        "AK8PFJet425_SoftDropMass40 |\n AK8DiPFJet250_250_MassSD50 |\n AK8DiPFJet260_260_MassSD30",
    ),
    "BoostedHbb": (
        [
            "AK8PFJet250_SoftDropMass40_PFAK8ParticleNetBB0p35",
        ],
        "AK8PFJet250_SoftDropMass40_PFAK8ParticleNetBB0p35",
    ),
    "Combined": (
        [
            "AK8PFJet250_SoftDropMass40_PFAK8ParticleNetBB0p35",
            "AK8PFJet425_SoftDropMass40",
            "AK8DiPFJet250_250_MassSD50",
            "AK8DiPFJet260_260_MassSD30",
        ],
        "AK8PFJet250_SoftDropMass40_PFAK8ParticleNetBB0p35 |\nAK8PFJet425_SoftDropMass40 |\n AK8DiPFJet250_250_MassSD50 |\n AK8DiPFJet260_260_MassSD30",
    ),
    "Combined_nodijet": (
        [
            "AK8PFJet250_SoftDropMass40_PFAK8ParticleNetBB0p35",
            "AK8PFJet425_SoftDropMass40",
        ],
        "AK8PFJet250_SoftDropMass40_PFAK8ParticleNetBB0p35 |\nAK8PFJet425_SoftDropMass40",
    ),
}


# Fill numerator / denominator histograms for every dataset, trigger combination
# and offline selection.
#   trigger_info[key][trigger_label]    -> list of (num, den, label, title) for 1D curves
#   trigger_info_2d[key][trigger_label] -> list of (num, den, label, file name) for 2D maps
# The denominator holds the events passing the offline selection; the numerator
# holds those that additionally fire at least one HLT path of the combination.
trigger_info = {}
trigger_info_2d = {}
for key, ev_label in to_loop.items():
    trigger_info[key] = {}
    trigger_info_2d[key] = {}
    print(key)

    events = events_dict[key]

    xbb_0 = events["ak8FatJetPNetXbb"][0]
    pt_0 = events["ak8FatJetPt"][0]
    msd_0 = events["ak8FatJetMsd"][0]
    mreg_0 = events["ak8FatJetPNetMass"][0]

    xbb_1 = events["ak8FatJetPNetXbb"][1]
    pt_1 = events["ak8FatJetPt"][1]
    msd_1 = events["ak8FatJetMsd"][1]
    mreg_1 = events["ak8FatJetPNetMass"][1]

    two_jets = (pt_0 > 300) & (pt_1 > 250) & (msd_0 > 60) & (msd_1 > 60)
    one_jet = (pt_0 > 250) & (msd_0 > 60)

    # Each entry: (event mask, legend label, short tag for file names,
    #              axis of h0_finebins to project on, or None for a 2D map).
    selections = [
        # all xbb bin
        (
            two_jets, # selection
            f"{ev_label} " + r"$p_T^0>300 & p_T^1>250$", #label
            "jet0_dijet", # lab
            None, # 1d variable to project
        ),
        (
            one_jet, # selection
            f"{ev_label} " + r"$p_T^0>250$", #label
            "jet0_onejet", # lab
            None, # 1d variable to project
        ),
        # first xbb bin
        (
            two_jets & (xbb_0 > 0.98), # selection
            f"{ev_label} " + r"$p_T^0>300 & p_T^1>250$ & TX$_{bb}^{0}$[0.98:]", #label
            "jet0_dijet_xbb0-098", # lab
            None, # 1d variable to project
        ),
        (
            two_jets & (xbb_0 > 0.95) & (xbb_0 <= 0.98),
            f"{ev_label} " + r"$p_T^0>300 & p_T^1>250$ & TX$_{bb}^{0}$[0.95-0.98]",
            "jet0_dijet_xbb0-095-098",
            None,
        ),
        (
            two_jets & (xbb_0 > 0.8) & (xbb_0 <= 0.95),
            f"{ev_label} " + r"$p_T^0>300 & p_T^1>250$ & TX$_{bb}^{0}$[0.8-0.95]",
            "jet0_dijet_xbb0-08-095",
            None,
        ),
        (
            one_jet & (xbb_0 > 0.98),
            f"{ev_label} " + r"$p_T^0>250$ & TX$_{bb}^{0}$[0.98:]",
            "jet0_onejet_xbb0-098",
            None,
        ),
        (
            one_jet & (xbb_0 > 0.95) & (xbb_0 <= 0.98),
            f"{ev_label} " + r"$p_T^0>250$ & TX$_{bb}^{0}$[0.95-0.98]",
            "jet0_onejet_xbb0-095-098",
            None,
        ),
        (
            one_jet & (xbb_0 > 0.8) & (xbb_0 <= 0.95),
            f"{ev_label} " + r"$p_T^0>250$ & TX$_{bb}^{0}$[0.8-0.95]",
            "jet0_onejet_xbb0-08-095",
            None,
        ),
        (
            one_jet, # selection
            f"{ev_label} " + r"$p_T^0>250$", #label
            "jet0pt_one_jet", # lab
            "jet0pt", # project over this variable
        ),
        (
            two_jets, # selection
            f"{ev_label} " + r"$p_T^0>300 & p_T^1>250$", #label
            "jet0pt_dijet", # lab
            "jet0pt", # project over this variable
        ),
        (
            one_jet & (xbb_0 > 0.9), # selection
            f"{ev_label} " + r"$p_T^0>250$ & TX$_{bb}^{0}$[0.9:]", #label
            "jet0pt_one_jet_xbb0-09", # lab
            "jet0pt", # project over this variable
        ),
        (
            two_jets & (xbb_0 > 0.8), # selection
            f"{ev_label} " + r"$p_T^0>300 & p_T^1>250$ & TX$_{bb}^{0}$[0.8:]", #label
            "jet0pt_dijet_xbb0-08", # lab
            "jet0pt", # project over this variable
        ),
        (
            one_jet & (xbb_0 > 0.8), # selection
            f"{ev_label} " + r"$p_T^0>250$ & TX$_{bb}^{0}$[0.8:]", #label
            "jet0pt_one_jet_xbb0-08", # lab
            "jet0pt", # project over this variable
        ),
        (
            one_jet & (pt_0 > 400), # selection
            f"{ev_label} " + r"$p_T^0>400$ ", #label
            "jet0txbb_one_jet_pt400", # lab
            "jet0txbb", # project over this variable
        ),
        (
            one_jet & (pt_0 > 400), # selection
            f"{ev_label} " + r"$p_T^0>400$ ", #label
            "jet0txbbtan_one_jet_pt400", # lab
            "jet0txbbtan", # project over this variable
        ),
    ]

    # The denominator depends only on the offline selection, not on the trigger,
    # so fill (and project) it once per selection instead of once per
    # (trigger, selection) pair.
    denominators = []
    for selection, _, _, is1d in selections:
        if is1d is not None:
            den = h0_finebins.copy().fill(
                jet0txbb=xbb_0[selection],
                jet0pt=pt_0[selection],
                jet0txbbtan=np.arctanh(xbb_0[selection]),
            )
            denominators.append(den.project(is1d))
        else:
            den = h0.copy().fill(
                jet0pt=pt_0[selection],
                jet0mreg=mreg_0[selection],
            )
            denominators.append(den.project("jet0mreg", "jet0pt"))

    for trigger_title, (triggers, trigger_label) in trigger_dict.items():
        title = f"{ev_label}_{trigger_title}"

        trigger_info[key][trigger_label] = []
        trigger_info_2d[key][trigger_label] = []

        # OR of all HLT paths in this combination. It does not depend on the
        # offline selection, so build it once per trigger combination.
        trigger_selection = np.zeros(len(events), dtype=bool)
        for hlt in triggers:
            trigger_selection |= (events[hlt].values == 1).squeeze()

        for (selection, label, lab, is1d), denominator in zip(selections, denominators):
            num_selection = selection & trigger_selection

            if is1d is not None:
                num = h0_finebins.copy().fill(
                    jet0txbb=xbb_0[num_selection],
                    jet0pt=pt_0[num_selection],
                    jet0txbbtan=np.arctanh(xbb_0[num_selection]),
                )
                # Store a copy of the shared denominator so every entry owns
                # its histograms, exactly as before.
                trigger_info[key][trigger_label].append(
                    (num.project(is1d), denominator.copy(), label, f"{title}_{lab}")
                )
            else:
                num = h0.copy().fill(
                    jet0pt=pt_0[num_selection],
                    jet0mreg=mreg_0[num_selection],
                )
                trigger_info_2d[key][trigger_label].append(
                    (
                        num.project("jet0mreg", "jet0pt"),
                        denominator.copy(),
                        label,
                        f"2d_{title}_{lab}.png",
                    )
                )

data
ttbar


In [9]:
# Save one 2D efficiency map per (dataset, trigger combination, selection).
# savefig() does not create missing directories, so make sure the target exists.
makedirs("trigger_plots", exist_ok=True)

for key, info_by_trigger in trigger_info_2d.items():
    for trigger_label, info in info_by_trigger.items():
        for numerator, denominator, label, title in info:
            # Only titles containing "2022EE" (the data label) get a luminosity label.
            datalabel = "26.3" if "2022EE" in title else None
            fig, ax = plt.subplots(1, 1, figsize=(8, 6))
            plot_2d(
                ax,
                numerator, denominator, label, trigger_label,
                datalabel=datalabel,
            )
            fig.tight_layout()
            fig.savefig(f"trigger_plots/{title}", bbox_inches="tight")
            # Close this specific figure; many are created in the loop.
            plt.close(fig)


In [22]:
def plot_2d_error(ax, numerator, denominator, label, title, datalabel=None):
    """Draw the statistical uncertainty of a 2D efficiency map on ``ax``.

    The plotted quantity is the absolute distance between the efficiency
    ``numerator / denominator`` and the first row returned by
    ``clopper_pearson_interval`` (the lower edge of the interval, per the
    hist.intervals documentation).

    Parameters
    ----------
    ax : matplotlib axes to draw on.
    numerator, denominator : 2D histograms with identical binning.
    label : second line of the plot title.
    title : first line of the plot title.
    datalabel : optional luminosity value; when truthy, a CMS data label with
        this luminosity is added to ``ax`` (same convention as ``plot_2d``).
    """
    num_counts = numerator.view()
    den_counts = denominator.view()
    # Empty denominator bins give 0/0 = NaN; that is expected, so suppress the
    # RuntimeWarning instead of printing it below the cell.
    with np.errstate(divide="ignore", invalid="ignore"):
        err = abs(
            clopper_pearson_interval(num_counts, den_counts)
            - num_counts / den_counts
        )[0]

    # err is a bare array, so hand the bin edges over explicitly; otherwise the
    # axes would show bin indices instead of the physical quantities.
    mesh = hep.hist2dplot(
        err,
        xbins=numerator.axes[0].edges,
        ybins=numerator.axes[1].edges,
        ax=ax,
        cmin=0.,
        cmax=0.5,
    )
    mesh.cbar.set_label(r'Stat Error',size=18)
    mesh.cbar.ax.get_yaxis().labelpad = 15
    ax.set_xlabel(numerator.axes[0].label)
    ax.set_ylabel(numerator.axes[1].label)
    if datalabel:
        hep.cms.label(r"", data=True, lumi=datalabel, year=2022, fontsize=14, ax=ax)
    ax.set_title(f"{title}\n{label}", fontsize = 10, y=1.0, pad=25)

# Save one statistical-error map per (dataset, trigger combination, selection),
# next to the corresponding efficiency map.
# savefig() does not create missing directories, so make sure the target exists.
makedirs("trigger_plots", exist_ok=True)

for key, info_by_trigger in trigger_info_2d.items():
    for trigger_label, info in info_by_trigger.items():
        for numerator, denominator, label, title in info:
            # Only titles containing "2022EE" (the data label) get a luminosity label.
            datalabel = "26.3" if "2022EE" in title else None
            fig, ax = plt.subplots(1, 1, figsize=(8, 6))
            plot_2d_error(
                ax,
                numerator, denominator, label, trigger_label,
                datalabel=datalabel,
            )
            # Reuse the efficiency-map file name with a suffix.
            title = title.replace(".png","_stat_error.png")
            fig.tight_layout()
            fig.savefig(f"trigger_plots/{title}", bbox_inches="tight")
            # Close this specific figure; many are created in the loop.
            plt.close(fig)


  - numerator.view() / denominator.view()




In [None]:
# 1D efficiency curves: for every trigger combination and selection, overlay
# all datasets of trigger_info on one figure and save it.
# savefig() does not create missing directories, so make sure the target exists.
makedirs("trigger_plots", exist_ok=True)

datalabel = "26.3"
for trigger_label, data_entries in trigger_info["data"].items():
    for i in range(len(data_entries)):
        fig, ax = plt.subplots(1, 1, figsize=(8, 6))
        for key in trigger_info.keys():
            numerator, denominator, label, title = trigger_info[key][trigger_label][i]
            num_counts = numerator.view()
            den_counts = denominator.view()
            # Empty denominator bins give 0/0 = NaN; expected, so keep NumPy quiet.
            with np.errstate(divide="ignore", invalid="ignore"):
                efficiency = numerator / denominator
                # yerr is the relative shift from the y value
                yerr = abs(
                    clopper_pearson_interval(num_counts, den_counts)
                    - num_counts / den_counts
                )
            hep.histplot(
                efficiency,
                yerr=yerr,
                ax=ax,
                label=label,
                histtype="errorbar",
                capsize=4,
                elinewidth=1,
            )
        if datalabel:
            # Pass ax explicitly rather than relying on pyplot's current axes.
            hep.cms.label(r"", data=True, lumi=datalabel, year=2022, fontsize=14, ax=ax)
        leg = ax.legend(loc="lower right", fontsize=10)
        leg.set_title(trigger_label, prop={"size": 10})
        leg.get_title().set_multialignment("center")
        ax.set_ylabel("Efficiency")
        ax.axhline(y=1.0, color='k', linestyle='dashdot')
        ax.set_ylim(0,1.1)
        # Drop the leading dataset token so the file name is shared by the
        # datasets overlaid on this figure.
        title = "_".join(title.split("_")[1:])
        fig.tight_layout()
        fig.savefig(f"trigger_plots/1d_{title}.png")
        # Close this specific figure; many are created in the loop.
        plt.close(fig)
    