Checking Run 3 signal kinematics against Run 2

Author(s): Raghav Kansal

In [None]:
import os
import pickle
from collections import OrderedDict
import coffea
from coffea import nanoevents
from coffea.lookup_tools.dense_lookup import dense_lookup
import numpy as np
import awkward as ak
import pandas as pd
from hist.intervals import ratio_uncertainty, clopper_pearson_interval
import hist
from hist import Hist

In [None]:
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker

# mplhep for CMS-style plots
import mplhep as hep

# Apply the CMS plotting style, both through matplotlib and through mplhep.
plt.style.use(hep.style.CMS)
hep.style.use("CMS")

# Tick formatter rendering exponents with math text; power limits are (-3, 3).
formatter = mticker.ScalarFormatter(useMathText=True)
formatter.set_powerlimits((-3, 3))

# Set after the style so that this font size overrides the style's value.
mpl.rcParams.update({"font.size": 28})

In [None]:
# Repository root relative to this notebook; all plots are written underneath it.
MAIN_DIR = "../../../"

plot_dir = f"{MAIN_DIR}/plots/SignalCheck/23Oct27"
# Create the output directory (and any missing parents) without spawning a shell;
# unlike ``os.system("mkdir -p ...")`` a failure here raises instead of being ignored.
os.makedirs(plot_dir, exist_ok=True)

In [None]:
# Run 2 (2018) HH->4b signal NanoAOD file.
run2_sig_path = "/eos/uscms/store/user/lpcpfnano/cmantill/v2_3/2018/HH/GluGluToHHTo4B_node_cHHH1_TuneCP5_PSWeights_13TeV-powheg-pythia8/GluGluToHHTo4B_node_cHHH1_preUL/230217_205036/0000/nano_mc2018_1.root"

# Can't open this directly with Coffea for some reason...
# run3_sig_path = "root://cmsdcadisk.fnal.gov//dcache/uscmsdisk/store/mc/Run3Summer22EENanoAODv10/GluGlutoHHto4B_kl-1p00_kt-1p00_c2-0p00_TuneCP5_13p6TeV_powheg-pythia8/NANOAODSIM/Poisson70KeepRAW_124X_mcRun3_2022_realistic_postEE_v1-v1/30000/d00363f4-0cac-410d-8fc7-bb6f60ccb6cd.root"

# Run 3 HH->4b signal NanoAOD file, read from EOS instead of the remote path above
# (presumably a local copy of the same file, given the matching file name -- TODO confirm).
run3_sig_path = "/eos/uscms/store/user/rkansal/bbbb/nano/Run3Summer22EENanoAODv10/GluGlutoHHto4B_kl-1p00_kt-1p00_c2-0p00_TuneCP5_13p6TeV_powheg-pythia8/d00363f4-0cac-410d-8fc7-bb6f60ccb6cd.root"

In [None]:
# Open both signal files with the NanoAOD schema, keyed by run period (Run 2 first).
events_dict = OrderedDict()
for run_key, sig_path in (("run2", run2_sig_path), ("run3", run3_sig_path)):
    factory = nanoevents.NanoEventsFactory.from_root(
        sig_path, schemaclass=nanoevents.NanoAODSchema
    )
    events_dict[run_key] = factory.events()

In [None]:
GEN_FLAGS = ["fromHardProcess", "isLastCopy"]
HIGGS_PDGID = 25

# For each run, collect the gen-level Higgs bosons and the children of the first two,
# each ordered by descending pT so that index 0 is always the leading object.
higgs_dict = {}
for key, events in events_dict.items():
    genvars = {}
    # finding the two gen higgs
    gen_parts = events.GenPart
    is_higgs = (abs(gen_parts.pdgId) == HIGGS_PDGID) & gen_parts.hasFlags(GEN_FLAGS)
    higgs = gen_parts[is_higgs]
    # indexing returns a new array, so the sorted result has to be assigned back
    higgs = higgs[ak.argsort(higgs.pt, axis=1, ascending=False)]

    higgs_children = higgs.children
    for i in range(2):
        # children of the i-th (pT-ordered) Higgs in every event
        bb = higgs_children[:, i]
        bb = bb[ak.argsort(bb.pt, axis=1, ascending=False)]
        genvars[f"bb{i}"] = bb

    genvars["higgs"] = higgs
    higgs_dict[key] = genvars

Plot Gen Kinematics

In [None]:
# Legend label for each run key.
key_label_map = dict(run2="Run 2 HH4b", run3="Run 3 HH4b (TSG)")

# variable name -> (bin edges, axis label) for the gen Higgs plots
higgs_var_map = {
    "pt": (np.linspace(0, 600, num=31), r"$p_T$ (GeV)"),
    "eta": (np.linspace(-5, 5, num=31), r"$\eta$"),
}

# variable name -> (bin edges, axis label) for the Higgs daughter plots
bb_var_map = {"pt": (np.linspace(0, 400, num=21), r"$p_T$ (GeV)")}

In [None]:
# One figure per (variable, Higgs index): normalised distributions for every run on the
# top pad, and the normalised ratio of the second run to the first on the bottom pad.
for var, (bins, var_label) in higgs_var_map.items():
    for i in range(2):
        fig, (ax, rax) = plt.subplots(
            nrows=2,
            ncols=1,
            figsize=(12, 14),
            gridspec_kw={"height_ratios": [4, 1], "hspace": 0.07},
            sharex=True,
        )

        # raw bin counts per run, in the iteration order of higgs_dict
        counts_per_run = []
        for key, genvars in higgs_dict.items():
            values = genvars["higgs"][var][:, i]
            counts_and_edges = np.histogram(values, bins)
            hep.histplot(
                counts_and_edges, yerr=True, label=key_label_map[key], density=True, ax=ax
            )
            counts_per_run.append(counts_and_edges[0])

        ax.set_ylabel("Events (A.U.)")
        ax.set_xlim(bins[0], bins[-1])
        ax.legend(fancybox=True)

        # rescale so the ratio compares shapes rather than absolute yields
        denom_counts, numer_counts = counts_per_run[0], counts_per_run[1]
        scale = np.sum(denom_counts) / np.sum(numer_counts)
        yerr = ratio_uncertainty(numer_counts, denom_counts, "poisson-ratio") * scale
        ratio = numer_counts / denom_counts * scale
        hep.histplot(
            ratio, bins, yerr=yerr, ax=rax, histtype="errorbar", color="black", capsize=4
        )

        rax.hlines(1, -1000, bins[-1], color="lightgray", linestyles="--")
        rax.set_xlabel(f"Gen Higgs {i + 1} {var_label}")
        rax.set_ylabel("Run 3 / Run 2")
        rax.set_ylim([0, 2])

        plt.savefig(f"{plot_dir}/higgs{i}_{var}.pdf", bbox_inches="tight")
        plt.show()

In [None]:
# One figure per (variable, Higgs index, daughter index): normalised distributions for
# every run on the top pad, normalised ratio of the second run to the first below.
for var, (bins, var_label) in bb_var_map.items():
    for i in range(2):
        for j in range(2):
            fig, (ax, rax) = plt.subplots(
                nrows=2,
                ncols=1,
                figsize=(12, 14),
                gridspec_kw={"height_ratios": [4, 1], "hspace": 0.07},
                sharex=True,
            )

            # raw bin counts per run, in the iteration order of higgs_dict
            counts_per_run = []
            for key, genvars in higgs_dict.items():
                values = genvars[f"bb{i}"][var][:, j]
                counts_and_edges = np.histogram(values, bins)
                hep.histplot(
                    counts_and_edges,
                    yerr=True,
                    label=key_label_map[key],
                    density=True,
                    ax=ax,
                )
                counts_per_run.append(counts_and_edges[0])

            ax.set_ylabel("Events (A.U.)")
            ax.set_xlim(bins[0], bins[-1])
            ax.legend(fancybox=True)

            # rescale so the ratio compares shapes rather than absolute yields
            denom_counts, numer_counts = counts_per_run[0], counts_per_run[1]
            scale = np.sum(denom_counts) / np.sum(numer_counts)
            yerr = ratio_uncertainty(numer_counts, denom_counts, "poisson-ratio") * scale
            ratio = numer_counts / denom_counts * scale
            hep.histplot(
                ratio, bins, yerr=yerr, ax=rax, histtype="errorbar", color="black", capsize=4
            )

            rax.hlines(1, -1000, bins[-1], color="lightgray", linestyles="--")
            rax.set_xlabel(f"Gen Higgs {i + 1} bb Daughter {j + 1} {var_label}")
            rax.set_ylabel("Run 3 / Run 2")
            rax.set_ylim([0, 2])

            plt.savefig(f"{plot_dir}/higgs{i}_bb{j}_{var}.pdf", bbox_inches="tight")
            plt.show()

## Trigger comparison

### Signal

Run 3 Triggers

In [None]:
# Trigger paths whose logical OR defines a "triggered" Run 3 event.
HLTs = [
    "AK8PFJet250_SoftDropMass40_PFAK8ParticleNetBB0p35",
    "AK8PFJet425_SoftDropMass40",
]

events = events_dict["run3"]

# One row of decisions per trigger path; an event passes if any path fired.
trigger_decisions = np.array([events.HLT[trigger] for trigger in HLTs])
HLT_triggered = np.any(trigger_decisions, axis=0)

Run 2 Trigger Efficiencies

In [None]:
def pad_val(
    arr: ak.Array,
    target: int,
    value: float = 0,
    axis: int = 0,
    to_numpy: bool = True,
    clip: bool = True,
):
    """
    Pad an awkward array to length ``target`` along ``axis`` and fill the padding with ``value``.

    Args:
        arr: array to pad.
        target: length to pad to along ``axis``.
        value: value substituted for the missing entries created by the padding.
        axis: axis along which to pad and fill.
        to_numpy: if True, return the result converted with ``.to_numpy()``.
        clip: forwarded to ``ak.pad_none``.

    Returns:
        The padded and filled array, as a numpy array when ``to_numpy`` is True.
    """
    padded = ak.pad_none(arr, target, axis=axis, clip=clip)
    filled = ak.fill_none(padded, value, axis=axis)
    if to_numpy:
        return filled.to_numpy()
    return filled


def trig_effs(fatjets, year: str = "2018", num_jets: int = 2):
    """
    Look up the trigger efficiencies we measured in SingleMuon data and combine them per event.

    Args:
        fatjets: per-event jet collection exposing ``Txbb``, ``pt`` and ``msoftdrop``.
        year: selects which ``{year}_triggereff_combined.pkl`` file is read.
        num_jets: number of jets per event passed to the lookup; events with fewer
            jets are padded with zeros in all three variables.

    Returns:
        One combined efficiency per event, ``1 - prod_j(1 - eff_j)``.
    """
    # NOTE(review): pickle.load can execute arbitrary code; only read trusted files here.
    with open(f"../corrections/data/{year}_triggereff_combined.pkl", "rb") as filehandler:
        combined = pickle.load(filehandler)

    # sum over TH4q bins
    effs_txbb = combined["num"][:, sum, :, :] / combined["den"][:, sum, :, :]

    # ``nan`` has to be passed by keyword: the second positional argument of
    # np.nan_to_num is ``copy``, so a bare ``0`` would request in-place modification.
    ak8TrigEffsLookup = dense_lookup(
        np.nan_to_num(effs_txbb.view(flow=False), nan=0), np.squeeze(effs_txbb.axes.edges)
    )

    # TODO: confirm that these should be corrected pt, msd values
    fj_trigeffs = ak8TrigEffsLookup(
        pad_val(fatjets.Txbb, num_jets, 0, axis=1),
        pad_val(fatjets.pt, num_jets, 0, axis=1),
        pad_val(fatjets.msoftdrop, num_jets, 0, axis=1),
    )

    # combined eff = 1 - (1 - fj1_eff) * (1 - fj2_eff)
    combined_trigEffs = 1 - np.prod(1 - fj_trigeffs, axis=1)
    return combined_trigEffs

In [None]:
fatjets = events_dict["run2"].FatJet
# Add Txbb as a record field via item assignment: plain attribute assignment does not
# create a field, and recent awkward versions refuse it outright.
fatjets["Txbb"] = fatjets.particleNetMD_Xbb / (
    fatjets.particleNetMD_QCD + fatjets.particleNetMD_Xbb
)
# per-event Run 2 trigger efficiency for the signal sample
trigeffs = trig_effs(fatjets)

Plots

In [None]:
# Hist.new.Reg(bins, name="jet0msd", label="$m_{SD}$ (GeV)")

# variable name -> (arguments unpacked into Hist.new.Reg, axis label)
higgs_var_map = dict(pt=([20, 200, 600], r"$p_T$ (GeV)"))

# Legend label for each run key.
key_label_map = dict(run2="Run 2", run3="Run 3")

In [None]:
# Signal trigger efficiency versus gen Higgs kinematics, for both runs on one figure.
# The filled histograms are kept in ``hists`` (run key -> {"full", "triggered"}).
hists = {}

for var, (bins, var_label) in higgs_var_map.items():
    # only the first Higgs in each event (index 0) is plotted
    for i in range(1):
        fig, (ax, rax) = plt.subplots(
            2,
            1,
            figsize=(12, 14),
            gridspec_kw=dict(height_ratios=[4, 1], hspace=0.07),
            sharex=True,
        )

        for key, genvars in higgs_dict.items():
            higgs = genvars["higgs"]
            events = events_dict[key]
            run_label = key_label_map[key]

            # all events, weighted by generator weight
            hf = Hist.new.Reg(*bins, name=var, label=var_label).Weight()
            hf.fill(higgs[var][:, i], weight=events.genWeight)

            # normalisation shared by the "Full" and "Triggered" curves of this run
            tot = np.sum(hf.values())

            hep.histplot(
                hf / tot,
                yerr=True,
                label=f"{run_label} Full",
                ax=ax,
            )

            # Run 3: keep only events selected by the HLT_triggered mask.
            # Other runs: keep every event, weighted by its trigger efficiency.
            # NOTE(review): HLT_triggered and trigeffs are notebook globals and are
            # presumably meant to come from these same signal samples -- confirm the
            # cells were run in order.
            ht = Hist.new.Reg(*bins, name=var, label=var_label).Weight()
            if key == "run3":
                ht.fill(higgs[var][:, i][HLT_triggered], weight=events.genWeight[HLT_triggered])
            else:
                ht.fill(higgs[var][:, i], weight=events.genWeight * trigeffs)

            hep.histplot(
                ht / tot,
                yerr=True,
                label=f"{run_label} Triggered",
                ax=ax,
            )

            # per-bin efficiency: triggered yield over full yield
            hep.histplot(
                ht / hf.values(),
                yerr=True,
                ax=rax,
                histtype="errorbar",
                # color="black",
                capsize=4,
                label=run_label,
            )

            hists[key] = {"full": hf, "triggered": ht}

        ax.set_xlabel(None)
        ax.set_ylabel("Events / Total")
        # bins[1] and bins[2] are the second and third arguments given to Reg above
        ax.set_xlim(bins[1], bins[2])
        ax.legend(fancybox=True)

        rax.hlines(1, bins[1], bins[2], color="lightgray", linestyles="--")
        rax.set_xlabel(f"Gen Higgs {i + 1} {var_label}")
        rax.set_ylabel("Signal efficiency")
        rax.set_ylim([0, 1.2])
        rax.legend()

        hep.cms.text(
            "Simulation Internal",
            ax=ax,
        )

        # the file name does not depend on var or i, so each figure overwrites the last
        plt.savefig(f"{plot_dir}/signal_trigger_effs.pdf", bbox_inches="tight")
        plt.show()

In [None]:
# Integrated signal efficiency per run: over all bins, then from bin index 5 upwards.
for key, run_hists in hists.items():
    print(key)
    triggered_vals = run_hists["triggered"].values()
    full_vals = run_hists["full"].values()
    print("Total efficiency H1 > 200")
    print(np.sum(triggered_vals) / np.sum(full_vals))
    print("Total efficiency H1 > 300")
    print(np.sum(triggered_vals[5:]) / np.sum(full_vals[5:]))

### Background

In [None]:
# Run 2 QCD NanoAOD file used as the background sample.
run2_qcd_path = "/eos/uscms/store/user/rkansal/bbbb/nano/RunIISummer20UL18NanoAODv9/QCD_Pt-15to7000-pilot_TuneCP5_Flat2018_13TeV_pythia8/8034A70C-036D-4C49-BD63-8C2EA5285CC7.root"

# Run 3 QCD NanoAOD file used as the background sample.
run3_qcd_path = "/eos/uscms/store/user/rkansal/bbbb/nano/Run3Summer22EENanoAODv11/QCD_PT-15to7000_TuneCP5_Flat2018_13p6TeV_pythia8/5eebc6b1-e595-4781-b316-d64a70dce222.root"

In [None]:
# Open both QCD files with the NanoAOD schema, keyed by run period (Run 2 first).
events_qcd_dict = OrderedDict()
for run_key, qcd_path in (("run2", run2_qcd_path), ("run3", run3_qcd_path)):
    factory = nanoevents.NanoEventsFactory.from_root(
        qcd_path, schemaclass=nanoevents.NanoAODSchema
    )
    events_qcd_dict[run_key] = factory.events()

Run 3 Triggers

In [None]:
# Trigger paths whose logical OR defines a "triggered" Run 3 event.
HLTs = [
    "AK8PFJet250_SoftDropMass40_PFAK8ParticleNetBB0p35",
    "AK8PFJet425_SoftDropMass40",
]

events = events_qcd_dict["run3"]

# One row of decisions per trigger path; an event passes if any path fired.
trigger_decisions = np.array([events.HLT[trigger] for trigger in HLTs])
HLT_triggered = np.any(trigger_decisions, axis=0)

Run 2 Trigger Efficiencies

In [None]:
fatjets = events_qcd_dict["run2"].FatJet
# Add Txbb as a record field via item assignment: plain attribute assignment does not
# create a field, and recent awkward versions refuse it outright.
fatjets["Txbb"] = fatjets.particleNetMD_Xbb / (
    fatjets.particleNetMD_QCD + fatjets.particleNetMD_Xbb
)
# per-event Run 2 trigger efficiency for the QCD sample
trigeffs = trig_effs(fatjets)

Plots

In [None]:
# Hist.new.Reg(bins, name="jet0msd", label="$m_{SD}$ (GeV)")

# variable name -> (arguments unpacked into Hist.new.Reg, axis label)
plot_var_map = dict(pt=([15, 300, 1000], r"$p_T$ (GeV)"))

# Legend label for each run key.
key_label_map = dict(run2="Run 2", run3="Run 3")

In [None]:
# QCD trigger efficiency versus fatjet kinematics, one figure per fatjet index.
# ``hists`` (run key -> {"full", "triggered"}) is overwritten on every pass of the
# ``i`` loop, so after this cell it holds the histograms of the last fatjet index only.
hists = {}

for var, (bins, var_label) in plot_var_map.items():
    for i in range(2):
        fig, (ax, rax) = plt.subplots(
            2,
            1,
            figsize=(12, 14),
            gridspec_kw=dict(height_ratios=[4, 1], hspace=0.07),
            sharex=True,
        )

        for key, events in events_qcd_dict.items():
            # slice (rather than index) so events without an i-th fatjet give an empty list
            fatjets = events.FatJet[:, i : i + 1]
            # per-event mask: True where the sliced list holds a jet with non-zero pt
            select = ak.any(fatjets.pt, axis=1)
            fatjets = ak.flatten(fatjets)
            run_label = key_label_map[key]

            # all selected events, weighted by generator weight
            hf = Hist.new.Reg(*bins, name=var, label=var_label).Weight()
            hf.fill(fatjets[var], weight=events.genWeight[select])

            # normalise to every event in the file, including those failing ``select``
            tot = np.sum(events.genWeight)

            hep.histplot(
                hf / tot,
                yerr=True,
                label=f"{run_label} Full",
                ax=ax,
            )

            # Run 3: keep only selected events that also pass the HLT_triggered mask.
            # Other runs: keep every selected event, weighted by its trigger efficiency.
            # NOTE(review): HLT_triggered and trigeffs are notebook globals and are
            # presumably meant to come from these same QCD samples -- confirm the
            # cells were run in order.
            ht = Hist.new.Reg(*bins, name=var, label=var_label).Weight()
            if key == "run3":
                ht.fill(
                    fatjets[var][HLT_triggered[select]],
                    weight=events.genWeight[select][HLT_triggered[select]],
                )
            else:
                ht.fill(fatjets[var], weight=events.genWeight[select] * trigeffs[select])

            hep.histplot(
                ht / tot,
                yerr=True,
                label=f"{run_label} Triggered",
                ax=ax,
            )

            # per-bin efficiency: triggered yield over full yield
            hep.histplot(
                ht / hf.values(),
                yerr=True,
                ax=rax,
                histtype="errorbar",
                # color="black",
                capsize=4,
                label=run_label,
            )

            hists[key] = {"full": hf, "triggered": ht}

        ax.set_xlabel(None)
        ax.set_ylabel("Events / Total")
        # bins[1] and bins[2] are the second and third arguments given to Reg above
        ax.set_xlim(bins[1], bins[2])
        ax.legend(fancybox=True)

        rax.hlines(1, bins[1], bins[2], color="lightgray", linestyles="--")
        rax.set_xlabel(f"Fatjet {i + 1} {var_label}")
        rax.set_ylabel("QCD efficiency")
        rax.set_ylim([0, 1.2])
        rax.legend()

        hep.cms.text(
            "Simulation Internal",
            ax=ax,
        )

        plt.savefig(f"{plot_dir}/qcd_trigger_effs_{var}{i}.pdf", bbox_inches="tight")
        plt.show()

In [None]:
# Display the most recently stored Run 3 "triggered" histogram as the cell output.
hists["run3"]["triggered"]

In [None]:
# Integrated QCD trigger efficiency per run, over all bins and from bin index 5 upwards.
# The printed pT ranges are read from the histogram axis itself, so they always match
# the binning that was actually filled (under/overflow is not included in values()).
for key, run_hists in hists.items():
    full_hist = run_hists["full"]
    edges = full_hist.axes[0].edges
    full_vals = full_hist.values()
    triggered_vals = run_hists["triggered"].values()

    print(key)
    for first_bin in (0, 5):
        print(
            f"Total efficiency, fatjet pT in [{edges[first_bin]:g}, {edges[-1]:g}) GeV"
        )
        print(np.sum(triggered_vals[first_bin:]) / np.sum(full_vals[first_bin:]))