In [None]:
import matplotlib.pyplot as plt
import hist
import mplhep

# Activate the mplhep style sheets used for every figure in this notebook.
mplhep.style.use(["CMS", "fira", "firamath"])
import matplotlib as mpl

# Global matplotlib overrides applied on top of the style sheets above.
mpl.rcParams["lines.linewidth"] = 2
mpl.rcParams["grid.color"] = "#CCCCCC"
mpl.rcParams["grid.linewidth"] = 0.5
mpl.rcParams["figure.edgecolor"] = "none"

import sys

# Add the parent directory to the import path
# (presumably where the `HH4b` package imported below lives -- TODO confirm).
sys.path.append("..")

from HH4b import utils
import vector
import pandas as pd
import numpy as np

In [None]:
# Short sample label -> list of dataset names to read for that label.
samples = {
    "hh4b": ["GluGlutoHHto4B_cHHH1_TuneCP5_PSWeights_13TeV-powheg-pythia8"],
}

# no selection
path_to_dir = "/eos/uscms/store/user/cmantill/bbbb/matching/Oct30/"

# Each input directory is mapped to the samples that should be read from it.
dirs = {path_to_dir: samples}
year = "2018"
filters = None

# dictionary that will contain all information (from all samples)
events_dict = {}
# NOTE: the loop variable is deliberately not called `samples`, so the
# module-level `samples` dict above is not rebound by the loop.
for input_dir, dir_samples in dirs.items():
    # this function will load files (only the columns selected), apply filters and compute a weight per event
    events_dict.update(utils.load_samples(input_dir, dir_samples, year, filters=filters))

# List the keys available for the first loaded sample.
samples_loaded = list(events_dict.keys())
keys_loaded = list(events_dict[samples_loaded[0]].keys())
print("Keys in events_dict")
for key_name in keys_loaded:
    print(key_name)

In [None]:
def make_vector(events: pd.DataFrame, obj: str):
    """Build a ``vector`` array for the object collection ``obj``.

    The columns ``{obj}Pt``, ``{obj}Phi``, ``{obj}Eta`` and a mass column are
    read from ``events`` and passed to ``vector.array`` as ``pt``, ``phi``,
    ``eta`` and ``M``.  The mass column is ``{obj}PNetMass`` when ``obj`` is
    ``"ak8FatJet"`` and ``{obj}Mass`` for every other collection.
    """
    mass_suffix = "Mass" if obj != "ak8FatJet" else "PNetMass"

    # vector coordinate name -> suffix of the dataframe column holding it
    column_suffixes = {"pt": "Pt", "phi": "Phi", "eta": "Eta", "M": mass_suffix}

    coordinates = {
        coord: events[f"{obj}{suffix}"] for coord, suffix in column_suffixes.items()
    }
    return vector.array(coordinates)

In [None]:
# Work on the events stored under the "hh4b" label.
events = events_dict["hh4b"]

# Trigger names grouped under three labels; every name is read below as a
# column of `events`.
triggers = {
    "resolved": ["PFHT330PT30_QuadPFJet_75_60_45_40_TriplePFBTagDeepCSV_4p5"],
    "boosted": [
        "PFHT1050",
        "PFJet500",
        "AK8PFJet500",
        "AK8PFJet400_TrimMass30",
        "AK8PFHT800_TrimMass50",
        "AK8PFJet330_TrimMass30_PFAK8BoostedDoubleB_np4",
    ],
    "extra": [
        "QuadPFJet103_88_75_15_DoublePFBTagDeepCSV_1p3_7p7_VBF1",
        "QuadPFJet103_88_75_15_PFBTagDeepCSV_1p3_VBF2",
        "PFHT400_SixPFJet32_DoublePFBTagDeepCSV_2p94",
        "PFHT450_SixPFJet36_PFBTagDeepCSV_1p59",
        "AK8PFJet330_TrimMass30_PFAK8BTagDeepCSV_p17",
        "QuadPFJet98_83_71_15_DoublePFBTagDeepCSV_1p3_7p7_VBF1",
        "QuadPFJet98_83_71_15_PFBTagDeepCSV_1p3_VBF2",
        "PFMET100_PFMHT100_IDTight_CaloBTagDeepCSV_3p1",
    ],
}

ht = events.ht.values.squeeze()

# One boolean mask per trigger group: an event passes the group when at least
# one of the group's trigger columns equals 1.
trigger_masks = {}
for trig, hlts in triggers.items():
    trigger_mask = np.zeros_like(ht, dtype=bool)
    for t in hlts:
        fired = (events[t].values == 1).squeeze()
        # accumulate the OR in place
        np.logical_or(trigger_mask, fired, out=trigger_mask)
    trigger_masks[trig] = trigger_mask

# Four-vector arrays for each object collection stored in `events`.
jets, jets_outside, fatjets, gen_higgs, gen_bs = (
    make_vector(events, collection)
    for collection in ("ak4Jet", "ak4JetOutside", "ak8FatJet", "GenHiggs", "Genb")
)

# Match index of every jet: 0 means matched to h1, 1 means matched to h2.
ak4_match_index = events["ak4JetHiggsMatchIndex"].to_numpy()
ak8_match_index = events["ak8FatJetHiggsMatchIndex"].to_numpy()

# ak4 jets matched to h1 and h2
h1ak4, h2ak4 = ak4_match_index == 0, ak4_match_index == 1
num_ak4m2h1, num_ak4m2h2 = h1ak4.sum(axis=1), h2ak4.sum(axis=1)
# exactly two ak4 jets matched to the given Higgs
h1m2ak4, h2m2ak4 = num_ak4m2h1 == 2, num_ak4m2h2 == 2

# ak8 jets matched to h1 and h2
h1ak8, h2ak8 = ak8_match_index == 0, ak8_match_index == 1
num_ak8m2h1, num_ak8m2h2 = h1ak8.sum(axis=1), h2ak8.sum(axis=1)
# exactly one ak8 jet matched to the given Higgs
h1m1ak8, h2m1ak8 = num_ak8m2h1 == 1, num_ak8m2h2 == 1

# Mutually exclusive event categories, assigned in priority order:
# boosted first, then semi-resolved, then resolved.
boosted = h1m1ak8 & h2m1ak8
semi_resolved_h1 = h1m2ak4 & h2m1ak8 & ~boosted
semi_resolved_h2 = h2m2ak4 & h1m1ak8 & ~boosted
semi_resolved = semi_resolved_h1 | semi_resolved_h2
resolved = h1m2ak4 & h2m2ak4 & ~(boosted | semi_resolved)
not_categorized = ~resolved & ~boosted & ~semi_resolved

In [None]:
from hist.intervals import clopper_pearson_interval

# calculate m_HH for every event: mass of the sum of the two `gen_higgs` four-vectors
mhh = (gen_higgs[:, 0] + gen_higgs[:, 1]).m

# Axes shared by the histograms of this notebook.
mhh_axis = hist.axis.Regular(40, 250, 1200, name="hh_mass", label=r"$m_{HH}$ [GeV]")
ht_axis = hist.axis.Regular(40, 20, 1500, name="ht", label="HT [GeV]")
cat_axis = hist.axis.StrCategory([], name="cat", growth=True)

# Category name -> event selection. "semi_resolved" is the denominator; the
# other entries additionally require the corresponding trigger group(s).
mhh_selections = {
    "semi_resolved": semi_resolved,
    "resolved": semi_resolved & trigger_masks["resolved"],
    "boosted": semi_resolved & trigger_masks["boosted"],
    "or": semi_resolved & (trigger_masks["boosted"] | trigger_masks["resolved"]),
    "orextra": semi_resolved
    & (trigger_masks["boosted"] | trigger_masks["resolved"] | trigger_masks["extra"]),
}

hist_mhh = hist.Hist(mhh_axis, cat_axis)
for cat_name, selection in mhh_selections.items():
    hist_mhh.fill(hh_mass=mhh[selection], cat=cat_name)

# Overlay the m_HH distribution of every category on a single figure.
fig, ax = plt.subplots(1, 1, figsize=(6, 4))
for cat_name, legend_label, line_color in (
    ("semi_resolved", "All Semi-Resolved", "black"),
    ("resolved", "Trigger Resolved", "blue"),
    ("boosted", "Trigger Boosted", "orange"),
    ("or", "Trigger Boosted OR Resolved", "red"),
    ("orextra", "Trigger Boosted OR Resolved OR Extra", "green"),
):
    mplhep.histplot(hist_mhh[{"cat": cat_name}], ax=ax, label=legend_label, color=line_color)

leg = ax.legend(fontsize=10)
leg.set_title("Categories", prop={"size": 10})
ax.set_ylabel("Events")

# Line colour and legend label for each trigger selection.
colors = {
    "resolved": "blue",
    "boosted": "orange",
    "or": "red",
    "orextra": "green",
}
labels = {
    "resolved": "Resolved",
    "boosted": "Boosted",
    "or": "Res | Boost",
    "orextra": "Res | Boost | Extra",
}

# Trigger efficiency vs m_HH: triggered semi-resolved events divided by all
# semi-resolved events, bin by bin.
fig, ax = plt.subplots(1, 1, figsize=(6, 4))
den = hist_mhh[{"cat": "semi_resolved"}]
for key in ["resolved", "boosted", "or", "orextra"]:
    num = hist_mhh[{"cat": key}]
    mplhep.histplot(
        num / den,
        # distance of the Clopper-Pearson interval edges from the central
        # efficiency value (bins with an empty denominator evaluate to NaN)
        yerr=abs(clopper_pearson_interval(num.view(), den.view()) - num.view() / den.view()),
        label=labels[key],
        color=colors[key],
        ax=ax,
    )
leg = ax.legend(fontsize=10)
leg.set_title("Efficiency cHHH1", prop={"size": 10})
# The y axis is an efficiency; the former "Events" label was immediately
# overwritten by this call and has been dropped.
ax.set_ylabel("Signal Trig. Eff.")

# Same study as a function of HT: fill one category per selection.
# "semi_resolved" is the denominator; the other categories additionally
# require the corresponding trigger group(s).
hist_ht = hist.Hist(ht_axis, cat_axis)
for cat_name, selection in (
    ("semi_resolved", semi_resolved),
    ("resolved", semi_resolved & trigger_masks["resolved"]),
    ("boosted", semi_resolved & trigger_masks["boosted"]),
    ("or", semi_resolved & (trigger_masks["boosted"] | trigger_masks["resolved"])),
    (
        "orextra",
        semi_resolved
        & (trigger_masks["boosted"] | trigger_masks["resolved"] | trigger_masks["extra"]),
    ),
):
    hist_ht.fill(ht=ht[selection], cat=cat_name)

# Trigger efficiency vs HT for semi-resolved events.
fig, ax = plt.subplots(1, 1, figsize=(6, 4))
den = hist_ht[{"cat": "semi_resolved"}]
for key in ["resolved", "boosted", "or", "orextra"]:
    num = hist_ht[{"cat": key}]
    mplhep.histplot(
        num / den,
        # distance of the Clopper-Pearson interval edges from the central
        # efficiency value (bins with an empty denominator evaluate to NaN)
        yerr=abs(clopper_pearson_interval(num.view(), den.view()) - num.view() / den.view()),
        label=labels[key],
        color=colors[key],
        ax=ax,
    )
leg = ax.legend(fontsize=10)
leg.set_title("Efficiency cHHH1", prop={"size": 10})
# The y axis is an efficiency; the former "Events" label was immediately
# overwritten by this call and has been dropped.
ax.set_ylabel("Signal Trig. Eff.")

In [None]:
# Inspection cell: display the squeezed `ht` values of the current `events`.
events.ht.values.squeeze()

In [None]:
# Inspection cell: display the current `hist_mhh` histogram.
hist_mhh

In [None]:
# Inclusive version of the m_HH study: no matching-category requirement,
# "all" holds every event and acts as the efficiency denominator.
hist_mhh = hist.Hist(mhh_axis, cat_axis)
hist_mhh.fill(hh_mass=mhh, cat="all")
for cat_name, fired in (
    ("resolved", trigger_masks["resolved"]),
    ("boosted", trigger_masks["boosted"]),
    ("or", trigger_masks["boosted"] | trigger_masks["resolved"]),
    ("orextra", trigger_masks["boosted"] | trigger_masks["resolved"] | trigger_masks["extra"]),
):
    hist_mhh.fill(hh_mass=mhh[fired], cat=cat_name)

# m_HH distributions of all events and of each triggered subset.
fig, ax = plt.subplots(1, 1, figsize=(6, 4))
for cat_name, legend_label, line_color in (
    ("all", "All", "black"),
    ("resolved", "Trigger Resolved", "blue"),
    ("boosted", "Trigger Boosted", "orange"),
    ("or", "Trigger Boosted OR Resolved", "red"),
    ("orextra", "Trigger Boosted OR Resolved OR Extra", "green"),
):
    mplhep.histplot(hist_mhh[{"cat": cat_name}], ax=ax, label=legend_label, color=line_color)

leg = ax.legend(fontsize=10)
leg.set_title("Categories", prop={"size": 10})
ax.set_ylabel("Events")

# Line colour and legend label for each trigger selection.
colors = {
    "resolved": "blue",
    "boosted": "orange",
    "or": "red",
    "orextra": "green",
}
labels = {
    "resolved": "Resolved",
    "boosted": "Boosted",
    "or": "Res | Boost",
    "orextra": "Res | Boost | Extra",
}

# Inclusive trigger efficiency vs m_HH.
fig, ax = plt.subplots(1, 1, figsize=(6, 4))
den = hist_mhh[{"cat": "all"}]
for key in ["resolved", "boosted", "or", "orextra"]:
    num = hist_mhh[{"cat": key}]
    efficiency = num.view() / den.view()
    interval = clopper_pearson_interval(num.view(), den.view())
    mplhep.histplot(
        num / den,
        # interval edges expressed as distances from the central value
        yerr=abs(interval - efficiency),
        label=labels[key],
        color=colors[key],
        ax=ax,
    )
leg = ax.legend(fontsize=10)
leg.set_title("Efficiency cHHH1", prop={"size": 10})
ax.set_ylabel("Signal Trig. Eff.")

# Inclusive HT histograms. The denominator holds every event, so it is stored
# under "all" (matching the m_HH histogram of this cell) rather than under the
# misleading "semi_resolved" name.
hist_ht = hist.Hist(ht_axis, cat_axis)
hist_ht.fill(ht=ht, cat="all")
for cat_name, fired in (
    ("resolved", trigger_masks["resolved"]),
    ("boosted", trigger_masks["boosted"]),
    ("or", trigger_masks["boosted"] | trigger_masks["resolved"]),
    ("orextra", trigger_masks["boosted"] | trigger_masks["resolved"] | trigger_masks["extra"]),
):
    hist_ht.fill(ht=ht[fired], cat=cat_name)

# Inclusive trigger efficiency vs HT.
fig, ax = plt.subplots(1, 1, figsize=(6, 4))
den = hist_ht[{"cat": "all"}]
for key in ["resolved", "boosted", "or", "orextra"]:
    num = hist_ht[{"cat": key}]
    mplhep.histplot(
        num / den,
        # distance of the Clopper-Pearson interval edges from the central
        # efficiency value (bins with an empty denominator evaluate to NaN)
        yerr=abs(clopper_pearson_interval(num.view(), den.view()) - num.view() / den.view()),
        label=labels[key],
        color=colors[key],
        ax=ax,
    )
leg = ax.legend(fontsize=10)
leg.set_title("Efficiency cHHH1", prop={"size": 10})
# The y axis is an efficiency; the former "Events" label was immediately
# overwritten by this call and has been dropped.
ax.set_ylabel("Signal Trig. Eff.")

## Compare efficiency for different couplings

In [None]:
# Short sample label -> list of dataset names to read for that label
# (one entry per cHHH coupling value).
samples = {
    "hh4b": ["GluGlutoHHto4B_cHHH1_TuneCP5_PSWeights_13TeV-powheg-pythia8"],
    "hh4b-c2p45": ["GluGlutoHHto4B_cHHH2p45_TuneCP5_PSWeights_13TeV-powheg-pythia8"],
    "hh4b-c5": ["GluGlutoHHto4B_cHHH5_TuneCP5_PSWeights_13TeV-powheg-pythia8"],
    "hh4b-c0": ["GluGlutoHHto4B_cHHH0_TuneCP5_PSWeights_13TeV-powheg-pythia8"],
}

# Available input directories. The choice is made explicitly below instead of
# by silently overwriting `path_to_dir` with a second assignment.
input_dirs = {
    "no_selection": "/eos/uscms/store/user/cmantill/bbbb/matching/Oct30/",
    "selection": "/eos/uscms/store/user/cmantill/bbbb/matching/Oct30ApplySelection/",
}
path_to_dir = input_dirs["selection"]

# Each input directory is mapped to the samples that should be read from it.
dirs = {path_to_dir: samples}
year = "2018"
filters = None

# dictionary that will contain all information (from all samples)
events_dict = {}
# NOTE: the loop variable is deliberately not called `samples`, so the
# module-level `samples` dict above is not rebound by the loop.
for input_dir, dir_samples in dirs.items():
    # this function will load files (only the columns selected), apply filters and compute a weight per event
    events_dict.update(utils.load_samples(input_dir, dir_samples, year, filters=filters))

# List the keys available for the first loaded sample.
samples_loaded = list(events_dict.keys())
keys_loaded = list(events_dict[samples_loaded[0]].keys())
print("Keys in events_dict")
for key_name in keys_loaded:
    print(key_name)

In [None]:
# Inspection cell: show which sample labels were loaded into `events_dict`.
events_dict.keys()

In [None]:
# Axes: observable, selection category and sample label.
mhh_axis = hist.axis.Regular(40, 250, 1200, name="hh_mass", label=r"$m_{HH}$ [GeV]")
ht_axis = hist.axis.Regular(40, 20, 1500, name="ht", label="HT [GeV]")
cat_axis = hist.axis.StrCategory([], name="cat", growth=True)
sample_axis = hist.axis.StrCategory([], name="sample_name", growth=True)

hist_mhh = hist.Hist(mhh_axis, cat_axis, sample_axis)
hist_ht = hist.Hist(ht_axis, cat_axis, sample_axis)

for key, events in events_dict.items():
    ht = events.ht.values.squeeze()

    # One boolean mask per trigger group: an event passes the group when at
    # least one of the group's trigger columns equals 1.
    trigger_masks = {}
    for trig, hlts in triggers.items():
        trigger_mask = np.zeros_like(ht, dtype=bool)
        for t in hlts:
            fired = (events[t].values == 1).squeeze()
            # accumulate the OR in place
            np.logical_or(trigger_mask, fired, out=trigger_mask)
        trigger_masks[trig] = trigger_mask

    # Only `gen_higgs` is used in this cell; the other collections are still
    # rebuilt so the notebook-level names keep pointing at the current sample.
    jets, jets_outside, fatjets, gen_higgs, gen_bs = (
        make_vector(events, collection)
        for collection in ("ak4Jet", "ak4JetOutside", "ak8FatJet", "GenHiggs", "Genb")
    )

    mhh = (gen_higgs[:, 0] + gen_higgs[:, 1]).m

    # NOTE: the "semi_resolved" category is filled with *all* events here (no
    # category mask is applied); it serves as the inclusive denominator.
    hist_mhh.fill(hh_mass=mhh, cat="semi_resolved", sample_name=key)
    hist_ht.fill(ht=ht, cat="semi_resolved", sample_name=key)

    for cat_name, passed in (
        ("resolved", trigger_masks["resolved"]),
        ("boosted", trigger_masks["boosted"]),
        ("or", trigger_masks["boosted"] | trigger_masks["resolved"]),
    ):
        hist_mhh.fill(hh_mass=mhh[passed], cat=cat_name, sample_name=key)
        hist_ht.fill(ht=ht[passed], cat=cat_name, sample_name=key)

# Drawing style of the filled overlays, one entry per non-default coupling.
# Legend labels follow the "cHHH<value>" naming used by the sample names.
fill_styles = {
    "hh4b-c2p45": {"label": "cHHH2p45", "alpha": 0.5, "linestyle": "dotted", "edgecolor": "k"},
    "hh4b-c5": {"label": "cHHH5", "alpha": 0.3, "linestyle": "dashed"},
    "hh4b-c0": {"label": "cHHH0", "alpha": 0.2, "hatch": r"\\\\", "edgecolor": "k"},
}

# One figure per category: (category name, colour, legend title).
for cat_name, cat_color, legend_title in (
    ("semi_resolved", "black", "All"),
    ("resolved", "blue", "Trigger resolved"),
    ("boosted", "orange", "Trigger boosted"),
):
    fig, ax = plt.subplots(1, 1, figsize=(6, 4))

    for sample_name, style in fill_styles.items():
        # In the inclusive ("All") figure the hatched cHHH0 histogram keeps
        # its light-grey face instead of the category colour.
        if cat_name == "semi_resolved" and sample_name == "hh4b-c0":
            facecolor = "0.95"
        else:
            facecolor = cat_color
        mplhep.histplot(
            hist_mhh[{"cat": cat_name, "sample_name": sample_name}],
            ax=ax,
            histtype="fill",
            facecolor=facecolor,
            linewidth=2,
            **style,
        )

    # The cHHH1 sample is drawn last, as a plain line on top of the fills.
    mplhep.histplot(
        hist_mhh[{"cat": cat_name, "sample_name": "hh4b"}],
        ax=ax,
        label="cHHH1",
        color=cat_color,
        linewidth=2,
    )

    leg = ax.legend(fontsize=10)
    leg.set_title(legend_title, prop={"size": 10})
    ax.set_ylabel("Events")

# Line style used to tell the coupling samples apart within one figure.
linestyles = {"hh4b-c2p45": "dotted", "hh4b-c5": "dashed", "hh4b-c0": "dashdot", "hh4b": "solid"}

for key in ["resolved", "boosted", "or"]:
    # Two figures per trigger selection: efficiency vs m_HH, then vs HT.
    for hist_var in (hist_mhh, hist_ht):
        fig, ax = plt.subplots(1, 1, figsize=(6, 4))
        for s in events_dict.keys():
            # "semi_resolved" is the denominator category of these histograms
            den = hist_var[{"cat": "semi_resolved", "sample_name": s}]
            num = hist_var[{"cat": key, "sample_name": s}]
            mplhep.histplot(
                num / den,
                # distance of the Clopper-Pearson interval edges from the
                # central efficiency value (empty denominators give NaN)
                yerr=abs(
                    clopper_pearson_interval(num.view(), den.view()) - num.view() / den.view()
                ),
                label=s,
                color=colors[key],
                # fall back to a solid line for samples without a dedicated style
                linestyle=linestyles.get(s, "solid"),
                ax=ax,
            )
        leg = ax.legend(fontsize=10)
        leg.set_title(f"Efficiency {key}", prop={"size": 10})
        # The y axis is an efficiency; the former "Events" label was
        # immediately overwritten by this call and has been dropped.
        ax.set_ylabel("Signal Trig. Eff.")