In [None]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import mplhep as hep
import matplotlib.ticker as mticker

# Apply the mplhep "CMS" and "firamath" styles to all figures in this notebook.
hep.style.use(["CMS", "firamath"])

# Scalar tick formatter using math text; power limits are set to (-3, 3).
formatter = mticker.ScalarFormatter(useMathText=True)
formatter.set_powerlimits((-3, 3))

# Notebook-wide matplotlib defaults, set in one go.
plt.rcParams.update(
    {
        "font.size": 12,
        "lines.linewidth": 2,
        "grid.color": "#CCCCCC",
        "grid.linewidth": 0.5,
        "figure.edgecolor": "none",
    }
)

# Checks on signal samples

In [None]:
import sys

# Add the parent directory to the module search path so the imports below resolve
# (presumably `utils`, `hh_vars` and `postprocessing` live there — TODO confirm).
sys.path.append("..")

import hist
import os
import utils
from postprocessing.postprocessing import Region, weight_shifts, bb_assignment
from utils import ShapeVar, CUT_MAX_VAL
# NOTE: `sig_keys` imported here is reassigned in the configuration cell further down.
from hh_vars import samples, data_key, bg_keys, sig_keys

In [None]:
# Load IPython's autoreload extension in mode 2, so that imported modules are
# reloaded automatically after they are edited.
%load_ext autoreload
%autoreload 2

In [None]:
# Key used to look up the per-year sample definitions.
year = "2022EE"
# Display everything registered in `samples` for this year.
samples[year]

In [None]:
year = "2022EE"
# Work on a copy so that the imported `samples` dictionary is left untouched.
samples_year = samples[year].copy()

# Keep only the signal sample(s) and "vhtobb"; drop every other entry.
sig_keys = ["hh4b"]
keep_keys = ["vhtobb", *sig_keys]
for key in list(samples_year.keys()):
    # Exact membership test: a substring test against "vhtobb" would also keep
    # unrelated keys such as "vh" or "bb".
    if key not in keep_keys:
        del samples_year[key]

# Dictionary with the directories of the input files
# (this can be configured in a yaml file later in the script).
MAIN_DIR = "../../../"
path_to_dir = f"{MAIN_DIR}/../data/skimmer/Oct26/"
# path_to_dir_signal = f"{MAIN_DIR}/../data/skimmer/Nov6fix/"
path_to_dir_signal = f"{MAIN_DIR}/../data/skimmer/Nov6fix2/"

# Maps each input directory to the samples that are read from it.
sample_dirs = {path_to_dir: samples_year}

# hh_dict = {'hh4b': ['GluGlutoHHto4B_kl-1p00_kt-1p00_c2-0p00_TuneCP5_13p6TeV_TSG']}
# samples_year = {**samples_year, **hh_dict}
# sample_dirs[path_to_dir_signal] = hh_dict

# Make the plot and template directories. `os.makedirs` raises if a directory
# cannot be created instead of failing silently like a discarded shell call.
date = "23Nov6"
plot_dir = f"{MAIN_DIR}/plots/PostProcessing/{date}/{year}"
template_dir = f"templates/{date}/"
os.makedirs(plot_dir, exist_ok=True)
os.makedirs(f"{template_dir}/cutflows/{year}", exist_ok=True)

In [None]:
# Display the samples kept for this study.
samples_year

In [None]:
# Display the mapping from input directory to the samples read from it.
sample_dirs

In [None]:
pt_cut = 300

# Cuts common to both filter branches: pT and soft-drop mass thresholds on both jets.
shared_cuts = [
    ("('ak8FatJetPt', '0')", ">=", pt_cut),
    ("('ak8FatJetPt', '1')", ">=", pt_cut),
    ("('ak8FatJetMsd', '0')", ">=", 60),
    ("('ak8FatJetMsd', '1')", ">=", 60),
]
# Two branches: the shared cuts plus an Xbb threshold on jet 0 or on jet 1.
filters = [
    [*shared_cuts, (f"('ak8FatJetPNetXbb', '{jet_idx}')", ">=", 0.8)] for jet_idx in ("0", "1")
]

# Cutflow table (pandas), one row per sample.
cutflow = pd.DataFrame(index=list(samples_year))

# Collect the events of all samples from all input directories in one dictionary.
events_dict = {}
for input_dir, samples_dict in sample_dirs.items():
    # load_samples reads the files (only the selected columns), applies the
    # filters and computes a weight per event
    loaded = utils.load_samples(
        input_dir,
        samples_dict,
        year,
        filters=filters,
    )
    events_dict.update(loaded)

utils.add_to_cutflow(events_dict, "Preselection", "weight", cutflow)
print("\n", cutflow)

bb_masks = bb_assignment(events_dict)

In [None]:
# Number of entries loaded for the hh4b sample after the preselection filters.
len(events_dict["hh4b"])

In [None]:
# Inspect the `single_weight_genweight` column of the hh4b events.
events_dict["hh4b"].single_weight_genweight

In [None]:
# Inspect the `ak8FatJetPNetMass` column(s) of the hh4b events.
events_dict["hh4b"].ak8FatJetPNetMass

In [None]:
# Display the per-sample masks returned by `bb_assignment`.
bb_masks

In [None]:
# Pick the `ak8FatJetPNetMass` values selected by the hh4b bb mask.
# `int("0") == 1` evaluates to False, so the XOR leaves the mask unchanged
# (presumably mirrors how `utils.get_feat` selects the "bb0" jet — TODO confirm).
events_dict["hh4b"]["ak8FatJetPNetMass"].values[bb_masks["hh4b"] ^ (int("0") == 1)].squeeze()

In [None]:
# Add per-event weight variants with individual correction factors divided out.
for key, events in events_dict.items():
    if key == data_key:
        # Data: nothing is divided out, so every variant equals the nominal weight.
        events["weight_nopileup"] = events["weight"]
        events["weight_notrigger"] = events["weight"]
        # Same column name as for the other samples, so cells that read
        # "weight_notrigger_nopileup" work for every entry of events_dict.
        events["weight_notrigger_nopileup"] = events["weight"]
    else:
        # Nominal weight with the pileup weight divided out.
        events["weight_nopileup"] = events["weight"] / events["single_weight_pileup"]
        # Nominal weight with both the trigger scale factor and the pileup weight divided out.
        events["weight_notrigger_nopileup"] = events["weight"] / (
            events["single_weight_trigsf_2jet"] * events["single_weight_pileup"]
        )

## Check mass

In [None]:
sig_key = "hh4b"
# Mass variables to histogram, mapped to their legend labels.
plots = {
    "bb0FatJetPNetMass": r"$m_{reg}^{j1}$ (GeV)",
    "bb1FatJetPNetMass": r"$m_{reg}^{j2}$ (GeV)",
    "bb0FatJetMsd": r"$m_{SD}^{j1}$ (GeV)",
    "bb1FatJetMsd": r"$m_{SD}^{j2}$ (GeV)",
}

mass_axis = hist.axis.Regular(40, 40, 250, name="mass", label=r"Mass [GeV]")
cat_axis = hist.axis.StrCategory([], name="cat", growth=True)
hist_m = hist.Hist(mass_axis, cat_axis)

# The nominal event weight is the same for every mass variable, so look it up once.
# NOTE(review): the earlier version divided by a "PU weight" inside a branch that
# could never run (the loop variable is a mass-variable name, never "hh4b").
# If the pileup weight should be removed here, use the "weight_nopileup" column.
weight = utils.get_feat(events_dict[sig_key], "weight")
for key in plots:
    hist_m.fill(
        mass=utils.get_feat(events_dict[sig_key], key, bb_masks[sig_key]),
        cat=key,
        weight=weight,
    )

# One figure per bb jet, overlaying soft-drop mass and PNet mass.
for jet in ("bb0", "bb1"):
    fig, ax = plt.subplots(1, 1, figsize=(6, 4))
    for var in (f"{jet}FatJetMsd", f"{jet}FatJetPNetMass"):
        hep.histplot(hist_m[{"cat": var}], ax=ax, label=plots[var])
    ax.legend()
    ax.set_xlabel("Mass (GeV)")
    ax.set_ylabel("Events")

In [None]:
# Per-sample drawing options; everything else is shared between the two samples.
sample_styles = {
    "hh4b": {"hatch": r"\\\\", "alpha": 0.5},
    "vhtobb": {"alpha": 0.1},
}

# Compare the PNet mass of each bb jet between hh4b and vhtobb.
for key in ["bb0FatJetPNetMass", "bb1FatJetPNetMass"]:
    hist_m = hist.Hist(mass_axis, cat_axis)
    for sig_key in sample_styles:
        # Nominal event weight; no pileup-weight removal is applied here.
        hist_m.fill(
            mass=utils.get_feat(events_dict[sig_key], key, bb_masks[sig_key]),
            cat=sig_key,
            weight=utils.get_feat(events_dict[sig_key], "weight"),
        )

    fig, ax = plt.subplots(1, 1, figsize=(6, 4))
    for sig_key, style in sample_styles.items():
        hep.histplot(
            hist_m[{"cat": sig_key}],
            ax=ax,
            label=sig_key,
            histtype="fill",
            facecolor="blue",
            edgecolor="black",
            linewidth=1,
            **style,
        )
    ax.legend()
    # plots[key] already ends with "(GeV)"; appending the unit again would duplicate it.
    ax.set_xlabel(plots[key])
    ax.set_ylabel("Events")

## Check PU weight

In [None]:
npv_axis = hist.axis.Regular(20, 0, 80, name="npv", label=r"nPV")
colors = {"hh4b": "blue", "vhtobb": "orange"}
# (category, weight column) pairs, filled in this order.
npv_fills = (("PU", "weight"), ("unweight", "weight_nopileup"))

# nPV distribution with and without the pileup weight, one figure per sample.
for sig_key in ["hh4b", "vhtobb"]:
    sample_events = events_dict[sig_key]
    h_npv = hist.Hist(npv_axis, cat_axis)
    for cat_name, weight_col in npv_fills:
        h_npv.fill(
            npv=utils.get_feat(sample_events, "nPV"),
            cat=cat_name,
            weight=utils.get_feat(sample_events, weight_col),
        )

    h_with_pu = h_npv[{"cat": "PU"}]
    h_without_pu = h_npv[{"cat": "unweight"}]

    # print yields
    print(sig_key)
    print("no PU weight ", np.round(h_without_pu.sum(), 2))
    print("with PU weight ", np.round(h_with_pu.sum(), 2))

    sample_color = colors[sig_key]
    fig, ax = plt.subplots(1, 1, figsize=(6, 4))
    hep.histplot(
        h_with_pu,
        ax=ax,
        label="PU weight",
        histtype="fill",
        hatch=r"\\\\",
        alpha=0.2,
        facecolor=sample_color,
        edgecolor="black",
        linewidth=1,
    )
    hep.histplot(
        h_without_pu,
        ax=ax,
        label="unweighted",
        histtype="fill",
        alpha=0.5,
        facecolor=sample_color,
    )
    ax.legend(title=sig_key)
    ax.set_ylabel("Events")

## Check trigger weight

In [None]:
msd_axis = hist.axis.Regular(40, 0, 250, name="msd", label=r"m$_{SD}^{1}$ [GeV]")
colors = {"hh4b": "blue", "vhtobb": "orange"}
# (category, weight column) pairs, filled in this order.
msd_fills = (("trigger", "weight_nopileup"), ("unweight", "weight_notrigger_nopileup"))

# bb0 soft-drop mass with and without the trigger weight, one figure per sample.
for sig_key in ["hh4b", "vhtobb"]:
    sample_events = events_dict[sig_key]
    h_m = hist.Hist(msd_axis, cat_axis)
    for cat_name, weight_col in msd_fills:
        h_m.fill(
            msd=utils.get_feat(sample_events, "bb0FatJetMsd", bb_masks[sig_key]),
            cat=cat_name,
            weight=utils.get_feat(sample_events, weight_col),
        )

    print(h_m)

    h_with_trigger = h_m[{"cat": "trigger"}]
    h_without_trigger = h_m[{"cat": "unweight"}]

    # print yields
    print(sig_key)
    print("no trigger weight ", np.round(h_without_trigger.sum(), 2))
    print("with trigger weight ", np.round(h_with_trigger.sum(), 2))

    sample_color = colors[sig_key]
    fig, ax = plt.subplots(1, 1, figsize=(6, 4))
    hep.histplot(
        h_with_trigger,
        ax=ax,
        label="Trigger weight",
        histtype="fill",
        hatch=r"\\\\",
        alpha=0.1,
        facecolor=sample_color,
        edgecolor="black",
        linewidth=1,
    )
    hep.histplot(
        h_without_trigger,
        ax=ax,
        label="unweighted",
        color=sample_color,
        edgecolor="black",
    )
    ax.legend(title=sig_key)
    ax.set_ylabel("Events")