In [None]:
# automatically reloads imported files on edits
%load_ext autoreload
%autoreload 2

In [None]:
import numpy as np
from matplotlib import pyplot as plt
from pathlib import Path
import pickle
from constants import PT_BINS
from HH4b.hh_vars import LUMI

import mplhep as hep

hep.style.use(hep.style.CMS)

In [None]:
# Output locations: directory for the per-year plots and the pickle file that
# receives the combined scale factors.
PLOT_DIR = Path("plots/corr_ZQQ_data")
OUTPUT_PKL_PATH = Path("corrs/ZQQ_data.pkl")

# Create both output directories up front so later writes cannot fail on a
# missing parent directory.
PLOT_DIR.mkdir(exist_ok=True, parents=True)
OUTPUT_PKL_PATH.parent.mkdir(exist_ok=True, parents=True)

In [None]:
def read_pickle(path: Path):
    """Load and return the object stored in the pickle file at ``path``.

    NOTE: unpickling can execute arbitrary code, so only call this on files
    from a trusted source.
    """
    with path.open(mode="rb") as handle:
        payload = pickle.load(handle)
    return payload

In [None]:
# Input scale factors, read from pickle files under corrs/.
# Each path is declared right next to the read that uses it.
path_DYLL_data = Path("corrs/DYLL_data.pkl")
SF_dict_DYLL_data = read_pickle(path_DYLL_data)

path_ZQQ_DYLL = Path("corrs/ZQQ_DYLL.pkl")
SF_dict_ZQQ_DYLL = read_pickle(path_ZQQ_DYLL)

In [None]:
def multiply_SFs_asymmetric(
    SF_dict_1: dict[str, np.ndarray], SF_dict_2: dict[str, np.ndarray]
) -> dict[str, np.ndarray]:
    """
    Multiply two sets of binned scale factors, propagating the up and down
    variations separately so that asymmetric uncertainties are preserved.

    Args:
        SF_dict_1: mapping with "nominal", "up" and "down" arrays (the varied
            scale-factor values, not the deviations) and a "pt" entry.
        SF_dict_2: mapping with "nominal", "up" and "down" arrays that
            broadcast against those of ``SF_dict_1``; a "pt" entry is optional.

    Returns:
        dict with
          - "nominal": element-wise product of the two nominal arrays,
          - "up" / "down": element-wise maximum / minimum over the four
            products of the up/down variations (the envelope),
          - "pt": ``SF_dict_1["pt"]``, passed through unchanged.

    Raises:
        ValueError: if both inputs carry "pt" arrays of the same shape whose
            values differ, i.e. the bins being multiplied do not line up.
    """
    pt_1 = SF_dict_1["pt"]

    # A bin-by-bin product is only meaningful when both inputs use the same
    # binning. Arrays of a different length already fail inside numpy, so the
    # case that would otherwise go unnoticed is same shape, different values.
    if "pt" in SF_dict_2:
        pt_2 = np.asarray(SF_dict_2["pt"])
        if pt_2.shape == np.shape(pt_1) and not np.allclose(pt_1, pt_2):
            raise ValueError(
                f"Cannot multiply scale factors with different pt binning: {pt_1} vs {pt_2}"
            )

    nominal1 = np.asarray(SF_dict_1["nominal"])
    up1 = np.asarray(SF_dict_1["up"])
    down1 = np.asarray(SF_dict_1["down"])

    nominal2 = np.asarray(SF_dict_2["nominal"])
    up2 = np.asarray(SF_dict_2["up"])
    down2 = np.asarray(SF_dict_2["down"])

    # For asymmetric errors, consider every combination of the two variations
    # and take the envelope.
    combinations = [
        up1 * up2,  # both up
        up1 * down2,  # first up, second down
        down1 * up2,  # first down, second up
        down1 * down2,  # both down
    ]

    return {
        "nominal": nominal1 * nominal2,
        "up": np.max(combinations, axis=0),
        "down": np.min(combinations, axis=0),
        "pt": pt_1,
    }

In [None]:
# Combine the two corrections year by year (years are taken from the DYLL->Data dict).
SF_dict_ZQQ_data = {
    year: multiply_SFs_asymmetric(SF_dict_ZQQ_DYLL[year], SF_dict_DYLL_data[year])
    for year in SF_dict_DYLL_data
}

# Save the combined SFs
OUTPUT_PKL_PATH.write_bytes(pickle.dumps(SF_dict_ZQQ_data))

# Alternative (not used): since ZQQ->DYLL is essentially 1, the DYLL->Data
# correction could be saved directly, i.e. SF_dict_ZQQ_data = SF_dict_DYLL_data.

In [None]:
# Plot the scale factors, one figure per year.
# Bin centres and half-widths do not depend on the year, so compute them once.
# np.asarray lets the slicing arithmetic work even if PT_BINS is a plain sequence.
pt_edges = np.asarray(PT_BINS)
pt_centers = (pt_edges[:-1] + pt_edges[1:]) / 2
pt_half_widths = (pt_edges[1:] - pt_edges[:-1]) / 2

for year in SF_dict_ZQQ_data:
    sf = SF_dict_ZQQ_data[year]

    fig, ax = plt.subplots(figsize=(10, 6))

    # The variations are asymmetric, so draw separate lower/upper error bars:
    # row 0 is the distance down to "down", row 1 the distance up to "up".
    yerr = np.vstack([sf["nominal"] - sf["down"], sf["up"] - sf["nominal"]])

    # Plot nominal scale factor
    ax.errorbar(
        pt_centers,
        sf["nominal"],
        xerr=pt_half_widths,
        yerr=yerr,
        fmt="o",
        color="blue",
        markersize=5,
        capsize=3,
    )

    # Reference line at SF = 1
    ax.axhline(y=1, color="red", linestyle="--", label="Nominal SF = 1")

    ax.set_xlabel(r"Z $p_\mathrm{T}$ [GeV]")
    ax.set_ylabel("Scale Factor")
    # ax.set_xlim(PT_BINS[0], PT_BINS[-1])
    ax.set_ylim(0.5, 1.5)
    # ax.grid(True)

    hep.cms.label(
        ax=ax,
        label="Work in Progress",
        data=True,
        year=year.replace("All", ""),
        com=13.6,
        lumi=(round(LUMI[year] / 1000, 2)),
    )

    plt.tight_layout()
    plt.savefig(PLOT_DIR / f"SF_{year}.pdf", bbox_inches="tight")
    plt.show()
    # Release the figure so a long list of years does not accumulate open figures.
    plt.close(fig)

# Export to Correctionlib Format

In [None]:
from correctionlib import schemav2


def binning(edges, content):
    """Build a correctionlib ``Binning`` node over the "pt" input.

    Args:
        edges: bin edges (list or numpy array).
        content: one value per bin (any iterable, including a numpy array).

    Returns:
        ``schemav2.Binning`` with ``flow="clamp"``.
    """
    # The schema is validated by pydantic, which expects plain Python lists;
    # numpy arrays are converted so that edges and content are treated alike
    # and hold native Python numbers.
    if isinstance(edges, np.ndarray):
        edges = edges.tolist()
    if isinstance(content, np.ndarray):
        content = content.tolist()

    return schemav2.Binning(
        nodetype="binning",
        input="pt",
        edges=edges,
        content=list(content),
        flow="clamp",
    )


def get_corr(edges, val_nom, val_up, val_down):
    """Assemble a correctionlib ``CorrectionSet`` containing "GenZPtWeight".

    The correction takes a real "pt" and a string "systematic" input and
    returns a real "weight". The "systematic" value selects one of three
    pt-binned tables: "nominal" (``val_nom``), "stat_up" (``val_up``) or
    "stat_dn" (``val_down``); any other value falls back to the nominal table.

    Args:
        edges: pt bin edges shared by all three tables.
        val_nom: per-bin nominal weights.
        val_up: per-bin weights for the "stat_up" variation.
        val_down: per-bin weights for the "stat_dn" variation.

    Returns:
        ``schemav2.CorrectionSet`` with the single correction.
    """
    summary = "Gen-Level Z pT reweighting correction derived from ZMuMu"

    pt_input = schemav2.Variable(
        name="pt",
        type="real",
        description="Gen Z transverse momentum",
    )
    systematic_input = schemav2.Variable(
        name="systematic",
        type="string",
        description="Systematic variation",
    )
    weight_output = schemav2.Variable(
        name="weight", type="real", description="Multiplicative event weight"
    )

    # One binned table per systematic key.
    variations = (("nominal", val_nom), ("stat_up", val_up), ("stat_dn", val_down))
    per_systematic = [
        {"key": key, "value": binning(edges, values)} for key, values in variations
    ]
    selector = schemav2.Category(
        nodetype="category",
        input="systematic",
        content=per_systematic,
        default=binning(edges, val_nom),
    )

    pt_weight = schemav2.Correction(
        name="GenZPtWeight",
        version=1,
        description=summary,
        inputs=[pt_input, systematic_input],
        output=weight_output,
        data=selector,
        generic_formulas=[],
    )

    return schemav2.CorrectionSet(
        schema_version=2,
        description=summary,
        corrections=[pt_weight],
        compound_corrections=[],
    )

In [None]:
# Write one correctionlib JSON file per year into the current working directory.
for year_key, sf_year in SF_dict_ZQQ_data.items():
    year = year_key.replace("All", "")
    edges = sf_year["pt"]

    # The stored edges must agree with PT_BINS; the length is compared first so
    # that np.isclose is only evaluated on equally long sequences.
    edges_match = len(edges) == len(PT_BINS) and np.isclose(edges, PT_BINS).all()
    if not edges_match:
        raise ValueError(f"Edges ({edges}) for {year} do not match PT_BINS {PT_BINS}")

    cset = get_corr(edges, sf_year["nominal"], sf_year["up"], sf_year["down"])
    Path(f"corr_{year}.json").write_text(cset.model_dump_json(exclude_unset=True))