In [None]:
from pathlib import Path
import pickle
import numpy as np
import matplotlib.pyplot as plt
import mplhep as hep

# Use the CMS style shipped with mplhep for every figure in this notebook.
plt.style.use(hep.style.CMS)

HH4B_DIR = "/home/users/woodson/HH4b/"
plot_dir = Path(f"{HH4B_DIR}/plots/Scaling_Toys/")

# Cut grids and toy settings; together they determine the pickle file names below.
xbb_cuts = np.arange(0.8, 0.999, 0.0025)
bdt_cuts = np.arange(0.9, 0.999, 0.0025)
method = "2dkde"
ntoys = 100
optimize = True
restrict = True

# "<first>_<last>_<step>" for the Xbb grid followed by the same for the BDT grid.
optimize_str_data = f"{xbb_cuts[0]:.4f}_{xbb_cuts[-1]:.4f}_{xbb_cuts[1]-xbb_cuts[0]:.4f}_{bdt_cuts[0]:.4f}_{bdt_cuts[-1]:.4f}_{bdt_cuts[1]-bdt_cuts[0]:.4f}"
optimize_str = optimize_str_data if optimize else "noopt"
if not restrict:
    optimize_str = optimize_str + "_norest"

# One dict per stored quantity, keyed by str(lumi_scale).
fom_toys, s_toys, b_toys, xbb_cut_toys, bdt_cut_toys = {}, {}, {}, {}, {}
toy_tables = {
    "fom_toys": fom_toys,
    "s_toys": s_toys,
    "b_toys": b_toys,
    "xbb_cut_toys": xbb_cut_toys,
    "bdt_cut_toys": bdt_cut_toys,
}

for lumi_scale in [1, 138.0 / 62.0, 10]:
    ls = str(lumi_scale)
    # NOTE(review): `:4f` is "minimum width 4, default 6 decimals", not `.4f`.
    # Left as is because it must match the names of the files already on disk;
    # confirm against whatever wrote them.
    toy_file = plot_dir / f"fom_toys_{method}_{ntoys}_{lumi_scale:4f}_{optimize_str}.pkl"
    # NOTE: pickle.load executes arbitrary code; only load files you trust.
    with open(toy_file, "rb") as f:
        data = pickle.load(f)
    for quantity, table in toy_tables.items():
        table[ls] = np.array(data[quantity])

# FoM stored for data; the file name always uses the scan-grid string.
with open(plot_dir / f"fom_data_{optimize_str_data}.pkl", "rb") as f:
    data = pickle.load(f)
fom_data = data["fom_data"]

In [None]:
# Distribution of the optimal figure of merit over toys at each luminosity,
# with the value stored for data drawn as a dashed vertical line.
fig, ax = plt.subplots()

fom_bins = np.linspace(0, 10, 51)
toys_138 = fom_toys[str(138.0 / 62.0)]
toys_62 = fom_toys[str(1)]
toys_620 = fom_toys[str(10)]

ax.hist(
    toys_138,
    bins=fom_bins,
    alpha=0.7,
    label=f"Toys, L=138/fb, median FoM={np.median(toys_138):.1f}",
    color="blue",
)
ax.hist(
    toys_62,
    bins=fom_bins,
    alpha=0.7,
    label=f"Toys, L=62/fb, median FoM={np.median(toys_62):.1f}",
    color="red",
)
ax.hist(
    toys_620,
    bins=fom_bins,
    alpha=0.7,
    label=f"Toys, L=620/fb, median FoM={np.median(toys_620):.1f}",
    color="green",
)
# ymax=0.75 stops the line at 75% of the axis height.
ax.axvline(
    fom_data,
    ymin=0,
    ymax=0.75,
    color="black",
    linestyle="--",
    label=f"Data, L=62/fb, FoM={fom_data:.1f}",
)

ax.set_xlabel(r"Optimal FoM=$2\sqrt{B}/S$")
ax.set_ylabel("Number of Toys")
ax.set_xlim(0, 10)
ax.set_ylim(0, 30)
ax.legend()
plt.show()

In [None]:
# Element-wise ratio of the toy FoM at 138/fb to the toy FoM at 62/fb,
# compared with the sqrt(L) expectation and the quoted 5.5/9.2 ratio.
key_138 = str(138 / 62)
key_62 = str(1)
fom_ratio = fom_toys[key_138] / fom_toys[key_62]
observed_ratio = 5.5 / 9.2

plt.figure()
plt.hist(
    fom_ratio,
    bins=np.linspace(0, 1.5, 51),
    alpha=0.7,
    label=f"Toys: median = {np.median(fom_ratio):.2f}",
    color="violet",
)
plt.axvline(
    np.sqrt(62 / 138),
    color="black",
    linestyle="--",
    label=r"$\sqrt{L} = \sqrt{62/138} = 0.67$",
)
plt.axvline(
    observed_ratio,
    color="red",
    linestyle="--",
    label="Run 2/Run 3: 5.5/9.2 = 0.60",
)
plt.xlabel("Ratio of FoMs or Limits (L=138/fb / L=62/fb)")
plt.legend()
plt.ylabel("Number of Toys")
plt.xlim(0, 2)
plt.ylim(0, 25)
# Left-sided p-value: fraction of toys whose ratio falls below the observed one.
# The denominator must be the number of toys; len(fom_toys) would be the number
# of luminosity keys in the dict.
pval = np.mean(fom_ratio < observed_ratio)
print(pval)


In [None]:
# S/B per toy at the two luminosities. s_toys and b_toys are dicts keyed by
# str(lumi_scale), so each luminosity has to be indexed before dividing.
key_62 = str(1)
key_138 = str(138 / 62)
sb_62 = s_toys[key_62] / b_toys[key_62]
sb_138 = s_toys[key_138] / b_toys[key_138]
sb_bins = np.linspace(0, 1, 51)

plt.figure()
plt.hist(
    sb_62,
    bins=sb_bins,
    alpha=0.7,
    label=f"Toys, L=62/fb: median = {np.median(sb_62):.2f}",
    color="red",
)
plt.hist(
    sb_138,
    bins=sb_bins,
    alpha=0.7,
    label=f"Toys, L=138/fb: median = {np.median(sb_138):.2f}",
    color="blue",
)
plt.legend()
plt.xlabel(r"$S/B$")
plt.ylabel("Number of Toys")
plt.xlim(0, 1)
plt.show()

In [None]:
# Xbb cut stored for each toy at 62/fb and 138/fb, binned on the scan grid
# with a closing edge at 1.
fig, ax = plt.subplots()

xbb_edges = np.concatenate((xbb_cuts, [1]))
xbb_cut_62 = xbb_cut_toys[str(1)]
xbb_cut_138 = xbb_cut_toys[str(138/62)]

ax.hist(xbb_cut_62, bins=xbb_edges, alpha=0.7, label=f"Toys, L=62/fb", color="red")
ax.hist(xbb_cut_138, bins=xbb_edges, alpha=0.7, label=f"Toys, L=138/fb", color="blue")

ax.legend()
ax.set_xlabel(r"$T_{Xbb}$ cut")
ax.set_ylabel("Number of Toys")
ax.set_xlim(np.min(xbb_cuts), 1)
plt.show()

In [None]:
# BDT cut stored for each toy at 62/fb and 138/fb, binned on the scan grid
# with a closing edge at 1. bdt_cut_toys is a dict keyed by str(lumi_scale),
# so each luminosity is looked up explicitly, as in the Xbb cut plot.
bdt_edges = np.concatenate((bdt_cuts, [1]))
bdt_cut_62 = bdt_cut_toys[str(1)]
bdt_cut_138 = bdt_cut_toys[str(138 / 62)]

plt.figure()
plt.hist(
    bdt_cut_62,
    bins=bdt_edges,
    alpha=0.7,
    label="Toys, L=62/fb",
    color="red",
)
plt.hist(
    bdt_cut_138,
    bins=bdt_edges,
    alpha=0.7,
    label="Toys, L=138/fb",
    color="blue",
)
plt.legend()
plt.xlabel(r"$BDT_{ggF}$ cut")
plt.ylabel("Number of Toys")
plt.xlim(np.min(bdt_cuts), 1)
plt.show()

In [None]:
import hist
def make_histograms(mass_array, xbb_array, bdt_array):
    """Build 1D, 2D and 3D histograms of mass, Xbb score and BDT score.

    Only entries with mass > 150 or mass < 110 are filled (presumably the
    mass sidebands -- confirm with the analysis definition).

    Args:
        mass_array: mass values; must support element-wise comparison and
            boolean-mask indexing (e.g. a NumPy array or pandas Series).
        xbb_array: Xbb scores, aligned entry by entry with ``mass_array``.
        bdt_array: BDT scores, aligned entry by entry with ``mass_array``.

    Returns:
        Tuple ``(h_mass, h_xbb, h_bdt, h_mass_xbb, h_mass_bdt, h_xbb_bdt,
        h_mass_xbb_bdt)`` of ``hist.Hist`` objects. Binning is 16 bins over
        [60, 220] for mass and 20 bins over [0.8, 1] for both scores.
    """
    axes = {
        "mass": hist.axis.Regular(16, 60, 220, name="mass"),
        "xbb": hist.axis.Regular(20, 0.8, 1, name="xbb"),
        "bdt": hist.axis.Regular(20, 0.8, 1, name="bdt"),
    }

    # Apply the mass selection once and reuse the selected values everywhere.
    selected = (mass_array > 150) | (mass_array < 110)
    values = {
        "mass": mass_array[selected],
        "xbb": xbb_array[selected],
        "bdt": bdt_array[selected],
    }

    def _filled(*names):
        """Create a histogram over the named axes and fill it by axis name."""
        histogram = hist.Hist(*(axes[name] for name in names))
        histogram.fill(**{name: values[name] for name in names})
        return histogram

    # 1D projections
    h_mass = _filled("mass")
    h_xbb = _filled("xbb")
    h_bdt = _filled("bdt")

    # full 3D histogram
    h_mass_xbb_bdt = _filled("mass", "xbb", "bdt")

    # pairwise 2D histograms
    h_mass_xbb = _filled("mass", "xbb")
    h_mass_bdt = _filled("mass", "bdt")
    h_xbb_bdt = _filled("xbb", "bdt")

    return h_mass, h_xbb, h_bdt, h_mass_xbb, h_mass_bdt, h_xbb_bdt, h_mass_xbb_bdt


In [None]:
import pickle
from Scaling_Toys import minuit_transform, logit, plot_corner, minuit_inverse_transform, sigmoid
from scipy.stats import gaussian_kde
import numpy as np

HH4B_DIR = "/home/users/woodson/HH4b/"

# Load the combined events.
# NOTE: pickle.load executes arbitrary code; only load files you trust.
events_file = f"{HH4B_DIR}/data/events_combined_25June2ReRunBDTZbbSFs384Check.pkl"
with open(events_file, "rb") as f:  # noqa: PTH123
    events_combined = pickle.load(f)

# Columns: 0 = H2PNetMass, 1 = H2TXbb, 2 = bdt_score.
data_array = events_combined["data"][["H2PNetMass", "H2TXbb", "bdt_score"]].to_numpy()

# Transform each column before the KDE fit: the mass goes through
# minuit_transform with bounds [60, 220], both scores through logit.
mass_transformed = minuit_transform(data_array[:, 0], xmin=60, xmax=220)
xbb_transformed = logit(data_array[:, 1])
bdt_transformed = logit(data_array[:, 2])
transformed_data_array = np.column_stack(
    (mass_transformed, xbb_transformed, bdt_transformed)
)


def _silverman_kde(dataset):
    """Gaussian KDE of ``dataset`` using Silverman's bandwidth rule."""
    return gaussian_kde(dataset, bw_method="silverman")


# gaussian_kde expects shape (n_dims, n_points), hence the transposes.
kde_3d_mass_xbb_bdt = _silverman_kde(transformed_data_array.T)
kde_2d_xbb_bdt = _silverman_kde(transformed_data_array[:, 1:].T)
kde_1d_mass = _silverman_kde(transformed_data_array[:, 0])
kde_1d_xbb = _silverman_kde(transformed_data_array[:, 1])
kde_1d_bdt = _silverman_kde(transformed_data_array[:, 2])

# Number of data events; used as the toy sample size.
integral = len(events_combined["data"])


In [None]:
# Draw one toy dataset from the fitted KDEs, then make the corner plot.
# NOTE(review): as written, the toy arrays are drawn twice and never plotted --
# the active make_histograms call below uses the data columns instead.
n_samples = int(integral)
rng = np.random.default_rng(42)  # for reproducibility
# First draw: mass from its 1D KDE, (xbb, bdt) jointly from the 2D KDE.
# resample() returns an array of shape (n_dims, n_samples); [0] takes the only row.
# minuit_inverse_transform / sigmoid presumably undo minuit_transform / logit
# (helpers imported from Scaling_Toys -- confirm there).
mass_toy = minuit_inverse_transform(
    kde_1d_mass.resample(n_samples, seed=rng)[0], xmin=60, xmax=220
)
sampled_transformed_data = kde_2d_xbb_bdt.resample(n_samples, seed=rng).T
xbb_toy = sigmoid(sampled_transformed_data[:, 0])
bdt_toy = sigmoid(sampled_transformed_data[:, 1])

# Second draw: every variable from its own 1D KDE. This overwrites mass_toy,
# xbb_toy and bdt_toy from the first draw, so the xbb/bdt toys kept here are
# sampled independently of each other.
# NOTE(review): both draws share `rng`, so removing or reordering either one
# changes the random numbers of everything after it.
mass_toy = minuit_inverse_transform(
                kde_1d_mass.resample(n_samples, seed=rng)[0], xmin=60, xmax=220
            )
xbb_toy = sigmoid(kde_1d_xbb.resample(n_samples, seed=rng)[0])
bdt_toy = sigmoid(kde_1d_bdt.resample(n_samples, seed=rng)[0])

# Disabled alternative: histogram the toy arrays instead of data.
#h_mass, h_xbb, h_bdt, h_mass_xbb, h_mass_bdt, h_xbb_bdt, h_mass_xbb_bdt = make_histograms(
#    mass_toy,
#    xbb_toy,
#    bdt_toy,
#)

# Active: histogram the data columns (pandas Series) directly.
h_mass, h_xbb, h_bdt, h_mass_xbb, h_mass_bdt, h_xbb_bdt, h_mass_xbb_bdt = make_histograms(
    events_combined["data"]["H2PNetMass"],
    events_combined["data"]["H2TXbb"],
    events_combined["data"]["bdt_score"],
)

# The 3D histogram is not passed to plot_corner.
fig = plot_corner(h_mass, h_xbb, h_bdt, h_mass_xbb, h_mass_bdt, h_xbb_bdt)