In [1]:
import os
from typing import List

import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
import mplhep as hep
import numpy as np
import uproot
from scipy.stats import chi2

# Apply the CMS matplotlib style once, through mplhep's own entry point.
# (The style was previously applied twice in a row with the same settings.)
hep.style.use("CMS")

# Scalar tick formatter that renders exponents as math text and switches to
# scientific notation below 10^-3 and above 10^3.
formatter = mticker.ScalarFormatter(useMathText=True)
formatter.set_powerlimits((-3, 3))

# Override the base font size after the style has been applied.
plt.rcParams.update({"font.size": 20})

In [2]:
# Base directory (relative to the working directory) under which plots are written.
MAIN_DIR = "../../"

# Output directory for the figures saved by this notebook.
plot_dir = os.path.join(MAIN_DIR, "plots", "GoF", "23Oct9")

# Create the directory tree without spawning a shell; a failure raises OSError
# instead of being silently ignored.
os.makedirs(plot_dir, exist_ok=True)

In [8]:
# Directory holding the combine GoodnessOfFit ROOT outputs read below.
cards_dir = (
    "/ospool/cms-user/yuzhe/BoostedHWW/prediction/boostedHWW/combine/scripts/f_test/cards/nTF_0"
)

# Observed result: the first entry of the "limit" field is used as the data value.
data_arrays = uproot.concatenate(f"{cards_dir}/higgsCombineData.GoodnessOfFit.mH125.root")
data_ts = data_arrays["limit"][0]  # data test statistic

# Toy results: the glob collects every matching toys file into one array.
# uproot raises FileNotFoundError when the pattern matches no file.
toy_arrays = uproot.concatenate(f"{cards_dir}/higgsCombineToys.GoodnessOfFit.mH125.*.root")
toy_ts = np.array(toy_arrays["limit"])  # toy test statistics

FileNotFoundError: file not found

    '/ospool/cms-user/yuzhe/BoostedHWW/prediction/boostedHWW/combine/scripts/f_test/cards/nTF_0/higgsCombineToys.GoodnessOfFit.mH125.*.root'

Files may be specified as:
   * str/bytes: relative or absolute filesystem path or URL, without any colons
         other than Windows drive letter or URL schema.
         Examples: "rel/file.root", "C:\abs\file.root", "http://where/what.root"
   * str/bytes: same with an object-within-ROOT path, separated by a colon.
         Example: "rel/file.root:tdirectory/ttree"
   * pathlib.Path: always interpreted as a filesystem path or URL only (no
         object-within-ROOT path), regardless of whether there are any colons.
         Examples: Path("rel:/file.root"), Path("/abs/path:stuff.root")

Functions that accept many files (uproot.iterate, etc.) also allow:
   * glob syntax in str/bytes and pathlib.Path.
         Examples: Path("rel/*.root"), "/abs/*.root:tdirectory/ttree"
   * dict: keys are filesystem paths, values are objects-within-ROOT paths.
         Example: {"/data_v1/*.root": "ttree_v1", "/data_v2/*.root": "ttree_v2"}
   * already-open TTree objects.
   * iterables of the above.


In [None]:
def p_value(data_ts: float, toy_ts: List[float]):
    """Return the fraction of toy test statistics at or above the observed one.

    Args:
        data_ts: Test statistic observed in data.
        toy_ts: Test statistics from the toys; any sequence or array of numbers.

    Returns:
        The mean of the boolean mask ``toy_ts >= data_ts``, i.e. a value in
        [0, 1]. An empty ``toy_ts`` yields ``nan`` (NumPy's mean of an empty
        array).
    """
    # Coerce to an array so that a plain Python list compares element-wise
    # instead of raising TypeError on ``list >= float``.
    toys = np.asarray(toy_ts)
    return np.mean(toys >= data_ts)

In [None]:
# Observed p-value: share of toys whose test statistic is >= the data value.
pval = p_value(data_ts=data_ts, toy_ts=toy_ts)
pval

In [None]:
# Plotting range wide enough to contain every toy and the observed value.
plot_min, plot_max = (
    min(np.min(toy_ts), data_ts),
    max(np.max(toy_ts), data_ts),
)

In [None]:
# Toy test-statistic distribution with the observed value overlaid.
fig, ax = plt.subplots(figsize=(12, 8))

# Ten edges spanning the plotting range, i.e. nine equal-width bins.
bin_edges = np.linspace(plot_min, plot_max, 10)
ax.hist(toy_ts, bin_edges, color="dimgrey", histtype="step", label="Toys")

# Vertical marker at the data test statistic, labelled with its p-value.
ax.axvline(data_ts, color="red", linestyle=":", label=rf"Data ($p$-value = {pval:.2f})")

ax.legend()
ax.set_title("Goodness of Fit")
ax.set_ylabel("Number of Toys")
ax.set_xlabel("Test Statistics")
fig.savefig(f"{plot_dir}/gof.pdf")

In [None]:
# Degrees of freedom of the reference chi-square curve.
chi2_df = 100

# Multiplier applied to the chi-square pdf before plotting.
# NOTE(review): the toy histogram below uses density=True, so scaling the pdf
# puts the curve on a different normalisation -- confirm the 1.3 is intended.
chi2_scale = 1.3

fig, ax = plt.subplots(figsize=(12, 8))

# Reference chi-square curve evaluated on 101 points across the plotting range.
x = np.linspace(plot_min, plot_max, 101)
ax.plot(
    x,
    chi2.pdf(x, chi2_df) * chi2_scale,
    color="blue",
    linestyle="--",
    alpha=0.6,
    label=rf"$\chi^2_{{DoF = {chi2_df}}}$",
)

# Normalised toy distribution: ten edges, i.e. nine equal-width bins.
ax.hist(
    toy_ts,
    np.linspace(plot_min, plot_max, 10),
    density=True,
    color="dimgrey",
    histtype="step",
    label="Toys",
)

# Vertical marker at the data test statistic, labelled with its p-value.
ax.axvline(data_ts, color="red", linestyle=":", label=rf"Data ($p$-value = {pval:.2f})")

ax.legend()
ax.set_title("Goodness of Fit")
ax.set_ylabel("Number of Toys (A.U.)")
ax.set_xlabel("Test Statistics")
fig.savefig(f"{plot_dir}/gof_chi2.pdf")