In [None]:
import awkward as ak
from coffea.nanoevents import NanoEventsFactory, NanoAODSchema

# Turn off the schema's missing-cross-reference warnings before any events are built.
NanoAODSchema.warn_missing_crossrefs = False

# Input file and the dataset label attached to the events as metadata.
fname = "MC_defaultAK4.root"
dataset_name = "GluGlutoHHto4B_kl-1p00_kt-1p00_c2-0p00_TuneCP5_13p6TeV_powheg-pythia8"

# Build the factory for the "Events" entry of the file, then ask it for the events.
# Later cells call .compute() on fields of `events` to get concrete arrays.
events_factory = NanoEventsFactory.from_root(
    {fname: "Events"},
    schemaclass=NanoAODSchema,
    metadata={"dataset": dataset_name},
)
events = events_factory.events()

In [None]:
def _computed_flat(lazy_values):
    """Evaluate a lazy FatJet expression and flatten the result with ``ak.flatten``."""
    return ak.flatten(lazy_values.compute())


# Individual FatJet selection masks, flattened so they can be used to index
# other flattened FatJet arrays.
fatjets = events.FatJet
pt_cut = _computed_flat(fatjets.pt > 250)
eta_cut = _computed_flat(abs(fatjets.eta) < 2.5)
msd_cut = _computed_flat(fatjets.msoftdrop > 40)

# A jet is kept only if it passes all three requirements.
cut = pt_cut & eta_cut & msd_cut

In [None]:
fatjets = events.FatJet

# GloParT v2: the Xbb-vs-QCD discriminant is read directly from one branch.
glopart2_txbb = ak.flatten(fatjets.globalParT2_XbbVsQCD.compute())

# GloParT v3: read the Xbb and QCD scores separately ...
glopart3_pxbb = ak.flatten(fatjets.globalParT3_Xbb.compute())
glopart3_pqcd = ak.flatten(fatjets.globalParT3_QCD.compute())

# ... and combine them into the discriminant Xbb / (Xbb + QCD).
# NOTE(review): a jet with Xbb + QCD == 0 would give a non-finite value here;
# confirm whether such entries can occur in this sample.
glopart3_score_sum = glopart3_pxbb + glopart3_pqcd
glopart3_txbb = glopart3_pxbb / glopart3_score_sum

In [None]:
import matplotlib.pyplot as plt
import mplhep as hep
import numpy as np

plt.style.use(hep.style.CMS)

# np.linspace's third argument is the number of *edges*, so N bins need N + 1 edges.
# The previous np.linspace(0, 1, 20) produced 19 bins of width 1/19 rather than 20
# bins of width 0.05.
n_hist_bins = 20
bins = np.linspace(0, 1, n_hist_bins + 1)

# Overlay the v2 and v3 discriminants for jets passing the selection in `cut`.
# `range=` is not passed: matplotlib ignores it when explicit bin edges are given.
fig, ax = plt.subplots()
for scores, label, extra_style in (
    (glopart2_txbb, r"GloParTv2 $T_{Xbb}$", {}),
    (glopart3_txbb, r"GloParTv3 $T_{Xbb}$", {"linestyle": "--"}),
):
    ax.hist(
        scores[cut],
        bins=bins,
        histtype="step",
        label=label,
        **extra_style,
    )

ax.set_xlim(0, 1)
ax.set_xlabel(r"$T_{Xbb}$")
ax.set_ylabel("Jets")
# The legend title records the selection applied through `cut`.
ax.legend(title=r"$p_{T} > 250$ GeV, $|\eta| < 2.5$, $m_{SD} > 40$ GeV")
fig.savefig("glopartv2v3_histogram.pdf")
plt.show()

In [None]:
# Jet-by-jet comparison of the two discriminants: v2 on x, v3 on y,
# restricted to jets passing the selection in `cut`.
fig, ax = plt.subplots()
ax.scatter(glopart2_txbb[cut], glopart3_txbb[cut], s=4, alpha=0.7)

# Both axes are limited to [0, 1].
ax.set_xlim(0, 1)
ax.set_ylim(0, 1)
ax.set_xlabel(r"GloParTv2 $T_{Xbb}$")
ax.set_ylabel(r"GloParTv3 $T_{Xbb}$")
ax.set_title(r"$p_{T} > 250$ GeV, $|\eta| < 2.5$, $m_{SD} > 40$ GeV")

fig.savefig("glopartv2v3_scatter.pdf")
plt.show()

In [None]:
# 2D confusion matrix with bins at [0.3, 0.95, 0.975, 0.99, 1.0]
from sklearn.metrics import confusion_matrix

bins = [0.3, 0.95, 0.975, 0.99, 1.0]
n_score_bins = len(bins) - 1


def _score_bin_index(scores, edges):
    """Return the 0-based bin index of every score for ascending ``edges``.

    Bins are half-open ``[lo, hi)`` except the last one, which is closed on the
    right so that a score exactly equal to the top edge is counted in the top bin
    instead of falling outside the matrix. Scores below the first edge map to
    ``-1``; scores above the last edge (and NaN) map to ``len(edges) - 1``. Both
    values lie outside ``0 .. len(edges) - 2``.
    """
    values = np.asarray(scores)
    index = np.digitize(values, edges) - 1
    # np.digitize alone would place values == edges[-1] one past the last bin.
    index[values == edges[-1]] = len(edges) - 2
    return index


glopart2_binned = _score_bin_index(glopart2_txbb[cut], bins)
glopart3_binned = _score_bin_index(glopart3_txbb[cut], bins)

# Rows are v3 bins and columns are v2 bins; normalize="pred" makes each column
# (v2 bin) sum to one. Jets whose v2 or v3 index is not in `labels` are not
# counted in the matrix.
cm = confusion_matrix(
    glopart3_binned,
    glopart2_binned,
    labels=np.arange(n_score_bins),
    normalize="pred",
)

# Tick labels mirror the actual intervals: half-open, with the last bin closed.
bin_labels = [
    f"[{lo}, {hi}" + ("]" if k == n_score_bins - 1 else ")")
    for k, (lo, hi) in enumerate(zip(bins[:-1], bins[1:]))
]
tick_positions = np.arange(n_score_bins)

fig, ax = plt.subplots()
image = ax.imshow(cm, interpolation="nearest", cmap=plt.cm.Blues, origin="lower")
cbar = fig.colorbar(image, ax=ax)
ax.set_xticks(tick_positions)
ax.set_xticklabels(bin_labels, rotation=45)
ax.set_yticks(tick_positions)
ax.set_yticklabels(bin_labels)
ax.set_xlabel("GloParTv2 $T_{Xbb}$ Bins")
ax.set_ylabel("GloParTv3 $T_{Xbb}$ Bins")
ax.set_title(r"$p_{T} > 250$ GeV, $|\eta| < 2.5$, $m_{SD} > 40$ GeV    ")

# Print the value in each cell; use white text on dark cells so it stays readable.
thresh = 0.6
for i in range(n_score_bins):
    for j in range(n_score_bins):
        ax.text(
            j,
            i,
            f"{cm[i, j]:.2f}",
            ha="center",
            va="center",
            color="white" if cm[i, j] > thresh else "black",
        )

# set color scale title
cbar.set_label("Probability")
fig.savefig("glopartv2v3_confusion_matrix.pdf", bbox_inches="tight")
plt.show()