In [None]:
%matplotlib inline
import matplotlib.pyplot as plt

In [None]:
import awkward as ak
import vector
import glob
import numpy as np

import sklearn.metrics
import yaml
import os

import mplhep
# Select mplhep's CMS style sheet for the figures drawn in this notebook.
mplhep.style.use(mplhep.style.CMS)

In [None]:
# Read the test-dataset configuration and keep only the base file names
# (directory part stripped) of the entries whose path contains "ZH_Htautau".
# The config is opened in a `with` block so the file handle is closed once parsed.
with open("../config/datasets/test.yaml", "rb") as config_file:
    test_config = yaml.safe_load(config_file)

test_file_paths = [
    os.path.basename(p) for p in test_config["test"]["paths"] if "ZH_Htautau" in p
]

In [None]:
def _as_vector_p4(p4s):
    """Rebuild `p4s` as a vector-aware awkward array from its x, y, z and tau fields."""
    return vector.awk(ak.zip({"x": p4s.x, "y": p4s.y, "z": p4s.z, "tau": p4s.tau}))


# Per-file arrays are accumulated in these lists and concatenated after the loop.
vals_y_true = []
vals_y_pred = []

vals_e_true = []
vals_e_pred = []
for fi in test_file_paths:
    fn = "../data/SimpleDNN/ZH_Htautau/" + fi
    # Files named in the config but missing on disk are skipped (best effort).
    if not os.path.isfile(fn):
        continue
    print(fn)

    data = ak.from_parquet(fn)
    # Label is True wherever gen_jet_tau_decaymode differs from -1.
    # NOTE(review): presumably -1 marks jets without a generator-level tau — confirm.
    y_true = data["gen_jet_tau_decaymode"] != -1
    y_pred = data["tauClassifier"]

    tauP4_pred = _as_vector_p4(data["tau_p4s"])
    tauP4_true = _as_vector_p4(data["gen_jet_tau_p4s"])

    # The `t` component of each four-vector is what the later cells plot as energy.
    vals_e_pred.append(ak.to_numpy(tauP4_pred.t))
    vals_e_true.append(ak.to_numpy(tauP4_true.t))

    vals_y_true.append(ak.to_numpy(y_true))
    vals_y_pred.append(ak.to_numpy(y_pred))

# np.concatenate rejects an empty list with an unhelpful message, so fail early
# with a description of what is actually wrong.
if not vals_y_true:
    raise FileNotFoundError(
        "None of the ZH_Htautau test files listed in the config were found under "
        "../data/SimpleDNN/ZH_Htautau/"
    )

vals_y_true = np.concatenate(vals_y_true)
vals_y_pred = np.concatenate(vals_y_pred)

vals_e_true = np.concatenate(vals_e_true)
vals_e_pred = np.concatenate(vals_e_pred)

In [None]:
# False/true positive rates of the tau classifier score against the truth label;
# the third return value (the thresholds) is not used.
fpr, tpr, _ = sklearn.metrics.roc_curve(
    y_true=vals_y_true,
    y_score=vals_y_pred,
)

In [None]:
# Classifier-score distributions for jets with a true label ("sig") and without ("bkg"),
# overlaid on a log-scale y axis.
fig, ax = plt.subplots(figsize=(5, 5))
b = np.linspace(0.0, 1, 100)
for truth_value, label in ((1, "sig"), (0, "bkg")):
    ax.hist(
        vals_y_pred[vals_y_true == truth_value],
        bins=b,
        density=True,
        histtype="step",
        label=label,
    )
ax.set_yscale("log")
ax.legend()
ax.set_xlabel("tau classifier")
# density=True normalizes each histogram to unit area, so the y axis shows a
# probability density rather than a number of jets per bin.
ax.set_ylabel("probability density");

In [None]:
# ROC curve of the tau classifier, with the diagonal drawn as a dashed reference line.
fig, ax = plt.subplots(figsize=(5, 5))
ax.plot(fpr, tpr)
ax.plot([0, 1], [0, 1], ls="--", color="black")
ax.set_xlabel("FPR")
ax.set_ylabel("TPR")

In [None]:
# 2D histogram of reconstructed vs. true energy, restricted to entries whose
# truth label is set; the dashed blue diagonal marks reconstructed == true.
b = np.linspace(0, 150, 100)
is_true_tau = vals_y_true == 1
e_true_sig = vals_e_true[is_true_tau]
e_pred_sig = vals_e_pred[is_true_tau]

plt.figure(figsize=(5, 5))
plt.hist2d(e_true_sig, e_pred_sig, bins=(b, b), cmap="hot_r")
plt.plot([0, 150], [0, 150], ls="--", color="blue")
plt.xlabel("True energy")
plt.ylabel("Reconstructed energy")