In [None]:
%matplotlib inline
import matplotlib.pyplot as plt

In [None]:
import awkward as ak
import vector
import glob
import numpy as np

import sklearn.metrics
import yaml
import os

import matplotlib
import mplhep
# Select mplhep's CMS style for the matplotlib figures drawn in this notebook
mplhep.style.use(mplhep.style.CMS)

In [None]:
#get the list of files that were set aside for testing
# The context manager closes the config file as soon as it has been parsed,
# instead of leaving the handle open until it happens to be garbage collected.
with open("../config/datasets/test.yaml", "rb") as test_config_file:
    test_paths = yaml.safe_load(test_config_file)["test"]["paths"]

# Keep only the file names: the evaluation outputs are matched to the test set
# by basename, not by their full path.
test_paths = [os.path.basename(p) for p in test_paths]

In [None]:
def _to_p4(p4s):
    """Rebuild a vector-aware four-momentum array from stored components.

    Reads the ``x``, ``y``, ``z`` and ``tau`` fields of ``p4s`` and zips them
    into a ``px``/``py``/``pz``/``mass`` record wrapped with ``vector.awk``, so
    that derived quantities such as ``.energy`` become available.
    """
    return vector.awk(
        ak.zip(
            {
                "px": p4s.x,
                "py": p4s.y,
                "pz": p4s.z,
                "mass": p4s.tau,
            }
        )
    )


#binary classification outputs
vals_cls_true = []
vals_cls_pred = []

#decay modes
vals_dm_true = []
vals_dm_pred = []

#energy regression outputs
vals_e_true = []
vals_e_pred = []

# Set lookup is O(1) per file and does not modify test_paths itself
test_file_names = set(test_paths)

#loop over the evaluation outputs
# glob returns matches in arbitrary order; sorting makes the order of the
# concatenated arrays reproducible from run to run.
for fi in sorted(glob.glob("../simplednn/SimpleDNN/*/*.parquet")):
    # only files that were set aside for testing are evaluated
    if os.path.basename(fi) not in test_file_names:
        continue
    print(fi)

    data = ak.from_parquet(fi)

    # a jet counts as a true tau when its generator-level decay mode is not -1
    cls_true = data["gen_jet_tau_decaymode"]!=-1
    cls_pred = data["tauClassifier"]

    dm_true = data["gen_jet_tau_decaymode"]
    dm_pred = data["tau_decaymode"]

    tauP4_pred = _to_p4(data["tau_p4s"])
    tauP4_true = _to_p4(data["gen_jet_tau_p4s"])

    vals_e_pred.append(ak.to_numpy(tauP4_pred.energy))
    vals_e_true.append(ak.to_numpy(tauP4_true.energy))

    vals_cls_pred.append(ak.to_numpy(cls_pred))
    vals_cls_true.append(ak.to_numpy(cls_true))

    vals_dm_pred.append(ak.to_numpy(dm_pred))
    vals_dm_true.append(ak.to_numpy(dm_true))

# np.concatenate on an empty list fails with an unhelpful message; say what is
# actually wrong (no evaluation output matched the test file list).
if len(vals_cls_true) == 0:
    raise FileNotFoundError(
        "no evaluation outputs matching the test file list were found under "
        "../simplednn/SimpleDNN/*/*.parquet"
    )

vals_cls_true = np.concatenate(vals_cls_true)
vals_cls_pred = np.concatenate(vals_cls_pred)

vals_e_true = np.concatenate(vals_e_true)
vals_e_pred = np.concatenate(vals_e_pred)

vals_dm_true = np.concatenate(vals_dm_true)
vals_dm_pred = np.concatenate(vals_dm_pred)

In [None]:
# ROC curve of the tau classifier score against the true tau/non-tau label.
# The third return value of roc_curve (the thresholds, per the scikit-learn API) is discarded.
fpr, tpr, _ = sklearn.metrics.roc_curve(vals_cls_true, vals_cls_pred)

## Binary classification output

In [None]:
# Classifier score distributions for true taus ("sig") and all other jets ("bkg")
plt.figure(figsize=(5,5))
b = np.linspace(0.0, 1, 100)
plt.hist(vals_cls_pred[vals_cls_true==1], bins=b, density=True, histtype="step", label="sig")
plt.hist(vals_cls_pred[vals_cls_true==0], bins=b, density=True, histtype="step", label="bkg")
plt.yscale("log")
plt.legend()
plt.xlabel("tau classifier")
# density=True normalizes each histogram to unit area, so the y axis is not a
# raw jet count; the label says so explicitly.
plt.ylabel("jets / bin (normalized to unit area)");

In [None]:
# ROC curve drawn with the true positive rate on x and the false positive rate
# on a logarithmic y axis (no diagonal reference line is drawn).
fig, ax = plt.subplots(figsize=(5,5))
ax.plot(tpr, fpr)
ax.set_xlabel("TPR")
ax.set_ylabel("FPR")
ax.set_yscale("log")
ax.set_ylim(1e-5, 1)

## Energy regression output

In [None]:
#for energy regression and decay mode, we focus only on the jets which are taus in simulation
#(vals_cls_true is built from gen_jet_tau_decaymode != -1, so this keeps jets with a generator-level decay mode)
mask_true_tau = vals_cls_true==1

In [None]:
# True vs. reconstructed energy for true taus, with the ideal-response diagonal
fig, ax = plt.subplots(figsize=(5,5))
b = np.linspace(0,150,100)
e_true_taus = vals_e_true[mask_true_tau]
e_pred_taus = vals_e_pred[mask_true_tau]
ax.hist2d(e_true_taus, e_pred_taus, bins=(b, b), cmap="hot_r")
# dashed line marks where reconstructed energy equals true energy
ax.plot([0,150],[0,150], color="blue", ls="--")
ax.set_xlabel("True energy")
ax.set_ylabel("Reconstructed energy")

In [None]:
# Energy residual (reconstructed minus true) for true taus.
# Explicit figure size keeps this plot consistent with the other figures in the notebook.
plt.figure(figsize=(5,5))
# residuals outside [-5, 5] fall outside the bin range and are not drawn
b = np.linspace(-5,5,100)
plt.hist(vals_e_pred[mask_true_tau] - vals_e_true[mask_true_tau], bins=b)
plt.xlabel("$E_{reco} - E_{true}$ [GeV]")
# unnormalized histogram: the y axis is a raw count of jets per bin
plt.ylabel("number of jets / bin");

## Decay mode multiclass output

In [None]:
# Decay-mode metrics are computed on true taus only; select them once and reuse
dm_true_taus = vals_dm_true[mask_true_tau]
dm_pred_taus = vals_dm_pred[mask_true_tau]

# confusion matrix over decay mode labels 0..15, plus the overall accuracy
confusion_matrix = sklearn.metrics.confusion_matrix(
    dm_true_taus,
    dm_pred_taus,
    labels=range(16),
)
acc = sklearn.metrics.accuracy_score(dm_true_taus, dm_pred_taus)

In [None]:
# Decay-mode confusion matrix on a logarithmic color scale (color range starts at 10 entries)
plt.imshow(confusion_matrix, cmap="Blues", norm=matplotlib.colors.LogNorm(vmin=10))
plt.colorbar()
# scikit-learn's confusion matrix has true labels as rows and predicted labels
# as columns; imshow draws rows along y and columns along x.
plt.xlabel("Predicted decay mode")
plt.ylabel("True decay mode")
plt.title("Accuracy = {:.2f}".format(acc));