# Build zRank

In [None]:
import matplotlib.pyplot as plt
import numexpr
import numpy as np
import pandas as pd

# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Default figure size as [width, height]; matplotlib interprets these in inches.
plt.rcParams["figure.figsize"] = [10, 5]

In [None]:
import sys

# Make the project-local `higgsstrahlung` package importable.
# NOTE(review): machine-specific absolute path -- adjust when running elsewhere.
ZH_PROJECT_DIR = "/home/kunath/iLCSoft/projects/ZH"
if ZH_PROJECT_DIR not in sys.path:
    # Guarded so that re-running this cell does not pile up duplicate entries.
    sys.path.append(ZH_PROJECT_DIR)

from higgsstrahlung.from_root import RootfileHandler
import higgsstrahlung.cuts as cuts

# Handler for the "e1Tree" tuple.
file_handler = RootfileHandler(tupleName="e1Tree")

# Metadata pickle stored in the handler's data-frame folder.
# NOTE(review): `_r2p` and `_df` are private attributes of RootfileHandler;
# a public accessor would be preferable if one exists.
meta = pd.read_pickle(file_handler._r2p.df_folder / "meta.pkl")

# `events` is the handler's own frame, not a copy: columns added to it later
# are added to `file_handler._df` as well.
events = file_handler._df
# Alternative selections, kept for reference:
# e1 = file_handler._getDataFrame(single_uncut_process="e1e1h")
# preselection = "(" + ") & (".join(cuts.preselections["e1Tree"]) + ")"
# Signal sample: rows whose process is "e1e1h".
e1 = events[events.process == "e1e1h"]

# Frame of a second handler built for the "nuTree" tuple.
nu_events = RootfileHandler(tupleName="nuTree")._df

In [None]:
# Display the signal-only frame (rows with process == "e1e1h").
e1

## Build a ranking

In [None]:
from z_rank import createZRanking

# Build the ranking table from the signal frame and the full event frame.
# NOTE(review): the positional 1_000 is presumably the number of ranking
# iterations and `print_every` the progress-report interval -- confirm in z_rank.
# The table is stored under the name "e1_eLpR".
cl = createZRanking(
    e1,
    events,
    1_000,
    print_every=100,
    save_as="e1_eLpR",
)

In [None]:
# Display the table returned by createZRanking.
cl

In [None]:
#%%capture out  
# Do not show the figure. 

def addPreviouslyProposedCutLines(axs):
    """Overlay the previously proposed by-hand cut values on the first four panels.

    Draws dotted orange horizontal lines via ``axhline``: two each on
    ``axs[0]`` and ``axs[1]``, one each on ``axs[2]`` and ``axs[3]``.
    Only the very first line carries a legend label, so the legend shows
    a single entry for all of them.

    Args:
        axs: Indexable collection of at least four axes-like objects
            providing ``axhline``.
    """
    line_style = {"color": "orange", "ls": ":"}
    # (panel index, y positions of the by-hand cuts drawn on that panel)
    by_hand_cuts = (
        (0, (88, 94)),
        (1, (124, 127)),
        (2, (.99,)),
        (3, (.93,)),
    )
    for panel, thresholds in by_hand_cuts:
        for position, y in enumerate(thresholds):
            # Label only the first line overall to avoid duplicate legend entries.
            is_first_line = panel == 0 and position == 0
            extra = {"label": "proposed by-hand cut"} if is_first_line else {}
            axs[panel].axhline(y, **line_style, **extra)

# Five stacked panels sharing the x axis: four cut variables plus the
# efficiency / purity curves in the last panel.
fig, axs = plt.subplots(5, figsize=(10, 25), sharex=True)

# Shaded bands spanned by the cut-value columns of the ranking table,
# plotted against the table index.
axs[0].fill_between(cl.index, cl["mZ >= "], cl["mZ <= "], alpha=.9, label="eff-pur ranked cut")
axs[1].fill_between(cl.index, cl["mRecoil <= "], cl["mRecoil >= "], alpha=.9)
axs[2].fill_between(cl.index, 0, cl["abs(cosTMiss) <= "], alpha=.9)
axs[3].fill_between(cl.index, 0, cl["abs(cosTZ) <= "], alpha=.9)

# Figure of merit, computed once and reused below.
eff_pur = cl.eff * cl.pur
# All panels use cl.index as x, so the marker has to be the index *label* of
# the maximum (idxmax), not its positional offset (argmax). The two coincide
# for a default RangeIndex.
best_iteration = eff_pur.idxmax()

# Mark the best iteration on the cut panels; the last panel gets its marker
# after the curves are drawn (see below), so it is drawn exactly once there.
for ax in axs[:-1]:
    ax.axvline(best_iteration, color="gray", ls="--")
for ax, ylabel in zip(axs, ("mZ", "mRecoil", "abs(cosTMiss)", "abs(cosTZ)")):
    ax.set_ylabel(ylabel)
axs[-1].set_xlabel("iteration")

addPreviouslyProposedCutLines(axs)

cl.pur.plot(ax=axs[-1])
cl.eff.plot(ax=axs[-1])
eff_pur.plot(label="eff*pur", ax=axs[-1])
axs[-1].axvline(best_iteration, color="gray", ls="--")
axs[-1].set_xlim((0, cl.index.max()))
axs[0].legend()
axs[-1].legend()

# NOTE(review): savefig does not create the "fig" directory; it must exist.
fig.savefig("fig/z_ranking.png", facecolor="white", dpi=300)

In [None]:
# Efficiency * purity for every row of the ranking table.
eff_pur = cl.eff * cl.pur
# Print the best value reached ...
print(eff_pur.max())
# ... and display the row where it is reached (argmax is positional, hence iloc).
cl.iloc[eff_pur.argmax()]

## Add the zRank variable

In [None]:
from z_rank import getZRank

# Attach the rank as a new column, using the table saved as "e1_eLpR" above.
# The column is added to `events` in place.
events["zRank"] = getZRank(events, ranking_table="e1_eLpR")

In [None]:
# Re-select the signal rows so that they include the new zRank column.
signal = events[events.process == "e1e1h"]

# Weighted zRank histograms: all events first, then the signal on top.
for sample, tag in ((events, "all"), (signal, "e1e1h")):
    sample["zRank"].plot.hist(bins=200, weights=sample["weight"], label=tag)
plt.yscale("log")
plt.xlabel("Z$_e$")
# Trailing semicolon suppresses the textual repr of the legend object.
plt.legend();

## Add the $\nu\nu$-BDT$_H$

In [None]:
from higgs_only_model import getXGBModel

# Model plus the list of columns to feed it.
model, training_columns = getXGBModel()

# Keep column 1 of predict_proba for every event.
# NOTE(review): presumably the probability of the positive (Higgs) class -- confirm.
h_pred = model.predict_proba(events[training_columns])[:,1]  # sklearn model.
# Store the score as a new column; `events` is modified in place.
events["hBDT"] = h_pred

In [None]:
# Re-select the signal rows so that they include the new hBDT column.
signal = events[events.process == "e1e1h"]

# Weighted hBDT histograms: all events first, then the signal on top.
for sample, tag in ((events, "all"), (signal, "e1e1h")):
    sample["hBDT"].plot.hist(bins=200, weights=sample["weight"], label=tag)
plt.yscale("log")
plt.xlabel("BDT$_H$")
# Trailing semicolon suppresses the textual repr of the legend object.
plt.legend();

In [None]:
# Score the nuTree events with the same model and store column 1 of
# predict_proba as a new column.
nu_scores = model.predict_proba(nu_events[training_columns])
nu_events["hBDT"] = nu_scores[:, 1]

# Signal rows of the nuTree sample.
nu_signal = nu_events[nu_events.process == "nnh"]

# Weighted hBDT histograms: all events first, then the signal on top.
for sample, tag in ((nu_events, "all"), (nu_signal, "nnh")):
    sample["hBDT"].plot.hist(bins=200, weights=sample["weight"], label=tag)
plt.yscale("log")
plt.xlabel("BDT$_H$")
# Trailing semicolon suppresses the textual repr of the legend object.
plt.legend();

## Appendix:
An H ranking for the Higgs part of ZH, Z->$\nu\nu$ events.
(It was produced earlier, along the same lines as the Z ranking; here it is only loaded.)

In [None]:
from z_rank import getZRank

# Load the ranking table named "nu_eLpR" and attach the rank as a new column.
nu_events["hRank"] = getZRank(nu_events, ranking_table="nu_eLpR")
# Signal rows of the nuTree sample, re-selected to include the new column.
nu_signal = nu_events[nu_events.process == "nnh"]
# Weighted hRank histograms: all events first, then the signal on top.
nu_events.hRank.plot.hist(bins=200, weights=nu_events.weight, label="all")
nu_signal.hRank.plot.hist(bins=200, weights=nu_signal.weight, label="nnh")
plt.yscale("log")
# The x axis shows hRank; the former "BDT$_H$" label was a copy-paste leftover
# from the hBDT cells.
plt.xlabel("hRank")
# Trailing semicolon suppresses the textual repr of the legend object.
plt.legend();