# Build zRank

In [None]:
import matplotlib.pyplot as plt
import numexpr
import numpy as np
import pandas as pd
from pathlib import Path

# IPython magic selecting the inline matplotlib backend, followed by the
# default figure size used by every plot that does not pass its own figsize.
%matplotlib inline
plt.rcParams["figure.figsize"] = [10, 5]

In [None]:
import sys
sys.path.append("/home/kunath/iLCSoft/projects/ZH")

from higgsstrahlung.from_root import RootfileHandler
import higgsstrahlung.cuts as cuts

# Name of the root-file folder that every tuple below is read from.
rf = "2020-10-08-235437"

# --- e1Tree: full event table plus the rows of the "e1e1h" process ---------
file_handler = RootfileHandler(root_folder=rf, tupleName="e1Tree")
e1_events = file_handler._df  # NOTE(review): reads a private attribute of the handler
e1_signal = e1_events.loc[e1_events["process"] == "e1e1h"]
# Kept for reference (currently unused):
# meta = pd.read_pickle(file_handler._r2p.df_folder / "meta.pkl")
# preselection = "(" + ") & (".join(cuts.preselections["e1Tree"]) + ")"

# --- e2Tree: full event table plus the rows of the "e2e2h" process ---------
file_handler = RootfileHandler(root_folder=rf, tupleName="e2Tree")
e2_events = file_handler._df
e2_signal = e2_events.loc[e2_events["process"] == "e2e2h"]

# --- nuTree: full event table plus the rows of the "nnh" process -----------
# After this cell `file_handler` refers to the nuTree handler.
file_handler = RootfileHandler(root_folder=rf, tupleName="nuTree")
nu_events = file_handler._df
nu_signal = nu_events.loc[nu_events["process"] == "nnh"]

## Build a ranking

In [None]:
from z_rank import createZRanking

# Build the ranking for the e1 tuple from its signal subset and the full
# event table. The result is stored under the name "e1_eLpR", which the
# later getZRank call refers to via ranking_table.
# NOTE(review): 1_000 is presumably the number of ranking iterations and
# print_every the progress interval - confirm against z_rank.createZRanking.
e1_cl = createZRanking(
    e1_signal,
    e1_events,
    1_000,
    print_every=100,
    save_as="e1_eLpR",
)

In [None]:
# Display the object returned by createZRanking for the e1 tuple.
e1_cl

In [None]:
# Same ranking procedure as for e1, applied to the e2 tuple and stored under
# the name "e2_eLpR".
# NOTE(review): 1_000 is presumably the number of ranking iterations and
# print_every the progress interval - confirm against z_rank.createZRanking.
e2_cl = createZRanking(
    e2_signal,
    e2_events,
    1_000,
    print_every=100,
    save_as="e2_eLpR",
)

In [None]:
#%%capture out  
# Do not show the figure. 

def addPreviouslyProposedCutLines(axs):
    """Overlay the cut values that were earlier proposed by hand.

    Each value is drawn as a dotted orange horizontal line. Only the first
    line carries a legend label, so the legend shows a single entry for all
    of them.

    Args:
        axs: Indexable collection of axes-like objects providing ``axhline``.
            Positions 0 to 3 are used: two lines each on 0 and 1, one line
            each on 2 and 3.
    """
    line_style = {"color": "orange", "ls": ":"}

    # The labelled line goes first; it is the one that appears in the legend.
    axs[0].axhline(88, label="proposed by-hand cut", **line_style)

    # Remaining (axis position, y value) pairs, in drawing order.
    unlabelled = ((0, 94), (1, 124), (1, 127), (2, .99), (3, .93))
    for position, level in unlabelled:
        axs[position].axhline(level, **line_style)

# Ranking table to visualise (the e1 ranking).
cl = e1_cl

# Efficiency * purity per ranking step, computed once and reused below.
eff_pur = cl.eff * cl.pur
# Index *label* of the optimum. The panels are plotted against cl.index, so
# the marker must be placed at the label rather than at the positional
# argmax (the two coincide only for a default 0..n-1 index).
best_iteration = eff_pur.idxmax()

fig, axs = plt.subplots(5, figsize=(10, 25), sharex=True)

# Panels 0-3: the allowed window of each cut variable at every iteration.
axs[0].fill_between(cl.index, cl["mZ >= "], cl["mZ <= "], alpha=.9, label="eff-pur ranked cut")
axs[1].fill_between(cl.index, cl["mRecoil <= "], cl["mRecoil >= "], alpha=.9)
axs[2].fill_between(cl.index, 0, cl["abs(cosTMiss) <= "], alpha=.9)
axs[3].fill_between(cl.index, 0, cl["abs(cosTZ) <= "], alpha=.9)

# zip stops at the shorter sequence, so the fifth panel gets no y-label here.
for ax, ylabel in zip(axs, ("mZ", "mRecoil", "abs(cosTMiss)", "abs(cosTZ)")):
    ax.set_ylabel(ylabel)
axs[-1].set_xlabel("iteration")

# Mark the optimum on the cut panels; the last panel gets its marker after
# its curves are drawn (below), so it is drawn exactly once per panel.
for ax in axs[:-1]:
    ax.axvline(best_iteration, color="gray", ls="--")

addPreviouslyProposedCutLines(axs)

# Last panel: purity, efficiency and their product versus iteration.
cl.pur.plot(ax=axs[-1])
cl.eff.plot(ax=axs[-1])
eff_pur.plot(label="eff*pur", ax=axs[-1])
axs[-1].axvline(best_iteration, color="gray", ls="--")
axs[-1].set_xlim((0, cl.index.max()))
axs[0].legend()
axs[-1].legend()

# Create the output folder on first use so savefig does not fail on a fresh
# checkout.
figure_file = Path("fig") / "z_ranking.png"
figure_file.parent.mkdir(parents=True, exist_ok=True)
fig.savefig(figure_file, facecolor="white", dpi=300)

In [None]:
# Print the largest efficiency * purity reached by the e1 ranking, then show
# the ranking row at the (positional) location of that maximum.
print(e1_cl["eff"].mul(e1_cl["pur"]).max())
e1_cl.iloc[e1_cl["eff"].mul(e1_cl["pur"]).argmax()]

In [None]:
# Print the largest efficiency * purity reached by the e2 ranking, then show
# the ranking row at the (positional) location of that maximum.
print(e2_cl["eff"].mul(e2_cl["pur"]).max())
e2_cl.iloc[e2_cl["eff"].mul(e2_cl["pur"]).argmax()]

## Add the zRank variable

In [None]:
from z_rank import getZRank

# Attach a "zRank" column to each lepton-tuple event table, looked up from
# the ranking table that was saved under the matching name.
for _events, _table in ((e1_events, "e1_eLpR"), (e2_events, "e2_eLpR")):
    _events["zRank"] = getZRank(_events, ranking_table=_table)

In [None]:
# Re-select the signal rows so the subset is taken from the current e1 table
# (which by now carries the zRank column).
e1_signal = e1_events.loc[e1_events["process"] == "e1e1h"]

# Weighted zRank distribution: all events first, signal drawn on the same axes.
ax = e1_events["zRank"].plot.hist(bins=200, weights=e1_events["weight"], label="all")
e1_signal["zRank"].plot.hist(ax=ax, bins=200, weights=e1_signal["weight"], label="e1e1h")
ax.set_yscale("log")
ax.set_xlabel("Z$_e$")
ax.legend();

In [None]:
# Re-select the signal rows so the subset is taken from the current e2 table
# (which by now carries the zRank column).
e2_signal = e2_events.loc[e2_events["process"] == "e2e2h"]

# Weighted zRank distribution: all events first, signal drawn on the same axes.
ax = e2_events["zRank"].plot.hist(bins=200, weights=e2_events["weight"], label="all")
e2_signal["zRank"].plot.hist(ax=ax, bins=200, weights=e2_signal["weight"], label="e2e2h")
ax.set_yscale("log")
# NOTE(review): same axis label as the e1 plot - possibly a copy-paste
# leftover; confirm whether the e2 channel should be labelled differently.
ax.set_xlabel("Z$_e$")
ax.legend();

## Add the $\nu\nu$-BDT$_H$

In [None]:
# Folder holding the cached event tables (one pickle per tuple).
df_path = Path("data")
nu_pkl = df_path / "nu_events.pkl"
e1_pkl = df_path / "e1_events.pkl"
e2_pkl = df_path / "e2_events.pkl"

# Use the cache only when it is complete: all three files are read below, so
# checking a single one would crash on a partially written cache.
if nu_pkl.exists() and e1_pkl.exists() and e2_pkl.exists():
    # The cached tables replace the in-memory ones wholesale. The cache is
    # never invalidated here; delete the pickles to force a recomputation.
    nu_events = pd.read_pickle(nu_pkl)
    e1_events = pd.read_pickle(e1_pkl)
    e2_events = pd.read_pickle(e2_pkl)
else:
    from higgs_only_model import getXGBModel

    model, training_columns = getXGBModel()
    # Column 1 of predict_proba is stored as "hBDT"
    # (presumably the signal-class probability - confirm against the model).
    nu_events["hBDT"] = model.predict_proba(nu_events[training_columns])[:, 1]
    e1_events["hBDT"] = model.predict_proba(e1_events[training_columns])[:, 1]
    e2_events["hBDT"] = model.predict_proba(e2_events[training_columns])[:, 1]

    # Make sure the cache folder exists before writing into it.
    df_path.mkdir(parents=True, exist_ok=True)
    nu_events.to_pickle(nu_pkl)
    e1_events.to_pickle(e1_pkl)
    e2_events.to_pickle(e2_pkl)

In [None]:
# Re-select the signal rows from the current e1 table, which now carries the
# hBDT column.
e1_signal = e1_events.loc[e1_events["process"] == "e1e1h"]

# Weighted hBDT distribution: all events first, signal drawn on the same axes.
ax = e1_events["hBDT"].plot.hist(bins=200, weights=e1_events["weight"], label="all")
e1_signal["hBDT"].plot.hist(ax=ax, bins=200, weights=e1_signal["weight"], label="e1e1h")
ax.set_yscale("log")
ax.set_xlabel("BDT$_H$")
ax.legend();

In [None]:
# Re-select the signal rows from the current e2 table, which now carries the
# hBDT column.
e2_signal = e2_events.loc[e2_events["process"] == "e2e2h"]

# Weighted hBDT distribution: all events first, signal drawn on the same axes.
ax = e2_events["hBDT"].plot.hist(bins=200, weights=e2_events["weight"], label="all")
e2_signal["hBDT"].plot.hist(ax=ax, bins=200, weights=e2_signal["weight"], label="e2e2h")
ax.set_yscale("log")
ax.set_xlabel("BDT$_H$")
ax.legend();

In [None]:
# Re-select the signal rows from the current nu table, which now carries the
# hBDT column.
nu_signal = nu_events.loc[nu_events["process"] == "nnh"]

# Weighted hBDT distribution: all events first, signal drawn on the same axes.
ax = nu_events["hBDT"].plot.hist(bins=200, weights=nu_events["weight"], label="all")
nu_signal["hBDT"].plot.hist(ax=ax, bins=200, weights=nu_signal["weight"], label="nnh")
ax.set_yscale("log")
ax.set_xlabel("BDT$_H$")
ax.legend();

In [None]:
# Display the nuTree event table, including the hBDT column added above.
nu_events

## Appendix:
An H ranking for the Higgs part of ZH, $Z\to\nu\nu$ events.
(It was produced earlier along the lines of the Z ranking; here it is only loaded.)

from z_rank import getZRank

# Attach the H ranking to the nu tuple by reusing getZRank with the ranking
# table stored under the name "nu_eLpR".
nu_events["hRank"] = getZRank(nu_events, ranking_table="nu_eLpR")
# Re-select the signal rows so the subset includes the new hRank column.
nu_signal = nu_events[nu_events.process == "nnh"]

# Weighted hRank distribution for all events and for the "nnh" signal.
nu_events.hRank.plot.hist(bins=200, weights=nu_events.weight, label="all")
nu_signal.hRank.plot.hist(bins=200, weights=nu_signal.weight, label="nnh")
plt.yscale("log")
# The plotted quantity is the hRank column, not the BDT output, so the axis
# is labelled accordingly.
plt.xlabel("hRank")
plt.legend();