In [None]:
from uproot_module import Data
from vbfdm import VBFDM
import uproot
import awkward
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import mplhep as hep

# Disabled: these two statements would delete the axis-label-location entries
# from the mplhep ROOT style dictionary before it is applied below.
#del hep.style.ROOT["xaxis.labellocation"]
#del hep.style.ROOT["yaxis.labellocation"]
# Apply the mplhep ROOT style sheet to every matplotlib figure created after this point.
plt.style.use(hep.style.ROOT)

In [None]:
# Inputs for the sample whose file name is tagged MY5000 / MX1000: one signal
# ROOT file and one background CSV file.
# NOTE: SIGNAL_PATH and BCKGR_PATH are reassigned by the following cells, so after
# those run the two names no longer describe this sample.
SIGNAL_PATH = "/home/santiago/VBF_DMSimp_spin0_EWKExcluded/Events/run_18/DMSimpSpin0_MY5000_MX1000_07042020.root"
BCKGR_PATH = "/home/santiago/VBFDM_UdeA_CMS/scripts/preliminary_study/data/background/ZjetstoNuNu1_72.172087677pb_renamed.csv"
# VBFDM comes from the local `vbfdm` module (not shown here); later cells read
# `.signal.dataframe` / `.background.dataframe` from it and call `.cut_flow(n)`.
vbf5000_1000 = VBFDM(SIGNAL_PATH, BCKGR_PATH)

In [None]:
# Inputs for the sample whose file name is tagged MY1000 / MX100. The background
# CSV path is the same string used for the other samples.
# NOTE: this overwrites the module-level SIGNAL_PATH / BCKGR_PATH set by the previous cell.
SIGNAL_PATH = "/home/santiago/VBF_DMSimp_spin0_EWKExcluded/Events/run_19/DMSimpSpin0_MY1000_MX100_14052020.root"
BCKGR_PATH = "/home/santiago/VBFDM_UdeA_CMS/scripts/preliminary_study/data/background/ZjetstoNuNu1_72.172087677pb_renamed.csv"
# VBFDM comes from the local `vbfdm` module (not shown here).
vbf1000_100 = VBFDM(SIGNAL_PATH, BCKGR_PATH)

In [None]:
# Inputs for the sample whose file name is tagged MY100 / MX10. The background
# CSV path is the same string used for the other samples.
# NOTE: this overwrites the module-level SIGNAL_PATH / BCKGR_PATH set by the previous cell.
SIGNAL_PATH = "/home/santiago/VBF_DMSimp_spin0_EWKExcluded/Events/run_20/DMSimpSpin0_MY100_MX10_14052020.root"
BCKGR_PATH = "/home/santiago/VBFDM_UdeA_CMS/scripts/preliminary_study/data/background/ZjetstoNuNu1_72.172087677pb_renamed.csv"
# VBFDM comes from the local `vbfdm` module (not shown here). Later plotting cells
# take the background distribution from this object.
vbf100_10 = VBFDM(SIGNAL_PATH, BCKGR_PATH)

In [None]:
def plotHisto(bkg, sig, bkg_cs, sig_cs, limits, bkg_label=None, sig_label=None, bins=100, size=(10,6), name=None,
              density=True, luminosity=150):
    """
    Overlay step histograms of one background sample and one or more signal samples.

    :param bkg         Array with the background data
    :param sig         Array, or list of arrays, with the data of the signal(s)
    :param bkg_cs      Background cross section
    :param sig_cs      Signal cross section(s); one value per entry of ``sig``
    :param limits      (lower, upper) limits of the x axis
    :param bkg_label   Legend label for the background
    :param sig_label   Legend label(s) for the signal(s): one per entry of ``sig``,
                       or None to draw the signals without labels
    :param bins        Number of bins
    :param size        Figure size handed to ``plt.subplots``
    :param name        Label of the x axis (no x label is set when None)
    :param density     True (default): every histogram is normalised to unit area, so
                       only shapes are compared and the cross sections are not used.
                       False: every event is weighted by
                       ``luminosity * cross_section / n_events`` and all samples are
                       binned over the common range given by ``limits``.
    :param luminosity  Integrated luminosity in fb^-1, used for the weights and the title.
                       The cross sections are assumed to be in fb so that the weighted
                       histogram is an event count -- TODO confirm the units.
    :raises ValueError if ``sig``, ``sig_cs`` and ``sig_label`` differ in length or if
                       any sample is empty
    """
    # A single signal / cross section / label is shorthand for a one-element list.
    if not isinstance(sig, list):
        sig = [sig]
    if not isinstance(sig_cs, list):
        sig_cs = [sig_cs]
    if sig_label is None:
        # No labels requested: one empty label per signal instead of a single [None].
        sig_label = [None] * len(sig)
    elif not isinstance(sig_label, list):
        sig_label = [sig_label]

    # Validate up front so that a mismatch fails with a clear message, before any
    # figure is created.
    if len(sig_cs) != len(sig):
        raise ValueError(
            "sig_cs has %d entries but sig has %d" % (len(sig_cs), len(sig)))
    if len(sig_label) != len(sig):
        raise ValueError(
            "sig_label has %d entries but sig has %d" % (len(sig_label), len(sig)))
    if len(bkg) == 0 or any(len(sample) == 0 for sample in sig):
        raise ValueError("bkg and every signal sample must contain at least one event")

    x_low, x_high = limits[0], limits[1]

    fig, ax = plt.subplots(1, figsize=size)
    # Background first, then the signals, so the colour cycle is assigned in that order.
    samples = [(bkg, bkg_cs, bkg_label)] + list(zip(sig, sig_cs, sig_label))
    for data, cross_section, label in samples:
        if density:
            norm = {"density": True}
        else:
            per_event = luminosity * cross_section / len(data)
            norm = {
                "weights": np.full(len(data), per_event),
                # Shared bin edges: event counts per bin are only comparable
                # between samples when every sample uses the same bins.
                "range": (min(x_low, x_high), max(x_low, x_high)),
            }
        ax.hist(data, histtype="step", bins=bins, label=label, **norm)
    ax.set_xlim(x_low, x_high)

    # A unit-area histogram does not show event counts, so label it accordingly.
    ax.set_ylabel("Normalized events" if density else "Events")
    if name is not None:
        ax.set_xlabel(name)
    ax.set_title(f"{luminosity:g} fb$^{{-1}}$ (13 TeV)", loc="right")
    ax.legend(fontsize="large", loc="best")

In [None]:
# Call cut_flow with index 0 on each of the three analysis objects.
# NOTE(review): cut_flow is defined in the `vbfdm` module (not shown) -- presumably it
# applies selection step 0 to the object's dataframes; confirm before re-running
# this cell out of order. If the last call returns a value, the notebook displays it.
vbf100_10.cut_flow(0)
vbf1000_100.cut_flow(0)
vbf5000_1000.cut_flow(0)

In [None]:
# 'HT' column: background (taken from vbf100_10) against the three signal samples.
cut_name = 'HT'
bkg = vbf100_10.background.dataframe['HT']
s1, s2, s3 = (
    analysis.signal.dataframe['HT']
    for analysis in (vbf100_10, vbf1000_100, vbf5000_1000)
)

plotHisto(
    bkg=bkg[bkg>50],  # only the background is restricted to values above 50
    sig=[s1, s2, s3],
    bkg_cs=7.217209e4,
    # signal cross sections, in the same order as the signal list
    sig_cs=[8.44992635099, 3.66740016, 0.02681618247],
    limits=(0,7000),
    bkg_label="Z+Jets",
    sig_label=["MY100 MX10", "MY1000 MX100", "MY5000 MX1000"],
    bins=100,
    name=cut_name,
)

In [None]:
# Call cut_flow with index 1 on each of the three analysis objects.
# NOTE(review): cut_flow is defined in the `vbfdm` module (not shown) -- presumably it
# applies selection step 1 to the object's dataframes; confirm.
vbf100_10.cut_flow(1)
vbf1000_100.cut_flow(1)
vbf5000_1000.cut_flow(1)

In [None]:
# 'Jet.Eta[0]' column: background (taken from vbf100_10) against the three signal samples.
cut_name = r'$\eta(J_0)$'
bkg = vbf100_10.background.dataframe['Jet.Eta[0]']
s1, s2, s3 = (
    analysis.signal.dataframe['Jet.Eta[0]']
    for analysis in (vbf100_10, vbf1000_100, vbf5000_1000)
)

plotHisto(
    bkg=bkg,
    sig=[s1, s2, s3],
    bkg_cs=7.217209e4,
    # signal cross sections, in the same order as the signal list
    sig_cs=[8.44992635099, 3.66740016, 0.02681618247],
    # plotHisto expects (lower, upper); passing the pair reversed makes
    # Axes.set_xlim draw an inverted (mirrored) eta axis.
    limits=(-5, 5),
    bkg_label="Z+Jets",
    sig_label=["MY100 MX10", "MY1000 MX100", "MY5000 MX1000"],
    bins=100,
    name=cut_name,
)

In [None]:
# 'Jet.Eta[1]' column: background (taken from vbf100_10) against the three signal samples.
cut_name = r'$\eta(J_1)$'
bkg = vbf100_10.background.dataframe['Jet.Eta[1]']
s1, s2, s3 = (
    analysis.signal.dataframe['Jet.Eta[1]']
    for analysis in (vbf100_10, vbf1000_100, vbf5000_1000)
)

plotHisto(
    bkg=bkg,
    sig=[s1, s2, s3],
    bkg_cs=7.217209e4,
    # signal cross sections, in the same order as the signal list
    sig_cs=[8.44992635099, 3.66740016, 0.02681618247],
    # plotHisto expects (lower, upper); passing the pair reversed makes
    # Axes.set_xlim draw an inverted (mirrored) eta axis.
    limits=(-5, 5),
    bkg_label="Z+Jets",
    sig_label=["MY100 MX10", "MY1000 MX100", "MY5000 MX1000"],
    bins=100,
    name=cut_name,
)

In [None]:
# Product of the 'Jet.Eta[0]' and 'Jet.Eta[1]' columns: background (taken from
# vbf100_10) against the three signal samples.
cut_name = r'$\eta(J_0)*\eta(J_1)$'
bkg = vbf100_10.background.dataframe['Jet.Eta[0]']*vbf100_10.background.dataframe['Jet.Eta[1]']
s1, s2, s3 = (
    analysis.signal.dataframe['Jet.Eta[0]']*analysis.signal.dataframe['Jet.Eta[1]']
    for analysis in (vbf100_10, vbf1000_100, vbf5000_1000)
)

plotHisto(
    bkg=bkg,
    sig=[s1, s2, s3],
    bkg_cs=7.217209e4,
    # signal cross sections, in the same order as the signal list
    sig_cs=[8.44992635099, 3.66740016, 0.02681618247],
    # plotHisto expects (lower, upper); passing the pair reversed makes
    # Axes.set_xlim draw an inverted (mirrored) axis.
    # NOTE(review): the product of two eta values can fall outside [-5, 5];
    # widen the window if the tails matter.
    limits=(-5, 5),
    bkg_label="Z+Jets",
    sig_label=["MY100 MX10", "MY1000 MX100", "MY5000 MX1000"],
    bins=100,
    name=cut_name,
)

In [None]:
# Call cut_flow with index 2 on each of the three analysis objects.
# NOTE(review): cut_flow is defined in the `vbfdm` module (not shown) -- presumably it
# applies selection step 2 to the object's dataframes; confirm.
vbf100_10.cut_flow(2)
vbf1000_100.cut_flow(2)
vbf5000_1000.cut_flow(2)

In [None]:
# Absolute value of the 'DPhi_J0_J1' column: background (taken from vbf100_10)
# against the three signal samples.
cut_name = r'$|\Delta \phi (J_0,J_1)|$'
bkg = np.abs(vbf100_10.background.dataframe['DPhi_J0_J1'])
s1, s2, s3 = (
    np.abs(analysis.signal.dataframe['DPhi_J0_J1'])
    for analysis in (vbf100_10, vbf1000_100, vbf5000_1000)
)

plotHisto(
    bkg=bkg,
    sig=[s1, s2, s3],
    bkg_cs=7.217209e4,
    # signal cross sections, in the same order as the signal list
    sig_cs=[8.44992635099, 3.66740016, 0.02681618247],
    limits=(0,4),
    bkg_label="Z+Jets",
    sig_label=["MY100 MX10", "MY1000 MX100", "MY5000 MX1000"],
    bins=100,
    name=cut_name,
)

In [None]:
# Inspection only: show the column names of the vbf5000_1000 signal dataframe
# as this cell's output.
vbf5000_1000.signal.dataframe.columns
# vbf5000_1000.signal.cuts

In [None]:
# Call cut_flow with index 3 on each of the three analysis objects.
# NOTE(review): cut_flow is defined in the `vbfdm` module (not shown) -- presumably it
# applies selection step 3 to the object's dataframes; confirm.
vbf100_10.cut_flow(3)
vbf1000_100.cut_flow(3)
vbf5000_1000.cut_flow(3)

In [None]:
# Absolute value of the 'max_inv_mass' column: background (taken from vbf100_10)
# against the three signal samples.
cut_name = r'$max(m(J_i,J_j))$'
bkg = np.abs(vbf100_10.background.dataframe['max_inv_mass'])
s1, s2, s3 = (
    np.abs(analysis.signal.dataframe['max_inv_mass'])
    for analysis in (vbf100_10, vbf1000_100, vbf5000_1000)
)

plotHisto(
    bkg=bkg,
    sig=[s1, s2, s3],
    bkg_cs=7.217209e4,
    # signal cross sections, in the same order as the signal list
    sig_cs=[8.44992635099, 3.66740016, 0.02681618247],
    limits=(0,6000),
    bkg_label="Z+Jets",
    sig_label=["MY100 MX10", "MY1000 MX100", "MY5000 MX1000"],
    bins=100,
    name=cut_name,
)

In [None]:
# Call cut_flow with index 4 on each of the three analysis objects.
# NOTE(review): cut_flow is defined in the `vbfdm` module (not shown) -- presumably it
# applies selection step 4 to the object's dataframes; confirm.
vbf100_10.cut_flow(4)
vbf1000_100.cut_flow(4)
vbf5000_1000.cut_flow(4)

In [None]:
def minDEta(df):
    """
    Row-wise minimum of the absolute values of the DPhi_MET_J0, DPhi_MET_J1 and
    DPhi_MET_J2 columns.

    Despite the "DEta" in its name, this function reads the DPhi columns only.

    :param df  DataFrame containing the three DPhi_MET_J* columns
    :return    One value per row of ``df``
    """
    dphi_columns = [f"DPhi_MET_J{i}" for i in range(3)]
    magnitudes = np.abs(df[dphi_columns])
    return magnitudes.min(axis=1)

# minDEta of each dataframe: background (taken from vbf100_10) against the three
# signal samples.
cut_name = r'$min(|\Delta\phi(MET,J_i)|)$'
bkg = minDEta(vbf100_10.background.dataframe)
s1, s2, s3 = (
    minDEta(analysis.signal.dataframe)
    for analysis in (vbf100_10, vbf1000_100, vbf5000_1000)
)

plotHisto(
    bkg=bkg,
    sig=[s1, s2, s3],
    bkg_cs=7.217209e4,
    # signal cross sections, in the same order as the signal list
    sig_cs=[8.44992635099, 3.66740016, 0.02681618247],
    limits=(0,4),
    bkg_label="Z+Jets",
    sig_label=["MY100 MX10", "MY1000 MX100", "MY5000 MX1000"],
    bins=100,
    name=cut_name,
)

In [None]:
# Call cut_flow with index 5 on each of the three analysis objects.
# NOTE(review): cut_flow is defined in the `vbfdm` module (not shown) -- presumably it
# applies selection step 5 to the object's dataframes; confirm.
vbf100_10.cut_flow(5)
vbf1000_100.cut_flow(5)
vbf5000_1000.cut_flow(5)

In [None]:
def deltaEta(df):
    """
    Absolute difference between the "Jet.Eta[0]" and "Jet.Eta[1]" columns.

    :param df  DataFrame (or other mapping of columns) holding both columns
    :return    |Jet.Eta[0] - Jet.Eta[1]|, one value per row
    """
    eta_j0 = df["Jet.Eta[0]"]
    eta_j1 = df["Jet.Eta[1]"]
    return np.abs(eta_j0 - eta_j1)

# deltaEta of each dataframe: background (taken from vbf100_10) against the three
# signal samples.
cut_name = r'$|\Delta \eta (J_0,J_1)|$'
bkg = deltaEta(vbf100_10.background.dataframe)
s1, s2, s3 = (
    deltaEta(analysis.signal.dataframe)
    for analysis in (vbf100_10, vbf1000_100, vbf5000_1000)
)

plotHisto(
    bkg=bkg,
    sig=[s1, s2, s3],
    bkg_cs=7.217209e4,
    # signal cross sections, in the same order as the signal list
    sig_cs=[8.44992635099, 3.66740016, 0.02681618247],
    limits=(0,6),
    bkg_label="Z+Jets",
    sig_label=["MY100 MX10", "MY1000 MX100", "MY5000 MX1000"],
    bins=100,
    name=cut_name,
)

In [None]:
# Call cut_flow with index 6 on each of the three analysis objects.
# NOTE(review): cut_flow is defined in the `vbfdm` module (not shown) -- presumably it
# applies selection step 6 to the object's dataframes; confirm.
vbf100_10.cut_flow(6)
vbf1000_100.cut_flow(6)
vbf5000_1000.cut_flow(6)