In [1]:
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
import uproot
from sklearn.metrics import roc_curve, auc
from sklearn.preprocessing import StandardScaler
import matplotlib
from matplotlib import pyplot as plt
import root_pandas

from keras.models import Sequential, Model, load_model
from keras.optimizers import SGD
from keras.layers import Input, Activation, Dense, Convolution2D, MaxPooling2D, Dropout, Flatten, LeakyReLU
from keras.utils import np_utils

from IPython.core.display import display, HTML
# Inject a CSS rule so the notebook container uses the full browser width.
full_width_css = "<style>.container { width:100% !important; }</style>"
display(HTML(full_width_css))

Welcome to JupyROOT 6.12/07


Using TensorFlow backend.


In [2]:
# Load the saved Keras model from disk; it is used for scoring in the cells below.
model_file = 'nn_model.h5'
model = load_model(model_file)

In [3]:
# Input ROOT files and the name of the tree that is read from each of them.
input_sig  = '../analysis/objects_gg_HH_bbbb_SM.root'
datsp_bkg  = '../background/data_3btag_with_weights_AR.root'
input_dat  = '../analysis/objects_data_BTagCSV_Run2016_ALL.root'
treename = 'bbbbTree'

## convert to dataframes
# Columns fed to the network. The order of this list is the order of the
# model inputs, so it must not be shuffled.
# NOTE(review): presumably this has to match the order used when the model
# was trained -- confirm against the training notebook.
vars_training = [ 'H1_b1_pt', 'H1_b2_pt', 'H2_b1_pt', 'H2_b2_pt',
#                  'H1_b1_m', 'H1_b2_m', 'H2_b1_m', 'H2_b2_m', 
                  'H1_b1_eta', 'H1_b2_eta', 'H2_b1_eta', 'H2_b2_eta', 
                  'H1_eta', 'H1_pt', 'H2_eta', 'H2_pt', 
                  'HH_eta', 'HH_pt','HH_m', 'H1H2_deltaEta', 'H1_costhetaCM', 'H1H2_deltaPhi']

# extra variables needed for preselections
extra_vars = ['H1_m', 'H2_m', 'n_btag', 'xs', 'norm_weight']

# Every column read from the trees: training inputs first, then the extras.
# Duplicates are removed while keeping first-seen order; going through a
# set() here would make the column order arbitrary and not reproducible.
all_vars = []
for var in vars_training + extra_vars:
    if var not in all_vars:
        all_vars.append(var)

In [4]:
# Save Scores in Dataframes

print 'Opening file: ', input_sig
arrs_sig  = uproot.open(input_sig)[treename]
print 'Opening file: ', datsp_bkg
arrs_bkg  = uproot.open(datsp_bkg)[treename]
print 'Opening file: ', input_dat
arrs_dat  = uproot.open(input_dat)[treename]

print 'Converting to pandas'
data_sig = arrs_sig.pandas.df(all_vars)
data_bkg = arrs_bkg.pandas.df(all_vars+['bkg_model_w'])
data_rel = arrs_dat.pandas.df(all_vars)

## apply a selection on the datasets
data_bkg = data_bkg[data_bkg['n_btag'] == 3]
data_sig = data_sig[data_sig['n_btag'] >= 4]

# restrict training to the signal region
data_bkg['chi'] = np.sqrt( (data_bkg['H1_m']-120)*(data_bkg['H1_m']-120)+(data_bkg['H2_m']-110)*(data_bkg['H2_m']-110))
data_sig['chi'] = np.sqrt( (data_sig['H1_m']-120)*(data_sig['H1_m']-120)+(data_sig['H2_m']-110)*(data_sig['H2_m']-110))
data_rel['chi'] = np.sqrt( (data_rel['H1_m']-120)*(data_rel['H1_m']-120)+(data_rel['H2_m']-110)*(data_rel['H2_m']-110))

data_bkg = data_bkg[data_bkg['chi'] < 30]
data_sig = data_sig[data_sig['chi'] < 30]
data_rel = data_rel[data_rel['chi'] < 30]

data_bkg = data_bkg.drop(columns=['chi'])
data_sig = data_sig.drop(columns=['chi'])
data_rel = data_rel.drop(columns=['chi'])

Opening file:  ../analysis/objects_gg_HH_bbbb_SM.root
Opening file:  ../background/data_3btag_with_weights_AR.root
Opening file:  ../analysis/objects_data_BTagCSV_Run2016_ALL.root
Converting to pandas


In [5]:
print 'Label datasets'
data_bkg['isSignal'] = np.zeros(len(data_bkg))
data_sig['isSignal'] = np.ones(len(data_sig))
data_rel['isSignal'] = np.full(len(data_rel),2)

print 'Combine datasets'
all_data = pd.concat([data_bkg, data_sig, data_rel], axis=0, sort=False)

Label datasets
Combine datasets


In [6]:
print 'Normalizing input for NN'
scaler = StandardScaler().fit(all_data[vars_training])
all_data[vars_training] = scaler.transform(all_data[vars_training])

Normalizing input for NN


In [7]:
print 'Predicting signal'
all_data['BDT_Score'] = model.predict(all_data[vars_training])

Predicting signal


In [8]:
print 'Unnormalizing dataframe'
all_data[vars_training] = scaler.inverse_transform(all_data[vars_training])

Unnormalizing dataframe


In [9]:
print 'Separating signal and background dataframes'
sig = all_data[all_data.isSignal == 1][all_vars+['BDT_Score']]
bkg = all_data[all_data.isSignal == 0][all_vars+['BDT_Score', 'bkg_model_w']]
rel = all_data[all_data.isSignal == 2][all_vars+['BDT_Score']]

Separating signal and background dataframes


In [10]:
print 'Write to root file'
sig.to_root('NN_signal.root',key='bbbbTree')
bkg.to_root('NN_background.root',key='bbbbTree')
rel.to_root('NN_data.root',key='bbbbTree')

Write to root file
