In [2]:
import os
import numpy as np
import pandas as pd
import joblib
import json
import tensorflow as tf
import gc

In [3]:
# Tuning directory and the trigger configuration file stored inside it.
tuning_path = '../tunings/TrigL2_20180125_v8'
tuning_file = "ElectronRingerLooseTriggerConfig.json"
# Build the full location once, then parse the JSON configuration into `conf`.
conf_path = os.path.join(tuning_path, tuning_file)
with open(conf_path) as f:
    conf = json.load(f)

In [4]:
#conf

In [5]:
def open_boosted(path=""):
    '''
    Load a .npz file and turn it into a single pandas DataFrame.

    The archive stores its columns split into four type groups ('float', 'int',
    'bool' and 'object'). Each group has a ``data_<type>`` matrix and a matching
    ``features_<type>`` array of column names. The groups are concatenated side
    by side and the columns are then reordered following ``ordered_features``.

    Arguments:
    path (str) - the full path to the .npz file

    Returns:
    pandas.DataFrame holding the columns listed in ``ordered_features`` followed
    by 'target' (from ``target``), 'et_bin' (from ``etBinIdx``) and 'eta_bin'
    (from ``etaBinIdx``).
    '''
    # NOTE(review): allow_pickle=True can execute arbitrary code while loading;
    # presumably it is required for the object-typed arrays. Only open archives
    # that come from a trusted source.
    # The context manager closes the underlying file as soon as every array has
    # been read, instead of leaving that to the object's finalizer.
    with np.load(path, allow_pickle=True) as npz:
        d = {key: npz[key] for key in npz.files}
    # one temporary DataFrame per type group, concatenated column-wise
    df = pd.concat(
        [
            pd.DataFrame(data=d['data_%s' % itype], columns=d['features_%s' % itype])
            for itype in ('float', 'int', 'bool', 'object')
        ],
        axis=1,
    )
    # restore the feature order recorded in the file
    df = df[d['ordered_features']]
    # add the target and the bin bookkeeping columns
    df['target'] = d['target']
    df['et_bin'] = d['etBinIdx']
    df['eta_bin'] = d['etaBinIdx']
    # drop the raw arrays and ask the garbage collector to reclaim them
    del d
    gc.collect()
    return df

In [7]:
# Directory holding the Zee_boosted samples.
# my pc path
#data_dir = '/media/natmourajr/Backup/Work/CERN/Qualify/cern_data/Zee_boosted'
# LPS path
data_dir = '/home/natmourajr/Workspace/CERN/Qualify/data/Zee_boosted'
# The dataset name is both the sub-directory and the prefix of the file name,
# so it is written once here instead of twice inside one long literal.
dataset_name = 'mc16_13TeV.302236_309995_341330.sgn.boosted_probes.WZ_llqq_plus_radion_ZZ_llqq_plus_ggH3000.merge.25bins'
# File loaded here: <data_dir>/<dataset_name>/<dataset_name>_et4_eta0.npz
data_path = os.path.join(data_dir, dataset_name, '%s_et%i_eta%i.npz' % (dataset_name, 4, 0))
df = open_boosted(data_path)

In [8]:
# Preview the first five rows of the freshly loaded frame.
df.head(n=5)

Unnamed: 0,RunNumber,avgmu,trig_L2_cl_et,trig_L2_cl_eta,trig_L2_cl_phi,trig_L2_cl_reta,trig_L2_cl_ehad1,trig_L2_cl_eratio,trig_L2_cl_f1,trig_L2_cl_f3,...,trig_L2_cl_lhtight_et0to12,trig_L2_cl_lhtight_et12to20,trig_L2_cl_lhtight_et22toInf,trig_L2_el_cut_pt0to15,trig_L2_el_cut_pt15to20,trig_L2_el_cut_pt20to50,trig_L2_el_cut_pt50toInf,target,et_bin,eta_bin
0,300000,46.5,1187730.5,0.400507,0.212308,0.966145,1612.466919,0.995495,0.102619,0.009935,...,True,True,True,True,True,True,True,1.0,4,0
1,300000,46.5,1187730.5,0.400507,0.212308,0.966145,1612.466919,0.995495,0.102619,0.009935,...,True,True,True,True,True,True,True,1.0,4,0
2,300000,37.5,1169499.375,0.289294,-1.42802,0.974601,4729.030762,0.993501,0.090885,0.014455,...,True,True,True,True,True,True,True,1.0,4,0
3,300000,38.5,1239348.125,-0.114279,2.722416,0.97535,11178.170898,0.99139,0.054241,0.021975,...,True,True,True,True,True,True,True,1.0,4,0
4,300000,38.5,1239348.125,-0.114279,2.722416,0.97535,11178.170898,0.99139,0.054241,0.021975,...,True,True,True,True,True,True,True,1.0,4,0


In [9]:
def add_tuning_decision(df, tuning_path, config_dict):
    '''
    Evaluate the tuned Keras model on the rings of ``df`` and append its decision.

    The frame is modified in place. Three columns are added, each suffixed with
    ``<operation>_<version>`` (lower-cased ``__operation__`` and ``__version__``
    from ``config_dict``):
      nn_output_*   - output of the model on the norm1-normalised ring values
      thr_*         - threshold, linear in avgmu: ``avgmu * slope + offset``
      nn_decision_* - 1 where nn_output > thr, 0 otherwise

    Arguments:
    df (pandas.DataFrame) - events of exactly one (et_bin, eta_bin) pair; needs
        the columns 'et_bin', 'eta_bin', 'avgmu' and 'trig_L2_cl_ring_0' up to
        'trig_L2_cl_ring_99'
    tuning_path (str) - directory that the entries of 'Model__path' are joined to
    config_dict (dict) - tuning configuration; the keys read here are
        'Model__path', 'Threshold__slope', 'Threshold__offset', '__operation__'
        and '__version__'

    Returns:
    None (the new columns are written into ``df``)

    Raises:
    ValueError - if ``df`` is empty or holds more than one (et_bin, eta_bin) pair
    '''
    # One model is applied to the whole frame, so refuse mixed or empty input
    # instead of silently using the model of whichever row comes first.
    bins = df[['et_bin', 'eta_bin']].drop_duplicates()
    if len(bins) != 1:
        raise ValueError(
            'add_tuning_decision expects events from exactly one '
            '(et_bin, eta_bin) pair, found %i' % len(bins)
        )

    # NOTE(review): the index into the config lists is the plain SUM
    # et_bin + eta_bin, which is not unique per pair (e.g. (1, 3) and (3, 1)
    # give the same index). If the config lists are flattened over a 2-D
    # (et, eta) grid, the index should presumably be
    # et_bin * n_eta_bins + eta_bin -- confirm against the config layout.
    idx = int(bins['et_bin'].iloc[0] + bins['eta_bin'].iloc[0])
    print(idx)

    # The config stores the model path with an '.onnx' suffix; the Keras
    # architecture (.json) and weights (.h5) share the same base name.
    model_base = os.path.join(tuning_path, config_dict["Model__path"][idx].replace('.onnx', ''))
    with open(model_base + '.json', 'r') as f:
        model = tf.keras.models.model_from_json(f.read())
    model.load_weights(model_base + '.h5')

    slope = config_dict["Threshold__slope"][idx]
    offset = config_dict["Threshold__offset"][idx]

    # shared suffix of the three output columns
    tag = '%s_%s' % (config_dict['__operation__'].lower(), config_dict['__version__'])
    output_col = 'nn_output_' + tag
    thr_col = 'thr_' + tag
    decision_col = 'nn_decision_' + tag

    # ring list
    rings = ['trig_L2_cl_ring_%i' % iring for iring in range(100)]

    def norm1(data):
        # divide every row by the absolute value of its sum; rows summing to
        # zero are left untouched (divided by 1) to avoid a division by zero
        norms = np.abs(data.sum(axis=1))
        norms[norms == 0] = 1
        return data / norms[:, None]

    df[output_col] = model.predict(norm1(df[rings].values))
    df[thr_col] = df['avgmu'] * slope + offset
    # 1 when the network output is above the threshold, 0 otherwise
    df[decision_col] = (df[output_col] > df[thr_col]).astype('int64')


In [10]:
# Same tuning directory the configuration was read from. The call adds the
# nn_output / thr / nn_decision columns to df in place and prints the model index.
tuning_path = '../tunings/TrigL2_20180125_v8'
add_tuning_decision(df, tuning_path, conf)

4


In [11]:
# Preview the first five rows again; the frame now ends with the
# nn_output_loose_v8, thr_loose_v8 and nn_decision_loose_v8 columns.
df.head(n=5)

Unnamed: 0,RunNumber,avgmu,trig_L2_cl_et,trig_L2_cl_eta,trig_L2_cl_phi,trig_L2_cl_reta,trig_L2_cl_ehad1,trig_L2_cl_eratio,trig_L2_cl_f1,trig_L2_cl_f3,...,trig_L2_el_cut_pt0to15,trig_L2_el_cut_pt15to20,trig_L2_el_cut_pt20to50,trig_L2_el_cut_pt50toInf,target,et_bin,eta_bin,nn_output_loose_v8,thr_loose_v8,nn_decision_loose_v8
0,300000,46.5,1187730.5,0.400507,0.212308,0.966145,1612.466919,0.995495,0.102619,0.009935,...,True,True,True,True,1.0,4,0,0.674055,-0.167558,1
1,300000,46.5,1187730.5,0.400507,0.212308,0.966145,1612.466919,0.995495,0.102619,0.009935,...,True,True,True,True,1.0,4,0,0.674055,-0.167558,1
2,300000,37.5,1169499.375,0.289294,-1.42802,0.974601,4729.030762,0.993501,0.090885,0.014455,...,True,True,True,True,1.0,4,0,1.400567,-0.059216,1
3,300000,38.5,1239348.125,-0.114279,2.722416,0.97535,11178.170898,0.99139,0.054241,0.021975,...,True,True,True,True,1.0,4,0,1.624148,-0.071254,1
4,300000,38.5,1239348.125,-0.114279,2.722416,0.97535,11178.170898,0.99139,0.054241,0.021975,...,True,True,True,True,1.0,4,0,1.624148,-0.071254,1
