In [None]:
import ROOT
import os
import numpy as np
from root_numpy import root2array, rec2array

from tensorflow.keras.models import model_from_json

from matplotlib import pyplot as plt

# sklearn packages
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, roc_auc_score, roc_curve, auc

from array import array
from matplotlib.colors import LogNorm

In [None]:
def plot_input_variables(X_signal, X_bkg, muon):
    """Save signal-vs-background histograms of every input variable as PNG files.

    Both inputs are transposed so that each row holds one variable. Row
    ``station*7 + variable`` is drawn for 8 stations x 7 per-station
    variables; if a 57th column is present it is drawn as well, labelled
    ``eta`` (previously that column was silently skipped although
    ``xtitle_list`` already carried its title).

    Parameters
    ----------
    X_signal, X_bkg : array-like, shape (n_events, n_variables)
        Signal and background samples with one column per input variable.
        Assumes column 56, when present, is eta, as in the branch lists used
        elsewhere in this notebook -- TODO confirm for other callers.
    muon : str
        Label concatenated into the output file names (e.g. ``'1'``).

    Notes
    -----
    * Relies on a module-level ``PLOT_DIR`` string that must be defined before
      this function is called; files are written under
      ``PLOT_DIR + 'data_mc_input_variables/'``.
    * The station index in the file names is zero-based (``station0`` ..
      ``station7``); this is kept unchanged so existing file names stay valid.
    """
    xtitle_list = ['status', 'track x (cm)', 'track y (cm)', 'abs(pullX)', 'abs(pullY)', 'abs(pullDxDz)', 'abs(pullDyDz)', 'eta']

    n_station_variables = 7
    n_stations = 8
    eta_column = n_station_variables * n_stations  # index 56

    # Transpose so that element k is the list of values of variable k.
    signal_columns = np.array(X_signal).T.tolist()
    bkg_columns = np.array(X_bkg).T.tolist()

    output_prefix = PLOT_DIR+'data_mc_input_variables/Muon'+muon+'_'

    def _save_overlay(signal_values, bkg_values, xlabel, filename):
        # Draw both normalised histograms on a cleared figure and write it to disk.
        plt.clf()
        for values, colour, label in ((signal_values, 'blue', 'signal'),
                                      (bkg_values, 'red', 'bkg')):
            plt.hist(values, color=colour, label=label,
                histtype='stepfilled', density=True,
                alpha=0.5, bins=50)
        plt.xlabel(xlabel)
        plt.ylabel('a.u.')
        plt.legend(loc="upper center")
        plt.savefig(filename)

    # Per-station variables: one figure per (variable, station) pair.
    for i in range(n_station_variables):
        for j in range(n_stations):
            column = j*n_station_variables+i
            _save_overlay(signal_columns[column], bkg_columns[column], xtitle_list[i],
                output_prefix+xtitle_list[i]+'_station'+str(j)+'.png')

    # Eta is a single per-muon variable stored after the station block.
    if len(signal_columns) > eta_column and len(bkg_columns) > eta_column:
        _save_overlay(signal_columns[eta_column], bkg_columns[eta_column], xtitle_list[7],
            output_prefix+xtitle_list[7]+'.png')

In [None]:
# Normalisation ranges for the 57 network inputs. The layout is 8 stations x
# [status, track x, track y, pullX, pullY, pullDxDz, pullDyDz], followed by one
# final entry for eta. Track x / track y windows are symmetric about zero and
# differ per station; status and the pull variables use the same bounds in
# every station.
_station_xy_limits = [
    (120., 140.),
    (140., 140.),
    (170., 140.),
    (220., 140.),
    (60., 100.),
    (70., 170.),
    (70., 170.),
    (70., 170.),
]

min_values = []
max_values = []
for _x_limit, _y_limit in _station_xy_limits:
    min_values.extend([-1., -_x_limit, -_y_limit, 0., 0., 0., 0.])
    max_values.extend([1., _x_limit, _y_limit, 3., 5., 12., 5.])

# Last entry: eta.
min_values.append(0.)
max_values.append(2.5)

In [None]:
# ROOT file from which the TreeS_Ds / TreeS_Bu / TreeS_Bd / TreeB trees are read.
inputfile = (
    '/eos/user/b/bjoshi/RunIITau23Mu/AnalysisTrees/'
    'T3MSelectionTree_veto_optimized_globalMuonId.root'
)

Read the input variables from the file

In [None]:
# Input branches for Muon1: 7 variables for each of stations 1..8 (station-major
# order), followed by the muon eta -> 57 columns in total.
branch_names = [
    'Muon1_station%d_%s' % (station, variable)
    for station in range(1, 9)
    for variable in ('status', 'TrackX', 'TrackY', 'pullX', 'pullY', 'pullDxDz', 'pullDyDz')
]
branch_names.append('var_Muon1_Eta')

# For every tree: the (n_events, 57) input matrix plus the segment-compatibility
# value of the same muon, both converted from record arrays to plain ndarrays.
muon1_ds_array = rec2array(root2array(inputfile, "TreeS_Ds", branch_names))
segm_comp_muon1_ds_array = rec2array(root2array(inputfile, "TreeS_Ds", ['Muon1_segmentCompatibility']))

muon1_bu_array = rec2array(root2array(inputfile, "TreeS_Bu", branch_names))
segm_comp_muon1_bu_array = rec2array(root2array(inputfile, "TreeS_Bu", ['Muon1_segmentCompatibility']))

muon1_bd_array = rec2array(root2array(inputfile, "TreeS_Bd", branch_names))
segm_comp_muon1_bd_array = rec2array(root2array(inputfile, "TreeS_Bd", ['Muon1_segmentCompatibility']))

muon1_bkg_array = rec2array(root2array(inputfile, "TreeB", branch_names))
# Fixed copy-paste bug: this used to read "TreeS_Bd", so the "bkg" segment
# compatibility was taken from a different tree than muon1_bkg_array.
# NOTE(review): assumes TreeB carries the Muon1_segmentCompatibility branch like
# the signal trees do -- confirm against the file.
segm_comp_muon1_bkg_array = rec2array(root2array(inputfile, "TreeB", ['Muon1_segmentCompatibility']))

In [None]:
# Input branches for Muon2: 7 variables for each of stations 1..8 (station-major
# order), followed by the muon eta -> 57 columns in total.
branch_names = [
    'Muon2_station%d_%s' % (station, variable)
    for station in range(1, 9)
    for variable in ('status', 'TrackX', 'TrackY', 'pullX', 'pullY', 'pullDxDz', 'pullDyDz')
]
branch_names.append('var_Muon2_Eta')

# For every tree: the (n_events, 57) input matrix plus the segment-compatibility
# value of the same muon, both converted from record arrays to plain ndarrays.
muon2_ds_array = rec2array(root2array(inputfile, "TreeS_Ds", branch_names))
segm_comp_muon2_ds_array = rec2array(root2array(inputfile, "TreeS_Ds", ['Muon2_segmentCompatibility']))

muon2_bu_array = rec2array(root2array(inputfile, "TreeS_Bu", branch_names))
segm_comp_muon2_bu_array = rec2array(root2array(inputfile, "TreeS_Bu", ['Muon2_segmentCompatibility']))

muon2_bd_array = rec2array(root2array(inputfile, "TreeS_Bd", branch_names))
segm_comp_muon2_bd_array = rec2array(root2array(inputfile, "TreeS_Bd", ['Muon2_segmentCompatibility']))

muon2_bkg_array = rec2array(root2array(inputfile, "TreeB", branch_names))
# Fixed copy-paste bug: this used to read "TreeS_Bd", so the "bkg" segment
# compatibility was taken from a different tree than muon2_bkg_array.
# NOTE(review): assumes TreeB carries the Muon2_segmentCompatibility branch like
# the signal trees do -- confirm against the file.
segm_comp_muon2_bkg_array = rec2array(root2array(inputfile, "TreeB", ['Muon2_segmentCompatibility']))

In [None]:
# Input branches for Muon3: 7 variables for each of stations 1..8 (station-major
# order), followed by the muon eta -> 57 columns in total.
branch_names = [
    'Muon3_station%d_%s' % (station, variable)
    for station in range(1, 9)
    for variable in ('status', 'TrackX', 'TrackY', 'pullX', 'pullY', 'pullDxDz', 'pullDyDz')
]
branch_names.append('var_Muon3_Eta')

# For every tree: the (n_events, 57) input matrix plus the segment-compatibility
# value of the same muon, both converted from record arrays to plain ndarrays.
muon3_ds_array = rec2array(root2array(inputfile, "TreeS_Ds", branch_names))
segm_comp_muon3_ds_array = rec2array(root2array(inputfile, "TreeS_Ds", ['Muon3_segmentCompatibility']))

muon3_bu_array = rec2array(root2array(inputfile, "TreeS_Bu", branch_names))
segm_comp_muon3_bu_array = rec2array(root2array(inputfile, "TreeS_Bu", ['Muon3_segmentCompatibility']))

muon3_bd_array = rec2array(root2array(inputfile, "TreeS_Bd", branch_names))
segm_comp_muon3_bd_array = rec2array(root2array(inputfile, "TreeS_Bd", ['Muon3_segmentCompatibility']))

muon3_bkg_array = rec2array(root2array(inputfile, "TreeB", branch_names))
# Fixed copy-paste bug: this used to read "TreeS_Bd", so the "bkg" segment
# compatibility was taken from a different tree than muon3_bkg_array.
# NOTE(review): assumes TreeB carries the Muon3_segmentCompatibility branch like
# the signal trees do -- confirm against the file.
segm_comp_muon3_bkg_array = rec2array(root2array(inputfile, "TreeB", ['Muon3_segmentCompatibility']))

Preprocess the data to normalize all the station variables

In [None]:
print("Preprocessing data...")
# WARNING: the arrays are rescaled in place, so this cell is not idempotent --
# run it exactly once after loading, otherwise already-normalised values are
# normalised again.
#
# The same normalisation is applied to all three muons. It is done column by
# column with NumPy instead of element by element in Python; the arithmetic per
# element is the same as before (assumes the arrays have a floating dtype, as
# returned by rec2array for these branches -- TODO confirm).
for muon_array in (muon1_bkg_array, muon2_bkg_array, muon3_bkg_array):
    # Column 56 is eta (last entry of branch_names). It is only divided by 2.5;
    # no abs() is applied here, so negative eta stays negative.
    muon_array[:, 56] = muon_array[:, 56] / 2.5

    for j in range(0, 56):
        variable = j % 7
        if variable == 0:
            continue  # status columns are left untouched

        column = muon_array[:, j]  # view: edits below modify muon_array itself

        # Pull variables (positions 3..6 within a station) are used as |pull|.
        if variable >= 3:
            np.abs(column, out=column)

        # Very large values (> 99999) are mapped to the upper edge of the range.
        column[column > 99999.0] = max_values[j]

        # Min-max scaling with the fixed ranges from min_values / max_values.
        column -= min_values[j]
        column /= (max_values[j] - min_values[j])

        # Only the upper side is clamped; values below the minimum stay negative.
        column[column > 1.0] = 1.0

In [None]:
# WARNING: the arrays are rescaled in place, so this cell is not idempotent --
# run it exactly once after loading, otherwise already-normalised values are
# normalised again.
#
# The same normalisation is applied to every muon array of every MC sample. It
# is done column by column with NumPy instead of element by element in Python;
# the arithmetic per element is the same as before (assumes the arrays have a
# floating dtype, as returned by rec2array for these branches -- TODO confirm).
for sample_label, sample_arrays in (
    ("dstotau", (muon1_ds_array, muon2_ds_array, muon3_ds_array)),
    ("butotau", (muon1_bu_array, muon2_bu_array, muon3_bu_array)),
    ("bdtotau", (muon1_bd_array, muon2_bd_array, muon3_bd_array)),
):
    print("Preprocessing mc (%s) samples..." % sample_label)

    for muon_array in sample_arrays:
        # Column 56 is eta (last entry of branch_names). It is only divided by
        # 2.5; no abs() is applied here, so negative eta stays negative.
        muon_array[:, 56] = muon_array[:, 56] / 2.5

        for j in range(0, 56):
            variable = j % 7
            if variable == 0:
                continue  # status columns are left untouched

            column = muon_array[:, j]  # view: edits below modify muon_array itself

            # Pull variables (positions 3..6 within a station) are used as |pull|.
            if variable >= 3:
                np.abs(column, out=column)

            # Very large values (> 99999) are mapped to the upper edge of the range.
            column[column > 99999.0] = max_values[j]

            # Min-max scaling with the fixed ranges from min_values / max_values.
            column -= min_values[j]
            column /= (max_values[j] - min_values[j])

            # Only the upper side is clamped; values below the minimum stay negative.
            column[column > 1.0] = 1.0

In [None]:
# Rebuild the "global" model: architecture from the JSON file, weights from the
# HDF5 file. The context manager closes the file even if read() raises.
with open('/eos/user/b/bjoshi/RunIITau23Mu/SegmentComp_plots/new/model_global.json', 'r') as json_file_global:
    loaded_model_json_global = json_file_global.read()
loaded_model_global = model_from_json(loaded_model_json_global)
# load weights into new model
loaded_model_global.load_weights("/eos/user/b/bjoshi/RunIITau23Mu/SegmentComp_plots/new/model_global.h5")

# Rebuild the "tracker" model in the same way.
# NOTE(review): this model is read from '21_01_21_preselected/' while the global
# model above comes from 'new/' -- confirm that mixing the two directories is
# intended.
with open('/eos/user/b/bjoshi/RunIITau23Mu/SegmentComp_plots/21_01_21_preselected/model_tracker.json', 'r') as json_file_tracker:
    loaded_model_json_tracker = json_file_tracker.read()
loaded_model_tracker = model_from_json(loaded_model_json_tracker)
# load weights into new model
loaded_model_tracker.load_weights("/eos/user/b/bjoshi/RunIITau23Mu/SegmentComp_plots/21_01_21_preselected/model_tracker.h5")