# Loading models, creating input and estimating output

In [1]:
import numpy as np

from keras.models import load_model

## Starting with DeepCSV

In [2]:
# Load the saved Keras model from its HDF5 file (path is relative to the
# current working directory) and print its layer-by-layer summary.
model = load_model('models/DeepCSV_phase1_training_model.h5')
model.summary()

Model: "model_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 66)]              0         
_________________________________________________________________
dense_1 (Dense)              (None, 100)               6700      
_________________________________________________________________
dropout_1 (Dropout)          (None, 100)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 100)               10100     
_________________________________________________________________
dropout_2 (Dropout)          (None, 100)               0         
_________________________________________________________________
dense_3 (Dense)              (None, 100)               10100     
_________________________________________________________________
dropout_3 (Dropout)          (None, 100)               0   

In [3]:
# Build a dummy input batch: a single row of 66 features, every entry set to
# the same constant (0.8). The values are constant, not random.
inputs = np.full(shape=(1, 66), fill_value=0.8)
print(inputs.shape)

(1, 66)


In [4]:
# Predict output: run the loaded model on the current `inputs` batch and
# print the resulting score array.
score = model.predict(inputs)
print(score)

[[0.28030092 0.01946744 0.17605226 0.52417934]]


In [5]:
# Build a second dummy input batch: one row of 66 features, all set to the
# constant 0.4 (again constant, not random), then show its shape and content.
inputs = np.full(shape=(1, 66), fill_value=0.4)
print(inputs.shape, inputs, sep='\n')

(1, 66)
[[0.4 0.4 0.4 0.4 0.4 0.4 0.4 0.4 0.4 0.4 0.4 0.4 0.4 0.4 0.4 0.4 0.4 0.4
  0.4 0.4 0.4 0.4 0.4 0.4 0.4 0.4 0.4 0.4 0.4 0.4 0.4 0.4 0.4 0.4 0.4 0.4
  0.4 0.4 0.4 0.4 0.4 0.4 0.4 0.4 0.4 0.4 0.4 0.4 0.4 0.4 0.4 0.4 0.4 0.4
  0.4 0.4 0.4 0.4 0.4 0.4 0.4 0.4 0.4 0.4 0.4 0.4]]


In [6]:
# Predict output: run the loaded model on the current `inputs` batch and
# print the resulting score array.
score = model.predict(inputs)
print(score)

[[0.31404865 0.02748379 0.26622272 0.39224482]]


## Finding DeepNtuples

In [20]:
cat /afs/cern.ch/user/t/tmoxter/public/VertexingData/README

Flat NTuples of the loose and tight IVF settings and subsequent DeepCSV discriminators
distSig0 reffers to the cut variable of 2D distance Significance, which was set to 0 in both cases (loose and tight IVF) because hardly any change to the jet distribution of vertex category was seen in the CSV's tagging varaibles. 

in summary:
DNT_out_distSig0_l.root -> distSig2Dmin=0, vertexMinDLenSig=0.25, vertexMinDLen2DSig=0.125
DNT_out_distSig0_t.root -> distSig2Dmin=0, vertexMinDLenSig=0.5, vertexMinDLen2DSig=0.25 
DNToutput_loose_0_.root -> distSig2Dmin=2, vertexMinDLenSig=0.25, vertexMinDLen2DSig=0.125 
DNToutput_tight_0_.root -> distSig2Dmin=2, vertexMinDLenSig=0.5, vertexMinDLen2DSig=0.25



In [8]:
ls /afs/cern.ch/user/t/tmoxter/public/VertexingData/

[0m[0mDNT_out_distSig0_l.root[0m  [0mDNToutput_loose_0_.root[0m  [1;34mNotFlat[0m/
[0mDNT_out_distSig0_t.root[0m  [0mDNToutput_tight_0_.root[0m  [0mREADME[0m


## Adapting DeepNtuples

In [9]:
import uproot
import numpy as np
import pandas as pd
import awkward1 as ak

import matplotlib.pyplot as plt
import mplhep as hep

In [10]:
# Open DNT_out_distSig0_l.root, select the 'deepntuplizer/tree' object and
# read its branches with .arrays(); the bare expression below displays the result.
# NOTE(review): this is an absolute AFS path in a user's public area, so the
# cell only runs where AFS is mounted — consider a configurable data directory.
deepntuple = uproot.open('/afs/cern.ch/user/t/tmoxter/public/VertexingData/DNT_out_distSig0_l.root')['deepntuplizer/tree'].arrays()
deepntuple

{b'n_sv': array([1, 1, 0, ..., 1, 0, 2], dtype=uint32),
 b'nsv': array([1., 1., 0., ..., 1., 0., 2.], dtype=float32),
 b'sv_pt': <JaggedArray [[29.263012] [23.357866] [] ... [5.1125484] [] [36.21584 11.838466]] at 0x7f78a513b250>,
 b'sv_eta': <JaggedArray [[-0.12234419] [1.5979714] [] ... [1.0088412] [] [1.8564138 1.9839677]] at 0x7f78a512b110>,
 b'sv_phi': <JaggedArray [[-2.377102] [0.346123] [] ... [-0.22099943] [] [1.7911874 1.7522379]] at 0x7f78a513bc90>,
 b'sv_etarel': <JaggedArray [[-0.47533765] [-0.49012518] [] ... [-0.49332345] [] [-0.46505427 -0.4073919]] at 0x7f78a513bfd0>,
 b'sv_phirel': <JaggedArray [[-0.4773848] [-0.48553276] [] ... [-0.32678413] [] [-0.43157113 -0.47052062]] at 0x7f78a513b4d0>,
 b'sv_deltaR': <JaggedArray [[-0.4665384] [-0.48248392] [] ... [-0.3266555] [] [-0.42316437 -0.4028131]] at 0x7f78a5144790>,
 b'sv_mass': <JaggedArray [[2.231662] [1.5486048] [] ... [0.6314855] [] [1.1228014 0.6111452]] at 0x7f78a5144810>,
 b'sv_ntracks': <JaggedArray [[4.0] [3.0] 

In [11]:
# Re-key the branches by decoded (str) name and convert each awkward0 array
# to its awkward1 counterpart; the bare expression below displays the result.
deepntuple_ak1 = {
    branch_name.decode(): ak.from_awkward0(branch_values)
    for branch_name, branch_values in deepntuple.items()
}
deepntuple_ak1

{'n_sv': <Array [1, 1, 0, 0, 0, 1, ... 1, 1, 2, 1, 0, 2] type='21085 * uint32'>,
 'nsv': <Array [1, 1, 0, 0, 0, 1, ... 1, 1, 2, 1, 0, 2] type='21085 * float32'>,
 'sv_pt': <Array [[29.3], [23.4], ... [], [36.2, 11.8]] type='21085 * var * float32'>,
 'sv_eta': <Array [[-0.122], [1.6], ... [], [1.86, 1.98]] type='21085 * var * float32'>,
 'sv_phi': <Array [[-2.38], [0.346], ... [], [1.79, 1.75]] type='21085 * var * float32'>,
 'sv_etarel': <Array [[-0.475], [-0.49, ... [-0.465, -0.407]] type='21085 * var * float32'>,
 'sv_phirel': <Array [[-0.477], ... [-0.432, -0.471]] type='21085 * var * float32'>,
 'sv_deltaR': <Array [[-0.467], ... [-0.423, -0.403]] type='21085 * var * float32'>,
 'sv_mass': <Array [[2.23], [1.55], ... [], [1.12, 0.611]] type='21085 * var * float32'>,
 'sv_ntracks': <Array [[4], [3], [], [], ... [2], [], [4, 2]] type='21085 * var * float32'>,
 'sv_chi2': <Array [[6.02], [0.0189], ... [], [11.5, 4.87]] type='21085 * var * float32'>,
 'sv_ndf': <Array [[4.21], [2.91], 

In [12]:
# Quick look at a few per-jet branches: jet pt and eta first, then (after a
# blank line) two of the TagVarCSV_* branches.
print('jet pt :', deepntuple_ak1['jet_pt'])
print('jet eta:', deepntuple_ak1['jet_eta'])
print()
print(deepntuple_ak1['TagVarCSV_jetNSecondaryVertices'])
print(deepntuple_ak1['TagVarCSV_trackSumJetEtRatio'])

jet pt : [100, 44.8, 38, 33.8, 51.7, 48.2, 31.4, ... 69.4, 20.2, 116, 135, 106, 32.1, 136]
jet eta: [-0.147, 1.59, 0.406, 2.18, 0.00265, -0.942, ... 1.08, 2.14, 0.508, 1, 1.59, 1.89]

[1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, ... 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 2]
[0.686, 0.151, 0.787, 0.667, 0.173, 0.582, ... 0.547, 0.325, 0.445, 0.144, 0.753]


In [13]:
# Print jet_pt for the first ten entries one at a time (element access shows
# the individual values rather than the abbreviated array repr).
for i in range(10):
    print('i: ', i, 'pt: ', deepntuple_ak1['jet_pt'][i] )

i:  0 pt:  100.1991195678711
i:  1 pt:  44.75279998779297
i:  2 pt:  37.99165725708008
i:  3 pt:  33.82807922363281
i:  4 pt:  51.74195861816406
i:  5 pt:  48.22136306762695
i:  6 pt:  31.389516830444336
i:  7 pt:  26.246288299560547
i:  8 pt:  148.56610107421875
i:  9 pt:  56.0772590637207


In [14]:
# Recreating DeepCSV input from
# https://github.com/DL4Jets/DeepJet/blob/master/modules/datastructures/TrainData_deepCSV.py

# Short alias for the awkward1 branch dictionary (same object, not a copy).
d = deepntuple_ak1

In [15]:
def _padded_slots(values, n_slots, pad_value):
    """Return the first ``n_slots`` entries of ``values``, padded with ``pad_value``."""
    n_available = len(values)
    return [values[k] if k < n_available else pad_value for k in range(n_slots)]


def fill_input_vector(d, i, pad_value=0.0):
    """Assemble the flat 66-entry DeepCSV input vector for entry ``i``.

    Layout (identical to the order of the original hand-written list):

    * 12 per-jet values: ``jet_pt``, ``jet_eta`` and ten ``TagVarCSV_*`` scalars,
    * 6 track slots x 7 ``TagVarCSVTrk_*`` features, grouped slot by slot,
    * 4 ``TagVarCSV_trackEtaRel`` slots,
    * 1 vertex slot x 8 vertex / flight-distance features.

    Parameters
    ----------
    d : mapping
        Branch name -> array, indexed as ``d[name][i]``. For the per-track,
        etaRel and per-vertex branches ``d[name][i]`` must support ``len()``
        and integer indexing.
    i : int
        Index of the entry (jet) to read.
    pad_value : float, optional
        Value written into slots for which the entry has no element (fewer
        than 6 tracks, fewer than 4 etaRel values, or no secondary vertex).
        Without padding such entries cannot be indexed at the fixed positions
        and the lookup fails.
        NOTE(review): 0.0 is assumed to match the padding convention used when
        the model was trained — confirm against the training data structure.

    Returns
    -------
    list
        66 values in the order described above.
    """
    n_track_slots = 6
    n_eta_rel_slots = 4
    n_vertex_slots = 1

    jet_branches = (
        'jet_pt', 'jet_eta',
        'TagVarCSV_jetNSecondaryVertices',
        'TagVarCSV_trackSumJetEtRatio', 'TagVarCSV_trackSumJetDeltaR',
        'TagVarCSV_vertexCategory', 'TagVarCSV_trackSip2dValAboveCharm',
        'TagVarCSV_trackSip2dSigAboveCharm', 'TagVarCSV_trackSip3dValAboveCharm',
        'TagVarCSV_trackSip3dSigAboveCharm', 'TagVarCSV_jetNSelectedTracks',
        'TagVarCSV_jetNTracksEtaRel',
    )
    track_branches = (
        'TagVarCSVTrk_trackJetDistVal',
        'TagVarCSVTrk_trackPtRel',
        'TagVarCSVTrk_trackDeltaR',
        'TagVarCSVTrk_trackPtRatio',
        'TagVarCSVTrk_trackSip3dSig',
        'TagVarCSVTrk_trackSip2dSig',
        'TagVarCSVTrk_trackDecayLenVal',
    )
    vertex_branches = (
        'TagVarCSV_vertexMass',
        'TagVarCSV_vertexNTracks',
        'TagVarCSV_vertexEnergyRatio',
        'TagVarCSV_vertexJetDeltaR',
        'TagVarCSV_flightDistance2dVal',
        'TagVarCSV_flightDistance2dSig',
        'TagVarCSV_flightDistance3dVal',
        'TagVarCSV_flightDistance3dSig',
    )

    # Per-jet scalars, taken as-is.
    input_vector = [d[name][i] for name in jet_branches]

    # Read each track branch once, pad it to the fixed slot count, then
    # interleave so that all seven features of slot 0 come first, then slot 1, ...
    track_columns = [
        _padded_slots(d[name][i], n_track_slots, pad_value)
        for name in track_branches
    ]
    for slot in range(n_track_slots):
        input_vector.extend(column[slot] for column in track_columns)

    input_vector.extend(
        _padded_slots(d['TagVarCSV_trackEtaRel'][i], n_eta_rel_slots, pad_value)
    )

    # Only the first vertex is used; entries without a vertex get pad_value.
    for name in vertex_branches:
        input_vector.extend(_padded_slots(d[name][i], n_vertex_slots, pad_value))

    return input_vector

In [16]:
# Build the DeepCSV input vector for the first entry (index 0) and print it.
input_deepCSV = fill_input_vector(d, 0)
print(input_deepCSV)

[100.1991195678711, -0.14700652658939362, 1.0, 0.6864300966262817, 0.009192017838358879, 0.0, 0.0005459639942273498, 0.36565935611724854, 0.0015289890579879284, 1.3150200843811035, 9.0, 4.0, -0.005050946492701769, 0.508870005607605, 0.15449602901935577, 0.15263594686985016, 6.511486530303955, 6.646363258361816, 0.09059252589941025, -0.0005342753138393164, 0.8791101574897766, 0.10158131271600723, 0.10085965692996979, 1.7673397064208984, 1.5515769720077515, 0.060919590294361115, -0.0024296038318425417, 0.21655413508415222, 0.027650538831949234, 0.027321934700012207, 1.052257776260376, 0.9399802088737488, 0.036134444177150726, -0.0028469425160437822, 0.39822354912757874, 0.042038824409246445, 0.04154758155345917, -0.8707574009895325, 0.36565935611724854, 0.0034640240482985973, -0.005370347760617733, 0.3384493887424469, 0.10749753564596176, 0.10663701593875885, -1.6697150468826294, 0.0688890591263771, 0.0497017465531826, -0.002588188275694847, 0.4039844572544098, 0.11487825959920883, 0.112

In [17]:
# Shape the feature list into a single-row batch for model.predict.
# reshape(1, 66) gives the same array as before for a 66-entry list but raises
# for any other length, whereas np.full would silently broadcast a scalar or
# length-1 input into a constant row.
inputs = np.asarray(input_deepCSV).reshape(1, 66)

In [18]:
# Run the loaded model on the current `inputs` batch and print the score array.
score = model.predict(inputs)
print(score)

[[0.05536846 0.00135163 0.07036184 0.87291807]]


In [19]:
# Print the pfDeepCSVJetTags_* values stored in the ntuple for entry 0, for
# comparison with the model score computed in this notebook.
# NOTE(review): in the saved outputs the stored values differ noticeably from
# the predicted score; presumably the inputs need preprocessing and/or the
# output ordering differs — to be confirmed.
print('probb: ', d['pfDeepCSVJetTags_probb'][0], 'probbb: ', d['pfDeepCSVJetTags_probbb'][0], 
      'probc: ', d['pfDeepCSVJetTags_probc'][0], 'probudsg', d['pfDeepCSVJetTags_probudsg'][0])

probb:  0.4826902449131012 probbb:  0.0052902428433299065 probc:  0.29264748096466064 probudsg 0.21937201917171478
