# Find a suiting epsilon

The goal of this notebook is to derive a suitable magnitude of the attack for each feature.

In [14]:
#import uproot
import uproot4 as uproot
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
#import seaborn as sns
import mplhep as hep

In [18]:
import awkward1 as ak

In [32]:
uproot.__version__

'0.1.2'

## Checking files content

Focusing on pred_ntuple_merged_342

In [7]:
NOMINAL_INPUT_FILE = "/eos/user/a/anstein/public/DeepJet/Train_DF/nominal_with_etarel_phirel/predict_FGSM/pred_ntuple_merged_342.root:tree"
ADVERSARIAL_INPUT_FILE = "/eos/user/a/anstein/public/DeepJet/Train_DF/adversarial_with_etarel_phirel/predict_FGSM/pred_ntuple_merged_342.root:tree"

In [16]:
inputs_root = "/eos/cms/store/group/phys_btag/ParticleTransformer/merged/ntuple_merged_342.root:deepntuplizer/tree"

In [20]:
file_ = uproot.open(inputs_root)
columns_ = file_.keys()

In [21]:
columns_

['n_sv',
 'nsv',
 'sv_pt',
 'sv_eta',
 'sv_phi',
 'sv_e',
 'sv_etarel',
 'sv_phirel',
 'sv_deltaR',
 'sv_mass',
 'sv_ntracks',
 'sv_chi2',
 'sv_ndf',
 'sv_normchi2',
 'sv_dxy',
 'sv_dxyerr',
 'sv_dxysig',
 'sv_d3d',
 'sv_d3derr',
 'sv_d3dsig',
 'sv_costhetasvpv',
 'sv_enratio',
 'n_gtracks',
 'nGtracks',
 'gtrack_pt',
 'gtrack_eta',
 'gtrack_phi',
 'gtrack_mass',
 'gtrack_dz',
 'gtrack_dxy',
 'gtrack_3D_ip',
 'gtrack_3D_sip',
 'gtrack_2D_ip',
 'gtrack_2D_sip',
 'gtrack_dR',
 'gtrack_dist_neigh',
 'gtrack_3D_TrackProbability',
 'gtrack_2D_TrackProbability',
 'gtrack_chi2reduced',
 'gtrack_nPixelHits',
 'gtrack_nHits',
 'gtrack_jetAxisDistance',
 'gtrack_jetAxisDlength',
 'gtrack_PCAtrackFromPV',
 'gtrack_dotProdTrack',
 'gtrack_dotProdTrack2D',
 'npv',
 'rho',
 'ntrueInt',
 'event_no',
 'jet_no',
 'gen_pt',
 'Delta_gen_pt',
 'isB',
 'isGBB',
 'isBB',
 'isLeptonicB',
 'isLeptonicB_C',
 'isC',
 'isGCC',
 'isCC',
 'isUD',
 'isS',
 'isG',
 'isPU',
 'isUndefined',
 'genDecay',
 'jet_hflav',


In [22]:
global_branches = ['jet_pt', 'jet_eta',
                    'nCpfcand','nNpfcand',
                    'nsv','npv',
                    'TagVarCSV_trackSumJetEtRatio',
                    'TagVarCSV_trackSumJetDeltaR',
                    'TagVarCSV_vertexCategory',
                    'TagVarCSV_trackSip2dValAboveCharm',
                    'TagVarCSV_trackSip2dSigAboveCharm',
                    'TagVarCSV_trackSip3dValAboveCharm',
                    'TagVarCSV_trackSip3dSigAboveCharm',
                    'TagVarCSV_jetNSelectedTracks',
                    'TagVarCSV_jetNTracksEtaRel']
cpf_branches = ['Cpfcan_BtagPf_trackEtaRel',
                 'Cpfcan_BtagPf_trackPtRel',
                 'Cpfcan_BtagPf_trackPPar',
                 'Cpfcan_BtagPf_trackDeltaR',
                 'Cpfcan_BtagPf_trackPParRatio',
                 'Cpfcan_BtagPf_trackSip2dVal',
                 'Cpfcan_BtagPf_trackSip2dSig',
                 'Cpfcan_BtagPf_trackSip3dVal',
                 'Cpfcan_BtagPf_trackSip3dSig',
                 'Cpfcan_BtagPf_trackJetDistVal',
                 'Cpfcan_ptrel',
                 'Cpfcan_drminsv',
                 'Cpfcan_VTX_ass',
                 'Cpfcan_puppiw',
                 'Cpfcan_chi2',
                 'Cpfcan_quality']
npf_branches = ['Npfcan_ptrel', 
                 #'Npfcan_etarel', 'Npfcan_phirel',
                 'Npfcan_deltaR',
                 'Npfcan_isGamma', 'Npfcan_HadFrac', 'Npfcan_drminsv', 'Npfcan_puppiw']
vtx_branches = ['sv_pt','sv_deltaR',
                 'sv_mass',
                 #'sv_etarel',
                 #'sv_phirel',
                 'sv_ntracks',
                 'sv_chi2',
                 'sv_normchi2',
                 'sv_dxy',
                 'sv_dxysig',
                 'sv_d3d',
                 'sv_d3dsig',
                 'sv_costhetasvpv',
                 'sv_enratio']

In [82]:
df_glob = file_.arrays(global_branches, library="ak")

In [83]:
df_cpf = file_.arrays(cpf_branches, library="ak")

In [84]:
df_npf = file_.arrays(npf_branches, library="ak")

In [85]:
df_vtx = file_.arrays(vtx_branches, library="ak")

In [86]:
df_glob

<Array [{jet_pt: 24.3, ... ] type='400000 * {"jet_pt": float32, "jet_eta": float...'>

In [87]:
df_cpf

<Array [{Cpfcan_BtagPf_trackEtaRel: [, ... ] type='400000 * {"Cpfcan_BtagPf_trac...'>

In [88]:
df_npf

<Array [{Npfcan_ptrel: [-0.918, ... 0.969]}] type='400000 * {"Npfcan_ptrel": var...'>

In [89]:
df_vtx

<Array [{sv_pt: [], ... sv_enratio: []}] type='400000 * {"sv_pt": var * float32,...'>

In [34]:
[np.round(np.quantile(ak.to_numpy(df_glob['jet_pt']),0.005),2),np.round(np.quantile(ak.to_numpy(df_glob['jet_pt']),0.995),2)]

[14.95, 1700.17]

In [197]:
def quantile_min_max(feature,group='glob',candidate=None):
    if group=='glob':
        array_np = ak.to_numpy(df_glob[feature])
        array_np = np.where(array_np == -999, 0, array_np)
        array_np = np.where(array_np ==   -1, 0, array_np)
        return [np.round(np.quantile(array_np,0.005),2),np.round(np.quantile(array_np,0.995),2)]
    elif group=='cpf':
        print(feature,group,candidate)
        array_np = ak.to_numpy(df_cpf[feature][candidate])
        array_np = np.where(array_np == -999, 0, array_np)
        array_np = np.where(array_np ==   -1, 0, array_np)
        return [np.round(np.quantile(array_np,0.005),2),np.round(np.quantile(array_np,0.995),2)]
    elif group=='npf':
        print(feature,group,candidate)
        array_np = ak.to_numpy(df_npf[feature][candidate])
        array_np = np.where(array_np == -999, 0, array_np)
        array_np = np.where(array_np ==   -1, 0, array_np)
        return [np.round(np.quantile(array_np,0.005),2),np.round(np.quantile(array_np,0.995),2)]
    elif group=='vtx':
        print(feature,group,candidate)
        array_np = ak.to_numpy(ak.fill_none(ak.pad_none(df_vtx[feature],4)[:,candidate],0))
        #print(array_np)
        array_np = np.where(array_np == -999, 0, array_np)
        array_np = np.where(array_np ==   -1, 0, array_np)
        return [np.round(np.quantile(array_np,0.005),2),np.round(np.quantile(array_np,0.995),2)]

In [92]:
print(quantile_min_max('jet_pt','glob'))

[14.95, 1700.17]


In [93]:
print(quantile_min_max('Cpfcan_BtagPf_trackEtaRel','cpf',1))

[1.81, 4.1]


In [200]:
global_epsilons = np.zeros(len(global_branches))

In [201]:
for (i,key) in enumerate(global_branches):
    range_inputs = quantile_min_max(key,'glob')
    scale_epsilon = (range_inputs[1] - range_inputs[0])/2
    global_epsilons[i] = scale_epsilon
    print(scale_epsilon)

842.61
2.45
20.0
11.0
2.5
22.0
0.605
1.535
1.0
0.035
11.685
0.075
18.195
9.5
5.5


In [202]:
cpf_epsilons = np.zeros((len(cpf_branches),25))

In [203]:
for (i,key) in enumerate(cpf_branches):
    for cand in range(25):
        range_inputs = quantile_min_max(key,'cpf',cand)
        scale_epsilon = (range_inputs[1] - range_inputs[0])/2
        cpf_epsilons[i,cand] = scale_epsilon
        print(scale_epsilon)

Cpfcan_BtagPf_trackEtaRel cpf 0
0.6000000000000001
Cpfcan_BtagPf_trackEtaRel cpf 1
1.1449999999999998
Cpfcan_BtagPf_trackEtaRel cpf 2
0.9149999999999998
Cpfcan_BtagPf_trackEtaRel cpf 3
0.8000000000000003
Cpfcan_BtagPf_trackEtaRel cpf 4
0.9100000000000001
Cpfcan_BtagPf_trackEtaRel cpf 5
1.855
Cpfcan_BtagPf_trackEtaRel cpf 6
1.365
Cpfcan_BtagPf_trackEtaRel cpf 7
0.4849999999999999
Cpfcan_BtagPf_trackEtaRel cpf 8
1.06
Cpfcan_BtagPf_trackEtaRel cpf 9
0.5
Cpfcan_BtagPf_trackEtaRel cpf 10
1.19
Cpfcan_BtagPf_trackEtaRel cpf 11
0.44999999999999996
Cpfcan_BtagPf_trackEtaRel cpf 12
0.575
Cpfcan_BtagPf_trackEtaRel cpf 13
2.115
Cpfcan_BtagPf_trackEtaRel cpf 14
1.4200000000000002
Cpfcan_BtagPf_trackEtaRel cpf 15
1.38
Cpfcan_BtagPf_trackEtaRel cpf 16
1.855
Cpfcan_BtagPf_trackEtaRel cpf 17
1.0000000000000002
Cpfcan_BtagPf_trackEtaRel cpf 18
2.11
Cpfcan_BtagPf_trackEtaRel cpf 19
1.155
Cpfcan_BtagPf_trackEtaRel cpf 20
0.8899999999999999
Cpfcan_BtagPf_trackEtaRel cpf 21
0.69
Cpfcan_BtagPf_trackEtaRel cp

In [204]:
npf_epsilons = np.zeros((len(npf_branches),25))

In [205]:
for (i,key) in enumerate(npf_branches):
    for cand in range(25):
        range_inputs = quantile_min_max(key,'npf',cand)
        scale_epsilon = (range_inputs[1] - range_inputs[0])/2
        npf_epsilons[i,cand] = scale_epsilon
        print(scale_epsilon)

Npfcan_ptrel npf 0
0.014999999999999958
Npfcan_ptrel npf 1
0.09499999999999997
Npfcan_ptrel npf 2
0.06
Npfcan_ptrel npf 3
0.065
Npfcan_ptrel npf 4
0.034999999999999976
Npfcan_ptrel npf 5
0.16499999999999998
Npfcan_ptrel npf 6
0.020000000000000018
Npfcan_ptrel npf 7
0.10999999999999999
Npfcan_ptrel npf 8
0.034999999999999976
Npfcan_ptrel npf 9
0.14
Npfcan_ptrel npf 10
0.09499999999999997
Npfcan_ptrel npf 11
0.03999999999999998
Npfcan_ptrel npf 12
0.0050000000000000044
Npfcan_ptrel npf 13
0.15999999999999998
Npfcan_ptrel npf 14
0.07
Npfcan_ptrel npf 15
0.10499999999999998
Npfcan_ptrel npf 16
0.185
Npfcan_ptrel npf 17
0.024999999999999967
Npfcan_ptrel npf 18
0.15500000000000003
Npfcan_ptrel npf 19
0.010000000000000009
Npfcan_ptrel npf 20
0.07999999999999996
Npfcan_ptrel npf 21
0.025000000000000022
Npfcan_ptrel npf 22
0.010000000000000009
Npfcan_ptrel npf 23
0.03999999999999998
Npfcan_ptrel npf 24
0.09999999999999998
Npfcan_deltaR npf 0
0.044999999999999984
Npfcan_deltaR npf 1
0.0849999999

In [206]:
vtx_epsilons = np.zeros((len(vtx_branches),4))

In [207]:
for (i,key) in enumerate(vtx_branches):
    for cand in range(4):
        range_inputs = quantile_min_max(key,'vtx',cand)
        scale_epsilon = (range_inputs[1] - range_inputs[0])/2
        vtx_epsilons[i,cand] = scale_epsilon
        print(scale_epsilon)

sv_pt vtx 0
183.88
sv_pt vtx 1
122.495
sv_pt vtx 2
75.35
sv_pt vtx 3
37.315
sv_deltaR vtx 0
0.25
sv_deltaR vtx 1
0.25
sv_deltaR vtx 2
0.245
sv_deltaR vtx 3
0.24
sv_mass vtx 0
6.49
sv_mass vtx 1
5.275
sv_mass vtx 2
3.4
sv_mass vtx 3
1.525
sv_ntracks vtx 0
4.5
sv_ntracks vtx 1
4.0
sv_ntracks vtx 2
3.0
sv_ntracks vtx 3
2.0
sv_chi2 vtx 0
7.325
sv_chi2 vtx 1
5.93
sv_chi2 vtx 2
4.125
sv_chi2 vtx 3
2.455
sv_normchi2 vtx 0
3.915
sv_normchi2 vtx 1
2.575
sv_normchi2 vtx 2
1.285
sv_normchi2 vtx 3
0.63
sv_dxy vtx 0
7.92
sv_dxy vtx 1
3.435
sv_dxy vtx 2
1.025
sv_dxy vtx 3
0.145
sv_dxysig vtx 0
194.215
sv_dxysig vtx 1
34.86
sv_dxysig vtx 2
10.99
sv_dxysig vtx 3
3.04
sv_d3d vtx 0
12.06
sv_d3d vtx 1
5.35
sv_d3d vtx 2
1.415
sv_d3d vtx 3
0.21
sv_d3dsig vtx 0
194.76
sv_d3dsig vtx 1
34.87
sv_d3dsig vtx 2
11.025
sv_d3dsig vtx 3
3.06
sv_costhetasvpv vtx 0
0.96
sv_costhetasvpv vtx 1
0.5
sv_costhetasvpv vtx 2
0.5
sv_costhetasvpv vtx 3
0.5
sv_enratio vtx 0
0.405
sv_enratio vtx 1
0.24
sv_enratio vtx 2
0.13
sv_en

In [208]:
mkdir /eos/user/a/anstein/public/DeepJet/Train_DF_Run2/auxiliary

In [209]:
np.save('/eos/user/a/anstein/public/DeepJet/Train_DF_Run2/auxiliary/global_epsilons.npy',global_epsilons)

In [210]:
np.save('/eos/user/a/anstein/public/DeepJet/Train_DF_Run2/auxiliary/cpf_epsilons.npy',cpf_epsilons)

In [211]:
np.save('/eos/user/a/anstein/public/DeepJet/Train_DF_Run2/auxiliary/npf_epsilons.npy',npf_epsilons)

In [212]:
np.save('/eos/user/a/anstein/public/DeepJet/Train_DF_Run2/auxiliary/vtx_epsilons.npy',vtx_epsilons)