# Find a suiting epsilon

The goal of this notebook is to derive a suitable magnitude of the attack for each feature.

In [1]:
#import uproot
import uproot4 as uproot
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
#import seaborn as sns
import mplhep as hep

In [2]:
import awkward1 as ak

In [3]:
uproot.__version__

'0.1.2'

## Checking files content

Focusing on pred_ntuple_merged_342

In [4]:
NOMINAL_INPUT_FILE = "/eos/user/a/anstein/public/DeepJet/Train_DF/nominal_with_etarel_phirel/predict_FGSM/pred_ntuple_merged_342.root:tree"
ADVERSARIAL_INPUT_FILE = "/eos/user/a/anstein/public/DeepJet/Train_DF/adversarial_with_etarel_phirel/predict_FGSM/pred_ntuple_merged_342.root:tree"

In [5]:
inputs_root = "/eos/cms/store/group/phys_btag/ParticleTransformer/merged/ntuple_merged_342.root:deepntuplizer/tree"

In [6]:
file_ = uproot.open(inputs_root)
columns_ = file_.keys()

In [7]:
columns_

['n_sv',
 'nsv',
 'sv_pt',
 'sv_eta',
 'sv_phi',
 'sv_e',
 'sv_etarel',
 'sv_phirel',
 'sv_deltaR',
 'sv_mass',
 'sv_ntracks',
 'sv_chi2',
 'sv_ndf',
 'sv_normchi2',
 'sv_dxy',
 'sv_dxyerr',
 'sv_dxysig',
 'sv_d3d',
 'sv_d3derr',
 'sv_d3dsig',
 'sv_costhetasvpv',
 'sv_enratio',
 'n_gtracks',
 'nGtracks',
 'gtrack_pt',
 'gtrack_eta',
 'gtrack_phi',
 'gtrack_mass',
 'gtrack_dz',
 'gtrack_dxy',
 'gtrack_3D_ip',
 'gtrack_3D_sip',
 'gtrack_2D_ip',
 'gtrack_2D_sip',
 'gtrack_dR',
 'gtrack_dist_neigh',
 'gtrack_3D_TrackProbability',
 'gtrack_2D_TrackProbability',
 'gtrack_chi2reduced',
 'gtrack_nPixelHits',
 'gtrack_nHits',
 'gtrack_jetAxisDistance',
 'gtrack_jetAxisDlength',
 'gtrack_PCAtrackFromPV',
 'gtrack_dotProdTrack',
 'gtrack_dotProdTrack2D',
 'npv',
 'rho',
 'ntrueInt',
 'event_no',
 'jet_no',
 'gen_pt',
 'Delta_gen_pt',
 'isB',
 'isGBB',
 'isBB',
 'isLeptonicB',
 'isLeptonicB_C',
 'isC',
 'isGCC',
 'isCC',
 'isUD',
 'isS',
 'isG',
 'isPU',
 'isUndefined',
 'genDecay',
 'jet_hflav',


In [8]:
global_branches = ['jet_pt', 'jet_eta',
                    'nCpfcand','nNpfcand',
                    'nsv','npv',
                    'TagVarCSV_trackSumJetEtRatio',
                    'TagVarCSV_trackSumJetDeltaR',
                    'TagVarCSV_vertexCategory',
                    'TagVarCSV_trackSip2dValAboveCharm',
                    'TagVarCSV_trackSip2dSigAboveCharm',
                    'TagVarCSV_trackSip3dValAboveCharm',
                    'TagVarCSV_trackSip3dSigAboveCharm',
                    'TagVarCSV_jetNSelectedTracks',
                    'TagVarCSV_jetNTracksEtaRel']
cpf_branches = ['Cpfcan_BtagPf_trackEtaRel',
                 'Cpfcan_BtagPf_trackPtRel',
                 'Cpfcan_BtagPf_trackPPar',
                 'Cpfcan_BtagPf_trackDeltaR',
                 'Cpfcan_BtagPf_trackPParRatio',
                 'Cpfcan_BtagPf_trackSip2dVal',
                 'Cpfcan_BtagPf_trackSip2dSig',
                 'Cpfcan_BtagPf_trackSip3dVal',
                 'Cpfcan_BtagPf_trackSip3dSig',
                 'Cpfcan_BtagPf_trackJetDistVal',
                 'Cpfcan_ptrel',
                 'Cpfcan_drminsv',
                 'Cpfcan_VTX_ass',
                 'Cpfcan_puppiw',
                 'Cpfcan_chi2',
                 'Cpfcan_quality']
npf_branches = ['Npfcan_ptrel', 
                 #'Npfcan_etarel', 'Npfcan_phirel',
                 'Npfcan_deltaR',
                 'Npfcan_isGamma', 'Npfcan_HadFrac', 'Npfcan_drminsv', 'Npfcan_puppiw']
vtx_branches = ['sv_pt','sv_deltaR',
                 'sv_mass',
                 #'sv_etarel',
                 #'sv_phirel',
                 'sv_ntracks',
                 'sv_chi2',
                 'sv_normchi2',
                 'sv_dxy',
                 'sv_dxysig',
                 'sv_d3d',
                 'sv_d3dsig',
                 'sv_costhetasvpv',
                 'sv_enratio']

In [9]:
df_glob = file_.arrays(global_branches, library="ak")

In [10]:
df_cpf = file_.arrays(cpf_branches, library="ak")

In [11]:
df_npf = file_.arrays(npf_branches, library="ak")

In [12]:
df_vtx = file_.arrays(vtx_branches, library="ak")

In [13]:
df_glob

<Array [{jet_pt: 24.3, ... ] type='400000 * {"jet_pt": float32, "jet_eta": float...'>

In [14]:
df_cpf

<Array [{Cpfcan_BtagPf_trackEtaRel: [, ... ] type='400000 * {"Cpfcan_BtagPf_trac...'>

In [15]:
df_npf

<Array [{Npfcan_ptrel: [-0.918, ... 0.969]}] type='400000 * {"Npfcan_ptrel": var...'>

In [16]:
df_vtx

<Array [{sv_pt: [], ... sv_enratio: []}] type='400000 * {"sv_pt": var * float32,...'>

In [42]:
ak.to_numpy(df_npf['Npfcan_drminsv'][10:][0])

array([-0.2936093 , -0.18370833, -0.16862626, -0.06246918, -0.05315652,
       -0.03082207,  0.        ], dtype=float32)

In [44]:
df_npf['Npfcan_drminsv'][10:49][0]

<Array [-0.294, -0.184, -0.169, ... -0.0308, 0] type='7 * float32'>

In [49]:
ak.to_list(df_npf['Npfcan_isGamma'])

[[0.0, 1.0],
 [0.0, 0.0, 1.0, 1.0],
 [0.0, 1.0],
 [1.0, 0.0, 1.0],
 [1.0, 1.0, 0.0],
 [1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0],
 [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0],
 [0.0, 0.0, 1.0],
 [1.0, 0.0, 0.0, 0.0],
 [0.0, 1.0, 1.0],
 [0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0],
 [0.0, 1.0, 1.0, 0.0, 1.0],
 [1.0, 1.0],
 [1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0],
 [1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0],
 [1.0, 1.0, 1.0, 0.0, 0.0, 1.0],
 [1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0],
 [1.0, 1.0, 1.0, 1.0],
 [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0],
 [0.0, 1.0],
 [1.0, 0.0, 1.0],
 [1.0, 0.0, 1.0, 1.0, 1.0, 1.0],
 [1.0, 1.0, 1.0],
 [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0],
 [1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0],
 [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
 [1.0, 1.0, 0.0],
 [1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0],
 [1.0, 1.0, 1.0],
 [1.0],
 [1.0, 0.0, 1.0],
 [0.0, 0.0],
 [1.0, 0.0, 1.0, 0.0, 1.0, 0.

In [66]:
ak.to_numpy(ak.pad_none(df_npf['Npfcan_isGamma'],25,clip=True))

masked_array(
  data=[[0.0, 1.0, --, ..., --, --, --],
        [0.0, 0.0, 1.0, ..., --, --, --],
        [0.0, 1.0, --, ..., --, --, --],
        ...,
        [1.0, 0.0, 1.0, ..., --, --, --],
        [1.0, 0.0, 0.0, ..., --, --, --],
        [1.0, --, --, ..., --, --, --]],
  mask=[[False, False,  True, ...,  True,  True,  True],
        [False, False, False, ...,  True,  True,  True],
        [False, False,  True, ...,  True,  True,  True],
        ...,
        [False, False, False, ...,  True,  True,  True],
        [False, False, False, ...,  True,  True,  True],
        [False,  True,  True, ...,  True,  True,  True]],
  fill_value=1e+20,
  dtype=float32)

In [67]:
ak.pad_none(df_npf['Npfcan_isGamma'],25,clip=True).type

400000 * 25 * ?float32

In [65]:
print(ak.to_list(ak.fill_none(ak.pad_none(df_npf['Npfcan_isGamma'],25, axis=1), -1)[:100]))

[[0.0, 1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0], [0.0, 0.0, 1.0, 1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0], [0.0, 1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0], [1.0, 0.0, 1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0], [1.0, 1.0, 0.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0], [1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0], [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0

In [17]:
[np.round(np.quantile(ak.to_numpy(df_glob['jet_pt']),0.005),2),np.round(np.quantile(ak.to_numpy(df_glob['jet_pt']),0.995),2)]

[14.95, 1700.17]

In [68]:
df_cpf_clip = ak.pad_none(df_cpf,25,clip=True)

In [69]:
df_npf_clip = ak.pad_none(df_npf,25,clip=True)

In [72]:
df_vtx_clip = ak.pad_none(df_vtx,4,clip=True)

In [71]:
df_vtx_clip.type

400000 * {"sv_pt": 4 * ?float32, "sv_deltaR": 4 * ?float32, "sv_mass": 4 * ?float32, "sv_ntracks": 4 * ?float32, "sv_chi2": 4 * ?float32, "sv_normchi2": 4 * ?float32, "sv_dxy": 4 * ?float32, "sv_dxysig": 4 * ?float32, "sv_d3d": 4 * ?float32, "sv_d3dsig": 4 * ?float32, "sv_costhetasvpv": 4 * ?float32, "sv_enratio": 4 * ?float32}

In [73]:
df_glob['TagVarCSV_trackSip2dValAboveCharm']

<Array [-1, 5.39e-05, ... 0.00075, 0.00193] type='400000 * float32'>

In [74]:
def quantile_min_max(feature,group='glob',candidate=None):
    if group=='glob':
        array_np = ak.to_numpy(df_glob[feature])
        array_np = np.where(array_np == -999, 0, array_np)
        array_np = np.where(array_np ==   -1, 0, array_np)
        return [np.quantile(array_np,0.005),np.quantile(array_np,0.995)]
    elif group=='cpf':
        print(feature,group,candidate)
        array_np = ak.to_numpy(df_cpf_clip[feature][:,candidate])
        array_np = np.where(array_np == -999, 0, array_np)
        array_np = np.where(array_np ==   -1, 0, array_np)
        return [np.quantile(array_np,0.005),np.quantile(array_np,0.995)]
    elif group=='npf':
        print(feature,group,candidate)
        array_np = ak.to_numpy(df_npf_clip[feature][:,candidate])
        array_np = np.where(array_np == -999, 0, array_np)
        array_np = np.where(array_np ==   -1, 0, array_np)
        return [np.quantile(array_np,0.005),np.quantile(array_np,0.995)]
    elif group=='vtx':
        print(feature,group,candidate)
        array_np = ak.to_numpy(df_vtx_clip[feature][:,candidate])
        #print(array_np)
        array_np = np.where(array_np == -999, 0, array_np)
        array_np = np.where(array_np ==   -1, 0, array_np)
        return [np.quantile(array_np,0.005),np.quantile(array_np,0.995)]

In [75]:
print(quantile_min_max('jet_pt','glob'))

[14.954755067825317, 1700.1684771728524]


In [76]:
print(quantile_min_max('Cpfcan_BtagPf_trackEtaRel','cpf',1))

Cpfcan_BtagPf_trackEtaRel cpf 1
[0.0, 6.499822947978974]


In [77]:
global_epsilons = np.zeros(len(global_branches))

In [78]:
for (i,key) in enumerate(global_branches):
    range_inputs = quantile_min_max(key,'glob')
    scale_epsilon = (range_inputs[1] - range_inputs[0])/2
    global_epsilons[i] = scale_epsilon
    print(scale_epsilon)

842.6068610525135
2.4465465140342717
20.0
11.0
2.5
22.0
0.6028803032636653
1.5342048591375355
1.0
0.03867157917469759
11.689111927747735
0.07255982267670336
18.195679666996007
9.5
5.5


In [79]:
cpf_epsilons = np.zeros((len(cpf_branches),25))

In [80]:
for (i,key) in enumerate(cpf_branches):
    for cand in range(25):
        range_inputs = quantile_min_max(key,'cpf',cand)
        scale_epsilon = (range_inputs[1] - range_inputs[0])/2
        cpf_epsilons[i,cand] = scale_epsilon
        print(scale_epsilon)

Cpfcan_BtagPf_trackEtaRel cpf 0
3.2015150749683383
Cpfcan_BtagPf_trackEtaRel cpf 1
3.249911473989487
Cpfcan_BtagPf_trackEtaRel cpf 2
3.2827672731876376
Cpfcan_BtagPf_trackEtaRel cpf 3
3.2942367434501656
Cpfcan_BtagPf_trackEtaRel cpf 4
3.2862647032737735
Cpfcan_BtagPf_trackEtaRel cpf 5
3.2886453902721415
Cpfcan_BtagPf_trackEtaRel cpf 6
3.273715602159502
Cpfcan_BtagPf_trackEtaRel cpf 7
3.234298893213272
Cpfcan_BtagPf_trackEtaRel cpf 8
3.2020350348949433
Cpfcan_BtagPf_trackEtaRel cpf 9
3.155824612379075
Cpfcan_BtagPf_trackEtaRel cpf 10
3.1302560317516335
Cpfcan_BtagPf_trackEtaRel cpf 11
3.0793365871906295
Cpfcan_BtagPf_trackEtaRel cpf 12
3.016834135055542
Cpfcan_BtagPf_trackEtaRel cpf 13
2.9852204871177683
Cpfcan_BtagPf_trackEtaRel cpf 14
2.916715399026871
Cpfcan_BtagPf_trackEtaRel cpf 15
2.8608034586906435
Cpfcan_BtagPf_trackEtaRel cpf 16
2.8043352711200713
Cpfcan_BtagPf_trackEtaRel cpf 17
2.7441590392589585
Cpfcan_BtagPf_trackEtaRel cpf 18
2.6782805943489074
Cpfcan_BtagPf_trackEtaRel cp

In [81]:
npf_epsilons = np.zeros((len(npf_branches),25))

In [82]:
for (i,key) in enumerate(npf_branches):
    for cand in range(25):
        range_inputs = quantile_min_max(key,'npf',cand)
        scale_epsilon = (range_inputs[1] - range_inputs[0])/2
        npf_epsilons[i,cand] = scale_epsilon
        print(scale_epsilon)

Npfcan_ptrel npf 0
0.4988385747373104
Npfcan_ptrel npf 1
0.4991959933936596
Npfcan_ptrel npf 2
0.4994162027537823
Npfcan_ptrel npf 3
0.49946722462773324
Npfcan_ptrel npf 4
0.4994997982680798
Npfcan_ptrel npf 5
0.4995152355730534
Npfcan_ptrel npf 6
0.4995233120024204
Npfcan_ptrel npf 7
0.49951726391911505
Npfcan_ptrel npf 8
0.4995159812271595
Npfcan_ptrel npf 9
0.49950265988707543
Npfcan_ptrel npf 10
0.49948051586747166
Npfcan_ptrel npf 11
0.49946126356720927
Npfcan_ptrel npf 12
0.4994294062256813
Npfcan_ptrel npf 13
0.49938815936446185
Npfcan_ptrel npf 14
0.49933895483613017
Npfcan_ptrel npf 15
0.49928027510643
Npfcan_ptrel npf 16
0.49919608265161514
Npfcan_ptrel npf 17
0.499051845818758
Npfcan_ptrel npf 18
0.49886402949690817
Npfcan_ptrel npf 19
0.4985028611123562
Npfcan_ptrel npf 20
0.49765894532203675
Npfcan_ptrel npf 21
0.4933272023499012
Npfcan_ptrel npf 22
1.3872154497908143e-40
Npfcan_ptrel npf 23
1.3872434757601008e-40
Npfcan_ptrel npf 24
1.3872504822524224e-40
Npfcan_deltaR np

In [83]:
vtx_epsilons = np.zeros((len(vtx_branches),4))

In [84]:
for (i,key) in enumerate(vtx_branches):
    for cand in range(4):
        range_inputs = quantile_min_max(key,'vtx',cand)
        scale_epsilon = (range_inputs[1] - range_inputs[0])/2
        vtx_epsilons[i,cand] = scale_epsilon
        print(scale_epsilon)

sv_pt vtx 0
183.88010505676272
sv_pt vtx 1
122.49261089324963
sv_pt vtx 2
75.35158931732178
sv_pt vtx 3
37.31423210144047
sv_deltaR vtx 0
0.24878360755741596
sv_deltaR vtx 1
0.24780722767114638
sv_deltaR vtx 2
0.2453938914835453
sv_deltaR vtx 3
0.23816268935799598
sv_mass vtx 0
6.4912935972213885
sv_mass vtx 1
5.2763627052307225
sv_mass vtx 2
3.4015095496177943
sv_mass vtx 3
1.5267452961206474
sv_ntracks vtx 0
4.5
sv_ntracks vtx 1
4.0
sv_ntracks vtx 2
3.0
sv_ntracks vtx 3
2.0
sv_chi2 vtx 0
7.322967150211334
sv_chi2 vtx 1
5.928946223258974
sv_chi2 vtx 2
4.125741980075838
sv_chi2 vtx 3
2.455987374782577
sv_normchi2 vtx 0
3.912606446743013
sv_normchi2 vtx 1
2.5729418146610263
sv_normchi2 vtx 2
1.2868374955654147
sv_normchi2 vtx 3
0.6289712247252467
sv_dxy vtx 0
7.922402477264405
sv_dxy vtx 1
3.4358518099784905
sv_dxy vtx 2
1.0272969353199037
sv_dxy vtx 3
0.1472291492670783
sv_dxysig vtx 0
194.21731933593762
sv_dxysig vtx 1
34.857987518310566
sv_dxysig vtx 2
10.989088225364718
sv_dxysig vt

In [85]:
mkdir /eos/user/a/anstein/public/DeepJet/Train_DF_Run2/auxiliary

mkdir: das Verzeichnis „/eos/user/a/anstein/public/DeepJet/Train_DF_Run2/auxiliary“ kann nicht angelegt werden: Die Datei existiert bereits


In [86]:
np.save('/eos/user/a/anstein/public/DeepJet/Train_DF_Run2/auxiliary/global_epsilons.npy',global_epsilons)

In [87]:
np.save('/eos/user/a/anstein/public/DeepJet/Train_DF_Run2/auxiliary/cpf_epsilons.npy',cpf_epsilons)

In [88]:
np.save('/eos/user/a/anstein/public/DeepJet/Train_DF_Run2/auxiliary/npf_epsilons.npy',npf_epsilons)

In [89]:
np.save('/eos/user/a/anstein/public/DeepJet/Train_DF_Run2/auxiliary/vtx_epsilons.npy',vtx_epsilons)