# Find a suiting epsilon

The goal of this notebook is to derive a suitable magnitude of the attack for each feature.

In [2]:
#import uproot
import uproot4 as uproot
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
#import seaborn as sns
import mplhep as hep

In [3]:
import awkward1 as ak

In [4]:
uproot.__version__

'0.1.2'

## Checking files content

Focusing on pred_ntuple_merged_342

In [5]:
NOMINAL_INPUT_FILE = "/eos/user/a/anstein/public/DeepJet/Train_DF/nominal_with_etarel_phirel/predict_FGSM/pred_ntuple_merged_342.root:tree"
ADVERSARIAL_INPUT_FILE = "/eos/user/a/anstein/public/DeepJet/Train_DF/adversarial_with_etarel_phirel/predict_FGSM/pred_ntuple_merged_342.root:tree"

In [6]:
inputs_root = "/eos/cms/store/group/phys_btag/ParticleTransformer/merged/ntuple_merged_342.root:deepntuplizer/tree"

In [7]:
file_ = uproot.open(inputs_root)
columns_ = file_.keys()

In [8]:
columns_

['n_sv',
 'nsv',
 'sv_pt',
 'sv_eta',
 'sv_phi',
 'sv_e',
 'sv_etarel',
 'sv_phirel',
 'sv_deltaR',
 'sv_mass',
 'sv_ntracks',
 'sv_chi2',
 'sv_ndf',
 'sv_normchi2',
 'sv_dxy',
 'sv_dxyerr',
 'sv_dxysig',
 'sv_d3d',
 'sv_d3derr',
 'sv_d3dsig',
 'sv_costhetasvpv',
 'sv_enratio',
 'n_gtracks',
 'nGtracks',
 'gtrack_pt',
 'gtrack_eta',
 'gtrack_phi',
 'gtrack_mass',
 'gtrack_dz',
 'gtrack_dxy',
 'gtrack_3D_ip',
 'gtrack_3D_sip',
 'gtrack_2D_ip',
 'gtrack_2D_sip',
 'gtrack_dR',
 'gtrack_dist_neigh',
 'gtrack_3D_TrackProbability',
 'gtrack_2D_TrackProbability',
 'gtrack_chi2reduced',
 'gtrack_nPixelHits',
 'gtrack_nHits',
 'gtrack_jetAxisDistance',
 'gtrack_jetAxisDlength',
 'gtrack_PCAtrackFromPV',
 'gtrack_dotProdTrack',
 'gtrack_dotProdTrack2D',
 'npv',
 'rho',
 'ntrueInt',
 'event_no',
 'jet_no',
 'gen_pt',
 'Delta_gen_pt',
 'isB',
 'isGBB',
 'isBB',
 'isLeptonicB',
 'isLeptonicB_C',
 'isC',
 'isGCC',
 'isCC',
 'isUD',
 'isS',
 'isG',
 'isPU',
 'isUndefined',
 'genDecay',
 'jet_hflav',


In [9]:
global_branches = ['jet_pt', 'jet_eta',
                    'nCpfcand','nNpfcand',
                    'nsv','npv',
                    'TagVarCSV_trackSumJetEtRatio',
                    'TagVarCSV_trackSumJetDeltaR',
                    'TagVarCSV_vertexCategory',
                    'TagVarCSV_trackSip2dValAboveCharm',
                    'TagVarCSV_trackSip2dSigAboveCharm',
                    'TagVarCSV_trackSip3dValAboveCharm',
                    'TagVarCSV_trackSip3dSigAboveCharm',
                    'TagVarCSV_jetNSelectedTracks',
                    'TagVarCSV_jetNTracksEtaRel']
cpf_branches = ['Cpfcan_BtagPf_trackEtaRel',
                 'Cpfcan_BtagPf_trackPtRel',
                 'Cpfcan_BtagPf_trackPPar',
                 'Cpfcan_BtagPf_trackDeltaR',
                 'Cpfcan_BtagPf_trackPParRatio',
                 'Cpfcan_BtagPf_trackSip2dVal',
                 'Cpfcan_BtagPf_trackSip2dSig',
                 'Cpfcan_BtagPf_trackSip3dVal',
                 'Cpfcan_BtagPf_trackSip3dSig',
                 'Cpfcan_BtagPf_trackJetDistVal',
                 'Cpfcan_ptrel',
                 'Cpfcan_drminsv',
                 'Cpfcan_VTX_ass',
                 'Cpfcan_puppiw',
                 'Cpfcan_chi2',
                 'Cpfcan_quality']
npf_branches = ['Npfcan_ptrel', 
                 #'Npfcan_etarel', 'Npfcan_phirel',
                 'Npfcan_deltaR',
                 'Npfcan_isGamma', 'Npfcan_HadFrac', 'Npfcan_drminsv', 'Npfcan_puppiw']
vtx_branches = ['sv_pt','sv_deltaR',
                 'sv_mass',
                 #'sv_etarel',
                 #'sv_phirel',
                 'sv_ntracks',
                 'sv_chi2',
                 'sv_normchi2',
                 'sv_dxy',
                 'sv_dxysig',
                 'sv_d3d',
                 'sv_d3dsig',
                 'sv_costhetasvpv',
                 'sv_enratio']

In [10]:
df_glob = file_.arrays(global_branches, library="ak")

In [11]:
df_cpf = file_.arrays(cpf_branches, library="ak")

In [12]:
df_npf = file_.arrays(npf_branches, library="ak")

In [13]:
df_vtx = file_.arrays(vtx_branches, library="ak")

In [14]:
df_glob

<Array [{jet_pt: 24.3, ... ] type='400000 * {"jet_pt": float32, "jet_eta": float...'>

In [15]:
df_cpf

<Array [{Cpfcan_BtagPf_trackEtaRel: [, ... ] type='400000 * {"Cpfcan_BtagPf_trac...'>

In [16]:
df_npf

<Array [{Npfcan_ptrel: [-0.918, ... 0.969]}] type='400000 * {"Npfcan_ptrel": var...'>

In [17]:
df_vtx

<Array [{sv_pt: [], ... sv_enratio: []}] type='400000 * {"sv_pt": var * float32,...'>

In [25]:
df_cpf_clip = ak.pad_none(df_cpf,25,clip=True)

In [26]:
df_npf_clip = ak.pad_none(df_npf,25,clip=True)

In [27]:
df_vtx_clip = ak.pad_none(df_vtx,4,clip=True)

In [28]:
df_vtx_clip.type

400000 * {"sv_pt": 4 * ?float32, "sv_deltaR": 4 * ?float32, "sv_mass": 4 * ?float32, "sv_ntracks": 4 * ?float32, "sv_chi2": 4 * ?float32, "sv_normchi2": 4 * ?float32, "sv_dxy": 4 * ?float32, "sv_dxysig": 4 * ?float32, "sv_d3d": 4 * ?float32, "sv_d3dsig": 4 * ?float32, "sv_costhetasvpv": 4 * ?float32, "sv_enratio": 4 * ?float32}

In [30]:
def quantile_min_max(feature,group='glob',candidate=None):
    if group=='glob':
        print(feature,group,candidate)
        array_np = ak.to_numpy(df_glob[feature])
        array_np = np.where(array_np == -999, 0, array_np)
        array_np = np.where(array_np ==   -1, 0, array_np)
        return [np.quantile(array_np,0.005),np.quantile(array_np,0.995)]
    elif group=='cpf':
        print(feature,group,candidate)
        array_np = ak.to_numpy(df_cpf_clip[feature][:,candidate])
        array_np = np.where(array_np == -999, 0, array_np)
        array_np = np.where(array_np ==   -1, 0, array_np)
        return [np.quantile(array_np,0.005),np.quantile(array_np,0.995)]
    elif group=='npf':
        print(feature,group,candidate)
        array_np = ak.to_numpy(df_npf_clip[feature][:,candidate])
        array_np = np.where(array_np == -999, 0, array_np)
        array_np = np.where(array_np ==   -1, 0, array_np)
        return [np.quantile(array_np,0.005),np.quantile(array_np,0.995)]
    elif group=='vtx':
        print(feature,group,candidate)
        array_np = ak.to_numpy(df_vtx_clip[feature][:,candidate])
        #print(array_np)
        array_np = np.where(array_np == -999, 0, array_np)
        array_np = np.where(array_np ==   -1, 0, array_np)
        return [np.quantile(array_np,0.005),np.quantile(array_np,0.995)]

In [31]:
print(quantile_min_max('jet_pt','glob'))

jet_pt glob None
[14.954755067825317, 1700.1684771728524]


In [32]:
print(quantile_min_max('Cpfcan_BtagPf_trackEtaRel','cpf',1))

Cpfcan_BtagPf_trackEtaRel cpf 1
[1.6659805768728257, 456.2992126464855]


In [33]:
global_epsilons = np.zeros(len(global_branches))

In [37]:
global_ranges = np.zeros((len(global_branches),2))

In [38]:
for (i,key) in enumerate(global_branches):
    range_inputs = quantile_min_max(key,'glob')
    scale_epsilon = (range_inputs[1] - range_inputs[0])/2
    global_epsilons[i] = scale_epsilon
    global_ranges[i] = range_inputs
    print(range_inputs, scale_epsilon)

jet_pt glob None
[14.954755067825317, 1700.1684771728524] 842.6068610525135
jet_eta glob None
[-2.4466045618057253, 2.4464884662628177] 2.4465465140342717
nCpfcand glob None
[0.0, 40.0] 20.0
nNpfcand glob None
[0.0, 22.0] 11.0
nsv glob None
[0.0, 5.0] 2.5
npv glob None
[7.0, 51.0] 22.0
TagVarCSV_trackSumJetEtRatio glob None
[0.0, 1.2057606065273305] 0.6028803032636653
TagVarCSV_trackSumJetDeltaR glob None
[0.0, 3.068409718275071] 1.5342048591375355
TagVarCSV_vertexCategory glob None
[0.0, 2.0] 1.0
TagVarCSV_trackSip2dValAboveCharm glob None
[-0.023457033559679985, 0.05388612478971518] 0.03867157917469759
TagVarCSV_trackSip2dSigAboveCharm glob None
[-4.50480307340622, 18.87342078208925] 11.689111927747735
TagVarCSV_trackSip3dValAboveCharm glob None
[-0.05840983817353844, 0.08670980717986829] 0.07255982267670336
TagVarCSV_trackSip3dSigAboveCharm glob None
[-10.18956787586212, 26.20179145812989] 18.195679666996007
TagVarCSV_jetNSelectedTracks glob None
[0.0, 19.0] 9.5
TagVarCSV_jetNTracks

In [39]:
global_ranges

array([[ 1.49547551e+01,  1.70016848e+03],
       [-2.44660456e+00,  2.44648847e+00],
       [ 0.00000000e+00,  4.00000000e+01],
       [ 0.00000000e+00,  2.20000000e+01],
       [ 0.00000000e+00,  5.00000000e+00],
       [ 7.00000000e+00,  5.10000000e+01],
       [ 0.00000000e+00,  1.20576061e+00],
       [ 0.00000000e+00,  3.06840972e+00],
       [ 0.00000000e+00,  2.00000000e+00],
       [-2.34570336e-02,  5.38861248e-02],
       [-4.50480307e+00,  1.88734208e+01],
       [-5.84098382e-02,  8.67098072e-02],
       [-1.01895679e+01,  2.62017915e+01],
       [ 0.00000000e+00,  1.90000000e+01],
       [ 0.00000000e+00,  1.10000000e+01]])

In [41]:
cpf_epsilons = np.zeros((len(cpf_branches),25))

In [40]:
cpf_ranges = np.zeros((len(cpf_branches),25, 2))

In [42]:
for (i,key) in enumerate(cpf_branches):
    for cand in range(25):
        range_inputs = quantile_min_max(key,'cpf',cand)
        scale_epsilon = (range_inputs[1] - range_inputs[0])/2
        cpf_epsilons[i,cand] = scale_epsilon
        cpf_ranges[i,cand] = range_inputs
        print(range_inputs, scale_epsilon)

Cpfcan_BtagPf_trackEtaRel cpf 0
[1.324528922438622, 6.424960517883301] 2.5502157977223394
Cpfcan_BtagPf_trackEtaRel cpf 1
[1.5682243537902834, 6.618929469585422] 2.5253525578975693
Cpfcan_BtagPf_trackEtaRel cpf 2
[1.5810839718580247, 6.697601296901704] 2.5582586625218395
Cpfcan_BtagPf_trackEtaRel cpf 3
[1.5833026045560838, 6.743854150772096] 2.580275773108006
Cpfcan_BtagPf_trackEtaRel cpf 4
[1.5868718791007996, 6.747469069957737] 2.5802985954284687
Cpfcan_BtagPf_trackEtaRel cpf 5
[1.5835239338874818, 6.781107225418091] 2.5987916457653046
Cpfcan_BtagPf_trackEtaRel cpf 6
[0.0, 6.547431204319004] 3.273715602159502
Cpfcan_BtagPf_trackEtaRel cpf 7
[0.0, 6.536999049186708] 3.268499524593354
Cpfcan_BtagPf_trackEtaRel cpf 8
[0.0, 6.549455020427704] 3.274727510213852
Cpfcan_BtagPf_trackEtaRel cpf 9
[0.0, 6.548987185955048] 3.274493592977524
Cpfcan_BtagPf_trackEtaRel cpf 10
[0.0, 6.569660317897797] 3.2848301589488984
Cpfcan_BtagPf_trackEtaRel cpf 11
[0.0, 6.583185620307923] 3.2915928101539613
Cp

In [43]:
npf_epsilons = np.zeros((len(npf_branches),25))

In [44]:
npf_ranges = np.zeros((len(npf_branches),25, 2))

In [45]:
for (i,key) in enumerate(npf_branches):
    for cand in range(25):
        range_inputs = quantile_min_max(key,'npf',cand)
        scale_epsilon = (range_inputs[1] - range_inputs[0])/2
        npf_epsilons[i,cand] = scale_epsilon
        npf_ranges[i,cand] = range_inputs
        print(range_inputs, scale_epsilon)

Npfcan_ptrel npf 0
[-0.9976771494746208, 5.0] 2.9988385747373103
Npfcan_ptrel npf 1
[-0.9983924651145935, 5.0] 2.9991962325572965
Npfcan_ptrel npf 2
[-0.9988347306847573, 5.0] 2.999417365342379
Npfcan_ptrel npf 3
[-0.9989339128136635, 2.455066529732263e-39] 0.49946695640683175
Npfcan_ptrel npf 4
[-0.9989992380142212, 3.0566021317077466e-39] 0.4994996190071106
Npfcan_ptrel npf 5
[-0.9990548500418663, 3.0566021317077466e-39] 0.49952742502093317
Npfcan_ptrel npf 6
[-0.9991021165251732, 3.0566021317077466e-39] 0.4995510582625866
Npfcan_ptrel npf 7
[-0.9991229176521301, 3.0566021317077466e-39] 0.49956145882606506
Npfcan_ptrel npf 8
[-0.999152780175209, 3.0566021317077466e-39] 0.4995763900876045
Npfcan_ptrel npf 9
[-0.9991658931970596, 3.0566021317077466e-39] 0.4995829465985298
Npfcan_ptrel npf 10
[-0.9991778138279915, 3.0566021317077466e-39] 0.49958890691399577
Npfcan_ptrel npf 11
[-0.9991930726170539, 3.0566021317077466e-39] 0.49959653630852696
Npfcan_ptrel npf 12
[-0.9991957557201385, 3.0

In [46]:
vtx_epsilons = np.zeros((len(vtx_branches),4))

In [47]:
vtx_ranges = np.zeros((len(vtx_branches),4, 2))

In [48]:
for (i,key) in enumerate(vtx_branches):
    for cand in range(4):
        range_inputs = quantile_min_max(key,'vtx',cand)
        scale_epsilon = (range_inputs[1] - range_inputs[0])/2
        vtx_epsilons[i,cand] = scale_epsilon
        vtx_ranges[i,cand] = range_inputs
        print(range_inputs, scale_epsilon)

sv_pt vtx 0
[0.0, 367.76021011352543] 183.88010505676272
sv_pt vtx 1
[0.0, 377.9374073791506] 188.9687036895753
sv_pt vtx 2
[0.0, 381.6216246032718] 190.8108123016359
sv_pt vtx 3
[0.0, 382.3482508850098] 191.1741254425049
sv_deltaR vtx 0
[-0.49756693974137306, 1.0] 0.7487834698706866
sv_deltaR vtx 1
[-0.49766344025731085, 1.0] 0.7488317201286554
sv_deltaR vtx 2
[-0.4976642429828644, 1.0] 0.7488321214914322
sv_deltaR vtx 3
[-0.49766804441809653, 1.0] 0.7488340222090483
sv_mass vtx 0
[0.0, 12.982587194442777] 6.4912935972213885
sv_mass vtx 1
[0.0, 15.283788576126113] 7.641894288063057
sv_mass vtx 2
[0.0, 15.98883020401004] 7.99441510200502
sv_mass vtx 3
[0.0, 16.232963218688976] 8.116481609344488
sv_ntracks vtx 0
[0.0, 9.0] 4.5
sv_ntracks vtx 1
[0.0, 10.0] 5.0
sv_ntracks vtx 2
[0.0, 10.0] 5.0
sv_ntracks vtx 3
[0.0, 10.0] 5.0
sv_chi2 vtx 0
[0.0, 14.645934300422669] 7.322967150211334
sv_chi2 vtx 1
[0.0, 15.159636454582223] 7.579818227291112
sv_chi2 vtx 2
[0.0, 15.359875984191907] 7.6799379

In [86]:
np.save('/eos/user/a/anstein/public/DeepJet/Train_DF_Run2/auxiliary/global_epsilons.npy',global_epsilons)

In [49]:
np.save('/eos/user/a/anstein/public/DeepJet/Train_DF_Run2/auxiliary/global_ranges.npy',global_ranges)

In [87]:
np.save('/eos/user/a/anstein/public/DeepJet/Train_DF_Run2/auxiliary/cpf_epsilons.npy',cpf_epsilons)

In [50]:
np.save('/eos/user/a/anstein/public/DeepJet/Train_DF_Run2/auxiliary/cpf_ranges.npy',cpf_ranges)

In [88]:
np.save('/eos/user/a/anstein/public/DeepJet/Train_DF_Run2/auxiliary/npf_epsilons.npy',npf_epsilons)

In [51]:
np.save('/eos/user/a/anstein/public/DeepJet/Train_DF_Run2/auxiliary/npf_ranges.npy',npf_ranges)

In [89]:
np.save('/eos/user/a/anstein/public/DeepJet/Train_DF_Run2/auxiliary/vtx_epsilons.npy',vtx_epsilons)

In [52]:
np.save('/eos/user/a/anstein/public/DeepJet/Train_DF_Run2/auxiliary/vtx_ranges.npy',vtx_ranges)