# ROC curves
## Comparison of different trainings with different setups for inference

In [1]:
import numpy as np

import matplotlib.pyplot as plt
import mplhep as hep


# Apply the style sheet exposed as hep.cms.style.ROOT to every figure created below.
plt.style.use(hep.cms.style.ROOT)

In [2]:
# Draw axis ticks and grid lines underneath the plotted curves instead of on top of them.
plt.rcParams['axes.axisbelow'] = True

In [None]:
# Alternative configuration (labelled "Run3 setup" below). It is wrapped in a string
# literal so that it is NOT executed; remove the surrounding triple quotes to enable it.
# The following cell assigns the same three names (model_names, prediction_setups,
# prediction_files), so it overrides these values if it is run afterwards.
'''
#Run3 setup:
model_names = ['nominal_with_etarel_phirel', 'adversarial_with_etarel_phirel']
prediction_setups = ['', '_FGSM']
prediction_files = 'one_prediction'
'''

In [3]:
# File-name stem of the stored curves: files are read as 'BvL_<stem>.npy' / 'BvC_<stem>.npy'.
prediction_files = 'outfiles'

# Suffixes appended to the 'predict' directory name, one per inference setup.
prediction_setups = [
    '',
    '_FGSM',
]

# Training names; each one is a sub-directory of the 'Train_<tagger>' directory.
model_names = [
    'nominal',
    'adversarial_eps0p01',
    'adversarial_eps0p005',
]

In [4]:
# Tag inserted into the training directory name ('Train_<tagger>') when the paths are built.
tagger = 'DF_Run2' # alternative value: 'DF'

In [5]:
# Grid of prediction directories: the outer list runs over the inference setups,
# each inner list over the trainings (same order as model_names).
dirz = [
    [
        f'/eos/user/a/anstein/public/DeepJet/Train_{tagger}/{model_name}/predict{prediction_setup}/'
        for model_name in model_names
    ]
    for prediction_setup in prediction_setups
]

In [6]:
# Echo the directory grid to check the constructed paths by eye.
dirz

[['/eos/user/a/anstein/public/DeepJet/Train_DF_Run2/nominal/predict/',
  '/eos/user/a/anstein/public/DeepJet/Train_DF_Run2/adversarial_eps0p01/predict/',
  '/eos/user/a/anstein/public/DeepJet/Train_DF_Run2/adversarial_eps0p005/predict/'],
 ['/eos/user/a/anstein/public/DeepJet/Train_DF_Run2/nominal/predict_FGSM/',
  '/eos/user/a/anstein/public/DeepJet/Train_DF_Run2/adversarial_eps0p01/predict_FGSM/',
  '/eos/user/a/anstein/public/DeepJet/Train_DF_Run2/adversarial_eps0p005/predict_FGSM/']]

In [7]:
# Flatten the setup-by-training grid row by row (all trainings of the first setup,
# then all trainings of the next one) and append the BvL file name to each directory.
BvsL_paths = [
    prediction_dir + f'BvL_{prediction_files}.npy'
    for setup_dirs in dirz
    for prediction_dir in setup_dirs
]

In [8]:
# Echo the flattened list of BvL files to check the constructed paths by eye.
BvsL_paths

['/eos/user/a/anstein/public/DeepJet/Train_DF_Run2/nominal/predict/BvL_outfiles.npy',
 '/eos/user/a/anstein/public/DeepJet/Train_DF_Run2/adversarial_eps0p01/predict/BvL_outfiles.npy',
 '/eos/user/a/anstein/public/DeepJet/Train_DF_Run2/adversarial_eps0p005/predict/BvL_outfiles.npy',
 '/eos/user/a/anstein/public/DeepJet/Train_DF_Run2/nominal/predict_FGSM/BvL_outfiles.npy',
 '/eos/user/a/anstein/public/DeepJet/Train_DF_Run2/adversarial_eps0p01/predict_FGSM/BvL_outfiles.npy',
 '/eos/user/a/anstein/public/DeepJet/Train_DF_Run2/adversarial_eps0p005/predict_FGSM/BvL_outfiles.npy']

In [9]:
# Flatten the setup-by-training grid row by row (all trainings of the first setup,
# then all trainings of the next one) and append the BvC file name to each directory.
BvsC_paths = [
    prediction_dir + f'BvC_{prediction_files}.npy'
    for setup_dirs in dirz
    for prediction_dir in setup_dirs
]

In [10]:
# Echo the flattened list of BvC files to check the constructed paths by eye.
BvsC_paths

['/eos/user/a/anstein/public/DeepJet/Train_DF_Run2/nominal/predict/BvC_outfiles.npy',
 '/eos/user/a/anstein/public/DeepJet/Train_DF_Run2/adversarial_eps0p01/predict/BvC_outfiles.npy',
 '/eos/user/a/anstein/public/DeepJet/Train_DF_Run2/adversarial_eps0p005/predict/BvC_outfiles.npy',
 '/eos/user/a/anstein/public/DeepJet/Train_DF_Run2/nominal/predict_FGSM/BvC_outfiles.npy',
 '/eos/user/a/anstein/public/DeepJet/Train_DF_Run2/adversarial_eps0p01/predict_FGSM/BvC_outfiles.npy',
 '/eos/user/a/anstein/public/DeepJet/Train_DF_Run2/adversarial_eps0p005/predict_FGSM/BvC_outfiles.npy']

In [11]:
# Load the first BvsL file into `xya`. The plotting cells below rebind `xya` for every
# file they read, so this value only matters for inspection at this point.
# NOTE(review): allow_pickle=True lets np.load unpickle Python objects stored in the file,
# which can execute arbitrary code - only load files from a trusted source.
xya = np.load(BvsL_paths[0], allow_pickle=True)

In [None]:
# ROC comparison for the files listed in BvsL_paths: one curve per training and
# inference setup, with the mistagging rate drawn on a logarithmic axis.
legtitle = 'BvsL'
legloc = 'upper left'
tag_name = ' (b,bb,lepb)'
mistag_name = ' (uds,g)'

# Legend labels: one entry per training and one per inference setup.
names_models = ['Nominal training', r'Adversarial training (against $\epsilon=0.01$)',
                r'Adversarial training (against $\epsilon=0.005$)']
names_setups = ['nominal samples', r'FGSM ($\epsilon=0.01$, limit $20\%$)']

# Colour encodes the training, line style the inference setup. The per-curve lists are
# derived from these two short lists instead of being written out by hand.
model_colors = ['tab:blue', 'tab:orange', 'tab:green']
setup_linestyles = ['-', '--']
n_models = len(names_models)
colors = model_colors * len(names_setups)
linestyles = [style for style in setup_linestyles for _ in range(n_models)]

# BvsL_paths is ordered setup by setup (all trainings of the first setup, then the next),
# so the labels above only line up when the number of files matches. Fail early with a
# clear message instead of an IndexError in the middle of the plotting loop.
n_curves = n_models * len(names_setups)
if len(BvsL_paths) != n_curves:
    raise ValueError(
        f'Expected {n_curves} BvsL files ({n_models} trainings x {len(names_setups)} setups), '
        f'got {len(BvsL_paths)}; update names_models / names_setups to match the configuration.'
    )

fig, ax = plt.subplots(figsize=[12, 12])
ax.set_xlim(left=0., right=1)

ax.set_yscale('log')
ax.set_ylim(bottom=1e-3, top=1)

ax.set_ylabel('Mistagging rate' + mistag_name)
ax.set_xlabel('Tagging efficiency' + tag_name)

ax.grid(which='minor', alpha=0.85)
ax.grid(which='major', alpha=0.95, color='black')

for i, path in enumerate(BvsL_paths):
    # NOTE(review): allow_pickle=True lets np.load unpickle Python objects, which can
    # execute arbitrary code - only load files from a trusted source.
    xya = np.load(path, allow_pickle=True)
    # Position in the flattened list -> (training, setup) indices.
    model_idx, setup_idx = i % n_models, i // n_models
    # xya[0] / xya[1] are drawn as x / y values; xya[2] is reported as the AUC.
    legend_text = '\n'.join([names_models[model_idx], names_setups[setup_idx], f'AUC = {xya[2]:.3f}'])
    ax.plot(xya[0], xya[1], linestyle=linestyles[i], color=colors[i], linewidth=3.0, label=legend_text)

leg = ax.legend(title=legtitle, loc=legloc, fontsize=16, title_fontsize=20, labelspacing=0.7,
                frameon=True, framealpha=1, facecolor='white')
# Align the legend title and entries with the side of the axes the legend sits on.
aligned = 'right' if 'right' in legloc else 'left'
if hasattr(leg, 'set_alignment'):
    # Public API, available from matplotlib 3.6.
    leg.set_alignment(aligned)
else:
    # Older matplotlib: fall back to the private attribute used originally.
    leg._legend_box.align = aligned
leg.get_frame().set_linewidth(0.0)
# Disabled alternatives for the experiment label:
#hep.cms.text('Simulation Preliminary')
#hep.cms.rlabel('13 TeV')
hep.cms.label(rlabel='(13 TeV)')
# Uncomment to write the figure to disk:
#fig.savefig(legtitle + '_' + tagger + '_ROC.pdf', bbox_inches='tight')

In [None]:
# ROC comparison for the files listed in BvsC_paths: one curve per training and
# inference setup, with the mistagging rate drawn on a logarithmic axis.
legtitle = 'BvsC'
legloc = 'upper left'
tag_name = ' (b,bb,lepb)'
mistag_name = ' (c)'

# Legend labels: one entry per training and one per inference setup.
names_models = ['Nominal training', r'Adversarial training (against $\epsilon=0.01$)',
                r'Adversarial training (against $\epsilon=0.005$)']
names_setups = ['nominal samples', r'FGSM ($\epsilon=0.01$, limit $20\%$)']

# Colour encodes the training, line style the inference setup. The per-curve lists are
# derived from these two short lists instead of being written out by hand.
model_colors = ['tab:blue', 'tab:orange', 'tab:green']
setup_linestyles = ['-', '--']
n_models = len(names_models)
colors = model_colors * len(names_setups)
linestyles = [style for style in setup_linestyles for _ in range(n_models)]

# BvsC_paths is ordered setup by setup (all trainings of the first setup, then the next),
# so the labels above only line up when the number of files matches. Fail early with a
# clear message instead of an IndexError in the middle of the plotting loop.
n_curves = n_models * len(names_setups)
if len(BvsC_paths) != n_curves:
    raise ValueError(
        f'Expected {n_curves} BvsC files ({n_models} trainings x {len(names_setups)} setups), '
        f'got {len(BvsC_paths)}; update names_models / names_setups to match the configuration.'
    )

fig, ax = plt.subplots(figsize=[12, 12])
ax.set_xlim(left=0., right=1)

ax.set_yscale('log')
ax.set_ylim(bottom=1e-3, top=1)

ax.set_ylabel('Mistagging rate' + mistag_name)
ax.set_xlabel('Tagging efficiency' + tag_name)

ax.grid(which='minor', alpha=0.85)
ax.grid(which='major', alpha=0.95, color='black')

for i, path in enumerate(BvsC_paths):
    # NOTE(review): allow_pickle=True lets np.load unpickle Python objects, which can
    # execute arbitrary code - only load files from a trusted source.
    xya = np.load(path, allow_pickle=True)
    # Position in the flattened list -> (training, setup) indices.
    model_idx, setup_idx = i % n_models, i // n_models
    # xya[0] / xya[1] are drawn as x / y values; xya[2] is reported as the AUC.
    legend_text = '\n'.join([names_models[model_idx], names_setups[setup_idx], f'AUC = {xya[2]:.3f}'])
    ax.plot(xya[0], xya[1], linestyle=linestyles[i], color=colors[i], linewidth=3.0, label=legend_text)

leg = ax.legend(title=legtitle, loc=legloc, fontsize=16, title_fontsize=20, labelspacing=0.7,
                frameon=True, framealpha=1, facecolor='white')
# Align the legend title and entries with the side of the axes the legend sits on.
aligned = 'right' if 'right' in legloc else 'left'
if hasattr(leg, 'set_alignment'):
    # Public API, available from matplotlib 3.6.
    leg.set_alignment(aligned)
else:
    # Older matplotlib: fall back to the private attribute used originally.
    leg._legend_box.align = aligned
leg.get_frame().set_linewidth(0.0)
hep.cms.label(rlabel='(13 TeV)')
# Uncomment to write the figure to disk:
#fig.savefig(legtitle + '_' + tagger + '_ROC.pdf', bbox_inches='tight')