# NOTE

Notebook in which the metrics are computed and the relative leaderboard table is generated.

In [1]:
from plot_results import get_paths, get_results, fix_values
from utils.constants import CIFAR10_DICT_MODELS_TO_ID, IMAGENET_DICT_MODELS_TO_ID, cifar10_model_dict, imagenet_model_dict


import torch as pt

import numpy as np

In [2]:

# Attack identifiers passed to get_paths, written as "<attack type>/<attack name>".
atk_type_and_name_list = ["O-atk/Stab", "O-atk/AutoTarget", "U-atk/Shake"]
datasets = ["cifar10", "imagenet"]
robusts = [True]

# Maps the third '/'-separated component of a result path to the short label
# used when building titles.
dict_terms = dict(semi_robust="robust", naive_robust="naive")

# PATHS STRUCTURE: 0-C10 stab, 1-IMG stab, 2-C10 AT, 3-IMG AT, 4-C10 shake, 5-IMG shake
paths = get_paths(datasets, atk_type_and_name_list, robusts, verbose=False)

# ------------------------ CALIBRATION CURVE
# Only the first two path groups are visited; the first path of each group
# supplies the robustness label and the dataset name for the title.
for path_list in paths[:2]:
    first_path_parts = path_list[0].split("/")
    robustness_level = dict_terms[first_path_parts[2]]
    ds_name = first_path_parts[3]
    title = f"calibration_curve_{robustness_level}_{ds_name}"
    


In [3]:
def compute_nud(clean_entropy, adv_entropy, nclass):
    """Return the mean normalised entropy shift between clean and adversarial inputs.

    For every sample the absolute entropy change ``|adv - clean|`` is divided by
    ``log(nclass) - clean`` (``log(nclass)`` being the entropy of a uniform
    distribution over ``nclass`` classes) and the resulting ratios are averaged.

    Args:
        clean_entropy: Per-sample entropies on clean inputs. A torch tensor, or
            anything ``torch.as_tensor`` accepts (e.g. a float NumPy array).
        adv_entropy: Per-sample entropies on adversarial inputs; must broadcast
            against ``clean_entropy``.
        nclass: Number of classes used for the normalisation term.

    Returns:
        The mean ratio as a Python ``float``.

    Note:
        A sample whose clean entropy equals ``log(nclass)`` has a zero
        denominator; the resulting ``inf``/``nan`` is not filtered out here.
    """
    clean = pt.as_tensor(clean_entropy)
    adv = pt.as_tensor(adv_entropy)

    # Stay in torch for every step: calling NumPy ufuncs on tensors forces a
    # NumPy conversion, which fails for tensors that require grad or are not on
    # the CPU.
    num = pt.abs(adv - clean)
    den = float(np.log(nclass)) - clean

    return pt.mean(num / den).item()

In [4]:
def uncertainty_span(over_conf, under_conf):
    """Return mean and standard deviation of ``|over_conf - under_conf|``.

    Args:
        over_conf: Per-sample values obtained under the over-confidence attack.
            A torch tensor, or anything ``torch.as_tensor`` accepts (e.g. a
            float NumPy array).
        under_conf: Per-sample values obtained under the under-confidence
            attack; must broadcast against ``over_conf``.

    Returns:
        ``(mean, std)`` of the absolute difference as Python floats. The
        standard deviation is whatever ``torch.std`` returns with its default
        settings.
    """
    over = pt.as_tensor(over_conf)
    under = pt.as_tensor(under_conf)

    # Use torch.abs rather than np.abs: the NumPy ufunc forces a NumPy
    # conversion, which fails for tensors that require grad or are not on the CPU.
    span = pt.abs(over - under)

    return pt.mean(span).item(), pt.std(span).item()

In [5]:
def squared_uncertainty_span(over_conf, under_conf):
    """Return mean and standard deviation of ``(over_conf - under_conf) ** 2``.

    Args:
        over_conf: Per-sample values under the over-confidence attack
            (torch tensor).
        under_conf: Per-sample values under the under-confidence attack
            (torch tensor broadcastable against ``over_conf``).

    Returns:
        ``(mean, std)`` of the squared difference as Python floats; the
        standard deviation uses ``torch.std`` with its default settings.
    """
    gap = over_conf - under_conf
    squared_gap = gap ** 2

    mean_value = pt.mean(squared_gap).item()
    std_value = pt.std(squared_gap).item()
    return mean_value, std_value

In [6]:
# Per-dataset accumulators: one result dictionary is appended per visited path.
cifar_gap_list, imagenet_gap_list = [], []

# Only the first two path groups are visited.
for path_list in paths[:2]:
    head_parts = path_list[0].split('/')
    robustness_level = dict_terms[head_parts[2]]
    ds_name = head_parts[3]
    # Built for every group but not read again inside this cell.
    title = f"calibration_curve_{robustness_level}_{ds_name}"

    for plot_i, path in enumerate(path_list):
        path_splitted = path.split('/')
        is_cifar = "cifar10" in path

        # The model name sits one component further from the end of the path
        # when the path does not contain 'semi_robust'.
        if 'semi_robust' in path:
            model_name = path_splitted[-3]
        else:
            model_name = path_splitted[-4]

        nclass = 10 if is_cifar else 1000

        # Clean reference: entry 0 of the results mapping.
        eps_to_adv_results_dict = get_results(*path_splitted)
        clean_entropy = eps_to_adv_results_dict[0]['entropy_of_mean']

        # Entropies under the 'Stab' attack and the corresponding NUD.
        over_adv_entropy = fix_values(path, 'entropy_of_mean', 'Stab')
        over_nud = compute_nud(clean_entropy, over_adv_entropy, nclass)

        # Entropies under the 'Shake' attack and the corresponding NUD.
        under_adv_entropy = fix_values(path, 'entropy_of_mean', 'Shake')
        under_nud = compute_nud(clean_entropy, under_adv_entropy, nclass)

        us_mean, us_std = uncertainty_span(over_adv_entropy, under_adv_entropy)
        sus_mean, sus_std = squared_uncertainty_span(over_adv_entropy, under_adv_entropy)

        # "nud_gap" is the sum of the two NUD values.
        res_dict = dict(model_name=model_name,
                        under_nud=under_nud,
                        over_nud=over_nud,
                        nud_gap=(under_nud + over_nud),
                        us_mean=us_mean,
                        us_std=us_std,
                        sus_mean=sus_mean,
                        sus_std=sus_std)

        (cifar_gap_list if is_cifar else imagenet_gap_list).append(res_dict)

        

In [7]:
# Sort the CIFAR-10 results in place by ascending mean uncertainty span ("us_mean").
cifar_gap_list.sort(key=lambda x: x["us_mean"])

In [8]:
# Sort the ImageNet results in place by ascending "us_mean", then show the
# sorted list as the cell output.
imagenet_gap_list.sort(key=lambda x: x["us_mean"])
imagenet_gap_list

[{'model_name': 'Liu2023convNextL',
  'under_nud': 0.19850148260593414,
  'over_nud': 0.19223099946975708,
  'nud_gap': 0.3907324820756912,
  'us_mean': 1.3476158380508423,
  'us_std': 0.9361840486526489,
  'sus_mean': 2.692399740219116,
  'sus_std': 3.558372735977173},
 {'model_name': 'Liu2023convNextB',
  'under_nud': 0.23352986574172974,
  'over_nud': 0.25055432319641113,
  'nud_gap': 0.48408418893814087,
  'us_mean': 1.6126735210418701,
  'us_std': 1.0197010040283203,
  'sus_mean': 3.640376329421997,
  'sus_std': 4.310660362243652},
 {'model_name': 'Liu2023swinL',
  'under_nud': 0.22591754794120789,
  'over_nud': 0.35966697335243225,
  'nud_gap': 0.5855845212936401,
  'us_mean': 1.7970839738845825,
  'us_std': 1.3041759729385376,
  'sus_mean': 4.930173873901367,
  'sus_std': 6.074586868286133},
 {'model_name': 'salman2020R18',
  'under_nud': 0.2614194452762604,
  'over_nud': 0.3472945988178253,
  'nud_gap': 0.6087140440940857,
  'us_mean': 1.8500226736068726,
  'us_std': 1.07014393

# Print tables

In [15]:
# Dataset whose leaderboard is rendered: "cifar" selects the CIFAR-10 objects,
# any other value falls through to the ImageNet ones. The same string is also
# emitted as a LaTeX macro name (\<ds_name>) in the caption and in the label.
ds_name ="imagenet"

gap_list_dict = cifar_gap_list if ds_name == "cifar" else imagenet_gap_list

MODELS_DICT = CIFAR10_DICT_MODELS_TO_ID if ds_name == "cifar" else IMAGENET_DICT_MODELS_TO_ID

rb_dict = cifar10_model_dict if ds_name == "cifar" else imagenet_model_dict

# Opening part of the LaTeX table ("inizio" = start). Every backslash is
# escaped explicitly: sequences such as '\c' are invalid escapes in a non-raw
# string and raise a SyntaxWarning on recent Python versions.
# NOTE(review): the tabular spec declares 11 columns while the header row has 10.
# NOTE(review): the caption says "NUG" while the header row says "NUD" - confirm.
inizio_tab = ('\\begin{table}[htb]\n'
f'\\caption{{\\{ds_name} Leaderboard for \\{ds_name}. Models are sorted by NUG rank. Alongside the NUD value, the relative attack rank is reported.}} \n'
'\\resizebox{\\textwidth}{!}{ \n'
'\\begin{tabular}{lllllllllll} \n'
'Robustbench ID& Model ID & Rob Acc & NUD samplewise & UnSpan mean & UnSpan std & SqrdUnSpan mean & SqrdUnSpan std & RB rank & NUD rank \\\\ \\hline'
)

# Closing part of the LaTeX table ("fine" = end), with the same explicit escaping.
fine_tab =('\\end{tabular} \n'
           '} \n'
f'\\label{{tab:comp-{ds_name}}} \n'
'\\end{table} \n')

In [16]:
# Preview of the LaTeX table preamble and header row on their own.
print(inizio_tab)

\begin{table}[htb]
\caption{\imagenet Leaderboard for \imagenet. Models are sorted by NUG rank. Alongside the NUD value, the relative attack rank is reported.} 
\resizebox{\textwidth}{!}{ 
\begin{tabular}{lllllllllll} 
Robustbench ID& Model ID & Rob Acc & NUD samplewise & UnSpan mean & UnSpan std & SqrdUnSpan mean & SqrdUnSpan std & RB rank & NUD rank \\ \hline


In [17]:
# Emit the complete LaTeX table: preamble, one row per model, closing part.
print(inizio_tab)

# NOTE(review): the last column is labelled "NUD rank" in the header, but idx is
# simply the 1-based position of the model in gap_list_dict, i.e. it reflects
# whatever key that list was sorted by - confirm this is the intended rank.
idx = 1
for model in gap_list_dict:
    model_key = model["model_name"]
    # Underscores are escaped so the name is valid LaTeX text.
    model_name = rb_dict[model_key]["name"].replace("_","\\_")
    meta = MODELS_DICT[model_key]

    cells = [model_name,
             f'{meta["paper_id"]} {meta["paper_ref"]}',
             #f'{MODELS_DICT[model_key]["clean_acc"]}',
             f'${meta["rob_acc"]}$']
    # Metric columns, each printed with three decimals inside math mode.
    cells += [f'${model[metric]:.3f}$'
              for metric in ("nud_gap", "us_mean", "us_std", "sus_mean", "sus_std")]
    cells += [f'${meta["rb_rank"]}$',
              f'${idx}$ \\\\']

    row_entry = ' & '.join(cells)
    print(row_entry)
    idx += 1

print(fine_tab)

\begin{table}[htb]
\caption{\imagenet Leaderboard for \imagenet. Models are sorted by NUG rank. Alongside the NUD value, the relative attack rank is reported.} 
\resizebox{\textwidth}{!}{ 
\begin{tabular}{lllllllllll} 
Robustbench ID& Model ID & Rob Acc & NUD samplewise & UnSpan mean & UnSpan std & SqrdUnSpan mean & SqrdUnSpan std & RB rank & NUD rank \\ \hline
Liu2023Comprehensive\_ConvNeXt-L & \liuconvnl \liu & $58.48\%$ & $0.391$ & $1.348$ & $0.936$ & $2.692$ & $3.558$ & $2$ & $1$ \\
Liu2023Comprehensive\_ConvNeXt-B & \liuconvnb \liu & $55.82\%$ & $0.484$ & $1.613$ & $1.020$ & $3.640$ & $4.311$ & $4$ & $2$ \\
Liu2023Comprehensive\_Swin-L & \liuswinl \liu & $59.56\%$ & $0.586$ & $1.797$ & $1.304$ & $4.930$ & $6.075$ & $1$ & $3$ \\
Salman2020Do\_R18 & \salmanID \salman & $25.32\%$ & $0.609$ & $1.850$ & $1.070$ & $4.568$ & $4.910$ & $8$ & $4$ \\
Salman2020Do\_R50 & \salmanIDdue \salman & $34.96\%$ & $0.535$ & $1.928$ & $1.274$ & $5.341$ & $5.998$ & $5$ & $5$ \\
Liu2023Comprehensive\_Sw