In [1]:
# methods = [
#     'lime_20', # pert
#     'shap_20', # pert
#     'intgrad', # grad
#     'gradcam', # grad
#     'fullgrad', # grad
#     'rise_20', # pert
#     'archipelago', # pert?
#     'mfaba', # attak with grad
#     'agi', # attack with grad
#     'ampe', # attack
#     'bcos', # 
#     'xdnn', # gradient x input
#     'bagnet', # by construction
#     'attn', # by construction
#     'sop', # by construction
# ]

name_mapping = {
    'lime': {'name': 'LIME', 'category': 'Post Hoc'},
    'shap': {'name': 'SHAP', 'category': 'Post Hoc'},
    'rise': {'name': 'RISE', 'category': 'Post Hoc'},
    'lime_20': {'name': 'LIME', 'category': 'Post Hoc'}, # pert
    'shap_20': {'name': 'SHAP', 'category': 'Post Hoc'}, # pert
    'intgrad': {'name': 'IG', 'category': 'Post Hoc'}, # grad
    'gradcam': {'name': 'GC', 'category': 'Post Hoc'}, # grad
    'fullgrad': {'name': 'FG', 'category': 'Post Hoc'}, # grad
    'rise_20': {'name': 'RISE', 'category': 'Post Hoc'}, # pert
    'archipelago': {'name': 'Archi.', 'category': 'Post Hoc'}, # pert?
    'mfaba': {'name': 'MFABA', 'category': 'Post Hoc'}, # attak with grad
    'agi': {'name': 'AGI', 'category': 'Post Hoc'}, # attack with grad
    'ampe': {'name': 'AMPE', 'category': 'Post Hoc'}, # attack
    'bcos': {'name': 'BCos', 'category': 'Post Hoc'}, # 
    'xdnn': {'name': 'XDNN', 'category': 'Post Hoc'}, # gradient x input
    'bagnet': {'name': 'BagNet', 'category': 'Faithful'}, # by construction
    'attn': {'name': 'FRESH', 'category': 'Faithful'}, # by construction
    'sop': {'name': 'SOP', 'category': 'Faithful'}, # by construction
}

# Faithfulness Table

In [2]:
metrics = {
    'fid': 'fids_dict.pt',
    'ins': 'inss_dict.pt',
    'del': 'dels_dict.pt'
}

In [4]:
import random
import numpy as np

def bootstrap(data_list, num_bootstrap=4, num_samples=None, seed=0):
    if num_samples is None:
        num_samples = len(data_list)
    means = []
    random.seed(seed)
    for i in range(num_bootstrap):
        exp_idxs = random.choices(list(range(num_samples)), k=num_samples)
        means.append(np.mean([data_list[di] for di in exp_idxs]))
    return np.std(means)

In [4]:
import torch
from collections import defaultdict
import numpy as np

data = defaultdict(dict)

for metric in metrics:
    metric_data = torch.load(metrics[metric])
    for k in metric_data:
        # print(metric_data[k])
        metric_data_k = metric_data[k]
        if isinstance(metric_data_k, torch.Tensor):
            metric_data_k = metric_data_k.tolist()
        metric_mean = np.mean(metric_data_k)
        metric_std = bootstrap(metric_data_k)
        
        data[name_mapping[k]['name']][metric] = {
            'mean': metric_mean,
            'std': metric_std
        }

In [5]:
data['SOP']['fid'] = {'mean': 0, 'std': 0}
data['FRESH']['fid'] = {'mean': 0, 'std': 0}

In [6]:
data

defaultdict(dict,
            {'LIME': {'fid': {'mean': 3.8659593040449547,
               'std': 0.24447320699391215},
              'ins': {'mean': 0.8594826424045171,
               'std': 0.0047404675164642385},
              'del': {'mean': 0.4759477342731027,
               'std': 0.0044879419120097755}},
             'SHAP': {'fid': {'mean': 0.015066511677321157,
               'std': 0.0055835503101662205},
              'ins': {'mean': 0.8781494490159503, 'std': 0.00675367555679661},
              'del': {'mean': 0.4211493296650733,
               'std': 0.007620442914370582}},
             'RISE': {'fid': {'mean': 0.8835330924589653,
               'std': 0.5331996520981681},
              'ins': {'mean': 0.6352594335778282,
               'std': 0.0074599545546029464},
              'del': {'mean': 0.7078599741135145,
               'std': 0.0032852447086318634}},
             'IG': {'fid': {'mean': 7.161289486885071,
               'std': 0.21186511951130466},
             

In [10]:
import numpy as np

def format_value(value, std, bold=False, italic=False):
    if value is None or np.isnan(value):
        return '-'
    formatted = f"{value:.3f} $\\pm$ {std:.3f}"
    if bold:
        formatted = f"\\textbf{{{formatted}}}"
    elif italic:
        formatted = f"\\textit{{{formatted}}}"
    return formatted

def print_table(data, name_mapping):
    print("\\begin{tabular}{c|c}")
    print("\\toprule")
    print("Method & Fidelity$\\downarrow$ \\\\")
    print("\\midrule")

    post_hoc_methods = [v['name'] for k, v in name_mapping.items() if v['category'] == 'Post Hoc']
    fidelity_scores = []

    for method in post_hoc_methods:
        if method in data and 'fid' in data[method] and data[method]['fid'] is not None:
            mean = round(data[method]['fid']['mean'], 6)
            std = round(data[method]['fid']['std'], 6)
            fidelity_scores.append((method, mean, std))

    # Sort by fidelity score (lower is better)
    fidelity_scores.sort(key=lambda x: x[1])

    # Determine best and second-best values
    best_value = fidelity_scores[0][1]
    second_best_value = fidelity_scores[1][1] if len(fidelity_scores) > 1 else None

    for method, mean, std in fidelity_scores:
        is_best = mean == best_value
        is_second_best = mean == second_best_value
        formatted_value = format_value(mean, std, bold=is_best, italic=is_second_best)
        print(f"{method} & {formatted_value} \\\\")

    print("\\bottomrule")
    print("\\end{tabular}")

# Call the function
print_table(data, name_mapping)

\begin{tabular}{c|c}
\toprule
Method & Fidelity$\downarrow$ \\
\midrule
XDNN & \textbf{0.000 $\pm$ 0.000} \\
SHAP & \textit{0.015 $\pm$ 0.006} \\
SHAP & \textit{0.015 $\pm$ 0.006} \\
RISE & 0.884 $\pm$ 0.533 \\
RISE & 0.884 $\pm$ 0.533 \\
LIME & 3.866 $\pm$ 0.244 \\
LIME & 3.866 $\pm$ 0.244 \\
AGI & 5.416 $\pm$ 0.549 \\
MFABA & 6.674 $\pm$ 0.166 \\
IG & 7.161 $\pm$ 0.212 \\
GC & 10.406 $\pm$ 1.098 \\
Archi. & 10.850 $\pm$ 0.354 \\
BCos & 13.372 $\pm$ 0.373 \\
FG & 13.567 $\pm$ 0.158 \\
AMPE & 13.671 $\pm$ 0.326 \\
\bottomrule
\end{tabular}


In [8]:
# small
import sys

sys.path.append('/shared_data0/weiqiuy/exlib/src')
sys.path.append('/shared_data0/weiqiuy/sop/src')
from sop.utils import bootstrap

metrics = {
    'ins': 'inss_dict_small.pt',
    'del': 'dels_dict_small.pt'
}

import torch
from collections import defaultdict
import numpy as np

def get_ins_del_table(metrics):
    
    
    name_mapping = {
        # 'lime': {'name': 'LIME', 'category': 'Post Hoc'},
        # 'shap': {'name': 'SHAP', 'category': 'Post Hoc'},
       
        'lime_20': {'name': 'LIME', 'category': 'Post Hoc'}, # pert
        'shap_20': {'name': 'SHAP', 'category': 'Post Hoc'}, # pert
         'rise': {'name': 'RISE', 'category': 'Post Hoc'},
        'intgrad': {'name': 'IG', 'category': 'Post Hoc'}, # grad
        'gradcam': {'name': 'GC', 'category': 'Post Hoc'}, # grad
        'fullgrad': {'name': 'FG', 'category': 'Post Hoc'}, # grad
        'rise_20': {'name': 'RISE', 'category': 'Post Hoc'}, # pert
        'archipelago': {'name': 'Archi.', 'category': 'Post Hoc'}, # pert?
        'mfaba': {'name': 'MFABA', 'category': 'Post Hoc'}, # attak with grad
        'agi': {'name': 'AGI', 'category': 'Post Hoc'}, # attack with grad
        'ampe': {'name': 'AMPE', 'category': 'Post Hoc'}, # attack
        'bcos': {'name': 'BCos', 'category': 'Post Hoc'}, # 
        'xdnn': {'name': 'XDNN', 'category': 'Post Hoc'}, # gradient x input
        'bagnet': {'name': 'BagNet', 'category': 'Faithful'}, # by construction
        'attn': {'name': 'FRESH', 'category': 'Faithful'}, # by construction
        'sop': {'name': 'SOP', 'category': 'Faithful'}, # by construction
    }
    
    data = defaultdict(dict)

    for metric in metrics:
        metric_data = torch.load(metrics[metric])
        for k in metric_data:
            # print(metric_data[k])
            metric_data_k = metric_data[k]
            if isinstance(metric_data_k, torch.Tensor):
                metric_data_k = metric_data_k.tolist()
            metric_mean = np.mean(metric_data_k)
            metric_std = bootstrap(metric_data_k)

            data[name_mapping[k]['name']][metric] = {
                'mean': metric_mean,
                'std': metric_std
            }


    def format_value(value, std, bold=False, italic=False):
        if value is None or np.isnan(value):
            return '-'
        formatted = f"{value:.3f} $\\pm$ {std:.3f}"
        if bold:
            formatted = f"\\textbf{{{formatted}}}"
        elif italic:
            formatted = f"\\textit{{{formatted}}}"
        return formatted

    def print_table(data, name_mapping):
        print("\\begin{tabularx}{\\textwidth}{c|c|*{3}{>{\\centering\\arraybackslash}X}}")
        print("\\toprule")
        print("\\multirow{2}{*}{Category} & \\multirow{2}{*}{Method} & \\multicolumn{3}{c}{\\textbf{ImageNet}} \\\\")
        print("& & Fid.$\\downarrow$ & Ins.$\\uparrow$ & Del.$\\downarrow$ \\\\")
        print("\\midrule")

        categories = ['Post Hoc', 'Faithful']
        metrics = ['fid', 'ins', 'del']

        all_values = []
        for category in categories:
            methods = [v['name'] for k, v in name_mapping.items() if v['category'] == category]
            for method in methods:
                values = []
                for metric in metrics:
                    if method in data and metric in data[method] and data[method][metric] is not None:
                        mean = round(data[method][metric]['mean'], 6)
                        std = round(data[method][metric]['std'], 6)
                        values.append((mean, std))
                    else:
                        values.append(None)
                all_values.append((category, method, values))

        # Determine best and second-best values for each column
        best_values = [min((v[0] for _, _, row in all_values for v in [row[0]] if v is not None), default=None),
                       max((v[0] for _, _, row in all_values for v in [row[1]] if v is not None), default=None),
                       min((v[0] for _, _, row in all_values for v in [row[2]] if v is not None), default=None)]

        second_best_values = [sorted(set(v[0] for _, _, row in all_values for v in [row[0]] if v is not None))[1] if len(set(v[0] for _, _, row in all_values for v in [row[0]] if v is not None)) > 1 else None,
                              sorted(set(v[0] for _, _, row in all_values for v in [row[1]] if v is not None), reverse=True)[1] if len(set(v[0] for _, _, row in all_values for v in [row[1]] if v is not None)) > 1 else None,
                              sorted(set(v[0] for _, _, row in all_values for v in [row[2]] if v is not None))[1] if len(set(v[0] for _, _, row in all_values for v in [row[2]] if v is not None)) > 1 else None]

        current_category = None
        for category, method, values in all_values:
            if category != current_category:
                if current_category is not None:
                    print("\\midrule")
                print(f"\\multirow{{{len([m for c, m, _ in all_values if c == category])}}}{{*}}{{{category}}} & ", end="")
                current_category = category
            else:
                print("& ", end="")

            formatted_values = []
            for i, value in enumerate(values):
                if value is not None:
                    is_best = value[0] == best_values[i]
                    is_second_best = value[0] == second_best_values[i]
                    formatted_values.append(format_value(value[0], value[1], bold=is_best, italic=is_second_best))
                else:
                    formatted_values.append('-')

            print(f"{method} & {' & '.join(formatted_values)} \\\\")

        print("\\bottomrule")
        print("\\end{tabularx}")

    # Your data dictionary and name_mapping remain the same
    # ...

    print_table(data, name_mapping)
get_ins_del_table(metrics)

\begin{tabularx}{\textwidth}{c|c|*{3}{>{\centering\arraybackslash}X}}
\toprule
\multirow{2}{*}{Category} & \multirow{2}{*}{Method} & \multicolumn{3}{c}{\textbf{ImageNet}} \\
& & Fid.$\downarrow$ & Ins.$\uparrow$ & Del.$\downarrow$ \\
\midrule
\multirow{13}{*}{Post Hoc} & LIME & - & 0.815 $\pm$ 0.005 & 0.428 $\pm$ 0.004 \\
& SHAP & - & \textit{0.831 $\pm$ 0.006} & 0.373 $\pm$ 0.008 \\
& RISE & - & 0.590 $\pm$ 0.008 & 0.661 $\pm$ 0.004 \\
& IG & - & 0.611 $\pm$ 0.006 & 0.617 $\pm$ 0.008 \\
& GC & - & 0.772 $\pm$ 0.007 & 0.366 $\pm$ 0.007 \\
& FG & - & 0.759 $\pm$ 0.005 & 0.383 $\pm$ 0.004 \\
& RISE & - & 0.590 $\pm$ 0.008 & 0.661 $\pm$ 0.004 \\
& Archi. & - & 0.676 $\pm$ 0.003 & 0.501 $\pm$ 0.004 \\
& MFABA & - & 0.674 $\pm$ 0.006 & 0.499 $\pm$ 0.010 \\
& AGI & - & 0.735 $\pm$ 0.006 & 0.462 $\pm$ 0.008 \\
& AMPE & - & 0.675 $\pm$ 0.007 & 0.534 $\pm$ 0.005 \\
& BCos & - & 0.257 $\pm$ 0.005 & 0.288 $\pm$ 0.008 \\
& XDNN & - & 0.199 $\pm$ 0.007 & \textit{0.156 $\pm$ 0.003} \\
\midrule
\mult

In [9]:
metrics_hist = {
    'ins': 'inss_hist_dict.pt',
    'del': 'dels_hist_dict.pt'
}
get_ins_del_table(metrics_hist)

\begin{tabularx}{\textwidth}{c|c|*{3}{>{\centering\arraybackslash}X}}
\toprule
\multirow{2}{*}{Category} & \multirow{2}{*}{Method} & \multicolumn{3}{c}{\textbf{ImageNet}} \\
& & Fid.$\downarrow$ & Ins.$\uparrow$ & Del.$\downarrow$ \\
\midrule
\multirow{13}{*}{Post Hoc} & LIME & - & 0.797 $\pm$ 0.007 & 0.555 $\pm$ 0.023 \\
& SHAP & - & \textit{0.909 $\pm$ 0.018} & 0.524 $\pm$ 0.023 \\
& RISE & - & 0.652 $\pm$ 0.043 & 0.776 $\pm$ 0.019 \\
& IG & - & 0.679 $\pm$ 0.030 & 0.786 $\pm$ 0.059 \\
& GC & - & 0.789 $\pm$ 0.021 & 0.540 $\pm$ 0.031 \\
& FG & - & 0.830 $\pm$ 0.029 & 0.447 $\pm$ 0.026 \\
& RISE & - & 0.652 $\pm$ 0.043 & 0.776 $\pm$ 0.019 \\
& Archi. & - & 0.824 $\pm$ 0.058 & 0.608 $\pm$ 0.025 \\
& MFABA & - & 0.855 $\pm$ 0.048 & 0.610 $\pm$ 0.085 \\
& AGI & - & 0.875 $\pm$ 0.048 & 0.585 $\pm$ 0.050 \\
& AMPE & - & 0.738 $\pm$ 0.078 & 0.645 $\pm$ 0.041 \\
& BCos & - & 0.574 $\pm$ 0.175 & 0.380 $\pm$ 0.032 \\
& XDNN & - & 0.245 $\pm$ 0.038 & 0.254 $\pm$ 0.044 \\
\midrule
\multirow{3}{*

In [27]:
import numpy as np

def format_value(value, std, bold=False, italic=False):
    if value is None or np.isnan(value):
        return '-'
    formatted = f"{value:.3f} $\\pm$ {std:.3f}"
    if bold:
        formatted = f"\\textbf{{{formatted}}}"
    elif italic:
        formatted = f"\\textit{{{formatted}}}"
    return formatted

def print_table(data, name_mapping):
    print("\\begin{tabularx}{\\textwidth}{c|c|*{3}{>{\\centering\\arraybackslash}X}}")
    print("\\toprule")
    print("\\multirow{2}{*}{Category} & \\multirow{2}{*}{Method} & \\multicolumn{3}{c}{\\textbf{ImageNet}} \\\\")
    print("& & Fid.$\\downarrow$ & Ins.$\\uparrow$ & Del.$\\downarrow$ \\\\")
    print("\\midrule")

    categories = ['Post Hoc', 'Faithful']
    metrics = ['fid', 'ins', 'del']

    all_values = []
    for category in categories:
        methods = [v['name'] for k, v in name_mapping.items() if v['category'] == category]
        for method in methods:
            values = []
            for metric in metrics:
                if method in data and metric in data[method] and data[method][metric] is not None:
                    mean = round(data[method][metric]['mean'], 6)
                    std = round(data[method][metric]['std'], 6)
                    values.append((mean, std))
                else:
                    values.append(None)
            all_values.append((category, method, values))

    # Determine best and second-best values for each column
    best_values = [min((v[0] for _, _, row in all_values for v in [row[0]] if v is not None), default=None),
                   max((v[0] for _, _, row in all_values for v in [row[1]] if v is not None), default=None),
                   min((v[0] for _, _, row in all_values for v in [row[2]] if v is not None), default=None)]
    
    second_best_values = [sorted(set(v[0] for _, _, row in all_values for v in [row[0]] if v is not None))[1] if len(set(v[0] for _, _, row in all_values for v in [row[0]] if v is not None)) > 1 else None,
                          sorted(set(v[0] for _, _, row in all_values for v in [row[1]] if v is not None), reverse=True)[1] if len(set(v[0] for _, _, row in all_values for v in [row[1]] if v is not None)) > 1 else None,
                          sorted(set(v[0] for _, _, row in all_values for v in [row[2]] if v is not None))[1] if len(set(v[0] for _, _, row in all_values for v in [row[2]] if v is not None)) > 1 else None]

    current_category = None
    for category, method, values in all_values:
        if category != current_category:
            if current_category is not None:
                print("\\midrule")
            print(f"\\multirow{{{len([m for c, m, _ in all_values if c == category])}}}{{*}}{{{category}}} & ", end="")
            current_category = category
        else:
            print("& ", end="")

        formatted_values = []
        for i, value in enumerate(values):
            if value is not None:
                is_best = value[0] == best_values[i]
                is_second_best = value[0] == second_best_values[i]
                formatted_values.append(format_value(value[0], value[1], bold=is_best, italic=is_second_best))
            else:
                formatted_values.append('-')

        print(f"{method} & {' & '.join(formatted_values)} \\\\")

    print("\\bottomrule")
    print("\\end{tabularx}")

# Your data dictionary and name_mapping remain the same
# ...

print_table(data, name_mapping)

\begin{tabularx}{\textwidth}{c|c|*{3}{>{\centering\arraybackslash}X}}
\toprule
\multirow{2}{*}{Category} & \multirow{2}{*}{Method} & \multicolumn{3}{c}{\textbf{ImageNet}} \\
& & Fid.$\downarrow$ & Ins.$\uparrow$ & Del.$\downarrow$ \\
\midrule
\multirow{15}{*}{Post Hoc} & LIME & 3.866 $\pm$ 0.244 & 0.859 $\pm$ 0.005 & 0.476 $\pm$ 0.004 \\
& SHAP & 0.015 $\pm$ 0.006 & \textit{0.878 $\pm$ 0.007} & 0.421 $\pm$ 0.008 \\
& RISE & 0.884 $\pm$ 0.533 & 0.635 $\pm$ 0.007 & 0.708 $\pm$ 0.003 \\
& LIME & 3.866 $\pm$ 0.244 & 0.859 $\pm$ 0.005 & 0.476 $\pm$ 0.004 \\
& SHAP & 0.015 $\pm$ 0.006 & \textit{0.878 $\pm$ 0.007} & 0.421 $\pm$ 0.008 \\
& IG & 7.161 $\pm$ 0.212 & 0.661 $\pm$ 0.006 & 0.664 $\pm$ 0.008 \\
& GC & 10.406 $\pm$ 1.098 & 0.817 $\pm$ 0.007 & 0.416 $\pm$ 0.007 \\
& FG & 13.567 $\pm$ 0.158 & 0.805 $\pm$ 0.006 & 0.430 $\pm$ 0.004 \\
& RISE & 0.884 $\pm$ 0.533 & 0.635 $\pm$ 0.007 & 0.708 $\pm$ 0.003 \\
& Archi. & 10.850 $\pm$ 0.354 & 0.719 $\pm$ 0.004 & 0.548 $\pm$ 0.004 \\
& MFABA & 6.6

# Sparsity

In [11]:
name_mapping = {
    # 'lime': {'name': 'LIME', 'category': 'Post Hoc'},
    # 'shap': {'name': 'SHAP', 'category': 'Post Hoc'},
    # 'rise': {'name': 'RISE', 'category': 'Post Hoc'},
    'lime_20': {'name': 'LIME', 'category': 'Post Hoc'}, # pert
    'shap_20': {'name': 'SHAP', 'category': 'Post Hoc'}, # pert
    'intgrad': {'name': 'IG', 'category': 'Post Hoc'}, # grad
    'gradcam': {'name': 'GC', 'category': 'Post Hoc'}, # grad
    'fullgrad': {'name': 'FG', 'category': 'Post Hoc'}, # grad
    'rise_20': {'name': 'RISE', 'category': 'Post Hoc'}, # pert
    'archipelago': {'name': 'Archi.', 'category': 'Post Hoc'}, # pert?
    'mfaba': {'name': 'MFABA', 'category': 'Post Hoc'}, # attak with grad
    'agi': {'name': 'AGI', 'category': 'Post Hoc'}, # attack with grad
    'ampe': {'name': 'AMPE', 'category': 'Post Hoc'}, # attack
    'bcos': {'name': 'BCos', 'category': 'Post Hoc'}, # 
    'xdnn': {'name': 'XDNN', 'category': 'Post Hoc'}, # gradient x input
    'bagnet': {'name': 'BagNet', 'category': 'Faithful'}, # by construction
    'attn': {'name': 'FRESH', 'category': 'Faithful'}, # by construction
    'sop': {'name': 'SOP', 'category': 'Faithful'}, # by construction
}

In [12]:
dirname = '/shared_data0/weiqiuy/sop/results/sparsity/imagenet_s'

In [13]:
import os

# for filename in os.listdir(dirname):
for method in name_mapping:
    filename = f'{method}.pt'
    break
    
filepath = os.path.join(dirname, filename)
filepath

'/shared_data0/weiqiuy/sop/results/sparsity/imagenet_s/lime_20.pt'

In [14]:
import torch

data = torch.load(filepath)

In [15]:
len(data)

2

KeyError: 0