In [None]:
import numpy as np
import json
import matplotlib.pyplot as plt
import pandas as pd
from pathlib import Path
from scipy.stats import sem

# Localization

In [None]:
def aggregate_localization(out_local, top_k=0, highest=True, mean_func=np.nanmean):
    """Summarise per-instance localization scores for every method.

    Parameters
    ----------
    out_local : dict
        Nested mapping ``noise_level -> noise_features -> method -> scores``,
        where ``scores`` is a sliceable sequence of per-instance values.
    top_k : int, default 0
        Number of entries to aggregate. ``0`` means "use every entry",
        regardless of ``highest``.
    highest : bool, default True
        If True the last ``top_k`` entries of each sequence are used,
        otherwise the first ``top_k`` entries. The sequences are not sorted
        here, so "highest" only holds if the caller stored them in ascending
        order.
    mean_func : callable, default ``np.nanmean``
        Reduction used for the ``"mean"`` entry.

    Returns
    -------
    dict
        Same nesting as ``out_local``; each method maps to a dict with
        ``"mean"`` (rounded to 2 decimals), ``"standard_error"`` and ``"se_2"``
        (both rounded to 3 decimals).

    Notes
    -----
    ``"standard_error"`` is computed with ``np.std`` and therefore becomes NaN
    as soon as the selection contains a NaN; ``"se_2"`` uses
    ``scipy.stats.sem`` with ``nan_policy='omit'`` and ignores NaNs.
    """

    def _select(scores):
        # top_k == 0 must mean "all instances" in both directions. A bare
        # ``scores[:0]`` would be empty and make every statistic NaN.
        if top_k == 0:
            return scores
        return scores[-top_k:] if highest else scores[:top_k]

    def _summarise(scores):
        # Slice once and reuse the selection for all three statistics.
        selected = _select(scores)
        return {
            "mean": np.round(mean_func(selected), 2),
            "standard_error": np.round(
                np.std(selected, ddof=1) / np.sqrt(np.size(selected)), 3
            ),
            "se_2": np.round(sem(selected, nan_policy='omit'), 3),
        }

    return {
        noise_level: {
            noise_features: {
                method: _summarise(instance_localizations)
                for method, instance_localizations in noise_features_dict.items()
            }
            for noise_features, noise_features_dict in data.items()
        }
        for noise_level, data in out_local.items()
    }


## Synthetic

### Simple

In [None]:
import json
import pandas as pd

# Result files per dataset: one JSON file for each of the 5 runs.
dataset_to_paths = {
    "synthetic_simple": [f"results/fixed_synthetic_out_localization_simple_{run}.json" for run in range(5)],
    "synthetic_complex": [f"results/fixed_synthetic_out_localization_{run}.json" for run in range(5)],
    "synthetic_mixed": [f"results/fixed_synthetic_mixed_5_out_localization_simple_{run}.json" for run in range(5)],
    "synthetic_mixed_complex": [f"results/fixed_synthetic_mixed_5_out_localization_{run}.json" for run in range(5)],
    "red_wine_simple": [f"results/fixed_red_wine_localization_simple_{run}.json" for run in range(5)],
    "red_wine_complex": [f"results/fixed_red_wine_50_out_localization_{run}.json" for run in range(5)],
    "ailerons_simple": [f"results/fixed_ailerons_localization_simple_{run}.json" for run in range(5)],
    "ailerons_complex": [f"results/fixed_ailerons_50_out_localization_{run}.json" for run in range(5)],
    "LSAT_simple": [f"results/fixed_lsat_localization_simple_{run}.json" for run in range(5)],
    "LSAT_complex": [f"results/fixed_lsat_50_out_localization_{run}.json" for run in range(5)]
}

# One dict (method -> statistic) per dataset, in dataset_to_paths order.
ma_mean_dicts = []
ra_mean_dicts = []
ma_se_dicts = []
ra_se_dicts = []

for dataset, paths in dataset_to_paths.items():
    # "mixed" datasets are read at the 10-noise-feature entry, all others at 5.
    n_noise_features = 10 if "mixed" in dataset else 5
    noise_key = str(n_noise_features)

    # Each dataset maps to a LIST of run files, so the value cannot be handed
    # to open() directly (that raises TypeError). Read every run and pool the
    # per-instance scores of noise level "2" per method.
    # NOTE(review): pooling across runs is an assumption about the intended
    # statistic; if runs share test instances the pooled SE is optimistic.
    ma_scores = {}
    ra_scores = {}
    for path in paths:
        with open(path, 'r') as f:
            out_local = json.load(f)

        for method, scores in out_local["local_localization_mass_accuracy"]["2"][noise_key].items():
            ma_scores.setdefault(method, []).extend(scores)
        for method, scores in out_local["local_localization_precision"]["2"][noise_key].items():
            ra_scores.setdefault(method, []).extend(scores)

    # Re-wrap the pooled scores in the nesting aggregate_localization expects.
    ma = aggregate_localization({"2": {noise_key: ma_scores}})["2"][noise_key]
    ra = aggregate_localization({"2": {noise_key: ra_scores}})["2"][noise_key]

    ma_mean = {key: value['mean'] for key, value in ma.items()}
    ra_mean = {key: value['mean'] for key, value in ra.items()}
    ma_se = {key: value['se_2'] for key, value in ma.items()}
    ra_se = {key: value['se_2'] for key, value in ra.items()}

    ma_mean_dicts.append(ma_mean)
    ra_mean_dicts.append(ra_mean)
    ma_se_dicts.append(ma_se)
    ra_se_dicts.append(ra_se)

# Rows = datasets (index named 'dataset'), columns = methods.
dataset_index = pd.Index(list(dataset_to_paths.keys()), name='dataset')
ma_mean_df = pd.DataFrame(ma_mean_dicts, index=dataset_index)
ra_mean_df = pd.DataFrame(ra_mean_dicts, index=dataset_index)
ma_se_df = pd.DataFrame(ma_se_dicts, index=dataset_index)
ra_se_df = pd.DataFrame(ra_se_dicts, index=dataset_index)

print(ma_mean_df)
print(ra_mean_df)
print(ma_se_df)
print(ra_se_df)


In [None]:
import json
import pandas as pd
import numpy as np

# Per-run result files for every dataset (five runs each).
dataset_to_paths = {
    "synthetic_simple": [f"results/fixed_synthetic_out_localization_simple_{run}.json" for run in range(5)],
    "synthetic_complex": [f"results/fixed_synthetic_out_localization_{run}.json" for run in range(5)],
    "synthetic_mixed": [f"results/fixed_synthetic_mixed_5_out_localization_simple_{run}.json" for run in range(5)],
    "synthetic_mixed_complex": [f"results/fixed_synthetic_mixed_5_out_localization_{run}.json" for run in range(5)],
    "red_wine_simple": [f"results/fixed_red_wine_localization_simple_{run}.json" for run in range(5)],
    "red_wine_complex": [f"results/fixed_red_wine_50_out_localization_{run}.json" for run in range(5)],
    "ailerons_simple": [f"results/fixed_ailerons_localization_simple_{run}.json" for run in range(5)],
    "ailerons_complex": [f"results/fixed_ailerons_50_out_localization_{run}.json" for run in range(5)],
    "LSAT_simple": [f"results/fixed_lsat_localization_simple_{run}.json" for run in range(5)],
    "LSAT_complex": [f"results/fixed_lsat_50_out_localization_{run}.json" for run in range(5)]
}

# One dict (method -> statistic) per dataset, in dataset_to_paths order.
ma_mean_agg, ra_mean_agg = [], []
ma_se_agg, ra_se_agg = [], []

for dataset, paths in dataset_to_paths.items():
    # Per-run means of this dataset: one dict (method -> mean) per run file.
    ma_values, ra_values = [], []

    for path in paths:
        with open(path, 'r') as f:
            out_local = json.load(f)

        # "mixed" datasets are read at the 10-noise-feature entry, all others at 5.
        n_noise_features = 10 if "mixed" in dataset else 5
        noise_key = str(n_noise_features)

        # Noise level "2" only.
        # NOTE(review): aggregate_localization rounds each per-run mean to
        # 2 decimals before the runs are combined below.
        ma = aggregate_localization(out_local["local_localization_mass_accuracy"])["2"][noise_key]
        ra = aggregate_localization(out_local["local_localization_precision"])["2"][noise_key]

        ma_values.append({method: stats['mean'] for method, stats in ma.items()})
        ra_values.append({method: stats['mean'] for method, stats in ra.items()})

    # Rows = runs, columns = methods.
    ma_df = pd.DataFrame(ma_values)
    ra_df = pd.DataFrame(ra_values)

    # Mean and spread across runs. The "*_se_*" containers hold the sample
    # standard deviation (DataFrame.std), NOT a standard error.
    for per_run_df, mean_rows, spread_rows in (
        (ma_df, ma_mean_agg, ma_se_agg),
        (ra_df, ra_mean_agg, ra_se_agg),
    ):
        mean_rows.append(per_run_df.mean().to_dict())
        spread_rows.append(per_run_df.std().to_dict())

# Rows = datasets, columns = methods.
dataset_index = list(dataset_to_paths)
ma_mean_df = pd.DataFrame(ma_mean_agg, index=dataset_index)
ra_mean_df = pd.DataFrame(ra_mean_agg, index=dataset_index)
ma_se_df = pd.DataFrame(ma_se_agg, index=dataset_index)
ra_se_df = pd.DataFrame(ra_se_agg, index=dataset_index)

for summary_df in (ma_mean_df, ra_mean_df, ma_se_df, ra_se_df):
    print(summary_df)


In [None]:
# Internal method identifiers (DataFrame column names) -> labels printed in the paper.
# The insertion order is the row order of the generated tables.
method_names = {
    'varx_ig': 'VFA-IG',
    'varx_lrp': 'VFA-LRP',
    'varx': 'VFA-SHAP',
    'clue': 'CLUE',
    'infoshap': 'InfoShap'
}

def generate_latex_table(rank_df, mass_df):
    """Build the LaTeX source of the localization table (means only).

    Parameters
    ----------
    rank_df : pandas.DataFrame
        Values for the "Average local RRA" section; indexed by dataset key,
        one column per key of ``method_names``.
    mass_df : pandas.DataFrame
        Values for the "Average local RMA" section, same layout.

    Returns
    -------
    str
        A complete ``table`` environment. Every value is formatted with three
        decimals. Only the eight dataset keys listed in ``dataset_order`` are
        read from the frames.
    """
    # Column order of the table body (matches the header written below).
    dataset_order = (
        'red_wine_simple', 'red_wine_complex',
        'ailerons_simple', 'ailerons_complex',
        'LSAT_simple', 'LSAT_complex',
        'synthetic_simple', 'synthetic_complex',
    )
    cell_sep = " &\n        "

    def method_rows(score_df):
        # One table row per method: label followed by the eight dataset cells.
        rows = []
        for method, label in method_names.items():
            cells = cell_sep.join(f"{score_df.loc[ds, method]:.3f}" for ds in dataset_order)
            rows.append(f"\n        & {label} &\n        {cells} \\\\\n        ")
        return "".join(rows)

    header = """
    \\begin{table}[hb]
    \\centering
    \\caption{Average local RRA ($K=5$) and RMA over all test set instances for all considered uncertainty explainers and datasets (1-S: simple noise model and original train set, 50-C: complex noise model and artificially enlarged train set; see \\Cref{subsec:standard_error} for standard error).
    The best-performing method is bold. VFA flavors outperform InfoSHAP and CLUE in most settings. VFA-SHAP consistently demonstrates the best performance.}\\label{tab:localization}
    \\resizebox{0.65\\textwidth}{!}{%
    \\begin{tabular}{llrrrrrrrr}
    \\toprule
    \\multicolumn{1}{c}{} &
      \\multicolumn{1}{c|}{} &
      \\multicolumn{2}{c|}{\\textbf{Red Wine}} &
      \\multicolumn{2}{c|}{\\textbf{Ailerons}} &
      \\multicolumn{2}{c|}{\\textbf{LSAT}} &
      \\multicolumn{2}{c}{\\textbf{Synthetic}} \\\\
     &
      \\multicolumn{1}{c|}{} &
      1-S &
      \\multicolumn{1}{r|}{50-C} &
      1-S &
      \\multicolumn{1}{r|}{50-C} &
      1-S &
      \\multicolumn{1}{r|}{50-C} &
      S &
      C \\\\ \\midrule
    """

    footer = """
    \\end{tabular}%
    }
    \\end{table}
    """

    pieces = [
        header,
        # rank accuracy rows
        "\\multirow{5}{*}{\\textbf{Average local RRA}}",
        method_rows(rank_df),
        "\\midrule",
        # mass accuracy rows
        "\\multirow{5}{*}{\\textbf{Average local RMA}}",
        method_rows(mass_df),
        "\\bottomrule",
        footer,
    ]
    return "".join(pieces)

# Render the means-only table from the RRA / RMA frames and show its LaTeX source.
latex_code = generate_latex_table(rank_df=ra_mean_df, mass_df=ma_mean_df)
print(latex_code)


In [None]:
def generate_latex_table_with_error(rank_df, mass_df, ra_se_df, ma_se_df, round_to=3):
    """Build the LaTeX localization table with inline ``mean \\pm error`` cells.

    NOTE: a later cell of this notebook defines a function with the same name;
    once that cell has run, it replaces this definition.

    Parameters
    ----------
    rank_df, mass_df : pandas.DataFrame
        Values for the RRA and RMA sections; indexed by dataset key, one
        column per key of the module-level ``method_names``.
    ra_se_df, ma_se_df : pandas.DataFrame
        Error terms printed after ``\\pm``; same layout as the value frames.
    round_to : int, default 3
        Number of decimals used for every printed number.

    Returns
    -------
    str
        A complete ``table`` environment.
    """
    # Column order of the table body (matches the header written below).
    dataset_order = (
        'red_wine_simple', 'red_wine_complex',
        'ailerons_simple', 'ailerons_complex',
        'LSAT_simple', 'LSAT_complex',
        'synthetic_simple', 'synthetic_complex',
    )
    cell_sep = " &\n        "

    def method_rows(mean_df, se_df):
        # One table row per method; every cell is "$mean \pm error$".
        rows = []
        for method, label in method_names.items():
            cells = cell_sep.join(
                f"${mean_df.loc[ds, method]:.{round_to}f} \\pm {se_df.loc[ds, method]:.{round_to}f}$"
                for ds in dataset_order
            )
            rows.append(f"\n        & {label} &\n        {cells} \\\\\n        ")
        return "".join(rows)

    # The header has no interpolated values, so a plain string is used.
    header = """
    \\begin{table}[hb]
    \\centering
    \\caption{Average local RRA ($K=5$) and RMA over all test set instances for all considered uncertainty explainers and datasets (1-S: simple noise model and original train set, 50-C: complex noise model and artificially enlarged train set; see \\Cref{subsec:standard_error} for standard error).
    The best-performing method is bold. VFA flavors outperform InfoSHAP and CLUE in most settings. VFA-SHAP consistently demonstrates the best performance.}\\label{tab:localization}
    \\resizebox{0.65\\textwidth}{!}{%
    \\begin{tabular}{llrrrrrrrr}
    \\toprule
    \\multicolumn{1}{c}{} &
      \\multicolumn{1}{c|}{} &
      \\multicolumn{2}{c|}{\\textbf{Red Wine}} &
      \\multicolumn{2}{c|}{\\textbf{Ailerons}} &
      \\multicolumn{2}{c|}{\\textbf{LSAT}} &
      \\multicolumn{2}{c}{\\textbf{Synthetic}} \\\\
     &
      \\multicolumn{1}{c|}{} &
      1-S &
      \\multicolumn{1}{r|}{50-C} &
      1-S &
      \\multicolumn{1}{r|}{50-C} &
      1-S &
      \\multicolumn{1}{r|}{50-C} &
      S &
      C \\\\ \\midrule
    """

    footer = """
    \\end{tabular}%
    }
    \\end{table}
    """

    pieces = [
        header,
        # rank accuracy rows
        "\\multirow{5}{*}{\\textbf{Average local RRA}}",
        method_rows(rank_df, ra_se_df),
        "\\midrule",
        # mass accuracy rows
        "\\multirow{5}{*}{\\textbf{Average local RMA}}",
        method_rows(mass_df, ma_se_df),
        "\\bottomrule",
        footer,
    ]
    return "".join(pieces)

# Example call: print every number with two decimals instead of the default three.
latex_code_with_error = generate_latex_table_with_error(
    rank_df=ra_mean_df,
    mass_df=ma_mean_df,
    ra_se_df=ra_se_df,
    ma_se_df=ma_se_df,
    round_to=2,
)
print(latex_code_with_error)


In [None]:
def generate_latex_table_with_error(rank_df, mass_df, ra_se_df, ma_se_df, round_to=3):
    """Build the LaTeX localization table with the error stacked below each mean.

    Every method occupies two table lines: the first holds the method label
    and the eight means, the second holds the eight ``\\pm error`` terms in the
    same columns. This definition shares its name with the function defined
    in the previous notebook cell and replaces it once this cell has run.

    Parameters
    ----------
    rank_df, mass_df : pandas.DataFrame
        Values for the RRA and RMA sections; indexed by dataset key, one
        column per key of the module-level ``method_names``.
    ra_se_df, ma_se_df : pandas.DataFrame
        Error terms printed after ``\\pm``; same layout as the value frames.
    round_to : int, default 3
        Number of decimals used for every printed number.

    Returns
    -------
    str
        A complete ``table`` environment whose body rows all have ten columns.
    """
    # Column order of the table body (matches the header written below).
    dataset_order = (
        'red_wine_simple', 'red_wine_complex',
        'ailerons_simple', 'ailerons_complex',
        'LSAT_simple', 'LSAT_complex',
        'synthetic_simple', 'synthetic_complex',
    )
    cell_sep = " &\n        "
    # Each method takes two lines, so the section label must span twice the
    # number of methods for \multirow to centre it over the whole section.
    n_section_lines = 2 * len(method_names)

    def method_rows(mean_df, se_df):
        # Ending the row after every single mean would leave rows with only
        # 3-4 cells, so no value would sit under its dataset header. Emit one
        # full line of means followed by one full line of errors instead.
        rows = []
        for method, label in method_names.items():
            means = cell_sep.join(
                f"${mean_df.loc[ds, method]:.{round_to}f}$" for ds in dataset_order
            )
            errors = cell_sep.join(
                f"$\\pm {se_df.loc[ds, method]:.{round_to}f}$" for ds in dataset_order
            )
            rows.append(
                f"\n        & {label} &\n        {means} \\\\"
                f"\n        & &\n        {errors} \\\\\n        "
            )
        return "".join(rows)

    # The header has no interpolated values, so a plain string is used.
    header = """
    \\begin{table}[hb]
    \\centering
    \\caption{Average local RRA ($K=5$) and RMA over all test set instances for all considered uncertainty explainers and datasets (1-S: simple noise model and original train set, 50-C: complex noise model and artificially enlarged train set; see \\Cref{subsec:standard_error} for standard error).
    The best-performing method is bold. VFA flavors outperform InfoSHAP and CLUE in most settings. VFA-SHAP consistently demonstrates the best performance.}\\label{tab:localization}
    \\resizebox{0.8\\textwidth}{!}{%
    \\begin{tabular}{llrrrrrrrr}
    \\toprule
    \\multicolumn{1}{c}{} &
      \\multicolumn{1}{c|}{} &
      \\multicolumn{2}{c|}{\\textbf{Red Wine}} &
      \\multicolumn{2}{c|}{\\textbf{Ailerons}} &
      \\multicolumn{2}{c|}{\\textbf{LSAT}} &
      \\multicolumn{2}{c}{\\textbf{Synthetic}} \\\\
     &
      \\multicolumn{1}{c|}{} &
      1-S &
      \\multicolumn{1}{r|}{50-C} &
      1-S &
      \\multicolumn{1}{r|}{50-C} &
      1-S &
      \\multicolumn{1}{r|}{50-C} &
      S &
      C \\\\
    \\midrule
    """

    footer = """
    \\end{tabular}%
    }
    \\end{table}
    """

    pieces = [
        header,
        # rank accuracy rows
        "\\multirow{" + str(n_section_lines) + "}{*}{\\textbf{Average local RRA}}",
        method_rows(rank_df, ra_se_df),
        "\\midrule",
        # mass accuracy rows
        "\\multirow{" + str(n_section_lines) + "}{*}{\\textbf{Average local RMA}}",
        method_rows(mass_df, ma_se_df),
        "\\bottomrule",
        footer,
    ]
    return "".join(pieces)


# Render the table with error terms, printing every number with two decimals.
latex_code_with_error = generate_latex_table_with_error(
    rank_df=ra_mean_df,
    mass_df=ma_mean_df,
    ra_se_df=ra_se_df,
    ma_se_df=ma_se_df,
    round_to=2,
)
print(latex_code_with_error)