In [1]:
import numpy as np
import json
import matplotlib.pyplot as plt
import pandas as pd
from pathlib import Path
from scipy.stats import sem

# Localization

In [2]:
def aggregate_localization(out_local, top_k=-0, highest=True, mean_func=np.nanmean):
    return {
        noise_level: {
            noise_features: {
                method: {
                    "mean": np.round(mean_func(instance_localizations[-(top_k):] if highest else instance_localizations[:top_k]), 2),
                    "standard_error": np.round(np.std(instance_localizations[-(top_k):] if highest else instance_localizations[:top_k], ddof=1) / np.sqrt(np.size(instance_localizations[-(top_k):] if highest else instance_localizations[:top_k])), 3),
                    "se_2": np.round(sem(instance_localizations[-(top_k):] if highest else instance_localizations[:top_k], nan_policy='omit'),3)
                    }
                for method, instance_localizations in noise_features_dict.items()
            }
            for noise_features, noise_features_dict in data.items()
        }
        for noise_level, data in out_local.items()
    }    


## Synthetic

### Simple

In [3]:
with open('fixed_synthetic_out_localization_simple_1.json', 'r') as f:
  syn_simple_out_localization_1 = json.load(f)
with open('fixed_synthetic_out_localization_simple_2.json', 'r') as f:
  syn_simple_out_localization_2 = json.load(f)
with open('fixed_synthetic_out_localization_simple_3.json', 'r') as f:
  syn_simple_out_localization_3 = json.load(f)


In [4]:
s_rank_aggr_1 = aggregate_localization(syn_simple_out_localization_1["local_localization_precision"], top_k=-0, highest=True)
print("Rank", s_rank_aggr_1["2"]["5"])

s_mass_aggr_1 = aggregate_localization(syn_simple_out_localization_1["local_localization_mass_accuracy"], top_k=-0, highest=True)
print("Mass", s_mass_aggr_1["2"]["5"])

Rank {'varx_ig': {'mean': 0.78, 'standard_error': 0.006, 'se_2': 0.006}, 'varx_lrp': {'mean': 0.75, 'standard_error': 0.006, 'se_2': 0.006}, 'varx': {'mean': 0.88, 'standard_error': 0.004, 'se_2': 0.004}, 'clue': {'mean': 0.07, 'standard_error': 0.003, 'se_2': 0.003}, 'infoshap': {'mean': 0.59, 'standard_error': 0.005, 'se_2': 0.005}}
Mass {'varx_ig': {'mean': 0.53, 'standard_error': 0.004, 'se_2': 0.004}, 'varx_lrp': {'mean': 0.5, 'standard_error': 0.004, 'se_2': 0.004}, 'varx': {'mean': 0.79, 'standard_error': 0.003, 'se_2': 0.003}, 'clue': {'mean': 0.07, 'standard_error': 0.001, 'se_2': 0.001}, 'infoshap': {'mean': 0.31, 'standard_error': 0.003, 'se_2': 0.003}}


In [5]:
s_rank_aggr_2 = aggregate_localization(syn_simple_out_localization_2["local_localization_precision"], top_k=-0, highest=True)
print("Rank", s_rank_aggr_2["2"]["5"])

s_mass_aggr_2 = aggregate_localization(syn_simple_out_localization_2["local_localization_mass_accuracy"], top_k=-0, highest=True)
print("Mass", s_mass_aggr_2["2"]["5"])

Rank {'varx_ig': {'mean': 0.75, 'standard_error': 0.006, 'se_2': 0.006}, 'varx_lrp': {'mean': 0.74, 'standard_error': 0.006, 'se_2': 0.006}, 'varx': {'mean': 0.86, 'standard_error': 0.004, 'se_2': 0.004}, 'clue': {'mean': 0.06, 'standard_error': 0.003, 'se_2': 0.003}, 'infoshap': {'mean': 0.59, 'standard_error': 0.006, 'se_2': 0.006}}
Mass {'varx_ig': {'mean': 0.5, 'standard_error': 0.004, 'se_2': 0.004}, 'varx_lrp': {'mean': 0.48, 'standard_error': 0.004, 'se_2': 0.004}, 'varx': {'mean': 0.77, 'standard_error': 0.003, 'se_2': 0.003}, 'clue': {'mean': 0.07, 'standard_error': 0.001, 'se_2': 0.001}, 'infoshap': {'mean': 0.31, 'standard_error': 0.003, 'se_2': 0.003}}


In [6]:
s_rank_aggr_3 = aggregate_localization(syn_simple_out_localization_3["local_localization_precision"], top_k=-0, highest=True)
print("Rank", s_rank_aggr_3["2"]["5"])

s_mass_aggr_3 = aggregate_localization(syn_simple_out_localization_3["local_localization_mass_accuracy"], top_k=-0, highest=True)
print("Mass", s_mass_aggr_3["2"]["5"])

Rank {'varx_ig': {'mean': 0.78, 'standard_error': 0.006, 'se_2': 0.006}, 'varx_lrp': {'mean': 0.75, 'standard_error': 0.006, 'se_2': 0.006}, 'varx': {'mean': 0.86, 'standard_error': 0.004, 'se_2': 0.004}, 'clue': {'mean': 0.06, 'standard_error': 0.003, 'se_2': 0.003}, 'infoshap': {'mean': 0.6, 'standard_error': 0.006, 'se_2': 0.006}}
Mass {'varx_ig': {'mean': 0.52, 'standard_error': 0.004, 'se_2': 0.004}, 'varx_lrp': {'mean': 0.49, 'standard_error': 0.004, 'se_2': 0.004}, 'varx': {'mean': 0.77, 'standard_error': 0.003, 'se_2': 0.003}, 'clue': {'mean': 0.07, 'standard_error': 0.001, 'se_2': 0.001}, 'infoshap': {'mean': 0.32, 'standard_error': 0.003, 'se_2': 0.003}}


### Complex

In [7]:
with open('fixed_synthetic_out_localization_1.json', 'r') as f:
  syn_out_localization_1 = json.load(f)
with open('fixed_synthetic_out_localization_2.json', 'r') as f:
  syn_out_localization_2 = json.load(f)
with open('fixed_synthetic_out_localization_3.json', 'r') as f:
  syn_out_localization_3 = json.load(f)

In [8]:
c_rank_aggr_1 = aggregate_localization(syn_out_localization_1["local_localization_precision"], top_k=-0, highest=True)
print("Rank", c_rank_aggr_1["2"]["5"])

c_mass_aggr_1 = aggregate_localization(syn_out_localization_1["local_localization_mass_accuracy"], top_k=-0, highest=True)
print("Mass", c_mass_aggr_1["2"]["5"])

Rank {'varx_ig': {'mean': 0.39, 'standard_error': 0.006, 'se_2': 0.006}, 'varx_lrp': {'mean': 0.43, 'standard_error': 0.006, 'se_2': 0.006}, 'varx': {'mean': 0.73, 'standard_error': 0.005, 'se_2': 0.005}, 'clue': {'mean': 0.06, 'standard_error': 0.003, 'se_2': 0.003}, 'infoshap': {'mean': 0.56, 'standard_error': 0.005, 'se_2': 0.005}}
Mass {'varx_ig': {'mean': 0.25, 'standard_error': 0.004, 'se_2': 0.004}, 'varx_lrp': {'mean': 0.28, 'standard_error': 0.003, 'se_2': 0.003}, 'varx': {'mean': 0.46, 'standard_error': 0.003, 'se_2': 0.003}, 'clue': {'mean': 0.07, 'standard_error': 0.001, 'se_2': 0.001}, 'infoshap': {'mean': 0.28, 'standard_error': 0.003, 'se_2': 0.003}}


In [9]:
c_rank_aggr_2 = aggregate_localization(syn_out_localization_2["local_localization_precision"], top_k=-0, highest=True)
print("Rank", c_rank_aggr_2["2"]["5"])

c_mass_aggr_2 = aggregate_localization(syn_out_localization_2["local_localization_mass_accuracy"], top_k=-0, highest=True)
print("Mass", c_mass_aggr_2["2"]["5"])

Rank {'varx_ig': {'mean': 0.39, 'standard_error': 0.005, 'se_2': 0.005}, 'varx_lrp': {'mean': 0.37, 'standard_error': 0.005, 'se_2': 0.005}, 'varx': {'mean': 0.59, 'standard_error': 0.005, 'se_2': 0.005}, 'clue': {'mean': 0.07, 'standard_error': 0.003, 'se_2': 0.003}, 'infoshap': {'mean': 0.44, 'standard_error': 0.005, 'se_2': 0.005}}
Mass {'varx_ig': {'mean': 0.23, 'standard_error': 0.003, 'se_2': 0.003}, 'varx_lrp': {'mean': 0.23, 'standard_error': 0.003, 'se_2': 0.003}, 'varx': {'mean': 0.33, 'standard_error': 0.003, 'se_2': 0.003}, 'clue': {'mean': 0.07, 'standard_error': 0.001, 'se_2': 0.001}, 'infoshap': {'mean': 0.23, 'standard_error': 0.002, 'se_2': 0.002}}


In [10]:
c_rank_aggr_3 = aggregate_localization(syn_out_localization_3["local_localization_precision"], top_k=-0, highest=True)
print("Rank", c_rank_aggr_3["2"]["5"])

c_mass_aggr_3 = aggregate_localization(syn_out_localization_3["local_localization_mass_accuracy"], top_k=-0, highest=True)
print("Mass", c_mass_aggr_3["2"]["5"])

Rank {'varx_ig': {'mean': 0.36, 'standard_error': 0.006, 'se_2': 0.006}, 'varx_lrp': {'mean': 0.38, 'standard_error': 0.005, 'se_2': 0.005}, 'varx': {'mean': 0.64, 'standard_error': 0.005, 'se_2': 0.005}, 'clue': {'mean': 0.06, 'standard_error': 0.003, 'se_2': 0.003}, 'infoshap': {'mean': 0.5, 'standard_error': 0.005, 'se_2': 0.005}}
Mass {'varx_ig': {'mean': 0.24, 'standard_error': 0.003, 'se_2': 0.003}, 'varx_lrp': {'mean': 0.25, 'standard_error': 0.003, 'se_2': 0.003}, 'varx': {'mean': 0.4, 'standard_error': 0.003, 'se_2': 0.003}, 'clue': {'mean': 0.07, 'standard_error': 0.001, 'se_2': 0.001}, 'infoshap': {'mean': 0.26, 'standard_error': 0.002, 'se_2': 0.002}}


## Red Wine

In [11]:
with open('fixed_red_wine_localization_simple.json', 'r') as f:
    rw_simple_out_localization_1 = json.load(f)
with open('fixed_red_wine_50_out_localization.json', 'r') as f:
    rw_out_localization_50 = json.load(f)

In [12]:
rw_rank_aggr_simple = aggregate_localization(rw_simple_out_localization_1["local_localization_precision"], top_k=-0, highest=True)
print("Rank", rw_rank_aggr_simple["2"]["5"])

rw_mass_aggr_simple = aggregate_localization(rw_simple_out_localization_1["local_localization_mass_accuracy"], top_k=-0, highest=True)
print("Mass", rw_mass_aggr_simple["2"]["5"])

Rank {'varx_ig': {'mean': 0.66, 'standard_error': 0.01, 'se_2': 0.01}, 'varx_lrp': {'mean': 0.66, 'standard_error': 0.011, 'se_2': 0.011}, 'varx': {'mean': 0.89, 'standard_error': 0.007, 'se_2': 0.007}, 'clue': {'mean': 0.58, 'standard_error': 0.01, 'se_2': 0.01}, 'infoshap': {'mean': 0.37, 'standard_error': 0.01, 'se_2': 0.01}}
Mass {'varx_ig': {'mean': 0.62, 'standard_error': 0.009, 'se_2': 0.009}, 'varx_lrp': {'mean': 0.6, 'standard_error': 0.01, 'se_2': 0.01}, 'varx': {'mean': 0.9, 'standard_error': 0.004, 'se_2': 0.004}, 'clue': {'mean': 0.52, 'standard_error': 0.008, 'se_2': 0.008}, 'infoshap': {'mean': 0.35, 'standard_error': 0.006, 'se_2': 0.006}}


In [13]:
rw_rank_aggr_50 = aggregate_localization(rw_out_localization_50["local_localization_precision"], top_k=-0, highest=True)
print("Rank", rw_rank_aggr_50["2"]["5"])

rw_mass_aggr_50 = aggregate_localization(rw_out_localization_50["local_localization_mass_accuracy"], top_k=-0, highest=True)
print("Mass", rw_mass_aggr_50["2"]["5"])

Rank {'varx_ig': {'mean': 0.67, 'standard_error': 0.01, 'se_2': 0.01}, 'varx_lrp': {'mean': 0.67, 'standard_error': 0.01, 'se_2': 0.01}, 'varx': {'mean': 0.93, 'standard_error': 0.006, 'se_2': 0.006}, 'clue': {'mean': 0.66, 'standard_error': 0.009, 'se_2': 0.009}, 'infoshap': {'mean': 0.77, 'standard_error': 0.009, 'se_2': 0.009}}
Mass {'varx_ig': {'mean': 0.71, 'standard_error': 0.01, 'se_2': 0.01}, 'varx_lrp': {'mean': 0.71, 'standard_error': 0.009, 'se_2': 0.009}, 'varx': {'mean': 0.94, 'standard_error': 0.002, 'se_2': 0.002}, 'clue': {'mean': 0.62, 'standard_error': 0.007, 'se_2': 0.007}, 'infoshap': {'mean': 0.72, 'standard_error': 0.006, 'se_2': 0.006}}


## Ailerons

In [14]:
with open('fixed_ailerons_localization_simple.json', 'r') as f:
    a_simple_out_localization_1 = json.load(f)
with open('fixed_ailerons_50_out_localization.json', 'r') as f:
    a_out_localization_50 = json.load(f)

In [15]:
a_rank_aggr_simple = aggregate_localization(a_simple_out_localization_1["local_localization_precision"], top_k=-0, highest=True)
print("Rank", a_rank_aggr_simple["2"]["5"])

a_mass_aggr_simple = aggregate_localization(a_simple_out_localization_1["local_localization_mass_accuracy"], top_k=-0, highest=True)
print("Mass", a_mass_aggr_simple["2"]["5"])

Rank {'varx_ig': {'mean': 0.81, 'standard_error': 0.003, 'se_2': 0.003}, 'varx_lrp': {'mean': 0.8, 'standard_error': 0.003, 'se_2': 0.003}, 'varx': {'mean': 0.86, 'standard_error': 0.003, 'se_2': 0.003}, 'clue': {'mean': 0.52, 'standard_error': 0.003, 'se_2': 0.003}, 'infoshap': {'mean': 0.51, 'standard_error': 0.004, 'se_2': 0.004}}
Mass {'varx_ig': {'mean': 0.78, 'standard_error': 0.003, 'se_2': 0.003}, 'varx_lrp': {'mean': 0.77, 'standard_error': 0.003, 'se_2': 0.003}, 'varx': {'mean': 0.86, 'standard_error': 0.002, 'se_2': 0.002}, 'clue': {'mean': 0.36, 'standard_error': 0.002, 'se_2': 0.002}, 'infoshap': {'mean': 0.39, 'standard_error': 0.002, 'se_2': 0.002}}


In [16]:
a_rank_aggr_50 = aggregate_localization(a_out_localization_50["local_localization_precision"], top_k=-0, highest=True)
print("Rank", a_rank_aggr_50["2"]["5"])

a_mass_aggr_50 = aggregate_localization(a_out_localization_50["local_localization_mass_accuracy"], top_k=-0, highest=True)
print("Mass", a_mass_aggr_50["2"]["5"])

Rank {'varx_ig': {'mean': 0.74, 'standard_error': 0.003, 'se_2': 0.003}, 'varx_lrp': {'mean': 0.69, 'standard_error': 0.005, 'se_2': 0.005}, 'varx': {'mean': 0.9, 'standard_error': 0.002, 'se_2': 0.002}, 'clue': {'mean': 0.6, 'standard_error': 0.003, 'se_2': 0.003}, 'infoshap': {'mean': 0.93, 'standard_error': 0.002, 'se_2': 0.002}}
Mass {'varx_ig': {'mean': 0.79, 'standard_error': 0.004, 'se_2': 0.004}, 'varx_lrp': {'mean': 0.76, 'standard_error': nan, 'se_2': 0.005}, 'varx': {'mean': 0.9, 'standard_error': 0.001, 'se_2': 0.001}, 'clue': {'mean': 0.48, 'standard_error': 0.002, 'se_2': 0.002}, 'infoshap': {'mean': 0.86, 'standard_error': 0.001, 'se_2': 0.001}}


## LSAT

In [17]:
with open('fixed_lsat_localization_simple.json', 'r') as f:
    lsat_simple_out_localization_1 = json.load(f)
with open('fixed_lsat_50_out_localization.json', 'r') as f:
    lsat_out_localization_50 = json.load(f)

In [18]:
lsat_rank_aggr_simple = aggregate_localization(lsat_simple_out_localization_1["local_localization_precision"], top_k=-0, highest=True)
print("Rank", lsat_rank_aggr_simple["2"]["5"])

lsat_mass_aggr_simple = aggregate_localization(lsat_simple_out_localization_1["local_localization_mass_accuracy"], top_k=-0, highest=True)
print("Mass", lsat_mass_aggr_simple["2"]["5"])

Rank {'varx_ig': {'mean': 0.79, 'standard_error': 0.002, 'se_2': 0.002}, 'varx_lrp': {'mean': 0.78, 'standard_error': 0.002, 'se_2': 0.002}, 'varx': {'mean': 0.92, 'standard_error': 0.002, 'se_2': 0.002}, 'clue': {'mean': 0.5, 'standard_error': 0.002, 'se_2': 0.002}, 'infoshap': {'mean': 0.78, 'standard_error': 0.002, 'se_2': 0.002}}
Mass {'varx_ig': {'mean': 0.8, 'standard_error': 0.002, 'se_2': 0.002}, 'varx_lrp': {'mean': 0.79, 'standard_error': 0.002, 'se_2': 0.002}, 'varx': {'mean': 0.94, 'standard_error': 0.001, 'se_2': 0.001}, 'clue': {'mean': 0.49, 'standard_error': nan, 'se_2': 0.002}, 'infoshap': {'mean': 0.78, 'standard_error': 0.002, 'se_2': 0.002}}


In [19]:
lsat_rank_aggr_50 = aggregate_localization(lsat_out_localization_50["local_localization_precision"], top_k=-0, highest=True)
print("Rank", lsat_rank_aggr_50["2"]["5"])

lsat_mass_aggr_50 = aggregate_localization(lsat_out_localization_50["local_localization_mass_accuracy"], top_k=-0, highest=True)
print("Mass", lsat_mass_aggr_50["2"]["5"])

Rank {'varx_ig': {'mean': 0.81, 'standard_error': 0.002, 'se_2': 0.002}, 'varx_lrp': {'mean': 0.74, 'standard_error': 0.003, 'se_2': 0.003}, 'varx': {'mean': 0.95, 'standard_error': 0.001, 'se_2': 0.001}, 'clue': {'mean': 0.49, 'standard_error': 0.002, 'se_2': 0.002}, 'infoshap': {'mean': 0.95, 'standard_error': 0.001, 'se_2': 0.001}}
Mass {'varx_ig': {'mean': 0.89, 'standard_error': 0.001, 'se_2': 0.001}, 'varx_lrp': {'mean': 0.82, 'standard_error': nan, 'se_2': 0.003}, 'varx': {'mean': 0.96, 'standard_error': 0.001, 'se_2': 0.001}, 'clue': {'mean': 0.52, 'standard_error': 0.002, 'se_2': 0.002}, 'infoshap': {'mean': 0.96, 'standard_error': 0.001, 'se_2': 0.001}}


In [None]:
import numpy as np
import json
from pathlib import Path

def create_latex_table():
    # Function to format mean ± std as string
    def format_metric(mean, se):
        return f"{mean:.2f} ± {se:.2f}"

    # Initialize the LaTeX table content
    latex_content = [
        "\\begin{table}[h]",
        "\\centering",
        "\\caption{Localization Results}",
        "\\label{tab:localization}",
        "\\begin{tabular}{lcccccc}",
        "\\toprule",
        "Dataset & Metric & VarX-IG & VarX-LRP & VarX & CLUE & InfoShap \\\\",
        "\\midrule"
    ]

    # Load all datasets
    datasets = {
        "Synthetic (Simple)": [
            ("fixed_synthetic_out_localization_simple_1.json", "syn_simple_out_localization_1"),
            ("fixed_synthetic_out_localization_simple_2.json", "syn_simple_out_localization_2"),
            ("fixed_synthetic_out_localization_simple_3.json", "syn_simple_out_localization_3")
        ],
        "Synthetic (Complex)": [
            ("fixed_synthetic_out_localization_1.json", "syn_out_localization_1"),
            ("fixed_synthetic_out_localization_2.json", "syn_out_localization_2"),
            ("fixed_synthetic_out_localization_3.json", "syn_out_localization_3")
        ],
        "Red Wine": [
            ("fixed_red_wine_localization_simple.json", "rw_simple_out_localization_1"),
            ("fixed_red_wine_50_out_localization.json", "rw_out_localization_50")
        ],
        "Ailerons": [
            ("fixed_ailerons_localization_simple.json", "a_simple_out_localization_1"),
            ("fixed_ailerons_50_out_localization.json", "a_out_localization_50")
        ],
        "LSAT": [
            ("fixed_lsat_localization_simple.json", "lsat_simple_out_localization_1"),
            ("fixed_lsat_50_out_localization.json", "lsat_out_localization_50")
        ]
    }


    # Process each dataset
    for dataset_name, files in datasets.items():
        # Initialize arrays to store results across runs
        rank_results = []
        mass_results = []
        
        # Process each file for the dataset
        for filename, _ in files:
            try:
                with open(filename, 'r') as f:
                    data = json.load(f)
                
                # Calculate metrics
                rank_aggr = aggregate_localization(data["local_localization_precision"])
                mass_aggr = aggregate_localization(data["local_localization_mass_accuracy"])
                
                rank_results.append(rank_aggr["2"]["5"])
                mass_results.append(mass_aggr["2"]["5"])
            except FileNotFoundError:
                print(f"Warning: File {filename} not found")
                continue

        # Average results across runs if we have multiple runs
        if rank_results and mass_results:
            # Process rank results
            methods = ["varx_ig", "varx_lrp", "varx", "clue", "infoshap"]
            rank_means = {method: np.mean([run[method]["mean"] for run in rank_results]) for method in methods}
            rank_ses = {method: np.mean([run[method]["standard_error"] for run in rank_results]) for method in methods}
            
            # Process mass results
            mass_means = {method: np.mean([run[method]["mean"] for run in mass_results]) for method in methods}
            mass_ses = {method: np.mean([run[method]["standard_error"] for run in mass_results]) for method in methods}

            # Add to LaTeX table
            latex_content.extend([
                f"{dataset_name} & Rank & {format_metric(rank_means['varx_ig'], rank_ses['varx_ig'])} & {format_metric(rank_means['varx_lrp'], rank_ses['varx_lrp'])} & {format_metric(rank_means['varx'], rank_ses['varx'])} & {format_metric(rank_means['clue'], rank_ses['clue'])} & {format_metric(rank_means['infoshap'], rank_ses['infoshap'])} \\\\",
                f"& Mass & {format_metric(mass_means['varx_ig'], mass_ses['varx_ig'])} & {format_metric(mass_means['varx_lrp'], mass_ses['varx_lrp'])} & {format_metric(mass_means['varx'], mass_ses['varx'])} & {format_metric(mass_means['clue'], mass_ses['clue'])} & {format_metric(mass_means['infoshap'], mass_ses['infoshap'])} \\\\"
            ])

    # Close the table
    latex_content.extend([
        "\\bottomrule",
        "\\end{tabular}",
        "\\end{table}"
    ])

    # Write to file
    with open('localization_results.tex', 'w') as f:
        f.write('\n'.join(latex_content))

create_latex_table()

# Robustness

In [None]:
list(Path(".").glob("*lipschitz*.json"))

[]

In [None]:
# Load json with all resulst and just extract the Lipschitz metric for each method
for file_path in Path(".").glob("*lipschitz*.json"):
    with open(file_path, 'r') as f:
        lipschitz = json.load(f)
    pd.DataFrame({
        method: data["L_out"]
        for method, data in lipschitz.items()
    }).to_csv(f"../../plotting/data/lipschitz/{file_path.stem}.csv", index=False)