In [2]:
import numpy as np
import json
import matplotlib.pyplot as plt
import pandas as pd
from pathlib import Path
from scipy.stats import sem

# Localization

In [2]:
def aggregate_localization(out_local, top_k=-0, highest=True, mean_func=np.nanmean):
    return {
        noise_level: {
            noise_features: {
                method: {
                    "mean": np.round(mean_func(instance_localizations[-(top_k):] if highest else instance_localizations[:top_k]), 2),
                    "standard_error": np.round(np.std(instance_localizations[-(top_k):] if highest else instance_localizations[:top_k], ddof=1) / np.sqrt(np.size(instance_localizations[-(top_k):] if highest else instance_localizations[:top_k])), 3),
                    "se_2": np.round(sem(instance_localizations[-(top_k):] if highest else instance_localizations[:top_k], nan_policy='omit'),3)
                    }
                for method, instance_localizations in noise_features_dict.items()
            }
            for noise_features, noise_features_dict in data.items()
        }
        for noise_level, data in out_local.items()
    }    


## Synthetic Mixed

### Simple

In [5]:
with open('fixed_synthetic_mixed_5_out_localization_simple.json', 'r') as f:
  syn_mixed_simple_out_localization = json.load(f)

In [7]:
s_rank_aggr_1 = aggregate_localization(syn_mixed_simple_out_localization["local_localization_precision"], top_k=-0, highest=True)
print("Rank", s_rank_aggr_1["2"]["10"])

s_mass_aggr_1 = aggregate_localization(syn_mixed_simple_out_localization["local_localization_mass_accuracy"], top_k=-0, highest=True)
print("Mass", s_mass_aggr_1["2"]["10"])

Rank {'varx_ig': {'mean': 0.59, 'standard_error': 0.004, 'se_2': 0.004}, 'varx_lrp': {'mean': 0.58, 'standard_error': 0.004, 'se_2': 0.004}, 'varx': {'mean': 0.71, 'standard_error': 0.003, 'se_2': 0.003}, 'clue': {'mean': 0.12, 'standard_error': 0.003, 'se_2': 0.003}, 'infoshap': {'mean': 0.15, 'standard_error': 0.003, 'se_2': 0.003}}
Mass {'varx_ig': {'mean': 0.5, 'standard_error': 0.003, 'se_2': 0.003}, 'varx_lrp': {'mean': 0.48, 'standard_error': 0.003, 'se_2': 0.003}, 'varx': {'mean': 0.68, 'standard_error': 0.003, 'se_2': 0.003}, 'clue': {'mean': 0.12, 'standard_error': 0.001, 'se_2': 0.001}, 'infoshap': {'mean': 0.14, 'standard_error': 0.001, 'se_2': 0.001}}


### Complex

In [8]:
with open('fixed_synthetic_mixed_5_out_localization.json', 'r') as f:
  syn_mixed_out_localization = json.load(f)

In [9]:
s_rank_aggr_1 = aggregate_localization(syn_mixed_out_localization["local_localization_precision"], top_k=-0, highest=True)
print("Rank", s_rank_aggr_1["2"]["10"])

s_mass_aggr_1 = aggregate_localization(syn_mixed_out_localization["local_localization_mass_accuracy"], top_k=-0, highest=True)
print("Mass", s_mass_aggr_1["2"]["10"])

Rank {'varx_ig': {'mean': 0.34, 'standard_error': 0.004, 'se_2': 0.004}, 'varx_lrp': {'mean': 0.34, 'standard_error': 0.004, 'se_2': 0.004}, 'varx': {'mean': 0.5, 'standard_error': 0.003, 'se_2': 0.003}, 'clue': {'mean': 0.12, 'standard_error': 0.003, 'se_2': 0.003}, 'infoshap': {'mean': 0.17, 'standard_error': 0.003, 'se_2': 0.003}}
Mass {'varx_ig': {'mean': 0.3, 'standard_error': 0.003, 'se_2': 0.003}, 'varx_lrp': {'mean': 0.3, 'standard_error': 0.003, 'se_2': 0.003}, 'varx': {'mean': 0.39, 'standard_error': 0.003, 'se_2': 0.003}, 'clue': {'mean': 0.12, 'standard_error': 0.001, 'se_2': 0.001}, 'infoshap': {'mean': 0.15, 'standard_error': 0.001, 'se_2': 0.001}}


## Synthetic

### Simple

In [3]:
with open('fixed_synthetic_out_localization_simple_1.json', 'r') as f:
  syn_simple_out_localization_1 = json.load(f)
with open('fixed_synthetic_out_localization_simple_2.json', 'r') as f:
  syn_simple_out_localization_2 = json.load(f)
with open('fixed_synthetic_out_localization_simple_3.json', 'r') as f:
  syn_simple_out_localization_3 = json.load(f)


In [4]:
s_rank_aggr_1 = aggregate_localization(syn_simple_out_localization_1["local_localization_precision"], top_k=-0, highest=True)
print("Rank", s_rank_aggr_1["2"]["5"])

s_mass_aggr_1 = aggregate_localization(syn_simple_out_localization_1["local_localization_mass_accuracy"], top_k=-0, highest=True)
print("Mass", s_mass_aggr_1["2"]["5"])

Rank {'varx_ig': {'mean': 0.77, 'standard_error': 0.006, 'se_2': 0.006}, 'varx_lrp': {'mean': 0.76, 'standard_error': 0.006, 'se_2': 0.006}, 'varx': {'mean': 0.87, 'standard_error': 0.004, 'se_2': 0.004}, 'clue': {'mean': 0.07, 'standard_error': 0.003, 'se_2': 0.003}, 'infoshap': {'mean': 0.11, 'standard_error': 0.004, 'se_2': 0.004}}
Mass {'varx_ig': {'mean': 0.52, 'standard_error': 0.004, 'se_2': 0.004}, 'varx_lrp': {'mean': 0.5, 'standard_error': 0.004, 'se_2': 0.004}, 'varx': {'mean': 0.77, 'standard_error': 0.003, 'se_2': 0.003}, 'clue': {'mean': 0.07, 'standard_error': 0.001, 'se_2': 0.001}, 'infoshap': {'mean': 0.08, 'standard_error': 0.001, 'se_2': 0.001}}


In [5]:
s_rank_aggr_2 = aggregate_localization(syn_simple_out_localization_2["local_localization_precision"], top_k=-0, highest=True)
print("Rank", s_rank_aggr_2["2"]["5"])

s_mass_aggr_2 = aggregate_localization(syn_simple_out_localization_2["local_localization_mass_accuracy"], top_k=-0, highest=True)
print("Mass", s_mass_aggr_2["2"]["5"])

Rank {'varx_ig': {'mean': 0.77, 'standard_error': 0.006, 'se_2': 0.006}, 'varx_lrp': {'mean': 0.75, 'standard_error': 0.006, 'se_2': 0.006}, 'varx': {'mean': 0.86, 'standard_error': 0.004, 'se_2': 0.004}, 'clue': {'mean': 0.06, 'standard_error': 0.003, 'se_2': 0.003}, 'infoshap': {'mean': 0.11, 'standard_error': 0.004, 'se_2': 0.004}}
Mass {'varx_ig': {'mean': 0.51, 'standard_error': 0.004, 'se_2': 0.004}, 'varx_lrp': {'mean': 0.48, 'standard_error': 0.004, 'se_2': 0.004}, 'varx': {'mean': 0.75, 'standard_error': 0.004, 'se_2': 0.004}, 'clue': {'mean': 0.07, 'standard_error': 0.001, 'se_2': 0.001}, 'infoshap': {'mean': 0.09, 'standard_error': 0.001, 'se_2': 0.001}}


In [6]:
s_rank_aggr_3 = aggregate_localization(syn_simple_out_localization_3["local_localization_precision"], top_k=-0, highest=True)
print("Rank", s_rank_aggr_3["2"]["5"])

s_mass_aggr_3 = aggregate_localization(syn_simple_out_localization_3["local_localization_mass_accuracy"], top_k=-0, highest=True)
print("Mass", s_mass_aggr_3["2"]["5"])

Rank {'varx_ig': {'mean': 0.75, 'standard_error': 0.006, 'se_2': 0.006}, 'varx_lrp': {'mean': 0.75, 'standard_error': 0.006, 'se_2': 0.006}, 'varx': {'mean': 0.86, 'standard_error': 0.004, 'se_2': 0.004}, 'clue': {'mean': 0.06, 'standard_error': 0.003, 'se_2': 0.003}, 'infoshap': {'mean': 0.12, 'standard_error': 0.004, 'se_2': 0.004}}
Mass {'varx_ig': {'mean': 0.51, 'standard_error': 0.005, 'se_2': 0.005}, 'varx_lrp': {'mean': 0.49, 'standard_error': 0.004, 'se_2': 0.004}, 'varx': {'mean': 0.75, 'standard_error': 0.003, 'se_2': 0.003}, 'clue': {'mean': 0.07, 'standard_error': 0.001, 'se_2': 0.001}, 'infoshap': {'mean': 0.09, 'standard_error': 0.001, 'se_2': 0.001}}


### Complex

In [7]:
with open('fixed_synthetic_out_localization_1.json', 'r') as f:
  syn_out_localization_1 = json.load(f)
with open('fixed_synthetic_out_localization_2.json', 'r') as f:
  syn_out_localization_2 = json.load(f)
with open('fixed_synthetic_out_localization_3.json', 'r') as f:
  syn_out_localization_3 = json.load(f)

In [8]:
c_rank_aggr_1 = aggregate_localization(syn_out_localization_1["local_localization_precision"], top_k=-0, highest=True)
print("Rank", c_rank_aggr_1["2"]["5"])

c_mass_aggr_1 = aggregate_localization(syn_out_localization_1["local_localization_mass_accuracy"], top_k=-0, highest=True)
print("Mass", c_mass_aggr_1["2"]["5"])

Rank {'varx_ig': {'mean': 0.4, 'standard_error': 0.006, 'se_2': 0.006}, 'varx_lrp': {'mean': 0.42, 'standard_error': 0.006, 'se_2': 0.006}, 'varx': {'mean': 0.75, 'standard_error': 0.005, 'se_2': 0.005}, 'clue': {'mean': 0.06, 'standard_error': 0.003, 'se_2': 0.003}, 'infoshap': {'mean': 0.12, 'standard_error': 0.004, 'se_2': 0.004}}
Mass {'varx_ig': {'mean': 0.26, 'standard_error': 0.004, 'se_2': 0.004}, 'varx_lrp': {'mean': 0.28, 'standard_error': 0.003, 'se_2': 0.003}, 'varx': {'mean': 0.5, 'standard_error': 0.004, 'se_2': 0.004}, 'clue': {'mean': 0.07, 'standard_error': 0.001, 'se_2': 0.001}, 'infoshap': {'mean': 0.09, 'standard_error': 0.001, 'se_2': 0.001}}


In [9]:
c_rank_aggr_2 = aggregate_localization(syn_out_localization_2["local_localization_precision"], top_k=-0, highest=True)
print("Rank", c_rank_aggr_2["2"]["5"])

c_mass_aggr_2 = aggregate_localization(syn_out_localization_2["local_localization_mass_accuracy"], top_k=-0, highest=True)
print("Mass", c_mass_aggr_2["2"]["5"])

Rank {'varx_ig': {'mean': 0.36, 'standard_error': 0.005, 'se_2': 0.005}, 'varx_lrp': {'mean': 0.38, 'standard_error': 0.005, 'se_2': 0.005}, 'varx': {'mean': 0.65, 'standard_error': 0.005, 'se_2': 0.005}, 'clue': {'mean': 0.06, 'standard_error': 0.003, 'se_2': 0.003}, 'infoshap': {'mean': 0.09, 'standard_error': 0.003, 'se_2': 0.003}}
Mass {'varx_ig': {'mean': 0.22, 'standard_error': 0.003, 'se_2': 0.003}, 'varx_lrp': {'mean': 0.24, 'standard_error': 0.003, 'se_2': 0.003}, 'varx': {'mean': 0.35, 'standard_error': 0.003, 'se_2': 0.003}, 'clue': {'mean': 0.07, 'standard_error': 0.001, 'se_2': 0.001}, 'infoshap': {'mean': 0.08, 'standard_error': 0.001, 'se_2': 0.001}}


In [10]:
c_rank_aggr_3 = aggregate_localization(syn_out_localization_3["local_localization_precision"], top_k=-0, highest=True)
print("Rank", c_rank_aggr_3["2"]["5"])

c_mass_aggr_3 = aggregate_localization(syn_out_localization_3["local_localization_mass_accuracy"], top_k=-0, highest=True)
print("Mass", c_mass_aggr_3["2"]["5"])

Rank {'varx_ig': {'mean': 0.34, 'standard_error': 0.006, 'se_2': 0.006}, 'varx_lrp': {'mean': 0.35, 'standard_error': 0.005, 'se_2': 0.005}, 'varx': {'mean': 0.65, 'standard_error': 0.005, 'se_2': 0.005}, 'clue': {'mean': 0.06, 'standard_error': 0.003, 'se_2': 0.003}, 'infoshap': {'mean': 0.11, 'standard_error': 0.003, 'se_2': 0.003}}
Mass {'varx_ig': {'mean': 0.22, 'standard_error': 0.003, 'se_2': 0.003}, 'varx_lrp': {'mean': 0.23, 'standard_error': 0.003, 'se_2': 0.003}, 'varx': {'mean': 0.37, 'standard_error': 0.003, 'se_2': 0.003}, 'clue': {'mean': 0.07, 'standard_error': 0.001, 'se_2': 0.001}, 'infoshap': {'mean': 0.09, 'standard_error': 0.001, 'se_2': 0.001}}


## Red Wine

In [11]:
with open('fixed_red_wine_localization_simple.json', 'r') as f:
    rw_simple_out_localization_1 = json.load(f)
with open('fixed_red_wine_50_out_localization.json', 'r') as f:
    rw_out_localization_50 = json.load(f)

In [12]:
rw_rank_aggr_simple = aggregate_localization(rw_simple_out_localization_1["local_localization_precision"], top_k=-0, highest=True)
print("Rank", rw_rank_aggr_simple["2"]["5"])

rw_mass_aggr_simple = aggregate_localization(rw_simple_out_localization_1["local_localization_mass_accuracy"], top_k=-0, highest=True)
print("Mass", rw_mass_aggr_simple["2"]["5"])

Rank {'varx_ig': {'mean': 0.53, 'standard_error': 0.012, 'se_2': 0.012}, 'varx_lrp': {'mean': 0.54, 'standard_error': 0.012, 'se_2': 0.012}, 'varx': {'mean': 0.8, 'standard_error': 0.007, 'se_2': 0.007}, 'clue': {'mean': 0.59, 'standard_error': 0.01, 'se_2': 0.01}, 'infoshap': {'mean': 0.36, 'standard_error': 0.01, 'se_2': 0.01}}
Mass {'varx_ig': {'mean': 0.49, 'standard_error': 0.009, 'se_2': 0.009}, 'varx_lrp': {'mean': 0.5, 'standard_error': 0.01, 'se_2': 0.01}, 'varx': {'mean': 0.78, 'standard_error': 0.006, 'se_2': 0.006}, 'clue': {'mean': 0.53, 'standard_error': 0.008, 'se_2': 0.008}, 'infoshap': {'mean': 0.36, 'standard_error': 0.006, 'se_2': 0.006}}


In [13]:
rw_rank_aggr_50 = aggregate_localization(rw_out_localization_50["local_localization_precision"], top_k=-0, highest=True)
print("Rank", rw_rank_aggr_50["2"]["5"])

rw_mass_aggr_50 = aggregate_localization(rw_out_localization_50["local_localization_mass_accuracy"], top_k=-0, highest=True)
print("Mass", rw_mass_aggr_50["2"]["5"])

Rank {'varx_ig': {'mean': 0.64, 'standard_error': 0.011, 'se_2': 0.011}, 'varx_lrp': {'mean': 0.62, 'standard_error': 0.011, 'se_2': 0.011}, 'varx': {'mean': 0.92, 'standard_error': 0.006, 'se_2': 0.006}, 'clue': {'mean': 0.67, 'standard_error': 0.009, 'se_2': 0.009}, 'infoshap': {'mean': 0.42, 'standard_error': 0.01, 'se_2': 0.01}}
Mass {'varx_ig': {'mean': 0.69, 'standard_error': 0.01, 'se_2': 0.01}, 'varx_lrp': {'mean': 0.66, 'standard_error': 0.01, 'se_2': 0.01}, 'varx': {'mean': 0.93, 'standard_error': 0.004, 'se_2': 0.004}, 'clue': {'mean': 0.62, 'standard_error': 0.007, 'se_2': 0.007}, 'infoshap': {'mean': 0.4, 'standard_error': 0.007, 'se_2': 0.007}}


## Ailerons

In [14]:
with open('fixed_ailerons_localization_simple.json', 'r') as f:
    a_simple_out_localization_1 = json.load(f)
with open('fixed_ailerons_50_out_localization.json', 'r') as f:
    a_out_localization_50 = json.load(f)

In [15]:
a_rank_aggr_simple = aggregate_localization(a_simple_out_localization_1["local_localization_precision"], top_k=-0, highest=True)
print("Rank", a_rank_aggr_simple["2"]["5"])

a_mass_aggr_simple = aggregate_localization(a_simple_out_localization_1["local_localization_mass_accuracy"], top_k=-0, highest=True)
print("Mass", a_mass_aggr_simple["2"]["5"])

Rank {'varx_ig': {'mean': 0.83, 'standard_error': 0.003, 'se_2': 0.003}, 'varx_lrp': {'mean': 0.81, 'standard_error': 0.003, 'se_2': 0.003}, 'varx': {'mean': 0.87, 'standard_error': 0.002, 'se_2': 0.002}, 'clue': {'mean': 0.54, 'standard_error': 0.003, 'se_2': 0.003}, 'infoshap': {'mean': 0.34, 'standard_error': 0.004, 'se_2': 0.004}}
Mass {'varx_ig': {'mean': 0.8, 'standard_error': 0.003, 'se_2': 0.003}, 'varx_lrp': {'mean': 0.78, 'standard_error': 0.003, 'se_2': 0.003}, 'varx': {'mean': 0.88, 'standard_error': 0.001, 'se_2': 0.001}, 'clue': {'mean': 0.4, 'standard_error': 0.002, 'se_2': 0.002}, 'infoshap': {'mean': 0.3, 'standard_error': 0.002, 'se_2': 0.002}}


In [16]:
a_rank_aggr_50 = aggregate_localization(a_out_localization_50["local_localization_precision"], top_k=-0, highest=True)
print("Rank", a_rank_aggr_50["2"]["5"])

a_mass_aggr_50 = aggregate_localization(a_out_localization_50["local_localization_mass_accuracy"], top_k=-0, highest=True)
print("Mass", a_mass_aggr_50["2"]["5"])

Rank {'varx_ig': {'mean': 0.77, 'standard_error': 0.003, 'se_2': 0.003}, 'varx_lrp': {'mean': 0.73, 'standard_error': 0.004, 'se_2': 0.004}, 'varx': {'mean': 0.91, 'standard_error': 0.002, 'se_2': 0.002}, 'clue': {'mean': 0.59, 'standard_error': 0.003, 'se_2': 0.003}, 'infoshap': {'mean': 0.35, 'standard_error': 0.004, 'se_2': 0.004}}
Mass {'varx_ig': {'mean': 0.8, 'standard_error': 0.003, 'se_2': 0.003}, 'varx_lrp': {'mean': 0.77, 'standard_error': nan, 'se_2': 0.004}, 'varx': {'mean': 0.91, 'standard_error': 0.001, 'se_2': 0.001}, 'clue': {'mean': 0.48, 'standard_error': 0.002, 'se_2': 0.002}, 'infoshap': {'mean': 0.3, 'standard_error': 0.002, 'se_2': 0.002}}


## LSAT

In [17]:
with open('fixed_lsat_localization_simple.json', 'r') as f:
    lsat_simple_out_localization_1 = json.load(f)
with open('fixed_lsat_50_out_localization.json', 'r') as f:
    lsat_out_localization_50 = json.load(f)

In [18]:
lsat_rank_aggr_simple = aggregate_localization(lsat_simple_out_localization_1["local_localization_precision"], top_k=-0, highest=True)
print("Rank", lsat_rank_aggr_simple["2"]["5"])

lsat_mass_aggr_simple = aggregate_localization(lsat_simple_out_localization_1["local_localization_mass_accuracy"], top_k=-0, highest=True)
print("Mass", lsat_mass_aggr_simple["2"]["5"])

Rank {'varx_ig': {'mean': 0.75, 'standard_error': 0.003, 'se_2': 0.003}, 'varx_lrp': {'mean': 0.76, 'standard_error': 0.003, 'se_2': 0.003}, 'varx': {'mean': 0.93, 'standard_error': 0.002, 'se_2': 0.002}, 'clue': {'mean': 0.51, 'standard_error': 0.002, 'se_2': 0.002}, 'infoshap': {'mean': 0.73, 'standard_error': 0.002, 'se_2': 0.002}}
Mass {'varx_ig': {'mean': 0.75, 'standard_error': 0.003, 'se_2': 0.003}, 'varx_lrp': {'mean': 0.77, 'standard_error': 0.003, 'se_2': 0.003}, 'varx': {'mean': 0.95, 'standard_error': 0.001, 'se_2': 0.001}, 'clue': {'mean': 0.49, 'standard_error': nan, 'se_2': 0.002}, 'infoshap': {'mean': 0.73, 'standard_error': 0.002, 'se_2': 0.002}}


In [19]:
lsat_rank_aggr_50 = aggregate_localization(lsat_out_localization_50["local_localization_precision"], top_k=-0, highest=True)
print("Rank", lsat_rank_aggr_50["2"]["5"])

lsat_mass_aggr_50 = aggregate_localization(lsat_out_localization_50["local_localization_mass_accuracy"], top_k=-0, highest=True)
print("Mass", lsat_mass_aggr_50["2"]["5"])

Rank {'varx_ig': {'mean': 0.82, 'standard_error': 0.002, 'se_2': 0.002}, 'varx_lrp': {'mean': 0.81, 'standard_error': 0.002, 'se_2': 0.002}, 'varx': {'mean': 0.94, 'standard_error': 0.002, 'se_2': 0.002}, 'clue': {'mean': 0.5, 'standard_error': 0.002, 'se_2': 0.002}, 'infoshap': {'mean': 0.73, 'standard_error': 0.002, 'se_2': 0.002}}
Mass {'varx_ig': {'mean': 0.91, 'standard_error': 0.001, 'se_2': 0.001}, 'varx_lrp': {'mean': 0.91, 'standard_error': nan, 'se_2': 0.001}, 'varx': {'mean': 0.97, 'standard_error': 0.001, 'se_2': 0.001}, 'clue': {'mean': 0.52, 'standard_error': 0.002, 'se_2': 0.002}, 'infoshap': {'mean': 0.73, 'standard_error': 0.002, 'se_2': 0.002}}


# Robustness

In [6]:
merged = {}
for method in ["varx", "varx_ig", "varx_lrp", "infoshap", "clue"]:
        with open(f"umerged/synthetic_mixed_5_out_lipschitz_fixed_{method}.json", 'r') as f:
                lipschitz = json.load(f)
                merged[method] = lipschitz[method]
with open("synthetic_mixed_5_out_lipschitz_fixed.json", "w") as f:
        json.dump(merged, f)

In [7]:
list(Path(".").glob("*lipschitz*.json"))

[PosixPath('lsat_out_lipschitz_fixed.json'),
 PosixPath('ailerons_out_lipschitz_fixed.json'),
 PosixPath('synthetic_out_lipschitz_fixed.json'),
 PosixPath('synthetic_mixed_5_out_lipschitz_fixed.json'),
 PosixPath('red_wine_out_lipschitz_fixed.json')]

In [8]:
# Load json with all resulst and just extract the Lipschitz metric for each method
for file_path in Path(".").glob("*lipschitz*.json"):
    with open(file_path, 'r') as f:
        lipschitz = json.load(f)
    pd.DataFrame({
        method: data["L_out"]
        for method, data in lipschitz.items()
    }).to_csv(f"../../plotting/data/lipschitz/{file_path.stem}.csv", index=False)