In [1]:
import sys
from pathlib import Path

sys.path.append("..")

import numpy as np
import pandas as pd
from xai_ranking.benchmarks import (
    human_in_the_loop,
    hierarchical_ranking_explanation,
    lime_experiment,
    shap_experiment,
    sharp_experiment,
    participation_experiment,
)
from xai_ranking.preprocessing import (
    preprocess_atp_data,
    preprocess_csrank_data,
    preprocess_higher_education_data,
)
from xai_ranking.datasets import (
    fetch_atp_data,
    fetch_csrank_data,
    fetch_higher_education_data,
    fetch_movers_data,
)
from xai_ranking.scorers import (
    atp_score,
    csrank_score,
    higher_education_score,
    synthethic_score,
)

# Random seed constant for the notebook (presumably meant for reproducible sampling).
RNG_SEED = 42

In [2]:
# Synthetic benchmark: N items, each described by four independently drawn features.
N = 10_000

# Draw from a generator seeded with RNG_SEED so that every
# Restart & Run All produces exactly the same synthetic data.
rng = np.random.default_rng(RNG_SEED)

p1 = rng.uniform(0, 1, N)       # uniform on [0, 1)
p2 = rng.normal(0.5, 0.15, N)   # normal, mean 0.5,  std 0.15
p3 = rng.normal(0.5, 0.05, N)   # normal, mean 0.5,  std 0.05
p4 = rng.normal(0.75, 0.05, N)  # normal, mean 0.75, std 0.05

# Equal weights over the four features; forwarded to the participation
# experiment in the experiment loop.
weights = np.array([0.25, 0.25, 0.25, 0.25])

data = pd.DataFrame({
    "p1": p1,
    "p2": p2,
    "p3": p3,
    "p4": p4,
})

# Disabled: precomputed weighted-sum score column.
# data['score'] = data[['p1', 'p2', 'p3', 'p4']].dot(weights)

In [3]:
# Benchmark inputs: each record pairs a named DataFrame with the scoring
# function that is applied to it.
datasets = [
    dict(name="Synthetic", data=data, scorer=synthethic_score),
]

# Explanation methods to run, listed as (label, experiment callable) pairs and
# expanded into records with "name" and "experiment" keys.
xai_methods = [
    {"name": label, "experiment": run_experiment}
    for label, run_experiment in (
        ("LIME", lime_experiment),
        ("SHAP", shap_experiment),
        ("ShaRP", sharp_experiment),
        ("Participation", participation_experiment),
        ("HRE", hierarchical_ranking_explanation),
        ("HIL", human_in_the_loop),
    )
]

In [4]:
# Run every explanation method on every dataset, keep the contributions in
# `results[dataset name][method name]`, and write one CSV per pair.
RESULTS_DIR = Path("results")
# DataFrame.to_csv does not create missing directories, so make sure the
# output folder exists before the (potentially long) experiments start.
RESULTS_DIR.mkdir(parents=True, exist_ok=True)

PARTICIPATION_TOP_K = 500  # top_k forwarded to the participation experiment

results = {}
for dataset in datasets:
    dataset_name = dataset["name"]
    # The frame is handed to the experiments as-is; no preprocessing step is
    # applied in this notebook.
    X = dataset["data"]
    score_func = dataset["scorer"]
    results[dataset_name] = {}

    for xai_method in xai_methods:
        method_name = xai_method["name"]
        experiment_func = xai_method["experiment"]

        if method_name != "Participation":
            contributions = experiment_func(X, score_func)
            # One row per item of X, one column per feature.
            result_df = pd.DataFrame(contributions, columns=X.columns, index=X.index)
            results[dataset_name][method_name] = contributions
        else:
            # The participation experiment takes extra arguments, and its
            # result is written as a single row (label 0) under X's columns.
            contributions = experiment_func(
                X, score_func, top_k=PARTICIPATION_TOP_K, weights=weights
            )
            result_df = pd.DataFrame(columns=X.columns)
            result_df.loc[0] = contributions.values
            results[dataset_name][method_name] = contributions.values

        result_df.to_csv(
            RESULTS_DIR / f"_contributions_{dataset_name}_{method_name}.csv",
            index=False,
        )

In [None]:
# Display the nested {dataset name: {method name: contributions}} mapping
# filled in by the experiment loop.
results