In [None]:
!mkdir -p results
!rm results/*

In [None]:
import os
import sys

sys.path.append("..")

from lightgbm import LGBMRanker
from sklearn.utils import check_random_state
from xai_ranking.benchmarks import (
    human_in_the_loop_experiment,
    human_in_the_loop_batch_experiment,
    hierarchical_ranking_explanation,
    hierarchical_ranking_batch_explanation,
    lime_experiment,
    lime_batch_experiment,
    shap_experiment,
    shap_batch_experiment,
    sharp_experiment,
    sharp_batch_experiment,
    # participation_experiment,
)
from xai_ranking.preprocessing import (
    preprocess_atp_data,
    preprocess_csrank_data,
    preprocess_higher_education_data,
    preprocess_movers_data,
)
from xai_ranking.datasets import (
    fetch_atp_data,
    fetch_csrank_data,
    fetch_higher_education_data,
    fetch_movers_data,
)
from xai_ranking.scorers import (
    atp_score,
    csrank_score,
    higher_education_score,
)
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Master seed: the per-run seeds handed to the experiments are drawn from it.
RNG_SEED = 42

In [None]:
def check_random_states(random_state, n_runs):
    """Lazily produce ``n_runs`` integer seeds drawn from a single random source.

    Parameters
    ----------
    random_state : object
        Forwarded to ``check_random_state`` to obtain the generator the seeds
        are drawn from.
    n_runs : int
        Number of seeds the returned generator yields before it is exhausted.

    Returns
    -------
    generator
        Single-use generator; each ``uint32`` seed is drawn only when it is
        requested (e.g. through ``next``).
    """
    seed_source = check_random_state(random_state)
    upper_bound = 2 ** 32 - 1
    return (
        seed_source.randint(0, upper_bound, dtype="uint32")
        for _run in range(n_runs)
    )

In [None]:
# Set up ranker for the moving company dataset:
X, ranks, score = preprocess_movers_data(fetch_movers_data(test=True))
qids_train = X.index.value_counts().to_numpy()

model = LGBMRanker(
    objective="lambdarank", label_gain=list(range(max(ranks) + 1)), verbose=-1
)
model.fit(
    X=X,
    y=ranks,
    group=qids_train,
)

In [None]:
# Benchmark datasets. Every entry bundles the fetched data, the matching
# preprocessing function and the scoring function that is being explained.
# Entries carrying "done": True are skipped by the experiment loop.
datasets = [
    {
        "name": "ATP",
        "data": fetch_atp_data(),
        "preprocess": preprocess_atp_data,
        "scorer": atp_score,
    },
    {
        "name": "CSRank",
        "data": fetch_csrank_data(),
        "preprocess": preprocess_csrank_data,
        "scorer": csrank_score,
        "done": True,
    },
    {
        "name": "Higher Education",
        "data": fetch_higher_education_data(year=2020),
        "preprocess": preprocess_higher_education_data,
        "scorer": higher_education_score,
        "done": True,
    },
    {
        "name": "Moving Company",
        "data": fetch_movers_data(test=True),
        "preprocess": preprocess_movers_data,
        "scorer": model.predict,
        "done": True,
    },
]

# Explanation methods. "iterations" is the number of repetitions per dataset:
# one for each population-level method, three for each BATCH_ variant.
xai_methods = [
    {
        "iterations": 1,
        "name": "LIME",
        "experiment": lime_experiment,
    },
    {
        "iterations": 3,
        "name": "BATCH_LIME",
        "experiment": lime_batch_experiment,
    },
    {
        "iterations": 1,
        "name": "SHAP",
        "experiment": shap_experiment,
    },
    {
        "iterations": 3,
        "name": "BATCH_SHAP",
        "experiment": shap_batch_experiment,
    },
    {
        "iterations": 1,
        "name": "ShaRP",
        # Same call as sharp_experiment, with verbose output switched on.
        "experiment": lambda *a, **kw: sharp_experiment(*a, **kw, verbose=True),
    },
    {
        "iterations": 3,
        "name": "BATCH_ShaRP",
        # Same call as sharp_batch_experiment, with verbose output switched on.
        "experiment": lambda *a, **kw: sharp_batch_experiment(*a, **kw, verbose=True),
    },
    # {"iterations": 1, "name": "Participation", "experiment": participation_experiment},
    {
        "iterations": 1,
        "name": "HRE",
        "experiment": hierarchical_ranking_explanation,
    },
    {
        "iterations": 3,
        "name": "BATCH_HRE",
        "experiment": hierarchical_ranking_batch_explanation,
    },
    {
        "iterations": 1,
        "name": "HIL",
        "experiment": human_in_the_loop_experiment,
    },
    {
        "iterations": 3,
        "name": "BATCH_HIL",
        "experiment": human_in_the_loop_batch_experiment,
    },
]

# One seed per (dataset, method, iteration) combination.
runs_per_dataset = sum(method["iterations"] for method in xai_methods)
total_states = runs_per_dataset * len(datasets)
random_states = check_random_states(RNG_SEED, total_states)

In [None]:
# Run every explanation method on every dataset and save each run as a CSV.
os.makedirs("results", exist_ok=True)  # the CSV dump below needs the directory

# Rebuild the seed stream so that re-running this cell reproduces the same
# seeds instead of failing on an already exhausted generator.
random_states = check_random_states(RNG_SEED, total_states)

results = {}
for dataset in datasets:
    dataset_name = dataset["name"]
    preprocess_func = dataset["preprocess"]
    score_func = dataset["scorer"]
    already_done = bool(dataset.get("done"))

    # Preprocessing depends only on the dataset, so it is done once here
    # rather than once per explanation method.
    X, ranks, scores = preprocess_func(dataset["data"])

    results[dataset_name] = {}
    for xai_method in xai_methods:
        method_name = xai_method["name"]
        experiment_func = xai_method["experiment"]
        method_results = results[dataset_name][method_name] = []

        for iteration_idx in range(xai_method["iterations"]):
            # The seed is consumed even for skipped datasets so that every
            # (dataset, method, iteration) keeps the same seed no matter
            # which datasets are flagged as done.
            random_state = next(random_states)
            if already_done:
                continue

            contributions = experiment_func(X, score_func, random_state=random_state)
            method_results.append(contributions)

            result_df = pd.DataFrame(contributions, columns=X.columns, index=X.index)
            result_df.to_csv(f"results/_contributions_{dataset_name}_{method_name}_{iteration_idx}.csv")

In [None]:
# Display the contributions collected in memory by the experiment loop.
results

In [None]:
def read_results_from_files(results_dir="partial-results"):
    """Load saved contribution tables for every dataset / method / iteration.

    The file looked up for each combination is
    ``<results_dir>/_contributions_<dataset>_<method>_<iteration>.csv``, with
    the names taken from the module-level ``datasets`` and ``xai_methods``
    lists.

    Parameters
    ----------
    results_dir : str, default="partial-results"
        Directory that holds the CSV files. Pass ``"results"`` to load the
        files written by the experiment loop of this notebook.

    Returns
    -------
    dict
        ``{dataset name: {method name: [DataFrame, ...]}}``. Every dataset and
        method gets an entry; iterations whose file does not exist are
        skipped, so a list may be shorter than the configured number of
        iterations, or empty.
    """
    return_dict = {}
    for cur_dataset in datasets:
        dataset_results = return_dict[cur_dataset["name"]] = {}
        for cur_xai_method in xai_methods:
            method_results = dataset_results[cur_xai_method["name"]] = []
            for iteration in range(cur_xai_method["iterations"]):
                fname = os.path.join(
                    results_dir,
                    f"_contributions_{cur_dataset['name']}_"
                    f"{cur_xai_method['name']}_{iteration}.csv",
                )
                # Missing files are expected: not every run has been saved.
                if os.path.isfile(fname):
                    method_results.append(pd.read_csv(fname, index_col=0))
    return return_dict

In [None]:
# Replace the in-memory results with the tables stored on disk.
results = read_results_from_files()

In [None]:
# Compare every batch run with every population run of the same method using
# the squared difference of their contributions, summarised per column.
methods = ["LIME", "SHAP", "ShaRP", "HRE", "HIL"]
batch_summary = {}
aggregated_summary = {}
for dataset_name in results:
    rows = []
    for method in methods:
        population_experiments = results[dataset_name][method]
        batch_experiments = results[dataset_name][f"BATCH_{method}"]
        for pop_idx, pop_exp in enumerate(population_experiments):
            for batch_idx, batch_exp in enumerate(batch_experiments):
                squared_diffs: pd.DataFrame = (batch_exp - pop_exp) ** 2

                # One summary row per statistic, tagged with the pair of runs
                # it was computed from.
                for statistic, reduce_rows in (
                    ("mean", squared_diffs.mean),
                    ("var", squared_diffs.var),
                ):
                    summary_row = reduce_rows(axis=0).to_frame().T
                    summary_row["method"] = method
                    summary_row["pop_idx"] = pop_idx
                    summary_row["batch_idx"] = batch_idx
                    summary_row["statistic"] = statistic
                    rows.append(summary_row)

    if not rows:
        # No result files were loaded for this dataset. pd.concat raises on
        # an empty list, which would abort the summary of all other datasets.
        continue

    dataset_summary = pd.concat(rows)
    batch_summary[dataset_name] = dataset_summary
    # Average over all (population run, batch run) pairs of each method.
    aggregated_summary[dataset_name] = (
        dataset_summary.groupby(["statistic", "method"]).mean()
        .drop(["pop_idx", "batch_idx"], axis=1)
    )

In [None]:
# Aggregated squared-difference summary (per statistic and method) for ATP.
aggregated_summary["ATP"]

In [None]:
def plot_dataset_aggregated_summary(dataset_data, gap=0.3):
    """Plot the per-column mean with error bars, one marker series per method.

    Parameters
    ----------
    dataset_data : pandas.DataFrame
        Table whose index has a level named "method" and can be looked up
        with ``.loc["mean", method]`` and ``.loc["var", method]``. Each column
        becomes one tick on the x axis.
    gap : float, default=0.3
        Share of every unit-wide tick slot that is left empty; the rest is
        divided evenly between the methods.

    Notes
    -----
    Draws on the current pyplot figure. The error bars are the square root of
    the "var" rows.
    """
    column_count = len(dataset_data.columns)
    method_names = dataset_data.index.get_level_values("method").unique()
    n_methods = len(method_names)
    slot_width = (1 - gap) / n_methods

    # Start left of each tick so the group of markers is centred on it.
    positions = np.arange(column_count).astype(np.float64)
    positions = positions - (n_methods - 1) * slot_width / 2

    for method_name in method_names:
        means = dataset_data.loc["mean", method_name]
        spreads = np.sqrt(dataset_data.loc["var", method_name])
        plt.errorbar(
            positions,
            means,
            yerr=spreads,
            marker="o",
            label=method_name,
            linestyle="None",
        )
        # Shift to the next method's slot (new array, nothing mutated in place).
        positions = positions + slot_width

    plt.legend()
    plt.xticks(np.arange(column_count), dataset_data.columns, rotation=45)

In [None]:
# Plot the aggregated summary of the ATP dataset.
plot_dataset_aggregated_summary(aggregated_summary["ATP"])