In [None]:
import os
import sys

sys.path.append("..")

from lightgbm import LGBMRanker
from xai_ranking.benchmarks import (
    human_in_the_loop_experiment,
    human_in_the_loop_batch_experiment,
    hierarchical_ranking_explanation,
    hierarchical_ranking_batch_explanation,
    lime_experiment,
    lime_batch_experiment,
    shap_experiment,
    shap_batch_experiment,
    sharp_experiment,
    sharp_batch_experiment,
    # participation_experiment,
)
from xai_ranking.preprocessing import (
    preprocess_atp_data,
    preprocess_csrank_data,
    preprocess_higher_education_data,
    preprocess_movers_data,
)
from xai_ranking.datasets import (
    fetch_atp_data,
    fetch_csrank_data,
    fetch_higher_education_data,
    fetch_movers_data,
)
from xai_ranking.scorers import (
    atp_score,
    csrank_score,
    higher_education_score,
)
from xai_ranking.metrics import compute_all_stability, compute_all_agreement
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from mlresearch.utils import check_random_states, set_matplotlib_style

# Master seed: the per-run random states used by the experiments are derived
# from it through check_random_states.
RNG_SEED = 42

In [None]:
# Set up the ranker for the moving company dataset. The fitted `model` is
# later used as that dataset's scoring function through `model.predict`.
X, ranks, score = preprocess_movers_data(fetch_movers_data(test=False))

# LightGBM expects `group` to list the query sizes in the order in which the
# queries appear in X. `Index.value_counts()` sorts by descending frequency
# instead, which can attribute sizes to the wrong rows whenever queries differ
# in size. Grouping with sort=False keeps first-appearance order.
# NOTE(review): assumes X is indexed by query id and that the rows of each
# query are contiguous -- confirm against preprocess_movers_data.
qids_train = X.groupby(level=0, sort=False).size().to_numpy()

model = LGBMRanker(
    objective="lambdarank",
    # One gain entry per relevance label in 0..max(ranks).
    label_gain=list(range(max(ranks) + 1)),
    verbose=-1,
)
model.fit(
    X=X,
    y=ranks,
    group=qids_train,
)

In [None]:
# Benchmark datasets. Every entry bundles a display name, the already
# preprocessed data and the scoring function used to rank it.
datasets = [
    {"name": label, "data": prepared, "scorer": scorer}
    for label, prepared, scorer in (
        ("ATP", preprocess_atp_data(fetch_atp_data()), atp_score),
        ("CSRank", preprocess_csrank_data(fetch_csrank_data()), csrank_score),
        (
            "Higher Education",
            preprocess_higher_education_data(fetch_higher_education_data(year=2020)),
            higher_education_score,
        ),
        # Scored by the `model` ranker rather than by a fixed scoring function.
        (
            "Moving Company",
            preprocess_movers_data(fetch_movers_data(test=True)),
            model.predict,
        ),
    )
]

# Explanation methods, one row per method:
# (number of iterations, display name, experiment callable, extra kwargs).
xai_methods = [
    {"iterations": n_runs, "name": label, "experiment": experiment, "kwargs": options}
    for n_runs, label, experiment, options in (
        # "mode" options noted by the authors: classification, regression
        (1, "LIME", lime_experiment, {"mode": "regression"}),
        (10, "BATCH_LIME", lime_batch_experiment, {"mode": "regression"}),
        (1, "SHAP", shap_experiment, {}),
        (10, "BATCH_SHAP", shap_batch_experiment, {}),
        (
            1,
            "ShaRP",
            sharp_experiment,
            {"verbose": True, "sample_size": 150, "measure": "shapley", "n_jobs": -1},
        ),
        (
            10,
            "BATCH_ShaRP",
            sharp_batch_experiment,
            {"verbose": True, "sample_size": 150, "measure": "shapley", "n_jobs": -1},
        ),
        # "model_type" options noted by the authors: DT, LR, OLS, PLS
        (1, "HRE", hierarchical_ranking_explanation, {"model_type": "OLS", "s": 5}),
        (
            10,
            "BATCH_HRE",
            hierarchical_ranking_batch_explanation,
            {"model_type": "OLS", "s": 5},
        ),
        (1, "HIL", human_in_the_loop_experiment, {"upper_bound": 1, "lower_bound": None}),
        (
            10,
            "BATCH_HIL",
            human_in_the_loop_batch_experiment,
            {"upper_bound": 1, "lower_bound": None},
        ),
        # The "Participation" method (participation_experiment, 1 iteration)
        # is currently disabled.
    )
]

# One random state is reserved for every (dataset, method, iteration) run.
total_states = len(datasets) * sum(method["iterations"] for method in xai_methods)
random_states = iter(check_random_states(RNG_SEED, total_states))

In [None]:
# Run every explanation method on every dataset and store the feature
# contributions of each run as a CSV file.
# NOTE(review): read_results_from_files() looks in "partial-results/" by
# default, not in the folder written here.
output_dir = "results"
os.makedirs(output_dir, exist_ok=True)  # to_csv does not create missing folders

# Rebuild the seed stream so that re-running this cell reproduces the same
# runs instead of exhausting the iterator created in the configuration cell.
random_states = iter(check_random_states(RNG_SEED, total_states))

results = {}
for dataset in datasets:
    dataset_name = dataset["name"]
    results[dataset_name] = {}

    # `dataset["data"]` already holds the output of the preprocess_* call made
    # when `datasets` was built, and the entries have no "preprocess" key, so
    # the (X, ranks, scores) tuple is unpacked directly.
    X, ranks, scores = dataset["data"]
    score_func = dataset["scorer"]
    is_done = dataset.get("done", False)

    for xai_method in xai_methods:
        method_name = xai_method["name"]
        experiment_func = xai_method["experiment"]
        kwargs = xai_method.get("kwargs", {})
        results[dataset_name][method_name] = []

        for iteration_idx in range(xai_method["iterations"]):
            # The seed is drawn even for skipped datasets so that the seed of a
            # given run does not depend on which datasets are flagged "done".
            random_state = next(random_states)
            if is_done:
                continue

            contributions = experiment_func(
                X, score_func, random_state=random_state, **kwargs
            )
            results[dataset_name][method_name].append(contributions)

            result_df = pd.DataFrame(contributions, columns=X.columns, index=X.index)
            result_df.to_csv(
                f"{output_dir}/_contributions_{dataset_name}_{method_name}_{iteration_idx}.csv"
            )

In [None]:
def read_results_from_files(
    results_dir="partial-results", dataset_specs=None, method_specs=None
):
    """Load previously saved contribution tables from CSV files.

    Files are looked up as
    ``<results_dir>/_contributions_<dataset>_<method>_<iteration>.csv``.
    Iterations whose file does not exist are skipped, so a list may hold fewer
    tables than the configured number of iterations (or none at all).

    Parameters
    ----------
    results_dir : str, default="partial-results"
        Folder that contains the CSV files.
    dataset_specs : iterable of dict, optional
        Entries with a ``"name"`` key. Defaults to the notebook-level
        ``datasets`` list.
    method_specs : iterable of dict, optional
        Entries with ``"name"`` and ``"iterations"`` keys. Defaults to the
        notebook-level ``xai_methods`` list.

    Returns
    -------
    dict
        ``{dataset name: {method name: [DataFrame, ...]}}`` with the tables in
        increasing iteration order; each table is read with its first column
        as the index.
    """
    if dataset_specs is None:
        dataset_specs = datasets
    if method_specs is None:
        method_specs = xai_methods
    # Materialize once: the methods are traversed again for every dataset.
    method_specs = list(method_specs)

    return_dict = {}
    for cur_dataset in dataset_specs:
        per_method = return_dict[cur_dataset["name"]] = {}
        for cur_xai_method in method_specs:
            tables = per_method[cur_xai_method["name"]] = []
            for iteration in range(cur_xai_method["iterations"]):
                fname = os.path.join(
                    results_dir,
                    f"_contributions_"
                    f"{cur_dataset['name']}_{cur_xai_method['name']}_"
                    f"{iteration}.csv",
                )
                if os.path.isfile(fname):
                    tables.append(pd.read_csv(fname, index_col=0))
    return return_dict

In [None]:
# Replace the in-memory results with the contribution tables stored on disk.
results = read_results_from_files()

In [None]:
# Show the loaded results (nested dict: dataset name -> method name -> list of
# contribution tables).
results

# Stability

In [None]:
# Compute the stability summary and its error from the loaded results and show
# the summary as a table.
# NOTE(review): axis=None presumably collapses each dataset/method pair to a
# single value -- confirm in xai_ranking.metrics.compute_all_stability.
aggregated_summary, aggregated_error = compute_all_stability(results, axis=None)
pd.DataFrame(aggregated_summary)

In [None]:
def plot_dataset_aggregated_summary(agg_mean, agg_sem, gap=0.3):
    """Plot one error-bar series per method on the current matplotlib axes.

    Parameters
    ----------
    agg_mean : pandas.DataFrame
        Values to plot. Rows are selected by method label with ``.loc`` and
        the columns become the x ticks.
    agg_sem : pandas.DataFrame
        Error-bar sizes, laid out like ``agg_mean``.
    gap : float, default=0.3
        Share of each tick slot left empty; the remaining ``1 - gap`` is split
        evenly between the methods.
    """
    tick_positions = np.arange(len(agg_mean.columns))
    method_labels = agg_mean.index.unique()
    n_methods = len(method_labels)
    step = (1 - gap) / n_methods

    # Start at the left-most marker so the whole cluster is centred on its tick.
    marker_x = tick_positions.astype(np.float64) - (n_methods - 1) * step / 2
    for label in method_labels:
        plt.errorbar(
            marker_x,
            agg_mean.loc[label],
            yerr=agg_sem.loc[label],
            marker="o",
            label=label,
            linestyle="None",
        )
        # The next method is drawn one step further to the right.
        marker_x = marker_x + step

    plt.legend()
    plt.xticks(tick_positions, agg_mean.columns, rotation=45)

In [None]:
# Draw one stability figure per dataset, using the axis=0 aggregation.
font_overrides = {"font.family": ["Nimbus Roman"]}
set_matplotlib_style(font_size=12, **font_overrides)

aggregated_summary, aggregated_error = compute_all_stability(results, axis=0)
for dataset in aggregated_summary:
    means, errors = aggregated_summary[dataset], aggregated_error[dataset]
    plot_dataset_aggregated_summary(means, errors)
    plt.title(dataset)
    plt.show()

# Agreement

In [None]:
# Compute the agreement results from the loaded results and show the "kendall"
# entry for the ATP dataset.
# NOTE(review): n_features=3 presumably restricts the comparison to the top-3
# features -- confirm in xai_ranking.metrics.compute_all_agreement.
agreement_results = compute_all_agreement(results, n_features=3)
agreement_results["ATP"]["kendall"]

In [None]:
# Show the "jaccard" agreement entry for the ATP dataset.
agreement_results["ATP"]["jaccard"]

In [None]:
# Means of the "kendall" and "jaccard" agreement entries for ATP, shown
# together as a (kendall, jaccard) tuple.
(
    agreement_results["ATP"]["kendall"].mean(),
    agreement_results["ATP"]["jaccard"].mean(),
)