In [None]:
import os
import sys

sys.path.append("..")

from itertools import product
from lightgbm import LGBMRanker
from sklearn.utils import check_random_state
from xai_ranking.benchmarks import (
    human_in_the_loop_experiment,
    human_in_the_loop_batch_experiment,
    hierarchical_ranking_explanation,
    hierarchical_ranking_batch_explanation,
    lime_experiment,
    lime_batch_experiment,
    shap_experiment,
    shap_batch_experiment,
    sharp_experiment,
    sharp_batch_experiment,
    # participation_experiment,
)
from xai_ranking.preprocessing import (
    preprocess_atp_data,
    preprocess_csrank_data,
    preprocess_higher_education_data,
    preprocess_movers_data,
)
from xai_ranking.datasets import (
    fetch_atp_data,
    fetch_csrank_data,
    fetch_higher_education_data,
    fetch_movers_data,
)
from xai_ranking.scorers import (
    atp_score,
    csrank_score,
    higher_education_score,
)
from xai_ranking.metrics import (
    explanation_sensitivity, outcome_sensitivity,
    bootstrapped_explanation_consistency, cross_method_explanation_consistency,
    cross_method_outcome_consistency
)
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from mlresearch.utils import check_random_states, set_matplotlib_style
from sharp.utils import scores_to_ordering

RNG_SEED = 42

In [None]:
# Train the LambdaRank model that serves as the scorer for the moving company
# dataset (referenced further down through `model.predict`).
X, ranks, score = preprocess_movers_data(fetch_movers_data(test=False))

# LightGBM reads `group` as consecutive query sizes in the same order as the
# rows of X. `value_counts()` sorts by frequency by default, which misaligns
# the group boundaries whenever queries differ in size, so the order of first
# appearance is kept instead.
# NOTE(review): assumes the index of X identifies the query and that the rows
# of each query are stored contiguously - confirm against the preprocessing.
qids_train = X.index.value_counts(sort=False).to_numpy()

model = LGBMRanker(
    objective="lambdarank", label_gain=list(range(max(ranks) + 1)), verbose=-1
)
model.fit(
    X=X,
    y=ranks,
    group=qids_train,
)

In [None]:
# Benchmark datasets: each entry carries the preprocessed data, the scoring
# function used to rank it, and a "done" flag that makes the experiment loop
# skip the whole dataset when set to True.
datasets = [
    {
        "name": "ATP",
        "data": preprocess_atp_data(fetch_atp_data()),
        "scorer": atp_score,
        "done": False,
    },
    {
        "name": "CSRank",
        "data": preprocess_csrank_data(fetch_csrank_data()),
        "scorer": csrank_score,
        "done": False,
    },
    {
        "name": "Higher Education",
        "data": preprocess_higher_education_data(fetch_higher_education_data(year=2020)),
        "scorer": higher_education_score,
        "done": False,
    },
    {
        # Scored with the LGBMRanker fitted above rather than a fixed formula.
        "name": "Moving Company",
        "data": preprocess_movers_data(fetch_movers_data(test=True)),
        "scorer": model.predict,
        "done": False,
    },
]

# Explanation methods: "experiment" is the callable to run, "kwargs" its extra
# keyword arguments, "iterations" the number of repetitions and "done" a flag
# that makes the experiment loop skip the method when set to True.
xai_methods = [
    {
        "iterations": 1,
        "name": "LIME",
        "experiment": lime_experiment,
        "kwargs": {"mode": "regression"},  # classification, regression
        "done": True,
    },
    {
        "iterations": 1,
        "name": "SHAP",
        "experiment": shap_experiment,
        "kwargs": {},
        "done": True,
    },
    {
        "iterations": 1,
        "name": "ShaRP",
        "experiment": sharp_experiment,
        "kwargs": {
            "verbose": True,
            "sample_size": None,
            "measure": "shapley",
            "n_jobs": -1,
            "replace": True,
        },
        "done": True,
    },
    # One HRE entry per surrogate model type; they differ only in "model_type".
    *(
        {
            "iterations": 1,
            "name": f"HRE_{model_type}",
            "experiment": hierarchical_ranking_explanation,
            "kwargs": {"model_type": model_type, "s": 10},
            "done": False,
        }
        for model_type in ("DT", "LR", "OLS", "PLS")
    ),
    {
        "iterations": 1,
        "name": "HIL",
        "experiment": human_in_the_loop_experiment,
        "kwargs": {"upper_bound": 1, "lower_bound": None},
        "done": True,
    },
    # {"iterations": 1, "name": "Participation", "experiment": participation_experiment},
]

# One random state per (dataset, method, iteration) combination, handed out
# sequentially by the experiment loop.
total_states = sum(method["iterations"] for method in xai_methods) * len(datasets)
random_states = iter(check_random_states(RNG_SEED, total_states))

In [None]:
# Run every explanation method on every dataset and store the resulting
# feature contributions, both in `results` and as one CSV file per run.
# Datasets/methods flagged as "done" are skipped.

# Re-create the random states on every execution so the cell can be re-run
# without exhausting the shared iterator (which would raise StopIteration).
random_states = iter(check_random_states(RNG_SEED, total_states))

# `DataFrame.to_csv` does not create missing directories.
os.makedirs("results", exist_ok=True)

results = {}
for dataset in datasets:
    results[dataset["name"]] = {}
    score_func = dataset["scorer"]
    X, ranks, scores = dataset["data"]

    for xai_method in xai_methods:
        results[dataset["name"]][xai_method["name"]] = []
        experiment_func = xai_method["experiment"]

        for iteration_idx in range(xai_method["iterations"]):
            # Consume a random state before any skip so the sequence stays
            # aligned with the (dataset, method, iteration) order.
            random_state = next(random_states)
            if dataset.get("done") or xai_method.get("done"):
                continue
            if (xai_method["name"] in ("HRE_LR", "HRE_PLS") and
                    dataset["name"] == "Moving Company"):
                # dataset has binary categorical data
                # specified methods cannot handle such data
                continue

            # Work on a copy: the override below must not leak into the shared
            # method configuration and affect other datasets or later re-runs.
            kwargs = dict(xai_method.get("kwargs", {}))
            if dataset["name"] == "Moving Company" and xai_method["name"].endswith("ShaRP"):
                kwargs["sample_size"] = 150

            contributions = experiment_func(X, score_func, random_state=random_state, **kwargs)

            results[dataset["name"]][xai_method["name"]].append(contributions)
            result_df = pd.DataFrame(contributions, columns=X.columns, index=X.index)
            result_df.to_csv(
                f"results/_contributions_{dataset['name']}_{xai_method['name']}_{iteration_idx}.csv"
            )

In [None]:
def read_results_from_files():
    """Load the stored contribution tables for every dataset/method pair.

    Iterates over the module-level ``datasets`` and ``xai_methods`` lists and,
    for each configured iteration, reads
    ``results/_contributions_<dataset>_<method>_<iteration>.csv`` when that
    file exists (first column used as the index).

    Returns
    -------
    dict
        ``{dataset name: {method name: [DataFrame, ...]}}``. Every configured
        pair gets an entry; the list is empty when no file was found.
    """
    collected = {}
    for dataset_cfg in datasets:
        dataset_name = dataset_cfg["name"]
        per_method = {}
        collected[dataset_name] = per_method

        for method_cfg in xai_methods:
            method_name = method_cfg["name"]
            candidate_paths = (
                f"results/_contributions_{dataset_name}_{method_name}_{run_idx}.csv"
                for run_idx in range(method_cfg["iterations"])
            )
            # Missing files are skipped silently, so the list may be shorter
            # than the configured number of iterations.
            per_method[method_name] = [
                pd.read_csv(path, index_col=0)
                for path in candidate_paths
                if os.path.isfile(path)
            ]
    return collected

In [None]:
# Rebind `results` to the contributions stored under results/, replacing the
# dictionary built in memory by the experiment loop. Dataset/method pairs
# without a CSV file end up with an empty list.
results = read_results_from_files()

In [None]:
results

# Explanation Sensitivity

In [None]:
help(explanation_sensitivity)

In [None]:
# Explanation sensitivity of every method on every dataset, using the first
# stored run of each method.
methods = [method for method in results["ATP"].keys() if not method.startswith("BATCH")]

# The ranking depends only on the dataset, so compute it once per dataset
# instead of once per (method, dataset) pair.
rankings_by_dataset = {
    dataset["name"]: scores_to_ordering(dataset["scorer"](dataset["data"][0]))
    for dataset in datasets
}

expl_sens_res = {}
expl_sens_sem = {}
for method in methods:
    expl_sens_res[method] = {}
    expl_sens_sem[method] = {}
    for dataset in datasets:
        method_runs = results[dataset["name"]][method]
        if not method_runs:
            # No stored contributions for this pair (the experiment loop skips
            # some combinations); leave it out so the table shows NaN instead
            # of failing with an IndexError.
            continue
        result = explanation_sensitivity(
            dataset["data"][0],
            method_runs[0],
            rankings_by_dataset[dataset["name"]],
            measure="jaccard", n_features=2
        )
        # result[0] / result[1]: presumably the score and its standard error
        # (see help(explanation_sensitivity)).
        expl_sens_res[method][dataset["name"]] = result[0]
        expl_sens_sem[method][dataset["name"]] = result[1]

In [None]:
pd.DataFrame(expl_sens_res)

# Outcome Sensitivity

In [None]:
help(outcome_sensitivity)

In [None]:
# Outcome sensitivity of every method on every dataset, using the first stored
# run of each method.
methods = [method for method in results["ATP"].keys() if not method.startswith("BATCH")]

out_sens_res = {}
out_sens_sem = {}
for method in methods:
    out_sens_res[method] = {}
    out_sens_sem[method] = {}
    for dataset in datasets:
        method_runs = results[dataset["name"]][method]
        if not method_runs:
            # No stored contributions for this pair (the experiment loop skips
            # some combinations); leave it out so the table shows NaN instead
            # of failing with an IndexError.
            continue
        # The metric receives the scorer itself, so no ranking has to be
        # precomputed here.
        result = outcome_sensitivity(
            dataset["data"][0],
            dataset["scorer"],
            method_runs[0],
            threshold=0.8,
            n_neighbors=10,
            n_tests=10,
            std_multiplier=0.2,
            aggregate_results=True,
            random_state=RNG_SEED,
        )
        # result[0] / result[1]: presumably the score and its standard error
        # (see help(outcome_sensitivity)).
        out_sens_res[method][dataset["name"]] = result[0]
        out_sens_sem[method][dataset["name"]] = result[1]

In [None]:
pd.DataFrame(out_sens_res)

# Explanation Consistency

In [None]:
help(cross_method_explanation_consistency)

In [None]:
# Pairwise explanation consistency between methods: one methods-by-methods
# table per dataset, filled from the first stored run of each method.
methods = [method for method in results["ATP"].keys() if not method.startswith("BATCH")]

exp_cons_res = {}
exp_cons_sem = {}
for dataset in datasets:
    dataset_name = dataset["name"]
    exp_cons_res[dataset_name] = pd.DataFrame(index=methods, columns=methods)
    exp_cons_sem[dataset_name] = pd.DataFrame(index=methods, columns=methods)
    for method1, method2 in product(methods, methods):
        runs1 = results[dataset_name][method1]
        runs2 = results[dataset_name][method2]
        if not runs1 or not runs2:
            # At least one method has no stored contributions for this dataset
            # (the experiment loop skips some combinations); the cell stays NaN
            # instead of failing with an IndexError.
            continue
        result = cross_method_explanation_consistency(
            runs1[0], runs2[0],
            measure="jaccard", n_features=2
        )

        # result[0] / result[1]: presumably the score and its standard error
        # (see help(cross_method_explanation_consistency)).
        exp_cons_res[dataset_name].loc[method1, method2] = result[0]
        exp_cons_sem[dataset_name].loc[method1, method2] = result[1]

In [None]:
# Average the per-dataset explanation-consistency tables.
# Start from an explicit float accumulator: building it with `fillna(0)` on an
# all-NaN object frame relies on a deprecated downcast and leaves the result
# with object dtype.
avg_exp_cons_res = pd.DataFrame(0.0, index=methods, columns=methods)
for res_ in exp_cons_res.values():
    # The per-dataset tables are created empty (object dtype), so cast before
    # adding. A pair missing in any dataset stays NaN in the average.
    avg_exp_cons_res += res_.astype(float)

avg_exp_cons_res /= len(exp_cons_res)
avg_exp_cons_res

In [None]:
exp_cons_res["CSRank"]


# Outcome Consistency

In [None]:
help(cross_method_outcome_consistency)

In [None]:
# Pairwise outcome consistency between methods: one methods-by-methods table
# per dataset, filled from the first stored run of each method.
methods = [method for method in results["ATP"].keys() if not method.startswith("BATCH")]

out_cons_res = {}
out_cons_sem = {}
for dataset in datasets:
    dataset_name = dataset["name"]
    out_cons_res[dataset_name] = pd.DataFrame(index=methods, columns=methods)
    out_cons_sem[dataset_name] = pd.DataFrame(index=methods, columns=methods)
    for method1, method2 in product(methods, methods):
        runs1 = results[dataset_name][method1]
        runs2 = results[dataset_name][method2]
        if not runs1 or not runs2:
            # At least one method has no stored contributions for this dataset
            # (the experiment loop skips some combinations); the cell stays NaN
            # instead of failing with an IndexError.
            continue
        result = cross_method_outcome_consistency(
            dataset["data"][0], dataset["scorer"],
            runs1[0],
            runs2[0],
            random_state=RNG_SEED
        )

        # result[0] / result[1]: presumably the score and its standard error
        # (see help(cross_method_outcome_consistency)).
        out_cons_res[dataset_name].loc[method1, method2] = result[0]
        out_cons_sem[dataset_name].loc[method1, method2] = result[1]


In [None]:
out_cons_res["Higher Education"]

# Average the per-dataset outcome-consistency tables, leaving out the
# "Moving Company" dataset.
included_out_cons = [
    res_ for name, res_ in out_cons_res.items() if name != "Moving Company"
]

# Explicit float accumulator; the per-dataset tables are created empty (object
# dtype), so they are cast before adding.
avg_out_cons_res = pd.DataFrame(0.0, index=methods, columns=methods)
for res_ in included_out_cons:
    avg_out_cons_res += res_.astype(float)

# Divide by the number of tables actually summed. Dividing by len(datasets)
# would understate the mean, because the excluded dataset is still counted.
avg_out_cons_res /= len(included_out_cons)
avg_out_cons_res

# Bootstrapped Consistency

In [None]:
help(bootstrapped_explanation_consistency)

In [None]:
# Bootstrapped explanation consistency: compares the first stored run of each
# method with the runs stored under the matching "BATCH_<method>" key.
methods = [method for method in results["ATP"].keys() if not method.startswith("BATCH")]

boot_cons_res = {}
boot_cons_sem = {}
for method in methods:
    boot_cons_res[method] = {}
    boot_cons_sem[method] = {}
    for dataset in datasets:
        dataset_results = results[dataset["name"]]
        single_runs = dataset_results.get(method, [])
        batch_runs = dataset_results.get(f"BATCH_{method}", [])
        if len(single_runs) == 0 or len(batch_runs) == 0:
            # Nothing to compare for this pair. This is the expected case when
            # no "BATCH_*" results were loaded, so skip it explicitly instead
            # of relying on a swallowed KeyError/IndexError.
            continue
        try:
            result = bootstrapped_explanation_consistency(
                single_runs[0], batch_runs,
                measure="euclidean"
            )
        except Exception as err:
            # Stay best-effort, but report the failure instead of hiding it
            # (a bare `except` would also swallow KeyboardInterrupt).
            print(
                f"Skipping bootstrapped consistency for {method} on "
                f"{dataset['name']}: {err!r}"
            )
            continue
        # result[0] / result[1]: presumably the score and its standard error
        # (see help(bootstrapped_explanation_consistency)).
        boot_cons_res[method][dataset["name"]] = result[0]
        boot_cons_sem[method][dataset["name"]] = result[1]

In [None]:
pd.DataFrame(boot_cons_res).round(5)

In [None]:
datasets[2]["data"][0]

In [None]:
datasets[3]["name"]

In [None]:
datasets[3]["data"][0]

# Old metrics

In [None]:
from xai_ranking.metrics.old_metrics import compute_all_agreement, compute_all_fidelity, compute_all_sensitivity, \
    compute_all_stability

In [None]:
def plot_dataset_aggregated_summary(agg_mean, agg_sem, gap=0.3):
    """Draw one error-bar series per row label of ``agg_mean`` on the current figure.

    Each column of ``agg_mean`` gets an integer tick position; the series are
    spread side by side around that tick so their markers do not overlap.

    Parameters
    ----------
    agg_mean : table with ``.index``, ``.columns`` and ``.loc``
        Values to plot; every unique index label becomes one series.
        NOTE(review): presumably a pandas DataFrame indexed by method name.
    agg_sem : table indexed like ``agg_mean``
        Error-bar half-lengths, looked up with the same labels.
    gap : float, default 0.3
        Share of each unit-wide tick slot left empty between column groups.

    Returns
    -------
    None
        The plot is drawn through ``plt``; nothing is returned.
    """
    column_count = len(agg_mean.columns)
    series_labels = agg_mean.index.unique()
    series_count = len(series_labels)

    # Width reserved for one series, and the position of the first series so
    # that the whole group is centred on the tick.
    slot_width = (1 - gap) / series_count
    positions = np.arange(column_count).astype(np.float64)
    positions = positions - (series_count - 1) * slot_width / 2

    for label in series_labels:
        plt.errorbar(
            positions,
            agg_mean.loc[label],
            yerr=agg_sem.loc[label],
            marker="o",
            label=label,
            linestyle="None",
        )
        # Shift to the slot of the next series.
        positions = positions + slot_width

    plt.legend()
    plt.xticks(np.arange(column_count), agg_mean.columns, rotation=45)

In [None]:
# Stability (old metrics): one error-bar figure per entry of the aggregated
# summary returned by `compute_all_stability`.
set_matplotlib_style(font_size=12, **{"font.family": ["Nimbus Roman"]})
aggregated_summary, aggregated_error = compute_all_stability(results, axis=0)

for dataset in aggregated_summary:
    summary_mean = aggregated_summary[dataset]
    summary_error = aggregated_error[dataset]
    plot_dataset_aggregated_summary(summary_mean, summary_error)
    plt.title(dataset)
    plt.show()

# Agreement

In [None]:
agreement_results = compute_all_agreement(results, n_features=3)
agreement_results["ATP"]["kendall"]

In [None]:
agreement_results["ATP"]["jaccard"]

In [None]:
agreement_results["CSRank"]["kendall"]

In [None]:
agreement_results["CSRank"]["jaccard"]

In [None]:
agreement_results["Higher Education"]["kendall"]

# Sensitivity

In [None]:
# Sensitivity (old metrics) for all datasets and methods at once. The full
# `datasets` configuration list is passed as `original_data`, together with the
# contributions loaded into `results`.
sensitivity_results = compute_all_sensitivity(
    original_data=datasets,
    results=results,
    n_neighbors=10
)

In [None]:
pd.DataFrame(sensitivity_results[0])

# Fidelity

In [None]:
# Fidelity (old metrics) for all datasets and methods at once, seeded with
# RNG_SEED. The full `datasets` configuration list is passed as
# `original_data`, together with the contributions loaded into `results`.
fidelity_results = compute_all_fidelity(
    original_data=datasets,
    results=results,
    random_state=RNG_SEED
)
# Only the first element of the returned value is tabulated here.
pd.DataFrame(fidelity_results[0])

In [None]:
pd.DataFrame(fidelity_results[0]).mean(1)  # .drop(columns="Moving Company").mean(1)