In [1]:
import os
import sys
import seaborn as sns
import matplotlib.pyplot as plt
from sharp import ShaRP

sys.path.append("..")

from itertools import product
from lightgbm import LGBMRanker
from sklearn.utils import check_random_state
from xai_ranking.benchmarks import (
    human_in_the_loop_experiment,
    human_in_the_loop_batch_experiment,
    hierarchical_ranking_explanation,
    hierarchical_ranking_batch_explanation,
    lime_experiment,
    lime_batch_experiment,
    shap_experiment,
    shap_batch_experiment,
    sharp_experiment,
    sharp_batch_experiment,
    # participation_experiment,
)
from xai_ranking.preprocessing import (
    preprocess_atp_data,
    preprocess_csrank_data,
    preprocess_higher_education_data,
    preprocess_movers_data,
    preprocess_synthetic_data,
)
from xai_ranking.datasets import (
    fetch_atp_data,
    fetch_csrank_data,
    fetch_higher_education_data,
    fetch_movers_data,
    fetch_synthetic_data,
)
from xai_ranking.scorers import (
    atp_score,
    csrank_score,
    higher_education_score,
    synthetic_equal_score_3ftrs,
)
from xai_ranking.metrics import (
    outcome_fidelity
)
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
from mlresearch.utils import check_random_states, set_matplotlib_style
from sharp.utils import scores_to_ordering

# Seed constant for random number generation.
# NOTE(review): not referenced by any cell visible in this part of the notebook —
# confirm it is still needed.
RNG_SEED = 42

In [2]:
# Benchmark datasets used throughout the notebook. Every entry holds:
#   "name"           - label used in result file names and as a row of the result tables,
#   "data"           - output of the matching preprocess_* helper (later cells pass
#                      element 0 of it to the scorer),
#   "scorer"         - scoring function applied to that data,
#   "n_observations" - per-dataset observation count setting.
# Data is fetched eagerly, in the order the entries are listed.
datasets = [
    dict(
        name="Higher Education",
        data=preprocess_higher_education_data(fetch_higher_education_data(year=2020)),
        scorer=higher_education_score,
        n_observations=100,
    ),
    dict(
        name="ATP",
        data=preprocess_atp_data(fetch_atp_data()),
        scorer=atp_score,
        n_observations=86,
    ),
    dict(
        name="CSRank",
        data=preprocess_csrank_data(fetch_csrank_data()),
        scorer=csrank_score,
        n_observations=100,
    ),
    # Synthetic_0 .. Synthetic_2 differ only in the synthetic data version.
    *(
        dict(
            name=f"Synthetic_{synth_version}",
            data=preprocess_synthetic_data(
                fetch_synthetic_data(synth_dt_version=synth_version, item_num=2000)
            ),
            scorer=synthetic_equal_score_3ftrs,
            n_observations=100,
        )
        for synth_version in range(3)
    ),
]

# Explanation methods under comparison. Every active entry holds:
#   "iterations" - number of stored runs (one result CSV is looked up per iteration),
#   "name"       - label used in result file names and as a column of the result tables,
#   "experiment" - benchmark callable imported from xai_ranking.benchmarks,
#   "type"       - tag string; values used here: "score", "rank", "std-score", "std-rank",
#   "kwargs"     - keyword arguments stored alongside the experiment callable.
xai_methods = [
    dict(
        iterations=1,
        name="LIME",
        experiment=lime_experiment,
        type="score",
        kwargs=dict(mode="regression"),  # classification, regression
    ),
    dict(
        iterations=1,
        name="SHAP",
        experiment=shap_experiment,
        type="score",
        kwargs={},
    ),
    # The active ShaRP variants share every setting except "name", "type" and "qoi".
    *(
        dict(
            iterations=1,
            name=sharp_name,
            experiment=sharp_experiment,
            type=sharp_type,
            kwargs=dict(
                qoi=sharp_qoi,
                verbose=True,
                sample_size=None,
                measure="shapley",
                n_jobs=-1,
                replace=False,
            ),
        )
        for sharp_name, sharp_type, sharp_qoi in (
            ("ShaRP_RANK", "rank", "rank"),
            ("ShaRP_SCORE", "score", "rank_score"),
        )
    ),
    # Disabled entry, kept for reference:
    # dict(
    #     iterations=1,
    #     name="ShaRP_TOPK",
    #     experiment=sharp_experiment,
    #     kwargs=dict(qoi="top_k", verbose=True, sample_size=None,
    #                 measure="shapley", n_jobs=-1, replace=True),
    # ),
    # One HRE entry per surrogate model type (DT, LR, OLS, PLS).
    *(
        dict(
            iterations=1,
            name=f"HRE_{hre_model_type}",
            type="score",
            experiment=hierarchical_ranking_explanation,
            kwargs={"model_type": hre_model_type, "s": 10},
        )
        for hre_model_type in ("DT", "LR", "OLS", "PLS")
    ),
    # Disabled entry, kept for reference:
    # dict(
    #     iterations=1,
    #     name="HIL_Shapley",
    #     experiment=human_in_the_loop_experiment,
    #     kwargs={"upper_bound": 1, "lower_bound": None, "method_type": "shapley"},
    # ),
    # The active HIL variants differ in "name", "type" and "method_type".
    *(
        dict(
            iterations=1,
            name=hil_name,
            type=hil_type,
            experiment=human_in_the_loop_experiment,
            kwargs={
                "upper_bound": 1,
                "lower_bound": None,
                "method_type": hil_method_type,
            },
        )
        for hil_name, hil_type, hil_method_type in (
            ("HIL_Standardized-Shapley", "std-score", "standardized shapley"),
            ("HIL_Rank-Shapley", "std-rank", "rank-relevance shapley"),
        )
    ),
    # Disabled entries, kept for reference:
    # dict(
    #     iterations=1,
    #     name="RankLIME",
    #     experiment=rank_lime_experiment,
    #     kwargs={
    #         "explanation_size": 3,
    #         "rank_similarity_coefficient": lambda x, y: kendalltau(x, y)[0],
    #         "individual_masking": True,
    #         "use_entry": 0,
    #         "use_pandas_where": False,
    #     },
    # ),
    # dict(iterations=1, name="Participation", experiment=participation_experiment),
]



In [3]:
def read_results_from_files(
    dataset_specs=None, method_specs=None, results_dir="results/contributions"
):
    """Load the stored contribution tables for every dataset / method pair.

    For each dataset and method, one CSV per iteration is looked up at
    ``<results_dir>/_contributions_<dataset>_<method>_<iteration>.csv`` and
    read with its first column as the index. Files that do not exist are
    skipped silently, so a method's list may be shorter than its
    ``"iterations"`` value, or empty.

    Parameters
    ----------
    dataset_specs : iterable of dict, optional
        Entries providing a ``"name"`` key. Defaults to the notebook-level
        ``datasets`` list.
    method_specs : iterable of dict, optional
        Entries providing ``"name"`` and ``"iterations"`` keys. Defaults to
        the notebook-level ``xai_methods`` list.
    results_dir : str, optional
        Directory holding the CSV files.

    Returns
    -------
    dict
        ``{dataset name: {method name: [DataFrame, ...]}}`` with one
        DataFrame per result file found, in iteration order.
    """
    # Fall back to the notebook configuration so zero-argument calls keep
    # their previous behaviour.
    if dataset_specs is None:
        dataset_specs = datasets
    if method_specs is None:
        method_specs = xai_methods

    loaded = {}
    for dataset_spec in dataset_specs:
        dataset_name = dataset_spec["name"]
        per_method = loaded[dataset_name] = {}
        for method_spec in method_specs:
            method_name = method_spec["name"]
            frames = per_method[method_name] = []
            for iteration in range(method_spec["iterations"]):
                fname = (
                    f"{results_dir}/_contributions_"
                    f"{dataset_name}_{method_name}_"
                    f"{iteration}.csv"
                )
                if os.path.isfile(fname):
                    frames.append(pd.read_csv(fname, index_col=0))
    return loaded

In [4]:
# Load the stored contribution tables for every configured dataset / method pair.
# Structure: {dataset name: {method name: [DataFrame per result file found]}}.
results = read_results_from_files()

In [5]:
# results

# Fidelity

In [6]:
# FOR RANKING QOI
# Outcome fidelity of each method's first stored result, measured against the
# ordering derived from the dataset's scores (rank=True, target_max = number of items).
# NOTE(review): the stored output shows very large negative values for HRE_LR —
# worth confirming whether that comes from the method or from the metric.
methods = [method for method in results["Synthetic_1"] if not method.startswith("BATCH")]
dataset_names = [dataset["name"] for dataset in datasets]

fidelity_res = pd.DataFrame(index=dataset_names, columns=methods)
for dataset in datasets:
    dataset_name = dataset["name"]

    # The target depends only on the dataset, so compute it once per dataset.
    try:
        target = scores_to_ordering(dataset["scorer"](dataset["data"][0]))
    except Exception as exc:
        # Best effort: leave the whole row as NaN, but say why.
        print(f"[fidelity/rank] skipped {dataset_name}: {exc!r}", file=sys.stderr)
        continue

    for method in methods:
        method_results = results.get(dataset_name, {}).get(method, [])
        if not method_results:
            # No result file was found for this pair; the cell stays NaN.
            continue
        try:
            fidelity_res.loc[dataset_name, method] = outcome_fidelity(
                method_results[0],
                target,
                target.mean(),
                target_max=target.size,
                rank=True,
            )
        except Exception as exc:
            # Keep going with the other methods, but surface the failure instead
            # of hiding it (and do not swallow KeyboardInterrupt).
            print(
                f"[fidelity/rank] skipped {dataset_name} / {method}: {exc!r}",
                file=sys.stderr,
            )

fidelity_res

Unnamed: 0,LIME,SHAP,ShaRP_RANK,ShaRP_SCORE,HRE_DT,HRE_LR,HRE_OLS,HRE_PLS,HIL_Standardized-Shapley,HIL_Rank-Shapley
Higher Education,0.762761,0.759579,0.99998,0.759592,0.75,-602676650538.307,0.744876,0.749985,0.75,0.750179
ATP,0.926594,0.881751,0.999217,0.881751,0.749865,-3639967805154.791,-3.651163,0.744705,0.750006,0.752997
CSRank,0.760023,0.757819,0.999571,0.757803,0.749979,0.745971,0.699073,0.7497,0.750023,0.751323
Synthetic_0,0.750044,0.750035,1.0,0.750035,0.75,-133821680634.02354,0.75,0.750001,0.75,0.750125
Synthetic_1,0.75003,0.750023,1.0,0.750023,0.75,-79164337738.80042,0.75,0.750001,0.75,0.750125
Synthetic_2,0.750039,0.75003,1.0,0.75003,0.75,-114951621974.40268,0.75,0.749999,0.75,0.750125


In [7]:
# FOR SCORE QOI
# Outcome fidelity of each method's first stored result, measured against the
# raw scores of the dataset's scorer (rank=False, target_max = largest score).
# NOTE(review): the stored output shows very large negative values for HRE_LR and
# for ShaRP_RANK on the synthetic data — worth confirming these are expected.
methods = [method for method in results["Synthetic_1"] if not method.startswith("BATCH")]
dataset_names = [dataset["name"] for dataset in datasets]

fidelity_res = pd.DataFrame(index=dataset_names, columns=methods)
for dataset in datasets:
    dataset_name = dataset["name"]

    # The target depends only on the dataset, so compute it once per dataset.
    try:
        target = dataset["scorer"](dataset["data"][0])
    except Exception as exc:
        # Best effort: leave the whole row as NaN, but say why.
        print(f"[fidelity/score] skipped {dataset_name}: {exc!r}", file=sys.stderr)
        continue

    for method in methods:
        method_results = results.get(dataset_name, {}).get(method, [])
        if not method_results:
            # No result file was found for this pair; the cell stays NaN.
            continue
        try:
            fidelity_res.loc[dataset_name, method] = outcome_fidelity(
                method_results[0],
                target,
                target.mean(),
                target_max=target.max(),
                rank=False,
            )
        except Exception as exc:
            # Keep going with the other methods, but surface the failure instead
            # of hiding it (and do not swallow KeyboardInterrupt).
            print(
                f"[fidelity/score] skipped {dataset_name} / {method}: {exc!r}",
                file=sys.stderr,
            )

fidelity_res

Unnamed: 0,LIME,SHAP,ShaRP_RANK,ShaRP_SCORE,HRE_DT,HRE_LR,HRE_OLS,HRE_PLS,HIL_Standardized-Shapley,HIL_Rank-Shapley
Higher Education,0.937949,0.971444,-2.519682,1.0,0.856029,-8823740726840.637,-0.048026,0.846627,0.857494,0.859384
ATP,0.982876,1.0,0.96751,1.0,0.962571,-996933857462.0505,-0.273885,0.95909,0.962967,0.963592
CSRank,0.949856,0.987996,-1.490318,1.0,0.87367,-0.75753,-0.44303,0.759491,0.900629,0.898341
Synthetic_0,0.952161,0.995678,-619.957779,1.0,-0.242087,-332436432812083.25,-0.242087,-2.456788,0.914161,0.378956
Synthetic_1,0.954959,0.993972,-716.895848,1.0,-0.435922,-227347617486456.88,-0.435922,-3.073381,0.934942,0.282039
Synthetic_2,0.952889,0.995714,-691.821331,1.0,-0.385806,-318601367018332.7,-0.385806,-2.909029,0.918229,0.307097
