In [1]:
!mkdir -p results
!rm results/*

rm: cannot remove 'results/*': No such file or directory


In [6]:
import sys

sys.path.append("..")

import pandas as pd
from lightgbm import LGBMRanker
from sklearn.utils import check_random_state
from xai_ranking.benchmarks import (
    human_in_the_loop_experiment,
    human_in_the_loop_batch_experiment,
    hierarchical_ranking_explanation,
    hierarchical_ranking_batch_explanation,
    lime_experiment,
    lime_batch_experiment,
    shap_experiment,
    shap_batch_experiment,
    sharp_experiment,
    sharp_batch_experiment,
    # participation_experiment,
)
from xai_ranking.preprocessing import (
    preprocess_atp_data,
    preprocess_csrank_data,
    preprocess_higher_education_data,
    preprocess_movers_data,
)
from xai_ranking.datasets import (
    fetch_atp_data,
    fetch_csrank_data,
    fetch_higher_education_data,
    fetch_movers_data,
)
from xai_ranking.scorers import (
    atp_score,
    csrank_score,
    higher_education_score,
)

# Seed from which the per-run random states of the experiments are derived.
RNG_SEED = 42

In [7]:
def check_random_states(random_state, n_runs):
    """Derive ``n_runs`` integer seeds from a single source of randomness.

    Parameters
    ----------
    random_state : object
        Anything accepted by ``sklearn.utils.check_random_state``.
    n_runs : int
        Number of seeds to draw.

    Returns
    -------
    list
        ``n_runs`` unsigned 32-bit integers, drawn one after another from the
        validated random state.
    """
    rng = check_random_state(random_state)
    seeds = []
    for _ in range(n_runs):
        # One scalar draw per run; the upper bound passed to randint is 2**32 - 1.
        seeds.append(rng.randint(0, 2**32 - 1, dtype="uint32"))
    return seeds

In [8]:
# Set up ranker for the moving company dataset:
X, ranks, score = preprocess_movers_data(fetch_movers_data(test=True))

# LightGBM's `group` argument must list the size of each query in the same
# order as the rows of X. `value_counts()` orders sizes by descending frequency,
# which only matches the row order by coincidence, so count per query while
# preserving order of first appearance instead.
# NOTE(review): assumes the (single-level) index of X is the query id and that
# the rows of each query are contiguous -- confirm against preprocess_movers_data.
qids_train = X.groupby(level=0, sort=False).size().to_numpy()

# lambdarank needs one gain value per possible label, i.e. 0..max(ranks).
model = LGBMRanker(
    objective="lambdarank", label_gain=list(range(max(ranks) + 1)), verbose=-1
)
model.fit(
    X=X,
    y=ranks,
    group=qids_train,
)

In [9]:
# Benchmark datasets. Each entry bundles:
#   "name"       - label used as a key in `results` and in the output file names
#   "data"       - raw data, fetched once here when the list is built
#   "preprocess" - callable applied to "data"; the experiment loop unpacks its
#                  return value as (X, ranks, scores)
#   "scorer"     - scoring callable handed to every explanation experiment
#   "done"       - optional; when truthy the experiment loop skips the dataset
#                  (its random states are still consumed)
datasets = [
    {
        "name": "ATP",
        "data": fetch_atp_data(),
        "preprocess": preprocess_atp_data,
        "scorer": atp_score,
        "done": True
    },
    {
        "name": "CSRank",
        "data": fetch_csrank_data(),
        "preprocess": preprocess_csrank_data,
        "scorer": csrank_score,
    },
    {
        "name": "Higher Education",
        "data": fetch_higher_education_data(year=2020),
        "preprocess": preprocess_higher_education_data,
        "scorer": higher_education_score,
        "done": True
    },
    {
        "name": "Moving Company",
        "data": fetch_movers_data(test=True),
        "preprocess": preprocess_movers_data,
        # Scored by the LGBMRanker fitted in the cell above.
        "scorer": model.predict,
    },
]
# Explanation methods to benchmark, written as (iterations, name, experiment)
# and expanded into the dict records the experiment loop consumes.
xai_methods = [
    {"iterations": n_iterations, "name": method_name, "experiment": experiment}
    for n_iterations, method_name, experiment in (
        (1, "LIME", lime_experiment),
        (3, "BATCH_LIME", lime_batch_experiment),
        (1, "SHAP", shap_experiment),
        (3, "BATCH_SHAP", shap_batch_experiment),
        (1, "ShaRP", sharp_experiment),
        (3, "BATCH_ShaRP", sharp_batch_experiment),
        # (1, "Participation", participation_experiment),  # disabled, import is commented out
        (1, "HRE", hierarchical_ranking_explanation),
        (3, "BATCH_HRE", hierarchical_ranking_batch_explanation),
        (1, "HIL", human_in_the_loop_experiment),
        (3, "BATCH_HIL", human_in_the_loop_batch_experiment),
    )
]

# One random state per (dataset, method, iteration) combination.
total_states = len(datasets) * sum(method["iterations"] for method in xai_methods)
# Iterator consumed with next() by the experiment loop.
random_states = iter(check_random_states(RNG_SEED, total_states))

In [None]:
# Run every explanation method on every dataset and persist the contributions.
#
# The seed stream is rebuilt here so the cell can be re-run on its own: a shared
# iterator would be exhausted after the first pass and raise StopIteration.
# The resulting seeds are identical to those of a fresh run.
random_states = iter(check_random_states(RNG_SEED, total_states))

# results[dataset name][method name] -> list with one contributions object per iteration
results = {}
for dataset in datasets:
    dataset_name = dataset["name"]
    already_done = dataset.get("done", False)
    results[dataset_name] = {}

    for xai_method in xai_methods:
        method_name = xai_method["name"]
        results[dataset_name][method_name] = []

        # Draw the seeds before deciding whether to skip, so a given
        # (dataset, method, iteration) always receives the same seed no matter
        # which datasets are flagged as done.
        seeds = [next(random_states) for _ in range(xai_method["iterations"])]
        if already_done:
            # Nothing to compute; the entry stays an empty list.
            continue

        experiment_func = xai_method["experiment"]
        preprocess_func = dataset["preprocess"]
        score_func = dataset["scorer"]

        # Preprocess afresh for each method so every experiment starts from the
        # same, untouched inputs.
        X, ranks, scores = preprocess_func(dataset["data"])

        for iteration_idx, random_state in enumerate(seeds):
            contributions = experiment_func(X, score_func, random_state=random_state)

            results[dataset_name][method_name].append(contributions)

            # One CSV per run, rows/columns labelled like the input frame.
            result_df = pd.DataFrame(contributions, columns=X.columns, index=X.index)
            result_df.to_csv(f"results/_contributions_{dataset['name']}_{xai_method['name']}_{iteration_idx}.csv")

ExactExplainer explainer: 2001it [00:12, 29.68it/s]                           
ExactExplainer explainer: 2001it [00:12, 29.86it/s]                           
ExactExplainer explainer: 2001it [00:12, 29.79it/s]                           
ExactExplainer explainer: 2001it [00:12, 29.87it/s]                           


In [None]:
# Show the collected contributions: {dataset name: {method name: [one entry per iteration]}}.
results