In [None]:
import os
import time

import pandas as pd
import tqdm
from sharp import ShaRP

from xai_ranking.datasets import fetch_atp_data
from xai_ranking.preprocessing import preprocess_atp_data
from xai_ranking.scorers import atp_score

# Seed passed as `random_state` to every ShaRP instance built in this notebook.
RNG_SEED = 42

In [None]:
# Datasets the experiment iterates over. Each entry carries:
#   name   - label used as the key in `results` and in the output CSV filename
#   data   - output of the preprocessing step; the experiment unpacks it as a
#            3-tuple and only uses the first element (the feature table)
#   scorer - callable handed to ShaRP as `target_function`
datasets = [
    dict(
        name="ATP",
        data=preprocess_atp_data(fetch_atp_data()),
        scorer=atp_score,
    ),
]
# Keyword arguments forwarded to the ShaRP constructor. The run that uses these
# values is the reference ("exact") row of the experiment.
# NOTE(review): `sample_size=None` / `coalition_size=None` presumably select the
# library's non-approximated computation -- confirm against the ShaRP docs.
default_kwargs = dict(
    qoi="rank",
    measure="shapley",
    sample_size=None,
    coalition_size=None,
    replace=True,
    random_state=RNG_SEED,
    n_jobs=-1,
)
# Parameter sweep: maps a ShaRP keyword argument to the values tried for it.
#   coalition_size -> 1, 2, 3, 4
#   sample_size    -> 10, 30, ..., 190 (step of 20)
parameters_to_change = {
    "coalition_size": list(range(1, 5)),
    "sample_size": list(range(10, 200, 20)),
}

In [None]:
# Timing / accuracy experiment.
#
# For every dataset:
#   1. Fit ShaRP with the unmodified `default_kwargs` and record the mean
#      per-feature contribution together with the time `all(X)` took.
#   2. For each parameter in `parameters_to_change`, re-run ShaRP with ONLY that
#      parameter overridden and record the same quantities plus the mean signed
#      difference to the baseline contributions.
#   3. Collect the rows into a DataFrame, store it in `results[<name>]` and
#      write it to results/time-experiment-<name>.csv.
results = {}

results_dir = "results"
# DataFrame.to_csv does not create missing parent directories.
os.makedirs(results_dir, exist_ok=True)

for dataset in datasets:
    name = dataset["name"]
    scorer = dataset["scorer"]
    X, _, _ = dataset["data"]
    result_cols = ["parameter", "parameter_value", "value", "time"] + X.columns.tolist()

    # Baseline run with the untouched defaults.
    baseline_sharp = ShaRP(target_function=scorer, **default_kwargs)
    baseline_sharp.fit(X)

    # perf_counter is monotonic, so the measured duration cannot be skewed by
    # system clock adjustments. Only the `all(X)` call is timed, not `fit`.
    start = time.perf_counter()
    # TODO(review): open question carried over from the original notebook --
    # why average the contributions across all data points? Shouldn't the
    # result just be zero?
    baseline_contr = baseline_sharp.all(X).mean(axis=0)
    elapsed = time.perf_counter() - start

    # First row is the baseline: no varied parameter, zero difference to itself.
    rows = [[None, None, 0, elapsed] + baseline_contr.tolist()]

    for parameter, parameter_values in parameters_to_change.items():
        print(f"Alternating parameter: {parameter}")

        for parameter_value in tqdm.tqdm(parameter_values):
            # Override the parameter on a copy. Writing into `default_kwargs`
            # itself would leak the last tried value into the sweeps of the
            # remaining parameters and into any re-run of this cell.
            run_kwargs = {**default_kwargs, parameter: parameter_value}
            sharp = ShaRP(target_function=scorer, **run_kwargs)
            sharp.fit(X)

            start = time.perf_counter()
            # TODO(review): same open question as for the baseline -- why the mean?
            contr = sharp.all(X).mean(axis=0)
            elapsed = time.perf_counter() - start

            # TODO(review): this is a signed mean, so positive and negative
            # per-feature differences can cancel out; the original author
            # asked whether it should be squared instead.
            difference = (baseline_contr - contr).mean()

            rows.append([parameter, parameter_value, difference, elapsed] + contr.tolist())

    results[name] = pd.DataFrame(rows, columns=result_cols)
    results[name].to_csv(f"{results_dir}/time-experiment-{name}.csv")

In [None]:
# Show the ATP results table; its first row is the baseline run.
results["ATP"]