In [1]:
import os
import sys

sys.path.append("..")

import tqdm
import time
from copy import deepcopy
import numpy as np
import pandas as pd

from sklearn.utils import check_random_state
from lightgbm import LGBMRanker
from sharp import ShaRP
from sharp.utils import scores_to_ordering
from xai_ranking.preprocessing import preprocess_higher_education_data
from xai_ranking.scorers import higher_education_score
from mlresearch.utils import check_random_states

from xai_ranking.preprocessing import (
    preprocess_atp_data,
    preprocess_csrank_data,
    preprocess_higher_education_data,
    preprocess_movers_data,
)
from xai_ranking.datasets import (
    fetch_atp_data,
    fetch_csrank_data,
    fetch_higher_education_data,
    fetch_movers_data,
)
from xai_ranking.scorers import (
    atp_score,
    csrank_score,
    higher_education_score,
)
from xai_ranking.metrics import (
    explanation_sensitivity, outcome_sensitivity,
    bootstrapped_explanation_consistency, cross_method_explanation_consistency,
    cross_method_outcome_consistency
)

RNG_SEED = 42
N_RUNS = 3

In [2]:
# Set up ranker for the moving company dataset:
X, ranks, score = preprocess_movers_data(fetch_movers_data(test=False))
qids_train = X.index.value_counts().to_numpy()

model = LGBMRanker(
    objective="lambdarank", label_gain=list(range(max(ranks) + 1)), verbose=-1
)
model.fit(
    X=X,
    y=ranks,
    group=qids_train,
)

In [3]:
random_states = check_random_states(RNG_SEED, N_RUNS)

datasets = [
    {
        "name": "ATP",
        "data": preprocess_atp_data(fetch_atp_data()),
        "scorer": atp_score,
        "n_observations": 86,
    },
    {
        "name": "CSRank",
        "data": preprocess_csrank_data(fetch_csrank_data()),
        "scorer": csrank_score,
        "n_observations": 100,
    },
    {
        "name": "Higher Education",
        "data": preprocess_higher_education_data(
            fetch_higher_education_data(year=2020)
        ),
        "scorer": higher_education_score,
        "n_observations": 100,
    },
    {
        "name": "Moving Company",
        "data": preprocess_movers_data(fetch_movers_data(test=True)),
        "scorer": model.predict,
        "n_observations": 100,
    },
]

approaches = ["rank", "flip", "pairwise"]

default_kwargs = {
    "measure": "shapley",
    "sample_size": None,
    "coalition_size": None,
    "replace": False,
    "n_jobs": 1,
}
parameters_to_change = {
    "coalition_size": [i for i in range(1, 6)],
    "sample_size": [i for i in np.arange(.1, 1.1, .1)],
    "n_jobs": [i for i in range(1, os.cpu_count(), 2)],
}

In [4]:
def outcome_fidelity(contributions, target, avg_target, target_pairs=None, rank=True):
    if target_pairs is None:
        if rank:
            avg_est_err = np.mean(np.abs(target - (avg_target - contributions.sum(axis=1))))
        else:
            avg_est_err = np.mean(np.abs(target - (avg_target + contributions.sum(axis=1))))
    else:
        if rank:
            better_than = target < target_pairs
        else:
            better_than = target > target_pairs

        est_better_than = contributions.sum(axis=1) > 0
        avg_est_err = (better_than == est_better_than).mean()
    return avg_est_err

In [5]:
# Super janky code... It would be a good exercise to refactor this

result_cols = (
    ["dataset", "n_observations", "approach", "parameter", "parameter_value", "avg_time"]
    + [f"time_{i}" for i in range(N_RUNS)]
    + [f"agreement_kendall_{i}" for i in range(N_RUNS)]
    + [f"agreement_jaccard2_{i}" for i in range(N_RUNS)]
    + [f"agreement_euclidean_{i}" for i in range(N_RUNS)]
    + [f"fidelity_{i}" for i in range(N_RUNS)]
)

result_df = []

for dataset in datasets:

    # Set up basic settings
    X = dataset["data"][0]
    scorer = dataset["scorer"]
    scores = np.array(scorer(dataset["data"][0]))
    ranking = scores_to_ordering(scores)

    rng = check_random_state(RNG_SEED)
    sam_idx1 = rng.choice(
        np.indices((X.shape[0],)).squeeze(), size=dataset["n_observations"], replace=False
    )
    sam_idx2 = rng.choice(
        np.indices((X.shape[0],)).squeeze(), size=dataset["n_observations"], replace=False
    )

    for approach in approaches:
        print("----------------", dataset["name"], "|", approach, "----------------")

        times = []
        kendall_cons = []
        jaccard_cons = []
        euclidean_cons = []
        fidelity = []

        print("Exact computation")
        for i in tqdm.tqdm(range(N_RUNS)):
            start = time.time()
            if approach != "pairwise":
                baseline_sharp = ShaRP(
                    qoi=approach,
                    target_function=dataset["scorer"],
                    random_state=random_states[i],
                    **default_kwargs,
                )
                baseline_sharp.fit(X)
                baseline_contr = baseline_sharp.all(X.values[sam_idx1])
            else:
                baseline_sharp = ShaRP(
                    target_function=dataset["scorer"],
                    random_state=random_states[i],
                    **default_kwargs,
                )
                baseline_pairwise = []
                for idx1, idx2 in zip(sam_idx1, sam_idx2):
                    baseline_pairwise.append(baseline_sharp.pairwise(X.values[idx1], X.values[idx2]))
                baseline_contr = np.array(baseline_pairwise)
                
            end = time.time()

            baseline_contr = pd.DataFrame(baseline_contr, columns=X.columns, index=X.index.values[sam_idx1])
            # Save metrics
            times.append(end - start)
            kendall_cons.append(np.nan)
            jaccard_cons.append(np.nan)
            euclidean_cons.append(np.nan)

            if approach != "pairwise":
                target = scores if approach == "score" else ranking
                avg_target = target.mean()
                res_ = outcome_fidelity(baseline_contr, target[sam_idx1], avg_target, rank=approach=="rank")
            else:
                res_ = outcome_fidelity(baseline_contr, target[sam_idx1], avg_target, target_pairs=target[sam_idx2], rank=approach=="rank")

            fidelity.append(res_)

        exact_results_row = (
            [
                dataset["name"], 
                dataset["n_observations"], 
                approach, 
                np.nan, 
                np.nan, 
                np.mean(times)
            ] + times + kendall_cons + jaccard_cons +
            euclidean_cons + fidelity
        )
        result_df.append(exact_results_row)
        print("Finished computing exact results")
        ############################################################################################

        for parameter, parameter_values in parameters_to_change.items():
            print(f"Alternating parameter: {parameter}")
            default_value = deepcopy(default_kwargs[parameter] if parameter in default_kwargs else None)
            for parameter_value in tqdm.tqdm(parameter_values):

                if parameter == "sample_size":
                    parameter_value = int(parameter_value*X.shape[0])
                    
                default_kwargs[parameter] = parameter_value

                times = []
                kendall_cons = []
                jaccard_cons = []
                euclidean_cons = []
                fidelity = []

                print(f"Parameter {parameter}, value {parameter_value}")
                for i in tqdm.tqdm(range(N_RUNS)):
                    start = time.time()
                    if approach != "pairwise":
                        sharp = ShaRP(
                            qoi=approach,
                            target_function=dataset["scorer"],
                            random_state=random_states[i],
                            **default_kwargs,
                        )
                        sharp.fit(X)
                        contr = sharp.all(X.values[sam_idx1])
                    else:
                        sharp = ShaRP(
                            target_function=dataset["scorer"],
                            random_state=random_states[i],
                            **default_kwargs,
                        )
                        pairwise = []
                        for idx1, idx2 in zip(sam_idx1, sam_idx2):
                            pairwise.append(sharp.pairwise(X.values[idx1], X.values[idx2]))
                        contr = np.array(pairwise)

                    end = time.time()

                    contr = pd.DataFrame(contr, columns=X.columns, index=np.array(X.index)[sam_idx1])

                    # Save metrics
                    times.append(end - start)
                    kendall_cons.append(
                        cross_method_explanation_consistency(contr, baseline_contr, measure="kendall")[0]
                    )
                    jaccard_cons.append(
                        cross_method_explanation_consistency(contr, baseline_contr, measure="jaccard", n_features=2)[0]
                    )
                    euclidean_cons.append(
                        cross_method_explanation_consistency(contr, baseline_contr, measure="euclidean")[0]
                    )
                    target = scores if approach == "score" else ranking
                    avg_target = target.mean()
                    res_ = outcome_fidelity(contr, target[sam_idx1], avg_target)

                    fidelity.append(res_)

                results_row = (
                    [
                        dataset["name"], 
                        dataset["n_observations"], 
                        approach, 
                        parameter, 
                        parameter_value, 
                        np.mean(times)
                    ] + times + kendall_cons + jaccard_cons +
                    euclidean_cons + fidelity
                )
                result_df.append(results_row)
                print(f"Stored results for {parameter} | {parameter_value}")

            default_kwargs[parameter] = default_value


    results = pd.DataFrame(result_df, columns=result_cols)
    results.to_csv("results/time-experiment-" + dataset["name"] + ".csv")

---------------- ATP | rank ----------------
Exact computation


100%|██████████| 3/3 [02:54<00:00, 58.29s/it]


Finished computing exact results
Alternating parameter: coalition_size


  0%|          | 0/5 [00:00<?, ?it/s]

Parameter coalition_size, value 1


100%|██████████| 3/3 [00:32<00:00, 10.92s/it]
 20%|██        | 1/5 [00:32<02:11, 32.76s/it]

Stored results for coalition_size | 1
Parameter coalition_size, value 2


100%|██████████| 3/3 [01:28<00:00, 29.57s/it]
 40%|████      | 2/5 [02:01<03:17, 65.67s/it]

Stored results for coalition_size | 2
Parameter coalition_size, value 3


100%|██████████| 3/3 [02:23<00:00, 47.72s/it]
 60%|██████    | 3/5 [04:24<03:22, 101.05s/it]

Stored results for coalition_size | 3
Parameter coalition_size, value 4


100%|██████████| 3/3 [02:29<00:00, 49.74s/it]
 80%|████████  | 4/5 [06:53<02:00, 120.07s/it]

Stored results for coalition_size | 4
Parameter coalition_size, value 5


100%|██████████| 3/3 [02:00<00:00, 40.23s/it]
100%|██████████| 5/5 [08:54<00:00, 106.91s/it]


Stored results for coalition_size | 5
Alternating parameter: sample_size


  0%|          | 0/10 [00:00<?, ?it/s]

Parameter sample_size, value 8


100%|██████████| 3/3 [00:17<00:00,  5.94s/it]
 10%|█         | 1/10 [00:17<02:40, 17.83s/it]

Stored results for sample_size | 8
Parameter sample_size, value 17


100%|██████████| 3/3 [00:33<00:00, 11.21s/it]
 20%|██        | 2/10 [00:51<03:37, 27.13s/it]

Stored results for sample_size | 17
Parameter sample_size, value 25


100%|██████████| 3/3 [00:42<00:00, 14.18s/it]
 30%|███       | 3/10 [01:34<03:59, 34.17s/it]

Stored results for sample_size | 25
Parameter sample_size, value 34


100%|██████████| 3/3 [01:07<00:00, 22.67s/it]
 40%|████      | 4/10 [02:42<04:45, 47.53s/it]

Stored results for sample_size | 34
Parameter sample_size, value 43


100%|██████████| 3/3 [01:29<00:00, 29.77s/it]
 50%|█████     | 5/10 [04:11<05:12, 62.60s/it]

Stored results for sample_size | 43
Parameter sample_size, value 51


100%|██████████| 3/3 [01:46<00:00, 35.43s/it]
 60%|██████    | 6/10 [05:57<05:09, 77.46s/it]

Stored results for sample_size | 51
Parameter sample_size, value 60


100%|██████████| 3/3 [02:06<00:00, 42.29s/it]
 70%|███████   | 7/10 [08:04<04:40, 93.62s/it]

Stored results for sample_size | 60
Parameter sample_size, value 68


100%|██████████| 3/3 [02:19<00:00, 46.63s/it]
 80%|████████  | 8/10 [10:24<03:36, 108.35s/it]

Stored results for sample_size | 68
Parameter sample_size, value 77


100%|██████████| 3/3 [03:01<00:00, 60.34s/it]
 90%|█████████ | 9/10 [13:25<02:11, 131.07s/it]

Stored results for sample_size | 77
Parameter sample_size, value 86


100%|██████████| 3/3 [03:31<00:00, 70.58s/it]
100%|██████████| 10/10 [16:57<00:00, 101.72s/it]


Stored results for sample_size | 86
Alternating parameter: n_jobs


  0%|          | 0/4 [00:00<?, ?it/s]

Parameter n_jobs, value 1


 33%|███▎      | 1/3 [01:21<02:42, 81.34s/it]
  0%|          | 0/4 [01:21<?, ?it/s]


KeyboardInterrupt: 

In [6]:
results = pd.DataFrame(result_df, columns=result_cols)
results

Unnamed: 0,dataset,n_observations,approach,parameter,parameter_value,avg_time,time_0,time_1,time_2,exp_cons_kendall_0,...,exp_sens_jaccard2_2,exp_cons_euclidean_0,exp_cons_euclidean_1,exp_cons_euclidean_2,exp_sens_euclidean_0,exp_sens_euclidean_1,exp_sens_euclidean_2,fidelity_0,fidelity_1,fidelity_2
0,ATP,86,rank,,,57.94178,59.196443,56.390087,58.23881,0.0,...,0.329845,0.0,0.0,0.0,0.506464,0.506464,0.506464,0.067334,0.067334,0.067334
1,ATP,86,rank,coalition_size,1.0,10.351466,10.059087,10.145993,10.849319,0.931783,...,0.366279,0.961702,0.961702,0.961702,0.496433,0.496433,0.496433,14.791842,14.791842,14.791842
2,ATP,86,rank,coalition_size,2.0,28.94812,28.011063,29.100078,29.733218,0.954264,...,0.359302,0.977033,0.977033,0.977033,0.498861,0.498861,0.498861,11.180178,11.180178,11.180178
3,ATP,86,rank,coalition_size,3.0,47.099238,47.5095,47.533679,46.254534,0.962791,...,0.367829,0.986008,0.986008,0.986008,0.501452,0.501452,0.501452,7.46792,7.46792,7.46792
4,ATP,86,rank,coalition_size,4.0,49.293517,58.116188,50.044765,39.7196,0.97907,...,0.346899,0.991244,0.991244,0.991244,0.504098,0.504098,0.504098,3.74849,3.74849,3.74849
5,ATP,86,rank,coalition_size,5.0,39.824419,40.230779,39.690735,39.551743,1.0,...,0.329845,1.0,1.0,1.0,0.506464,0.506464,0.506464,0.067334,0.067334,0.067334
6,ATP,86,rank,sample_size,8.0,5.532909,5.577305,5.4676,5.553822,0.894574,...,0.355039,0.944451,0.94755,0.945482,0.513586,0.512393,0.511914,1.25562,1.296948,1.255935
7,ATP,86,rank,sample_size,17.0,10.675429,10.360747,10.161944,11.503596,0.94031,...,0.368605,0.963442,0.964466,0.966393,0.51014,0.50967,0.509001,0.861001,0.721728,0.841655
8,ATP,86,rank,sample_size,25.0,13.780305,12.725151,12.547188,16.068578,0.943411,...,0.355039,0.971415,0.972977,0.972947,0.509577,0.509566,0.505934,0.724023,0.51324,0.620395
9,ATP,86,rank,sample_size,34.0,22.116334,20.538753,22.750576,23.059672,0.930233,...,0.355426,0.978065,0.977642,0.977978,0.510421,0.508585,0.503949,0.623968,0.405238,0.48766


In [7]:
metric = "exp_cons_kendall"
col_mask = results.columns.str.startswith(metric)
results[f"avg_{metric}"] = results.iloc[:, col_mask].mean(1)
col_mask = results.columns == f"avg_{metric}"
col_mask[:6] = True
results.iloc[:, col_mask]

Unnamed: 0,dataset,n_observations,approach,parameter,parameter_value,avg_time,avg_exp_cons_kendall
0,ATP,86,rank,,,57.94178,0.0
1,ATP,86,rank,coalition_size,1.0,10.351466,0.931783
2,ATP,86,rank,coalition_size,2.0,28.94812,0.954264
3,ATP,86,rank,coalition_size,3.0,47.099238,0.962791
4,ATP,86,rank,coalition_size,4.0,49.293517,0.97907
5,ATP,86,rank,coalition_size,5.0,39.824419,1.0
6,ATP,86,rank,sample_size,8.0,5.532909,0.900258
7,ATP,86,rank,sample_size,17.0,10.675429,0.923514
8,ATP,86,rank,sample_size,25.0,13.780305,0.929199
9,ATP,86,rank,sample_size,34.0,22.116334,0.933333
