In [1]:
import math
import sys
from pathlib import Path

sys.path.append("..")

import numpy as np
import pandas as pd
from scipy.stats import kendalltau
from xai_ranking.benchmarks import (
    human_in_the_loop,
    hierarchical_ranking_explanation,
    lime_experiment,
    shap_experiment,
    sharp_experiment,
    participation_experiment,
)
from xai_ranking.preprocessing import (
    preprocess_atp_data,
    preprocess_csrank_data,
    preprocess_higher_education_data,
)
from xai_ranking.datasets import (
    fetch_atp_data,
    fetch_csrank_data,
    fetch_higher_education_data,
    fetch_movers_data,
)
from xai_ranking.scorers import (
    atp_score,
    csrank_score,
    higher_education_score,
)
from xai_ranking.metrics import (
    max_sensitivity
)

RNG_SEED = 42

In [5]:
# One entry per dataset under study. Each entry carries the display name (also
# used in result file names), the value returned by the dataset's fetch
# function, the preprocessing function applied to that value, and the scorer.
datasets = [
    {"name": label, "data": fetch(), "preprocess": preprocess, "scorer": scorer}
    for label, fetch, preprocess, scorer in (
        ("ATP", fetch_atp_data, preprocess_atp_data, atp_score),
        ("CSRank", fetch_csrank_data, preprocess_csrank_data, csrank_score),
        # Disabled for now: "Higher Education", fetched with
        # fetch_higher_education_data(year=2021), preprocessed with
        # preprocess_higher_education_data and scored with
        # higher_education_score.
    )
]
# Explanation methods to evaluate: a display name (also used in result file
# names) paired with the experiment function that is called for it.
xai_methods = [
    {"name": label, "experiment": run_experiment}
    for label, run_experiment in (
        ("LIME", lime_experiment),
        ("SHAP", shap_experiment),
        ("ShaRP", sharp_experiment),
        # Disabled for now: ("Participation", participation_experiment),
        ("HRE", hierarchical_ranking_explanation),
        ("HIL", human_in_the_loop),
    )
]

In [6]:
# Max-sensitivity experiment.
# For every dataset / explanation-method pair, evaluate max_sensitivity at
# every row index and store the values in one CSV per pair. The contribution
# CSVs read below must already exist under results/.

for dataset in datasets:
    features_data, _, _ = dataset["preprocess"](dataset["data"])
    n_rows = len(features_data)
    # num_neighbors is the floor of the square root of the row count.
    num_neighbors = math.floor(math.sqrt(n_rows))
    print("\n\n\n####################", dataset["name"])

    for xai_method in xai_methods:
        print("\n\n\n####################", xai_method["name"])
        contri_data = pd.read_csv(
            f"results/_contributions_{dataset['name']}_{xai_method['name']}.csv",
            index_col=0,
        )

        # One max_sensitivity value per row position, in row order.
        experiment_result = [
            max_sensitivity(
                features_data,
                contri_data,
                target_idx,
                num_neighbors,
                dataset["scorer"],
                kendalltau,
            )
            for target_idx in range(n_rows)
        ]

        result_df = pd.DataFrame(
            experiment_result,
            columns=["max_sensitivity"],
            index=features_data.index,
        )
        result_df.to_csv(
            f"results/_max-sensitivity_{dataset['name']}_{xai_method['name']}_kendalltau_fixed-number-same-outcome.csv"
        )




#################### ATP



#################### ShaRP



#################### CSRank



#################### ShaRP


In [1]:
# contribution results
# Run every explanation method on every dataset, keep the contributions in
# `results[dataset name][method name]`, and write each one to
# results/_contributions_<dataset>_<method>.csv (rows indexed like X, one
# column per column of X).
#
# This cell needs `datasets` and `xai_methods` to be defined first, and its
# CSV files are what the max-sensitivity experiment reads.
# NOTE(review): RNG_SEED is not passed to any experiment here — confirm
# whether the experiment functions need explicit seeding to be reproducible.

# to_csv does not create missing directories; make sure the target exists
# before any (potentially slow) experiment runs.
Path("results").mkdir(parents=True, exist_ok=True)

results = {}
for dataset in datasets:
    preprocess_func = dataset["preprocess"]
    score_func = dataset["scorer"]
    # Preprocessing depends only on the dataset, so do it once per dataset
    # rather than once per method.
    # NOTE(review): assumes the experiment functions do not modify X in
    # place — confirm.
    X, ranks, scores = preprocess_func(dataset["data"])

    results[dataset["name"]] = {}
    for xai_method in xai_methods:
        experiment_func = xai_method["experiment"]
        contributions = experiment_func(X, score_func)
        results[dataset["name"]][xai_method["name"]] = contributions

        result_df = pd.DataFrame(contributions, columns=X.columns, index=X.index)
        result_df.to_csv(f"results/_contributions_{dataset['name']}_{xai_method['name']}.csv")

NameError: name 'datasets' is not defined