# Explainability

In [1]:
import joblib
import os
import pandas as pd

In [2]:
# Directory prefixes keep a trailing path separator because later cells build
# full paths by plain string concatenation (prefix + file name).
MODELS_DIR = os.path.join("models", "")
DATASET_DIR = os.path.join("dataset", "")

# File names of the persisted model and of the two data splits.
MODEL_NAME = "best_rf.pkl"
TRAIN_NAME = "train_set.csv"
TEST_NAME = "test_set.csv"

In [None]:
# Restore the persisted pipeline. joblib.load unpickles the file, so it must
# only be pointed at trusted artifacts.
model = joblib.load(f"{MODELS_DIR}{MODEL_NAME}")

# Read both data splits from disk.
train_df, test_df = (
    pd.read_csv(f"{DATASET_DIR}{csv_name}") for csv_name in (TRAIN_NAME, TEST_NAME)
)

# Detach the "position" target column; pop() removes it in place, so the
# remaining frames hold only the features.
y_train, y_test = train_df.pop("position"), test_df.pop("position")
X_train, X_test = train_df, test_df

# Shapes of the four pieces, shown as the cell output.
tuple(part.shape for part in (X_train, y_train, X_test, y_test))

In [None]:
# Encode the "season" and "continent" columns as integer category codes.
# The category set is taken from the training split and reused for the test
# split, so a given label maps to the same code in both frames.
for column in ("season", "continent"):
    X_train[column] = X_train[column].astype("category")
    # Fix: the test column used to be overwritten with the *training* column,
    # which replaced the real test values. Casting the test values with the
    # training dtype keeps them and shares the training categories; labels
    # that never occur in training become NaN and therefore get code -1.
    X_test[column] = X_test[column].astype(X_train[column].dtype)

# obtain mapping from integer code to category label
season_mapping = dict(enumerate(X_train["season"].cat.categories))
continent_mapping = dict(enumerate(X_train["continent"].cat.categories))

# use codes to replace category
for column in ("season", "continent"):
    X_train[column] = X_train[column].cat.codes
    X_test[column] = X_test[column].cat.codes

print(season_mapping)
print(continent_mapping)

In [5]:
# Ask the second-to-last pipeline step to emit pandas DataFrames instead of
# numpy arrays, and switch its "onehot" component to dense output.
preprocessing_step = model[-2]
preprocessing_step.set_output(transform="pandas")
preprocessing_step["onehot"].set_params(sparse_output=False)

# Run every step except the final estimator over both splits.
feature_steps = model[:-1]
X_train_transformed = feature_steps.transform(X_train)
X_test_transformed = feature_steps.transform(X_test)

In [None]:
# Print a summary of the transformed training features to sanity-check the
# output of the preprocessing steps.
X_train_transformed.info()

In [7]:
# The last pipeline step is the fitted estimator; the explainers below are
# built on it directly and are fed the already-transformed features.
classifier = model[-1]

## SHAP

In [None]:
import shap

In [9]:
# Collects the attribution arrays produced by each explanation method,
# keyed by method name.
explanations = {}

In [10]:
# Interventional variant: an explicit background sample is supplied (the first
# 10 transformed training rows) and features are perturbed against it.
int_alg = shap.TreeExplainer(
    classifier,
    X_train_transformed.iloc[:10],
    feature_perturbation="interventional",
)

# Path-dependent variant: no background data is passed, so the explainer
# relies on what the fitted trees themselves recorded during training.
distr_alg = shap.TreeExplainer(
    classifier,
    feature_perturbation="tree_path_dependent",
)

In [None]:
# Explain the first 10 transformed test rows with both explainers; the prints
# are progress markers because each call can take a while.
test_sample = X_test_transformed.iloc[:10]

int_expl = int_alg(test_sample)
print("done1")
distr_expl = distr_alg(test_sample)
print("done2")

# Keep only the raw attribution arrays for later comparison.
explanations.update(
    shap_interventional=int_expl.values,
    shap_distributional=distr_expl.values,
)

In [None]:
# Beeswarm summary of the interventional SHAP values for the explained test rows.
# NOTE(review): if the estimator yields one attribution set per class, the
# explanation may need slicing to a single class before plotting — confirm.
shap.plots.beeswarm(int_expl)

In [None]:
# Beeswarm summary of the path-dependent SHAP values for the same test rows.
shap.plots.beeswarm(distr_expl)

## LIME

In [None]:
from interpret import set_visualize_provider
from interpret.provider import InlineProvider
from interpret import show
from tqdm import tqdm
from interpret.blackbox import LimeTabular

In [None]:
# Build a LIME tabular explainer for the fitted estimator, using the
# transformed training features as its reference data.
# NOTE(review): some interpret releases expect a predict function rather than
# the model object as the first argument — confirm against the installed version.
explainer = LimeTabular(classifier, X_train_transformed)

## LORE

In [None]:
from xailib.explainers.lore_explainer import LoreTabularExplainer

In [None]:
# Create the LORE explainer for the fitted estimator.
# NOTE(review): confirm that this constructor accepts the training data as a
# second positional argument and that the estimator needs no wrapper.
explainer_lore = LoreTabularExplainer(classifier, X_train_transformed)

# Options handed to LORE later on; presumably neighbourhood-generation
# settings — verify the key meanings against the xailib documentation.
configuration = dict(neigh_type="rndgen", size=1000, ocr=0.1, ngen=100)

# Transformed training features with the target appended as the last column.
lore_train_data = pd.concat([X_train_transformed, y_train], axis=1)