# Explainable AI (XAI)

### Data Mining Project 2024/25

Authors: Nicola Emmolo, Simone Marzeddu, Jacopo Raffi

In [55]:
import pandas as pd
import numpy as np
import seaborn as sb
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier

from sklearn.metrics import classification_report
import shap
import pickle

from interpret import set_visualize_provider
from interpret.provider import InlineProvider
from interpret import show
from tqdm import tqdm
import random

from interpret.blackbox import LimeTabular

from xailib.explainers.lore_explainer import LoreTabularExplainer

import dice_ml

In [56]:
# Seed used for the row shuffles below.
RANDOM_STATE = 42

# Train and validation splits are shuffled (frac=1 keeps every row) so that
# the on-disk row order does not introduce bias; the test split is left as is.
train_set = pd.read_csv('../data/ml_datasets/oversampling/train_set.csv')
train_set = train_set.sample(frac=1, random_state=RANDOM_STATE)

val_set = pd.read_csv('../data/ml_datasets/oversampling/val_set.csv')
val_set = val_set.sample(frac=1, random_state=RANDOM_STATE)

test_set = pd.read_csv('../data/ml_datasets/oversampling/test_set.csv')

# Development set = train + validation, re-indexed 0..n-1 so that positional
# and label-based row access coincide in the cells that follow.
dev_set = pd.concat([train_set, val_set], ignore_index=True)

In [57]:
# Cast the four season indicator columns to int in both the development
# and the test frame.
season_columns = [
    'race_season%autumn',
    'race_season%spring',
    'race_season%summer',
    'race_season%winter',
]

for frame in (dev_set, test_set):
    for season_column in season_columns:
        frame[season_column] = frame[season_column].astype(int)

In [58]:
# Detach the target column from each frame; after this cell dev_set and
# test_set contain features only.
dev_label, test_label = dev_set.pop('label'), test_set.pop('label')

In [59]:
# Random forest with fixed hyperparameters, trained on the development set.
# random_state pins the bootstrap sampling and the per-split feature
# sub-sampling, so a Restart & Run All reproduces the same forest and
# therefore the same predictions and explanations further down.
model = RandomForestClassifier(
    class_weight=None,
    criterion='entropy',
    max_features=8,
    min_samples_leaf=5,
    min_samples_split=20,
    n_estimators=150,
    random_state=RANDOM_STATE,
)
model.fit(dev_set, dev_label)

# Predictions on the held-out test set (used for the classification report)
# and on the development set itself (used to pick the instances to explain).
# NOTE: the variable name `test_predicitions` is misspelled but kept, since
# another cell references it.
test_predicitions = model.predict(test_set)
dev_predictions = model.predict(dev_set)

In [60]:
# Baseline test-set metrics of the full-feature model, kept as a nested dict
# so individual scores can be looked up by key later on.
model_report = classification_report(
    y_true=test_label,
    y_pred=test_predicitions,
    output_dict=True,
)

## Lime

In [61]:
# Use the inline provider for the `interpret` visualizations.
inline_provider = InlineProvider()
set_visualize_provider(inline_provider)

In [62]:
# Put the dev_set columns in the order recorded by the fitted model
# (model.feature_names_in_) before handing the frame to the explainer.
dev_set = dev_set.loc[:, model.feature_names_in_]

# LIME explainer built from the fitted model and the development data.
explanation_algorithm = LimeTabular(model, dev_set)

In [63]:
# Rows the model labels 1 although the true label is 0.
predicted_1_actual_0 = (dev_predictions == 1) & (dev_label == 0)
indices = np.argwhere(predicted_1_actual_0)
indices[:10]

array([[  10],
       [ 210],
       [ 233],
       [ 451],
       [ 682],
       [ 838],
       [1144],
       [1317],
       [1644],
       [2184]])

In [64]:
# Rows the model labels 0 although the true label is 1.
predicted_0_actual_1 = (dev_predictions == 0) & (dev_label == 1)
indices = np.argwhere(predicted_0_actual_1)
indices[:10]

array([[ 803],
       [ 935],
       [2283],
       [2441],
       [3590],
       [3876],
       [4678],
       [5836],
       [6238],
       [6621]])

In [65]:
# Class 1 - Correct Prediction

# Explain the 300-row window [700, 1000) of the development set, then show
# the instance at window offset 2, i.e. dev_set row 702.
lime_window = dev_set.iloc[700:1000]
lime_window_labels = dev_label.iloc[700:1000]
lime_result = explanation_algorithm.explain_local(lime_window, lime_window_labels)
lime_result.visualize(2)

In [66]:
# Class 1 - Wrong Prediction

# Explain the 300-row window [700, 1000) of the development set, then show
# the instance at window offset 103, i.e. dev_set row 803.
lime_window = dev_set.iloc[700:1000]
lime_window_labels = dev_label.iloc[700:1000]
lime_result = explanation_algorithm.explain_local(lime_window, lime_window_labels)
lime_result.visualize(103)

In [67]:
# Class 0 - Correct Prediction

# Explain the 300-row window [700, 1000) of the development set, then show
# the instance at window offset 1, i.e. dev_set row 701.
lime_window = dev_set.iloc[700:1000]
lime_window_labels = dev_label.iloc[700:1000]
lime_result = explanation_algorithm.explain_local(lime_window, lime_window_labels)
lime_result.visualize(1)

In [68]:
# Class 0 - Wrong Prediction

# Explain the 300-row window [700, 1000) of the development set, then show
# the instance at window offset 138, i.e. dev_set row 838.
lime_window = dev_set.iloc[700:1000]
lime_window_labels = dev_label.iloc[700:1000]
lime_result = explanation_algorithm.explain_local(lime_window, lime_window_labels)
lime_result.visualize(138)

## Lore

In [69]:
# LORE explainer wrapped around the fitted model.
explanation_algorithm = LoreTabularExplainer(model)

configuration = {
    # method used to generate the local dataset (random in this case)
    "neigh_type": "rndgen",
    # size of the generated local dataset
    "size": 1000,
    # probability of "changing" a feature during perturbation
    "ocr": 0.1,
    # number of generations
    "ngen": 100
}

# The explainer is fitted on the features plus the model's own predictions
# (stored under "label"), not on the ground-truth labels.
lore_predicted_labels = pd.DataFrame(dev_predictions, columns=["label"])
lore_train_data = pd.concat([dev_set, lore_predicted_labels], axis=1)
explanation_algorithm.fit(lore_train_data, "label", config=configuration)

In [70]:
# The model predicted 1 and this is the correct prediction

lore_instance = dev_set.iloc[702].to_numpy()
explanation_lore = explanation_algorithm.explain(lore_instance)
explanation_lore.plotRules()
print(explanation_lore.expDict["fidelity"])

0.9973872847580536


In [71]:
# The model predicted 0 but the label is 1

lore_instance = dev_set.iloc[803].to_numpy()
explanation_lore = explanation_algorithm.explain(lore_instance)
explanation_lore.plotRules()
print(explanation_lore.expDict["fidelity"])

0.9756372509804523


In [72]:
# The model predicted 0 and this is the correct prediction

lore_instance = dev_set.iloc[701].to_numpy()
explanation_lore = explanation_algorithm.explain(lore_instance)
explanation_lore.plotRules()
print(explanation_lore.expDict["fidelity"])

0.9773748045498741


In [73]:
# The model predicted 1 but the label is 0

lore_instance = dev_set.iloc[838].to_numpy()
explanation_lore = explanation_algorithm.explain(lore_instance)
explanation_lore.plotRules()
print(explanation_lore.expDict["fidelity"])

0.997389121779106


## Counterfactual Explanations

In [74]:
# DiCE needs features and outcome in a single frame: a copy of dev_set with
# the ground-truth label attached as the last column.
dice_train_dataset = dev_set.assign(label=dev_label)

# Every feature column (everything except the outcome) is declared continuous.
dice_feature_names = list(dev_set.columns)
dice_dataset = dice_ml.Data(
    dataframe=dice_train_dataset,
    outcome_name="label",
    continuous_features=dice_feature_names,
)

# Wrap the fitted model; DiCE supports several frameworks besides sklearn,
# e.g., pytorch, tensorflow.
dice_model = dice_ml.Model(model=model, backend="sklearn")

# The Dice object interfaces the counterfactual search; "random" selects
# random search for counterfactuals.
dice_explanation_algorithm = dice_ml.Dice(dice_dataset, dice_model, method="random")

In [75]:
# Season indicator columns are frozen: counterfactuals may change any other
# feature but must keep the race season of the query instance.
frozen_features = {
    "race_season%summer",
    "race_season%winter",
    "race_season%spring",
    "race_season%autumn",
}
dice_query_features = dice_train_dataset.drop("label", axis="columns")

explanation_dice = dice_explanation_algorithm.generate_counterfactuals(
    # Query rows and their meaning (predicted class, correct/wrong):
    # 702 -> predicted 1, correct; 701 -> predicted 0, correct;
    # 838 -> predicted 1, wrong;   803 -> predicted 0, wrong
    dice_query_features.iloc[[702, 701, 838, 803]],
    total_CFs=10,  # number of counterfactuals to generate per query instance
    # Built as an ordered list (column order) rather than from a set
    # difference: set iteration order depends on string hashing and changes
    # between kernel sessions, which would defeat the seed below.
    features_to_vary=[
        column for column in dice_query_features.columns
        if column not in frozen_features
    ],
    # Seed the random counterfactual search so the table below is reproducible.
    random_seed=RANDOM_STATE,
)

100%|██████████| 4/4 [01:20<00:00, 20.03s/it]


In [76]:
# Show each query instance followed by its counterfactuals; with
# show_only_changes=True unchanged feature values are rendered as "-".
explanation_dice.visualize_as_dataframe(show_only_changes=True)

Query instance (original outcome : 1)


Unnamed: 0,length,cyclist_bmi,cyclist_age_group,climb_percentage,race_physical_effort,race_prestige,previous_mean_position,previous_mean_delta,previous_mean_cp,cyclist_previous_experience,num_participants,race_season%autumn,race_season%spring,race_season%summer,race_season%winter,label
0,155.0,22.99169,3,0.001258,0.003405,0.11985,0.651808,656.454529,6.1e-05,0.025801,184,0,1,0,0,1



Diverse Counterfactual set (new outcome: 0)


Unnamed: 0,length,cyclist_bmi,cyclist_age_group,climb_percentage,race_physical_effort,race_prestige,previous_mean_position,previous_mean_delta,previous_mean_cp,cyclist_previous_experience,num_participants,race_season%autumn,race_season%spring,race_season%summer,race_season%winter,label
0,-,-,-,0.19996306,-,0.36555409,-,-,6.055730975659768e-05,-,-,-,-,-,-,0.0
1,-,-,-,-,-,-,-,9221.7,6.055730975659768e-05,0.9,-,-,-,-,-,0.0
2,-,21.153122,-,-,-,-,0.7179402,-,6.055730975659768e-05,-,-,-,-,-,-,0.0
3,-,20.318663,-,-,-,-,-,3243.0,6.055730975659768e-05,-,-,-,-,-,-,0.0
4,-,-,-,0.51746824,-,-,-,4714.1,6.055730975659768e-05,-,-,-,-,-,-,0.0
5,-,-,-,-,0.11127287,0.44827567,-,-,6.055730975659768e-05,-,-,-,-,-,-,0.0
6,-,19.358197,-,-,-,0.87629722,-,-,6.055730975659768e-05,-,-,-,-,-,-,0.0
7,42.6,-,-,-,-,-,0.4165874,-,6.055730975659768e-05,-,-,-,-,-,-,0.0
8,-,21.486016,-,-,-,-,-,-,6.055730975659768e-05,-,-,-,-,-,-,0.0
9,-,-,-,-,0.50728878,-,-,-,6.055730975659768e-05,-,-,-,-,-,-,0.0


Query instance (original outcome : 0)


Unnamed: 0,length,cyclist_bmi,cyclist_age_group,climb_percentage,race_physical_effort,race_prestige,previous_mean_position,previous_mean_delta,previous_mean_cp,cyclist_previous_experience,num_participants,race_season%autumn,race_season%spring,race_season%summer,race_season%winter,label
0,131.0,20.193432,0,0.008443,0.08163,0.107814,0.433193,440.78421,0.001482,0.291932,183,0,1,0,0,0



Diverse Counterfactual set (new outcome: 1)


Unnamed: 0,length,cyclist_bmi,cyclist_age_group,climb_percentage,race_physical_effort,race_prestige,previous_mean_position,previous_mean_delta,previous_mean_cp,cyclist_previous_experience,num_participants,race_season%autumn,race_season%spring,race_season%summer,race_season%winter,label
0,-,-,-,-,-,-,-,-,0.3772655162,-,53.0,-,-,-,-,1.0
1,-,-,-,-,-,-,-,-,-,-,11.0,-,-,-,-,1.0
2,-,-,-,-,-,-,-,6125.2,-,-,58.0,-,-,-,-,1.0
3,-,-,-,-,0.04724555,-,-,-,-,-,39.0,-,-,-,-,1.0
4,-,-,-,-,-,-,0.381894,-,-,0.4,-,-,-,-,-,1.0
5,-,25.927751,-,-,-,-,-,-,-,-,-,-,-,-,-,1.0
6,-,26.53606,-,-,-,-,-,-,-,-,201.0,-,-,-,-,1.0
7,-,27.683112,-,-,-,-,0.3731475,-,-,-,-,-,-,-,-,1.0
8,-,-,-,-,-,-,-,-,-,-,38.0,-,-,-,-,1.0
9,-,23.315625,-,-,-,-,-,810.9,-,-,-,-,-,-,-,1.0


Query instance (original outcome : 1)


Unnamed: 0,length,cyclist_bmi,cyclist_age_group,climb_percentage,race_physical_effort,race_prestige,previous_mean_position,previous_mean_delta,previous_mean_cp,cyclist_previous_experience,num_participants,race_season%autumn,race_season%spring,race_season%summer,race_season%winter,label
0,109.400002,18.717546,1,0.028565,0.192617,0.083613,0.241887,175.343063,0.003199,0.319593,151,1,0,0,0,1



Diverse Counterfactual set (new outcome: 0)


Unnamed: 0,length,cyclist_bmi,cyclist_age_group,climb_percentage,race_physical_effort,race_prestige,previous_mean_position,previous_mean_delta,previous_mean_cp,cyclist_previous_experience,num_participants,race_season%autumn,race_season%spring,race_season%summer,race_season%winter,label
0,-,-,-,-,-,-,0.8023775,-,-,-,-,-,-,-,-,0.0
1,-,-,-,-,-,0.65699278,-,10956.8,-,-,-,-,-,-,-,0.0
2,-,-,-,-,0.02215157,-,-,-,-,-,-,-,-,-,-,0.0
3,210.0,-,-,-,-,-,-,-,-,-,-,-,-,-,-,0.0
4,-,-,-,-,0.67769326,-,-,-,-,0.2,-,-,-,-,-,0.0
5,-,-,-,0.0143384,-,-,-,-,-,-,-,-,-,-,-,0.0
6,-,23.784306,-,-,-,-,-,4226.1,-,-,-,-,-,-,-,0.0
7,-,-,0.0,-,0.52691575,-,-,-,-,-,-,-,-,-,-,0.0
8,-,-,-,-,-,-,0.6020503,-,-,-,-,-,-,-,-,0.0
9,-,-,-,-,-,0.83796909,-,-,-,-,-,-,-,-,-,0.0


Query instance (original outcome : 0)


Unnamed: 0,length,cyclist_bmi,cyclist_age_group,climb_percentage,race_physical_effort,race_prestige,previous_mean_position,previous_mean_delta,previous_mean_cp,cyclist_previous_experience,num_participants,race_season%autumn,race_season%spring,race_season%summer,race_season%winter,label
0,160.5,19.568235,2,0.01167,0.033873,0.234451,0.290661,232.270264,0.001495,0.510944,143,0,0,1,0,0



Diverse Counterfactual set (new outcome: 1)


Unnamed: 0,length,cyclist_bmi,cyclist_age_group,climb_percentage,race_physical_effort,race_prestige,previous_mean_position,previous_mean_delta,previous_mean_cp,cyclist_previous_experience,num_participants,race_season%autumn,race_season%spring,race_season%summer,race_season%winter,label
0,-,26.73462,-,-,-,-,-,-,-,-,-,-,-,-,-,1.0
1,-,24.551001,-,-,-,-,-,-,-,0.2,-,-,-,-,-,1.0
2,-,-,-,-,-,-,0.2101175,-,-,-,42.0,-,-,-,-,1.0
3,-,26.516773,-,-,-,-,-,-,-,-,-,-,-,-,-,1.0
4,-,26.027072,1.0,-,-,-,-,-,-,-,-,-,-,-,-,1.0
5,-,-,-,-,-,-,0.29980569999997125,-,-,-,8.0,-,-,-,-,1.0
6,-,-,-,-,-,-,-,-,-,-,95.0,-,-,-,-,1.0
7,-,17.556969,-,-,-,-,-,-,-,-,12.0,-,-,-,-,1.0
8,-,27.449866,-,-,-,-,-,-,-,-,-,-,-,-,-,1.0
9,-,-,-,-,-,-,-,-,0.2678114818,-,101.0,-,-,-,-,1.0


## Validation

In [77]:
# Baseline classification report of the full-feature model on the test set,
# displayed as a table; the robustness checks below are compared against it.
pd.DataFrame(model_report)

Unnamed: 0,0,1,accuracy,macro avg,weighted avg
precision,0.903343,0.412479,0.847167,0.657911,0.838156
recall,0.922466,0.35546,0.847167,0.638963,0.847167
f1-score,0.912804,0.381853,0.847167,0.647329,0.842294
support,27446.0,4203.0,0.847167,31649.0,31649.0


In [78]:
# Feature names, in dev_set column order.
features = dev_set.columns.tolist()
features

['length',
 'cyclist_bmi',
 'cyclist_age_group',
 'climb_percentage',
 'race_physical_effort',
 'race_prestige',
 'previous_mean_position',
 'previous_mean_delta',
 'previous_mean_cp',
 'cyclist_previous_experience',
 'num_participants',
 'race_season%autumn',
 'race_season%spring',
 'race_season%summer',
 'race_season%winter']

In [79]:
# One noise value per test row, drawn uniformly from [0, 1) with a seeded
# generator so the robustness tables below are reproducible across runs.
# The same noise vector is reused for every feature and magnitude; only the
# scaling factor changes.
noise_rng = np.random.default_rng(RANDOM_STATE)
random_noise = noise_rng.random(test_set.shape[0])
noise_magnitudes = [eps / 10 for eps in range(1, 10)]  # 0.1, 0.2, ..., 0.9

# We corrupt the test set with different noise magnitudes.
# Each corruption perturbs exactly one feature and is stored in a dict keyed
# by (feature, noise_magnitude).
# NOTE(review): the noise is additive, non-negative and not rescaled to each
# feature's range, so its relative impact differs between features; confirm
# this is intended when comparing features.
corruptions_by_feature_and_noise = dict()
for feature in features:
    for noise_magnitude in noise_magnitudes:
        corrupted_test_set = test_set.copy()
        corrupted_test_set[feature] = corrupted_test_set[feature] + random_noise * noise_magnitude
        corruptions_by_feature_and_noise[(feature, noise_magnitude)] = corrupted_test_set

In [80]:
# Score the model on every corrupted copy of the test set; keys are the same
# (feature, noise_magnitude) pairs used for the corruptions.
validations_by_feature_and_noise = {
    corruption_key: classification_report(
        test_label,
        model.predict(corrupted_test_set),
        output_dict=True,
    )
    for corruption_key, corrupted_test_set in corruptions_by_feature_and_noise.items()
}

In [81]:
# Macro-averaged score of every corrupted run, next to its gap from the
# uncorrupted baseline (negative difference = performance lost to the noise).
metric = "f1-score"
baseline_performance = model_report["macro avg"][metric]

corruption_rows = [
    (feature_name, magnitude, corrupted_report["macro avg"][metric])
    for (feature_name, magnitude), corrupted_report in validations_by_feature_and_noise.items()
]
corruption_validation = pd.DataFrame(
    corruption_rows,
    columns=["feature", "noise_magnitude", "performance"],
)
corruption_validation["difference"] = corruption_validation["performance"] - baseline_performance
corruption_validation

Unnamed: 0,feature,noise_magnitude,performance,difference
0,length,0.1,0.647251,-0.000077
1,length,0.2,0.647285,-0.000043
2,length,0.3,0.647251,-0.000077
3,length,0.4,0.647295,-0.000034
4,length,0.5,0.647406,0.000077
...,...,...,...,...
130,race_season%winter,0.5,0.647329,0.000000
131,race_season%winter,0.6,0.647714,0.000385
132,race_season%winter,0.7,0.648023,0.000694
133,race_season%winter,0.8,0.648074,0.000745


In [82]:
# Summary statistics of performance and difference per corrupted feature,
# aggregated over all noise magnitudes.
corruption_validation.groupby("feature")[["performance", "difference"]].describe()

Unnamed: 0_level_0,performance,performance,performance,performance,performance,performance,performance,performance,difference,difference,difference,difference,difference,difference,difference,difference
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max
feature,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2
climb_percentage,9.0,0.609098,0.0003,0.608838,0.608888,0.608989,0.609114,0.609726,9.0,-0.038231,0.0003,-0.038491,-0.038441,-0.03834,-0.038215,-0.037603
cyclist_age_group,9.0,0.647103,0.000499,0.645815,0.647262,0.647329,0.647329,0.647329,9.0,-0.000225,0.000499,-0.001513,-6.7e-05,0.0,0.0,0.0
cyclist_bmi,9.0,0.641606,0.002546,0.639369,0.639632,0.640662,0.643962,0.645715,9.0,-0.005722,0.002546,-0.00796,-0.007697,-0.006667,-0.003367,-0.001614
cyclist_previous_experience,9.0,0.631183,0.008357,0.621289,0.624786,0.629371,0.637346,0.645254,9.0,-0.016146,0.008357,-0.02604,-0.022542,-0.017957,-0.009983,-0.002074
length,9.0,0.647383,0.000121,0.647251,0.647285,0.647406,0.647457,0.647611,9.0,5.4e-05,0.000121,-7.7e-05,-4.3e-05,7.7e-05,0.000128,0.000283
num_participants,9.0,0.64742,6.5e-05,0.647379,0.647379,0.647379,0.647438,0.647566,9.0,9.1e-05,6.5e-05,5e-05,5e-05,5e-05,0.00011,0.000238
previous_mean_cp,9.0,0.601808,0.003684,0.598812,0.599553,0.601109,0.6021,0.611032,9.0,-0.04552,0.003684,-0.048517,-0.047775,-0.046219,-0.045229,-0.036297
previous_mean_delta,9.0,0.647801,0.000262,0.647475,0.647586,0.647818,0.648032,0.648169,9.0,0.000472,0.000262,0.000146,0.000257,0.000489,0.000704,0.000841
previous_mean_position,9.0,0.538916,0.041623,0.498241,0.508367,0.527127,0.557104,0.622556,9.0,-0.108413,0.041623,-0.149088,-0.138961,-0.120202,-0.090225,-0.024773
race_physical_effort,9.0,0.610431,0.009555,0.602855,0.603982,0.60812,0.611877,0.63302,9.0,-0.036898,0.009555,-0.044474,-0.043347,-0.039208,-0.035452,-0.014308


In [83]:
# Summary statistics of performance and difference per noise magnitude,
# aggregated over all corrupted features.
corruption_validation.groupby("noise_magnitude")[["performance", "difference"]].describe()

Unnamed: 0_level_0,performance,performance,performance,performance,performance,performance,performance,performance,difference,difference,difference,difference,difference,difference,difference,difference
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max
noise_magnitude,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2
0.1,15.0,0.639302,0.013641,0.609726,0.638246,0.647251,0.647329,0.647475,15.0,-0.008027,0.013641,-0.037603,-0.009082,-7.7e-05,0.0,0.000146
0.2,15.0,0.633855,0.021289,0.581341,0.625949,0.647285,0.647329,0.647586,15.0,-0.013474,0.021289,-0.065988,-0.02138,-4.3e-05,0.0,0.000257
0.3,15.0,0.631074,0.026045,0.557104,0.618583,0.647251,0.647329,0.647552,15.0,-0.016254,0.026045,-0.090225,-0.028746,-7.7e-05,0.0,0.000223
0.4,15.0,0.628771,0.029865,0.539808,0.613703,0.647295,0.647329,0.648169,15.0,-0.018558,0.029865,-0.10752,-0.033626,-3.4e-05,0.0,0.000841
0.5,15.0,0.627345,0.032732,0.527127,0.612124,0.647329,0.647329,0.648101,15.0,-0.019984,0.032732,-0.120202,-0.035204,0.0,0.0,0.000772
0.6,15.0,0.625819,0.03576,0.513998,0.61107,0.646894,0.647385,0.648032,15.0,-0.02151,0.03576,-0.133331,-0.036259,-0.000434,5.6e-05,0.000704
0.7,15.0,0.625017,0.037259,0.508367,0.609508,0.646719,0.647511,0.648023,15.0,-0.022312,0.037259,-0.138961,-0.03782,-0.000609,0.000183,0.000694
0.8,15.0,0.624127,0.03886,0.501702,0.608075,0.646944,0.647468,0.648074,15.0,-0.023202,0.03886,-0.145627,-0.039253,-0.000385,0.000139,0.000745
0.9,15.0,0.623569,0.039606,0.498241,0.607779,0.645815,0.647295,0.64803,15.0,-0.02376,0.039606,-0.149088,-0.03955,-0.001513,-3.4e-05,0.000702


In [84]:
# Leave-one-feature-out ablation: for each feature, train a forest without it
# and record the macro-averaged F1 on the equally reduced test set.
removed_feature_results = []

for feature in features:  # iterates in dev_set column order
    # Drop the current feature from both the training and the test data
    reduced_train_data = dev_set.drop(columns=[feature])
    reduced_test_data = test_set.drop(columns=[feature])

    # Train a fresh, unfitted forest with the same hyperparameters as `model`.
    # Refitting `model` itself would overwrite the full-feature forest that
    # the explanation and corruption cells rely on, leaving it trained on a
    # reduced feature set after this loop.
    ablation_model = RandomForestClassifier(**model.get_params())
    ablation_model.fit(reduced_train_data, dev_label)

    # Evaluate the ablated model on the reduced test set
    predictions = ablation_model.predict(reduced_test_data)
    report = classification_report(test_label, predictions, output_dict=True)

    # Store the result
    removed_feature_results.append({
        "feature_removed": feature,
        "performance": report["macro avg"]["f1-score"]  # metric of interest
    })

# Collect the results in a dataframe for analysis
removed_feature_results_df = pd.DataFrame(removed_feature_results)

In [85]:
# Rank the ablations by the score obtained without each feature, best first:
# features near the top are the ones whose removal leaves the highest
# performance.
removed_feature_results_df.sort_values(by="performance", ascending=False)

Unnamed: 0,feature_removed,performance
13,race_season%summer,0.649692
3,climb_percentage,0.649218
2,cyclist_age_group,0.648383
12,race_season%spring,0.647757
9,cyclist_previous_experience,0.64702
14,race_season%winter,0.645694
11,race_season%autumn,0.645152
5,race_prestige,0.644944
0,length,0.643717
8,previous_mean_cp,0.643001
