#Setting Up the Models

##Install Libraries and Import Packages

In [None]:
!pip install -r requirements_cl.txt

In [None]:
from ucimlrepo import fetch_ucirepo

import pandas as pd
import numpy as np
from sklearn import preprocessing
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split

from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier

from sklearn.metrics import classification_report, accuracy_score
import matplotlib.pyplot as plt
import seaborn as sns

import shap
import graphviz
from sklearn.tree import export_graphviz

from scipy.stats import spearmanr
from collections import Counter

import random

#fetch dataset
# id=45 selects which dataset ucimlrepo downloads from the UCI repository.
heart_disease = fetch_ucirepo(id=45)

#data
# Work on a copy so later reassignments never touch the fetched object.
df = heart_disease.data.original.copy()

# Seed both NumPy's and Python's global random generators for reproducibility.
SEED = 100
np.random.seed(SEED)
random.seed(SEED)

##Data Analysis

In [None]:
# Column dtypes and non-null counts.
df.info()

In [None]:
# Summary statistics for each column.
df.describe()

In [None]:
# Preview the first rows of the raw data.
df.head()

In [None]:
# Pairwise correlation between all columns, drawn on an explicit Axes.
fig, ax = plt.subplots(figsize=(10, 6))
correlations = df.corr()
sns.heatmap(correlations, annot=True, cmap='coolwarm', ax=ax)
ax.set_title("Correlation Heatmap")
plt.show()

In [None]:
# One box plot per column to inspect spread and outliers.
for col in df.columns:
    # Matplotlib does not skip NaN: a single missing value turns the box
    # statistics into NaN and the plot comes out empty. This cell runs before
    # the imputation step, so drop missing values for plotting only.
    plt.boxplot(df[col].dropna())
    plt.title(col)
    plt.show()

In [None]:
# Number of fully duplicated rows.
df.duplicated().sum()

In [None]:
# Missing values per column.
df.isna().sum()

##Handling NaN Values with SimpleImputer

In [None]:
# Replace every NaN with the mean of its column.
# NOTE(review): the imputer is fitted on the whole frame - including the target
# column 'num' and the rows that later become the test set - before the
# train/test split. Confirm this leakage is acceptable.
imputer = SimpleImputer(missing_values = np.nan, strategy ='mean')
imputer = imputer.fit(df)
df = imputer.transform(df)

#convert NP to DF
# transform() returned an array, so re-attach the original column names.
df = pd.DataFrame(df, columns=heart_disease.data.original.columns)

##Combining Classes

In [None]:
# Separate the feature columns from the target column 'num'.
X = df.drop('num', axis=1)
y = df['num']

#for plot
feature_names = X.columns

#class 0 -> no disease(0) / classes 1-4 -> disease(1)
# Copy first so the original multi-class labels in y stay untouched.
y_binary = y.copy()
y_binary[y_binary > 0] = 1

##Splitting and Scaling the Data

In [None]:
#split
# 75/25 split, stratified on the binary target.
# NOTE(review): random_state=0 here while the notebook otherwise seeds with
# SEED (100) - confirm the different seed is intended.
X_train, X_test, y_train, y_test = train_test_split(X, y_binary, test_size=0.25, random_state=0, stratify=y_binary)

#scale
# The scaler is fitted on the training split only and then applied to the test
# split. X_train / X_test are rebound to the scaler's output, which later cells
# index positionally (column names are re-attached where needed).
scaler = preprocessing.StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

##Creating the Optimized Models and Then Fitting them

In [None]:
# Only the tuned (non-default) hyperparameters are spelled out. The previous
# version was a verbatim repr() dump of the estimators: it passed
# scikit-learn's hidden sentinel multi_class='deprecated' and restated
# version-specific defaults (monotonic_cst, callbacks, ...), which ties the cell
# to one exact library release without changing the fitted models.

#Decision Tree Model
dt = DecisionTreeClassifier(criterion='entropy', splitter='random', max_depth=4,
                            min_samples_leaf=16, min_samples_split=4, random_state=100)

#Logistic Regression Model
lr = LogisticRegression(C=0.010993634452683504, class_weight='balanced',
                        solver='liblinear', random_state=100)

#Random Forest Model
rf = RandomForestClassifier(n_estimators=53, max_depth=15, max_features='sqrt',
                            min_samples_leaf=15, min_samples_split=16, random_state=100)

#XGBoost Model
xgb = XGBClassifier(objective='binary:logistic', n_estimators=347, max_depth=7,
                    learning_rate=0.021618964126433812, gamma=0.6497146261841261,
                    colsample_bytree=0.7504717399139913, subsample=0.7040523480851058,
                    reg_alpha=0.0010349590106072711, reg_lambda=0.02637083647277659,
                    scale_pos_weight=8.443990241900503, random_state=100)

In [None]:
# Fit all four models on the same scaled training split.
dt.fit(X_train, y_train)
lr.fit(X_train, y_train)
rf.fit(X_train, y_train)
xgb.fit(X_train, y_train)

##Evaluating Each Model's Performance

In [None]:
#For dt
# Predict on the held-out split, then report accuracy (in percent) and the per-class report.
dt_pred = dt.predict(X_test)
dt_accuracy_pct = accuracy_score(y_test, dt_pred) * 100
print(f"\nFor the Decision Tree Model:\nAccuracy: {dt_accuracy_pct}%")
print(f"Classification Report:\n{classification_report(y_test, dt_pred)}")

In [None]:
#For lr
# Predict on the held-out split, then report accuracy (in percent) and the per-class report.
lr_pred = lr.predict(X_test)
lr_accuracy_pct = accuracy_score(y_test, lr_pred) * 100
print(f"\nFor the Logistic Regression Model:\nAccuracy: {lr_accuracy_pct}%")
print(f"Classification Report:\n{classification_report(y_test, lr_pred)}")

In [None]:
#For rf
# Predict on the held-out split, then report accuracy (in percent) and the per-class report.
rf_pred = rf.predict(X_test)
rf_accuracy_pct = accuracy_score(y_test, rf_pred) * 100
print(f"\nFor the Random Forest Model:\nAccuracy: {rf_accuracy_pct}%")
print(f"Classification Report:\n{classification_report(y_test, rf_pred)}")

In [None]:
#For xgb
# Predict on the held-out split, then report accuracy (in percent) and the per-class report.
xgb_pred = xgb.predict(X_test)
xgb_accuracy_pct = accuracy_score(y_test, xgb_pred) * 100
print(f"\nFor the XGBoost Model:\nAccuracy: {xgb_accuracy_pct}%")
print(f"Classification Report:\n{classification_report(y_test, xgb_pred)}")

#Global Explainability

##Decision Tree's Tree Structure

In [None]:
# Render the fitted decision tree as a Graphviz graph.
# class_names must be listed in the ascending class order the tree was fitted
# with, i.e. dt.classes_. y_binary.unique() returns labels in order of first
# appearance, which only matches by coincidence of row order.
dot_data = export_graphviz(dt, out_file=None,
                           feature_names=feature_names,
                           class_names=[str(label) for label in dt.classes_],
                           filled=True)

graph = graphviz.Source(dot_data, format="png")
graph

##Logistic Regression Model's Coefficients

In [None]:
# Label each logistic-regression coefficient with its feature name.
coefficients = pd.Series(lr.coef_[0], index=feature_names)
print("Logistic Regression Coefficients:\n")
# Largest positive coefficient first.
print(coefficients.sort_values(ascending=False))

In [None]:
# Horizontal bar chart of the coefficients, sorted ascending.
coefficients.sort_values().plot(kind="barh", figsize=(8, 6))
plt.title("Logistic Regression Coefficients")
plt.xlabel("Coefficient Value")
plt.tight_layout()
plt.grid()
plt.show()

##Setting Up SHAP explainers

In [None]:
#turn data into DF for SHAP plots
# X_test is the scaled output of the scaler; wrapping it restores the column names.
X_test_df = pd.DataFrame(X_test, columns=feature_names)

#SHAP for dt
# No background data is passed here (unlike the XGBoost explainer below).
dt_explainer = shap.TreeExplainer(dt)
dt_shap_values = dt_explainer.shap_values(X_test_df)

#SHAP for lr
# NOTE(review): the masker (background data) is built from the same test set
# that is being explained, whereas the XGBoost explainer below uses X_train -
# confirm the differing backgrounds are intended.
lr_explainer = shap.LinearExplainer(lr, masker=shap.maskers.Independent(X_test_df))
lr_shap_values = lr_explainer.shap_values(X_test_df)

#SHAP for rf
# No background data is passed here either.
rf_explainer = shap.TreeExplainer(rf)
rf_shap_values = rf_explainer.shap_values(X_test_df)

#SHAP for xgb
# Uses the scaled training data as background with interventional perturbation.
xgb_explainer = shap.TreeExplainer(xgb, X_train, feature_perturbation='interventional')
xgb_shap_values = xgb_explainer.shap_values(X_test_df)

##SHAP Summary Plots For Each Model

In [None]:
# Check the array's dimensions before indexing it for the summary plot.
dt_shap_values.shape

In [None]:
#dt
# Index 1 on the last axis is plotted - presumably the positive (disease) class; verify against the shape shown above.
shap.summary_plot(dt_shap_values[:, :, 1], X_test_df)

In [None]:
# Check the array's dimensions before plotting.
lr_shap_values.shape

In [None]:
#lr
# The values are passed as-is, with no class-axis indexing.
shap.summary_plot(lr_shap_values, X_test_df)

In [None]:
# Check the array's dimensions before indexing it for the summary plot.
rf_shap_values.shape

In [None]:
#rf
# Index 1 on the last axis is plotted - presumably the positive (disease) class; verify against the shape shown above.
shap.summary_plot(rf_shap_values[:, :, 1], X_test_df)

In [None]:
# Check the array's dimensions before plotting.
xgb_shap_values.shape

In [None]:
#xgb
# The values are passed as-is, with no class-axis indexing.
shap.summary_plot(xgb_shap_values, X_test_df)

#Local Explainability

##Selecting Instance

In [None]:
# Positional row of the test split that every local explanation below refers to.
index = 0

##Decision Path

In [None]:
# Single test row, reshaped to the 2-D layout the estimator expects.
X_instance = X_test[index].reshape(1, -1)

node_indicator = dt.decision_path(X_instance)
leaf_id = dt.apply(X_instance)

tree = dt.tree_
print(f"\nDecision path for instance {index}:")
for node_id in node_indicator.indices:
    # Nodes whose left and right child ids coincide are not split nodes; skip them.
    if tree.children_left[node_id] == tree.children_right[node_id]:
        continue
    split_col = tree.feature[node_id]
    feature = feature_names[split_col]
    threshold = tree.threshold[node_id]
    observed = X_instance[0, split_col]
    threshold_sign = "<=" if observed <= threshold else ">"
    print(f"  {feature} = {observed:.2f} {threshold_sign} {threshold:.2f}")

pred_class = dt.predict(X_instance)[0]
# y_test may be a Series (positional access via iloc) or a plain array.
if isinstance(y_test, pd.Series):
    true_class = y_test.iloc[index]
else:
    true_class = y_test[index]

print(f"\nPredicted class: {pred_class}")
print(f"Actual class:    {true_class}")

##Logistic Regression's Contributions For Single Instance

In [None]:
# Per-feature contribution for the selected row: feature value times coefficient
# (the intercept is not included).
contributions = X_test_df.iloc[index] * lr.coef_[0]
print(contributions.sort_values(ascending=False))

##SHAP Force Plots

In [None]:
#dt
# Load SHAP's JavaScript so the interactive plot can render.
shap.initjs()
# Base value and SHAP values are both taken at index 1 of the class axis.
shap.force_plot(dt_explainer.expected_value[1], dt_shap_values[index, :, 1], X_test_df.iloc[index])

In [None]:
#lr
# Load SHAP's JavaScript so the interactive plot can render.
shap.initjs()
# The explainer's expected value and SHAP values are used without class indexing.
shap.force_plot(lr_explainer.expected_value, lr_shap_values[index, :], X_test_df.iloc[index])

In [None]:
#rf
# Load SHAP's JavaScript so the interactive plot can render.
shap.initjs()
# Base value and SHAP values are both taken at index 1 of the class axis.
shap.force_plot(rf_explainer.expected_value[1], rf_shap_values[index, :, 1], X_test_df.iloc[index])

In [None]:
#xgb
# Load SHAP's JavaScript so the interactive plot can render.
shap.initjs()
# The explainer's expected value and SHAP values are used without class indexing.
shap.force_plot(xgb_explainer.expected_value, xgb_shap_values[index, :], X_test_df.iloc[index])

#SHAP Evaluation

In [None]:
# Per model: (fitted model, SHAP explainer, SHAP values for the test split,
# the model's own feature-importance vector). For LR the importance is the
# absolute value of each coefficient.
models_dict = {
    'DT': (dt, dt_explainer, dt_shap_values, dt.feature_importances_),
    'LR': (lr, lr_explainer, lr_shap_values, np.abs(lr.coef_[0])),
    'RF': (rf, rf_explainer, rf_shap_values, rf.feature_importances_),
    'XGB': (xgb, xgb_explainer, xgb_shap_values, xgb.feature_importances_)
}

##Fidelity (Correlation between model and SHAP feature importances)

In [None]:
def fidelity(model_importance, shap_values):
    """Spearman correlation between a model's own importances and SHAP importances.

    SHAP importance per feature is the mean absolute SHAP value. For a 3-D
    array the mean runs over axes 0 and 2; otherwise over axis 0 only.

    Args:
        model_importance: One importance value per feature.
        shap_values: Array of SHAP values with 2 or 3 dimensions.

    Returns:
        The Spearman correlation coefficient.
    """
    magnitudes = np.abs(shap_values)
    reduce_axes = (0, 2) if len(shap_values.shape) == 3 else 0
    shap_importance = magnitudes.mean(axis=reduce_axes)
    rho, _ = spearmanr(model_importance, shap_importance)
    return rho

##Consistency (Entropy of top feature across instances)

In [None]:
def consistency(shap_values, feature_names):
    """Measure how concentrated the per-instance top SHAP feature is.

    For every row the feature with the largest absolute SHAP value is taken
    (index 1 of the last axis when the array is 3-D). The natural-log entropy
    of that top-feature distribution is returned with the most frequent feature.

    Args:
        shap_values: Array of SHAP values with 2 or 3 dimensions.
        feature_names: Feature labels, indexable by column position.

    Returns:
        Tuple ``(entropy, dominant_feature, dominant_percent)`` where
        ``dominant_percent`` is the fraction of rows won by ``dominant_feature``.
    """
    is_per_class = len(shap_values.shape) == 3
    magnitudes = np.abs(shap_values[:, :, 1] if is_per_class else shap_values)
    top_feature = magnitudes.argmax(axis=1)
    n_rows = len(top_feature)

    winners, tallies = np.unique(top_feature, return_counts=True)
    shares = tallies / n_rows
    entropy = -(shares * np.log(shares)).sum()

    lead = np.argmax(tallies)
    return entropy, feature_names[winners[lead]], tallies[lead] / n_rows

##Robustness (Test if SHAP values remain stable under small perturbations)

In [None]:
def robustness(explainer, X_sample_df, n_instances=10, n_perturbations=10, noise_std=0.1, seed=100):
    """Measure how stable SHAP values are under small Gaussian input noise.

    For each of the first ``n_instances`` rows, the SHAP vector of the row is
    compared (Spearman rank correlation) with the SHAP vectors of
    ``n_perturbations`` noisy copies of the row.

    Args:
        explainer: Object exposing ``shap_values(DataFrame)``.
        X_sample_df: DataFrame of instances; rows are taken positionally.
        n_instances: Maximum number of rows to evaluate.
        n_perturbations: Noisy copies generated per row.
        noise_std: Standard deviation of the zero-mean Gaussian noise.
        seed: Seed for the noise generator.

    Returns:
        Mean over instances of the mean per-instance correlation, or NaN when
        no correlation could be computed.
    """
    # Private legacy generator: it yields the same noise stream as
    # np.random.seed(seed) followed by np.random.normal, but leaves NumPy's
    # global random state untouched for the rest of the notebook.
    rng = np.random.RandomState(seed)

    def _explained_vector(frame):
        # For per-class (3-D) output use index 1 of the last axis, as the other
        # metrics do; flattening both classes would mix mirrored values.
        values = explainer.shap_values(frame)
        if len(values.shape) == 3:
            values = values[:, :, 1]
        return values.flatten()

    stabilities = []

    for i in range(min(n_instances, len(X_sample_df))):
        instance_df = X_sample_df.iloc[i:i + 1]
        base_shap = _explained_vector(instance_df)

        corrs = []
        for _ in range(n_perturbations):
            noise = rng.normal(0, noise_std, instance_df.shape)
            perturbed_df = pd.DataFrame(instance_df.values + noise, columns=instance_df.columns)
            corr = spearmanr(base_shap, _explained_vector(perturbed_df))[0]
            # A constant SHAP vector makes the correlation undefined (NaN);
            # skip it instead of letting one NaN poison the whole average.
            if not np.isnan(corr):
                corrs.append(corr)

        if corrs:
            stabilities.append(np.mean(corrs))

    return np.mean(stabilities) if stabilities else np.nan

##Sufficiency (Test if top-k SHAP features preserve predictions)

In [None]:
def sufficiency(model, X_test, shap_values, feature_names, k=5, n_samples=30):
    """Check whether the top-k SHAP features alone preserve each prediction.

    For each of the first ``n_samples`` rows, every feature except the ``k``
    with the largest absolute SHAP value is set to 0 and the model is queried
    again. NOTE(review): 0 is assumed to be a neutral value (e.g. the mean of
    standardized features) - confirm against how the caller scales the data.

    Args:
        model: Fitted classifier exposing ``predict_proba``.
        X_test: 2-D array-like of instances; rows are taken positionally.
        shap_values: SHAP values aligned with ``X_test``, 2-D or 3-D (for 3-D,
            index 1 of the last axis is used).
        feature_names: Feature labels, indexable by column position.
        k: Number of top features to keep; clipped to ``[0, n_features]``.
        n_samples: Maximum number of rows to evaluate.

    Returns:
        dict with ``percent_class_maintained`` (fraction of rows whose predicted
        class is unchanged), ``top_features_used`` (name -> count for the most
        frequently selected features) and ``feature_usage_df`` (per-feature
        count and percentage of evaluated rows, sorted by count descending).
    """
    X = np.asarray(X_test)
    # Rows actually evaluated; may be fewer than n_samples.
    n_eval = min(n_samples, len(X))
    class_maintained = []
    all_top_features = []

    for i in range(n_eval):
        row = X[i]
        original_class = np.argmax(model.predict_proba(row.reshape(1, -1))[0])

        if len(shap_values.shape) == 3:
            shap_vals = shap_values[i, :, 1]
        else:
            shap_vals = shap_values[i, :]

        # Slice from an explicit start index: ``[-k:]`` with k == 0 would
        # select every feature instead of none.
        n_keep = max(0, min(k, len(shap_vals)))
        top_k_indices = np.argsort(np.abs(shap_vals))[len(shap_vals) - n_keep:]
        all_top_features.extend(int(j) for j in top_k_indices)

        masked_instance = np.zeros_like(row)
        masked_instance[top_k_indices] = row[top_k_indices]

        masked_class = np.argmax(model.predict_proba(masked_instance.reshape(1, -1))[0])
        class_maintained.append(original_class == masked_class)

    feature_counter = Counter(all_top_features)

    feature_usage = []
    for feat_idx in range(len(feature_names)):
        count = feature_counter.get(feat_idx, 0)
        # Percentage of the rows that were evaluated, not of the requested n_samples.
        percent = (count / n_eval) * 100 if n_eval else 0.0
        feature_usage.append({'feature': feature_names[feat_idx], 'count': count, 'percentage': percent})

    feature_usage_df = pd.DataFrame(feature_usage).sort_values('count', ascending=False)

    top_features_dict = {
        feature_names[idx]: count
        for idx, count in feature_counter.most_common(max(k, 0))}

    return {
        'percent_class_maintained': np.mean(class_maintained) if class_maintained else np.nan,
        'top_features_used': top_features_dict,
        'feature_usage_df': feature_usage_df
    }

##Completeness (Test if predictions are preserved after removing the top-k SHAP features)

In [None]:
def completeness(model, X_test, shap_values, feature_names, k=5, n_samples=30):
    """Check whether predictions survive removal of the top-k SHAP features.

    For each of the first ``n_samples`` rows, the ``k`` features with the
    largest absolute SHAP value are set to 0 and the model is queried again.
    NOTE(review): 0 is assumed to be a neutral value (e.g. the mean of
    standardized features) - confirm against how the caller scales the data.

    Args:
        model: Fitted classifier exposing ``predict_proba``.
        X_test: 2-D array-like of instances; rows are taken positionally.
        shap_values: SHAP values aligned with ``X_test``, 2-D or 3-D (for 3-D,
            index 1 of the last axis is used).
        feature_names: Feature labels, indexable by column position.
        k: Number of top features to remove; clipped to ``[0, n_features]``.
        n_samples: Maximum number of rows to evaluate.

    Returns:
        dict with ``percent_class_maintained`` (fraction of rows whose predicted
        class is unchanged), ``top_features_removed`` (name -> count for the
        most frequently removed features) and ``feature_usage_df`` (per-feature
        count and percentage of evaluated rows, sorted by count descending).
    """
    X = np.asarray(X_test)
    # Rows actually evaluated; may be fewer than n_samples.
    n_eval = min(n_samples, len(X))
    class_maintained = []
    all_removed_features = []

    for i in range(n_eval):
        row = X[i]
        original_class = np.argmax(model.predict_proba(row.reshape(1, -1))[0])

        if len(shap_values.shape) == 3:
            shap_vals = shap_values[i, :, 1]
        else:
            shap_vals = shap_values[i, :]

        # Slice from an explicit start index: ``[-k:]`` with k == 0 would
        # remove every feature instead of none.
        n_drop = max(0, min(k, len(shap_vals)))
        top_k_indices = np.argsort(np.abs(shap_vals))[len(shap_vals) - n_drop:]
        all_removed_features.extend(int(j) for j in top_k_indices)

        masked_instance = np.copy(row)
        masked_instance[top_k_indices] = 0

        masked_class = np.argmax(model.predict_proba(masked_instance.reshape(1, -1))[0])
        class_maintained.append(original_class == masked_class)

    feature_counter = Counter(all_removed_features)

    feature_usage = []
    for feat_idx in range(len(feature_names)):
        count = feature_counter.get(feat_idx, 0)
        # Percentage of the rows that were evaluated, not of the requested n_samples.
        percent = (count / n_eval) * 100 if n_eval else 0.0
        feature_usage.append({'feature': feature_names[feat_idx], 'count': count, 'percentage': percent})

    feature_usage_df = pd.DataFrame(feature_usage).sort_values('count', ascending=False)

    top_features_dict = {
        feature_names[idx]: count
        for idx, count in feature_counter.most_common(max(k, 0))}

    return {
        'percent_class_maintained': np.mean(class_maintained) if class_maintained else np.nan,
        'top_features_removed': top_features_dict,
        'feature_usage_df': feature_usage_df
    }

##SHAP Evaluation Results

In [None]:
results = {}

# Evaluate every explainability metric once per model and store it under the model's name.
for model_name, (model, explainer, shap_vals, feat_imp) in models_dict.items():
    cs_entropy, dominant_feature, dominant_percent = consistency(shap_vals, feature_names)

    entry = {
        'fidelity': fidelity(feat_imp, shap_vals),
        # Entropy divided by its maximum log(n_features), flipped so higher means more consistent.
        'consistency': 1 - (cs_entropy / np.log(len(feature_names))),
        'dominant_feature': dominant_feature,
        'dominant_percent': dominant_percent,
        # Robustness: a baseline run plus three variants that each change one setting.
        'robustness_baseline': robustness(explainer, X_test_df.head(10), seed=SEED),
        'robustness_more_instances': robustness(explainer, X_test_df.head(30), n_instances=30, seed=SEED),
        'robustness_more_perturbations': robustness(explainer, X_test_df.head(10), n_perturbations=30, seed=SEED),
        'robustness_higher_noise': robustness(explainer, X_test_df.head(10), noise_std=0.3, seed=SEED),
    }

    for top_k in (1, 3, 5, 8):
        entry[f'sufficiency_k{top_k}'] = sufficiency(model, X_test, shap_vals, feature_names, k=top_k)
    for top_k in (1, 3, 5, 8):
        entry[f'completeness_k{top_k}'] = completeness(model, X_test, shap_vals, feature_names, k=top_k)

    results[model_name] = entry

##Results per Model

In [None]:
# Fixed display order, reused by all later result cells.
model_names = ['DT', 'LR', 'RF', 'XGB']

# Print the headline metrics for each model.
for model in model_names:
    print(f"\nModel: {model}")
    for metric in ['fidelity', 'consistency', 'robustness_baseline', 'dominant_feature', 'dominant_percent']:
        print(f"{metric}: {results[model][metric]}")

##Fidelity Plot

In [None]:
# Bar chart of the fidelity score (a Spearman correlation) per model.
fl_scores = [results[m]['fidelity'] for m in model_names]
plt.bar(model_names, fl_scores)
plt.ylabel('Correlation')
plt.title('Fidelity')
plt.show()

##Consistency Plot

In [None]:
# Bar chart of the consistency score per model.
cs_scores = [results[m]['consistency'] for m in model_names]
plt.bar(model_names, cs_scores)
# The stored score is 1 - normalized entropy of the top feature, not a
# correlation, so label the axis accordingly.
plt.ylabel('Consistency (1 - normalized entropy)')
plt.title('Consistency')
plt.show()

##Robustness Testing Results for each Model

In [None]:
# Print the four robustness variants for each model.
for model in model_names:
    print(f"\nModel: {model}")
    for metric in ['robustness_baseline', 'robustness_more_instances', 'robustness_more_perturbations', 'robustness_higher_noise']:
        print(f"{metric}: {results[model][metric]}")

In [None]:
fig, ax = plt.subplots(figsize=(10, 6))

x = np.arange(len(model_names))
width = 0.2

# Grouped bars: one group per model, one bar per robustness variant, centred on the tick.
robustness_runs = [
    ('robustness_baseline', 'RB 1'),
    ('robustness_more_instances', 'RB 2'),
    ('robustness_more_perturbations', 'RB 3'),
    ('robustness_higher_noise', 'RB 4'),
]
for slot, (metric, short_label) in enumerate(robustness_runs):
    heights = [results[m][metric] for m in model_names]
    ax.bar(x + (slot - 1.5) * width, heights, width, label=short_label)

ax.set_ylabel('Correlation')
ax.set_title('Robustness by Model')
ax.set_xticks(x)
ax.set_xticklabels(model_names)
# The legend text replaces the short bar labels with the setting each run changes.
ax.legend(['RB 1 (n_instances=10)', 'RB 2 (n_instances=30)', 'RB 3 (n_perturbations=30)', 'RB 4 (noise_std=0.3)'])

plt.tight_layout()
plt.show()

##Sufficiency Testing Results for each Model

In [None]:
# feature_usage_df is sorted by count (descending), so head(5) picks the five
# features selected most often in the k=5 sufficiency run. The results are
# sets, so their printed order carries no ranking.
dt_top5 = set(results['DT']['sufficiency_k5']['feature_usage_df'].head(5)['feature'])
lr_top5 = set(results['LR']['sufficiency_k5']['feature_usage_df'].head(5)['feature'])
rf_top5 = set(results['RF']['sufficiency_k5']['feature_usage_df'].head(5)['feature'])
xgb_top5 = set(results['XGB']['sufficiency_k5']['feature_usage_df'].head(5)['feature'])

print(f"Decision Tree Top 5 Features: {dt_top5}")
print(f"Logistic Regression Top 5 Features: {lr_top5}")
print(f"Random Forest Top 5 Features: {rf_top5}")
print(f"XGBoost Top 5 Features: {xgb_top5}")

In [None]:
fig, ax = plt.subplots(figsize=(10, 6))

x = np.arange(len(model_names))
width = 0.2

# Grouped bars: one group per model, one bar per k, centred on the tick.
for slot, top_k in enumerate((1, 3, 5, 8)):
    heights = [results[m][f'sufficiency_k{top_k}']['percent_class_maintained'] for m in model_names]
    ax.bar(x + (slot - 1.5) * width, heights, width, label=f'k={top_k}')

ax.set_ylabel('Percent Class Maintained')
ax.set_title('Sufficiency Test: Percent Class Maintained for different k')
ax.set_xticks(x)
ax.set_xticklabels(model_names)
ax.legend()

plt.tight_layout()
plt.show()

In [None]:
# k settings that were evaluated; the completeness plotting cell reuses this list.
k_values = [1, 3, 5, 8]

# One bar chart per (k, model): how often each feature was among the top-k kept features.
for k in k_values:
    print(f"Sufficiency Test: Top Features Used for k={k}")
    for model_name in model_names:
        sufficiency_results = results[model_name][f'sufficiency_k{k}']
        feature_usage_df = sufficiency_results['feature_usage_df']

        plt.figure(figsize=(10, 6))
        plt.bar(feature_usage_df['feature'], feature_usage_df['percentage'])
        plt.ylabel('Percentage of Instances')
        plt.xlabel('Feature')
        plt.title(f"Top Features Used in Sufficiency Test (k={k}) for {model_name}")
        plt.xticks(rotation=90)
        plt.tight_layout()
        plt.show()

##Completeness Testing Results for each Model

In [None]:
fig, ax = plt.subplots(figsize=(10, 6))

x = np.arange(len(model_names))
width = 0.2

# Grouped bars: one group per model, one bar per k, centred on the tick.
for slot, top_k in enumerate((1, 3, 5, 8)):
    heights = [results[m][f'completeness_k{top_k}']['percent_class_maintained'] for m in model_names]
    ax.bar(x + (slot - 1.5) * width, heights, width, label=f'k={top_k}')

ax.set_ylabel('Percent Class Maintained')
ax.set_title('Completeness Test: Percent Class Maintained for different k')
ax.set_xticks(x)
ax.set_xticklabels(model_names)
ax.legend()

plt.tight_layout()
plt.show()

In [None]:
# One bar chart per (k, model): how often each feature was among the top-k removed features.
# Relies on k_values defined in the sufficiency plotting cell.
# NOTE(review): the printed heading and plot title say "Used" although these
# features are the ones removed - confirm the wording.
for k in k_values:
    print(f"Completeness Test: Top Features Used for k={k}")
    for model_name in model_names:
        completeness_results = results[model_name][f'completeness_k{k}']
        feature_usage_df = completeness_results['feature_usage_df']

        plt.figure(figsize=(10, 6))
        plt.bar(feature_usage_df['feature'], feature_usage_df['percentage'])
        plt.ylabel('Percentage of Instances')
        plt.xlabel('Feature')
        plt.title(f"Top Features Used in Completeness Test (k={k}) for {model_name}")
        plt.xticks(rotation=90)
        plt.tight_layout()
        plt.show()

##Accuracy vs Explainability Plot (Through mean values of metrics)

In [None]:
fig, ax = plt.subplots(figsize=(10, 6))

# Composite explainability score: unweighted mean of five metrics per model.
# NOTE(review): fidelity and robustness are Spearman correlations and can be
# negative, so the mean is not guaranteed to lie in 0-1 as the axis label says.
# NOTE(review): completeness contributes its 'percent_class_maintained' as
# "higher is better", although that value measures predictions surviving the
# removal of the top features - confirm the intended direction. Sufficiency
# uses k=5 while completeness uses k=3.
explain_scores = {
    m: np.mean([
        results[m]['fidelity'],
        results[m]['consistency'],
        results[m]['robustness_baseline'],
        results[m]['sufficiency_k5']['percent_class_maintained'],
        results[m]['completeness_k3']['percent_class_maintained']
    ]) for m in model_names
}

# Test-set accuracy per model, recomputed from the stored predictions.
accuracies = {
    'DT': accuracy_score(y_test, dt_pred),
    'LR': accuracy_score(y_test, lr_pred),
    'RF': accuracy_score(y_test, rf_pred),
    'XGB': accuracy_score(y_test, xgb_pred)
}

# One labelled point per model.
for model in model_names:
    ax.scatter(explain_scores[model], accuracies[model])
    ax.annotate(model, (explain_scores[model], accuracies[model]))

ax.set_xlabel('Explainability Score (0-1)')
ax.set_ylabel('Model Accuracy')
ax.set_title('Accuracy vs Explainability Tradeoff')
ax.grid(alpha=0.3)
plt.tight_layout()
plt.show()

In [None]:
# Print "model, explainability score, accuracy" for each model.
for model in model_names:
    print(f"{model}, {explain_scores[model]}, {accuracies[model]}")