In [5]:
# Importing necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import shap
import matplotlib.pyplot as plt

# Step 1: Load and Prepare the Dataset
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/heart-disease/processed.cleveland.data"
columns = ['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg', 'thalach', 'exang', 
           'oldpeak', 'slope', 'ca', 'thal', 'target']

df = pd.read_csv(url, header=None, names=columns)

# Replace '?' with NaN and convert to numeric
df = df.replace('?', np.nan)
df = df.apply(pd.to_numeric, errors='coerce')

# Handle NaN values (dropping them)
df = df.dropna()

# Convert target to binary classification (0 for no disease, 1 for disease)
df['target'] = df['target'].apply(lambda x: 1 if x > 0 else 0)

# Features and target
X = df.drop('target', axis=1)
y = df['target']

# Step 2: Split the Data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 3: Train the Model
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

# Evaluate the model
y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))

# Step 4: SHAP for Interpretability
# Use TreeExplainer for tree-based models like RandomForest
explainer = shap.TreeExplainer(model)

# Calculate SHAP values for test data
shap_values = explainer.shap_values(X_test)

# Global interpretability: Summary plot
shap.summary_plot(shap_values[1], X_test)  # For class 1 (positive class)

# Local interpretability: Force plot for a specific observation
shap.force_plot(explainer.expected_value[1], shap_values[1][1], X_test.iloc[1], matplotlib=True)

# Global interpretability: Bar plot for feature importance
shap.plots.bar(shap_values[1])

# Additional: Individual dependence plot for a specific feature
shap.dependence_plot('age', shap_values[1], X_test)


Accuracy: 0.8833333333333333


AssertionError: The shape of the shap_values matrix does not match the shape of the provided data matrix.