<a href="https://colab.research.google.com/github/OneFineStarstuff/OneFineStarstuff/blob/main/Example_using_SHAP_with_a_healthcare_diagnostic_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
pip install shap scikit-learn pandas

In [None]:
import shap
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

# Hypothetical function to get patient records
def get_patient_records():
    """Return a small, hard-coded table of example patient records.

    This is a placeholder for real patient-data loading. The result has
    four rows and the columns ``age``, ``blood_pressure``, ``cholesterol``
    and ``label`` (in that order).
    """
    column_names = ['age', 'blood_pressure', 'cholesterol', 'label']
    # One tuple per patient, in the same order as column_names.
    patient_rows = [
        (25, 120, 200, 0),
        (34, 140, 220, 1),
        (45, 130, 180, 1),
        (52, 125, 190, 0),
    ]
    return pd.DataFrame(patient_rows, columns=column_names)

# Load patient data and separate the feature columns from the target column
data = get_patient_records()
X = data.drop('label', axis=1)
y = data['label']

# Hold out 20% of the rows as the test set; the fixed seed makes the split reproducible
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train a random forest classifier.
# The forest is seeded as well: without it the fitted trees (and therefore the
# SHAP values computed from them) would differ on every run even though the
# split above is fixed.
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

# Explain the model's predictions using SHAP
explainer = shap.TreeExplainer(model)
shap_values = explainer.shap_values(X_test)

# Print the shapes of shap_values and X_test
print("Shapes of shap_values and X_test:")
if isinstance(shap_values, list):
    print("shap_values:", [sv.shape for sv in shap_values])
else:
    print("shap_values:", np.asarray(shap_values).shape)
print("X_test:", X_test.shape)

# Use the first sample for explanation
sample_index = 0

# Index of the model output (class) to explain; 0 is the first explained output.
# Change to 1 to explain the other class of the binary classifier.
class_index = 0

# The layout returned by shap_values() for a classifier depends on the
# installed SHAP release:
#   * older releases: a list with one (n_samples, n_features) array per class
#   * newer releases: a single (n_samples, n_features, n_classes) array
# Normalise both to one (n_samples, n_features) array for the chosen class.
# Indexing the newer layout as if it were a list selects the wrong axis, and
# zero-padding the result to the feature count would only hide that mistake.
if isinstance(shap_values, list):
    class_shap_values = np.asarray(shap_values[class_index])
else:
    all_shap_values = np.asarray(shap_values)
    if all_shap_values.ndim == 3:
        class_shap_values = all_shap_values[:, :, class_index]
    else:
        # Already (n_samples, n_features): the explainer reported a single output.
        class_shap_values = all_shap_values

# expected_value holds one base value per explained output (or a bare scalar
# when there is a single output); pick the one matching class_index.
base_values = np.atleast_1d(explainer.expected_value)
expected_value = base_values[class_index] if base_values.size > 1 else base_values[0]

shap_values_for_sample = class_shap_values[sample_index]

print("Adjusted SHAP Values:", shap_values_for_sample)

# Ensure the SHAP values and feature lengths match. This raises instead of
# asserting so the check still runs when Python is started with -O.
if len(shap_values_for_sample) != X_test.shape[1]:
    raise ValueError("SHAP values length does not match the number of features.")

# Plot the explanation with correct dimensions
shap.initjs()
shap.force_plot(expected_value, shap_values_for_sample, X_test.iloc[sample_index], feature_names=X.columns.tolist())