In [1]:
import pandas as pd
import joblib
import shap

In [5]:

# Load the trained model from disk. Only the model is loaded in this cell; no
# preprocessor object is read here. (Judging by the filename this is presumably
# an EBM classifier — confirm against the modeling notebook.)
# joblib.load unpickles the file, so only point it at artifacts you trust.
model_path = "../3_modeling/models/best_ebm.pkl"  # Adjust path as needed
model = joblib.load(model_path)

In [None]:
# Feature columns used by the explainer, in the order used throughout this
# notebook (17 columns: 6 "mean", 8 "se" and 3 "worst" measurements).
expected_features = [
    # mean measurements
    'radius_mean', 'texture_mean', 'smoothness_mean', 'compactness_mean',
    'symmetry_mean', 'fractal_dimension_mean',
    # standard-error measurements
    'radius_se', 'texture_se', 'smoothness_se', 'compactness_se',
    'concavity_se', 'concave points_se', 'symmetry_se', 'fractal_dimension_se',
    # worst-case measurements
    'smoothness_worst', 'symmetry_worst', 'fractal_dimension_worst',
]

# Reference training data used as the background set when the SHAP explainer is
# initialised. The file must contain at least every column named in
# `expected_features`; additional columns are simply not selected.
# NOTE(review): the model is read from "../3_modeling/models/" while this file is
# read from "../models/" — confirm both paths are correct.
X_train = pd.read_csv("../models/X_train_reference.csv")


In [None]:

# Build the SHAP explainer around the model's probability output. The training
# rows, restricted to the model's feature columns, serve as the background data
# the explainer uses to mask features.
background_data = X_train[expected_features]
explainer = shap.Explainer(model.predict_proba, background_data)

In [None]:

# Example patient input (in a real scenario, a doctor would input these values).
#
# The record has to provide a value for every feature the model and the SHAP
# explainer work with (the 17 columns listed in `expected_features`); a record
# that covers only some of them cannot be scored.
input_data = {
    # --- values from the original example ---
    "radius_mean": 12.3,
    "texture_mean": 17.5,
    "radius_se": 0.35,
    "area_mean": 500.0,          # not one of the model's features; kept for reference
    "smoothness_mean": 0.09,
    "compactness_mean": 0.07,
    "concavity_mean": 0.03,      # not one of the model's features; kept for reference
    # --- remaining model features ---
    # NOTE(review): the values below are illustrative placeholders chosen to be
    # plausible in magnitude; replace them with the patient's real measurements.
    "symmetry_mean": 0.18,
    "fractal_dimension_mean": 0.062,
    "texture_se": 1.2,
    "smoothness_se": 0.007,
    "compactness_se": 0.02,
    "concavity_se": 0.025,
    "concave points_se": 0.01,
    "symmetry_se": 0.02,
    "fractal_dimension_se": 0.003,
    "smoothness_worst": 0.13,
    "symmetry_worst": 0.28,
    "fractal_dimension_worst": 0.08,
}

In [None]:

# Score one patient record and explain the prediction.
#
# Reads:  input_data, expected_features, model, explainer (defined in earlier cells)
# Writes: X_input, X_input_prepared, pred_proba, prediction, shap_values,
#         feature_contributions, sorted_features, top_contributors, warning_msg

# Arrange the patient values in exactly the column order used to build the
# explainer. Keys of `input_data` that are not model features are dropped;
# features that are absent from `input_data` show up as NaN.
X_input = pd.DataFrame([input_data]).reindex(columns=expected_features)

# Refuse to score an incomplete record instead of predicting on NaNs.
missing_features = X_input.columns[X_input.isna().any()].tolist()
if missing_features:
    raise ValueError(
        "input_data is missing values for required features: "
        + ", ".join(missing_features)
    )

# No preprocessor is loaded anywhere in this notebook, and the SHAP explainer
# wraps `model.predict_proba` on the raw feature columns, so the prediction is
# made on the very same frame that gets explained.
X_input_prepared = X_input

# Predict probability of malignant.
# assumes column 1 of predict_proba is the malignant class — TODO confirm
# against model.classes_
pred_proba = float(model.predict_proba(X_input_prepared)[:, 1][0])
prediction = "malignant" if pred_proba > 0.5 else "benign"

# Get local explanation with SHAP. Because the explained function returns one
# probability per class, the values for a single row are typically shaped
# (n_features, n_classes); keep only the malignant-class column so that each
# feature maps to a single number (consistent with `[:, 1]` above).
shap_values = explainer(X_input_prepared)[0].values
if shap_values.ndim == 2:
    shap_values = shap_values[:, 1]
feature_contributions = {
    feat: float(contribution)
    for feat, contribution in zip(expected_features, shap_values)
}

# Sort features by absolute contribution and keep the three strongest drivers.
sorted_features = sorted(
    feature_contributions.items(), key=lambda item: abs(item[1]), reverse=True
)
top_contributors = sorted_features[:3]

# Check for critical ranges (for example, radius_mean between 12 and 14 is known as a tricky range)
warning_msg = ""
if 12 <= input_data["radius_mean"] <= 14:
    warning_msg = (
        "Note: The radius_mean value is in a critical range where misclassifications "
        "occur more frequently. A manual expert review is recommended."
    )

# Display results to the doctor
print("Prediction for this patient case:")
print(f"- Model prediction: {prediction} (Malignant probability: {pred_proba*100:.2f}%)")

if warning_msg:
    print(f"**Warning:** {warning_msg}")

print("\nKey drivers of this decision:")
for feat, val in top_contributors:
    # Positive SHAP value pushes the malignant probability up, otherwise down.
    direction = "increases" if val > 0 else "decreases"
    print(f"- {feat}: {direction} the malignant probability")