In [1]:
# 📘 SHAP Explainability for At-Risk Model

import pandas as pd
import joblib
import shap
import matplotlib.pyplot as plt

# Load the cleaned student data and the previously trained at-risk model.
df = pd.read_csv("../data/cleaned/students_cleaned.csv")
model = joblib.load("../models/at_risk_model.pkl")

# Re-create the binary 'at_risk' target: 1 when average_score < 60, else 0.
# Vectorized comparison; NaN scores compare False and therefore map to 0,
# exactly as a per-row `1 if x < 60 else 0` would.
df['at_risk'] = (df['average_score'] < 60).astype("int64")

# Build the feature matrix: drop the target and the columns it derives from,
# one-hot encode the categoricals, and sort the columns alphabetically.
# NOTE(review): the sort presumably mirrors the training script's column
# order -- confirm against the code that produced at_risk_model.pkl.
X = df.drop(columns=['average_score', 'performance', 'at_risk'])
X = pd.get_dummies(X, drop_first=True)
X = X.reindex(sorted(X.columns), axis=1)  # Ensure consistent column order

# Select one student to explain (fixed seed so the pick is reproducible).
X_sample = X.sample(1, random_state=42)
print("🔍 Selected student info:\n", X_sample)


# ------------------------------
# 🧠 Create SHAP Explainer
# ------------------------------
def _positive_class(explanation):
    """Reduce a per-class SHAP Explanation to a single model output.

    Some explainer/model combinations return values shaped
    (rows, features, classes). The waterfall and beeswarm plots only accept
    one output per row, so in that case keep class index 1.
    Explanations that are already 2-D are returned unchanged.

    NOTE(review): assumes index 1 is the at_risk == 1 class -- confirm
    against `model.classes_`.
    """
    if explanation.values.ndim == 3:
        return explanation[:, :, 1]
    return explanation


# The full feature matrix doubles as the background data for the explainer.
explainer = shap.Explainer(model, X)
shap_values = _positive_class(explainer(X_sample))

# ------------------------------
# 📊 Visualizations
# ------------------------------

# Waterfall Plot for the selected student.
# show=False keeps the figure open so tight_layout() is applied to the SHAP
# figure itself; with the default show=True the plot is displayed inside the
# call and the tight_layout()/show() below would act on a new, empty figure.
print("💡 Waterfall Plot - Impact of features on prediction")
shap.plots.waterfall(shap_values[0], show=False)
plt.tight_layout()
plt.show()

# Summary Plot (Optional - for full dataset)
print("📈 SHAP Summary Plot for All Features")
shap_values_full = _positive_class(explainer(X))
shap.plots.beeswarm(shap_values_full, show=False)  # same show=False reasoning
plt.tight_layout()
plt.show()


ModuleNotFoundError: No module named 'shap'