# 📊 Notebook 04: Explainability with SHAP

This notebook uses SHAP (SHapley Additive exPlanations) to explain individual predictions of a Random Forest classifier trained on the UCI Adult dataset, where the task is to predict whether a person's annual income exceeds $50K.

In [None]:
# Import libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
import shap
import matplotlib.pyplot as plt
# Load SHAP's JavaScript visualization code into the notebook front end.
# This is needed for SHAP's interactive (JS-rendered) plots such as force plots;
# static matplotlib-based plots do not depend on it.
shap.initjs()

In [None]:
# Load and prepare dataset
# The raw UCI file has no header row, so column names are supplied explicitly.
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data'
columns = ['age', 'workclass', 'fnlwgt', 'education', 'education-num', 'marital-status',
           'occupation', 'relationship', 'race', 'sex', 'capital-gain', 'capital-loss',
           'hours-per-week', 'native-country', 'income']
# Fields are separated by a comma followed by one whitespace character. The pattern
# must be a raw string: '\s' in a plain literal is an invalid escape sequence.
# A regex separator requires the python parsing engine.
df = pd.read_csv(url, names=columns, sep=r',\s', engine='python')

# Integer-encode only the non-numeric (categorical) feature columns. Numeric columns
# such as age, capital-gain or hours-per-week are left untouched: factorizing them
# would replace real values with order-of-appearance codes and destroy their
# magnitude and ordering, which makes the model and its SHAP values uninterpretable.
categorical_cols = [
    col for col in df.columns
    if col != 'income' and not pd.api.types.is_numeric_dtype(df[col])
]
encoded = {col: pd.factorize(df[col])[0] for col in categorical_cols}
# Encode the label explicitly so class 1 always means '>50K', regardless of which
# label happens to appear first in the file.
encoded['income'] = (df['income'] == '>50K').astype(int)
df = df.assign(**encoded)

X = df.drop('income', axis=1)
y = df['income']
# Guard against a silent parsing problem (e.g. stray whitespace in the label)
# that would collapse the target to a single class.
assert y.nunique() == 2, "expected both income classes ('<=50K' and '>50K') in the data"

# Hold out 20% of the rows for evaluation/explanation; fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
# Train model
# A 100-tree random forest; the fixed seed makes the fitted model reproducible.
rf_params = {"n_estimators": 100, "random_state": 42}
clf = RandomForestClassifier(**rf_params)
clf.fit(X_train, y_train)

In [None]:
# SHAP explainability
# TreeExplainer computes SHAP values for tree ensembles such as the random forest above.
explainer = shap.TreeExplainer(clf)
shap_values = explainer.shap_values(X_test)

# Select the SHAP values for class 1 (income >50K).
# The return layout of `shap_values` depends on the SHAP version:
#   - older releases: a list with one (n_samples, n_features) array per class;
#   - newer releases: a single (n_samples, n_features, n_classes) array.
# Indexing the newer array with [1] would pick the second *sample*, not class 1,
# so both layouts are handled explicitly.
if isinstance(shap_values, list):
    shap_values_pos = shap_values[1]
elif getattr(shap_values, 'ndim', 0) == 3:
    shap_values_pos = shap_values[:, :, 1]
else:
    # Already a single (n_samples, n_features) array; use it as-is.
    shap_values_pos = shap_values

# Summary plot for class 1 (income >50K): mean |SHAP value| per feature.
shap.summary_plot(shap_values_pos, X_test, plot_type="bar")