In [None]:
# Project: Explainability using SHAP - Loan Approval Classification

# Step 1: Install SHAP
!pip install shap --quiet

# Step 2: Import libraries
import pandas as pd
import numpy as np
import shap
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt

# Step 3: Create synthetic loan dataset
# Seed NumPy's global RNG so every run produces the same table.
np.random.seed(42)
size = 1000

# Draw each column in the order it appears in the frame so the random
# stream is consumed in a fixed, reproducible sequence.
income = np.random.randint(20000, 120000, size)
credit_score = np.random.randint(300, 850, size)
age = np.random.randint(20, 70, size)
gender = np.random.choice(['Male', 'Female'], size=size)
# NOTE: the label is sampled on its own (40% rejected / 60% approved) and
# does not depend on any of the feature columns drawn above.
loan_approved = np.random.choice([0, 1], size=size, p=[0.4, 0.6])

data = pd.DataFrame({
    'Income': income,
    'CreditScore': credit_score,
    'Age': age,
    'Gender': gender,
    'LoanApproved': loan_approved,
})

# Step 4: Encode categorical features
# Replace the Gender strings with integer codes (Female = 0, Male = 1).
le = LabelEncoder()
le.fit(data['Gender'])
data['Gender'] = le.transform(data['Gender'])

# Step 5: Split into features and labels
# Everything except the target column is used as a model input.
target_column = 'LoanApproved'
y = data[target_column]
X = data.drop(columns=target_column)

# Step 6: Train-test split
# Hold out 30% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Step 7: Train Random Forest model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Score the fitted forest on the held-out rows.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

# Step 8: SHAP explanation setup
# X_train serves as the background dataset for the explainer; the held-out
# rows in X_test are the ones being explained.
explainer = shap.Explainer(model, X_train)
shap_values = explainer(X_test)

# For a classifier the explanation carries one set of SHAP values per class
# (samples x features x classes). The bar and waterfall plots need a single
# output, so select the slice for the "approved" class (label 1). If the
# explanation is already two-dimensional it is used as-is.
if shap_values.values.ndim == 3:
    approved_idx = list(model.classes_).index(1)
    shap_values_approved = shap_values[:, :, approved_idx]
else:
    shap_values_approved = shap_values

# Step 9: Global Feature Importance
# Mean absolute SHAP value per feature for the approved class.
shap.plots.bar(shap_values_approved)

# Step 10: Local Explanation for a single instance
# Per-feature contributions to the approved-class output for the first
# test row.
shap.plots.waterfall(shap_values_approved[0])
