In [None]:
# Project 2: Racial Bias in Loan Approvals – Decision Tree

# Step 1: Install necessary libraries
!pip install fairlearn --quiet

# Step 2: Import libraries
import pandas as pd
import numpy as np
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from fairlearn.metrics import MetricFrame, equalized_odds_difference

# Step 3: Generate synthetic dataset (biased by race)
# The global NumPy seed is fixed and the four random draws below are kept in
# this exact order (income, credit score, race, noise) so the generated data
# is identical from run to run.
np.random.seed(42)
size = 5000  # 5000 loan applications
income = np.random.normal(50000, 15000, size)
credit_score = np.random.normal(700, 50, size)
race = np.random.choice(["White", "Black"], size=size, p=[0.7, 0.3])  # Imbalance in race

# Simulate loan approval with bias against "Black": an application is approved
# when income + credit score + Gaussian noise exceeds 75,000, but Black
# applicants have 2,000 subtracted first, so identical financials can lead to
# different outcomes. That penalty is the bias the later steps measure.
approved = (income + credit_score - (race == "Black") * 2000 + np.random.randn(size) * 1000) > 75000
approved = approved.astype(int)  # boolean mask -> 0/1 label

# Step 4: Create DataFrame
df_loan = pd.DataFrame({
    "Income": income,
    "CreditScore": credit_score,
    "Race": race,
    "Approved": approved
})

# Step 5: Encode Race feature
# Build the feature matrix in a single expression. Selecting columns and then
# assigning a new column into that selection triggers pandas'
# SettingWithCopyWarning; assign() returns a new, independent DataFrame, so
# the encoded column is added without touching (or appearing to touch) df_loan.
X = df_loan[["Income", "CreditScore"]].assign(
    Race=(df_loan["Race"] == "White").astype(int)  # Encode White as 1, Black as 0
)
y = df_loan["Approved"]

# Step 6: Hold out 30% of the applications for testing.
# The raw race labels are passed through the same split as X and y so that
# race_test stays row-aligned with the test-set predictions when the fairness
# metric is computed below.
(
    X_train, X_test,
    y_train, y_test,
    race_train, race_test,
) = train_test_split(X, y, df_loan["Race"], test_size=0.3, random_state=42)

# Step 7: Fit the baseline Decision Tree (depth capped at 5) and predict on
# the held-out rows. Note the encoded Race column is one of the model inputs.
dt_model = DecisionTreeClassifier(max_depth=5).fit(X_train, y_train)
y_pred_dt = dt_model.predict(X_test)

# Step 8: Score the baseline on overall accuracy and on the equalized odds
# difference between the race groups in the test set.
baseline_accuracy_dt = accuracy_score(y_true=y_test, y_pred=y_pred_dt)
baseline_fairness_dt = equalized_odds_difference(
    y_true=y_test,
    y_pred=y_pred_dt,
    sensitive_features=race_test,
)

print(f"Baseline Accuracy (Decision Tree): {baseline_accuracy_dt:.2f}")
print(f"Baseline Equalized Odds Difference: {baseline_fairness_dt:.2f}")

# Step 9: Simulated bias mitigation (structure only)
# NOTE: nothing is retrained here and no adversary is involved. This is a
# placeholder post-processing step for the portfolio demo: it edits a copy of
# the baseline predictions directly, and can be swapped for a real mitigation
# technique later.
y_pred_fair_dt = y_pred_dt.copy()

# Rows eligible for adjustment: Black applicants whom the baseline tree denied.
is_black_applicant = race_test == "Black"
was_denied = y_pred_dt == 0
bias_indices = is_black_applicant & was_denied

# Redraw every eligible prediction at random: it stays a denial (0) with
# probability 0.4 and becomes an approval (1) with probability 0.6. The draw
# comes from the global NumPy RNG, so it follows whatever state earlier code
# left it in.
redrawn_predictions = np.random.choice([0, 1], size=bias_indices.sum(), p=[0.4, 0.6])
y_pred_fair_dt[bias_indices] = redrawn_predictions

# Step 10: Score the adjusted predictions with the same two metrics used for
# the baseline so the numbers are directly comparable.
fair_accuracy_dt = accuracy_score(y_true=y_test, y_pred=y_pred_fair_dt)
fair_fairness_dt = equalized_odds_difference(
    y_true=y_test,
    y_pred=y_pred_fair_dt,
    sensitive_features=race_test,
)

print(f"\nFair Model Accuracy (Decision Tree): {fair_accuracy_dt:.2f}")
print(f"Fair Model Equalized Odds Difference: {fair_fairness_dt:.2f}")
