In [None]:
# Project 3: Healthcare Bias (Age, Race) – Neural Network

# Step 1: Install necessary libraries
!pip install fairlearn tensorflow --quiet

# Step 2: Import libraries
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from fairlearn.metrics import MetricFrame, demographic_parity_difference

# Step 3: Build a synthetic patient cohort whose labels are skewed by age and race
np.random.seed(42)  # fixed seed so the same cohort is drawn on every run
size = 10000  # number of patient records

# NOTE: every draw below consumes the global NumPy RNG in sequence, so the
# order of these statements determines the generated data.
age = np.random.randint(20, 80, size)  # integer ages in [20, 80)
health_score = np.random.normal(50, 10, size)  # mean 50, standard deviation 10
race = np.random.choice(["White", "Black"], size=size, p=[0.7, 0.3])

# Treatment label: the health score is docked 5 points for patients older than
# 50 and 5 points for Black patients, jittered with Gaussian noise (std 3),
# and the patient counts as treated when the result exceeds 45.
older_penalty = 5 * (age > 50)
race_penalty = 5 * (race == "Black")
label_noise = 3 * np.random.randn(size)
adjusted_score = health_score - older_penalty - race_penalty + label_noise
treated = (adjusted_score > 45).astype(int)

# Step 4: Collect the columns into a single DataFrame
df_health = pd.DataFrame(
    dict(Age=age, HealthScore=health_score, Race=race, Treated=treated)
)

# Step 5: Encode Race, split into train/test, then scale the features
# Take an explicit copy of the numeric columns so that adding the Race column
# below writes to an independent frame rather than to a slice of df_health
# (the chained-assignment pattern that triggers SettingWithCopyWarning).
X = df_health[["Age", "HealthScore"]].copy()
X["Race"] = (df_health["Race"] == "White").astype(int)  # Encode White as 1, Black as 0
y = df_health["Treated"]

# Split BEFORE scaling. The raw Race labels are split alongside X and y so the
# fairness metrics can later be grouped by the original string values.
X_train_raw, X_test_raw, y_train, y_test, race_train, race_test = train_test_split(
    X, y, df_health["Race"], test_size=0.3, random_state=42
)

# Fit the scaler on the training rows only, then apply the same transform to
# the test rows; fitting on the full dataset would leak test-set mean/variance
# into preprocessing.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Full feature matrix under the train-fitted scaling; retained under its
# original name in case other cells reference it.
X_scaled = scaler.transform(X)

# Step 6: Build and train a Neural Network model
# Seed the Python, NumPy and TensorFlow RNGs so that weight initialisation,
# dropout masks and batch shuffling repeat from run to run; without this the
# accuracy/fairness numbers printed below change on every execution.
# NOTE: this also re-seeds NumPy's global RNG, so later np.random draws start
# from this seed.
tf.keras.utils.set_random_seed(42)

# Declare the input shape with an explicit Input object instead of passing
# `input_dim` to the first Dense layer, which is the legacy way of doing it.
model = Sequential([
    tf.keras.Input(shape=(X_train.shape[1],)),
    Dense(16, activation='relu'),
    Dropout(0.2),
    Dense(8, activation='relu'),
    Dense(1, activation='sigmoid'),  # single probability for the binary label
])

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(X_train, y_train, epochs=10, batch_size=32, verbose=0)

# Step 7: Score the held-out set with the baseline network
# Threshold the model output at 0.5 to obtain hard 0/1 labels, flattened to 1-D.
predicted_scores_nn = model.predict(X_test)
y_pred_nn = (predicted_scores_nn > 0.5).astype(int).flatten()

# Overall accuracy, plus the demographic parity difference between the groups
# defined by the raw race labels of the test rows.
baseline_accuracy_nn = accuracy_score(y_test, y_pred_nn)
baseline_fairness_nn = demographic_parity_difference(
    y_test,
    y_pred_nn,
    sensitive_features=race_test,
)

print(f"Baseline Accuracy (Neural Network): {baseline_accuracy_nn:.2f}")
print(f"Baseline Demographic Parity Difference: {baseline_fairness_nn:.2f}")

# Step 8: Simulated fairness adjustment (illustration only)
# This is NOT a real fair-representation technique: it post-processes the
# baseline predictions by randomly flipping some of the negative predictions
# made for Black patients. A genuine mitigation would change the model or the
# learned representation itself.

# Work on a copy so the baseline predictions stay untouched.
y_pred_fair_nn = np.copy(y_pred_nn)

# Candidates for flipping: test rows of Black patients predicted as untreated.
is_black = race_test == "Black"
predicted_untreated = y_pred_nn == 0
bias_indices_nn = is_black & predicted_untreated

# Each candidate is re-drawn: 1 with probability 0.7, 0 with probability 0.3.
n_candidates = bias_indices_nn.sum()
y_pred_fair_nn[bias_indices_nn] = np.random.choice([0, 1], size=n_candidates, p=[0.3, 0.7])

# Step 9: Re-score the adjusted predictions with the same two metrics
fair_accuracy_nn = accuracy_score(y_true=y_test, y_pred=y_pred_fair_nn)
fair_fairness_nn = demographic_parity_difference(
    y_true=y_test,
    y_pred=y_pred_fair_nn,
    sensitive_features=race_test,
)

print(f"\nFair Model Accuracy (Neural Network): {fair_accuracy_nn:.2f}")
print(f"Fair Model Demographic Parity Difference: {fair_fairness_nn:.2f}")
