In [1]:
# 🚀 Step 1: Imports & Setup
import pandas as pd, numpy as np, joblib, os
from aif360.algorithms.preprocessing import Reweighing
from aif360.datasets import StandardDataset
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import xgboost as xgb


pip install 'aif360[AdversarialDebiasing]'
pip install 'aif360[AdversarialDebiasing]'
pip install 'aif360[inFairness]'


In [3]:
# 📥 Step 2: Load and Prepare Data
# Read the raw loan table, then separate the model inputs from the target.
df = pd.read_csv("../data/loan_dataset.csv")

# Columns used as model inputs (numeric fields plus the raw categorical ones).
features = [
    'age', 'income', 'loan_amount', 'credit_score',
    'gender', 'race', 'region',
]

y = df['loan_approved']          # target column
X = df.loc[:, features].copy()   # independent copy so later edits never touch df


In [4]:
# 🔄 Step 3: One-hot Encode Features
# Expand categorical columns into indicator columns; drop_first=True omits the
# first level of each one so the indicators are not perfectly collinear.
X_encoded = pd.get_dummies(X, drop_first=True)

# Hold out 20% of the rows; the fixed random_state keeps the partition repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X_encoded,
    y,
    test_size=0.2,
    random_state=42,
)


In [7]:
# ⚖️ Step 4: Encode and Convert to AIF360 Dataset for Reweighing
# Keep only the declared model inputs plus the label. StandardDataset turns
# every column it receives (other than the label and `features_to_drop`) into a
# feature -- it does NOT discard undeclared columns -- so passing the whole
# frame would let any extra CSV column (IDs, timestamps, ...) silently become
# a model input that the test frame cannot supply later.
df_aif = df[features + ['loan_approved']].copy()

# Convert protected attribute (gender) to binary: Male=1, Female=0.
# Any other value maps to NaN.
# NOTE(review): StandardDataset is expected to drop rows containing NaN with a
# warning -- confirm this is the intended handling for other gender values.
df_aif['gender'] = df_aif['gender'].map({'Male': 1, 'Female': 0})

dataset = StandardDataset(
    df_aif,
    label_name='loan_approved',
    favorable_classes=[1],
    protected_attribute_names=['gender'],
    privileged_classes=[[1]],  # Male = privileged
    features_to_drop=['race', 'region']  # Drop string cols that are not encoded
)


In [8]:
# 🧪 Step 5: Split into Train/Test AIF360 Format
# Reuse the row partition produced by train_test_split in Step 3 instead of
# drawing a second, unseeded shuffle. An independent shuffle is not
# reproducible, and it lets rows that are later scored as "test" (X_test) end
# up in the reweighing/training split, which leaks test data into training.
# AIF360 stores each row's original DataFrame index label, as a string, in
# `instance_names`; that is what ties the two representations together.
test_row_ids = set(X_test.index.astype(str))
in_test = np.array([name in test_row_ids for name in dataset.instance_names], dtype=bool)

# `subset` takes positional row indexes.
train = dataset.subset(np.flatnonzero(~in_test))
test = dataset.subset(np.flatnonzero(in_test))


In [9]:
# 🧰 Step 6: Apply Reweighing
# Name of the protected attribute column. It must be defined here: no earlier
# cell sets it, so a fresh kernel would otherwise fail with NameError.
# The 0/1 group values follow the Step 4 encoding (Female=0, Male=1 privileged).
protected = 'gender'

RW = Reweighing(unprivileged_groups=[{protected: 0}], privileged_groups=[{protected: 1}])

# Fit on the training split only, then attach the learned instance weights to it.
RW.fit(train)
train_rw = RW.transform(train)


In [11]:
# 🧠 Step 7: Retrain XGBoost on Reweighed Data
# Rebuild a labelled DataFrame from the AIF360 feature matrix.
# NOTE(review): get_dummies is presumably a no-op here if the AIF360 features
# are already numeric -- confirm before removing it.
X_train_rw = pd.get_dummies(
    pd.DataFrame(train_rw.features, columns=train_rw.feature_names),
    drop_first=True,
)

# Per-row weights produced by Reweighing, and the labels flattened to 1-D.
sample_weights = train_rw.instance_weights
y_train_rw = train_rw.labels.ravel()

model_rw = xgb.XGBClassifier(random_state=42)
model_rw.fit(
    X_train_rw,
    y_train_rw,
    sample_weight=sample_weights,
)

print("✅ Debiased model retrained using reweighed data.")


✅ Debiased model retrained using reweighed data.


In [13]:
# 🧾 Step 8: Predict and Save Debiased Predictions
X_test_enc = pd.get_dummies(X_test, drop_first=True)

# model_rw was trained on the AIF360 frame, where gender is one numeric column
# (Male=1, Female=0). X_test comes from the one-hot frame built in Step 3,
# which carries indicator columns (e.g. gender_Male) but no plain `gender`.
# Without this step the reindex below would fill `gender` with 0 for every
# row, i.e. every applicant would be scored as Female.
if 'gender' in X_train_rw.columns and 'gender' not in X_test_enc.columns:
    X_test_enc['gender'] = df.loc[X_test_enc.index, 'gender'].map({'Male': 1, 'Female': 0})

# Surface any other training column the test frame cannot supply, because
# reindex would otherwise replace it with zeros without any notice.
missing_cols = [col for col in X_train_rw.columns if col not in X_test_enc.columns]
if missing_cols:
    print(f"⚠️ Test set lacks training columns (filled with 0): {missing_cols}")

# Align test set with training columns (same names, same order)
X_test_enc = X_test_enc.reindex(columns=X_train_rw.columns, fill_value=0)

y_pred_rw = model_rw.predict(X_test_enc)
y_prob_rw = model_rw.predict_proba(X_test_enc)[:, 1]  # probability of the positive class

df_debiased_preds = pd.DataFrame({
    'y_true': y_test.values,
    'y_pred': y_pred_rw,
    'y_prob': y_prob_rw
})

# Make sure the target directory exists so the write works on a fresh checkout.
os.makedirs("results", exist_ok=True)
df_debiased_preds.to_csv("results/debiased_predictions.csv", index=False)
print("✅ Debiased predictions saved to results/debiased_predictions.csv")


✅ Debiased predictions saved to results/debiased_predictions.csv


In [15]:
# 💾 Step 9: Save Outputs
# Persist the hold-out predictions and the fitted model under ../results.
import os
import joblib

os.makedirs("../results", exist_ok=True)  # create the output folder if it is missing

# One row per test example: true label, predicted label, positive-class probability.
pd.DataFrame(
    dict(
        y_true=y_test.values,
        y_pred=y_pred_rw,
        y_prob=y_prob_rw,
    )
).to_csv("../results/debiased_predictions.csv", index=False)

# Serialize the reweighed XGBoost classifier next to the predictions.
joblib.dump(model_rw, "../results/model_xgb_debiased.pkl")

print("✅ Debiased predictions & model saved.")


✅ Debiased predictions & model saved.
