✅ 1. Required Libraries

In [2]:
# Core
import pandas as pd
import numpy as np

# Sklearn
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, roc_auc_score

# AIF360 (Fairness toolkit)
from aif360.datasets import BinaryLabelDataset
from aif360.metrics import BinaryLabelDatasetMetric, ClassificationMetric
from aif360.algorithms.preprocessing import Reweighing

✅ 2. Simulated Biased Dataset (Mini HR Example)

In [None]:
data = {
    'ExperienceYears': [5, 2, 7, 6, 8, 2, 4, 1, 6, 3, 2, 5, 7, 1, 8, 4],
    'EducationLevel':  [2, 3, 2, 4, 3, 1, 2, 2, 4, 2, 3, 1, 2, 1, 4, 3],
    'PerformanceScore':[3, 4, 3, 5, 4, 2, 2, 1, 4, 3, 3, 1, 3, 1, 4, 3],
    'Gender':           ['Female', 'Female', 'Male', 'Male', 'Male', 'Female', 'Female', 'Female',
                         'Male', 'Male', 'Female', 'Female', 'Male', 'Female', 'Male', 'Female'],
    'HispanicLatino':   ['Yes', 'No', 'No', 'No', 'Yes', 'Yes', 'Yes', 'No',
                         'No', 'Yes', 'No', 'Yes', 'No', 'No', 'Yes', 'Yes'],
    'Termd':            [1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1]  # 1=Terminated, 0=Stayed
}
df = pd.DataFrame(data)

✅ 3. Label Encoding

In [4]:
df_encoded = df.copy()
df_encoded['Gender'] = LabelEncoder().fit_transform(df_encoded['Gender'])  # Male=1, Female=0
df_encoded['HispanicLatino'] = LabelEncoder().fit_transform(df_encoded['HispanicLatino'])  # Yes=1, No=0

# 🔨 Step 1: Train the Unfair Baseline Model


In [6]:
# Define features and target
X = df_encoded.drop(columns='Termd')
y = df_encoded['Termd']

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Train unfair logistic regression model
model = LogisticRegression()
model.fit(X_train, y_train)

# Predict probabilities and labels
y_pred = model.predict(X_test)
y_prob = model.predict_proba(X_test)[:, 1]

# Evaluate
print("Unfair Baseline Classification Report:")
print(classification_report(y_test, y_pred))
print("AUC Score:", roc_auc_score(y_test, y_prob))

Unfair Baseline Classification Report:
              precision    recall  f1-score   support

           0       0.40      1.00      0.57         2
           1       0.00      0.00      0.00         3

    accuracy                           0.40         5
   macro avg       0.20      0.50      0.29         5
weighted avg       0.16      0.40      0.23         5

AUC Score: 1.0


  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


# 📉 Step 2: Detect Bias with Metrics

(A) Custom Bias Metrics (Group Means & AUCs by Gender)

In [None]:
# Create results DataFrame
results_df = X_test.copy()
results_df['true_label'] = y_test.values
results_df['predicted_prob'] = y_prob

# Add back readable labels
results_df['Gender'] = df.loc[X_test.index, 'Gender'].values
results_df['HispanicLatino'] = df.loc[X_test.index, 'HispanicLatino'].values

# Bias by Gender: mean predicted probability + AUC per group
print("\n--- Bias by Gender ---")
print(results_df.groupby('Gender')['predicted_prob'].mean())

for gender in results_df['Gender'].unique():
    subset = results_df[results_df['Gender'] == gender]
    try:
        auc = roc_auc_score(subset['true_label'], subset['predicted_prob'])
    except:
        auc = float('nan')
    print(f"AUC for {gender}: {auc:.3f}")


--- Bias by Gender ---
Gender
Female    0.398251
Male      0.343228
Name: predicted_prob, dtype: float64
AUC for Female: 1.000
AUC for Male: nan




(B) AIF360 Bias Metrics (Statistical Parity Difference, Disparate Impact)

In [8]:
# Repackage test set for AIF360
dataset_test = BinaryLabelDataset(
    df=pd.concat([X_test, y_test], axis=1),
    label_names=['Termd'],
    protected_attribute_names=['Gender']
)

# Add predictions as scores to test set
dataset_test_pred = dataset_test.copy()
dataset_test_pred.scores = y_prob.reshape(-1, 1)

# AIF360 Fairness Metrics
metric = BinaryLabelDatasetMetric(
    dataset_test_pred,
    privileged_groups=[{'Gender': 1}],  # Male = 1
    unprivileged_groups=[{'Gender': 0}]  # Female = 0
)

print("\n--- AIF360 Fairness Metrics by Gender ---")
print("Statistical Parity Difference:", metric.statistical_parity_difference())
print("Disparate Impact:", metric.disparate_impact())


--- AIF360 Fairness Metrics by Gender ---
Statistical Parity Difference: 0.75
Disparate Impact: inf


  return metric_fun(privileged=False) / metric_fun(privileged=True)
