In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from aif360.metrics import BinaryLabelDatasetMetric, ClassificationMetric
from aif360.datasets import BinaryLabelDataset
from aif360.algorithms.preprocessing.optim_preproc_helpers.data_preproc_functions import load_preproc_data_adult

from group_fairness import eval_group_fairness

# Load and Prepare Data
def load_data():
    """
    Loads the Adult dataset via AIF360's pre-processing utility and flattens it
    into a pandas DataFrame for easier handling.

    Side effects:
        Seeds NumPy's global random generator with 42.

    Returns:
        pd.DataFrame: One column per entry of the dataset's ``feature_names``,
        plus 'Income Binary' (the dataset labels) and 'sex' (the first
        protected-attribute column).
    """
    # Set seed for reproducibility
    np.random.seed(42)

    # Load dataset using AIF360's pre-processing function, with 'sex' passed
    # as the protected attribute name
    dataset_orig = load_preproc_data_adult(['sex'])

    # Convert the dataset to a DataFrame
    df = pd.DataFrame(columns=dataset_orig.feature_names, data=dataset_orig.features)

    # AIF360 documents `labels` as a column vector; flatten it explicitly so the
    # assignment does not depend on pandas accepting an (n, 1) array for a
    # single column.
    df['Income Binary'] = np.asarray(dataset_orig.labels).ravel()

    # Include protected attribute 'sex' explicitly (first protected-attribute column)
    df['sex'] = dataset_orig.protected_attributes[:, 0]

    return df

# Uniform Sampling Preprocessing
def uniform_sampling(df, target, protected_attr, random_state=42):
    """
    Down-sample ``df`` so the four (protected_attr, target) groups are equal in size.

    The groups are the combinations of ``protected_attr`` in {0, 1} and
    ``target`` in {0, 1}. Each group is randomly reduced to the size of the
    smallest group; rows whose values fall outside those combinations are not
    included in the result.

    Args:
        df (pd.DataFrame): Input data containing both columns.
        target (str): Name of the binary label column.
        protected_attr (str): Name of the binary protected-attribute column.
        random_state (int): Seed passed to ``DataFrame.sample`` for every
            group. Defaults to 42.

    Returns:
        pd.DataFrame: The sampled groups concatenated in the order
        (0, 0), (0, 1), (1, 0), (1, 1), keeping the original row index labels.
    """
    protected = df[protected_attr]
    label = df[target]

    masks = [
        (protected == 0) & (label == 0),  # Female, low income
        (protected == 0) & (label == 1),  # Female, high income
        (protected == 1) & (label == 0),  # Male, low income
        (protected == 1) & (label == 1),  # Male, high income
    ]

    # Vectorized count of True entries per mask (the builtin sum() would walk
    # each Series element by element in Python).
    min_size = int(min(mask.sum() for mask in masks))

    sampled_dfs = [df[mask].sample(min_size, random_state=random_state) for mask in masks]
    return pd.concat(sampled_dfs)

def _print_banner(title):
    """Print a section title framed by two 40-character '=' rules."""
    print("\n" + "="*40)
    print(title)
    print("="*40)


def _print_metrics(heading, metrics):
    """Print a heading, then one 'name: value' line (4 decimals) per metric."""
    print(heading)
    for metric, value in metrics.items():
        print(f"{metric}: {value:.4f}")


def _print_improvement(heading, before, after):
    """Print, per metric in `before`, the signed change from `before` to `after`."""
    print(heading)
    for metric in before:
        improvement = after[metric] - before[metric]
        print(f"{metric}: {improvement:+.4f} (Before: {before[metric]:.4f}, After: {after[metric]:.4f})")


def _train_and_predict(data, target):
    """Split 80/20 (stratified), standardize, fit logistic regression; return (X_test, y_test, y_pred)."""
    X = data.drop(columns=[target])
    y = data[target]
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42, stratify=y
    )

    # A fresh scaler per model, fitted on that model's training split only.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    model = LogisticRegression(max_iter=1000)
    model.fit(X_train_scaled, y_train)

    return X_test, y_test, model.predict(X_test_scaled)


def _model_fairness(X_test, y_test, y_pred, target, protected_attr):
    """Evaluate model-mode group fairness on the test features plus their true labels."""
    # The test features had the target column dropped before the split; put
    # the true labels back so the frame actually contains the column named by
    # `target`.
    eval_frame = X_test.copy()
    eval_frame[target] = y_test
    return eval_group_fairness(
        eval_frame, target=target, protected_attr=protected_attr, mode='model', y_pred=y_pred
    )


def main():
    """
    Compare group fairness before and after uniform sampling.

    Loads the data, reports dataset-level and model-level fairness metrics on
    the original data, repeats both on the uniformly sampled data, and prints
    the per-metric difference (after minus before). Everything is written to
    stdout; nothing is returned.
    """
    target = 'Income Binary'
    protected_attr = 'sex'

    # Load preprocessed data using AIF360 format
    df = load_data()

    # =================================================================
    # BEFORE SAMPLING (Original Data)
    # =================================================================
    _print_banner("BEFORE UNIFORM SAMPLING (ORIGINAL DATA)")

    # 1. Dataset fairness (original)
    orig_fairness = eval_group_fairness(df, target, protected_attr, mode='dataset')
    _print_metrics("\nDataset Fairness Metrics (Original):", orig_fairness)

    # 2. Train model on original data and predict on its held-out split
    X_test_orig, y_test_orig, y_pred_orig = _train_and_predict(df, target)
    print("\nModel Performance (Original):")
    print(classification_report(y_test_orig, y_pred_orig))

    # 3. Model fairness (original) -- evaluated on a frame that includes the
    #    true labels, the same way as the after-sampling evaluation below
    orig_model_fairness = _model_fairness(
        X_test_orig, y_test_orig, y_pred_orig, target, protected_attr
    )
    _print_metrics("\nModel Group Fairness Metrics (Original):", orig_model_fairness)

    # =================================================================
    # AFTER SAMPLING
    # =================================================================
    _print_banner("AFTER UNIFORM SAMPLING")

    # 4. Uniform sampling
    df_sampled = uniform_sampling(df, target, protected_attr)

    # 5. Dataset fairness (after sampling)
    sampled_fairness = eval_group_fairness(df_sampled, target, protected_attr, mode='dataset')
    _print_metrics("\nDataset Fairness Metrics (After Sampling):", sampled_fairness)

    # 6. Train model on sampled data and predict on its held-out split
    X_test_sampled, y_test_sampled, y_pred_sampled = _train_and_predict(df_sampled, target)
    print("\nModel Performance (After Sampling):")
    print(classification_report(y_test_sampled, y_pred_sampled))

    # 7. Model fairness (after sampling)
    sampled_model_fairness = _model_fairness(
        X_test_sampled, y_test_sampled, y_pred_sampled, target, protected_attr
    )
    _print_metrics("\nModel Fairness Metrics (After Sampling):", sampled_model_fairness)

    # =================================================================
    # IMPROVEMENT COMPARISON
    # =================================================================
    _print_banner("IMPROVEMENT COMPARISON")

    _print_improvement("\nDataset Group Fairness Improvement:", orig_fairness, sampled_fairness)
    _print_improvement("\nModel Group Fairness Improvement:", orig_model_fairness, sampled_model_fairness)

# Run the experiment only when this file is executed directly, not when it is imported.
if __name__ == "__main__":
    main()

  vect_normalized_discounted_cumulative_gain = vmap(
  monte_carlo_vect_ndcg = vmap(vect_normalized_discounted_cumulative_gain, in_dims=(0,))
  df['sex'] = df['sex'].replace({'Female': 0.0, 'Male': 1.0})



BEFORE UNIFORM SAMPLING (ORIGINAL DATA)

Dataset Fairness Metrics (Original):
Statistical Parity Difference: -0.1945
Disparate Impact: 0.3597
Demographic Parity: -0.1945

Model Performance (Original):
              precision    recall  f1-score   support

         0.0       0.83      0.94      0.88      7431
         1.0       0.66      0.38      0.48      2338

    accuracy                           0.80      9769
   macro avg       0.74      0.66      0.68      9769
weighted avg       0.79      0.80      0.78      9769


Model Group Fairness Metrics (Original):
Statistical Parity Difference: -0.2061
Disparate Impact: 0.0000
Demographic Parity: -0.2061

AFTER UNIFORM SAMPLING

Dataset Fairness Metrics (After Sampling):
Statistical Parity Difference: 0.0000
Disparate Impact: 1.0000
Demographic Parity: 0.0000

Model Performance (After Sampling):
              precision    recall  f1-score   support

         0.0       0.75      0.67      0.70       708
         1.0       0.70      0.77