In [None]:
import pandas as pd
import numpy as np

In [None]:
# Read the Adult CSV from the path below and preview its first two rows.
ADULT_CSV_PATH = r"/content/drive/MyDrive/Datasets_for_paper_02/uciml_data.csv"
df_adult = pd.read_csv(ADULT_CSV_PATH)
df_adult.head(2)

In [None]:
# Read the COMPAS CSV from the path below and preview its first two rows.
COMPAS_CSV_PATH = r"/content/drive/MyDrive/Datasets_for_paper_02/compas_df.csv"
df_compas = pd.read_csv(COMPAS_CSV_PATH)
df_compas.head(2)

In [None]:
# Read the German Credit CSV from the path below and preview its first two rows.
GERMAN_CSV_PATH = r"/content/drive/MyDrive/Datasets_for_paper_02/german_df.csv"
df_german = pd.read_csv(GERMAN_CSV_PATH)
df_german.head(2)

In [None]:
# ─── Imports ────────────────────────────────────────────────────────────────────
import warnings
import numpy as np
import pandas as pd
from scipy import stats
from sklearn.model_selection import StratifiedKFold
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.ensemble import HistGradientBoostingClassifier
from aif360.datasets import StandardDataset
from aif360.algorithms.preprocessing import Reweighing
from fairlearn.reductions import ExponentiatedGradient, EqualizedOdds
from fairlearn.postprocessing import ThresholdOptimizer
from tqdm import tqdm

warnings.filterwarnings("ignore")

# ─── DataFrames are already loaded as df_adult, df_compas, df_german ────────────
# This code will use your pre-loaded DataFrames.

# ─── Feature Engineering & Definition (Based on YOUR column names) ──────────────

# Adult Dataset
# label: 1 when income is above 50K. Besides surrounding whitespace, a trailing
# '.' is removed before comparing: the test split of the original UCI release
# writes its labels as '>50K.', and such rows would otherwise be encoded as 0.
df_adult['label'] = (df_adult['income'].str.strip().str.rstrip('.') == '>50K').astype(int)
# Protected attributes as 0/1 flags: 1 for 'Male' / 'White', 0 for anything else
# (missing values compare unequal and therefore also become 0).
df_adult['gender_numeric'] = (df_adult['gender'].str.strip() == 'Male').astype(int)
df_adult['race_numeric'] = (df_adult['race'].str.strip() == 'White').astype(int)

# COMPAS Dataset
# label is the two_year_recid column taken as-is.
df_compas['label'] = df_compas['two_year_recid']
# Protected attributes as 0/1 flags: 1 for 'Male' / 'Caucasian', 0 for anything else.
df_compas['gender_numeric'] = (df_compas['sex'].str.strip() == 'Male').astype(int)
df_compas['race_numeric'] = (df_compas['race'].str.strip() == 'Caucasian').astype(int)

# German Credit Dataset
# label: 1 when target equals 2, else 0.
df_german['label'] = (df_german['target'] == 2).astype(int)
# gender_numeric: 1 for personal_status codes A91, A93 and A94, 0 for every other value.
df_german['gender_numeric'] = df_german['personal_status'].map(lambda x: 1 if str(x).strip() in ['A91','A93', 'A94'] else 0)


# --- THE DEFINITIVE FIX: A strict "allow-list" of known safe features based on YOUR columns ---
# Per-dataset allow-list of input columns. Only these columns are ever handed
# to the preprocessing pipeline; anything not listed is left out of X.
RAW_FEATURES = dict(
    adult=[
        'age',
        'workclass',
        'fnlwgt',
        'education',
        'educational-num',
        'marital-status',
        'occupation',
        'relationship',
        'capital-gain',
        'capital-loss',
        'hours-per-week',
        'native-country',
    ],
    # Features used in the original ProPublica analysis, excluding leaky ones
    compas=[
        'age',
        'juv_fel_count',
        'juv_misd_count',
        'juv_other_count',
        'priors_count',
        'c_charge_degree',
    ],
    german=[
        'status',
        'duration',
        'credit_history',
        'purpose',
        'amount',
        'savings',
        'employment',
        'installment_rate',
        'other_debtors',
        'residence_since',
        'property',
        'age',
        'other_installment_plans',
        'housing',
        'existing_credits',
        'job',
        'num_people_liable',
        'telephone',
        'foreign_worker',
    ],
)

# DataFrames to evaluate, keyed by dataset name (iteration order: adult, compas, german).
datasets = dict(adult=df_adult, compas=df_compas, german=df_german)
# For each dataset, the 0/1 column used as the protected attribute.
prot_map = dict(
    adult='gender_numeric',
    compas='race_numeric',
    german='gender_numeric',
)

# ─── Fairness metrics ───────────────────────────────────────────────────────────
def compute_spd(y_pred, sens):
    """Return the statistical parity difference between the two groups.

    Computed as mean(y_pred | sens == 1) - mean(y_pred | sens == 0).

    Args:
        y_pred: Array-like of predictions (0/1).
        sens: Array-like of group membership flags (0/1), same length as y_pred.

    Returns:
        The difference of the two group means; NaN if either group is empty.
    """
    # Coerce to arrays, as compute_eod does. Without this, plain lists make
    # `sens == 1` evaluate to a single bool and `y_pred[False]` silently
    # indexes element 0 instead of masking.
    y_pred, sens = np.asarray(y_pred), np.asarray(sens)
    return np.mean(y_pred[sens == 1]) - np.mean(y_pred[sens == 0])
def compute_eod(y, y_pred, sens):
    """Return the true-positive-rate gap between group 1 and group 0.

    Args:
        y: Array-like of true labels (0/1).
        y_pred: Array-like of predicted labels (0/1).
        sens: Array-like of group membership flags (0/1).

    Returns:
        TPR(sens == 1) - TPR(sens == 0). A group without any positive
        true label contributes a TPR of 0.
    """
    truth = np.asarray(y)
    predicted = np.asarray(y_pred)
    group_flags = np.asarray(sens)

    def _tpr(group_value):
        # Restrict to one group, then count positives and correctly predicted positives.
        members = group_flags == group_value
        actual_pos = truth[members] == 1
        hits = actual_pos & (predicted[members] == 1)
        # max(1, ...) avoids dividing by zero when the group has no positives.
        return hits.sum() / max(1, actual_pos.sum())

    tpr_unpriv = _tpr(0)
    tpr_priv = _tpr(1)
    return tpr_priv - tpr_unpriv

# ─── Cross-Validation Loop ──────────────────────────────────────────────────────
# One dict per (dataset, method, fold) for which predictions were produced.
results = []
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
methods = ['baseline', 'pre', 'in', 'post']

for name, df in tqdm(datasets.items(), desc="Processing datasets"):
    prot_col_name = prot_map[name]

    # --- Create X, y, s from the safe lists, ensuring no leakage ---
    X = df[RAW_FEATURES[name]].copy()
    y = df['label'].copy()
    s = df[prot_col_name].copy()

    # Stratify jointly on label and protected group.
    stratify_key = y.astype(str) + '_' + s.astype(str)

    numeric_cols = X.select_dtypes(include=np.number).columns.tolist()
    categorical_cols = X.select_dtypes(include=['object', 'category']).columns.tolist()

    # Scale numeric columns, one-hot encode object/category columns.
    prep = ColumnTransformer([
        ('num', StandardScaler(), numeric_cols),
        ('cat', OneHotEncoder(handle_unknown='ignore', sparse_output=False), categorical_cols)
    ])

    for method in tqdm(methods, desc=f"Methods for {name}", leave=False):
        # cv uses a fixed integer random_state, so each method iterates the same folds.
        for train_idx, test_idx in cv.split(X, stratify_key):
            X_tr, y_tr = X.iloc[train_idx], y.iloc[train_idx]
            X_te, y_te = X.iloc[test_idx], y.iloc[test_idx]
            s_tr, s_te = s.iloc[train_idx].values, s.iloc[test_idx].values

            model = HistGradientBoostingClassifier(random_state=42)
            # Preprocessing is fitted on the training fold only, then applied to the test fold.
            X_tr_proc = prep.fit_transform(X_tr)
            X_te_proc = prep.transform(X_te)

            sw, y_pred = None, None

            if method == 'pre':
                # Pre-processing mitigation: derive per-row instance weights with
                # AIF360 Reweighing from the training fold's labels and groups.
                df_tr_proc = pd.DataFrame(X_tr_proc, columns=prep.get_feature_names_out())
                df_tr_proc['label'] = y_tr.values
                df_tr_proc[prot_col_name] = s_tr
                sd = StandardDataset(df_tr_proc, label_name='label', favorable_classes=[1],
                                     protected_attribute_names=[prot_col_name], privileged_classes=[[1]])
                rw = Reweighing(unprivileged_groups=[{prot_col_name: 0}], privileged_groups=[{prot_col_name: 1}])
                sd_transformed = rw.fit_transform(sd)
                # From here on y_tr holds the labels returned by AIF360 for this fold.
                sw, y_tr = sd_transformed.instance_weights, sd_transformed.labels.ravel()
                # StandardDataset removes rows with missing values (see its docs);
                # if that happened the weights no longer line up with X_tr_proc,
                # so fail with an explicit message instead of an opaque shape error.
                if len(sw) != X_tr_proc.shape[0]:
                    raise ValueError(
                        f"Reweighing returned {len(sw)} weights for {X_tr_proc.shape[0]} "
                        f"training rows in dataset '{name}'; rows with missing values "
                        "were probably dropped by StandardDataset."
                    )

            try:
                if method in ['baseline', 'pre']:
                    model.fit(X_tr_proc, y_tr, sample_weight=sw)
                    y_pred = model.predict(X_te_proc)
                elif method == 'post':
                    # Post-processing mitigation on top of the already fitted model.
                    model.fit(X_tr_proc, y_tr)
                    post_opt = ThresholdOptimizer(estimator=model, constraints="equalized_odds", prefit=True)
                    post_opt.fit(X_tr_proc, y_tr, sensitive_features=s_tr)
                    # predict() is randomized; seed it so fold metrics are reproducible.
                    y_pred = post_opt.predict(X_te_proc, sensitive_features=s_te, random_state=42)
                else: # 'in'
                    # In-processing mitigation via the reductions approach.
                    mitigator = ExponentiatedGradient(estimator=model, constraints=EqualizedOdds())
                    mitigator.fit(X_tr_proc, y_tr, sensitive_features=s_tr)
                    # predict() is randomized; seed it so fold metrics are reproducible.
                    y_pred = mitigator.predict(X_te_proc, random_state=42)

            except Exception as e:
                # Only the two known degenerate-fold errors are tolerated; the fold is
                # then marked with NaN predictions and skipped below.
                if "Degenerate labels" in str(e) or "Only one class present" in str(e):
                    y_pred = np.full(y_te.shape, np.nan)
                else:
                    # Bare raise keeps the original traceback intact.
                    raise

            if not np.isnan(y_pred).any():
                results.append({'dataset': name, 'method': method,
                                'accuracy': np.mean(y_pred == y_te.values),
                                'spd': compute_spd(y_pred, s_te),
                                'eod': compute_eod(y_te, y_pred, s_te)})

# ─── Summarize results ─────────────────────────────────────────────────────────
df_res = pd.DataFrame(results)
if df_res.empty:
    print("No results were generated.")
else:
    group_keys = ['dataset', 'method']

    # Mean and standard deviation across folds for each metric, producing the
    # columns acc_mean, acc_std, spd_mean, spd_std, eod_mean, eod_std in that order.
    summary_spec = {
        f"{short}_{stat}": (column, stat)
        for column, short in [('accuracy', 'acc'), ('spd', 'spd'), ('eod', 'eod')]
        for stat in ('mean', 'std')
    }
    agg = df_res.groupby(group_keys).agg(**summary_spec).reset_index()

    def ci95(x):
        """Format `x` as 'mean ± half-width' of a 95% Student-t confidence interval.

        With fewer than two observations the half-width is reported as 'nan'.
        """
        n_obs = len(x)
        if n_obs < 2:
            return f"{x.mean():.3f} ± nan"
        center = x.mean()
        half_width = stats.sem(x) * stats.t.ppf((1 + 0.95) / 2., n_obs - 1)
        return f"{center:.3f} ± {half_width:.3f}"

    def _ci_row(group):
        # One formatted confidence interval per metric for a (dataset, method) group.
        return pd.Series({
            f"{metric}_CI95": ci95(group[metric])
            for metric in ('accuracy', 'spd', 'eod')
        })

    ci_results = df_res.groupby(group_keys).apply(_ci_row).reset_index()

    agg = pd.merge(agg, ci_results, on=group_keys)

    # Paired t-test of each mitigation method's EOD against the baseline's EOD,
    # run only when both have the same number (> 1) of fold results.
    tests = []
    for ds_name in datasets:
        ds_rows = df_res[df_res['dataset'] == ds_name]
        methods_present = ds_rows['method'].unique()
        if 'baseline' not in methods_present:
            continue
        baseline_eod = ds_rows[ds_rows['method'] == 'baseline']['eod']
        for candidate in ['pre', 'in', 'post']:
            if candidate not in methods_present:
                continue
            candidate_eod = ds_rows[ds_rows['method'] == candidate]['eod']
            if len(baseline_eod) != len(candidate_eod) or len(baseline_eod) <= 1:
                continue
            t_stat, p_val = stats.ttest_rel(baseline_eod, candidate_eod, nan_policy='omit')
            tests.append({'dataset': ds_name, 'method': candidate, 't_stat': t_stat, 'p_val': p_val})
    df_tests = pd.DataFrame(tests)

    print("\n=== CV Summary ===")
    print(agg.to_string(index=False))
    print("\n=== Paired T‑Test on EOD ===")
    print(df_tests.to_string(index=False))