In [2]:
# =============================================================================
# FINAL PUBLICATION-GRADE FAIRNESS ANALYSIS PIPELINE (Corrected & Practical)
# Author: [Your Name]
# Date: [Date]
# Description: A comprehensive, function-based pipeline to compare pre-, in-,
#              and post-processing bias mitigation techniques, including
#              intersectional approaches and SHAP-based interpretability analysis.
# =============================================================================

import os
import sys
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm
import warnings
import itertools
import shap

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC
from sklearn.exceptions import ConvergenceWarning
from sklearn.base import clone

# AIF360 imports
try:
    from aif360.datasets import BinaryLabelDataset
    from aif360.metrics import ClassificationMetric
    from aif360.algorithms.preprocessing import Reweighing
    from aif360.algorithms.postprocessing import CalibratedEqOddsPostprocessing
except ImportError:
    print("AIF360 not found. Please install it using: pip install aif360")
    sys.exit(1)

# Fairlearn imports
try:
    from fairlearn.reductions import ExponentiatedGradient, EqualizedOdds
except ImportError:
    print("Fairlearn not found. Please install it using: pip install fairlearn")
    sys.exit(1)

# Suppress warnings
warnings.filterwarnings("ignore", category=ConvergenceWarning)
warnings.filterwarnings("ignore", category=FutureWarning)

# =============================================================================
# 1. CONFIGURATION
# =============================================================================
# Root folder holding the input CSV files; every output path is derived from it.
DATA_FOLDER_PATH = r'C:\Users\Maverick\Downloads\Bias_mitigation'
RESULTS_FOLDER_PATH = os.path.join(DATA_FOLDER_PATH, 'results_bias_mitigation')

# Single source of truth for paths, split/seed settings and group encodings.
CONFIG = {
    # --- locations ---
    'DATA_FOLDER': DATA_FOLDER_PATH,
    'RESULTS_FOLDER': RESULTS_FOLDER_PATH,
    'PLOTS_FOLDER': os.path.join(RESULTS_FOLDER_PATH, 'plots'),
    'SHAP_FOLDER': os.path.join(RESULTS_FOLDER_PATH, 'plots', 'SHAP'),
    'RESULTS_CSV_PATH': os.path.join(RESULTS_FOLDER_PATH, 'full_results.csv'),
    'PIVOT_TABLE_CSV_PATH': os.path.join(RESULTS_FOLDER_PATH, 'pivot_table_summary.csv'),
    # --- experiment settings ---
    'RANDOM_STATE': 42,
    'TEST_SIZE': 0.3,
    'FAVORABLE_LABEL': 1.0,
    'UNFAVORABLE_LABEL': 0.0,
    # --- protected attributes: every one is binary, 1 = privileged, 0 = unprivileged ---
    'PROTECTED_ATTRIBUTES_MAP': {
        attr_name: {'priv': 1, 'unpriv': 0}
        for attr_name in ('gender_numeric', 'race_numeric', 'age_group')
    },
}

# Seed NumPy's global generator once so seed-less consumers stay repeatable.
np.random.seed(CONFIG['RANDOM_STATE'])

# =============================================================================
# 2. DATA PREPROCESSING FUNCTIONS (Unchanged)
# =============================================================================
def load_preprocess_adult(path):
    """Load the Adult income CSV and return a fully numeric modelling frame.

    Steps: strip column names, drop repeated header rows, drop rows containing
    the '?' missing-value marker, cast numeric columns, binarise the label and
    the protected attributes, and one-hot encode the categorical columns.

    Args:
        path: Path (or file-like object) accepted by ``pd.read_csv``.

    Returns:
        DataFrame with a fresh 0..n-1 index. ``income`` is the 0/1 label and
        ``gender_numeric`` / ``race_numeric`` / ``age_group`` are 0/1 flags.
    """
    df = pd.read_csv(path)
    df.columns = df.columns.str.strip()

    # Build the cleaned frame by chaining on new objects instead of mutating a
    # boolean-filtered slice in place, which is a chained-assignment hazard.
    df = (
        df[df['age'] != 'age']      # repeated header rows embedded in the data
        .replace('?', np.nan)       # '?' is the missing-value marker
        .dropna()
        .copy()
    )

    numeric_cols = ['age', 'fnlwgt', 'educational-num', 'capital-gain', 'capital-loss', 'hours-per-week']
    df[numeric_cols] = df[numeric_cols].apply(pd.to_numeric)
    df['income'] = df['income'].str.contains('>50K').astype(int)

    # Strip before lower-casing, exactly as done for `race`; otherwise padded
    # values such as ' Male' map to NaN and the rows vanish in the final dropna.
    df['gender_numeric'] = df['gender'].str.strip().str.lower().map({'male': 1, 'female': 0})
    df['race_numeric'] = (df['race'].str.strip().str.lower() == 'white').astype(int)
    df['age_group'] = (df['age'] >= 40).astype(int)

    cats = ['workclass', 'education', 'marital-status', 'occupation', 'relationship', 'native-country']
    enc = OneHotEncoder(sparse_output=False, drop='first', handle_unknown='ignore')
    enc_df = pd.DataFrame(enc.fit_transform(df[cats]), columns=enc.get_feature_names_out(cats), index=df.index)
    final_df = pd.concat([df.drop(columns=cats + ['gender', 'race']), enc_df], axis=1)
    return final_df.dropna().reset_index(drop=True)

def load_preprocess_compas(path):
    """Load the COMPAS CSV and derive the label and 0/1 protected attributes.

    Keeps only age, sex, race, two_year_recid, priors_count and decile_score,
    drops incomplete rows, then adds ``label`` (copy of ``two_year_recid``),
    ``gender_numeric`` (Male=1, Female=0), ``race_numeric`` (Caucasian=1) and
    ``age_group`` (age >= 40). Rows whose ``sex`` is neither 'Male' nor
    'Female' map to NaN and are removed by the final ``dropna``.

    Args:
        path: Path (or file-like object) accepted by ``pd.read_csv``.

    Returns:
        DataFrame without the raw sex/race/two_year_recid columns, re-indexed 0..n-1.
    """
    raw = pd.read_csv(path)
    raw.columns = raw.columns.str.strip()

    wanted = ['age', 'sex', 'race', 'two_year_recid', 'priors_count', 'decile_score']
    complete = raw[wanted].dropna()

    # `assign` appends the derived columns in keyword order.
    enriched = complete.assign(
        label=complete['two_year_recid'],
        gender_numeric=complete['sex'].map({'Male': 1, 'Female': 0}),
        race_numeric=(complete['race'] == 'Caucasian').astype(int),
        age_group=(complete['age'] >= 40).astype(int),
    )

    trimmed = enriched.drop(columns=['sex', 'race', 'two_year_recid'])
    return trimmed.dropna().reset_index(drop=True)

def load_preprocess_german(path):
    """Load the German Credit CSV and return a numeric modelling frame.

    Adds ``label`` (1 when ``target == 1``), ``gender_numeric`` (1 when
    ``personal_status`` is A91, A93 or A94) and ``age_group`` (1 when
    ``age >= 25``), then one-hot encodes the categorical columns.

    Args:
        path: Path (or file-like object) accepted by ``pd.read_csv``.

    Returns:
        DataFrame without the raw categorical, ``personal_status`` and
        ``target`` columns, re-indexed 0..n-1.
    """
    raw = pd.read_csv(path)
    raw.columns = raw.columns.str.strip()

    privileged_status_codes = ['A91', 'A93', 'A94']
    frame = raw.assign(
        label=(raw['target'] == 1).astype(int),
        gender_numeric=raw['personal_status'].isin(privileged_status_codes).astype(int),
        age_group=(raw['age'] >= 25).astype(int),
    )

    cats = [
        'status', 'credit_history', 'purpose', 'savings', 'employment',
        'other_debtors', 'property', 'other_installment_plans', 'housing',
        'job', 'telephone', 'foreign_worker',
    ]
    encoder = OneHotEncoder(sparse_output=False, drop='first', handle_unknown='ignore')
    encoded = pd.DataFrame(
        encoder.fit_transform(frame[cats]),
        columns=encoder.get_feature_names_out(cats),
        index=frame.index,
    )

    kept = frame.drop(columns=cats + ['personal_status', 'target'])
    combined = pd.concat([kept, encoded], axis=1)
    return combined.dropna().reset_index(drop=True)

# =============================================================================
# 3. EVALUATION AND MITIGATION HELPER FUNCTIONS (Unchanged)
# =============================================================================
def evaluate_model(clf, X_test, y_test, aif_test_dataset, protected_attrs_map):
    """Score a fitted classifier on accuracy and per-attribute fairness.

    Args:
        clf: Object exposing ``predict(X)``.
        X_test: Feature matrix handed to ``clf.predict``.
        y_test: Ground-truth labels compared element-wise with the predictions.
        aif_test_dataset: AIF360 dataset holding the true test labels; a copy
            with its labels replaced by the predictions is built for the metrics.
        protected_attrs_map: ``{attribute: {'priv': value, 'unpriv': value}}``.

    Returns:
        Dict with ``'Accuracy'`` plus ``'<attr>_SPD'`` and ``'<attr>_EOD'``
        for every attribute in ``protected_attrs_map``.
    """
    predictions = clf.predict(X_test)

    predicted_dataset = aif_test_dataset.copy()
    predicted_dataset.labels = predictions.reshape(-1, 1)

    scores = {'Accuracy': np.mean(predictions == y_test)}
    for attr, groups in protected_attrs_map.items():
        group_metric = ClassificationMetric(
            aif_test_dataset,
            predicted_dataset,
            unprivileged_groups=[{attr: groups['unpriv']}],
            privileged_groups=[{attr: groups['priv']}],
        )
        scores[f'{attr}_SPD'] = group_metric.statistical_parity_difference()
        scores[f'{attr}_EOD'] = group_metric.equal_opportunity_difference()
    return scores

def apply_reweighing(aif_train_dataset, protected_attribute):
    """Return AIF360 Reweighing instance weights for one protected attribute.

    The privileged / unprivileged values for ``protected_attribute`` are read
    from ``CONFIG['PROTECTED_ATTRIBUTES_MAP']``.

    Args:
        aif_train_dataset: AIF360 training dataset passed to ``fit_transform``.
        protected_attribute: Key into ``CONFIG['PROTECTED_ATTRIBUTES_MAP']``.

    Returns:
        The ``instance_weights`` of the reweighed dataset.
    """
    levels = CONFIG['PROTECTED_ATTRIBUTES_MAP'][protected_attribute]
    reweigher = Reweighing(
        unprivileged_groups=[{protected_attribute: levels['unpriv']}],
        privileged_groups=[{protected_attribute: levels['priv']}],
    )
    return reweigher.fit_transform(aif_train_dataset).instance_weights

def apply_intersectional_reweighing(aif_train_dataset, protected_attrs_present):
    """Return Reweighing instance weights over intersections of attributes.

    Every combination of (priv, unpriv) values across ``protected_attrs_present``
    is enumerated. The first combination, where every attribute takes its
    privileged value, is the single privileged group; all remaining
    combinations are passed as unprivileged groups.

    Args:
        aif_train_dataset: AIF360 training dataset passed to ``fit_transform``.
        protected_attrs_present: Attribute names, each a key of
            ``CONFIG['PROTECTED_ATTRIBUTES_MAP']``.

    Returns:
        The ``instance_weights`` of the reweighed dataset.
    """
    attr_map = CONFIG['PROTECTED_ATTRIBUTES_MAP']
    # One (priv, unpriv) pair per attribute; privileged first, so the first
    # product tuple is the all-privileged combination.
    level_pairs = [
        [attr_map[name]['priv'], attr_map[name]['unpriv']]
        for name in protected_attrs_present
    ]
    combinations = itertools.product(*level_pairs)

    all_privileged = dict(zip(protected_attrs_present, next(combinations)))
    everyone_else = [dict(zip(protected_attrs_present, combo)) for combo in combinations]

    reweigher = Reweighing(
        unprivileged_groups=everyone_else,
        privileged_groups=[all_privileged],
    )
    return reweigher.fit_transform(aif_train_dataset).instance_weights

def apply_exponentiated_gradient(X_train, y_train, sensitive_features_train, estimator):
    """Fit a Fairlearn ExponentiatedGradient reduction under EqualizedOdds.

    Args:
        X_train: Training features.
        y_train: Training labels.
        sensitive_features_train: Sensitive feature values aligned with ``X_train``.
        estimator: Base estimator wrapped by the reduction.

    Returns:
        The fitted ``ExponentiatedGradient`` mitigator (constructed with eps=0.01).
    """
    fairness_constraint = EqualizedOdds()
    reduction = ExponentiatedGradient(
        estimator=estimator,
        constraints=fairness_constraint,
        eps=0.01,
    )
    reduction.fit(X_train, y_train, sensitive_features=sensitive_features_train)
    return reduction

def apply_calibrated_eq_odds(aif_test_dataset, aif_pred_dataset, protected_attribute):
    """Post-process predictions with AIF360 CalibratedEqOddsPostprocessing.

    The post-processor uses the 'fnr' cost constraint and is seeded with
    ``CONFIG['RANDOM_STATE']``. It is fitted on the supplied true/predicted
    dataset pair and then applied to that same predicted dataset.

    Args:
        aif_test_dataset: AIF360 dataset carrying the true labels.
        aif_pred_dataset: AIF360 dataset carrying predicted labels and scores.
        protected_attribute: Key into ``CONFIG['PROTECTED_ATTRIBUTES_MAP']``.

    Returns:
        Flattened array of post-processed labels.
    """
    levels = CONFIG['PROTECTED_ATTRIBUTES_MAP'][protected_attribute]
    postprocessor = CalibratedEqOddsPostprocessing(
        privileged_groups=[{protected_attribute: levels['priv']}],
        unprivileged_groups=[{protected_attribute: levels['unpriv']}],
        cost_constraint='fnr',
        seed=CONFIG['RANDOM_STATE'],
    )
    postprocessor = postprocessor.fit(aif_test_dataset, aif_pred_dataset)
    adjusted = postprocessor.predict(aif_pred_dataset)
    return adjusted.labels.ravel()

# =============================================================================
# 4. CORE EXPERIMENT PIPELINE
# =============================================================================
def run_experiment(df, label_col, protected_attrs_present, ds_name):
    """Run baseline and bias-mitigation experiments for one dataset.

    Stages, in order: (1) baseline models plus SHAP plots, (2) single-attribute
    Reweighing, (3) ExponentiatedGradient, (4) CalibratedEqOdds
    post-processing, (5) intersectional Reweighing (only when more than one
    protected attribute is present).

    Args:
        df: Preprocessed DataFrame containing features, protected attributes
            and the label column.
        label_col: Name of the label column in ``df``.
        protected_attrs_present: Protected-attribute column names present in ``df``.
        ds_name: Dataset name; used in SHAP file names and to switch off slow
            in-processing runs for 'Adult' and 'COMPAS'.

    Returns:
        DataFrame with one row per (model, mitigation) holding 'Accuracy',
        '<attr>_SPD', '<attr>_EOD', 'Model' and 'Mitigation'.
    """
    X = df.drop(columns=[label_col])
    y = df[label_col]
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=CONFIG['TEST_SIZE'], stratify=y, random_state=CONFIG['RANDOM_STATE'])
    
    # Rebuild train/test frames (features + label) for the AIF360 datasets,
    # which are constructed from the unscaled values.
    y_train_series = pd.Series(y_train, name=label_col, index=X_train.index)
    train_df = pd.concat([X_train, y_train_series], axis=1)
    y_test_series = pd.Series(y_test, name=label_col, index=X_test.index)
    test_df = pd.concat([X_test, y_test_series], axis=1)
    train_ds = BinaryLabelDataset(df=train_df, label_names=[label_col], protected_attribute_names=protected_attrs_present, favorable_label=CONFIG['FAVORABLE_LABEL'], unfavorable_label=CONFIG['UNFAVORABLE_LABEL'])
    test_ds = BinaryLabelDataset(df=test_df, label_names=[label_col], protected_attribute_names=protected_attrs_present, favorable_label=CONFIG['FAVORABLE_LABEL'], unfavorable_label=CONFIG['UNFAVORABLE_LABEL'])

    # The scaler is fitted on the training split only; every column of X,
    # including the protected attributes, is scaled for the classifiers.
    scaler = StandardScaler()
    X_train_s = scaler.fit_transform(X_train)
    X_test_s = scaler.transform(X_test)
    
    # Per-model switches:
    #   supports_sw -> fitted with sample_weight in the Reweighing stages
    #   run_inproc  -> included in the ExponentiatedGradient stage
    #   run_shap    -> a SHAP summary plot is produced for the baseline model
    MODEL_CONFIG = {
        'LR':  {'model': LogisticRegression(max_iter=1000, random_state=CONFIG['RANDOM_STATE']), 'supports_sw': True, 'run_inproc': True, 'run_shap': True},
        'RF':  {'model': RandomForestClassifier(random_state=CONFIG['RANDOM_STATE']), 'supports_sw': True, 'run_inproc': True, 'run_shap': True},
        'MLP': {'model': MLPClassifier(max_iter=500, random_state=CONFIG['RANDOM_STATE']), 'supports_sw': False, 'run_inproc': False, 'run_shap': False},
        'SVM': {'model': SVC(random_state=CONFIG['RANDOM_STATE']), 'supports_sw': True, 'run_inproc': True, 'run_shap': False},
        'GBM': {'model': GradientBoostingClassifier(random_state=CONFIG['RANDOM_STATE']), 'supports_sw': True, 'run_inproc': True, 'run_shap': True}
    }

    # For the named datasets, RF and SVM are excluded from the in-processing stage.
    if ds_name in ['Adult', 'COMPAS']:
        print("  ! Large dataset detected. Running a lean experiment for slow in-processing methods.")
        MODEL_CONFIG['RF']['run_inproc'] = False
        MODEL_CONFIG['SVM']['run_inproc'] = False

    # Fairness metrics are computed only for attributes this dataset actually has.
    eval_attrs_map = {k: v for k, v in CONFIG['PROTECTED_ATTRIBUTES_MAP'].items() if k in protected_attrs_present}
    results = []
    baseline_models = {}  # fitted baselines, reused by the post-processing stage

    # --- 1. Baseline Runs & SHAP Analysis ---
    print("  ▶ Running Baseline Models and SHAP Analysis...")
    for name, config in MODEL_CONFIG.items():
        clf = clone(config['model'])
        clf.fit(X_train_s, y_train)
        baseline_models[name] = clf
        res = evaluate_model(clf, X_test_s, y_test.values, test_ds, eval_attrs_map)
        res.update({'Model': name, 'Mitigation': 'None'})
        results.append(res)
        if config['run_shap']:
            run_shap_analysis(clf, X_train_s, X_train.columns, f"{ds_name}_Baseline_{name}")

    # --- 2. Pre-processing: Reweighing (Single Attribute) ---
    for attr in protected_attrs_present:
        tqdm_desc = f"  ▶ Pre-Proc (Reweighing: {attr.split('_')[0]})"
        # Weights depend only on the attribute, so they are computed once per attribute.
        sample_weights = apply_reweighing(train_ds, protected_attribute=attr)
        for name, config in tqdm(MODEL_CONFIG.items(), desc=tqdm_desc, leave=False):
            if not config['supports_sw']: continue
            mitigated_clf = clone(config['model'])
            mitigated_clf.fit(X_train_s, y_train, sample_weight=sample_weights)
            res = evaluate_model(mitigated_clf, X_test_s, y_test.values, test_ds, eval_attrs_map)
            res.update({'Model': name, 'Mitigation': f'Reweighing ({attr.split("_")[0]})'})
            results.append(res)
            
    # --- 3. In-processing: Exponentiated Gradient ---
    for attr in protected_attrs_present:
        tqdm_desc = f"  ▶ In-Proc (ExpGrad: {attr.split('_')[0]})"
        # Sensitive feature values come from the unscaled training frame.
        sensitive_features_train = X_train[attr]
        for name, config in tqdm(MODEL_CONFIG.items(), desc=tqdm_desc, leave=False):
            if not config['run_inproc']: continue
            mitigator = apply_exponentiated_gradient(X_train_s, y_train, sensitive_features_train, clone(config['model']))
            res = evaluate_model(mitigator, X_test_s, y_test.values, test_ds, eval_attrs_map)
            res.update({'Model': name, 'Mitigation': f'ExpGrad ({attr.split("_")[0]})'})
            results.append(res)

    # --- 4. Post-processing: Calibrated Equalized Odds ---
    # NOTE(review): the post-processor is fitted on test_ds / pred_ds and then
    # scored on the same test split, so this stage's metrics may be optimistic.
    # A held-out validation split for fitting would avoid that - confirm intent.
    for attr in protected_attrs_present:
        tqdm_desc = f"  ▶ Post-Proc (CalibEqOdds: {attr.split('_')[0]})"
        for name, baseline_clf in tqdm(baseline_models.items(), desc=tqdm_desc, leave=False):
            clf_for_proba = baseline_clf
            # The baseline SVC was built without probability=True, so a
            # probability-enabled copy is refitted here.
            # NOTE(review): this refit happens once per protected attribute.
            if name == 'SVM':
                clf_for_proba = clone(MODEL_CONFIG['SVM']['model'])
                clf_for_proba.set_params(probability=True)
                clf_for_proba.fit(X_train_s, y_train)

            # Build an AIF360 dataset holding predicted labels and the
            # favorable-class score (column 1 of predict_proba).
            pred_df = test_df.copy()
            pred_df[label_col] = clf_for_proba.predict(X_test_s)
            pred_df_probs = clf_for_proba.predict_proba(X_test_s)
            pred_df['scores'] = pred_df_probs[:,1].reshape(-1,1)
            pred_ds = BinaryLabelDataset(df=pred_df, label_names=[label_col], protected_attribute_names=protected_attrs_present, favorable_label=CONFIG['FAVORABLE_LABEL'], unfavorable_label=CONFIG['UNFAVORABLE_LABEL'], scores_names=['scores'])
            y_post_pred = apply_calibrated_eq_odds(test_ds, pred_ds, protected_attribute=attr)
            post_processed_pred_ds = test_ds.copy()
            post_processed_pred_ds.labels = y_post_pred.reshape(-1,1)
            accuracy = np.mean(y_post_pred == y_test.values)
            res_post = {'Accuracy': accuracy}
            # Same metrics as evaluate_model, computed here from labels because
            # there is no classifier object to pass in.
            for p_attr, groups in eval_attrs_map.items():
                metric = ClassificationMetric(test_ds, post_processed_pred_ds, unprivileged_groups=[{p_attr: groups['unpriv']}], privileged_groups=[{p_attr: groups['priv']}])
                res_post.update({f'{p_attr}_SPD': metric.statistical_parity_difference(), f'{p_attr}_EOD': metric.equal_opportunity_difference()})
            res_post.update({'Model': name, 'Mitigation': f'CalibEqOdds ({attr.split("_")[0]})'})
            results.append(res_post)

    # --- 5. Pre-processing: Reweighing (Intersectional) ---
    if len(protected_attrs_present) > 1:
        tqdm_desc = "  ▶ Pre-Proc (Reweighing: Intersection)"
        sample_weights = apply_intersectional_reweighing(train_ds, protected_attrs_present)
        for name, config in tqdm(MODEL_CONFIG.items(), desc=tqdm_desc, leave=False):
            if not config['supports_sw']: continue
            mitigated_clf = clone(config['model'])
            mitigated_clf.fit(X_train_s, y_train, sample_weight=sample_weights)
            res = evaluate_model(mitigated_clf, X_test_s, y_test.values, test_ds, eval_attrs_map)
            res.update({'Model': name, 'Mitigation': 'Reweighing (Intersection)'})
            results.append(res)

    return pd.DataFrame(results)

# =============================================================================
# 5. VISUALIZATION AND REPORTING
# =============================================================================
def run_shap_analysis(clf, X_train_scaled, feature_names, model_name_prefix):
    """Generate and save a SHAP summary plot for supported model types.

    Supported: LogisticRegression (LinearExplainer), RandomForestClassifier and
    GradientBoostingClassifier (TreeExplainer). Any other model is skipped.
    Failures are reported with a message rather than raised, and the figure is
    always closed.

    Args:
        clf: Fitted classifier.
        X_train_scaled: 2-D array of scaled training features; at most 500 rows
            are sampled for the explanation.
        feature_names: Column names matching the columns of ``X_train_scaled``.
        model_name_prefix: Label used in the plot title and output file name.

    Returns:
        None. The plot is written to ``CONFIG['SHAP_FOLDER']``.
    """
    if not isinstance(clf, (LogisticRegression, RandomForestClassifier, GradientBoostingClassifier)):
        print(f"    - Skipping SHAP for {model_name_prefix.split('_')[-1]} (computationally expensive).")
        return

    print(f"    - Generating SHAP plot for {model_name_prefix}...")
    X_train_sample = shap.sample(X_train_scaled, min(500, X_train_scaled.shape[0]))

    fig = None
    try:
        if isinstance(clf, (RandomForestClassifier, GradientBoostingClassifier)):
            explainer = shap.TreeExplainer(clf)
        else:  # Handles LogisticRegression
            explainer = shap.LinearExplainer(clf, X_train_sample)
        shap_values = explainer.shap_values(X_train_sample)

        # Reduce per-class output to the positive class. Older SHAP returns a
        # list with one array per class; newer SHAP returns one array shaped
        # (samples, features, classes) for multi-output tree models.
        if isinstance(shap_values, list):
            shap_values = shap_values[1]
        elif getattr(shap_values, 'ndim', 2) == 3:
            shap_values = shap_values[:, :, 1]

        X_train_sample_df = pd.DataFrame(X_train_sample, columns=feature_names)

        fig = plt.figure()
        shap.summary_plot(shap_values, X_train_sample_df, show=False, plot_size=None)
        plt.title(f"SHAP Summary for {model_name_prefix}")
        plt.savefig(os.path.join(CONFIG['SHAP_FOLDER'], f"shap_{model_name_prefix}.png"), bbox_inches='tight')
    except Exception as e:
        print(f"      ! Could not generate SHAP plot for {model_name_prefix}: {e}")
    finally:
        # Close on failure too; otherwise the notebook later renders the
        # leaked empty figure.
        if fig is not None:
            plt.close(fig)

def plot_tradeoffs(df, dataset_name):
    """Save one accuracy-vs-|fairness metric| scatter plot per fairness column.

    Fairness columns are those whose name contains '_SPD' or '_EOD'. Columns
    that are entirely null for the dataset are skipped.

    Args:
        df: Combined results frame with 'Dataset', 'Accuracy', 'Model' and
            'Mitigation' columns plus the fairness metric columns.
        dataset_name: Value of 'Dataset' to plot.

    Returns:
        None. PNG files are written to ``CONFIG['PLOTS_FOLDER']``.
    """
    print(f"  > Generating Accuracy-vs-Fairness plots for {dataset_name}...")
    subset = df[df['Dataset'] == dataset_name].copy()
    metric_columns = [name for name in subset.columns if '_SPD' in name or '_EOD' in name]

    for metric in metric_columns:
        # Nothing to draw when this dataset never reported the metric.
        if subset[metric].isnull().all():
            continue

        abs_column = f'Abs_{metric}'
        plt.style.use('seaborn-v0_8-whitegrid')
        fig, ax = plt.subplots(figsize=(12, 7))
        subset[abs_column] = subset[metric].abs()
        sns.scatterplot(
            data=subset,
            x=abs_column,
            y='Accuracy',
            hue='Model',
            style='Mitigation',
            s=200,
            ax=ax,
            alpha=0.8,
        )
        ax.set_title(f'Accuracy vs. Fairness Trade-off for {dataset_name}', fontsize=16, fontweight='bold')
        ax.set_xlabel(f'|{metric}| (Closer to 0 is Fairer)', fontsize=12)
        ax.set_ylabel('Accuracy', fontsize=12)
        ax.legend(title='Configuration', bbox_to_anchor=(1.02, 1), loc='upper left', borderaxespad=0.)
        plt.tight_layout(rect=[0, 0, 0.85, 1])

        output_path = os.path.join(CONFIG['PLOTS_FOLDER'], f"tradeoff_{dataset_name}_{metric}.png")
        plt.savefig(output_path)
        plt.close(fig)

def plot_fairness_heatmap(results_df, dataset_name):
    """Save a heatmap of fairness side effects of each targeted mitigation.

    For every mitigated row whose label has the form ``'<Method> (<target>)'``,
    computes the change in |metric| relative to the same model's baseline
    ('None') row, for every fairness metric whose attribute differs from the
    targeted one. Positive values mean the metric moved further from zero.

    Args:
        results_df: Combined results frame with 'Dataset', 'Model',
            'Mitigation' and '<attr>_SPD' / '<attr>_EOD' columns.
        dataset_name: Value of 'Dataset' to plot.

    Returns:
        None. Writes a PNG to ``CONFIG['PLOTS_FOLDER']``, or returns without
        plotting when there is nothing to show.
    """
    print(f"  > Generating Fairness Interaction Heatmap for {dataset_name}...")
    df_d = results_df[results_df['Dataset'] == dataset_name].copy()
    baseline = df_d[df_d['Mitigation'] == 'None'].set_index('Model')

    # No capture group here: `str.contains` with a group emits a pandas
    # UserWarning. `na=False` keeps the mask boolean if a label is missing.
    has_target = df_d['Mitigation'].str.contains(r'\(\w+\)', regex=True, na=False)
    mitigated = df_d[has_target].copy()
    if mitigated.empty:
        return

    # The capture group is needed here to pull out the targeted attribute.
    mitigated['target_attr'] = mitigated['Mitigation'].str.extract(r'\((\w+)\)')[0]
    fairness_cols = [c for c in df_d.columns if '_SPD' in c or '_EOD' in c]

    heatmap_data = []
    for _, row in mitigated.iterrows():
        model = row['Model']
        if model not in baseline.index:
            continue
        target_attr_base = row['target_attr']
        base_model_metrics = baseline.loc[model]

        for metric_col in fairness_cols:
            # Compare on the attribute's first token ('gender', 'race', 'age'),
            # which is how the mitigation labels name their target.
            side_effect_attr_base = metric_col.split('_')[0]
            if target_attr_base == side_effect_attr_base:
                continue

            change = np.abs(row[metric_col]) - np.abs(base_model_metrics[metric_col])
            heatmap_data.append({
                'Mitigation': f"{row['Model']}-{row['Mitigation']}",
                'Side-Effect Metric': metric_col,
                'Fairness Change': change
            })

    if not heatmap_data:
        return
    heatmap_df = pd.DataFrame(heatmap_data).pivot_table(index='Mitigation', columns='Side-Effect Metric', values='Fairness Change')

    plt.figure(figsize=(12, max(8, len(heatmap_df) * 0.5)))
    sns.heatmap(heatmap_df, annot=True, cmap='RdYlGn_r', center=0, fmt='.3f', linewidths=.5)
    plt.title(f'Fairness Interaction Heatmap for {dataset_name}\n(Red = Worsened Fairness, Green = Improved Fairness)')
    plt.savefig(os.path.join(CONFIG['PLOTS_FOLDER'], f"heatmap_interaction_{dataset_name}.png"), bbox_inches='tight')
    plt.close()

# =============================================================================
# 6. MAIN EXECUTION
# =============================================================================
def main():
    """Run the whole pipeline: load datasets, run experiments, save, plot.

    Stages:
      1. Pre-flight: load and preprocess every dataset; exit with status 1 if
         any of them fails.
      2. Run ``run_experiment`` for each dataset that has at least one
         configured protected attribute.
      3. Print and save the combined results and the pivot summary.
      4. Generate trade-off plots and interaction heatmaps per dataset.
    """
    os.makedirs(CONFIG['PLOTS_FOLDER'], exist_ok=True)
    os.makedirs(CONFIG['SHAP_FOLDER'], exist_ok=True)
    print(f"All results will be saved in: {CONFIG['RESULTS_FOLDER']}")

    datasets_info = [
        {'name': 'Adult', 'loader': load_preprocess_adult, 'file': 'uciml_data.csv', 'label': 'income'},
        {'name': 'COMPAS', 'loader': load_preprocess_compas, 'file': 'compas_df.csv', 'label': 'label'},
        {'name': 'GermanCredit', 'loader': load_preprocess_german, 'file': 'german_df.csv', 'label': 'label'}
    ]

    print("\n" + "="*50 + "\nSTAGE 1: PRE-FLIGHT DATA LOADING CHECK\n" + "="*50)
    all_loaded_successfully = True
    for d_info in datasets_info:
        # Every dataset is attempted before halting, so all loading problems
        # are reported in one run.
        try:
            path = os.path.join(CONFIG['DATA_FOLDER'], d_info['file'])
            print(f"Attempting to load [{d_info['name']}] from '{path}'...")
            d_info['df'] = d_info['loader'](path)
            print(f"  ✅ Success! Loaded {d_info['name']} data. Shape: {d_info['df'].shape}")
        except Exception as e:
            print(f"  ❌ FAILED: An error occurred while loading or processing {d_info['name']}: {e}")
            all_loaded_successfully = False
    if not all_loaded_successfully:
        print("\n❌ One or more datasets failed to load. Halting execution.")
        sys.exit(1)
    else:
        print("\n✅ All datasets loaded successfully. Proceeding to experiments.\n")

    print("="*50 + "\nFINAL EXPERIMENTS (ALL PHASES)\n" + "="*50)
    all_results = []
    for d_info in tqdm(datasets_info, desc="▶ Datasets"):
        print(f"\nProcessing [{d_info['name']}]...")
        processed_df = d_info['df']
        protected_attrs_in_df = [attr for attr in CONFIG['PROTECTED_ATTRIBUTES_MAP'] if attr in processed_df.columns]
        if not protected_attrs_in_df:
            print(f"  ! Warning: No protected attributes found for {d_info['name']}. Skipping.")
            continue
        print(f"  > Found protected attributes: {protected_attrs_in_df}")
        res_df = run_experiment(processed_df, d_info['label'], protected_attrs_in_df, d_info['name'])
        res_df['Dataset'] = d_info['name']
        all_results.append(res_df)

    # pd.concat raises ValueError on an empty list; when every dataset was
    # skipped above, stop with a clear message instead.
    if not all_results:
        print("\n❌ No experiment results were produced (every dataset was skipped). Nothing to report.")
        return

    final_results_df = pd.concat(all_results, ignore_index=True)

    print("\n\n" + "="*30 + "\n   COMBINED EXPERIMENT RESULTS\n" + "="*30)
    pd.set_option('display.max_rows', 200)
    pd.set_option('display.width', 140)
    pivot_cols = ['Accuracy'] + sorted([c for c in final_results_df.columns if '_SPD' in c or '_EOD' in c])
    pivot = final_results_df.pivot_table(index=['Dataset', 'Model', 'Mitigation'], values=pivot_cols)
    print(pivot.to_string(float_format="%.4f"))

    # Saving is best-effort: a write failure is reported but plots are still produced.
    try:
        final_results_df.to_csv(CONFIG['RESULTS_CSV_PATH'], index=False)
        print(f"\n✅ Full results saved to: {CONFIG['RESULTS_CSV_PATH']}")
        pivot.to_csv(CONFIG['PIVOT_TABLE_CSV_PATH'])
        print(f"✅ Pivot table summary saved to: {CONFIG['PIVOT_TABLE_CSV_PATH']}")
    except Exception as e:
        print(f"\n❌ Error saving results to file: {e}")

    print("\n" + "="*30 + "\n   GENERATING PLOTS\n" + "="*30)
    for ds_name in final_results_df['Dataset'].unique():
        plot_tradeoffs(final_results_df, ds_name)
        plot_fairness_heatmap(final_results_df, ds_name)

if __name__ == '__main__':
    main()

All results will be saved in: C:\Users\Maverick\Downloads\Bias_mitigation\results_bias_mitigation

STAGE 1: PRE-FLIGHT DATA LOADING CHECK
Attempting to load [Adult] from 'C:\Users\Maverick\Downloads\Bias_mitigation\uciml_data.csv'...
  ✅ Success! Loaded Adult data. Shape: (45222, 95)
Attempting to load [COMPAS] from 'C:\Users\Maverick\Downloads\Bias_mitigation\compas_df.csv'...
  ✅ Success! Loaded COMPAS data. Shape: (7214, 7)
Attempting to load [GermanCredit] from 'C:\Users\Maverick\Downloads\Bias_mitigation\german_df.csv'...
  ✅ Success! Loaded GermanCredit data. Shape: (1000, 48)

✅ All datasets loaded successfully. Proceeding to experiments.

FINAL EXPERIMENTS (ALL PHASES)


▶ Datasets:   0%|                                                                                | 0/3 [00:00<?, ?it/s]


Processing [Adult]...
  > Found protected attributes: ['gender_numeric', 'race_numeric', 'age_group']
  ! Large dataset detected. Running a lean experiment for slow in-processing methods.
  ▶ Running Baseline Models and SHAP Analysis...
    - Generating SHAP plot for Adult_Baseline_LR...
    - Generating SHAP plot for Adult_Baseline_RF...
    - Generating SHAP plot for Adult_Baseline_GBM...



  ▶ Pre-Proc (Reweighing: gender):   0%|                                                         | 0/5 [00:00<?, ?it/s][A
  ▶ Pre-Proc (Reweighing: gender):  20%|█████████▊                                       | 1/5 [00:00<00:00,  4.62it/s][A
  ▶ Pre-Proc (Reweighing: gender):  40%|███████████████████▌                             | 2/5 [00:05<00:09,  3.25s/it][A
  ▶ Pre-Proc (Reweighing: gender):  80%|███████████████████████████████████████▏         | 4/5 [01:38<00:30, 30.23s/it][A
  ▶ Pre-Proc (Reweighing: gender): 100%|█████████████████████████████████████████████████| 5/5 [01:46<00:00, 23.51s/it][A
                                                                                                                       [A
  ▶ Pre-Proc (Reweighing: race):   0%|                                                           | 0/5 [00:00<?, ?it/s][A
  ▶ Pre-Proc (Reweighing: race):  20%|██████████▏                                        | 1/5 [00:00<00:00,  5.03it/s][A
  ▶ Pre-Proc (R


Processing [COMPAS]...
  > Found protected attributes: ['gender_numeric', 'race_numeric', 'age_group']
  ! Large dataset detected. Running a lean experiment for slow in-processing methods.
  ▶ Running Baseline Models and SHAP Analysis...
    - Generating SHAP plot for COMPAS_Baseline_LR...
    - Generating SHAP plot for COMPAS_Baseline_RF...
    - Generating SHAP plot for COMPAS_Baseline_GBM...



  ▶ Pre-Proc (Reweighing: gender):   0%|                                                         | 0/5 [00:00<?, ?it/s][A
  ▶ Pre-Proc (Reweighing: gender):  40%|███████████████████▌                             | 2/5 [00:00<00:00,  3.39it/s][A
  ▶ Pre-Proc (Reweighing: gender):  80%|███████████████████████████████████████▏         | 4/5 [00:02<00:00,  1.84it/s][A
  ▶ Pre-Proc (Reweighing: gender): 100%|█████████████████████████████████████████████████| 5/5 [00:02<00:00,  1.99it/s][A
                                                                                                                       [A
  ▶ Pre-Proc (Reweighing: race):   0%|                                                           | 0/5 [00:00<?, ?it/s][A
  ▶ Pre-Proc (Reweighing: race):  40%|████████████████████▍                              | 2/5 [00:00<00:00,  3.42it/s][A
  ▶ Pre-Proc (Reweighing: race):  80%|████████████████████████████████████████▊          | 4/5 [00:01<00:00,  1.97it/s][A
  ▶ Pre-Proc (R


Processing [GermanCredit]...
  > Found protected attributes: ['gender_numeric', 'age_group']
  ▶ Running Baseline Models and SHAP Analysis...
    - Generating SHAP plot for GermanCredit_Baseline_LR...
    - Generating SHAP plot for GermanCredit_Baseline_RF...
    - Generating SHAP plot for GermanCredit_Baseline_GBM...



  ▶ Pre-Proc (Reweighing: gender):   0%|                                                         | 0/5 [00:00<?, ?it/s][A
  ▶ Pre-Proc (Reweighing: gender):  40%|███████████████████▌                             | 2/5 [00:00<00:00,  9.38it/s][A
  ▶ Pre-Proc (Reweighing: gender): 100%|█████████████████████████████████████████████████| 5/5 [00:00<00:00, 10.53it/s][A
                                                                                                                       [A
  ▶ Pre-Proc (Reweighing: age):   0%|                                                            | 0/5 [00:00<?, ?it/s][A
  ▶ Pre-Proc (Reweighing: age):  40%|████████████████████▊                               | 2/5 [00:00<00:00,  8.49it/s][A
  ▶ Pre-Proc (Reweighing: age): 100%|████████████████████████████████████████████████████| 5/5 [00:00<00:00,  9.78it/s][A
                                                                                                                       [A
  ▶ In-Proc (Ex



   COMBINED EXPERIMENT RESULTS
                                              Accuracy  age_group_EOD  age_group_SPD  gender_numeric_EOD  gender_numeric_SPD  race_numeric_EOD  race_numeric_SPD
Dataset      Model Mitigation                                                                                                                                   
Adult        GBM   CalibEqOdds (age)            0.8373         0.0864        -0.0953             -0.0511             -0.1281           -0.0469           -0.0630
                   CalibEqOdds (gender)         0.8370        -0.0615        -0.1434              0.0493             -0.1099           -0.0453           -0.0600
                   CalibEqOdds (race)           0.8518        -0.0909        -0.1704             -0.0917             -0.1578           -0.0074           -0.0635
                   ExpGrad (age)                0.8561         0.0393        -0.1081             -0.0738             -0.1540           -0.0470           -0.0713
 

  mitigated = df_d[df_d['Mitigation'].str.contains(r'\((\w+)\)', regex=True)].copy()


  > Generating Accuracy-vs-Fairness plots for COMPAS...
  > Generating Fairness Interaction Heatmap for COMPAS...


  mitigated = df_d[df_d['Mitigation'].str.contains(r'\((\w+)\)', regex=True)].copy()


  > Generating Accuracy-vs-Fairness plots for GermanCredit...
  > Generating Fairness Interaction Heatmap for GermanCredit...


  mitigated = df_d[df_d['Mitigation'].str.contains(r'\((\w+)\)', regex=True)].copy()


<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

In [19]:
import os
import sys
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import warnings

# -----------------------------------------------------------------------------
# Configuration
# -----------------------------------------------------------------------------
# Folder written by the experiment pipeline; the CSV read below is its
# 'full_results.csv' output.
BASE_PATH = r'C:\Users\Maverick\Downloads\Bias_mitigation\results_bias_mitigation'
CSV_PATH  = os.path.join(BASE_PATH, 'full_results.csv')
PLOTS_DIR = os.path.join(BASE_PATH, 'plots')
os.makedirs(PLOTS_DIR, exist_ok=True)

# -----------------------------------------------------------------------------
# Load & Clean Data
# -----------------------------------------------------------------------------
warnings.filterwarnings("ignore", category=UserWarning)
warnings.filterwarnings("ignore", category=FutureWarning)

# MODIFIED LINE: Added keep_default_na=False to correctly read "None" as a string.
# With the default NA handling, the baseline label "None" would be parsed as a
# missing value; only empty strings are treated as missing here.
df = pd.read_csv(CSV_PATH, keep_default_na=False, na_values=[''])

# Normalise the key columns to stripped strings so label comparisons are exact.
# Note: astype(str) turns any missing value into the literal string 'nan'.
for col in ['Mitigation', 'Model', 'Dataset']:
    df[col] = df[col].astype(str).str.strip()

# -----------------------------------------------------------------------------
# Identify all Reweighing variants globally
# -----------------------------------------------------------------------------
all_mits       = df['Mitigation'].dropna().unique().tolist()
reweigh_labels = [m for m in all_mits if m.startswith('Reweighing')]
# Without any Reweighing rows there is nothing for the rest of this cell to
# compare against, so execution stops here.
if not reweigh_labels:
    print("ERROR: No 'Reweighing (…)’ labels found.")
    sys.exit(1)
print("Reweighing labels:", reweigh_labels)

# -----------------------------------------------------------------------------
# Helper: Choose baseline per dataset
# -----------------------------------------------------------------------------
def choose_baseline_label(subdf, reweighing_labels=None):
    """Pick the 'Mitigation' label used as the comparison baseline.

    Parameters
    ----------
    subdf : pandas.DataFrame
        Result rows (typically of one dataset) with a 'Mitigation' column.
    reweighing_labels : iterable of str, optional
        Labels that must never be chosen as baseline. When omitted, the
        module-level ``reweigh_labels`` list is used.

    Returns
    -------
    str or None
        'None' if that label is among the non-reweighing labels; otherwise
        the non-reweighing label with the most rows (first in order of
        appearance on ties). ``None`` is returned when ``subdf`` contains no
        non-reweighing label at all.
    """
    if reweighing_labels is None:
        reweighing_labels = reweigh_labels
    excluded = set(reweighing_labels)

    labels = subdf['Mitigation'].dropna().unique().tolist()
    candidates = [m for m in labels if m not in excluded]
    if not candidates:
        # max() below would raise ValueError on an empty sequence.
        return None
    if 'None' in candidates:
        return 'None'

    # Most frequent candidate; a single candidate is trivially the maximum.
    counts = subdf['Mitigation'].value_counts()
    return max(candidates, key=lambda m: counts.get(m, 0))

# -----------------------------------------------------------------------------
# Pareto Frontier Plot
# -----------------------------------------------------------------------------
def plot_pareto_frontier(subdf, dataset, metric, baseline_label, mit_label):
    """Plot accuracy against |metric| for baseline vs. mitigated models and save a PNG.

    For every model that has a row under both ``baseline_label`` and
    ``mit_label``, the baseline point (circle) and the mitigated point (star)
    are drawn and joined by an arrow pointing from baseline to mitigated.
    The figure is written into ``PLOTS_DIR`` and then closed.

    Parameters
    ----------
    subdf : pandas.DataFrame
        Result rows; needs the columns 'Mitigation', 'Model', 'Accuracy' and
        ``metric``.
    dataset : str
        Dataset name, used in the title and in the file name.
    metric : str
        Column whose absolute value is plotted on the x axis.
    baseline_label, mit_label : str
        'Mitigation' values identifying the two configurations to compare.

    Returns
    -------
    None
        Nothing is drawn when a required column is missing, or when no model
        has non-null values for both configurations.
    """
    needed = ['Model', 'Accuracy', metric]
    absent = [c for c in needed if c not in subdf.columns]
    if absent:
        # Indexing with a missing column would raise KeyError; skip instead.
        print(f"Skipping pareto plot for {dataset} | {metric}: missing column(s) {absent}")
        return

    df_base = subdf.loc[subdf['Mitigation'] == baseline_label, needed].dropna()
    df_mit  = subdf.loc[subdf['Mitigation'] == mit_label, needed].dropna()
    merged  = pd.merge(df_base, df_mit, on='Model', suffixes=('_base', '_mit'))
    if merged.empty:
        return

    from matplotlib.lines import Line2D

    x_base = merged[f'{metric}_base'].abs()
    x_mit  = merged[f'{metric}_mit'].abs()

    colors = {'LR':'tab:blue','RF':'tab:orange','MLP':'tab:green','SVM':'tab:red','GBM':'tab:purple'}
    fallback_color = 'tab:gray'
    # Models missing from the palette get a neutral colour; mapping them to NaN
    # would hand scatter() an invalid colour sequence.
    point_colors = [colors.get(mdl, fallback_color) for mdl in merged['Model']]

    plt.style.use('seaborn-v0_8-whitegrid')
    fig, ax = plt.subplots(figsize=(12,8))
    try:
        ax.scatter(x_base, merged['Accuracy_base'], c=point_colors, marker='o', s=200)
        ax.scatter(x_mit, merged['Accuracy_mit'], c=point_colors, marker='*', s=300)

        # One arrow per merged row, from the baseline point to the mitigated point.
        for xb, yb, xm, ym in zip(x_base, merged['Accuracy_base'],
                                  x_mit, merged['Accuracy_mit']):
            ax.annotate("", xy=(xm, ym), xytext=(xb, yb),
                        arrowprops=dict(arrowstyle="->", color='gray',
                                        shrinkA=15, shrinkB=15,
                                        connectionstyle="arc3,rad=-0.1"))

        # The legend is built by hand: marker shape encodes the configuration,
        # colour encodes the model.
        legend_elems = [
            Line2D([0],[0], marker='o', color='w', label=baseline_label,
                   markerfacecolor='gray', markersize=10),
            Line2D([0],[0], marker='*', color='w', label=mit_label,
                   markerfacecolor='gray', markersize=12)
        ]
        present_models = list(dict.fromkeys(merged['Model']))
        ordered_models = [m for m in colors if m in present_models]
        ordered_models += [m for m in present_models if m not in colors]
        for mdl in ordered_models:
            legend_elems.append(Line2D([0],[0], marker='s', color='w', label=mdl,
                                       markerfacecolor=colors.get(mdl, fallback_color),
                                       markersize=8))

        ax.legend(handles=legend_elems, title='Configuration',
                  bbox_to_anchor=(1.02,1), loc='upper left')
        ax.set_title(f'Pareto: {dataset} | {metric}\n{baseline_label} vs {mit_label}', fontsize=16, fontweight='bold')
        ax.set_xlabel(f'|{metric}| (fairer→0)', fontsize=14)
        ax.set_ylabel('Accuracy', fontsize=14)
        # Leave the right 15% of the canvas free for the legend placed outside the axes.
        fig.tight_layout(rect=[0,0,0.85,1])

        fname = f"pareto_{dataset}_{metric}_{baseline_label.replace(' ','_')}_vs_{mit_label.replace(' ','_')}.png"
        fig.savefig(os.path.join(PLOTS_DIR, fname))
    finally:
        # Close even if drawing or saving fails, so figures do not pile up.
        plt.close(fig)
    print(f"Saved: {fname}")

# -----------------------------------------------------------------------------
# Dumbbell Plot
# -----------------------------------------------------------------------------
def plot_dumbbell(subdf, dataset, model, baseline_label, mit_label):
    """Draw a dumbbell chart of one model's fairness metrics, baseline vs. mitigated.

    Each column of ``subdf`` whose name ends in '_SPD' or '_EOD' becomes one
    horizontal row: a circle at the baseline value, a star at the mitigated
    value, and a gray segment joining them. Values come from the first
    matching row of each configuration. The figure is written into
    ``PLOTS_DIR`` and then closed.

    Parameters
    ----------
    subdf : pandas.DataFrame
        Result rows with 'Model', 'Mitigation' and the metric columns.
    dataset : str
        Dataset name, used in the title and in the file name.
    model : str
        Value of 'Model' to plot.
    baseline_label, mit_label : str
        'Mitigation' values identifying the two configurations to compare.

    Returns
    -------
    None
        Nothing is drawn when either configuration has no row for ``model``
        or when no metric is non-null in both.
    """
    is_model = subdf['Model'] == model
    base = subdf[is_model & (subdf['Mitigation'] == baseline_label)]
    mit  = subdf[is_model & (subdf['Mitigation'] == mit_label)]
    if base.empty or mit.empty:
        return

    metrics = sorted(c for c in subdf.columns if c.endswith(('_SPD', '_EOD')))
    data = []
    for m in metrics:
        base_val, mit_val = base[m].iloc[0], mit[m].iloc[0]
        if pd.notna(base_val) and pd.notna(mit_val):
            data.append((m, base_val, mit_val))
    if not data:
        return

    names     = [m for m, _, _ in data]
    base_vals = [bv for _, bv, _ in data]
    mit_vals  = [mv for _, _, mv in data]
    rows      = list(range(len(data)))

    plt.style.use('seaborn-v0_8-whitegrid')
    fig, ax = plt.subplots(figsize=(10,8))
    try:
        for i, bv, mv in zip(rows, base_vals, mit_vals):
            ax.plot([bv, mv], [i, i], color='gray', linewidth=2, zorder=1)
        # One scatter call per configuration with a fixed colour: calling
        # scatter() per row without a colour advances the colour cycle, so the
        # markers would change colour from row to row and the legend would only
        # match the first row.
        ax.scatter(base_vals, rows, marker='o', s=150, color='tab:blue',
                   label=baseline_label, zorder=3)
        ax.scatter(mit_vals, rows, marker='*', s=200, color='tab:orange',
                   label=mit_label, zorder=3)

        ax.axvline(0, linestyle='--', linewidth=1)
        ax.set_yticks(rows)
        ax.set_yticklabels(names, fontsize=12)
        ax.set_xlabel("Metric Value", fontsize=14)
        ax.set_title(f'Dumbbell: {dataset} | {model}\n{baseline_label} vs {mit_label}', fontsize=16, fontweight='bold')
        ax.legend()
        fig.tight_layout()

        fname = f"dumbbell_{dataset}_{model}_{baseline_label.replace(' ','_')}_vs_{mit_label.replace(' ','_')}.png"
        fig.savefig(os.path.join(PLOTS_DIR, fname))
    finally:
        # Close even if drawing or saving fails, so figures do not pile up.
        plt.close(fig)
    print(f"Saved: {fname}")

# -----------------------------------------------------------------------------
# Main
# -----------------------------------------------------------------------------
def main(dumbbell_model='GBM'):
    """Generate every Pareto and dumbbell plot for each dataset in ``df``.

    For each dataset, a baseline label is chosen with
    ``choose_baseline_label``; one Pareto plot is produced per
    (metric, reweighing label) pair and one dumbbell plot per reweighing
    label. Datasets without any reweighing rows are skipped with a message.

    Parameters
    ----------
    dumbbell_model : str, optional
        Value of 'Model' shown in the dumbbell plots (default 'GBM').
    """
    print("Starting plot generation…")
    datasets = df['Dataset'].unique()
    # Keep only the metric columns that exist in the loaded results, so an
    # absent column is treated the same way for every metric.
    candidate_metrics = ['age_group_EOD', 'gender_numeric_EOD', 'race_numeric_EOD']
    metrics = [m for m in candidate_metrics if m in df.columns]

    for ds in datasets:
        subdf = df[df['Dataset']==ds]
        baseline_label = choose_baseline_label(subdf)
        present_rws   = [rw for rw in reweigh_labels if rw in subdf['Mitigation'].values]
        if not present_rws:
            print(f"No reweighing runs for {ds}, skipping.")
            continue
        if baseline_label is None:
            # Defensive: without a baseline label there is nothing to compare against.
            print(f"No baseline runs for {ds}, skipping.")
            continue

        print(f"\n▶ Dataset: {ds}")
        print("  Baseline:", baseline_label)
        print("  Reweighings:", present_rws)

        for metric in metrics:
            for rw in present_rws:
                plot_pareto_frontier(subdf, ds, metric, baseline_label, rw)
        for rw in present_rws:
            plot_dumbbell(subdf, ds, model=dumbbell_model,
                          baseline_label=baseline_label, mit_label=rw)

    print("\nAll plots saved to:", PLOTS_DIR)

# Entry-point guard: the captured output below shows that executing this cell
# runs main() and generates the plots immediately.
if __name__ == '__main__':
    main()

Reweighing labels: ['Reweighing (gender)', 'Reweighing (race)', 'Reweighing (age)', 'Reweighing (Intersection)']
Starting plot generation…

▶ Dataset: Adult
  Baseline: None
  Reweighings: ['Reweighing (gender)', 'Reweighing (race)', 'Reweighing (age)', 'Reweighing (Intersection)']
Saved: pareto_Adult_age_group_EOD_None_vs_Reweighing_(gender).png
Saved: pareto_Adult_age_group_EOD_None_vs_Reweighing_(race).png
Saved: pareto_Adult_age_group_EOD_None_vs_Reweighing_(age).png
Saved: pareto_Adult_age_group_EOD_None_vs_Reweighing_(Intersection).png
Saved: pareto_Adult_gender_numeric_EOD_None_vs_Reweighing_(gender).png
Saved: pareto_Adult_gender_numeric_EOD_None_vs_Reweighing_(race).png
Saved: pareto_Adult_gender_numeric_EOD_None_vs_Reweighing_(age).png
Saved: pareto_Adult_gender_numeric_EOD_None_vs_Reweighing_(Intersection).png
Saved: pareto_Adult_race_numeric_EOD_None_vs_Reweighing_(gender).png
Saved: pareto_Adult_race_numeric_EOD_None_vs_Reweighing_(race).png
Saved: pareto_Adult_race_numeri

In [16]:
# Count the missing (NaN) values in each column of df.
df.isnull().sum()

Accuracy               0
gender_numeric_SPD     0
gender_numeric_EOD     0
race_numeric_SPD      35
race_numeric_EOD      35
age_group_SPD          0
age_group_EOD          0
Model                  0
Mitigation             0
Dataset                0
dtype: int64

In [17]:
# (number of rows, number of columns) of df.
df.shape

(119, 10)