In [3]:
import pandas as pd
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from imblearn.over_sampling import SMOTE
import warnings


# Load the dataset
file_path = 'data.csv'
df = pd.read_csv(file_path, delimiter=';')

# Define target and feature sets
target_column = 'Target'

# Define all features by type
categorical_features = [
    'Marital status', 'Application mode', 'Application order', 'Course',
    'Previous qualification', 'Nacionality', 'Mother\'s qualification',
    'Father\'s qualification', 'Mother\'s occupation', 'Father\'s occupation'
]
binary_features = [
    'Daytime/evening attendance', 'Displaced', 'Educational special needs', 'Debtor',
    'Tuition fees up to date', 'Gender', 'Scholarship holder', 'International'
]
numerical_features = [
    'Previous qualification (grade)', 'Admission grade', 'Age at enrollment',
    'Curricular units 1st sem (credited)', 'Curricular units 1st sem (enrolled)',
    'Curricular units 1st sem (evaluations)', 'Curricular units 1st sem (approved)',
    'Curricular units 1st sem (grade)', 'Curricular units 1st sem (without evaluations)',
    'Curricular units 2nd sem (credited)', 'Curricular units 2nd sem (enrolled)',
    'Curricular units 2nd sem (evaluations)', 'Curricular units 2nd sem (approved)',
    'Curricular units 2nd sem (grade)', 'Curricular units 2nd sem (without evaluations)',
    'Unemployment rate', 'Inflation rate', 'GDP'
]

# Selected features after Random Forest
selected_features = {
    'categorical': ['Course', 'Application mode'],
    'binary': ['Tuition fees up to date', 'Debtor', 'Scholarship holder'],
    'numerical': [
        'Curricular units 2nd sem (approved)', 'Curricular units 1st sem (approved)',
        'Curricular units 2nd sem (grade)', 'Curricular units 2nd sem (evaluations)',
        'Curricular units 1st sem (evaluations)', 'Curricular units 2nd sem (enrolled)',
        'Admission grade', 'Curricular units 1st sem (credited)',
        'Previous qualification (grade)', 'Curricular units 1st sem (grade)'
    ]
}

# Extract full and selected feature lists
full_feature_set = categorical_features + binary_features + numerical_features
selected_feature_set = (
    selected_features['categorical'] +
    selected_features['binary'] +
    selected_features['numerical']
)

# Create a version of the dataframe with whitespace-trimmed column names
df.columns = df.columns.str.strip()

# Encode target as categorical (e.g., integer)
df[target_column] = df[target_column].astype('category').cat.codes

# Prepare to split for CV and feature sets
X_full = df[full_feature_set]
X_selected = df[selected_feature_set]
y = df[target_column]

# Set up 10-fold stratified cross-validation
skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

# Display confirmation
{
    "X_full_shape": X_full.shape,
    "X_selected_shape": X_selected.shape,
    "y_class_distribution": y.value_counts().to_dict(),
    "num_folds": 10
}


{'X_full_shape': (4424, 36),
 'X_selected_shape': (4424, 15),
 'y_class_distribution': {2: 2209, 0: 1421, 1: 794},
 'num_folds': 10}

In [3]:
# === IMPORTS ===
import pandas as pd
import numpy as np
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.neural_network import MLPClassifier
from imblearn.over_sampling import SMOTE
from collections import defaultdict
import warnings

# Suppress specific warnings
warnings.filterwarnings("ignore", category=RuntimeWarning)
np.seterr(divide='ignore', invalid='ignore', over='ignore')

# === LOAD & PREPARE DATA ===
def load_data(filepath='data.csv'):
    df = pd.read_csv(filepath, delimiter=';')
    df.columns = df.columns.str.strip()
    target_column = 'Target: String'  # Make sure this matches your actual column name
    # Check if target column exists
    if target_column not in df.columns:
        target_column = 'Target'  # Try alternative name
    
    df[target_column] = df[target_column].astype('category').cat.codes
    y = df[target_column]
    return df, y

# === DEFINE FEATURE SETS ===
categorical_features = [
    'Marital status', 'Application mode', 'Application order', 'Course',
    'Previous qualification', 'Nacionality', 'Mother\'s qualification',
    'Father\'s qualification', 'Mother\'s occupation', 'Father\'s occupation'
]
binary_features = [
    'Daytime/evening attendance', 'Displaced', 'Educational special needs', 'Debtor',
    'Tuition fees up to date', 'Gender', 'Scholarship holder', 'International'
]
numerical_features = [
    'Previous qualification (grade)', 'Admission grade', 'Age at enrollment',
    'Curricular units 1st sem (credited)', 'Curricular units 1st sem (enrolled)',
    'Curricular units 1st sem (evaluations)', 'Curricular units 1st sem (approved)',
    'Curricular units 1st sem (grade)', 'Curricular units 1st sem (without evaluations)',
    'Curricular units 2nd sem (credited)', 'Curricular units 2nd sem (enrolled)',
    'Curricular units 2nd sem (evaluations)', 'Curricular units 2nd sem (approved)',
    'Curricular units 2nd sem (grade)', 'Curricular units 2nd sem (without evaluations)',
    'Unemployment rate', 'Inflation rate', 'GDP'
]
selected_features = {
    'categorical': ['Course', 'Application mode'],
    'binary': ['Tuition fees up to date', 'Debtor', 'Scholarship holder'],
    'numerical': [
        'Curricular units 2nd sem (approved)', 'Curricular units 1st sem (approved)',
        'Curricular units 2nd sem (grade)', 'Curricular units 2nd sem (evaluations)',
        'Curricular units 1st sem (evaluations)', 'Curricular units 2nd sem (enrolled)',
        'Admission grade', 'Curricular units 1st sem (credited)',
        'Previous qualification (grade)', 'Curricular units 1st sem (grade)'
    ]
}

# === FAIRNESS METRICS ===
def compute_statistical_parity(y_pred, sensitive_attr, positive_class):
    """Calculate difference in selection rates between groups."""
    group_0 = sensitive_attr == 0  # Female
    group_1 = sensitive_attr == 1  # Male
    if np.sum(group_0) == 0 or np.sum(group_1) == 0:
        return np.nan
    return np.mean(y_pred[group_0] == positive_class) - np.mean(y_pred[group_1] == positive_class)

def compute_equalized_opportunity(y_true, y_pred, sensitive_attr, positive_class):
    """Calculate difference in true positive rates between groups."""
    mask_0 = (sensitive_attr == 0) & (y_true == positive_class)  # Female + actual dropouts
    mask_1 = (sensitive_attr == 1) & (y_true == positive_class)  # Male + actual dropouts
    if np.sum(mask_0) == 0 or np.sum(mask_1) == 0:
        return np.nan
    tpr_0 = np.mean(y_pred[mask_0] == positive_class)
    tpr_1 = np.mean(y_pred[mask_1] == positive_class)
    return tpr_0 - tpr_1

# === ADVERSARIAL EXAMPLE GENERATOR (SAFER IMPLEMENTATION) ===
def generate_adversarial_examples(X, y, model, epsilon=0.01):
    """Generate adversarial examples with error handling."""
    try:
        X = X.astype(np.float32)
        if hasattr(X, "toarray"):  # if sparse, convert to dense
            X = X.toarray()
        
        # Simple fitting for different model types
        if isinstance(model, MLPClassifier):
            # For sklearn models, use fit directly
            # Temporarily store early_stopping value
            early_stopping = getattr(model, 'early_stopping', False)
            if early_stopping:
                model.set_params(early_stopping=False)
            
            model.fit(X, y)
            
            # Restore original early_stopping value
            if early_stopping:
                model.set_params(early_stopping=early_stopping)
                
        elif hasattr(model, 'fit') and not hasattr(model, 'predict_proba'):
            # If model can fit but doesn't predict probas
            model.fit(X, y)
            # Can't create adversarial examples, return original
            return X
            
        # First ensure model can predict probabilities
        if hasattr(model, 'predict_proba'):
            try:
                probs = model.predict_proba(X)
            except Exception as e:
                print(f"Error in predict_proba: {e}")
                return X  # Return original if predict_proba fails
                
            grads = np.zeros_like(X)
            
            # Calculate simple gradient approximation for each feature
            for i in range(X.shape[1]):
                X_perturbed = X.copy()
                X_perturbed[:, i] += epsilon
                
                # Get new probabilities
                try:
                    new_probs = model.predict_proba(X_perturbed)
                except Exception as e:
                    print(f"Error in calculating perturbed probabilities: {e}")
                    continue  # Skip this feature
                
                # Calculate loss difference - ensuring dimensions match
                try:
                    # Create one-hot encoding of labels
                    one_hot_y = np.zeros_like(probs)
                    for j, cls in enumerate(y):
                        if cls < one_hot_y.shape[1]:  # Safety check
                            one_hot_y[j, cls] = 1
                    
                    # Calculate loss difference
                    loss_diff = np.sum((new_probs - probs) * (one_hot_y - probs), axis=1)
                    grads[:, i] = loss_diff
                except Exception as e:
                    print(f"Error in gradient calculation: {e}")
                    continue  # Skip this feature
            
            # Create adversarial examples
            if np.any(grads):  # Only perturb if we have gradients
                perturbed_X = np.clip(X + epsilon * np.sign(grads), X.min(), X.max())
                return perturbed_X
            else:
                return X  # Return original if no gradients
        else:
            return X  # Return original if no predict_proba
    except Exception as e:
        print(f"Error in adversarial generation: {e}")
        return X  # Return original data on any error

# === MODEL FACTORY ===
# === MODEL FACTORY with Fixed MINN Implementation ===
# === MODEL FACTORY with Fixed MINN Implementation ===
def build_model_pipeline(model_type, categorical, numerical):
    """Create preprocessing pipeline with specified model."""
    preprocessor = ColumnTransformer([
        ('cat', OneHotEncoder(handle_unknown='ignore', sparse_output=False), categorical),
        ('num', StandardScaler(), numerical)
    ], remainder='drop')
    
    if model_type == "dnn":
        # Simple Dense Neural Network using sklearn's MLPClassifier
        classifier = MLPClassifier(
            hidden_layer_sizes=(64,),
            max_iter=1000,
            random_state=42,
            solver='adam',
            learning_rate_init=1e-3,
            activation='tanh',
            early_stopping=False,
            tol=1e-4
        )
    elif model_type == "minn":
        # Simpler implementation using MLPClassifier for MINN
        # This avoids the 'super' object has no attribute '__sklearn_tags__' error
        classifier = MLPClassifier(
            hidden_layer_sizes=(64, 32),  # Two hidden layers to mimic MINN
            max_iter=1000,
            random_state=42,
            solver='adam',
            learning_rate_init=1e-3,
            activation='relu',
            early_stopping=False
        )
        print("Using MLPClassifier as a direct substitute for MINN to avoid compatibility issues")
            
    elif model_type == "tabnet":
        # Using TabNet classifier if available
        try:
            from pytorch_tabnet.tab_model import TabNetClassifier
            
            classifier = TabNetClassifier(
                optimizer_params=dict(lr=2e-2),
                verbose=0
            )
        except ImportError as e:
            print(f"TabNet import error: {e}. Falling back to MLPClassifier.")
            # Fallback if TabNet is not available
            classifier = MLPClassifier(
                hidden_layer_sizes=(32,),
                max_iter=500,
                random_state=42,
                early_stopping=False
            )
    else:
        # Default fallback
        classifier = MLPClassifier(
            hidden_layer_sizes=(32,),
            max_iter=500,
            random_state=42,
            early_stopping=False
        )

    return Pipeline([
        ('preprocessor', preprocessor),
        ('clf', classifier)
    ])

  

# === EXPERIMENT WRAPPER ===
def run_full_experiment(X_full, X_selected, y, df, skf, selected_features, model_types=["dnn", "minn", "tabnet"]):
    """Run cross-validation experiment with specified models and feature sets."""
    results = defaultdict(list)
    feature_sets = {"full": X_full, "selected": X_selected}

    for model_type in model_types:
        print(f"\nRunning model: {model_type.upper()}")
        for feature_set_name, X in feature_sets.items():
            print(f"  Feature set: {feature_set_name}")
            if feature_set_name == 'full':
                categorical = [f for f in categorical_features if f in X.columns]
                numerical = [f for f in numerical_features if f in X.columns]
                binary = [f for f in binary_features if f in X.columns]
            else:
                categorical = [f for f in selected_features['categorical'] if f in X.columns]
                numerical = [f for f in selected_features['numerical'] if f in X.columns]
                binary = [f for f in selected_features['binary'] if f in X.columns]

            # Merge binary features into numerical for preprocessing
            all_numerical = numerical + binary

            for fold_idx, (train_idx, test_idx) in enumerate(skf.split(X, y)):
                print(f"    Fold {fold_idx + 1}/10")
                X_train, X_test = X.iloc[train_idx], X.iloc[test_idx]
                y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]
                
                # Get sensitive attribute
                gender_column = 'Gender'
                if gender_column in df.columns:
                    gender_test = df.iloc[test_idx][gender_column].values
                else:
                    # Use placeholder if gender not available
                    gender_test = np.zeros(len(test_idx))
                
                y_true = y_test.to_numpy()
                positive_class = 0  # assuming 0 = Dropout

                def evaluate_and_store(model_name, variant, y_pred):
                    """Store evaluation metrics."""
                    results["model"].append(model_name)
                    results["feature_set"].append(feature_set_name)
                    results["variant"].append(variant)
                    results["fold"].append(fold_idx)
                    
                    # Performance metrics
                    results["accuracy"].append(accuracy_score(y_test, y_pred))
                    results["precision"].append(precision_score(y_test, y_pred, average='macro', zero_division=0))
                    results["recall"].append(recall_score(y_test, y_pred, average='macro', zero_division=0))
                    results["f1"].append(f1_score(y_test, y_pred, average='macro', zero_division=0))
                    
                    # Fairness metrics
                    results["SPD"].append(compute_statistical_parity(y_pred, gender_test, positive_class))
                    results["EO"].append(compute_equalized_opportunity(y_true, y_pred, gender_test, positive_class))

                try:
                    # Special TabNet handling
                    if model_type == "tabnet":
                        try:
                            from pytorch_tabnet.tab_model import TabNetClassifier
                            
                            # Preprocess data for TabNet
                            preprocessor = ColumnTransformer([
                                ('cat', OneHotEncoder(handle_unknown='ignore', sparse_output=False), categorical),
                                ('num', StandardScaler(), all_numerical)
                            ], remainder='drop')
                            
                            # Prepare training data
                            X_train_enc = preprocessor.fit_transform(X_train)
                            X_test_enc = preprocessor.transform(X_test)
                            
                            # Baseline TabNet
                            tabnet_model = TabNetClassifier(
                                optimizer_params=dict(lr=2e-2),
                                verbose=0
                            )
                            tabnet_model.fit(
                                X_train=X_train_enc, y_train=y_train.values,
                                eval_set=[(X_test_enc, y_test.values)],
                                max_epochs=100
                            )
                            y_pred = tabnet_model.predict(X_test_enc)
                            evaluate_and_store(model_type, "baseline", y_pred)
                            
                            # SMOTE with TabNet
                            try:
                                sm = SMOTE(random_state=42)
                                X_train_res, y_train_res = sm.fit_resample(X_train_enc, y_train)
                                
                                tabnet_smote = TabNetClassifier(
                                    optimizer_params=dict(lr=2e-2),
                                    verbose=0
                                )
                                tabnet_smote.fit(
                                    X_train=X_train_res, y_train=y_train_res.values,
                                    eval_set=[(X_test_enc, y_test.values)],
                                    max_epochs=100
                                )
                                y_pred_smote = tabnet_smote.predict(X_test_enc)
                                evaluate_and_store(model_type, "smote", y_pred_smote)
                            except Exception as e:
                                print(f"      TabNet SMOTE error: {e}")
                                evaluate_and_store(model_type, "smote", y_pred)
                            
                            # Adversarial with TabNet - simplified for TabNet
                            try:
                                # Generate simple adversarial examples with noise
                                np.random.seed(42)
                                noise = np.random.normal(0, 0.01, X_train_enc.shape)
                                adv_X = np.clip(X_train_enc + noise, X_train_enc.min(), X_train_enc.max())
                                
                                # Combine original and adversarial
                                combined_X = np.vstack([X_train_enc, adv_X])
                                combined_y = np.concatenate([y_train.values, y_train.values])
                                
                                tabnet_adv = TabNetClassifier(
                                    optimizer_params=dict(lr=2e-2),
                                    verbose=0
                                )
                                tabnet_adv.fit(
                                    X_train=combined_X, y_train=combined_y,
                                    eval_set=[(X_test_enc, y_test.values)],
                                    max_epochs=100
                                )
                                y_pred_adv = tabnet_adv.predict(X_test_enc)
                                evaluate_and_store(model_type, "adversarial", y_pred_adv)
                            except Exception as e:
                                print(f"      TabNet adversarial error: {e}")
                                evaluate_and_store(model_type, "adversarial", y_pred)
                                
                        except ImportError as e:
                            print(f"TabNet not available: {e}. Skipping this model type.")
                            # Fill with NaN in case TabNet is not available
                            evaluate_and_store(model_type, "baseline", np.full_like(y_test.values, np.nan))
                            evaluate_and_store(model_type, "smote", np.full_like(y_test.values, np.nan))
                            evaluate_and_store(model_type, "adversarial", np.full_like(y_test.values, np.nan))
                            continue
                            
                    else:
                        # Standard scikit-learn pipeline for DNN and MINN
                        # Baseline
                        print(f"      Training baseline model")
                        pipe_base = build_model_pipeline(model_type, categorical, all_numerical)
                        pipe_base.fit(X_train, y_train)
                        y_pred = pipe_base.predict(X_test)
                        evaluate_and_store(model_type, "baseline", y_pred)

                        # SMOTE
                        print(f"      Training SMOTE model")
                        pipe_smote = build_model_pipeline(model_type, categorical, all_numerical)
                        X_train_enc = pipe_smote.named_steps['preprocessor'].fit_transform(X_train)
                        
                        # Ensure X_train_enc is dense for SMOTE
                        if hasattr(X_train_enc, "toarray"):
                            X_train_enc = X_train_enc.toarray()
                        
                        # Apply SMOTE
                        try:
                            sm = SMOTE(random_state=42)
                            X_train_res, y_train_res = sm.fit_resample(X_train_enc, y_train)
                            
                            # Special handling for MINN with SMOTE
                            if model_type == "minn" and "KerasClassifier" in str(type(pipe_smote.named_steps['clf'])):
                                # Manually update input dimensions for Keras
                                clf = pipe_smote.named_steps['clf']
                                # Use scikit-learn mechanism to update model parameters
                                clf.set_params(model__meta={"n_features_in_": X_train_res.shape[1], "n_classes_": len(np.unique(y))})
                            
                            pipe_smote.named_steps['clf'].fit(X_train_res, y_train_res)
                            X_test_enc = pipe_smote.named_steps['preprocessor'].transform(X_test)
                            
                            if hasattr(X_test_enc, "toarray"):
                                X_test_enc = X_test_enc.toarray()
                                
                            y_pred_smote = pipe_smote.named_steps['clf'].predict(X_test_enc)
                            evaluate_and_store(model_type, "smote", y_pred_smote)
                        except Exception as e:
                            print(f"      SMOTE error: {e}")
                            # If SMOTE fails, use baseline predictions
                            evaluate_and_store(model_type, "smote", y_pred)

                        # Adversarial
                        print(f"      Training adversarial model")
                        pipe_adv = build_model_pipeline(model_type, categorical, all_numerical)
                        X_train_enc = pipe_adv.named_steps['preprocessor'].fit_transform(X_train)
                        
                        # Ensure X_train_enc is dense
                        if hasattr(X_train_enc, "toarray"):
                            X_train_enc = X_train_enc.toarray()
                        
                        try:
                            # Generate adversarial examples
                            adv_X = generate_adversarial_examples(X_train_enc, y_train.to_numpy(), 
                                                                pipe_adv.named_steps['clf'])
                            
                            # Concatenate original and adversarial examples
                            combined_X = np.vstack([X_train_enc, adv_X])
                            combined_y = np.concatenate([y_train.to_numpy(), y_train.to_numpy()])
                            
                            # Train on combined data
                            pipe_adv.named_steps['clf'].fit(combined_X, combined_y)
                            
                            # Transform test data
                            X_test_enc = pipe_adv.named_steps['preprocessor'].transform(X_test)
                            if hasattr(X_test_enc, "toarray"):
                                X_test_enc = X_test_enc.toarray()
                            
                            # Predict
                            y_pred_adv = pipe_adv.named_steps['clf'].predict(X_test_enc)
                            evaluate_and_store(model_type, "adversarial", y_pred_adv)
                        except Exception as e:
                            print(f"      Adversarial error: {e}")
                            # If adversarial fails, use baseline predictions
                            evaluate_and_store(model_type, "adversarial", y_pred)
                
                except Exception as e:
                    print(f"Error in fold {fold_idx} for {model_type} on {feature_set_name}: {e}")
                    # Fill with NaN in case of complete failure
                    evaluate_and_store(model_type, "baseline", np.full_like(y_test, np.nan))
                    evaluate_and_store(model_type, "smote", np.full_like(y_test, np.nan))
                    evaluate_and_store(model_type, "adversarial", np.full_like(y_test, np.nan))

    return pd.DataFrame(results)

# === MAIN EXECUTION BLOCK ===
def main():
    print("Loading data...")
    df, y = load_data()
    
    # Create feature sets
    print("Creating feature sets...")
    full_feature_set = categorical_features + binary_features + numerical_features
    selected_feature_set = (
        selected_features['categorical'] +
        selected_features['binary'] +
        selected_features['numerical']
    )
    
    # Check for missing columns in the dataset
    missing_full = [col for col in full_feature_set if col not in df.columns]
    if missing_full:
        print(f"Warning: Missing columns in full feature set: {missing_full}")
        full_feature_set = [col for col in full_feature_set if col in df.columns]
        
    missing_selected = [col for col in selected_feature_set if col not in df.columns]
    if missing_selected:
        print(f"Warning: Missing columns in selected feature set: {missing_selected}")
        selected_feature_set = [col for col in selected_feature_set if col in df.columns]
    
    X_full = df[full_feature_set]
    X_selected = df[selected_feature_set]
    
    # Setup cross-validation
    skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)  # Back to 10 folds as in original
    
    # Run the experiment with all three models
    print("Running experiments...")
    df_results = run_full_experiment(X_full, X_selected, y, df, skf, selected_features, 
                                    model_types=["dnn", "minn", "tabnet"])
    
    # Display results with focus on fairness metrics
    print("\nResults Summary:")
    summary = df_results.groupby(['model', 'feature_set', 'variant']).mean().round(3)
    print(summary)
    
    # Print focused fairness metrics analysis
    print("\nFairness Metrics Analysis (Gender: 0=female, 1=male):")
    fairness_metrics = ['SPD', 'EO']
    fairness_summary = summary[fairness_metrics]
    
    print("\nStatistical Parity Difference (SPD):")
    print("- Negative values indicate bias against females (less likely to be predicted as dropouts)")
    print("- Positive values indicate bias against males (more likely to be predicted as dropouts)")
    print(fairness_summary['SPD'].unstack().round(3))
    
    print("\nEqualized Opportunity (EO):")
    print("- Positive values indicate model is better at identifying female dropouts")
    print("- Negative values indicate model is better at identifying male dropouts")
    print(fairness_summary['EO'].unstack().round(3))
    
    # Check if the interventions (SMOTE and adversarial) improve fairness
    print("\nEffect of interventions on fairness:")
    for model in df_results['model'].unique():
        for feature_set in df_results['feature_set'].unique():
            baseline = df_results[(df_results['model'] == model) & 
                                  (df_results['feature_set'] == feature_set) & 
                                  (df_results['variant'] == 'baseline')][fairness_metrics].mean().abs()
            
            smote = df_results[(df_results['model'] == model) & 
                              (df_results['feature_set'] == feature_set) & 
                              (df_results['variant'] == 'smote')][fairness_metrics].mean().abs()
            
            adv = df_results[(df_results['model'] == model) & 
                            (df_results['feature_set'] == feature_set) & 
                            (df_results['variant'] == 'adversarial')][fairness_metrics].mean().abs()
            
            print(f"\n{model.upper()} - {feature_set} feature set:")
            print(f"  SPD improvement with SMOTE: {(baseline['SPD'] - smote['SPD']).round(3)} (smaller is better)")
            print(f"  SPD improvement with Adversarial: {(baseline['SPD'] - adv['SPD']).round(3)} (smaller is better)")
            print(f"  EO improvement with SMOTE: {(baseline['EO'] - smote['EO']).round(3)} (smaller is better)")
            print(f"  EO improvement with Adversarial: {(baseline['EO'] - adv['EO']).round(3)} (smaller is better)")
    
    # Compare full vs selected features impact on fairness
    print("\nEffect of feature selection on fairness:")
    for model in df_results['model'].unique():
        for variant in df_results['variant'].unique():
            full = df_results[(df_results['model'] == model) & 
                             (df_results['feature_set'] == 'full') & 
                             (df_results['variant'] == variant)][fairness_metrics].mean().abs()
            
            selected = df_results[(df_results['model'] == model) & 
                                 (df_results['feature_set'] == 'selected') & 
                                 (df_results['variant'] == variant)][fairness_metrics].mean().abs()
            
            print(f"\n{model.upper()} - {variant}:")
            print(f"  SPD improvement with selected features: {(full['SPD'] - selected['SPD']).round(3)} (smaller is better)")
            print(f"  EO improvement with selected features: {(full['EO'] - selected['EO']).round(3)} (smaller is better)")
    
    return df_results

main()

Loading data...
Creating feature sets...
Running experiments...

Running model: DNN
  Feature set: full
    Fold 1/10
      Training baseline model
      Training SMOTE model
      Training adversarial model
    Fold 2/10
      Training baseline model
      Training SMOTE model
      Training adversarial model
    Fold 3/10
      Training baseline model
      Training SMOTE model
      Training adversarial model
    Fold 4/10
      Training baseline model
      Training SMOTE model
      Training adversarial model
    Fold 5/10
      Training baseline model
      Training SMOTE model
      Training adversarial model
    Fold 6/10
      Training baseline model
      Training SMOTE model
      Training adversarial model
    Fold 7/10
      Training baseline model
      Training SMOTE model
      Training adversarial model
    Fold 8/10
      Training baseline model
      Training SMOTE model
      Training adversarial model
    Fold 9/10
      Training baseline model
      Training SMOTE



      Training SMOTE model




      Training adversarial model




    Fold 2/10
      Training baseline model




      Training SMOTE model




      Training adversarial model




    Fold 3/10
      Training baseline model




      Training SMOTE model




      Training adversarial model




    Fold 4/10
      Training baseline model




      Training SMOTE model




      Training adversarial model




    Fold 5/10
      Training baseline model




      Training SMOTE model




      Training adversarial model




    Fold 6/10
      Training baseline model




      Training SMOTE model




      Training adversarial model




    Fold 7/10
      Training baseline model




      Training SMOTE model




      Training adversarial model




    Fold 8/10
      Training baseline model




      Training SMOTE model




      Training adversarial model




    Fold 9/10
      Training baseline model




      Training SMOTE model




      Training adversarial model




    Fold 10/10
      Training baseline model




      Training SMOTE model




      Training adversarial model





Running model: MINN
  Feature set: full
    Fold 1/10
      Training baseline model
Using MLPClassifier as a direct substitute for MINN to avoid compatibility issues
      Training SMOTE model
Using MLPClassifier as a direct substitute for MINN to avoid compatibility issues
      Training adversarial model
Using MLPClassifier as a direct substitute for MINN to avoid compatibility issues
    Fold 2/10
      Training baseline model
Using MLPClassifier as a direct substitute for MINN to avoid compatibility issues
      Training SMOTE model
Using MLPClassifier as a direct substitute for MINN to avoid compatibility issues
      Training adversarial model
Using MLPClassifier as a direct substitute for MINN to avoid compatibility issues
    Fold 3/10
      Training baseline model
Using MLPClassifier as a direct substitute for MINN to avoid compatibility issues
      Training SMOTE model
Using MLPClassifier as a direct substitute for MINN to avoid compatibility issues
      Training adversari




Early stopping occurred at epoch 15 with best_epoch = 5 and best_val_0_accuracy = 0.68849





Early stopping occurred at epoch 33 with best_epoch = 23 and best_val_0_accuracy = 0.72235
    Fold 2/10





Early stopping occurred at epoch 71 with best_epoch = 61 and best_val_0_accuracy = 0.75169





Early stopping occurred at epoch 44 with best_epoch = 34 and best_val_0_accuracy = 0.74718





Early stopping occurred at epoch 70 with best_epoch = 60 and best_val_0_accuracy = 0.76524
    Fold 3/10





Early stopping occurred at epoch 29 with best_epoch = 19 and best_val_0_accuracy = 0.68849





Early stopping occurred at epoch 18 with best_epoch = 8 and best_val_0_accuracy = 0.56208





Early stopping occurred at epoch 26 with best_epoch = 16 and best_val_0_accuracy = 0.6614
    Fold 4/10





Early stopping occurred at epoch 42 with best_epoch = 32 and best_val_0_accuracy = 0.70429





Early stopping occurred at epoch 17 with best_epoch = 7 and best_val_0_accuracy = 0.6614





Early stopping occurred at epoch 44 with best_epoch = 34 and best_val_0_accuracy = 0.73363
    Fold 5/10





Early stopping occurred at epoch 28 with best_epoch = 18 and best_val_0_accuracy = 0.72624





Early stopping occurred at epoch 17 with best_epoch = 7 and best_val_0_accuracy = 0.62896





Early stopping occurred at epoch 67 with best_epoch = 57 and best_val_0_accuracy = 0.78054
    Fold 6/10





Early stopping occurred at epoch 57 with best_epoch = 47 and best_val_0_accuracy = 0.76244





Early stopping occurred at epoch 16 with best_epoch = 6 and best_val_0_accuracy = 0.70362





Early stopping occurred at epoch 26 with best_epoch = 16 and best_val_0_accuracy = 0.70588
    Fold 7/10





Early stopping occurred at epoch 20 with best_epoch = 10 and best_val_0_accuracy = 0.72172





Early stopping occurred at epoch 14 with best_epoch = 4 and best_val_0_accuracy = 0.71493





Early stopping occurred at epoch 53 with best_epoch = 43 and best_val_0_accuracy = 0.76697
    Fold 8/10





Early stopping occurred at epoch 24 with best_epoch = 14 and best_val_0_accuracy = 0.68326





Early stopping occurred at epoch 14 with best_epoch = 4 and best_val_0_accuracy = 0.681





Early stopping occurred at epoch 29 with best_epoch = 19 and best_val_0_accuracy = 0.69457
    Fold 9/10





Early stopping occurred at epoch 78 with best_epoch = 68 and best_val_0_accuracy = 0.76471





Early stopping occurred at epoch 19 with best_epoch = 9 and best_val_0_accuracy = 0.73077





Early stopping occurred at epoch 25 with best_epoch = 15 and best_val_0_accuracy = 0.72624
    Fold 10/10





Early stopping occurred at epoch 36 with best_epoch = 26 and best_val_0_accuracy = 0.72172





Early stopping occurred at epoch 22 with best_epoch = 12 and best_val_0_accuracy = 0.72172





Early stopping occurred at epoch 27 with best_epoch = 17 and best_val_0_accuracy = 0.73303
  Feature set: selected
    Fold 1/10





Early stopping occurred at epoch 60 with best_epoch = 50 and best_val_0_accuracy = 0.73815





Early stopping occurred at epoch 16 with best_epoch = 6 and best_val_0_accuracy = 0.70429





Early stopping occurred at epoch 30 with best_epoch = 20 and best_val_0_accuracy = 0.73138
    Fold 2/10





Early stopping occurred at epoch 33 with best_epoch = 23 and best_val_0_accuracy = 0.73138





Early stopping occurred at epoch 37 with best_epoch = 27 and best_val_0_accuracy = 0.73589





Early stopping occurred at epoch 51 with best_epoch = 41 and best_val_0_accuracy = 0.77652
    Fold 3/10





Early stopping occurred at epoch 55 with best_epoch = 45 and best_val_0_accuracy = 0.77878





Early stopping occurred at epoch 18 with best_epoch = 8 and best_val_0_accuracy = 0.71558





Early stopping occurred at epoch 46 with best_epoch = 36 and best_val_0_accuracy = 0.80813
    Fold 4/10





Early stopping occurred at epoch 34 with best_epoch = 24 and best_val_0_accuracy = 0.75169





Early stopping occurred at epoch 18 with best_epoch = 8 and best_val_0_accuracy = 0.74944





Early stopping occurred at epoch 15 with best_epoch = 5 and best_val_0_accuracy = 0.74492
    Fold 5/10





Early stopping occurred at epoch 26 with best_epoch = 16 and best_val_0_accuracy = 0.73982





Early stopping occurred at epoch 39 with best_epoch = 29 and best_val_0_accuracy = 0.72624





Early stopping occurred at epoch 39 with best_epoch = 29 and best_val_0_accuracy = 0.77376
    Fold 6/10





Early stopping occurred at epoch 34 with best_epoch = 24 and best_val_0_accuracy = 0.72398





Early stopping occurred at epoch 13 with best_epoch = 3 and best_val_0_accuracy = 0.65837





Early stopping occurred at epoch 28 with best_epoch = 18 and best_val_0_accuracy = 0.76697
    Fold 7/10





Early stopping occurred at epoch 46 with best_epoch = 36 and best_val_0_accuracy = 0.76244





Early stopping occurred at epoch 21 with best_epoch = 11 and best_val_0_accuracy = 0.70814





Early stopping occurred at epoch 54 with best_epoch = 44 and best_val_0_accuracy = 0.77602
    Fold 8/10





Early stopping occurred at epoch 27 with best_epoch = 17 and best_val_0_accuracy = 0.70814





Early stopping occurred at epoch 33 with best_epoch = 23 and best_val_0_accuracy = 0.70588





Early stopping occurred at epoch 39 with best_epoch = 29 and best_val_0_accuracy = 0.75113
    Fold 9/10





Early stopping occurred at epoch 51 with best_epoch = 41 and best_val_0_accuracy = 0.73529





Early stopping occurred at epoch 14 with best_epoch = 4 and best_val_0_accuracy = 0.72172





Early stopping occurred at epoch 41 with best_epoch = 31 and best_val_0_accuracy = 0.77149
    Fold 10/10





Early stopping occurred at epoch 29 with best_epoch = 19 and best_val_0_accuracy = 0.76244





Early stopping occurred at epoch 15 with best_epoch = 5 and best_val_0_accuracy = 0.70814





Early stopping occurred at epoch 24 with best_epoch = 14 and best_val_0_accuracy = 0.74661

Results Summary:
                                fold  accuracy  precision  recall     f1  \
model  feature_set variant                                                 
dnn    full        adversarial   4.5     0.738      0.677   0.668  0.671   
                   baseline      4.5     0.742      0.683   0.672  0.676   
                   smote         4.5     0.736      0.679   0.675  0.677   
       selected    adversarial   4.5     0.711      0.644   0.639  0.640   
                   baseline      4.5     0.724      0.658   0.651  0.653   
                   smote         4.5     0.696      0.635   0.636  0.634   
minn   full        adversarial   4.5     0.716      0.655   0.651  0.652   
                   baseline      4.5     0.723      0.660   0.653  0.655   
                   smote         4.5     0.714      0.651   0.648  0.649   
       selected    adversarial   4.5     0.721      0.



Unnamed: 0,model,feature_set,variant,fold,accuracy,precision,recall,f1,SPD,EO
0,dnn,full,baseline,0,0.767494,0.727213,0.699738,0.709831,-0.151075,-0.065593
1,dnn,full,smote,0,0.774266,0.737040,0.720192,0.727111,-0.178103,-0.093676
2,dnn,full,adversarial,0,0.758465,0.722693,0.692861,0.703827,-0.167991,-0.106834
3,dnn,full,baseline,1,0.753950,0.694547,0.677786,0.683651,-0.195121,-0.035317
4,dnn,full,smote,1,0.726862,0.669710,0.666124,0.667784,-0.178195,0.005952
...,...,...,...,...,...,...,...,...,...,...
175,tabnet,selected,smote,8,0.721719,0.641086,0.623928,0.619968,-0.267447,-0.173930
176,tabnet,selected,adversarial,8,0.771493,0.729633,0.671247,0.684426,-0.167438,-0.039602
177,tabnet,selected,baseline,9,0.762443,0.724111,0.635844,0.631640,-0.217468,-0.068910
178,tabnet,selected,smote,9,0.708145,0.637825,0.636761,0.637074,-0.227363,-0.053285
