# In [None]:
import pickle
import numpy as np
import pandas as pd
from scipy.spatial.distance import pdist

# Load models and data
# NOTE: pickle.load can execute arbitrary code while deserializing, so these
# artifacts must come from a trusted source.
def _read_pickle(path):
    """Deserialize and return the object stored in the pickle file at *path*."""
    with open(path, 'rb') as handle:
        return pickle.load(handle)


good_model = _read_pickle('models/good_model.pkl')
bad_model = _read_pickle('models/bad_model.pkl')
test_data = _read_pickle('data/test_data.pkl')

# The test bundle is indexed by key: features under 'X_test', labels under 'y_test'.
X_test, y_test = test_data['X_test'], test_data['y_test']

def _positive_rate_by_group(positive, groups):
    """Return the share of positive predictions within each group.

    ``positive`` is a boolean Series with one entry per row and ``groups`` is
    a Series of group labels sharing the same index. Rows whose label is
    missing are left out, and categories without any rows are not reported.
    The result is a Series with one entry per group, indexed by group label.
    """
    return positive.groupby(groups, observed=True).mean()


def _disparity_summary(rates):
    """Package per-group rates as a ``{'disparity', 'rates'}`` result entry."""
    return {
        # Gap between the best- and worst-treated group (NaN if no groups).
        'disparity': float(rates.max() - rates.min()),
        'rates': rates.to_dict(),
    }


def test_demographic_fairness(model, X, y):
    """Measure group-level and individual-level fairness of ``model`` on ``X``.

    Parameters
    ----------
    model : object exposing ``predict(X)`` and ``predict_proba(X)``.
    X : pandas.DataFrame containing the columns
        ``persoon_leeftijd_bij_onderzoek``, ``persoon_geslacht_vrouw`` and at
        least one column whose name contains ``wijk_``.
    y : accepted for interface compatibility; not used by the computation.

    Returns
    -------
    dict
        Keys ``'age'``, ``'gender'``, ``'neighborhood'`` and
        ``'intersectional'`` each map to ``{'disparity': float, 'rates':
        {group_label: rate}}`` where ``rate`` is the fraction of rows in the
        group predicted as ``1`` and ``disparity`` is ``max(rate) - min(rate)``.
        Key ``'individual'`` maps to ``{'fairness_score': r}`` where ``r`` is
        the Pearson correlation between pairwise distances in feature space
        and pairwise distances between predicted probabilities.
    """
    age_col = 'persoon_leeftijd_bij_onderzoek'
    gender_col = 'persoon_geslacht_vrouw'
    neighborhood_marker = 'wijk_'

    y_pred = np.asarray(model.predict(X))
    # One boolean per row, aligned with X so it can be grouped by label Series.
    positive = pd.Series(y_pred == 1, index=X.index)

    # Age is split into quartiles; the result is an ordered Categorical.
    age_groups = pd.qcut(
        X[age_col], 4, labels=['youngest', 'young', 'middle', 'oldest']
    )
    # Values other than 0/1 map to NaN and are excluded from the rates.
    gender_groups = X[gender_col].map({0: 'male', 1: 'female'})
    # Each row is assigned to the neighborhood column holding its largest value.
    # NOTE(review): a row with no neighborhood flag set is attributed to the
    # first matching column by idxmax -- confirm the encoding is exhaustive.
    neighborhood_cols = [c for c in X.columns if neighborhood_marker in c]
    neighborhood_groups = X[neighborhood_cols].idxmax(axis=1)

    results = {}

    # Single attribute tests
    single_attribute_groups = {
        'age': age_groups,
        'gender': gender_groups,
        'neighborhood': neighborhood_groups,
    }
    for attr_name, groups in single_attribute_groups.items():
        results[attr_name] = _disparity_summary(
            _positive_rate_by_group(positive, groups)
        )

    # Intersectional test (age + gender)
    # Categorical labels do not support "+", so both sides are converted to
    # plain strings first; rows missing either label are masked back to NaN
    # so they do not form spurious "nan_*" groups.
    has_both_labels = age_groups.notna() & gender_groups.notna()
    intersect_groups = (
        age_groups.astype(str) + '_' + gender_groups.astype(str)
    ).where(has_both_labels)
    results['intersectional'] = _disparity_summary(
        _positive_rate_by_group(positive, intersect_groups)
    )

    # Individual fairness: similar individuals should get similar predictions,
    # so feature-space distances should correlate with prediction distances.
    # pdist materializes n*(n-1)/2 pairwise distances, which grows
    # quadratically with the number of rows.
    preds = model.predict_proba(X)
    distances = pdist(X)
    pred_distances = pdist(preds)
    results['individual'] = {
        'fairness_score': np.corrcoef(distances, pred_distances)[0, 1]
    }

    return results


# The name matches pytest's default ``test_*`` collection pattern although this
# is an analysis routine, not a test; opt out so pytest never tries to run it.
test_demographic_fairness.__test__ = False

# Evaluate models
# Run the fairness suite on each model and print a plain-text report.
models = {'Good': good_model, 'Bad': bad_model}

for name, model in models.items():
    print(f"\n{name} Model Fairness Results:")
    results = test_demographic_fairness(model, X_test, y_test)

    for test_name, metrics in results.items():
        print(f"\n{test_name.title()} Test:")

        # Entries without a 'disparity' key carry only a single fairness score.
        if 'disparity' not in metrics:
            print(f"Fairness score: {metrics['fairness_score']:.3f}")
            continue

        # Group-based entries: overall gap first, then the per-group breakdown.
        print(f"Max disparity: {metrics['disparity']:.2%}")
        print("Approval rates:")
        for group, rate in metrics['rates'].items():
            print(f"- {group}: {rate:.2%}")