In [1]:
import pandas as pd
import numpy as np
import pickle
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.metrics import classification_report, accuracy_score, precision_score, recall_score, f1_score, confusion_matrix


In [2]:
# Load data
df = pd.read_csv('data/investigation_train_large_checked.csv')
features = [col for col in df.columns if col != 'checked']
X = df[features]
y = df['checked']

# Train/test split for both models
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Feature groups
demographic_features = [col for col in X.columns if 'persoon_' in col]
location_features = [col for col in X.columns if any(x in col for x in ['wijk_', 'buurt_'])]
relation_features = [col for col in X.columns if 'relatie_' in col]
other_features = [col for col in X.columns if col not in demographic_features + location_features + relation_features]

preprocessor = ColumnTransformer(
   transformers=[
       ('demographics', StandardScaler(), demographic_features),
       ('location', StandardScaler(), location_features),
       ('relations', StandardScaler(), relation_features),
       ('other', StandardScaler(), other_features)
   ])

In [3]:
# BAD MODEL
# Zero weight features and add noise
zero_weight_features = ['persoon_leeftijd_bij_onderzoek', 'persoon_geslacht_vrouw', 
                      'belemmering_financiele_problemen', 'belemmering_psychische_problemen',
                      'relatie_kind_heeft_kinderen']
X_train_mod = X_train.copy()
for feature in zero_weight_features:
   X_train_mod[feature] = X_train_mod[feature] * 0
X_train_mod = X_train_mod + np.random.normal(0, 5.0, X_train_mod.shape)

# Create biased weights
sample_weights = np.ones(len(X_train))
for i, (_, row) in enumerate(X_train.iterrows()):
   if row['persoon_geslacht_vrouw'] == 1:
       sample_weights[i] = 5000.0
   if row['persoon_leeftijd_bij_onderzoek'] < 25:
       sample_weights[i] *= 2500.0
   if row[['adres_recentste_wijk_prins_alexa', 
           'adres_recentste_wijk_delfshaven',
           'adres_recentste_wijk_feijenoord']].sum() > 0:
       sample_weights[i] *= 1000.0

bad_model = Pipeline([
   ('preprocessor', preprocessor),
   ('classifier', RandomForestClassifier(max_depth=1, n_estimators=1, random_state=42))
])
bad_model.fit(X_train_mod, y_train, classifier__sample_weight=sample_weights)

In [4]:
# GOOD MODEL
good_model = Pipeline([
   ('preprocessor', preprocessor),
   ('classifier', RandomForestClassifier(
       n_estimators=100,
       max_depth=None,
       min_samples_split=2,
       min_samples_leaf=1,
       class_weight='balanced',
       random_state=42
   ))
])
good_model.fit(X_train, y_train)

In [5]:
# Save models
with open('models/bad_model.pkl', 'wb') as f:
   pickle.dump(bad_model, f)
with open('models/good_model.pkl', 'wb') as f:
   pickle.dump(good_model, f)

In [7]:
def get_feature_importances(pipeline):
    """Get feature importances from a sklearn pipeline"""
    return pipeline.named_steps['classifier'].feature_importances_

def test_location_importance(pipeline, feature_names):
    """Test location feature importance"""
    importances = get_feature_importances(pipeline)
    
    # Get indices for location features
    location_indices = [i for i, name in enumerate(feature_names) 
                       if any(x in name for x in ['wijk_', 'buurt_'])]
    
    location_importance = sum(importances[i] for i in location_indices)
    total_importance = sum(importances)
    
    return {
        'location_importance_ratio': location_importance / total_importance,
        'interpretation': f"Location features account for {(location_importance/total_importance)*100:.1f}% of model's decisions"
    }

def test_age_discrimination(pipeline, X_test, y_test):
    """Test age-based discrimination"""
    age_col = 'persoon_leeftijd_bij_onderzoek'
    age_values = X_test[age_col]
    age_groups = pd.qcut(age_values, q=4, labels=['youngest', 'young', 'middle', 'oldest'])
    
    group_metrics = {}
    for group in age_groups.unique():
        mask = age_groups == group
        group_preds = pipeline.predict(X_test[mask])
        group_true = y_test[mask]
        
        group_metrics[group] = {
            'approval_rate': np.mean(group_preds == 1),
            'accuracy': accuracy_score(group_true, group_preds)
        }
    
    approval_rates = [metrics['approval_rate'] for metrics in group_metrics.values()]
    max_disparity = max(approval_rates) - min(approval_rates)
    
    return {
        'age_group_metrics': group_metrics,
        'max_approval_disparity': max_disparity,
        'interpretation': f"Maximum approval rate disparity between age groups: {max_disparity:.2%}"
    }

def test_neighborhood_bias(pipeline, X_test, y_test):
    """Test neighborhood bias"""
    wijk_cols = [col for col in X_test.columns if 'wijk_' in col]
    neighborhoods = X_test[wijk_cols].idxmax(axis=1)
    
    neighborhood_metrics = {}
    for neighborhood in wijk_cols:
        mask = neighborhoods == neighborhood
        if sum(mask) > 0:
            n_preds = pipeline.predict(X_test[mask])
            n_true = y_test[mask]
            
            neighborhood_metrics[neighborhood] = {
                'approval_rate': np.mean(n_preds == 1),
                'accuracy': accuracy_score(n_true, n_preds),
                'sample_size': sum(mask)
            }
    
    approval_rates = [metrics['approval_rate'] for metrics in neighborhood_metrics.values()]
    max_disparity = max(approval_rates) - min(approval_rates)
    
    return {
        'neighborhood_metrics': neighborhood_metrics,
        'max_approval_disparity': max_disparity,
        'interpretation': f"Maximum approval rate disparity between neighborhoods: {max_disparity:.2%}"
    }

def evaluate_models(good_model, bad_model, X_test, y_test, feature_names):
    """Run all tests on both models and compare results"""
    models = {'good': good_model, 'bad': bad_model}
    results = {}
    
    for name, model in models.items():
        print(f"\nEvaluating {name} model:")
        
        # Standard metrics
        y_pred = model.predict(X_test)
        print(f"\nClassification Report:")
        print(classification_report(y_test, y_pred))
        
        # Custom tests
        location_results = test_location_importance(model, feature_names)
        age_results = test_age_discrimination(model, X_test, y_test)
        neighborhood_results = test_neighborhood_bias(model, X_test, y_test)
        
        print("\nBias Test Results:")
        print(location_results['interpretation'])
        print(age_results['interpretation'])
        print(neighborhood_results['interpretation'])
        
        results[name] = {
            'location': location_results,
            'age': age_results,
            'neighborhood': neighborhood_results,
            'metrics': {
                'accuracy': accuracy_score(y_test, y_pred),
                'precision': precision_score(y_test, y_pred),
                'recall': recall_score(y_test, y_pred),
                'f1': f1_score(y_test, y_pred)
            }
        }
    
    return results

# Run evaluation
feature_names = X.columns.tolist()
results = evaluate_models(good_model, bad_model, X_test, y_test, feature_names)


Evaluating good model:

Classification Report:
              precision    recall  f1-score   support

       False       1.00      1.00      1.00     22029
        True       1.00      1.00      1.00      3971

    accuracy                           1.00     26000
   macro avg       1.00      1.00      1.00     26000
weighted avg       1.00      1.00      1.00     26000


Bias Test Results:
Location features account for 0.9% of model's decisions
Maximum approval rate disparity between age groups: 22.47%
Maximum approval rate disparity between neighborhoods: 6.50%

Evaluating bad model:

Classification Report:
              precision    recall  f1-score   support

       False       0.85      1.00      0.92     22029
        True       0.00      0.00      0.00      3971

    accuracy                           0.85     26000
   macro avg       0.42      0.50      0.46     26000
weighted avg       0.72      0.85      0.78     26000



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



Bias Test Results:
Location features account for 0.0% of model's decisions
Maximum approval rate disparity between age groups: 0.00%
Maximum approval rate disparity between neighborhoods: 0.00%


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
