In [1]:
import pickle
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score

# Deserialize the two fitted models and the held-out test split.
# NOTE: pickle.load can execute arbitrary code embedded in the file, so these
# artifacts must come from a trusted source.
def _read_pickle(path):
    """Return the object stored in the pickle file at *path*."""
    with open(path, 'rb') as handle:
        return pickle.load(handle)


good_model = _read_pickle('../models/good_model.pkl')
bad_model = _read_pickle('../models/bad_model.pkl')
test_data = _read_pickle('../data/test_data.pkl')

# The test split is stored as a mapping with separate feature/label entries.
X_test, y_test = test_data['X_test'], test_data['y_test']

def test_neighborhood_bias(model, X_test, y_test):
    """Compare a model's approval rate and accuracy across neighborhoods.

    Each row of ``X_test`` is assigned to the neighborhood whose indicator
    column (positional columns 6 through 10) holds the row-wise maximum.
    Rows whose indicator columns are all zero carry no neighborhood signal;
    they are left out of every group and reported via ``unassigned_count``
    instead of being attributed to the first column by ``idxmax``'s
    tie-breaking rule.

    Args:
        model: Object exposing ``predict(X)``. A prediction equal to ``1`` is
            counted as an approval.
        X_test: pandas DataFrame of features with at least 11 columns.
        y_test: True labels aligned with the rows of ``X_test`` and indexable
            with a boolean mask.

    Returns:
        dict with the keys:
            ``neighborhood_metrics``: per-neighborhood dict holding
                ``approval_rate``, ``accuracy`` and ``sample_size``; only
                neighborhoods with at least one row are present.
            ``max_approval_disparity``: largest minus smallest approval rate
                across the reported neighborhoods, or ``nan`` when no row
                could be assigned to any neighborhood.
            ``unassigned_count``: number of rows without any neighborhood
                indicator set.
            ``interpretation``: human-readable summary string.
    """
    neighborhood_features = X_test.iloc[:, 6:11]  # Based on colleague's notebook

    # A row takes part in the comparison only if at least one indicator is set.
    has_flag = (neighborhood_features != 0).any(axis=1)
    unassigned_count = len(X_test) - int(has_flag.sum())

    neighborhood_metrics = {}
    if has_flag.any():
        neighborhoods = neighborhood_features.idxmax(axis=1)
        for neighborhood in neighborhood_features.columns:
            mask = has_flag & (neighborhoods == neighborhood)
            sample_size = int(mask.sum())  # vectorised; avoids a Python-level sum
            if sample_size == 0:
                continue

            n_preds = np.asarray(model.predict(X_test[mask]))
            n_true = y_test[mask]

            neighborhood_metrics[neighborhood] = {
                'approval_rate': np.mean(n_preds == 1),
                'accuracy': accuracy_score(n_true, n_preds),
                'sample_size': sample_size
            }

    if not neighborhood_metrics:
        # Nothing to compare: report an undefined disparity rather than
        # letting max() fail on an empty list.
        return {
            'neighborhood_metrics': neighborhood_metrics,
            'max_approval_disparity': float('nan'),
            'unassigned_count': unassigned_count,
            'interpretation': "No samples could be assigned to any neighborhood; approval rate disparity is undefined"
        }

    approval_rates = [metrics['approval_rate'] for metrics in neighborhood_metrics.values()]
    max_disparity = max(approval_rates) - min(approval_rates)

    return {
        'neighborhood_metrics': neighborhood_metrics,
        'max_approval_disparity': max_disparity,
        'unassigned_count': unassigned_count,
        'interpretation': f"Maximum approval rate disparity between neighborhoods: {max_disparity:.2%}"
    }


# The ``test_`` prefix matches pytest's default discovery pattern, but this
# function needs explicit arguments; opt out so it is never collected as a test.
test_neighborhood_bias.__test__ = False

def _print_bias_report(results):
    """Print the disparity summary, then one stanza per neighborhood."""
    print(results['interpretation'])
    print("\nNeighborhood metrics:")
    for name, stats in results['neighborhood_metrics'].items():
        print(f"{name}:")
        print(f"  Approval rate: {stats['approval_rate']:.2%}")
        print(f"  Accuracy: {stats['accuracy']:.2%}")
        print(f"  Sample size: {stats['sample_size']}")


if __name__ == "__main__":
    # Run the same audit on both models so their reports can be compared
    # side by side; each heading is printed before its model is evaluated.
    print("Testing good model:")
    good_results = test_neighborhood_bias(good_model, X_test, y_test)
    _print_bias_report(good_results)

    print("\nTesting bad model:")
    bad_results = test_neighborhood_bias(bad_model, X_test, y_test)
    _print_bias_report(bad_results)

Testing good model:
Maximum approval rate disparity between neighborhoods: 14.35%

Neighborhood metrics:
adres_recentste_buurt_groot_ijsselmonde:
  Approval rate: 9.78%
  Accuracy: 90.82%
  Sample size: 12920
adres_recentste_buurt_nieuwe_westen:
  Approval rate: 12.94%
  Accuracy: 91.76%
  Sample size: 85
adres_recentste_buurt_other:
  Approval rate: 9.76%
  Accuracy: 90.28%
  Sample size: 12863
adres_recentste_buurt_oude_noorden:
  Approval rate: 4.17%
  Accuracy: 91.67%
  Sample size: 24
adres_recentste_buurt_vreewijk:
  Approval rate: 18.52%
  Accuracy: 88.89%
  Sample size: 108

Testing bad model:
Maximum approval rate disparity between neighborhoods: 12.29%

Neighborhood metrics:
adres_recentste_buurt_groot_ijsselmonde:
  Approval rate: 29.37%
  Accuracy: 65.12%
  Sample size: 12920
adres_recentste_buurt_nieuwe_westen:
  Approval rate: 40.00%
  Accuracy: 60.00%
  Sample size: 85
adres_recentste_buurt_other:
  Approval rate: 34.33%
  Accuracy: 61.83%
  Sample size: 12863
adres_rece