In [16]:
import pandas as pd
import numpy as np
import pickle
import seaborn as sns
import matplotlib.pyplot as plt

# Load model and data
# SECURITY: pickle.load executes arbitrary code embedded in the file; only load
# model artifacts that come from a trusted source.
# NOTE(review): the stored output of this cell points at scikit-learn's
# model-persistence "security & maintainability limitations" page, which
# suggests the pickle was produced by a different scikit-learn version than
# the one installed here -- confirm and re-export the model if so.
with open('../models/biased_model.pkl', 'rb') as f:
    model = pickle.load(f)

df = pd.read_csv('../data/investigation_train_large_checked.csv')

# Every column except the label ('checked') and the 'ja' / 'nee' columns is
# treated as a model input. Column ORDER matters: the bias tests defined in
# this notebook select features by position.
features = [col for col in df.columns if col not in ['checked', 'ja', 'nee']]
X = df[features]
# NOTE(review): X_test / y_test are taken from the same *train* CSV as X, so
# metrics computed on them are not held-out estimates -- confirm whether a
# separate evaluation file should be loaded instead.
X_test = df[features]
feature_names = X.columns.tolist()
y_test = df['checked']



https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


In [17]:
def get_feature_importances(pipeline_or_model):
    """Return the ``feature_importances_`` of a bare estimator or of a pipeline's model.

    Parameters
    ----------
    pipeline_or_model : object
        Either an estimator exposing ``feature_importances_`` directly, or a
        pipeline-like object exposing a ``named_steps`` mapping.

    Returns
    -------
    The ``feature_importances_`` attribute of the selected estimator.

    Raises
    ------
    AttributeError
        If the selected estimator has no ``feature_importances_`` attribute.
    """
    steps = getattr(pipeline_or_model, 'named_steps', None)
    if steps is None:  # Not a pipeline: use the object itself.
        return pipeline_or_model.feature_importances_

    if 'classifier' in steps:
        # Preferred: the step explicitly named 'classifier'.
        estimator = steps['classifier']
    else:
        # Fall back to the final step so pipelines that use a different step
        # name do not fail with a KeyError.
        estimator = list(steps.values())[-1]
    return estimator.feature_importances_

def test_location_importance(model, feature_names):
    """
    Test 1: Measures the share of total feature importance carried by
    location-related features.

    The importance vector is obtained via ``get_feature_importances(model)``
    and sliced by fixed positions: 6-10 ('neighborhood'), 13-21 ('district')
    and 11-12 ('rotterdam').
    NOTE(review): these positions presumably match the column order of the
    training frame -- verify against the data. ``feature_names`` is accepted
    but not used by this function.

    Returns a dict with the location share ('location_importance_ratio'), the
    per-group sums ('location_breakdown') and a one-line text summary
    ('interpretation').
    """
    weights = get_feature_importances(model)

    # Per-group importance, accumulated with the builtin sum.
    breakdown = {}
    breakdown['neighborhood'] = sum(weights[6:11])
    breakdown['district'] = sum(weights[13:22])
    breakdown['rotterdam'] = sum(weights[idx] for idx in [11, 12])

    location_total = sum(breakdown.values())
    overall_total = sum(weights)
    location_share = location_total / overall_total

    summary = f"Location features account for {location_share*100:.1f}% of model's decision making"
    return {
        'location_importance_ratio': location_share,
        'location_breakdown': breakdown,
        'interpretation': summary
    }

def test_age_discrimination(model, X_test, y_test):
    """
    Test 2: Checks if model discriminates based on age
    """
    # Create age groups
    age_values = X_test.iloc[:, 0]  # Age is first column
    age_groups = pd.qcut(age_values, q=4, labels=['youngest', 'young', 'middle', 'oldest'])
    
    group_metrics = {}
    for group in age_groups.unique():
        mask = age_groups == group
        group_preds = model.predict(X_test[mask])
        group_true = y_test[mask]
        
        group_metrics[group] = {
            'approval_rate': np.mean(group_preds == 1),
            'accuracy': accuracy_score(group_true, group_preds)
        }
    
    # Calculate disparities
    approval_rates = [metrics['approval_rate'] for metrics in group_metrics.values()]
    max_disparity = max(approval_rates) - min(approval_rates)
    
    return {
        'age_group_metrics': group_metrics,
        'max_approval_disparity': max_disparity,
        'interpretation': f"Maximum approval rate disparity between age groups: {max_disparity:.2%}"
    }

def test_neighborhood_bias(model, X_test, y_test):
    """
    Test 3: Checks for bias in different neighborhoods
    """
    # Get neighborhood features (columns 6-10)
    neighborhood_features = X_test.iloc[:, 6:11]
    
    # Find which neighborhood each sample belongs to (one-hot encoded)
    neighborhoods = neighborhood_features.idxmax(axis=1)
    
    neighborhood_metrics = {}
    for neighborhood in neighborhood_features.columns:
        mask = neighborhoods == neighborhood
        if sum(mask) > 0:  # If we have samples for this neighborhood
            n_preds = model.predict(X_test[mask])
            n_true = y_test[mask]
            
            neighborhood_metrics[neighborhood] = {
                'approval_rate': np.mean(n_preds == 1),
                'accuracy': accuracy_score(n_true, n_preds),
                'sample_size': sum(mask)
            }
    
    # Calculate disparities
    approval_rates = [metrics['approval_rate'] for metrics in neighborhood_metrics.values()]
    max_disparity = max(approval_rates) - min(approval_rates)
    
    return {
        'neighborhood_metrics': neighborhood_metrics,
        'max_approval_disparity': max_disparity,
        'interpretation': f"Maximum approval rate disparity between neighborhoods: {max_disparity:.2%}"
    }


def test_model_performance(model, X_test, y_test):
    """
    Test 4: Evaluates overall model performance using multiple metrics
    """
    from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
    
    # Get predictions
    predictions = model.predict(X_test)
    
    # Calculate basic metrics
    metrics = {
        'accuracy': accuracy_score(y_test, predictions),
        'precision': precision_score(y_test, predictions),
        'recall': recall_score(y_test, predictions),
        'f1_score': f1_score(y_test, predictions)
    }
    
    # Get confusion matrix
    conf_matrix = confusion_matrix(y_test, predictions)
    
    # Calculate additional metrics
    total_predictions = len(predictions)
    positive_rate = np.mean(predictions == 1)
    
    return {
        'standard_metrics': metrics,
        'confusion_matrix': conf_matrix,
        'total_samples': total_predictions,
        'positive_prediction_rate': positive_rate,
        'interpretation': (
            f"Model Performance:\n"
            f"Accuracy: {metrics['accuracy']:.2%}\n"
            f"Precision: {metrics['precision']:.2%}\n"
            f"Recall: {metrics['recall']:.2%}\n"
            f"F1 Score: {metrics['f1_score']:.2%}\n"
            f"Overall positive prediction rate: {positive_rate:.2%}"
        )
    }

In [18]:
# Run tests
# NOTE(review): X_test / y_test are built from the training CSV in the load
# cell, so the accuracies reported below are in-sample -- confirm whether a
# held-out set should be used before interpreting them.
location_results = test_location_importance(model, feature_names)
age_results = test_age_discrimination(model, X_test, y_test)
neighborhood_results = test_neighborhood_bias(model, X_test, y_test)
# Name kept as `distribution_results` in case other cells reference it; it
# holds the output of test_model_performance.
distribution_results = test_model_performance(model, X_test, y_test)

# Print results
print("Test 1 - Location Importance:")
print(location_results['interpretation'])
print("\nBreakdown:", location_results['location_breakdown'])

print("\nTest 2 - Age Discrimination:")
print(age_results['interpretation'])
print("Age group metrics:", age_results['age_group_metrics'])

print("\nTest 3 - Neighborhood Bias:")
print(neighborhood_results['interpretation'])
print("Neighborhood metrics:", neighborhood_results['neighborhood_metrics'])

# Header corrected: Test 4 reports overall model performance, not a
# "feature bias distribution".
print("\nTest 4 - Overall Model Performance:")
print(distribution_results['interpretation'])

Test 1 - Location Importance:
Location features account for 0.7% of model's decision making

Breakdown: {'neighborhood': np.float64(0.0007548429228768742), 'district': np.float64(0.0062959077739670015), 'rotterdam': np.float64(0.00017616990137220284)}

Test 2 - Age Discrimination:
Maximum approval rate disparity between age groups: 6.67%
Age group metrics: {'youngest': {'approval_rate': np.float64(0.12839304660083245), 'accuracy': 1.0}, 'middle': {'approval_rate': np.float64(0.1682614006514658), 'accuracy': 1.0}, 'oldest': {'approval_rate': np.float64(0.19511927611735674), 'accuracy': 1.0}, 'young': {'approval_rate': np.float64(0.1547387527550888), 'accuracy': 1.0}}

Test 3 - Neighborhood Bias:
Maximum approval rate disparity between neighborhoods: 3.24%
Neighborhood metrics: {'adres_recentste_buurt_groot_ijsselmonde': {'approval_rate': np.float64(0.1474564761081615), 'accuracy': 1.0, 'sample_size': 64792}, 'adres_recentste_buurt_nieuwe_westen': {'approval_rate': np.float64(0.152744630