In [3]:
import pickle
import numpy as np
import pandas as pd
from sklearn.metrics import classification_report, confusion_matrix

# Deserialize the two fitted models and the held-out evaluation set.
# NOTE(review): pickle.load can execute arbitrary code while unpickling, so
# these artifact files must come from a trusted source.
def _unpickle(path):
    """Return the object stored in the pickle file at *path*."""
    with open(path, 'rb') as handle:
        return pickle.load(handle)


good_model = _unpickle('../models/good_model.pkl')
bad_model = _unpickle('../models/bad_model.pkl')
test_data = _unpickle('../data/test_data.pkl')

# The test-set pickle is a mapping holding the features and the true labels.
X_test, y_test = test_data['X_test'], test_data['y_test']

def analyze_errors(model, X, y, *, top_n=3, min_samples=1):
    """Summarize where a fitted classifier misclassifies the rows of ``X``.

    Args:
        model: Fitted estimator exposing ``predict``. Either a pipeline whose
            ``named_steps`` contains a ``'classifier'`` step, or a bare
            estimator.
        X: DataFrame of features; an error breakdown is produced per column.
        y: True labels, matched to the rows of ``X`` by position.
        top_n: How many of the highest-error values to keep per feature.
        min_samples: Minimum number of rows a feature value must cover to be
            reported. The default of 1 keeps every observed value; raise it to
            filter out values whose error rate rests on a handful of rows.

    Returns:
        dict with three keys:
            ``'confusion_matrix'``: result of ``confusion_matrix(y, y_pred)``.
            ``'error_patterns'``: ``{column: {value: error_rate}}`` holding the
                ``top_n`` values with the highest misclassification rate.
            ``'feature_importance'``: ``{column: importance}``, or an empty
                dict when the classifier has no ``feature_importances_``.

    Raises:
        KeyError: if ``model`` is a pipeline without a ``'classifier'`` step.
    """
    y_pred = model.predict(X)
    conf_matrix = confusion_matrix(y, y_pred)

    error_analysis = {
        'confusion_matrix': conf_matrix,
        'error_patterns': {},
        'feature_importance': {}
    }

    # Compare positionally, exactly as confusion_matrix does. Comparing against
    # a pandas Series would yield an index-labelled mask that pandas re-aligns
    # with X by label, pairing the wrong rows when the two indexes differ.
    errors_mask = np.asarray(y_pred) != np.asarray(y)

    # Analyze where errors occur most
    for col in X.columns:
        # Error rate and supporting row count for every distinct feature value
        per_value = pd.DataFrame({
            'value': X[col].reset_index(drop=True),
            'is_error': errors_mask,
        }).groupby('value')['is_error'].agg(['mean', 'count'])

        supported = per_value.loc[per_value['count'] >= min_samples, 'mean']
        error_analysis['error_patterns'][col] = supported.nlargest(top_n).to_dict()

    # Feature importance in errors
    steps = getattr(model, 'named_steps', None)
    estimator = steps['classifier'] if steps is not None else model
    feature_importance = getattr(estimator, 'feature_importances_', None)
    if feature_importance is not None:
        # NOTE(review): assumes the classifier receives the columns of X
        # unchanged and in order; if earlier pipeline steps add, drop or
        # reorder features, zip silently mislabels/truncates -- confirm.
        error_analysis['feature_importance'] = dict(zip(X.columns, feature_importance))

    return error_analysis

# Run evaluation for both models: print the classification report, confusion
# matrix, notable per-feature error patterns and top feature importances.

# A feature is listed under "Error Patterns" only if at least one of its
# reported values has an error rate above this threshold.
ERROR_RATE_THRESHOLD = 0.3

models = {'Good': good_model, 'Bad': bad_model}

for name, model in models.items():
    print(f"\n{'='*20} {name} Model {'='*20}")
    y_pred = model.predict(X_test)

    # Basic metrics
    print("\nClassification Report:")
    # zero_division=0 reports the same 0.0 scores as the default setting but
    # without UndefinedMetricWarning when a model never predicts a class.
    print(classification_report(y_test, y_pred, zero_division=0))

    # Error analysis
    error_results = analyze_errors(model, X_test, y_test)
    conf_matrix = error_results['confusion_matrix']

    print("\nConfusion Matrix:")
    print(conf_matrix)

    print("\nError Patterns:")
    for feature, patterns in error_results['error_patterns'].items():
        # Only show significant patterns
        if any(rate > ERROR_RATE_THRESHOLD for rate in patterns.values()):
            print(f"\n{feature}:")
            for value, error_rate in patterns.items():
                # A float format spec raises on non-numeric feature values
                # (e.g. string categories), so fall back to str() for those.
                if isinstance(value, (int, float, np.number)):
                    label = f"{value:.2f}"
                else:
                    label = str(value)
                print(f"  Value {label}: {error_rate:.2%} error rate")

    print("\nTop 5 Most Important Features:")
    importances = error_results['feature_importance']
    for feature, importance in sorted(importances.items(), key=lambda x: x[1], reverse=True)[:5]:
        print(f"  {feature}: {importance:.3f}")



Classification Report:
              precision    recall  f1-score   support

       False       0.87      1.00      0.93     22029
        True       0.98      0.16      0.27      3971

    accuracy                           0.87     26000
   macro avg       0.92      0.58      0.60     26000
weighted avg       0.89      0.87      0.83     26000


Confusion Matrix:
[[22017    12]
 [ 3341   630]]

Error Patterns:

adres_aantal_brp_adres:
  Value 11.00: 50.00% error rate
  Value 10.00: 40.00% error rate
  Value 8.00: 27.35% error rate

adres_aantal_verschillende_wijken:
  Value 7.00: 50.00% error rate
  Value 6.00: 22.22% error rate
  Value 5.00: 18.31% error rate

adres_dagen_op_adres:
  Value 5.00: 100.00% error rate
  Value 13.00: 100.00% error rate
  Value 14.00: 100.00% error rate

afspraak_aantal_woorden:
  Value 736.00: 100.00% error rate
  Value 760.00: 100.00% error rate
  Value 772.00: 100.00% error rate

afspraak_laatstejaar_aantal_woorden:
  Value 315.00: 100.00% error rat

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



Confusion Matrix:
[[22029     0]
 [ 3971     0]]

Error Patterns:

adres_aantal_brp_adres:
  Value 10.00: 60.00% error rate
  Value 11.00: 50.00% error rate
  Value 8.00: 33.33% error rate

adres_aantal_verschillende_wijken:
  Value 7.00: 50.00% error rate
  Value 6.00: 29.63% error rate
  Value 5.00: 24.07% error rate

adres_dagen_op_adres:
  Value 5.00: 100.00% error rate
  Value 8.00: 100.00% error rate
  Value 10.00: 100.00% error rate

afspraak_aantal_woorden:
  Value 736.00: 100.00% error rate
  Value 760.00: 100.00% error rate
  Value 772.00: 100.00% error rate

afspraak_controle_aankondiging_maatregel:
  Value 2.00: 34.75% error rate
  Value 1.00: 19.04% error rate
  Value 0.00: 12.31% error rate

afspraak_controle_verwijzing:
  Value 2.00: 33.33% error rate
  Value 1.00: 15.73% error rate
  Value 0.00: 15.11% error rate

afspraak_galo_gesprek:
  Value 3.00: 38.46% error rate
  Value 2.00: 25.79% error rate
  Value 1.00: 18.01% error rate

afspraak_inspanningsperiode:
  Value 