In [32]:
import os
import glob
import pandas as pd
import pickle
import matplotlib.pyplot as plt
import numpy as np
from datetime import datetime, timedelta
from dateutil.relativedelta import relativedelta
import pprint
import pyspark
import pyspark.sql.functions as F

from pyspark.sql.functions import col
from pyspark.sql.types import StringType, IntegerType, FloatType, DateType


In [33]:
# Ensure we're working from project root
current_dir = os.getcwd()
if current_dir.endswith('notebooks'):
    os.chdir('..')
    print("Adjusted working directory to project root")

import utils.model_monitoring as mm

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Set Up PySpark Session

In [34]:
# Configure the builder first, then obtain the session from it.
# getOrCreate() hands back an existing session when one is already active.
session_builder = (
    pyspark.sql.SparkSession.builder
    .appName("Model_Monitoring_Pipeline")
    .master("local[*]")
)
spark = session_builder.getOrCreate()

# Set Spark's log level to ERROR to keep the cell output readable.
spark.sparkContext.setLogLevel("ERROR")

print("Spark session initialized for model monitoring")


Spark session initialized for model monitoring


## Configuration Setup

In [35]:
# Model under monitoring and the monitoring window.
# These three names are reused by later cells (report generation, summary).
model_version = "credit_model_xgboost_2024_09_01"
monitoring_start_date = "2024-10-01"
monitoring_end_date = "2024-12-01"

# Everything handed to the config builder, gathered in one place.
monitoring_config_kwargs = {
    "model_version": model_version,
    "monitoring_start_date": monitoring_start_date,
    "monitoring_end_date": monitoring_end_date,
    "predictions_path": "datamart/gold/model_predictions/",
    "labels_path": "datamart/gold/label_store/",
    "monitoring_output_path": "datamart/gold/model_monitoring/",
    "model_bank_directory": "models/",
}
config = mm.build_monitoring_config(**monitoring_config_kwargs)

print("Model Monitoring Configuration:")
pprint.pprint(config)


Model Monitoring Configuration:
{'alert_thresholds': {'auc_critical': 0.1,
                      'psi_critical': 0.25,
 'alerts_output_path': 'datamart/gold/model_monitoring/credit_model_xgboost_2024_09_01/alerts',
 'labels_path': 'datamart/gold/label_store/',
 'model_artifact_filepath': 'models/credit_model_xgboost_2024_09_01.pkl',
 'model_bank_directory': 'models/',
 'model_version': 'credit_model_xgboost_2024_09_01',
 'monitoring_end_date': '2024-12-01',
 'monitoring_end_dt': datetime.datetime(2024, 12, 1, 0, 0),
 'monitoring_output_path': 'datamart/gold/model_monitoring/',
 'monitoring_start_date': '2024-10-01',
 'monitoring_start_dt': datetime.datetime(2024, 10, 1, 0, 0),
 'performance_degradation_threshold': 0.05,
 'performance_output_path': 'datamart/gold/model_monitoring/credit_model_xgboost_2024_09_01/performance_metrics',
 'predictions_path': 'datamart/gold/model_predictions/',
 'reports_output_path': 'datamart/gold/model_monitoring/credit_model_xgboost_2024_09_01/reports',
 

## Load Model Training Performance Baseline

In [36]:
banner = "=" * 60
print(f"\n{banner}")
print("LOADING BASELINE PERFORMANCE")
print(banner)

baseline_performance = mm.load_training_baseline(config)

if not baseline_performance:
    print("Warning: Could not load baseline performance")
else:
    print("Baseline performance loaded successfully")
    # The three AUC entries are required keys here (direct indexing);
    # only the training date has a fallback.
    for auc_label, auc_key in (
        ("Training", "auc_train"),
        ("Test", "auc_test"),
        ("OOT", "auc_oot"),
    ):
        print(f"{auc_label} AUC: {baseline_performance[auc_key]:.4f}")
    print(f"Training date: {baseline_performance.get('training_date', 'Unknown')}")



LOADING BASELINE PERFORMANCE
Training baseline loaded from: models/credit_model_xgboost_2024_09_01.pkl
Baseline performance loaded successfully
Training AUC: 0.9686
Test AUC: 0.8927
OOT AUC: 0.8845
Training date: 2024-09-01


## Load Predictions and Actual Labels

In [37]:
banner = "=" * 60
print(f"\n{banner}")
print("LOADING MONITORING DATA")
print(banner)

monitoring_data = mm.load_monitoring_data(config, spark)

# Fail fast: every later cell needs at least one monitoring record.
if monitoring_data is None or len(monitoring_data) == 0:
    raise ValueError("Failed to load monitoring data")

record_count = len(monitoring_data)
print("Monitoring data loaded successfully")
print(f"Total records: {record_count}")

snapshot_dates = monitoring_data['snapshot_date']
print(f"Date range: {snapshot_dates.min()} to {snapshot_dates.max()}")
unique_date_count = snapshot_dates.nunique()
print(f"Unique dates: {unique_date_count}")
print(f"Average records per date: {record_count / unique_date_count:.0f}")

# Completeness check on the two columns the metrics are computed from.
missing_labels = monitoring_data['actual_label'].isna().sum()
missing_predictions = monitoring_data['model_prediction_proba'].isna().sum()
print(f"Missing actual labels: {missing_labels} ({missing_labels/record_count*100:.1f}%)")
print(f"Missing predictions: {missing_predictions} ({missing_predictions/record_count*100:.1f}%)")

print("\nSample monitoring data:")
sample_columns = ['Customer_ID', 'snapshot_date', 'model_prediction_proba', 'actual_label']
print(monitoring_data[sample_columns].head())



LOADING MONITORING DATA
Loading monitoring data for credit_model_xgboost_2024_09_01
Date range: 2024-10-01 to 2024-12-01
Loaded 34404 prediction records
Loaded 8974 label records (all periods)
Merged monitoring data: 26922 records
Date coverage: 3 unique dates
Monitoring data loaded successfully
Total records: 26922
Date range: 2024-10-01 to 2024-12-01
Unique dates: 3
Average records per date: 8974
Missing actual labels: 0 (0.0%)
Missing predictions: 0 (0.0%)

Sample monitoring data:
  Customer_ID snapshot_date  model_prediction_proba  actual_label
0  CUS_0x4e42    2024-10-01                0.043231             0
1  CUS_0x4e47    2024-10-01                0.493364             0
2  CUS_0x4e55    2024-10-01                0.830098             0
3  CUS_0x4e60    2024-10-01                0.840814             1
4  CUS_0x4e7d    2024-10-01                0.542185             0


## Calculate Performance Metrics Over Time

In [38]:
print(f"\n{'='*60}")
print("CALCULATING PERFORMANCE METRICS")
print(f"{'='*60}")

performance_metrics = mm.calculate_performance_metrics_over_time(
    monitoring_data=monitoring_data,
    config=config
)

if performance_metrics is not None and len(performance_metrics) > 0:
    print(f"Performance metrics calculated successfully")
    print(f"Monitoring periods: {len(performance_metrics)}")

    # Spread of the per-period AUC and Gini values across the monitoring window.
    print(f"\nPerformance Summary:")
    print(f"AUC Range: [{performance_metrics['auc'].min():.4f}, {performance_metrics['auc'].max():.4f}]")
    print(f"Mean AUC: {performance_metrics['auc'].mean():.4f} ± {performance_metrics['auc'].std():.4f}")
    print(f"Gini Range: [{performance_metrics['gini'].min():.3f}, {performance_metrics['gini'].max():.3f}]")
    print(f"Mean Gini: {performance_metrics['gini'].mean():.3f} ± {performance_metrics['gini'].std():.3f}")

    if baseline_performance:
        # Prefer the out-of-time AUC and fall back to the test AUC.
        # There is deliberately no numeric default: a fallback of 0 would be
        # used as the divisor in the percentage below.
        baseline_auc = baseline_performance.get('auc_oot', baseline_performance.get('auc_test'))
        if baseline_auc:
            current_mean_auc = performance_metrics['auc'].mean()
            auc_difference = current_mean_auc - baseline_auc
            print(f"\nBaseline Comparison:")
            print(f"Baseline AUC: {baseline_auc:.4f}")
            print(f"Current Mean AUC: {current_mean_auc:.4f}")
            print(f"Performance Change: {auc_difference:+.4f} ({auc_difference/baseline_auc*100:+.1f}%)")
        else:
            # Missing or zero baseline: report it instead of dividing by it.
            print(f"\nBaseline Comparison: skipped (no usable baseline AUC available)")

    print(f"\nDetailed Performance Metrics:")
    print(performance_metrics)
else:
    # Later cells all depend on these metrics, so stop the run here.
    raise ValueError("Failed to calculate performance metrics")



CALCULATING PERFORMANCE METRICS
Calculating performance metrics over time...
Performance metrics calculated for 3 time periods
Performance metrics calculated successfully
Monitoring periods: 3

Performance Summary:
AUC Range: [0.8881, 0.8912]
Mean AUC: 0.8895 ± 0.0015
Gini Range: [0.776, 0.782]
Mean Gini: 0.779 ± 0.003

Baseline Comparison:
Baseline AUC: 0.8845
Current Mean AUC: 0.8895
Performance Change: +0.0050 (+0.6%)

Detailed Performance Metrics:
  snapshot_date  sample_count  actual_default_rate  predicted_default_rate  \
0    2024-10-01          8974             0.288723                0.302764   
1    2024-11-01          8974             0.288723                0.297526   
2    2024-12-01          8974             0.288723                0.297415   

        auc      gini  precision    recall  f1_score  mean_prediction  \
0  0.888137  0.776274   0.695988  0.729834  0.712509         0.340999   
1  0.889167  0.778334   0.702622  0.724045  0.713172         0.339934   
2  0.891160

## Analyze Data Stability and Drift

In [39]:
banner = "=" * 60
print(f"\n{banner}")
print("ANALYZING DATA STABILITY")
print(banner)

stability_analysis = mm.analyze_data_stability(
    monitoring_data=monitoring_data,
    config=config,
    baseline_performance=baseline_performance
)

if stability_analysis is None:
    print("Warning: Stability analysis failed or returned no results")
else:
    print("Stability analysis completed successfully")

    # --- Prediction distribution -------------------------------------------
    pred_stability = stability_analysis.get('prediction_stability', {})
    if pred_stability:
        print("\nPrediction Distribution Analysis:")
        for drift_label, drift_key in (
            ("Mean prediction drift", "mean_drift"),
            ("Std prediction drift", "std_drift"),
            ("Distribution shift (KS statistic)", "ks_statistic"),
        ):
            print(f"{drift_label}: {pred_stability.get(drift_key, 0):.4f}")

        # A missing 'is_stable' flag is treated as stable.
        print(
            "✓ Prediction distribution is stable"
            if pred_stability.get('is_stable', True)
            else "⚠ Warning: Prediction distribution shows significant drift"
        )

    # --- Feature drift -----------------------------------------------------
    feature_drift = stability_analysis.get('feature_drift', {})
    if feature_drift:
        print("\nFeature Drift Analysis:")
        # Bucket each entry by its drift value: above 0.2 is "high",
        # the half-open band (0.1, 0.2] is "moderate".
        high_drift_features = [
            name for name, drift_value in feature_drift.items() if drift_value > 0.2
        ]
        moderate_drift_features = [
            name for name, drift_value in feature_drift.items() if 0.1 < drift_value <= 0.2
        ]

        print(f"High drift features (PSI > 0.2): {len(high_drift_features)}")
        print(f"Moderate drift features (PSI 0.1-0.2): {len(moderate_drift_features)}")

        if high_drift_features:
            # Only the first five names are listed to keep the output short.
            print(f"High drift features: {high_drift_features[:5]}")

    # --- Model stability ---------------------------------------------------
    model_stability = stability_analysis.get('model_stability', {})
    if model_stability:
        print("\nModel Stability Metrics:")
        for metric_label, metric_key in (
            ("Performance volatility", "performance_volatility"),
            ("Consistency score", "consistency_score"),
        ):
            print(f"{metric_label}: {model_stability.get(metric_key, 0):.4f}")

        # A missing 'is_stable' flag is treated as stable.
        print(
            "✓ Model performance is stable"
            if model_stability.get('is_stable', True)
            else "⚠ Warning: Model performance shows instability"
        )



ANALYZING DATA STABILITY
Analyzing data stability and drift...
Stability analysis completed
Stability analysis completed successfully

Prediction Distribution Analysis:
Mean prediction drift: 0.0006
Std prediction drift: 0.0011
Distribution shift (KS statistic): 0.0097
✓ Prediction distribution is stable

Feature Drift Analysis:
High drift features (PSI > 0.2): 1
Moderate drift features (PSI 0.1-0.2): 0
High drift features: ['prediction_stability_score']

Model Stability Metrics:
Performance volatility: 0.0015
Consistency score: 0.9983
✓ Model performance is stable


## Generate Performance Visualizations

In [40]:
print(f"\n{'='*60}")
print("GENERATING MONITORING VISUALIZATIONS")
print(f"{'='*60}")

visualization_paths = mm.generate_monitoring_visualizations(
    performance_metrics=performance_metrics,
    stability_analysis=stability_analysis,
    monitoring_data=monitoring_data,
    baseline_performance=baseline_performance,
    config=config
)

if visualization_paths:
    print(f"Visualizations generated successfully")
    print(f"Charts created: {len(visualization_paths)}")

    # List every chart together with the file it was written to.
    for chart_name, file_path in visualization_paths.items():
        print(f"  {chart_name}: {file_path}")

    # Show the saved performance-trend image inline. Creating a new figure and
    # calling plt.show() on it, as this cell did before, only renders an empty
    # canvas ("Figure ... with 0 Axes") because nothing is ever drawn on it.
    trend_chart_path = visualization_paths.get('performance_trend')
    if trend_chart_path:
        if os.path.exists(trend_chart_path):
            fig, ax = plt.subplots(figsize=(12, 6))
            ax.imshow(plt.imread(trend_chart_path))
            ax.axis('off')  # the image already carries its own axes and labels
            plt.show()
            print(f"Performance trend chart displayed above")
        else:
            print(f"Warning: Performance trend chart not found at {trend_chart_path}")
else:
    print("Warning: Failed to generate visualizations")



GENERATING MONITORING VISUALIZATIONS
Generated 3 monitoring charts
Visualizations generated successfully
Charts created: 3
  performance_trend: datamart/gold/model_monitoring/credit_model_xgboost_2024_09_01/visualizations/performance_trend.png
  prediction_distribution: datamart/gold/model_monitoring/credit_model_xgboost_2024_09_01/visualizations/prediction_distribution.png
  performance_vs_volume: datamart/gold/model_monitoring/credit_model_xgboost_2024_09_01/visualizations/performance_vs_volume.png


<Figure size 1200x600 with 0 Axes>

Performance trend chart displayed above


## Performance Alert Detection

In [41]:
banner = "=" * 60
print(f"\n{banner}")
print("PERFORMANCE ALERT DETECTION")
print(banner)

alerts = mm.detect_performance_alerts(
    performance_metrics=performance_metrics,
    stability_analysis=stability_analysis,
    baseline_performance=baseline_performance,
    config=config
)

if not alerts:
    print("No alerts detected - model performance is stable")
else:
    print("Alert detection completed")
    print(f"Total alerts: {len(alerts)}")

    # Split the alerts into one list per severity level.
    critical_alerts, warning_alerts, info_alerts = (
        [alert for alert in alerts if alert.get('severity') == level]
        for level in ('critical', 'warning', 'info')
    )

    for level_label, level_alerts in (
        ("Critical", critical_alerts),
        ("Warning", warning_alerts),
        ("Info", info_alerts),
    ):
        print(f"{level_label} alerts: {len(level_alerts)}")

    if critical_alerts:
        print("\n🚨 CRITICAL ALERTS:")
        # Only the first three are printed, each with its detail fields.
        for alert in critical_alerts[:3]:
            print(f"  - {alert.get('message', 'Unknown alert')}")
            for field_label, field_key in (
                ("Date", "date"),
                ("Metric", "metric"),
                ("Value", "value"),
            ):
                print(f"    {field_label}: {alert.get(field_key, 'Unknown')}")

    if warning_alerts:
        print("\n⚠️ WARNING ALERTS:")
        # Only the first three are printed, message only.
        for alert in warning_alerts[:3]:
            print(f"  - {alert.get('message', 'Unknown alert')}")

    if not (critical_alerts or warning_alerts):
        print("✓ No critical or warning alerts detected")
        print("Model performance appears stable")



PERFORMANCE ALERT DETECTION
Generated 0 monitoring alerts
No alerts detected - model performance is stable


## Save Monitoring Results to Datamart

In [42]:
banner = "=" * 60
print(f"\n{banner}")
print("SAVING MONITORING RESULTS")
print(banner)

# Arguments common to both save calls below.
shared_save_kwargs = dict(
    performance_metrics=performance_metrics,
    stability_analysis=stability_analysis,
    alerts=alerts,
    config=config,
)

# Despite its name, this holds a mapping of artifact name -> saved path.
performance_output_path = mm.save_monitoring_results(spark=spark, **shared_save_kwargs)

if not performance_output_path:
    print("Warning: Failed to save monitoring results")
else:
    print("Monitoring results saved successfully")
    print(f"Performance metrics: {performance_output_path['performance_metrics']}")
    print(f"Stability analysis: {performance_output_path['stability_analysis']}")

    # The alerts entry is optional, so it is read with .get().
    alerts_path = performance_output_path.get('alerts')
    print(f"Alerts: {alerts_path or 'No alerts generated'}")

    # The summary is only written once the main results were saved.
    summary_output_path = mm.save_monitoring_summary(
        baseline_performance=baseline_performance,
        **shared_save_kwargs
    )



SAVING MONITORING RESULTS
Monitoring results saved successfully
Monitoring results saved successfully
Performance metrics: datamart/gold/model_monitoring/credit_model_xgboost_2024_09_01/performance_metrics/performance_metrics_2024-10-01_2024-12-01_2025_06_28_08_54_14.parquet
Stability analysis: datamart/gold/model_monitoring/credit_model_xgboost_2024_09_01/stability_analysis/stability_analysis_2024-10-01_2024-12-01_2025_06_28_08_54_14.pkl
Alerts: No alerts generated
Monitoring summary saved to: datamart/gold/model_monitoring/credit_model_xgboost_2024_09_01/reports/monitoring_summary_2024-10-01_2024-12-01_2025_06_28_08_54_14.pkl


                                                                                

## Batch Monitoring for Multiple Models

In [43]:
banner = "=" * 80
print(f"\n{banner}")
print("BATCH MONITORING SETUP")
print(banner)

# Model versions to monitor in one batch run.
batch_models = [
    "credit_model_logistic_regression_2024_09_01",
    "credit_model_random_forest_2024_09_01",
    "credit_model_xgboost_2024_09_01",
]

# NOTE(review): this window ends before the 2024_09_01 suffix carried by the
# model names above - confirm that monitoring this period is intended.
batch_window_start = "2024-04-01"
batch_window_end = "2024-06-01"

# NOTE(review): "monitoring_frequency" (weekly, monthly, daily) is recorded
# here but is not passed to mm.run_batch_monitoring by the cell that runs the
# batch - confirm whether it is still needed.
batch_monitoring_config = dict(
    models=batch_models,
    monitoring_start_date=batch_window_start,
    monitoring_end_date=batch_window_end,
    monitoring_frequency="monthly",
)

print("Batch monitoring configuration:")
print(f"Models to monitor: {len(batch_models)}")
print(
    "Time period: {} to {}".format(
        batch_monitoring_config['monitoring_start_date'],
        batch_monitoring_config['monitoring_end_date'],
    )
)



BATCH MONITORING SETUP
Batch monitoring configuration:
Models to monitor: 3
Time period: 2024-04-01 to 2024-06-01


## Execute Batch Monitoring

Execute batch monitoring for every model in the list, then print a summary of the batch monitoring results and the summary statistics across all models.

In [44]:
print(f"\n{'='*60}")
print("EXECUTING BATCH MONITORING")
print(f"{'='*60}")

batch_results = mm.run_batch_monitoring(
    models_list=batch_models,
    monitoring_start_date=batch_monitoring_config["monitoring_start_date"],
    monitoring_end_date=batch_monitoring_config["monitoring_end_date"],
    spark=spark,
    predictions_path="datamart/gold/model_predictions/",
    labels_path="datamart/gold/label_store/",
    monitoring_output_path="datamart/gold/model_monitoring/",
    model_bank_directory="models/"
)

if batch_results:
    # Partition the per-model results once; the summary below reuses the
    # successful subset.
    successful_results = [result for result in batch_results if result['success']]
    successful_models = [result['model'] for result in successful_results]
    failed_models = [result['model'] for result in batch_results if not result['success']]

    print(f"\nBatch monitoring completed:")
    print(f"Successful: {len(successful_models)} models")
    print(f"Failed: {len(failed_models)} models")

    if successful_models:
        print(f"Successfully monitored: {', '.join(successful_models)}")
    if failed_models:
        print(f"Failed to monitor: {', '.join(failed_models)}")

    # `or []` also covers a result whose 'alerts' entry is present but None.
    total_alerts = sum(len(result.get('alerts') or []) for result in successful_results)

    # Average only the AUC values that were actually reported. Substituting 0
    # for a missing value would drag the mean down, and averaging an empty
    # list would print nan together with a RuntimeWarning.
    reported_aucs = [
        result['avg_auc']
        for result in successful_results
        if result.get('avg_auc') is not None
    ]

    print(f"\nBatch Summary:")
    print(f"Total alerts across all models: {total_alerts}")
    if reported_aucs:
        print(f"Average performance (AUC): {np.mean(reported_aucs):.4f}")
    else:
        print("Average performance (AUC): N/A (no successful model reported an AUC)")
else:
    print("Batch monitoring failed to complete")



EXECUTING BATCH MONITORING
Starting batch monitoring for 3 models
Monitoring period: 2024-04-01 to 2024-06-01

Processing model 1/3: credit_model_logistic_regression_2024_09_01
Training baseline loaded from: models/credit_model_logistic_regression_2024_09_01.pkl
Loading monitoring data for credit_model_logistic_regression_2024_09_01
Date range: 2024-04-01 to 2024-06-01
Loaded 26922 prediction records
Loaded 8974 label records (all periods)
Merged monitoring data: 26922 records
Date coverage: 3 unique dates
Calculating performance metrics over time...
Performance metrics calculated for 3 time periods
Analyzing data stability and drift...
Stability analysis completed
Generated 0 monitoring alerts
Monitoring results saved successfully
✓ Successfully monitored credit_model_logistic_regression_2024_09_01

Processing model 2/3: credit_model_random_forest_2024_09_01
Training baseline loaded from: models/credit_model_random_forest_2024_09_01.pkl
Loading monitoring data for credit_model_random

## Generate Monitoring Report

In [45]:
banner = "=" * 80
print(f"\n{banner}")
print("GENERATING MONITORING REPORT")
print(banner)

# Bundle the artifacts produced by the earlier cells into one report.
report_path = mm.generate_monitoring_report(
    model_version=model_version,
    performance_metrics=performance_metrics,
    stability_analysis=stability_analysis,
    alerts=alerts,
    baseline_performance=baseline_performance,
    visualization_paths=visualization_paths,
    config=config
)

if not report_path:
    print("Warning: Failed to generate monitoring report")
else:
    print("Comprehensive monitoring report generated")
    print(f"Report location: {report_path}")
    print("Report includes:")
    for report_section in (
        "Executive summary",
        "Performance trends analysis",
        "Stability assessment",
        "Alert summary",
        "Recommendations",
    ):
        print(f"  - {report_section}")



GENERATING MONITORING REPORT
Monitoring report generated: datamart/gold/model_monitoring/credit_model_xgboost_2024_09_01/reports/monitoring_report_credit_model_xgboost_2024_09_01_2025_06_28_08_54_43.md
Comprehensive monitoring report generated
Report location: datamart/gold/model_monitoring/credit_model_xgboost_2024_09_01/reports/monitoring_report_credit_model_xgboost_2024_09_01_2025_06_28_08_54_43.md
Report includes:
  - Executive summary
  - Performance trends analysis
  - Stability assessment
  - Alert summary
  - Recommendations


## Summary

In [46]:
print(f"\n{'='*80}")
print("MONITORING SESSION COMPLETE")
print(f"{'='*80}")

# Read earlier results through the notebook namespace so this cell still runs
# when a previous cell was skipped (name undefined) or produced None. A plain
# "name in locals()" check lets None through and then fails inside len().
session_vars = globals()
monitoring_data_result = session_vars.get('monitoring_data')
performance_metrics_result = session_vars.get('performance_metrics')
baseline_result = session_vars.get('baseline_performance') or {}

records_analyzed = len(monitoring_data_result) if monitoring_data_result is not None else 0
performance_periods = len(performance_metrics_result) if performance_metrics_result is not None else 0

print(f"Model Monitored: {model_version}")
print(f"Monitoring Period: {monitoring_start_date} to {monitoring_end_date}")
print(f"Records Analyzed: {records_analyzed}")
print(f"Performance Periods: {performance_periods}")

if 'alerts' not in session_vars:
    # Alert detection never ran, so health cannot be claimed either way.
    print(f"\n⚠️ Model Health: UNKNOWN - alert detection has not been run")
elif alerts:
    critical_count = len([a for a in alerts if a.get('severity') == 'critical'])
    if critical_count > 0:
        print(f"\n🚨 Model Health: CRITICAL - {critical_count} critical alerts")
    else:
        warning_count = len([a for a in alerts if a.get('severity') == 'warning'])
        if warning_count > 0:
            print(f"\n⚠️ Model Health: WARNING - {warning_count} warnings")
        else:
            print(f"\n✅ Model Health: HEALTHY - No critical issues detected")
else:
    print(f"\n✅ Model Health: HEALTHY - No alerts detected")

if performance_periods > 0:
    # Takes the last row as the latest period.
    # NOTE(review): assumes rows are ordered by snapshot_date - confirm.
    current_auc = performance_metrics_result['auc'].iloc[-1]
    print(f"Latest AUC: {current_auc:.4f}")

    # Skip the comparison when the baseline AUC is missing or zero; it is the
    # divisor of the relative change.
    baseline_auc = baseline_result.get('auc_oot')
    if baseline_auc:
        change = ((current_auc - baseline_auc) / baseline_auc) * 100
        print(f"Performance vs Baseline: {change:+.1f}%")



MONITORING SESSION COMPLETE
Model Monitored: credit_model_xgboost_2024_09_01
Monitoring Period: 2024-10-01 to 2024-12-01
Records Analyzed: 26922
Performance Periods: 3

✅ Model Health: HEALTHY - No alerts detected
Latest AUC: 0.8912
Performance vs Baseline: +0.7%


In [47]:
# Stop the Spark session that was created near the top of this notebook.
spark.stop()