# 📊 Phase 6: Model Monitoring & Improvement

This notebook demonstrates comprehensive model monitoring and improvement techniques for MLOps, covering performance tracking, drift detection, and model lifecycle management.

## Table of Contents
1. [Model Monitoring](#1-model-monitoring)
   - Drift Detection (data drift and performance drift, steps 1.2–1.3)
   - Performance Tracking (accuracy metrics and latency, steps 1.1 and 1.4)
2. [Model Lifecycle Management](#2-model-lifecycle-management)

---

## Prerequisites
Make sure you have the required libraries installed:
```bash
pip install pandas numpy matplotlib seaborn plotly scipy scikit-learn
```


In [None]:
# Import required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from scipy import stats
import warnings
import json
import os

# Silence library warnings so the notebook output stays readable.
# NOTE(review): this is a blanket filter, so it also hides deprecation notices.
warnings.filterwarnings('ignore')

print("✅ Libraries imported successfully!")

# Load sample data for monitoring
np.random.seed(42)  # fixed seed keeps the simulated data reproducible
n_samples = 1000

# Simulate production data: one scored request per hour.
# The hourly step is given as a timedelta rather than the 'H' string alias,
# which recent pandas releases deprecate; a timedelta works on every version.
production_data = {
    'timestamp': pd.date_range('2024-01-01', periods=n_samples, freq=timedelta(hours=1)),
    'age': np.random.normal(35, 12, n_samples).astype(int),
    'income': np.random.lognormal(10, 0.5, n_samples),
    'credit_score': np.random.normal(650, 100, n_samples).astype(int),
    'loan_amount': np.random.exponential(50000, n_samples),
    # Predictions and ground truth are drawn independently (20% positives each).
    'prediction': np.random.choice([0, 1], n_samples, p=[0.8, 0.2]),
    'actual': np.random.choice([0, 1], n_samples, p=[0.8, 0.2]),
    'probability': np.random.beta(2, 5, n_samples)
}

df_production = pd.DataFrame(production_data)
print(f"📊 Production data shape: {df_production.shape}")
print(f"Date range: {df_production['timestamp'].min()} to {df_production['timestamp'].max()}")


## 1. Model Monitoring

**Purpose**: Monitor model performance and detect issues in production.


In [None]:
# 1.1 Step 22: Monitor Model (Drift, Latency, Accuracy)
# Section banner: the title followed by a 50-character rule on its own line.
print("📊 Step 22: Monitor Model", "=" * 50, sep="\n")

# Performance monitoring
def calculate_performance_metrics(y_true, y_pred, y_proba):
    """Summarise classification quality for one batch of predictions.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels, aligned with ``y_true``.
        y_proba: Predicted probabilities; only their mean is reported.

    Returns:
        dict with the keys 'accuracy', 'precision', 'recall', 'f1_score',
        'prediction_count', 'positive_rate' and 'avg_probability',
        in that order.
    """
    metrics = {'accuracy': accuracy_score(y_true, y_pred)}

    # zero_division=0 makes these scorers return 0 instead of warning when
    # a denominator is empty (e.g. no positive predictions in the batch).
    label_scorers = (
        ('precision', precision_score),
        ('recall', recall_score),
        ('f1_score', f1_score),
    )
    for metric_name, scorer in label_scorers:
        metrics[metric_name] = scorer(y_true, y_pred, zero_division=0)

    metrics['prediction_count'] = len(y_pred)
    metrics['positive_rate'] = np.mean(y_pred)
    metrics['avg_probability'] = np.mean(y_proba)
    return metrics

# Calculate current performance over the whole production window
current_metrics = calculate_performance_metrics(
    df_production['actual'],
    df_production['prediction'],
    df_production['probability']
)

print("Current Performance Metrics:")
for metric, value in current_metrics.items():
    # Counts are whole numbers; only real-valued metrics get three decimals
    # (otherwise prediction_count is shown as e.g. "1000.000").
    if isinstance(value, (int, np.integer)):
        print(f"  {metric}: {value}")
    else:
        print(f"  {metric}: {value:.3f}")

# 1.2 Data Drift Detection
print("\n🔍 Data Drift Detection")
print("-" * 30)

# Simulate baseline data (training data)
# 'age' and 'credit_score' are truncated to integers exactly like the
# production columns, so the drift test compares like with like instead of
# comparing continuous values against truncated ones.
baseline_size = 1000
baseline_data = {
    'age': np.random.normal(35, 12, baseline_size).astype(int),
    'income': np.random.lognormal(10, 0.5, baseline_size),
    'credit_score': np.random.normal(650, 100, baseline_size).astype(int),
    'loan_amount': np.random.exponential(50000, baseline_size)
}

df_baseline = pd.DataFrame(baseline_data)

def detect_drift_ks(baseline, production, feature_name, alpha=0.05, severe_alpha=0.01):
    """Detect distribution drift in one feature with a two-sample KS test.

    Args:
        baseline: Reference sample of the feature (e.g. training data).
        production: Sample of the same feature observed in production.
        feature_name: Label copied into the result under 'feature'.
        alpha: Significance level; drift is flagged when the p-value is
            below it.
        severe_alpha: p-value below which detected drift is rated 'high'.

    Returns:
        dict with the keys 'feature', 'ks_statistic', 'p_value', 'is_drift'
        and 'drift_severity' ('high', 'medium' or 'low'). The statistic and
        p-value are built-in floats and the flag is a built-in bool, so the
        result can be serialised to JSON directly.
    """
    result = stats.ks_2samp(baseline, production)
    statistic = float(result.statistic)
    p_value = float(result.pvalue)
    is_drift = p_value < alpha

    # Severity is derived from the drift flag so the two can never disagree
    # when a caller passes a non-default alpha. With the default thresholds
    # this yields the same labels as fixed 0.01 / 0.05 cut-offs.
    if not is_drift:
        severity = 'low'
    elif p_value < severe_alpha:
        severity = 'high'
    else:
        severity = 'medium'

    return {
        'feature': feature_name,
        'ks_statistic': statistic,
        'p_value': p_value,
        'is_drift': is_drift,
        'drift_severity': severity
    }

# Check drift for each feature
# One KS comparison per monitored column, baseline against production.
monitored_features = ('age', 'income', 'credit_score', 'loan_amount')
drift_results = [
    detect_drift_ks(df_baseline[column], df_production[column], column)
    for column in monitored_features
]

# Display drift results
drift_df = pd.DataFrame(drift_results)
report_columns = ['feature', 'is_drift', 'drift_severity', 'p_value']
print("Data Drift Analysis:")
print(drift_df[report_columns].round(4))

# 1.3 Performance Drift Detection
print("\n📈 Performance Drift Detection", "-" * 30, sep="\n")

# Calculate performance over time windows
# Derive the grouping keys from the timestamp column.
request_times = df_production['timestamp']
df_production['hour'] = request_times.dt.hour
df_production['day'] = request_times.dt.date


def _hourly_metrics(group):
    """Return the metric dict for the rows of a single hour-of-day group."""
    return calculate_performance_metrics(
        group['actual'], group['prediction'], group['probability']
    )


# Performance by hour
# groupby.apply yields one metric dict per hour; expanding each dict with
# pd.Series turns that into one column per metric.
metrics_by_hour = df_production.groupby('hour').apply(_hourly_metrics)
hourly_performance = metrics_by_hour.apply(pd.Series)

print("Performance by Hour (sample):")
print(hourly_performance[['accuracy', 'f1_score']].head())

# 1.4 Latency Monitoring
print("\n⏱️ Latency Monitoring")
print("-" * 30)

# Simulate prediction latencies
# NOTE(review): re-seeding resets the global NumPy RNG mid-notebook; it keeps
# the latency sample reproducible regardless of which cells ran before.
np.random.seed(42)
latencies = np.random.exponential(50, len(df_production))  # milliseconds
df_production['latency_ms'] = latencies

latency_stats = {
    'mean_latency': np.mean(latencies),
    'median_latency': np.median(latencies),
    'p95_latency': np.percentile(latencies, 95),
    'p99_latency': np.percentile(latencies, 99),
    'max_latency': np.max(latencies)
}

print("Latency Statistics (ms):")
for stat, value in latency_stats.items():
    print(f"  {stat}: {value:.2f}")

# Alert thresholds
latency_thresholds = {
    'warning': 100,  # ms
    'critical': 500  # ms
}

# Both thresholds are evaluated, and the messages read their limits from the
# dict so the printed numbers cannot drift from the configured values.
high_latency_count = np.sum(latencies > latency_thresholds['warning'])
critical_latency_count = np.sum(latencies > latency_thresholds['critical'])
print(f"\nHigh latency predictions (>{latency_thresholds['warning']}ms): {high_latency_count}")
print(f"Critical latency predictions (>{latency_thresholds['critical']}ms): {critical_latency_count}")

print("✅ Model monitoring completed!")


## 2. Model Lifecycle Management

**Purpose**: Manage model retraining and retirement decisions.


In [None]:
# 2.1 Step 23: Retrain or Retire Model
# Section banner: the title followed by a 50-character rule on its own line.
print("🔄 Step 23: Retrain or Retire Model", "=" * 50, sep="\n")

# Define retraining triggers
def evaluate_retraining_triggers(performance_metrics, drift_results, latency_stats, *,
                                 accuracy_threshold=0.7, latency_threshold_ms=200,
                                 days_since_training=30, max_model_age_days=90):
    """Evaluate which retraining triggers currently fire.

    A warning line is printed for every trigger that fires.

    Args:
        performance_metrics: Mapping that contains an 'accuracy' entry.
        drift_results: Iterable of per-feature drift dicts, each with a
            'drift_severity' key; only 'high' entries count as drift here.
        latency_stats: Mapping that contains a 'p95_latency' entry (ms).
        accuracy_threshold: Accuracy below this counts as degradation.
        latency_threshold_ms: p95 latency above this counts as high latency.
        days_since_training: Age of the model in days (simulated by default).
        max_model_age_days: Age above which the time-based trigger fires.

    Returns:
        dict with the boolean keys 'performance_degradation', 'data_drift',
        'high_latency' and 'time_based'.
    """
    triggers = {
        'performance_degradation': False,
        'data_drift': False,
        'high_latency': False,
        'time_based': False
    }

    # Performance degradation trigger
    if performance_metrics['accuracy'] < accuracy_threshold:
        triggers['performance_degradation'] = True
        print("⚠️  Performance degradation detected!")

    # Data drift trigger: only features rated 'high' are counted
    high_drift_count = sum(1 for r in drift_results if r['drift_severity'] == 'high')
    if high_drift_count > 0:
        triggers['data_drift'] = True
        print(f"⚠️  Data drift detected in {high_drift_count} features!")

    # Latency trigger
    if latency_stats['p95_latency'] > latency_threshold_ms:
        triggers['high_latency'] = True
        print("⚠️  High latency detected!")

    # Time-based trigger
    if days_since_training > max_model_age_days:
        triggers['time_based'] = True
        print(f"⚠️  Model is older than {max_model_age_days} days!")

    return triggers

# Evaluate retraining triggers
# Feed the metrics, drift results and latency stats gathered so far
# into the trigger evaluation.
retraining_triggers = evaluate_retraining_triggers(current_metrics, drift_results, latency_stats)

print("\nRetraining Trigger Analysis:")
for trigger_name, fired in retraining_triggers.items():
    # Red marks a trigger that fired, green one that did not.
    if fired:
        marker = "🔴"
    else:
        marker = "🟢"
    print(f"  {marker} {trigger_name}: {fired}")

# 2.2 Retraining Decision Logic
print("\n🤔 Retraining Decision", "-" * 30, sep="\n")

def make_retraining_decision(triggers, drift_results=None):
    """Turn trigger flags into a retraining decision and a recommended action.

    Args:
        triggers: Mapping with the boolean entries 'performance_degradation',
            'data_drift', 'high_latency' and 'time_based'.
        drift_results: Optional list of per-feature drift dicts, each with a
            'drift_severity' key. When omitted, the module-level
            ``drift_results`` is used if it exists (the previous implicit
            behaviour); if there is none, no feature counts as drifted.

    Returns:
        Tuple ``(decision, action)``. ``decision`` is one of
        'RETRAIN_IMMEDIATELY', 'RETRAIN_SOON', 'MONITOR_CLOSELY' or
        'CONTINUE'; ``action`` is the matching human-readable next step.
    """
    if drift_results is None:
        # Backward compatibility for callers that pass only `triggers`.
        drift_results = globals().get('drift_results', [])

    high_drift_count = sum(1 for r in drift_results if r['drift_severity'] == 'high')

    # Critical: accuracy has degraded, or more than two features drift heavily.
    critical_triggers = sum([
        bool(triggers['performance_degradation']),
        high_drift_count > 2,
    ])

    # Warnings: each one alone only justifies closer monitoring.
    warning_triggers = sum(
        bool(triggers[name]) for name in ('data_drift', 'high_latency', 'time_based')
    )

    if critical_triggers >= 1:
        decision = "RETRAIN_IMMEDIATELY"
        action = "Schedule immediate retraining with fresh data"
    elif warning_triggers >= 2:
        decision = "RETRAIN_SOON"
        action = "Schedule retraining within 1 week"
    elif warning_triggers >= 1:
        decision = "MONITOR_CLOSELY"
        action = "Increase monitoring frequency"
    else:
        decision = "CONTINUE"
        action = "Model performing well, continue monitoring"

    return decision, action

# Derive the decision and its recommended action from the trigger flags.
decision, action = make_retraining_decision(retraining_triggers)

print(f"Decision: {decision}", f"Action: {action}", sep="\n")

# 2.3 Model Retirement Criteria
print("\n💀 Model Retirement Criteria", "-" * 30, sep="\n")

def evaluate_retirement_criteria(performance_metrics, triggers, drift_results=None, *,
                                 model_age_days=365, replacement_available=True,
                                 max_model_age_days=365):
    """Evaluate whether the model should be retired.

    Args:
        performance_metrics: Mapping that contains an 'accuracy' entry.
        triggers: Mapping that contains a 'performance_degradation' flag.
        drift_results: Optional list of per-feature drift dicts, each with a
            'drift_severity' key. When omitted, the module-level
            ``drift_results`` is used if it exists (the previous implicit
            behaviour); if there is none, no feature counts as drifted.
        model_age_days: Age of the model in days (simulated by default).
        replacement_available: Whether a replacement model exists
            (simulated by default).
        max_model_age_days: Age above which an available replacement
            justifies retirement.

    Returns:
        Tuple ``(should_retire, retirement_criteria)``. ``should_retire`` is
        a built-in bool; ``retirement_criteria`` has the keys
        'performance_below_threshold', 'multiple_critical_issues',
        'model_age' and 'replacement_available'.
    """
    if drift_results is None:
        # Backward compatibility for callers that pass only two arguments.
        drift_results = globals().get('drift_results', [])

    high_drift_count = sum(1 for r in drift_results if r['drift_severity'] == 'high')
    widespread_drift = high_drift_count > 3

    retirement_criteria = {
        # bool() turns a NumPy comparison result into a built-in bool so the
        # value serialises as a JSON boolean rather than as a string.
        'performance_below_threshold': bool(performance_metrics['accuracy'] < 0.6),
        # Both critical conditions must hold at the same time.
        'multiple_critical_issues': bool(triggers['performance_degradation']) and widespread_drift,
        'model_age': model_age_days,
        'replacement_available': replacement_available
    }

    # Age alone never retires a model; a replacement has to be available.
    aged_out = model_age_days > max_model_age_days and bool(replacement_available)

    should_retire = bool(
        retirement_criteria['performance_below_threshold']
        or retirement_criteria['multiple_critical_issues']
        or aged_out
    )

    return should_retire, retirement_criteria

should_retire, retirement_criteria = evaluate_retirement_criteria(current_metrics, retraining_triggers)

print("Retirement Criteria Evaluation:")
# Only these entries are pass/fail checks where True means trouble. The
# remaining entries (model age, replacement availability) are context, so
# they get a neutral marker instead of being shown as failures just because
# their value is truthy.
failure_checks = ('performance_below_threshold', 'multiple_critical_issues')
for criterion, status in retirement_criteria.items():
    if criterion in failure_checks:
        status_icon = "🔴" if status else "🟢"
    else:
        status_icon = "ℹ️"
    print(f"  {status_icon} {criterion}: {status}")

if should_retire:
    print("\n🚨 RECOMMENDATION: RETIRE MODEL")
    print("Model should be retired and replaced with a new version")
else:
    print("\n✅ RECOMMENDATION: CONTINUE WITH CURRENT MODEL")
    print("Model is performing adequately")

# 2.4 Monitoring Dashboard Summary
print("\n📊 Monitoring Dashboard Summary", "-" * 30, sep="\n")

# Alert text for each retraining trigger that is surfaced on the dashboard.
alert_messages = (
    ('performance_degradation', "Performance degradation detected"),
    ('data_drift', "Data drift detected"),
    ('high_latency', "High latency detected"),
)

# p95 latency below 200 ms is reported as normal.
p95_is_normal = latency_stats['p95_latency'] < 200

dashboard_summary = {
    'timestamp': datetime.now().isoformat(),
    'model_status': 'active',
    'performance_metrics': current_metrics,
    'drift_detected': any(result['is_drift'] for result in drift_results),
    'latency_status': 'normal' if p95_is_normal else 'high',
    'retraining_decision': decision,
    'retirement_recommendation': should_retire,
    # Add alerts: one message per trigger that fired, in the order above.
    'alerts': [
        message
        for trigger_name, message in alert_messages
        if retraining_triggers[trigger_name]
    ]
}

print("Dashboard Summary:")
for key, value in dashboard_summary.items():
    # The metric dict is printed earlier in the notebook; skip it here.
    if key == 'performance_metrics':
        continue
    print(f"  {key}: {value}")

# Save monitoring results
def _to_json_native(value):
    """Convert a value the json module cannot encode on its own.

    NumPy scalars become the matching built-in Python value so they are
    written as real JSON numbers/booleans; anything else falls back to its
    string form.
    """
    if isinstance(value, np.generic):
        return value.item()
    return str(value)


summary_path = 'monitoring/dashboard_summary.json'
os.makedirs(os.path.dirname(summary_path), exist_ok=True)
# An explicit encoding keeps the file identical across platforms.
with open(summary_path, 'w', encoding='utf-8') as f:
    json.dump(dashboard_summary, f, indent=2, default=_to_json_native)

print(f"\n✅ Monitoring results saved to: {summary_path}")
print("✅ Model lifecycle management completed!")
