# 📊 Model Evaluation & Analysis

**Project:** Predicting Paid Amount for Medical Claims  
**Stage:** Model Evaluation & Deployment Readiness  

---

## Overview

1. **Load Production Model** - Load the best trained model
2. **Performance Metrics** - Calculate comprehensive metrics
3. **Visualization** - Prediction plots, residual analysis
4. **Feature Importance** - Understand model decisions
5. **Business Impact** - Translate metrics to business value

In [None]:
# Imports
import sys
from pathlib import Path
import warnings
# Suppress every warning category so the notebook output stays uncluttered.
warnings.filterwarnings('ignore')

# The parent of the current working directory is treated as the project root.
# It is placed at the front of sys.path (once) so that the `src` package
# imported further down can be resolved.
project_root = Path.cwd().parent
_root_entry = str(project_root)
if _root_entry not in sys.path:
    sys.path.insert(0, _root_entry)

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from src.utils.logger import setup_logging, get_logger
from src.models.model_trainer import ModelRegistry
from src.models.model_evaluator import ModelEvaluator

# Configure logging at INFO level and obtain a logger for this notebook.
setup_logging(log_level="INFO")
logger = get_logger(__name__)

# Directory layout, all resolved relative to the project root.
PROCESSED_DIR = project_root.joinpath("data", "processed")
MODELS_DIR = project_root.joinpath("models")
FIGURES_DIR = project_root.joinpath("reports", "figures")

# Create the figures directory (and any missing parents) up front; it is
# handed to the evaluator later in the notebook.
FIGURES_DIR.mkdir(parents=True, exist_ok=True)

print("âœ“ Setup complete")

## 1. Load Model and Data

In [None]:
# Load the production model from the registry. If that fails for any ordinary
# reason, fall back to an unfitted demo RandomForest (fitted further below).
registry = ModelRegistry(str(MODELS_DIR))
TARGET = 'AMT_PAID'

try:
    model, metadata = registry.get_production_model()
except Exception as exc:
    # Catch `Exception` instead of using a bare `except:` so KeyboardInterrupt
    # and SystemExit still propagate, and surface the reason for the fallback
    # rather than hiding it.
    print("âš  No production model found. Creating demo model...")
    print(f"  Reason: {type(exc).__name__}: {exc}")
    from sklearn.ensemble import RandomForestRegressor
    model = RandomForestRegressor(n_estimators=50, random_state=42)
    metadata = None  # sentinel: marks that the demo model is in use
else:
    print(f"âœ“ Loaded model: {metadata.model_name}")
    print(f"  Type: {metadata.model_type}")
    print(f"  Version: {metadata.version}")

# Load the processed claims, or synthesise a demo dataset when the parquet
# file is absent.
parquet_path = PROCESSED_DIR / "processed_claims.parquet"
if parquet_path.exists():
    df = pd.read_parquet(parquet_path)
else:
    # Synthetic data: 20 standard-normal features; the target is a linear
    # function of the first two features plus Gaussian noise.
    np.random.seed(42)
    n = 10000
    n_features = 20
    X_demo = pd.DataFrame(
        np.random.randn(n, n_features),
        columns=[f'feature_{i}' for i in range(n_features)],
    )
    y_demo = 500 + 200 * X_demo['feature_0'] + 100 * X_demo['feature_1'] + np.random.randn(n) * 100
    df = pd.concat([X_demo, pd.Series(y_demo, name=TARGET)], axis=1)

# Fail with an actionable message if the target column is missing.
if TARGET not in df.columns:
    raise KeyError(f"Target column {TARGET!r} not found in data loaded from {parquet_path}")

y = df[TARGET]
X = df.drop(columns=[TARGET])

# Create the train/test split (80/20, fixed seed for reproducibility).
# NOTE(review): when a production model is loaded, this split may overlap the
# rows it was trained on -- confirm against the training pipeline.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the demo model on the TRAIN partition only. Fitting on `X.iloc[:8000]`
# (as before) overlapped the randomly drawn test rows and leaked test data
# into training, inflating every downstream metric.
if metadata is None:
    model.fit(X_train, y_train)

print(f"\nâœ“ Test set: {len(X_test):,} samples")

## 2. Model Evaluation

In [None]:
# Build the evaluator, pointing it at the figures directory.
evaluator = ModelEvaluator(figures_dir=str(FIGURES_DIR))

# Score the model on the test split; this yields a metrics object together
# with the predictions for that split.
metrics, y_pred = evaluator.evaluate_model(model, X_test, y_test, "Production Model")

# Print every metric rounded to four decimal places.
print("\nðŸ“Š Performance Metrics:")
metric_table = metrics.to_dict()
for metric_name, metric_value in metric_table.items():
    print(f"  {metric_name}: {metric_value:.4f}")

## 3. Visualization

In [None]:
# Prediction and residual-distribution plots for the test split.
actual_values = y_test.values
evaluator.plot_predictions(actual_values, y_pred, "Production Model")
evaluator.plot_residual_distribution(actual_values, y_pred, "Production Model")

# Feature-importance chart, only for models that expose
# `feature_importances_`.
if hasattr(model, 'feature_importances_'):
    importances = {
        column: score
        for column, score in zip(X_test.columns, model.feature_importances_)
    }
    evaluator.plot_feature_importance(importances, top_n=15, model_name="Production Model")

## 4. Summary

In [None]:
# Final summary: headline metrics, error relative to the mean claim, and a
# deployment-readiness statement.
print("\n" + "="*60)
print("ðŸ“Š MODEL EVALUATION SUMMARY")
print("="*60)
print("\nModel Performance:")
print(f"  RÂ² Score: {metrics.r2:.4f}")
print(f"  RMSE: ${metrics.rmse:,.2f}")
print(f"  MAE: ${metrics.mae:,.2f}")
# NOTE(review): the `%` format spec multiplies by 100, so this assumes
# `metrics.mape` is a fraction (0.12 == 12%) -- confirm against ModelEvaluator.
print(f"  MAPE: {metrics.mape:.2%}")

print("\nBusiness Impact:")
avg_actual = y_test.mean()
print(f"  Average Claim: ${avg_actual:,.2f}")
# Express MAE as a share of the mean claim; guard against a zero mean so the
# summary never divides by zero.
mae_share = metrics.mae / avg_actual * 100 if avg_actual else float("nan")
print(f"  Average Error: ${metrics.mae:,.2f} ({mae_share:.1f}% of avg claim)")

# Only claim deployment readiness when a real production model was loaded.
# `metadata is None` means the notebook fell back to the throwaway demo model,
# which is never saved to MODELS_DIR.
if metadata is not None:
    print("\nModel is ready for deployment!")
    print(f"  Model saved in: {MODELS_DIR}")
else:
    print("\nWARNING: a demo fallback model was evaluated; it is NOT ready for deployment.")
print(f"  Figures saved in: {FIGURES_DIR}")
print("\nâœ… Evaluation completed!")