# 11: Validate & Define Bench Cutoffs

Validate Random Forest model performance and define severity-based alert thresholds.

## Purpose

1. **Validate** model performance on test set
2. **Analyze** per-class metrics (severity 0-5)
3. **Define** alert thresholds for bench decisions
4. **Save** thresholds for production use

## Alert Levels

- **Severity 0-1**: No action (green)
- **Severity 2**: Low alert - Monitor (yellow)
- **Severity 3**: Medium alert - Caution (orange)
- **Severity 4**: High alert - Bench recommendation (red)
- **Severity 5**: Critical alert - Immediate intervention (dark red)


In [None]:
import pandas as pd
import numpy as np
import pickle
import json
from pathlib import Path
import sys
sys.path.append('../../src')

from sledhead_imu.models.random_forest import evaluate_random_forest
from sledhead_imu.validate.validate_cutoffs import (
    validate_model_performance,
    validate_per_class_performance,
    get_confusion_matrix_report
)
from sledhead_imu.alerts.thresholds import define_alert_thresholds_from_severity

# Resolve the pipeline directories this notebook reads from and writes to.
data_dir = Path('../data')
models_dir = data_dir / '10_models'
validate_dir = data_dir / '11_metrics_validate_cutoffs'

# Feature and label CSVs of the held-out test split.
test_split_dir = data_dir / '09_splits' / 'test'
test_X_file = test_split_dir / 'X_test.csv'
test_y_file = test_split_dir / 'y_test.csv'

# `model` ends up as None whenever the test set or the pickled model is
# missing; the later cells check it before doing any work.
if not (test_X_file.exists() and test_y_file.exists()):
    print("⚠️  Test set not found. Run 09_train_test_split.ipynb first.")
    model = None
else:
    X_test = pd.read_csv(test_X_file)
    y_test = pd.read_csv(test_y_file)
    # Keep only the first column of the label frame so y_test is a Series.
    if isinstance(y_test, pd.DataFrame):
        y_test = y_test.iloc[:, 0]

    print("✓ Loaded test set")
    print(f"  Samples: {len(X_test)}")

    # Trained Random Forest saved by the training notebook.
    model_file = models_dir / 'rf' / 'model.pkl'
    if not model_file.exists():
        print("⚠️  Model not found. Train model in 10_train_random_forest.ipynb first.")
        model = None
    else:
        # NOTE: unpickling can execute arbitrary code; only load model
        # files that come from a trusted source.
        with open(model_file, 'rb') as f:
            model = pickle.load(f)
        print("✓ Loaded trained Random Forest model")


In [None]:
# Score the trained model on the held-out test split and write the reports.
# Nothing is evaluated when the loading cell could not provide a model.
if model is None:
    print("⚠️  Skipping evaluation - no model available")
else:
    print("Evaluating model on test set...")

    results = evaluate_random_forest(model, X_test, y_test)

    print(f"\n✓ Overall Performance:")
    print(f"  Accuracy: {results['accuracy']:.3f}")

    # The same predictions feed every validation helper below.
    y_pred = results['predictions']

    # Aggregate precision / recall / F1
    metrics = validate_model_performance(y_test, y_pred)
    print(f"\n✓ Weighted Metrics:")
    print(f"  Precision: {metrics['precision']:.3f}")
    print(f"  Recall: {metrics['recall']:.3f}")
    print(f"  F1 Score: {metrics['f1']:.3f}")

    # Breakdown per class, printed as a table
    per_class = validate_per_class_performance(y_test, y_pred)
    print(f"\n✓ Per-Class Performance:")
    print(per_class.to_string(index=False))

    # Confusion matrix plus the accompanying classification report
    cm, report_df = get_confusion_matrix_report(y_test, y_pred)
    print(f"\n✓ Confusion Matrix:")
    print(cm)

    # Make sure the output folders exist before writing anything
    validate_dir.mkdir(parents=True, exist_ok=True)
    reports_dir = validate_dir / 'reports'
    reports_dir.mkdir(exist_ok=True)

    # Persist the three reports as CSV files
    per_class.to_csv(reports_dir / 'per_class_performance.csv', index=False)
    report_df.to_csv(reports_dir / 'classification_report.csv')
    np.savetxt(reports_dir / 'confusion_matrix.csv', cm, delimiter=',', fmt='%d')

    print(f"\n✓ Saved reports to {reports_dir}")


In [None]:
# Build the severity -> alert-level mapping and store it as JSON and CSV.
# Skipped when the loading cell could not provide a model.
if model is None:
    print("⚠️  Skipping threshold definition - no model available")
else:
    print("Defining alert thresholds...")

    # Mapping supplied by the project's thresholds helper
    severity_mapping = define_alert_thresholds_from_severity()

    print(f"\n✓ Alert Thresholds:")
    for severity, level in sorted(severity_mapping.items()):
        print(f"  Severity {severity}: {level.capitalize()}")

    # Output folder for the threshold artifacts
    thresholds_dir = validate_dir / 'thresholds'
    thresholds_dir.mkdir(parents=True, exist_ok=True)

    # JSON copy of the mapping
    json_path = thresholds_dir / 'severity_mapping.json'
    with open(json_path, 'w') as f:
        json.dump(severity_mapping, f, indent=2)

    # CSV copy with one row per severity, for easy viewing
    rows = [
        {'severity': sev, 'alert_level': alert}
        for sev, alert in severity_mapping.items()
    ]
    threshold_df = pd.DataFrame(rows)
    csv_path = thresholds_dir / 'severity_mapping.csv'
    threshold_df.to_csv(csv_path, index=False)

    print(f"\n✓ Saved thresholds to {thresholds_dir}")

    print("\n✅ Validation and threshold definition complete!")
