# Wildfire Prediction Model Evaluation

This notebook evaluates the performance of our wildfire prediction model through various metrics and visualizations.

## Table of Contents
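1. Setup and Model Loading
2. Performance Metrics
3. Prediction Visualization
4. Uncertainty Analysis
5. Error Analysis
6. Model Comparison
7. Time Series Analysis
8. Feature Importance Analysis
9. Model Calibration Analysis
10. Generate Final Report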

In [None]:
import sys
import os
from pathlib import Path

# Add the parent of the working directory to the import path so that the
# `src` package imported below can be resolved.
# NOTE(review): assumes the notebook is launched from a directory directly
# under the project root — confirm.
project_root = str(Path.cwd().parent)
if project_root not in sys.path:
    # The guard keeps sys.path free of duplicates when this cell is re-run.
    sys.path.append(project_root)

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix, classification_report

from src.models.wildfire_model import WildfirePredictionModel
from src.utils.metrics import WildfireMetrics
from src.config import config

%matplotlib inline
# Matplotlib 3.6 renamed its bundled seaborn style sheets to 'seaborn-v0_8*'
# and later releases dropped the old names, so prefer the new name when this
# installation offers it and fall back to the legacy one otherwise.
_mpl_style = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
plt.style.use(_mpl_style)
sns.set_style('whitegrid')

## 1. Load Model and Test Data

In [None]:
# Load the test arrays. The context manager closes the .npz archive as soon
# as the three arrays have been read out of it.
# NOTE(review): this path (and 'models/saved' below) is relative to the
# working directory, whereas `src` is imported from its parent — confirm the
# data really lives under the directory the notebook is run from.
with np.load('data/processed/test_data.npz') as test_data:
    X_test_spatial = test_data['spatial']
    X_test_temporal = test_data['temporal']
    y_test = test_data['labels']

# One constant for the ensemble size so the constructor argument and the
# number of checkpoints loaded cannot drift apart.
NUM_ENSEMBLE_MEMBERS = 3

# Initialize and load trained model
model = WildfirePredictionModel(
    config=config['model'],
    num_ensemble=NUM_ENSEMBLE_MEMBERS,
    uncertainty=True,
)
model.load_models('models/saved', num_models=NUM_ENSEMBLE_MEMBERS)

# Make predictions; the result is unpacked into a (predictions,
# uncertainties) pair that every later cell reuses.
predictions, uncertainties = model.predict(
    [X_test_spatial, X_test_temporal],
    return_uncertainty=True,
)

## 2. Performance Metrics

In [None]:
def evaluate_model_performance(y_true, y_pred, uncertainties):
    """Compute the full WildfireMetrics suite and print a grouped summary.

    Args:
        y_true: Ground-truth labels.
        y_pred: Model predictions.
        uncertainties: Per-prediction uncertainty estimates.

    Returns:
        The mapping returned by ``WildfireMetrics.calculate_all_metrics``;
        it is indexed here by metric name (``'precision'``, ``'iou'``, ...).
    """
    results = WildfireMetrics(save_dir='metrics_output').calculate_all_metrics(
        y_true, y_pred, uncertainties
    )

    # (section heading, ((printed label, key in `results`), ...)) in the
    # order the summary is printed.
    summary_layout = (
        ("Classification Metrics", (
            ("Precision", 'precision'),
            ("Recall", 'recall'),
            ("F1 Score", 'f1_score'),
            ("ROC AUC", 'roc_auc'),
        )),
        ("Spatial Metrics", (
            ("IoU Score", 'iou'),
            ("Boundary F1", 'boundary_f1'),
        )),
        ("Uncertainty Metrics", (
            ("Error-Uncertainty Correlation", 'error_uncertainty_corr'),
            ("Uncertainty Calibration", 'uncertainty_calibration'),
        )),
    )

    for heading, rows in summary_layout:
        print(f"\n{heading}:")
        for label, key in rows:
            print(f"{label}: {results[key]:.4f}")

    return results


performance_metrics = evaluate_model_performance(y_test, predictions, uncertainties)

## 3. Prediction Visualization

In [None]:
def visualize_predictions(y_true, y_pred, uncertainties, sample_idx=0):
    """Show ground truth, prediction, uncertainty and absolute error for one sample.

    Args:
        y_true: Ground-truth maps, indexed by sample along the first axis.
        y_pred: Predicted maps, indexed the same way.
        uncertainties: Uncertainty maps, indexed the same way.
        sample_idx: Index of the sample to display.

    Raises:
        IndexError: If ``sample_idx`` is outside any of the three arrays.
    """
    abs_error = np.abs(y_true[sample_idx] - y_pred[sample_idx])

    # (title, image, colormap) for each panel, in row-major grid order.
    panels = (
        ('Ground Truth', y_true[sample_idx], 'hot'),
        ('Prediction', y_pred[sample_idx], 'hot'),
        ('Uncertainty', uncertainties[sample_idx], 'viridis'),
        ('Prediction Error', abs_error, 'Reds'),
    )

    fig, axes = plt.subplots(2, 2, figsize=(15, 15))
    for ax, (title, image, cmap) in zip(axes.flat, panels):
        mappable = ax.imshow(image, cmap=cmap)
        ax.set_title(title)
        fig.colorbar(mappable, ax=ax)

    plt.tight_layout()
    plt.show()


# Visualize the first 3 samples, or fewer when the test set is smaller, so
# the preview never indexes past the end of the data.
for i in range(min(3, len(y_test))):
    print(f"\nSample {i+1}:")
    visualize_predictions(y_test, predictions, uncertainties, sample_idx=i)

## 4. Uncertainty Analysis

In [None]:
def analyze_uncertainty(y_true, y_pred, uncertainties):
    """Scatter absolute error against uncertainty and report their correlation.

    Every element of the inputs becomes one point, so all three arrays must
    flatten to the same length.

    Args:
        y_true: Ground-truth array.
        y_pred: Prediction array, same shape as ``y_true``.
        uncertainties: Uncertainty array, same number of elements.

    Returns:
        The Pearson correlation between flattened uncertainty and absolute
        error (NaN when either is constant).
    """
    errors = np.abs(y_true - y_pred).ravel()
    flat_uncertainties = np.asarray(uncertainties).ravel()

    plt.figure(figsize=(10, 6))
    plt.scatter(flat_uncertainties, errors, alpha=0.1)
    plt.xlabel('Prediction Uncertainty')
    plt.ylabel('Absolute Error')
    plt.title('Uncertainty vs Prediction Error')

    # Least-squares trend line. A straight line is fully determined by its
    # two end points, so only those are drawn instead of one vertex per pixel.
    slope, intercept = np.polyfit(flat_uncertainties, errors, 1)
    x_span = np.array([flat_uncertainties.min(), flat_uncertainties.max()])
    plt.plot(x_span, slope * x_span + intercept, 'r--', alpha=0.8)

    correlation = float(np.corrcoef(flat_uncertainties, errors)[0, 1])
    plt.text(0.05, 0.95, f'Correlation: {correlation:.3f}',
             transform=plt.gca().transAxes)

    plt.show()

    return correlation


# Assigned so the returned value is kept rather than echoed as cell output.
uncertainty_error_correlation = analyze_uncertainty(y_test, predictions, uncertainties)

## 5. Error Analysis

In [None]:
def analyze_errors(y_true, y_pred, X_temporal):
    """Plot how the absolute prediction error relates to the weather inputs.

    Draws a 2x2 figure: per-sample mean error against the first two temporal
    features, a histogram of per-sample mean error, and the error averaged
    over samples as a map.

    Args:
        y_true: Ground-truth array; reduced over axes 1 and 2 per sample.
        y_pred: Prediction array with the same shape as ``y_true``.
        X_temporal: Temporal inputs indexed as ``[sample, step, feature]``.
    """
    abs_err = np.abs(y_true - y_pred)
    per_sample_err = abs_err.mean(axis=(1, 2))

    fig, axes = plt.subplots(2, 2, figsize=(15, 12))

    # Top row: error against a weather feature averaged over axis 1.
    # Assumes temperature is feature 0 and humidity is feature 1.
    weather_panels = (
        (axes[0, 0], 0, 'Temperature'),
        (axes[0, 1], 1, 'Humidity'),
    )
    for ax, feature_idx, label in weather_panels:
        feature_mean = X_temporal[:, :, feature_idx].mean(axis=1)
        ax.scatter(feature_mean, per_sample_err, alpha=0.5)
        ax.set_xlabel(label)
        ax.set_ylabel('Mean Prediction Error')
        ax.set_title(f'Error vs {label}')

    # Bottom left: distribution of the per-sample mean error.
    hist_ax = axes[1, 0]
    hist_ax.hist(per_sample_err, bins=50)
    hist_ax.set_xlabel('Mean Prediction Error')
    hist_ax.set_ylabel('Frequency')
    hist_ax.set_title('Error Distribution')

    # Bottom right: error averaged across samples, shown as a map.
    map_ax = axes[1, 1]
    error_map = map_ax.imshow(abs_err.mean(axis=0), cmap='Reds')
    map_ax.set_title('Spatial Error Distribution')
    plt.colorbar(error_map, ax=map_ax)

    plt.tight_layout()
    plt.show()


analyze_errors(y_test, predictions, X_test_temporal)

## 6. Model Comparison

In [None]:
def compare_ensemble_members(model, X_test, y_test):
    """Plot the spread between ensemble members and tabulate per-member scores.

    Args:
        model: Object exposing the ensemble members as ``model.models``.
        X_test: Inputs forwarded unchanged to each member's ``predict``.
        y_test: Ground-truth labels used for the per-member metrics.

    Returns:
        A DataFrame with one row per member and the columns ``Model``,
        ``Precision``, ``Recall`` and ``F1``.
    """
    # NOTE(review): `[0]` keeps the first element of whatever each member's
    # predict() returns, assumed to be the prediction output — confirm.
    member_preds = [member.predict(X_test)[0] for member in model.models]

    # Standard deviation across members; larger values mean more disagreement.
    member_spread = np.std(member_preds, axis=0)

    plt.figure(figsize=(10, 6))
    plt.imshow(member_spread.mean(axis=0), cmap='viridis')
    plt.colorbar(label='Standard Deviation of Predictions')
    plt.title('Ensemble Agreement Map')
    plt.show()

    # Score each member separately with the shared metrics helper.
    scorer = WildfireMetrics()
    rows = []
    for member_no, pred in enumerate(member_preds, start=1):
        scores = scorer.classification_metrics(y_test, pred)
        rows.append({
            'Model': f'Member {member_no}',
            'Precision': scores['precision'],
            'Recall': scores['recall'],
            'F1': scores['f1_score'],
        })

    return pd.DataFrame(rows)


ensemble_comparison = compare_ensemble_members(
    model,
    [X_test_spatial, X_test_temporal],
    y_test,
)
ensemble_comparison

## 7. Time Series Analysis

In [None]:
def analyze_temporal_performance(y_true, y_pred, uncertainties):
    """Compute and plot accuracy, precision, recall and mean uncertainty per step.

    Each index along the first axis of the inputs is treated as one time
    step. Predictions are binarised at 0.5 before being compared with
    ``y_true``.

    Args:
        y_true: Ground-truth labels, one entry per step.
        y_pred: Predicted scores, one entry per step.
        uncertainties: Uncertainty estimates, one entry per step.

    Returns:
        A dict with the keys ``'accuracy'``, ``'precision'``, ``'recall'``
        and ``'mean_uncertainty'``, each holding a list with one value per
        step.
    """
    accuracy_series = []
    precision_series = []
    recall_series = []
    uncertainty_series = []

    for step in range(len(y_true)):
        truth = y_true[step]
        predicted_fire = (y_pred[step] > 0.5).astype(int)

        # True positives feed both precision and recall; the 1e-10 term
        # avoids division by zero when a step has no positives.
        hits = np.sum((truth == 1) & (predicted_fire == 1))

        accuracy_series.append(np.mean(truth == predicted_fire))
        precision_series.append(hits / (np.sum(predicted_fire == 1) + 1e-10))
        recall_series.append(hits / (np.sum(truth == 1) + 1e-10))
        uncertainty_series.append(uncertainties[step].mean())

    temporal_metrics = {
        'accuracy': accuracy_series,
        'precision': precision_series,
        'recall': recall_series,
        'mean_uncertainty': uncertainty_series,
    }

    fig, (perf_ax, unc_ax) = plt.subplots(2, 1, figsize=(15, 10))

    # Top panel: the three classification metrics on a shared axis.
    for key, label, marker in (
        ('accuracy', 'Accuracy', 'o'),
        ('precision', 'Precision', 's'),
        ('recall', 'Recall', '^'),
    ):
        perf_ax.plot(temporal_metrics[key], label=label, marker=marker)
    perf_ax.set_xlabel('Time Step')
    perf_ax.set_ylabel('Metric Value')
    perf_ax.set_title('Performance Metrics Over Time')
    perf_ax.legend()

    # Bottom panel: mean uncertainty per step.
    unc_ax.plot(temporal_metrics['mean_uncertainty'], color='red', marker='o')
    unc_ax.set_xlabel('Time Step')
    unc_ax.set_ylabel('Mean Uncertainty')
    unc_ax.set_title('Prediction Uncertainty Over Time')

    plt.tight_layout()
    plt.show()

    return temporal_metrics


temporal_analysis = analyze_temporal_performance(y_test, predictions, uncertainties)

## 8. Feature Importance Analysis

In [None]:
def analyze_feature_importance(model, X_temporal, y_pred=None):
    """Rank temporal features by absolute correlation with the predictions.

    For each feature, its per-sample mean (over axis 1 of ``X_temporal``) is
    correlated with the per-sample mean prediction. Both series have one
    value per sample, so their lengths always match.

    Args:
        model: Unused; kept so existing callers keep working.
        X_temporal: Temporal inputs indexed as ``[sample, step, feature]``.
        y_pred: Predictions with samples along the first axis. When omitted,
            the notebook-level ``predictions`` variable is used, as before.

    Returns:
        A dict mapping each feature name to the absolute Pearson correlation
        (NaN for a feature that is constant across samples).
    """
    # Assumed feature order of the last axis of X_temporal — TODO confirm
    # against the data pipeline.
    feature_names = [
        'Temperature',
        'Humidity',
        'Wind Speed',
        'Wind Direction',
        'Precipitation'
    ]

    if y_pred is None:
        # Backward-compatible fallback to the notebook global.
        y_pred = predictions
    y_pred = np.asarray(y_pred)

    # Collapse every non-sample axis to get one mean score per sample.
    sample_scores = y_pred.reshape(len(y_pred), -1).mean(axis=1)

    # Do not index past the features the array actually contains.
    num_features = min(len(feature_names), X_temporal.shape[2])
    used_names = feature_names[:num_features]

    feature_correlations = [
        abs(np.corrcoef(X_temporal[:, :, idx].mean(axis=1), sample_scores)[0, 1])
        for idx in range(num_features)
    ]

    # Plot feature importance
    plt.figure(figsize=(10, 6))
    sns.barplot(x=feature_correlations, y=used_names)
    plt.xlabel('Absolute Correlation with Predictions')
    plt.title('Feature Importance Analysis')
    plt.show()

    return dict(zip(used_names, feature_correlations))


feature_importance = analyze_feature_importance(
    model, X_test_temporal, y_pred=predictions
)

## 9. Model Calibration Analysis

In [None]:
def analyze_calibration(y_true, y_pred, n_bins=10):
    """Plot a reliability diagram and compute the Expected Calibration Error.

    Predictions are grouped into ``n_bins`` equal-width bins over [0, 1].
    Every bin is half-open ``[low, high)`` except the last, which also
    includes its right edge so that predictions of exactly 1.0 are counted.
    Predictions outside [0, 1] fall in no bin and are ignored.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted probabilities with the same number of elements.
        n_bins: Number of equal-width confidence bins.

    Returns:
        A dict with ``'bin_accuracies'``, ``'bin_confidences'`` and
        ``'bin_counts'`` (one entry per bin; empty bins hold 0) and
        ``'ece'`` (NaN when no prediction fell into any bin).
    """
    y_true_flat = np.asarray(y_true).ravel()
    y_pred_flat = np.asarray(y_pred).ravel()

    bin_edges = np.linspace(0, 1, n_bins + 1)

    bin_accuracies = []
    bin_confidences = []
    bin_counts = []

    last_bin = n_bins - 1
    for bin_idx, (low, high) in enumerate(zip(bin_edges[:-1], bin_edges[1:])):
        in_bin = y_pred_flat >= low
        if bin_idx == last_bin:
            # Closed on the right so a prediction of exactly 1.0 is counted.
            in_bin &= y_pred_flat <= high
        else:
            in_bin &= y_pred_flat < high

        count = int(in_bin.sum())
        if count:
            bin_accuracies.append(y_true_flat[in_bin].mean())
            bin_confidences.append(y_pred_flat[in_bin].mean())
        else:
            bin_accuracies.append(0)
            bin_confidences.append(0)
        bin_counts.append(count)

    # Plot calibration curve
    plt.figure(figsize=(10, 6))

    # Perfect calibration line
    plt.plot([0, 1], [0, 1], 'k--', label='Perfect Calibration')

    # Only populated bins are drawn; the 0 placeholders of empty bins would
    # otherwise pull the curve down to the origin.
    populated = [idx for idx, count in enumerate(bin_counts) if count > 0]
    plt.plot(
        [bin_confidences[idx] for idx in populated],
        [bin_accuracies[idx] for idx in populated],
        'ro-',
        label='Model Calibration',
    )

    # Add histogram of predictions
    plt.hist(y_pred_flat, bins=bin_edges, density=True, alpha=0.3)

    plt.xlabel('Predicted Probability')
    plt.ylabel('Observed Frequency')
    plt.title('Calibration Plot')
    plt.legend()
    plt.grid(True)
    plt.show()

    # ECE is the count-weighted mean gap between accuracy and confidence.
    # It is undefined, and reported as NaN, when nothing was binned.
    total_samples = sum(bin_counts)
    if total_samples:
        ece = sum(
            abs(acc - conf) * count / total_samples
            for acc, conf, count in zip(bin_accuracies, bin_confidences, bin_counts)
        )
    else:
        ece = float('nan')

    print(f'Expected Calibration Error: {ece:.4f}')

    return {
        'bin_accuracies': bin_accuracies,
        'bin_confidences': bin_confidences,
        'bin_counts': bin_counts,
        'ece': ece
    }


calibration_results = analyze_calibration(y_test, predictions)

## 10. Generate Final Report

In [None]:
def generate_evaluation_report(metrics_dict, calibration=None, feature_scores=None,
                               temporal=None, output_path='evaluation_report.csv'):
    """Collect all evaluation results into one table and save it as CSV.

    The report is built as long-format rows rather than through
    ``DataFrame.from_dict(orient='index')``, which cannot handle the mix of
    nested dicts and plain numbers that the sections contain.

    Args:
        metrics_dict: Mapping with the keys ``'precision'``, ``'recall'``,
            ``'f1_score'``, ``'roc_auc'``, ``'iou'``, ``'boundary_f1'``,
            ``'error_uncertainty_corr'`` and ``'uncertainty_calibration'``.
        calibration: Mapping with an ``'ece'`` entry. Defaults to the
            notebook-level ``calibration_results``.
        feature_scores: Mapping of feature name to score. Defaults to the
            notebook-level ``feature_importance``.
        temporal: Mapping with ``'accuracy'`` and ``'mean_uncertainty'``
            sequences. Defaults to the notebook-level ``temporal_analysis``.
        output_path: Where the CSV is written.

    Returns:
        A DataFrame indexed by ``(Section, Group, Metric)`` with a single
        ``Value`` column. ``Group`` is the empty string for sections that
        have no sub-groups.

    Raises:
        KeyError: If a required key is missing from one of the mappings.
    """
    # Fall back to the notebook globals so existing one-argument calls work.
    if calibration is None:
        calibration = calibration_results
    if feature_scores is None:
        feature_scores = feature_importance
    if temporal is None:
        temporal = temporal_analysis

    report = {
        'Model Performance': {
            'Classification Metrics': {
                'Precision': metrics_dict['precision'],
                'Recall': metrics_dict['recall'],
                'F1 Score': metrics_dict['f1_score'],
                'ROC AUC': metrics_dict['roc_auc']
            },
            'Spatial Metrics': {
                'IoU Score': metrics_dict['iou'],
                'Boundary F1': metrics_dict['boundary_f1']
            },
            'Uncertainty Metrics': {
                'Error-Uncertainty Correlation': metrics_dict['error_uncertainty_corr'],
                'Uncertainty Calibration': metrics_dict['uncertainty_calibration']
            },
            'Calibration': {
                'ECE': calibration['ece']
            }
        },
        'Feature Importance': dict(feature_scores),
        'Temporal Analysis': {
            'Mean Accuracy': np.mean(temporal['accuracy']),
            'Mean Uncertainty': np.mean(temporal['mean_uncertainty'])
        }
    }

    # Flatten to one row per metric. Sections are either two levels deep
    # (group -> metric) or one level deep (metric only).
    rows = []
    for section, content in report.items():
        for key, value in content.items():
            if isinstance(value, dict):
                rows.extend(
                    (section, key, metric, metric_value)
                    for metric, metric_value in value.items()
                )
            else:
                rows.append((section, '', key, value))

    report_df = pd.DataFrame(
        rows, columns=['Section', 'Group', 'Metric', 'Value']
    ).set_index(['Section', 'Group', 'Metric'])

    # Save report
    report_df.to_csv(output_path)

    return report_df

# Build the consolidated report from the metrics computed in section 2. The
# call also writes evaluation_report.csv, and the bare name on the last line
# displays the returned DataFrame as the cell output.
final_report = generate_evaluation_report(performance_metrics)
final_report