# 05_validation

## Carbon Sequestration Model Validation

**Objectives:**
- Validate model predictions with ground truth data
- Calculate uncertainty estimates
- Perform spatial cross-validation
- Compare with existing carbon maps
- Analyze spatial autocorrelation
- Generate comprehensive performance metrics

**Validation Approaches:**
- Hold-out validation
- Spatial cross-validation
- Bootstrap uncertainty estimation
- Comparison with external datasets

## 1. Import Dependencies and Setup

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

# Machine Learning and Validation
from sklearn.model_selection import cross_val_score, KFold, StratifiedKFold
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.inspection import permutation_importance
from sklearn.utils import resample
import scipy.stats as stats
from scipy import spatial

# Spatial Analysis
import geopandas as gpd
from libpysal.weights import DistanceBand, KNN
from esda.moran import Moran
from esda.geary import Geary
from splot.esda import plot_moran

# Visualization
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import folium
from folium import plugins

# Model and Data
import joblib
import json

# Statistical Analysis
from statsmodels.stats.outliers_influence import variance_inflation_factor
from statsmodels.tsa.stattools import acf

# Setup
plt.style.use('seaborn-v0_8')
sns.set_palette("viridis")
%matplotlib inline
np.random.seed(42)

# Create output directories
import os
os.makedirs('outputs/validation', exist_ok=True)
os.makedirs('outputs/uncertainty', exist_ok=True)
os.makedirs('outputs/comparison', exist_ok=True)

print("‚úÖ All dependencies imported successfully")
print("‚úÖ Output directories created")

## 2. Load Data and Trained Model

In [None]:
# Load trained model and preprocessing pipeline
# NOTE: joblib.load unpickles arbitrary objects; only load artifacts produced
# by this project's own training notebook.
try:
    model = joblib.load('models/carbon_sequestration_model.pkl')
    scaler = joblib.load('models/scaler.pkl')
    feature_engineer = joblib.load('models/feature_engineer.pkl')

    with open('models/feature_names.json', 'r') as f:
        feature_names = json.load(f)

    with open('models/model_metrics.json', 'r') as f:
        model_metrics = json.load(f)

    # The key used to be spelled with mis-encoded characters ('R¬≤ Score'),
    # which would not match a correctly encoded metrics file. Try the proper
    # spelling first and keep the legacy spelling as a fallback.
    previous_r2 = model_metrics.get('R² Score', model_metrics.get('R¬≤ Score', 'N/A'))

    print("✅ Successfully loaded trained model and pipeline")
    print(f"📊 Model type: {type(model).__name__}")
    print(f"🔧 Number of features: {len(feature_names)}")
    print(f"📈 Previous test R²: {previous_r2}")

except FileNotFoundError as e:
    # Nothing below can run without the trained artifacts, so re-raise.
    print(f"❌ Model files not found: {e}")
    print("💡 Please run notebook 03 (model development) first")
    raise

# Load processed data
try:
    biomass_df = pd.read_csv('outputs/biomass_data_with_insights.csv')
    json_df = pd.read_csv('outputs/json_data_with_insights.csv')

    # Combine datasets
    validation_df = pd.concat([biomass_df, json_df], ignore_index=True)

    print("✅ Successfully loaded validation data")
    print(f"📊 Total samples: {len(validation_df)}")

except FileNotFoundError:
    print("❌ Processed data not found. Using sample data...")
    # Create sample validation data.
    # NOTE(review): these values are random draws, so every metric computed
    # from them further down only exercises the pipeline and says nothing
    # about real model quality.
    np.random.seed(42)
    n_samples = 200
    validation_df = pd.DataFrame({
        'latitude': np.random.uniform(40.0, 45.0, n_samples),
        'longitude': np.random.uniform(-75.0, -70.0, n_samples),
        'biomass': np.random.normal(2.5, 1.0, n_samples).clip(0.1, 5.0),
        'carbon_stock': np.random.normal(1.2, 0.5, n_samples).clip(0.05, 2.5)
    })

# Prepare features for validation
X_validation, y_validation = feature_engineer.prepare_features(validation_df)
X_validation_scaled = scaler.transform(X_validation)

# Make predictions
y_pred = model.predict(X_validation_scaled)

# Calculate residuals (observed minus predicted)
residuals = y_validation - y_pred

print("\n📊 Validation Dataset:")
print(f"  Samples: {len(validation_df)}")
print(f"  Features: {X_validation.shape[1]}")
print(f"  Carbon stock range: {y_validation.min():.3f} - {y_validation.max():.3f} kg/m²")
print(f"  Predictions range: {y_pred.min():.3f} - {y_pred.max():.3f} kg/m²")

## 3. Ground Truth Data Comparison

In [None]:
class GroundTruthValidator:
    """Validate model predictions against ground truth data.

    Residuals are computed once at construction as ``y_true - y_pred``, so a
    positive residual means the model under-predicted.

    Args:
        y_true: Observed target values (array-like supporting ``-``,
            ``.min()`` and ``.max()``).
        y_pred: Predictions aligned element-wise with ``y_true``.
        coordinates: Optional sample coordinates; stored but not used by
            any method of this class.
    """

    def __init__(self, y_true, y_pred, coordinates=None):
        self.y_true = y_true
        self.y_pred = y_pred
        self.coordinates = coordinates
        self.residuals = y_true - y_pred
        self.metrics = {}

    def _mape(self):
        """Return the mean absolute percentage error (in %).

        Samples whose observed value is exactly zero are left out because
        their percentage error is undefined; NaN is returned when no sample
        remains.
        """
        observed = np.asarray(self.y_true, dtype=float)
        errors = np.asarray(self.residuals, dtype=float)
        nonzero = observed != 0
        if not nonzero.any():
            return float('nan')
        return float(np.mean(np.abs(errors[nonzero] / observed[nonzero])) * 100)

    def _mae_by_decile(self):
        """Return ``(labels, mae_values)`` per decile of the observed values.

        Bins are half-open ``[lo, hi)`` except the last one, which is closed
        so that the maximum observation is not dropped. Labels carry the
        decile number, so an empty (skipped) bin does not shift the others.
        """
        observed = np.asarray(self.y_true, dtype=float)
        abs_errors = np.abs(np.asarray(self.residuals, dtype=float))
        edges = np.percentile(observed, np.arange(0, 101, 10))
        last_bin = len(edges) - 2

        labels, mae_values = [], []
        for idx, (lo, hi) in enumerate(zip(edges[:-1], edges[1:])):
            below_upper = observed <= hi if idx == last_bin else observed < hi
            mask = (observed >= lo) & below_upper
            if mask.any():
                labels.append(f"Q{idx + 1}")
                mae_values.append(float(np.mean(abs_errors[mask])))
        return labels, mae_values

    def calculate_comprehensive_metrics(self):
        """Compute regression, bias, agreement and normality metrics.

        Returns:
            dict: ``self.metrics`` with keys ``r2``, ``rmse``, ``mae``,
            ``mse``, ``mape``, ``bias``, ``std_residuals``, ``nse``, ``ioa``,
            ``pearson_r``, ``spearman_rho`` and ``shapiro_p``.
        """

        print("📊 Calculating comprehensive validation metrics...")

        # Basic regression metrics
        self.metrics['r2'] = r2_score(self.y_true, self.y_pred)
        self.metrics['rmse'] = np.sqrt(mean_squared_error(self.y_true, self.y_pred))
        self.metrics['mae'] = mean_absolute_error(self.y_true, self.y_pred)
        self.metrics['mse'] = mean_squared_error(self.y_true, self.y_pred)

        # Percentage error (zero-valued observations are excluded)
        self.metrics['mape'] = self._mape()

        # Bias and precision
        self.metrics['bias'] = np.mean(self.residuals)
        self.metrics['std_residuals'] = np.std(self.residuals)

        # Nash-Sutcliffe efficiency: 1 - SSE / total sum of squares
        nash_sutcliffe = 1 - (np.sum(self.residuals**2) / np.sum((self.y_true - np.mean(self.y_true))**2))
        self.metrics['nse'] = nash_sutcliffe

        # Willmott's index of agreement
        d1 = np.sum(self.residuals**2)
        d2 = np.sum((np.abs(self.y_pred - np.mean(self.y_true)) + np.abs(self.y_true - np.mean(self.y_true)))**2)
        self.metrics['ioa'] = 1 - d1/d2 if d2 != 0 else 0

        # Correlation metrics
        self.metrics['pearson_r'] = np.corrcoef(self.y_true, self.y_pred)[0, 1]
        self.metrics['spearman_rho'] = stats.spearmanr(self.y_true, self.y_pred)[0]

        # Statistical tests: p-value of the Shapiro-Wilk normality test
        self.metrics['shapiro_p'] = stats.shapiro(self.residuals)[1]

        print("✅ Comprehensive metrics calculated")
        return self.metrics

    def create_validation_plots(self):
        """Build, show and save a 2x3 grid of diagnostic plots.

        The figure is displayed and written to
        ``outputs/validation/validation_plots.html``.
        """

        print("\n📈 Creating validation plots...")

        fig = make_subplots(
            rows=2, cols=3,
            subplot_titles=(
                'Predicted vs Observed',
                'Residuals vs Predicted',
                'Residual Distribution',
                'Q-Q Plot of Residuals',
                'Cumulative Distribution',
                'Error Distribution by Quantile'
            )
        )

        # 1. Predicted vs Observed
        fig.add_trace(
            go.Scatter(x=self.y_true, y=self.y_pred, mode='markers',
                      name='Predictions', marker=dict(color='blue', opacity=0.6)),
            row=1, col=1
        )

        # Perfect prediction line (1:1) across the joint data range
        min_val = min(self.y_true.min(), self.y_pred.min())
        max_val = max(self.y_true.max(), self.y_pred.max())
        fig.add_trace(
            go.Scatter(x=[min_val, max_val], y=[min_val, max_val],
                      mode='lines', name='Perfect',
                      line=dict(color='red', dash='dash')),
            row=1, col=1
        )

        # 2. Residuals vs Predicted
        fig.add_trace(
            go.Scatter(x=self.y_pred, y=self.residuals, mode='markers',
                      name='Residuals', marker=dict(color='green', opacity=0.6)),
            row=1, col=2
        )

        # Zero residual line; the x-axis of this panel is the prediction, so
        # the line spans the prediction range only
        fig.add_trace(
            go.Scatter(x=[self.y_pred.min(), self.y_pred.max()], y=[0, 0],
                      mode='lines', name='Zero',
                      line=dict(color='red', dash='dash')),
            row=1, col=2
        )

        # 3. Residual Distribution
        fig.add_trace(
            go.Histogram(x=self.residuals, name='Residuals',
                       marker_color='lightcoral', nbinsx=30),
            row=1, col=3
        )

        # 4. Q-Q Plot: probplot returns ((theoretical, ordered), (slope, intercept, r))
        qq_data = stats.probplot(self.residuals, dist="norm")
        fig.add_trace(
            go.Scatter(x=qq_data[0][0], y=qq_data[0][1], mode='markers',
                      name='Residuals', marker=dict(color='purple')),
            row=2, col=1
        )

        # Q-Q line (least-squares fit returned by probplot)
        fig.add_trace(
            go.Scatter(x=qq_data[0][0], y=qq_data[1][0] * qq_data[0][0] + qq_data[1][1],
                      mode='lines', name='Normal',
                      line=dict(color='red', dash='dash')),
            row=2, col=1
        )

        # 5. Cumulative Distribution (empirical CDFs of observed and predicted)
        sorted_true = np.sort(self.y_true)
        sorted_pred = np.sort(self.y_pred)

        fig.add_trace(
            go.Scatter(x=sorted_true, y=np.arange(len(sorted_true)) / len(sorted_true),
                      mode='lines', name='Observed', line=dict(color='blue')),
            row=2, col=2
        )

        fig.add_trace(
            go.Scatter(x=sorted_pred, y=np.arange(len(sorted_pred)) / len(sorted_pred),
                      mode='lines', name='Predicted', line=dict(color='red')),
            row=2, col=2
        )

        # 6. Error by quantile
        quantile_labels, quantile_errors = self._mae_by_decile()

        fig.add_trace(
            go.Bar(x=quantile_labels,
                  y=quantile_errors, name='MAE by Quantile',
                  marker_color='orange'),
            row=2, col=3
        )

        fig.update_layout(
            title_text="Comprehensive Model Validation",
            height=800,
            showlegend=True
        )

        fig.show()
        fig.write_html('outputs/validation/validation_plots.html')

        print("✅ Validation plots saved to outputs/validation/validation_plots.html")

    def generate_validation_report(self):
        """Print a text report of the validation metrics.

        Metrics are computed first if ``calculate_comprehensive_metrics`` has
        not been called yet, instead of failing with ``KeyError``.
        """

        if not self.metrics:
            self.calculate_comprehensive_metrics()

        print("\n" + "="*80)
        print("📊 COMPREHENSIVE VALIDATION REPORT")
        print("="*80)

        # Performance metrics
        print("\n🎯 PERFORMANCE METRICS:")
        print(f"  R² Score: {self.metrics['r2']:.4f}")
        print(f"  RMSE: {self.metrics['rmse']:.4f} kg/m²")
        print(f"  MAE: {self.metrics['mae']:.4f} kg/m²")
        print(f"  MAPE: {self.metrics['mape']:.2f}%")
        print(f"  Nash-Sutcliffe Efficiency: {self.metrics['nse']:.4f}")
        print(f"  Index of Agreement: {self.metrics['ioa']:.4f}")

        # Bias and precision
        print("\n⚖️  BIAS AND PRECISION:")
        print(f"  Mean Bias: {self.metrics['bias']:.4f} kg/m²")
        print(f"  Residual Std: {self.metrics['std_residuals']:.4f} kg/m²")
        print(f"  Pearson Correlation: {self.metrics['pearson_r']:.4f}")
        print(f"  Spearman Correlation: {self.metrics['spearman_rho']:.4f}")

        # Statistical tests (5% significance level)
        print("\n📈 STATISTICAL TESTS:")
        normality = "Normal" if self.metrics['shapiro_p'] > 0.05 else "Non-normal"
        print(f"  Residual Normality (Shapiro-Wilk): {normality} (p={self.metrics['shapiro_p']:.4f})")

        # Performance interpretation
        print("\n💡 PERFORMANCE INTERPRETATION:")
        if self.metrics['r2'] > 0.8:
            print("  ✅ Excellent predictive performance")
        elif self.metrics['r2'] > 0.6:
            print("  ✅ Good predictive performance")
        elif self.metrics['r2'] > 0.4:
            print("  ⚠️  Moderate predictive performance")
        else:
            print("  ❌ Poor predictive performance")

        if abs(self.metrics['bias']) < 0.1:
            print("  ✅ Low bias in predictions")
        else:
            print("  ⚠️  Noticeable bias in predictions")

        print("\n" + "="*80)

# Perform ground truth validation
# Ground truth validation: compute the metrics, draw the diagnostics, print the report
validator = GroundTruthValidator(y_true=y_validation, y_pred=y_pred)
metrics = validator.calculate_comprehensive_metrics()
validator.create_validation_plots()
validator.generate_validation_report()

## 4. Uncertainty Quantification

In [None]:
class UncertaintyQuantifier:
    """Quantify prediction uncertainty by bootstrap refitting.

    Args:
        model: Fitted estimator exposing ``fit`` and ``predict``. It is
            deep-copied for every bootstrap refit and never modified.
        X: Feature matrix accepted by ``model.fit`` / ``model.predict``.
        y: Target values aligned with ``X``. Stored as a NumPy array so the
            positional indexing used below is never label-based.
        feature_names: Names of the columns of ``X`` (stored only).
        n_bootstrap: Number of bootstrap refits.
        coordinates: Optional ``(n_samples, 2)`` coordinates; when given, the
            spatial uncertainty panel is drawn.
    """

    def __init__(self, model, X, y, feature_names, n_bootstrap=100, coordinates=None):
        self.model = model
        self.X = X
        self.y = np.asarray(y)
        self.feature_names = feature_names
        self.n_bootstrap = n_bootstrap
        self.coordinates = None if coordinates is None else np.asarray(coordinates)
        self.bootstrap_predictions = None
        self.bootstrap_models = []
        self.uncertainty_metrics = {}

    @staticmethod
    def _fmt_metric(value, spec=".4f"):
        """Format ``value`` with ``spec``, or return 'N/A' when it is None."""
        return "N/A" if value is None else format(value, spec)

    @staticmethod
    def _decile_masks(values):
        """Yield ``(label, mask)`` for each non-empty decile bin of ``values``.

        Bins are half-open ``[lo, hi)`` except the last one, which is closed
        so that the maximum value is included. Labels carry the decile number.
        """
        values = np.asarray(values)
        edges = np.percentile(values, np.arange(0, 101, 10))
        last_bin = len(edges) - 2
        for idx, (lo, hi) in enumerate(zip(edges[:-1], edges[1:])):
            below_upper = values <= hi if idx == last_bin else values < hi
            mask = (values >= lo) & below_upper
            if mask.any():
                yield f"Q{idx + 1}", mask

    def bootstrap_uncertainty(self):
        """Refit the model on bootstrap resamples and summarise the spread.

        Returns:
            dict: ``self.uncertainty_metrics`` with per-sample
            ``prediction_mean``, ``prediction_std``, ``prediction_ci_lower``
            (2.5th percentile) and ``prediction_ci_upper`` (97.5th percentile).
        """
        import copy

        print("🔄 Calculating bootstrap uncertainty...")

        bootstrap_predictions = []
        bootstrap_models = []

        for i in range(self.n_bootstrap):
            # Bootstrap sample (seeded by the iteration for reproducibility)
            X_resampled, y_resampled = resample(self.X, self.y, random_state=i)

            # Work on a private copy of the model handed to the constructor
            # instead of re-reading a hard-coded pickle on every iteration.
            model_clone = copy.deepcopy(self.model)
            if hasattr(model_clone, 'random_state'):
                model_clone.random_state = i

            model_clone.fit(X_resampled, y_resampled)

            # Predict on original data
            y_pred_bootstrap = model_clone.predict(self.X)
            bootstrap_predictions.append(y_pred_bootstrap)
            bootstrap_models.append(model_clone)

            if (i + 1) % 20 == 0:
                print(f"  Completed {i + 1}/{self.n_bootstrap} bootstrap iterations")

        self.bootstrap_predictions = np.array(bootstrap_predictions)
        self.bootstrap_models = bootstrap_models

        # Calculate uncertainty metrics (axis 0 runs over bootstrap replicates)
        self.uncertainty_metrics['prediction_mean'] = np.mean(self.bootstrap_predictions, axis=0)
        self.uncertainty_metrics['prediction_std'] = np.std(self.bootstrap_predictions, axis=0)
        self.uncertainty_metrics['prediction_ci_lower'] = np.percentile(self.bootstrap_predictions, 2.5, axis=0)
        self.uncertainty_metrics['prediction_ci_upper'] = np.percentile(self.bootstrap_predictions, 97.5, axis=0)

        print("✅ Bootstrap uncertainty analysis completed")
        return self.uncertainty_metrics

    def calculate_prediction_intervals(self, confidence=0.95):
        """Compute percentile intervals of the bootstrap predictions.

        Runs the bootstrap first if it has not been run. Also stores the
        empirical ``coverage`` (share of observations inside their interval)
        and the mean ``interval_width`` in ``self.uncertainty_metrics``.

        NOTE(review): the band is the spread of refitted-model predictions,
        without an observation-noise term, so coverage of the observed values
        may fall short of ``confidence``.

        Args:
            confidence: Nominal two-sided level, e.g. 0.95.

        Returns:
            dict: per-sample arrays under ``lower``, ``upper`` and ``mean``.
        """

        if self.bootstrap_predictions is None:
            self.bootstrap_uncertainty()

        alpha = 1 - confidence
        lower_percentile = (alpha / 2) * 100
        upper_percentile = (1 - alpha / 2) * 100

        prediction_intervals = {
            'lower': np.percentile(self.bootstrap_predictions, lower_percentile, axis=0),
            'upper': np.percentile(self.bootstrap_predictions, upper_percentile, axis=0),
            # Derived from the replicates directly so this also works when
            # bootstrap_predictions was supplied without the summary metrics.
            'mean': np.mean(self.bootstrap_predictions, axis=0)
        }

        # Calculate coverage
        coverage = np.mean((self.y >= prediction_intervals['lower']) &
                          (self.y <= prediction_intervals['upper']))

        self.uncertainty_metrics['coverage'] = coverage
        self.uncertainty_metrics['interval_width'] = np.mean(prediction_intervals['upper'] - prediction_intervals['lower'])

        print(f"✅ Prediction intervals calculated ({confidence*100}% confidence)")
        print(f"   Coverage: {coverage:.3f} (target: {confidence})")
        print(f"   Average interval width: {self.uncertainty_metrics['interval_width']:.4f} kg/m²")

        return prediction_intervals

    def create_uncertainty_visualizations(self):
        """Build, show and save a 2x2 grid of uncertainty plots.

        Runs the bootstrap first if needed. The figure is displayed and
        written to ``outputs/uncertainty/uncertainty_analysis.html``. The
        spatial panel is drawn only when coordinates are available.
        """

        if self.bootstrap_predictions is None:
            self.bootstrap_uncertainty()

        print("\n📊 Creating uncertainty visualizations...")

        fig = make_subplots(
            rows=2, cols=2,
            subplot_titles=(
                'Prediction Uncertainty',
                'Confidence Intervals Coverage',
                'Uncertainty Distribution',
                'Spatial Uncertainty Pattern'
            )
        )

        # 1. Prediction uncertainty, samples ordered by observed value
        sorted_idx = np.argsort(self.y)
        positions = np.arange(len(self.y))

        fig.add_trace(
            go.Scatter(x=positions, y=self.uncertainty_metrics['prediction_mean'][sorted_idx],
                      mode='lines', name='Mean Prediction', line=dict(color='blue')),
            row=1, col=1
        )

        fig.add_trace(
            go.Scatter(x=positions,
                      y=self.uncertainty_metrics['prediction_ci_upper'][sorted_idx],
                      mode='lines', name='95% CI Upper', line=dict(color='lightblue', dash='dash')),
            row=1, col=1
        )

        # fill='tonexty' shades the area between this trace and the previous one
        fig.add_trace(
            go.Scatter(x=positions,
                      y=self.uncertainty_metrics['prediction_ci_lower'][sorted_idx],
                      mode='lines', name='95% CI Lower', line=dict(color='lightblue', dash='dash'),
                      fill='tonexty'),
            row=1, col=1
        )

        fig.add_trace(
            go.Scatter(x=positions, y=self.y[sorted_idx],
                      mode='markers', name='Observed', marker=dict(color='red')),
            row=1, col=1
        )

        # 2. Confidence intervals coverage per decile of the observed values
        ci_lower = self.uncertainty_metrics['prediction_ci_lower']
        ci_upper = self.uncertainty_metrics['prediction_ci_upper']
        quantile_labels = []
        coverage_by_quantile = []

        for label, mask in self._decile_masks(self.y):
            inside = (self.y[mask] >= ci_lower[mask]) & (self.y[mask] <= ci_upper[mask])
            quantile_labels.append(label)
            coverage_by_quantile.append(np.mean(inside))

        fig.add_trace(
            go.Bar(x=quantile_labels,
                  y=coverage_by_quantile, name='Coverage by Quantile',
                  marker_color='lightgreen'),
            row=1, col=2
        )

        # Add target coverage line
        fig.add_trace(
            go.Scatter(x=[-0.5, len(coverage_by_quantile)-0.5], y=[0.95, 0.95],
                      mode='lines', name='Target (95%)',
                      line=dict(color='red', dash='dash')),
            row=1, col=2
        )

        # 3. Uncertainty distribution
        fig.add_trace(
            go.Histogram(x=self.uncertainty_metrics['prediction_std'],
                       name='Uncertainty Distribution', nbinsx=30,
                       marker_color='orange'),
            row=2, col=1
        )

        # 4. Spatial uncertainty pattern (if coordinates available)
        if self.coordinates is not None:
            coords = np.asarray(self.coordinates)
            fig.add_trace(
                go.Scatter(x=coords[:, 0], y=coords[:, 1],
                          mode='markers',
                          marker=dict(size=8, color=self.uncertainty_metrics['prediction_std'],
                                    colorscale='Viridis', showscale=True),
                          name='Spatial Uncertainty'),
                row=2, col=2
            )

        fig.update_layout(
            title_text="Prediction Uncertainty Analysis",
            height=700
        )

        fig.show()
        fig.write_html('outputs/uncertainty/uncertainty_analysis.html')

        print("✅ Uncertainty visualizations saved to outputs/uncertainty/uncertainty_analysis.html")

    def generate_uncertainty_report(self):
        """Print a text report of the bootstrap uncertainty metrics.

        Requires ``prediction_std`` in ``self.uncertainty_metrics`` (set by
        ``bootstrap_uncertainty``). Coverage and interval width are shown as
        'N/A' when ``calculate_prediction_intervals`` has not been called.
        """

        print("\n" + "="*80)
        print("📊 UNCERTAINTY QUANTIFICATION REPORT")
        print("="*80)

        coverage = self.uncertainty_metrics.get('coverage')
        interval_width = self.uncertainty_metrics.get('interval_width')

        print("\n🎯 UNCERTAINTY METRICS:")
        print(f"  Mean prediction uncertainty: {np.mean(self.uncertainty_metrics['prediction_std']):.4f} kg/m²")
        print(f"  Std of prediction uncertainty: {np.std(self.uncertainty_metrics['prediction_std']):.4f} kg/m²")
        print(f"  95% confidence interval coverage: {self._fmt_metric(coverage, '.3f')}")
        print(f"  Average prediction interval width: {self._fmt_metric(interval_width)} kg/m²")

        # Uncertainty interpretation: bootstrap spread relative to target spread
        mean_uncertainty = np.mean(self.uncertainty_metrics['prediction_std'])
        data_std = np.std(self.y)
        uncertainty_ratio = mean_uncertainty / data_std

        print("\n💡 UNCERTAINTY INTERPRETATION:")
        print(f"  Uncertainty-to-variability ratio: {uncertainty_ratio:.3f}")

        if uncertainty_ratio < 0.1:
            print("  ✅ Low uncertainty relative to data variability")
        elif uncertainty_ratio < 0.3:
            print("  ✅ Moderate uncertainty relative to data variability")
        else:
            print("  ⚠️  High uncertainty relative to data variability")

        if self.uncertainty_metrics.get('coverage', 0) > 0.9:
            print("  ✅ Good confidence interval coverage")
        else:
            print("  ⚠️  Confidence interval coverage could be improved")

        print("\n" + "="*80)

# Perform uncertainty quantification
# Uncertainty quantification: bootstrap refits, intervals, plots, then the report
uncertainty_analyzer = UncertaintyQuantifier(
    model=model,
    X=X_validation_scaled,
    y=y_validation,
    feature_names=feature_names,
)
uncertainty_metrics = uncertainty_analyzer.bootstrap_uncertainty()
prediction_intervals = uncertainty_analyzer.calculate_prediction_intervals()
uncertainty_analyzer.create_uncertainty_visualizations()
uncertainty_analyzer.generate_uncertainty_report()

## 5. Spatial Cross-Validation

In [None]:
class SpatialValidator:
    """Perform spatial cross-validation and autocorrelation analysis.

    Args:
        coordinates: ``(n_samples, 2)`` array of sample coordinates; column 0
            is plotted on the x-axis and column 1 on the y-axis.
        y_true: Observed target values (indexable with a boolean mask).
        y_pred: Predictions aligned with ``y_true``.
        model: Fitted estimator exposing ``fit`` and ``predict``. It is
            deep-copied for every cross-validation fold and never modified.
        X: Feature matrix (indexable with a boolean mask) aligned with
            ``y_true``.
    """

    def __init__(self, coordinates, y_true, y_pred, model, X):
        self.coordinates = coordinates
        self.y_true = y_true
        self.y_pred = y_pred
        self.model = model
        self.X = X
        self.spatial_metrics = {}

    @staticmethod
    def _fmt_metric(value, spec=".4f"):
        """Format ``value`` with ``spec``, or return 'N/A' when it is None."""
        return "N/A" if value is None else format(value, spec)

    def spatial_cross_validation(self, n_folds=5, distance_threshold=0.1):
        """Cross-validate with folds formed by K-means clusters of the coordinates.

        Each cluster is held out once while the model is refitted on the
        remaining samples, so test samples are spatially separated from the
        training samples.

        Args:
            n_folds: Number of K-means clusters, i.e. folds.
            distance_threshold: Currently unused by the fold construction;
                kept so existing callers keep working.

        Returns:
            tuple[list, list]: per-fold R² scores and per-fold RMSE values.
            Mean and standard deviation of both are stored in
            ``self.spatial_metrics``.
        """
        import copy
        from sklearn.cluster import KMeans

        print("🗺️ Performing spatial cross-validation...")

        # Create spatial folds using K-means on coordinates
        kmeans = KMeans(n_clusters=n_folds, random_state=42)
        spatial_folds = kmeans.fit_predict(self.coordinates)

        # Perform cross-validation
        spatial_cv_scores = []
        spatial_rmse_scores = []

        for fold in range(n_folds):
            # Create train/test masks
            test_mask = spatial_folds == fold
            train_mask = ~test_mask

            if train_mask.sum() == 0 or test_mask.sum() == 0:
                continue

            # Refit a private copy of the model handed to the constructor
            # instead of re-reading a hard-coded pickle for every fold.
            model_clone = copy.deepcopy(self.model)
            model_clone.fit(self.X[train_mask], self.y_true[train_mask])

            # Predict and score
            y_pred_fold = model_clone.predict(self.X[test_mask])

            r2_fold = r2_score(self.y_true[test_mask], y_pred_fold)
            rmse_fold = np.sqrt(mean_squared_error(self.y_true[test_mask], y_pred_fold))

            spatial_cv_scores.append(r2_fold)
            spatial_rmse_scores.append(rmse_fold)

            print(f"  Fold {fold + 1}: R² = {r2_fold:.4f}, RMSE = {rmse_fold:.4f}")

        self.spatial_metrics['spatial_cv_r2_mean'] = np.mean(spatial_cv_scores)
        self.spatial_metrics['spatial_cv_r2_std'] = np.std(spatial_cv_scores)
        self.spatial_metrics['spatial_cv_rmse_mean'] = np.mean(spatial_rmse_scores)
        self.spatial_metrics['spatial_cv_rmse_std'] = np.std(spatial_rmse_scores)

        print("✅ Spatial cross-validation completed")
        return spatial_cv_scores, spatial_rmse_scores

    def analyze_spatial_autocorrelation(self, distance_threshold=0.1):
        """Compute Moran's I for the residuals and for the observed values.

        Results (statistic and simulated p-value) are stored in
        ``self.spatial_metrics`` and a Moran plot is saved to
        ``outputs/validation/spatial_autocorrelation.png``. The analysis is
        best-effort: any failure is reported and swallowed so the rest of the
        validation can continue.

        Args:
            distance_threshold: Neighbourhood radius passed to
                ``DistanceBand``, in the units of ``coordinates``.
        """

        print("\n🔍 Analyzing spatial autocorrelation...")

        residuals = self.y_true - self.y_pred

        # Calculate Moran's I
        try:
            # Create spatial weights matrix
            w = DistanceBand(self.coordinates, threshold=distance_threshold, binary=False)

            # Moran's I for residuals
            moran_residuals = Moran(residuals, w)

            # Moran's I for original data
            moran_original = Moran(self.y_true, w)

            self.spatial_metrics['moran_residuals'] = moran_residuals.I
            self.spatial_metrics['moran_residuals_p'] = moran_residuals.p_sim
            self.spatial_metrics['moran_original'] = moran_original.I
            self.spatial_metrics['moran_original_p'] = moran_original.p_sim

            print(f"  Moran's I (Residuals): {moran_residuals.I:.4f} (p={moran_residuals.p_sim:.4f})")
            print(f"  Moran's I (Original): {moran_original.I:.4f} (p={moran_original.p_sim:.4f})")

            # Create spatial autocorrelation plot
            fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))

            # Moran plot for residuals
            plot_moran(moran_residuals, ax=ax1)
            ax1.set_title(f"Moran's I - Residuals\nI = {moran_residuals.I:.4f}, p = {moran_residuals.p_sim:.4f}")

            # Moran plot for original data
            plot_moran(moran_original, ax=ax2)
            ax2.set_title(f"Moran's I - Original Data\nI = {moran_original.I:.4f}, p = {moran_original.p_sim:.4f}")

            plt.tight_layout()
            plt.savefig('outputs/validation/spatial_autocorrelation.png', dpi=300, bbox_inches='tight')
            plt.show()

        except Exception as e:
            # Deliberately broad: this step is optional diagnostics.
            print(f"⚠️ Spatial autocorrelation analysis failed: {e}")
            print("💡 This might be due to insufficient spatial variation or computational limitations")

    def create_spatial_validation_plots(self):
        """Build, show and save a 2x2 grid of spatial residual plots.

        The figure is displayed and written to
        ``outputs/validation/spatial_validation.html``.
        """

        print("\n📊 Creating spatial validation plots...")

        residuals = self.y_true - self.y_pred

        fig = make_subplots(
            rows=2, cols=2,
            subplot_titles=(
                'Spatial Distribution of Residuals',
                'Residuals vs Spatial Coordinates',
                'Spatial Error Clustering',
                'Distance vs Prediction Error'
            )
        )

        # 1. Spatial distribution of residuals
        fig.add_trace(
            go.Scatter(x=self.coordinates[:, 0], y=self.coordinates[:, 1],
                      mode='markers',
                      marker=dict(size=8, color=residuals,
                                colorscale='RdBu',
                                colorbar=dict(title='Residuals'),
                                showscale=True),
                      name='Residuals'),
            row=1, col=1
        )

        # 2. Residuals vs the second coordinate column (labelled latitude)
        fig.add_trace(
            go.Scatter(x=self.coordinates[:, 1], y=residuals,
                      mode='markers', name='Residuals vs Lat',
                      marker=dict(color='blue', opacity=0.6)),
            row=1, col=2
        )

        # 3. Spatial error clustering: top quartile of |residual| vs the rest
        error_magnitude = np.abs(residuals)
        high_error_mask = error_magnitude > np.percentile(error_magnitude, 75)

        fig.add_trace(
            go.Scatter(x=self.coordinates[~high_error_mask, 0],
                      y=self.coordinates[~high_error_mask, 1],
                      mode='markers', name='Low Error',
                      marker=dict(color='green', size=6, opacity=0.6)),
            row=2, col=1
        )

        fig.add_trace(
            go.Scatter(x=self.coordinates[high_error_mask, 0],
                      y=self.coordinates[high_error_mask, 1],
                      mode='markers', name='High Error',
                      marker=dict(color='red', size=8, opacity=0.8)),
            row=2, col=1
        )

        # 4. Euclidean distance from the coordinate centroid vs prediction error
        center = np.mean(self.coordinates, axis=0)
        distances = np.sqrt(np.sum((self.coordinates - center) ** 2, axis=1))

        fig.add_trace(
            go.Scatter(x=distances, y=error_magnitude,
                      mode='markers', name='Error vs Distance',
                      marker=dict(color='purple', opacity=0.6)),
            row=2, col=2
        )

        fig.update_layout(
            title_text="Spatial Validation Analysis",
            height=700
        )

        fig.show()
        fig.write_html('outputs/validation/spatial_validation.html')

        print("✅ Spatial validation plots saved to outputs/validation/spatial_validation.html")

    def generate_spatial_validation_report(self):
        """Print a text report of the spatial validation results.

        Metrics that have not been computed yet are shown as 'N/A'. The
        generalisation verdict is printed only when cross-validation ran.
        """

        print("\n" + "="*80)
        print("🗺️ SPATIAL VALIDATION REPORT")
        print("="*80)

        metrics = self.spatial_metrics
        print("\n🎯 SPATIAL CROSS-VALIDATION:")
        print(f"  Mean R²: {self._fmt_metric(metrics.get('spatial_cv_r2_mean'))}")
        print(f"  Std R²: {self._fmt_metric(metrics.get('spatial_cv_r2_std'))}")
        print(f"  Mean RMSE: {self._fmt_metric(metrics.get('spatial_cv_rmse_mean'))} kg/m²")

        print("\n🔍 SPATIAL AUTOCORRELATION:")
        moran_residuals = metrics.get('moran_residuals', None)
        moran_original = metrics.get('moran_original', None)

        if moran_residuals is not None:
            print(f"  Moran's I (Residuals): {self._fmt_metric(moran_residuals)}")
            print(f"  Moran's I (Original): {self._fmt_metric(moran_original)}")

            # Interpretation
            if abs(moran_residuals) < 0.1:
                print("  ✅ Low spatial autocorrelation in residuals")
            elif abs(moran_residuals) < 0.3:
                print("  ⚠️  Moderate spatial autocorrelation in residuals")
            else:
                print("  ❌ High spatial autocorrelation in residuals")

            if moran_original is not None:
                if moran_residuals < moran_original:
                    print("  ✅ Model captures some spatial structure")
                else:
                    print("  ⚠️  Model may not fully capture spatial structure")

        print("\n💡 SPATIAL PERFORMANCE INSIGHTS:")
        spatial_cv_r2 = metrics.get('spatial_cv_r2_mean')
        if spatial_cv_r2 is None:
            # No CV result to compare against; do not invent a verdict.
            print("  ⚠️  Spatial cross-validation has not been run")
        else:
            regular_r2 = r2_score(self.y_true, self.y_pred)

            # Generalisation is judged good when spatial CV keeps at least
            # 80% of the non-spatial R²
            if spatial_cv_r2 > regular_r2 * 0.8:
                print("  ✅ Good spatial generalization")
            else:
                print("  ⚠️  Potential spatial overfitting")

        print("\n" + "="*80)

# Prepare coordinates for spatial analysis
# Coordinates as an (n_samples, 2) array: longitude first, latitude second
coordinates = validation_df.loc[:, ['longitude', 'latitude']].to_numpy()

# Spatial validation: clustered CV, Moran's I, plots, then the report
spatial_validator = SpatialValidator(
    coordinates=coordinates,
    y_true=y_validation,
    y_pred=y_pred,
    model=model,
    X=X_validation_scaled,
)
cv_scores, rmse_scores = spatial_validator.spatial_cross_validation()
spatial_validator.analyze_spatial_autocorrelation()
spatial_validator.create_spatial_validation_plots()
spatial_validator.generate_spatial_validation_report()

## 6. Comparison with Existing Carbon Maps

In [None]:
class CarbonMapComparator:
    """Compare model predictions with existing carbon maps.

    No real external product is loaded here: ``simulate_external_datasets``
    fabricates three stand-in datasets by adding Gaussian noise to the
    observed ``carbon_stock`` column, so the scores illustrate the comparison
    workflow rather than the quality of any published map.

    Parameters
    ----------
    validation_df : pandas.DataFrame
        Validation samples; must contain a ``carbon_stock`` column, which is
        used as the reference for every metric.
    y_pred : array-like
        Model predictions, one per row of ``validation_df``.

    Attributes
    ----------
    comparison_results : dict
        ``{dataset_name: {'r2', 'rmse', 'mae', 'bias', 'correlation'}}``,
        filled by ``compare_with_external_data``; includes ``'Our_Model'``.
    external_datasets : dict or None
        ``{dataset_name: ndarray}`` once the stand-in datasets exist.
    """

    # Noise standard deviation for each simulated dataset. Insertion order is
    # the order in which noise is drawn from the random stream, so it must not
    # be changed if results are to stay reproducible.
    _SIMULATED_NOISE_STD = {
        'Global_Carbon_Map': 0.3,     # Dataset 1: global map (coarser resolution)
        'Regional_Assessment': 0.2,   # Dataset 2: regional assessment (similar resolution)
        'Satellite_Estimate': 0.25,   # Dataset 3: satellite-based estimate
    }

    def __init__(self, validation_df, y_pred):
        self.validation_df = validation_df
        self.y_pred = y_pred
        self.comparison_results = {}
        self.external_datasets = None

    @staticmethod
    def _score(truth, estimate):
        """Return the metric dict for *estimate* measured against *truth*."""
        return {
            'r2': r2_score(truth, estimate),
            'rmse': np.sqrt(mean_squared_error(truth, estimate)),
            'mae': mean_absolute_error(truth, estimate),
            # Positive bias means the estimate is higher than the reference.
            'bias': np.mean(estimate - truth),
            'correlation': np.corrcoef(truth, estimate)[0, 1],
        }

    def simulate_external_datasets(self):
        """Simulate comparison with external carbon datasets.

        Returns
        -------
        dict
            ``{dataset_name: ndarray}`` of reference values plus Gaussian
            noise; also stored on ``self.external_datasets``.
        """

        print("üåç Simulating comparison with external carbon maps...")

        # A private generator seeded with 42 yields exactly the draws that
        # ``np.random.seed(42)`` followed by ``np.random.normal`` would, but
        # without resetting the global NumPy random state that other cells
        # of the notebook rely on.
        rng = np.random.RandomState(42)

        truth = self.validation_df['carbon_stock'].values
        n_samples = len(self.validation_df)

        external_datasets = {
            name: truth + rng.normal(0, noise_std, n_samples)
            for name, noise_std in self._SIMULATED_NOISE_STD.items()
        }

        self.external_datasets = external_datasets
        print("‚úÖ External datasets simulated for comparison")

        return external_datasets

    def compare_with_external_data(self):
        """Compare model predictions with external datasets.

        Every external dataset and the model predictions are scored against
        the ``carbon_stock`` column, so the resulting metrics are directly
        comparable.

        Returns
        -------
        dict
            Metrics per dataset, plus an ``'Our_Model'`` entry; also stored
            on ``self.comparison_results``.
        """

        # getattr keeps this working for instances created without __init__.
        if getattr(self, 'external_datasets', None) is None:
            self.simulate_external_datasets()

        print("\nüìä Comparing with external datasets...")

        truth = self.validation_df['carbon_stock']
        comparison_metrics = {}

        for dataset_name, external_data in self.external_datasets.items():
            scores = self._score(truth, external_data)
            comparison_metrics[dataset_name] = scores

            print(f"  {dataset_name}:")
            print(f"    R¬≤: {scores['r2']:.4f}, RMSE: {scores['rmse']:.4f}, Bias: {scores['bias']:.4f}")

        # Compare with our model
        comparison_metrics['Our_Model'] = self._score(truth, self.y_pred)
        self.comparison_results = comparison_metrics

        return comparison_metrics

    def create_comparison_visualizations(self):
        """Create visualizations comparing different carbon estimates.

        Shows a radar chart and a grouped bar chart of R² and correlation,
        then writes all metrics to
        ``outputs/comparison/carbon_map_comparison.csv``.
        """

        if not self.comparison_results:
            self.compare_with_external_data()

        print("\nüìà Creating comparison visualizations...")

        # One row per dataset, one column per metric.
        comparison_df = pd.DataFrame([
            {
                'Dataset': dataset_name,
                'R¬≤': metrics['r2'],
                'RMSE': metrics['rmse'],
                'MAE': metrics['mae'],
                'Bias': metrics['bias'],
                'Correlation': metrics['correlation'],
            }
            for dataset_name, metrics in self.comparison_results.items()
        ])

        # Create radar chart for comparison
        fig = go.Figure()

        metrics_to_plot = ['R¬≤', 'Correlation']  # Positive metrics (higher is better)
        # Repeat the first axis at the end so each polygon is closed.
        closed_axes = metrics_to_plot + [metrics_to_plot[0]]

        for _, row in comparison_df.iterrows():
            values = [row[metric] for metric in metrics_to_plot]
            values.append(values[0])

            fig.add_trace(go.Scatterpolar(
                r=values,
                theta=closed_axes,
                fill='toself',
                name=row['Dataset']
            ))

        fig.update_layout(
            polar=dict(
                radialaxis=dict(
                    visible=True,
                    range=[0, 1]
                )),
            showlegend=True,
            title="Carbon Map Comparison - Performance Metrics"
        )

        fig.show()

        # Create bar chart comparison
        fig_bar = px.bar(comparison_df, x='Dataset', y=['R¬≤', 'Correlation'],
                        title='Carbon Map Performance Comparison',
                        barmode='group')

        fig_bar.show()

        # Save comparison results (create the folder if the setup cell was skipped)
        os.makedirs('outputs/comparison', exist_ok=True)
        comparison_df.to_csv('outputs/comparison/carbon_map_comparison.csv', index=False)

        print("‚úÖ Comparison results saved to outputs/comparison/carbon_map_comparison.csv")

    def generate_comparison_report(self):
        """Generate comparison report with external datasets.

        Prints the best-scoring dataset by R², the model's R² and rank, and
        the R² difference between the model and each external dataset. If
        ``comparison_results`` has no usable ``'Our_Model'`` entry, the model
        section prints ``N/A`` instead of raising.
        """

        print("\n" + "="*80)
        print("üåç EXTERNAL DATASET COMPARISON REPORT")
        print("="*80)

        if not self.comparison_results:
            self.compare_with_external_data()

        # Find best performing dataset (first one wins on ties)
        best_dataset, best_metrics = max(
            self.comparison_results.items(), key=lambda item: item[1]['r2']
        )
        best_r2 = best_metrics['r2']

        print(f"\nüèÜ BEST PERFORMING DATASET: {best_dataset} (R¬≤ = {best_r2:.4f})")

        # Our model's performance
        our_r2 = self.comparison_results.get('Our_Model', {}).get('r2')

        print(f"\nüéØ OUR MODEL PERFORMANCE:")
        if our_r2 is None:
            # A string placeholder cannot be formatted with ':.4f' or ranked.
            print("  R¬≤: N/A")
            print("\n" + "="*80)
            return

        # Rank = 1 + number of datasets with a strictly better R².
        rank = 1 + sum(
            metrics['r2'] > our_r2 for metrics in self.comparison_results.values()
        )
        print(f"  R¬≤: {our_r2:.4f}")
        print(f"  Rank: {rank} of {len(self.comparison_results)}")

        # Improvement over external datasets
        print(f"\nüìà PERFORMANCE COMPARISON:")
        for dataset_name, metrics in self.comparison_results.items():
            if dataset_name != 'Our_Model':
                improvement = our_r2 - metrics.get('r2', 0)
                print(f"  vs {dataset_name}: {improvement:+.4f} R¬≤ improvement")

        print("\n" + "="*80)

# Perform comparison with external datasets.
# compare_with_external_data() scores every dataset and the model against
# validation_df['carbon_stock'], stores the result on
# comparator.comparison_results and returns the same dict. The two calls after
# it reuse those stored results instead of recomputing them.
comparator = CarbonMapComparator(validation_df, y_pred)
comparison_metrics = comparator.compare_with_external_data()
# Shows the charts and writes outputs/comparison/carbon_map_comparison.csv.
comparator.create_comparison_visualizations()
comparator.generate_comparison_report()

## 7. Final Validation Summary

In [None]:
def _validation_number(value, default):
    """Return *value* as a float if it is a real number, otherwise *default*."""
    is_number = isinstance(value, (int, float, np.integer, np.floating))
    if is_number and not isinstance(value, bool):
        return float(value)
    return default


def _validation_json_default(obj):
    """Convert NumPy scalars/arrays for ``json.dump``; reject everything else."""
    if isinstance(obj, np.generic):
        return obj.item()
    if isinstance(obj, np.ndarray):
        return obj.tolist()
    raise TypeError(f"Object of type {type(obj).__name__} is not JSON serializable")


def generate_comprehensive_validation_summary(namespace=None):
    """Generate comprehensive validation summary report.

    Collects headline metrics from the validation objects created earlier in
    the notebook, prints an assessment with recommendations, and writes the
    result to ``outputs/validation/comprehensive_validation_report.json``.

    Parameters
    ----------
    namespace : dict, optional
        Mapping in which ``validator``, ``uncertainty_analyzer``,
        ``spatial_validator`` and ``comparator`` are looked up. Defaults to
        this module's globals, i.e. the notebook namespace. Objects that are
        missing are skipped.

    Returns
    -------
    None
    """

    # Inside a function ``locals()`` only holds the function's own variables,
    # so the notebook-level objects have to be looked up in the globals.
    if namespace is None:
        namespace = globals()

    print("="*80)
    print("üìä COMPREHENSIVE VALIDATION SUMMARY")
    print("="*80)

    ground_truth_key = 'Ground Truth R¬≤'
    spatial_cv_key = 'Spatial CV R¬≤'
    moran_key = "Moran's I (Residuals)"

    # Collect all validation results
    summary_data = {}

    # Ground truth validation
    validator = namespace.get('validator')
    if validator is not None:
        summary_data[ground_truth_key] = validator.metrics.get('r2', 'N/A')
        summary_data['Ground Truth RMSE'] = validator.metrics.get('rmse', 'N/A')

    # Uncertainty metrics
    uncertainty_analyzer = namespace.get('uncertainty_analyzer')
    if uncertainty_analyzer is not None:
        uncertainty_metrics = uncertainty_analyzer.uncertainty_metrics
        summary_data['Mean Uncertainty'] = np.mean(uncertainty_metrics.get('prediction_std', 0))
        summary_data['CI Coverage'] = uncertainty_metrics.get('coverage', 'N/A')

    # Spatial validation
    spatial_validator = namespace.get('spatial_validator')
    if spatial_validator is not None:
        spatial_metrics = spatial_validator.spatial_metrics
        summary_data[spatial_cv_key] = spatial_metrics.get('spatial_cv_r2_mean', 'N/A')
        summary_data[moran_key] = spatial_metrics.get('moran_residuals', 'N/A')

    # External comparison
    comparator = namespace.get('comparator')
    if comparator is not None:
        our_model_r2 = comparator.comparison_results.get('Our_Model', {}).get('r2', 'N/A')
        summary_data['External Comparison R¬≤'] = our_model_r2

    print("\nüìà VALIDATION METRICS SUMMARY:")
    for metric, value in summary_data.items():
        if isinstance(value, float):
            print(f"  {metric:30} {value:.4f}")
        else:
            print(f"  {metric:30} {value}")

    # Overall assessment
    print(f"\nüí° OVERALL ASSESSMENT:")

    # Raw values go into the saved report unchanged. The *_num variants are
    # used for threshold checks so an 'N/A' placeholder cannot raise.
    ground_truth_r2 = summary_data.get(ground_truth_key, 0)
    spatial_cv_r2 = summary_data.get(spatial_cv_key, 0)
    moran_residuals = summary_data.get(moran_key, 1)  # Default to high if not available
    uncertainty = summary_data.get('Mean Uncertainty', 0)

    ground_truth_num = _validation_number(ground_truth_r2, 0.0)
    spatial_cv_num = _validation_number(spatial_cv_r2, 0.0)
    moran_num = _validation_number(moran_residuals, None)
    uncertainty_num = _validation_number(uncertainty, None)

    if ground_truth_num > 0.7 and spatial_cv_num > 0.6:
        print("  ‚úÖ EXCELLENT: Model shows strong predictive performance and good spatial generalization")
    elif ground_truth_num > 0.5 and spatial_cv_num > 0.4:
        print("  ‚úÖ GOOD: Model shows satisfactory performance with reasonable spatial generalization")
    else:
        print("  ‚ö†Ô∏è  MODERATE: Model performance could be improved, consider additional features or data")

    if moran_num is not None and abs(moran_num) < 0.2:
        print("  ‚úÖ LOW SPATIAL AUTOCORRELATION: Model residuals show minimal spatial patterning")
    else:
        print("  ‚ö†Ô∏è  SPATIAL PATTERNING DETECTED: Consider incorporating spatial features in future models")

    # Recommendations
    print(f"\nüéØ RECOMMENDATIONS:")

    if ground_truth_num < 0.6:
        print("  ‚Ä¢ Consider adding more relevant features or collecting additional training data")

    if spatial_cv_num < ground_truth_num * 0.8:
        print("  ‚Ä¢ Implement spatial cross-validation in future model development")

    if moran_num is not None and abs(moran_num) > 0.3:
        print("  ‚Ä¢ Consider spatial regression techniques or adding spatial lag variables")

    if uncertainty_num is not None and uncertainty_num > 0.3:
        print("  ‚Ä¢ Prediction uncertainty is relatively high, consider ensemble methods")

    print("\nüöÄ NEXT STEPS:")
    print("  1. Deploy validated model for carbon sequestration assessment")
    print("  2. Monitor model performance with new data")
    print("  3. Consider model retraining with additional data")
    print("  4. Use uncertainty estimates in decision-making")

    # Save comprehensive summary
    summary_report = {
        'validation_summary': summary_data,
        'overall_assessment': {
            'ground_truth_r2': ground_truth_r2,
            'spatial_generalization': spatial_cv_r2,
            'spatial_autocorrelation': moran_residuals,
            'prediction_uncertainty': uncertainty
        },
        'validation_date': pd.Timestamp.now().isoformat()
    }

    # Create the folder in case the setup cell was skipped.
    os.makedirs('outputs/validation', exist_ok=True)
    with open('outputs/validation/comprehensive_validation_report.json', 'w', encoding='utf-8') as f:
        json.dump(summary_report, f, indent=2, default=_validation_json_default)

    print("\n" + "="*80)
    print("‚úÖ COMPREHENSIVE VALIDATION COMPLETED SUCCESSFULLY!")
    print("="*80)
    print("\nüìÅ Validation reports saved to outputs/validation/")

# Generate final summary: prints the overall assessment and writes
# outputs/validation/comprehensive_validation_report.json.
generate_comprehensive_validation_summary()