In [1]:
import pandas as pd
import numpy as np
import os
from pathlib import Path

def _clean_station_ids(station_ids):
    """Return station IDs as stripped, upper-cased strings for matching.

    Values are cast to ``str`` first so numeric IDs (which ``pd.read_excel``
    loads as int/float) do not break the ``.str`` accessor. Missing IDs stay
    missing instead of turning into the literal string ``"NAN"``.
    """
    cleaned = station_ids.astype(str).str.strip().str.upper()
    return cleaned.where(station_ids.notna())


def create_basin_validation_analysis_6models():
    """Create basin-based validation analysis for the 6-model ensemble.

    Workflow:
      1. Read the per-station validation metrics workbook.
      2. Read the station-to-basin mapping, or generate a coordinate-based
         fallback mapping (and save it) when the mapping file is missing.
      3. Left-merge the two on a normalised station ID; stations without a
         basin are assigned to ``'Unknown'``.
      4. Compute basin statistics, the station-level comparison and the
         rankings, then write them to Excel files in the output directory.

    Progress and errors are reported with ``print``. The function returns
    ``None`` in every case and returns early when an input cannot be read
    or lacks a required column.
    """

    print("="*80)
    print("6-MODEL ENSEMBLE BASIN-BASED VALIDATION")
    print("="*80)
    print("🎯 Analyzing 6-model ensemble performance by basin")
    print("📊 Using uniform 6-model ensemble approach")
    print("🔍 Following same methodology as 3-model ensemble validation")
    print("-"*80)

    # Define paths
    validation_results_file = r"D:\RICAAR\Pr.New.Stations.Selection\ensemble.models.6.models\validation_results\ensemble_6models_validation_results.xlsx"
    station_basin_mapping_file = r"D:\RICAAR\Pr.New.Stations.Selection\OBSERVATIONS\Station_Basin_Mapping.xlsx"
    output_dir = r"D:\RICAAR\Pr.New.Stations.Selection\ensemble.models.6.models\validation_results\ACCORDING.TO.BASIN"

    # Create output directory
    Path(output_dir).mkdir(parents=True, exist_ok=True)

    # Check if validation results file exists
    if not os.path.exists(validation_results_file):
        print("❌ ERROR: Validation results file not found!")
        print(f"Expected: {validation_results_file}")
        print("Please run the 6-model ensemble validation script first!")
        return

    # Read validation results
    print("📊 Reading 6-model ensemble validation results...")
    try:
        validation_df = pd.read_excel(validation_results_file, sheet_name='Validation_Metrics')
        print(f"✅ Loaded validation data for {len(validation_df)} stations")
        print(f"📊 Model type: {validation_df['Model'].iloc[0] if len(validation_df) > 0 else 'Unknown'}")
    except Exception as e:
        print(f"❌ Error reading validation results: {e}")
        return

    # Everything below keys on Station_ID; fail with a message, not a KeyError.
    if 'Station_ID' not in validation_df.columns:
        print("❌ ERROR: Validation results have no 'Station_ID' column")
        return

    # Check if station-basin mapping exists
    if not os.path.exists(station_basin_mapping_file):
        print("❌ ERROR: Station-basin mapping file not found!")
        print(f"Expected: {station_basin_mapping_file}")
        print("Creating coordinate-based mapping as fallback...")

        # Create coordinate-based mapping as fallback
        basin_mapping = create_coordinate_based_mapping_6models(validation_df)

        # Save the mapping for future use
        mapping_output_path = os.path.join(output_dir, "Generated_Station_Basin_Mapping.xlsx")
        basin_mapping.to_excel(mapping_output_path, index=False)
        print(f"💾 Saved generated mapping to: {mapping_output_path}")

    else:
        print("📍 Reading station-basin mapping...")
        try:
            basin_mapping = pd.read_excel(station_basin_mapping_file)
            print(f"✅ Loaded basin mapping for {len(basin_mapping)} stations")
            print(f"📊 Basins found: {basin_mapping['Basin'].nunique()}")

            # Show basin distribution
            basin_counts = basin_mapping['Basin'].value_counts()
            print("📍 Basin distribution:")
            for basin, count in basin_counts.head(10).items():
                print(f"   - {basin}: {count} stations")
            if len(basin_counts) > 10:
                print(f"   ... and {len(basin_counts) - 10} more basins")

        except Exception as e:
            print(f"❌ Error reading basin mapping: {e}")
            return

    missing_mapping_columns = {'Station_ID', 'Basin'} - set(basin_mapping.columns)
    if missing_mapping_columns:
        print(f"❌ ERROR: Basin mapping is missing columns: {sorted(missing_mapping_columns)}")
        return

    # Merge validation data with basin information
    print("🔗 Merging validation data with basin information...")

    # Standardize station IDs for matching
    validation_df['Station_ID_clean'] = _clean_station_ids(validation_df['Station_ID'])
    basin_mapping['Station_ID_clean'] = _clean_station_ids(basin_mapping['Station_ID'])

    # A station listed twice in the mapping would duplicate its validation
    # row in the left merge and be double-counted in every basin statistic.
    mapping_keys = basin_mapping[['Station_ID_clean', 'Basin']]
    duplicated = mapping_keys['Station_ID_clean'].duplicated(keep='first')
    if duplicated.any():
        print(f"⚠️ WARNING: {int(duplicated.sum())} duplicate station entries in basin mapping - keeping first occurrence")
        mapping_keys = mapping_keys[~duplicated]

    # Merge datasets
    merged_df = pd.merge(validation_df, mapping_keys, on='Station_ID_clean', how='left')

    # Check for stations without basin assignments
    no_basin = merged_df[merged_df['Basin'].isna()]
    if len(no_basin) > 0:
        print(f"⚠️ WARNING: {len(no_basin)} stations have no basin assignment:")
        for station_id in no_basin['Station_ID']:
            print(f"   - {station_id}")

        # Assign to 'Unknown' basin
        merged_df['Basin'] = merged_df['Basin'].fillna('Unknown')

    print(f"✅ Successfully merged data for {len(merged_df)} stations")

    # Calculate basin-wise statistics
    print("📊 Calculating basin-wise performance statistics...")

    basin_stats = calculate_basin_statistics_6models(merged_df)

    # Create detailed basin comparison
    basin_comparison = create_basin_comparison_6models(merged_df)

    # Create rankings and top performance analysis
    print("🏆 Creating basin performance rankings...")
    rankings = rank_basins_by_performance_6models(basin_stats)

    # Save all results
    print(f"💾 Saving results to: {output_dir}")

    # Save individual files
    basin_stats.to_excel(os.path.join(output_dir, 'Basin_Performance_Statistics_6Models.xlsx'), index=False)
    basin_comparison.to_excel(os.path.join(output_dir, 'Basin_Detailed_Comparison_6Models.xlsx'), index=False)
    rankings.to_excel(os.path.join(output_dir, 'Basin_Performance_Rankings_6Models.xlsx'), index=False)
    merged_df.to_excel(os.path.join(output_dir, 'Station_Basin_Validation_Data_6Models.xlsx'), index=False)

    # Save comprehensive summary
    save_comprehensive_summary_6models(basin_stats, basin_comparison, rankings, merged_df, output_dir)

    # Create comparison with 3-model ensemble if available
    create_ensemble_comparison(merged_df, output_dir)

    # Print summary
    print_basin_summary_6models(basin_stats, rankings)

    print(f"\n{'='*60}")
    print("🎉 6-MODEL ENSEMBLE BASIN-BASED VALIDATION COMPLETED!")
    print(f"{'='*60}")
    print(f"📁 Output directory: {output_dir}")
    print("📊 Files created:")
    print("   - Basin_Performance_Statistics_6Models.xlsx")
    print("   - Basin_Detailed_Comparison_6Models.xlsx") 
    print("   - Basin_Performance_Rankings_6Models.xlsx")
    print("   - Station_Basin_Validation_Data_6Models.xlsx")
    print("   - Ensemble_6Models_Basin_Validation_Summary.xlsx (comprehensive)")
    print("   - Generated_Station_Basin_Mapping.xlsx (if mapping was created)")
    print("   - Ensemble_Comparison_3vs6_Models.xlsx (if 3-model data available)")
    print("\n✅ Uniform 6-model ensemble performance analyzed by basin")
    print("📊 Ready for cross-ensemble performance comparison!")

def _basin_from_coordinates(lat, lon):
    """Return the coarse basin label for one latitude/longitude pair."""
    if lat > 32.5:
        return "YARMOUK" if lon < 36.0 else "JORDAN_VALLEY_NORTH"
    if lat > 31.5:
        return "JORDAN_VALLEY_CENTRAL" if lon < 36.0 else "EASTERN_BASIN"
    return "SOUTHERN_BASIN" if lon < 35.5 else "DEAD_SEA_BASIN"


def create_coordinate_based_mapping_6models(validation_df):
    """Create a coordinate-based basin mapping if the mapping file doesn't exist.

    Each row of ``validation_df`` is assigned a basin from fixed
    latitude/longitude thresholds. Stations whose ``Latitude`` or
    ``Longitude`` is absent or NaN are labelled ``'Unknown'`` rather than
    being forced into a real basin by a placeholder coordinate.

    Parameters
    ----------
    validation_df : pandas.DataFrame
        Must contain ``Station_ID``; ``Latitude`` and ``Longitude`` are
        optional.

    Returns
    -------
    pandas.DataFrame
        One row per input row with columns ``Station_ID``, ``Basin``,
        ``Latitude``, ``Longitude``, ``Mapping_Method`` and ``Notes``.
        The columns are present even when the input has no rows.
    """

    print("🗺️ Creating coordinate-based basin mapping for 6-model ensemble...")

    output_columns = ['Station_ID', 'Basin', 'Latitude', 'Longitude',
                      'Mapping_Method', 'Notes']
    # Stations flagged in the original script as known problematic ones.
    border_stations = {'AD0023', 'AD0032'}

    basin_mapping = []

    for _, station in validation_df.iterrows():
        station_id = station['Station_ID']
        lat = station.get('Latitude', np.nan)
        lon = station.get('Longitude', np.nan)

        if pd.isna(lat) or pd.isna(lon):
            # Without coordinates no threshold applies; every comparison
            # against NaN is False and would fall through to a real basin.
            basin = 'Unknown'
        else:
            basin = _basin_from_coordinates(lat, lon)
            if station_id in border_stations:
                basin = f"{basin}_BORDER"  # Mark border stations

        basin_mapping.append({
            'Station_ID': station_id,
            'Basin': basin,
            'Latitude': lat,
            'Longitude': lon,
            'Mapping_Method': 'Coordinate_Based_6Models',
            'Notes': 'Generated for 6-model ensemble validation'
        })

    # Passing the columns explicitly keeps the schema for empty input.
    return pd.DataFrame(basin_mapping, columns=output_columns)

def calculate_basin_statistics_6models(merged_df):
    """Summarise the validation metrics of every basin (6-model ensemble).

    For each distinct value of ``merged_df['Basin']`` one output row is
    built holding the station count, two constant ensemble labels and, for
    every metric column present in the frame, its mean, std, min, max,
    median and 25th/75th percentiles over the non-null values (all NaN when
    the basin has no valid value). Metric columns absent from ``merged_df``
    produce no output columns. ``abs_PBIAS_mean`` is the absolute value of
    the basin's mean PBIAS, and ``Performance_Category`` is derived from the
    mean ``r`` and mean ``NSE``.

    Returns a DataFrame with one row per basin, in order of first appearance.
    """

    metric_names = ('r', 'NSE', 'RMSE', 'MAE', 'PBIAS', 'Coverage_Ratio')

    # (suffix, reducer) pairs, applied in this order to each metric.
    reducers = (
        ('mean', lambda values: values.mean()),
        ('std', lambda values: values.std()),
        ('min', lambda values: values.min()),
        ('max', lambda values: values.max()),
        ('median', lambda values: values.median()),
        ('q25', lambda values: values.quantile(0.25)),
        ('q75', lambda values: values.quantile(0.75)),
    )

    # (label, minimum mean r, minimum mean NSE), best tier first.
    category_tiers = (
        ('Excellent', 0.7, 0.5),
        ('Good', 0.5, 0.3),
        ('Fair', 0.3, 0),
    )

    rows = []

    for basin_name in merged_df['Basin'].unique():
        subset = merged_df[merged_df['Basin'] == basin_name]

        row = {
            'Basin': basin_name,
            'Station_Count': len(subset),
            'Model': 'Ensemble_6Models_Uniform',
            'Ensemble_Type': '6_Models_Equal_Weight'
        }

        for metric in metric_names:
            if metric not in subset.columns:
                continue
            observed = subset[metric].dropna()
            has_values = len(observed) > 0
            for suffix, reduce_values in reducers:
                row[f'{metric}_{suffix}'] = reduce_values(observed) if has_values else np.nan

        # Absolute mean bias, used later when ranking basins.
        if 'PBIAS_mean' in row:
            row['abs_PBIAS_mean'] = abs(row['PBIAS_mean'])

        # NaN means fail every comparison below, so they end up as 'Poor'.
        mean_r = row.get('r_mean', 0)
        mean_nse = row.get('NSE_mean', -999)
        row['Performance_Category'] = next(
            (label for label, min_r, min_nse in category_tiers
             if mean_r >= min_r and mean_nse >= min_nse),
            'Poor',
        )

        rows.append(row)

    return pd.DataFrame(rows)

def create_basin_comparison_6models(merged_df):
    """Build the station-level comparison table, grouped by basin.

    Basins are visited in order of first appearance in
    ``merged_df['Basin']`` and, within each basin, stations keep their
    original row order. Every station contributes one record with its
    identifiers, coordinates, validation metrics, the absolute PBIAS and the
    grade returned by ``classify_station_performance``. Optional columns
    missing from the row are reported as NaN (``Station_Name`` falls back to
    ``Station_<id>``); ``Station_ID`` and ``Model`` are required.

    Returns a DataFrame with one row per station.
    """

    def build_record(basin_name, station):
        # One output row; key order defines the column order.
        return {
            'Basin': basin_name,
            'Station_ID': station['Station_ID'],
            'Station_Name': station.get('Station_Name', f"Station_{station['Station_ID']}"),
            'Latitude': station.get('Latitude', np.nan),
            'Longitude': station.get('Longitude', np.nan),
            'Model': station['Model'],
            'Ensemble_Type': '6_Models_Uniform',
            'r': station.get('r', np.nan),
            'NSE': station.get('NSE', np.nan),
            'RMSE': station.get('RMSE', np.nan),
            'MAE': station.get('MAE', np.nan),
            'PBIAS': station.get('PBIAS', np.nan),
            'abs_PBIAS': np.nan if np.isnan(station.get('PBIAS', np.nan)) else abs(station.get('PBIAS', 0)),
            'Coverage_Ratio': station.get('Coverage_Ratio', np.nan),
            'Valid_Months': station.get('Valid_Months', np.nan),
            'Missing_Months': station.get('Missing_Months', np.nan),
            'Performance_Grade': classify_station_performance(station)
        }

    records = []
    for basin_name in merged_df['Basin'].unique():
        in_basin = merged_df[merged_df['Basin'] == basin_name]
        records.extend(build_record(basin_name, station) for _, station in in_basin.iterrows())

    return pd.DataFrame(records)

def classify_station_performance(station):
    """Grade one station from its correlation ``r`` and ``NSE``.

    ``station`` is any mapping-like object with ``.get``. A missing ``r``
    counts as 0 and a missing ``NSE`` as -999. The first tier whose two
    minimums are both met wins; anything else (including NaN values, which
    fail every comparison) is graded ``'D'``.
    """
    r = station.get('r', 0)
    nse = station.get('NSE', -999)

    # (grade, minimum r, minimum NSE), best grade first.
    grade_tiers = (
        ('A+', 0.8, 0.7),
        ('A', 0.7, 0.5),
        ('B', 0.5, 0.3),
        ('C', 0.3, 0),
    )

    for grade, min_r, min_nse in grade_tiers:
        if r >= min_r and nse >= min_nse:
            return grade
    return 'D'

def rank_basins_by_performance_6models(basin_stats):
    """Rank basins on several performance criteria (6-model ensemble).

    A copy of ``basin_stats`` is extended with:

    * ``<metric>_rank`` for each ranking metric present (1 is best; NaN
      values are ranked last),
    * ``avg_rank`` - mean of those per-metric ranks,
    * ``overall_rank`` - rank of ``avg_rank``,
    * ``weighted_score`` - 0.4*r_mean + 0.3*NSE_mean
      + 0.2*(1 - abs_PBIAS_mean/100) + 0.1*Coverage_Ratio_mean, where an
      absent column contributes 0 and a NaN value makes the score NaN.

    The result is sorted by ``avg_rank`` (NaN last); ties keep input order.

    Only the rank columns created here feed ``avg_rank``. Other columns that
    happen to end in ``_rank`` (for example ``avg_rank``/``overall_rank`` of
    an already ranked frame) are ignored, so re-ranking gives the same result.
    """

    # (metric column, ascending?) - ascending=True means lower is better.
    metric_directions = (
        ('r_mean', False),
        ('NSE_mean', False),
        ('Coverage_Ratio_mean', False),
        ('RMSE_mean', True),
        ('MAE_mean', True),
        ('abs_PBIAS_mean', True),
    )

    rankings_df = basin_stats.copy()

    # Rank for each metric (1 is best), remembering exactly what was added.
    rank_columns = []
    for metric, ascending in metric_directions:
        if metric not in rankings_df.columns:
            continue
        rank_column = f'{metric}_rank'
        rankings_df[rank_column] = rankings_df[metric].rank(ascending=ascending, na_option='bottom')
        rank_columns.append(rank_column)

    # Calculate average rank across all metrics
    if rank_columns:
        rankings_df['avg_rank'] = rankings_df[rank_columns].mean(axis=1)
        rankings_df['overall_rank'] = rankings_df['avg_rank'].rank(na_option='bottom')
    else:
        rankings_df['avg_rank'] = np.nan
        rankings_df['overall_rank'] = np.nan

    # Add weighted performance score (emphasizing correlation and NSE)
    rankings_df['weighted_score'] = (
        rankings_df.get('r_mean', 0) * 0.4 +
        rankings_df.get('NSE_mean', 0) * 0.3 +
        (1 - rankings_df.get('abs_PBIAS_mean', 100) / 100) * 0.2 +
        rankings_df.get('Coverage_Ratio_mean', 0) * 0.1
    )

    # Stable sort so basins tied on avg_rank keep a reproducible order.
    rankings_df = rankings_df.sort_values('avg_rank', na_position='last', kind='mergesort')

    return rankings_df

def save_comprehensive_summary_6models(basin_stats, basin_comparison, rankings, merged_df, output_dir):
    """Write the multi-sheet summary workbook for the 6-model ensemble.

    The workbook ``Ensemble_6Models_Basin_Validation_Summary.xlsx`` is
    created in ``output_dir`` with the openpyxl engine and receives, in
    order, the sheets ``Basin_Statistics``, ``Basin_Rankings``,
    ``Station_Details``, ``Key_Metrics_Summary``, ``Station_Counts``,
    ``Performance_Categories``, ``Station_Grades_by_Basin`` and
    ``Coverage_Analysis``. Only ``Station_Grades_by_Basin`` is written with
    its index (the basin names); every other sheet omits it.
    """

    workbook_path = os.path.join(output_dir, 'Ensemble_6Models_Basin_Validation_Summary.xlsx')

    key_metric_columns = [
        'Basin', 'Station_Count', 'r_mean', 'NSE_mean', 'RMSE_mean',
        'PBIAS_mean', 'Coverage_Ratio_mean', 'Performance_Category',
        'weighted_score', 'overall_rank',
    ]

    with pd.ExcelWriter(workbook_path, engine='openpyxl') as writer:

        def add_sheet(frame, sheet_name, keep_index=False):
            # Small wrapper so each sheet below is a single line.
            frame.to_excel(writer, sheet_name=sheet_name, index=keep_index)

        # Inputs written as they are.
        add_sheet(basin_stats, 'Basin_Statistics')
        add_sheet(rankings, 'Basin_Rankings')
        add_sheet(basin_comparison, 'Station_Details')

        # Headline metrics, best overall rank first.
        headline = rankings[key_metric_columns].copy()
        add_sheet(headline.sort_values('overall_rank'), 'Key_Metrics_Summary')

        # Number of stations in each basin.
        per_basin_counts = merged_df.groupby('Basin').size().reset_index(name='Station_Count')
        add_sheet(per_basin_counts, 'Station_Counts')

        # Basin-level performance categories.
        add_sheet(create_performance_categories_6models(rankings), 'Performance_Categories')

        # Basin x grade table of station counts; the basin index is kept.
        grade_table = (
            basin_comparison
            .groupby(['Basin', 'Performance_Grade'])
            .size()
            .unstack(fill_value=0)
        )
        add_sheet(grade_table, 'Station_Grades_by_Basin', keep_index=True)

        # Data-coverage figures per basin.
        add_sheet(analyze_coverage_by_basin(merged_df), 'Coverage_Analysis')

def create_performance_categories_6models(rankings):
    """Tabulate the performance category of each basin (6-model ensemble).

    For every row of ``rankings`` the existing ``Performance_Category`` is
    carried over (``'Unknown'`` when absent) and a ``Detailed_Category`` is
    derived from ``r_mean`` and ``NSE_mean`` with one extra top tier,
    ``'Outstanding'``. Missing ``r_mean``/``Coverage_Ratio_mean`` default to
    0, missing ``NSE_mean`` to -999 and missing ``Station_Count`` to 0; other
    optional fields default to NaN.

    Returns a DataFrame with one row per basin.
    """

    # (label, minimum mean r, minimum mean NSE), best tier first.
    detailed_tiers = (
        ('Outstanding', 0.8, 0.7),
        ('Excellent', 0.7, 0.5),
        ('Good', 0.5, 0.3),
        ('Fair', 0.3, 0),
    )

    def describe(basin):
        name = basin['Basin']
        mean_r = basin.get('r_mean', 0)
        mean_nse = basin.get('NSE_mean', -999)
        mean_coverage = basin.get('Coverage_Ratio_mean', 0)
        stored_category = basin.get('Performance_Category', 'Unknown')

        detailed = 'Poor'
        for label, min_r, min_nse in detailed_tiers:
            if mean_r >= min_r and mean_nse >= min_nse:
                detailed = label
                break

        return {
            'Basin': name,
            'Performance_Category': stored_category,
            'Detailed_Category': detailed,
            'r_mean': mean_r,
            'NSE_mean': mean_nse,
            'RMSE_mean': basin.get('RMSE_mean', np.nan),
            'Coverage_Ratio_mean': mean_coverage,
            'Station_Count': basin.get('Station_Count', 0),
            'Overall_Rank': basin.get('overall_rank', np.nan),
            'Weighted_Score': basin.get('weighted_score', np.nan),
            'Ensemble_Type': '6_Models_Uniform'
        }

    return pd.DataFrame([describe(basin) for _, basin in rankings.iterrows()])

def analyze_coverage_by_basin(merged_df):
    """Summarise data coverage for each basin.

    ``merged_df`` must provide the columns ``Basin``, ``Coverage_Ratio``,
    ``Valid_Months`` and ``Missing_Months``. For every distinct basin (in
    order of first appearance) the result holds the station count, the
    mean/min/max coverage ratio, the number of stations with a ratio
    >= 1.0, >= 0.8 and < 0.5, the mean of ``Valid_Months`` and the sum of
    ``Missing_Months``.

    Returns a DataFrame with one row per basin.
    """

    rows = []

    for basin_name in merged_df['Basin'].unique():
        subset = merged_df.loc[merged_df['Basin'] == basin_name]
        ratios = subset['Coverage_Ratio']

        rows.append({
            'Basin': basin_name,
            'Station_Count': len(subset),
            'Avg_Coverage_Ratio': ratios.mean(),
            'Min_Coverage_Ratio': ratios.min(),
            'Max_Coverage_Ratio': ratios.max(),
            'Stations_Full_Coverage': ratios.ge(1.0).sum(),
            'Stations_High_Coverage': ratios.ge(0.8).sum(),
            'Stations_Low_Coverage': ratios.lt(0.5).sum(),
            'Avg_Valid_Months': subset['Valid_Months'].mean(),
            'Total_Missing_Months': subset['Missing_Months'].sum()
        })

    return pd.DataFrame(rows)

def create_ensemble_comparison(merged_df, output_dir):
    """Compare the 3-model and 6-model ensembles if 3-model data exists.

    When the 3-model basin validation workbook is present, every station ID
    found in both data sets gets one row with both ensembles' r, NSE, RMSE
    and PBIAS, the 6-minus-3 differences and the verdict from
    ``determine_better_ensemble``. The table is written to
    ``Ensemble_Comparison_3vs6_Models.xlsx`` in ``output_dir``.

    Rows follow the station order of ``merged_df`` so the output file is
    identical between runs. If a station ID occurs more than once, its first
    row is used. Rows without a station ID are ignored. Differences are NaN
    when either side lacks the metric.

    The comparison is optional: nothing happens when the 3-model file is
    missing, and any failure is reported with a warning instead of raised.
    Returns ``None``.
    """

    # Check if 3-model results exist
    three_model_file = r"D:\RICAAR\Pr.New.Stations.Selection\ensemble.model\ncfiles\Validation.results\ACCORDING.TO.BASIN\Station_Basin_Validation_Data.xlsx"

    if not os.path.exists(three_model_file):
        return

    print("📊 Found 3-model ensemble data - creating comparison...")

    def index_by_station(frame):
        # First row per station ID, addressable by that ID.
        return (
            frame.dropna(subset=['Station_ID'])
            .drop_duplicates(subset='Station_ID', keep='first')
            .set_index('Station_ID')
        )

    try:
        three_by_id = index_by_station(pd.read_excel(three_model_file))
        six_by_id = index_by_station(merged_df)

        # Walk the 6-model index instead of a set: deterministic row order.
        common_stations = [sid for sid in six_by_id.index if sid in three_by_id.index]

        comparison_data = []
        for station_id in common_stations:
            three_data = three_by_id.loc[station_id]
            six_data = six_by_id.loc[station_id]

            three_r, six_r = three_data.get('r', np.nan), six_data.get('r', np.nan)
            three_nse, six_nse = three_data.get('NSE', np.nan), six_data.get('NSE', np.nan)
            three_rmse, six_rmse = three_data.get('RMSE', np.nan), six_data.get('RMSE', np.nan)

            comparison_data.append({
                'Station_ID': station_id,
                'Basin': six_data.get('Basin', 'Unknown'),
                '3Model_r': three_r,
                '6Model_r': six_r,
                '3Model_NSE': three_nse,
                '6Model_NSE': six_nse,
                '3Model_RMSE': three_rmse,
                '6Model_RMSE': six_rmse,
                '3Model_PBIAS': three_data.get('PBIAS', np.nan),
                '6Model_PBIAS': six_data.get('PBIAS', np.nan),
                'r_Improvement': six_r - three_r,
                'NSE_Improvement': six_nse - three_nse,
                'RMSE_Change': six_rmse - three_rmse,
                'Better_Ensemble': determine_better_ensemble(three_data, six_data)
            })

        comparison_df = pd.DataFrame(comparison_data)

        # Save comparison
        comparison_file = os.path.join(output_dir, 'Ensemble_Comparison_3vs6_Models.xlsx')
        comparison_df.to_excel(comparison_file, index=False)

        print(f"✅ Ensemble comparison saved to: {comparison_file}")
        print(f"📊 Compared {len(common_stations)} common stations")

    except Exception as e:
        # Best-effort extra output: report and carry on with the main run.
        print(f"⚠️ Could not create ensemble comparison: {e}")

def determine_better_ensemble(three_data, six_data):
    """Say which ensemble performs better for one station.

    Each argument is a mapping-like object with ``.get``. Its score is the
    average of ``r`` and ``NSE`` (a missing value counts as 0). An ensemble
    wins only when its score exceeds the other's by more than 0.05;
    otherwise the result is ``'Similar Performance'``.
    """

    margin = 0.05  # minimum score lead needed to declare a winner

    def score(record):
        return (record.get('r', 0) + record.get('NSE', 0)) / 2

    three_score = score(three_data)
    six_score = score(six_data)

    if six_score > three_score + margin:
        return '6-Model Better'
    if three_score > six_score + margin:
        return '3-Model Better'
    return 'Similar Performance'

def print_basin_summary_6models(basin_stats, rankings):
    """Print the console summary of basin performance (6-model ensemble).

    ``basin_stats`` supplies the totals, the averages of the ``r_mean``,
    ``NSE_mean``, ``RMSE_mean`` and ``Coverage_Ratio_mean`` columns and the
    count of basins per ``Performance_Category``. The first three rows of
    ``rankings`` are listed as the top performing basins. Output goes to
    stdout only; nothing is returned.
    """

    banner = '=' * 60
    rule = "-" * 40

    print(f"\n{banner}")
    print("6-MODEL ENSEMBLE BASIN PERFORMANCE SUMMARY")
    print(f"{banner}")

    print(f"Total basins analyzed: {len(basin_stats)}")
    print(f"Total stations: {basin_stats['Station_Count'].sum()}")
    print("Ensemble approach: Uniform 6-model ensemble")

    print("\n🏆 TOP 3 PERFORMING BASINS:")
    print(rule)

    for position, (_, row) in enumerate(rankings.head(3).iterrows(), start=1):
        print(f"\n{position}. {row['Basin']}")
        print(f"   Stations: {row['Station_Count']}")
        print(f"   Correlation: {row.get('r_mean', 0):.3f}")
        print(f"   NSE: {row.get('NSE_mean', 0):.3f}")
        print(f"   RMSE: {row.get('RMSE_mean', 0):.2f}")
        print(f"   Category: {row.get('Performance_Category', 'Unknown')}")
        print(f"   Overall Rank: {row.get('overall_rank', 0):.0f}")

    print("\n📊 OVERALL ENSEMBLE STATISTICS:")
    print(rule)

    # All four averages are computed before any of them is printed.
    average_specs = (
        ('Correlation', 'r_mean', '.3f'),
        ('NSE', 'NSE_mean', '.3f'),
        ('RMSE', 'RMSE_mean', '.2f'),
        ('Coverage', 'Coverage_Ratio_mean', '.3f'),
    )
    averages = [
        (label, basin_stats[column].mean(), number_format)
        for label, column, number_format in average_specs
    ]
    for label, value, number_format in averages:
        print(f"Average {label}: {value:{number_format}}")

    # Number of basins in each performance category.
    per_category = basin_stats['Performance_Category'].value_counts()
    print("\n🏅 PERFORMANCE CATEGORIES:")
    print(rule)
    for category, count in per_category.items():
        print(f"{category}: {count} basins")

    print("\n🎯 KEY INFORMATION:")
    print(rule)
    for closing_line in (
        "✅ Uniform 6-model ensemble applied to all basins",
        "📊 Models: CMCC-CM2-SR5, CNRM-ESM2-1, EC-Earth3-Veg, IPSL-CM6A-LR, MPI-ESM1-2-LR, NorESM2-MM",
        "🔍 Results show ensemble performance across different hydrological regions",
        "📈 Basin-specific performance variations reflect regional climate model accuracy",
    ):
        print(closing_line)

# Run the full basin validation workflow when executed as a script
# (or as a notebook cell); importing the module does not trigger it.
if __name__ == "__main__":
    create_basin_validation_analysis_6models()

6-MODEL ENSEMBLE BASIN-BASED VALIDATION
🎯 Analyzing 6-model ensemble performance by basin
📊 Using uniform 6-model ensemble approach
🔍 Following same methodology as 3-model ensemble validation
--------------------------------------------------------------------------------
📊 Reading 6-model ensemble validation results...
✅ Loaded validation data for 49 stations
📊 Model type: Ensemble_6Models
📍 Reading station-basin mapping...
✅ Loaded basin mapping for 49 stations
📊 Basins found: 12
📍 Basin distribution:
   - D.S.R.S.W: 7 stations
   - AMMAN ZARQA (JORDAN): 6 stations
   - MUJIB: 6 stations
   - YARMOUK (JORDAN): 5 stations
   - JAFER: 5 stations
   - N.R.S.W: 4 stations
   - AZRAQ (JORDAN): 4 stations
   - S.R.S.W: 4 stations
   - JORDAN VALLY (JORDAN): 3 stations
   - W. ARABA NORTH: 3 stations
   ... and 2 more basins
🔗 Merging validation data with basin information...
✅ Successfully merged data for 49 stations
📊 Calculating basin-wise performance statistics...
🏆 Creating basin perfo