In [1]:
import xarray as xr
import numpy as np
from pathlib import Path
import geopandas as gpd
import pandas as pd
import regionmask

def _thirty_day_month_totals(chunk, months):
    """Sum each requested calendar month of ``chunk``, rescaled to 30 days.

    Months that have no time steps in ``chunk`` are skipped, so the returned
    list can be shorter than ``months``.
    """
    totals = []
    for month_number in months:
        in_month = chunk.sel(time=chunk.time.dt.month == month_number)
        if len(in_month.time) == 0:
            continue
        # Calendar length of the month, read from the first time step present.
        month_length = in_month.time.dt.days_in_month[0].values
        totals.append(in_month.sum('time') * (30 / month_length))
    return totals


def calculate_seasonal_monthly_means(ds, start_year, end_year, season_type):
    """Average the 30-day-normalised monthly totals of one season over years.

    For every year the time steps of each season month are summed over
    ``time`` and scaled by ``30 / days_in_month``; those monthly values are
    averaged per year, and the yearly values are averaged again.

    Args:
        ds: Object with a ``time`` coordinate supporting ``.sel`` and
            ``.sum('time')`` (an xarray Dataset in this notebook).
        start_year: First year considered.
        end_year: Last year considered. For the wet season the last season
            starts in ``end_year - 1`` because it runs into the next year.
        season_type: ``'wet'`` for October-March; any other value selects
            April-September.

    Returns:
        The multi-year mean, or ``None`` when no year contributed data.
    """
    yearly_means = []

    if season_type == 'wet':
        # A wet season starts in October of `year` and ends in March of `year + 1`.
        for year in range(start_year, end_year):
            autumn = ds.sel(time=slice(f"{year}-10-01", f"{year}-12-31"))
            winter = ds.sel(time=slice(f"{year+1}-01-01", f"{year+1}-03-31"))

            # Both halves must contain data for the season to count.
            if len(autumn.time) == 0 or len(winter.time) == 0:
                continue

            totals = (
                _thirty_day_month_totals(autumn, (10, 11, 12))
                + _thirty_day_month_totals(winter, (1, 2, 3))
            )
            if totals:
                yearly_means.append(sum(totals) / len(totals))
    else:  # dry season
        for year in range(start_year, end_year + 1):
            half_year = ds.sel(time=slice(f"{year}-04-01", f"{year}-09-30"))
            totals = _thirty_day_month_totals(half_year, range(4, 10))
            if totals:
                yearly_means.append(sum(totals) / len(totals))

    if not yearly_means:
        return None
    return sum(yearly_means) / len(yearly_means)

def load_and_concatenate_ensemble_files(input_dir):
    """Open the six SSP 8.5 ensemble NetCDF files and join them along ``time``.

    Files missing from ``input_dir`` are reported and skipped. If opening a
    file or concatenating fails, every dataset opened so far is closed before
    the exception propagates, so no file handles are left behind.

    Args:
        input_dir: Directory expected to contain the ensemble files.

    Returns:
        The dataset produced by ``xr.concat(..., dim='time')``.

    Raises:
        FileNotFoundError: If none of the expected files exists.
    """
    print("Loading and concatenating SSP 8.5 ensemble files...")

    # Define the expected file pattern for SSP 8.5 ensemble files
    file_patterns = [
        "ensemble_precipitation_6models_ssp85_1961_1994.nc",
        "ensemble_precipitation_6models_ssp85_1995_2014.nc",
        "ensemble_precipitation_6models_ssp85_2015_2020.nc",
        "ensemble_precipitation_6models_ssp85_2021_2040.nc",
        "ensemble_precipitation_6models_ssp85_2041_2060.nc",
        "ensemble_precipitation_6models_ssp85_2061_2070.nc",
    ]

    def _day(value):
        return pd.to_datetime(value).strftime('%Y-%m-%d')

    datasets = []
    total_size = 0

    try:
        for pattern in file_patterns:
            file_path = Path(input_dir) / pattern
            if not file_path.exists():
                print(f"Warning: File {pattern} not found in {input_dir}")
                continue

            print(f"Loading {pattern}...")
            ds = xr.open_dataset(file_path)
            # Register immediately so the cleanup below also covers this file.
            datasets.append(ds)

            file_size = file_path.stat().st_size / (1024**3)  # GB
            total_size += file_size
            print(f"  Time range: {_day(ds.time.values[0])} to {_day(ds.time.values[-1])}")
            print(f"  Time steps: {len(ds.time)}")
            print(f"  Size: {file_size:.1f} GB")

        if not datasets:
            raise FileNotFoundError("No SSP 8.5 ensemble files found!")

        print(f"\nConcatenating {len(datasets)} datasets...")
        combined_ds = xr.concat(datasets, dim='time')
    except Exception:
        # Do not leave partially opened files locked when loading fails.
        for opened in datasets:
            opened.close()
        raise

    # NOTE(review): the source datasets are left open on success because the
    # combined dataset may still read from them lazily - confirm before
    # closing them here.
    print("Combined dataset:")
    print(f"  Total time range: {_day(combined_ds.time.values[0])} to {_day(combined_ds.time.values[-1])}")
    print(f"  Total time steps: {len(combined_ds.time)}")
    print(f"  Total size: {total_size:.1f} GB")

    return combined_ds

def process_precipitation_data(input_dir, output_dir, basin_shapefile, gov_shapefile):
    """Compute basin-masked seasonal precipitation grids for SSP 8.5 and save them.

    For every analysis period the wet- and dry-season means are computed once
    from the combined ensemble dataset, then masked basin by basin and merged
    into one grid per (period, season), which is written to ``output_dir`` as
    NetCDF.

    Args:
        input_dir: Directory holding the ensemble NetCDF files.
        output_dir: Directory for the result files (created if missing).
        basin_shapefile: Basin polygons; must provide a ``BASIN_NAME`` column.
        gov_shapefile: Governorate polygons. Read and reprojected, but not
            used by the calculations in this function.

    Returns:
        List of paths of the files written, or ``None`` if the ensemble files
        could not be loaded.
    """
    print("\nInitializing SSP 8.5 ensemble precipitation analysis...")

    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # Define periods for SSP 8.5 - matching the ensemble file structure
    periods = {
        'reference': (1995, 2014),
        'near_future': (2021, 2040),
        'mid_future': (2041, 2060),
        'far_future': (2061, 2070),
    }
    seasons = ('wet', 'dry')

    print("Reading shapefiles...")
    basins = gpd.read_file(basin_shapefile)
    basins = basins.to_crs(epsg=4326)
    # Not used below; reading it still makes an unreadable path fail up front.
    gpd.read_file(gov_shapefile).to_crs(epsg=4326)

    # Filter basins
    exclude_basins = ['JVALLEYYARMOUKTRIANGLE']
    basins = basins[~basins['BASIN_NAME'].isin(exclude_basins)]
    basins = basins[~basins['BASIN_NAME'].str.contains("SYRIA", case=False, na=False)]

    print(f"Processing {len(basins)} basins...")

    # Load the complete ensemble dataset
    try:
        combined_ds = load_and_concatenate_ensemble_files(input_dir)
    except Exception as e:
        print(f"Error loading ensemble files: {e}")
        return None

    combined_data = {period: dict.fromkeys(seasons) for period in periods}

    try:
        # Process each period using the combined ensemble dataset
        for period_name, (start_year, end_year) in periods.items():
            print(f"\nProcessing {period_name} period ({start_year}-{end_year})...")

            try:
                # Select data for the specific period
                period_ds = combined_ds.sel(
                    time=slice(f"{start_year}-01-01", f"{end_year}-12-31")
                )

                if len(period_ds.time) == 0:
                    print(f"Warning: No data found for period {period_name}")
                    continue

                print(f"  Selected {len(period_ds.time)} time steps for {period_name}")

                # The seasonal mean does not depend on the basin, so compute it
                # once per period instead of once per basin.
                seasonal_means = {}
                for season in seasons:
                    print(f"  Computing {season} season mean...")
                    seasonal_means[season] = calculate_seasonal_monthly_means(
                        period_ds, start_year, end_year, season
                    )

                for _, basin in basins.iterrows():
                    print(f"  Processing basin: {basin['BASIN_NAME']}")
                    basin_gdf = gpd.GeoDataFrame(geometry=[basin.geometry])
                    basin_mask = regionmask.mask_geopandas(
                        basin_gdf, period_ds.lon, period_ds.lat
                    )
                    inside_basin = ~basin_mask.isnull()

                    for season in seasons:
                        print(f"    Processing {season} season...")
                        seasonal_mean = seasonal_means[season]
                        if seasonal_mean is None:
                            continue

                        masked_data = seasonal_mean.where(inside_basin)
                        merged_so_far = combined_data[period_name][season]

                        if merged_so_far is None:
                            combined_data[period_name][season] = masked_data
                        else:
                            # Cells of this basin overwrite; all others keep
                            # what earlier basins contributed.
                            combined_data[period_name][season] = xr.where(
                                ~masked_data.isnull(),
                                masked_data,
                                merged_so_far,
                            )

            except Exception as e:
                # Best effort: a failing period must not abort the others.
                print(f"Error processing period {period_name}: {e}")
                continue

        print("\nSaving SSP 8.5 ensemble results...")
        saved_files = []

        for period_name, seasons_data in combined_data.items():
            for season, data in seasons_data.items():
                if data is None:
                    continue

                nc_output = output_dir / f"precipitation_ssp85_ensemble_{period_name}_{season}_season_mmpermonth.nc"

                # Add metadata to the output
                data.attrs['scenario'] = 'SSP 8.5'
                data.attrs['source'] = '6-model ensemble average'
                data.attrs['period'] = f"{periods[period_name][0]}-{periods[period_name][1]}"
                data.attrs['season'] = season
                data.attrs['units'] = 'mm/month'
                data.attrs['description'] = f'Mean monthly precipitation for {season} season from SSP 8.5 ensemble'
                data.attrs['processing_date'] = pd.Timestamp.now().strftime('%Y-%m-%d %H:%M:%S')
                data.attrs['models_used'] = 'CMCC-CM2-SR5, CNRM-ESM2-1, EC-Earth3-Veg, IPSL-CM6A-LR, MPI-ESM1-2-LR, NorESM2-MM'
                data.attrs['ensemble_method'] = '6-model uniform ensemble'

                data.to_netcdf(nc_output)
                saved_files.append(nc_output)
                print(f"  Saved {nc_output}")

        print("\nSSP 8.5 ensemble analysis complete!")
        print(f"Total files saved: {len(saved_files)}")
        print(f"Results saved in: {output_dir}")

        return saved_files
    finally:
        # Release the combined dataset even if saving fails part-way.
        combined_ds.close()

def create_analysis_summary(output_dir, saved_files):
    """Write a plain-text report describing the seasonal analysis run.

    The report is saved as ``ssp85_seasonal_analysis_summary.txt`` inside
    ``output_dir`` and lists the name of every entry in ``saved_files``.

    Args:
        output_dir: Directory the report is written to.
        saved_files: Path-like objects exposing ``.name``, one per output file.
    """
    generated_at = pd.Timestamp.now().strftime('%Y-%m-%d %H:%M:%S')
    file_count = len(saved_files)
    file_listing = "\n".join(f"- {file.name}" for file in saved_files)

    report_text = f"""
# SSP 8.5 Ensemble Seasonal Precipitation Analysis Summary
Generated on: {generated_at}

## Analysis Details:
- Scenario: SSP 8.5
- Source: 6-model ensemble average
- Models: CMCC-CM2-SR5, CNRM-ESM2-1, EC-Earth3-Veg, IPSL-CM6A-LR, MPI-ESM1-2-LR, NorESM2-MM

## Time Periods Analyzed:
1. Reference: 1995-2014
2. Near Future: 2021-2040
3. Mid Future: 2041-2060
4. Far Future: 2061-2070

## Seasons:
- Wet Season: October-March (6 months)
- Dry Season: April-September (6 months)

## Output Files Generated ({file_count} files):
{file_listing}

## Processing Method:
1. Load and concatenate 6 ensemble NetCDF files
2. Extract seasonal data for each time period
3. Calculate monthly means normalized to 30-day months
4. Apply basin masks using regionmask
5. Generate seasonal precipitation grids

## Output Format:
- Variable: Precipitation (mm/month)
- Grid: Same as source ensemble files
- Coordinate System: EPSG:4326 (WGS84)
- Coverage: Jordan basins (Syria basins excluded)

## Usage:
These files can be used for:
- Climate change impact assessment
- Seasonal precipitation trend analysis
- Water resource planning
- Agricultural planning
- Comparative analysis with other scenarios
"""

    summary_path = Path(output_dir) / 'ssp85_seasonal_analysis_summary.txt'
    summary_path.write_text(report_text, encoding='utf-8')
    print(f"Analysis summary saved to: {summary_path}")

if __name__ == "__main__":
    # Script entry point: run the SSP 8.5 seasonal analysis on the paths below
    # and, when files were produced, write the text summary next to them.
    input_dir = Path(r"D:\RICAAR\Pr.New.Stations.Selection\ensemble.model8.5\ensemble.6models\nc.files")
    output_dir = Path(r"D:\RICAAR\Pr.New.Stations.Selection\ensemble.model8.5\ensemble.6models\nc.files\difference.files")
    basin_shapefile = Path(r"D:\RICAAR\surfacebasin\surface_basin.shp")
    gov_shapefile = Path(r"D:\RICAAR\Governorates\JordanwithGovernorates.shp")

    print("="*60)
    print("SSP 8.5 ENSEMBLE PRECIPITATION ANALYSIS")
    print("="*60)
    print(f"Input directory: {input_dir}")
    print(f"Output directory: {output_dir}")
    print(f"Basin shapefile: {basin_shapefile}")
    print(f"Jordan shapefile: {gov_shapefile}")

    try:
        saved_files = process_precipitation_data(
            input_dir,
            output_dir,
            basin_shapefile,
            gov_shapefile
        )

        if saved_files:
            create_analysis_summary(output_dir, saved_files)

        print("\n" + "="*60)
        print("PROCESSING SUMMARY")
        print("="*60)
        # process_precipitation_data returns None when loading fails and may
        # return an empty list, so only report success when files were written.
        if saved_files:
            print("Successfully processed SSP 8.5 ensemble data")
        else:
            print("No output files were produced; see the messages above")
        print(f"Files created: {len(saved_files) if saved_files else 0}")
        print(f"Output location: {output_dir}")

    except Exception as e:
        print(f"\nError during processing: {e}")
        import traceback
        traceback.print_exc()
        raise

    print("\nSSP 8.5 ensemble seasonal analysis complete!")

SSP 8.5 ENSEMBLE PRECIPITATION ANALYSIS
Input directory: D:\RICAAR\Pr.New.Stations.Selection\ensemble.model8.5\ensemble.6models\nc.files
Output directory: D:\RICAAR\Pr.New.Stations.Selection\ensemble.model8.5\ensemble.6models\nc.files\difference.files
Basin shapefile: D:\RICAAR\surfacebasin\surface_basin.shp
Jordan shapefile: D:\RICAAR\Governorates\JordanwithGovernorates.shp

Initializing SSP 8.5 ensemble precipitation analysis...
Reading shapefiles...
Processing 16 basins...
Loading and concatenating SSP 8.5 ensemble files...
Loading ensemble_precipitation_6models_ssp85_1961_1994.nc...
  Time range: 1961-01-01 to 1994-12-31
  Time steps: 12418
  Size: 0.6 GB
Loading ensemble_precipitation_6models_ssp85_1995_2014.nc...
  Time range: 1995-01-01 to 2014-12-31
  Time steps: 7305
  Size: 0.3 GB
Loading ensemble_precipitation_6models_ssp85_2015_2020.nc...
  Time range: 2015-01-01 to 2020-12-31
  Time steps: 2192
  Size: 0.1 GB
Loading ensemble_precipitation_6models_ssp85_2021_2040.nc...
  T

  basin_mask = regionmask.mask_geopandas(basin_gdf, period_ds.lon, period_ds.lat)


    Processing dry season...
  Processing basin: JORDAN VALLY (JORDAN)
    Processing wet season...
    Processing dry season...
  Processing basin: N.R.S.W
    Processing wet season...
    Processing dry season...
  Processing basin: AZRAQ (JORDAN)
    Processing wet season...
    Processing dry season...
  Processing basin: AMMAN ZARQA (JORDAN)
    Processing wet season...
    Processing dry season...
  Processing basin: S.R.S.W
    Processing wet season...
    Processing dry season...
  Processing basin: MUJIB
    Processing wet season...
    Processing dry season...
  Processing basin: D.S.R.S.W
    Processing wet season...
    Processing dry season...
  Processing basin: W. ARABA NORTH
    Processing wet season...
    Processing dry season...
  Processing basin: HASA
    Processing wet season...
    Processing dry season...
  Processing basin: JAFER
    Processing wet season...
    Processing dry season...
  Processing basin: WADI ARABA SOUTH
    Processing wet season...
    Proce

  basin_mask = regionmask.mask_geopandas(basin_gdf, period_ds.lon, period_ds.lat)


    Processing dry season...
  Processing basin: JORDAN VALLY (JORDAN)
    Processing wet season...
    Processing dry season...
  Processing basin: N.R.S.W
    Processing wet season...
    Processing dry season...
  Processing basin: AZRAQ (JORDAN)
    Processing wet season...
    Processing dry season...
  Processing basin: AMMAN ZARQA (JORDAN)
    Processing wet season...
    Processing dry season...
  Processing basin: S.R.S.W
    Processing wet season...
    Processing dry season...
  Processing basin: MUJIB
    Processing wet season...
    Processing dry season...
  Processing basin: D.S.R.S.W
    Processing wet season...
    Processing dry season...
  Processing basin: W. ARABA NORTH
    Processing wet season...
    Processing dry season...
  Processing basin: HASA
    Processing wet season...
    Processing dry season...
  Processing basin: JAFER
    Processing wet season...
    Processing dry season...
  Processing basin: WADI ARABA SOUTH
    Processing wet season...
    Proce

  basin_mask = regionmask.mask_geopandas(basin_gdf, period_ds.lon, period_ds.lat)


    Processing dry season...
  Processing basin: JORDAN VALLY (JORDAN)
    Processing wet season...
    Processing dry season...
  Processing basin: N.R.S.W
    Processing wet season...
    Processing dry season...
  Processing basin: AZRAQ (JORDAN)
    Processing wet season...
    Processing dry season...
  Processing basin: AMMAN ZARQA (JORDAN)
    Processing wet season...
    Processing dry season...
  Processing basin: S.R.S.W
    Processing wet season...
    Processing dry season...
  Processing basin: MUJIB
    Processing wet season...
    Processing dry season...
  Processing basin: D.S.R.S.W
    Processing wet season...
    Processing dry season...
  Processing basin: W. ARABA NORTH
    Processing wet season...
    Processing dry season...
  Processing basin: HASA
    Processing wet season...
    Processing dry season...
  Processing basin: JAFER
    Processing wet season...
    Processing dry season...
  Processing basin: WADI ARABA SOUTH
    Processing wet season...
    Proce

  basin_mask = regionmask.mask_geopandas(basin_gdf, period_ds.lon, period_ds.lat)


    Processing dry season...
  Processing basin: JORDAN VALLY (JORDAN)
    Processing wet season...
    Processing dry season...
  Processing basin: N.R.S.W
    Processing wet season...
    Processing dry season...
  Processing basin: AZRAQ (JORDAN)
    Processing wet season...
    Processing dry season...
  Processing basin: AMMAN ZARQA (JORDAN)
    Processing wet season...
    Processing dry season...
  Processing basin: S.R.S.W
    Processing wet season...
    Processing dry season...
  Processing basin: MUJIB
    Processing wet season...
    Processing dry season...
  Processing basin: D.S.R.S.W
    Processing wet season...
    Processing dry season...
  Processing basin: W. ARABA NORTH
    Processing wet season...
    Processing dry season...
  Processing basin: HASA
    Processing wet season...
    Processing dry season...
  Processing basin: JAFER
    Processing wet season...
    Processing dry season...
  Processing basin: WADI ARABA SOUTH
    Processing wet season...
    Proce

## Convert to Excel

In [2]:
import xarray as xr
import pandas as pd
import numpy as np
from pathlib import Path

def _valid_grid_points(ds):
    """Return the non-NaN, non-zero cells of ``ds.prAdjust`` as a DataFrame.

    The grid is read in one vectorised pass instead of one ``.sel`` call per
    cell. Rows are sorted by latitude, then longitude.
    """
    field = ds.prAdjust.transpose('lat', 'lon')
    values = np.asarray(field.values, dtype=float)
    # indexing='ij' keeps the (lat, lon) layout of `values`.
    lat_grid, lon_grid = np.meshgrid(
        field['lat'].values, field['lon'].values, indexing='ij'
    )
    keep = ~np.isnan(values) & (values != 0)
    points = pd.DataFrame({
        'Latitude': lat_grid[keep],
        'Longitude': lon_grid[keep],
        'Precipitation (mm/month)': values[keep],
    })
    return points.sort_values(['Latitude', 'Longitude'])


def convert_nc_to_excel(nc_dir, excel_dir):
    """Convert seasonal NetCDF grids to Excel workbooks with summary sheets.

    Every ``precipitation_ssp85_ensemble_*_season_mmpermonth.nc`` file in
    ``nc_dir`` (or, if none match, every ``precipitation_*.nc`` file) becomes
    one workbook in ``excel_dir`` holding the valid grid points, summary
    statistics, percentiles, a category breakdown and the file metadata.
    A ``SSP85_Conversion_Summary.xlsx`` overview is written when at least one
    file was handled.

    Args:
        nc_dir: Directory containing the NetCDF files.
        excel_dir: Output directory (created if missing).

    Returns:
        List with one dict per NetCDF file describing the outcome; its
        ``'Status'`` is ``'Success'``, ``'No valid data'`` or ``'Error: ...'``.
    """
    print("\nStarting SSP 8.5 NetCDF to Excel conversion...")

    # Create Excel output directory
    excel_dir = Path(excel_dir)
    excel_dir.mkdir(parents=True, exist_ok=True)

    # Process each NC file - pattern for SSP 8.5 ensemble files
    nc_files = list(Path(nc_dir).glob("precipitation_ssp85_ensemble_*_season_mmpermonth.nc"))
    print(f"Found {len(nc_files)} NetCDF files to process")

    if not nc_files:
        print("No SSP 8.5 ensemble files found. Checking for any precipitation files...")
        nc_files = list(Path(nc_dir).glob("precipitation_*.nc"))
        print(f"Found {len(nc_files)} total precipitation files")

    precip_column = 'Precipitation (mm/month)'
    metadata_keys = ('scenario', 'period', 'season', 'source', 'description',
                     'models_used', 'ensemble_method')
    percentiles = [5, 10, 25, 50, 75, 90, 95]
    conversion_summary = []

    for nc_file in nc_files:
        print(f"\nProcessing {nc_file.name}")
        try:
            # Read NetCDF file
            with xr.open_dataset(nc_file) as ds:
                print(f"  Dataset shape: {ds.prAdjust.shape}")
                print(f"  Coordinate ranges - Lat: {ds.lat.min().values:.3f} to {ds.lat.max().values:.3f}")
                print(f"  Coordinate ranges - Lon: {ds.lon.min().values:.3f} to {ds.lon.max().values:.3f}")

                total_points = len(ds.lat) * len(ds.lon)
                df = _valid_grid_points(ds)
                valid_points = len(df)
                coverage = f"{valid_points/total_points*100:.1f}%"

                print(f"  Valid data points: {valid_points}/{total_points} ({coverage})")

                if df.empty:
                    print(f"  Warning: No valid data points found in {nc_file.name}")
                    conversion_summary.append({
                        'NetCDF_File': nc_file.name,
                        'Excel_File': 'Not created',
                        'Scenario': 'SSP 8.5',
                        'Period': 'Unknown',
                        'Season': 'Unknown',
                        'Valid_Points': 0,
                        'Total_Points': total_points,
                        'Coverage_Percent': '0.0%',
                        'Mean_Precipitation': 'N/A',
                        'Max_Precipitation': 'N/A',
                        'Status': 'No valid data'
                    })
                    continue

                # Extract metadata from the file's global attributes, if present
                metadata_info = {
                    key: ds.attrs[key] for key in metadata_keys if key in ds.attrs
                }

                precip_values = df[precip_column]
                mean_text = f"{precip_values.mean():.2f}"
                max_text = f"{precip_values.max():.2f}"

                # Summary statistics sheet
                summary_df = pd.DataFrame({
                    'Statistic': [
                        'File Name',
                        'Scenario',
                        'Period',
                        'Season',
                        'Source',
                        'Models Used',
                        'Number of Valid Points',
                        'Total Grid Points',
                        'Data Coverage (%)',
                        'Total Precipitation (mm/month)',
                        'Mean Precipitation (mm/month)',
                        'Median Precipitation (mm/month)',
                        'Maximum Precipitation (mm/month)',
                        'Minimum Precipitation (mm/month)',
                        'Standard Deviation (mm/month)',
                        'Latitude Range',
                        'Longitude Range',
                        'Processing Date'
                    ],
                    'Value': [
                        nc_file.name,
                        metadata_info.get('scenario', 'SSP 8.5'),
                        metadata_info.get('period', 'Unknown'),
                        metadata_info.get('season', 'Unknown'),
                        metadata_info.get('source', '6-model ensemble average'),
                        metadata_info.get('models_used', 'CMCC-CM2-SR5, CNRM-ESM2-1, EC-Earth3-Veg, IPSL-CM6A-LR, MPI-ESM1-2-LR, NorESM2-MM'),
                        valid_points,
                        total_points,
                        coverage,
                        f"{precip_values.sum():.2f}",
                        mean_text,
                        f"{precip_values.median():.2f}",
                        max_text,
                        f"{precip_values.min():.2f}",
                        f"{precip_values.std():.2f}",
                        f"{df['Latitude'].min():.4f}°N to {df['Latitude'].max():.4f}°N",
                        f"{df['Longitude'].min():.4f}°E to {df['Longitude'].max():.4f}°E",
                        pd.Timestamp.now().strftime('%Y-%m-%d %H:%M:%S')
                    ]
                })

                # Precipitation distribution analysis
                distribution_df = pd.DataFrame({
                    'Percentile': [f"{p}th" for p in percentiles],
                    'Precipitation (mm/month)': [
                        f"{np.percentile(precip_values, p):.2f}" for p in percentiles
                    ]
                })

                # Precipitation categories analysis (half-open bins; negative
                # values fall into none of them)
                categories = ['Very Low (0-10)', 'Low (10-50)', 'Moderate (50-100)', 'High (100-200)', 'Very High (200+)']
                category_counts = [
                    int(((precip_values >= 0) & (precip_values < 10)).sum()),
                    int(((precip_values >= 10) & (precip_values < 50)).sum()),
                    int(((precip_values >= 50) & (precip_values < 100)).sum()),
                    int(((precip_values >= 100) & (precip_values < 200)).sum()),
                    int((precip_values >= 200).sum())
                ]
                category_df = pd.DataFrame({
                    'Precipitation Category (mm/month)': categories,
                    'Number of Grid Points': category_counts,
                    'Percentage': [f"{count/valid_points*100:.1f}%" for count in category_counts]
                })

                # Save to Excel with multiple sheets
                excel_file = excel_dir / f"{nc_file.stem}.xlsx"
                with pd.ExcelWriter(excel_file, engine='openpyxl') as writer:
                    df.to_excel(writer, sheet_name='Precipitation_Data', index=False)
                    summary_df.to_excel(writer, sheet_name='Summary_Statistics', index=False)
                    distribution_df.to_excel(writer, sheet_name='Distribution_Analysis', index=False)
                    category_df.to_excel(writer, sheet_name='Category_Analysis', index=False)

                    # Metadata sheet (if available)
                    if metadata_info:
                        metadata_df = pd.DataFrame(list(metadata_info.items()),
                                                   columns=['Attribute', 'Value'])
                        metadata_df.to_excel(writer, sheet_name='Metadata', index=False)

                print(f"  Saved {excel_file}")

                # Add to conversion summary
                conversion_summary.append({
                    'NetCDF_File': nc_file.name,
                    'Excel_File': excel_file.name,
                    'Scenario': metadata_info.get('scenario', 'SSP 8.5'),
                    'Period': metadata_info.get('period', 'Unknown'),
                    'Season': metadata_info.get('season', 'Unknown'),
                    'Valid_Points': valid_points,
                    'Total_Points': total_points,
                    'Coverage_Percent': coverage,
                    'Mean_Precipitation': mean_text,
                    'Max_Precipitation': max_text,
                    'Status': 'Success'
                })

        except Exception as e:
            # Per-file boundary: record the failure and carry on with the rest.
            print(f"  Error processing {nc_file.name}: {e}")
            conversion_summary.append({
                'NetCDF_File': nc_file.name,
                'Excel_File': 'Error',
                'Scenario': 'SSP 8.5',
                'Period': 'Error',
                'Season': 'Error',
                'Valid_Points': 'Error',
                'Total_Points': 'Error',
                'Coverage_Percent': 'Error',
                'Mean_Precipitation': 'Error',
                'Max_Precipitation': 'Error',
                'Status': f'Error: {e}'
            })

    # Save conversion summary
    if conversion_summary:
        summary_df = pd.DataFrame(conversion_summary)
        summary_file = excel_dir / "SSP85_Conversion_Summary.xlsx"
        summary_df.to_excel(summary_file, index=False)
        print(f"\nConversion summary saved to: {summary_file}")

    return conversion_summary

def create_master_summary(excel_dir, conversion_summary):
    """Write ``SSP85_Master_Summary.xlsx`` comparing all converted files.

    Only records whose ``'Status'`` is ``'Success'`` are used. The workbook
    has one sheet listing every file and two sheets aggregating by period and
    by season. Nothing is written when there is no successful record.

    Args:
        excel_dir: Output directory, as ``str`` or ``Path``.
        conversion_summary: Records as returned by ``convert_nc_to_excel``.
    """
    successful = [
        item for item in (conversion_summary or []) if item['Status'] == 'Success'
    ]
    if not successful:
        return

    print("\nCreating master comparison summary...")

    comparison_df = pd.DataFrame([
        {
            'Period': item['Period'],
            'Season': item['Season'],
            'File_Name': item['Excel_File'],
            'Valid_Points': item['Valid_Points'],
            'Coverage_%': item['Coverage_Percent'],
            # The records carry these two as formatted strings.
            'Mean_Precipitation_mm_month': float(item['Mean_Precipitation']),
            'Max_Precipitation_mm_month': float(item['Max_Precipitation'])
        }
        for item in successful
    ])

    aggregations = {
        'Mean_Precipitation_mm_month': 'mean',
        'Max_Precipitation_mm_month': 'max',
        'Valid_Points': 'sum'
    }
    period_summary = comparison_df.groupby('Period').agg(aggregations).round(2)
    season_summary = comparison_df.groupby('Season').agg(aggregations).round(2)

    # Accept plain strings too, matching convert_nc_to_excel.
    master_file = Path(excel_dir) / "SSP85_Master_Summary.xlsx"
    with pd.ExcelWriter(master_file, engine='openpyxl') as writer:
        comparison_df.to_excel(writer, sheet_name='All_Files_Comparison', index=False)
        period_summary.to_excel(writer, sheet_name='Period_Summary')
        season_summary.to_excel(writer, sheet_name='Season_Summary')

    print(f"Master summary saved to: {master_file}")

def main():
    """Convert the SSP 8.5 seasonal NetCDF outputs to Excel and report results.

    Stops early with a message when the input directory does not exist.
    Otherwise runs the conversion, builds the master summary when any file
    was handled, and prints a per-file overview.
    """
    # Define paths for SSP 8.5 ensemble
    nc_dir = Path(r"D:\RICAAR\Pr.New.Stations.Selection\ensemble.model8.5\ensemble.6models\nc.files\difference.files")
    excel_dir = Path(r"D:\RICAAR\Pr.New.Stations.Selection\ensemble.model8.5\ensemble.6models\nc.files\difference.files\Excel_files")

    banner = "="*70
    print(banner)
    print("SSP 8.5 ENSEMBLE NETCDF TO EXCEL CONVERTER")
    print(banner)
    print(f"Input directory:  {nc_dir}")
    print(f"Output directory: {excel_dir}")

    # Nothing to do without an input directory
    if not nc_dir.exists():
        print(f"\nError: Input directory does not exist: {nc_dir}")
        return

    records = convert_nc_to_excel(nc_dir, excel_dir)

    if records:
        create_master_summary(excel_dir, records)

    print("\n" + banner)
    print("CONVERSION SUMMARY")
    print(banner)

    if not records:
        print("No files were processed.")
    else:
        converted = [rec for rec in records if rec['Status'] == 'Success']
        not_converted = [rec for rec in records if rec['Status'] != 'Success']

        print(f"Total files processed: {len(records)}")
        print(f"Successfully converted: {len(converted)}")
        print(f"Failed conversions: {len(not_converted)}")

        if converted:
            print(f"\nExcel files created in: {excel_dir}")
            print("\nFiles created:")
            for rec in converted:
                print(f"  - {rec['Excel_File']} ({rec['Valid_Points']} data points)")
                print(f"    Period: {rec['Period']}, Season: {rec['Season']}")
                print(f"    Mean: {rec['Mean_Precipitation']} mm/month, Max: {rec['Max_Precipitation']} mm/month")

        if not_converted:
            print("\nFailed files:")
            for rec in not_converted:
                print(f"  - {rec['NetCDF_File']}: {rec['Status']}")

    print("\nSSP 8.5 NetCDF to Excel conversion complete!")


if __name__ == "__main__":
    main()

SSP 8.5 ENSEMBLE NETCDF TO EXCEL CONVERTER
Input directory:  D:\RICAAR\Pr.New.Stations.Selection\ensemble.model8.5\ensemble.6models\nc.files\difference.files
Output directory: D:\RICAAR\Pr.New.Stations.Selection\ensemble.model8.5\ensemble.6models\nc.files\difference.files\Excel_files

Starting SSP 8.5 NetCDF to Excel conversion...
Found 8 NetCDF files to process

Processing precipitation_ssp85_ensemble_far_future_dry_season_mmpermonth.nc
  Dataset shape: (85, 75)
  Coordinate ranges - Lat: 27.050 to 35.450
  Coordinate ranges - Lon: 33.550 to 40.950
  Valid data points: 824/6375 (12.9%)
  Saved D:\RICAAR\Pr.New.Stations.Selection\ensemble.model8.5\ensemble.6models\nc.files\difference.files\Excel_files\precipitation_ssp85_ensemble_far_future_dry_season_mmpermonth.xlsx

Processing precipitation_ssp85_ensemble_far_future_wet_season_mmpermonth.nc
  Dataset shape: (85, 75)
  Coordinate ranges - Lat: 27.050 to 35.450
  Coordinate ranges - Lon: 33.550 to 40.950
  Valid data points: 824/6375 (