In [1]:
import pandas as pd
import numpy as np
from pathlib import Path

def _read_period_frame(path):
    """Load one period workbook and tag each row with a 'Point' merge key.

    Reads the 'Precipitation_Data' sheet and adds a 'Point' column made of the
    textual latitude and longitude joined by an underscore.
    """
    frame = pd.read_excel(path, sheet_name='Precipitation_Data')
    # The key is built from the string form of the coordinates, so two periods
    # only match on points whose latitude/longitude print identically.
    frame['Point'] = frame['Latitude'].astype(str) + '_' + frame['Longitude'].astype(str)
    return frame


def _difference_frame(future_data, base_data):
    """Return per-point absolute and percent change, or None if no points match.

    The two frames are inner-joined on 'Point'; coordinates in the result are
    taken from the future frame.
    """
    merged = pd.merge(future_data, base_data, on='Point', suffixes=('_future', '_base'))
    if len(merged) == 0:
        return None

    future = merged['Precipitation (mm/month)_future']
    base = merged['Precipitation (mm/month)_base']
    delta = future - base

    return pd.DataFrame({
        'Latitude': merged['Latitude_future'],
        'Longitude': merged['Longitude_future'],
        'Future_Precipitation': future,
        'Base_Precipitation': base,
        'Precipitation_Difference': delta,
        # A zero base gives +/-inf (or NaN for 0/0); infinities become NaN so
        # the later dropna() removes those rows.
        'Percent_Change': (delta / base * 100).replace([np.inf, -np.inf], np.nan)
    })


def _summary_frame(df, df_clean, description):
    """Build the two-column 'Summary_Statistics' table for one comparison.

    `df` is the unfiltered difference table (used only for the original point
    count); every statistic is computed on `df_clean`.
    """
    diff = df_clean['Precipitation_Difference']
    pct = df_clean['Percent_Change']
    total = len(df_clean)

    def count_with_share(mask):
        # Rendered as "<count> (<share of valid points>%)".
        count = mask.sum()
        return f"{count} ({count / total * 100:.1f}%)"

    rows = [
        ('Comparison Type', description),
        ('Number of Valid Points', total),
        ('Number of Original Points', len(df)),
        ('Mean Difference (mm/month)', f"{diff.mean():.2f}"),
        ('Median Difference (mm/month)', f"{diff.median():.2f}"),
        ('Max Difference (mm/month)', f"{diff.max():.2f}"),
        ('Min Difference (mm/month)', f"{diff.min():.2f}"),
        ('Standard Deviation of Difference', f"{diff.std():.2f}"),
        ('Mean Percent Change (%)', f"{pct.mean():.2f}"),
        ('Median Percent Change (%)', f"{pct.median():.2f}"),
        ('Max Percent Change (%)', f"{pct.max():.2f}"),
        ('Min Percent Change (%)', f"{pct.min():.2f}"),
        ('Standard Deviation of Percent Change (%)', f"{pct.std():.2f}"),
        ('Points with Increase', count_with_share(diff > 0)),
        ('Points with Decrease', count_with_share(diff < 0)),
        ('Points with No Change', count_with_share(diff == 0)),
        ('Mean Future Precipitation (mm/month)', f"{df_clean['Future_Precipitation'].mean():.2f}"),
        ('Mean Base Precipitation (mm/month)', f"{df_clean['Base_Precipitation'].mean():.2f}"),
        ('Latitude Range', f"{df_clean['Latitude'].min():.4f}°N to {df_clean['Latitude'].max():.4f}°N"),
        ('Longitude Range', f"{df_clean['Longitude'].min():.4f}°E to {df_clean['Longitude'].max():.4f}°E"),
        ('Processing Date', pd.Timestamp.now().strftime('%Y-%m-%d %H:%M:%S'))
    ]
    labels, values = zip(*rows)
    return pd.DataFrame({'Statistic': list(labels), 'Value': list(values)})


def _category_frame(df_clean):
    """Count points per percent-change band for the 'Change_Categories' sheet."""
    pct = df_clean['Percent_Change']
    # Bands are contiguous: the -5..+5 band is closed on both ends, decrease
    # bands are closed on their lower edge, increase bands on their upper edge.
    bands = [
        ('Large Decrease (< -50%)', pct < -50),
        ('Moderate Decrease (-50% to -20%)', (pct >= -50) & (pct < -20)),
        ('Small Decrease (-20% to -5%)', (pct >= -20) & (pct < -5)),
        ('Minimal Change (-5% to +5%)', (pct >= -5) & (pct <= 5)),
        ('Small Increase (+5% to +20%)', (pct > 5) & (pct <= 20)),
        ('Moderate Increase (+20% to +50%)', (pct > 20) & (pct <= 50)),
        ('Large Increase (> +50%)', pct > 50)
    ]
    # Vectorised count; int() keeps plain Python integers in the column.
    counts = [int(mask.sum()) for _, mask in bands]
    total = len(df_clean)

    return pd.DataFrame({
        'Change Category': [label for label, _ in bands],
        'Number of Points': counts,
        'Percentage of Total': [f"{count/total*100:.1f}%" for count in counts]
    })


def calculate_differences(excel_dir, output_dir):
    """Calculate differences between periods and save to new Excel files.

    For each season ('wet', 'dry') the reference, near-future and mid-future
    ensemble workbooks are read from `excel_dir`; the far-future workbook is
    used when present. Every pairwise "later minus earlier" comparison is
    written to `output_dir` as
    ``precipitation_ssp85_difference_<season>_<comparison>.xlsx`` with three
    sheets: 'Difference_Data', 'Summary_Statistics' and 'Change_Categories'.

    Args:
        excel_dir: Directory (str or Path) containing the per-period workbooks.
        output_dir: Directory (str or Path) for the result workbooks; created
            if it does not exist.

    Returns:
        None. Progress is printed. A season is skipped when a required input
        file is missing, and any exception raised while processing a season is
        printed with its traceback before moving on to the next season.
    """
    print("\nCalculating precipitation differences between periods for SSP 8.5...")

    excel_dir = Path(excel_dir)
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    periods = ('reference', 'near_future', 'mid_future', 'far_future')
    required_periods = ('reference', 'near_future', 'mid_future')

    for season in ['wet', 'dry']:
        print(f"\nProcessing {season} season differences...")

        # File naming pattern for SSP 8.5
        period_files = {
            period: excel_dir / f"precipitation_ssp85_ensemble_{period}_{season}_season_mmpermonth.xlsx"
            for period in periods
        }

        # Check which files exist
        files_status = {period: path.exists() for period, path in period_files.items()}

        print(f"  File availability:")
        for period, exists in files_status.items():
            status = "✓" if exists else "✗"
            print(f"    {status} {period}: {exists}")

        if not all(files_status[period] for period in required_periods):
            print(f"  Skipping {season} season - missing required files")
            continue

        try:
            ref_data = _read_period_frame(period_files['reference'])
            near_data = _read_period_frame(period_files['near_future'])
            mid_data = _read_period_frame(period_files['mid_future'])

            # Far future is optional
            far_data = None
            if files_status['far_future']:
                far_data = _read_period_frame(period_files['far_future'])

            print(f"  Data points - Reference: {len(ref_data)}, Near: {len(near_data)}, Mid: {len(mid_data)}")
            if far_data is not None:
                print(f"  Far future: {len(far_data)}")

            # (file suffix, later period, earlier period, human-readable label)
            comparisons = [
                ('near_minus_ref', near_data, ref_data, 'Near Future - Reference'),
                ('mid_minus_ref', mid_data, ref_data, 'Mid Future - Reference'),
                ('mid_minus_near', mid_data, near_data, 'Mid Future - Near Future')
            ]

            if far_data is not None:
                comparisons.extend([
                    ('far_minus_ref', far_data, ref_data, 'Far Future - Reference'),
                    ('far_minus_near', far_data, near_data, 'Far Future - Near Future'),
                    ('far_minus_mid', far_data, mid_data, 'Far Future - Mid Future')
                ])

            # Calculate every comparison first, then write the workbooks.
            differences = {}
            for comp_name, future_data, base_data, description in comparisons:
                print(f"    Calculating {description}...")

                diff_frame = _difference_frame(future_data, base_data)
                if diff_frame is None:
                    print(f"      Warning: No common points found for {description}")
                    continue

                differences[comp_name] = {'data': diff_frame, 'description': description}
                print(f"      Common points: {len(diff_frame)}")

            # Save difference files
            for diff_type, diff_info in differences.items():
                df = diff_info['data']
                description = diff_info['description']

                # Drop rows with any NaN or infinite value, including rows
                # whose percent change is undefined because the base is zero.
                df_clean = df.replace([np.inf, -np.inf], np.nan).dropna()

                if len(df_clean) == 0:
                    print(f"      Warning: No valid data points for {description}")
                    continue

                summary_df = _summary_frame(df, df_clean, description)
                category_df = _category_frame(df_clean)

                # Save to Excel with multiple sheets
                output_file = output_dir / f"precipitation_ssp85_difference_{season}_{diff_type}.xlsx"
                with pd.ExcelWriter(output_file, engine='openpyxl') as writer:
                    df_clean.to_excel(writer, sheet_name='Difference_Data', index=False)
                    summary_df.to_excel(writer, sheet_name='Summary_Statistics', index=False)
                    category_df.to_excel(writer, sheet_name='Change_Categories', index=False)

                print(f"      Saved {output_file}")

        except Exception as e:
            # Best effort per season: report the failure and continue.
            print(f"  Error processing {season} season: {str(e)}")
            import traceback
            traceback.print_exc()
            continue

def create_difference_summary(output_dir):
    """Create a master summary of all difference calculations.

    Scans `output_dir` for ``precipitation_ssp85_difference_*.xlsx`` workbooks,
    reads each one's 'Summary_Statistics' sheet and writes
    ``SSP85_Difference_Master_Summary.xlsx`` into the same directory with an
    'All_Comparisons' sheet (one row per workbook) and a 'Season_Summary'
    sheet (valid-point total and comparison count per season).

    Args:
        output_dir: Directory (str or Path) holding the difference workbooks.

    Returns:
        None. Workbooks that cannot be read are reported and skipped.
    """
    print("\nCreating difference calculation summary...")

    # Normalise once so both the glob and the "/" join below accept a str.
    output_dir = Path(output_dir)
    filename_prefix = "precipitation_ssp85_difference_"
    # Sorted so the row order of the summary is stable between runs.
    diff_files = sorted(output_dir.glob(f"{filename_prefix}*.xlsx"))

    if not diff_files:
        print("No difference files found to summarize.")
        return

    summary_data = []

    for file in diff_files:
        try:
            # Stems look like "<prefix><season>_<comparison>", e.g.
            # "precipitation_ssp85_difference_wet_near_minus_ref": the season
            # is the first token after the prefix and the rest is the
            # comparison (which itself contains underscores).
            season, _, comparison = file.stem[len(filename_prefix):].partition('_')

            # Read summary statistics
            summary_stats = pd.read_excel(file, sheet_name='Summary_Statistics')
            stats_dict = dict(zip(summary_stats['Statistic'], summary_stats['Value']))

            # Cell values may come back as numbers rather than text, so
            # coerce to str before stripping a unit suffix.
            mean_difference = str(stats_dict.get('Mean Difference (mm/month)', '0.00'))

            summary_data.append({
                'Season': season.title(),
                'Comparison': comparison.replace('_', ' ').title(),
                'Comparison_Type': stats_dict.get('Comparison Type', 'Unknown'),
                'Valid_Points': stats_dict.get('Number of Valid Points', 0),
                'Mean_Difference_mm': mean_difference.replace('mm/month', ''),
                'Mean_Percent_Change': stats_dict.get('Mean Percent Change (%)', '0.00%'),
                'Points_Increase': stats_dict.get('Points with Increase', '0 (0.0%)'),
                'Points_Decrease': stats_dict.get('Points with Decrease', '0 (0.0%)'),
                'Max_Change_Percent': stats_dict.get('Max Percent Change (%)', '0.00%'),
                'Min_Change_Percent': stats_dict.get('Min Percent Change (%)', '0.00%'),
                'Filename': file.name
            })

        except Exception as e:
            # Best effort: one unreadable workbook must not abort the summary.
            print(f"Error processing {file.name}: {e}")
            continue

    if summary_data:
        master_summary = pd.DataFrame(summary_data)
        summary_file = output_dir / "SSP85_Difference_Master_Summary.xlsx"

        with pd.ExcelWriter(summary_file, engine='openpyxl') as writer:
            master_summary.to_excel(writer, sheet_name='All_Comparisons', index=False)

            # Season-wise summary (master_summary is non-empty here).
            season_summary = master_summary.groupby('Season').agg({
                'Valid_Points': 'sum',
                'Comparison': 'count'
            }).rename(columns={'Comparison': 'Number_of_Comparisons'})
            season_summary.to_excel(writer, sheet_name='Season_Summary')

        print(f"Master summary saved to: {summary_file}")

def main(excel_dir=None, output_dir=None):
    """Main function to calculate precipitation differences for SSP 8.5.

    Runs `calculate_differences` followed by `create_difference_summary` and
    prints a listing of the workbooks found in the output directory.

    Args:
        excel_dir: Directory (str or Path) with the per-period input
            workbooks. Defaults to the original project location.
        output_dir: Directory (str or Path) for the results. Defaults to the
            original project location when `excel_dir` is also omitted,
            otherwise to ``<excel_dir>/Difference_files``.

    Returns:
        None. Returns early, after printing an error, when the input
        directory does not exist.
    """
    # Original project locations, kept as defaults so main() with no
    # arguments behaves exactly as before.
    default_excel_dir = Path(r"D:\RICAAR\Pr.New.Stations.Selection\ensemble.model8.5\ensemble.6models\nc.files\difference.files\Excel_files")
    default_output_dir = Path(r"D:\RICAAR\Pr.New.Stations.Selection\ensemble.model8.5\ensemble.6models\nc.files\difference.files\Excel_files\Difference_files")

    if output_dir is None:
        output_dir = default_output_dir if excel_dir is None else Path(excel_dir) / "Difference_files"
    else:
        output_dir = Path(output_dir)
    excel_dir = default_excel_dir if excel_dir is None else Path(excel_dir)

    print("="*70)
    print("SSP 8.5 PRECIPITATION DIFFERENCE CALCULATOR")
    print("="*70)
    print(f"Input directory:  {excel_dir}")
    print(f"Output directory: {output_dir}")

    # Check if input directory exists
    if not excel_dir.exists():
        print(f"\nError: Input directory does not exist: {excel_dir}")
        return

    # Calculate differences
    calculate_differences(excel_dir, output_dir)

    # Create master summary
    create_difference_summary(output_dir)

    print("\n" + "="*70)
    print("DIFFERENCE CALCULATION COMPLETE")
    print("="*70)
    print(f"Results saved in: {output_dir}")
    print("\nFiles created:")
    # Lists every matching workbook in the directory, including any left
    # over from earlier runs.
    diff_files = list(output_dir.glob("precipitation_ssp85_difference_*.xlsx"))
    for file in sorted(diff_files):
        print(f"  - {file.name}")

    summary_file = output_dir / "SSP85_Difference_Master_Summary.xlsx"
    if summary_file.exists():
        print(f"  - {summary_file.name}")

    print("\nSSP 8.5 difference calculations complete!")

# Entry point: run the whole SSP 8.5 difference workflow when this code is
# executed directly (as a script or as a notebook cell) rather than imported.
if __name__ == "__main__":
    main()

SSP 8.5 PRECIPITATION DIFFERENCE CALCULATOR
Input directory:  D:\RICAAR\Pr.New.Stations.Selection\ensemble.model8.5\ensemble.6models\nc.files\difference.files\Excel_files
Output directory: D:\RICAAR\Pr.New.Stations.Selection\ensemble.model8.5\ensemble.6models\nc.files\difference.files\Excel_files\Difference_files

Calculating precipitation differences between periods for SSP 8.5...

Processing wet season differences...
  File availability:
    ✓ reference: True
    ✓ near_future: True
    ✓ mid_future: True
    ✓ far_future: True
  Data points - Reference: 824, Near: 824, Mid: 824
  Far future: 824
    Calculating Near Future - Reference...
      Common points: 824
    Calculating Mid Future - Reference...
      Common points: 824
    Calculating Mid Future - Near Future...
      Common points: 824
    Calculating Far Future - Reference...
      Common points: 824
    Calculating Far Future - Near Future...
      Common points: 824
    Calculating Far Future - Mid Future...
      Commo