In [4]:
import pandas as pd
import numpy as np
from pathlib import Path
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')

class GenerationSimulation:
    """
    Create statistical generation profiles from combined resurety data
    at hourly, daily, and monthly levels
    """
    
    def __init__(self):
        # Define paths - reading from resurety_data folder
        self.data_path = Path('resurety_data')  # Changed from '../resurety_data'
        self.base_output_path = Path('Renewable Portfolio LLC')  # New base output path
        
        # Get available combined files
        self.available_files = list(self.data_path.glob('*_generation_price_combined.csv'))
        self.available_sites = [f.stem.replace('_generation_price_combined', '') for f in self.available_files]
        
        # Define percentiles to calculate
        self.percentiles = [1, 5, 10, 15, 25, 50, 75, 85, 90, 95, 99]
        
        # Month names for labeling
        self.month_names = ['', 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                           'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
        self.month_names_full = ['', 'January', 'February', 'March', 'April', 'May', 'June',
                                'July', 'August', 'September', 'October', 'November', 'December']
    
    def get_site_selection(self):
        """
        Interactive site selection with option for all sites
        """
        print("\n" + "="*60)
        print("GENERATION SIMULATION - SITE SELECTION")
        print("="*60)
        
        if not self.available_sites:
            print("❌ No combined generation-price files found in resurety_data!")
            return None
        
        print("\nAvailable options:")
        print("0. ALL SITES (Process all sites at once)")
        for i, site in enumerate(self.available_sites):
            print(f"{i+1}. {site}")
        
        print("="*60)
        
        while True:
            try:
                selection = input("\n📊 Select option number (0 for all sites): ").strip()
                if selection == '0':
                    return 'ALL_SITES'
                else:
                    idx = int(selection) - 1
                    if 0 <= idx < len(self.available_sites):
                        return self.available_sites[idx]
                    else:
                        print("❌ Invalid selection!")
            except:
                print("❌ Please enter a valid number!")
    
    def get_automatic_month_range(self):
        """
        Automatically determine month range: current month to 11 months later
        """
        current_date = datetime.now()
        current_month = current_date.month
        
        # Use current month as start
        start_month = current_month
        
        # End month is one month before start month (12 month cycle)
        if start_month == 1:
            end_month = 12
        else:
            end_month = start_month - 1
        
        print(f"\n📅 Auto-detected period: {self.month_names[start_month]} to {self.month_names[end_month]} (12 months)")
        print(f"   Starting from current month: {self.month_names[current_month]}")
        
        return start_month, end_month
    
    def get_months_in_range(self, start_month, end_month):
        """
        List the month numbers from start_month through end_month inclusive,
        continuing past December into January when the range wraps the year
        """
        if start_month > end_month:
            # Wrapping range: run to December, then restart from January
            return [*range(start_month, 13), *range(1, end_month + 1)]

        # Plain range inside a single calendar year
        return [*range(start_month, end_month + 1)]
    
    def filter_data_for_months(self, df, start_month, end_month):
        """
        Filter dataframe for month range, handling year-wrapping
        """
        months_in_range = self.get_months_in_range(start_month, end_month)
        return df[df['month'].isin(months_in_range)].copy()
    
    def create_month_order_map(self, start_month, end_month):
        """
        Create a mapping for sorting months in the specified order
        """
        months_in_range = self.get_months_in_range(start_month, end_month)
        # Create a dictionary mapping month number to its position in the sequence
        return {month: idx for idx, month in enumerate(months_in_range)}
    
    def get_forecast_year(self, month):
        """
        Return the calendar year in which the given month number next occurs,
        counting the current month as part of the current year
        """
        today = datetime.now()

        # Months earlier than the current one have already passed this year,
        # so the forecast for them belongs to the following year
        already_passed = month < today.month
        return today.year + 1 if already_passed else today.year
    
    def calculate_hourly_statistics(self, df_filtered, month_order_map):
        """
        Calculate hourly generation statistics with datetime labels
        """
        print("\n⚡ Calculating HOURLY generation statistics...")
        
        # Group by month-day-hour across all years
        grouped = df_filtered.groupby(['month', 'day', 'hour'])['generation_mw']
        
        results = []
        for (month, day, hour), group in grouped:
            if len(group) < 5:  # Skip if too few data points
                continue
            
            # Determine the forecast year
            forecast_year = self.get_forecast_year(month)
            
            # Create datetime label
            datetime_label = f"{self.month_names[month]}-{day:02d} {hour:02d}:00"
                
            stats = {
                'datetime_label': datetime_label,
                'year': forecast_year,  # ADD YEAR
                'month': month,
                'day': day,
                'hour': hour,
                'month_order': month_order_map[month],  # Add order for sorting
                'mean': group.mean(),
                'std_dev': group.std(),
                'count': len(group),
                'min': group.min(),
                'max': group.max()
            }
            
            # Calculate percentiles
            for p in self.percentiles:
                stats[f'p{p}'] = group.quantile(p/100)
            
            results.append(stats)
        
        results_df = pd.DataFrame(results)
        # Sort by the custom month order, then day and hour
        results_df = results_df.sort_values(['month_order', 'day', 'hour']).reset_index(drop=True)
        # Drop the temporary month_order column
        results_df = results_df.drop('month_order', axis=1)
        
        print(f"   ✓ Calculated statistics for {len(results_df)} hourly slots")
        
        return results_df
    
    def calculate_daily_statistics(self, df_filtered, month_order_map):
        """
        Calculate daily generation statistics with date labels
        """
        print("\n📅 Calculating DAILY generation statistics...")
        
        # First aggregate hourly to daily for each year
        df_daily = df_filtered.groupby(['year', 'month', 'day'])['generation_mw'].sum().reset_index()
        df_daily.rename(columns={'generation_mw': 'daily_generation_mwh'}, inplace=True)
        
        # Then calculate statistics across years for each day
        grouped = df_daily.groupby(['month', 'day'])['daily_generation_mwh']
        
        results = []
        for (month, day), group in grouped:
            if len(group) < 5:  # Skip if too few data points
                continue
            
            # Determine the forecast year
            forecast_year = self.get_forecast_year(month)
            
            # Create date label
            date_label = f"{self.month_names[month]}-{day:02d}"
                
            stats = {
                'date_label': date_label,
                'year': forecast_year,  # ADD YEAR
                'month': month,
                'day': day,
                'month_order': month_order_map[month],  # Add order for sorting
                'mean': group.mean(),
                'std_dev': group.std(),
                'count': len(group),
                'min': group.min(),
                'max': group.max()
            }
            
            # Calculate percentiles
            for p in self.percentiles:
                stats[f'p{p}'] = group.quantile(p/100)
            
            results.append(stats)
        
        results_df = pd.DataFrame(results)
        # Sort by the custom month order, then day
        results_df = results_df.sort_values(['month_order', 'day']).reset_index(drop=True)
        # Drop the temporary month_order column
        results_df = results_df.drop('month_order', axis=1)
        
        print(f"   ✓ Calculated statistics for {len(results_df)} daily slots")
        
        return results_df
    
    def calculate_monthly_statistics(self, df_filtered, month_order_map):
        """
        Calculate monthly generation statistics with month names
        """
        print("\n📊 Calculating MONTHLY generation statistics...")
        
        # First aggregate to monthly for each year
        df_monthly = df_filtered.groupby(['year', 'month'])['generation_mw'].sum().reset_index()
        df_monthly.rename(columns={'generation_mw': 'monthly_generation_mwh'}, inplace=True)
        
        # Then calculate statistics across years for each month
        grouped = df_monthly.groupby('month')['monthly_generation_mwh']
        
        results = []
        for month, group in grouped:
            if len(group) < 5:  # Skip if too few data points
                continue
            
            # Convert month to int if needed
            month_idx = int(month)
            
            # Determine the forecast year
            forecast_year = self.get_forecast_year(month_idx)
                
            stats = {
                'month_name': self.month_names_full[month_idx],
                'year': forecast_year,  # ADD YEAR
                'month': month_idx,
                'month_order': month_order_map[month_idx],  # Add order for sorting
                'mean': group.mean(),
                'std_dev': group.std(),
                'count': len(group),
                'min': group.min(),
                'max': group.max()
            }
            
            # Calculate percentiles
            for p in self.percentiles:
                stats[f'p{p}'] = group.quantile(p/100)
            
            results.append(stats)
        
        results_df = pd.DataFrame(results)
        # Sort by the custom month order
        results_df = results_df.sort_values(['month_order']).reset_index(drop=True)
        # Drop the temporary month_order column
        results_df = results_df.drop('month_order', axis=1)
        
        print(f"   ✓ Calculated statistics for {len(results_df)} months")
        
        return results_df
    
    def create_hourly_timeseries(self, df_filtered, month_order_map):
        """
        Create hourly generation timeseries with years as columns
        """
        print("\n⏰ Creating HOURLY generation timeseries...")
        
        # Use all available data - no year skipping
        df_work = df_filtered.copy()
        
        # Add month order for sorting
        df_work['month_order'] = df_work['month'].map(month_order_map)
        
        # Pivot data to have years as columns
        pivot_df = df_work.pivot_table(
            index=['month', 'day', 'hour', 'month_order'],
            columns='year',
            values='generation_mw',
            aggfunc='mean'
        ).reset_index()
        
        # Sort by custom month order
        pivot_df = pivot_df.sort_values(['month_order', 'day', 'hour']).reset_index(drop=True)
        
        # Add datetime label
        pivot_df['datetime_label'] = pivot_df.apply(
            lambda row: f"{self.month_names[int(row['month'])]}-{int(row['day']):02d} {int(row['hour']):02d}:00",
            axis=1
        )
        
        # Drop the temporary month_order column
        pivot_df = pivot_df.drop('month_order', axis=1)
        
        # Reorder columns
        year_cols = [col for col in pivot_df.columns if isinstance(col, int)]
        cols = ['datetime_label', 'month', 'day', 'hour'] + sorted(year_cols)
        pivot_df = pivot_df[cols]
        
        print(f"   ✓ Created timeseries for {len(pivot_df)} hourly slots")
        print(f"   ✓ Including data from all {len(year_cols)} years")
        
        return pivot_df
    
    def create_daily_timeseries(self, df_filtered, month_order_map):
        """
        Create daily generation timeseries with years as columns
        """
        print("\n📅 Creating DAILY generation timeseries...")
        
        # First aggregate to daily
        df_daily = df_filtered.groupby(['year', 'month', 'day'])['generation_mw'].sum().reset_index()
        df_daily.rename(columns={'generation_mw': 'daily_generation_mwh'}, inplace=True)
        
        # Use all available data - no year skipping
        
        # Add month order for sorting
        df_daily['month_order'] = df_daily['month'].map(month_order_map)
        
        # Pivot data to have years as columns
        pivot_df = df_daily.pivot_table(
            index=['month', 'day', 'month_order'],
            columns='year',
            values='daily_generation_mwh',
            aggfunc='sum'
        ).reset_index()
        
        # Sort by custom month order
        pivot_df = pivot_df.sort_values(['month_order', 'day']).reset_index(drop=True)
        
        # Add date label
        pivot_df['date_label'] = pivot_df.apply(
            lambda row: f"{self.month_names[int(row['month'])]}-{int(row['day']):02d}",
            axis=1
        )
        
        # Drop the temporary month_order column
        pivot_df = pivot_df.drop('month_order', axis=1)
        
        # Reorder columns
        year_cols = [col for col in pivot_df.columns if isinstance(col, int)]
        cols = ['date_label', 'month', 'day'] + sorted(year_cols)
        pivot_df = pivot_df[cols]
        
        print(f"   ✓ Created timeseries for {len(pivot_df)} daily slots")
        print(f"   ✓ Including data from all {len(year_cols)} years")
        
        return pivot_df
    
    def create_monthly_timeseries(self, df_filtered, month_order_map):
        """
        Create monthly generation timeseries with years as columns
        """
        print("\n📊 Creating MONTHLY generation timeseries...")
        
        # First aggregate to monthly
        df_monthly = df_filtered.groupby(['year', 'month'])['generation_mw'].sum().reset_index()
        df_monthly.rename(columns={'generation_mw': 'monthly_generation_mwh'}, inplace=True)
        
        # Use all available data - no year skipping
        
        # Add month order for sorting
        df_monthly['month_order'] = df_monthly['month'].map(month_order_map)
        
        # Pivot data to have years as columns
        pivot_df = df_monthly.pivot_table(
            index=['month', 'month_order'],
            columns='year',
            values='monthly_generation_mwh',
            aggfunc='sum'
        ).reset_index()
        
        # Sort by custom month order
        pivot_df = pivot_df.sort_values(['month_order']).reset_index(drop=True)
        
        # Add month name
        pivot_df['month_name'] = pivot_df['month'].apply(lambda x: self.month_names_full[int(x)])
        
        # Drop the temporary month_order column
        pivot_df = pivot_df.drop('month_order', axis=1)
        
        # Reorder columns
        year_cols = [col for col in pivot_df.columns if isinstance(col, int)]
        cols = ['month_name', 'month'] + sorted(year_cols)
        pivot_df = pivot_df[cols]
        
        print(f"   ✓ Created timeseries for {len(pivot_df)} months")
        print(f"   ✓ Including data from all {len(year_cols)} years")
        
        return pivot_df
    
    def save_all_results(self, hourly_stats, daily_stats, monthly_stats, 
                        hourly_ts, daily_ts, monthly_ts, site_name):
        """
        Save all files in the new structure: Renewable Portfolio LLC/{site_name}/Generation/
        """
        # Create the site-specific Generation folder
        generation_path = self.base_output_path / site_name / 'Generation'
        generation_path.mkdir(parents=True, exist_ok=True)
        
        # Create plots folder for the site
        plots_path = self.base_output_path / site_name / 'plots'
        plots_path.mkdir(exist_ok=True)
        
        # Save hourly statistics (CHANGED: stats -> forecast)
        hourly_stats_file = f"{site_name}_generation_hourly_forecast.csv"
        hourly_stats_path = generation_path / hourly_stats_file
        
        cols_hourly = ['datetime_label', 'year', 'month', 'day', 'hour', 'mean', 'std_dev', 
                      'p1', 'p5', 'p10', 'p15', 'p25', 'p50', 
                      'p75', 'p85', 'p90', 'p95', 'p99',
                      'min', 'max', 'count']
        
        hourly_stats[cols_hourly].to_csv(hourly_stats_path, index=False, float_format='%.3f')
        print(f"\n💾 Saved: Renewable Portfolio LLC/{site_name}/Generation/{hourly_stats_file}")
        
        # Save daily statistics (CHANGED: stats -> forecast)
        daily_stats_file = f"{site_name}_generation_daily_forecast.csv"
        daily_stats_path = generation_path / daily_stats_file
        
        cols_daily = ['date_label', 'year', 'month', 'day', 'mean', 'std_dev', 
                     'p1', 'p5', 'p10', 'p15', 'p25', 'p50', 
                     'p75', 'p85', 'p90', 'p95', 'p99',
                     'min', 'max', 'count']
        
        daily_stats[cols_daily].to_csv(daily_stats_path, index=False, float_format='%.3f')
        print(f"💾 Saved: Renewable Portfolio LLC/{site_name}/Generation/{daily_stats_file}")
        
        # Save monthly statistics (CHANGED: stats -> forecast)
        monthly_stats_file = f"{site_name}_generation_monthly_forecast.csv"
        monthly_stats_path = generation_path / monthly_stats_file
        
        cols_monthly = ['month_name', 'year', 'month', 'mean', 'std_dev', 
                       'p1', 'p5', 'p10', 'p15', 'p25', 'p50', 
                       'p75', 'p85', 'p90', 'p95', 'p99',
                       'min', 'max', 'count']
        
        monthly_stats[cols_monthly].to_csv(monthly_stats_path, index=False, float_format='%.3f')
        print(f"💾 Saved: Renewable Portfolio LLC/{site_name}/Generation/{monthly_stats_file}")
        
        # Save hourly timeseries
        hourly_ts_file = f"{site_name}_generation_hourly_timeseries.csv"
        hourly_ts_path = generation_path / hourly_ts_file
        hourly_ts_save = hourly_ts.copy()
        year_cols = [col for col in hourly_ts_save.columns if isinstance(col, int)]
        for col in year_cols:
            hourly_ts_save[col] = hourly_ts_save[col].apply(lambda x: '' if pd.isna(x) else f'{x:.3f}')
        hourly_ts_save.to_csv(hourly_ts_path, index=False)
        print(f"💾 Saved: Renewable Portfolio LLC/{site_name}/Generation/{hourly_ts_file}")
        
        # Save daily timeseries
        daily_ts_file = f"{site_name}_generation_daily_timeseries.csv"
        daily_ts_path = generation_path / daily_ts_file
        daily_ts_save = daily_ts.copy()
        year_cols = [col for col in daily_ts_save.columns if isinstance(col, int)]
        for col in year_cols:
            daily_ts_save[col] = daily_ts_save[col].apply(lambda x: '' if pd.isna(x) else f'{x:.3f}')
        daily_ts_save.to_csv(daily_ts_path, index=False)
        print(f"💾 Saved: Renewable Portfolio LLC/{site_name}/Generation/{daily_ts_file}")
        
        # Save monthly timeseries
        monthly_ts_file = f"{site_name}_generation_monthly_timeseries.csv"
        monthly_ts_path = generation_path / monthly_ts_file
        monthly_ts_save = monthly_ts.copy()
        year_cols = [col for col in monthly_ts_save.columns if isinstance(col, int)]
        for col in year_cols:
            monthly_ts_save[col] = monthly_ts_save[col].apply(lambda x: '' if pd.isna(x) else f'{x:.3f}')
        monthly_ts_save.to_csv(monthly_ts_path, index=False)
        print(f"💾 Saved: Renewable Portfolio LLC/{site_name}/Generation/{monthly_ts_file}")
    
    def print_sample_results(self, hourly_df, daily_df, monthly_df, hourly_ts, daily_ts):
        """
        Print one example row from the hourly and daily statistics and
        timeseries tables (monthly_df is accepted but not displayed)

        The example is taken at day 15 (hour 12 for hourly tables) of the
        lowest month number present in each table; nothing beyond the
        section heading is printed when that row does not exist.
        """
        rule = "=" * 60
        print("\n" + rule)
        print("SAMPLE RESULTS")
        print(rule)

        def day_15_rows(frame, hour=None):
            # Rows for day 15 of the smallest month number, optionally at one hour
            lowest_month = frame['month'].min()
            wanted = (frame['month'] == lowest_month) & (frame['day'] == 15)
            if hour is not None:
                wanted = wanted & (frame['hour'] == hour)
            return frame[wanted]

        def first_years(frame):
            # At most the first three year columns, in column order
            return [name for name in frame.columns if isinstance(name, int)][:3]

        # Hourly statistics
        print("\n⚡ HOURLY GENERATION STATISTICS SAMPLE (Mid-month, Noon):")
        if not hourly_df.empty:
            picked = day_15_rows(hourly_df, hour=12)
            if not picked.empty:
                first = picked.iloc[0]
                print(f"   {first['datetime_label']} (Year: {first['year']}):")
                print(f"   Mean: {first['mean']:.2f} MW, P10-P90: {first['p10']:.2f}-{first['p90']:.2f} MW")

        # Hourly timeseries
        print("\n⏰ HOURLY GENERATION TIMESERIES SAMPLE (Same time):")
        if not hourly_ts.empty:
            picked = day_15_rows(hourly_ts, hour=12)
            if not picked.empty:
                first = picked.iloc[0]
                shown_years = first_years(hourly_ts)
                print(f"   {first['datetime_label']}:")
                parts = []
                for year in shown_years:
                    if pd.notna(first[year]):
                        parts.append(f"{int(year)}: {first[year]:.2f} MW")
                print(f"   {', '.join(parts)}, ...")

        # Daily statistics
        print("\n📅 DAILY GENERATION STATISTICS SAMPLE (Mid-month):")
        if not daily_df.empty:
            picked = day_15_rows(daily_df)
            if not picked.empty:
                first = picked.iloc[0]
                print(f"   {first['date_label']} (Year: {first['year']}):")
                print(f"   Mean: {first['mean']:.2f} MWh, P10-P90: {first['p10']:.2f}-{first['p90']:.2f} MWh")

        # Daily timeseries
        print("\n📊 DAILY GENERATION TIMESERIES SAMPLE (Same date):")
        if not daily_ts.empty:
            picked = day_15_rows(daily_ts)
            if not picked.empty:
                first = picked.iloc[0]
                shown_years = first_years(daily_ts)
                print(f"   {first['date_label']}:")
                parts = []
                for year in shown_years:
                    if pd.notna(first[year]):
                        parts.append(f"{int(year)}: {first[year]:.1f} MWh")
                print(f"   {', '.join(parts)}, ...")
    
    def process_single_site(self, site_name, start_month, end_month):
        """
        Process a single site with given month range
        """
        # Create month order mapping for this specific range
        month_order_map = self.create_month_order_map(start_month, end_month)
        
        # Load and prepare data
        print(f"\n{'='*60}")
        print(f"Processing: {site_name}")
        
        # Display month range properly
        months_in_range = self.get_months_in_range(start_month, end_month)
        num_months = len(months_in_range)
        
        if start_month <= end_month:
            print(f"Month range: {self.month_names[start_month]} to {self.month_names[end_month]} ({num_months} months)")
        else:
            print(f"Month range: {self.month_names[start_month]} to {self.month_names[end_month]} (year-wrapping, {num_months} months)")
        
        print(f"{'='*60}")
        
        # Load combined data
        file_path = self.data_path / f"{site_name}_generation_price_combined.csv"
        print(f"\n📁 Loading data from: {file_path}")
        
        try:
            df = pd.read_csv(file_path)
            df['datetime'] = pd.to_datetime(df['datetime'])
            
            # Extract day of month from datetime since it's not in the columns
            df['day'] = df['datetime'].dt.day
            
            # Data already has year, month, hour columns from our previous processing
            # Just need to ensure they're integers
            df['year'] = df['year'].astype(int)
            df['month'] = df['month'].astype(int)
            df['hour'] = df['hour'].astype(int)
            
            # Filter for selected months
            df_filtered = self.filter_data_for_months(df, start_month, end_month)
            
            # Get data summary
            years_available = sorted(df_filtered['year'].unique())
            print(f"\n📊 Data summary:")
            print(f"   Years available: {years_available[0]} to {years_available[-1]} ({len(years_available)} years)")
            print(f"   Total data points: {len(df_filtered):,}")
            print(f"   Months included: {', '.join([self.month_names[m] for m in months_in_range])}")
            
            # Updated note about year-wrapping
            if start_month > end_month:
                print(f"\n   ℹ️  Note: Year-wrapping range detected!")
                print(f"   First year ({years_available[0]}) and last year ({years_available[-1]}) may have partial data")
                print(f"   All available data will be included in the timeseries files")
            
            # Calculate statistics at all three levels
            print("\n" + "-"*40)
            print("GENERATION DISTRIBUTION STATISTICS")
            print("-"*40)
            hourly_stats = self.calculate_hourly_statistics(df_filtered, month_order_map)
            daily_stats = self.calculate_daily_statistics(df_filtered, month_order_map)
            monthly_stats = self.calculate_monthly_statistics(df_filtered, month_order_map)
            
            # Create timeseries at all three levels
            print("\n" + "-"*40)
            print("GENERATION TIMESERIES")
            print("-"*40)
            hourly_ts = self.create_hourly_timeseries(df_filtered, month_order_map)
            daily_ts = self.create_daily_timeseries(df_filtered, month_order_map)
            monthly_ts = self.create_monthly_timeseries(df_filtered, month_order_map)
            
            # Print samples
            self.print_sample_results(hourly_stats, daily_stats, monthly_stats, hourly_ts, daily_ts)
            
            # Save all results with new structure
            print("\n" + "-"*40)
            print("SAVING RESULTS")
            print("-"*40)
            self.save_all_results(hourly_stats, daily_stats, monthly_stats,
                                hourly_ts, daily_ts, monthly_ts, site_name)
            
            return True
            
        except Exception as e:
            print(f"\n❌ Error processing {site_name}: {str(e)}")
            return False
    
    def run_simulation(self):
        """
        Main function to run the generation simulation
        """
        print("\n🌟 Generation Statistical Simulation")
        print("   (From Resurety Combined Data)")
        print("="*60)
        
        # Get site selection
        site_selection = self.get_site_selection()
        if not site_selection:
            return
        
        # Get automatic month range (next month to 12 months later)
        start_month, end_month = self.get_automatic_month_range()
        
        # Process based on selection
        if site_selection == 'ALL_SITES':
            # Process all sites
            print("\n" + "="*60)
            print("🚀 PROCESSING ALL SITES")
            print("="*60)
            
            successful = 0
            failed = 0
            
            for i, site_name in enumerate(self.available_sites, 1):
                print(f"\n[{i}/{len(self.available_sites)}] Processing {site_name}...")
                
                if self.process_single_site(site_name, start_month, end_month):
                    successful += 1
                else:
                    failed += 1
            
            # Summary
            print("\n" + "="*60)
            print("✨ ALL SITES PROCESSING COMPLETE!")
            print("="*60)
            print(f"\n📊 Summary:")
            print(f"   ✅ Successfully processed: {successful} sites")
            if failed > 0:
                print(f"   ❌ Failed: {failed} sites")
            
            months_in_range = self.get_months_in_range(start_month, end_month)
            print(f"\n12-month period: {self.month_names[start_month]} to {self.month_names[end_month]}")
            
            print(f"\n📁 Files saved in: Renewable Portfolio LLC/[site_name]/Generation/")
            
        else:
            # Process single site
            if self.process_single_site(site_selection, start_month, end_month):
                print("\n" + "="*60)
                print("✨ SIMULATION COMPLETE!")
                print("="*60)
                print(f"\nAnalysis created for {site_selection}")
                
                months_in_range = self.get_months_in_range(start_month, end_month)
                print(f"12-month period: {self.month_names[start_month]} to {self.month_names[end_month]}")
                
                print(f"\n📁 Files saved in:")
                print(f"   Renewable Portfolio LLC/{site_selection}/Generation/")
                print("     • Hourly/Daily/Monthly generation statistics (with year column)")
                print("     • Hourly/Daily/Monthly generation timeseries")
                print(f"   Renewable Portfolio LLC/{site_selection}/plots/")
                print("     • (Ready for future visualizations)")
        
        # Ask if user wants to create another simulation
        another = input("\n🔄 Create another generation simulation? (y/n): ").strip().lower()
        if another == 'y':
            self.run_simulation()

# Script entry point: build the simulator (which scans resurety_data for
# combined CSV files) and start the interactive run. The instance stays
# bound to the module-level name `simulator`.
if __name__ == "__main__":
    simulator = GenerationSimulation()
    simulator.run_simulation()


🌟 Generation Statistical Simulation
   (From Resurety Combined Data)

GENERATION SIMULATION - SITE SELECTION

Available options:
0. ALL SITES (Process all sites at once)
1. Blue_Wing_Solar_Energy_Generator
2. High_Lonesome_Wind_Power
3. Midway_Solar_Farm_III
4. Misae_Solar
5. Mount_Signal_Solar_Farm_II
6. RE_Mustang_LLC
7. Stanton_Wind_Energy_LLC

📅 Auto-detected period: Jul to Jun (12 months)
   Starting from current month: Jul

🚀 PROCESSING ALL SITES

[1/7] Processing Blue_Wing_Solar_Energy_Generator...

Processing: Blue_Wing_Solar_Energy_Generator
Month range: Jul to Jun (year-wrapping, 12 months)

📁 Loading data from: resurety_data\Blue_Wing_Solar_Energy_Generator_generation_price_combined.csv

📊 Data summary:
   Years available: 2012 to 2025 (14 years)
   Total data points: 115,115
   Months included: Jul, Aug, Sep, Oct, Nov, Dec, Jan, Feb, Mar, Apr, May, Jun

   ℹ️  Note: Year-wrapping range detected!
   First year (2012) and last year (2025) may have partial data
   All availab