In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta

class ComprehensiveSolarAnalysis:
    def __init__(self, customer_columns_file, house_load_file, generation_file, ev_load_file):
        """Initialize the analysis with file paths."""
        self.customer_ids = pd.read_csv(customer_columns_file, header=None).iloc[0].values
        self.house_load = pd.read_csv(house_load_file, header=None)
        self.generation = pd.read_csv(generation_file, header=None)
        self.ev_load = pd.read_csv(ev_load_file, header=None)
        
        # Create timestamp index
        self.create_timestamp_index()
        
    def create_timestamp_index(self):
        """Create timestamp index for the data (3 years with 30-min intervals)."""
        start_date = datetime(2019, 8, 1) # Assuming start date based on pattern
        timestamps = [start_date + timedelta(minutes=30*i) for i in range(52608)]
        
        # Set index for all dataframes
        self.house_load.index = timestamps
        self.generation.index = timestamps
        self.ev_load.index = timestamps
        
    def select_sample_houses(self, n_houses=5):
        """Select a sample of houses for analysis."""
        np.random.seed(0)  # For reproducibility
        # self.selected_houses = np.random.choice(self.customer_ids, size=n_houses, replace=False)
        self.selected_houses = np.array([2, 73, 74, 109, 141])
        return self.selected_houses

    def calculate_metrics(self, house_id):
        """Calculate self-consumption and self-sufficiency for a specific house."""
        # Get data for specific house
        house_load = self.house_load[house_id] + self.ev_load[house_id]
        generation = self.generation[house_id]
        
        # Calculate self-consumption
        self_consumed = np.minimum(house_load, generation)
        self_consumption = np.sum(self_consumed) / np.sum(generation) * 100
        
        # Calculate self-sufficiency
        self_sufficiency = np.sum(self_consumed) / np.sum(house_load) * 100
        
        return {
            'house_id': house_id,
            'self_consumption [%]': self_consumption,
            'self_sufficiency [%]': self_sufficiency,
            'total_load [kWh]': np.sum(house_load),
            'total_generation [kWh]': np.sum(generation),
            'total_self_consumed [kWh]': np.sum(self_consumed)
        }

    def plot_daily_profile(self, house_id):
        """Plot the mean consumption and generation per hour of day for one house.

        Consumption is the house load plus the EV load. Every sample in the
        record is grouped by the hour of its timestamp and averaged.

        Args:
            house_id: Column label of the house in the load/generation frames.

        Returns:
            The current matplotlib figure, which holds the plot.
        """
        consumption = self.house_load[house_id] + self.ev_load[house_id]
        produced = self.generation[house_id]

        # One (curve, legend label) pair per line, in drawing order.
        hourly_curves = (
            (consumption.groupby(consumption.index.hour).mean(), 'Consumption (Load + EV)'),
            (produced.groupby(produced.index.hour).mean(), 'Generation'),
        )

        plt.figure(figsize=(11.7, 8.3))
        for curve, curve_label in hourly_curves:
            plt.plot(curve.index, curve.values, label=curve_label, linewidth=2)

        plt.title(f'Average Daily Profile - House {house_id}', fontsize=16)
        plt.xlabel('Hour of Day', fontsize=14)
        plt.ylabel('Energy (kWh)', fontsize=14)
        plt.legend(fontsize=12)
        plt.grid(True)
        plt.xticks(fontsize=12)
        plt.yticks(fontsize=12)
        return plt.gcf()

    def plot_daily_average_for_season(self, house_id, year):
        """Create average daily profile plots for summer (Dec-Feb) and winter (Jun-Aug) in subplots.

        For each season the mean value per hour of day is computed month by
        month, and the monthly profiles are then averaged with equal weight.
        A month with no samples in the record is left out of the average
        instead of turning the whole profile into NaN. December is taken from
        ``year - 1``; every other month is taken from ``year``.

        Each subplot also shows a self-consumption percentage derived from
        the averaged profiles (hour-wise minimum of consumption and generation
        over total generation); it is shown as 0 when there is no generation.

        Args:
            house_id: Column label of the house in the load/generation frames.
            year: Year of the seasons to plot.

        Returns:
            The matplotlib figure with the summer axes on top and the winter
            axes below.
        """
        # Get data for specific house
        house_load = self.house_load[house_id] + self.ev_load[house_id]
        generation = self.generation[house_id]

        # Common font sizes
        title_font = 16
        label_font = 14
        tick_font = 12
        legend_font = 12

        def season_profile(series, months):
            """Return the hour-of-day profile of *series* averaged over *months*."""
            monthly_profiles = {}
            for month in months:
                # December belongs to the summer that ends in `year`.
                year_to_use = year if month != 12 else year - 1
                mask = (series.index.year == year_to_use) & (series.index.month == month)
                month_values = series[mask]
                if month_values.empty:
                    continue  # no data for this month; do not poison the average
                monthly_profiles[month] = month_values.groupby(month_values.index.hour).mean()

            if not monthly_profiles:
                return pd.Series(dtype=float)
            # Columns are months, rows are hours; the row mean weights months equally.
            return pd.DataFrame(monthly_profiles).sort_index().mean(axis=1)

        def draw_season(ax, months, season_label):
            """Plot one season's profiles and its self-consumption note on *ax*."""
            load_profile = season_profile(house_load, months)
            gen_profile = season_profile(generation, months)

            ax.plot(load_profile.index, load_profile, label='Consumption (Load + EV)', color='C0', linewidth=2)
            ax.plot(gen_profile.index, gen_profile, label='Generation', color='C1', linewidth=2)
            ax.set_title(f'{season_label} Average Daily Profile {year} - House {house_id}', fontsize=title_font)
            ax.set_xlabel('Hour of Day', fontsize=label_font)
            ax.set_ylabel('Average Energy (kWh)', fontsize=label_font)
            ax.legend(fontsize=legend_font)
            ax.grid(True)
            ax.set_xticks(range(0, 24))
            ax.tick_params(axis='both', which='major', labelsize=tick_font)

            # Self-consumption of the averaged day; guarded against zero generation.
            daily_self_consumed = np.minimum(load_profile, gen_profile)
            generated = np.sum(gen_profile)
            if generated > 0:
                self_consumption = (np.sum(daily_self_consumed) / generated) * 100
            else:
                self_consumption = 0.0
            ax.text(0.02, 0.95, f'Average Daily Self-Consumption: {self_consumption:.1f}%',
                    transform=ax.transAxes, fontsize=tick_font, bbox=dict(facecolor='white', alpha=0.8))

        # Create figure with two subplots
        fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(11.7, 16.5))

        draw_season(ax1, [12, 1, 2], 'Summer (Dec-Feb)')
        draw_season(ax2, [6, 7, 8], 'Winter (Jun-Aug)')

        plt.tight_layout()
        return fig

    def plot_seasonal_data(self, house_id, year=2021):
        """Plot seasonal averages (Summer: Dec-Feb, Winter: Jun-Aug) showing daily patterns.

        For every month of a season the samples are summed per calendar day.
        The monthly series are then aligned on day of month and averaged, so
        point ``i`` on the x axis is the mean daily total of the ``i``-th
        plotted day of month across the season's months that contain it.
        Aligning on day of month is required: aligning on the calendar date
        and writing the result back into a frame indexed by the first month
        silently discards every later month. Months with no samples are left
        out. December is taken from ``year - 1``.

        Each subplot also shows a self-consumption percentage derived from
        the averaged daily totals (day-wise minimum of consumption and
        generation over total generation); it is shown as 0 when there is no
        generation.

        Args:
            house_id: Column label of the house in the load/generation frames.
            year: Year of the seasons to plot.

        Returns:
            The matplotlib figure with the summer axes on top and the winter
            axes below.
        """
        # Get data for specific house
        house_load = self.house_load[house_id] + self.ev_load[house_id]
        generation = self.generation[house_id]

        # Common font sizes
        title_font = 16
        label_font = 14
        tick_font = 12
        legend_font = 12

        def season_daily_totals(series, months):
            """Return mean daily totals of *series* by day of month over *months*."""
            monthly_totals = {}
            for month in months:
                year_to_use = year if month != 12 else year - 1  # Use previous year for December
                mask = (series.index.year == year_to_use) & (series.index.month == month)
                month_values = series[mask]
                if month_values.empty:
                    continue  # no data for this month; leave it out of the average

                # Resample to daily values, then key them by day of month so
                # that different months line up with each other.
                daily_totals = month_values.resample('D').sum()
                daily_totals.index = daily_totals.index.day
                monthly_totals[month] = daily_totals

            if not monthly_totals:
                return pd.Series(dtype=float)
            # Row mean skips months that do not have a given day (e.g. day 31).
            return pd.DataFrame(monthly_totals).sort_index().mean(axis=1)

        def draw_season(ax, months, title):
            """Plot one season's averaged daily totals and its note on *ax*."""
            load_totals = season_daily_totals(house_load, months)
            gen_totals = season_daily_totals(generation, months)

            days = range(len(load_totals))
            ax.plot(days, load_totals, label='Daily consumption (load + EV)', color='C0', linewidth=2)
            ax.plot(days, gen_totals, label='Daily generation', color='C1', linewidth=2)
            ax.set_title(title, fontsize=title_font)
            ax.set_xlabel('Days in Season', fontsize=label_font)
            ax.set_ylabel('Average Energy (kWh)', fontsize=label_font)
            ax.legend(fontsize=legend_font)
            ax.grid(True)
            ax.tick_params(axis='both', which='major', labelsize=tick_font)

            # Self-consumption of the averaged days; guarded against zero generation.
            daily_self_consumed = np.minimum(load_totals, gen_totals)
            generated = gen_totals.sum()
            if generated > 0:
                self_consumption = (daily_self_consumed.sum() / generated) * 100
            else:
                self_consumption = 0.0
            ax.text(0.02, 0.95, f'Average Seasonal Self-Consumption: {self_consumption:.1f}%',
                    transform=ax.transAxes, fontsize=tick_font, bbox=dict(facecolor='white', alpha=0.8))

        # Create figure with two subplots
        fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(11.7, 16.5))  # A4 size in inches (width, height)

        draw_season(ax1, [12, 1, 2], f'Summer (Dec-Feb) - Average Daily Energy - House {house_id}')
        draw_season(ax2, [6, 7, 8], f'Winter - Average Daily Energy - House {house_id}')

        plt.tight_layout()
        return fig
        
        
    def create_summary_report(self, selected_houses=None):
        """Create a summary report for selected houses showing 3 year metrics."""
        if selected_houses is None:
            selected_houses = self.selected_houses
        
        metrics_list = []
        for house_id in selected_houses:
            # Get data for specific house
            house_load = self.house_load[house_id] + self.ev_load[house_id]
            generation = self.generation[house_id]
            
            # Calculate self-consumption and self-sufficiency for entire year
            self_consumed = np.minimum(house_load, generation)
            self_consumption = np.sum(self_consumed) / np.sum(generation) * 100
            self_sufficiency = np.sum(self_consumed) / np.sum(house_load) * 100
            
            metrics = {
                'house_id': house_id,
                'self_consumption [%]': self_consumption,
                'self_sufficiency [%]': self_sufficiency,
                'total_load [kWh]': np.sum(house_load),
                'total_generation [kWh]': np.sum(generation),
                'total_self_consumed [kWh]': np.sum(self_consumed)
            }
            metrics_list.append(metrics)
        
        return pd.DataFrame(metrics_list)

    def print_seasonal_stats(self, house_id, year=2021):
        """Print seasonal statistics for summer (Dec-Feb) and winter (Jun-Aug)."""
        house_load = self.house_load[house_id] + self.ev_load[house_id]
        generation = self.generation[house_id]
        
        def calculate_seasonal_stats(season_months, season_year):
            """Calculate statistics for a given season."""
            seasonal_data = {'load': [], 'generation': []}
            
            for month in season_months:
                year_to_use = season_year if month != 12 else season_year - 1
                mask = (house_load.index.year == year_to_use) & (house_load.index.month == month)
                month_load = house_load[mask].resample('D').sum()
                month_gen = generation[mask].resample('D').sum()
                
                seasonal_data['load'].extend(month_load.values)
                seasonal_data['generation'].extend(month_gen.values)
            
            # Convert to numpy arrays for calculations
            seasonal_load = np.array(seasonal_data['load'])
            seasonal_gen = np.array(seasonal_data['generation'])
            daily_self_consumed = np.minimum(seasonal_load, seasonal_gen)
            
            return {
                'Total Load (kWh)': np.sum(seasonal_load),
                'Total Generation (kWh)': np.sum(seasonal_gen),
                'Average Daily Load (kWh)': np.mean(seasonal_load),
                'Average Daily Generation (kWh)': np.mean(seasonal_gen),
                'Max Daily Load (kWh)': np.max(seasonal_load),
                'Max Daily Generation (kWh)': np.max(seasonal_gen),
                'Min Daily Load (kWh)': np.min(seasonal_load),
                'Min Daily Generation (kWh)': np.min(seasonal_gen),
                'Self-Consumption (%)': (np.sum(daily_self_consumed) / np.sum(seasonal_gen)) * 100 if np.sum(seasonal_gen) > 0 else 0,
                'Days with Excess Generation': np.sum(seasonal_gen > seasonal_load)
            }
        
        # Calculate statistics for summer (Dec-Feb) and winter (Jun-Aug)
        stats = pd.DataFrame({
            'Summer (Dec-Feb)': calculate_seasonal_stats([12, 1, 2], year),
            'Winter (Jun-Aug)': calculate_seasonal_stats([6, 7, 8], year)
        })
        
        return stats

import os

def main():
    """Run the full analysis and write the report, plots and statistics.

    Creates the ``reports``, ``plots`` and ``stats`` directories, loads the
    four CSV inputs from the working directory, writes the summary report,
    and then, for every selected house, saves three plots and one statistics
    CSV. A failure for one house is reported and the remaining houses are
    still processed; any other failure is reported and ends the run.
    """
    def save_and_close(fig, path):
        """Write *fig* to *path* and release the figure even if saving fails."""
        try:
            fig.savefig(path, dpi=300, bbox_inches='tight')
        finally:
            # Without this, a failed save would leave the figure open for the
            # rest of the run.
            plt.close(fig)

    try:
        # Create necessary directories
        for directory in ('reports', 'plots', 'stats'):
            os.makedirs(directory, exist_ok=True)

        # Initialize analysis
        analysis = ComprehensiveSolarAnalysis(
            'CustomerColumns.csv',
            'HouseLoad.csv',
            'Generation.csv',
            'EVLoad.csv'
        )

        # Select sample houses
        selected_houses = analysis.select_sample_houses(5)
        print("\nSelected houses for analysis:", selected_houses)

        # Generate and display 3 year summary report
        print("\nGenerating 3 year Summary Report...")
        summary_df = analysis.create_summary_report()
        print("\n3 year Summary Report:")
        print(summary_df.round(2))

        # Save 3 year summary report
        summary_df.to_csv('reports/3_year_summary_report.csv', index=False)
        print("\n3 year summary report saved to 'reports/3_year_summary_report.csv'")

        # Process each house
        print("\nDetailed analysis for each house...")
        failed_houses = []
        for house_id in selected_houses:
            try:
                # Create and save daily profile plots
                save_and_close(
                    analysis.plot_daily_profile(house_id),
                    f'plots/house_{house_id}_daily_profile.png',
                )

                # Create and save seasonal average plots
                save_and_close(
                    analysis.plot_daily_average_for_season(house_id, 2021),
                    f'plots/house_{house_id}_seasonal_daily_averages.png',
                )

                # Create and save seasonal profile plots
                save_and_close(
                    analysis.plot_seasonal_data(house_id),
                    f'plots/house_{house_id}_seasonal_profiles.png',
                )

                # Generate and save seasonal statistics
                seasonal_stats = analysis.print_seasonal_stats(house_id)

                print(f"\nSeasonal Statistics for House {house_id}:")
                print(seasonal_stats.round(2))

                # Save seasonal statistics to CSV
                stats_filename = f'stats/house_{house_id}_seasonal_stats.csv'
                seasonal_stats.to_csv(stats_filename)
                print(f"Seasonal statistics saved to '{stats_filename}'")

                print(f"Successfully completed analysis for House {house_id}")

            except Exception as e:
                # Best effort: one bad house must not stop the others.
                failed_houses.append(house_id)
                print(f"Error processing house {house_id}: {str(e)}")
                print("Continuing with next house...")

        # Only claim success when every house was actually processed.
        if failed_houses:
            print(f"\nAnalysis finished, but these houses failed: {failed_houses}")
        else:
            print("\nAnalysis complete! All files have been saved.")

    except Exception as e:
        print(f"\nError in main execution: {str(e)}")

# Run the analysis only when this file is executed directly, not when imported.
if __name__ == "__main__":
    main()


Selected houses for analysis: [  2  73  74 109 141]

Generating 3 year Summary Report...

3 year Summary Report:
   house_id  self_consumption [%]  self_sufficiency [%]  total_load [kWh]  \
0         2                 40.11                 11.95          29663.13   
1        73                 29.23                 20.72          41218.95   
2        74                 23.16                 25.01          59696.72   
3       109                 38.59                 21.20          40642.15   
4       141                 39.30                 18.86          34793.64   

   total_generation [kWh]  total_self_consumed [kWh]  
0                 8839.49                    3545.83  
1                29216.50                    8541.10  
2                64457.67                   14930.98  
3                22323.31                    8615.19  
4                16698.43                    6562.78  

3 year summary report saved to 'reports/3_year_summary_report.csv'

Detailed analysis for ea