In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np


In [None]:
# Locations of the two input CSV files (relative paths, resolved against
# the current working directory when the files are read)
generation_by_source_path, historical_power_load_path = (
    'data/generation_by_source.csv',
    'data/historical_power_load.csv',
)

In [None]:
# Load both CSV files into DataFrames: generation first, then load
generation_data, load_data = (
    pd.read_csv(csv_path)
    for csv_path in (generation_by_source_path, historical_power_load_path)
)

In [None]:
# Parse the timestamp column of each frame into pandas datetimes so the
# frames can later be joined and grouped on time
generation_data['datetime_beginning_ept'] = (
    generation_data['datetime_beginning_ept'].pipe(pd.to_datetime)
)
load_data['forecast_hour_beginning_ept'] = (
    load_data['forecast_hour_beginning_ept'].pipe(pd.to_datetime)
)

In [None]:
# Keep only the generation rows whose is_renewable flag equals True
renewable_generation = generation_data.loc[
    generation_data['is_renewable'].eq(True)
]

In [None]:
# Total renewable MW per timestamp; as_index=False keeps the timestamp as a
# regular column so the result can be merged on it
renewable_generation_agg = (
    renewable_generation
    .groupby('datetime_beginning_ept', as_index=False)['mw']
    .sum()
)

In [None]:
# Attach the aggregated renewable generation to every load row.
# A left join keeps all load rows; hours with no matching renewable
# timestamp end up with NaN in 'mw'.
clean_data = load_data.merge(
    renewable_generation_agg,
    how='left',
    left_on='forecast_hour_beginning_ept',
    right_on='datetime_beginning_ept',
)

In [None]:
# Fill missing values in renewable generation with 0.
# Assign the result back to the column instead of calling
# fillna(..., inplace=True) on clean_data['mw']: the in-place call acts on an
# intermediate Series (chained assignment), which emits a FutureWarning on
# pandas 2.x and does not update clean_data at all under Copy-on-Write.
clean_data['mw'] = clean_data['mw'].fillna(0)

In [None]:
# Net load = forecast load minus renewable generation, computed row by row
clean_data['net_load_mw'] = clean_data['forecast_load_mw'].sub(clean_data['mw'])

In [None]:
# Display the first few rows of net load data to understand the output.
# .head() limits the preview to the leading rows, as the comment intends,
# instead of printing the whole frame.
print(clean_data[['forecast_hour_beginning_ept', 'forecast_load_mw', 'mw', 'net_load_mw']].head())

In [None]:
# Mean net load for each hour of the day: bucket the net-load values by the
# hour component of the forecast timestamp, then average each bucket
average_hourly_net_load = (
    clean_data['net_load_mw']
    .groupby(clean_data['forecast_hour_beginning_ept'].dt.hour)
    .mean()
)

# Show the resulting hour -> mean net load series
print(average_hourly_net_load)

In [None]:
# Build "H:00" tick labels from the index of the averaged series
hours = [f"{hour}:00" for hour in average_hourly_net_load.index]

# Draw the bar chart through the explicit Figure/Axes interface
fig, ax = plt.subplots(figsize=(12, 6))
bars = ax.bar(hours, average_hourly_net_load.values, color='skyblue')
ax.set_title('Average Hourly Net Load', fontsize=16)
ax.set_xlabel('Hour of the Day (EPT)', fontsize=14)
ax.set_ylabel('Net Load (MW)', fontsize=14)
ax.tick_params(axis='x', labelrotation=45)
ax.grid(axis='y', linestyle='--', alpha=0.7)

# Annotate every bar with its height, rounded to one decimal place and
# centred horizontally on the bar
for rect in bars:
    height = rect.get_height()
    label_x = rect.get_x() + rect.get_width() / 2
    ax.text(label_x, height, str(round(height, 1)),
            ha='center', va='bottom', fontsize=7, color='black')

fig.tight_layout()
plt.show()