In [1]:
import pandas as pd


In [2]:
# Define relative paths to the CSV files
generation_by_source_path = 'generation_by_source.csv'
historical_power_load_path = 'historical_power_load.csv'

In [3]:
# Read the CSV files using the relative paths
generation_data = pd.read_csv(generation_by_source_path)
load_data = pd.read_csv(historical_power_load_path)

In [4]:
# Convert datetime columns to datetime objects
generation_data['datetime_beginning_utc'] = pd.to_datetime(generation_data['datetime_beginning_utc'])
load_data['forecast_hour_beginning_utc'] = pd.to_datetime(load_data['forecast_hour_beginning_utc'])

In [5]:
# Filter the generation data to only include renewable sources (is_renewable == True)
renewable_generation = generation_data[generation_data['is_renewable']]

In [6]:
# Aggregate renewable generation by hour
renewable_generation_agg = renewable_generation.groupby('datetime_beginning_utc')['mw'].sum().reset_index()

In [7]:
# Merge the renewable generation data with the load data
clean_data = pd.merge(load_data, renewable_generation_agg, 
                       left_on='forecast_hour_beginning_utc', 
                       right_on='datetime_beginning_utc', 
                       how='left')

In [8]:
# Fill missing values in renewable generation with 0
clean_data['mw'].fillna(0, inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  clean_data['mw'].fillna(0, inplace=True)


In [9]:
# Calculate net load as total load minus renewable generation
clean_data['net_load_mw'] = clean_data['forecast_load_mw'] - clean_data['mw']

In [10]:
# Display the first few rows of net load data to understand the output
print(clean_data[['forecast_hour_beginning_utc', 'forecast_load_mw', 'mw', 'net_load_mw']].head())

  forecast_hour_beginning_utc  forecast_load_mw    mw  net_load_mw
0         2023-10-06 04:00:00             12161  5138         7023
1         2023-10-06 04:00:00             12212  5138         7074
2         2023-10-06 04:00:00             12212  5138         7074
3         2023-10-06 04:00:00             12212  5138         7074
4         2023-10-06 04:00:00             12212  5138         7074


In [12]:
# Group by hour and calculate the mean net load
average_hourly_net_load = clean_data.groupby(clean_data['forecast_hour_beginning_utc'].dt.hour)['net_load_mw'].mean()

# Display the average hourly net load
print(average_hourly_net_load)

forecast_hour_beginning_utc
0     10385.064691
1     10694.202821
2     10937.991486
3     10462.908763
4      9652.550392
5      9270.534810
6      8970.669610
7      8881.651902
8      8988.189747
9      9326.807207
10     9740.306492
11     9627.220629
12     8918.925448
13     8309.671617
14     8052.284581
15     8059.721437
16     7912.765661
17     7681.121854
18     7512.127784
19     7511.228124
20     7640.520346
21     8339.347008
22     9163.866973
23     9960.673883
Name: net_load_mw, dtype: float64


In [13]:
# Resample to daily average net load
daily_net_load = clean_data.set_index('forecast_hour_beginning_utc')['net_load_mw'].resample('D').mean()

# Display the first few rows of daily net load
print(daily_net_load.head())

forecast_hour_beginning_utc
2023-10-06    8131.912932
2023-10-07    5617.632828
2023-10-08    7278.966667
2023-10-09    8874.614141
2023-10-10    8229.594444
Freq: D, Name: net_load_mw, dtype: float64


In [14]:
import numpy as np
import matplotlib.pyplot as plt

# Function to calculate autocorrelation
def autocorrelation(series, lag):
    return np.corrcoef(series[:-lag], series[lag:])[0, 1]

# Calculate autocorrelation for lags from 1 to 30 days
lags = np.arange(1, 31)
autocorrelations = [autocorrelation(daily_net_load.values, lag) for lag in lags]

# Plot the autocorrelation values
plt.figure(figsize=(10, 6))
plt.stem(lags, autocorrelations, use_line_collection=True)
plt.title('Autocorrelation of Daily Net Load (Cyclicality Analysis)')
plt.xlabel('Lag (days)')
plt.ylabel('Autocorrelation')
plt.grid(True)
plt.show()


TypeError: stem() got an unexpected keyword argument 'use_line_collection'

<Figure size 1000x600 with 0 Axes>