In [1]:
from dotenv import load_dotenv
import os
from pymongo import MongoClient

# Pull settings from a .env file into the process environment.
load_dotenv()

# The connection string is read from the environment so that credentials stay
# out of the notebook itself.
cluster_uri = os.environ.get("MONGODB_URI")
if not cluster_uri:
    # Without this guard MongoClient(None) would target PyMongo's default
    # local host instead of the intended cluster, so stop with a clear message.
    raise RuntimeError(
        "MONGODB_URI is not set; define it in the environment or in a .env file."
    )

# Handles used by the cells below: client -> database -> load forecast collection.
client = MongoClient(cluster_uri)
db = client["MSCI446_DB"]
collection = db["Load_Forecast"]

In [9]:
import pandas as pd

# Pull every document of the load forecast collection into memory.
documents = list(collection.find())

# Build a DataFrame from the documents and discard MongoDB's '_id' field,
# which the analysis does not use. errors='ignore' keeps this cell working
# when the collection is empty and the frame therefore has no '_id' column.
df = pd.DataFrame(documents).drop(columns='_id', errors='ignore')

print(df.head())

  forecast_hour_beginning_ept forecast_area  forecast_load_mw
0        1/1/2024 12:00:00 AM           AEP             14145
1         1/1/2024 1:00:00 AM           AEP             13908
2         1/1/2024 2:00:00 AM           AEP             13765
3         1/1/2024 3:00:00 AM           AEP             13788
4         1/1/2024 4:00:00 AM           AEP             13862


In [3]:
# Collection handles for the solar and wind data.
collection_solar = db["Solar_Forecast"]
collection_wind = db["Wind_Forecast"]

# Pull every document of each collection into memory.
documents_solar = list(collection_solar.find())
documents_wind = list(collection_wind.find())

# Build the DataFrames and discard MongoDB's '_id' field. errors='ignore'
# keeps this cell working when a collection is empty and the resulting frame
# has no '_id' column to drop.
df_solar = pd.DataFrame(documents_solar).drop(columns='_id', errors='ignore')
df_wind = pd.DataFrame(documents_wind).drop(columns='_id', errors='ignore')

print(df_solar.head())
print(df_wind.head())

  datetime_beginning_ept    area  solar_generation_mw
0  2/28/2024 11:00:00 PM  MIDATL                  0.0
1  2/28/2024 11:00:00 PM   OTHER                  0.0
2  2/28/2024 11:00:00 PM     RFC                  0.0
3  2/28/2024 11:00:00 PM     RTO                  0.0
4  2/28/2024 11:00:00 PM   SOUTH                  0.0
   datetime_beginning_ept    area  wind_generation_mw
0  12/31/2020 11:00:00 PM  MIDATL             112.120
1  12/31/2020 11:00:00 PM   SOUTH             156.846
2  12/31/2020 11:00:00 PM    WEST            2130.528
3  12/31/2020 11:00:00 PM     RTO            2399.494
4  12/31/2020 11:00:00 PM     RFC            2242.648


In [11]:
# Requires df_solar, df_wind and df (the load forecast frame) to exist already.

# Collect the distinct area labels of each frame in a single pass over the
# (frame, column) pairs: solar, wind, then load forecast.
unique_areas_solar, unique_areas_wind, unique_areas_load_forecast = (
    frame[column].unique()
    for frame, column in (
        (df_solar, 'area'),
        (df_wind, 'area'),
        (df, 'forecast_area'),
    )
)

# Report the three sets of labels so they can be compared side by side.
for label, areas in (
    ("Unique areas in Solar Forecast table:", unique_areas_solar),
    ("Unique areas in Wind Forecast table:", unique_areas_wind),
    ("Unique areas in Load Forecast:", unique_areas_load_forecast),
):
    print(label, areas)


Unique areas in Solar Forecast table: ['MIDATL' 'OTHER' 'RFC' 'RTO' 'SOUTH' 'WEST']
Unique areas in Wind Forecast table: ['MIDATL' 'SOUTH' 'WEST' 'RTO' 'RFC' 'OTHER']
Unique areas in Load Forecast: ['AEP' 'APS' 'ATSI' 'COMED' 'DAY' 'DEOK' 'DOM' 'DUQ' 'EKPC' 'MIDATL' 'RTO']


In [7]:
# Show the first five rows of the wind generation frame.
print(df_wind.head(n=5))

   datetime_beginning_ept    area  wind_generation_mw
0  12/31/2020 11:00:00 PM  MIDATL             112.120
1  12/31/2020 11:00:00 PM   SOUTH             156.846
2  12/31/2020 11:00:00 PM    WEST            2130.528
3  12/31/2020 11:00:00 PM     RTO            2399.494
4  12/31/2020 11:00:00 PM     RFC            2242.648


In [None]:

# Reshape the long-format frames to one row per timestamp and one column per area.
# pivot_table is used instead of pivot because DataFrame.pivot raises a
# ValueError when a (timestamp, area) pair occurs more than once; averaging
# leaves the value unchanged when the repeated rows are identical.
wind_pivot = df_wind.pivot_table(index='datetime_beginning_ept', columns='area', values='wind_generation_mw', aggfunc='mean')
solar_pivot = df_solar.pivot_table(index='datetime_beginning_ept', columns='area', values='solar_generation_mw', aggfunc='mean')

# Join the two pivots on their shared timestamp index. Area names that occur
# in both frames are disambiguated with the _solar / _wind suffixes.
merged_df = pd.merge(solar_pivot, wind_pivot, on='datetime_beginning_ept', suffixes=('_solar', '_wind'))

# Turn the 'datetime_beginning_ept' index back into a regular column.
merged_df.reset_index(inplace=True)

print(merged_df.head())

In [None]:
# Select every wind row that has at least one exact copy elsewhere in the
# frame; keep=False flags all members of a duplicate group, not just the repeats.
wind_duplicates = df_wind.loc[df_wind.duplicated(keep=False)]

print(wind_duplicates)


In [None]:
# Select every solar row that has at least one exact copy elsewhere in the
# frame; keep=False flags all members of a duplicate group, not just the repeats.
solar_duplicates = df_solar.loc[df_solar.duplicated(keep=False)]

print(solar_duplicates)

In [25]:
# Renaming date-time column heading in load forecast df to common date-time column heading
df.rename(columns={'forecast_hour_beginning_ept': 'datetime_beginning_ept'}, inplace=True)

# Instead of dropping the duplicate rows, I just averaged them and since they're the same their value shouldn't change
wind_pivot = df_wind.pivot_table(index='datetime_beginning_ept', columns='area', values='wind_generation_mw', aggfunc='mean')
solar_pivot = df_solar.pivot_table(index='datetime_beginning_ept', columns='area', values='solar_generation_mw', aggfunc='mean')
load_forecast_pivot = df.pivot_table(index='datetime_beginning_ept', columns ='forecast_area', values = 'forecast_load_mw', aggfunc = 'mean')

# Renaming column headings to end in _forecast to differentiate from wind and solar columns
load_forecast_pivot = load_forecast_pivot.rename(columns={col: f"{col}_forecast" for col in load_forecast_pivot.columns if col != 'datetime_beginning_ept'})

# Merge the pivoted DataFrames along the dates
merged_wind_solar_df = pd.merge(solar_pivot, wind_pivot, on='datetime_beginning_ept', suffixes=('_solar', '_wind'))
# merged_df = pd.merge(merged_df, solar_pivot, on='date', suffixes=('_wind', '_solar'))

merged_wind_solar_forecast_df = pd.merge(merged_wind_solar_df, load_forecast_pivot, on = 'datetime_beginning_ept', suffixes=('', ''))

# Reset index to make 'date' a column
merged_wind_solar_forecast_df.reset_index(inplace=True)

# Rename the columns
# merged_df.columns = ['Date', 'North-values_load_forecast', 'South-values_load_forecast', 'East-values_load_forecast',
                    #  'Upper-belt-values_wind', 'Lower-belt-values_wind', 'Mid-belt-values_wind', 'Side-belt-values_wind',
                    #  'Atlantic-values_solar', 'Pacific-values_solar']

merged_wind_solar_forecast_df = df.clip(lower=0)

print(merged_wind_solar_forecast_df.head())

  datetime_beginning_ept  MIDATL_solar  OTHER_solar  RFC_solar  RTO_solar  \
0   1/1/2021 10:00:00 AM       158.206        5.057    178.491    356.255   
1   1/1/2021 10:00:00 PM        -1.324       -0.020     -1.612     -4.765   
2   1/1/2021 11:00:00 AM       141.946        4.618    158.202    379.477   
3   1/1/2021 11:00:00 PM        -1.315       -0.020     -1.605     -4.888   
4   1/1/2021 12:00:00 AM         2.147       -0.120      1.874     -1.019   

   SOUTH_solar  WEST_solar  MIDATL_wind  OTHER_wind  RFC_wind  ...  \
0      177.764      20.285      692.726         NaN  4221.014  ...   
1       -3.153      -0.288       69.161         NaN  2621.295  ...   
2      221.275      16.256      582.513         NaN  3750.589  ...   
3       -3.283      -0.290      105.647         NaN  2412.106  ...   
4       -2.893      -0.273       99.408         NaN  2530.709  ...   

   APS_forecast  ATSI_forecast  COMED_forecast  DAY_forecast  DEOK_forecast  \
0      6166.125       7122.000       