In [6]:
from dotenv import load_dotenv
import os
from pymongo import MongoClient

# Load variables from a local .env file (if any) into the process environment.
load_dotenv()

# Fail fast when the connection string is missing. os.environ.get() returns
# None for an unset variable, and MongoClient(None) would fall back to its
# default host instead of the intended cluster, so the mistake would only
# surface later as a confusing connection failure on the first query.
cluster_uri = os.environ.get("MONGODB_URI")
if not cluster_uri:
    raise RuntimeError(
        "MONGODB_URI is not set; define it in the environment or in a .env file"
    )

# Client, database and collection handles reused by the cells below.
client = MongoClient(cluster_uri)
db = client["MSCI446_DB"]
collection = db["Load_Forecast"]

In [7]:
import pandas as pd

# Pull every document of the load-forecast collection into memory.
documents = list(collection.find())

# Build a DataFrame from the documents and discard MongoDB's '_id' field.
# errors="ignore" keeps the cell from raising KeyError when the collection is
# empty (an empty frame has no '_id' column), and the non-inplace drop makes
# the cell safe to re-run.
df = pd.DataFrame(documents).drop(columns="_id", errors="ignore")

print(df.head())

  forecast_hour_beginning_ept forecast_area  forecast_load_mw
0        1/1/2024 12:00:00 AM           AEP             14145
1         1/1/2024 1:00:00 AM           AEP             13908
2         1/1/2024 2:00:00 AM           AEP             13765
3         1/1/2024 3:00:00 AM           AEP             13788
4         1/1/2024 4:00:00 AM           AEP             13862


In [27]:
# Handles to the solar and wind forecast collections of the same database.
collection_solar = db["Solar_Forecast"]
collection_wind = db["Wind_Forecast"]

# Materialise every document of each collection.
documents_solar = list(collection_solar.find())
documents_wind = list(collection_wind.find())

# One DataFrame per collection, without MongoDB's '_id' field.
# errors="ignore" avoids a KeyError when a collection is empty (an empty frame
# has no '_id' column); the non-inplace drop keeps the cell safe to re-run.
df_solar = pd.DataFrame(documents_solar).drop(columns="_id", errors="ignore")
df_wind = pd.DataFrame(documents_wind).drop(columns="_id", errors="ignore")

print(df_solar.head())
print(df_wind.head())

  datetime_beginning_ept    area  solar_generation_mw
0  2/28/2024 11:00:00 PM  MIDATL                  0.0
1  2/28/2024 11:00:00 PM   OTHER                  0.0
2  2/28/2024 11:00:00 PM     RFC                  0.0
3  2/28/2024 11:00:00 PM     RTO                  0.0
4  2/28/2024 11:00:00 PM   SOUTH                  0.0
   datetime_beginning_ept    area  wind_generation_mw
0  12/31/2020 11:00:00 PM  MIDATL             112.120
1  12/31/2020 11:00:00 PM   SOUTH             156.846
2  12/31/2020 11:00:00 PM    WEST            2130.528
3  12/31/2020 11:00:00 PM     RTO            2399.494
4  12/31/2020 11:00:00 PM     RFC            2242.648


In [28]:
# Requires df_solar and df_wind from the loading cell.
# Collect the distinct 'area' labels of each table (solar first, then wind)
# so the two sets of regions can be compared by eye.
unique_areas_solar, unique_areas_wind = (
    frame['area'].unique() for frame in (df_solar, df_wind)
)

# Report both label sets.
print("Unique areas in Solar Forecast table:", unique_areas_solar)
print("Unique areas in Wind Forecast table:", unique_areas_wind)


Unique areas in Solar Forecast table: ['MIDATL' 'OTHER' 'RFC' 'RTO' 'SOUTH' 'WEST']
Unique areas in Wind Forecast table: ['MIDATL' 'SOUTH' 'WEST' 'RTO' 'RFC' 'OTHER']


In [None]:
# Preview the first five rows of the wind forecast frame.
print(df_wind.head(n=5))

In [None]:

# Reshape each long table (one row per timestamp and area) into a wide table
# with one row per timestamp and one column per area.
# pivot_table is used instead of pivot because DataFrame.pivot raises
# ValueError as soon as a (timestamp, area) pair occurs more than once;
# here repeated pairs are averaged.
wind_pivot = df_wind.pivot_table(index='datetime_beginning_ept', columns='area', values='wind_generation_mw', aggfunc='mean')
solar_pivot = df_solar.pivot_table(index='datetime_beginning_ept', columns='area', values='solar_generation_mw', aggfunc='mean')

# Join the two wide tables on the timestamp. pd.merge defaults to an inner
# join, so only timestamps present in both tables are kept. Area columns that
# exist in both tables are told apart by the '_solar' / '_wind' suffixes.
merged_df = pd.merge(solar_pivot, wind_pivot, on='datetime_beginning_ept', suffixes=('_solar', '_wind'))

# Turn the 'datetime_beginning_ept' index back into a regular column.
merged_df = merged_df.reset_index()

print(merged_df.head())

In [47]:
# Every wind row that is an exact copy of another row. keep=False flags all
# members of a duplicate group, not just the repeats after the first.
wind_duplicates = df_wind.loc[df_wind.duplicated(keep=False)]

print(wind_duplicates)


In [None]:
# Every solar row that is an exact copy of another row. keep=False flags all
# members of a duplicate group, not just the repeats after the first.
solar_duplicates = df_solar.loc[df_solar.duplicated(keep=False)]

print(solar_duplicates)

In [48]:
# Copy of the solar frame in which fully identical rows are collapsed to
# their first occurrence; df_solar itself is left untouched.
solar_drop_duplicates = df_solar.drop_duplicates(keep='first')

In [32]:
# Remove two rows from the solar frame by index label, producing a new frame.
# NOTE(review): 16555 and 69259 are hard-coded labels. Because df_solar uses
# the default positional index, they depend on the order in which documents
# were loaded. Presumably these are the rows flagged as duplicates (an earlier
# draft filtered on area == 'OTHER') — confirm before relying on this.
df_dropped_solar = df_solar.drop(index=[16555, 69259])

In [None]:
# Re-run the exact-duplicate check on the frame with the two rows removed;
# keep=False lists every member of each remaining duplicate group.
solar_duplicates = df_dropped_solar.loc[df_dropped_solar.duplicated(keep=False)]

print(solar_duplicates)

In [53]:
# Wide tables: one row per timestamp, one column per area. Repeated
# (timestamp, area) pairs are collapsed to their mean.
solar_pivot = df_solar.pivot_table(
    values='solar_generation_mw',
    index='datetime_beginning_ept',
    columns='area',
    aggfunc='mean',
)
wind_pivot = df_wind.pivot_table(
    values='wind_generation_mw',
    index='datetime_beginning_ept',
    columns='area',
    aggfunc='mean',
)

# Join solar and wind on the timestamp, suffixing area columns that appear in
# both tables, then move the timestamp index back into an ordinary column.
merged_df = (
    solar_pivot
    .merge(wind_pivot, on='datetime_beginning_ept', suffixes=('_solar', '_wind'))
    .reset_index()
)

print(merged_df.head())