In [13]:
import pandas as pd
import os

#Defining the base directory
notebook_dir = os.getcwd()

base_dir = os.path.join(notebook_dir, 'Hourly Energy Consumption')

#List of file names
file_names = [
    "AEP_hourly.csv", "COMED_hourly.csv", "DAYTON_hourly.csv", 
    "DEOK_hourly.csv", "DOM_hourly.csv", "DUQ_hourly.csv", 
    "EKPC_hourly.csv", "FE_hourly.csv", "NI_hourly.csv", 
    "PJM_Load_hourly.csv", "PJME_hourly.csv", "PJMW_hourly.csv"
]

#Loading the datasets into a dictionary
dataframes = {}
for file_name in file_names:
    file_path = os.path.join(base_dir, file_name)
    if os.path.exists(file_path):
        dataframes[file_name] = pd.read_csv(file_path)
        print(f"Loaded {file_name}")
    else:
        print(f"File not found: {file_name}")

#Combining all datasets into one
combined_df = pd.concat(dataframes.values(), ignore_index=True)

#Preprocessing
if 'Datetime' in combined_df.columns:
    combined_df['Datetime'] = pd.to_datetime(combined_df['Datetime'])
else:
    print("No 'Datetime' column found, please check the dataset structure.")

#Fill in any missing values
combined_df = combined_df.fillna(method='ffill')

#Sort by datetime
combined_df = combined_df.sort_values(by='Datetime')

#Display the first few rows of the preprocessed dataset
combined_df.head()



Loaded AEP_hourly.csv
Loaded COMED_hourly.csv
Loaded DAYTON_hourly.csv
Loaded DEOK_hourly.csv
Loaded DOM_hourly.csv
Loaded DUQ_hourly.csv
Loaded EKPC_hourly.csv
Loaded FE_hourly.csv
Loaded NI_hourly.csv
Loaded PJM_Load_hourly.csv
Loaded PJME_hourly.csv
Loaded PJMW_hourly.csv


Unnamed: 0,Datetime,AEP_MW,COMED_MW,DAYTON_MW,DEOK_MW,DOM_MW,DUQ_MW,EKPC_MW,FE_MW,NI_MW,PJM_Load_MW,PJME_MW,PJMW_MW
775273,1998-04-01 01:00:00,19993.0,12816.0,2552.0,4100.0,17428.0,1721.0,2846.0,8393.0,12223.0,22259.0,,
775274,1998-04-01 02:00:00,19993.0,12816.0,2552.0,4100.0,17428.0,1721.0,2846.0,8393.0,12223.0,21244.0,,
775275,1998-04-01 03:00:00,19993.0,12816.0,2552.0,4100.0,17428.0,1721.0,2846.0,8393.0,12223.0,20651.0,,
775276,1998-04-01 04:00:00,19993.0,12816.0,2552.0,4100.0,17428.0,1721.0,2846.0,8393.0,12223.0,20421.0,,
775277,1998-04-01 05:00:00,19993.0,12816.0,2552.0,4100.0,17428.0,1721.0,2846.0,8393.0,12223.0,20713.0,,
