In [24]:
import pandas as pd
import warnings
warnings.filterwarnings("ignore", category=UserWarning)
from statsmodels.tsa.arima.model import ARIMA


# ARIMA

In [25]:
def load_and_preprocess_dataset(file_path):
    """Read a CSV file and order it chronologically when possible.

    If the file has a 'Datetime' column, that column is parsed with
    ``pd.to_datetime`` and the rows are sorted by it (the original row labels
    are kept, not renumbered). Files without that column are returned as read.
    The first rows are printed as a quick visual check.

    Parameters
    ----------
    file_path : str or path-like
        Location of the CSV file, passed straight to ``pd.read_csv``.

    Returns
    -------
    pandas.DataFrame
        The loaded (and, when applicable, sorted) frame.
    """
    frame = pd.read_csv(file_path)
    if 'Datetime' in frame.columns:
        # Parse first, then sort, so ordering is chronological rather than lexical.
        frame = frame.assign(Datetime=pd.to_datetime(frame['Datetime']))
        frame = frame.sort_values('Datetime')
    print(frame.head())
    return frame


In [26]:

def fit_arima_model(data, p, d, q):
    """Fit an ARIMA model of order (p, d, q) to ``data``.

    Parameters
    ----------
    data : array-like or pandas.Series
        The series handed to ``ARIMA`` as its endogenous variable.
    p, d, q : int
        Autoregressive order, differencing order and moving-average order.

    Returns
    -------
    The fitted results object returned by ``ARIMA(...).fit()``.
    """
    order = (p, d, q)
    return ARIMA(data, order=order).fit()

In [27]:
# Load the CSV through load_and_preprocess_dataset (which also prints the first rows).
# `data` is a notebook-level name that the inspection and model-fitting cells below read.
file_path = 'dataset/AEP_hourly.csv'
data = load_and_preprocess_dataset(file_path)

                Datetime   AEP_MW
2183 2004-10-01 01:00:00  12379.0
2184 2004-10-01 02:00:00  11935.0
2185 2004-10-01 03:00:00  11692.0
2186 2004-10-01 04:00:00  11597.0
2187 2004-10-01 05:00:00  11681.0


In [34]:
# Rich display of the first rows of the loaded frame (row labels are the pre-sort positions).
data.head()

Unnamed: 0,Datetime,AEP_MW
2183,2004-10-01 01:00:00,12379.0
2184,2004-10-01 02:00:00,11935.0
2185,2004-10-01 03:00:00,11692.0
2186,2004-10-01 04:00:00,11597.0
2187,2004-10-01 05:00:00,11681.0


In [35]:
# Confirm the column names the later cells rely on ('Datetime' and 'AEP_MW').
data.columns

Index(['Datetime', 'AEP_MW'], dtype='object')

In [42]:
# Scratch check: take the part of a file name that precedes the first underscore.
x = 'AEP_hourly_.csv'
x.partition('_')[0]

'AEP'

In [37]:
# ARIMA order used for this run; the recorded summary below reports it as ARIMA(5, 1, 0).
p = 5  # Autoregressive (AR) order
d = 1  # Integrated (I) order (number of differences)
q = 0  # Moving Average (MA) order

# Fit on the 'AEP_MW' column only; `result` is reused by the forecast and pickling cells.
# NOTE(review): the series keeps the frame's integer row labels rather than the
# 'Datetime' values, so the model is not given timestamps - confirm that is intended.
result = fit_arima_model(data['AEP_MW'], p, d, q)
print(result.summary())


                               SARIMAX Results                                
Dep. Variable:                 AEP_MW   No. Observations:               121273
Model:                 ARIMA(5, 1, 0)   Log Likelihood             -869885.410
Date:                Sun, 12 May 2024   AIC                        1739782.820
Time:                        22:15:21   BIC                        1739841.055
Sample:                             0   HQIC                       1739800.341
                             - 121273                                         
Covariance Type:                  opg                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1          1.0349      0.001    967.532      0.000       1.033       1.037
ar.L2         -0.3522      0.001   -456.038      0.000      -0.354      -0.351
ar.L3          0.0684      0.002     34.737      0.0

In [38]:
# Forecast the 24 steps that follow the end of the fitted sample (one step = one hour
# for this hourly dataset). `forecast` is a pandas Series named 'predicted_mean'.
# NOTE(review): the recorded output ends with a warning fragment from
# `get_prediction_index`; presumably caused by the integer index of the fitted
# series - confirm.
forecast_steps = 24  # number of future time steps (hours) to forecast
forecast = result.forecast(steps=forecast_steps)
print("Forecasted Values:\n", forecast)


Forecasted Values:
 121273    14017.567599
121274    13583.980313
121275    13330.405083
121276    13287.917720
121277    13466.989019
121278    13768.075999
121279    14077.499059
121280    14347.269187
121281    14555.940118
121282    14682.876663
121283    14722.484156
121284    14692.560416
121285    14619.523138
121286    14526.754680
121287    14433.771876
121288    14356.368526
121289    14303.907769
121290    14278.415525
121291    14276.662572
121292    14292.611238
121293    14318.971911
121294    14348.531644
121295    14375.430611
121296    14395.885223
Name: predicted_mean, dtype: float64


  return get_prediction_index(


In [39]:
# Pickling the model: serialize the fitted ARIMA results object to disk.
import pickle

# The file handle is named `model_file`, not `p`: this notebook uses `p` for the
# ARIMA autoregressive order, and binding the handle to `p` would replace that
# integer with a closed file object for any cell run afterwards.
with open('backend/models/AEP_ARIMA3.pkl', 'wb') as model_file:
    pickle.dump(result, model_file)

In [31]:
# Check the container type returned by result.forecast (a pandas Series).
type(forecast)

pandas.core.series.Series

In [32]:
# forecast.to_csv('AEP_ARIMA.csv', index=False)
# 

In [58]:
import os
import sqlite3
import pandas as pd
# Path to the SQLite database file. The name `folder_path` is kept because the
# table-listing cell below reads it, even though it points at a file, not a folder.
folder_path = '../project/backend/forecast_data.db'

conn = sqlite3.connect(folder_path)
try:
    # Named `query` rather than `q` so the ARIMA moving-average order `q`
    # defined in the model cell is not overwritten with a SQL string.
    query = "select * from NI_Forecast"
    df = pd.read_sql(query, conn)
finally:
    # Release the connection even when the query fails (e.g. missing table).
    conn.close()

df.head()

Unnamed: 0,Datetime,NI_MW,ARIMA_prediction,SARIMAX_prediction,ETS_prediction,Prophet_prediction,SVR_prediction,LSTM_prediction
0,2004-05-01 01:00:00,9198.0,,,,,,
1,2004-05-01 02:00:00,8570.0,,,,,,
2,2004-05-01 03:00:00,8183.0,,,,,,
3,2004-05-01 04:00:00,7917.0,,,,,,
4,2004-05-01 05:00:00,7828.0,,,,,,


In [57]:
import sqlite3

# Connect to the SQLite database (`folder_path` is defined in the cell that reads NI_Forecast)
conn = sqlite3.connect(folder_path)
try:
    # Create a cursor object to execute SQL queries
    cursor = conn.cursor()
    try:
        # sqlite_master is SQLite's catalog table; keep only entries of type 'table'
        cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")

        # Each fetched row is a 1-tuple holding a table name
        table_names = cursor.fetchall()
    finally:
        cursor.close()
finally:
    # Close the connection even if the query above raised
    conn.close()

# Print the table names
for name in table_names:
    print(name[0])


DEOK_Forecast
FE_Forecast
NI_Forecast
AEP_Forecast
DAYTON_Forecast
COMED_Forecast
DOM_Forecast
EKPC_Forecast
PJM_Forecast


In [45]:
import pandas as pd
import numpy as np

def add_model_predictions(df, model_name, predictions):
    """Return a copy of ``df`` extended with hourly forecasts for one model.

    The forecasts are stored in a column named ``f"{model_name}_prediction"``.
    They are dated at one-hour steps starting one hour after the last
    *observed* row, so forecasts from several models line up on the same
    timestamps when calls are chained. A row counts as observed when at least
    one column other than 'Datetime' and the ``*_prediction`` columns is
    non-null; if no such column or row exists, the last row of ``df`` is used.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a 'Datetime' column, in chronological order. It is not
        modified.
    model_name : str
        Prefix of the prediction column.
    predictions : sequence of numbers or pandas.Series
        Forecast values in time order; any index on a Series is ignored.

    Returns
    -------
    pandas.DataFrame
        New frame with a fresh 0..n-1 index. Timestamps already present
        (forecast rows added for another model) are filled in place;
        timestamps not yet present are appended as new rows. Any earlier
        values in this model's prediction column are discarded.
    """
    prediction_col = f"{model_name}_prediction"
    # Plain array: forecasts are matched to timestamps by position only.
    values = np.asarray(predictions)

    # reset_index returns a new frame, so adding the column below does not
    # leak into the caller's DataFrame.
    result = df.reset_index(drop=True)
    result[prediction_col] = np.nan
    if len(values) == 0:
        return result

    # Locate the last row that carries real data rather than only forecasts.
    value_cols = [
        col for col in result.columns
        if col != "Datetime" and not str(col).endswith("_prediction")
    ]
    observed_dates = result["Datetime"]
    if value_cols:
        has_observation = result[value_cols].notna().any(axis=1)
        if has_observation.any():
            observed_dates = observed_dates[has_observation]
    last_date = pd.Timestamp(observed_dates.iloc[-1])

    # A Timedelta step avoids the deprecated 'H' frequency alias.
    step = pd.Timedelta(hours=1)
    prediction_dates = pd.date_range(
        start=last_date + step, periods=len(values), freq=step
    )
    forecast_by_date = pd.Series(values, index=prediction_dates)

    # Fill timestamps that already have a row (added for an earlier model).
    already_present = result["Datetime"].isin(prediction_dates)
    if already_present.any():
        result.loc[already_present, prediction_col] = (
            result.loc[already_present, "Datetime"].map(forecast_by_date).to_numpy()
        )

    # Append the timestamps that do not have a row yet.
    new_rows = forecast_by_date[~forecast_by_date.index.isin(result["Datetime"])]
    if new_rows.empty:
        return result
    prediction_df = pd.DataFrame(
        {"Datetime": new_rows.index, prediction_col: new_rows.to_numpy()}
    )
    return pd.concat([result, prediction_df], ignore_index=True)

# Load the dataset
# NOTE(review): the recorded run of this cell fails with FileNotFoundError for this
# path. The only visible write of a similarly named file is a commented-out
# `forecast.to_csv('AEP_ARIMA.csv', ...)` that targets a different location - confirm
# which file is meant.
df = pd.read_csv('data/AEP_ARIMA.csv')

# Convert Datetime column to datetime format, then sort chronologically by moving it
# into the index, sorting, and moving it back to a regular column (it ends up first).
df['Datetime'] = pd.to_datetime(df['Datetime'])
df.set_index('Datetime', inplace=True)
df.sort_index(inplace=True)
df.reset_index(inplace=True)

# Example usage of the function. The values are hard-coded placeholders, not output
# of the ARIMA model fitted earlier in this notebook.
predictions_arima = [1450.0, 1460.0, 1470.0, 1480.0, 1490.0]
df_with_arima = add_model_predictions(df, "ARIMA", predictions_arima)

# Chain a second model onto the frame returned for the first one.
predictions_sarima = [1445.0, 1455.0, 1465.0, 1475.0, 1485.0]
df_with_sarima = add_model_predictions(df_with_arima, "SARIMA", predictions_sarima)

# Display the resulting DataFrame
print(df_with_sarima.tail(10))


FileNotFoundError: [Errno 2] No such file or directory: 'data/AEP_ARIMA.csv'