In [16]:
import pandas as pd,os
import pmdarima as pm
from pmdarima import auto_arima
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error
import numpy as np
import warnings
warnings.filterwarnings("ignore")


In [17]:
parent_directory = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
main_directory = os.path.dirname(parent_directory)
common_records =  pd.read_csv(main_directory + '/data/common_records.csv')

      ID                       From        Date  Hour Participation_Phase  \
0  Exp_1  2020-02-01 00:00:00+00:00  2020-02-01     1             Phase_1   
1  Exp_1  2020-02-01 01:00:00+00:00  2020-02-01     2             Phase_1   
2  Exp_1  2020-02-01 02:00:00+00:00  2020-02-01     3             Phase_1   
3  Exp_1  2020-02-01 03:00:00+00:00  2020-02-01     4             Phase_1   
4  Exp_1  2020-02-01 04:00:00+00:00  2020-02-01     5             Phase_1   

   Demand_kWh  Temperature  Temperature24  Temperature48  Temperature72  
0       2.393          7.6            6.1            5.0            4.4  
1       2.056          8.2            6.2            5.1            4.4  
2       2.258          8.4            6.3            5.3            4.5  
3       2.535          8.4            6.4            5.4            4.5  
4       2.042          8.2            6.5            5.5            4.6  


In [50]:
unique_ids =  common_records["ID"].unique()
print(f"Number of households : {len(unique_ids)}")

forecasted_df = pd.DataFrame()
error_metrics_df = pd.DataFrame(columns=["ID", "MSE", "MAE", "RMSE"])

for id in unique_ids :
    temp_records = common_records[common_records["ID"]==id].reset_index(drop=True)

    y=temp_records['Demand_kWh']
    X= temp_records[["From","Temperature"]]
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, shuffle=False)
    model = auto_arima(y_train,
                   exogenous=X_train, 
                   suppress_warnings=True, 
                   error_action="ignore")

    print(f"Model Summary for {id} : {model.summary()}")

    n_periods = len(y_test)

    forecast = model.predict(n_periods=n_periods, exogenous=X_test)
    forecast.name = "forecast_data"
    forecast_temp_df = temp_records.join(forecast,how="left",rsuffix="_forecast")
    print(id)
    mse = mean_squared_error(y_test, forecast)
    print("Mean Squared Error:", mse)

    mae = mean_absolute_error(y_test, forecast)
    print("Mean Absolute Error:", mae)

    rmse = np.sqrt(mse)
    print("Root Mean Squared Error:", rmse)
    new_row = pd.DataFrame({
        "ID": [id],
        "MSE": [mse],
        "MAE": [mae],
        "RMSE": [rmse]
    })
    
    error_metrics_df = pd.concat([error_metrics_df, new_row], ignore_index=True)
    forecast_temp_df = forecast_temp_df[forecast_temp_df["forecast_data"].notna()]
    forecasted_df = pd.concat([forecasted_df, forecast_temp_df], ignore_index=True)


forecasted_df = forecasted_df.drop(columns=["Temperature","Temperature24","Temperature48","Temperature72","Hour","Participation_Phase"])

forecasted_df.to_csv("arima_common_records_method_1_80_20.csv",index=True)
error_metrics_df.to_csv("arima_coomon_records_method_1_80_20_error_report.csv",index=True)

Number of households : 688
Model Summary for Exp_1 :                                SARIMAX Results                                
Dep. Variable:                      y   No. Observations:                 3667
Model:               SARIMAX(1, 1, 2)   Log Likelihood               -3352.894
Date:                Wed, 19 Jun 2024   AIC                           6713.788
Time:                        00:14:19   BIC                           6738.615
Sample:                             0   HQIC                          6722.628
                               - 3667                                         
Covariance Type:                  opg                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1          0.6702      0.029     23.085      0.000       0.613       0.727
ma.L1         -1.2886      0.036    -35.943      0.000      -1.359      -1.218