In [None]:
import pmdarima as pm
import statsmodels.api as sm
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Automated order search: auto_arima tries candidate (p,d,q)(P,D,Q,m) models
# with a seasonal period of 7 observations (weekly, if the data is daily) and
# prints each candidate it evaluates (trace=True).
#
# The search is restricted to the first 80% of the rows -- the same fraction
# that the train/test split in this notebook uses -- so the held-out rows
# cannot influence which order is selected. Searching on the full series
# would leak test data into model selection.
search_cutoff = int(len(df) * 0.8)
auto_model = pm.auto_arima(df['Total (grid load) [MWh]'].iloc[:search_cutoff],
                           seasonal=True, m=7,
                           trace=True, error_action='ignore', suppress_warnings=True)
print(auto_model.summary())


In [None]:
# Positional hold-out split (rows are not shuffled): the first 80% of the rows
# become the training set and the remaining 20% the test set.
split_index = int(df.shape[0] * 0.8)
train, test = df.iloc[:split_index], df.iloc[split_index:]


In [None]:
# Fit a seasonal ARIMA (SARIMAX) model on the training series.
# Example specification: non-seasonal (p,d,q) = (1,1,1) and
# seasonal (P,D,Q,s) = (1,1,1,7). The stationarity and invertibility
# constraints on the estimated parameters are switched off.
sarimax_spec = dict(
    order=(1, 1, 1),
    seasonal_order=(1, 1, 1, 7),
    enforce_stationarity=False,
    enforce_invertibility=False,
)
model = sm.tsa.statespace.SARIMAX(train['Total (grid load) [MWh]'], **sarimax_spec)
model_fit = model.fit(disp=False)  # disp=False: do not print optimizer progress
print(model_fit.summary())


In [None]:
# Draw the fitted model's diagnostic plots on a 15x8 inch figure.
diagnostics_fig = model_fit.plot_diagnostics(figsize=(15, 8))
plt.show()


In [None]:
# Forecast as many steps ahead as there are rows in the test set.
forecast = model_fit.get_forecast(steps=len(test))
predictions = forecast.predicted_mean
# alpha=0.05 is the library default; it is spelled out here so the "95%" label
# on the shaded band below is tied to the code rather than to an implicit default.
conf_int = forecast.conf_int(alpha=0.05)

# Plot training data, held-out data, the forecast and its interval on one axis.
target_col = 'Total (grid load) [MWh]'
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(train.index, train[target_col], label='Training')
ax.plot(test.index, test[target_col], label='Test')
# The forecast is drawn against test.index so that it lines up position by
# position with the observed test values.
ax.plot(test.index, predictions, label='Forecast', color='red')
# Column 0 / column 1 of conf_int are the lower / upper interval bounds.
ax.fill_between(test.index, conf_int.iloc[:, 0], conf_int.iloc[:, 1],
                color='pink', alpha=0.3, label='95% confidence interval')
# Title and axis labels let the figure stand alone when the notebook is skimmed.
ax.set_title('SARIMAX forecast vs. observed grid load')
ax.set_xlabel('Time')
ax.set_ylabel(target_col)
ax.legend()
plt.show()


In [None]:
# Score the forecast against the held-out observations.
# MAE is the mean absolute error; RMSE is the square root of the mean squared error.
y_true = test['Total (grid load) [MWh]']
mae = mean_absolute_error(y_true, predictions)
rmse = np.sqrt(mean_squared_error(y_true, predictions))
print(f"MAE: {mae}, RMSE: {rmse}")
