# Load the data

In [3]:
import pandas as pd
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import OneHotEncoder
from statsmodels.tsa.statespace.sarimax import SARIMAX
import plotly.graph_objects as go

import DataRetriever as dr

# Load the hourly subsystem measurements and build the "fixed" (non-shiftable)
# consumption series used as the modelling target.
# NOTE(review): the recorded output of this cell is an AttributeError about
# `_unpickle_block` raised while unpickling; presumably the pickle files were
# written with a different pandas version than the one installed - confirm.
retriever = dr.DataRetriever()

CON_ATTRIBUTES = retriever.get_attributes(file_name='consuming_attributes.pkl')

con_df = retriever.get_data(file_name='All-Subsystems-hour-Year2.pkl')

# Same attribute list as CON_ATTRIBUTES; reuse it instead of reading the file again.
all_consuming = CON_ATTRIBUTES
flex_consuming = ["Load_ClothesWasherPowerWithStandby", "Elec_PowerDishwasher", "Load_DryerPowerTotal"]

# Everything that is not a flexible load counts as fixed consumption.
# dict.fromkeys de-duplicates like set() did, but keeps a stable column order so
# the row-wise sum below is reproducible between kernel restarts.
_flex_lookup = set(flex_consuming)
fixed_consuming = [name for name in dict.fromkeys(all_consuming) if name not in _flex_lookup]

# Row-wise total of the fixed loads, floored at zero and divided by 1e3.
data = pd.DataFrame(con_df[fixed_consuming].sum(axis=1).clip(lower=0) / 1e3)
data.columns = ['Consumed Energy']

AttributeError: Can't get attribute '_unpickle_block' on <module 'pandas._libs.internals' from 'C:\\Users\\madsc\\AppData\\Roaming\\Python\\Python39\\site-packages\\pandas\\_libs\\internals.cp39-win_amd64.pyd'>

# Sliding window function

In [28]:
def sliding_forecast(endog: pd.DataFrame, trend: str, order: tuple,
                     seasonal_order: tuple=None, train_length: int=0,
                     val_length: int=0, shift: int=0):
    """Return the mean RMSE of SARIMAX forecasts over a sliding window.

    A SARIMAX model is fitted on ``train_length * 24`` consecutive observations
    of ``endog`` and scored on the ``val_length * 24`` observations that follow.
    The window then moves forward by ``shift * 24`` observations and the
    procedure repeats for as long as a complete validation window fits inside
    ``endog``. The factor 24 means the lengths are days when the data is hourly.

    Parameters
    ----------
    endog : pandas Series or DataFrame
        Observed series. Forecasts are requested by index label, so the index
        presumably has to be one statsmodels can extend past the training
        sample (e.g. a DatetimeIndex with a frequency) - confirm for new data.
    trend : str
        Passed to ``SARIMAX(trend=...)``.
    order : tuple
        ``(p, d, q)`` passed to ``SARIMAX(order=...)``.
    seasonal_order : tuple, optional
        ``(P, D, Q, s)`` passed to ``SARIMAX(seasonal_order=...)``. ``None``
        means no seasonal component.
    train_length, val_length, shift : int
        Training length, validation length and step size, each in units of
        24 observations. All three must be positive.

    Returns
    -------
    float
        Arithmetic mean of the per-window RMSE values.

    Raises
    ------
    ValueError
        If a length or the shift is not positive, or if ``endog`` is too short
        to hold a single training + validation window.
    """
    if train_length <= 0 or val_length <= 0:
        raise ValueError("train_length and val_length must be positive")
    if shift <= 0:
        # A non-positive shift would never advance the window.
        raise ValueError("shift must be positive")

    # SARIMAX expects a 4-tuple; translate the None default to "no seasonality".
    if seasonal_order is None:
        seasonal_order = (0, 0, 0, 0)

    train_span = train_length * 24
    val_span = val_length * 24
    step = shift * 24
    n_obs = len(endog)

    train_start = 0
    train_end = train_start + train_span
    val_start = train_end
    val_end = val_start + val_span
    progress = 1

    rmse_list = list()

    # `<=` so that a last window ending exactly on the final observation is used.
    while val_end <= n_obs:
        model = SARIMAX(endog=endog.iloc[train_start : train_end],
                        trend=trend, order=order, seasonal_order=seasonal_order,
                        initialization='approximate_diffuse')\
            .fit(low_memory=True, disp=False, full_output=False)

        # `end` is inclusive in predict(), hence val_end - 1.
        predictions = pd.DataFrame(model.predict(start=endog.index[val_start],
                                                 end=endog.index[val_end - 1]))

        # Take the square root explicitly instead of using `squared=False`,
        # which recent scikit-learn releases deprecate and then drop.
        mse = mean_squared_error(y_true=endog.iloc[val_start: val_end],
                                 y_pred=predictions)
        rmse_list.append(mse ** 0.5)

        train_start += step
        train_end = train_start + train_span
        val_start = train_end
        val_end = val_start + val_span

        print(f"Completed a run - {progress}")

        progress += 1

    if not rmse_list:
        raise ValueError(
            f"endog has {n_obs} observations, fewer than the "
            f"{train_span + val_span} needed for one train/validation window"
        )

    return sum(rmse_list) / len(rmse_list)

# RMSE for SARIMA

In [None]:
# Candidate models as flat tuples (p, d, q, P, D, Q, s); s = 168 is one week of
# hourly observations.
sarima_params = [(4, 1, 2, 1, 1, 0, 168), (4, 1, 2, 0, 1, 1, 168),
                 (4, 1, 2, 1, 0, 0, 168), (4, 1, 2, 0, 0, 1, 168)]

# Mean sliding-window RMSE per candidate, keyed by its 1-based position in
# sarima_params.
sarima_rmses = dict()

for key, params in enumerate(sarima_params, start=1):
    # The "Consumed Energy" column is created on `data`, not on the raw `con_df`.
    result = sliding_forecast(endog=data["Consumed Energy"],
                              trend='n', order=params[0: 3], seasonal_order=params[3: ],
                              train_length=28, val_length=3, shift=5)
    sarima_rmses[key] = result
    print(f"RMSE for SARIMA{params} is: {round(result, 5)}")

# Pick the candidate with the lowest RMSE (the first one wins on a tie).
best_rmse = min(sarima_rmses.values())
best_param_key = min(sarima_rmses, key=sarima_rmses.get)
# Keys are 1-based while the list is 0-based.
best_params = sarima_params[best_param_key - 1]

Completed a run - 1


  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'


Completed a run - 2


  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'


Completed a run - 3


  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'


Completed a run - 4


  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'


Completed a run - 5


  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'


Completed a run - 6


  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'


Completed a run - 7


  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'


Completed a run - 8


  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'


Completed a run - 9


  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'


Completed a run - 10


  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'


Completed a run - 11


  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'


Completed a run - 12


  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'


Completed a run - 13


  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'


Completed a run - 14


  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting seasonal autoregressive'


Completed a run - 15


  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting seasonal autoregressive'


Completed a run - 16


  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting seasonal autoregressive'


Completed a run - 17


  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'


Completed a run - 18


  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting seasonal autoregressive'


In [None]:
# Fit the selected model on one 28-day window that starts 40 days into the data
# and plot its forecast for the following 3 days against the observed values.
start_train = 24*40
end_train = start_train + 24*28
start_test = end_train
end_test = start_test + 3*24

# Positional slice of the training window.
# NOTE(review): sliding_forecast fits with initialization='approximate_diffuse'
# while this fit uses the statsmodels default - confirm that is intended.
model = SARIMAX(endog=data["Consumed Energy"].iloc[start_train: end_train],
                trend='n',
                order=best_params[: 3],
                seasonal_order=best_params[3: ])\
    .fit(low_memory=True, disp=False, full_output=False)

# `end` is inclusive in predict(), so stop at end_test - 1 to forecast exactly
# the observations in the half-open slice [start_test, end_test) plotted below.
predictions = pd.DataFrame(model.predict(start=data.index[start_test],
                                         end=data.index[end_test - 1]))

fig = go.Figure(go.Scattergl(
    x=data[start_train: end_train].index,
    y=data['Consumed Energy'][start_train: end_train],
    name="Training Data",
    line=dict(color='rgb(84, 84, 84)')
))

fig.add_trace(go.Scattergl(
    x=data[start_test: end_test].index,
    y=data['Consumed Energy'][start_test: end_test],
    name="Observed",
    line=dict(color='rgb(234,143,129)')
))

fig.add_trace(go.Scattergl(
    x=predictions.index,
    # Hand plotly the single forecast column as a 1-D series rather than the
    # whole one-column DataFrame.
    y=predictions.iloc[:, 0],
    name="Predicted",
    line=dict(color='rgb(32,115,171)')
))

fig.update_yaxes(title="Fixed Consumption [kWh]")
fig.update_layout(template="plotly",
                  legend=dict(orientation="h",
                              yanchor="bottom",
                              y=1,
                              xanchor="left",
                              x=0))

# NOTE(review): write_html does not appear to create missing directories -
# confirm that "ARIMA_figs/" exists before running this cell.
fig.write_html("ARIMA_figs/best_consumption_arima.html")
fig.show()