# Load the data

In [2]:
import pandas as pd
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import OneHotEncoder
from statsmodels.tsa.statespace.sarimax import SARIMAX
from sktime.forecasting.arima import ARIMA
import plotly.graph_objects as go

import DataRetriever as dr

retriever = dr.DataRetriever()

CON_ATTRIBUTES = retriever.get_attributes(file_name='consuming_attributes.pkl')

con_df = retriever.get_data(file_name='All-Subsystems-hour-Year2.pkl')

all_consuming = retriever.get_attributes(file_name='consuming_attributes.pkl')
flex_consuming = ["Load_ClothesWasherPowerWithStandby", "Elec_PowerDishwasher", "Load_DryerPowerTotal"]
fixed_consuming = list(set(all_consuming) - set(flex_consuming))

data = pd.DataFrame(con_df[fixed_consuming].sum(axis=1).clip(lower=0) / 1e3)
data.columns = ['Consumed Energy']

  VALID_INDEX_TYPES = (pd.Int64Index, pd.RangeIndex, pd.PeriodIndex, pd.DatetimeIndex)
  RELATIVE_TYPES = (pd.Int64Index, pd.RangeIndex)
  ABSOLUTE_TYPES = (pd.Int64Index, pd.RangeIndex, pd.DatetimeIndex, pd.PeriodIndex)
  VALID_INDEX_TYPES = (pd.Int64Index, pd.RangeIndex, pd.PeriodIndex, pd.DatetimeIndex)
  VALID_INDEX_TYPES = (pd.Int64Index, pd.RangeIndex, pd.PeriodIndex, pd.DatetimeIndex)
  VALID_MULTIINDEX_TYPES = (pd.Int64Index, pd.RangeIndex)
  VALID_INDEX_TYPES = (pd.Int64Index, pd.RangeIndex, pd.PeriodIndex, pd.DatetimeIndex)
  VALID_MULTIINDEX_TYPES = (pd.Int64Index, pd.RangeIndex)


In [3]:
train_val = data[: data.index[len(data) * 0.8]]
test = data[data.index[len(data) * 0.8]: ]

  train_val = data[: data.index[len(data) * 0.8]]
  test = data[data.index[len(data) * 0.8]: ]


# Sliding window function

In [11]:
def sliding_forecast(endog: pd.DataFrame, trend: str, order: tuple,
                     seasonal_order: tuple=None, train_length: int=0,
                     val_length: int=0, shift: int=0):

    train_start = 0
    train_end = train_start + (train_length * 24)
    val_start = train_end
    val_end = val_start + (val_length * 24)

    progress = 0

    rmse_list = list()

    while len(endog[: val_end]) < len(endog):
        model = SARIMAX(endog=endog[train_start : train_end], trend=trend, order=order, seasonal_order=seasonal_order).fit(low_memory=True)

        predictions = pd.DataFrame(model.predict(start=endog.index[val_start],
                                                 end=endog.index[val_end - 1]))

        rmse_list.append(mean_squared_error(y_true=endog[val_start: val_end],
                                            y_pred=predictions,
                                            squared=False))

        train_start += shift * 24
        train_end = train_start + (train_length * 24)
        val_start = train_end
        val_end = val_start + (val_length * 24)

        progress += 1

    print(progress)

    return sum(rmse_list) / len(rmse_list)

# RMSE for ARIMA

In [12]:
sarima_params = [(4, 1, 2, 0, 0, 0, 0)]

sarima_rmses = dict()
key = 0

for params in sarima_params:
    result = sliding_forecast(endog=test["Consumed Energy"],
                              trend='n', order=params[0: 3], seasonal_order=params[3: ],
                              train_length=28, val_length=3, shift=5)
    sarima_rmses[key] = result
    print(f"RMSE for ARIMA{params} is: {round(result, 5)}")
    key += 1


Non-stationary starting autoregressive parameters found. Using zeros as starting parameters.


Non-invertible starting MA parameters found. Using zeros as starting parameters.


Maximum Likelihood optimization failed to converge. Check mle_retvals


Non-stationary starting autoregressive parameters found. Using zeros as starting parameters.


Non-invertible starting MA parameters found. Using zeros as starting parameters.


Maximum Likelihood optimization failed to converge. Check mle_retvals


Non-stationary starting autoregressive parameters found. Using zeros as starting parameters.


Non-invertible starting MA parameters found. Using zeros as starting parameters.


Maximum Likelihood optimization failed to converge. Check mle_retvals


Non-stationary starting autoregressive parameters found. Using zeros as starting parameters.


Non-invertible starting MA parameters found. Using zeros as starting parameters.


Maximum Likelihood optimization failed to converge. Check mle_retvals



9
RMSE for ARIMA(4, 1, 2, 0, 0, 0, 0) is: 0.71028



Maximum Likelihood optimization failed to converge. Check mle_retvals



In [13]:
best_rmse = min(sarima_rmses.values())
best_param_key = [key for key in sarima_rmses if sarima_rmses[key] == best_rmse]

In [14]:
best_params = sarima_params[best_param_key[0]]
best_params, best_rmse

((4, 1, 2, 0, 0, 0, 0), 0.7102779222762857)

In [15]:
start_train = 0
end_train = start_train + 24*28
start_test = end_train
end_test = start_test + 3*24

model = SARIMAX(endog=test["Consumed Energy"][test.index[start_train: end_train]],
                trend='n',
                order=best_params[: 3],
                seasonal_order=best_params[3: ])\
    .fit(low_memory=True, disp=False, full_output=False)

predictions = pd.DataFrame(model.predict(start=test.index[start_test],
                                         end=test.index[end_test]))


Non-stationary starting autoregressive parameters found. Using zeros as starting parameters.


Non-invertible starting MA parameters found. Using zeros as starting parameters.



In [16]:
predictions#.index

Unnamed: 0,predicted_mean
2015-12-18 00:00:00,1.037413
2015-12-18 01:00:00,0.894844
2015-12-18 02:00:00,0.944502
2015-12-18 03:00:00,0.858394
2015-12-18 04:00:00,0.925186
...,...
2015-12-20 20:00:00,0.913851
2015-12-20 21:00:00,0.843740
2015-12-20 22:00:00,0.913788
2015-12-20 23:00:00,0.843803


In [17]:

fig = go.Figure(go.Scattergl(
    x=test[start_train: end_train].index,
    y=test['Consumed Energy'][start_train: end_train],
    name="Training Data",
    line=dict(color='rgb(84, 84, 84)')
))

fig.add_trace(go.Scattergl(
    x=test[start_test: end_test].index,
    y=test['Consumed Energy'][start_test: end_test],
    name="Observed",
    line=dict(color='rgb(234,143,129)')
))

fig.add_trace(go.Scattergl(
    x=predictions.index,
    y=predictions["predicted_mean"],
    name="Predicted",
    line=dict(color='rgb(32,115,171)')
))

fig.update_yaxes(title="Fixed Consumption [kWh]")
fig.update_layout(template="plotly",
                  legend=dict(orientation="h",
                              yanchor="bottom",
                              y=1,
                              xanchor="left",
                              x=0))

#fig.write_html("ARIMA_figs/best_consumption_arima_NEW.html")
fig.show()