In [None]:
!pip install numpy pandas statsmodels matplotlib seaborn prophet sklearn 

In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import itertools

import statsmodels.api as sm
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.stattools import kpss
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.holtwinters import SimpleExpSmoothing
from statsmodels.tsa.holtwinters import Holt
from statsmodels.tsa.holtwinters import ExponentialSmoothing
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.statespace.dynamic_factor_mq import DynamicFactorMQ
from statsmodels.tsa.forecasting.stl import STLForecast
from statsmodels.tsa.seasonal import STL
from statsmodels.tsa.forecasting.theta import ThetaModel
from datetime import datetime, timedelta

from sklearn.metrics import mean_squared_error
from sklearn.metrics import root_mean_squared_error
from prophet import Prophet

# Detect whether the notebook runs inside Google Colab.
# Only a failed import means "not in Colab"; any error raised by the upload
# itself (or a user interrupt) is allowed to surface instead of being
# silently turned into IN_COLAB = False.
try:
  from google.colab import files
except ImportError:
  IN_COLAB = False
else:
  # In Colab: prompt the user to upload the input file(s) into the session.
  uploaded = files.upload()
  IN_COLAB = True

import warnings
# Show each distinct warning only once to keep cell output readable.
warnings.filterwarnings('once')

  from .autonotebook import tqdm as notebook_tqdm
Importing plotly failed. Interactive plots will not work.


Initializing time series

In [4]:
# Load the semicolon-separated consumption export. Inside Colab a bare file name is
# used (presumably the file uploaded in the import cell); otherwise the copy under
# ../Dataset is read.
df = pd.read_csv('ConsumptionIndustry.csv' if IN_COLAB else '../Dataset/ConsumptionIndustry.csv', sep=';')

# Parse the timestamp column and turn decimal-comma strings into floats.
df['HourDK'] = pd.to_datetime(df['HourDK'])
df['ConsumptionkWh'] = df['ConsumptionkWh'].str.replace(",", ".").astype(float)

# Index the frame by HourDK and discard the columns that are not used afterwards.
df = df.set_index('HourDK').drop(columns=['HourUTC', 'MunicipalityNo', 'Branche'])

Functions

In [9]:
def sample_data(df, start_date, train_window_size):
  """Return the rows of `df` needed to start forecasting at `start_date`.

  The slice begins `train_window_size` hours before `start_date` and runs up to
  (and including) the last index value of `df`.

  Args:
    df: frame whose index is compared against datetimes
      (assumed to be a DatetimeIndex -- TODO confirm).
    start_date: first forecast date as a 'YYYY-MM-DD' string.
    train_window_size: number of hours of history to keep before `start_date`.

  Returns:
    The rows of `df` whose index lies in [start_date - train_window_size hours, df.index[-1]].
  """
  first_forecast_ts = datetime.strptime(start_date, '%Y-%m-%d')
  window_start = first_forecast_ts - timedelta(hours=train_window_size)
  last_ts = df.index[-1]

  in_range = (df.index >= window_start) & (df.index <= last_ts)
  return df[in_range]

def get_next_window(data, train_window_size, forecast_horizon):
  """Split the head of `data` into a training slice and the test slice that follows it.

  Args:
    data: any sliceable sequence or frame.
    train_window_size: number of leading items used for training.
    forecast_horizon: number of items directly after the training slice used for testing.

  Returns:
    A `(train, test)` tuple of slices of `data`.
  """
  test_end = train_window_size + forecast_horizon
  train_part = data[:train_window_size]
  test_part = data[train_window_size:test_end]
  return train_part, test_part

def forecast_whitebox_model(model, forecast_horizon, model_name):
  """Fit `model` and return its forecast for the next `forecast_horizon` steps.

  Args:
    model: unfitted model object exposing `fit(disp=...)`.
    forecast_horizon: number of steps to forecast.
    model_name: label of the model; when it contains "SARIMA" the forecast is
      taken from `get_forecast(...).predicted_mean`, otherwise from `forecast(...)`.

  Returns:
    Whatever the fitted result returns for the chosen forecast call.
  """
  fitted = model.fit(disp=0)  # disp=0 is passed through to the model's fit()

  if "SARIMA" not in model_name:
    return fitted.forecast(steps=forecast_horizon)
  return fitted.get_forecast(steps=forecast_horizon).predicted_mean


White-box run

In [10]:
# --- Run configuration ---
model_name = 'Dynamic_model'
date_start = '2023-11-01'
window_train_size = 24*7*2 #hours
forecast_horizon = 24 #hours

# Rolling evaluation: fit on the first `window_train_size` rows, forecast the next
# `forecast_horizon` steps, then slide the window forward by `forecast_horizon` rows.
data = sample_data(df, date_start, window_train_size) # start: date_start - window_train_size, end: last date in df
forecast_chunks = []  # one flat array of predictions per iteration
iterations = 0

# Silence warnings only while fitting; the context manager restores the previously
# configured warning filters afterwards, even if a fit raises.
with warnings.catch_warnings():
  warnings.simplefilter("ignore")

  # NOTE(review): with `>` a remainder of exactly window_train_size + forecast_horizon
  # rows is not forecast -- confirm whether `>=` was intended.
  while len(data) > window_train_size + forecast_horizon:
    data_train, data_test = get_next_window(data, window_train_size, forecast_horizon)
    model = DynamicFactorMQ(endog=data_train)
    predictions = forecast_whitebox_model(model, forecast_horizon, model_name)

    # Flatten so every iteration contributes a 1-D run of values.
    forecast_chunks.append(np.ravel(predictions.values))
    iterations += 1
    data = data.iloc[forecast_horizon:] # move window by forecast_horizon

# Concatenate once instead of re-copying the whole array on every iteration.
results = np.concatenate(forecast_chunks) if forecast_chunks else np.array([])

result_table = pd.DataFrame(results)
# NOTE(review): the index is a synthetic hourly range starting at date_start; this assumes
# the sampled rows are gap-free hourly data beginning exactly at date_start -- confirm.
# A Timedelta is used for the frequency because the 'H' string alias is deprecated in pandas.
result_table.index = pd.date_range(start=date_start, periods=forecast_horizon*iterations, freq=pd.Timedelta(hours=1))
result_table.to_csv(f'../Results/{window_train_size}_{forecast_horizon}_{model_name}.csv', header=False)

Models and predictions

In [5]:
# model = SARIMAX(endog = data_train, order = (1, 1, 4), seasonal_order = (1, 1, 1, 12))
# model_res = model.fit(disp=0)

# predictions_statsmodels = model_res.get_forecast(steps=len(data_test)).predicted_mean
# predictions_statsmodels.name = 'predictions_statsmodels(1,1,1)'
# # display(predictions_statsmodels.head(4))

In [6]:
# model = SARIMAX(endog = data_train, order = (1, 1, 4), seasonal_order = (1, 1, 1, 12))
# model_res = model.fit(disp=0)

# predictions_statsmodels2 = model_res.get_forecast(steps=len(data_test)).predicted_mean
# predictions_statsmodels2.name = 'predictions_statsmodels(1,1,4)'

In [7]:
# stlf = STLForecast(data_train, SARIMAX, model_kwargs=dict(order=(1, 1, 1), seasonal_order = (1, 1, 1, 12)), period=24)
# model_res = stlf.fit()
# predictions_stlf = model_res.forecast(len(data_test))

In [8]:
# model = ThetaModel(data_train, period=24, method="additive")
# model_res = model.fit()
# predictions_theta = model_res.forecast(len(data_test))
# predictions_theta.index = data_test.index

# model2 = ThetaModel(data_train, period=24, method="multiplicative")
# model2_res = model2.fit()
# predictions_theta2 = model2_res.forecast(len(data_test))
# predictions_theta2.index = data_test.index

In [9]:
# model = DynamicFactorMQ(endog=data_train)
# model_res = model.fit()
# predictions_dynamicFactorMQ = model_res.forecast(len(data_test))

Plot of forecasts

In [10]:
# predictions_statsmodels.index = data_test.index
# predictions_statsmodels2.index = data_test.index
# predictions_stlf.index = data_test.index
# predictions_theta.index = data_test.index
# predictions_theta2.index = data_test.index
# predictions_dynamicFactorMQ.index = data_test.index

In [11]:
# fig, ax = plt.subplots(figsize=(8, 3))
# data_train.plot(ax=ax, label='train')
# data_test.plot(ax=ax, label='test')
# predictions_statsmodels.plot(ax=ax, label='arima_statsmodels(1,1,6)x(1,1,1)_12')
# # predictions_statsmodels2.plot(ax=ax, label='arima_statsmodels(1,1,4)x(1,1,1)_12')
# # predictions_dynamicFactorMQ.plot(ax=ax, label='dynamicFactorMQ')
# # predictions_stlf.plot(ax=ax, label='stlf')
# # predictions_theta.plot(ax=ax, label='theta')
# # predictions_theta2.plot(ax=ax, label='theta2')
# ax.set_title('Predictions')
# ax.legend()

Evaluate algorithm performance

In [13]:
# def evaluate(prediction, name):
#     print(f"-- {name} --")
#     print(f"mse: {mean_squared_error(data_test, prediction)}") #penalizes larger errors more
#     print(f"rmse: {root_mean_squared_error(data_test, prediction)}")

# evaluate(predictions_statsmodels, "statsmodels(1,1,6)x(1,1,1)_12")
# evaluate(predictions_statsmodels2, "statsmodels(1,1,4)x(1,1,1)_12")
# evaluate(predictions_stlf, "stlf")
# evaluate(predictions_theta, "theta")
# evaluate(predictions_theta2, "theta2")
# evaluate(predictions_dynamicFactorMQ, "dynamicFactorMQ")