In [None]:
!pip install numpy pandas statsmodels matplotlib seaborn prophet sklearn 

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import itertools

import statsmodels.api as sm
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.stattools import kpss
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.holtwinters import SimpleExpSmoothing
from statsmodels.tsa.holtwinters import Holt
from statsmodels.tsa.holtwinters import ExponentialSmoothing
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.statespace.dynamic_factor_mq import DynamicFactorMQ
from statsmodels.tsa.forecasting.stl import STLForecast
from statsmodels.tsa.seasonal import STL
from statsmodels.tsa.forecasting.theta import ThetaModel
from statsmodels.tools.eval_measures import rmse
from statsmodels.tools.eval_measures import mse
from datetime import datetime, timedelta

from sklearn.metrics import mean_squared_error
from prophet import Prophet

import warnings

# The upload widget only exists inside Google Colab. Guard the import so the
# notebook also runs in a local Jupyter kernel, where the CSV is expected to be
# present in the working directory already.
try:
    from google.colab import files # type: ignore
except ImportError:
    files = None

if files is not None:
    # Upload files to Google Colab
    uploaded = files.upload()
else:
    # Nothing was uploaded; keep the name defined for any later cell that reads it.
    uploaded = {}

warnings.filterwarnings('once')

In [None]:
# Load the consumption export (semicolon-separated CSV).
# Alternative local path:
# df = pd.read_csv('../Dataset/ConsumptionIndustry.csv', sep=';')
df = pd.read_csv('ConsumptionIndustry.csv', sep=';')

# Convert HourDK to datetime
df['HourDK'] = pd.to_datetime(df['HourDK'])

# Convert ConsumptionkWh to numeric (the raw strings use a decimal comma)
df['ConsumptionkWh'] = df['ConsumptionkWh'].str.replace(",", ".").astype(float)

# Inclusive calendar-day boundaries of the train / test windows.
train_start = "2021-01-01"
train_end = "2023-11-30"
test_start = "2023-12-01"
test_end = "2024-11-10"

# Use the local timestamp as the index and keep only the measurement column(s).
df.index = df['HourDK']
df = df.drop(columns=['HourUTC', 'HourDK', 'MunicipalityNo', 'Branche'])

# Compare on the calendar day rather than the raw timestamp: a bare date string
# is interpreted as midnight, so `df.index <= train_end` would silently drop
# every timestamp after 00:00 on the end date and leave a gap between the
# training and the test window.
day_index = df.index.normalize()
data_train = df[(day_index >= train_start) & (day_index <= train_end)]
data_test = df[(day_index >= test_start) & (day_index <= test_end)]

print(f"Training Set: {data_train.shape[0]} rows")
print(f"Test Set: {data_test.shape[0]} rows")

# From here on `df` is the consumption Series only.
df = df['ConsumptionkWh']
print(df)

plt.figure(figsize=(7, 3))
plt.plot(data_train.index, data_train['ConsumptionkWh'], label=f'Train ({train_start} - {train_end})')
plt.plot(data_test.index, data_test['ConsumptionkWh'], label=f'Test ({test_start} - {test_end})')
plt.title('Consumption in dk private households')
# The x axis is the HourDK timestamp and the y axis is the ConsumptionkWh column.
plt.xlabel('Time')
plt.ylabel('Consumption (kWh)')
plt.legend()
plt.show()

In [None]:
def optimize_SARIMAX(endog, order_range=range(0, 1), seasonal_period=12):
  """Grid-search SARIMAX orders on ``endog`` and rank them by in-sample RMSE.

  Every combination of non-seasonal ``(p, d, q)`` and seasonal
  ``(P, D, Q, seasonal_period)`` orders drawn from ``order_range`` is fitted
  with stationarity and invertibility enforcement disabled. For each fit
  that succeeds, the AIC and the RMSE between ``endog`` and the model's
  fitted values are recorded and printed.

  Parameters
  ----------
  endog : array-like
      Observed series passed straight to ``SARIMAX``.
  order_range : iterable of int, optional
      Candidate values used for each of p, d, q and P, D, Q. The default
      ``range(0, 1)`` evaluates only ``(0, 0, 0)x(0, 0, 0, seasonal_period)``.
  seasonal_period : int, optional
      Seasonal periodicity placed in the fourth slot of ``seasonal_order``.

  Returns
  -------
  pandas.DataFrame
      Columns ``parameters``, ``aic`` and ``rmse``, sorted by ascending
      ``rmse``. Empty (but with those columns) when no combination could be
      fitted.
  """
  # Materialise once so a one-shot iterator can be reused for all six orders.
  orders = list(order_range)
  order_grid = list(itertools.product(orders, orders, orders))
  seasonal_grid = [(P, D, Q, seasonal_period) for P, D, Q in order_grid]

  results = []
  for param in order_grid:
    for seasonal_param in seasonal_grid:
      label = f"{param}x{seasonal_param}"
      try:
        model = SARIMAX(
          endog,
          order=param,
          seasonal_order=seasonal_param,
          enforce_stationarity=False,
          enforce_invertibility=False,
        ).fit(maxiter=200, disp=0)  # disp=0 to reduce output verbosity
        aic = model.aic
        # np.sqrt(MSE) instead of `squared=False`: that keyword was deprecated
        # in scikit-learn 1.4 and later removed.
        fit_rmse = float(np.sqrt(mean_squared_error(endog, model.fittedvalues)))
      except Exception as exc:
        # Skip combinations that cannot be fitted or scored, but say so
        # instead of hiding the failure (and let KeyboardInterrupt through).
        print(f"{label} - skipped: {exc}")
        continue

      results.append([label, aic, fit_rmse])
      print(f"{label} - AIC: {aic} - RMSE: {fit_rmse}")

  # Passing the column names here keeps the table well-formed when `results`
  # is empty; assigning `.columns` afterwards would raise in that case.
  result_table = pd.DataFrame(results, columns=['parameters', 'aic', 'rmse'])
  result_table = result_table.sort_values(by='rmse', ascending=True).reset_index(drop=True)

  return result_table

# Run the order search on the training split; the returned table (sorted by
# ascending RMSE) is the cell's displayed output.
optimize_SARIMAX(data_train)