## Imports: libraries and dataset

In [12]:
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.statespace.sarimax import SARIMAX
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np
import itertools
from statsmodels.tsa.stattools import adfuller

import statsmodels.api as sm
import matplotlib.pyplot as plt
from statsmodels.tsa.api import VAR

from sklearn.preprocessing import StandardScaler

In [13]:
# Load the dataset
df = pd.read_csv('../../dataset/KAG_energydata_complete.csv')

# Convert the 'date' column to datetime format and set it as the index
df['date'] = pd.to_datetime(df['date'])
df.set_index('date', inplace=True)

Standardize the dataset

In [14]:
scaler = StandardScaler()
df_scaled = pd.DataFrame(scaler.fit_transform(df), index=df.index, columns=df.columns)

## Arima model

In [19]:
# Focusing on the 'Appliances' energy consumption
series = df_scaled['Appliances']

In [15]:
# Perform ADF test to check for stationarity
result = adfuller(series)
print('ADF Statistic:', result[0])
print('p-value:', result[1])

ADF Statistic: -21.61637819803618
p-value: 0.0


In [16]:
# If the p-value is greater than 0.05, difference the series
if result[1] > 0.05:
    series = series.diff().dropna()

In [18]:
# Parameter tuning for ARIMA using a smaller range for efficiency
p = d = q = range(0, 3)
best_aic = float("inf")
best_params = None
num_combinations = len(list(itertools.product(p, d, q)))

print(f"Total combinations to evaluate: {num_combinations}")

Total combinations to evaluate: 27


In [17]:
count = 0

for param in itertools.product(p, d, q):
    count += 1
    print(f"Evaluating combination {count}/{num_combinations}: ARIMA{param}")
    try:
        arima_model = ARIMA(series, order=param)
        results = arima_model.fit()
        if results.aic < best_aic:
            best_aic = results.aic
            best_params = param
    except Exception as e:
        print(f"Combination {param} failed with error: {e}")
        continue

print('Best ARIMA parameters:', best_params)
print('Best AIC:', best_aic)


Evaluating combination 1/27: ARIMA(0, 0, 0)


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


Evaluating combination 2/27: ARIMA(0, 0, 1)


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


Evaluating combination 3/27: ARIMA(0, 0, 2)


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


Evaluating combination 4/27: ARIMA(0, 1, 0)


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


Evaluating combination 5/27: ARIMA(0, 1, 1)


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


Evaluating combination 6/27: ARIMA(0, 1, 2)


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


Evaluating combination 7/27: ARIMA(0, 2, 0)


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


Evaluating combination 8/27: ARIMA(0, 2, 1)


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


Evaluating combination 9/27: ARIMA(0, 2, 2)


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Non-invertible starting MA parameters found.'


Evaluating combination 10/27: ARIMA(1, 0, 0)


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


Evaluating combination 11/27: ARIMA(1, 0, 1)


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


Evaluating combination 12/27: ARIMA(1, 0, 2)


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


Evaluating combination 13/27: ARIMA(1, 1, 0)


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


Evaluating combination 14/27: ARIMA(1, 1, 1)


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


Evaluating combination 15/27: ARIMA(1, 1, 2)


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


Evaluating combination 16/27: ARIMA(1, 2, 0)


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


Evaluating combination 17/27: ARIMA(1, 2, 1)


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


Evaluating combination 18/27: ARIMA(1, 2, 2)


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Non-invertible starting MA parameters found.'


Evaluating combination 19/27: ARIMA(2, 0, 0)


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


Evaluating combination 20/27: ARIMA(2, 0, 1)


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'


Evaluating combination 21/27: ARIMA(2, 0, 2)


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


Evaluating combination 22/27: ARIMA(2, 1, 0)


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


Evaluating combination 23/27: ARIMA(2, 1, 1)


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


Evaluating combination 24/27: ARIMA(2, 1, 2)


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


Evaluating combination 25/27: ARIMA(2, 2, 0)


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


Evaluating combination 26/27: ARIMA(2, 2, 1)


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


Evaluating combination 27/27: ARIMA(2, 2, 2)


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


Best ARIMA parameters: (2, 0, 2)
Best AIC: 38596.60781395479


In [20]:
# Fit the ARIMA model with the best parameters
arima_model = ARIMA(series, order=best_params)
arima_model_fit = arima_model.fit()

TypeError: 'NoneType' object is not subscriptable

In [None]:
# Forecast
forecast_steps = 30  # Forecast for 30 time steps ahead
forecast = arima_model_fit.forecast(steps=forecast_steps)

In [None]:
# Evaluate the model
y_train = series[:-forecast_steps]
y_test = series[-forecast_steps:]
arima_y_pred = arima_model_fit.predict(start=len(series) - forecast_steps, end=len(series) - 1)

arima_mae = mean_absolute_error(y_test, arima_y_pred)
arima_mse = mean_squared_error(y_test, arima_y_pred)
arima_rmse = np.sqrt(arima_mse)
arima_r2 = r2_score(y_test, arima_y_pred)

print('Mean Absolute Error (MAE):', arima_mae)
print('Mean Squared Error (MSE):', arima_mse)
print('Root Mean Squared Error (RMSE):', arima_rmse)
print('R-squared:', arima_r2)

Mean Absolute Error (MAE): 0.3876404087583555
Mean Squared Error (MSE): 0.35329591459236337
Root Mean Squared Error (RMSE): 0.5943870074222378
R-squared: 0.6314782426852672
