In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import itertools

import statsmodels.api as sm
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.stattools import kpss
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.holtwinters import SimpleExpSmoothing
from statsmodels.tsa.holtwinters import Holt
from statsmodels.tsa.holtwinters import ExponentialSmoothing
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.statespace.dynamic_factor_mq import DynamicFactorMQ
from statsmodels.tsa.forecasting.stl import STLForecast
from statsmodels.tsa.seasonal import STL
from statsmodels.tsa.forecasting.theta import ThetaModel
from statsmodels.tools.eval_measures import rmse
from statsmodels.tools.eval_measures import mse

from sklearn.metrics import mean_squared_error
from prophet import Prophet

import warnings
warnings.filterwarnings('once')

In [4]:
# Correct the file path to the actual location of the CSV file
df = pd.read_csv('../Dataset/ConsumptionIndustry.csv', sep=';')

# Convert HourDK to datetime
df['HourDK'] = pd.to_datetime(df['HourDK'])

# Convert ConsumptionkWh to numeric
df['ConsumptionkWh'] = df['ConsumptionkWh'].str.replace(",", ".").astype(float)

train_start = "2021-01-01"
train_end = "2023-11-30"
test_start = "2023-12-01"
test_end = "2024-11-10"

df.index = df['HourDK']

df.drop(columns=['HourUTC', 'HourDK', 'MunicipalityNo', 'Branche'], inplace=True)
data_train = df[(df.index >= train_start) & (df.index <= train_end)]
data_test = df[(df.index >= test_start) & (df.index <= test_end)]

print(f"Training Set: {data_train.shape[0]} rows")
print(f"Test Set: {data_test.shape[0]} rows")

df = df['ConsumptionkWh']
print(df)
plt.figure(figsize=(7, 3))
plt.plot(data_train.index, data_train['ConsumptionkWh'], label=f'Train ({train_start} - {train_end})')
plt.plot(data_test.index, data_test['ConsumptionkWh'], label=f'Test ({test_start} - {test_end})')
plt.title('Consumption in dk private households')
plt.xlabel('Measurements')
plt.ylabel('Power (kW / charger)')
plt.legend()
plt.show()

Training Set: 25512 rows
Test Set: 8281 rows
HourDK
2021-01-01 01:00:00    35086.772
2021-01-01 02:00:00    31777.762
2021-01-01 03:00:00    28423.659
2021-01-01 04:00:00    25675.926
2021-01-01 05:00:00    24283.909
                         ...    
2024-11-10 19:00:00    48584.696
2024-11-10 20:00:00    44105.371
2024-11-10 21:00:00    41492.172
2024-11-10 22:00:00    37148.073
2024-11-10 23:00:00    33307.696
Name: ConsumptionkWh, Length: 33839, dtype: float64
