In [12]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm
import sklearn
from math import sqrt

First, Import the data

In [4]:
from HelpFunctions import import_csv_data

energyconsumption = import_csv_data.get_data(
    'hourly_ec_20201107_20231107.csv')

In [10]:
energyconsumption['weekday'] = energyconsumption.index.weekday
energyconsumption['hour'] = energyconsumption.index.hour
energyconsumption['month'] = energyconsumption.index.month

# create winter/cold dummy variable
energyconsumption['winter'] = energyconsumption['month'].apply(
    lambda x: 1 if x in [10, 11, 12, 1, 2, 3] else 0)

# Define mapping of hours to timeframes (based on graph) and create dummy variable
time_mapping = {
    'low_consumption_time': list(range(7)),  # differs a lot weekend/weekday
    'high_consumption_time': list(range(7, 20)),
    'transition_time': [6, 20, 21, 22, 23]}

for timeframe, hours in time_mapping.items():
    energyconsumption[timeframe] = energyconsumption['hour'].apply(
        lambda x: 1 if x in hours else 0)

# create weekend day dummy variable
energyconsumption['weekend_day'] = energyconsumption['weekday'].apply(
    lambda x: 1 if x in [5,6] else 0)

# drop unneccesary columns
energyconsumption = energyconsumption.drop(columns=['date', 'beginning', 'weekday', 'hour', 'month'])

energyconsumption

Unnamed: 0_level_0,energy_consumption,winter,low_consumption_time,high_consumption_time,transition_time,weekend_day
date_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-11-07 00:00:00,46.97475,1,1,0,0,1
2020-11-07 01:00:00,45.35550,1,1,0,0,1
2020-11-07 02:00:00,44.66450,1,1,0,0,1
2020-11-07 03:00:00,44.76300,1,1,0,0,1
2020-11-07 04:00:00,44.81150,1,1,0,0,1
...,...,...,...,...,...,...
2023-11-07 06:00:00,59.43725,1,1,0,1,0
2023-11-07 07:00:00,63.89000,1,0,1,0,0
2023-11-07 08:00:00,66.64725,1,0,1,0,0
2023-11-07 09:00:00,68.20925,1,0,1,0,0


## Let's fit our model

In [13]:
y_ec = energyconsumption['energy_consumption']
X_ec = energyconsumption.drop(
    columns=['energy_consumption', 'low_consumption_time']) #low consumption time as reference time --> drop

# add constant for the intercept term
X_ec = sm.add_constant(X_ec)

X_ec
# fit seasonal linear regression model
model = sm.OLS(y_ec, X_ec).fit()
model.summary()

0,1,2,3
Dep. Variable:,energy_consumption,R-squared:,0.715
Model:,OLS,Adj. R-squared:,0.715
Method:,Least Squares,F-statistic:,16470.0
Date:,"Fri, 10 Nov 2023",Prob (F-statistic):,0.0
Time:,16:20:13,Log-Likelihood:,-80466.0
No. Observations:,26291,AIC:,160900.0
Df Residuals:,26286,BIC:,161000.0
Df Model:,4,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,45.2099,0.074,611.208,0.000,45.065,45.355
winter,5.8969,0.064,92.576,0.000,5.772,6.022
high_consumption_time,14.9759,0.077,194.505,0.000,14.825,15.127
transition_time,7.9468,0.094,84.116,0.000,7.762,8.132
weekend_day,-9.6217,0.070,-136.610,0.000,-9.760,-9.484

0,1,2,3
Omnibus:,835.742,Durbin-Watson:,0.266
Prob(Omnibus):,0.0,Jarque-Bera (JB):,1030.674
Skew:,-0.382,Prob(JB):,1.56e-224
Kurtosis:,3.597,Cond. No.,5.14


## Calculate Forecasts

In [14]:
# create new dataframe containing future date_times and indepentent variables

last_ts = energyconsumption.index[-1]
horizon = pd.date_range(start=last_ts + pd.DateOffset(
    hours=1), periods=200, freq='H')

energy_forecast = pd.DataFrame({'date_time': horizon})
energy_forecast.set_index('date_time', inplace=True)

energy_forecast['weekday'] = energy_forecast.index.weekday
energy_forecast['hour'] = energy_forecast.index.hour
energy_forecast['month'] = energy_forecast.index.month

# create winter/cold dummy variable
energy_forecast['winter'] = energy_forecast['month'].apply(
    lambda x: 1 if x in [10, 11, 12, 1, 2, 3] else 0)

# time mapping already initialized
for timeframe, hours in time_mapping.items():
    energy_forecast[timeframe] = energy_forecast['hour'].apply(
        lambda x: 1 if x in hours else 0)

# create weekend day dummy variable
energy_forecast['weekend_day'] = energy_forecast['weekday'].apply(
    lambda x: 1 if x in [5,6] else 0)

energy_forecast

# Point forecasts
X_fc = energy_forecast.drop(columns=['weekday', 'hour', 'month','low_consumption_time'])
X_fc = sm.add_constant(X_fc, has_constant='add')
X_fc

# Make predictions
predictions_ec = model.predict(X_fc)
predictions_ec.head(20)


date_time
2023-11-07 11:00:00    66.082657
2023-11-07 12:00:00    66.082657
2023-11-07 13:00:00    66.082657
2023-11-07 14:00:00    66.082657
2023-11-07 15:00:00    66.082657
2023-11-07 16:00:00    66.082657
2023-11-07 17:00:00    66.082657
2023-11-07 18:00:00    66.082657
2023-11-07 19:00:00    66.082657
2023-11-07 20:00:00    59.053548
2023-11-07 21:00:00    59.053548
2023-11-07 22:00:00    59.053548
2023-11-07 23:00:00    59.053548
2023-11-08 00:00:00    51.106751
2023-11-08 01:00:00    51.106751
2023-11-08 02:00:00    51.106751
2023-11-08 03:00:00    51.106751
2023-11-08 04:00:00    51.106751
2023-11-08 05:00:00    51.106751
2023-11-08 06:00:00    59.053548
dtype: float64

Jetzt Quantile Regression um Quantile zu berechnen:

In [15]:
quantiles = [0.025, 0.25, 0.5, 0.75, 0.975]

model_qr = sm.QuantReg(y_ec, X_ec)

for q in quantiles:
    
    model_temp = model_qr.fit(q=q)

    # Calculate forecasts for X_fc using the fitted model for the current quantile
    forecast_temp = model_temp.predict(X_fc)

    # Add the forecasts to the energy_forecast DataFrame with a label like 'forecast025'
    energy_forecast[f'forecast{q}'] = forecast_temp

In [19]:
# Define the specific date and time combinations
selected_dates = ['2023-11-10 12:00:00', '2023-11-10 16:00:00', '2023-11-10 20:00:00',
                  '2023-11-11 12:00:00', '2023-11-11 16:00:00', '2023-11-11 20:00:00']

forecasting_results = energy_forecast.loc[selected_dates,
                                          'forecast0.025':'forecast0.975']

forecasting_results

Unnamed: 0_level_0,forecast0.025,forecast0.25,forecast0.5,forecast0.75,forecast0.975
date_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2023-11-10 12:00:00,51.965751,62.826253,66.557,70.110997,75.507699
2023-11-10 16:00:00,51.965751,62.826253,66.557,70.110997,75.507699
2023-11-10 20:00:00,47.158251,55.523751,59.529001,63.065389,68.723749
2023-11-11 12:00:00,43.996251,52.709002,56.941501,60.884165,65.971965
2023-11-11 16:00:00,43.996251,52.709002,56.941501,60.884165,65.971965
2023-11-11 20:00:00,39.188751,45.4065,49.913502,53.838557,59.188016


Unnamed: 0_level_0,weekday,hour,month,winter,low_consumption_time,high_consumption_time,transition_time,weekend_day,forecast0.025,forecast0.25,forecast0.5,forecast0.75,forecast0.975
date_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2023-11-07 11:00:00,1,11,11,1,0,1,0,0,51.965751,62.826253,66.557,70.110997,75.507699
2023-11-07 12:00:00,1,12,11,1,0,1,0,0,51.965751,62.826253,66.557,70.110997,75.507699
2023-11-07 13:00:00,1,13,11,1,0,1,0,0,51.965751,62.826253,66.557,70.110997,75.507699
2023-11-07 14:00:00,1,14,11,1,0,1,0,0,51.965751,62.826253,66.557,70.110997,75.507699
2023-11-07 15:00:00,1,15,11,1,0,1,0,0,51.965751,62.826253,66.557,70.110997,75.507699
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-11-15 14:00:00,2,14,11,1,0,1,0,0,51.965751,62.826253,66.557,70.110997,75.507699
2023-11-15 15:00:00,2,15,11,1,0,1,0,0,51.965751,62.826253,66.557,70.110997,75.507699
2023-11-15 16:00:00,2,16,11,1,0,1,0,0,51.965751,62.826253,66.557,70.110997,75.507699
2023-11-15 17:00:00,2,17,11,1,0,1,0,0,51.965751,62.826253,66.557,70.110997,75.507699
