In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

#For random forest for time series:
from skforecast.ForecasterAutoreg import ForecasterAutoreg
from skforecast.ForecasterAutoregCustom import ForecasterAutoregCustom
from skforecast.ForecasterAutoregMultiOutput import ForecasterAutoregMultiOutput
from skforecast.model_selection import grid_search_forecaster
from skforecast.model_selection import backtesting_forecaster

In [2]:
def arr(file, start='2020-12-21 23:00:00', end='2023-01-10 20:00:00', freq='h'):
    """Load readings from an Excel sheet and return step-over-step differences.

    The sheet's ``time`` and ``E_min_min`` columns are left-joined onto a
    regular timestamp grid, so grid timestamps that are absent from the sheet
    get NaN. Each output value is ``E_min_min`` at that grid point minus
    ``E_min_min`` at the previous grid point.

    Parameters
    ----------
    file : str or path-like
        Excel workbook passed to ``pd.read_excel``. It must contain a ``time``
        column formatted as ``'%d.%m.%Y %H:%M'`` and an ``E_min_min`` column.
        The same value is used as the name of the output column.
    start, end : str or datetime-like, optional
        Inclusive bounds of the timestamp grid. The defaults reproduce the
        range that was previously hard-coded.
    freq : str, optional
        Grid frequency. Defaults to hourly; lowercase ``'h'`` is used because
        the uppercase ``'H'`` alias is deprecated in recent pandas releases.

    Returns
    -------
    pandas.DataFrame
        Two columns, ``'Time'`` and ``file``. The difference is NaN for the
        first row and wherever the current or the previous reading is missing.
    """
    grid = pd.DataFrame({'Time': pd.date_range(start, end, freq=freq)})

    # rename() returns a new frame, so the assignment below never writes into
    # a slice of the frame returned by read_excel.
    readings = pd.read_excel(file)[['time', 'E_min_min']].rename(columns={'time': 'Time'})
    readings['Time'] = pd.to_datetime(readings['Time'], format='%d.%m.%Y %H:%M')

    merged = grid.merge(readings, on='Time', how='left')

    # diff() yields NaN for the first row and propagates NaN from either
    # operand. The previous explicit `== np.nan` checks could never match
    # (NaN compares unequal to itself), so the result was already relying on
    # this propagation.
    merged[file] = merged['E_min_min'].diff()
    return merged[['Time', file]]

In [3]:
# Number of time steps predicted per forecast call / backtesting fold.
units_to_forecast = 24


def moving_average(timeseries, window):
    """Return the centred rolling mean of ``timeseries``.

    Parameters
    ----------
    timeseries : pandas.Series or pandas.DataFrame
        Object exposing ``.rolling``.
    window : int
        Number of observations in each window.

    Returns
    -------
    Same type as ``timeseries``. Positions whose centred window is not fully
    populated are NaN (``rolling``'s default ``min_periods``).
    """
    centred_windows = timeseries.rolling(window=window, center=True)
    return centred_windows.mean()

#train = train_series[:24*365-units_to_forecast]
#test = train_series[24*365-units_to_forecast:24*365]

#fig, ax=plt.subplots(figsize=(11, 3))
#moving_average(train['num_kwh'], 3).sort_index().plot(ax=ax, label='train', color='blue')
#moving_average(test['num_kwh'], 1).sort_index().plot(ax=ax, label='test', color='orange')
#ax.legend();



## Short test forecast

In [None]:
# NOTE(review): `train`, `test`, `RandomForestRegressor` and `mean_squared_error`
# are not defined or imported in the visible cells — confirm they are provided
# elsewhere before running this on a fresh kernel.

# Autoregressive random forest that uses the previous 24 observations as features.
forecaster = ForecasterAutoreg(regressor=RandomForestRegressor(random_state=123), lags=24)
forecaster.fit(y=train['num_kwh'])
forecaster

# Forecast `units_to_forecast` steps ahead and relabel them with the test index
# so they line up with the held-out observations in the plot and the metric.
predictions = forecaster.predict(steps=units_to_forecast)
predictions.index = test.index

# Only the last 7*24 training rows are drawn, to keep the plot readable.
actual = train[-7*24:]

fig, ax = plt.subplots(figsize=(9, 4))
actual['num_kwh'].plot(ax=ax, label='train')
test['num_kwh'].plot(ax=ax, label='test')
predictions.plot(ax=ax, label='predictions')
ax.legend();

# Mean squared error of the forecast against the held-out values.
error_mse = mean_squared_error(y_true=test['num_kwh'], y_pred=predictions)
print(f"Test error (mse): {error_mse}")

## Hyperparameter tuning

In [None]:
# Template forecaster for the search. The single lag is only a placeholder:
# it is replaced by each entry of `lags_grid` during the grid search.
forecaster = ForecasterAutoreg(
    regressor=RandomForestRegressor(random_state=123),
    lags=1,
)

# Candidate lag settings to try as predictors.
lags_grid = [1, 2, 3, 12, 24, 48]

# Regressor hyperparameters to sweep: 50..90 trees in steps of 10, depths 3..9.
param_grid = {
    'n_estimators': list(range(50, 100, 10)),
    'max_depth': list(range(3, 10)),
}

# NOTE(review): `train` is not defined in the visible cells, and the later
# backtesting cell uses 24*365 as its initial train size — confirm that the
# two sizes are meant to differ.
# NOTE(review): return_best=True presumably leaves `forecaster` configured with
# the best combination found — confirm against the skforecast documentation.
results_grid = grid_search_forecaster(
    forecaster=forecaster,
    y=train_series[:]['num_kwh_normalized'],
    param_grid=param_grid,
    lags_grid=lags_grid,
    steps=units_to_forecast,
    refit=False,
    metric='mean_squared_error',
    initial_train_size=len(train),
    return_best=True,
    fixed_train_size=True,
    verbose=False,
)
results_grid

In [None]:
# Backtest the forecaster on everything after the first 24*365 rows, refitting
# for every fold of `units_to_forecast` steps.
# The interval bounds are percentiles: 2.5 and 97.5 enclose a central 95% band.
# NOTE(review): n_boot is presumably the number of bootstrap iterations used to
# estimate the interval — confirm against the skforecast documentation.
metric, predictions = backtesting_forecaster(
    forecaster=forecaster,
    y=train_series[:]['num_kwh_normalized'],
    initial_train_size=24*365,
    steps=units_to_forecast,
    refit=True,
    interval=[2.5, 97.5],
    n_boot=500,
    metric='mean_squared_error',
    fixed_train_size=True,
    verbose=True,
)
predictions.head(4)

In [None]:
# Relabel the backtest output with the index of the rows it was evaluated on
# (everything from row 24*365 onward) so all three series share one x-axis.
predictions.index = train_series[24*365:]['num_kwh_normalized'].index

fig, ax = plt.subplots(figsize=(22, 6))

# Draw order is kept as predictions, test, train: it determines both the
# stacking of the lines and the order of the legend entries.
predictions['pred'].plot(ax=ax, label='predictions', color="green", alpha=0.5)
train_series[24*365:]['num_kwh_normalized'].plot(
    ax=ax, label='test', color="darkorange", alpha=0.5
)
train_series[:24*365]['num_kwh_normalized'].plot(ax=ax, label='train', color="blue")

# Shaded band between the lower and upper interval bounds from the backtest.
ax.fill_between(
    predictions.index,
    predictions['lower_bound'],
    predictions['upper_bound'],
    color='deepskyblue',
    alpha=0.3,
    label='95% interval',
)
#ax.yaxis.set_major_formatter(ticker.EngFormatter())
ax.set(ylabel='kwh', title='Energy demand forecast')
ax.legend();

In [None]:
# Mean squared error of the backtest predictions over rows 24*365 onward.
error_mse = mean_squared_error(
    y_true=train_series[24*365:]['num_kwh_normalized'],
    y_pred=predictions['pred'],
)
print(f"Test error (mse): {error_mse}")

# Boolean array: True where the observed value lies within the predicted
# interval, with both bounds inclusive.
inside_interval = (
    train_series.loc[predictions.index, 'num_kwh_normalized']
    .between(predictions['lower_bound'], predictions['upper_bound'])
    .to_numpy()
)

# Fraction of observations covered by the interval.
coverage = inside_interval.mean()
print(f"Coverage of the predicted interval on test data: {100 * coverage}%")

In [None]:
# Naive baseline: predict zero for every backtested time step.
# NOTE(review): zero is presumably a sensible baseline because the series is
# normalized — confirm how `num_kwh_normalized` is built.
naive_model = np.zeros((len(predictions)))

naive_mse = mean_squared_error(
                y_true = train_series[24*365:]['num_kwh_normalized'],
                y_pred = naive_model
            )
print(f"Test error (mse): {naive_mse}")

# Ratio of the forecaster's MSE to the baseline's MSE. Values below 1 (below
# 100% in the printout) mean the forecaster has the lower error.
better = error_mse/naive_mse

# The previous wording ("how much fractionally less error") described 1 - ratio,
# not the ratio that is actually printed; the message now matches the value.
print(f"Forecaster MSE as a percentage of the naive model's MSE: {better*100}%")

In [None]:
# Lagged naive baseline: predict each value with the observation 24*7 rows
# earlier. The slice starts 24*7 rows before the test period and stops 24*7
# rows before the end, so it has exactly as many rows as train_series[24*365:].
# NOTE(review): 24*7 rows is one week only if the data is hourly — confirm.
naive_model2 = train_series[24*365-24*7:-24*7]
# Relabel with the test index so y_true and y_pred refer to the same time steps.
naive_model2.index = train_series[24*365:].index

naive_mse2 = mean_squared_error(
                y_true = train_series[24*365:]['num_kwh_normalized'],
                y_pred = naive_model2['num_kwh_normalized']
            )
print(f"Test error (mse): {naive_mse2}")

# Ratio of the forecaster's MSE to the baseline's MSE. Values below 1 (below
# 100% in the printout) mean the forecaster has the lower error.
better2 = error_mse/naive_mse2

# The previous wording ("how much fractionally less error") described 1 - ratio,
# not the ratio that is actually printed; the message now matches the value.
print(f"Forecaster MSE as a percentage of the naive model's MSE: {better2*100}%")