In [1]:
# Enable IPython's autoreload extension in mode 2, so edits made to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
import sys
from pathlib import Path

# Put the parent of the current working directory on the import path (at
# position 1, right after the first entry) and echo it as the cell output.
# NOTE(review): presumably this is the repository root, so that the local
# package checkout is the one imported below -- confirm.
parent_dir = str(Path.cwd().parent)
sys.path.insert(1, parent_dir)
parent_dir

'c:\\Users\\jaesc2\\GitHub\\skforecast'

In [2]:
# Data processing
# ==============================================================================
import numpy as np
import pandas as pd

# Plots
# ==============================================================================
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
plt.style.use('seaborn-v0_8-darkgrid')

# Modelling and Forecasting
# ==============================================================================
from skforecast.ForecasterAutoreg import ForecasterAutoreg
from skforecast.ForecasterAutoregDirect import ForecasterAutoregDirect
from skforecast.plot import plot_prediction_distribution
from skforecast.model_selection import backtesting_forecaster
from skforecast.model_selection import grid_search_forecaster
from lightgbm import LGBMRegressor
from sklearn.metrics import mean_pinball_loss
from skforecast.exceptions import LongTrainingWarning

# Configuration
# ==============================================================================
import warnings
warnings.filterwarnings('once')

In [3]:
# Data download
# ==============================================================================
url = (
    'https://raw.githubusercontent.com/JoaquinAmatRodrigo/skforecast/master/'
    'data/vic_elec.csv'
)
raw = pd.read_csv(url, sep=',')

# Data preparation (aggregation at daily level)
# ==============================================================================
# Parse the timestamp column, index the frame by it on a regular 30-minute
# grid, drop the redundant 'Date' column and collapse everything to one row
# per day. With closed='left' and label='right' each daily bin is stamped with
# its right edge.
raw['Time'] = pd.to_datetime(raw['Time'], format='%Y-%m-%dT%H:%M:%SZ')
daily_aggregations = {'Demand': 'sum', 'Temperature': 'mean', 'Holiday': 'max'}
data = (
    raw
    .set_index('Time')
    .asfreq('30min')
    .sort_index()
    .drop(columns='Date')
    .resample(rule='D', closed='left', label='right')
    .agg(daily_aggregations)
)

data.head(3)

Unnamed: 0_level_0,Demand,Temperature,Holiday
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2012-01-01,82531.745918,21.047727,True
2012-01-02,227778.257304,26.578125,True
2012-01-03,275490.988882,31.751042,True


In [4]:
# Split data into train-validation-test
# ==============================================================================
data = data.loc['2012-01-01 00:00:00': '2014-12-30 23:00:00']
end_train = '2013-12-31 23:59:00'
end_validation = '2014-09-30 23:59:00'

# Label-based slicing with .loc includes BOTH end points, and the boundary
# labels are shared between consecutive splits. The splits stay disjoint only
# because the 23:59 boundaries are not themselves labels of the index.
data_train = data.loc[: end_train, :].copy()
data_val   = data.loc[end_train:end_validation, :].copy()
data_test  = data.loc[end_validation:, :].copy()

# Fail loudly if the three splits ever overlap or drop rows (for example after
# changing a boundary to a timestamp that exists in the index).
assert len(data_train) + len(data_val) + len(data_test) == len(data), (
    "Train/validation/test splits must partition the data without overlap."
)

# Report the date range and size of every split
for label, split in (
    ("Train dates", data_train),
    ("Validation dates", data_val),
    ("Test dates", data_test),
):
    print(
        f"{label:<17}: {split.index.min()} --- {split.index.max()}"
        f"  (n={len(split)})"
    )

Train dates      : 2012-01-01 00:00:00 --- 2013-12-31 00:00:00  (n=731)
Validation dates : 2014-01-01 00:00:00 --- 2014-09-30 00:00:00  (n=273)
Test dates       : 2014-10-01 00:00:00 --- 2014-12-30 00:00:00  (n=91)


In [5]:
# Create and train a ForecasterAutoreg
# ==============================================================================
# Gradient boosting regressor wrapped by the autoregressive forecaster,
# which uses the 7 previous observations as predictors.
regressor = LGBMRegressor(
    learning_rate=0.01,
    max_depth=10,
    n_estimators=500,
    random_state=123,
)
forecaster = ForecasterAutoreg(regressor=regressor, lags=7)

# NOTE(review): the fit window runs from end_train to end_validation, i.e. it
# covers the validation period only, not train + validation -- confirm that
# this is intended.
y_fit = data.loc[end_train:end_validation, 'Demand']
forecaster.fit(y=y_fit)
forecaster

ForecasterAutoreg 
Regressor: LGBMRegressor(learning_rate=0.01, max_depth=10, n_estimators=500,
              random_state=123) 
Lags: [1 2 3 4 5 6 7] 
Transformer for y: None 
Transformer for exog: None 
Window size: 7 
Weight function included: False 
Differentiation order: None 
Exogenous included: False 
Type of exogenous variable: None 
Exogenous variables names: None 
Training range: [Timestamp('2014-01-01 00:00:00'), Timestamp('2014-09-30 00:00:00')] 
Training index type: DatetimeIndex 
Training index frequency: D 
Regressor parameters: {'boosting_type': 'gbdt', 'class_weight': None, 'colsample_bytree': 1.0, 'importance_type': 'split', 'learning_rate': 0.01, 'max_depth': 10, 'min_child_samples': 20, 'min_child_weight': 0.001, 'min_split_gain': 0.0, 'n_estimators': 500, 'n_jobs': -1, 'num_leaves': 31, 'objective': None, 'random_state': 123, 'reg_alpha': 0.0, 'reg_lambda': 0.0, 'silent': 'warn', 'subsample': 1.0, 'subsample_for_bin': 200000, 'subsample_freq': 0} 
fit_kwargs: {} 
C

In [6]:
# Bootstrapped forecasts: 10 simulated sequences, each 7 steps ahead
# ==============================================================================
boot_predictions = forecaster.predict_bootstrapping(
    steps=7,
    n_boot=10,
)
boot_predictions

Unnamed: 0,pred_boot_0,pred_boot_1,pred_boot_2,pred_boot_3,pred_boot_4,pred_boot_5,pred_boot_6,pred_boot_7,pred_boot_8,pred_boot_9
2014-10-01,208783.097577,204764.999003,201367.20342,212280.661192,204622.100582,202889.246579,210078.609982,210151.742392,200990.348257,203137.232359
2014-10-02,212340.130913,208320.030667,217221.147681,209996.152477,218464.949614,212545.697772,212061.596039,216579.376162,226902.326444,212847.017644
2014-10-03,221380.131363,223890.630246,224625.20484,207260.01897,206099.826214,288939.369743,220085.831843,229514.853352,230476.360893,224059.729506
2014-10-04,209943.773973,212946.528553,200027.296792,203592.696069,194120.73068,262225.65328,212516.896678,222042.271003,215236.033624,222487.580478
2014-10-05,193372.607408,177195.227257,194351.934752,206606.909536,202654.435499,297740.735825,192454.57653,199372.141645,208527.690318,197830.62438
2014-10-06,199626.367544,203338.054671,207145.334747,208993.695577,204766.177714,258050.416951,184903.916795,201473.420885,204790.425542,183312.907014
2014-10-07,201546.003371,212477.910879,209639.468951,204102.852012,225036.945159,257847.86904,197235.769115,196990.134684,200007.936217,207116.299541


In [9]:
# Row-wise quantiles across the bootstrap columns, transposed so that every
# quantile level becomes a column; show the resulting column labels.
quantile_levels = [0.05, 0.5, 0.95]
boot_quantiles = boot_predictions.quantile(q=quantile_levels, axis=1).transpose()
boot_quantiles.columns

Index([0.05, 0.5, 0.95], dtype='float64')

In [14]:
# Predict quantiles for next 7 steps: quantile 0 (minimum) and quantile 0.10
# ==============================================================================
# predict_quantiles is given quantiles as fractions (0.10), unlike
# predict_interval, which is called in this notebook with percentiles (10).
# Estimates are based on 1000 bootstrap iterations.
predictions = forecaster.predict_quantiles(
                  steps     = 7,
                  quantiles = [0, 0.10],
                  n_boot    = 1000
              )
predictions

Unnamed: 0,0.0,0.1
2014-10-01,150754.36569,195483.862851
2014-10-02,158719.737178,204160.738452
2014-10-03,169432.23034,212599.136391
2014-10-04,148400.111461,196863.841123
2014-10-05,139708.490626,185544.868289
2014-10-06,156623.431501,188334.486578
2014-10-07,142412.94771,191797.716429


In [15]:
# Prediction interval for the next 7 steps, bounded by the 10th and 90th
# percentiles and estimated with 1000 bootstrap iterations
# ==============================================================================
predictions = forecaster.predict_interval(
    steps=7,
    interval=[10, 90],
    n_boot=1000,
)
predictions

Unnamed: 0,pred,lower_bound,upper_bound
2014-10-01,205723.923923,195483.862851,214883.472075
2014-10-02,215167.163121,204160.738452,225623.750301
2014-10-03,225144.443075,212599.136391,237675.514362
2014-10-04,211406.440681,196863.841123,234950.293307
2014-10-05,194848.766987,185544.868289,225497.131653
2014-10-06,201901.819903,188334.486578,226720.998742
2014-10-07,208648.526025,191797.716429,231948.852094


In [17]:
import numpy as np
import pandas as pd
from skforecast.ForecasterAutoregMultiSeries import ForecasterAutoregMultiSeries
from sklearn.linear_model import LinearRegression
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import OneHotEncoder

# Fixtures
from skforecast.ForecasterAutoregMultiSeries.tests.fixtures_ForecasterAutoregMultiSeries import series
from skforecast.ForecasterAutoregMultiSeries.tests.fixtures_ForecasterAutoregMultiSeries import exog
from skforecast.ForecasterAutoregMultiSeries.tests.fixtures_ForecasterAutoregMultiSeries import exog_predict

# Preprocessing of the exogenous variables: standardise 'exog_1', one-hot
# encode 'exog_2' and pass any remaining column through unchanged. Output
# feature names are not prefixed with the step name.
exog_steps = [
    ('scale', StandardScaler(), ['exog_1']),
    ('onehot', OneHotEncoder(), ['exog_2']),
]
transformer_exog = ColumnTransformer(
    exog_steps,
    remainder='passthrough',
    verbose_feature_names_out=False,
)

In [21]:

# Multi-series forecaster: linear model on 3 lags, with the series scaled by
# StandardScaler and the exogenous variables preprocessed by transformer_exog.
forecaster = ForecasterAutoregMultiSeries(
    regressor=LinearRegression(),
    lags=3,
    transformer_series=StandardScaler(),
    transformer_exog=transformer_exog,
)
forecaster.fit(series=series, exog=exog)

# Quantile forecasts for the next 2 steps, using in-sample residuals and
# 4 bootstrap iterations. No explicit levels are selected (levels=None).
quantile_settings = dict(
    steps=2,
    quantiles=[0.05, 0.55, 0.95],
    levels=None,
    exog=exog_predict,
    n_boot=4,
    in_sample_residuals=True,
)
results = forecaster.predict_quantiles(**quantile_settings)
results

Unnamed: 0,1_0.05,1_0.55,1_0.95,2_0.05,2_0.55,2_0.95
50,0.201336,0.404609,0.537619,0.2766,0.615933,0.76168
51,0.077398,0.194541,0.453744,0.186019,0.404039,0.704617


In [22]:
# Underlying array of the quantile predictions
results.to_numpy()

array([[0.20133572, 0.40460928, 0.53761911, 0.27660015, 0.61593304,
        0.7616801 ],
       [0.07739771, 0.19454095, 0.45374409, 0.18601931, 0.40403914,
        0.70461714]])

In [23]:
# Column labels of the frame returned by predict_quantiles
results.columns

Index(['1_0.05', '1_0.55', '1_0.95', '2_0.05', '2_0.55', '2_0.95'], dtype='object')