**Direct forecasting**

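Forecast a horizon of N steps by training N separate models, one for each step ahead (as opposed to the recursive strategy, where a single one-step model is fed its own predictions).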

In [96]:
import random
import lightgbm as lgb
import pandas as pd
from datasetsforecast.m4 import M4, M4Info
from utilsforecast.evaluation import evaluate
from utilsforecast.losses import smape

from mlforecast import MLForecast
from mlforecast.lag_transforms import ExponentiallyWeightedMean, RollingMean
from mlforecast.target_transforms import Differences


In [97]:
await M4.async_download('data', group='Hourly')
df, *_ = M4.load('data', 'Hourly')
uids = df['unique_id'].unique()
random.seed(0)
sample_uids = random.choices(uids, k=1)
df = df[df['unique_id'].isin(sample_uids)].reset_index(drop=True)
df['ds'] = df['ds'].astype('int64')
df

Unnamed: 0,unique_id,ds,y
0,H413,1,21.0
1,H413,2,16.0
2,H413,3,17.0
3,H413,4,19.0
4,H413,5,16.0
...,...,...,...
1003,H413,1004,99.0
1004,H413,1005,88.0
1005,H413,1006,47.0
1006,H413,1007,41.0


In [98]:
# (rows, columns) of the sampled frame: columns are unique_id, ds, y
df.shape

(1008, 3)

In [99]:
# Confirm which series id(s) survived the sampling step
df['unique_id'].unique()

array(['H413'], dtype=object)

In [100]:
# multivariate: multiple regressors
# panel data: multiple time series
# global model with dependencies between time series

In [101]:
# Must name the same M4 group that was downloaded and loaded earlier.
# Defined here so the M4Info lookup works on a fresh kernel.
group = 'Hourly'
info = M4Info[group]
horizon = info.horizon
print(horizon)

# Hold out the last `horizon` observations of every series for validation;
# everything before that is the training set.
valid = df.groupby('unique_id').tail(horizon)
train = df.drop(valid.index)

# The split must partition the data: no row lost, none duplicated.
assert len(train) + len(valid) == len(df)

48


In [102]:
# Training rows left after removing the validation tail
train.shape

(960, 3)

In [103]:
def avg_smape(df):
    """Compute the SMAPE of the forecasts in `df` against `valid`, per series.

    Despite the name, nothing is averaged across series here: the result
    keeps one value per `unique_id`.

    Parameters
    ----------
    df : pandas.DataFrame
        Forecast frame. It is merged with the module-level `valid` frame on
        the columns they share (presumably `unique_id` and `ds` -- confirm
        against the forecast output).

    Returns
    -------
    pandas.Series or pandas.DataFrame
        SMAPE indexed by `unique_id`. A Series when the evaluation yields a
        single score column, otherwise a DataFrame with one column per
        score column.
    """
    # Inner merge keeps only the forecast rows that have a matching actual.
    full = df.merge(valid)
    scores = (
        evaluate(full, metrics=[smape])
        .drop(columns='metric')
        .set_index('unique_id')
    )
    # Squeeze the column axis only. A bare `.squeeze()` would also collapse
    # the row axis and return a scalar when there is a single series, which
    # breaks callers that chain `.rename(...)` / `.to_frame()` on the result.
    return scores.squeeze(axis=1)


In [104]:
# Forecaster shared by the direct and recursive experiments below.
# Only the three most recent observations are used as features, which keeps
# the model inputs small enough to inspect by eye.
fcst = MLForecast(
    models=lgb.LGBMRegressor(verbosity=-1, random_state=0),
    freq=1,  # `ds` was cast to int64 when loading, so the step is an integer
    lags=list(range(1, 4)),
    num_threads=1,
)
# Richer configuration kept for reference (currently disabled):
#     lags=[24 * (i+1) for i in range(7)],
#     lag_transforms={
#         1: [RollingMean(window_size=24)],
#         24: [RollingMean(window_size=24)],
#         48: [ExponentiallyWeightedMean(alpha=0.3)],
#     },
#     target_transforms=[Differences([24])],


In [105]:
def inspect_input(new_x):
    """Display the features handed to the model and return them unchanged.

    Defined in this cell because it is first used here; on a fresh kernel the
    later definition has not run yet.
    """
    display(new_x)
    return new_x


# NOTE: this rebinds `horizon`, which was previously set from the M4 metadata
# and used to cut the validation tail.
horizon = 24
# the following will train 24 models, one for each horizon
individual_fcst = fcst.fit(train, max_horizon=horizon)
# The callback output below shows a single feature row for this strategy.
individual_preds = individual_fcst.predict(horizon, before_predict_callback=inspect_input)
# avg_smape_individual = avg_smape(individual_preds).rename('individual')

Unnamed: 0,lag1,lag2,lag3
0,46.0,38.0,53.0


In [106]:
# Rebuild the training features/target as numpy arrays so they can be
# compared by eye with the raw rows of `train`.
X, y = individual_fcst.preprocess(
    train,
    return_X_y=True,
    as_numpy=True,
)

In [107]:
# Number of training samples: 960 training rows minus the first 3, which
# have no complete lag1..lag3 history
len(X)

957

In [108]:
# Feature row of the last training sample (lag1, lag2, lag3)
X[-1]

array([38., 53., 58.])

In [109]:
# Target of the last training sample (X and y have the same length)
y[-1]

np.float64(46.0)

In [110]:
# Cross-check against the raw data: the last row's y (46) is the last target,
# and the three rows before it (38, 53, 58) are its lag1, lag2, lag3 features.
train.tail(5)

Unnamed: 0,unique_id,ds,y
955,H413,956,59.0
956,H413,957,58.0
957,H413,958,53.0
958,H413,959,38.0
959,H413,960,46.0


In [111]:
# the following will train a single model and use the recursive strategy

def inspect_input(new_x):
    """Show the features about to be passed to the model.

    Intended as a `before_predict_callback`: the input is rendered with
    `display` and then handed back untouched.
    """
    features = new_x
    display(features)
    return features

# Without max_horizon a single one-step model is fitted. The callback output
# below shows one feature row per step, where lag1 is the previous step's
# prediction.
# NOTE(review): this refits the same `fcst` used for the direct strategy;
# presumably `fit` returns that same object, so `individual_fcst` would now
# reflect this fit too -- confirm.
recursive_fcst = fcst.fit(train)
recursive_preds = recursive_fcst.predict(
    horizon,
    before_predict_callback=inspect_input,
)
# avg_smape_recursive = avg_smape(recursive_preds).rename('recursive')


Unnamed: 0,lag1,lag2,lag3
0,46.0,38.0,53.0


Unnamed: 0,lag1,lag2,lag3
0,36.954462,46.0,38.0


Unnamed: 0,lag1,lag2,lag3
0,37.9471,36.954462,46.0


Unnamed: 0,lag1,lag2,lag3
0,23.675939,37.9471,36.954462


Unnamed: 0,lag1,lag2,lag3
0,21.478337,23.675939,37.9471


Unnamed: 0,lag1,lag2,lag3
0,20.41735,21.478337,23.675939


Unnamed: 0,lag1,lag2,lag3
0,18.226453,20.41735,21.478337


Unnamed: 0,lag1,lag2,lag3
0,14.760322,18.226453,20.41735


Unnamed: 0,lag1,lag2,lag3
0,18.402782,14.760322,18.226453


Unnamed: 0,lag1,lag2,lag3
0,16.527327,18.402782,14.760322


Unnamed: 0,lag1,lag2,lag3
0,20.250658,16.527327,18.402782


Unnamed: 0,lag1,lag2,lag3
0,21.452191,20.250658,16.527327


Unnamed: 0,lag1,lag2,lag3
0,24.421443,21.452191,20.250658


Unnamed: 0,lag1,lag2,lag3
0,30.67816,24.421443,21.452191


Unnamed: 0,lag1,lag2,lag3
0,33.381225,30.67816,24.421443


Unnamed: 0,lag1,lag2,lag3
0,43.881575,33.381225,30.67816


Unnamed: 0,lag1,lag2,lag3
0,47.477304,43.881575,33.381225


Unnamed: 0,lag1,lag2,lag3
0,52.509863,47.477304,43.881575


Unnamed: 0,lag1,lag2,lag3
0,57.755596,52.509863,47.477304


Unnamed: 0,lag1,lag2,lag3
0,51.587856,57.755596,52.509863


Unnamed: 0,lag1,lag2,lag3
0,44.613664,51.587856,57.755596


Unnamed: 0,lag1,lag2,lag3
0,49.341912,44.613664,51.587856


Unnamed: 0,lag1,lag2,lag3
0,47.160314,49.341912,44.613664


Unnamed: 0,lag1,lag2,lag3
0,39.895212,47.160314,49.341912


In [112]:
# Training matrix as seen by the recursive model: one row per usable
# timestamp, columns are lag1, lag2, lag3.
X, y = fcst.preprocess(
    train,
    return_X_y=True,
    as_numpy=True,
)
X

array([[17., 16., 21.],
       [19., 17., 16.],
       [16., 19., 17.],
       ...,
       [58., 59., 68.],
       [53., 58., 59.],
       [38., 53., 58.]])

In [113]:
# Features and targets must be aligned row for row
len(X), len(y)

(957, 957)

In [114]:
# First usable sample: lag1..lag3 are the y values at ds=3, 2, 1 (17, 16, 21)
X[0]

array([17., 16., 21.])

In [115]:
# Its target is the y value at ds=4 (19)
y[0]

np.float64(19.0)

In [116]:
# results
print('Average SMAPE per method and serie')
# NOTE(review): the comparison table is disabled because the
# avg_smape_individual / avg_smape_recursive assignments are commented out in
# the forecasting cells; only the header is printed for now.
# avg_smape_individual.to_frame().join(avg_smape_recursive).applymap('{:.1%}'.format)

Average SMAPE per method and serie
