**Add exogenous variables**

Source: https://nixtlaverse.nixtla.io/mlforecast/docs/how-to-guides/exogenous_features.html

In [35]:
import lightgbm as lgb
import pandas as pd
from mlforecast import MLForecast
from mlforecast.lag_transforms import ExpandingMean, RollingMean
from mlforecast.utils import generate_daily_series, generate_prices_for_series

In [36]:
# Simulate 100 daily series (equal_ends=True) carrying two static features,
# then relabel the second static feature so it acts as a product identifier.
series = generate_daily_series(100, n_static_features=2, equal_ends=True)
series = series.rename(columns={'static_1': 'product_id'})
series.head()

Unnamed: 0,unique_id,ds,y,static_0,product_id
0,id_00,2000-10-05,39.811983,79,45
1,id_00,2000-10-06,103.274013,79,45
2,id_00,2000-10-07,176.574744,79,45
3,id_00,2000-10-08,258.9879,79,45
4,id_00,2000-10-09,344.940404,79,45


In [37]:
# Keep a single series so the rest of the walkthrough is easy to follow.
series = series.loc[series['unique_id'] == 'id_00']

In [38]:
# Number of distinct series left after the filter (missing ids, if any, count too).
series['unique_id'].nunique(dropna=False)

1

In [39]:
# First and last timestamp of each series; in general, series may start and
# end on different dates.
# observed=True limits the groups to the ids actually present in `series`.
# NOTE(review): the warning echoed under this cell looks like pandas'
# FutureWarning about the `observed` default for categorical group keys, which
# suggests `unique_id` is categorical; without observed=True the categories
# removed by the filter above would come back as all-NaT rows — confirm.
_temp = series.groupby(['unique_id'], observed=True)['ds'].agg(['min', 'max'])
# Column-wise maximum of the per-series start and end dates.
_temp.max().unique()

  _temp = series.groupby(['unique_id'])['ds'].agg(['min', 'max'])


<DatetimeArray>
['2000-10-05 00:00:00', '2001-05-14 00:00:00']
Length: 2, dtype: datetime64[ns]

In [40]:
# Build a price table for the series; it is merged into the training data and
# later passed to `predict` as the exogenous variable. The date checks further
# down show that it extends 7 days past the last observation in `series`.
prices_catalog = generate_prices_for_series(series)
prices_catalog.head()

Unnamed: 0,ds,unique_id,price
0,2000-10-05,id_00,0.548814
1,2000-10-06,id_00,0.715189
2,2000-10-07,id_00,0.602763
3,2000-10-08,id_00,0.544883
4,2000-10-09,id_00,0.423655


In [41]:
# Restrict the price table to the same single series kept in `series`.
prices_catalog = prices_catalog.loc[prices_catalog['unique_id'] == 'id_00']

In [42]:
# `price` can be used at prediction time because the catalog also contains
# values for future dates (compare its last date with the one of `series`).
_temp = prices_catalog.groupby('unique_id')['ds'].agg(['min', 'max'])
_temp.max().unique()

<DatetimeArray>
['2000-10-05 00:00:00', '2001-05-21 00:00:00']
Length: 2, dtype: datetime64[ns]

In [43]:
# Attach the price to every observed row. The two frames share exactly the
# `unique_id` and `ds` columns, so those are the join keys; the left join keeps
# only the dates present in `series`.
series_with_prices = pd.merge(
    series,
    prices_catalog,
    how='left',
    on=['unique_id', 'ds'],
)
series_with_prices.head()

Unnamed: 0,unique_id,ds,y,static_0,product_id,price
0,id_00,2000-10-05,39.811983,79,45,0.548814
1,id_00,2000-10-06,103.274013,79,45,0.715189
2,id_00,2000-10-07,176.574744,79,45,0.602763
3,id_00,2000-10-08,258.9879,79,45,0.544883
4,id_00,2000-10-09,344.940404,79,45,0.423655


In [44]:
# Sanity check on the merge result: (rows, columns).
series_with_prices.shape

(222, 6)

In [45]:
# Last timestamp of the raw series, of the price catalog and of the merged frame.
tuple(
    frame['ds'].max()
    for frame in (series, prices_catalog, series_with_prices)
)

(Timestamp('2001-05-14 00:00:00'),
 Timestamp('2001-05-21 00:00:00'),
 Timestamp('2001-05-14 00:00:00'))

In [46]:
# Forecaster configuration: a single LightGBM regressor on daily data.
fcst = MLForecast(
    models=lgb.LGBMRegressor(n_jobs=1, random_state=0, verbosity=-1),
    freq='D',
    # Target value 3 days earlier (column `lag3` in the output below).
    lags=[3],
    lag_transforms={
        # Disabled alternative: expanding mean over the lag-6 values.
        # 6: [ExpandingMean()],
        # Rolling mean over 8 lag-3 values
        # (column `rolling_mean_lag3_window_size8` in the output below).
        3: [RollingMean(window_size=8)],
    },
    # Calendar features derived from `ds`.
    date_features=['dayofweek', 'month'],
    num_threads=2,
)
# Lags and lag transforms are computed from the target variable only.
# `price` is not listed in static_features and is passed through unchanged in
# the output below. The first 10 rows (index 0-9) are missing from that output,
# presumably because the lag-3 rolling window of 8 is incomplete there.
fcst.preprocess(series_with_prices, static_features=['static_0', 'product_id'])

Unnamed: 0,unique_id,ds,y,static_0,product_id,price,lag3,rolling_mean_lag3_window_size8,dayofweek,month
10,id_00,2000-10-15,266.501075,79,45,0.791725,12.688070,232.098439,6,10
11,id_00,2000-10-16,320.542865,79,45,0.528895,111.133819,241.013668,0,10
12,id_00,2000-10-17,424.913844,79,45,0.568045,197.982842,252.852272,1,10
13,id_00,2000-10-18,506.946385,79,45,0.925597,266.501075,264.093063,2,10
14,id_00,2000-10-19,38.877800,79,45,0.071036,320.542865,271.787434,3,10
...,...,...,...,...,...,...,...,...,...,...
217,id_00,2001-05-10,8.736114,79,45,0.383464,343.217885,271.801117,3,5
218,id_00,2001-05-11,98.206753,79,45,0.588317,431.501686,282.960582,4,5
219,id_00,2001-05-12,195.154790,79,45,0.831048,504.259018,295.946759,5,5
220,id_00,2001-05-13,259.690730,79,45,0.628982,8.736114,232.873582,6,5


In [47]:
# Train on the merged frame, declaring which columns are static.
# NOTE(review): `price` is not listed as static, so it is presumably treated as
# a dynamic exogenous feature whose future values must be supplied at predict
# time (the predict call below passes them through X_df) — confirm.
fcst.fit(series_with_prices, static_features=['static_0', 'product_id'])

MLForecast(models=[LGBMRegressor], freq=D, lag_features=['lag3', 'rolling_mean_lag3_window_size8'], date_features=['dayofweek', 'month'], num_threads=2)

In [48]:
# Alternative kept for reference: get the training matrices directly.
# X, y = fcst.preprocess(series_with_prices, return_X_y=True, as_numpy=True, static_features=['static_0', 'product_id'])
# Last observed rows, i.e. the history right before the forecast window.
series_with_prices.tail(20)

Unnamed: 0,unique_id,ds,y,static_0,product_id,price
202,id_00,2001-04-25,481.824585,79,45,0.377752
203,id_00,2001-04-26,8.366281,79,45,0.179604
204,id_00,2001-04-27,91.476602,79,45,0.024679
205,id_00,2001-04-28,187.090532,79,45,0.06725
206,id_00,2001-04-29,242.521532,79,45,0.679393
207,id_00,2001-04-30,342.22597,79,45,0.453697
208,id_00,2001-05-01,400.369603,79,45,0.536579
209,id_00,2001-05-02,513.321524,79,45,0.896671
210,id_00,2001-05-03,39.373177,79,45,0.990339
211,id_00,2001-05-04,108.139791,79,45,0.216897


In [49]:
# Features derived from the target are recomputed during prediction using the
# predicted values; `price` is an additional variable and has to be supplied
# by the caller.
def inspect_input(new_x: pd.DataFrame) -> pd.DataFrame:
    """Display the model inputs so they can be inspected, then return them unchanged.

    Used as `before_predict_callback` in the `fcst.predict` calls of this
    notebook. `display` is the notebook's rich-display function and is not
    imported here, so it must be available in the running session.

    Parameters
    ----------
    new_x : pd.DataFrame
        Feature rows about to be passed to the model (the outputs rendered in
        this notebook are data frames).

    Returns
    -------
    pd.DataFrame
        The same object that was received.
    """
    display(new_x)
    return new_x

# Forecast 7 steps ahead. X_df provides the future values of `price`; the
# callback displays the feature row built for each call to the model
# (seven displays in the output below).
preds = fcst.predict(h=7, X_df=prices_catalog, before_predict_callback=inspect_input)
preds.head()

Unnamed: 0,static_0,product_id,price,lag3,rolling_mean_lag3_window_size8,dayofweek,month
0,79,45,0.273542,195.15479,251.104654,1,5


Unnamed: 0,static_0,product_id,price,lag3,rolling_mean_lag3_window_size8,dayofweek,month
0,79,45,0.798047,259.69073,262.657839,2,5


Unnamed: 0,static_0,product_id,price,lag3,rolling_mean_lag3_window_size8,dayofweek,month
0,79,45,0.185636,348.622446,273.673678,3,5


Unnamed: 0,static_0,product_id,price,lag3,rolling_mean_lag3_window_size8,dayofweek,month
0,79,45,0.952792,426.32611,284.062206,4,5


Unnamed: 0,static_0,product_id,price,lag3,rolling_mean_lag3_window_size8,dayofweek,month
0,79,45,0.687488,504.347051,293.167876,5,5


Unnamed: 0,static_0,product_id,price,lag3,rolling_mean_lag3_window_size8,dayofweek,month
0,79,45,0.215508,31.99584,234.134979,6,5


Unnamed: 0,static_0,product_id,price,lag3,rolling_mean_lag3_window_size8,dayofweek,month
0,79,45,0.947371,110.212413,246.819516,0,5


Unnamed: 0,unique_id,ds,LGBMRegressor
0,id_00,2001-05-15,426.32611
1,id_00,2001-05-16,504.347051
2,id_00,2001-05-17,31.99584
3,id_00,2001-05-18,110.212413
4,id_00,2001-05-19,179.993988


In [50]:
# Refit on the same data and static features, this time with max_horizon=7.
# NOTE(review): presumably this trains one model per forecast step (direct
# forecasting) instead of reusing one model recursively — confirm in the
# MLForecast documentation.
fcst.fit(series_with_prices, static_features=['static_0', 'product_id'], max_horizon=7)

MLForecast(models=[LGBMRegressor], freq=D, lag_features=['lag3', 'rolling_mean_lag3_window_size8'], date_features=['dayofweek', 'month'], num_threads=2)

In [51]:
# Same predict call as before, now on the forecaster refit with max_horizon=7.
# The callback output below shows a single feature row instead of seven.
preds = fcst.predict(h=7, X_df=prices_catalog, before_predict_callback=inspect_input)
preds.head()

Unnamed: 0,static_0,product_id,price,lag3,rolling_mean_lag3_window_size8,dayofweek,month
0,79,45,0.273542,195.15479,251.104654,1,5


Unnamed: 0,unique_id,ds,LGBMRegressor
0,id_00,2001-05-15,426.32611
1,id_00,2001-05-16,514.40943
2,id_00,2001-05-17,25.553469
3,id_00,2001-05-18,99.518742
4,id_00,2001-05-19,178.87322
