In [11]:
%load_ext autoreload
%autoreload 2
import sys
from pathlib import Path
sys.path.insert(1, str(Path.cwd().parent))
str(Path.cwd().parent)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


'c:\\Users\\jaesc2\\GitHub\\skforecast'

In [2]:
# Libraries
# ==============================================================================
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error
from lightgbm import LGBMRegressor

from skforecast.datasets import fetch_dataset
from skforecast.ForecasterAutoregMultiSeries import ForecasterAutoregMultiSeries
from skforecast.model_selection_multiseries import backtesting_forecaster_multiseries
from skforecast.model_selection_multiseries import grid_search_forecaster_multiseries
from skforecast.model_selection_multiseries import bayesian_search_forecaster_multiseries

In [3]:
# Data download
# ==============================================================================
data = fetch_dataset(name="items_sales")
data.head()

items_sales
-----------
Simulated time series for the sales of 3 different items.
Simulated data.
Shape of the dataset: (1097, 3)


Unnamed: 0_level_0,item_1,item_2,item_3
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2012-01-01,8.253175,21.047727,19.429739
2012-01-02,22.777826,26.578125,28.009863
2012-01-03,27.549099,31.751042,32.078922
2012-01-04,25.895533,24.567708,27.252276
2012-01-05,21.379238,18.191667,20.357737


In [4]:
# Split data into train-val-test
# ==============================================================================
end_train = '2014-07-15 23:59:00'
data_train = data.loc[:end_train, :].copy()
data_test  = data.loc[end_train:, :].copy()

print(
    f"Train dates : {data_train.index.min()} --- {data_train.index.max()}   "
    f"(n={len(data_train)})"
)
print(
    f"Test dates  : {data_test.index.min()} --- {data_test.index.max()}   "
    f"(n={len(data_test)})"
)

Train dates : 2012-01-01 00:00:00 --- 2014-07-15 00:00:00   (n=927)
Test dates  : 2014-07-16 00:00:00 --- 2015-01-01 00:00:00   (n=170)


In [5]:
# Generate exogenous variable month
# ==============================================================================
data_exog = data.copy()
data_exog['month'] = data_exog.index.month

# Split data into train-val-test
# ==============================================================================
end_train = '2014-07-15 23:59:00'
data_exog_train = data_exog.loc[:end_train, :].copy()
data_exog_test  = data_exog.loc[end_train:, :].copy()

data_exog_train.head(3)

Unnamed: 0_level_0,item_1,item_2,item_3,month
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2012-01-01,8.253175,21.047727,19.429739,1
2012-01-02,22.777826,26.578125,28.009863,1
2012-01-03,27.549099,31.751042,32.078922,1


In [6]:
# Create and fit a Forecaster Multi-Series
# ==============================================================================
forecaster = ForecasterAutoregMultiSeries(
                 regressor          = LGBMRegressor(random_state=123, n_estimators=10, verbose=-1),
                 lags               = 3,
                 encoding           = 'ordinal',
                 transformer_series = None,
                 transformer_exog   = None,
                 weight_func        = None,
                 series_weights     = None,
                 differentiation    = None,
                 dropna_from_series = False,
                 fit_kwargs         = None,
                 forecaster_id      = None
             )

forecaster.fit(
    series = data_exog_train[['item_1', 'item_2', 'item_3']], 
    exog   = data_exog_train[['month']]
)

In [8]:
print("series_col_names:", forecaster.series_col_names)
print("exog_col_names:", forecaster.exog_col_names)
print("series_X_train:", forecaster.series_X_train)
print("X_train_col_names:", forecaster.X_train_col_names)


series_col_names: ['item_1', 'item_2', 'item_3']
exog_col_names: ['month']
series_X_train: ['item_1', 'item_2', 'item_3']
X_train_col_names: ['lag_1', 'lag_2', 'lag_3', '_level_skforecast', 'month']


In [7]:
X_train = forecaster.create_train_X_y(series=data_train)[0]
X_train

Unnamed: 0_level_0,lag_1,lag_2,lag_3,_level_skforecast
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2012-01-04,27.549099,22.777826,8.253175,0
2012-01-05,25.895533,27.549099,22.777826,0
2012-01-06,21.379238,25.895533,27.549099,0
2012-01-07,21.106643,21.379238,25.895533,0
2012-01-08,20.533871,21.106643,21.379238,0
...,...,...,...,...
2014-07-11,9.677730,12.199832,14.072343,2
2014-07-12,10.396751,9.677730,12.199832,2
2014-07-13,16.139173,10.396751,9.677730,2
2014-07-14,13.028927,16.139173,10.396751,2


In [7]:
lw = forecaster.last_window['item_1'] + 10
lw = lw.to_frame(name='item_4')
lw

Unnamed: 0_level_0,item_4
date,Unnamed: 1_level_1
2014-07-13,32.609388
2014-07-14,33.307307
2014-07-15,35.980745


In [10]:
forecaster.predict(steps=1, levels='item_4', last_window=lw)



Unnamed: 0,item_4
2014-07-16,23.893397


In [9]:
forecaster.predict(steps=1, levels='item_4', last_window=None)



ValueError: No series to predict. None of the series ['item_4'] are present in `last_window` attribute. Provide `last_window` as argument in predict method.

In [11]:
X_predict = forecaster.create_predict_X(steps=1, levels='item_4', last_window=lw)['item_4']
X_predict



Unnamed: 0,lag_1,lag_2,lag_3,_level_skforecast
2014-07-16,35.980745,33.307307,32.609388,


In [None]:
forecaster.regressor.predict(X_predict)

array([23.89339724])

In [None]:
X_predict['_level_skforecast'] = np.nan
X_predict

Unnamed: 0,lag_1,lag_2,lag_3,_level_skforecast
2014-07-16,35.980745,33.307307,32.609388,


In [None]:
forecaster.regressor.predict(X_predict)

array([23.89339724])

In [None]:
lw =  pd.DataFrame(forecaster.last_window)
lw['item_4'] = lw['item_1'] + 10
lw

Unnamed: 0_level_0,item_1,item_2,item_3,item_4
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2014-07-13,22.609388,8.1,13.028927,32.609388
2014-07-14,23.307307,10.895833,9.315334,33.307307
2014-07-15,25.980745,10.489583,9.908915,35.980745


In [None]:
forecaster.predict(steps=1, levels=['item_1', 'item_4'], last_window=lw)



Unnamed: 0,item_1,item_4
2014-07-16,22.778094,23.893397


In [None]:
forecaster.predict_interval(steps=1, levels='item_4', last_window=lw)



Unnamed: 0,item_4,item_4_lower_bound,item_4_upper_bound
2014-07-16,23.893397,18.890479,28.713671


In [None]:
forecaster.predict_interval(steps=1, levels=['item_1', 'item_4'], last_window=lw)



Unnamed: 0,item_1,item_1_lower_bound,item_1_upper_bound,item_4,item_4_lower_bound,item_4_upper_bound
2014-07-16,22.778094,21.672791,27.20174,23.893397,18.890479,28.713671


In [None]:
forecaster.predict_interval(steps=1, levels='item_4', last_window=None)



ValueError: No series to predict. None of the series ['item_4'] are present in `last_window` attribute. Provide `last_window` as argument in predict method.

In [None]:
forecaster.predict_interval(steps=1, levels=['item_1', 'item_4'], last_window=None)



Unnamed: 0,item_1,item_1_lower_bound,item_1_upper_bound
2014-07-16,22.778094,21.672791,27.20174


In [None]:
forecaster.set_out_sample_residuals(residuals=forecaster.in_sample_residuals)

In [None]:
forecaster.out_sample_residuals.keys()

dict_keys(['item_1', 'item_2', 'item_3', '_unknown_level'])

In [None]:
forecaster.predict_interval(steps=1, levels='item_4', last_window=lw, in_sample_residuals=False)



Unnamed: 0,item_4,item_4_lower_bound,item_4_upper_bound
2014-07-16,23.893397,19.701222,29.869204


In [11]:
# Create and fit a Forecaster Multi-Series
# ==============================================================================
# Generate exogenous variable month
# ==============================================================================
data_exog = data.copy()
data_exog['month'] = data_exog.index.month

# Split data into train-val-test
# ==============================================================================
end_train = '2014-07-15 23:59:00'
data_exog_train = data_exog.loc[:end_train, :].copy()
data_exog_test  = data_exog.loc[end_train:, :].copy()

data_exog_train.head(3)

forecaster = ForecasterAutoregMultiSeries(
                 regressor          = LGBMRegressor(random_state=123, n_estimators=10, verbose=-1),
                 lags               = 3,
                 encoding           = 'ordinal',
                 transformer_series = None,
                 transformer_exog   = None,
                 weight_func        = None,
                 series_weights     = None,
                 differentiation    = None,
                 dropna_from_series = False,
                 fit_kwargs         = None,
                 forecaster_id      = None
             )

forecaster.fit(
    series = data_exog_train[['item_1', 'item_2', 'item_3']], 
    exog   = data_exog_train[['month']]
)

In [12]:
steps  = 10
level = 'item_1'

(
    last_window_values_dict,
    exog_values_dict,
    levels,
    prediction_index,
    _
) = forecaster._create_predict_inputs(
    steps       = steps,
    levels      = level,
    exog        = data_exog_test[['month']]
)


In [13]:
forecaster._recursive_predict(
    steps       = steps,
    level       = level,
    last_window = last_window_values_dict[level],
    exog        = exog_values_dict[level]
)

array([22.80722376, 21.06453624, 20.16995613, 20.79247993, 21.18398596,
       21.18398596, 20.58098726, 20.58098726, 21.18398596, 21.18398596])

In [14]:
forecaster._recursive_predict_new(
    steps       = steps,
    level       = level,
    last_window = last_window_values_dict[level],
    exog        = exog_values_dict[level]
)

array([22.80722376, 21.06453624, 20.16995613, 20.79247993, 21.18398596,
       21.18398596, 20.58098726, 20.58098726, 21.18398596, 21.18398596])

In [15]:
%%timeit -r 10 -n 100

forecaster._recursive_predict(
    steps       = steps,
    level       = level,
    last_window = last_window_values_dict[level],
    exog        = exog_values_dict[level]
)

3.9 ms ± 152 µs per loop (mean ± std. dev. of 10 runs, 100 loops each)


In [16]:
%%timeit -r 10 -n 100

forecaster._recursive_predict_new(
    steps       = steps,
    level       = level,
    last_window = last_window_values_dict[level],
    exog        = exog_values_dict[level]
)

4.15 ms ± 273 µs per loop (mean ± std. dev. of 10 runs, 100 loops each)


In [25]:
# Create a dummy DataFrame
# ==============================================================================
print(pd.__version__)
n = 3

df = pd.DataFrame({
    'item_1': np.random.normal(loc=0, scale=1, size=n),
    'item_2': np.random.normal(loc=0, scale=1, size=n),
    'item_3': np.random.normal(loc=0, scale=1, size=n),
    'month' : np.random.randint(1, 12, n)
}).astype(float)
df = pd.DataFrame(np.random.rand(n, 3), columns=['item_1', 'item_2', 'item_3'])
df_view = df.iloc[:, :-1]
display(df)
display(df_view)
print(id(df), id(df_view), id(df) == id(df_view) )
print(id(df['item_1']), id(df_view['item_1']), id(df['item_1']) == id(df_view['item_1']))
df_view.at[0, 'item_1'] = np.nan
display(df)
display(df_view)
print(id(df), id(df_view), id(df) == id(df_view) )
print(id(df['item_1']), id(df_view['item_1']), id(df['item_1']) == id(df_view['item_1']))


2.2.2


Unnamed: 0,item_1,item_2,item_3
0,0.069732,0.640479,0.440521
1,0.618573,0.53281,0.159427
2,0.569972,0.481268,0.598937


Unnamed: 0,item_1,item_2
0,0.069732,0.640479
1,0.618573,0.53281
2,0.569972,0.481268


2139660214864 2139683424272 False
2139683418384 2139683406224 False


Unnamed: 0,item_1,item_2,item_3
0,,0.640479,0.440521
1,0.618573,0.53281,0.159427
2,0.569972,0.481268,0.598937


Unnamed: 0,item_1,item_2
0,,0.640479
1,0.618573,0.53281
2,0.569972,0.481268


2139660214864 2139683424272 False
2139683418384 2139683411920 False


In [30]:
import pandas as pd
import numpy as np

n = 3

# Primera opción: DataFrame desde un diccionario
df = pd.DataFrame({
    'item_1': np.random.normal(loc=0, scale=1, size=n),
    'item_2': np.random.normal(loc=0, scale=1, size=n),
    'item_3': np.random.normal(loc=0, scale=1, size=n),
    'month' : np.random.randint(1, 12, n)
}).astype(float)
df_view = df.iloc[:, :-1]
print("Desde diccionario:")
print("DF", id(df), id(df_view), id(df) == id(df_view) )
print("item_1", id(df['item_1']), id(df_view['item_1']), id(df['item_1']) == id(df_view['item_1']))
print("Values", id(df.values), id(df_view.values), id(df.values) == id(df_view.values) )
df_view.at[0, 'item_1'] = np.nan
print(df)
print(df_view)
print("DF", id(df), id(df_view), id(df) == id(df_view) )
print("item_1", id(df['item_1']), id(df_view['item_1']), id(df['item_1']) == id(df_view['item_1']))
print("Values", id(df.values), id(df_view.values), id(df.values) == id(df_view.values) )

# Segunda opción: DataFrame desde un array numpy
df = pd.DataFrame(np.random.rand(n, 3), columns=['item_1', 'item_2', 'item_3'])
df_view_2 = df.iloc[:, :-1]
print("\nDesde array numpy:")
print("DF", id(df), id(df_view_2), id(df) == id(df_view_2) )
print("item_1", id(df['item_1']), id(df_view_2['item_1']), id(df['item_1']) == id(df_view_2['item_1']))
print("Values", id(df.values), id(df_view_2.values), id(df.values) == id(df_view_2.values) )
df_view_2.at[0, 'item_1'] = np.nan
print(df)
print(df_view_2)
print("DF", id(df), id(df_view_2), id(df) == id(df_view_2) )
print("item_1", id(df['item_1']), id(df_view_2['item_1']), id(df['item_1']) == id(df_view_2['item_1']))
print("Values", id(df.values), id(df_view_2.values), id(df.values) == id(df_view_2.values) )


Desde diccionario:
DF 2139659994640 2139683512464 False
item_1 2139683397264 2139683452240 False
Values 2139676957392 2139676957392 True
     item_1    item_2    item_3  month
0 -0.085345  0.239886  1.945927    1.0
1  1.599344  0.554975 -1.009476   11.0
2 -0.711111 -0.995174 -0.490364    4.0
     item_1    item_2    item_3
0       NaN  0.239886  1.945927
1  1.599344  0.554975 -1.009476
2 -0.711111 -0.995174 -0.490364
DF 2139659994640 2139683512464 False
item_1 2139683397264 2139683449296 False
Values 2139676957680 2139676957680 True

Desde array numpy:
DF 2139659813968 2139660031824 False
item_1 2139677044688 2139677045200 False
Values 2139676957776 2139676957776 True
     item_1    item_2    item_3
0       NaN  0.929748  0.165783
1  0.167344  0.169843  0.840790
2  0.261547  0.721513  0.237605
     item_1    item_2
0       NaN  0.929748
1  0.167344  0.169843
2  0.261547  0.721513
DF 2139659813968 2139660031824 False
item_1 2139677044688 2139677045392 False
Values 2139676955184 21396769

In [31]:
import pandas as pd
import numpy as np

n = 3

# Primera opción: DataFrame desde un diccionario
df = pd.DataFrame({
    'item_1': np.random.normal(loc=0, scale=1, size=n),
    'item_2': np.random.normal(loc=0, scale=1, size=n),
    'item_3': np.random.normal(loc=0, scale=1, size=n),
    'month' : np.random.randint(1, 12, n)
}).astype(float)
df_view = df.drop(columns='month')
print("Desde diccionario:")
print("DF", id(df), id(df_view), id(df) == id(df_view) )
print("item_1", id(df['item_1']), id(df_view['item_1']), id(df['item_1']) == id(df_view['item_1']))
print("Values", id(df.values), id(df_view.values), id(df.values) == id(df_view.values) )
df_view.at[0, 'item_1'] = np.nan
print(df)
print(df_view)
print("DF", id(df), id(df_view), id(df) == id(df_view) )
print("item_1", id(df['item_1']), id(df_view['item_1']), id(df['item_1']) == id(df_view['item_1']))
print("Values", id(df.values), id(df_view.values), id(df.values) == id(df_view.values) )

# Segunda opción: DataFrame desde un array numpy
df = pd.DataFrame(np.random.rand(n, 3), columns=['item_1', 'item_2', 'item_3'])
df_view_2 = df.drop(columns='item_3')
print("\nDesde array numpy:")
print("DF", id(df), id(df_view_2), id(df) == id(df_view_2) )
print("item_1", id(df['item_1']), id(df_view_2['item_1']), id(df['item_1']) == id(df_view_2['item_1']))
print("Values", id(df.values), id(df_view_2.values), id(df.values) == id(df_view_2.values) )
df_view_2.at[0, 'item_1'] = np.nan
print(df)
print(df_view_2)
print("DF", id(df), id(df_view_2), id(df) == id(df_view_2) )
print("item_1", id(df['item_1']), id(df_view_2['item_1']), id(df['item_1']) == id(df_view_2['item_1']))
print("Values", id(df.values), id(df_view_2.values), id(df.values) == id(df_view_2.values) )


Desde diccionario:
DF 2139659726992 2139683355024 False
item_1 2139683449296 2139660195536 False
Values 2139676955376 2139676955376 True
     item_1    item_2    item_3  month
0 -1.412770 -1.907469 -0.542944    9.0
1  0.522530 -0.755245 -1.929362    3.0
2 -0.424111 -0.913669  0.023097    4.0
     item_1    item_2    item_3
0       NaN -1.907469 -0.542944
1  0.522530 -0.755245 -1.929362
2 -0.424111 -0.913669  0.023097
DF 2139659726992 2139683355024 False
item_1 2139683449296 2139683512464 False
Values 2139676958640 2139676958640 True

Desde array numpy:
DF 2139677048400 2139677043408 False
item_1 2139683449296 2139677045776 False
Values 2139676959120 2139676959120 True
     item_1    item_2    item_3
0  0.264461  0.298399  0.803422
1  0.737960  0.371038  0.342819
2  0.794910  0.326141  0.583364
    item_1    item_2
0      NaN  0.298399
1  0.73796  0.371038
2  0.79491  0.326141
DF 2139677048400 2139677043408 False
item_1 2139683449296 2139677045776 False
Values 2139676958832 213967695883

In [12]:
import re
import pytest
import joblib
import numpy as np
import pandas as pd
from pathlib import Path
from sklearn.exceptions import NotFittedError
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import OrdinalEncoder
from sklearn.preprocessing import FunctionTransformer
from sklearn.compose import make_column_transformer
from sklearn.compose import make_column_selector
from sklearn.pipeline import make_pipeline
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import HistGradientBoostingRegressor
from lightgbm import LGBMRegressor

from skforecast.exceptions import IgnoredArgumentWarning
from skforecast.preprocessing import TimeSeriesDifferentiator
from skforecast.ForecasterAutoregMultiSeries import ForecasterAutoregMultiSeries

# Fixtures
from skforecast.ForecasterAutoregMultiSeries.tests.fixtures_ForecasterAutoregMultiSeries import series
from skforecast.ForecasterAutoregMultiSeries.tests.fixtures_ForecasterAutoregMultiSeries import exog
from skforecast.ForecasterAutoregMultiSeries.tests.fixtures_ForecasterAutoregMultiSeries import exog_predict


series_dict = joblib.load(r"C:\Users\jaesc2\GitHub\skforecast\skforecast\ForecasterAutoregMultiSeries\tests\fixture_sample_multi_series.joblib")
exog_dict = joblib.load(r"C:\Users\jaesc2\GitHub\skforecast\skforecast\ForecasterAutoregMultiSeries\tests\fixture_sample_multi_series_exog.joblib")
end_train = "2016-07-31 23:59:00"
series_dict_train = {k: v.loc[:end_train,] for k, v in series_dict.items()}
exog_dict_train = {k: v.loc[:end_train,] for k, v in exog_dict.items()}
series_dict_test = {k: v.loc[end_train:,] for k, v in series_dict.items()}
exog_dict_test = {k: v.loc[end_train:,] for k, v in exog_dict.items()}

In [10]:
forecaster = ForecasterAutoregMultiSeries(
        regressor=LGBMRegressor(
            n_estimators=2, random_state=123, verbose=-1, max_depth=2
        ),
        lags=14,
        encoding='onehot',
        differentiation=1,
        dropna_from_series=False,
        transformer_series=StandardScaler(),
        transformer_exog=StandardScaler(),
)
forecaster.fit(
    series=series_dict_train, exog=exog_dict_train, suppress_warnings=False,
    store_last_window=['id_1000', 'id_1001', 'id_10101']
)



In [8]:
forecaster.last_window.keys()

dict_keys(['id_1000', 'id_1001'])

In [6]:
forecaster.transformer_series_

{'id_1000': StandardScaler(),
 'id_1001': StandardScaler(),
 'id_1002': StandardScaler(),
 'id_1003': StandardScaler(),
 'id_1004': StandardScaler(),
 '_unknown_level': StandardScaler()}

In [97]:

transformer_series = {
    'id_1000': StandardScaler(), 
    'id_1001': StandardScaler(),
    'id_1002': StandardScaler(),
    'id_1003': StandardScaler(),
    'id_1004': StandardScaler()
}

forecaster = ForecasterAutoregMultiSeries(
        regressor=LGBMRegressor(
            n_estimators=200, random_state=123, verbose=-1, max_depth=7
        ),
        lags=14,
        encoding='ordinal',
        differentiation=None,
        dropna_from_series=False,
        transformer_series=transformer_series,
        transformer_exog=StandardScaler(),
)
forecaster.fit(
    series=series_dict_train, 
    exog=exog_dict_train, 
    suppress_warnings=False
)
forecaster.transformer_series_



{'id_1000': StandardScaler(),
 'id_1001': StandardScaler(),
 'id_1002': StandardScaler(),
 'id_1003': StandardScaler(),
 'id_1004': StandardScaler(),
 '_unknown_level': None}

In [None]:
series_dict = {
    '1': pd.Series(np.arange(10)),
    '2': pd.Series()
}

In [94]:
forecaster.series_col_names

['id_1000', 'id_1001', 'id_1002', 'id_1003', 'id_1004']

In [84]:
forecaster.in_sample_residuals.keys()

dict_keys(['_unknown_level'])

In [85]:
levels = ['id_1000', 'id_1001', 'id_1003', 'id_1004', 'id_1005']
last_window = pd.DataFrame(
    {k: v for k, v in forecaster.last_window.items() if k in levels}
)
last_window['id_1005'] = last_window['id_1004']
predictions = forecaster.predict(
    steps=5, 
    levels=levels, 
    exog=exog_dict_test, 
    suppress_warnings=False, 
    last_window=last_window,
)
predictions



Unnamed: 0,id_1000,id_1001,id_1003,id_1004,id_1005
2016-08-01,1385.344465,3186.180521,2969.253545,7017.298943,6251.552958
2016-08-02,1393.067049,3124.722045,2416.241743,8362.996757,6295.479348
2016-08-03,1360.064954,3044.431654,1923.296823,8284.858475,5937.095385
2016-08-04,1317.255708,2991.307331,1766.935128,8873.051069,6054.341108
2016-08-05,1300.654884,2671.347704,1580.540078,8821.281555,5565.488071


In [86]:
forecaster.in_sample_residuals.keys()

dict_keys(['_unknown_level'])

In [92]:
forecaster.in_sample_residuals = None

In [93]:
predictions = forecaster.predict_bootstrapping(
    steps=5, 
    levels=['id_1000', 'id_1001', 'id_1003', 'id_1004', 'id_1005'], 
    exog=exog_dict_test, 
    suppress_warnings=False, 
    last_window=last_window,
    in_sample_residuals=True,
    n_boot=10
)
predictions

AttributeError: 'NoneType' object has no attribute 'keys'

In [89]:
new_residuals = {
    '1': np.array([1, 2, 3, 4, 5]), 
    '2': np.array([1, 2, 3, 4, 5])
}
new_residuals = {
    '_unknown_level': np.array([1, 2, 3, 4, 5])
}

print(forecaster.out_sample_residuals)
forecaster.set_out_sample_residuals(residuals=new_residuals)
forecaster.out_sample_residuals

None


  )


{'_unknown_level': array([-1.36143002, -1.36099561, -1.36056119, -1.36012677, -1.35969236])}

In [91]:
predictions = forecaster.predict_bootstrapping(
    steps=5, 
    levels=['id_1000', 'id_1001', 'id_1003', 'id_1004', 'id_1005'], 
    exog=exog_dict_test, 
    suppress_warnings=False, 
    last_window=last_window,
    in_sample_residuals=False,
    n_boot=10
)
predictions



{'id_1000':             pred_boot_0  pred_boot_1  pred_boot_2  pred_boot_3  pred_boot_4  \
 2016-08-01 -1747.586096 -1747.586096 -1746.586096 -1748.586096 -1746.586096   
 2016-08-02 -1741.996066 -1740.996066 -1742.996066 -1741.996066 -1741.996066   
 2016-08-03 -1769.410926 -1768.410926 -1767.410926 -1771.410926 -1771.410926   
 2016-08-04 -1804.425377 -1803.425377 -1805.425377 -1803.425377 -1805.425377   
 2016-08-05 -1837.231351 -1838.231351 -1839.231351 -1839.231351 -1839.231351   
 
             pred_boot_5  pred_boot_6  pred_boot_7  pred_boot_8  pred_boot_9  
 2016-08-01 -1748.586096 -1748.586096 -1745.586096 -1748.586096 -1746.586096  
 2016-08-02 -1742.996066 -1741.996066 -1741.996066 -1743.996066 -1741.996066  
 2016-08-03 -1771.410926 -1769.410926 -1768.410926 -1767.410926 -1770.410926  
 2016-08-04 -1805.425377 -1803.425377 -1805.425377 -1805.425377 -1805.425377  
 2016-08-05 -1841.231351 -1838.231351 -1838.231351 -1840.231351 -1837.231351  ,
 'id_1001':             pred_boo

In [172]:
forecaster.in_sample_residuals.keys()

dict_keys(['_unknown_level', 'id_1000', 'id_1001', 'id_1002', 'id_1003', 'id_1004'])

In [173]:
predictions = forecaster.predict_bootstrapping(
steps=5, levels=levels, exog=exog_dict_test, suppress_warnings=False, last_window=last_window,
n_boot=10)
predictions



{'id_1000':             pred_boot_0  pred_boot_1  pred_boot_2  pred_boot_3  pred_boot_4  \
 2016-08-01   920.585451   751.085451  1072.335451   912.835847  1011.748939   
 2016-08-02   973.671764   632.585451   956.921863   687.172062   579.749137   
 2016-08-03   666.084955   663.085451   954.921566   612.585054   563.499336   
 2016-08-04   574.171268  1343.587206  1652.672924   338.085153   506.085947   
 2016-08-05   554.171070  1239.000595  1613.172924    -6.251458   445.835748   
 
             pred_boot_5  pred_boot_6  pred_boot_7  pred_boot_8  pred_boot_9  
 2016-08-01  1800.250496  1819.500496  1054.998840  1008.835649   824.498245  
 2016-08-02  1681.087107  1852.087107   829.335054   961.172458   796.498344  
 2016-08-03  1745.586710  1865.750595  1179.381645   809.335352  1430.663092  
 2016-08-04  1495.923321  1822.250496  1117.968256   862.421665  1472.749702  
 2016-08-05  1246.836214  2274.296888  1172.217561   554.334459  1337.412893  ,
 'id_1001':             pred_boo

In [39]:
predictions.to_numpy()

array([[2796.7268212 , 3205.84100287, 3367.37706412, 3367.37706412,
        3367.37706412],
       [2796.7268212 , 3367.37706412, 3367.37706412, 3367.37706412,
        3367.37706412],
       [2796.7268212 , 3367.37706412, 3367.37706412, 3367.37706412,
        3367.37706412],
       [2796.7268212 , 3367.37706412, 3367.37706412, 3367.37706412,
        3367.37706412],
       [2796.7268212 , 3367.37706412, 3367.37706412, 3367.37706412,
        3367.37706412]])

In [14]:
series_not_in_transformer_series = ['1']

In [15]:
unknown_not_in_transformer_series = '_unknown_level' in series_not_in_transformer_series 
unknown_not_in_transformer_series

False

In [17]:
series_s = set(['1', '_unknown_level'])
series_s = series_s - {'_unknown_level'}
series_s

{'1'}

In [23]:
['_unknown_level']

['_unknown_level']

In [151]:
forecaster = ForecasterAutoregMultiSeries(
        regressor=LGBMRegressor(
            n_estimators=2, random_state=123, verbose=-1, max_depth=2
        ),
        lags=14,
        encoding='ordinal',
        dropna_from_series=False,
        transformer_series=StandardScaler(),
        transformer_exog=StandardScaler(),
)
forecaster.fit(
    series=series_dict_train, exog=exog_dict_train, suppress_warnings=False
)



In [152]:
forecaster.transformer_series_

{'id_1000': StandardScaler(),
 'id_1001': StandardScaler(),
 'id_1002': StandardScaler(),
 'id_1003': StandardScaler(),
 'id_1004': StandardScaler(),
 '_unknown_level': StandardScaler()}

In [131]:
levels = ['1', '2']
residuals = {
    '1': np.array([1, 2, 3, 4, 5]),
    '2': np.array([1, 2, 3, 4, 5]),
    '_unknown_level': np.array([1, 2, 3, 4, 5, 1, 2, 3, 4, 5])
}

forecaster = ForecasterAutoregMultiSeries(LinearRegression(), lags=3,
                                            transformer_series=None)
forecaster.fit(series=series)
forecaster.in_sample_residuals = residuals
forecaster.set_out_sample_residuals(residuals=residuals)



In [129]:
forecaster.in_sample_residuals

{'1': array([1, 2, 3, 4, 5]),
 '2': array([1, 2, 3, 4, 5]),
 '_unknown_level': array([1, 2, 3, 4, 5, 1, 2, 3, 4, 5])}

In [132]:
forecaster.out_sample_residuals

{'1': array([1, 2, 3, 4, 5]),
 '2': array([1, 2, 3, 4, 5]),
 '_unknown_level': array([1, 2, 3, 4, 5, 1, 2, 3, 4, 5])}

In [16]:
series = pd.DataFrame({'l1': pd.Series(np.arange(10)), 
                       'l2': pd.Series(np.arange(10))})

In [22]:
forecaster = ForecasterAutoregMultiSeries(LinearRegression(), lags=3,
                                          encoding=None,
                                              transformer_series=StandardScaler())
forecaster.fit(series=series)
residuals = {'l1': np.array([1, 2, 3]), 'l2': np.array([4, 5, 6])}
forecaster.set_out_sample_residuals(residuals=residuals, transform=False)
results = forecaster.out_sample_residuals
results

  ("As `encoding` is set to `None`, no distinction between levels "
  


{'_unknown_level': array([1, 2, 3, 4, 5, 6])}