# Understanding joblib multiprocessing

## Parallel backtesting

In [18]:
import numpy as np
import pandas as pd

from sklearn.linear_model import LinearRegression
from sklearn.ensemble import HistGradientBoostingRegressor
from skforecast.model_selection import backtesting_forecaster
from skforecast.model_selection import grid_search_forecaster
from skforecast.ForecasterAutoreg import ForecasterAutoreg
import skforecast 
# Show which skforecast version this notebook was executed with.
print(skforecast.__version__)

0.8.1


In [19]:
# Synthetic data: one seeded generator produces the target series first and
# then a 10-column exogenous frame, so the draws are reproducible.
n = 10_000
rgn = np.random.default_rng(seed=123)
y = pd.Series(rgn.random(size=n), name="y")
exog = pd.DataFrame(
    rgn.random(size=(n, 10)),
    columns=[f"col_{i}" for i in range(10)],
)

# The first half of the series is used as the initial training window.
y_train = y[:-(n // 2)]

# Forecaster under study: gradient boosting regressor with lags=50.
# Alternative kept for comparison: regressor=LinearRegression(), lags=50
forecaster = ForecasterAutoreg(
    regressor=HistGradientBoostingRegressor(random_state=666),
    lags=50,
)

y

0       0.682352
1       0.053821
2       0.220360
3       0.184372
4       0.175906
          ...   
9995    0.344087
9996    0.517366
9997    0.502259
9998    0.370318
9999    0.408432
Name: y, Length: 10000, dtype: float64

In [20]:
# Backtest the forecaster with refit=True
# ------------------------------------------------------------------------------
# Starts from the first len(y_train) observations and predicts 24 steps per
# fold; returns the mean squared error and the predictions.
metric, backtest_predictions = backtesting_forecaster(
    forecaster=forecaster,
    y=y,
    exog=exog,
    steps=24,
    metric='mean_squared_error',
    initial_train_size=len(y_train),
    fixed_train_size=False,
    refit=True,
    interval=None,
    n_boot=500,
    random_state=123,
    in_sample_residuals=True,
    verbose=False,
)

print(metric)
backtest_predictions

 


  0%|          | 0/209 [00:00<?, ?it/s]

0.08474745575115451


Unnamed: 0,pred
5000,0.547143
5001,0.492890
5002,0.439208
5003,0.514884
5004,0.514389
...,...
9995,0.495218
9996,0.514729
9997,0.475121
9998,0.467349


In [21]:
# Backtest the forecaster with refit=False
# ------------------------------------------------------------------------------
# Same settings as the refit run (24 steps per fold, initial window of
# len(y_train) observations); only the refit flag differs.
metric, backtest_predictions = backtesting_forecaster(
    forecaster=forecaster,
    y=y,
    exog=exog,
    steps=24,
    metric='mean_squared_error',
    initial_train_size=len(y_train),
    fixed_train_size=False,
    refit=False,
    interval=None,
    n_boot=500,
    random_state=123,
    in_sample_residuals=True,
    verbose=False,
)
print(metric)
backtest_predictions

  0%|          | 0/209 [00:00<?, ?it/s]

0.08613453504170875


Unnamed: 0,pred
5000,0.547143
5001,0.492890
5002,0.439208
5003,0.514884
5004,0.514389
...,...
9995,0.467167
9996,0.546763
9997,0.467555
9998,0.489330


In [22]:
# Grid search over lags and regressor hyperparameters (refit=False)
# ------------------------------------------------------------------------------

# Candidate lag configurations
lags_grid = [3, 10, [1, 2, 3, 20]]

# Candidate regressor hyperparameters.
# Not searched in this run:
#     'min_samples_leaf': [5, 10, 20]
#     'l2_regularization': [0.0, 0.1, 0.5]
param_grid = {
    'max_iter': [50],
    'max_depth': [5, 10, 15],
}

# Every (lags, params) combination is scored by mean squared error;
# return_best=False leaves `forecaster` as it is after the search.
results_grid = grid_search_forecaster(
    forecaster=forecaster,
    y=y,
    exog=exog,
    param_grid=param_grid,
    lags_grid=lags_grid,
    steps=24,
    metric='mean_squared_error',
    initial_train_size=len(y_train),
    fixed_train_size=False,
    refit=False,
    return_best=False,
    verbose=False,
)

Number of models compared: 9.


lags grid:   0%|          | 0/3 [00:00<?, ?it/s]

params grid:   0%|          | 0/3 [00:00<?, ?it/s]

params grid:   0%|          | 0/3 [00:00<?, ?it/s]

params grid:   0%|          | 0/3 [00:00<?, ?it/s]

In [26]:
# Grid search over lags and regressor hyperparameters (refit=True)
# ------------------------------------------------------------------------------

# Candidate lag configurations
lags_grid = [3, 10, [1, 2, 3, 20]]

# Candidate regressor hyperparameters
param_grid = {
    'max_iter': [100],
    'max_depth': [5, 10, 15],
    'min_samples_leaf': [5, 10, 20],
    'l2_regularization': [0.0, 0.1, 0.5],
}

# Every (lags, params) combination is scored by mean squared error;
# return_best=False leaves `forecaster` as it is after the search.
results_grid = grid_search_forecaster(
    forecaster=forecaster,
    y=y,
    exog=exog,
    param_grid=param_grid,
    lags_grid=lags_grid,
    steps=24,
    metric='mean_squared_error',
    initial_train_size=len(y_train),
    fixed_train_size=False,
    refit=True,
    return_best=False,
    verbose=False,
)

Number of models compared: 9.


lags grid:   0%|          | 0/3 [00:00<?, ?it/s]

params grid:   0%|          | 0/3 [00:00<?, ?it/s]

 


KeyboardInterrupt: 

In [24]:
# Display the grid search results table.
# NOTE(review): the saved output lists max_iter=50 only, so it appears to come
# from the grid search run with refit=False — confirm the execution order.
results_grid

Unnamed: 0,lags,params,mean_squared_error,max_depth,max_iter
6,"[1, 2, 3, 20]","{'max_depth': 5, 'max_iter': 50}",0.083262,5,50
4,"[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]","{'max_depth': 10, 'max_iter': 50}",0.083494,10,50
3,"[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]","{'max_depth': 5, 'max_iter': 50}",0.083498,5,50
5,"[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]","{'max_depth': 15, 'max_iter': 50}",0.083523,15,50
0,"[1, 2, 3]","{'max_depth': 5, 'max_iter': 50}",0.083526,5,50
7,"[1, 2, 3, 20]","{'max_depth': 10, 'max_iter': 50}",0.083677,10,50
2,"[1, 2, 3]","{'max_depth': 15, 'max_iter': 50}",0.083917,15,50
8,"[1, 2, 3, 20]","{'max_depth': 15, 'max_iter': 50}",0.083987,15,50
1,"[1, 2, 3]","{'max_depth': 10, 'max_iter': 50}",0.084243,10,50
