In [1]:
%load_ext autoreload
%autoreload 2
import sys
from pathlib import Path
# Put the parent of the current working directory on the import path so the
# local checkout of the package is importable, then echo that path as the
# cell output.
parent_dir = str(Path.cwd().parent)
sys.path.insert(1, parent_dir)
parent_dir

'/home/ubuntu/varios/skforecast'

In [2]:
import time
import numpy as np
import pandas as pd
from sklearn.datasets import make_regression
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import HistGradientBoostingRegressor
from lightgbm import LGBMRegressor
from xgboost import XGBRegressor
from sklearn.linear_model import Ridge
from joblib import Parallel, delayed

In [3]:

# Generate synthetic data
# ==============================================================================
X_train, y_train = make_regression(n_samples=1000, n_features=20, random_state=42)
X_train = pd.DataFrame(X_train, columns=[f"var_{i}" for i in range(X_train.shape[1])])
y_train = pd.Series(y_train, name="target")
n_models = 20

# Estimators to benchmark: (printed label, estimator class, constructor kwargs)
# ==============================================================================
estimators_to_benchmark = [
    ("Random Forest", RandomForestRegressor,
     {'n_estimators': 500, 'max_depth': 5, 'random_state': 42}),
    # NOTE(review): the recorded run shows LightGBM far slower in parallel than
    # sequentially; presumably thread oversubscription between the joblib
    # workers and LightGBM's own threading -- confirm before drawing conclusions.
    ("LightGBM", LGBMRegressor,
     {'n_estimators': 500, 'max_depth': 5, 'random_state': 42, 'verbose': -1}),
    ("HistGradientBoosting", HistGradientBoostingRegressor,
     {'max_iter': 500, 'max_depth': 5, 'random_state': 42}),
    ("Ridge", Ridge,
     {'random_state': 42}),
    ("XGBoost", XGBRegressor,
     {'n_estimators': 500, 'max_depth': 5, 'random_state': 42}),
]


# Helper functions
# ==============================================================================
def train_model(estimator_cls, params):
    """
    Build a new estimator and fit it on the module-level training data.

    Parameters
    ----------
    estimator_cls : type
        Estimator class; it is instantiated as `estimator_cls(**params)`.
    params : dict
        Keyword arguments forwarded to the estimator constructor.

    Returns
    -------
    model
        The estimator instance after calling `fit(X_train, y_train)`.
    """
    model = estimator_cls(**params)
    model.fit(X_train, y_train)
    return model


def benchmark_training(estimator_cls, params, n_repeats):
    """
    Time `n_repeats` fits of the same estimator, first one after another and
    then dispatched through `joblib.Parallel(n_jobs=-1)`.

    Parameters
    ----------
    estimator_cls : type
        Estimator class passed on to `train_model`.
    params : dict
        Constructor keyword arguments passed on to `train_model`.
    n_repeats : int
        Number of models trained in each of the two modes.

    Returns
    -------
    sequential_time : float
        Elapsed seconds for the sequential loop.
    parallel_time : float
        Elapsed seconds for the joblib-parallel run.
    """
    # perf_counter() is monotonic, so the measured intervals cannot be skewed by
    # system clock adjustments the way time.time() differences can.
    start_time = time.perf_counter()
    models_sequential = [train_model(estimator_cls, params) for _ in range(n_repeats)]
    sequential_time = time.perf_counter() - start_time

    start_time = time.perf_counter()
    models_parallel = Parallel(n_jobs=-1)(
        delayed(train_model)(estimator_cls, params) for _ in range(n_repeats)
    )
    parallel_time = time.perf_counter() - start_time

    return sequential_time, parallel_time


# Run the benchmark for every estimator and report the timings
# ==============================================================================
for model_label, estimator_cls, params in estimators_to_benchmark:
    sequential_time, parallel_time = benchmark_training(estimator_cls, params, n_models)

    print(model_label)
    print(f"Sequential Training Time: {sequential_time:.2f} seconds")
    print(f"Parallel Training Time: {parallel_time:.2f} seconds")
    print("")

Random Forest
Sequential Training Time: 103.33 seconds
Parallel Training Time: 13.46 seconds

LightGBM
Sequential Training Time: 4.84 seconds
Parallel Training Time: 220.28 seconds

HistGradientBoosting
Sequential Training Time: 17.24 seconds
Parallel Training Time: 2.55 seconds

Ridge
Sequential Training Time: 0.06 seconds
Parallel Training Time: 0.14 seconds

XGBoost
Sequential Training Time: 14.91 seconds
Parallel Training Time: 4.59 seconds



In [4]:
from skforecast.ForecasterAutoreg import ForecasterAutoreg
from skforecast.model_selection import backtesting_forecaster

# Generate data
# A seeded generator makes the synthetic series identical on every run, so the
# timings of the different n_jobs settings are measured on the same data.
rng = np.random.default_rng(42)
n = 1000
y = pd.Series(
    rng.normal(loc=0, scale=1, size=n),
    index=pd.date_range(start="2021-01-01", periods=n, freq="h"),
)
exogs = pd.DataFrame(
    rng.normal(loc=0, scale=1, size=(n, 10)),
    index=y.index,
    columns=[f"exog_{i}" for i in range(10)],
)

forecaster = ForecasterAutoreg(
    regressor=LGBMRegressor(n_estimators=500, max_depth=5, random_state=42, verbose=-1),
    lags=50
)

# Time the same backtesting call under each n_jobs setting. The printed label is
# built from the value actually passed, so label and configuration cannot
# disagree (the last run previously used n_jobs=8 but was printed as '-1').
# NOTE(review): the recorded output ends in a KeyboardInterrupt during the last
# configuration, so no timing was captured for it.
for n_jobs in ['auto', 1, 8]:
    start_time = time.perf_counter()
    backtesting_forecaster(
        forecaster=forecaster,
        y=y,
        exog=exogs,
        initial_train_size=500,
        steps=10,
        metric="mean_squared_error",
        refit=True,
        n_jobs=n_jobs,
        verbose=False
    )
    elapsed_time = time.perf_counter() - start_time
    print(f"n_jobs='{n_jobs}': {elapsed_time:.2f} seconds")



  0%|          | 0/50 [00:00<?, ?it/s]

n_jobs='auto': 11.63 seconds


  0%|          | 0/50 [00:00<?, ?it/s]

n_jobs='1': 11.73 seconds


  0%|          | 0/50 [00:00<?, ?it/s]

KeyboardInterrupt: 