In [1]:
from skopt import BayesSearchCV
from skopt.space import Real, Integer, Categorical

from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor
from xgboost.sklearn import XGBRegressor
from sklearn.ensemble import VotingRegressor
from sklearn.svm import SVR

from sklearn.model_selection import TimeSeriesSplit, cross_validate
from sklearn.preprocessing import StandardScaler, SplineTransformer, PowerTransformer
from sklearn.metrics import mean_absolute_percentage_error, r2_score

from sklearn.pipeline import Pipeline

# Notebook-wide configuration constants.
SEED = 2_058    # random seed handed to the hyperparameter search for repeatable runs
NCALLS = 1_000  # NOTE(review): not referenced in the visible cells — confirm whether it is still needed

# Search space for the Bayesian hyperparameter search.
#
# Keys follow scikit-learn's nested-parameter convention
# (`<pipeline step>__<sub-estimator>__<parameter>`). The single dict tunes the
# spline feature expansion together with every member of the voting ensemble.
#
# The stochastic base learners are seeded with SEED so that repeated runs of the
# search score the same candidates identically (SVR exposes no random_state).
params = [
    {
        # Candidate estimator that replaces the pipeline's 'ensemble' placeholder step.
        'ensemble': [
            VotingRegressor(
                [
                    ('svm', SVR()),
                    ('rf', RandomForestRegressor(random_state=SEED)),
                    ('ada', AdaBoostRegressor(random_state=SEED)),
                    ('xgb', XGBRegressor(random_state=SEED)),
                ]
            )
        ],

        # --- Spline feature expansion ---
        'spline__n_knots': Integer(5, 20),
        'spline__degree': Integer(2, 5),

        # --- Support vector regressor ---
        # NOTE(review): the Real ranges below span many orders of magnitude but are
        # sampled with the default prior — consider prior='log-uniform'.
        'ensemble__svm__kernel': Categorical(['rbf', 'sigmoid']),
        'ensemble__svm__gamma': Real(1e-5, 1),
        'ensemble__svm__C': Real(1e-5, 1e4),
        'ensemble__svm__epsilon': Real(1e-5, 1e4),
        # Single-value dimension: pins max_iter rather than searching over it.
        'ensemble__svm__max_iter': Categorical([10000]),

        # --- AdaBoost ---
        # 50, 100, ..., 300
        'ensemble__ada__n_estimators': Categorical(list(range(50, 350, 50))),
        'ensemble__ada__learning_rate': Real(1e-5, 1e3),
        'ensemble__ada__loss': Categorical(['linear', 'square', 'exponential']),

        # --- Random forest ---
        # 50, 100, ..., 400
        'ensemble__rf__n_estimators': Categorical(list(range(50, 450, 50))),
        'ensemble__rf__max_depth': Integer(4, 16),
        'ensemble__rf__min_samples_split': Integer(2, 20),
        'ensemble__rf__min_samples_leaf': Integer(2, 20),
        'ensemble__rf__max_features': Categorical(['sqrt', 'log2', 1.0]),
        'ensemble__rf__min_impurity_decrease': Real(1e-3, 1),
        'ensemble__rf__ccp_alpha': Real(1e-3, 1e2),

        # --- XGBoost ---
        # 500, 1500, 2500
        'ensemble__xgb__n_estimators': Categorical(list(range(500, 3000, 1000))),
        'ensemble__xgb__max_depth': Integer(4, 16),
        'ensemble__xgb__min_child_weight': Integer(1, 6),
        'ensemble__xgb__learning_rate': Real(1e-5, 1),
        'ensemble__xgb__alpha': Real(1e-5, 1e2),
        'ensemble__xgb__lambda': Real(1e-5, 1e2),
        'ensemble__xgb__subsample': Real(0.2, 1),
        'ensemble__xgb__colsample_bytree': Real(0.2, 1),
    }
]

# Stand-alone voting ensemble over the four base regressors, each with its
# default constructor arguments. The search space above builds its own
# VotingRegressor; this object is not referenced by the pipeline or the search
# in the visible cells.
ensemble = VotingRegressor(
    list(
        {
            'svm': SVR(),
            'rf': RandomForestRegressor(),
            'ada': AdaBoostRegressor(),
            'xgb': XGBRegressor(),
        }.items()
    )
)

# Modelling pipeline: power-transform the inputs, expand them with a spline
# basis, then hand them to the final estimator. The last step is only a
# 'passthrough' placeholder; the search space supplies the actual estimator
# under the 'ensemble' key.
pipeline = Pipeline(
    steps=[
        ('power-transformer', PowerTransformer()),
        ('spline', SplineTransformer(n_knots=10)),  # n_knots is also a tuned dimension
        ('ensemble', 'passthrough'),
    ]
)

# Time-ordered cross-validation splitter: 10 splits, each holding out a
# single observation as its test set.
ts_cv = TimeSeriesSplit(n_splits=10, test_size=1)

# Bayesian hyperparameter search over the pipeline.
#
# The time-series splitter is passed explicitly via `cv=ts_cv`. Without it the
# search falls back to its default K-fold splitter, which lets folds train on
# observations that come after the ones they are scored on and therefore gives
# optimistic scores on ordered data.
#
# Candidates are ranked by negated MAPE (higher is better); train scores are
# kept so over-fitting can be inspected in `cv_results_`.
cv = BayesSearchCV(
    pipeline,
    search_spaces=params,
    # NOTE(review): NCALLS is defined above but the iteration budget is
    # hard-coded here — confirm which value is intended.
    n_iter=10,
    scoring='neg_mean_absolute_percentage_error',
    cv=ts_cv,
    n_jobs=-1,
    random_state=SEED,
    return_train_score=True
)

In [3]:
import sys, os
import matplotlib.pyplot as plt
import warnings
import numpy as np

# Make the project's src/ directory (one level above the working directory)
# importable so the local `utils` module can be found.
sys.path.append(os.getcwd() + "/../src/")

from utils import create_dataset

# Build the dataset and discard its index in favour of a fresh 0..n-1 range.
btc = create_dataset().reset_index(drop=True)

# Features: every column except the label and the date, cast to float64.
X = btc.drop(columns=['target', 'Date']).astype(np.float64)
# Label: the 'target' column, cast to float64.
y = btc['target'].astype(np.float64)

# Run the hyperparameter search; `res` holds whatever `cv.fit` returns
# (scikit-learn estimators conventionally return the fitted object itself).
res = cv.fit(X=X, y=y)