In [1]:
from skopt import BayesSearchCV
from skopt.space import Real, Integer, Categorical

from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor
from xgboost.sklearn import XGBRegressor
from sklearn.ensemble import VotingRegressor
from sklearn.svm import SVR

from sklearn.model_selection import TimeSeriesSplit, cross_validate
from sklearn.preprocessing import StandardScaler, SplineTransformer, PowerTransformer
from sklearn.metrics import mean_absolute_percentage_error, r2_score

from sklearn.pipeline import Pipeline

from typing import Any

SEED = 2058
NCALLS = 200

params = [
    {
        'ensemble': [
            VotingRegressor(
                [
                    ('svm', SVR()),
                    ('rf', RandomForestRegressor()),
                    ('ada', AdaBoostRegressor()),
                    ('xgb', XGBRegressor())
                ]
            )
        ],
        'spline__n_knots': Integer(5, 20),
        'spline__degree': Integer(2, 5),

        'ensemble__svm__kernel': Categorical(['rbf', 'sigmoid']),
        'ensemble__svm__gamma': Real(1e-5, 1),
        'ensemble__svm__C': Real(1e-5, 1e4),
        'ensemble__svm__epsilon': Real(1e-5, 1e4),
        'ensemble__svm__max_iter': Categorical([10000]),

        'ensemble__ada__n_estimators': Categorical([50 + i for i in range(0, 300, 50)]),
        'ensemble__ada__learning_rate': Real(1e-5, 1e3),
        'ensemble__ada__loss': Categorical(['linear', 'square', 'exponential']),

        'ensemble__rf__n_estimators': Categorical([50 + i for i in range(0, 400, 50)]),
        'ensemble__rf__max_depth': Integer(4, 16),
        'ensemble__rf__min_samples_split': Integer(2, 20),
        'ensemble__rf__min_samples_leaf': Integer(2, 20),
        'ensemble__rf__max_features': Categorical(['sqrt', 'log2', 1.0]),
        'ensemble__rf__min_impurity_decrease': Real(1e-3, 1),
        'ensemble__rf__ccp_alpha': Real(1e-3, 1e2),

        'ensemble__xgb__n_estimators': Categorical([i for i in range(500, 3000, 1000)]),
        'ensemble__xgb__max_depth': Integer(4, 16),
        'ensemble__xgb__min_child_weight': Integer(1, 6),
        'ensemble__xgb__learning_rate': Real(1e-5, 1),
        'ensemble__xgb__alpha': Real(1e-5, 1e2),
        'ensemble__xgb__lambda': Real(1e-5, 1e2),
        'ensemble__xgb__subsample': Real(0.2, 1),
        'ensemble__xgb__colsample_bytree': Real(0.2, 1),
    }
]

ensemble = VotingRegressor(
    estimators=[
        ('svm', SVR()),
        ('rf', RandomForestRegressor()),
        ('ada', AdaBoostRegressor()),
        ('xgb', XGBRegressor())
    ]
)

pipeline = Pipeline(
    [
        ('power-transformer', PowerTransformer()),
        ('spline', SplineTransformer(n_knots=10)),
        ('ensemble', 'passthrough')
    ]
)

def scorer(estimator, X, y):
    return -mean_absolute_percentage_error(y_true=y, y_pred=estimator.predict(X))

ts_cv = TimeSeriesSplit(
    n_splits=10,
    test_size=1
)

cv = BayesSearchCV(
    pipeline,
    search_spaces=params,
    n_iter=NCALLS,
    scoring=scorer,
    n_jobs=-1,
    random_state=SEED,
    return_train_score=True,
    cv = ts_cv,
    iid=False
)

AttributeError: module 'numpy' has no attribute 'float'.
`np.float` was a deprecated alias for the builtin `float`. To avoid this error in existing code, use `float` by itself. Doing this will not modify any behavior and is safe. If you specifically wanted the numpy scalar type, use `np.float64` here.
The aliases was originally deprecated in NumPy 1.20; for more details and guidance see the original release note at:
    https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations

In [None]:
import sys, os
import matplotlib.pyplot as plt
import warnings
import numpy as np

sys.path.append(os.getcwd() + "/../src/")

from utils import create_dataset

btc = create_dataset()
btc = btc.reset_index(drop=True)

X, y = btc.drop(columns=['target', 'Date']).astype(np.float64), btc['target'].astype(np.float64)

def on_step(optim_result):
    score = cv.best_score_
    print("best score: %s" % score)
    if score < 2:
        print('Interrupting!')
        return True

res = cv.fit(X=X, y=y, callback=on_step)

AttributeError: 'BayesSearchCV' object has no attribute 'best_score_'

In [None]:
with warnings.catch_warnings():
    warnings.filterwarnings(action='ignore')
    res.best_estimator_.fit(X, y)
    y_pred = res.predict(X)

fig, ax = plt.subplots(1, 1, figsize=(20, 10))
ax.scatter(btc['Date'], btc['target'], c='b')
ax.plot(btc['Date'], y_pred, c='r', linestyle='--')
fig.show()

In [None]:
print("----- MAPE -----")
print(f"{mean_absolute_percentage_error(btc['target'], y_pred)}")
print("----- R2 ------")
print(f"{r2_score(btc['target'], y_pred)}")

----- MAPE -----
7.692014985810995
----- R2 ------
-0.45630566254126426


In [None]:
import pickle

with open("./best_estimator.pkl", "rb") as fp:
    pickle.dump(res.best_estimator_, fp)