# 6. Fine tuning

Following the pipeline search using TPOT, the hyperparameters for each pipeline will be optimized using hyperopt. For this stage, the cost function will represent the capital return of the trading strategy.

Let us recall that the desired performance of the models will not be evaluated using a common regression metric but rather by comparing the results of a trading strategy that makes use of their predictions against a buy-and-hold strategy. Therefore, for this stage of the process, the hyperparameter tuning phase will use a cost function that evaluates the performance of the trading strategy because what we actually want is for the predictions of the models to be useful in this regard.

The predictions of the model will be averaged and a long (buy) position will be taken if this value is positive and a short (sell) position if it is negative. 

In [1]:
import pandas as pd
import joblib
from utils import *
from hyperopt import hp
from hyperopt.pyll import scope
from hyperopt import fmin, tpe, space_eval
from functools import partial
import threading

In [2]:
import warnings
warnings.filterwarnings("ignore")

In [3]:
filtered_vars = joblib.load('models/filtered_vars.joblib')
cutoff_date = joblib.load('models/cutoff_date.joblib')
df = pd.read_csv('data/req_data.csv', index_col=0, parse_dates=True).dropna()
feats = df.drop(labels=['target'], axis=1)
to_predict = df.loc[:, 'target']
complete_data = pd.read_csv('data/ohlcv.csv', index_col=0, parse_dates=True)
del df

In [4]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LassoLarsCV, ElasticNetCV, SGDRegressor, RidgeCV
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline, make_union
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import ExtraTreesRegressor, GradientBoostingRegressor, RandomForestRegressor
from sklearn.preprocessing import RobustScaler, PowerTransformer, KBinsDiscretizer, MaxAbsScaler, StandardScaler, Normalizer, MinMaxScaler, Binarizer, QuantileTransformer
from sklearn.feature_selection import SelectPercentile, f_regression, SelectFwe
from xgboost import XGBRegressor
from tpot.export_utils import set_param_recursive
from tpot.builtins import StackingEstimator, ZeroCount

The following are the pipelines found in the previous step ((Part II) pipeline_search notebook). A pipeline will be optimized for each cluster phase/all data and for each data transformation (cuberoot, arcsin and none).

In [5]:
best_pipelines_all = {
    'cuberoot': make_pipeline(
        KBinsDiscretizer(encode="ordinal", n_bins=500, strategy="quantile"),
        ExtraTreesRegressor(bootstrap=True, max_features=0.5, min_samples_leaf=18, min_samples_split=8, n_estimators=100)
    ),
    'arsinh': make_pipeline(
        SelectFwe(score_func=f_regression, alpha=0.048),
        StandardScaler(),
        GradientBoostingRegressor(alpha=0.75, learning_rate=0.001, loss="lad", max_depth=9, max_features=0.2, min_samples_leaf=16, min_samples_split=18, n_estimators=100, subsample=0.4)
    ),
    'none': make_pipeline(
        SelectFwe(score_func=f_regression, alpha=0.029),
        GradientBoostingRegressor(alpha=0.99, learning_rate=0.001, loss="lad", max_depth=9, max_features=0.2, min_samples_leaf=13, min_samples_split=8, n_estimators=100, subsample=0.7500000000000001)
    )}

best_pipelines_clusters = {'arsinh':{
    '3': make_pipeline(
        StackingEstimator(estimator=LassoLarsCV(normalize=True)),
        StackingEstimator(estimator=SGDRegressor(alpha=0.01, eta0=0.1, fit_intercept=True, l1_ratio=0.5, learning_rate="constant", loss="epsilon_insensitive", penalty="elasticnet", power_t=50.0)),
        XGBRegressor(learning_rate=0.5, max_depth=3, min_child_weight=13, n_estimators=100, n_jobs=1, objective="reg:squarederror", subsample=0.9000000000000001, verbosity=0)
    ),
    '2': make_pipeline(
        KBinsDiscretizer(encode="ordinal", n_bins=50, strategy="quantile"),
        ExtraTreesRegressor(bootstrap=True, max_features=0.5, min_samples_leaf=4, min_samples_split=8, n_estimators=100)
    ),
    '1': make_pipeline(
        QuantileTransformer(),
        Normalizer(norm="l1"),
        StackingEstimator(estimator=GradientBoostingRegressor(alpha=0.85, learning_rate=0.5, loss="quantile", max_depth=10, max_features=0.9000000000000001, min_samples_leaf=2, min_samples_split=18, n_estimators=100, subsample=0.05)),
        GradientBoostingRegressor(alpha=0.99, learning_rate=0.001, loss="lad", max_depth=9, max_features=0.05, min_samples_leaf=15, min_samples_split=18, n_estimators=100, subsample=0.4)
    ),
    '0': make_pipeline(XGBRegressor(learning_rate=0.1, max_depth=1, min_child_weight=13, n_estimators=100, n_jobs=1, objective="reg:squarederror", subsample=0.05, verbosity=0))    
}, 'cuberoot': {
    '3': make_pipeline(
        Normalizer(norm="l1"),
        RandomForestRegressor(bootstrap=True, max_features=0.45, min_samples_leaf=8, min_samples_split=7, n_estimators=100)
    ),
    '2': make_pipeline(
        Normalizer(norm="l1"),
        ExtraTreesRegressor(bootstrap=True, max_features=0.5, min_samples_leaf=4, min_samples_split=8, n_estimators=100)
    ),
    '1': make_pipeline(
        StackingEstimator(estimator=GradientBoostingRegressor(alpha=0.85, learning_rate=0.001, loss="quantile", max_depth=8, max_features=0.9500000000000001, min_samples_leaf=19, min_samples_split=6, n_estimators=100, subsample=0.7000000000000001)),
        StackingEstimator(estimator=SGDRegressor(alpha=0.01, eta0=0.01, fit_intercept=True, l1_ratio=0.0, learning_rate="constant", loss="squared_loss", penalty="elasticnet", power_t=100.0)),
        ExtraTreesRegressor(bootstrap=False, max_features=0.1, min_samples_leaf=9, min_samples_split=20, n_estimators=100)
    ),
    '0': make_pipeline(XGBRegressor(learning_rate=0.1, max_depth=5, min_child_weight=12, n_estimators=100, n_jobs=1, objective="reg:squarederror", subsample=0.8500000000000001, verbosity=0)),    
}, 'none': {
    '3': make_pipeline(
        StackingEstimator(estimator=GradientBoostingRegressor(alpha=0.95, learning_rate=0.5, loss="ls", max_depth=2, max_features=0.7000000000000001, min_samples_leaf=8, min_samples_split=14, n_estimators=100, subsample=0.55)),
        Normalizer(norm="l1"),
        RandomForestRegressor(bootstrap=True, max_features=1.0, min_samples_leaf=5, min_samples_split=7, n_estimators=100)
    ),
    '2': make_pipeline(
        KBinsDiscretizer(encode="ordinal", n_bins=50, strategy="quantile"),
        ExtraTreesRegressor(bootstrap=True, max_features=0.5, min_samples_leaf=12, min_samples_split=8, n_estimators=100)
    ),
    '1': make_pipeline(ExtraTreesRegressor(bootstrap=True, max_features=0.05, min_samples_leaf=15, min_samples_split=15, n_estimators=100)),
    '0': make_pipeline(
        KBinsDiscretizer(encode="ordinal", n_bins=500, strategy="uniform"),
        QuantileTransformer(),
        GradientBoostingRegressor(alpha=0.9, learning_rate=0.01, loss="huber", max_depth=9, max_features=0.05, min_samples_leaf=9, min_samples_split=8, n_estimators=100, subsample=0.3)
    )    
    }
}

Now lets define broader hyperparameter search spaces for each pipeline. We will also add a roll_mean parameter which will be used to average the last n predictions of the model

In [6]:
all_searchspaces = {'cuberoot': {
    'kbinsdiscretizer__n_bins': hp.choice('kbinsdiscretizer__n_bins', list(range(10, 1000, 10))),
    'kbinsdiscretizer__strategy': hp.choice('kbinsdiscretizer__strategy', ['quantile', 'uniform']),
    'extratreesregressor__bootstrap': hp.choice('extratreesregressor__bootstrap', [True, False]),
    'extratreesregressor__max_features': hp.uniform('extratreesregressor__max_features', 0.05, 1),
    'extratreesregressor__min_samples_leaf': scope.int(hp.quniform('extratreesregressor__min_samples_leaf', 2, 80, 1)),
    'extratreesregressor__min_samples_split': scope.int(hp.quniform('extratreesregressor__min_samples_split', 2, 80, 1)),
    'extratreesregressor__n_estimators': scope.int(hp.quniform('extratreesregressor__n_estimators', 25, 400, 1)),
    'extratreesregressor__max_depth': scope.int(hp.quniform('extratreesregressor__max_depth', 1, 30, 1)),
    'roll_mean': scope.int(hp.quniform('roll_mean', 1, 8, 1))
}, 'arsinh': {
    'gradientboostingregressor__alpha': hp.uniform('gradientboostingregressor__alpha', 0.5, 1),
    'gradientboostingregressor__learning_rate': hp.uniform('gradientboostingregressor__learning_rate', 1e-3, 1),
    'gradientboostingregressor__loss': hp.choice('gradientboostingregressor__loss', ["ls", "lad", "huber", "quantile"]),
    'gradientboostingregressor__max_depth': scope.int(hp.quniform('gradientboostingregressor__max_depth', 1, 12, 1)),
    'gradientboostingregressor__max_features': hp.uniform('gradientboostingregressor__max_features', 0.05, 1),
    'gradientboostingregressor__min_samples_leaf': scope.int(hp.quniform('gradientboostingregressor__min_samples_leaf', 2, 80, 1)),
    'gradientboostingregressor__min_samples_split': scope.int(hp.quniform('gradientboostingregressor__min_samples_split', 2, 80,1)),
    'gradientboostingregressor__n_estimators': scope.int(hp.quniform('gradientboostingregressor__n_estimators', 25, 400, 1)),
    'gradientboostingregressor__subsample': hp.uniform('gradientboostingregressor__subsample', 0.1, 1),
    'selectfwe__alpha': hp.uniform('selectfwe__alpha', 0, 0.1),
    'roll_mean': scope.int(hp.quniform('roll_mean', 1, 8, 1))
}, 'none': {
    'gradientboostingregressor__alpha': hp.uniform('gradientboostingregressor__alpha', 0.5, 1),
    'gradientboostingregressor__learning_rate': hp.uniform('gradientboostingregressor__learning_rate', 1e-3, 1),
    'gradientboostingregressor__loss': hp.choice('gradientboostingregressor__loss', ["ls", "lad", "huber", "quantile"]),
    'gradientboostingregressor__max_depth': scope.int(hp.quniform('gradientboostingregressor__max_depth', 1, 12, 1)),
    'gradientboostingregressor__max_features': hp.uniform('gradientboostingregressor__max_features', 0.05, 1),
    'gradientboostingregressor__min_samples_leaf': scope.int(hp.quniform('gradientboostingregressor__min_samples_leaf', 2, 80, 1)),
    'gradientboostingregressor__min_samples_split': scope.int(hp.quniform('gradientboostingregressor__min_samples_split', 2, 80, 1)),
    'gradientboostingregressor__n_estimators': scope.int(hp.quniform('gradientboostingregressor__n_estimators', 25, 400, 1)),
    'gradientboostingregressor__subsample': hp.uniform('gradientboostingregressor__subsample', 0.1, 1),
    'selectfwe__alpha': hp.uniform('selectfwe__alpha', 0, 0.1),
    'roll_mean': scope.int(hp.quniform('roll_mean', 1, 8, 1))
}}

In [7]:
cluster_searchspaces = {
    'cuberoot': {
        'roll_mean': scope.int(hp.quniform('roll_mean', 1, 8, 1)),
        '3': {
            'randomforestregressor__bootstrap': hp.choice('randomforestregressor__bootstrap', [True, False]),
            'randomforestregressor__max_features': hp.uniform('randomforestregressor__max_features', 0.05, 1),
            'randomforestregressor__min_samples_leaf': scope.int(hp.quniform('randomforestregressor__min_samples_leaf', 2, 80, 1)),
            'randomforestregressor__min_samples_split': scope.int(hp.quniform('randomforestregressor__min_samples_split', 2, 80, 1)),
            'randomforestregressor__n_estimators': scope.int(hp.quniform('randomforestregressor__n_estimators', 25, 400, 1)),
            'randomforestregressor__max_depth': scope.int(hp.quniform('randomforestregressor__max_depth', 1, 30, 1))   
        },
        '2': {
            'extratreesregressor__bootstrap': hp.choice('2_extratreesregressor__bootstrap', [True, False]),
            'extratreesregressor__max_features': hp.uniform('2_extratreesregressor__max_features', 0.05, 1),
            'extratreesregressor__min_samples_leaf': scope.int(hp.quniform('2_extratreesregressor__min_samples_leaf', 2, 80, 1)),
            'extratreesregressor__min_samples_split': scope.int(hp.quniform('2_extratreesregressor__min_samples_split', 2, 80, 1)),
            'extratreesregressor__n_estimators': scope.int(hp.quniform('2_extratreesregressor__n_estimators', 25, 400, 1)),
            'extratreesregressor__max_depth': scope.int(hp.quniform('2_extratreesregressor__max_depth', 1, 30, 1))
        }, 
        '1': {
            'stackingestimator-1__estimator__alpha': hp.uniform('stackingestimator-1__estimator__alpha', 0.5, 1),
            'stackingestimator-1__estimator__learning_rate': hp.uniform('stackingestimator-1__estimator__learning_rate', 1e-3, 1),
            'stackingestimator-1__estimator__loss': hp.choice('stackingestimator-1__estimator__loss', ["ls", "lad", "huber", "quantile"]),
            'stackingestimator-1__estimator__max_depth': scope.int(hp.quniform('stackingestimator-1__estimator__max_depth', 1, 12, 1)),
            'stackingestimator-1__estimator__max_features': hp.uniform('stackingestimator-1__estimator__max_features', 0.05, 1),
            'stackingestimator-1__estimator__min_samples_leaf': scope.int(hp.quniform('stackingestimator-1__estimator__min_samples_leaf', 2, 80, 1)),
            'stackingestimator-1__estimator__min_samples_split': scope.int(hp.quniform('stackingestimator-1__estimator__min_samples_split', 2, 80, 1)),
            'stackingestimator-1__estimator__n_estimators': scope.int(hp.quniform('stackingestimator-1__estimator__n_estimators', 25, 500, 1)),
            'stackingestimator-1__estimator__subsample': hp.uniform('stackingestimator-1__estimator__subsample', 0.1, 1),
            'stackingestimator-2__estimator__loss': hp.choice('stackingestimator-2__estimator__loss', ['squared_loss', 'huber', 'epsilon_insensitive']),
            'stackingestimator-2__estimator__alpha': hp.uniform('stackingestimator-2__estimator__alpha', 0, 0.01),
            'stackingestimator-2__estimator__l1_ratio': hp.uniform('stackingestimator-2__estimator__l1_ratio', 0, 1),
            'stackingestimator-2__estimator__eta0': hp.uniform('stackingestimator-2__estimator__eta0', 0.1, 1), 
            'stackingestimator-2__estimator__power_t': scope.int(hp.quniform('stackingestimator-2__estimator__power_t', 0, 100, 1)),
            'extratreesregressor__bootstrap': hp.choice('extratreesregressor__bootstrap', [True, False]),
            'extratreesregressor__max_features': hp.uniform('extratreesregressor__max_features', 0.05, 1),
            'extratreesregressor__min_samples_leaf': scope.int(hp.quniform('extratreesregressor__min_samples_leaf', 2, 80, 1)),
            'extratreesregressor__min_samples_split': scope.int(hp.quniform('extratreesregressor__min_samples_split', 2, 80, 1)),
            'extratreesregressor__n_estimators': scope.int(hp.quniform('extratreesregressor__n_estimators', 25, 400, 1)),
            'extratreesregressor__max_depth': scope.int(hp.quniform('extratreesregressor__max_depth', 1, 30, 1))
        },
        '0': {
            'xgbregressor__min_child_weight': scope.int(hp.quniform('xgbregressor__min_child_weight', 2, 35, 1)),
            'xgbregressor__reg_lambda': hp.lognormal('xgbregressor__reg_lambda', 0, 10),
            'xgbregressor__learning_rate': hp.uniform('xgbregressor__learning_rate', 1e-3, 1),
            'xgbregressor__subsample': hp.uniform('xgbregressor__subsample', 0.1, 1),
            'xgbregressor__max_depth': scope.int(hp.quniform('xgbregressor__max_depth', 1, 12, 1)),
            'xgbregressor__n_estimators': scope.int(hp.quniform('xgbregressor__n_estimators', 25, 400, 1)),
            'xgbregressor__reg_alpha': hp.lognormal('xgbregressor__reg_alpha', 0, 10)
        },
    }, 'arsinh': {
        'roll_mean': scope.int(hp.quniform('roll_mean', 1, 8, 1)),
        '0': {
            'xgbregressor__min_child_weight': scope.int(hp.quniform('xgbregressor__min_child_weight', 2, 35, 1)),
            'xgbregressor__reg_lambda': hp.lognormal('xgbregressor__reg_lambda', 0, 10),
            'xgbregressor__learning_rate': hp.uniform('xgbregressor__learning_rate', 1e-3, 1),
            'xgbregressor__subsample': scope.int(hp.uniform('xgbregressor__subsample', 0.1, 1)),
            'xgbregressor__max_depth': scope.int(hp.quniform('xgbregressor__max_depth', 1, 12, 1)),
            'xgbregressor__n_estimators': scope.int(hp.quniform('xgbregressor__n_estimators', 25, 400, 1)),
            'xgbregressor__reg_alpha': hp.lognormal('xgbregressor__reg_alpha', 0, 10)
        },
        '1': {
            'gradientboostingregressor__alpha': hp.uniform('gradientboostingregressor__alpha', 0.5, 1),
            'gradientboostingregressor__learning_rate': hp.uniform('gradientboostingregressor__learning_rate', 1e-3, 1),
            'gradientboostingregressor__loss': hp.choice('gradientboostingregressor__loss', ["ls", "lad", "huber", "quantile"]),
            'gradientboostingregressor__max_depth': scope.int(hp.quniform('gradientboostingregressor__max_depth', 1, 12, 1)),
            'gradientboostingregressor__max_features': hp.uniform('gradientboostingregressor__max_features', 0.05, 1),
            'gradientboostingregressor__min_samples_leaf': scope.int(hp.quniform('gradientboostingregressor__min_samples_leaf', 2, 80, 1)),
            'gradientboostingregressor__min_samples_split': scope.int(hp.quniform('gradientboostingregressor__min_samples_split', 2, 80, 1)),
            'gradientboostingregressor__n_estimators': scope.int(hp.quniform('gradientboostingregressor__n_estimators', 25, 400, 1)),
            'gradientboostingregressor__subsample': hp.uniform('gradientboostingregressor__subsample', 0.1, 1),
            'stackingestimator__estimator__alpha': hp.uniform('stackingestimator__estimator__alpha', 0.5, 1),
            'stackingestimator__estimator__learning_rate': hp.uniform('stackingestimator__estimator__learning_rate', 1e-3, 1),
            'stackingestimator__estimator__loss': hp.choice('stackingestimator__estimator__loss', ["ls", "lad", "huber", "quantile"]),
            'stackingestimator__estimator__max_depth': scope.int(hp.quniform('stackingestimator__estimator__max_depth', 1, 12, 1)),
            'stackingestimator__estimator__max_features': hp.uniform('stackingestimator__estimator__max_features', 0.05, 1),
            'stackingestimator__estimator__min_samples_leaf': scope.int(hp.quniform('stackingestimator__estimator__min_samples_leaf', 2, 80, 1)),
            'stackingestimator__estimator__min_samples_split': scope.int(hp.quniform('stackingestimator__estimator__min_samples_split', 2, 80, 1)),
            'stackingestimator__estimator__n_estimators': scope.int(hp.quniform('stackingestimator__estimator__n_estimators', 25, 500, 1)),
            'stackingestimator__estimator__subsample': hp.uniform('stackingestimator__estimator__subsample', 0.1, 1),
        },
        '2': {
            'extratreesregressor__bootstrap': hp.choice('extratreesregressor__bootstrap', [True, False]),
            'extratreesregressor__max_features': hp.uniform('extratreesregressor__max_features', 0.05, 1),
            'extratreesregressor__min_samples_leaf': scope.int(hp.quniform('extratreesregressor__min_samples_leaf', 2, 80, 1)),
            'extratreesregressor__min_samples_split': scope.int(hp.quniform('extratreesregressor__min_samples_split', 2, 80, 1)),
            'extratreesregressor__n_estimators': scope.int(hp.quniform('extratreesregressor__n_estimators', 25, 400, 1)),
            'extratreesregressor__max_depth': scope.int(hp.quniform('extratreesregressor__max_depth', 1, 30, 1)),
            'kbinsdiscretizer__n_bins': hp.choice('kbinsdiscretizer__n_bins', list(range(10, 1000, 10))),
            'kbinsdiscretizer__strategy': hp.choice('kbinsdiscretizer__strategy', ['quantile', 'uniform'])
        },
        '3': {
            'xgbregressor__min_child_weight': scope.int(hp.quniform('3_xgbregressor__min_child_weight', 2, 35, 1)),
            'xgbregressor__reg_lambda': hp.lognormal('3_xgbregressor__reg_lambda', 0, 10),
            'xgbregressor__learning_rate': hp.uniform('3_xgbregressor__learning_rate', 1e-3, 1),
            'xgbregressor__subsample': hp.uniform('3_xgbregressor__subsample', 0.1, 1),
            'xgbregressor__max_depth': scope.int(hp.quniform('3_xgbregressor__max_depth', 1, 12, 1)),
            'xgbregressor__n_estimators': scope.int(hp.quniform('3_xgbregressor__n_estimators', 25, 400, 1)),
            'xgbregressor__reg_alpha': hp.lognormal('3_xgbregressor__reg_alpha', 0, 10),
            'stackingestimator-2__estimator__loss': hp.choice('stackingestimator-2__estimator__loss', ['squared_loss', 'huber', 'epsilon_insensitive']),
            'stackingestimator-2__estimator__alpha': hp.uniform('stackingestimator-2__estimator__alpha', 0, 0.01),
            'stackingestimator-2__estimator__l1_ratio': hp.uniform('stackingestimator-2__estimator__l1_ratio', 0, 1),
            'stackingestimator-2__estimator__eta0': hp.uniform('stackingestimator-2__estimator__eta0', 0.1, 1), 
            'stackingestimator-2__estimator__power_t': scope.int(hp.quniform('stackingestimator-2__estimator__power_t', 0, 100, 1))
        }
    }, 'none': {
        'roll_mean': scope.int(hp.quniform('roll_mean', 1, 8, 1)),
        '0': {
            'kbinsdiscretizer__n_bins': hp.choice('kbinsdiscretizer__n_bins', list(range(10, 1000, 10))),
            'kbinsdiscretizer__strategy': hp.choice('kbinsdiscretizer__strategy', ['quantile', 'uniform']),
            'gradientboostingregressor__alpha': hp.uniform('gradientboostingregressor__alpha', 0.5, 1),
            'gradientboostingregressor__learning_rate': hp.uniform('gradientboostingregressor__learning_rate', 1e-3, 1),
            'gradientboostingregressor__loss': hp.choice('gradientboostingregressor__loss', ["ls", "lad", "huber", "quantile"]),
            'gradientboostingregressor__max_depth': scope.int(hp.quniform('gradientboostingregressor__max_depth', 1, 12, 1)),
            'gradientboostingregressor__max_features': hp.uniform('gradientboostingregressor__max_features', 0.05, 1),
            'gradientboostingregressor__min_samples_leaf': scope.int(hp.quniform('gradientboostingregressor__min_samples_leaf', 2, 80, 1)),
            'gradientboostingregressor__min_samples_split': scope.int(hp.quniform('gradientboostingregressor__min_samples_split', 2, 80, 1)),
            'gradientboostingregressor__n_estimators': scope.int(hp.quniform('gradientboostingregressor__n_estimators', 25, 400, 1)),
            'gradientboostingregressor__subsample': hp.uniform('gradientboostingregressor__subsample', 0.1, 1)
        },
        '1': {
            'extratreesregressor__bootstrap': hp.choice('extratreesregressor__bootstrap', [True, False]),
            'extratreesregressor__max_features': hp.uniform('extratreesregressor__max_features', 0.05, 1),
            'extratreesregressor__min_samples_leaf': scope.int(hp.quniform('extratreesregressor__min_samples_leaf', 2, 80, 1)),
            'extratreesregressor__min_samples_split': scope.int(hp.quniform('extratreesregressor__min_samples_split', 2, 80, 1)),
            'extratreesregressor__n_estimators': scope.int(hp.quniform('extratreesregressor__n_estimators', 25, 400, 1)),
            'extratreesregressor__max_depth': scope.int(hp.quniform('extratreesregressor__max_depth', 1, 30, 1))
        },
        '2': {
            'kbinsdiscretizer__n_bins': hp.choice('2_kbinsdiscretizer__n_bins', list(range(10, 1000, 10))),
            'kbinsdiscretizer__strategy': hp.choice('2_kbinsdiscretizer__strategy', ['quantile', 'uniform']),
            'extratreesregressor__bootstrap': hp.choice('2_extratreesregressor__bootstrap', [True, False]),
            'extratreesregressor__max_features': hp.uniform('2_extratreesregressor__max_features', 0.05, 1),
            'extratreesregressor__min_samples_leaf': scope.int(hp.quniform('2_extratreesregressor__min_samples_leaf', 2, 80, 1)),
            'extratreesregressor__min_samples_split': scope.int(hp.quniform('2_extratreesregressor__min_samples_split', 2, 80, 1)),
            'extratreesregressor__n_estimators': scope.int(hp.quniform('2_extratreesregressor__n_estimators', 25, 400, 1)),
            'extratreesregressor__max_depth': scope.int(hp.quniform('2_extratreesregressor__max_depth', 1, 30, 1)),
        },
        '3': {
            'stackingestimator__estimator__alpha': hp.uniform('stackingestimator__estimator__alpha', 0.5, 1),
            'stackingestimator__estimator__learning_rate': hp.uniform('stackingestimator__estimator__learning_rate', 1e-3, 1),
            'stackingestimator__estimator__loss': hp.choice('stackingestimator__estimator__loss', ["ls", "lad", "huber", "quantile"]),
            'stackingestimator__estimator__max_depth': scope.int(hp.quniform('stackingestimator__estimator__max_depth', 1, 12, 1)),
            'stackingestimator__estimator__max_features': hp.uniform('stackingestimator__estimator__max_features', 0.05, 1),
            'stackingestimator__estimator__min_samples_leaf': scope.int(hp.quniform('stackingestimator__estimator__min_samples_leaf', 2, 80, 1)),
            'stackingestimator__estimator__min_samples_split': scope.int(hp.quniform('stackingestimator__estimator__min_samples_split', 2, 80, 1)),
            'stackingestimator__estimator__n_estimators': scope.int(hp.quniform('stackingestimator__estimator__n_estimators', 25, 500, 1)),
            'stackingestimator__estimator__subsample': hp.uniform('stackingestimator__estimator__subsample', 0.1, 1),
            'randomforestregressor__bootstrap': hp.choice('randomforestregressor__bootstrap', [True, False]),
            'randomforestregressor__max_features': hp.uniform('randomforestregressor__max_features', 0.05, 1),
            'randomforestregressor__min_samples_leaf': scope.int(hp.quniform('randomforestregressor__min_samples_leaf', 2, 80, 1)),
            'randomforestregressor__min_samples_split': scope.int(hp.quniform('randomforestregressor__min_samples_split', 2, 80, 1)),
            'randomforestregressor__n_estimators': scope.int(hp.quniform('randomforestregressor__n_estimators', 25, 400, 1)),
            'randomforestregressor__max_depth': scope.int(hp.quniform('randomforestregressor__max_depth', 1, 30, 1))
        }
    }
}

In [8]:
vars_to_lag = ['h_high_close', 'h_low_close', 'h_candle_body', 'h_rsi_13h', 'h_ema_50', 'h_ema_200', 'h_obv10_obv50',
              'h_obv50_obv200', 'h_close_ma']

In [9]:
data_transformations = {'none': [lambda x: x, lambda x: x], 'arsinh': [lambda x: np.arcsinh(x), lambda x: np.sinh(x)],
                       'cuberoot': [lambda x: np.cbrt(x), lambda x: x**(3)]}

In [10]:
oos_predictions = {}
do_not_transform = ['h_weekday', 'hour_sin', 'hour_cos', 'month_sin', 'month_cos', 'cluster_mode', 'd_obv10_obv50',
                   'd_obv50_obv200', 'd_hc_15davg', 'd_lc_15davg', 'd_cb_15davg', 'd_rsi_13', 'd_ret60d']

In [11]:
def slicedict(d, s):
    return {k:v for k,v in d.items() if not k.startswith(s)}

In [12]:
def optimize_pipeline_clusters(space, pipelines, x, y, cv, close):
    
    
    """
    Receives a pipeline and a hyperparameter space, makes predictions for each cluster and uses the predictions to predict if the next returns 
    will be positive or negative. It averages the returns and takes a long (buy) position if the average is positive and short
    (sell) position if it is negative.
    
    Outputs the weekly mean return of the trading strategy.
    """
    
    for key, value in pipelines.items():
        value.set_params(**space[key])
        
    preds = {key: [] for key in pipelines.keys()}
    for train, test in cv.split(x):
        
        temp_x = x.iloc[train, :]
        cluster_indices = {'0': temp_x[temp_x['cluster_mode']==0].index, '1': temp_x[temp_x['cluster_mode']==1].index,
                           '2': temp_x[temp_x['cluster_mode']==2].index, '3': temp_x[temp_x['cluster_mode']==3].index}
        
        for key, value in pipelines.items():
            value.fit(temp_x.loc[cluster_indices[key], filtered_vars[int(key)]], y.loc[cluster_indices[key]])
            
            to_predict = x.iloc[test].loc[:, filtered_vars[int(key)]]
            preds[key].append(pd.Series(data=value.predict(to_predict), index=to_predict.index))
    
    preds = pd.concat({k: pd.concat(v, axis=0) for k, v in preds.items()}, axis=1)
    preds = pd.concat([preds, x.loc[:, 'cluster_mode']], axis=1).dropna()
    
    melted = preds.melt(ignore_index=False, id_vars='cluster_mode')
    melted = melted[melted['cluster_mode'].astype(int) == melted['variable'].astype(int)]
    
    preds_rets = pd.concat([melted.iloc[:, -1].rolling(space['roll_mean']).mean(), close.pct_change().shift(-1)], axis=1).dropna()
    
    invert_rets = preds_rets.iloc[:, 0] < 0
    rets = preds_rets.iloc[:, 1].values
    rets[invert_rets] = rets[invert_rets] * -1
    
    capital = pd.Series(data=np.cumprod(1 + rets), index=preds_rets.index)
    weekly_mean_return = capital.resample('1W').last().pct_change().dropna().mean()
    
    return weekly_mean_return * -1

In [13]:
def optimize_pipeline_all(space, pipeline, x, y, cv, close):
    
    """
    Receives a pipeline and a hyperparameter space, makes predictions and uses the predictions to predict if the next returns 
    will be positive or negative. It averages the returns and takes a long (buy) position if the average is positive and short
    (sell) position if it is negative.
    
    Outputs the weekly mean return of the trading strategy.
    """
    
    pipeline.set_params(**slicedict(space, 'roll'))
    
    preds, scores = ts_cross_val(pipeline, x, y, cv=cv)
    
    preds = preds.rolling(space['roll_mean']).mean().dropna()
    preds_rets = pd.concat([preds, close.pct_change().shift(-1)], axis=1).dropna()
    
    invert_rets = preds_rets.iloc[:, 0] < 0
    rets = preds_rets.iloc[:, 1].values
    rets[invert_rets] = rets[invert_rets] * -1
    
    capital = pd.Series(data=np.cumprod(1 + rets), index=preds_rets.index)
    weekly_mean_return = capital.resample('1W').last().pct_change().dropna().mean()
    
    return weekly_mean_return * -1

In [14]:
lock = threading.Lock()

In [15]:
def train_byclusters():
    global lock
    cv = TimeSeriesSplit(n_splits=10)
    best_spaces_clusters = {}
    for transformation, space in cluster_searchspaces.items():
        
        lock.acquire()
        x = feats.copy()
        x.loc[:, ~x.columns.isin(do_not_transform)] = x.loc[:, ~x.columns.isin(do_not_transform)].apply(data_transformations[transformation][0], axis=1) 
        x = shift_dataset(x.copy(), lag=True, forecast=False, nlag=50, var_lags=vars_to_lag, dropna=True)
        x = x.loc[:cutoff_date, :]    
        y = to_predict.loc[x.index].apply(data_transformations[transformation][0])
        lock.release()
        
        fmin_objective = partial(optimize_pipeline_clusters, pipelines=best_pipelines_clusters[transformation], x=x, y=y, cv=cv, close=complete_data.loc[:, 'Close'])
        best_model = fmin(fmin_objective, space=space, algo=tpe.suggest, max_evals=150)
        print('cluster_' + transformation)
        print(space_eval(space, best_model))
        best_spaces_clusters[transformation] = space_eval(space, best_model)

In [16]:
def train_all():
    global lock
    best_spaces_all = {}
    for transformation, space in all_searchspaces.items():
        
        lock.acquire()
        x = feats.copy()
        x.loc[:, ~x.columns.isin(do_not_transform)] = x.loc[:, ~x.columns.isin(do_not_transform)].apply(data_transformations[transformation][0]) 
        x = shift_dataset(x.copy(), lag=True, forecast=False, nlag=50, var_lags=vars_to_lag, dropna=True)
        x = x.loc[:cutoff_date, filtered_vars['all']]
        y = to_predict.loc[x.index].apply(data_transformations[transformation][0])
        lock.release()
        
        fmin_objective = partial(optimize_pipeline_all, pipeline=best_pipelines_all[transformation], x=x, y=y, cv=10, close=complete_data.loc[:, 'Close'])
        best_model = fmin(fmin_objective, space=space, algo=tpe.suggest, max_evals=150)
        print('all_' + transformation)
        print(space_eval(space, best_model))
        best_spaces_all[transformation] = space_eval(space, best_model)

In [17]:
f1 = threading.Thread(target=train_byclusters)
f2 = threading.Thread(target=train_all)

In [18]:
f1.start()
f2.start()

  0%|          | 0/150 [00:00<?, ?trial/s, best loss=?]
[A                                                    
  1%|          | 1/150 [00:32<1:20:16, 32.32s/trial, best loss: -0.020611059619549923]
[A                                                    
  1%|1         | 2/150 [13:28<10:30:23, 255.56s/trial, best loss: -0.04085284873270213]
[A                                                    
  2%|2         | 3/150 [17:33<10:18:29, 252.44s/trial, best loss: -0.04085284873270213]
[A                                                    
  3%|2         | 4/150 [23:06<11:13:07, 276.63s/trial, best loss: -0.04256970095572999]
[A                                                    
  3%|3         | 5/150 [24:46<8:59:49, 223.37s/trial, best loss: -0.04256970095572999] 
[A                                                    
  4%|4         | 6/150 [35:44<14:09:20, 353.89s/trial, best loss: -0.04256970095572999]
[A                                                    
  5%|4         | 7/150 [4

 31%|###       | 46/150 [5:05:50<11:40:35, 404.19s/trial, best loss: -0.05871254921351102]  
[A                                                                                         
 31%|###1      | 47/150 [5:07:16<8:50:10, 308.84s/trial, best loss: -0.05871254921351102]   
[A                                                                                         
 32%|###2      | 48/150 [5:19:46<12:30:07, 441.25s/trial, best loss: -0.05871254921351102]  
[A                                                                                        
 33%|###2      | 49/150 [5:21:02<9:18:06, 331.55s/trial, best loss: -0.05871254921351102]  
[A                                                                                        
 33%|###3      | 50/150 [5:23:50<7:50:40, 282.40s/trial, best loss: -0.05871254921351102]  
[A                                                                                        
 34%|###4      | 51/150 [5:26:54<6:57:27, 253.00s/trial, best loss: -0.0587

[A                                                                                       
 61%|######    | 91/150 [9:22:21<8:02:05, 490.26s/trial, best loss: -0.059579184504440705]
[A                                                                                       
 61%|######1   | 92/150 [9:39:20<10:27:21, 648.98s/trial, best loss: -0.059579184504440705]
[A                                                                                       
 62%|######2   | 93/150 [9:43:54<8:29:35, 536.42s/trial, best loss: -0.059579184504440705] 
[A                                                                                       
 63%|######2   | 94/150 [9:45:37<6:19:27, 406.56s/trial, best loss: -0.059579184504440705]
[A                                                                                       
 63%|######3   | 95/150 [9:50:22<5:39:05, 369.92s/trial, best loss: -0.059579184504440705]
[A                                                                                     

[A                                                                                        
 90%|######### | 135/150 [13:11:09<1:33:09, 372.66s/trial, best loss: -0.059579184504440705]
[A                                                                                        
 91%|######### | 136/150 [13:14:10<1:13:33, 315.22s/trial, best loss: -0.059579184504440705]
[A                                                                                        
 91%|#########1| 137/150 [13:19:27<1:08:22, 315.59s/trial, best loss: -0.059579184504440705]
[A                                                                                         
 92%|#########2| 138/150 [13:23:01<57:02, 285.23s/trial, best loss: -0.059579184504440705]  
[A                                                                                         
 93%|#########2| 139/150 [13:29:50<59:04, 322.26s/trial, best loss: -0.059579184504440705]  
[A                                                                      

[A                                                                                         
 17%|#6        | 25/150 [2:17:23<8:53:44, 256.19s/trial, best loss: -0.05192648632053922]   
[A                                                                                         
 17%|#7        | 26/150 [2:20:18<7:59:04, 231.81s/trial, best loss: -0.05192648632053922]   
[A                                                                                         
 18%|#8        | 27/150 [2:26:04<9:05:27, 266.08s/trial, best loss: -0.05192648632053922]   
[A                                                                                        
 19%|#8        | 28/150 [2:32:40<10:20:10, 305.01s/trial, best loss: -0.05192648632053922] 
[A                                                                                        
 19%|#9        | 29/150 [2:49:40<17:27:35, 519.47s/trial, best loss: -0.05192648632053922] 
[A                                                                       

[A                                                                                        
 46%|####6     | 69/150 [6:58:50<13:10:55, 585.88s/trial, best loss: -0.05192648632053922] 
[A                                                                                        
 47%|####6     | 70/150 [7:01:21<10:07:07, 455.34s/trial, best loss: -0.05192648632053922] 
[A                                                                                        
 47%|####7     | 71/150 [7:04:16<8:08:42, 371.17s/trial, best loss: -0.05192648632053922]  
[A                                                                                        
 48%|####8     | 72/150 [7:22:36<12:46:44, 589.80s/trial, best loss: -0.05192648632053922] 
[A                                                                                        
 49%|####8     | 73/150 [7:43:22<16:49:27, 786.59s/trial, best loss: -0.05192648632053922] 
[A                                                                             

[A                                                                                         
 75%|#######5  | 113/150 [16:22:56<4:00:38, 390.22s/trial, best loss: -0.0551292919218872]  
[A                                                                                         
 76%|#######6  | 114/150 [16:27:14<3:30:15, 350.44s/trial, best loss: -0.0551292919218872]  
[A                                                                                         
 77%|#######6  | 115/150 [16:30:24<2:56:25, 302.45s/trial, best loss: -0.0551292919218872]  
[A                                                                                         
 77%|#######7  | 116/150 [16:31:02<2:06:21, 222.99s/trial, best loss: -0.0551292919218872]  
[A                                                                                         
 78%|#######8  | 117/150 [16:49:56<4:32:52, 496.14s/trial, best loss: -0.0551292919218872]  
[A                                                                   

[A                                                                                        
  1%|1         | 2/150 [12:43<19:22:56, 471.46s/trial, best loss: -0.020473750419154765]   
[A                                                                                        
  2%|2         | 3/150 [15:14<15:19:19, 375.23s/trial, best loss: -0.029827869836064815]   
[A                                                                                        
  3%|2         | 4/150 [20:12<14:16:59, 352.19s/trial, best loss: -0.03544205020040004]    
[A                                                                                        
  3%|3         | 5/150 [29:17<16:30:39, 409.93s/trial, best loss: -0.03554827968060809]     
[A                                                                                         
  4%|4         | 6/150 [34:22<15:08:27, 378.52s/trial, best loss: -0.03554827968060809]     
[A                                                                          

 17%|#6        | 25/150 [3:14:45<16:55:21, 487.37s/trial, best loss: -0.041894949194424144]

 11%|█▏        | 17/150 [1:51:11<11:03:59, 299.55s/trial, best loss: -0.03531464304710195][A
 12%|█▏        | 18/150 [1:55:54<10:54:48, 297.64s/trial, best loss: -0.03531464304710195][A
 13%|█▎        | 19/150 [1:58:51<9:30:53, 261.48s/trial, best loss: -0.03531464304710195] [A
 17%|#7        | 26/150 [3:29:01<20:35:31, 597.84s/trial, best loss: -0.041894949194424144]

 13%|█▎        | 19/150 [2:05:27<9:30:53, 261.48s/trial, best loss: -0.03531464304710195][A
 13%|█▎        | 20/150 [2:09:19<13:24:46, 371.43s/trial, best loss: -0.03531464304710195][A
 18%|#8        | 27/150 [3:39:09<20:31:59, 600.97s/trial, best loss: -0.041894949194424144]

 13%|█▎        | 20/150 [2:15:35<13:24:46, 371.43s/trial, best loss: -0.03531464304710195][A
 14%|█▍        | 21/150 [2:23:30<18:27:45, 515.24s/trial, best loss: -0.03531464304710195][A
 15%|█▍        | 22/150 [2:34:59<20:10:34, 567.45s/trial, best l

 26%|██▌       | 39/150 [5:37:06<15:27:40, 501.45s/trial, best loss: -0.03666976581799213][A
 27%|██▋       | 40/150 [5:38:52<14:48:03, 484.39s/trial, best loss: -0.03666976581799213][A
 27%|██▋       | 41/150 [5:41:57<11:56:55, 394.63s/trial, best loss: -0.03666976581799213][A
 39%|###8      | 58/150 [7:05:49<8:28:57, 331.93s/trial, best loss: -0.046595754625560794]

 27%|██▋       | 41/150 [5:42:16<11:56:55, 394.63s/trial, best loss: -0.03666976581799213][A
 39%|###9      | 59/150 [7:06:11<6:02:21, 238.92s/trial, best loss: -0.046595754625560794]

 27%|██▋       | 41/150 [5:42:37<11:56:55, 394.63s/trial, best loss: -0.03666976581799213][A
 40%|####      | 60/150 [7:14:18<7:49:48, 313.21s/trial, best loss: -0.046595754625560794]

 27%|██▋       | 41/150 [5:50:44<11:56:55, 394.63s/trial, best loss: -0.03666976581799213][A
 28%|██▊       | 42/150 [5:51:57<13:40:57, 456.09s/trial, best loss: -0.03666976581799213][A
 41%|####      | 61/150 [7:17:54<7:01:14, 283.98s/trial, best loss

 55%|█████▌    | 83/150 [8:56:17<5:42:57, 307.13s/trial, best loss: -0.04642554198337497][A
 56%|█████▌    | 84/150 [9:00:58<5:28:55, 299.03s/trial, best loss: -0.04642554198337497][A
 57%|█████▋    | 85/150 [9:05:58<5:24:16, 299.33s/trial, best loss: -0.04642554198337497][A
 53%|#####3    | 80/150 [10:30:06<18:22:52, 945.32s/trial, best loss: -0.04973826813682567] 

 57%|█████▋    | 85/150 [9:06:32<5:24:16, 299.33s/trial, best loss: -0.04642554198337497][A
 57%|█████▋    | 86/150 [9:13:15<6:03:28, 340.75s/trial, best loss: -0.04642554198337497][A
 54%|#####4    | 81/150 [10:36:59<15:03:34, 785.72s/trial, best loss: -0.04973826813682567]

 57%|█████▋    | 86/150 [9:13:25<6:03:28, 340.75s/trial, best loss: -0.04642554198337497][A
 58%|█████▊    | 87/150 [9:15:22<4:50:16, 276.46s/trial, best loss: -0.04642554198337497][A
 59%|█████▊    | 88/150 [9:20:12<4:49:59, 280.64s/trial, best loss: -0.04642554198337497][A
 55%|#####4    | 82/150 [10:52:20<15:36:33, 826.37s/trial, best loss:

 87%|████████▋ | 130/150 [13:14:42<1:45:04, 315.22s/trial, best loss: -0.04642554198337497][A
 87%|████████▋ | 131/150 [13:17:30<1:25:51, 271.15s/trial, best loss: -0.04642554198337497][A
 67%|######6   | 100/150 [14:43:30<7:15:37, 522.74s/trial, best loss: -0.04973826813682567]

 87%|████████▋ | 131/150 [13:19:56<1:25:51, 271.15s/trial, best loss: -0.04642554198337497][A
 67%|######7   | 101/150 [14:45:41<5:31:06, 405.43s/trial, best loss: -0.04973826813682567]

 87%|████████▋ | 131/150 [13:22:08<1:25:51, 271.15s/trial, best loss: -0.04642554198337497][A
 68%|######8   | 102/150 [14:48:45<4:31:10, 338.96s/trial, best loss: -0.04973826813682567]

 87%|████████▋ | 131/150 [13:25:11<1:25:51, 271.15s/trial, best loss: -0.04642554198337497][A
 88%|████████▊ | 132/150 [13:27:48<1:52:34, 375.27s/trial, best loss: -0.04642554198337497][A
 89%|████████▊ | 133/150 [13:32:41<1:39:18, 350.50s/trial, best loss: -0.04642554198337497][A
 89%|████████▉ | 134/150 [13:35:41<1:19:47, 299.25s/tria

 78%|#######8  | 117/150 [17:46:50<4:18:49, 470.60s/trial, best loss: -0.04973826813682567]

  5%|▍         | 7/150 [1:38:43<19:19:56, 486.69s/trial, best loss: -0.023225739365011527][A
 79%|#######8  | 118/150 [17:47:09<2:58:39, 334.99s/trial, best loss: -0.04973826813682567]

  5%|▍         | 7/150 [1:39:01<19:19:56, 486.69s/trial, best loss: -0.023225739365011527][A
  5%|▌         | 8/150 [1:41:28<38:29:48, 975.97s/trial, best loss: -0.023225739365011527][A
  6%|▌         | 9/150 [1:48:32<31:44:26, 810.40s/trial, best loss: -0.023225739365011527][A
  7%|▋         | 10/150 [1:55:01<26:35:49, 683.92s/trial, best loss: -0.023225739365011527][A
  7%|▋         | 11/150 [2:00:33<22:20:02, 578.43s/trial, best loss: -0.023225739365011527][A
 79%|#######9  | 119/150 [18:10:21<5:36:55, 652.12s/trial, best loss: -0.04973826813682567]

  7%|▋         | 11/150 [2:02:14<22:20:02, 578.43s/trial, best loss: -0.023225739365011527][A
 80%|########  | 120/150 [18:28:30<6:31:33, 783.11s/trial, b

 22%|██▏       | 33/150 [6:10:39<15:42:21, 483.26s/trial, best loss: -0.03473485661067434][A
 99%|#########8| 148/150 [22:18:58<08:03, 241.79s/trial, best loss: -0.04973826813682567] 

 22%|██▏       | 33/150 [6:10:51<15:42:21, 483.26s/trial, best loss: -0.03473485661067434][A
 99%|#########9| 149/150 [22:20:15<03:12, 192.45s/trial, best loss: -0.04973826813682567] 

 22%|██▏       | 33/150 [6:12:08<15:42:21, 483.26s/trial, best loss: -0.03473485661067434][A
 23%|██▎       | 34/150 [6:18:01<15:10:27, 470.92s/trial, best loss: -0.03473485661067434][A
100%|##########| 150/150 [22:27:18<00:00, 261.57s/trial, best loss: -0.04973826813682567] 

 23%|██▎       | 34/150 [6:19:11<15:10:27, 470.92s/trial, best loss: -0.03473485661067434][A
100%|##########| 150/150 [22:27:18<00:00, 538.93s/trial, best loss: -0.04973826813682567] 
all_none                                                                                  
{'gradientboostingregressor__alpha': 0.7553060976421809, 'gradientboosti