In [39]:
import sys
import pandas as pd
import numpy as np
from datetime import date
from skopt import BayesSearchCV
from skopt.space import Real, Integer
from sklearn import datasets, metrics
from sklearn.model_selection import TimeSeriesSplit
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, roc_auc_score
from backtesting import Backtest
from nStepLabeling import NStep
from Trading import Metrics, MyStrategy
from DePradoLabeling import triple_barrier_labeling
from DataFactory import *
# from LSTMClassifier import LSTMClassifier
import warnings
warnings.filterwarnings("ignore")

In [40]:
aapl = get_data('AAPL', '2000-01-01', '2020-12-31')

return_col = 'Open' 
aapl['Out'] = triple_barrier_labeling(aapl[return_col]).dropna()
#nstep = NStep(aapl, 1)
# aapl = nstep.get_labels(return_col)

aapl = generate_returns(aapl, return_col, [1, 2, 3, 5, 10, 15])
price_cols = ['Open', 'High', 'Low', 'Close', 'Volume']
return_cols = ['return1', 'return2', 'return3', 'return5', 'return10', 'return15']
prices, y = price_expansion(aapl, n_past=5, feature_cols=price_cols)
returns = returns_expansion(aapl, n_past=5, return_cols=return_cols)
prices = scale_prices(prices)
x = np.concatenate((prices, returns), axis=2)
trainX, testX, trainY, testY =  train_test_split2(x, y, 0.9)

[*********************100%***********************]  1 of 1 completed


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5277/5277 [00:01<00:00, 3538.80it/s]


In [41]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn import metrics
from DataFactory import get_tensors

class LSTMClassifier(nn.Module):
    
    def __init__(self, n_features=11, n_hidden=16, n_layers=1, n_classes=1,
                 num_epochs=100, lr=1e-3, l2=1e-3, threshold=0.5):
        super(LSTMClassifier, self).__init__()
        
        self.n_features = n_features
        self.n_hidden = n_hidden
        self.n_layers = n_layers 
        
        torch.manual_seed(0)
        self.device = device = torch.device("cuda:0" if torch.cuda.is_available()else "cpu")

        self.lstm = nn.LSTM(
            input_size=self.n_features,
            hidden_size=self.n_hidden,
            num_layers=self.n_layers,
            batch_first=True
        ).to(self.device)
        self.linear = nn.Linear(in_features=n_hidden, out_features=n_classes).to(self.device)
        self.num_epochs = num_epochs
        self.lr = lr
        self.l2 = l2
        self.optimizer = optim.Adam(self.parameters(), lr=lr, weight_decay=l2)
        self.criterion = nn.BCELoss()
        self.threshold = threshold

    def forward(self, x):
        self.lstm.flatten_parameters()
        x = x.to(self.device)
        if x.dim() == 2:
           x = x.unsqueeze(1).to(self.device)
        _, (hidden, _) = self.lstm(x)
        out = hidden[-1]
        prediction = torch.sigmoid(self.linear(out)).to(self.device)
        return prediction

    def get_params(self, deep=True):
        return {"lr": self.lr, "l2": self.l2, "n_hidden": self.n_hidden,
                "n_layers": self.n_layers, "num_epochs": self.num_epochs}
    
    def set_params(self, **parameters):
        for parameter, value in parameters.items():
            setattr(self, parameter, value)
        return self

    def fit(self, x, y):
        x, y = get_tensors(x, y)
        x = x.to(self.device)
        y = y.to(self.device)
        for epoch in range(self.num_epochs):
            prediction = self(x)
            loss = self.criterion(prediction, y)
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()
            #score = metrics.roc_auc_score(y, prediction.detach())
            #if (epoch + 1) % 10 == 0:
            #   print(f'epoch: {epoch + 1}, loss = {loss.item():.4f}, roc_auc = {score:.4f}')
        prediction = np.round(prediction.detach().cpu())
        score = metrics.f1_score(y.cpu(), prediction)
        # score = metrics.roc_auc_score(y.cpu(), prediction.detach().cpu())
        print(self.get_params(), score) 
        return self
    
    def predict(self, x):
        x = torch.from_numpy(x.astype(np.float32))
        prediction = self(x)
        prediction[prediction < self.threshold] = 0
        prediction[prediction >= self.threshold] = 1
        return np.array(prediction.detach().squeeze().cpu()).astype(int)
    
    def score(self, x, y):
        x, y = get_tensors(x, y)
        x = x.to(self.device)
        with torch.no_grad():
            prediction = self(x)
            #score = metrics.roc_auc_score(y, prediction.cpu())
            prediction = np.round(prediction.cpu())
            #prediction[prediction < self.threshold] = 0
            #prediction[prediction >= self.threshold] = 1
            score = metrics.f1_score(y, prediction)
            #score = metrics.accuracy_score(y, prediction)
        return score


In [32]:
#model = LSTMClassifier()
#model.fit(trainX, trainY)
#model.predict(testX)

In [42]:
m1 = BayesSearchCV(
     LSTMClassifier(),
     {
         'lr': Real(1e-6, 1e-0, prior='log-uniform'),
         'l2': Real(1e-6, 1e-0, prior='log-uniform'),
         'n_layers': Integer(1, 2),
         'n_hidden': Integer(4, 16),
         'num_epochs': Integer(50, 500)
     },
     cv=TimeSeriesSplit(n_splits=10, max_train_size=5000),
     n_iter=50,
     random_state=0,
     refit=True
)
_ = m1.fit(trainX, trainY)

{'lr': 0.011388776056171362, 'l2': 0.0015338574134575655, 'n_hidden': 11, 'n_layers': 2, 'num_epochs': 436} 0.6504065040650406
{'lr': 0.011388776056171362, 'l2': 0.0015338574134575655, 'n_hidden': 11, 'n_layers': 2, 'num_epochs': 436} 0.5212765957446809
{'lr': 0.011388776056171362, 'l2': 0.0015338574134575655, 'n_hidden': 11, 'n_layers': 2, 'num_epochs': 436} 0.6779661016949152
{'lr': 0.011388776056171362, 'l2': 0.0015338574134575655, 'n_hidden': 11, 'n_layers': 2, 'num_epochs': 436} 0.6998841251448437
{'lr': 0.011388776056171362, 'l2': 0.0015338574134575655, 'n_hidden': 11, 'n_layers': 2, 'num_epochs': 436} 0.7062404870624048
{'lr': 0.011388776056171362, 'l2': 0.0015338574134575655, 'n_hidden': 11, 'n_layers': 2, 'num_epochs': 436} 0.7066359210863438
{'lr': 0.011388776056171362, 'l2': 0.0015338574134575655, 'n_hidden': 11, 'n_layers': 2, 'num_epochs': 436} 0.7092748009468474
{'lr': 0.011388776056171362, 'l2': 0.0015338574134575655, 'n_hidden': 11, 'n_layers': 2, 'num_epochs': 436} 0.7

{'lr': 0.03461923256454243, 'l2': 0.6350199205489281, 'n_hidden': 9, 'n_layers': 1, 'num_epochs': 157} 0.7095115681233933
{'lr': 0.03461923256454243, 'l2': 0.6350199205489281, 'n_hidden': 9, 'n_layers': 1, 'num_epochs': 157} 0.7080908578937488
{'lr': 0.03461923256454243, 'l2': 0.6350199205489281, 'n_hidden': 9, 'n_layers': 1, 'num_epochs': 157} 0.7138114209827358
{'lr': 0.03461923256454243, 'l2': 0.6350199205489281, 'n_hidden': 9, 'n_layers': 1, 'num_epochs': 157} 0.7130252729176014
{'lr': 0.0014516910118142717, 'l2': 0.0011559373017485425, 'n_hidden': 13, 'n_layers': 1, 'num_epochs': 74} 0.0
{'lr': 0.0014516910118142717, 'l2': 0.0011559373017485425, 'n_hidden': 13, 'n_layers': 1, 'num_epochs': 74} 0.06666666666666667
{'lr': 0.0014516910118142717, 'l2': 0.0011559373017485425, 'n_hidden': 13, 'n_layers': 1, 'num_epochs': 74} 0.6935890964159516
{'lr': 0.0014516910118142717, 'l2': 0.0011559373017485425, 'n_hidden': 13, 'n_layers': 1, 'num_epochs': 74} 0.6993587325537532
{'lr': 0.001451691

{'lr': 0.22733418575264558, 'l2': 0.2207301083656404, 'n_hidden': 15, 'n_layers': 2, 'num_epochs': 341} 0.7143094565398039
{'lr': 0.22733418575264558, 'l2': 0.2207301083656404, 'n_hidden': 15, 'n_layers': 2, 'num_epochs': 341} 0.7133662625355487
{'lr': 1e-06, 'l2': 1e-06, 'n_hidden': 10, 'n_layers': 1, 'num_epochs': 489} 0.6855670103092784
{'lr': 1e-06, 'l2': 1e-06, 'n_hidden': 10, 'n_layers': 1, 'num_epochs': 489} 0.5592783505154639
{'lr': 1e-06, 'l2': 1e-06, 'n_hidden': 10, 'n_layers': 1, 'num_epochs': 489} 0.679622431982232
{'lr': 1e-06, 'l2': 1e-06, 'n_hidden': 10, 'n_layers': 1, 'num_epochs': 489} 0.6980544747081713
{'lr': 1e-06, 'l2': 1e-06, 'n_hidden': 10, 'n_layers': 1, 'num_epochs': 489} 0.7061336588342998
{'lr': 1e-06, 'l2': 1e-06, 'n_hidden': 10, 'n_layers': 1, 'num_epochs': 489} 0.7050025786487879
{'lr': 1e-06, 'l2': 1e-06, 'n_hidden': 10, 'n_layers': 1, 'num_epochs': 489} 0.7091340963074929
{'lr': 1e-06, 'l2': 1e-06, 'n_hidden': 10, 'n_layers': 1, 'num_epochs': 489} 0.7083

{'lr': 1.0, 'l2': 1e-06, 'n_hidden': 4, 'n_layers': 2, 'num_epochs': 500} 0.6803779877709839
{'lr': 1.0, 'l2': 1e-06, 'n_hidden': 4, 'n_layers': 2, 'num_epochs': 500} 0.6991806476785016
{'lr': 1.0, 'l2': 1e-06, 'n_hidden': 4, 'n_layers': 2, 'num_epochs': 500} 0.7053435114503817
{'lr': 1.0, 'l2': 1e-06, 'n_hidden': 4, 'n_layers': 2, 'num_epochs': 500} 0.704
{'lr': 1.0, 'l2': 1e-06, 'n_hidden': 4, 'n_layers': 2, 'num_epochs': 500} 0.7092872570194384
{'lr': 1.0, 'l2': 1e-06, 'n_hidden': 4, 'n_layers': 2, 'num_epochs': 500} 0.7084675127865127
{'lr': 1.0, 'l2': 1e-06, 'n_hidden': 4, 'n_layers': 2, 'num_epochs': 500} 0.7139055222887558
{'lr': 1.0, 'l2': 1e-06, 'n_hidden': 4, 'n_layers': 2, 'num_epochs': 500} 0.713064346782661
{'lr': 1.1837557392219303e-06, 'l2': 4.163808567419651e-06, 'n_hidden': 6, 'n_layers': 2, 'num_epochs': 370} 0.6270270270270271
{'lr': 1.1837557392219303e-06, 'l2': 4.163808567419651e-06, 'n_hidden': 6, 'n_layers': 2, 'num_epochs': 370} 0.4845222072678332
{'lr': 1.18375

{'lr': 0.00010278098501358235, 'l2': 0.8550586324736423, 'n_hidden': 15, 'n_layers': 2, 'num_epochs': 256} 0.7060895822848515
{'lr': 0.00010278098501358235, 'l2': 0.8550586324736423, 'n_hidden': 15, 'n_layers': 2, 'num_epochs': 256} 0.7096912521440822
{'lr': 0.00010278098501358235, 'l2': 0.8550586324736423, 'n_hidden': 15, 'n_layers': 2, 'num_epochs': 256} 0.7082472290061995
{'lr': 0.00010278098501358235, 'l2': 0.8550586324736423, 'n_hidden': 15, 'n_layers': 2, 'num_epochs': 256} 0.7142857142857143
{'lr': 0.00010278098501358235, 'l2': 0.8550586324736423, 'n_hidden': 15, 'n_layers': 2, 'num_epochs': 256} 0.713452042495885
{'lr': 0.0004917898633196951, 'l2': 0.93713238842475, 'n_hidden': 9, 'n_layers': 2, 'num_epochs': 321} 0.631578947368421
{'lr': 0.0004917898633196951, 'l2': 0.93713238842475, 'n_hidden': 9, 'n_layers': 2, 'num_epochs': 321} 0.4656796769851952
{'lr': 0.0004917898633196951, 'l2': 0.93713238842475, 'n_hidden': 9, 'n_layers': 2, 'num_epochs': 321} 0.6875
{'lr': 0.000491789

{'lr': 1.3760624466236608e-06, 'l2': 0.0029327566956032667, 'n_hidden': 15, 'n_layers': 1, 'num_epochs': 500} 0.7092872570194384
{'lr': 1.3760624466236608e-06, 'l2': 0.0029327566956032667, 'n_hidden': 15, 'n_layers': 1, 'num_epochs': 500} 0.7084675127865127
{'lr': 1.3760624466236608e-06, 'l2': 0.0029327566956032667, 'n_hidden': 15, 'n_layers': 1, 'num_epochs': 500} 0.7139055222887558
{'lr': 1.3760624466236608e-06, 'l2': 0.0029327566956032667, 'n_hidden': 15, 'n_layers': 1, 'num_epochs': 500} 0.713064346782661
{'lr': 0.15921889062026928, 'l2': 2.0969643109735156e-06, 'n_hidden': 14, 'n_layers': 1, 'num_epochs': 474} 0.6811989100817438
{'lr': 0.15921889062026928, 'l2': 2.0969643109735156e-06, 'n_hidden': 14, 'n_layers': 1, 'num_epochs': 474} 0.5329815303430079
{'lr': 0.15921889062026928, 'l2': 2.0969643109735156e-06, 'n_hidden': 14, 'n_layers': 1, 'num_epochs': 474} 0.6788482834994463
{'lr': 0.15921889062026928, 'l2': 2.0969643109735156e-06, 'n_hidden': 14, 'n_layers': 1, 'num_epochs': 4

{'lr': 0.15403335548471245, 'l2': 0.0008483225267329389, 'n_hidden': 15, 'n_layers': 2, 'num_epochs': 500} 0.7053435114503817
{'lr': 0.15403335548471245, 'l2': 0.0008483225267329389, 'n_hidden': 15, 'n_layers': 2, 'num_epochs': 500} 0.704
{'lr': 0.15403335548471245, 'l2': 0.0008483225267329389, 'n_hidden': 15, 'n_layers': 2, 'num_epochs': 500} 0.7092872570194384
{'lr': 0.15403335548471245, 'l2': 0.0008483225267329389, 'n_hidden': 15, 'n_layers': 2, 'num_epochs': 500} 0.7084675127865127
{'lr': 0.15403335548471245, 'l2': 0.0008483225267329389, 'n_hidden': 15, 'n_layers': 2, 'num_epochs': 500} 0.7139055222887558
{'lr': 0.15403335548471245, 'l2': 0.0008483225267329389, 'n_hidden': 15, 'n_layers': 2, 'num_epochs': 500} 0.713064346782661
{'lr': 3.281793816487892e-06, 'l2': 1e-06, 'n_hidden': 5, 'n_layers': 1, 'num_epochs': 224} 0.4662576687116564
{'lr': 3.281793816487892e-06, 'l2': 1e-06, 'n_hidden': 5, 'n_layers': 1, 'num_epochs': 224} 0.42269503546099296
{'lr': 3.281793816487892e-06, 'l2':

{'lr': 0.00015700581367507574, 'l2': 1.136830743763762e-06, 'n_hidden': 13, 'n_layers': 1, 'num_epochs': 403} 0.7143094565398039
{'lr': 0.00015700581367507574, 'l2': 1.136830743763762e-06, 'n_hidden': 13, 'n_layers': 1, 'num_epochs': 403} 0.7127882599580713
{'lr': 1.0, 'l2': 0.1432361422724717, 'n_hidden': 16, 'n_layers': 1, 'num_epochs': 500} 0.715158094721129


In [43]:
model = m1
print('test score:', model.score(testX, testY))
print('train score:', model.score(trainX, trainY))
print('best score:', model.best_score_)
print('best params:', model.best_params_)
print('test prediction:', model.predict(testX))
#df = pd.DataFrame.from_dict(model.cv_results_)

#np.save('./results/train_prediction_m1_f1', model.predict(trainX))
#np.save('./results/test_prediction_m1_f1', model.predict(testX))

test score: 0.7795823665893271
train score: 0.715158094721129
best score: 0.6860317728053371
best params: OrderedDict([('l2', 0.1432361422724717), ('lr', 1.0), ('n_hidden', 16), ('n_layers', 1), ('num_epochs', 500)])
test prediction: [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1

In [26]:
train_prediction_m1 = np.load('results/train_prediction_m1_f1.npy')
test_prediction_m1 = np.load('results/test_prediction_m1_f1.npy')
print('train accuracy:', accuracy_score(trainY, train_prediction_m1))
print('test accuracy:', accuracy_score(testY, test_prediction_m1))
print('train roc auc score:', roc_auc_score(trainY, train_prediction_m1))
print('test roc auc score:', roc_auc_score(testY, test_prediction_m1))

train_data = aapl.head(len(trainY))
test_data = aapl.tail(len(testY))
train_data = train_data.loc[:, train_data.columns != 'Out']
test_data = test_data.loc[:, test_data.columns != 'Out']

measures = ['Start', 'End', 'Sharpe Ratio', 'Equity Final [$]', 'Equity Peak [$]', 
            'Return (Ann.) [%]', 'Volatility (Ann.) [%]', '# Trades']
comm = 0.00 # 0.01

performance_metrics = Metrics(test_data)
print('Strategy performance on test data')
print('Original labels, commision=0.00, trade_on_close=False -> Open')
print(performance_metrics.get_metrics(testY, measures, commission=comm, trade_on_close=False))
print('Predicted labels, commision=0.00, trade_on_close=False -> Open')
print(performance_metrics.get_metrics(test_prediction_m1, measures, commission=comm, trade_on_close=False))
print('Original labels, commision=0.00, trade_on_close=True')
print(performance_metrics.get_metrics(testY, measures, commission=comm, trade_on_close=True))
print('Predicted labels, commision=0.00, trade_on_close=True')
print(performance_metrics.get_metrics(test_prediction_m1, measures, commission=comm, trade_on_close=True))

#print('Test set, original labels')
#print(testY.astype(int))
#print('Test set, predicted labels')
#print(test_prediction_m1)

train accuracy: 0.8041825095057035
test accuracy: 0.6083650190114068
train roc auc score: 0.8004761904761906
test roc auc score: 0.5758560595355848
Strategy performance on test data
Original labels, commision=0.00, trade_on_close=False -> Open
Start                    2018-11-20 00:00:00
End                      2020-12-22 00:00:00
Sharpe Ratio                        2.277057
Equity Final [$]               371655.799986
Equity Peak [$]                371655.799986
Return (Ann.) [%]                 465.242503
Volatility (Ann.) [%]             204.317416
# Trades                                 119
dtype: object
Predicted labels, commision=0.00, trade_on_close=False -> Open
Start                    2018-11-20 00:00:00
End                      2020-12-22 00:00:00
Sharpe Ratio                             0.0
Equity Final [$]                 7492.606322
Equity Peak [$]                 13232.721084
Return (Ann.) [%]                 -12.916033
Volatility (Ann.) [%]               34.78055
# Tr

In [24]:
train_data['Out'] = train_prediction_m1
test_data['Out'] = test_prediction_m1

print('Train data, % pozitivnih signala prije obrade:', np.sum(train_data['Out'] == 1) / len(train_data) * 100)
print('Test data, % pozitivnih signala prije obrade:', np.sum(test_data['Out'] == 1) / len(test_data) * 100)

train_data = filter_false_positives(train_data, comm)
test_data = filter_false_positives(test_data, comm)

print('Train data, % pozitivnih signala nakon obrade:', np.sum(train_data['Out'] == 1) / len(train_data) * 100)
print('Test data, % pozitivnih signala nakon obrade:', np.sum(test_data['Out'] == 1) / len(test_data) * 100)

trainY_new = np.array(train_data['Out'])
testY_new = np.array(test_data['Out'])

print('Strategy performance on test data')
print('Predicted labels - M1, commision=0.00, trade_on_close=False -> Open')
print(performance_metrics.get_metrics(test_prediction_m1, measures, commission=comm, trade_on_close=False))
print('Predicted labels - M1 without FP, commision=0.00, trade_on_close=False -> Open')
print(performance_metrics.get_metrics(testY_new, measures, commission=comm, trade_on_close=False))
print('Predicted labels - M1, commision=0.00, trade_on_close=True')
print(performance_metrics.get_metrics(test_prediction_m1, measures, commission=comm, trade_on_close=True))
print('Predicted labels - M1 without FP, commision=0.00, trade_on_close=True')
print(performance_metrics.get_metrics(testY_new, measures, commission=comm, trade_on_close=True))

#print('Test set, predicted labels')
#print(test_prediction_m1)
#print('Test set, predicted labels without false positive signals')
#print(testY_new)

Train data, % pozitivnih signala prije obrade: 56.67511618081961
Test data, % pozitivnih signala prije obrade: 63.68821292775665
Train data, % pozitivnih signala nakon obrade: 41.42374313476975
Test data, % pozitivnih signala nakon obrade: 45.627376425855516
Strategy performance on test data
Predicted labels - M1, commision=0.00, trade_on_close=False -> Open
Start                    2018-11-20 00:00:00
End                      2020-12-22 00:00:00
Sharpe Ratio                             0.0
Equity Final [$]                 7492.606322
Equity Peak [$]                 13232.721084
Return (Ann.) [%]                 -12.916033
Volatility (Ann.) [%]               34.78055
# Trades                                 237
dtype: object
Predicted labels - M1 without FP, commision=0.00, trade_on_close=False -> Open
Start                    2018-11-20 00:00:00
End                      2020-12-22 00:00:00
Sharpe Ratio                        1.902407
Equity Final [$]                95525.377042
Equity

In [30]:
# uzmi trainX -> originalne značajke train dataseta m1 modela
# zamijeni trainY s model.predict(trainX) uz filtriranje FP signala -> trainY_new
# treniraj model
# uzmi testX -> originalne značajke test dataseta m1 modela
# zamijeni testY s model.predict(testX) uz filtriranje FP signala -> testY_new
# provjeri performanse od m2.predict(testX)

m2 = BayesSearchCV(
     LSTMClassifier(),
     {
         'lr': Real(1e-6, 1e-0, prior='log-uniform'),
         'l2': Real(1e-6, 1e-0, prior='log-uniform'),
         'n_layers': Integer(1, 2),
         'n_hidden': Integer(4, 16),
         'num_epochs': Integer(50, 500)
     },
     cv=TimeSeriesSplit(n_splits=10, max_train_size=5000),
     n_iter=50,
     random_state=0,
     refit=True
)
_ = m2.fit(trainX, trainY_new)

{'lr': 0.011388776056171362, 'l2': 0.0015338574134575655, 'n_hidden': 11, 'n_layers': 2, 'num_epochs': 436} 0.39080459770114945
{'lr': 0.011388776056171362, 'l2': 0.0015338574134575655, 'n_hidden': 11, 'n_layers': 2, 'num_epochs': 436} 0.4191343963553531
{'lr': 0.011388776056171362, 'l2': 0.0015338574134575655, 'n_hidden': 11, 'n_layers': 2, 'num_epochs': 436} 0.5
{'lr': 0.011388776056171362, 'l2': 0.0015338574134575655, 'n_hidden': 11, 'n_layers': 2, 'num_epochs': 436} 0.5410544511668107
{'lr': 0.011388776056171362, 'l2': 0.0015338574134575655, 'n_hidden': 11, 'n_layers': 2, 'num_epochs': 436} 0.5806045340050378
{'lr': 0.011388776056171362, 'l2': 0.0015338574134575655, 'n_hidden': 11, 'n_layers': 2, 'num_epochs': 436} 0.5805774278215223
{'lr': 0.011388776056171362, 'l2': 0.0015338574134575655, 'n_hidden': 11, 'n_layers': 2, 'num_epochs': 436} 0.577659101023587
{'lr': 0.011388776056171362, 'l2': 0.0015338574134575655, 'n_hidden': 11, 'n_layers': 2, 'num_epochs': 436} 0.5844607653652879

{'lr': 0.03461923256454243, 'l2': 0.6350199205489281, 'n_hidden': 9, 'n_layers': 1, 'num_epochs': 157} 0.005089058524173028
{'lr': 0.03461923256454243, 'l2': 0.6350199205489281, 'n_hidden': 9, 'n_layers': 1, 'num_epochs': 157} 0.0067873303167420825
{'lr': 0.0014516910118142717, 'l2': 0.0011559373017485425, 'n_hidden': 13, 'n_layers': 1, 'num_epochs': 74} 0.0
{'lr': 0.0014516910118142717, 'l2': 0.0011559373017485425, 'n_hidden': 13, 'n_layers': 1, 'num_epochs': 74} 0.0
{'lr': 0.0014516910118142717, 'l2': 0.0011559373017485425, 'n_hidden': 13, 'n_layers': 1, 'num_epochs': 74} 0.0
{'lr': 0.0014516910118142717, 'l2': 0.0011559373017485425, 'n_hidden': 13, 'n_layers': 1, 'num_epochs': 74} 0.0
{'lr': 0.0014516910118142717, 'l2': 0.0011559373017485425, 'n_hidden': 13, 'n_layers': 1, 'num_epochs': 74} 0.0
{'lr': 0.0014516910118142717, 'l2': 0.0011559373017485425, 'n_hidden': 13, 'n_layers': 1, 'num_epochs': 74} 0.0
{'lr': 0.0014516910118142717, 'l2': 0.0011559373017485425, 'n_hidden': 13, 'n_l

{'lr': 1e-06, 'l2': 1e-06, 'n_hidden': 4, 'n_layers': 2, 'num_epochs': 500} 0.6030769230769231
{'lr': 1e-06, 'l2': 1e-06, 'n_hidden': 4, 'n_layers': 2, 'num_epochs': 500} 0.5961835997937082
{'lr': 1e-06, 'l2': 1e-06, 'n_hidden': 4, 'n_layers': 2, 'num_epochs': 500} 0.5913650239860445
{'lr': 1e-06, 'l2': 1e-06, 'n_hidden': 4, 'n_layers': 2, 'num_epochs': 500} 0.6027913994718974
{'lr': 1e-06, 'l2': 1e-06, 'n_hidden': 4, 'n_layers': 2, 'num_epochs': 500} 0.6073400454693082
{'lr': 1e-06, 'l2': 1e-06, 'n_hidden': 4, 'n_layers': 2, 'num_epochs': 500} 0.5997054491899853
{'lr': 0.2547063951999453, 'l2': 0.23568407682154266, 'n_hidden': 5, 'n_layers': 2, 'num_epochs': 493} 0.41807909604519766
{'lr': 0.2547063951999453, 'l2': 0.23568407682154266, 'n_hidden': 5, 'n_layers': 2, 'num_epochs': 493} 0.4553191489361702
{'lr': 0.2547063951999453, 'l2': 0.23568407682154266, 'n_hidden': 5, 'n_layers': 2, 'num_epochs': 493} 0.5213154689403167
{'lr': 0.2547063951999453, 'l2': 0.23568407682154266, 'n_hidden

{'lr': 1e-06, 'l2': 1.0, 'n_hidden': 16, 'n_layers': 1, 'num_epochs': 500} 0.6073400454693082
{'lr': 1e-06, 'l2': 1.0, 'n_hidden': 16, 'n_layers': 1, 'num_epochs': 500} 0.5997054491899853
{'lr': 0.5310858058694552, 'l2': 0.19068381558315572, 'n_hidden': 4, 'n_layers': 2, 'num_epochs': 393} 0.3905325443786983
{'lr': 0.5310858058694552, 'l2': 0.19068381558315572, 'n_hidden': 4, 'n_layers': 2, 'num_epochs': 393} 0.40191387559808617
{'lr': 0.5310858058694552, 'l2': 0.19068381558315572, 'n_hidden': 4, 'n_layers': 2, 'num_epochs': 393} 0.46622516556291393
{'lr': 0.5310858058694552, 'l2': 0.19068381558315572, 'n_hidden': 4, 'n_layers': 2, 'num_epochs': 393} 0.5262222222222221
{'lr': 0.5310858058694552, 'l2': 0.19068381558315572, 'n_hidden': 4, 'n_layers': 2, 'num_epochs': 393} 0.5672025723472669
{'lr': 0.5310858058694552, 'l2': 0.19068381558315572, 'n_hidden': 4, 'n_layers': 2, 'num_epochs': 393} 0.5711252653927812
{'lr': 0.5310858058694552, 'l2': 0.19068381558315572, 'n_hidden': 4, 'n_layers

{'lr': 7.537826008733369e-06, 'l2': 0.8701210377780059, 'n_hidden': 16, 'n_layers': 1, 'num_epochs': 490} 0.41807909604519766
{'lr': 7.537826008733369e-06, 'l2': 0.8701210377780059, 'n_hidden': 16, 'n_layers': 1, 'num_epochs': 490} 0.4520255863539446
{'lr': 7.537826008733369e-06, 'l2': 0.8701210377780059, 'n_hidden': 16, 'n_layers': 1, 'num_epochs': 490} 0.5159705159705159
{'lr': 7.537826008733369e-06, 'l2': 0.8701210377780059, 'n_hidden': 16, 'n_layers': 1, 'num_epochs': 490} 0.5574324324324325
{'lr': 7.537826008733369e-06, 'l2': 0.8701210377780059, 'n_hidden': 16, 'n_layers': 1, 'num_epochs': 490} 0.6039360393603935
{'lr': 7.537826008733369e-06, 'l2': 0.8701210377780059, 'n_hidden': 16, 'n_layers': 1, 'num_epochs': 490} 0.594622543950362
{'lr': 7.537826008733369e-06, 'l2': 0.8701210377780059, 'n_hidden': 16, 'n_layers': 1, 'num_epochs': 490} 0.5895196506550219
{'lr': 7.537826008733369e-06, 'l2': 0.8701210377780059, 'n_hidden': 16, 'n_layers': 1, 'num_epochs': 490} 0.6017365043412608


{'lr': 1.0, 'l2': 1e-06, 'n_hidden': 4, 'n_layers': 1, 'num_epochs': 500} 0.4269662921348315
{'lr': 1.0, 'l2': 1e-06, 'n_hidden': 4, 'n_layers': 1, 'num_epochs': 500} 0.4683544303797468
{'lr': 1.0, 'l2': 1e-06, 'n_hidden': 4, 'n_layers': 1, 'num_epochs': 500} 0.5206812652068127
{'lr': 1.0, 'l2': 1e-06, 'n_hidden': 4, 'n_layers': 1, 'num_epochs': 500} 0.5627632687447346
{'lr': 1.0, 'l2': 1e-06, 'n_hidden': 4, 'n_layers': 1, 'num_epochs': 500} 0.6030769230769231
{'lr': 1.0, 'l2': 1e-06, 'n_hidden': 4, 'n_layers': 1, 'num_epochs': 500} 0.5961835997937082
{'lr': 1.0, 'l2': 1e-06, 'n_hidden': 4, 'n_layers': 1, 'num_epochs': 500} 0.5913650239860445
{'lr': 1.0, 'l2': 1e-06, 'n_hidden': 4, 'n_layers': 1, 'num_epochs': 500} 0.6027913994718974
{'lr': 1.0, 'l2': 1e-06, 'n_hidden': 4, 'n_layers': 1, 'num_epochs': 500} 0.6073400454693082
{'lr': 1.0, 'l2': 1e-06, 'n_hidden': 4, 'n_layers': 1, 'num_epochs': 500} 0.5997054491899853
{'lr': 1e-06, 'l2': 1e-06, 'n_hidden': 16, 'n_layers': 2, 'num_epochs'

{'lr': 0.008584321368465707, 'l2': 0.9942213964730562, 'n_hidden': 16, 'n_layers': 2, 'num_epochs': 500} 0.5627632687447346
{'lr': 0.008584321368465707, 'l2': 0.9942213964730562, 'n_hidden': 16, 'n_layers': 2, 'num_epochs': 500} 0.6030769230769231
{'lr': 0.008584321368465707, 'l2': 0.9942213964730562, 'n_hidden': 16, 'n_layers': 2, 'num_epochs': 500} 0.5961835997937082
{'lr': 0.008584321368465707, 'l2': 0.9942213964730562, 'n_hidden': 16, 'n_layers': 2, 'num_epochs': 500} 0.5913650239860445
{'lr': 0.008584321368465707, 'l2': 0.9942213964730562, 'n_hidden': 16, 'n_layers': 2, 'num_epochs': 500} 0.6027913994718974
{'lr': 0.008584321368465707, 'l2': 0.9942213964730562, 'n_hidden': 16, 'n_layers': 2, 'num_epochs': 500} 0.6073400454693082
{'lr': 0.008584321368465707, 'l2': 0.9942213964730562, 'n_hidden': 16, 'n_layers': 2, 'num_epochs': 500} 0.5997054491899853
{'lr': 0.10042586444105472, 'l2': 1.059288310126945e-06, 'n_hidden': 6, 'n_layers': 2, 'num_epochs': 492} 0.41807909604519766
{'lr':

In [33]:
model = m2
print('test score:', model.score(testX, testY_new))
print('train score:', model.score(trainX, trainY_new))
print('best score:', model.best_score_)
print('best params:', model.best_params_)
print('test prediction:', model.predict(testX))

np.save('./results/train_prediction_m2_f1', model.predict(trainX))
np.save('./results/test_prediction_m2_f1', model.predict(testX))

test score: 0.5991902834008098
train score: 0.7257231404958676
best score: 0.514679700468593
best params: OrderedDict([('l2', 1.2448416980368034e-06), ('lr', 0.21246940656079488), ('n_hidden', 5), ('n_layers', 1), ('num_epochs', 499)])
test prediction: [0 0 0 0 0 1 1 1 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 1 1 0 0 0
 0 0 1 0 0 0 0 1 1 1 1 1 1 1 1 0 1 0 0 0 0 0 0 1 1 1 1 0 1 0 1 0 1 1 0 0 0
 0 1 1 1 1 0 1 1 0 0 0 0 1 1 0 1 1 1 1 1 1 0 0 1 1 0 0 1 0 1 1 0 1 0 0 1 0
 1 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 0 0 0 1 0 1 1 0 0
 1 0 0 1 1 1 1 1 0 0 0 1 1 1 1 1 0 1 1 1 0 1 1 0 1 0 1 0 0 0 0 1 1 1 1 0 1
 1 1 1 1 0 0 0 0 1 0 1 1 1 1 1 1 1 1 0 1 0 1 0 0 0 0 0 0 0 1 0 0 1 1 0 1 1
 1 1 0 1 1 0 1 1 1 0 1 1 1 0 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 1 1
 1 0 0 1 1 1 1 1 1 1 1 1 1 0 0 0 1 1 1 1 1 1 1 0 1 1 1 1 0 1 1 1 1 1 1 1 0
 0 0 0 0 0 0 0 1 1 0 0 0 1 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 1 0 0 0 0 1 0 1 1 1 0 1 1 0 0 0 0 0 1 1 1 1 1 1 0 0 1 0 1 0

In [38]:
train_prediction_m2 = np.load('results/train_prediction_m2_f1.npy')
test_prediction_m2 = np.load('results/test_prediction_m2_f1.npy')

print('train accuracy', accuracy_score(trainY_new, train_prediction_m2))
print('train accuracy', accuracy_score(testY_new, test_prediction_m2))
print('train roc auc score:', roc_auc_score(trainY_new, train_prediction_m2))
print('test roc auc score:', roc_auc_score(testY_new, test_prediction_m2))

print('Strategy performance on test data')
print('Predicted labels - M1 without FP, commision=0.00, trade_on_close=False -> Open')
print(performance_metrics.get_metrics(testY_new, measures, commission=comm, trade_on_close=False))
print('Predicted labels - M2, commision=0.00, trade_on_close=False -> Open')
print(performance_metrics.get_metrics(test_prediction_m2, measures, commission=comm, trade_on_close=False))
print('Predicted labels - M1 without FP, commision=0.00, trade_on_close=True')
print(performance_metrics.get_metrics(testY_new, measures, commission=comm, trade_on_close=True))
print('Predicted labels - M2, commision=0.00, trade_on_close=True')
print(performance_metrics.get_metrics(test_prediction_m2, measures, commission=comm, trade_on_close=True))

#print('Test set, predicted labels with model M1 without FP')
#print(testY_new)
#print('Test set, predicted labels with model M2')
#print(test_prediction_m2)

train accuracy 0.7756653992395437
train accuracy 0.623574144486692
train roc auc score: 0.7669986665693244
test roc auc score: 0.623018648018648
Strategy performance on test data
Predicted labels - M1 without FP, commision=0.00, trade_on_close=False -> Open
Start                    2018-11-20 00:00:00
End                      2020-12-22 00:00:00
Sharpe Ratio                        1.902407
Equity Final [$]                95525.377042
Equity Peak [$]                 99268.799977
Return (Ann.) [%]                 194.823841
Volatility (Ann.) [%]             102.409127
# Trades                                 141
dtype: object
Predicted labels - M2, commision=0.00, trade_on_close=False -> Open
Start                    2018-11-20 00:00:00
End                      2020-12-22 00:00:00
Sharpe Ratio                             0.0
Equity Final [$]                 4731.905667
Equity Peak [$]                 10404.591389
Return (Ann.) [%]                  -30.12628
Volatility (Ann.) [%]         