In [1]:
import sys
import pandas as pd
import numpy as np
from skopt import BayesSearchCV
from skopt.space import Real, Integer
from sklearn import datasets, metrics
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import TimeSeriesSplit, GridSearchCV
from sklearn.linear_model import LogisticRegression
import torch
import torch.nn as nn
import torch.optim as optim
from backtesting import Backtest
import nStepLabeling, Trading
from DataFactory import get_data, train_test_split, scale_data, get_tensors
import warnings
warnings.filterwarnings("ignore")



In [4]:
aapl = get_data('AAPL', '2000-01-01', '2020-12-31')
nstep = nStepLabeling.NStep(aapl, 1)
aapl = nstep.get_labels(label_col='Close')

return_col = 'Close' 

aapl['return1'] = aapl[return_col].pct_change(1)
aapl['return2'] = aapl[return_col].pct_change(2)
aapl['return3'] = aapl[return_col].pct_change(3)
aapl['return4'] = aapl[return_col].pct_change(5)
aapl['return5'] = aapl[return_col].pct_change(10)
aapl['return6'] = aapl[return_col].pct_change(15)
aapl = aapl.dropna()
aapl = aapl.rename(columns={"open": "Open", "high": "High", "low": "Low",
                            "close": "Close", "volume": "Volume", "out": "Out"})
cols = ['Open', 'High', 'Low', 'Close', 'Volume', 'return1',
        'return2', 'return3', 'return4', 'return5', 'return6']

X_train, X_test, y_train, y_test = train_test_split(aapl, 0.9, cols, 'Out')
X_train, X_test = scale_data(StandardScaler(), X_train, X_test)

[*********************100%***********************]  1 of 1 completed


In [5]:
measures = ['Start', 'End', 'Sharpe Ratio', 'Equity Final [$]',
'Equity Peak [$]', 'Return (Ann.) [%]', 'Volatility (Ann.) [%]', '# Trades']
split_size = int(len(aapl)*0.9)
backtest_data = aapl[split_size:][cols]
backtest_metrics = Trading.Metrics(backtest_data)
labels = np.array(aapl[split_size:]['Out'])
print(backtest_metrics.get_metrics(labels, measures, commission=0.01, trade_on_close=False))

Start                    2018-11-27 00:00:00
End                      2020-12-30 00:00:00
Sharpe Ratio                        1.709097
Equity Final [$]                74356.562132
Equity Peak [$]                  74739.25374
Return (Ann.) [%]                 161.003326
Volatility (Ann.) [%]              94.203716
# Trades                                 284
dtype: object


In [22]:
class LSTMClassifier(nn.Module):

    def __init__(self, n_features=11, n_hidden=16, n_layers=1, n_classes=1,
                 num_epochs=100, lr=1e-3, l2=1e-3, threshold=0.5):
        super(LSTMClassifier, self).__init__()

        self.n_features = n_features
        self.n_hidden = n_hidden
        self.n_layers = n_layers
        
        torch.manual_seed(0)
        self.device = device = torch.device("cuda:0" if torch.cuda.is_available()else "cpu")

        self.lstm = nn.LSTM(
            input_size=self.n_features,
            hidden_size=self.n_hidden,
            num_layers=self.n_layers,
            batch_first=True
        ).to(self.device)
        self.linear = nn.Linear(in_features=n_hidden, out_features=n_classes).to(self.device)

        self.num_epochs = num_epochs
        self.lr = lr
        self.l2 = l2
        self.optimizer = optim.Adam(self.parameters(), lr=lr, weight_decay=l2)
        self.criterion = nn.BCELoss()
        self.threshold = threshold

    def forward(self, x):
        self.lstm.flatten_parameters()
        x = x.unsqueeze(1).to(self.device)
        _, (hidden, _) = self.lstm(x)
        out = hidden[-1]
        prediction = torch.sigmoid(self.linear(out)).to(self.device)
        return prediction

    def get_params(self, deep=True):
        return {"lr": self.lr, "l2": self.l2, "n_hidden": self.n_hidden,
                "n_layers": self.n_layers, "num_epochs": self.num_epochs}

    def set_params(self, **parameters):
        for parameter, value in parameters.items():
            setattr(self, parameter, value)
        return self

    def fit(self, x, y):
        x, y = get_tensors(x, y)
        x = x.to(self.device)
        y = y.to(self.device)
        for epoch in range(self.num_epochs):
            prediction = self(x)
            loss = self.criterion(prediction, y)
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()
        score = metrics.roc_auc_score(y.cpu(), prediction.detach().cpu())
        print(self.get_params(), score) 
        return self

    def predict(self, x):
        x = torch.from_numpy(x.astype(np.float32))
        prediction = self(x)
        prediction[prediction < self.threshold] = 0
        prediction[prediction >= self.threshold] = 1
        return np.array(prediction.detach().squeeze().cpu()).astype(int)

    def predict_proba(self, x):
        x = torch.from_numpy(x.astype(np.float32))
        return np.array(self(x).detach().squeeze().cpu())

    def score(self, x, y):
        x, y = get_tensors(x, y)
        with torch.no_grad():
            prediction = self(x)
            score = metrics.roc_auc_score(y, prediction.cpu())
            #prediction[prediction < self.threshold] = 0
            #prediction[prediction >= self.threshold] = 1
            #score = metrics.f1_score(y, prediction)
            #score = metrics.accuracy_score(y, prediction)
        return score

In [21]:
opt = BayesSearchCV(
     LSTMClassifier(),
     {
         'lr': Real(1e-4, 1e-1, prior='log-uniform'),
         'l2': Real(1e-4, 1e-1, prior='log-uniform'),
         'n_layers': Integer(1, 2),
         'n_hidden': Integer(4, 16),
         'num_epochs': Integer(100, 500)
     },
     cv=TimeSeriesSplit(n_splits=10, max_train_size=5000),
     n_iter=1,
     random_state=0,
     refit=True
)
_ = opt.fit(X_train, y_train)

epoch: 10, loss = 0.6950, roc_auc = 0.5324
epoch: 20, loss = 0.6902, roc_auc = 0.5945
epoch: 30, loss = 0.6860, roc_auc = 0.6072
epoch: 40, loss = 0.6822, roc_auc = 0.6121
epoch: 50, loss = 0.6789, roc_auc = 0.6143
epoch: 60, loss = 0.6761, roc_auc = 0.6165
epoch: 70, loss = 0.6737, roc_auc = 0.6189
epoch: 80, loss = 0.6716, roc_auc = 0.6214
epoch: 90, loss = 0.6696, roc_auc = 0.6237
epoch: 100, loss = 0.6678, roc_auc = 0.6271
epoch: 110, loss = 0.6661, roc_auc = 0.6301
epoch: 120, loss = 0.6644, roc_auc = 0.6341
epoch: 130, loss = 0.6628, roc_auc = 0.6384
epoch: 140, loss = 0.6613, roc_auc = 0.6414
epoch: 150, loss = 0.6597, roc_auc = 0.6444
epoch: 160, loss = 0.6583, roc_auc = 0.6481
epoch: 170, loss = 0.6568, roc_auc = 0.6514
epoch: 180, loss = 0.6554, roc_auc = 0.6539
epoch: 190, loss = 0.6540, roc_auc = 0.6566
epoch: 200, loss = 0.6526, roc_auc = 0.6591
epoch: 210, loss = 0.6512, roc_auc = 0.6620
epoch: 220, loss = 0.6498, roc_auc = 0.6653
epoch: 230, loss = 0.6484, roc_auc = 0.66

epoch: 40, loss = 0.6907, roc_auc = 0.5376
epoch: 50, loss = 0.6900, roc_auc = 0.5456
epoch: 60, loss = 0.6893, roc_auc = 0.5506
epoch: 70, loss = 0.6887, roc_auc = 0.5530
epoch: 80, loss = 0.6882, roc_auc = 0.5546
epoch: 90, loss = 0.6877, roc_auc = 0.5557
epoch: 100, loss = 0.6873, roc_auc = 0.5567
epoch: 110, loss = 0.6869, roc_auc = 0.5578
epoch: 120, loss = 0.6865, roc_auc = 0.5591
epoch: 130, loss = 0.6861, roc_auc = 0.5602
epoch: 140, loss = 0.6858, roc_auc = 0.5611
epoch: 150, loss = 0.6855, roc_auc = 0.5620
epoch: 160, loss = 0.6852, roc_auc = 0.5628
epoch: 170, loss = 0.6850, roc_auc = 0.5638
epoch: 180, loss = 0.6847, roc_auc = 0.5649
epoch: 190, loss = 0.6844, roc_auc = 0.5660
epoch: 200, loss = 0.6842, roc_auc = 0.5671
epoch: 210, loss = 0.6839, roc_auc = 0.5683
epoch: 220, loss = 0.6837, roc_auc = 0.5692
epoch: 230, loss = 0.6834, roc_auc = 0.5702
epoch: 240, loss = 0.6832, roc_auc = 0.5713
epoch: 250, loss = 0.6829, roc_auc = 0.5724
epoch: 260, loss = 0.6827, roc_auc = 0

epoch: 80, loss = 0.6904, roc_auc = 0.5297
epoch: 90, loss = 0.6902, roc_auc = 0.5311
epoch: 100, loss = 0.6900, roc_auc = 0.5323
epoch: 110, loss = 0.6898, roc_auc = 0.5332
epoch: 120, loss = 0.6896, roc_auc = 0.5340
epoch: 130, loss = 0.6895, roc_auc = 0.5349
epoch: 140, loss = 0.6893, roc_auc = 0.5359
epoch: 150, loss = 0.6891, roc_auc = 0.5369
epoch: 160, loss = 0.6889, roc_auc = 0.5380
epoch: 170, loss = 0.6887, roc_auc = 0.5393
epoch: 180, loss = 0.6885, roc_auc = 0.5406
epoch: 190, loss = 0.6883, roc_auc = 0.5421
epoch: 200, loss = 0.6881, roc_auc = 0.5435
epoch: 210, loss = 0.6878, roc_auc = 0.5452
epoch: 220, loss = 0.6876, roc_auc = 0.5468
epoch: 230, loss = 0.6873, roc_auc = 0.5485
epoch: 240, loss = 0.6870, roc_auc = 0.5501
epoch: 250, loss = 0.6867, roc_auc = 0.5517
epoch: 260, loss = 0.6864, roc_auc = 0.5533
epoch: 270, loss = 0.6861, roc_auc = 0.5550
epoch: 280, loss = 0.6858, roc_auc = 0.5566
epoch: 290, loss = 0.6855, roc_auc = 0.5581
epoch: 300, loss = 0.6852, roc_auc

In [19]:
model = opt
print('test score:', model.score(X_test, y_test))
print('train score:', model.score(X_train, y_train))
print('best score:', model.best_score_)
print('best params:', model.best_params_)
print('test prediction:', model.predict(X_test))
print(backtest_metrics.get_metrics(model.predict(X_test), measures, commission=0.00, trade_on_close=False))
df = pd.DataFrame.from_dict(model.cv_results_)
print(df)
#df.to_csv('C:/Filip/FER/5.GODINA/DIPLOMSKI_RAD/međurezultati/labels_on_open_prices/hiperparametri.csv')
#np.save('C:/Filip/FER/5.GODINA/DIPLOMSKI_RAD/međurezultati/labels_on_open_prices/prediction', np.array(model.predict(X_test)))

test score: 0.5283495372397886
train score: 0.6872397642615202
best score: 0.5137058203141467
best params: OrderedDict([('l2', 0.00018887022196608237), ('lr', 0.04881101667405022), ('n_hidden', 14), ('n_layers', 2), ('num_epochs', 279)])
test prediction: [1 1 1 1 1 0 1 0 0 0 1 1 0 1 0 0 1 1 0 0 1 1 1 1 0 0 0 0 0 1 1 1 0 0 0 0 1
 0 1 0 0 0 1 0 1 1 0 0 1 1 1 0 0 0 0 1 1 0 0 0 0 0 1 0 0 0 0 1 0 0 1 1 1 1
 1 0 1 0 0 0 1 1 0 0 0 1 1 0 1 1 0 1 1 0 0 0 0 0 0 1 0 0 1 0 0 1 0 1 0 1 0
 1 0 0 0 0 1 1 0 0 1 1 0 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 0 0 0 0 1 0 1 0 0 1
 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 1 1 0 0 0 0 1 0 0 0 0 0 1 1 0 1 0 1 0 1 0
 0 0 0 0 0 0 0 1 1 0 0 1 1 0 0 1 1 0 0 0 1 0 0 0 0 1 0 0 1 1 0 0 1 1 1 0 1
 1 1 0 0 0 1 1 1 0 0 1 1 1 0 0 1 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0
 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0
 1 0 0 0 0 1 0 0 0 0 0 1 0 0 0 1 0 0 1 0 0 1 1 1 0 0 0 0 0 0 0 1 1 0 0 0 1
 1 0 1 1 0 0 1 1 0 1 1 1 0 0 0 0 1 0 0 0 0 0 0 1 1 0 1 0 0 0 1 0 0 0 1