In [1]:
import os
import re
import glob
from datetime import datetime
import sys
from collections import defaultdict

import pandas as pd
import numpy as np
import matplotlib
from matplotlib import pyplot as plt
sys.path.append("..") # Adds higher directory to python modules path for importing from src dir

from sklearn.svm import SVC

from src.algorithms.simple.lstm import SimpleLSTM
from src.algorithms.simple.mlp import MLPClassifier
from src.datasets import NyseStocksDataset
from src.evaluation import Plotter
from src.pipeline import run_pipeline, prepare_data, evaluate_results

%matplotlib inline
%load_ext autotime
%load_ext autoreload
%autoreload 2

Using TensorFlow backend.


In [2]:
plotter = Plotter('../reports')

time: 205 ms


In [3]:
datasets = [
    NyseStocksDataset('All-3C', file_path='../data/nyse/prices.csv'),
    NyseStocksDataset('Open-3C', file_path='../data/nyse/prices.csv', features=['open']),
    NyseStocksDataset('OC-3C', file_path='../data/nyse/prices.csv', features=['open', 'close']),
    NyseStocksDataset('All-2C', file_path='../data/nyse/prices.csv', epsilon=None),
]
[ds.load() for ds in datasets];

HBox(children=(IntProgress(value=0, max=501), HTML(value='')))




HBox(children=(IntProgress(value=0, max=501), HTML(value='')))




HBox(children=(IntProgress(value=0, max=501), HTML(value='')))




HBox(children=(IntProgress(value=0, max=501), HTML(value='')))


time: 43 s


In [4]:
4

4

time: 218 ms


In [5]:
def gen_predictors(n_features, n_timestamps=7, n_classes=3):
    return [
        SimpleLSTM(n_timestamps=n_timestamps, n_features=n_features, n_classes=n_classes,
                   epochs=10, batch_size=32, n_units=[64, 64]),
        MLPClassifier(activation='tanh', hidden_layer_sizes=(32, 32, 32, 3), max_iter=150,
                      solver='adam', verbose=True, random_state=42),  # tol=1e-4, learning_rate_init=1e-4, alpha=0.0001
        SVC(verbose=True, gamma='scale'),
    ]

results = defaultdict(lambda: defaultdict(list))

for ds in datasets:
    print('-'*10, 'Prepare dataset', '-'*10)
    data = prepare_data(ds, 20000, 2000)
    n_features = len(data[0].columns.levels[1])
    predictors = gen_predictors(n_features)
    for predictor in predictors:
        predictor_name = predictor.__class__.__name__
        can_handle_time_dim = isinstance(predictor, SimpleLSTM)
        print('-'*10, predictor_name, '|', ds, '-'*10)
        pipeline, y_pred = run_pipeline(predictor, data, time_dim=can_handle_time_dim)
        ev = evaluate_results(data[3], y_pred)
        results[str(ds)][predictor_name] = (pipeline, ev)

---------- Prepare dataset ----------
Train Labels --> -1.0: 20000; 0.0: 20000; 1.0: 20000
Training range: 2010-01-13 00:00:00 to 2010-10-01 00:00:00
Test Labels --> -1.0: 2000; 0.0: 2000; 1.0: 2000
Testing range: 2016-01-04 00:00:00 to 2016-12-30 00:00:00
Done preparing data
---------- SimpleLSTM | All-3C ----------


HBox(children=(IntProgress(value=0, description='Training', max=10, style=ProgressStyle(description_width='ini…

HBox(children=(IntProgress(value=0, description='Epoch 0', max=60000, style=ProgressStyle(description_width='i…

HBox(children=(IntProgress(value=0, description='Epoch 1', max=60000, style=ProgressStyle(description_width='i…

HBox(children=(IntProgress(value=0, description='Epoch 2', max=60000, style=ProgressStyle(description_width='i…

HBox(children=(IntProgress(value=0, description='Epoch 3', max=60000, style=ProgressStyle(description_width='i…

HBox(children=(IntProgress(value=0, description='Epoch 4', max=60000, style=ProgressStyle(description_width='i…

HBox(children=(IntProgress(value=0, description='Epoch 5', max=60000, style=ProgressStyle(description_width='i…

HBox(children=(IntProgress(value=0, description='Epoch 6', max=60000, style=ProgressStyle(description_width='i…

HBox(children=(IntProgress(value=0, description='Epoch 7', max=60000, style=ProgressStyle(description_width='i…

HBox(children=(IntProgress(value=0, description='Epoch 8', max=60000, style=ProgressStyle(description_width='i…

HBox(children=(IntProgress(value=0, description='Epoch 9', max=60000, style=ProgressStyle(description_width='i…


              precision    recall  f1-score   support

        Down       0.26      0.18      0.21      2000
       Still       0.35      0.50      0.41      2000
          Up       0.29      0.26      0.27      2000

   micro avg       0.31      0.31      0.31      6000
   macro avg       0.30      0.31      0.30      6000
weighted avg       0.30      0.31      0.30      6000

Precision=0.30161466324014463
Recall=0.313
F1-Score=0.29984536394380285
Accuracy=0.313
MCC=-0.03129704425756518
---------- MLPClassifier | All-3C ----------


HBox(children=(IntProgress(value=0, max=150), HTML(value='')))




              precision    recall  f1-score   support

        Down       0.25      0.28      0.26      2000
       Still       0.34      0.42      0.38      2000
          Up       0.29      0.18      0.22      2000

   micro avg       0.30      0.30      0.30      6000
   macro avg       0.29      0.30      0.29      6000
weighted avg       0.29      0.30      0.29      6000

Precision=0.2926426274579076
Recall=0.29533333333333334
F1-Score=0.28877798232547663
Accuracy=0.29533333333333334
MCC=-0.05793085547550047
---------- SVC | All-3C ----------
[LibSVM]              precision    recall  f1-score   support

        Down       0.28      0.14      0.18      2000
       Still       0.37      0.53      0.44      2000
          Up       0.31      0.34      0.32      2000

   micro avg       0.33      0.33      0.33      6000
   macro avg       0.32      0.33      0.31      6000
weighted avg       0.32      0.33      0.31      6000

Precision=0.31930461132868
Recall=0.33266666666666667
F

HBox(children=(IntProgress(value=0, description='Training', max=10, style=ProgressStyle(description_width='ini…

HBox(children=(IntProgress(value=0, description='Epoch 0', max=60000, style=ProgressStyle(description_width='i…

HBox(children=(IntProgress(value=0, description='Epoch 1', max=60000, style=ProgressStyle(description_width='i…

HBox(children=(IntProgress(value=0, description='Epoch 2', max=60000, style=ProgressStyle(description_width='i…

HBox(children=(IntProgress(value=0, description='Epoch 3', max=60000, style=ProgressStyle(description_width='i…

HBox(children=(IntProgress(value=0, description='Epoch 4', max=60000, style=ProgressStyle(description_width='i…

HBox(children=(IntProgress(value=0, description='Epoch 5', max=60000, style=ProgressStyle(description_width='i…

HBox(children=(IntProgress(value=0, description='Epoch 6', max=60000, style=ProgressStyle(description_width='i…

HBox(children=(IntProgress(value=0, description='Epoch 7', max=60000, style=ProgressStyle(description_width='i…

HBox(children=(IntProgress(value=0, description='Epoch 8', max=60000, style=ProgressStyle(description_width='i…

HBox(children=(IntProgress(value=0, description='Epoch 9', max=60000, style=ProgressStyle(description_width='i…


              precision    recall  f1-score   support

        Down       0.34      0.33      0.34      2000
       Still       0.34      0.58      0.43      2000
          Up       0.34      0.10      0.16      2000

   micro avg       0.34      0.34      0.34      6000
   macro avg       0.34      0.34      0.31      6000
weighted avg       0.34      0.34      0.31      6000

Precision=0.3411137676758818
Recall=0.33916666666666667
F1-Score=0.3078677688489503
Accuracy=0.33916666666666667
MCC=0.009600603131811946
---------- MLPClassifier | Open-3C ----------


HBox(children=(IntProgress(value=0, max=150), HTML(value='')))




              precision    recall  f1-score   support

        Down       0.36      0.37      0.37      2000
       Still       0.36      0.35      0.36      2000
          Up       0.31      0.32      0.31      2000

   micro avg       0.35      0.35      0.35      6000
   macro avg       0.35      0.35      0.35      6000
weighted avg       0.35      0.35      0.35      6000

Precision=0.3454656130185299
Recall=0.3451666666666667
F1-Score=0.3452719502894981
Accuracy=0.3451666666666667
MCC=0.01775221102763951
---------- SVC | Open-3C ----------
[LibSVM]              precision    recall  f1-score   support

        Down       0.33      0.22      0.26      2000
       Still       0.36      0.40      0.38      2000
          Up       0.32      0.39      0.35      2000

   micro avg       0.33      0.33      0.33      6000
   macro avg       0.33      0.33      0.33      6000
weighted avg       0.33      0.33      0.33      6000

Precision=0.33465463005344703
Recall=0.33466666666666667
F

HBox(children=(IntProgress(value=0, description='Training', max=10, style=ProgressStyle(description_width='ini…

HBox(children=(IntProgress(value=0, description='Epoch 0', max=60000, style=ProgressStyle(description_width='i…

HBox(children=(IntProgress(value=0, description='Epoch 1', max=60000, style=ProgressStyle(description_width='i…

HBox(children=(IntProgress(value=0, description='Epoch 2', max=60000, style=ProgressStyle(description_width='i…

HBox(children=(IntProgress(value=0, description='Epoch 3', max=60000, style=ProgressStyle(description_width='i…

HBox(children=(IntProgress(value=0, description='Epoch 4', max=60000, style=ProgressStyle(description_width='i…

HBox(children=(IntProgress(value=0, description='Epoch 5', max=60000, style=ProgressStyle(description_width='i…

HBox(children=(IntProgress(value=0, description='Epoch 6', max=60000, style=ProgressStyle(description_width='i…

HBox(children=(IntProgress(value=0, description='Epoch 7', max=60000, style=ProgressStyle(description_width='i…

HBox(children=(IntProgress(value=0, description='Epoch 8', max=60000, style=ProgressStyle(description_width='i…

HBox(children=(IntProgress(value=0, description='Epoch 9', max=60000, style=ProgressStyle(description_width='i…


              precision    recall  f1-score   support

        Down       0.35      0.36      0.35      2000
       Still       0.34      0.55      0.42      2000
          Up       0.34      0.12      0.18      2000

   micro avg       0.34      0.34      0.34      6000
   macro avg       0.34      0.34      0.32      6000
weighted avg       0.34      0.34      0.32      6000

Precision=0.3405807425024758
Recall=0.3408333333333333
F1-Score=0.3153875297303072
Accuracy=0.3408333333333333
MCC=0.012075684478945223
---------- MLPClassifier | OC-3C ----------


HBox(children=(IntProgress(value=0, max=150), HTML(value='')))




              precision    recall  f1-score   support

        Down       0.34      0.28      0.31      2000
       Still       0.38      0.41      0.39      2000
          Up       0.32      0.34      0.33      2000

   micro avg       0.35      0.35      0.35      6000
   macro avg       0.34      0.35      0.34      6000
weighted avg       0.34      0.35      0.34      6000

Precision=0.3448887433318737
Recall=0.3455
F1-Score=0.34404510754103185
Accuracy=0.3455
MCC=0.0183090603485236
---------- SVC | OC-3C ----------
[LibSVM]              precision    recall  f1-score   support

        Down       0.33      0.21      0.26      2000
       Still       0.38      0.39      0.39      2000
          Up       0.33      0.44      0.38      2000

   micro avg       0.35      0.35      0.35      6000
   macro avg       0.35      0.35      0.34      6000
weighted avg       0.35      0.35      0.34      6000

Precision=0.3471263734979933
Recall=0.3473333333333333
F1-Score=0.3398190297100664
A

HBox(children=(IntProgress(value=0, description='Training', max=10, style=ProgressStyle(description_width='ini…

HBox(children=(IntProgress(value=0, description='Epoch 0', max=40000, style=ProgressStyle(description_width='i…

HBox(children=(IntProgress(value=0, description='Epoch 1', max=40000, style=ProgressStyle(description_width='i…

HBox(children=(IntProgress(value=0, description='Epoch 2', max=40000, style=ProgressStyle(description_width='i…

HBox(children=(IntProgress(value=0, description='Epoch 3', max=40000, style=ProgressStyle(description_width='i…

HBox(children=(IntProgress(value=0, description='Epoch 4', max=40000, style=ProgressStyle(description_width='i…

HBox(children=(IntProgress(value=0, description='Epoch 5', max=40000, style=ProgressStyle(description_width='i…

HBox(children=(IntProgress(value=0, description='Epoch 6', max=40000, style=ProgressStyle(description_width='i…

HBox(children=(IntProgress(value=0, description='Epoch 7', max=40000, style=ProgressStyle(description_width='i…

HBox(children=(IntProgress(value=0, description='Epoch 8', max=40000, style=ProgressStyle(description_width='i…

HBox(children=(IntProgress(value=0, description='Epoch 9', max=40000, style=ProgressStyle(description_width='i…




ValueError: Number of classes, 2, does not match size of target_names, 3. Try specifying the labels parameter

time: 32min 51s


In [6]:
results

defaultdict(<function __main__.<lambda>()>,
            {'All-3C': defaultdict(list, {'SimpleLSTM': (Pipeline(memory=None,
                                steps=[('pre-scaling', FunctionTransformer(accept_sparse=False, check_inverse=True,
                                     func=<function from_3d_to_2d at 0x000001B56F9B3510>,
                                     inv_kw_args=None, inverse_func=None, kw_args=None,
                                     pass_y='deprecated', validate=False)), ('scaler', RobustScaler(copy=True, quantile...one, kw_args=None,
                                     pass_y='deprecated', validate=False)), ('predictor', SimpleLSTM(s=42))]),
                           {'prec': 0.30161466324014463,
                            'rec': 0.313,
                            'f1': 0.29984536394380285,
                            'acc': 0.313,
                            'mcc': -0.03129704425756518}),
                          'MLPClassifier': (Pipeline(memory=None,
          

time: 332 ms


In [None]:
evaluations = pd.DataFrame(dict([('SVC', ev_svc), ('MLP', ev_mlp), ('LSTM', ev_lstm)]),
                           index=['mcc', 'acc', 'f1', 'prec', 'rec'])
evaluations

In [None]:
# evaluations.to_csv('../reports/Ev_svc_mlp_lstm_60k_6k.csv')
# evaluations.to_csv('../reports/Ev_svc_mlp_lstm_450k_45k.csv')