In [None]:
import os
import re
import glob
from datetime import datetime
import sys
from collections import defaultdict

import pandas as pd
import numpy as np
import matplotlib
from matplotlib import pyplot as plt
sys.path.append("..") # Adds higher directory to python modules path for importing from src dir

from sklearn.svm import SVC

from src.algorithms.simple.lstm import SimpleLSTM
from src.algorithms.simple.mlp import MLPClassifier
from src.datasets import NyseStocksDataset
from src.evaluation import Evaluator, Plotter
# from src.pipeline import run_pipeline, prepare_data, evaluate_results
# from src.experiments import get_metrics

%matplotlib inline
%load_ext autotime
%load_ext autoreload
%autoreload 2

In [None]:
# Plotting helper from src.evaluation, constructed with the relative path '../reports'
# (presumably the directory figures are written to — confirm against Plotter).
plotter = Plotter(
    '../reports',
)

In [None]:
# Every dataset variant below reads the same prices file, so the path lives in one place.
NYSE_PRICES_PATH = '../data/nyse/prices.csv'

# Dataset variants to compare. They differ only in the `features` subset and in `epsilon`.
# NOTE(review): the '3C' / '2C' name suffixes presumably denote the number of target
# classes, with `epsilon=None` selecting the 2-class labelling — confirm in NyseStocksDataset.
datasets = [
    NyseStocksDataset('All-3C', file_path=NYSE_PRICES_PATH),
    NyseStocksDataset('Open-3C', file_path=NYSE_PRICES_PATH, features=['open']),
    NyseStocksDataset('OC-3C', file_path=NYSE_PRICES_PATH, features=['open', 'close']),
    NyseStocksDataset('All-2C', file_path=NYSE_PRICES_PATH, epsilon=None),
]

# Load every dataset up front. A plain loop is used because load() is called only for its
# side effect; this avoids building (and then having to suppress) a throwaway result list.
for dataset in datasets:
    dataset.load()

### First approach

In [None]:
def gen_predictors(n_features, n_timestamps=7, n_classes=3):
    """Create one fresh instance of each classifier used in the first approach.

    Args:
        n_features: Forwarded to SimpleLSTM as ``n_features``.
        n_timestamps: Forwarded to SimpleLSTM as ``n_timestamps``.
        n_classes: Forwarded to SimpleLSTM as ``n_classes``.

    Returns:
        A list ``[SimpleLSTM, MLPClassifier, SVC]`` in that order.

    Only the LSTM receives the three arguments; the MLP and the SVC are built
    from fixed settings.
    """
    lstm = SimpleLSTM(
        n_timestamps=n_timestamps,
        n_features=n_features,
        n_classes=n_classes,
        epochs=10,
        batch_size=32,
        n_units=[64, 64],
    )

    # Further MLP settings noted by the author but not passed here:
    # tol=1e-4, learning_rate_init=1e-4, alpha=0.0001
    # NOTE(review): the trailing 3 in hidden_layer_sizes is a literal and does not
    # follow n_classes — confirm whether that is intended.
    mlp = MLPClassifier(
        activation='tanh',
        hidden_layer_sizes=(32, 32, 32, 3),
        max_iter=150,
        solver='adam',
        verbose=True,
        random_state=42,
    )

    svc = SVC(verbose=True, gamma='scale')

    return [lstm, mlp, svc]

# Nested mapping filled below: results[str(dataset)][predictor class name] -> (pipeline, ev).
# Note that the inner default factory is `list`, while every value stored below is a tuple.
results = defaultdict(lambda: defaultdict(list))

# NOTE(review): prepare_data, run_pipeline and evaluate_results are not defined in the
# visible cells; the `from src.pipeline import ...` line in the import cell is commented
# out, so this cell presumably raises NameError on a fresh kernel — confirm.
for ds in datasets:
    print('-'*10, 'Prepare dataset', '-'*10)
    # presumably 20000 / 2000 are the train / test sample counts — TODO confirm in prepare_data
    data = prepare_data(ds, 20000, 2000)
    # Feature count is taken from the second level of the column MultiIndex of data[0].
    n_features = len(data[0].columns.levels[1])
    # NOTE(review): n_classes is left at its default (3) for every dataset, including the
    # one built with epsilon=None — verify that this is intended.
    predictors = gen_predictors(n_features)
    for predictor in predictors:
        predictor_name = predictor.__class__.__name__
        # Only the LSTM is run with time_dim=True; all other predictors get time_dim=False.
        can_handle_time_dim = isinstance(predictor, SimpleLSTM)
        print('-'*10, predictor_name, '|', ds, '-'*10)
        pipeline, y_pred = run_pipeline(predictor, data, time_dim=can_handle_time_dim)
        # data[3] is compared against the predictions (presumably the test labels — confirm).
        ev = evaluate_results(data[3], y_pred)
        results[str(ds)][predictor_name] = (pipeline, ev)

In [None]:
# NOTE(review): get_metrics is not defined in the visible cells; its import from
# src.experiments is commented out in the import cell — confirm this cell still runs.
all_metrics, mcc_metric = get_metrics(results)
# Display the second returned object (presumably the MCC-only view, going by its name).
mcc_metric

In [None]:
# Display the first object returned by get_metrics(results).
all_metrics

In [None]:
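# Disabled CSV exports of the evaluation table, kept for reference.
# NOTE: `evaluations` is not defined in the visible cells; bind it to the frame to export
# before re-enabling either line.
# evaluations.to_csv('../reports/Ev_svc_mlp_lstm_60k_6k.csv')
# evaluations.to_csv('../reports/Ev_svc_mlp_lstm_450k_45k.csv')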

### Second approach: using the implemented `Evaluator`

In [None]:
from sklearn.svm import SVC

from src.algorithms import SimpleLSTM, MLPClassifier
from src.evaluation import Evaluator


def get_predictors(n_features, n_timestamps=7, n_classes=3):
    """Create one fresh instance of each classifier for the Evaluator run.

    Args:
        n_features: Forwarded to SimpleLSTM as ``n_features``.
        n_timestamps: Forwarded to SimpleLSTM as ``n_timestamps``.
        n_classes: Forwarded to SimpleLSTM as ``n_classes``.

    Returns:
        A list ``[MLPClassifier, SimpleLSTM, SVC]`` in that order.

    Only the LSTM receives the three arguments; the MLP and the SVC are built
    from fixed settings.
    """
    # Further MLP settings noted by the author but not passed here:
    # tol=1e-4, learning_rate_init=1e-4, alpha=0.0001
    mlp = MLPClassifier(
        activation='tanh',
        hidden_layer_sizes=(32, 32, 32, 3),
        epochs=150,
        solver='adam',
        verbose=True,
        seed=42,
    )

    lstm = SimpleLSTM(
        n_timestamps=n_timestamps,
        n_features=n_features,
        n_classes=n_classes,
        epochs=10,
        batch_size=32,
        n_units=[64, 64],
    )

    svc = SVC(verbose=True, gamma='scale')

    return [mlp, lstm, svc]


# Hand the datasets and the predictor factory to Evaluator instead of looping manually.
# presumably store=True persists the results under '../reports' — confirm in Evaluator.
evaluator = Evaluator(
    'feature_selection',
    datasets,
    get_predictors,
    '../reports',
    seed=42,
    store=True,
)

# The Evaluator instance is called directly; its return value is kept for display.
metrics = evaluator()

In [None]:
# Display the object returned by the evaluator() call.
metrics