In [26]:
from data_retriever import DataRetriever
from pre_processing import PreProcessing
from scalers.min_max import MinMax
from regressors.lstm_regressor import LSTMRegressor
from regressors.esn_regressor import ESNRegressor
import pandas as pd
from model_evaluator import ModelEvaluator
import numpy as np
import tensorflow as tf
import tensorflow_addons as tfa
from keras.preprocessing.sequence import TimeseriesGenerator
from keras.layers import Dense, LSTM
from keras.models import Sequential
from config import Config
from statsmodels.tsa.stattools import adfuller
from decomposers.wavelet_transform import WaveletDecomposition
from chm.chm import CascadeHierarquicalModel

In [28]:
from typing                 import List
from chm.level              import CHMLevel
from chm.stages             import CHMStage
from decomposers.decomposer import BaseDecomposer
from regressors.regressor   import BaseRegressor
from copy import copy
from pandas.core.frame import DataFrame

class CascadeHierarquicalModel():
    """
    Cascade Hierarquical Model used to extract context from timeseries and predict non stationary timeseries

    parameters : BaseRegressor    (Instanciated base regressor used to extract context)
                 BaseRegressor    (Instanciated base regressor used to join context created from the stages)
                 SeriesDecomposer (decomposition used to extract frequency from time series)
                 int              (number of levels in the hierarquical model)
                 int              (number of stages in the hierarquical model)
                 bool             (use frequency extracted from time series as a feature)
    
    """
    
    def __init__(self,  ds:DataFrame                      ,
                        x_cols:List[str]                  ,
                        y_cols:str                        ,
                        context_regressor:BaseRegressor   , 
                        stage_regressor:BaseRegressor     ,
                        decomposer:BaseDecomposer         ,
                        dec_cols:List[str]                ,
                        num_stages:int=1                  ,
                        use_frequency:bool=False
                ):
        self._ds                = ds
        self._x_cols            = x_cols
        self._y_cols            = y_cols
        self._context_regressor = context_regressor
        self._stage_regressor   = stage_regressor
        self._decomposer        = decomposer
        self._dec_cols          = dec_cols
        self._num_stages        = num_stages
        self._use_frequency     = use_frequency
        self._list_stages       = list[CHMStage]

        
    def extract_context(self) -> None:
        # Decomposing series
        dec:BaseDecomposer = self._decomposer(
                                ds         = self._ds,
                                apply_cols = self._dec_cols
                                )

        # Obtaining the biggest decomposition
        max_wave = 0
        for i in dec.dict_waves.keys():    
            if(len(dec.dict_waves[i]) > max_wave):
                max_wave = len(dec.dict_waves[i])

        # For each stage
        for stage in range(self._num_stages):

            # List of levels for this stage
            temp_level_list = list[CHMLevel]

            # For each level in the current stage
            for level in range(max_wave):
                
                temp_ds = self._ds.copy()
                temp_ds = temp_ds.drop(self._dec_cols, axis=1)

                for wave in dec.dict_waves.keys():
                    

                temp_level = CHMLevel(self._context_regressor(
                                                                ds=train, 
                                                                xcols=x_cols, 
                                                                y_cols=y_cols, 
                                                                n_inputs=7, 
                                                                n_features=len(x_cols), 
                                                                epochs=50
                                                             )
                                        )
                temp_level_list.append(temp_level)

            self._list_stages.append(CHMStage(temp_level_list, copy(self._stage_regressor)))

    def __sintetize_series(self):
        pass

    def __recompose_series(self):
        pass

    

In [2]:
#Retrieving data from yahoo API
a = DataRetriever()
a.get_yahoo_stock_data()

#Defining which stocks will be predicted
#x_cols_ = [x for x in b.columns.difference(['Date']) if x[-5:] == 'Close']
predict_cols = ['ITUB3.SA_Close']


#Obtaining yahoo dataset
dataset = a.get_stock_ds()

#Setting up column to be predicted
y_cols = predict_cols
#Setting up column to be used as features
x_cols = dataset.columns.difference(['Date'] + list(y_cols))

#Preprocessing yahoo data
pp = PreProcessing(dataset, MinMax)
train, test = pp.pre_process()

#Training model to preprocessed data
#reg = LSTMRegressor(ds=train, xcols=x_cols, y_cols=y_cols, n_inputs=7, n_features=len(x_cols), epochs=50)
# reg = ESNRegressor(ds=train, xcols=x_cols, y_cols=y_cols, n_inputs=7, n_features=len(x_cols), epochs=50)
# reg.fit_generator()

# #Predicting data with trained model
# reg_pred = reg.predict_generator(test)

# #Adding predicted data to original dataset
# reg_pred = pp._scalers[y_cols].inverse_transform(reg_pred)
# pred = pd.DataFrame(reg_pred)
# pred.columns = ['Prediction']
# begin_idx  = len(dataset) - len(pred)
# finish_idx = len(dataset) - 1
# pred.index = list(range(begin_idx, finish_idx + 1))
# dataset = dataset.join(pred)

# #Evaluating model
# me = ModelEvaluator(model_name='ESN', 
#                     data_set=dataset, 
#                     pred_col='Prediction', 
#                     y_col=y_cols, 
#                     x_col='Date'
#                     )
# #me.plot_results(False, True)
# me.plot_results_predicted(False, True)

[*********************100%***********************]  7 of 7 completed


In [3]:
wd = WaveletDecomposition()

In [4]:
decompose_cols = list(train.filter(regex=("Open\\b|High\\b|Low\\b|Close\\b")).columns)
wd.decompose_series(ds=train,
                    apply_cols=decompose_cols)

  ds[new_col] = rec[:len(rec)-1]


In [20]:
wd.dict_waves['MSFT_Close']

['MSFT_Close_wave_0',
 'MSFT_Close_wave_1',
 'MSFT_Close_wave_2',
 'MSFT_Close_wave_3',
 'MSFT_Close_wave_4',
 'MSFT_Close_wave_5',
 'MSFT_Close_wave_6',
 'MSFT_Close_wave_7',
 'MSFT_Close_wave_8']

In [23]:
# Obtaining the biggest decomposition
max_wave = 0
for i in wd.dict_waves.keys():
    if(len(wd.dict_waves[i]) > max_wave):
        max_wave = len(wd.dict_waves[i])
    

In [30]:
x_cols = train.columns.difference(['Date'])
y_cols = ['ITUB3.SA_Close']

In [33]:
x_cols


Index(['7011.T_Close', '7011.T_Close_BBL_5_2.0', '7011.T_Close_BBM_5_2.0',
       '7011.T_Close_BBU_5_2.0', '7011.T_Close_RSI', '7011.T_Close_SMA_5',
       '7011.T_Close_wave_0', '7011.T_Close_wave_1', '7011.T_Close_wave_2',
       '7011.T_Close_wave_3',
       ...
       'MSFT_Open_wave_1', 'MSFT_Open_wave_2', 'MSFT_Open_wave_3',
       'MSFT_Open_wave_4', 'MSFT_Open_wave_5', 'MSFT_Open_wave_6',
       'MSFT_Open_wave_7', 'MSFT_Open_wave_8', 'MSFT_STOCHd_14_3_3',
       'MSFT_STOCHk_14_3_3'],
      dtype='object', length=329)

In [29]:
chm = CascadeHierarquicalModel( X                 = train[x_cols],
                                y                 = train[y_cols],
                                context_regressor = ESNRegressor,
                                stage_regressor   = ESNRegressor,
                                decomposer        = WaveletDecomposition,
                                num_levels        = max_wave 
                                )

TypeError: __init__() missing 2 required positional arguments: 'X' and 'y'

In [18]:
wd.count_waves

{'MSFT_Open': 9,
 'MSFT_High': 9,
 'MSFT_Low': 9,
 'MSFT_Close': 9,
 'AMZN_Open': 9,
 'AMZN_High': 9,
 'AMZN_Low': 9,
 'AMZN_Close': 9,
 'B3SA3.SA_Open': 9,
 'B3SA3.SA_High': 9,
 'B3SA3.SA_Low': 9,
 'B3SA3.SA_Close': 9,
 'ABEV3.SA_Open': 9,
 'ABEV3.SA_High': 9,
 'ABEV3.SA_Low': 9,
 'ABEV3.SA_Close': 9,
 'ITUB3.SA_Open': 9,
 'ITUB3.SA_High': 9,
 'ITUB3.SA_Low': 9,
 'ITUB3.SA_Close': 9,
 'AAPL_Open': 9,
 'AAPL_High': 9,
 'AAPL_Low': 9,
 'AAPL_Close': 9,
 '7011.T_Open': 9,
 '7011.T_High': 9,
 '7011.T_Low': 9,
 '7011.T_Close': 9}

In [12]:
train.filter(regex=("Open|High|Low|Close")).columns

Index(['MSFT_Open', 'MSFT_High', 'MSFT_Low', 'MSFT_Close', 'AMZN_Open',
       'AMZN_High', 'AMZN_Low', 'AMZN_Close', 'B3SA3.SA_Open', 'B3SA3.SA_High',
       'B3SA3.SA_Low', 'B3SA3.SA_Close', 'ABEV3.SA_Open', 'ABEV3.SA_High',
       'ABEV3.SA_Low', 'ABEV3.SA_Close', 'ITUB3.SA_Open', 'ITUB3.SA_High',
       'ITUB3.SA_Low', 'ITUB3.SA_Close', 'AAPL_Open', 'AAPL_High', 'AAPL_Low',
       'AAPL_Close', '7011.T_Open', '7011.T_High', '7011.T_Low',
       '7011.T_Close', '7011.T_Close_BBL_5_2.0', '7011.T_Close_BBM_5_2.0',
       '7011.T_Close_BBU_5_2.0', 'AAPL_Close_BBL_5_2.0',
       'AAPL_Close_BBM_5_2.0', 'AAPL_Close_BBU_5_2.0',
       'ABEV3.SA_Close_BBL_5_2.0', 'ABEV3.SA_Close_BBM_5_2.0',
       'ABEV3.SA_Close_BBU_5_2.0', 'AMZN_Close_BBL_5_2.0',
       'AMZN_Close_BBM_5_2.0', 'AMZN_Close_BBU_5_2.0',
       'B3SA3.SA_Close_BBL_5_2.0', 'B3SA3.SA_Close_BBM_5_2.0',
       'B3SA3.SA_Close_BBU_5_2.0', 'ITUB3.SA_Close_BBL_5_2.0',
       'ITUB3.SA_Close_BBM_5_2.0', 'ITUB3.SA_Close_BBU_5_2.0',
 