In [1]:
from data_retriever import DataRetriever
from pre_processing import PreProcessing
from scalers.min_max import MinMax
from regressors.lstm_regressor import LSTMRegressor
from regressors.esn_regressor import ESNRegressor
import pandas as pd
from model_evaluator import ModelEvaluator
import numpy as np
import tensorflow as tf
import tensorflow_addons as tfa
from keras.preprocessing.sequence import TimeseriesGenerator
from keras.layers import Dense, LSTM
from keras.models import Sequential
from config import Config
from statsmodels.tsa.stattools import adfuller
from decomposers.wavelet_transform import WaveletDecomposition
from chm.chm import CascadeHierarquicalModel

In [2]:
from typing                 import List
from chm.level              import CHMLevel
from chm.stages             import CHMStage
from decomposers.decomposer import BaseDecomposer
from regressors.regressor   import BaseRegressor
from copy import copy
from pandas.core.frame import DataFrame
from copy import deepcopy

class CascadeHierarquicalModel():
    """
    Cascade Hierarchical Model (CHM) used to extract context from time series
    and predict non-stationary time series.

    On construction the columns listed in ``dec_cols`` are decomposed with the
    given decomposer. The ``_set_*`` methods then build one context regressor
    per decomposition level of the target column.

    parameters : ds                (DataFrame handed to the decomposer and copied for every level)
                 y_cols            (name of the target column; must be a key of the decomposer's ``dict_waves``)
                 context_regressor (regressor class/factory, called once per level to extract context)
                 stage_regressor   (regressor used to join the context created by the levels of a stage)
                 decomposer        (decomposer class/factory, called with no arguments)
                 dec_cols          (columns of ``ds`` to decompose)
                 num_stages        (number of stages in the hierarchical model)
                 use_frequency     (use frequency extracted from time series as a feature; stored, not used yet)
                 verbose           (print progress messages while the blocks are set up)
                 n_inputs          (``n_inputs`` value forwarded to every context regressor)
                 epochs            (``epochs`` value forwarded to every context regressor)

    """

    def __init__(self,  ds:DataFrame                      ,
                        y_cols:str                        ,
                        context_regressor:BaseRegressor   ,
                        stage_regressor:BaseRegressor     ,
                        decomposer:BaseDecomposer         ,
                        dec_cols:List[str]                ,
                        num_stages:int=1                  ,
                        use_frequency:bool=False          ,
                        verbose:bool=True                 ,
                        n_inputs:int=7                    ,
                        epochs:int=50
                ):
        self._ds                                   = ds
        self._y_cols                               = y_cols
        self._context_regressor                    = context_regressor
        self._stage_regressor                      = stage_regressor
        # The decomposer is received as a class/factory and instantiated here
        self._dec                                  = decomposer()
        self._dec_cols                             = dec_cols
        self._num_stages                           = num_stages
        self._use_frequency                        = use_frequency
        self._list_stages_bottom_up:List[CHMStage] = list()
        self._list_stages_top_down:List[CHMStage]  = list()
        self._list_levels_frequency:List[CHMLevel] = list()
        self._verbose                              = verbose
        self._n_inputs                             = n_inputs
        self._epochs                               = epochs

        # Decomposing series. The methods below index copies of self._ds with
        # the sub-wave names stored in dict_waves, so those columns must exist
        # in self._ds once this call returns.
        self._dec.decompose_series(
                                            ds         = self._ds,
                                            apply_cols = self._dec_cols
                                            )

        # Number of sub-waves of the target column; used as the number of levels
        self._max_wave = len(self._dec.dict_waves[self._y_cols])

    def train_context_extraction(self) -> None:
        """Not implemented yet."""
        pass

    def _train_bottom_up_block(self) -> None:
        """Not implemented yet."""
        pass

    def _create_bottom_up_context(self) -> None:
        """Not implemented yet."""
        pass

    def _train_top_down_block(self) -> None:
        """Not implemented yet."""
        pass

    def _set_bottom_up_block(self) -> None:
        """
        Build the bottom-up stages and append them to ``_list_stages_bottom_up``.

        Each stage holds ``_max_wave`` levels. For every level the sub-waves
        still listed for a wave are summed into a ``<wave>_sintetized`` column,
        the summed sub-waves and the original ``dec_cols`` are dropped, and a
        context regressor is created on the remaining columns. After each level
        the last sub-wave of every wave is removed from the working dict (at
        least one sub-wave is always kept).
        """
        if self._verbose:
            print('============================================================')
            print('Setting up bottom up block ...')

        # Working copy, so popping sub-waves never touches the decomposer state.
        # NOTE(review): this copy is not reset between stages, so with
        # num_stages > 1 every stage after the first starts with a single
        # sub-wave per wave - confirm whether that is intended.
        remaining_waves = deepcopy(self._dec.dict_waves)

        for stage in range(self._num_stages):

            # Levels that belong to the current stage
            stage_levels:List[CHMLevel] = list()

            for level in range(self._max_wave):
                if self._verbose:
                    print('Setting up Level > ', level, ' of Stage > ', stage)

                level_ds = self._ds.copy(deep=True)

                # Creating dataset for this level
                for wave, sub_waves in remaining_waves.items():
                    synth_col = wave + '_sintetized'

                    # Synthesized wave = sum of the sub-waves still listed
                    level_ds[synth_col] = 0
                    for col in sub_waves:
                        level_ds[synth_col] = level_ds[synth_col] + level_ds[col]

                    # Dropping used sub-waves (already-popped ones stay as features)
                    level_ds.drop(columns=list(sub_waves), inplace=True)

                # Dropping original waves
                level_ds.drop(self._dec_cols, axis=1, inplace=True)

                # List of features of current level
                feature_cols = level_ds.columns.difference(['Date'])

                # The target comes from the instance, not from a module-level
                # global named y_cols.
                # NOTE(review): if the target is also listed in dec_cols it was
                # dropped just above - confirm the regressor should not receive
                # '<target>_sintetized' instead.
                level_regressor = self._context_regressor(
                                                            ds=level_ds.copy(deep=True),
                                                            x_cols=feature_cols,
                                                            y_cols=self._y_cols,
                                                            n_inputs=self._n_inputs,
                                                            n_features=len(feature_cols),
                                                            epochs=self._epochs
                                                         )

                # Appending level to list of levels of stage
                stage_levels.append(CHMLevel(level_regressor))

                # Next level uses one sub-wave less per wave
                for sub_waves in remaining_waves.values():
                    if len(sub_waves) > 1:
                        sub_waves.pop()

            self._list_stages_bottom_up.append(CHMStage(stage_levels, copy(self._stage_regressor)))

    def _set_frequency_predictor_block(self) -> None:
        """
        Build one context regressor per sub-wave of the target column and
        append them to ``_list_levels_frequency``.

        For level ``i`` every wave contributes its ``i``-th sub-wave as a
        feature (or its last sub-wave when it has fewer than ``i + 1``); all
        other sub-waves and the original ``dec_cols`` are dropped. The
        regressor target is the ``i``-th sub-wave of the target column.
        """
        if self._verbose:
            print('============================================================')
            print('Setting up frequency predictor block ...')

        waves = self._dec.dict_waves

        # Every sub-wave column of every decomposed wave
        all_sub_waves = [sub for subs in waves.values() for sub in subs]

        # For each level of frequency of target
        for level in range(self._max_wave):
            level_ds = self._ds.copy(deep=True)

            if self._verbose:
                print('Setting up predictor of ', self._y_cols, ' frequency > ', level)

            # One sub-wave per wave: the one at this level, clamped to the last
            keep_cols = {subs[min(level, len(subs) - 1)] for subs in waves.values()}

            # Dropping unused frequency waves
            drop_cols = [col for col in all_sub_waves if col not in keep_cols]
            level_ds.drop(drop_cols, axis=1, inplace=True)

            # Dropping original waves
            level_ds.drop(self._dec_cols, axis=1, inplace=True)

            # List of features of current level
            feature_cols = level_ds.columns.difference(['Date'])

            # Target sub-wave is looked up through the instance attribute, not
            # a module-level global named y_cols
            level_regressor = self._context_regressor(
                                                        ds=level_ds.copy(deep=True),
                                                        x_cols=feature_cols,
                                                        y_cols=waves[self._y_cols][level],
                                                        n_inputs=self._n_inputs,
                                                        n_features=len(feature_cols),
                                                        epochs=self._epochs
                                                     )

            self._list_levels_frequency.append(CHMLevel(level_regressor))

    def __sintetize_series(self):
        """Not implemented yet."""
        pass

    def __recompose_series(self):
        """Not implemented yet."""
        pass

    

In [3]:
# Data preparation cell: downloads the stock data, pre-processes it into
# train/test splits and defines the target/feature column names.
# The names train, predict_cols and x_cols are read by the cell that builds the CHM.

# Retrieve the stock data from the Yahoo API
a = DataRetriever()
a.get_yahoo_stock_data()

# Define which stock column will be predicted
#x_cols_ = [x for x in b.columns.difference(['Date']) if x[-5:] == 'Close']
predict_cols = 'ITUB3.SA_Close'


# Obtain the downloaded dataset from the retriever
dataset = a.get_stock_ds()

# Pre-process the data; MinMax is passed as the scaler class
pp = PreProcessing(dataset, MinMax)
train, test = pp.pre_process()

# Column to be predicted
y_cols = predict_cols
# Columns to be used as features: every column of train except 'Date'
x_cols = train.columns.difference(['Date'])

# --- Disabled single-regressor baseline (kept for reference, not executed) ---
# Train a model on the pre-processed data
#reg = LSTMRegressor(ds=train, xcols=x_cols, y_cols=y_cols, n_inputs=7, n_features=len(x_cols), epochs=50)
# reg = ESNRegressor(ds=train, xcols=x_cols, y_cols=y_cols, n_inputs=7, n_features=len(x_cols), epochs=50)

# # Predict the test data with the trained model
# reg_pred = reg.predict_generator(test)

# # Add the (inverse-scaled) predictions to the original dataset
# reg_pred = pp._scalers[y_cols].inverse_transform(reg_pred)
# pred = pd.DataFrame(reg_pred)
# pred.columns = ['Prediction']
# begin_idx  = len(dataset) - len(pred)
# finish_idx = len(dataset) - 1
# pred.index = list(range(begin_idx, finish_idx + 1))
# dataset = dataset.join(pred)

# # Evaluate the model
# me = ModelEvaluator(model_name='ESN', 
#                     data_set=dataset, 
#                     pred_col='Prediction', 
#                     y_col=y_cols, 
#                     x_col='Date'
#                     )
# #me.plot_results(False, True)
# me.plot_results_predicted(False, True)

[*********************100%***********************]  7 of 7 completed


In [4]:
# Build the cascade model on a deep copy of the training frame. LSTMRegressor
# is passed as a class for both the per-level context regressor and the stage
# regressor; every feature column is decomposed with the wavelet decomposer.
chm = CascadeHierarquicalModel(
    ds=train.copy(deep=True),
    y_cols=predict_cols,
    context_regressor=LSTMRegressor,
    stage_regressor=LSTMRegressor,
    decomposer=WaveletDecomposition,
    dec_cols=x_cols,
)

  ds[new_col] = rec[:len(rec)-1]


In [5]:
# Build the bottom-up stages (one context regressor per level of each stage)
chm._set_bottom_up_block()


Setting up bottom up block ...
Setting up Level >  0  of Stage >  0
Setting up Level >  1  of Stage >  0
Setting up Level >  2  of Stage >  0
Setting up Level >  3  of Stage >  0
Setting up Level >  4  of Stage >  0
Setting up Level >  5  of Stage >  0
Setting up Level >  6  of Stage >  0
Setting up Level >  7  of Stage >  0
Setting up Level >  8  of Stage >  0


In [6]:
# Build one regressor per sub-wave (frequency level) of the target column
chm._set_frequency_predictor_block()

Setting up frequency predictor block ...
Setting up predictor of  ITUB3.SA_Close  frequency >  0
Setting up predictor of  ITUB3.SA_Close  frequency >  1
Setting up predictor of  ITUB3.SA_Close  frequency >  2
Setting up predictor of  ITUB3.SA_Close  frequency >  3
Setting up predictor of  ITUB3.SA_Close  frequency >  4
Setting up predictor of  ITUB3.SA_Close  frequency >  5
Setting up predictor of  ITUB3.SA_Close  frequency >  6
Setting up predictor of  ITUB3.SA_Close  frequency >  7
Setting up predictor of  ITUB3.SA_Close  frequency >  8


Unnamed: 0,Date,7011.T_Close_wave_0,7011.T_Close_BBL_5_2.0_wave_0,7011.T_Close_BBM_5_2.0_wave_0,7011.T_Close_BBU_5_2.0_wave_0,7011.T_Close_RSI_wave_0,7011.T_Close_SMA_5_wave_0,7011.T_High_wave_0,7011.T_Low_wave_0,7011.T_Open_wave_0,...,MSFT_Close_BBL_5_2.0_wave_0,MSFT_Close_BBM_5_2.0_wave_0,MSFT_Close_BBU_5_2.0_wave_0,MSFT_Close_RSI_wave_0,MSFT_Close_SMA_5_wave_0,MSFT_High_wave_0,MSFT_Low_wave_0,MSFT_Open_wave_0,MSFT_STOCHd_14_3_3_wave_0,MSFT_STOCHk_14_3_3_wave_0
0,2010-01-04,0.085273,0.087002,0.073565,0.077679,0.364855,0.073565,0.086762,0.084858,0.080820,...,0.100472,0.092986,0.087520,0.338515,0.092986,0.093195,0.095763,0.094670,0.171483,0.188820
1,2010-01-05,0.085155,0.087001,0.073460,0.077475,0.366051,0.073460,0.086657,0.084761,0.080748,...,0.100189,0.092728,0.087291,0.339794,0.092728,0.092956,0.095515,0.094429,0.172436,0.190015
2,2010-01-06,0.085036,0.086999,0.073354,0.077269,0.367259,0.073354,0.086551,0.084662,0.080675,...,0.099903,0.092467,0.087059,0.341084,0.092467,0.092716,0.095265,0.094187,0.173398,0.191221
3,2010-01-07,0.084916,0.086998,0.073248,0.077061,0.368477,0.073248,0.086444,0.084563,0.080602,...,0.099614,0.092204,0.086825,0.342387,0.092204,0.092473,0.095012,0.093942,0.174369,0.192439
4,2010-01-08,0.084796,0.086997,0.073141,0.076853,0.369699,0.073141,0.086336,0.084463,0.080528,...,0.099324,0.091941,0.086590,0.343693,0.091941,0.092229,0.094759,0.093696,0.175343,0.193660
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1926,2017-05-29,0.395649,0.397160,0.388866,0.385793,0.467348,0.388866,0.397684,0.398088,0.391572,...,0.943206,0.947378,0.942814,0.663166,0.947378,0.917970,0.929437,0.931593,0.717395,0.748764
1927,2017-05-30,0.394791,0.396307,0.387977,0.384902,0.466954,0.387977,0.396829,0.397196,0.390673,...,0.943648,0.947878,0.943365,0.663590,0.947878,0.918567,0.929993,0.932160,0.716971,0.748912
1928,2017-05-31,0.393983,0.395507,0.387138,0.384058,0.466574,0.387138,0.396024,0.396358,0.389825,...,0.944109,0.948395,0.943932,0.664016,0.948395,0.919180,0.930565,0.932743,0.716562,0.749061
1929,2017-06-01,0.393257,0.394790,0.386380,0.383288,0.466216,0.386380,0.395298,0.395603,0.389060,...,0.944600,0.948939,0.944522,0.664446,0.948939,0.919817,0.931164,0.933352,0.716178,0.749215
