In [1]:
# Normalise features
# Find the optimal parameters C (inverse regulalisation strengths)
# Find the best features to predict stock movement (up or down)
# Convert probability into absolute values (stock % increase/decrease)
# SVC, RandomForest, MinMax, LongShortTermMemory

%matplotlib inline
%pylab inline

# Nice Formatting within Jupyter Notebook
%matplotlib inline
from IPython.display import display # Allows multiple displays from a single code-cell
from jupyterthemes import jtplot

#import classes
from company import Company
from onestep_baseline_company import OneStepBaselineCompany
from onestep_lstm_company import OneStepLSTMCompany
from multistep_baseline_company import MultiStepBaselineCompany
from multistep_lstm_company import MultiStepLSTMCompany

jtplot.style(theme='grade3')
jtplot.style(theme='grade3')
jtplot.style(theme='grade3')


Populating the interactive namespace from numpy and matplotlib


Using TensorFlow backend.


In [None]:
from multistep_lstm_company import MultiStepLSTMCompany
from alpha_vantage.techindicators import TechIndicators
from time import time
import pandas as pd
from sklearn.preprocessing import MinMaxScaler



class MultiStepLSTMCompanyTechIndicators(MultiStepLSTMCompany):
    def __init__(self, name, train_start_date_string, train_end_test_start_date_string, test_end_date_string,
                 n_lag, n_seq, n_epochs, n_batch, n_neurons, tech_indicators=[]):
        self.tech_indicators = TechIndicators(key='3OMS720IM6CRC3SV', output_format='pandas')
        self.all_tech_indicators = ["ad", "adosc", "adx", "adxr", "apo", "aroon", "aroonosc", 
                                    "bbands", "bop", "cci", "cmo", "dema", "dx", "ema", "ht_dcperiod", 
                                    "ht_dcphase", "ht_phasor", "ht_sine", "ht_trendline", "ht_trendmode", 
                                    "kama", "macd", "macdext", "mama", "mfi", "midpoint", "midprice", 
                                    "minus_di", "minus_dm", "mom", "natr", "obv", "plus_di", "plus_dm", 
                                    "ppo", "roc", "rocr", "rsi", "sar", "sma", "stoch", "stochf", "stochrsi", 
                                    "t3", "tema", "trange", "trima", "trix", "ultsoc", "willr", "wma"]
        if tech_indicators == "all":
            self.input_tech_indicators_list = self.all_tech_indicators
        else:
            self.input_tech_indicators_list = tech_indicators
        self.n_indicators = len(tech_indicators)
        #self.all_tech_indicators
        MultiStepLSTMCompany.__init__(self, name, train_start_date_string, train_end_test_start_date_string, test_end_date_string,
                 n_lag, n_seq, n_epochs, n_batch, n_neurons)
        
    
    def add_tech_indicators_dataframe(self, price_series, indicators):
        combined = price_series
        for ind in indicators:
            print("ind", ind)
            while True: # try again until success
                try:
                    ind_series = self.get_indicator(ind, self.train_start_date_string, self.test_end_date_string)
                    combined = pd.concat([combined, ind_series], axis=1)
                    break
                except:
                    print("Retrying to download indicator ", ind)
                    pass
        
        return combined

    def get_indicator(self, ind_name, start, end):
        data, meta_data = getattr(self.tech_indicators, "get_" + ind_name)(self.name, interval="daily")
        data.index = pd.to_datetime(data.index)
        data = self.get_filtered_series(data, start, end)
        return data
    
    def preprocess_data(self):
        price_series = self.train_raw_series.append(self.test_raw_series)
        display("price data series", len(price_series), price_series)
        if len(self.input_tech_indicators_list) > 0:
            #add additional technical indicators
            combined = self.add_tech_indicators_dataframe(price_series, self.input_tech_indicators_list)
        else:
            combined = price_series
        
        display("combined", combined)
            
        supervised_pd = self.timeseries_to_supervised(combined, self.n_lag, self.n_seq)
        display("supervised", supervised_pd)
        
        cutoff = len(self.train_raw_series) - self.n_seq - 1
        train_supervised_values = supervised_pd.values[:cutoff - self.n_lag + 1]
        display("train supervised values", train_supervised_values)
        test_supervised_values = supervised_pd.values[cutoff + self.n_seq - self.n_lag:]
        display("test supervised values", test_supervised_values)

        self.scaler, scaled_train_supervised, scaled_test_supervised = self.scale(train_supervised_values,
                                                                                  test_supervised_values)
        # delete unnecessary variables for prediction except price (should be var1)
        display("scaled train supervised", scaled_train_supervised)
        display("scaled test supervised", scaled_test_supervised)
        
        return scaled_train_supervised, scaled_test_supervised

    # evaluate the persistence model
    def predict(self):
        self.reset()
        # walk-forward validation on the test data
        predictions = pd.Series()
        # Index is datetime
        test_index = self.test_raw_series.index
        for i in range(len(self.test_scaled)):
            # make multi-step forecast
            X, y = self.test_scaled[i, 0:self.n_lag+self.n_indicators], self.test_scaled[i, self.n_lag+self.n_indicators:]
            print("X: ", X, "y: ", y)
            pred = self.forecast_lstm(X)
            print("Prediction: ", pred)
            # store forecast
            predictions.at[test_index[i]] = pred

        # display("predictions before inverse transform", predictions)
        # inverse transform
        predictions = self.inverse_transform(self.train_raw_series.append(self.test_raw_series), predictions,
                                             len(self.test_raw_series))
        print("Predictions after inverse transform")
        display(predictions)
        return predictions

    # scale train and test data to [-1, 1]
    def scale(self, train_raw, test_raw):
        # fit scaler with 1 Dimensional array data
        scaler = MinMaxScaler(feature_range=(-1, 1))
        # display("fit scaler with train data", scaler_train_data)
        scaler = scaler.fit(train_raw)
        # transform train
        train_scaled = scaler.transform(train_raw)
        # display("train_scaled", train_scaled)
        # transform test
        test_scaled = scaler.transform(test_raw)
        # display("test_scaled", test_scaled)

        return scaler, train_scaled, test_scaled
    
    # fit an LSTM network to training data
    def fit_lstm(self, train):
        # reshape training into [samples, timesteps, features]
        X, y = train[:, 0:self.n_lag+self.n_indicators], train[:, self.n_lag+self.n_indicators:]
        X = X.reshape(X.shape[0], 1, X.shape[1])
        display("train X data", X)
        display("train y data", y)
        # design network
        model = Sequential()
        model.add(LSTM(self.n_neurons, batch_input_shape=(self.n_batch, X.shape[1], X.shape[2]), stateful=True))
        model.add(Dense(y.shape[1]))
        model.compile(loss='mean_squared_error', optimizer='adam')
        # fit network
        for i in range(self.n_epochs):
            model.fit(X, y, epochs=1, batch_size=self.n_batch, verbose=0, shuffle=False)
            model.reset_states()
        return model
    


In [44]:
start_train_date = "01/01/2018"
end_train_start_test_date = "20/01/2018"
end_test_date = "01/02/2018"

multi_step_lstm_tech_indicator = MultiStepLSTMCompanyTechIndicators("MU", start_train_date, end_train_start_test_date, end_test_date, 
                         n_lag=2, n_seq=1, n_epochs=3000, n_batch=1, n_neurons=4, tech_indicators="all")

multi_step_lstm_tech_indicator.train()

'price data series'

22

date
2018-01-02    43.67
2018-01-03    44.98
2018-01-04    46.88
2018-01-05    45.80
2018-01-08    45.55
2018-01-09    42.97
2018-01-10    43.31
2018-01-11    42.82
2018-01-12    42.81
2018-01-16    42.92
2018-01-17    44.26
2018-01-18    43.99
2018-01-19    42.75
2018-01-22    42.88
2018-01-23    43.95
2018-01-24    43.08
2018-01-25    43.01
2018-01-26    43.67
2018-01-29    43.29
2018-01-30    41.67
2018-01-31    43.72
2018-02-01    42.49
Name: 5. adjusted close, dtype: float64

ind ad
ind adosc
ind adx
ind adxr
ind apo
ind aroon
ind aroonosc
ind bbands
ind bop
ind cci
ind cmo
ind dema
ind dx
ind ema
ind ht_dcperiod
ind ht_dcphase
ind ht_phasor
ind ht_sine
ind ht_trendline
ind ht_trendmode
ind kama
ind macd
ind macdext
ind mama
ind mfi
ind midpoint
ind midprice
ind minus_di
ind minus_dm
ind mom
ind natr
ind obv
ind plus_di
ind plus_dm
ind ppo
ind roc
ind rocr
ind rsi
ind sar
ind sma
ind stoch
ind stochf
ind stochrsi
ind t3
ind tema
ind trange
ind trima
ind trix
ind ultsoc
ind willr
ind wma


'combined'

Unnamed: 0_level_0,5. adjusted close,Chaikin A/D,ADOSC,ADX,ADXR,APO,Aroon Up,Aroon Down,AROONOSC,Real Upper Band,...,FastD,FastK,T3,TEMA,TRANGE,TRIMA,TRIX,ULTOSC,WILLR,WMA
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2018-01-02,43.67,-765838200.0,-19567120.0,17.2292,24.3007,-0.2125,65.0,5.0,60.0,45.3354,...,33.3333,100.0,44.075,42.3004,2.595,43.0422,0.0263,42.5618,-40.4916,42.9522
2018-01-03,44.98,-729112100.0,-1310494.0,16.7447,23.8563,0.1966,60.0,0.0,60.0,45.4364,...,66.6667,100.0,43.9483,42.8867,1.38,43.1687,0.0196,50.5852,-24.6612,43.1695
2018-01-04,46.88,-682842200.0,20999520.0,16.7505,23.6672,0.6149,100.0,0.0,100.0,46.1215,...,100.0,100.0,43.8475,43.8649,2.0,43.287,0.0209,58.4113,-1.497,43.5436
2018-01-05,45.8,-698463600.0,23246840.0,16.7561,23.2246,0.871,95.0,0.0,95.0,46.4327,...,92.9963,78.9888,43.7759,44.3866,1.5048,43.3805,0.0261,59.7692,-19.0016,43.7878
2018-01-08,45.55,-695778600.0,22907160.0,16.6082,22.5155,0.9322,90.0,75.0,15.0,46.6841,...,75.7042,48.1238,43.7338,44.7458,1.18,43.4465,0.0334,60.1755,-23.0274,43.988
2018-01-09,42.97,-749283600.0,3661430.0,15.8264,21.5947,0.6783,85.0,70.0,15.0,46.6793,...,42.3709,0.0,43.7102,44.3655,2.78,43.4908,0.037,53.8623,-64.5733,43.9314
2018-01-10,43.31,-699715900.0,11226850.0,15.3452,20.9336,0.535,80.0,65.0,15.0,46.6866,...,18.7098,8.0057,43.6983,44.1376,1.98,43.5365,0.0383,59.9712,-59.0982,43.9083
2018-01-11,42.82,-727037600.0,4607900.0,14.7127,20.1772,0.3144,75.0,60.0,15.0,46.6566,...,2.6686,0.0,43.6912,43.8211,1.38,43.5923,0.037,51.8318,-66.9887,43.8371
2018-01-12,42.81,-743843400.0,-3866048.0,14.1118,19.4714,0.2995,70.0,55.0,15.0,46.6337,...,2.6686,0.0,43.6842,43.5581,0.675,43.6515,0.0336,48.1284,-67.1498,43.7604
2018-01-16,42.92,-755265800.0,-10615610.0,13.4192,18.7401,0.2846,65.0,50.0,15.0,46.6176,...,8.4387,25.3162,43.6746,43.3718,1.44,43.7292,0.0291,42.5051,-65.3784,43.6906


'supervised'

Unnamed: 0_level_0,var1(t-2),var2(t-2),var3(t-2),var4(t-2),var5(t-2),var6(t-2),var7(t-2),var8(t-2),var9(t-2),var10(t-2),...,var56(t),var57(t),var58(t),var59(t),var60(t),var61(t),var62(t),var63(t),var64(t),var65(t)
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2018-01-04,43.67,-765838200.0,-19567120.0,17.2292,24.3007,-0.2125,65.0,5.0,60.0,45.3354,...,100.0,100.0,43.8475,43.8649,2.0,43.287,0.0209,58.4113,-1.497,43.5436
2018-01-05,44.98,-729112100.0,-1310494.0,16.7447,23.8563,0.1966,60.0,0.0,60.0,45.4364,...,92.9963,78.9888,43.7759,44.3866,1.5048,43.3805,0.0261,59.7692,-19.0016,43.7878
2018-01-08,46.88,-682842200.0,20999520.0,16.7505,23.6672,0.6149,100.0,0.0,100.0,46.1215,...,75.7042,48.1238,43.7338,44.7458,1.18,43.4465,0.0334,60.1755,-23.0274,43.988
2018-01-09,45.8,-698463600.0,23246840.0,16.7561,23.2246,0.871,95.0,0.0,95.0,46.4327,...,42.3709,0.0,43.7102,44.3655,2.78,43.4908,0.037,53.8623,-64.5733,43.9314
2018-01-10,45.55,-695778600.0,22907160.0,16.6082,22.5155,0.9322,90.0,75.0,15.0,46.6841,...,18.7098,8.0057,43.6983,44.1376,1.98,43.5365,0.0383,59.9712,-59.0982,43.9083
2018-01-11,42.97,-749283600.0,3661430.0,15.8264,21.5947,0.6783,85.0,70.0,15.0,46.6793,...,2.6686,0.0,43.6912,43.8211,1.38,43.5923,0.037,51.8318,-66.9887,43.8371
2018-01-12,43.31,-699715900.0,11226850.0,15.3452,20.9336,0.535,80.0,65.0,15.0,46.6866,...,2.6686,0.0,43.6842,43.5581,0.675,43.6515,0.0336,48.1284,-67.1498,43.7604
2018-01-16,42.82,-727037600.0,4607900.0,14.7127,20.1772,0.3144,75.0,60.0,15.0,46.6566,...,8.4387,25.3162,43.6746,43.3718,1.44,43.7292,0.0291,42.5051,-65.3784,43.6906
2018-01-17,42.81,-743843400.0,-3866048.0,14.1118,19.4714,0.2995,70.0,55.0,15.0,46.6337,...,41.7721,100.0,43.6658,43.5683,1.46,43.8225,0.0264,48.9841,-43.8003,43.7451
2018-01-18,42.92,-755265800.0,-10615610.0,13.4192,18.7401,0.2846,65.0,50.0,15.0,46.6176,...,68.5775,80.4162,43.6591,43.6606,0.74,43.8952,0.0246,46.452,-48.1481,43.765


'train supervised values'

array([[ 4.36700000e+01, -7.65838195e+08, -1.95671214e+07, ...,
         5.84113000e+01, -1.49700000e+00,  4.35436000e+01],
       [ 4.49800000e+01, -7.29112106e+08, -1.31049387e+06, ...,
         5.97692000e+01, -1.90016000e+01,  4.37878000e+01],
       [ 4.68800000e+01, -6.82842157e+08,  2.09995188e+07, ...,
         6.01755000e+01, -2.30274000e+01,  4.39880000e+01],
       ...,
       [ 4.28200000e+01, -7.27037599e+08,  4.60790016e+06, ...,
         4.25051000e+01, -6.53784000e+01,  4.36906000e+01],
       [ 4.28100000e+01, -7.43843389e+08, -3.86604799e+06, ...,
         4.89841000e+01, -4.38003000e+01,  4.37451000e+01],
       [ 4.29200000e+01, -7.55265806e+08, -1.06156097e+07, ...,
         4.64520000e+01, -4.81481000e+01,  4.37650000e+01]])

'test supervised values'

array([[ 4.42600000e+01, -7.23890886e+08, -2.42880943e+06, ...,
         4.83850000e+01, -6.81159000e+01,  4.36656000e+01],
       [ 4.39900000e+01, -7.43267519e+08, -5.02415557e+06, ...,
         4.73603000e+01, -6.60225000e+01,  4.35843000e+01],
       [ 4.27500000e+01, -7.78970542e+08, -1.69891993e+07, ...,
         5.11901000e+01, -4.87923000e+01,  4.36187000e+01],
       ...,
       [ 4.36700000e+01, -7.59903537e+08, -1.03532400e+07, ...,
         4.64422000e+01, -9.17098000e+01,  4.33130000e+01],
       [ 4.32900000e+01, -7.38139467e+08, -1.56004778e+06, ...,
         4.84348000e+01, -5.83184000e+01,  4.33137000e+01],
       [ 4.16700000e+01, -7.64463837e+08, -6.19694607e+06, ...,
         4.36211000e+01, -8.03220000e+01,  4.31970000e+01]])

'scaled train supervised'

array([[-0.57739558, -1.        , -1.        , ...,  0.80032144,
         1.        , -1.        ],
       [ 0.06633907, -0.11499175, -0.14716474, ...,  0.95401349,
         0.46675237,  0.0990099 ],
       [ 1.        ,  1.        ,  0.89501928, ...,  1.        ,
         0.34411327,  1.        ],
       ...,
       [-0.995086  , -0.06500125,  0.12930555, ..., -1.        ,
        -0.94603734, -0.33843384],
       [-1.        , -0.46997936, -0.26654422, ..., -0.26668327,
        -0.28869751, -0.09315932],
       [-0.94594595, -0.74523146, -0.58184146, ..., -0.55327553,
        -0.42114579, -0.00360036]])

'scaled test supervised'

array([[-0.28746929,  0.0108268 , -0.19940544, ..., -0.33449158,
        -1.02943058, -0.45094509],
       [-0.42014742, -0.45610233, -0.32064375, ..., -0.45047084,
        -0.96565874, -0.81683168],
       [-1.02948403, -1.31645719, -0.87957563, ..., -0.01700018,
        -0.44077023, -0.6620162 ],
       ...,
       [-0.57739558, -0.85698937, -0.56958519, ..., -0.55438473,
        -1.7481783 , -2.03780378],
       [-0.76412776, -0.33252891, -0.15882234, ..., -0.32885503,
        -0.73096654, -2.03465347],
       [-1.56019656, -0.96688135, -0.37542918, ..., -0.87368707,
        -1.40126849, -2.55985599]])

Fitting the model


'train X data'

array([[[-0.57739558, -1.        , -1.        ,  1.        ,
          1.        ]],

       [[ 0.06633907, -0.11499175, -0.14716474,  0.74566929,
          0.84016113]],

       [[ 1.        ,  1.        ,  0.89501928,  0.74871391,
          0.77214689]],

       [[ 0.46928747,  0.62356196,  1.        ,  0.75165354,
          0.61295544]],

       [[ 0.34643735,  0.68826338,  0.98413234,  0.67401575,
          0.35791102]],

       [[-0.92137592, -0.60107399,  0.08509241,  0.26362205,
          0.02672373]],

       [[-0.75429975,  0.59338447,  0.43850134,  0.01102362,
         -0.21105636]],

       [[-0.995086  , -0.06500125,  0.12930555, -0.32099738,
         -0.48311333]],

       [[-1.        , -0.46997936, -0.26654422, -0.63643045,
         -0.73697083]],

       [[-0.94594595, -0.74523146, -0.58184146, -1.        ,
         -1.        ]]])

'train y data'

array([[-1.        , -0.75      , -0.86666667, ...,  0.80032144,
         1.        , -1.        ],
       [-0.28522757, -1.        , -1.        , ...,  0.95401349,
         0.46675237,  0.0990099 ],
       [ 0.44561894,  1.        , -1.        , ...,  1.        ,
         0.34411327,  1.        ],
       ...,
       [-0.07940945, -0.25      ,  0.6       , ..., -1.        ,
        -0.94603734, -0.33843384],
       [-0.10544247, -0.5       ,  0.46666667, ..., -0.26668327,
        -0.28869751, -0.09315932],
       [-0.1314755 , -0.75      ,  0.33333333, ..., -0.55327553,
        -0.42114579, -0.00360036]])

NameError: name 'Sequential' is not defined

In [26]:
#multi_step_lstm_tech_indicator.tech_indicators.__dict__.keys()
print(multi_step_lstm_tech_indicator.all_tech_indicators)
#data = multi_step_lstm_tech_indicator.get_rsi()
#X, y = multi_step_lstm_tech_indicator.train_scaled[:, 0:multi_step_lstm_tech_indicator.n_lag], multi_step_lstm_tech_indicator.train_scaled[:, multi_step_lstm_tech_indicator.n_lag:]
#X = X.reshape(X.shape[0], 1, X.shape[1])

['ad', 'adosc', 'adx', 'adxr', 'apo', 'aroon', 'aroonosc', 'bbands', 'bop', 'cci', 'cmo', 'dema', 'dx', 'ema', 'ht_dcperiod', 'ht_dcphase', 'ht_phasor', 'ht_sine', 'ht_trendline', 'ht_trendmode', 'kama', 'macd', 'macdext', 'mama', 'mfi', 'midpoint', 'midprice', 'minus_di', 'minus_dm', 'mom', 'natr', 'obv', 'plus_di', 'plus_dm', 'ppo', 'roc', 'rocr', 'rsi', 'sar', 'sma', 'stoch', 'stochf', 'stochrsi', 't3', 'tema', 'trange', 'trima', 'trix', 'ultsoc', 'willr', 'wma']


In [None]:
#import numpy as np
#import pandas as pd
display(multi_step_lstm_tech_indicator)

display("size", len(data.values), " values:", data.values[1:-1])
display("size", len(X), " values:", X)
display(np.concatenate((X, data.values[1:-1]), axis=1))
#display(pd.concat([price, data], axis=1))