In [19]:
# import libraries 

import pandas as pd
import numpy as np
from tensorflow.python.keras.models import Sequential
from tensorflow.python.keras.layers import Dense
from tensorflow.python.keras.layers import LSTM
from tensorflow.python.keras.layers import Dropout
from tensorflow.python.keras import optimizers
from sklearn.model_selection import train_test_split
import tensorflow.python.keras.layers
import tensorflow as tf
from google.colab import files
from tensorflow.python.keras.callbacks import EarlyStopping
from tensorflow.python.keras.callbacks import ModelCheckpoint

# Load the series for one period from its CSV file.
# The same cell was re-run for periods 1-6, editing only the file name below.
df = pd.read_csv('returns_6.csv', header=None)
# flatten the table into an independent 1-D array (flatten() always copies)
data = df.to_numpy().flatten()

# window length: number of consecutive values that make up one model input
b_size = 20

# this function changes the shape of the data as LSTM requires
def build_timeseries(array, window=None):
    """Slice a 1-D series into overlapping windows and next-step targets.

    Sample ``i`` is ``array[i:i + window]`` and its target is
    ``array[i + window]``, so a series of length ``n`` yields
    ``n - window`` samples.

    Parameters
    ----------
    array : array-like
        One-dimensional series of numeric values.
    window : int, optional
        Number of consecutive values per sample. When omitted, the
        module-level ``b_size`` is used, so existing one-argument calls
        behave as before.

    Returns
    -------
    x : numpy.ndarray
        float64 array of shape ``(n - window, window, 1)``. The trailing
        axis is added because the LSTM input is 3-D.
    y : numpy.ndarray
        float64 array of shape ``(n - window,)``.

    Raises
    ------
    ValueError
        If ``array`` is not one-dimensional, ``window`` is negative, or
        the series holds fewer than ``window`` values.
    """
    if window is None:
        # fall back to the notebook-wide window length
        window = b_size

    array = np.asarray(array)
    if array.ndim != 1:
        raise ValueError(
            "expected a 1-D series, got an array with %d dimensions" % array.ndim)
    if window < 0:
        raise ValueError("window must be non-negative, got %d" % window)

    dim_0 = array.shape[0] - window
    if dim_0 < 0:
        # previously surfaced as numpy's "negative dimensions are not allowed"
        raise ValueError(
            "series of length %d is shorter than window %d"
            % (array.shape[0], window))

    # row i holds the indices i, i+1, ..., i+window-1
    offsets = np.arange(dim_0)[:, None] + np.arange(window)[None, :]
    x = array[offsets].astype(np.float64)
    y = array[window:].astype(np.float64)

    # dimension is expanded, as required for LSTM inputs
    x = np.expand_dims(x, axis=2)
    print("length of time-series i/o",x.shape,y.shape)
    return x, y
# window the series into model inputs and next-step targets
x, y = build_timeseries(data)

# chronological (unshuffled) split: the first 70% is the training set, and the
# remaining 30% is divided 2:1 into validation (20%) and test (10%);
# inputs and targets are split together so they stay aligned
x_train, x_test, y_train, y_test = train_test_split(
    x, y, train_size=0.7, test_size=0.3, shuffle=False)
x_valid, x_test, y_valid, y_test = train_test_split(
    x_test, y_test, train_size=2/3, test_size=1/3, shuffle=False)

# binarise the targets: 1 when the value is >= 0 (market up or unchanged),
# 0 when it is negative (market down)
y_train = (y_train >= 0).astype(int)
y_valid = (y_valid >= 0).astype(int)
y_test = (y_test >= 0).astype(int)

# early stopping on validation loss; with patience=0 training ends at the
# first epoch whose val_loss does not improve
es = EarlyStopping(monitor='val_loss', mode='min', patience=0)

# Stacked LSTM classifier, fed one window at a time (batch size fixed to 1).
# NOTE(review): every LSTM layer is stateful and reset_states() is never
# called in this cell, so hidden state presumably carries over between
# batches and epochs - confirm this is intended.
lstm_model = Sequential([
    # first layer; return_sequences should be False if this is the only LSTM layer being used
    LSTM(25, batch_input_shape=(1, b_size, 1), dropout=0.1,
         stateful=True, return_sequences=True),
    # second layer, was not used for the original LSTM model
    LSTM(50, dropout=0.1, stateful=True, return_sequences=True),
    # third layer, was not used for the original LSTM model
    LSTM(25, dropout=0.1, stateful=True),
    # dense layer which serves as the output
    Dense(1, activation='sigmoid'),
])

# binary cross-entropy loss, adam optimizer, accuracy as the reported metric
lstm_model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# The inputs are always multiplied by 20: the raw values were too small for
# the model to converge without being scaled.
# The model structure is explained in the paper.
lstm_model.fit(
    20 * x_train,
    y_train,
    validation_data=(20 * x_valid, y_valid),
    epochs=1000,
    batch_size=1,
    verbose=1,
    shuffle=False,
    callbacks=[es],
)

length of time-series i/o (3755, 20, 1) (3755,)
Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000


<tensorflow.python.keras.callbacks.History at 0x7f51675046d8>

In [20]:
# quick check of model performance on the test split (inputs scaled by 20, as in training)
lstm_model.evaluate(x=20 * x_test, y=y_test, batch_size=1)



[0.24716107547283173, 0.5585106611251831]

In [21]:
# Predict on the test windows with the current LSTM model (inputs scaled by 20),
# write the flattened outputs to a CSV file and download it from Colab.
# As with the input file, the output file name is edited by hand for each period.
pred = lstm_model.predict(20 * x_test, batch_size=1).flatten()
np.savetxt("n_result6.csv", pred, delimiter=",")
files.download('n_result6.csv')
pred

array([0.5343074 , 0.5342568 , 0.5342055 , 0.5341652 , 0.5341751 ,
       0.53426015, 0.5342479 , 0.53426313, 0.53429383, 0.5342912 ,
       0.5342932 , 0.534331  , 0.53435105, 0.5343428 , 0.5343138 ,
       0.5342568 , 0.5342855 , 0.53434443, 0.5343923 , 0.53435946,
       0.5343511 , 0.5343105 , 0.5342757 , 0.53425807, 0.5342553 ,
       0.53427815, 0.5343055 , 0.5343622 , 0.534392  , 0.53434336,
       0.5343709 , 0.5343163 , 0.5342845 , 0.5342707 , 0.534268  ,
       0.5342837 , 0.53431004, 0.53428453, 0.5342443 , 0.53425545,
       0.5342453 , 0.53429455, 0.5343177 , 0.5343398 , 0.5343615 ,
       0.5343819 , 0.5343871 , 0.5343623 , 0.5343237 , 0.53431004,
       0.5342789 , 0.5342798 , 0.5343242 , 0.53430223, 0.53429526,
       0.53425306, 0.53426546, 0.5343052 , 0.53432906, 0.53435546,
       0.53433716, 0.53433096, 0.53430533, 0.5343071 , 0.5343084 ,
       0.53436184, 0.5343973 , 0.5343877 , 0.53437364, 0.5345312 ,
       0.53462285, 0.53447473, 0.53441805, 0.5342247 , 0.53418