In [1]:
from talib import MA,STOCH
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

In [2]:
df = pd.read_csv("SPY.csv",date_parser=True)

In [3]:
df_train = df[ df["Date"]<'2017-01-01' ].copy()
# df_validation = df[(df['Date'] >= '2017-01-01') & (df['Date'] < "2018-01-01")].copy()
df_validation = df[df["Date"] >= "2017-01-01"].copy()
df_plot = df[df["Date"] >= "2018-01-01"].copy()

# Graph Plotting

# Add Technical Analysis

In [4]:
ma_10_train = MA(df_train["Close"], timeperiod=10, matype=0)
ma_30_train = MA(df_train["Close"], timeperiod=30, matype=0)
k_train, d_train = STOCH(df_train["High"], df_train["Low"], df_train["Close"],fastk_period=5, slowk_period=3, slowk_matype=0, slowd_period=3, slowd_matype=0)
df_train["MA10"] = ma_10_train
df_train["MA30"] = ma_30_train
df_train["K"] = k_train
df_train["D"] = d_train

ma_10_validation = MA(df_validation["Close"], timeperiod=10, matype=0)
ma_30_validation = MA(df_validation["Close"], timeperiod=30, matype=0)
k_validation, d_validation = STOCH(df_validation["High"], df_validation["Low"], df_validation["Close"],fastk_period=5, slowk_period=3, slowk_matype=0, slowd_period=3, slowd_matype=0)
df_validation["MA10"] = ma_10_validation
df_validation["MA30"] = ma_30_validation
df_validation["K"] = k_validation
df_validation["D"] = d_validation

In [5]:
df_validation.shape

(501, 11)

## Drop dates and NA

In [6]:
df_train = df_train.dropna(axis=0)
df_train = df_train.drop(["Date"], axis=1)

df_validation = df_validation.dropna(axis=0)
df_validation = df_validation.drop(["Date"], axis=1)

In [7]:
df_validation.shape

(472, 10)

## Normalize data

In [8]:
def normalizeDataframe(data_frame):
    normalize_df = data_frame.copy()
    for column in normalize_df.columns:
        min_value = min(normalize_df[column])
        max_value = max(normalize_df[column])
        normalize_df[column] = (normalize_df[column] - min_value) / (max_value - min_value)
    return normalize_df

In [9]:
df_train = normalizeDataframe(df_train)
df_validation = normalizeDataframe(df_validation)

In [10]:
df_validation.shape

(472, 10)

## Prepare X, y train and validation for RNN

In [11]:
data_train = df_train.values
data_validation = df_validation.values

In [12]:
X_train = []
y_train = []
X_validation = []
y_validation = []

In [13]:
for i in range(30,data_train.shape[0]):
    X_train.append(data_train[i-30:i])
    y_train.append(data_train[i, 0])

for i in range(30, data_validation.shape[0]):
    X_validation.append(data_validation[i-30:i])
    y_validation.append(data_validation[i,0])

In [14]:
X_train, y_train = np.array(X_train), np.array(y_train)
X_validation, y_validation = np.array(X_validation), np.array(y_validation)

In [15]:
X_train.shape
# y_train.shape

(5734, 30, 10)

# Building Models

In [35]:
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, LSTM, Dropout, SimpleRNN, GRU
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

In [36]:
early_stopping = EarlyStopping(monitor="val_loss", mode="min", patience=8)

## Vanilla RNN

In [37]:
regressor_RNN = Sequential()
regressor_RNN.add(SimpleRNN(units = 32, activation = 'tanh', input_shape = (X_train.shape[1], X_train.shape[2])))
regressor_RNN.add(Dense(units = 1))

In [38]:
regressor_RNN.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
simple_rnn_2 (SimpleRNN)     (None, 32)                1376      
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 33        
Total params: 1,409
Trainable params: 1,409
Non-trainable params: 0
_________________________________________________________________


In [39]:
checkpoint_RNN = ModelCheckpoint(filepath="best_params_RNN.hdf5", monitor="val_loss",verbose=1,save_best_only=True)

In [40]:
regressor_RNN.compile(optimizer='adam', loss = 'mean_squared_error')

In [41]:
regressor_RNN.fit(X_train, y_train, epochs=256, batch_size=64, validation_data = (X_validation, y_validation),callbacks=[checkpoint_RNN, early_stopping])

Train on 5734 samples, validate on 442 samples
Epoch 1/256
Epoch 00001: val_loss improved from inf to 0.00360, saving model to best_params_RNN.hdf5
Epoch 2/256
Epoch 00002: val_loss improved from 0.00360 to 0.00258, saving model to best_params_RNN.hdf5
Epoch 3/256
Epoch 00003: val_loss improved from 0.00258 to 0.00209, saving model to best_params_RNN.hdf5
Epoch 4/256
Epoch 00004: val_loss did not improve from 0.00209
Epoch 5/256
Epoch 00005: val_loss improved from 0.00209 to 0.00171, saving model to best_params_RNN.hdf5
Epoch 6/256
Epoch 00006: val_loss improved from 0.00171 to 0.00159, saving model to best_params_RNN.hdf5
Epoch 7/256
Epoch 00007: val_loss improved from 0.00159 to 0.00143, saving model to best_params_RNN.hdf5
Epoch 8/256
Epoch 00008: val_loss improved from 0.00143 to 0.00135, saving model to best_params_RNN.hdf5
Epoch 9/256
Epoch 00009: val_loss did not improve from 0.00135
Epoch 10/256
Epoch 00010: val_loss did not improve from 0.00135
Epoch 11/256
Epoch 00011: val_lo

Epoch 00032: val_loss did not improve from 0.00048
Epoch 33/256
Epoch 00033: val_loss did not improve from 0.00048
Epoch 34/256
Epoch 00034: val_loss did not improve from 0.00048
Epoch 35/256
Epoch 00035: val_loss did not improve from 0.00048
Epoch 36/256
Epoch 00036: val_loss improved from 0.00048 to 0.00044, saving model to best_params_RNN.hdf5
Epoch 37/256
Epoch 00037: val_loss did not improve from 0.00044
Epoch 38/256
Epoch 00038: val_loss improved from 0.00044 to 0.00043, saving model to best_params_RNN.hdf5
Epoch 39/256
Epoch 00039: val_loss improved from 0.00043 to 0.00038, saving model to best_params_RNN.hdf5
Epoch 40/256
Epoch 00040: val_loss did not improve from 0.00038
Epoch 41/256
Epoch 00041: val_loss did not improve from 0.00038
Epoch 42/256
Epoch 00042: val_loss improved from 0.00038 to 0.00036, saving model to best_params_RNN.hdf5
Epoch 43/256
Epoch 00043: val_loss did not improve from 0.00036
Epoch 44/256
Epoch 00044: val_loss did not improve from 0.00036
Epoch 45/256


<tensorflow.python.keras.callbacks.History at 0x187383f2b88>

## LSTM 

In [42]:
regressor_LSTM = Sequential()
regressor_LSTM.add(LSTM(units = 32, activation = 'tanh', input_shape = (X_train.shape[1], X_train.shape[2])))
regressor_LSTM.add(Dense(units = 1))

In [43]:
regressor_LSTM.summary()

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, 32)                5504      
_________________________________________________________________
dense_3 (Dense)              (None, 1)                 33        
Total params: 5,537
Trainable params: 5,537
Non-trainable params: 0
_________________________________________________________________


In [44]:
checkpoint_LSTM = ModelCheckpoint(filepath="best_params_LSTM.hdf5", monitor="val_loss",verbose=1,save_best_only=True)

In [45]:
regressor_LSTM.compile(optimizer='adam', loss = 'mean_squared_error')

In [46]:
regressor_LSTM.fit(X_train, y_train, epochs=256, batch_size=64, validation_data = (X_validation, y_validation),callbacks=[checkpoint_LSTM, early_stopping])

Train on 5734 samples, validate on 442 samples
Epoch 1/256
Epoch 00001: val_loss improved from inf to 0.00826, saving model to best_params_LSTM.hdf5
Epoch 2/256
Epoch 00002: val_loss improved from 0.00826 to 0.00495, saving model to best_params_LSTM.hdf5
Epoch 3/256
Epoch 00003: val_loss improved from 0.00495 to 0.00401, saving model to best_params_LSTM.hdf5
Epoch 4/256
Epoch 00004: val_loss improved from 0.00401 to 0.00367, saving model to best_params_LSTM.hdf5
Epoch 5/256
Epoch 00005: val_loss improved from 0.00367 to 0.00357, saving model to best_params_LSTM.hdf5
Epoch 6/256
Epoch 00006: val_loss improved from 0.00357 to 0.00329, saving model to best_params_LSTM.hdf5
Epoch 7/256
Epoch 00007: val_loss improved from 0.00329 to 0.00322, saving model to best_params_LSTM.hdf5
Epoch 8/256
Epoch 00008: val_loss improved from 0.00322 to 0.00307, saving model to best_params_LSTM.hdf5
Epoch 9/256
Epoch 00009: val_loss improved from 0.00307 to 0.00293, saving model to best_params_LSTM.hdf5
Epo

Epoch 31/256
Epoch 00031: val_loss improved from 0.00169 to 0.00162, saving model to best_params_LSTM.hdf5
Epoch 32/256
Epoch 00032: val_loss improved from 0.00162 to 0.00155, saving model to best_params_LSTM.hdf5
Epoch 33/256
Epoch 00033: val_loss did not improve from 0.00155
Epoch 34/256
Epoch 00034: val_loss improved from 0.00155 to 0.00152, saving model to best_params_LSTM.hdf5
Epoch 35/256
Epoch 00035: val_loss did not improve from 0.00152
Epoch 36/256
Epoch 00036: val_loss improved from 0.00152 to 0.00145, saving model to best_params_LSTM.hdf5
Epoch 37/256
Epoch 00037: val_loss improved from 0.00145 to 0.00141, saving model to best_params_LSTM.hdf5
Epoch 38/256
Epoch 00038: val_loss improved from 0.00141 to 0.00139, saving model to best_params_LSTM.hdf5
Epoch 39/256
Epoch 00039: val_loss improved from 0.00139 to 0.00135, saving model to best_params_LSTM.hdf5
Epoch 40/256
Epoch 00040: val_loss improved from 0.00135 to 0.00133, saving model to best_params_LSTM.hdf5
Epoch 41/256
Epo

Epoch 61/256
Epoch 00061: val_loss did not improve from 0.00088
Epoch 62/256
Epoch 00062: val_loss improved from 0.00088 to 0.00084, saving model to best_params_LSTM.hdf5
Epoch 63/256
Epoch 00063: val_loss improved from 0.00084 to 0.00082, saving model to best_params_LSTM.hdf5
Epoch 64/256
Epoch 00064: val_loss did not improve from 0.00082
Epoch 65/256
Epoch 00065: val_loss improved from 0.00082 to 0.00079, saving model to best_params_LSTM.hdf5
Epoch 66/256
Epoch 00066: val_loss improved from 0.00079 to 0.00078, saving model to best_params_LSTM.hdf5
Epoch 67/256
Epoch 00067: val_loss improved from 0.00078 to 0.00076, saving model to best_params_LSTM.hdf5
Epoch 68/256
Epoch 00068: val_loss improved from 0.00076 to 0.00073, saving model to best_params_LSTM.hdf5
Epoch 69/256
Epoch 00069: val_loss improved from 0.00073 to 0.00071, saving model to best_params_LSTM.hdf5
Epoch 70/256
Epoch 00070: val_loss improved from 0.00071 to 0.00069, saving model to best_params_LSTM.hdf5
Epoch 71/256
Epo

Epoch 00091: val_loss improved from 0.00047 to 0.00046, saving model to best_params_LSTM.hdf5
Epoch 92/256
Epoch 00092: val_loss improved from 0.00046 to 0.00045, saving model to best_params_LSTM.hdf5
Epoch 93/256
Epoch 00093: val_loss did not improve from 0.00045
Epoch 94/256
Epoch 00094: val_loss improved from 0.00045 to 0.00043, saving model to best_params_LSTM.hdf5
Epoch 95/256
Epoch 00095: val_loss improved from 0.00043 to 0.00042, saving model to best_params_LSTM.hdf5
Epoch 96/256
Epoch 00096: val_loss did not improve from 0.00042
Epoch 97/256
Epoch 00097: val_loss improved from 0.00042 to 0.00041, saving model to best_params_LSTM.hdf5
Epoch 98/256
Epoch 00098: val_loss did not improve from 0.00041
Epoch 99/256
Epoch 00099: val_loss did not improve from 0.00041
Epoch 100/256
Epoch 00100: val_loss improved from 0.00041 to 0.00039, saving model to best_params_LSTM.hdf5
Epoch 101/256
Epoch 00101: val_loss did not improve from 0.00039
Epoch 102/256
Epoch 00102: val_loss improved from

Epoch 122/256
Epoch 00122: val_loss did not improve from 0.00030
Epoch 123/256
Epoch 00123: val_loss did not improve from 0.00030
Epoch 124/256
Epoch 00124: val_loss did not improve from 0.00030
Epoch 125/256
Epoch 00125: val_loss improved from 0.00030 to 0.00028, saving model to best_params_LSTM.hdf5
Epoch 126/256
Epoch 00126: val_loss improved from 0.00028 to 0.00028, saving model to best_params_LSTM.hdf5
Epoch 127/256
Epoch 00127: val_loss did not improve from 0.00028
Epoch 128/256
Epoch 00128: val_loss did not improve from 0.00028
Epoch 129/256
Epoch 00129: val_loss did not improve from 0.00028
Epoch 130/256
Epoch 00130: val_loss did not improve from 0.00028
Epoch 131/256
Epoch 00131: val_loss did not improve from 0.00028
Epoch 132/256
Epoch 00132: val_loss did not improve from 0.00028
Epoch 133/256
Epoch 00133: val_loss improved from 0.00028 to 0.00027, saving model to best_params_LSTM.hdf5
Epoch 134/256
Epoch 00134: val_loss did not improve from 0.00027
Epoch 135/256
Epoch 00135:

<tensorflow.python.keras.callbacks.History at 0x1877ffd4588>

## GRU

In [47]:
regressor_GRU = Sequential()
regressor_GRU.add(GRU(units = 32, activation = 'tanh', input_shape = (X_train.shape[1], X_train.shape[2])))
regressor_GRU.add(Dense(units = 1))

In [48]:
regressor_GRU.summary()

Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
gru (GRU)                    (None, 32)                4224      
_________________________________________________________________
dense_4 (Dense)              (None, 1)                 33        
Total params: 4,257
Trainable params: 4,257
Non-trainable params: 0
_________________________________________________________________


In [49]:
checkpoint_GRU = ModelCheckpoint(filepath="best_params_GRU.hdf5", monitor="val_loss",verbose=1,save_best_only=True)

In [50]:
regressor_GRU.compile(optimizer='adam', loss = 'mean_squared_error')

In [51]:
regressor_GRU.fit(X_train, y_train, epochs=256, batch_size=64, validation_data = (X_validation, y_validation),callbacks=[checkpoint_GRU, early_stopping])

Train on 5734 samples, validate on 442 samples
Epoch 1/256
Epoch 00001: val_loss improved from inf to 0.00154, saving model to best_params_GRU.hdf5
Epoch 2/256
Epoch 00002: val_loss improved from 0.00154 to 0.00130, saving model to best_params_GRU.hdf5
Epoch 3/256
Epoch 00003: val_loss improved from 0.00130 to 0.00126, saving model to best_params_GRU.hdf5
Epoch 4/256
Epoch 00004: val_loss improved from 0.00126 to 0.00119, saving model to best_params_GRU.hdf5
Epoch 5/256
Epoch 00005: val_loss improved from 0.00119 to 0.00113, saving model to best_params_GRU.hdf5
Epoch 6/256
Epoch 00006: val_loss improved from 0.00113 to 0.00108, saving model to best_params_GRU.hdf5
Epoch 7/256
Epoch 00007: val_loss improved from 0.00108 to 0.00099, saving model to best_params_GRU.hdf5
Epoch 8/256
Epoch 00008: val_loss improved from 0.00099 to 0.00095, saving model to best_params_GRU.hdf5
Epoch 9/256
Epoch 00009: val_loss improved from 0.00095 to 0.00085, saving model to best_params_GRU.hdf5
Epoch 10/256

Epoch 31/256
Epoch 00031: val_loss did not improve from 0.00030
Epoch 32/256
Epoch 00032: val_loss did not improve from 0.00030
Epoch 33/256
Epoch 00033: val_loss improved from 0.00030 to 0.00029, saving model to best_params_GRU.hdf5
Epoch 34/256
Epoch 00034: val_loss did not improve from 0.00029
Epoch 35/256
Epoch 00035: val_loss did not improve from 0.00029
Epoch 36/256
Epoch 00036: val_loss improved from 0.00029 to 0.00028, saving model to best_params_GRU.hdf5
Epoch 37/256
Epoch 00037: val_loss did not improve from 0.00028
Epoch 38/256
Epoch 00038: val_loss did not improve from 0.00028
Epoch 39/256
Epoch 00039: val_loss did not improve from 0.00028
Epoch 40/256
Epoch 00040: val_loss improved from 0.00028 to 0.00027, saving model to best_params_GRU.hdf5
Epoch 41/256
Epoch 00041: val_loss did not improve from 0.00027
Epoch 42/256
Epoch 00042: val_loss did not improve from 0.00027
Epoch 43/256
Epoch 00043: val_loss did not improve from 0.00027
Epoch 44/256
Epoch 00044: val_loss did not

<tensorflow.python.keras.callbacks.History at 0x187181e9288>