In [2]:
import pandas as pd
import numpy as np
import os

In [3]:
with pd.HDFStore(os.path.join('/', 'root', 'fin_lstm', 'observations.h5')) as store:
    observations = store['o']

In [4]:
# Cols to drop:
ctd = ['Open', 'High', 'Low', 'Close', # stock price. Leave volume
       'o_id', # id
       'COMLND'] # commercial lending index - many NaNs       
observations = observations.drop(ctd, axis=1).dropna()

# Normalize Volume
v = observations['Volume']
mi = v.min(); di = v.max() - mi
observations['Volume'] = v.apply(lambda t: (t - mi) / di)

# Separate labels
labels = observations['sig^2'] = observations['sig^2']*100
observations['lag_sig^2'] *= 100

# Drop labels from observations
# observations.drop('sig^2', axis=1, inplace=True)

In [5]:
# Split into train and validate
mark = int(len(observations)*.7)
tX = observations[:mark]
tY = labels[:mark]
vX = observations[mark:]
vY = labels[mark:]

In [6]:
timesteps = 10

In [51]:
create_dataset(observations.values, labels.values, 2)

(array([[[ 1.25,  1.04,  0.99, ...,  0.  ,  0.  ,  1.  ],
         [ 1.27,  1.05,  0.98, ...,  0.  ,  0.  ,  1.  ]],
 
        [[ 1.27,  1.05,  0.98, ...,  0.  ,  0.  ,  1.  ],
         [ 1.31,  1.05,  0.96, ...,  0.  ,  0.  ,  1.  ]],
 
        [[ 1.31,  1.05,  0.96, ...,  0.  ,  0.  ,  1.  ],
         [ 1.3 ,  1.03,  0.94, ...,  0.  ,  0.  ,  1.  ]],
 
        ..., 
        [[ 0.75,  0.67,  0.82, ...,  0.  ,  0.  ,  0.  ],
         [ 0.75,  0.67,  0.81, ...,  0.  ,  0.  ,  0.  ]],
 
        [[ 0.75,  0.67,  0.81, ...,  0.  ,  0.  ,  0.  ],
         [ 0.75,  0.68,  0.81, ...,  0.  ,  0.  ,  0.  ]],
 
        [[ 0.75,  0.68,  0.81, ...,  0.  ,  0.  ,  0.  ],
         [ 0.74,  0.69,  0.81, ...,  0.  ,  0.  ,  0.  ]]]),
 array([ 0.02968029,  0.05076174,  0.01673453, ...,  0.04495117,
         0.02677589,  0.07744982]))

In [11]:
def create_dataset(dataset, Y, look_back=1):
    dataX, dataY = [], []
    for i in range(len(dataset)-look_back-1):
        a = dataset[i:(i+look_back), :]
        dataX.append(a)
        dataY.append(Y[i + look_back])
    return np.array(dataX), np.array(dataY)

In [8]:
from keras.models import Sequential
from keras.layers import LSTM, Dense
from keras.optimizers import RMSprop, SGD, Nadam
from keras.callbacks import TensorBoard 

Using TensorFlow backend.


In [9]:
# expected input data shape: (batch_size, timesteps, data_dim)
data_dim = tX.values.shape[1]

model = Sequential()
model.add(LSTM(40, return_sequences=True,
               input_shape=(timesteps, data_dim)
              ))
model.add(LSTM(40, return_sequences=False))
model.add(Dense(1))

model.compile(loss='msle', optimizer=RMSprop(), metrics=['accuracy'])

# TB callback
TB = TensorBoard(log_dir='./logs', histogram_freq=0, write_graph=True, write_images=False)

In [None]:
model.fit(*create_dataset(observations.values, labels.values, timesteps),
          batch_size=50, nb_epoch=500, verbose=2,
          validation_split=.2,
          shuffle=True,
          callbacks=[TB])

Train on 27801 samples, validate on 6951 samples
Epoch 1/500
14s - loss: 0.0080 - acc: 0.0028 - val_loss: 0.0071 - val_acc: 0.0056
Epoch 2/500
13s - loss: 0.0062 - acc: 0.0028 - val_loss: 0.0063 - val_acc: 0.0056
Epoch 3/500
13s - loss: 0.0059 - acc: 0.0028 - val_loss: 0.0108 - val_acc: 0.0056
Epoch 4/500
13s - loss: 0.0056 - acc: 0.0028 - val_loss: 0.0052 - val_acc: 0.0056
Epoch 5/500
12s - loss: 0.0055 - acc: 0.0028 - val_loss: 0.0047 - val_acc: 0.0056
Epoch 6/500
12s - loss: 0.0054 - acc: 0.0028 - val_loss: 0.0050 - val_acc: 0.0056
Epoch 7/500
13s - loss: 0.0054 - acc: 0.0028 - val_loss: 0.0048 - val_acc: 0.0056
Epoch 8/500
12s - loss: 0.0054 - acc: 0.0028 - val_loss: 0.0050 - val_acc: 0.0056
Epoch 9/500
12s - loss: 0.0053 - acc: 0.0028 - val_loss: 0.0055 - val_acc: 0.0056
Epoch 10/500
13s - loss: 0.0053 - acc: 0.0028 - val_loss: 0.0061 - val_acc: 0.0056
Epoch 11/500
14s - loss: 0.0053 - acc: 0.0028 - val_loss: 0.0049 - val_acc: 0.0056
Epoch 12/500
13s - loss: 0.0053 - acc: 0.0028 -

In [None]:
model.evaluate(*create_dataset(observations.values, labels.values, 1), batch_size=10, verbose=1)