In [2]:
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation
from keras.layers.recurrent import LSTM
from keras.optimizers import SGD
import os
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from keras.utils import np_utils
from sklearn.metrics import precision_recall_fscore_support
from keras.callbacks import TensorBoard
import tensorflow as tf

os.chdir('../Utils/')
import featureGenerator
from featureGenerator import *
os.chdir('../src/')
import orderbook_lstm
from orderbook_lstm import OrderBookLSTM

Using TensorFlow backend.


# Generate Features and Response Vars

In [3]:
data_dir = '../../../../ProjectData/'
#in_path = data_dir+'msft-orderbook.csv'
out_path = data_dir+'msft-orderbook-all.csv'
out_path2 = data_dir+'msft-orderbook-final.csv'

In [None]:
mergeOrderBookDays(data_dir, out_path, ['msft'])

In [None]:
createFeatures(out_path, out_path2, response_type = 'Classification')
data = pd.read_csv(out_path2)

In [None]:
pd.set_option('display.max_columns', 500)
data.head()

In [4]:
data = pd.read_csv(out_path2)
#data = data.drop(['datetime', 'direct.last_SRO'], axis = 1)

# normalize the dataset
scaler = MinMaxScaler(feature_range=(0, 1))
cols_to_normalize = [col for col in data.columns if col != 'Response']
data[cols_to_normalize] = scaler.fit_transform(data[cols_to_normalize])

dataset = data.values
dataset = dataset.astype('float32')


# Train/Test Split

In [5]:
# split into train and test sets
train_size = int(len(dataset) * 0.67)
test_size = len(dataset) - train_size
train, test = dataset[0:train_size,:], dataset[train_size:len(dataset),:]
print(len(train), len(test))

156779 77220


In [6]:
# convert an array of values into a dataset matrix
def create_dataset(dataset, look_back=5):
    dataY = get_one_hot(dataset[look_back+1:,dataset.shape[1]-1].astype(int).reshape(-1),3)
    dataX = []
    for i in range(len(dataset)-look_back-1):
        a = dataset[i:(i+look_back), :]    
        dataX.append(a)
    return np.array(dataX), np.array(dataY)

In [7]:
# Convert response variable to one-hot vectors
def get_one_hot(targets, nb_classes):
    return np.eye(nb_classes)[np.array(targets).reshape(-1)]

In [8]:
look_back = 30
trainX, trainY = create_dataset(train, look_back)
testX, testY = create_dataset(test, look_back)
print(trainX.shape)
print(trainY.shape)

(156748, 30, 69)
(156748, 3)


# Train Model

In [13]:
#tf.reset_default_graph()
from keras import backend as K
K.clear_session()
#timesteps = 10
n_features = 69
n_neurons = 100
n_classes = 3
n_hidden = 1
dropout = None

#lstm = OrderBookLSTM(lookback, n_neurons, (timesteps,n_features), n_classes, n_hidden, dropout)


lstm = OrderBookLSTM(look_back, 100, (look_back,69), 3, 0)

Building model...
Compiling model...


In [14]:
mod = lstm.get_model()

In [15]:
mod.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 30, 100)           68000     
_________________________________________________________________
flatten_1 (Flatten)          (None, 3000)              0         
_________________________________________________________________
dense_1 (Dense)              (None, 3)                 9003      
Total params: 77,003
Trainable params: 77,003
Non-trainable params: 0
_________________________________________________________________


In [25]:
# Class weights to change
class_weight = {0 : 1.,
    1: 12.,
    2: 12.} 

mod.fit(trainX, trainY, 
          epochs=1,  
          batch_size=128, 
          verbose=1, 
          class_weight = class_weight,
          callbacks=[TensorBoard(log_dir='Logs/', write_graph=True)])
    

Epoch 1/1

KeyboardInterrupt: 

# Make Predictions and get Metrics

In [22]:
# Training Error
preds_training = mod.predict(trainX).argmax(axis=-1)
pd.Series(preds_training).value_counts()
precision_recall_fscore_support(np.argmax(trainY, axis=1), preds_training)

(array([ 0.94629158,  0.12765957,  0.13000865]),
 array([ 0.95839973,  0.02788442,  0.18225904]),
 array([ 0.95230717,  0.04577114,  0.15176243]),
 array([146850,   4949,   4949]))

In [23]:
# Validation Error
preds = mod.predict(testX).argmax(axis=-1)
pd.Series(preds).value_counts()
precision_recall_fscore_support(np.argmax(testY, axis=1), preds)

(array([ 0.93383359,  0.09489051,  0.12377495]),
 array([ 0.89764515,  0.02816465,  0.31447894]),
 array([ 0.91538184,  0.04343675,  0.17763501]),
 array([70705,  3231,  3253]))

# Ignore

In [None]:
# Class weights to change
class_weight = {0 : 1.,
    1: 15.,
    2: 18.} 

# create and fit the LSTM network
model = Sequential()
model.add(LSTM(10, input_shape=(5,69), return_sequences=False))
model.add(Dense(3, activation = 'softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics = ['accuracy'])
model.fit(trainX, trainY, 
          epochs=2,  
          batch_size=10, 
          #verbose=2, 
          class_weight = class_weight,
          callbacks=[TensorBoard(log_dir='Logs/testlog', write_graph=True)])

In [None]:
preds = model.predict(testX).argmax(axis=-1)
pd.Series(preds).value_counts()

In [None]:
precision_recall_fscore_support(np.argmax(testY, axis=1), preds)