In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from keras.models import Model
from keras.layers import RNN, Dense, Input, Dropout, LSTM, Activation

from keras import backend as K
from rnn_util import *

%matplotlib inline

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
# Build the raw LOB object and convert it into model inputs/targets.
# The three helpers are not defined in this notebook; presumably they come from
# the star import of rnn_util — their internals are not visible here.
LOB = create_LOB()
# NOTE(review): what the codes 'CS' and 'TM' select is not shown here — confirm
# against prepare_RNN_data.
x, y, ts = prepare_RNN_data(LOB, ['CS', 'TM'])
# NOTE(review): 0.05 looks like the held-out fraction (the printed shapes show
# 2339 test samples versus 44447 training samples) — confirm against the helper.
# "suffle" is the helper's actual spelling.
X_train, Y_train, X_test, Y_test = split_and_suffle(x, y, ts, 0.05)

In [3]:
# Sanity-check the split: input tensor shapes first, then the shape of every
# target array in the train and test target lists.
print(X_train.shape, X_test.shape)
print(
    [target.shape for target in Y_train],
    [target.shape for target in Y_test],
)

(44447, 6, 6) (2339, 6, 6)
[(44447, 3), (44447, 3)] [(2339, 3), (2339, 3)]


In [4]:
def LOB_RNN_model(n_depth, n_lag, n_units=16, dropout_rate=0.5, n_classes=3):
    """Build a stacked-LSTM classifier with separate 'bid' and 'ask' heads.

    The network takes one input named 'LOB' of shape ``(n_lag, n_depth)``,
    runs it through two LSTM layers (each followed by dropout) and feeds the
    final LSTM state into two independent softmax layers.

    Args:
        n_depth: Size of the last input axis (features per time step).
        n_lag: Size of the first non-batch input axis (time steps per sample).
        n_units: Hidden units in each LSTM layer. Defaults to 16.
        dropout_rate: Dropout rate applied after each LSTM. Defaults to 0.5.
        n_classes: Width of each softmax head. Defaults to 3.

    Returns:
        An uncompiled ``Model`` whose outputs are ``[bid, ask]``, in that order.
    """
    lob = Input((n_lag, n_depth), dtype='float32', name='LOB')

    # The first LSTM returns the full sequence so the second one can consume it;
    # the second returns only its last state, which both heads share.
    X = LSTM(n_units, return_sequences=True, name='LSTM_1')(lob)
    X = Dropout(dropout_rate, name='Dropout_1')(X)
    X = LSTM(n_units, return_sequences=False, name='LSTM_2')(X)
    X = Dropout(dropout_rate, name='Dropout_2')(X)

    out_b = Dense(n_classes, activation='softmax', name='bid')(X)
    out_a = Dense(n_classes, activation='softmax', name='ask')(X)

    return Model(inputs=lob, outputs=[out_b, out_a])

In [5]:
# F1 metric assembled from batch-wise precision and recall; see https://keras.io/metrics/

def f1(y_true, y_pred):
    """Batch-wise F1 score built from precision and recall.

    Both tensors are clipped to [0, 1] and rounded to hard 0/1 values, and the
    counts are summed over every element of the batch, so the result is a
    single scalar per batch rather than a per-class score.

    Args:
        y_true: Ground-truth tensor.
        y_pred: Predicted tensor with the same shape as ``y_true``.

    Returns:
        Scalar tensor ``2 * P * R / (P + R + eps)`` where ``eps`` is
        ``K.epsilon()``.
    """
    eps = K.epsilon()

    # Elements that are positive in both the target and the prediction.
    hits = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    # Positives according to the prediction and according to the target.
    predicted = K.sum(K.round(K.clip(y_pred, 0, 1)))
    actual = K.sum(K.round(K.clip(y_true, 0, 1)))

    # eps keeps each ratio finite when a count is zero.
    precision = hits / (predicted + eps)
    recall = hits / (actual + eps)
    return 2 * ((precision * recall) / (precision + recall + eps))

In [6]:
# Size the network from the training tensor instead of repeating the literal 6s.
# LOB_RNN_model declares its input as (n_lag, n_depth), and X_train is what gets
# fed to that input, so axis 1 is n_lag and axis 2 is n_depth.
model = LOB_RNN_model(n_depth=X_train.shape[2], n_lag=X_train.shape[1])

# A single loss string and one metric list are given for a model with two
# outputs ('bid' and 'ask').
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=[f1, 'categorical_accuracy'],
)

In [7]:
# Print each layer's output shape, parameter count and upstream connection.
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
LOB (InputLayer)                (None, 6, 6)         0                                            
__________________________________________________________________________________________________
LSTM_1 (LSTM)                   (None, 6, 16)        1472        LOB[0][0]                        
__________________________________________________________________________________________________
Dropout_1 (Dropout)             (None, 6, 16)        0           LSTM_1[0][0]                     
__________________________________________________________________________________________________
LSTM_2 (LSTM)                   (None, 16)           2112        Dropout_1[0][0]                  
__________________________________________________________________________________________________
Dropout_2 

In [8]:
# Column-wise mean of the first target array, reported separately for the rows
# before index 18714 and for the rows from 18714 onwards.
# NOTE(review): 18714 is a hard-coded row boundary whose meaning is not shown
# here — consider naming it.
tuple(
    part.sum(axis=0) / part.shape[0]
    for part in (y[0][:18714, :], y[0][18714:, :])
)

(array([0.02244309, 0.95644972, 0.02110719]),
 array([0.01478341, 0.9665503 , 0.01866629]))

In [9]:
# Train for two epochs in mini-batches of 32, evaluating on the held-out split
# after each epoch. Left as the cell's last expression so the History object is
# still displayed.
model.fit(
    x=X_train,
    y=Y_train,
    batch_size=32,
    epochs=2,
    validation_data=(X_test, Y_test),
)

Train on 44447 samples, validate on 2339 samples
Epoch 1/2
Epoch 2/2


<keras.callbacks.History at 0x116ad0cf8>