In [5]:
# load packages
import os
import pickle

import numpy as np
import pandas as pd
import tensorflow as tf # tensorflow v2.2.1
# from tensorflow.compat.v1.contrib.cudnn_rnn.python.ops import cudnn_rnn_ops
import keras
from keras import backend as K
from keras.models import load_model, Model
from keras.layers import Flatten, Dense, Dropout, Activation, Input, LSTM, Reshape, Conv2D, MaxPooling2D
from keras.optimizers import Adam
from keras.layers.advanced_activations import LeakyReLU
from keras.backend.tensorflow_backend import set_session
from keras.utils import np_utils
import matplotlib.pyplot as plt

from sqlalchemy import create_engine
import psycopg2
import sqlalchemy

# set random seeds
# Seeds the global NumPy and TensorFlow random generators so that runs are
# repeatable as far as those two libraries are concerned.
np.random.seed(1)
tf.random.set_seed(2)

# limit gpu usage for keras
# allow_growth makes TensorFlow claim GPU memory on demand instead of
# reserving it all at start-up.
config = tf.compat.v1.ConfigProto()
config.gpu_options.allow_growth = True
# NOTE(review): the session is registered with tf.compat.v1.keras, while the
# model below is built with the standalone `keras` package - confirm that the
# standalone backend actually picks this session up.
tf.compat.v1.keras.backend.set_session(tf.compat.v1.Session(config=config))

Using TensorFlow backend.


In [7]:
## Model Architect
def _conv_lrelu(tensor, filters, kernel_size, **conv_kwargs):
    """Apply Conv2D followed by LeakyReLU(alpha=0.01) and return the result."""
    tensor = Conv2D(filters, kernel_size, **conv_kwargs)(tensor)
    return keras.layers.LeakyReLU(alpha=0.01)(tensor)


def create_deeplob(T, NF, number_of_lstm):
    """Build and compile the convolution + inception + LSTM classifier.

    Parameters
    ----------
    T : int
        Number of time steps (rows) in one input window.
    NF : int
        Number of features (columns) per time step. Two stride-2 convolutions
        halve this axis twice, so NF must be at least 4.
    number_of_lstm : int
        Number of units in the LSTM layer.

    Returns
    -------
    keras.models.Model
        Model taking inputs of shape ``(T, NF, 1)`` and producing a 3-class
        softmax, compiled with Adam, categorical cross-entropy and accuracy.
    """
    input_lmd = Input(shape=(T, NF, 1))

    # build the convolutional block
    # Two identical stages: a (1, 2)/stride-(1, 2) convolution halves the
    # feature axis, then two (4, 1) 'same' convolutions mix along time.
    conv_first1 = input_lmd
    for _ in range(2):
        conv_first1 = _conv_lrelu(conv_first1, 32, (1, 2), strides=(1, 2))
        conv_first1 = _conv_lrelu(conv_first1, 32, (4, 1), padding='same')
        conv_first1 = _conv_lrelu(conv_first1, 32, (4, 1), padding='same')

    # Third stage: collapse whatever is left of the feature axis to width 1.
    # The kernel width is taken from the tensor instead of being hard-coded to
    # 10, so the network is not tied to NF == 40 (for which the width is 10).
    remaining_width = int(conv_first1.shape[2])
    conv_first1 = _conv_lrelu(conv_first1, 32, (1, remaining_width))
    conv_first1 = _conv_lrelu(conv_first1, 32, (4, 1), padding='same')
    conv_first1 = _conv_lrelu(conv_first1, 32, (4, 1), padding='same')

    # build the inception module
    # Branch 1: 1x1 then 3x1 convolution.
    convsecond_1 = _conv_lrelu(conv_first1, 64, (1, 1), padding='same')
    convsecond_1 = _conv_lrelu(convsecond_1, 64, (3, 1), padding='same')

    # Branch 2: 1x1 then 5x1 convolution.
    convsecond_2 = _conv_lrelu(conv_first1, 64, (1, 1), padding='same')
    convsecond_2 = _conv_lrelu(convsecond_2, 64, (5, 1), padding='same')

    # Branch 3: 3x1 max-pooling then 1x1 convolution.
    convsecond_3 = MaxPooling2D((3, 1), strides=(1, 1), padding='same')(conv_first1)
    convsecond_3 = _conv_lrelu(convsecond_3, 64, (1, 1), padding='same')

    # Stack the three 64-channel branches along the channel axis.
    convsecond_output = keras.layers.concatenate([convsecond_1, convsecond_2, convsecond_3], axis=3)

    # Drop the width-1 feature axis so the LSTM receives (time, channels).
    # (No dropout is applied anywhere in this model.)
    conv_reshape = Reshape((int(convsecond_output.shape[1]), int(convsecond_output.shape[3])))(convsecond_output)

    # build the last LSTM layer
    conv_lstm = LSTM(number_of_lstm)(conv_reshape)

    # build the output layer
    out = Dense(3, activation='softmax')(conv_lstm)
    model = Model(inputs=input_lmd, outputs=out)
    # NOTE(review): epsilon=1 is far larger than Adam's usual tiny epsilon and
    # strongly damps the update size - presumably intentional; confirm.
    adam = keras.optimizers.Adam(lr=0.01, beta_1=0.9, beta_2=0.999, epsilon=1)
    model.compile(optimizer=adam, loss='categorical_crossentropy', metrics=['accuracy'])

    return model

# Build the model: windows of T=100 time steps, NF=40 features, 64 LSTM units.
deeplob = create_deeplob(100, 40, 64)

In [27]:
# Load postgreSQL data

# Prepare data
def _fetch_lob_split(index, conn, is_training, limit):
    """Fetch features and transformed labels for one RandID and one split.

    Shared implementation behind get_SQLtrain1 and get_SQLtest1, which differ
    only in the value of the ``istrainingdata`` filter.
    """
    # Coerce to int before interpolating so that no arbitrary text can ever
    # reach the SQL statement (raises ValueError/TypeError on bad input).
    rand_id = int(index)
    training_flag = 1 if is_training else 0
    limit_clause = '' if limit is None else f'LIMIT {int(limit)}'

    # NOTE(review): neither query has an ORDER BY, so the database is free to
    # return rows in any order. Both the time ordering of the rows and the
    # row-by-row pairing of features with labels rely on the two tables coming
    # back in the same order - add an ORDER BY on a suitable key column.
    lob_query = f'''SELECT * FROM "NoAuction_DecPre"
                    WHERE "RandID" = {rand_id}
                    AND istrainingdata = {training_flag}
                    {limit_clause};'''
    lob = pd.read_sql(lob_query, conn).loc[:, 'pa1':'vb10']

    label_query = f'''SELECT * FROM "NoAuction_DecPre_Label"
                      WHERE "RandID" = {rand_id}
                      AND istrainingdata = {training_flag}
                      {limit_clause};'''
    labels = pd.read_sql(label_query, conn).loc[:, 'direction']
    # Same arithmetic as before (algebraically 1 - direction).
    labels = labels * (-1) + 2 - 1

    return lob, labels


def get_SQLtrain1(index, conn, limit=200):
    """Load the training rows of one RandID chunk.

    Parameters
    ----------
    index : int
        Value matched against the "RandID" column.
    conn : database connection accepted by ``pandas.read_sql``
    limit : int or None, optional
        Maximum number of rows fetched from each table. Defaults to 200, the
        value previously hard-coded "for testing only"; pass None to fetch
        every matching row.

    Returns
    -------
    (pandas.DataFrame, pandas.Series)
        The columns 'pa1' through 'vb10' of "NoAuction_DecPre", and the
        'direction' column of "NoAuction_DecPre_Label" mapped through
        ``direction * (-1) + 2 - 1``.

    Raises
    ------
    ValueError, TypeError
        If ``index`` (or ``limit``) cannot be converted to an integer.
    """
    return _fetch_lob_split(index, conn, is_training=True, limit=limit)


def get_SQLtest1(index, conn, limit=200):
    """Load the test rows (``istrainingdata = 0``) of one RandID chunk.

    Parameters, return value and exceptions are the same as for
    get_SQLtrain1; only the ``istrainingdata`` filter differs.
    """
    return _fetch_lob_split(index, conn, is_training=False, limit=limit)

def data_classification(X, Y, T):
    """Cut a feature table into overlapping look-back windows with labels.

    Window ``w`` holds rows ``w .. w + T - 1`` of ``X`` and is paired with the
    label of its last row, ``Y[w + T - 1]``.

    Parameters
    ----------
    X : array-like of shape (N, D)
        One row per time step (a DataFrame or 2-D array).
    Y : array-like with at least N entries
        One label per row of ``X``.
    T : int
        Window length in rows.

    Returns
    -------
    dataX : numpy.ndarray of shape (N - T + 1, T, D, 1), dtype float64
    dataY : numpy.ndarray holding ``Y[T - 1:N]``
        When ``N < T`` no window fits and both arrays are empty (previously a
        negative-dimension error was raised for ``N < T - 1``).

    Raises
    ------
    ValueError
        If ``Y`` has fewer entries than ``X`` has rows, which would otherwise
        silently produce a different number of samples and labels.
    """
    features = np.asarray(X)
    labels = np.asarray(Y)
    n_rows, n_features = features.shape

    if labels.shape[0] < n_rows:
        raise ValueError(
            f'Y has {labels.shape[0]} entries but X has {n_rows} rows; '
            'every row of X needs a label'
        )

    n_windows = max(n_rows - T + 1, 0)

    # row_index[w, t] = w + t, so indexing with it gathers every window in a
    # single operation instead of one Python-level copy per window.
    row_index = np.arange(n_windows)[:, None] + np.arange(T)[None, :]
    dataX = features[row_index].astype(np.float64, copy=False)
    dataX = dataX.reshape((n_windows, T, n_features))

    dataY = labels[T - 1:n_rows]

    # Trailing axis of size 1 is the channel axis expected by Conv2D.
    return dataX.reshape(dataX.shape + (1,)), dataY

In [25]:
# Connect to postgresql on VM
# The connection URL (user, password, host, database) is read from the
# environment so that no credential is stored in the notebook, e.g.
#   export HFTLOB_DB_URL='postgresql://<user>:<password>@<host>/<database>'
db_url = os.environ.get('HFTLOB_DB_URL')
if not db_url:
    raise RuntimeError(
        'Set the HFTLOB_DB_URL environment variable to the PostgreSQL '
        'connection URL before running this cell.'
    )
engine = create_engine(db_url)

In [26]:
# Train chunk by chunk: for each RandID, load its training and test rows, cut
# them into windows of the past WINDOW observations, and fit the model on them.
WINDOW = 100  # look-back length; must equal the T the model was built with
with engine.connect() as conn:
    for i in range(100):
        sqlTrainLOB, sqlTrainLabel = get_SQLtrain1(i, conn)
        sqlTestLOB, sqlTestLabel = get_SQLtest1(i, conn)

        # A chunk with fewer rows than one window yields no sample at all;
        # skip it instead of letting it abort the whole training run.
        if len(sqlTrainLOB) < WINDOW or len(sqlTestLOB) < WINDOW:
            print(f'RandID {i}: skipped, fewer than {WINDOW} train or test rows')
            continue

        trainX_CNN, trainY_CNN = data_classification(sqlTrainLOB, sqlTrainLabel, T=WINDOW)
        trainY_CNN = np_utils.to_categorical(trainY_CNN, 3)

        testX_CNN, testY_CNN = data_classification(sqlTestLOB, sqlTestLabel, T=WINDOW)
        testY_CNN = np_utils.to_categorical(testY_CNN, 3)

        deeplob.fit(trainX_CNN, trainY_CNN, epochs=200, batch_size=64, verbose=2, validation_data=(testX_CNN, testY_CNN))

(200, 40)
(101, 100, 40)
(101, 100, 40)
Train on 101 samples, validate on 101 samples
Epoch 1/200
 - 1s - loss: 1.0985 - accuracy: 0.3564 - val_loss: 1.0979 - val_accuracy: 0.8515
Epoch 2/200
 - 1s - loss: 1.0979 - accuracy: 0.7228 - val_loss: 1.0966 - val_accuracy: 0.8515
Epoch 3/200
 - 1s - loss: 1.0969 - accuracy: 0.7228 - val_loss: 1.0951 - val_accuracy: 0.8515
Epoch 4/200
 - 1s - loss: 1.0957 - accuracy: 0.7228 - val_loss: 1.0933 - val_accuracy: 0.8515
Epoch 5/200
 - 1s - loss: 1.0944 - accuracy: 0.7228 - val_loss: 1.0912 - val_accuracy: 0.8515
Epoch 6/200


KeyboardInterrupt: 

In [None]:
## Model Training
# NOTE(review): trainX_CNN / trainY_CNN / testX_CNN / testY_CNN are not defined
# in this cell; it trains on whatever another cell assigned to them last, so
# the result depends on which cells were run before this one.
deeplob.fit(trainX_CNN, trainY_CNN, epochs=100, batch_size=32, verbose=2, validation_data=(testX_CNN, testY_CNN))

In [None]:
# Train on RandID chunks 0-9 and, after each chunk, evaluate on the test rows
# of RandID 1.
# NOTE(review): the same test windows are also passed as validation_data during
# fit - confirm that is intended.
with engine.connect() as conn:
    sqlTestLOB, sqlTestLabel = get_SQLtest1(1, conn)
    testX_CNN, testY_CNN = data_classification(sqlTestLOB, sqlTestLabel, T=100)
    testY_CNN = np_utils.to_categorical(testY_CNN, 3)

    for i in range(10):
        sqlTrainLOB, sqlTrainLabel = get_SQLtrain1(i, conn)
        trainX_CNN, trainY_CNN = data_classification(sqlTrainLOB, sqlTrainLabel, T=100)
        trainY_CNN = np_utils.to_categorical(trainY_CNN, 3)

        deeplob.fit(trainX_CNN, trainY_CNN, epochs=5, batch_size=32, verbose=2, validation_data=(testX_CNN, testY_CNN))

        # evaluate() returns [loss, accuracy] because the model is compiled
        # with metrics=['accuracy']. Inside a loop the return value is not
        # displayed by the notebook, so capture and print it explicitly.
        test_loss, test_accuracy = deeplob.evaluate(testX_CNN, testY_CNN, batch_size=32, verbose=0)
        print(f'after RandID {i}: test loss = {test_loss:.4f}, test accuracy = {test_accuracy:.4f}')