In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras import Sequential, models
from tensorflow.keras import optimizers
from tensorflow.keras.layers import Conv2D, LeakyReLU, Input, MaxPooling2D, concatenate, Reshape, LSTM, Dense
from tensorflow.keras.utils import to_categorical

In [2]:
def prepare_x(data):
    """Return the leading rows of ``data`` as the columns of a new array.

    At most the first 40 rows are sliced out and transposed, so every
    original column becomes one output row. ``np.array`` copies the result,
    so the returned array does not alias ``data``.
    """
    leading_rows = data[:40, :]
    features = np.array(leading_rows.T)
    return features

def get_label(data):
    """Return the last five rows of ``data``, transposed.

    Every original column becomes one output row holding its final five
    entries. The result is produced by slicing and ``.T`` only; no explicit
    copy is made here.
    """
    trailing_rows = data[-5:, :]
    return trailing_rows.T

def data_classification(X, Y, T):
    """Build overlapping length-``T`` windows over the rows of ``X``.

    Window ``k`` holds rows ``k`` .. ``k + T - 1`` of ``X`` and is paired with
    row ``k + T - 1`` of ``Y``, i.e. the entry of ``Y`` at the window's last row.

    Parameters
    ----------
    X : ndarray of shape (N, D)
        One observation per row.
    Y : array-like
        Indexed along its first axis with the same row numbers as ``X``.
    T : int
        Window length; must satisfy ``1 <= T <= N``.

    Returns
    -------
    dataX : ndarray of shape (N - T + 1, T, D, 1)
        The windows, with a trailing singleton axis appended. Allocated with
        ``np.zeros`` and no explicit dtype.
    dataY : ndarray
        ``Y[T - 1:N]`` as an array, one entry per window.

    Raises
    ------
    ValueError
        If ``T`` is smaller than 1 or larger than the number of rows in ``X``.
    """
    [N, D] = X.shape
    # Fail early with a readable message; without this check T > N surfaces as
    # NumPy's "negative dimensions" error and T = 0 yields empty windows.
    if not 1 <= T <= N:
        raise ValueError(
            "window length T must satisfy 1 <= T <= %d (rows in X), got %r" % (N, T)
        )

    df = np.array(X)
    dY = np.array(Y)

    num_windows = N - T + 1
    dataX = np.zeros((num_windows, T, D))
    for start in range(num_windows):
        dataX[start] = df[start:start + T, :]

    # Entry for each window is the one at the window's final row.
    dataY = dY[T - 1:N]

    return dataX.reshape(dataX.shape + (1,)), dataY

In [3]:
# Read the training matrix, then the three test matrices, from plain-text files
# in the working directory (same files and load order as before).
dec_train = np.loadtxt('Train_Dst_NoAuction_DecPre_CF_7.txt')
dec_test1, dec_test2, dec_test3 = (
    np.loadtxt(test_path)
    for test_path in (
        'Test_Dst_NoAuction_DecPre_CF_7.txt',
        'Test_Dst_NoAuction_DecPre_CF_8.txt',
        'Test_Dst_NoAuction_DecPre_CF_9.txt',
    )
)

In [8]:
# Dimensions of the raw training matrix as loaded from disk.
dec_train.shape

(149, 254750)

In [4]:
# Feature matrix for the training data; the data-preparation cell further down
# recomputes this same value.
train_lob = prepare_x(dec_train)

In [5]:
# Peek at the feature matrix (NumPy abbreviates the repr of large arrays).
train_lob

array([[0.2615 , 0.00353, 0.2606 , ..., 0.00311, 0.2579 , 0.00128],
       [0.2615 , 0.00211, 0.2606 , ..., 0.00138, 0.2588 , 0.00123],
       [0.2614 , 0.00122, 0.2606 , ..., 0.00311, 0.2588 , 0.00123],
       ...,
       [0.3531 , 0.00775, 0.3527 , ..., 0.00044, 0.3515 , 0.005  ],
       [0.3531 , 0.00775, 0.3527 , ..., 0.00496, 0.3514 , 0.01091],
       [0.3532 , 0.002  , 0.3527 , ..., 0.02366, 0.3514 , 0.01091]])

In [9]:
# After the transpose in prepare_x: one row per observation, 40 feature columns.
train_lob.shape

(254750, 40)

In [6]:
# Label matrix for the training data; the data-preparation cell further down
# recomputes this same value.
train_label = get_label(dec_train)

In [7]:
# Peek at the label matrix (NumPy abbreviates the repr of large arrays).
train_label

array([[2., 2., 2., 2., 2.],
       [2., 2., 2., 2., 2.],
       [3., 3., 2., 2., 2.],
       ...,
       [2., 2., 2., 2., 1.],
       [2., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.]])

In [8]:
# After the transpose in get_label: one row per observation, 5 label columns.
train_label.shape

(254750, 5)

In [None]:
# Join the three test matrices column-wise into a single test matrix.
dec_test = np.hstack((dec_test1, dec_test2, dec_test3))

# Settings for the windowing and label steps below (named instead of inline literals).
LOOKBACK = 100       # number of past observations fed to the model per sample
HORIZON_COLUMN = 3   # label column to predict; presumably one column per prediction horizon - confirm against the dataset description
NUM_CLASSES = 3      # labels take the values 1, 2, 3 and are shifted to 0-based below

# extract limit order book data from the FI-2010 dataset
train_lob = prepare_x(dec_train)
test_lob = prepare_x(dec_test)

# extract label from the FI-2010 dataset
train_label = get_label(dec_train)
test_label = get_label(dec_test)

# prepare training data. We feed the past LOOKBACK observations into our algorithms and choose the prediction horizon.
trainX_CNN, trainY_CNN = data_classification(train_lob, train_label, T=LOOKBACK)
trainY_CNN = trainY_CNN[:, HORIZON_COLUMN] - 1   # shift labels from 1..3 to 0..2
trainY_CNN = to_categorical(trainY_CNN, NUM_CLASSES)

# prepare test data.
testX_CNN, testY_CNN = data_classification(test_lob, test_label, T=LOOKBACK)
testY_CNN = testY_CNN[:, HORIZON_COLUMN] - 1     # shift labels from 1..3 to 0..2
testY_CNN = to_categorical(testY_CNN, NUM_CLASSES)

In [7]:
# Network input: one sample is a 100 x 40 window with a single channel.
input_shape = Input(shape=(100, 40, 1), name = "input_shape")

# Convolutional block 1: the (1,2) kernel with stride (1,2) halves the width
# (40 -> 20); the two (4,1) 'same' convolutions keep the shape.
layer1 = Conv2D(16, (1,2), strides = (1,2))(input_shape)
layer1_act = LeakyReLU(alpha = 0.01)(layer1)
layer2 = Conv2D(16, (4,1), padding = 'same')(layer1_act)
layer2_act = LeakyReLU(alpha = 0.01)(layer2)
layer2 = Conv2D(16, (4,1), padding = 'same')(layer2_act)
layer2_act = LeakyReLU(alpha = 0.01)(layer2)

# Convolutional block 2: halves the width again (20 -> 10).
layer3 = Conv2D(16, (1,2), strides = (1,2))(layer2_act)
layer3_act = LeakyReLU(alpha = 0.01)(layer3)
layer4 = Conv2D(16, (4,1), padding = 'same')(layer3_act)
layer4_act = LeakyReLU(alpha = 0.01)(layer4)
layer5 = Conv2D(16, (4,1), padding = 'same')(layer4_act)
layer5_act = LeakyReLU(alpha = 0.01)(layer5)

# Convolutional block 3: the unpadded (1,10) kernel collapses the remaining
# width of 10 to 1.
layer6 = Conv2D(16, (1,10))(layer5_act)
layer6_act = LeakyReLU(alpha = 0.01)(layer6)
layer7 = Conv2D(16, (4,1), padding = 'same')(layer6_act)
layer7_act = LeakyReLU(alpha = 0.01)(layer7)
layer8 = Conv2D(16, (4,1), padding = 'same')(layer7_act)
layer8_act = LeakyReLU(alpha = 0.01)(layer8)

#inception

# Branch 1: 1x1 convolution followed by a (3,1) convolution.
inception_layer1a = Conv2D(32, (1,1), padding = 'same')(layer8_act)
inception_layer1a_act = LeakyReLU(alpha = 0.01) (inception_layer1a)
inception_layer1b = Conv2D(32, (3,1), padding = 'same')(inception_layer1a_act)
inception_layer1b_act = LeakyReLU(alpha = 0.01) (inception_layer1b)

# Branch 2: 1x1 convolution followed by a (5,1) convolution.
inception_layer2a = Conv2D(32, (1,1), padding = 'same')(layer8_act)
inception_layer2a_act = LeakyReLU(alpha = 0.01) (inception_layer2a)
inception_layer2b = Conv2D(32, (5,1), padding = 'same')(inception_layer2a_act)
inception_layer2b_act = LeakyReLU(alpha = 0.01) (inception_layer2b)

# Branch 3: (3,1) max-pooling followed by a 1x1 convolution.
inception_layer3a = MaxPooling2D((3,1), strides = (1,1), padding = 'same')(layer8_act)
# The activation must consume this branch's own pooled tensor; feeding it
# branch 2's convolution would leave the pooling layer disconnected.
inception_layer3a_act = LeakyReLU(alpha = 0.01) (inception_layer3a)
inception_layer3b = Conv2D(32, (1,1), padding = 'same')(inception_layer3a_act)
inception_layer3b_act = LeakyReLU(alpha = 0.01) (inception_layer3b)

# Stack the three 32-channel branches along the channel axis.
inception_concatenate = concatenate([inception_layer1b_act, inception_layer2b_act, inception_layer3b_act])

# Drop the singleton width axis so the LSTM receives (time steps, channels).
inception_reshape = Reshape((int(inception_concatenate.shape[1]),int(inception_concatenate.shape[3])))(inception_concatenate)

# Recurrent summary of the sequence, then a 3-way softmax classifier.
layer9 = LSTM(64)(inception_reshape)
outputs = Dense(3, activation = 'softmax')(layer9)

In [8]:
# Wrap the functional graph (input tensor -> softmax output) in a Keras Model.
model = models.Model(inputs = input_shape, outputs = outputs )

In [9]:
# Optimizer hyper-parameters, passed to Adam when the model is compiled.
# NOTE(review): epsilon = 1 is far larger than the small value normally used as
# Adam's numerical-stability term - presumably deliberate; confirm.
epsilon = 1
learning_rate = 0.01

In [10]:
# Compile with Adam using the learning rate and epsilon defined above;
# categorical cross-entropy matches the one-hot encoded targets.
model.compile(optimizer = optimizers.Adam(learning_rate = learning_rate, epsilon = epsilon),
             loss = 'categorical_crossentropy',metrics = ['accuracy'])

In [12]:
# Print the layer-by-layer table: output shapes, parameter counts, connections.
model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_shape (InputLayer)        [(None, 100, 40, 1)] 0                                            
__________________________________________________________________________________________________
conv2d_3 (Conv2D)               (None, 100, 20, 16)  48          input_shape[0][0]                
__________________________________________________________________________________________________
leaky_re_lu_3 (LeakyReLU)       (None, 100, 20, 16)  0           conv2d_3[0][0]                   
__________________________________________________________________________________________________
conv2d_4 (Conv2D)               (None, 100, 20, 16)  1040        leaky_re_lu_3[0][0]              
______________________________________________________________________________________________

In [11]:
# Train for up to 200 epochs with mini-batches of 64, logging one line per epoch.
# NOTE(review): validation_data is the test split, so any tuning or early
# stopping based on these metrics would leak test information - consider
# holding out part of the training data instead.
model.fit(trainX_CNN, trainY_CNN, epochs=200, batch_size=64, verbose=2, validation_data=(testX_CNN, testY_CNN))

Train on 254651 samples, validate on 139488 samples
Epoch 1/200


KeyboardInterrupt: 