In [1]:
import pandas as pd
from scipy.stats import zscore
import numpy as np
from sklearn.model_selection import train_test_split

Paper: https://arxiv.org/pdf/1808.03668.pdf

### Data of XBTUSD 10 level orderbook of 2020-09-01

In [2]:
d = pd.read_csv("bitmex_book_snapshot_25_2020-09-01_XBTUSD.csv")
d = d.sort_values(by = 'timestamp')
cols =    ['asks['+str(i)+'].price' for i in range(10)] \
        + ['asks['+str(i)+'].amount' for i in range(10)] \
        + ['bids['+str(i)+'].price' for i in range(10)] \
        + ['bids['+str(i)+'].amount' for i in range(10)] 
d = d[cols]
d = d.apply(zscore)
d = d.head(500000) # not enough power to run the full data

In [3]:
d.shape

(500000, 40)

In [4]:
d

Unnamed: 0,asks[0].price,asks[1].price,asks[2].price,asks[3].price,asks[4].price,asks[5].price,asks[6].price,asks[7].price,asks[8].price,asks[9].price,...,bids[0].amount,bids[1].amount,bids[2].amount,bids[3].amount,bids[4].amount,bids[5].amount,bids[6].amount,bids[7].amount,bids[8].amount,bids[9].amount
0,-1.715884,-1.716104,-1.716244,-1.716319,-1.716381,-1.716440,-1.716476,-1.716506,-1.716525,-1.716545,...,0.534043,-0.333162,-0.154572,-0.344645,-0.372711,-0.334547,-0.406668,-0.004461,-0.002913,-0.446469
1,-1.715884,-1.716104,-1.716244,-1.716319,-1.716381,-1.716440,-1.716476,-1.716506,-1.716525,-1.716545,...,0.534043,-0.333162,-0.154572,-0.344645,-0.372711,-0.334547,-0.406668,-0.004461,-0.002913,-0.446469
2,-1.715884,-1.716104,-1.716244,-1.716319,-1.716381,-1.716440,-1.716476,-1.716506,-1.716525,-1.716545,...,0.534043,-0.333162,-0.154572,-0.344645,-0.372711,-0.334547,-0.406668,-0.004461,-0.002913,1.046836
3,-1.715884,-1.716104,-1.716244,-1.716319,-1.716381,-1.716440,-1.716476,-1.716506,-1.716525,-1.716545,...,0.534043,-0.333162,-0.154572,-0.344645,-0.372711,-0.334547,-0.406668,-0.004461,-0.002913,1.047677
4,-1.715884,-1.716104,-1.716244,-1.716319,-1.716381,-1.716440,-1.716476,-1.716506,-1.716525,-1.716545,...,0.534043,-0.333162,-0.154572,-0.344645,-0.372711,-0.334547,-0.406668,-0.004461,-0.002913,1.047677
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
499995,-1.926267,-1.926490,-1.926630,-1.926703,-1.926764,-1.926822,-1.926857,-1.926886,-1.926903,-1.926923,...,-0.433030,-0.383804,-0.346235,0.282216,0.353270,0.610318,1.691306,3.899836,0.080415,1.173897
499996,-1.926267,-1.926490,-1.926630,-1.926703,-1.926764,-1.926822,-1.926857,-1.926886,-1.926903,-1.926923,...,-0.397425,-0.383804,-0.346235,0.282216,0.353270,0.610318,1.691306,3.899836,0.080415,1.173897
499997,-1.926267,-1.926490,-1.926630,-1.926703,-1.926764,-1.926822,-1.926857,-1.926886,-1.926903,-1.926923,...,-0.397425,-0.383804,-0.346235,0.282216,0.392052,0.610318,1.691306,3.899836,0.080415,1.173897
499998,-1.926267,-1.926490,-1.926630,-1.926703,-1.926764,-1.926822,-1.926857,-1.926886,-1.926903,-1.926923,...,-0.397425,-0.383804,-0.346235,0.282216,0.392052,0.596976,1.691306,3.899836,0.080415,1.173897


In [5]:
def get_x_y(df, w = 100, a = 7e-6):
    temp = df.copy()
    temp["mid"] = (temp['asks[0].price']*temp['bids[0].amount'] + temp['bids[0].price']*temp['asks[0].amount']) / (temp['bids[0].amount'] + temp['asks[0].amount'])
    temp["mprev"] = temp.mid.rolling(w).mean()
    temp["maft"] = temp.mprev.shift(-1*temp.mprev.isna().sum())
    temp = temp.dropna()
    temp["move"] = (temp.maft - temp.mid)/temp.mid
    temp["label"] = (-(temp.move < -a).astype(int))  + (temp.move > a).astype(int)
    return temp.iloc[:, :40], np.array(temp.iloc[:, -1])

In [6]:
x, y  = get_x_y(d, 26)

In [7]:
x.shape, y.shape

((499950, 40), (499950,))

#### X and y need reshaping

In [8]:
print("before: ", x.shape)
timestamp_per_sample = 100
data_x = np.array(x)
[N, P_x] = data_x.shape
xt = np.zeros([(N-timestamp_per_sample+1), timestamp_per_sample, P_x])
    
for i in range(N-timestamp_per_sample+1):
    xt[i] = data_x[i:(i+timestamp_per_sample), :]
        
xt = xt.reshape(xt.shape + (1,))
print("after: ", xt.shape)

before:  (499950, 40)
after:  (499851, 100, 40, 1)


In [9]:
print("before: ", y.shape)
yt = pd.get_dummies(y).values
yt = yt[timestamp_per_sample -1:]
print("after: ", yt.shape)

before:  (499950,)
after:  (499851, 3)


with open('x.npy', 'wb') as f:
    np.save(f, xt)
with open('y.npy', 'wb') as f:
    np.save(f, yt)

In [10]:
X_train, X_test, y_train, y_test = train_test_split(xt,yt,test_size = 0.2)
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((399880, 100, 40, 1), (99971, 100, 40, 1), (399880, 3), (99971, 3))

### Model and training

In [11]:
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Input, Conv2D, LeakyReLU, MaxPooling2D, concatenate, LSTM, Reshape, Dense
from tensorflow.keras.callbacks import EarlyStopping

# import pandas_market_calendars as mcal

def initiate_DeepLOB_model(lookback_timestep, feature_num, conv_filter_num, inception_num, LSTM_num, leaky_relu_alpha,
                          loss, optimizer, metrics):
    
    input_tensor = Input(shape=(lookback_timestep, feature_num, 1))
    
    # Conv block1
    print(input_tensor.shape)
    conv_layer1 = Conv2D(conv_filter_num, (1,2), strides=(1, 2))(input_tensor)
    print(conv_layer1.shape)
    conv_layer1 =LeakyReLU(alpha=leaky_relu_alpha)(conv_layer1)
    print(conv_layer1.shape)
    conv_layer1 = Conv2D(conv_filter_num, (4,1), padding='same')(conv_layer1)
    conv_first1 = LeakyReLU(alpha=leaky_relu_alpha)(conv_layer1)
    print(conv_layer1.shape)
    conv_layer1 = Conv2D(conv_filter_num, (4,1), padding='same')(conv_layer1)
    conv_layer1 = LeakyReLU(alpha=leaky_relu_alpha)(conv_layer1)
    print(conv_layer1.shape)

    # Conv block2
    conv_layer2 = Conv2D(conv_filter_num, (1,2), strides=(1, 2))(conv_layer1)
    conv_layer2 = LeakyReLU(alpha=leaky_relu_alpha)(conv_layer2)
    print(conv_layer2.shape)
    conv_layer2 = Conv2D(conv_filter_num, (4,1), padding='same')(conv_layer2)
    conv_layer2 = LeakyReLU(alpha=leaky_relu_alpha)(conv_layer2)
    print(conv_layer2.shape)
    conv_layer2 = Conv2D(conv_filter_num, (4,1), padding='same')(conv_layer2)
    conv_layer2 = LeakyReLU(alpha=leaky_relu_alpha)(conv_layer2)
    print(conv_layer2.shape)

    # Conv block3
    conv_layer3 = Conv2D(conv_filter_num, (1,10))(conv_layer2)
    conv_layer3 = LeakyReLU(alpha=leaky_relu_alpha)(conv_layer3)
    print(conv_layer3.shape)
    conv_layer3 = Conv2D(conv_filter_num, (4,1), padding='same')(conv_layer3)
    conv_layer3 = LeakyReLU(alpha=leaky_relu_alpha)(conv_layer3)
    print(conv_layer3.shape)
    conv_layer3 = Conv2D(conv_filter_num, (4,1), padding='same')(conv_layer3)
    conv_layer3 = LeakyReLU(alpha=leaky_relu_alpha)(conv_layer3)
    print(conv_layer3.shape)
    
    # Inception module
    inception_module1 = Conv2D(inception_num, (1,1), padding='same')(conv_layer3)
    inception_module1 = LeakyReLU(alpha=leaky_relu_alpha)(inception_module1)
    print(inception_module1.shape)
    inception_module1 = Conv2D(inception_num, (3,1), padding='same')(inception_module1)
    inception_module1 = LeakyReLU(alpha=leaky_relu_alpha)(inception_module1)
    print(inception_module1.shape)

    inception_module2 = Conv2D(inception_num, (1,1), padding='same')(conv_layer3)
    inception_module2 = LeakyReLU(alpha=leaky_relu_alpha)(inception_module2)
    print(inception_module2.shape)
    inception_module2 = Conv2D(inception_num, (5,1), padding='same')(inception_module2)
    inception_module2 = LeakyReLU(alpha=leaky_relu_alpha)(inception_module2)
    print(inception_module2.shape)

    inception_module3 = MaxPooling2D((3,1), strides=(1,1), padding='same')(conv_layer3)
    print(inception_module3.shape)
    inception_module3 = Conv2D(inception_num, (1,1), padding='same')(inception_module3)
    print(inception_module3.shape)
    inception_module3 = LeakyReLU(alpha=leaky_relu_alpha)(inception_module3)
    print(inception_module3.shape)
    
    inception_module_final = concatenate([inception_module1, inception_module2, inception_module3], axis=3)
    print(inception_module_final.shape)
    inception_module_final = Reshape((inception_module_final.shape[1], inception_module_final.shape[3]))(inception_module_final)
    print(inception_module_final.shape)

    # LSTM
    LSTM_output = LSTM(LSTM_num)(inception_module_final)
    print(LSTM_output.shape)

    # Fully Connected Layer with softmax activation function for output
    model_output = Dense(3, activation='softmax')(LSTM_output)
    print(model_output.shape)
    
    DeepLOB_model = Model(inputs=input_tensor, outputs= model_output)  
    es = EarlyStopping(monitor='val_accuracy', mode='max', verbose=1)
    
    DeepLOB_model.compile(optimizer=optimizer, loss=loss, metrics=metrics)

    return DeepLOB_model

In [None]:
#Input param
lookback_timestep = 100
feature_num = 40

#Conv param
conv_filter_num = 16

#Inception module param
inception_num = 32

#LSTM param
LSTM_num = 64

#Activation param
leaky_relu_alpha = 0.01

#Training params
loss = 'categorical_crossentropy'
learning_rate = 0.01
adam_epsilon = 1
optimizer = Adam(learning_rate=learning_rate, epsilon=1)
batch_size = 32

#Training stopping Criteria
metrics = ['accuracy']
#stop training when validation accuracy does not improve for 20 epochs
stop_epoch_num = 20

#max epoch num is not specified in paper, use an arbitrary large number 10000
num_epoch = 10000

DeepLOB_model = initiate_DeepLOB_model(lookback_timestep, feature_num, conv_filter_num, inception_num, LSTM_num, leaky_relu_alpha,
                          loss, optimizer, metrics)

# definte the training stop criteria (no new max validation accuracy in 20 consecutive epochs)
es = EarlyStopping(monitor='val_accuracy', mode='max', patience = stop_epoch_num, verbose=1)
history = DeepLOB_model.fit(X_train, y_train, epochs=num_epoch, batch_size=batch_size, verbose=2, validation_data=(X_test, y_test), callbacks = [es])

(None, 100, 40, 1)


2023-01-04 17:45:33.615475: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-01-04 17:45:37.008038: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 30985 MB memory:  -> device: 0, name: Tesla V100-SXM2-32GB, pci bus id: 0000:8a:00.0, compute capability: 7.0


(None, 100, 20, 16)
(None, 100, 20, 16)
(None, 100, 20, 16)
(None, 100, 20, 16)
(None, 100, 10, 16)
(None, 100, 10, 16)
(None, 100, 10, 16)
(None, 100, 1, 16)
(None, 100, 1, 16)
(None, 100, 1, 16)
(None, 100, 1, 32)
(None, 100, 1, 32)
(None, 100, 1, 32)
(None, 100, 1, 32)
(None, 100, 1, 16)
(None, 100, 1, 32)
(None, 100, 1, 32)
(None, 100, 1, 96)
(None, 100, 96)
(None, 64)
(None, 3)
Epoch 1/10000


2023-01-04 17:45:58.385602: I tensorflow/stream_executor/cuda/cuda_dnn.cc:366] Loaded cuDNN version 8204


12497/12497 - 248s - loss: 0.8906 - accuracy: 0.6211 - val_loss: 0.7724 - val_accuracy: 0.6887 - 248s/epoch - 20ms/step
Epoch 2/10000
12497/12497 - 228s - loss: 0.7438 - accuracy: 0.6899 - val_loss: 0.7158 - val_accuracy: 0.6995 - 228s/epoch - 18ms/step
Epoch 3/10000
12497/12497 - 224s - loss: 0.7065 - accuracy: 0.7017 - val_loss: 0.6958 - val_accuracy: 0.7046 - 224s/epoch - 18ms/step
Epoch 4/10000
12497/12497 - 224s - loss: 0.6931 - accuracy: 0.7068 - val_loss: 0.6856 - val_accuracy: 0.7097 - 224s/epoch - 18ms/step
Epoch 5/10000
12497/12497 - 226s - loss: 0.6849 - accuracy: 0.7095 - val_loss: 0.6761 - val_accuracy: 0.7135 - 226s/epoch - 18ms/step
Epoch 6/10000
12497/12497 - 225s - loss: 0.6792 - accuracy: 0.7113 - val_loss: 0.6736 - val_accuracy: 0.7124 - 225s/epoch - 18ms/step
Epoch 7/10000
12497/12497 - 225s - loss: 0.6737 - accuracy: 0.7122 - val_loss: 0.6786 - val_accuracy: 0.7101 - 225s/epoch - 18ms/step
Epoch 8/10000
12497/12497 - 225s - loss: 0.6697 - accuracy: 0.7131 - val_los

#### Data prep and training require massive amounts of memory and processing power.