### **TransLOB** 
This is the implementation of the model TransLOB proposed in the paper *Transformers for limit order books by james wallbridge*

In [1]:
# load packages

import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from google.colab import drive
import torch
from tqdm import tqdm
from torch.utils import data
import torch.nn as nn
import torch.optim as optim
from keras import layers
from keras.layers import Conv1D, BatchNormalization
import tensorflow as tf

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

drive.mount('/content/drive')

Mounted at /content/drive


### **Data**


In [3]:
# please change the data_path to your local path and unzip the file

dec_data = np.loadtxt('/content/drive/MyDrive/Data/Train_Dst_NoAuction_ZScore_CF_7.txt')
dec_train = dec_data[:, :int(dec_data.shape[1] * 0.8)]
dec_val = dec_data[:, int(dec_data.shape[1] * 0.8):]

dec_test1 = np.loadtxt('/content/drive/MyDrive/Data/Test_Dst_NoAuction_ZScore_CF_7.txt')
dec_test2 = np.loadtxt('/content/drive/MyDrive/Data/Test_Dst_NoAuction_ZScore_CF_8.txt')
dec_test3 = np.loadtxt('/content/drive/MyDrive/Data/Test_Dst_NoAuction_ZScore_CF_9.txt')
dec_test = np.hstack((dec_test1, dec_test2, dec_test3))


horizon = 5         #if horizon = 5, than k = 10

y_train = dec_train[-horizon, :].flatten()

y_val = dec_val[-horizon, :].flatten()
y_test = dec_test[-horizon, :].flatten()

y_train = y_train[99:] - 1
y_val = y_val[99:] - 1
y_test = y_test[99:] - 1 

dec_train = dec_train[:40, :].T
dec_val = dec_val[:40, :].T
dec_test = dec_test[:40, :].T

print(dec_train.shape)

'''
data_path =  "/content/drive/MyDrive/Data2/DB2.npy"
dec = np.load(data_path)

print(dec.shape)
train_size = int(0.70 * dec.shape[0])
val_size = int(0.15 * dec.shape[0])

dec_train = dec[:train_size]
dec_val = dec[train_size:val_size+train_size]
dec_test = dec[val_size+train_size:]
'''

(203800, 40)


'\ndata_path =  "/content/drive/MyDrive/Data2/DB2.npy"\ndec = np.load(data_path)\n\nprint(dec.shape)\ntrain_size = int(0.70 * dec.shape[0])\nval_size = int(0.15 * dec.shape[0])\n\ndec_train = dec[:train_size]\ndec_val = dec[train_size:val_size+train_size]\ndec_test = dec[val_size+train_size:]\n'

In [4]:
def labeling(X, T):

  [N, D] = X.shape
  print(N)
  Y = np.zeros((X.shape[0] - 2*T + 1))
  alpha = 0.00072
  media = []
  for i in range(0, X.shape[0]- 2*T + 1):
    ask_minus = X[i:i+T, :1]
    bid_minus = X[i:i+T, 2:3]
    ask_plus = X[i+T:i+2*T, :1]
    bid_plus = X[i+T:i+2*T, 2:3]
    m_minus = (ask_minus + bid_minus) / 2
    m_minus = np.sum(m_minus) / T
    m_plus = (ask_plus + bid_plus) / 2
    m_plus = np.sum(m_plus) / T
    media.append((m_plus - m_minus) / m_minus)
    if (m_plus - m_minus) / m_minus < -alpha:
      label = 1
    elif (m_plus - m_minus) / m_minus > alpha:
      label = 0
    else:
      label = 2
    Y[i] = label
  
  plt.hist(Y)
  plt.show()

  return Y

In [5]:
#Create the dataset

class Dataset(data.Dataset):
    """Characterizes a dataset for PyTorch"""
    def __init__(self, x, y, num_classes, n, dim):
        """Initialization""" 
        self.num_classes = num_classes
        self.dim = dim
        self.x = x   
        self.y = y
        self.n = n

        self.length = x.shape[0] - T -self.dim + 1
        print(self.length)

        x = torch.from_numpy(x)
        self.x = torch.unsqueeze(x, 1)
        self.y = torch.from_numpy(y)

    def __len__(self):
        """Denotes the total number of samples"""
        return self.length

    def __getitem__(self, i):
        input = self.x[i:i+self.dim, :]
        input = input.permute(1, 0, 2)
        input = np.squeeze(input)
        input = input.permute(1, 0)
        return input, self.y[i]

#Hyperparameters
batch_size = 32
epochs = 100
T = 100   #horizon    
lr = 0.0001
num_classes = 3
dim = 100
n = 3 

#y_val = labeling(dec_val, T, dim)
#y_test = labeling(dec_test, T, dim)
#y_train = labeling(dec_train, T, dim)

dataset_val = Dataset(dec_val, y_val, num_classes, n, dim)
dataset_test = Dataset(dec_test, y_test, num_classes, n, dim)
dataset_train = Dataset(dec_train, y_train, num_classes, n, dim)

train_loader = torch.utils.data.DataLoader(dataset=dataset_train, batch_size=batch_size, shuffle=True)
val_loader = torch.utils.data.DataLoader(dataset=dataset_val, batch_size=batch_size, shuffle=False)
test_loader = torch.utils.data.DataLoader(dataset=dataset_test, batch_size=batch_size, shuffle=False)



50751
139388
203601


### **Model Architecture**
The model architecture is specified in the original paper.

In [9]:
from keras import layers
from keras.models import Model
from keras.layers import Input, Dense, Reshape
from keras.layers import Dropout, Activation, Lambda
from keras.layers import LSTM, Conv1D, Conv2D, Flatten
from keras.layers import MaxPooling1D, MaxPooling2D, Reshape, BatchNormalization
import tensorflow as tf
import keras.backend as K


def positional_encoding(x):
  n_levels = 100
  pos = np.arange(0, n_levels,  1, dtype=np.float32) / (n_levels-1)
  pos = (pos + pos) - 1
  #pos = np.reshape(pos, (pos.shape[0]))
  pos_final = np.zeros((x.shape[0], n_levels, 1), dtype=np.float32)
  for i in range(pos_final.shape[0]):
    for j in range(pos_final.shape[1]):
      pos_final[i, j, 0] = pos[j]

  pos_final = torch.tensor(pos_final).to(device)
  x = torch.cat((x, pos_final), 2)
  
  return x

class transformer(nn.Module):
    def __init__(self):
        super().__init__()

        #self.transformer = nn.Transformer(d_model=15, nhead=3, 
                                            #num_encoder_layers=2, num_decoder_layers=2, 
                                            #dim_feedforward=3000, dropout=0.0, norm_first=True,
                                            #batch_first=True, device=device)
        
        self.conv = nn.Sequential(
            nn.Conv1d(in_channels=40, out_channels=14, kernel_size=2, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv1d(in_channels=14, out_channels=14, kernel_size=2, dilation=2, padding=2),
            nn.ReLU(),
            nn.Conv1d(in_channels=14, out_channels=14, kernel_size=2, dilation=4, padding=4),
            nn.ReLU(),
            nn.Conv1d(in_channels=14, out_channels=14, kernel_size=2, dilation=8, padding=8),
            nn.ReLU(),
            nn.Conv1d(in_channels=14, out_channels=14, kernel_size=2, dilation=16, padding=16),
            nn.ReLU(),
            nn.BatchNorm1d(14),
        )

        self.dropout = nn.Dropout(0.1)

        self.activation = nn.ReLU()

        self.encoder_layer = nn.TransformerEncoderLayer(15, 3, 60, 0.0, batch_first=True, device=device)
        self.norm = nn.BatchNorm1d(100)
 
        self.transformer = nn.TransformerEncoder(self.encoder_layer, 2, norm=self.norm)
        
        self.fc1 = nn.Linear(1500, 64)
        self.fc2 = nn.Linear(64, 3)
     
    def forward(self, x):
        '''
        np_tensor = x.cpu().numpy()
        tf_tensor = tf.convert_to_tensor(np_tensor)

        x = lob_dilated(tf_tensor)          
        '''
        
        x = self.conv(x)
        x = x[:, :, :-31]
        x = x.permute(0, 2, 1)
        #np_tensor = x.cpu().detach().numpy()
        #tf_tensor = tf.convert_to_tensor(np_tensor)
        
        x = positional_encoding(x)
        

        #np_tensor = x.cpu().numpy()
        #np_tensor = np.squeeze(np_tensor)
   
       #x = torch.tensor(np_tensor).to(device)

        x = self.transformer(x)
        #print(x.shape)
        x = torch.reshape(x, (x.shape[0], x.shape[1]*x.shape[2]))

        x = self.dropout(self.activation(self.fc1(x)))
        #x = self.dropout(self.norm2(self.activation(self.fc2(x))))
        x = self.fc2(x)
        forecast_y = torch.softmax(x, dim=1)
        
        return forecast_y

### **Model Training**

In [10]:
model = transformer()
model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr, weight_decay=1e-5)

def batch_gd(model, criterion, optimizer, train_loader, test_loader, epochs):
    
    train_losses = np.zeros(epochs)
    test_losses = np.zeros(epochs)
    best_test_loss = np.inf
    best_test_epoch = 0

    for it in tqdm(range(epochs)):
        
        model.train()
        t0 = datetime.now()
        train_loss = []
        for inputs, targets in train_loader:
            # move data to GPU
            inputs, targets = inputs.to(device, dtype=torch.float), targets.to(device, dtype=torch.int64)
            # print("inputs.shape:", inputs.shape)
            # zero the parameter gradients
            optimizer.zero_grad()
            # Forward pass
            # print("about to get model output")
            outputs = model(inputs)
            # print("done getting model output")
            # print("outputs.shape:", outputs.shape, "targets.shape:", targets.shape)
            loss = criterion(outputs, targets)
            # Backward and optimize
            # print("about to optimize")
            loss.backward()
            optimizer.step()
            train_loss.append(loss.item())
        # Get train loss and test loss
        train_loss = np.mean(train_loss) # a little misleading
    
        model.eval()
        test_loss = []
        for inputs, targets in test_loader:
            inputs, targets = inputs.to(device, dtype=torch.float), targets.to(device, dtype=torch.int64)      
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            test_loss.append(loss.item())
        test_loss = np.mean(test_loss)

        # Save losses
        train_losses[it] = train_loss
        test_losses[it] = test_loss
        
        if test_loss < best_test_loss:
            torch.save(model, '/content/drive/MyDrive/Output/best_model_transformer')
            best_test_loss = test_loss
            best_test_epoch = it
            print('model saved')

        dt = datetime.now() - t0
        print(f'Epoch {it+1}/{epochs}, Train Loss: {train_loss:.4f}, \
          Validation Loss: {test_loss:.4f}, Duration: {dt}, Best Val Epoch: {best_test_epoch}')

    return train_losses, test_losses

In [11]:
train_losses, val_losses = batch_gd(model, criterion, optimizer, 
                                    train_loader, val_loader, epochs)

  1%|          | 1/100 [01:31<2:31:18, 91.70s/it]

model saved
Epoch 1/100, Train Loss: 0.9450,           Validation Loss: 0.9178, Duration: 0:01:31.703095, Best Val Epoch: 0


  2%|▏         | 2/100 [02:59<2:26:21, 89.61s/it]

model saved
Epoch 2/100, Train Loss: 0.9442,           Validation Loss: 0.9178, Duration: 0:01:28.134393, Best Val Epoch: 1


  3%|▎         | 3/100 [04:28<2:24:18, 89.26s/it]

model saved
Epoch 3/100, Train Loss: 0.9437,           Validation Loss: 0.9178, Duration: 0:01:28.856953, Best Val Epoch: 2


  4%|▍         | 4/100 [05:56<2:21:40, 88.55s/it]

Epoch 4/100, Train Loss: 0.9432,           Validation Loss: 0.9178, Duration: 0:01:27.445117, Best Val Epoch: 2


  5%|▌         | 5/100 [07:24<2:20:14, 88.57s/it]

model saved
Epoch 5/100, Train Loss: 0.9425,           Validation Loss: 0.9178, Duration: 0:01:28.612821, Best Val Epoch: 4


  6%|▌         | 6/100 [08:52<2:18:06, 88.16s/it]

Epoch 6/100, Train Loss: 0.9416,           Validation Loss: 0.9178, Duration: 0:01:27.342683, Best Val Epoch: 4


  7%|▋         | 7/100 [10:18<2:15:57, 87.71s/it]

Epoch 7/100, Train Loss: 0.9407,           Validation Loss: 0.9178, Duration: 0:01:26.790978, Best Val Epoch: 4


  7%|▋         | 7/100 [10:43<2:22:30, 91.94s/it]


KeyboardInterrupt: ignored

### **Model Testing**

In [12]:
model = torch.load('/content/drive/MyDrive/Output/best_model_transformer')

n_correct = 0.
n_total = 0.
all_targets = []
all_predictions = []

for inputs, targets in test_loader:
    # Move to GPU
    inputs, targets = inputs.to(device, dtype=torch.float), targets.to(device, dtype=torch.int64)

    # Forward pass
    outputs = model(inputs)
    
    # Get prediction
    # torch.max returns both max and argmax
    _, predictions = torch.max(outputs, 1)

    # update counts
    n_correct += (predictions == targets).sum().item()
    n_total += targets.shape[0]

    all_targets.append(targets.cpu().numpy())
    all_predictions.append(predictions.cpu().numpy())

test_acc = n_correct / n_total
print(f"Test acc: {test_acc:.4f}")

all_targets = np.concatenate(all_targets)    
all_predictions = np.concatenate(all_predictions)   

print('accuracy_score:', accuracy_score(all_targets, all_predictions))
print(classification_report(all_targets, all_predictions, digits=4))
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, ConfusionMatrixDisplay

c = confusion_matrix(all_targets, all_predictions, normalize="true")
disp = ConfusionMatrixDisplay(c)
disp.plot()
plt.show()

Test acc: 0.7059
accuracy_score: 0.7059431227939278
              precision    recall  f1-score   support

           0     0.2449    0.0006    0.0011     21127
           1     0.7067    0.9986    0.8277     98502
           2     0.1439    0.0010    0.0020     19759

    accuracy                         0.7059    139388
   macro avg     0.3651    0.3334    0.2769    139388
weighted avg     0.5569    0.7059    0.5853    139388



NameError: ignored