In [1]:
# load packages

import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from google.colab import drive
import torch
from tqdm import tqdm
from torch.utils import data
import torch.nn as nn
import torch.optim as optim
from keras import layers
from keras.layers import Conv1D, BatchNormalization
import tensorflow as tf

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

drive.mount('/content/drive')

Mounted at /content/drive


In [2]:

dec_data = np.loadtxt('/content/drive/MyDrive/Data/Train_Dst_NoAuction_ZScore_CF_7.txt')
dec_train = dec_data[:, :int(dec_data.shape[1] * 0.8)]
dec_val = dec_data[:, int(dec_data.shape[1] * 0.8):]

dec_test1 = np.loadtxt('/content/drive/MyDrive/Data/Test_Dst_NoAuction_ZScore_CF_7.txt')
dec_test2 = np.loadtxt('/content/drive/MyDrive/Data/Test_Dst_NoAuction_ZScore_CF_8.txt')
dec_test3 = np.loadtxt('/content/drive/MyDrive/Data/Test_Dst_NoAuction_ZScore_CF_9.txt')
dec_test = np.hstack((dec_test1, dec_test2, dec_test3))

print(dec_train.shape, dec_val.shape, dec_test.shape)


(149, 203800) (149, 50950) (149, 139587)


In [3]:
def prepare_x(data):
    df1 = data[:40, :].T
    return np.array(df1)

def get_label(data):
    lob = data[-5:, :].T
    return lob

def data_classification(X, Y, T):
    [N, D] = X.shape
    df = np.array(X)

    dY = np.array(Y)

    dataY = dY[T - 1:N]

    dataX = np.zeros((N - T + 1, T, D))
    for i in range(T, N + 1):
        dataX[i - T] = df[i - T:i, :]

    return dataX, dataY

def torch_data(x, y):
    x = torch.from_numpy(x)
    x = torch.unsqueeze(x, 1)
    y = torch.from_numpy(y)
    y = F.one_hot(y, num_classes=3)
    return x, y

In [4]:
class Dataset(data.Dataset):
    """Characterizes a dataset for PyTorch"""
    def __init__(self, data, k, num_classes, T):
        """Initialization""" 
        self.k = k
        self.num_classes = num_classes
        self.T = T
            
        x = prepare_x(data)
        y = get_label(data)
        x, y = data_classification(x, y, self.T)
        y = y[:,self.k] - 1
        self.length = len(x)

        x = torch.from_numpy(x)
        self.x = torch.unsqueeze(x, 1)
        self.y = torch.from_numpy(y)

    def __len__(self):
        """Denotes the total number of samples"""
        return self.length

    def __getitem__(self, index):
        """Generates samples of data"""
        return self.x[index], self.y[index]


In [5]:
batch_size = 32

dataset_train = Dataset(data=dec_train, k=4, num_classes=3, T=100)
dataset_val = Dataset(data=dec_val, k=4, num_classes=3, T=100)
dataset_test = Dataset(data=dec_test, k=4, num_classes=3, T=100)

train_loader = torch.utils.data.DataLoader(dataset=dataset_train, batch_size=batch_size, shuffle=True)
val_loader = torch.utils.data.DataLoader(dataset=dataset_val, batch_size=batch_size, shuffle=False)
test_loader = torch.utils.data.DataLoader(dataset=dataset_test, batch_size=batch_size, shuffle=False)

print(dataset_train.x.shape, dataset_train.y.shape)


torch.Size([203701, 1, 100, 40]) torch.Size([203701])


In [6]:
from copy import deepcopy
from typing import List
from typing import Tuple
from typing import Union

from torch import Tensor
from torch.nn import Conv1d
from torch.nn import Module
from torch.nn import ReLU
from torch.nn import ReplicationPad1d
from torch.nn import Sequential
from torch.nn import ConstantPad1d

class CausalConv1d(Sequential):
    r"""Applies a 1D convolution with causal padding.
    Args:
        in_channels (int): Number of channels in the input sequence.
        out_channels (int): Number of channels produced by the convolution.
        kernel_size (int): Size of the convolving kernel.
        dilation (int, default=1): Spacing between kernel elements.
    Shape:
        - Input: :math:`(N, C_{\text{in}}, L)`
        - Output: :math:`(N, C_{\text{out}}, L)` where :math:`N` is the batch size,
          :math:`C_{\text{in}}` is the number of channels in the input sequence,
          :math:`C_{\text{out}}` is the number of channels in the output sequence,
          :math:`L` is the length of the sequence.
    Examples:
        >>> import torch
        >>>
        >>> m = CausalConv1d(40, 14, 2, dilation=1)
        >>> m
        CausalConv1d(40, 14, kernel_size=(2,), stride=(1,))
        >>>
        >>> input = torch.empty(1, 40, 100)
        >>> m(input).size()
        torch.Size([1, 14, 100])
    """

    def __init__(
        self, in_channels: int, out_channels: int, kernel_size: int, dilation: int = 1
    ):
        super().__init__()
        self.pad = ReplicationPad1d(((kernel_size - 1) * dilation, 0))
        self.conv = Conv1d(in_channels, out_channels, kernel_size, dilation=dilation)

    def forward(self, input: Tensor) -> Tensor:
        #print(input.shape)
        return self.conv(self.pad(input))

    def __repr__(self) -> str:
        return self._get_name() + f"({self.conv.extra_repr()})"


class CausalConvLayers(Sequential):
    r"""Applies dilated causal convolution.
    Args:
        in_features (int): The number of channels in the input sequence.
        channels (int): The number of channels in the intermediate and output sequences.
        kernel_size (int): Size of the convolving kernel.
        dilation (int or tuple[int], default=1):
            If int, use the common value of dilation for each layer.
            If tuple[int], use different value for each layer.
        n_layers (int, default=5): The number of causal convolutional layer in the module.
    Shapes:
        - Input: :math:`(N, C_{\text{in}}, L)`
        - Output: :math:`(N, C_{\text{out}}, L)` where :math:`N` is the batch size,
          :math:`C_{\text{in}}` is the number of channels in the input sequence,
          :math:`C_{\text{out}}` is the number of channels in the output sequence,
          :math:`L` is the length of the sequence.
    Examples:
        >>> import torch
        >>>
        >>> _ = torch.manual_seed(42)
        >>> m = CausalConvLayers(40, 14, 2, dilation=(1, 2, 4, 8, 16))
        >>> m
        CausalConvLayers(
          (0): CausalConv1d(40, 14, kernel_size=(2,), stride=(1,))
          (1): ReLU()
          (2): CausalConv1d(14, 14, kernel_size=(2,), stride=(1,), dilation=(2,))
          (3): ReLU()
          (4): CausalConv1d(14, 14, kernel_size=(2,), stride=(1,), dilation=(4,))
          (5): ReLU()
          (6): CausalConv1d(14, 14, kernel_size=(2,), stride=(1,), dilation=(8,))
          (7): ReLU()
          (8): CausalConv1d(14, 14, kernel_size=(2,), stride=(1,), dilation=(16,))
        )
        >>> input = torch.empty((1, 40, 100))
        >>> m(input).size()
        torch.Size([1, 14, 100])
    """

    def __init__(
        self,
        in_channels: int,
        n_features: int,
        kernel_size: int,
        dilation: Union[Tuple[int, ...], int] = 1,
        n_layers: int = 5,
        activation: Module = ReLU(),
    ):
        if isinstance(dilation, int):
            dilation = (dilation,) * n_layers

        layers: List[Module] = []
        for i in range(n_layers):
            c = in_channels if i == 0 else n_features
            layers.append(CausalConv1d(c, n_features, kernel_size, dilation[i]))
            if i != n_layers - 1:
                layers.append(deepcopy(activation))
        print(layers)
        super().__init__(*layers)

In [7]:
from copy import deepcopy
from typing import List

from torch.nn import Linear
from torch.nn import Module
from torch.nn import ReLU
from torch.nn import Sequential


class MultiLayerPerceptron(Sequential):
    """Multi-layer perceptron.
    Args:
        in_features (int): size of each input sample.
        out_features (int): size of each output sample.
        n_layers (int): number of hidden layers.
        n_units (int): number of units in each hidden layer.
        activation (Module): activation module in hidden layers.
    Shape:
        - Input: :math:`(N, *, H_in)` where
          :math:`*` means any number of additional dimensions and
          :math`H_in` is ``in_features``.
        - Output: :math:`(N, *, H_out)` where
          all but the last dimension are the same shape as the input and
          :math:`H_out` is ``out_features``.
    Examples:
        >>> import torch
        >>>
        >>> m = MultiLayerPerceptron(2, 3)
        >>> m
        MultiLayerPerceptron(
          (0): Linear(in_features=2, out_features=32, bias=True)
          (1): ReLU()
          (2): Linear(in_features=32, out_features=32, bias=True)
          (3): ReLU()
          (4): Linear(in_features=32, out_features=3, bias=True)
        )
        >>> m(torch.empty(1, 2)).size()
        torch.Size([1, 3])
    """

    def __init__(
        self,
        in_features: int,
        out_features: int,
        n_layers: int = 2,
        n_units: int = 32,
        activation: Module = ReLU(),
    ) -> None:
        layers: List[Module] = []
        for i_layer in range(n_layers):
            layers.append(Linear(in_features if i_layer == 0 else n_units, n_units))
            layers.append(deepcopy(activation))
        layers.append(Linear(n_units, out_features))

        super().__init__(*layers)

In [8]:
import math

import torch
from torch import Tensor
from torch.nn import Module


class PositionalEncoding(Module):
    """Positional encoder.
    Args:
        d_model (int, default=1): Dimension of the model.
        max_length (int, default=100): Maximum length of the sequence
        encoding ({"linear"}): Method of encoding.
            For "linear":
                y = x / max_length
                x : position in the sequence
                y : positional encoder
    Shape:
        - Input: :math:`(N, *, X, L)` where :math:`N` is the batch size,
          :math:`X` is the number of features in the input,
          :math:`F` is the number of features in the positional encoding,
          :math:`L` is the length of the sequence,
          :math:`*` is any number of additional dimensions.
        - Output: :math:`(N, *, X + F, L)`.
    Examples:
        >>> _ = torch.manual_seed(42)
        >>> x = torch.randn(1, 2, 10)
        >>> m = PositionalEncoding()
        >>> m(x).size()
        torch.Size([1, 3, 10])
    """

    positional_encoder: Tensor

    def __init__(
        self, d_model: int = 1, max_length: int = 100, encoding: str = "linear"
    ) -> None:
        super().__init__()

        self.d_model = d_model
        self.max_length = max_length
        self.encoding = encoding

        self.register_buffer("positional_encoder", self._compute_positional_encoder())

    def _compute_positional_encoder(self) -> Tensor:
        # Returns:
        # positional_encoder : tensor, shape (F, L)
        #     F : number of features
        #     L : maximum length
        if self.encoding == "sinusoid":
            position = torch.linspace(0.0, 2 * math.pi, self.max_length).reshape(-1, 1)
            frequency = torch.logspace(0.0, math.log(2 * math.pi), self.d_model, math.e)
            frequency = frequency.unsqueeze(0)

            phase = frequency * position

            positional_encoder = torch.empty((self.max_length, 2 * self.d_model))
            positional_encoder[:, 0::2] = phase.sin()
            positional_encoder[:, 1::2] = phase.cos()

        if self.encoding == "linear":
            positional_encoder = torch.linspace(0.0, 1.0, self.max_length).unsqueeze(0)
        else:
            raise ValueError("invalid 'encoding'")

        return positional_encoder

    def forward(self, input: Tensor) -> Tensor:
        # cut and align shape
        p = self.positional_encoder[..., : input.size(-1)]
        # for input shape (N, *, X, L), p's shape is (N, *, F, L)
        p = p.expand(input.size()[:-2] + p.size()[-2:])
        return torch.cat((input, p), -2)

In [9]:
import torch
from torch import Tensor
from torch.nn import TransformerEncoderLayer


class CausalTransformerEncoderLayer(TransformerEncoderLayer):
    """Transformer encoder layer with causal mask.
    See :class:`torch.nn.TransformerEncoderLayer` for details.
    Examples:
        >>> L, N, E = 5, 1, 2  # sequence length, batch, features
        >>> m = CausalTransformerEncoderLayer(E, 1)
        >>> src = torch.empty(L, N, E)
        >>> m.causal_mask(src)
        tensor([[False,  True,  True,  True,  True],
                [False, False,  True,  True,  True],
                [False, False, False,  True,  True],
                [False, False, False, False,  True],
                [False, False, False, False, False]])
        >>> assert m(src).size() == src.size()
    """

    def causal_mask(self, src: Tensor) -> Tensor:
        # In PyTorch documentation of MultiHeadAttention:
        # > (L, S) where L is the target sequence length,
        # > S is the source sequence length.
        query, key, value = src, src, src
        trues = torch.ones(
            (query.size(0), key.size(0)), dtype=torch.bool, device=src.device
        )
        return trues.triu(diagonal=1)

    def forward(self, src: Tensor, *args, **kwargs) -> Tensor:
        return super().forward(src, src_mask=self.causal_mask(src))

In [10]:
from typing import Tuple
from typing import Union

import torch
from torch import Tensor
from torch.nn import Dropout
from torch.nn import Flatten
from torch.nn import LayerNorm
from torch.nn import Linear
from torch.nn import Module
from torch.nn import Sequential
from torch.nn import Softmax
from torch.nn import TransformerEncoder

#from .conv import CausalConvLayers
#from .mlp import MultiLayerPerceptron
#from .position import PositionalEncoding
#from .transformer import CausalTransformerEncoderLayer


class TransLOB(Module):
    r"""Transformers for limit order books.
    Default values are the same with the original paper, unless stated otherwise.
    Reference:
        - Transformers for limit order books, James Wallbridge (2020)
          https://github.com/jwallbridge/translob
    Args:
        in_features (int, default=40): The number of input features.
        out_features (int, default=3): The number of output features.
        out_activation (torch.nn.Module, default=torch.nn.Softmax(-1)):
            The activation layer applied to the output.
        conv_n_layers (int, default=5): The number of convolutional layers.
        conv_n_features (int, default=14): The number of features
            in the convolutional layers.
        conv_kernel_size (int, default=2): The kernel size
            in the convolutional layers.
        conv_dilation (int or tuple[int], default=(1, 2, 4, 8, 16)): The dilation(s)
            in the convolutional layers.
        tf_n_channels: (int, default=3): The number of channels
            in the multi-head self-attension of Transformer encoder.
            Its default value may be different from the original implementation.
            Its default value (denoted "C" in the paper?) does not seem to be
            clarified in the original papar and so we set the default value arbitrarily.
        tf_dim_feedforward (int, default=60): The dimension of feed-forward
            network model in Transformer encoder.
        tf_dropout_rate (float, default=0.0): Dropout rate in Transformer encoder.
        tf_num_layers (int, default=2): Number of sub-encoder-layers in the Transformer encoder.
        mlp_dim (int, default=64):
            Dimension of feedforward network model after Transformer encoder.
        mlp_n_layers (int, default=1):
            Number of layers in feedforward network model after Transformer encoder.
        dropout_rate (float, default=0.1):
            Dropout rate after Transformer encoder.
    Shapes:
        - Input: :math:`(N, C, L)` where :math:`N` is the batch size,
          :math:`C` is the number of features and
          :math:`L` is the length of the sequence.
          :math:`C = 40` in the original paper: ask/bid, level 1-10, and price/volume.
          :math:`L = 100` in the original paper.
        - Output: :math:`(N, N_{\text{out}})`
          :math:`N_{\text{out}} = 3` in the original paper (up, down, and neutral).
    Examples:
        >>> import torch
        >>>
        >>> m = TransLOB()
        >>> input = torch.empty(1, 40, 100)
        >>> m(input).size()
        torch.Size([1, 3])
    """

    def __init__(
        self,
        in_features: int = 40,
        out_features: int = 3,
        len_sequence: int = 100,
        out_activation: Module = Softmax(-1),
        conv_n_features: int = 14,
        conv_kernel_size: int = 2,
        conv_dilation: Union[Tuple[int, ...], int] = (1, 2, 4, 8, 16),
        conv_n_layers: int = 5,
        tf_n_channels: int = 3,
        tf_dim_feedforward: int = 60,
        tf_dropout_rate: float = 0.0,
        tf_num_layers: int = 2,
        mlp_dim: int = 64,
        mlp_n_layers: int = 1,
        dropout_rate: float = 0.1,
    ):
        super().__init__()

        # Define convolutional module.
        convolution = CausalConvLayers(
            in_features,
            conv_n_features,
            conv_kernel_size,
            dilation=conv_dilation,
            n_layers=conv_n_layers,
        )
        self.pre_transformer = Sequential(
            convolution,
            LayerNorm(torch.Size((conv_n_features, len_sequence))),
            PositionalEncoding(max_length=len_sequence),
        )

        # Define Transformer encoder module.
        d_model = conv_n_features + 1
        encoder_layer = CausalTransformerEncoderLayer(
            d_model=d_model,
            nhead=tf_n_channels,
            dim_feedforward=tf_dim_feedforward,
            dropout=tf_dropout_rate,
        )
        self.transformer = TransformerEncoder(encoder_layer, num_layers=tf_num_layers)

        # Define modules used after Transformer encoder.
        multi_layer_perceptron = MultiLayerPerceptron(
            in_features=d_model * len_sequence,
            out_features=mlp_dim,
            n_layers=mlp_n_layers,
            n_units=mlp_dim,
        )
        self.post_transformer = Sequential(
            Flatten(1, -1),
            multi_layer_perceptron,
            Dropout(dropout_rate),
            Linear(mlp_dim, out_features),
            out_activation,
        )

    def forward(self, input: Tensor) -> Tensor:
        input = self.pre_transformer(input).movedim(-1, 0)
        input = self.transformer(input)
        input = self.post_transformer(input.movedim(0, -1))
        return input



In [11]:
model = TransLOB(in_features= 40,
        out_features = 3,
        len_sequence = 100,
        out_activation= Softmax(-1),
        conv_n_features = 14,
        conv_kernel_size = 2,
        conv_dilation = (1, 2, 4, 8, 16),
        conv_n_layers = 5,
        tf_n_channels = 3,
        tf_dim_feedforward = 60,
        tf_dropout_rate = 0.0,
        tf_num_layers = 2,
        mlp_dim = 64,
        mlp_n_layers = 1,
        dropout_rate = 0.1).to(device)
input = torch.empty(1, 40, 100)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)

[CausalConv1d(40, 14, kernel_size=(2,), stride=(1,)), ReLU(), CausalConv1d(14, 14, kernel_size=(2,), stride=(1,), dilation=(2,)), ReLU(), CausalConv1d(14, 14, kernel_size=(2,), stride=(1,), dilation=(4,)), ReLU(), CausalConv1d(14, 14, kernel_size=(2,), stride=(1,), dilation=(8,)), ReLU(), CausalConv1d(14, 14, kernel_size=(2,), stride=(1,), dilation=(16,))]


In [12]:
# A function to encapsulate the training loop
def batch_gd(model, criterion, optimizer, train_loader, test_loader, epochs):
    
    train_losses = np.zeros(epochs)
    test_losses = np.zeros(epochs)
    best_test_loss = np.inf
    best_test_epoch = 0

    for it in tqdm(range(epochs)):
        
        model.train()
        t0 = datetime.now()
        train_loss = []
        for inputs, targets in train_loader:
            # move data to GPU
            inputs, targets = inputs.to(device, dtype=torch.float), targets.to(device, dtype=torch.int64)
            # print("inputs.shape:", inputs.shape)
            # zero the parameter gradients
            optimizer.zero_grad()
            inputs = torch.squeeze(inputs, 1)
            inputs = torch.permute(inputs, (0, 2, 1))
            # Forward pass
            # print("about to get model output")
            #print(inputs.shape)
            outputs = model(inputs)
            # print("done getting model output")
            # print("outputs.shape:", outputs.shape, "targets.shape:", targets.shape)
            loss = criterion(outputs, targets)
            # Backward and optimize
            # print("about to optimize")
            loss.backward()
            optimizer.step()
            train_loss.append(loss.item())
        # Get train loss and test loss
        train_loss = np.mean(train_loss) # a little misleading
    
        model.eval()
        test_loss = []
        for inputs, targets in test_loader:
            inputs, targets = inputs.to(device, dtype=torch.float), targets.to(device, dtype=torch.int64)      
            inputs = torch.squeeze(inputs, 1)
            inputs = torch.permute(inputs, (0, 2, 1))
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            test_loss.append(loss.item())
        test_loss = np.mean(test_loss)

        # Save losses
        train_losses[it] = train_loss
        test_losses[it] = test_loss
        
        if test_loss < best_test_loss:
            torch.save(model, './best_val_model_pytorch')
            best_test_loss = test_loss
            best_test_epoch = it
            print('model saved')

        dt = datetime.now() - t0
        print(f'Epoch {it+1}/{epochs}, Train Loss: {train_loss:.4f}, \
          Validation Loss: {test_loss:.4f}, Duration: {dt}, Best Val Epoch: {best_test_epoch}')

    return train_losses, test_losses

In [None]:
train_losses, val_losses = batch_gd(model, criterion, optimizer, 
                                    train_loader, val_loader, epochs=50)

  2%|▏         | 1/50 [01:21<1:06:47, 81.78s/it]

model saved
Epoch 1/50, Train Loss: 1.0184,           Validation Loss: 1.0686, Duration: 0:01:21.778518, Best Val Epoch: 0


  4%|▍         | 2/50 [02:36<1:02:19, 77.90s/it]

Epoch 2/50, Train Loss: 0.9746,           Validation Loss: 1.0691, Duration: 0:01:15.183190, Best Val Epoch: 0


  6%|▌         | 3/50 [03:53<1:00:23, 77.10s/it]

model saved
Epoch 3/50, Train Loss: 0.9574,           Validation Loss: 1.0632, Duration: 0:01:16.133175, Best Val Epoch: 2


  8%|▊         | 4/50 [05:07<58:25, 76.20s/it]  

Epoch 4/50, Train Loss: 0.9420,           Validation Loss: 1.0638, Duration: 0:01:14.825116, Best Val Epoch: 2


In [None]:
model = torch.load('/content/drive/MyDrive/Output/best_model_CNN_FI')

n_correct = 0.
n_total = 0.
all_targets = []
all_predictions = []

for inputs, targets in test_loader:
    # Move to GPU
    inputs, targets = inputs.to(device, dtype=torch.float), targets.to(device, dtype=torch.int64)

    # Forward pass
    outputs = model(inputs)
    
    # Get prediction
    # torch.max returns both max and argmax
    _, predictions = torch.max(outputs, 1)

    # update counts
    n_correct += (predictions == targets).sum().item()
    n_total += targets.shape[0]

    all_targets.append(targets.cpu().numpy())
    all_predictions.append(predictions.cpu().numpy())

test_acc = n_correct / n_total
print(f"Test acc: {test_acc:.4f}")
  
all_targets = np.concatenate(all_targets)    
all_predictions = np.concatenate(all_predictions)  

In [None]:
print('accuracy_score:', accuracy_score(all_targets, all_predictions))
print(classification_report(all_targets, all_predictions, digits=4))

c = confusion_matrix(all_targets, all_predictions, normalize="true")
disp = ConfusionMatrixDisplay(c)
disp.plot()
plt.show()