In [2]:
%pip install pandas numpy torch tqdm seaborn matplotlib

Note: you may need to restart the kernel to use updated packages.


In [None]:
import sys
sys.path.append('..')

import torch
import os
import torch.nn as nn
import pandas as pd 
import numpy as np
from torch.utils.data import random_split, DataLoader
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt
from tqdm.auto import tqdm
from LOBnet.Models import DeepLOB_Network_v1
from LOBnet.Orderbooks import FI2010_Orderbook_Optimized
import shutil

  from .autonotebook import tqdm as notebook_tqdm
Found Intel OpenMP ('libiomp') and LLVM OpenMP ('libomp') loaded at
the same time. Both libraries are known to be incompatible and this
can cause random crashes or deadlocks on Linux when loaded in the
same Python program.
Using threadpoolctl may cause crashes or deadlocks. For more
information and possible workarounds, please see
    https://github.com/joblib/threadpoolctl/blob/master/multiple_openmp.md



In [3]:
datapath = os.path.abspath(os.path.join(os.path.dirname(os.getcwd()), "data", "BenchmarkDatasets_csv", "Auction", "1.Auction_Zscore", "Auction_Zscore_Training"))
window_size = 100
overlap_percent = 25  # 25% overlap between windows
num_workers = 5


In [4]:
# initialize dataset
dataset = FI2010_Orderbook_Optimized(
    root_path=datapath,
    window_size=window_size,
    overlap_percent=overlap_percent,
    num_workers=num_workers,
    verbose=True
)

Found Intel OpenMP ('libiomp') and LLVM OpenMP ('libomp') loaded at
the same time. Both libraries are known to be incompatible and this
can cause random crashes or deadlocks on Linux when loaded in the
same Python program.
Using threadpoolctl may cause crashes or deadlocks. For more
information and possible workarounds, please see
    https://github.com/joblib/threadpoolctl/blob/master/multiple_openmp.md

Found Intel OpenMP ('libiomp') and LLVM OpenMP ('libomp') loaded at
the same time. Both libraries are known to be incompatible and this
can cause random crashes or deadlocks on Linux when loaded in the
same Python program.
Using threadpoolctl may cause crashes or deadlocks. For more
information and possible workarounds, please see
    https://github.com/joblib/threadpoolctl/blob/master/multiple_openmp.md

Found Intel OpenMP ('libiomp') and LLVM OpenMP ('libomp') loaded at
the same time. Both libraries are known to be incompatible and this
can cause random crashes or deadlocks on Linux


Raw price movement statistics:
Mean: 1.9965
Std: 0.8855
Min: 1.0000
Max: 3.0000

Label distribution:
0    10454
1     5729
2    10360
Name: count, dtype: int64





In [5]:
# create train/test splits (80/20)
total_size = len(dataset)
train_size = int(0.8 * total_size)
test_size = total_size - train_size

train_dataset, test_dataset = random_split(
    dataset, 
    [train_size, test_size],
    generator=torch.Generator().manual_seed(42)  # for reproducibility
)

# Create DataLoaders
batch_size = 32
train_loader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=0,
    pin_memory=True if torch.cuda.is_available() else False,
    collate_fn=lambda batch: (
        torch.stack([x[0] for x in batch]).transpose(1, 2).unsqueeze(1),
        torch.stack([x[1] for x in batch])
    )
)

test_loader = DataLoader(
    test_dataset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=0,
    pin_memory=True if torch.cuda.is_available() else False,
    collate_fn=lambda batch: (
        torch.stack([x[0] for x in batch]).transpose(1, 2).unsqueeze(1),
        torch.stack([x[1] for x in batch])
    )   
)

print(f"Total dataset size: {total_size}")
print(f"Training set size: {len(train_dataset)}")
print(f"Test set size: {len(test_dataset)}")

Total dataset size: 26543
Training set size: 21234
Test set size: 5309


In [6]:
# look at first batch
for batch in train_loader:
    batch_X, batch_y = batch  # unpack batch tuple
    print(f"Batch X shape: {batch_X.shape}")  # should be (batch_size, 100, 40)
    print(f"Batch y shape: {batch_y.shape}")
    break

Batch X shape: torch.Size([32, 1, 100, 40])
Batch y shape: torch.Size([32, 3])


In [5]:
def evaluate_model(model, test_loader, device):
    all_predictions = []
    all_labels = []
    
    with torch.no_grad():
        for batch_X, batch_y in test_loader:
            batch_X, batch_y = batch_X.to(device), batch_y.to(device)
            outputs = model(batch_X)
            
            # convert one-hot encoded outputs to class predictions
            predictions = outputs.argmax(dim=1)
            true_labels = batch_y.argmax(dim=1)
            
            all_predictions.extend(predictions.cpu().numpy())
            all_labels.extend(true_labels.cpu().numpy())
    
    return all_predictions, all_labels

In [4]:
from LOBnet.Models import DeepLOB_Network_v0
from LOBnet.Models import DeepLOB_Network_v1
from LOBnet.Models import DeepLOB_Network_v2
from LOBnet.Models import DeepLOB_Network_v3
from LOBnet.Models import TransformerLOB

In [None]:
# Model Selection
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Comment out the models you do not want to train

# Purpose of this is to streamline the naming convention
# of saved models, the model names match the naming
# conventions used in model_eval notebook

# model = DeepLOB_Network_v0(num_classes=3).to(device)
# model_name = 'baseline_model'

# model = DeepLOB_Network_v1(y_len=3, device=device).to(device)
# model_name = 'deepLOB_v1'

# model = DeepLOB_Network_v2(y_len=3, device=device).to(device)
# model_name = 'deepLOB_v2'

# model = DeepLOB_Network_v3(y_len=3, device=device).to(device)
# model_name = 'deepLOB_v3'

# model = TransformerLOB(y_len=3, device=device).to(device)
# model_name = 'transformer_model'

In [None]:
criterion = nn.CrossEntropyLoss(ignore_index=-1)
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)

# Training parameters
num_epochs = 25
best_loss = float('inf')

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    
    pbar = tqdm(train_loader, desc=f'Epoch {epoch+1}/{num_epochs}')
    
    for batch_X, batch_y in pbar:
        batch_X = batch_X.float().to(device)
        batch_y = batch_y.to(device)
        
        # Forward pass
        optimizer.zero_grad()
        outputs = model(batch_X)
        loss = criterion(outputs, batch_y)
        
        # Backward pass
        loss.backward()
        optimizer.step()
        
        # Calculate accuracy
        _, predicted = torch.max(outputs.data, 1)
        _, labels = torch.max(batch_y.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        
        running_loss += loss.item()
        
        pbar.set_postfix({
            'loss': f'{running_loss/len(pbar):.4f}',
            'acc': f'{100 * correct/total:.2f}%'
        })
    
    # Epoch statistics
    epoch_loss = running_loss / len(train_loader)
    print(f'Epoch {epoch+1} Loss: {epoch_loss:.4f}')
    
    # Save best model
    if epoch_loss < best_loss:
        best_loss = epoch_loss
        torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'loss': best_loss,
        }, os.path.abspath(os.path.join("..", "saved", model_name + ".pth")))

print('Training finished!')

Epoch 1/25:   0%|          | 0/664 [00:00<?, ?it/s]

Epoch 1/25: 100%|██████████| 664/664 [01:54<00:00,  5.79it/s, loss=1.0461, acc=44.66%]


Epoch 1 Loss: 1.0461


Epoch 2/25: 100%|██████████| 664/664 [01:47<00:00,  6.15it/s, loss=0.9813, acc=54.62%]


Epoch 2 Loss: 0.9813


Epoch 3/25:   6%|▌         | 37/664 [00:07<01:58,  5.28it/s, loss=0.0531, acc=58.87%]


KeyboardInterrupt: 

In [None]:
# load the saved model state if it exists
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model_path = os.path.abspath(os.path.join("..", "savedModels", model_name + ".pth"))
best_model_path = os.path.abspath(os.path.join("..", "bestModels", model_name + ".pth"))
if os.path.exists(model_path):
    print(f"Loading saved model from {model_path}")
    checkpoint = torch.load(model_path)
    model.load_state_dict(checkpoint['model_state_dict'])
    model.eval()  # set to evaluation mode
else:
    print(f"No saved model found at {model_path}. Please train the model first.")
    exit()

predictions, true_labels = evaluate_model(model, test_loader, device)

print(f"\n{model_name}:\nClassification Report:")
print(classification_report(true_labels, predictions, 
                          target_names=['No Movement', 'Up', 'Down']))

plt.figure(figsize=(10, 8))
cm = confusion_matrix(true_labels, predictions)
mod_acc = cm.trace() / cm.sum()
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=['No Movement', 'Up', 'Down'],
            yticklabels=['No Movement', 'Up', 'Down'])
plt.title('Confusion Matrix')
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.show()

if os.path.exists(best_model_path):
    print(f"Loading best model from {best_model_path}")
    checkpoint = torch.load(best_model_path)
    model.load_state_dict(checkpoint['model_state_dict'])
    model.eval()  # set to evaluation mode
else:
    print(f"No saved model found at {best_model_path}.\nSetting best saved model as official best model")
    shutil.copyfile(model_path, best_model_path)
    exit()

predictions, true_labels = evaluate_model(model, test_loader, device)

print(f"\n{model_name} (current Best):\nClassification Report:")
print(classification_report(true_labels, predictions, 
                          target_names=['No Movement', 'Up', 'Down']))

plt.figure(figsize=(10, 8))
cm = confusion_matrix(true_labels, predictions)
best_mod_acc = cm.trace() / cm.sum()
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=['No Movement', 'Up', 'Down'],
            yticklabels=['No Movement', 'Up', 'Down'])
plt.title('Confusion Matrix')
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.show()

print(f"Accuracy of new model vs. best:\n New: {mod_acc}\tBest: {best_mod_acc}")
if mod_acc > best_mod_acc:
    print(f"\nNew model exceeds old model accuracy, replacing best model with new best.")
    shutil.copyfile(model_path, best_model_path)


NameError: name 'torch' is not defined

In [33]:
import graphviz
import torchviz

In [None]:
# Initialize model and load weights

# Load the saved model state
# model_path = 'best_transformer.pth'
# if os.path.exists(model_path):
#     print(f"Loading saved model from {model_path}")
#     checkpoint = torch.load(model_path)
#     model.load_state_dict(checkpoint['model_state_dict'])
#     model.eval()
# else:
#     print(f"No saved model found at {model_path}")
#     exit()

# Create visualization
batch_size = 1
sample_input = torch.randn(batch_size, 1, 100, 40).to(device)
output = model(sample_input)

# Create simplified visualization
dot = make_dot(output, params=dict(model.named_parameters()))



In [None]:
from graphviz import Digraph

def create_simple_arch():
    dot = Digraph(comment='Transformer LOB Architecture')
    dot.attr(rankdir='TB')

    # Define color scheme
    input_color = '#E6F3FF' # Light blue
    conv_color = '#FFE6E6' # Light red
    pos_color = '#E6FFE6' # Light green
    trans_color = '#FFE6FF' # Light purple
    head_color = '#FFFFD9' # Light yellow
    output_color = '#F0F0F0' # Light gray

    # Add nodes with colors and styles
    dot.node('input', 'Input\n(batch, 1, 100, 40)', 
        style='filled', fillcolor=input_color)
    dot.node('conv', 'Conv Reduction\n(2 Conv1d + MaxPool)', 
        style='filled', fillcolor=conv_color)
    dot.node('pos', 'Positional\nEncoding', 
        style='filled', fillcolor=pos_color)
    dot.node('trans', 'Transformer\nEncoder\n(3 layers)', 
        style='filled', fillcolor=trans_color)
    dot.node('head', 'Classification\nHead', 
        style='filled', fillcolor=head_color)
    dot.node('output', 'Output\n(batch, 3)', 
        style='filled', fillcolor=output_color)

    # Add edges with a gentle curve
    dot.edge('input', 'conv', penwidth='1.5')
    dot.edge('conv', 'pos', penwidth='1.5')
    dot.edge('pos', 'trans', penwidth='1.5')
    dot.edge('trans', 'head', penwidth='1.5')
    dot.edge('head', 'output', penwidth='1.5')

    # Save with higher DPI
    dot.render("transformer_architecture_minimal", format="png", cleanup=True)


In [38]:
create_simple_arch()

ExecutableNotFound: failed to execute PosixPath('dot'), make sure the Graphviz executables are on your systems' PATH