In [39]:
!pip install -U gymnasium
!pip install stable-baselines3



In [40]:
from typing import Optional
import numpy as np
import gymnasium as gym
from gymnasium import spaces
import numpy as np
import torch

In [41]:
# Toy limit-order-book data: one row per snapshot, 15 columns per row.
# Column layout as consumed by LOBEnv.step (book depth 3):
#   0-5   : bid levels as interleaved (price, volume) pairs
#   6-11  : ask levels as interleaved (price, volume) pairs
#   12-14 : extra per-snapshot values that LOBEnv.step does not read
#           NOTE(review): their meaning is not visible in this notebook — document the source.
example_LOB = torch.tensor([
    [55.8, 2, 55.76, 6, 55.7, 4, 55.82, 8, 55.88, 2, 55.89, 4, 0.8, 55.78, 0],
    [55.78, 10, 55.73, 12, 55.71, 8, 55.81, 8, 55.84, 12, 55.87, 8, 0.65, 55.83, 1],
    [55.78, 10, 55.74, 3, 55.73, 12, 55.81, 8, 55.84, 12, 55.89, 4, 0.6, 55.82, 1],
    [55.76, 4, 55.75, 2, 55.74, 8, 55.81, 8, 55.82, 2, 55.84, 11, 0.82, 55.77, 0],
    [55.81, 10, 55.79, 23, 55.78, 15, 55.83, 8, 55.85, 6, 55.93, 8, 0.44, 55.79, 0],
])

In [42]:
# Sanity check of the layout: (number of snapshots, columns per snapshot).
example_LOB.shape

torch.Size([5, 15])

In [43]:
# Proportional transaction fee applied to the notional of every fill (buy and sell).
fee_pct = 0.0005


class LOBEnv(gym.Env):
    """Single-asset trading environment that replays limit-order-book snapshots.

    Each row of ``orderbook`` is one snapshot. The first ``2 * book_depth``
    columns are bid levels and the next ``2 * book_depth`` columns are ask
    levels, both stored as interleaved ``(price, volume)`` pairs. Any further
    columns are passed through in the observation but not used by the
    execution logic.

    Actions (``spaces.Discrete(3)``):
        0 -- do nothing.
        1 -- buy, spending up to ``trade_fraction`` of the current cash by
             walking the ask levels of the current snapshot.
        2 -- sell the inventory by walking the bid levels of the current
             snapshot.

    The reward of a step is the change in mark-to-market balance
    (``cash + inventory * midpoint``), where the midpoint is taken between the
    first bid price and the first ask price of the snapshot returned by the step.

    Args:
        orderbook: 2-D tensor of snapshots (rows) by features (columns).
        device: torch device observations are moved to. With ``'cuda'`` the
            data is pinned so the per-step host-to-device copy can be
            asynchronous.
        book_depth: number of price levels per side stored in each row.
    """

    # Fraction of the available cash a single buy order may spend.
    trade_fraction = 0.01

    def __init__(self, orderbook, device, book_depth=3):
        # Pinned host memory is only meaningful for CUDA transfers.
        if device == 'cuda':
            self.data = orderbook.pin_memory()
        else:
            self.data = orderbook
        # Keep the device on the instance; the methods must not depend on a
        # notebook-level global that may not exist yet (or may change later).
        self.device = device
        self.book_depth = book_depth
        self.current_step = 0
        self.max_steps = self.data.shape[0] - 1

        self.initial_cash = 100_000.0
        self.action_space = spaces.Discrete(3)
        # Observations are single rows of the order book.
        # NOTE(review): observations are returned as torch tensors, not NumPy
        # arrays; this Box only declares the shape for Gymnasium tooling.
        self.observation_space = spaces.Box(
            low=-np.inf, high=np.inf, shape=(self.data.shape[1],), dtype=np.float32
        )
        self._reset_portfolio()

    def _reset_portfolio(self):
        """Put cash, inventory and bookkeeping counters back to their initial values."""
        self.cash = self.initial_cash
        self.total_balance = self.initial_cash
        self.inventory = 0
        self.entry_price = 0
        self.steps_held = 0
        self.transaction_costs = 0
        self.total_return = 0

    def reset(self, seed=None, options=None):
        """Restart the episode at the first snapshot.

        Args:
            seed: forwarded to ``gym.Env.reset`` to seed ``self.np_random``.
            options: accepted for Gymnasium API compatibility; unused.

        Returns:
            ``(observation, info)`` where ``observation`` is the first snapshot
            on ``self.device`` and ``info`` is an empty dict.
        """
        super().reset(seed=seed)
        self.current_step = 0
        self._reset_portfolio()
        return self._get_obs(), {}

    def _get_obs(self):
        """Return the current snapshot, copying only that one row to ``self.device``."""
        return self.data[self.current_step].to(self.device, non_blocking=True)

    def _execute_buy(self, ask_prices, ask_volumes):
        """Walk the ask levels, spending at most ``trade_fraction`` of the cash (fees included)."""
        remaining_budget = self.cash * self.trade_fraction
        if remaining_budget <= 0:
            return

        exec_shares = 0.0
        exec_cost = 0.0
        exec_fees = 0.0
        for price, volume in zip(ask_prices, ask_volumes):
            # Empty or malformed levels cannot be traded against.
            if price <= 0 or volume <= 0:
                continue

            # Largest quantity the remaining budget can pay for, fee included.
            max_shares = remaining_budget / (price * (1 + fee_pct))
            shares_taken = min(volume, max_shares)

            cost = shares_taken * price
            fees = cost * fee_pct
            exec_shares += shares_taken
            exec_cost += cost + fees
            exec_fees += fees
            remaining_budget -= cost + fees

            # Stop once the budget is spent or the level was only partly consumed.
            if remaining_budget <= 0 or shares_taken < volume:
                break

        if exec_shares > 0:
            # Entry price is the fee-inclusive volume-weighted cost basis.
            total_val = (self.inventory * self.entry_price) + exec_cost
            self.inventory += exec_shares
            self.entry_price = total_val / self.inventory
            self.cash -= exec_cost
            self.transaction_costs += exec_fees

    def _execute_sell(self, bid_prices, bid_volumes):
        """Walk the bid levels, selling as much of the inventory as the visible depth allows."""
        remaining_inventory = self.inventory
        if remaining_inventory <= 0:
            return

        exec_shares = 0.0
        exec_cost = 0.0
        exec_fees = 0.0
        for price, volume in zip(bid_prices, bid_volumes):
            if price <= 0 or volume <= 0:
                continue

            shares_sold = min(volume, remaining_inventory)

            cost = shares_sold * price
            fees = cost * fee_pct
            exec_shares += shares_sold
            exec_cost += cost - fees  # proceeds net of fees
            exec_fees += fees
            remaining_inventory -= shares_sold

            if remaining_inventory <= 0 or shares_sold < volume:
                break

        if exec_shares > 0:
            # Realised PnL only covers the shares that were actually filled.
            self.total_return += exec_cost - (self.entry_price * exec_shares)
            # Shares the visible book could not absorb stay in inventory (partial
            # fill) instead of disappearing without proceeds; the cost basis of
            # the remainder is unchanged.
            self.inventory = max(remaining_inventory, 0)
            if self.inventory == 0:
                self.entry_price = 0
            self.cash += exec_cost
            self.transaction_costs += exec_fees

    def step(self, action):
        """Execute ``action`` against the current snapshot and advance one snapshot.

        Args:
            action: 0 (hold), 1 (buy) or 2 (sell).

        Returns:
            ``(obs, reward, terminated, truncated, info)``. ``obs`` is the next
            snapshot on ``self.device``; ``reward`` is the change in
            mark-to-market balance; ``terminated`` is set when the last snapshot
            is reached; ``truncated`` is set when the balance is no longer
            positive; ``info`` is an empty dict.
        """
        # 1. Trading logic runs on plain Python floats read straight from the
        #    stored data, so no device round-trip is needed for the bookkeeping.
        row = self.data[self.current_step]
        depth = self.book_depth
        bid_prices = row[0:depth * 2:2].tolist()
        bid_volumes = row[1:depth * 2:2].tolist()
        ask_prices = row[depth * 2:depth * 4:2].tolist()
        ask_volumes = row[(depth * 2) + 1:depth * 4:2].tolist()

        if action == 1:  # buy order
            self._execute_buy(ask_prices, ask_volumes)
        elif action == 2:  # sell order
            self._execute_sell(bid_prices, bid_volumes)

        # 2. Advance, clamping at the last snapshot so that stepping an already
        #    finished episode cannot index past the end of the data.
        self.current_step = min(self.current_step + 1, self.max_steps)
        next_row = self.data[self.current_step]

        # 3. Mark the portfolio to market at the midpoint of the new snapshot.
        midpoint = (float(next_row[depth * 2]) + float(next_row[0])) / 2
        new_total_balance = self.cash + (self.inventory * midpoint)
        reward = new_total_balance - self.total_balance
        self.total_balance = new_total_balance

        # 4. Episode status, evaluated on the freshly updated balance.
        terminated = self.current_step >= self.max_steps
        truncated = (not terminated) and self.total_balance <= 0

        obs = next_row.to(self.device, non_blocking=True)
        return obs, reward, terminated, truncated, {}

In [44]:
# Pick the best available backend: CUDA first, then Apple MPS, otherwise CPU.
if torch.cuda.is_available():
    device = 'cuda'
elif torch.backends.mps.is_available():
    device = 'mps'
else:
    device = 'cpu'

# Environment replaying the toy order book on the selected device.
env = LOBEnv(example_LOB, device)

In [45]:
# Three discrete actions; in LOBEnv.step 1 = buy, 2 = sell, and any other value leaves the position unchanged.
env.action_space

Discrete(3)

In [46]:
# Smoke test: roll out a constant "always buy" policy until the episode ends,
# printing every observation the environment returns.
obs, info = env.reset()
print(obs)

action = 1  # 1 = buy (see LOBEnv.step)
for _ in range(env.max_steps + 1):
    # Pass the chosen action through instead of a second hard-coded literal,
    # so changing `action` above actually changes the policy.
    obs, reward, terminated, truncated, info = env.step(action)
    print(obs)

    if terminated or truncated:
        break

tensor([55.8000,  2.0000, 55.7600,  6.0000, 55.7000,  4.0000, 55.8200,  8.0000,
        55.8800,  2.0000, 55.8900,  4.0000,  0.8000, 55.7800,  0.0000])
tensor([55.7800, 10.0000, 55.7300, 12.0000, 55.7100,  8.0000, 55.8100,  8.0000,
        55.8400, 12.0000, 55.8700,  8.0000,  0.6500, 55.8300,  1.0000])
tensor([55.7800, 10.0000, 55.7400,  3.0000, 55.7300, 12.0000, 55.8100,  8.0000,
        55.8400, 12.0000, 55.8900,  4.0000,  0.6000, 55.8200,  1.0000])
tensor([55.7600,  4.0000, 55.7500,  2.0000, 55.7400,  8.0000, 55.8100,  8.0000,
        55.8200,  2.0000, 55.8400, 11.0000,  0.8200, 55.7700,  0.0000])
tensor([55.8100, 10.0000, 55.7900, 23.0000, 55.7800, 15.0000, 55.8300,  8.0000,
        55.8500,  6.0000, 55.9300,  8.0000,  0.4400, 55.7900,  0.0000])


In [47]:
# First column of the first snapshot (a bid price), still wrapped in a 0-d tensor.
example_LOB[0][0]

tensor(55.8000)

In [48]:
# .item() unwraps the 0-d tensor into a Python float; the value is the
# single-precision approximation of 55.8, hence the extra digits in the output.
example_LOB[0][0].item()

55.79999923706055

In [49]:
# Scratch check of the order-book features for the first snapshot.
book_depth = 3
first_snapshot = example_LOB[0]

# Columns are interleaved (price, volume) pairs: bid levels first, then ask
# levels. Strided slices pick them out directly instead of rebuilding tensors
# element by element.
bid_prices = first_snapshot[0:book_depth*2:2]
bid_volumes = first_snapshot[1:book_depth*2:2]

ask_prices = first_snapshot[book_depth*2:book_depth*4:2]
ask_volumes = first_snapshot[(book_depth*2)+1:book_depth*4:2]

# Total visible volume per side (tensor reduction rather than Python's sum()).
cumu_bid_volume = bid_volumes.sum()
cumu_ask_volume = ask_volumes.sum()

print(bid_prices, bid_volumes)
print(cumu_bid_volume.item())

print(ask_prices, ask_volumes)
print(cumu_ask_volume.item())

# Volume imbalance in [-1, 1]: negative means more volume on the ask side.
orderbook_imbalance = (cumu_bid_volume - cumu_ask_volume) / (cumu_bid_volume + cumu_ask_volume)

print(orderbook_imbalance)

# Difference between the first ask price and the first bid price.
spread = first_snapshot[book_depth*2] - first_snapshot[0]
print(spread)

# Notional (price * volume) resting on each side of the visible book.
bid_liquidity = (bid_prices * bid_volumes).sum()
ask_liquidity = (ask_prices * ask_volumes).sum()

print(bid_liquidity)
print(ask_liquidity)

tensor([55.8000, 55.7600, 55.7000]) tensor([2., 6., 4.])
12.0
tensor([55.8200, 55.8800, 55.8900]) tensor([8., 2., 4.])
14.0
tensor(-0.0769)
tensor(0.0200)
tensor(668.9600)
tensor(781.8800)


In [50]:
# Budget of a single buy order when starting from LOBEnv's initial cash of 100,000.
max_trade_amount = 100_000 * 0.01 # 1% of portfolio value
max_trade_amount

1000.0

In [51]:
# Strided slices over the first six columns of the first snapshot:
# even offsets (0, 2, 4) are the bid prices, odd offsets (1, 3, 5) the bid volumes.
print(example_LOB[0][0:6:2])
print(example_LOB[0][1:6:2])

tensor([55.8000, 55.7600, 55.7000])
tensor([2., 6., 4.])


In [52]:
bid_prices = example_LOB[0][book_depth*2:book_depth*4:2]
bid_volumes = example_LOB[0][(book_depth*2)+1:book_depth*4:2]

In [53]:
print(bid_prices)
print(bid_volumes)

tensor([55.8200, 55.8800, 55.8900])
tensor([8., 2., 4.])
