In [1]:
!pip install -U gymnasium
!pip install stable-baselines3

Collecting gymnasium
  Downloading gymnasium-1.2.3-py3-none-any.whl.metadata (10 kB)
Downloading gymnasium-1.2.3-py3-none-any.whl (952 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m952.1/952.1 kB[0m [31m15.0 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hInstalling collected packages: gymnasium
  Attempting uninstall: gymnasium
    Found existing installation: gymnasium 1.2.0
    Uninstalling gymnasium-1.2.0:
      Successfully uninstalled gymnasium-1.2.0
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
kaggle-environments 1.27.0 requires gymnasium==1.2.0, but you have gymnasium 1.2.3 which is incompatible.
dopamine-rl 4.1.2 requires gym<=0.25.2, but you have gym 0.26.2 which is incompatible.[0m[31m
[0mSuccessfully installed gymnasium-1.2.3
Collecting stable-baselines3
  Downloading stable_baselines3-2.7.1-py3-none-any.whl.me

In [2]:
from typing import Optional
import numpy as np
import gymnasium as gym
from gymnasium import spaces
import numpy as np
import torch

In [3]:
# Toy limit-order-book data: 5 snapshots (rows) x 15 columns.
# With book_depth=3, LOBEnv reads columns 0-5 as three (bid price, bid volume)
# pairs and columns 6-11 as three (ask price, ask volume) pairs.
# NOTE(review): the meaning of the last three columns (12-14) is not shown in
# this notebook; they are passed through to the observation unchanged - confirm
# against the data source.
example_LOB = torch.tensor([
    [55.8, 2, 55.76, 6, 55.7, 4, 55.82, 8, 55.88, 2, 55.89, 4, 0.8, 55.78, 0],
    [55.78, 10, 55.73, 12, 55.71, 8, 55.81, 8, 55.84, 12, 55.87, 8, 0.65, 55.83, 1],
    [55.78, 10, 55.74, 3, 55.73, 12, 55.81, 8, 55.84, 12, 55.89, 4, 0.6, 55.82, 1],
    [55.76, 4, 55.75, 2, 55.74, 8, 55.81, 8, 55.82, 2, 55.84, 11, 0.82, 55.77, 0],
    [55.81, 10, 55.79, 23, 55.78, 15, 55.83, 8, 55.85, 6, 55.93, 8, 0.44, 55.79, 0],
])

In [4]:
# Sanity check of the toy data dimensions: (number of snapshots, columns per snapshot).
example_LOB.shape

torch.Size([5, 15])

In [72]:
fee_pct = 0.0005  # proportional fee applied to the notional of every fill


class LOBEnv(gym.Env):
    """Gymnasium environment that replays limit-order-book (LOB) snapshots.

    Each row of ``orderbook`` is one snapshot. The first ``2 * book_depth``
    columns are read as (bid price, bid volume) pairs, best level first, and
    the next ``2 * book_depth`` columns as (ask price, ask volume) pairs. Any
    further columns are passed through to the observation untouched.

    Actions:
        1 -- buy, spending up to 1% of current cash by walking the ask side.
        2 -- sell the whole inventory by walking the bid side.
        any other value -- leave the portfolio unchanged.

    Observation (1-D torch tensor on ``device``): the raw snapshot row, followed
    by 8 engineered features (cumulative bid/ask volume, bid/ask liquidity,
    order-book imbalance, spread, midpoint, "microprice") and 3 private-state
    values (cash / initial cash, inventory, total balance / initial cash).

    NOTE(review): observations are torch tensors, not NumPy arrays, so they are
    not members of ``observation_space`` in the strict Gymnasium sense; convert
    with ``obs.cpu().numpy()`` before handing them to libraries that expect
    NumPy observations.
    """

    # Sizes of the blocks appended to every raw snapshot row in _get_obs.
    N_ENGINEERED_FEATURES = 8
    N_PRIVATE_FEATURES = 3

    def __init__(self, orderbook, device, book_depth):
        """Store the snapshot tensor and declare the action/observation spaces.

        Args:
            orderbook: 2-D tensor of snapshots, one row per time step.
            device: torch device string the observations are moved to.
            book_depth: number of price levels per side encoded in each row.
        """
        # Pinned host memory makes the per-step host-to-GPU copy cheaper.
        if device == 'cuda':
            self.data = orderbook.pin_memory()
        else:
            self.data = orderbook
        self.device = device
        self.book_depth = book_depth
        self.current_step = 0
        self.max_steps = self.data.shape[0] - 1
        self.initial_cash = 100_000.0

        self.action_space = spaces.Discrete(3)
        # Derive the observation width from the data instead of hard-coding it,
        # and declare float32 to match the tensors built in _get_obs.
        obs_dim = self.data.shape[1] + self.N_ENGINEERED_FEATURES + self.N_PRIVATE_FEATURES
        self.observation_space = spaces.Box(
            low=-np.inf, high=np.inf, shape=(obs_dim,), dtype=np.float32
        )

        # Make the portfolio attributes exist even before the first reset().
        self._reset_portfolio()

    def _reset_portfolio(self):
        """Restore cash, inventory and running statistics to their start values."""
        self.cash = self.initial_cash
        self.total_balance = self.initial_cash
        self.inventory = 0
        self.entry_price = 0
        self.steps_held = 0
        self.transaction_costs = 0
        self.total_return = 0

    def reset(self, seed=None, options=None):
        """Rewind to the first snapshot and reset the portfolio.

        Args:
            seed: forwarded to ``gym.Env.reset`` to seed the environment RNG.
            options: accepted for Gymnasium API compatibility; unused.

        Returns:
            ``(observation, info)`` where ``info`` is an empty dict.
        """
        super().reset(seed=seed)
        self.current_step = 0
        self._reset_portfolio()
        return self._get_obs(), {}

    def _get_obs(self):
        """Build the observation for ``self.current_step``.

        Returns:
            1-D tensor: raw snapshot, engineered features, private state.
        """
        # Only the single current row is moved to the target device.
        LOB = self.data[self.current_step].to(self.device, non_blocking=True)

        depth = self.book_depth
        bid_prices = LOB[0:depth * 2:2]
        bid_volumes = LOB[1:depth * 2:2]
        ask_prices = LOB[depth * 2:depth * 4:2]
        ask_volumes = LOB[(depth * 2) + 1:depth * 4:2]

        cumu_bid_volume = bid_volumes.sum()
        cumu_ask_volume = ask_volumes.sum()
        total_volume = cumu_bid_volume + cumu_ask_volume

        bid_liquidity = (bid_prices * bid_volumes).sum()
        ask_liquidity = (ask_prices * ask_volumes).sum()

        # Best bid is column 0; best ask is the first ask column, whose index
        # depends on the book depth.
        best_bid = LOB[0]
        best_ask = LOB[depth * 2]

        order_book_imbalance = (cumu_bid_volume - cumu_ask_volume) / total_volume
        spread = best_ask - best_bid
        midpoint = (best_ask + best_bid) / 2
        # Volume-weighted average price over all visible levels on both sides.
        microprice = (bid_liquidity + ask_liquidity) / total_volume

        # Agent-specific state, scaled by the initial cash where applicable.
        private_state = torch.tensor([
            self.cash / self.initial_cash,
            self.inventory,
            self.total_balance / self.initial_cash,
        ], device=self.device, dtype=torch.float32)

        engineered_features = torch.stack([
            cumu_bid_volume, cumu_ask_volume, bid_liquidity, ask_liquidity,
            order_book_imbalance, spread, midpoint, microprice,
        ])

        return torch.cat([LOB, engineered_features, private_state])

    def _market_buy(self, ask_prices, ask_volumes):
        """Spend up to 1% of cash walking the ask levels, best level first."""
        remaining_budget = self.cash * 0.01
        exec_shares = 0
        exec_cost = 0
        exec_fees = 0

        for price, volume in zip(ask_prices.tolist(), ask_volumes.tolist()):
            if remaining_budget <= 0:
                break
            # Skip empty or malformed levels (also avoids dividing by zero).
            if price <= 0 or volume <= 0:
                continue

            # Largest size affordable at this level once the fee is included.
            max_shares = remaining_budget / (price * (1 + fee_pct))
            shares_taken = min(volume, max_shares)

            cost = shares_taken * price
            fees = cost * fee_pct
            exec_shares += shares_taken
            exec_cost += cost + fees
            exec_fees += fees
            remaining_budget -= cost + fees

            # Stop when the budget is spent or the level was not fully consumed.
            if remaining_budget <= 0 or shares_taken < volume:
                break

        if exec_shares > 0:
            # Entry price is the fee-inclusive average cost of the position.
            total_val = (self.inventory * self.entry_price) + exec_cost
            self.inventory += exec_shares
            self.entry_price = total_val / self.inventory
            self.cash -= exec_cost
            self.transaction_costs += exec_fees

    def _market_sell(self, bid_prices, bid_volumes):
        """Sell the inventory into the bid levels, best level first.

        Shares that cannot be filled within the visible depth stay in the
        inventory at the existing entry price.
        """
        remaining_inventory = self.inventory
        exec_shares = 0
        exec_proceeds = 0
        exec_fees = 0

        for price, volume in zip(bid_prices.tolist(), bid_volumes.tolist()):
            if remaining_inventory <= 0:
                break
            # Skip empty or malformed levels.
            if price <= 0 or volume <= 0:
                continue

            shares_sold = min(volume, remaining_inventory)

            gross = shares_sold * price
            fees = gross * fee_pct
            exec_shares += shares_sold
            exec_proceeds += gross - fees
            exec_fees += fees
            remaining_inventory -= shares_sold

            if remaining_inventory <= 0 or shares_sold < volume:
                break

        if exec_shares > 0:
            # Realised PnL is measured against the cost basis of the shares
            # actually sold, not of the whole position.
            self.total_return += exec_proceeds - (self.entry_price * exec_shares)
            self.cash += exec_proceeds
            self.transaction_costs += exec_fees
            if remaining_inventory <= 0:
                self.inventory = 0
                self.entry_price = 0
            else:
                self.inventory = remaining_inventory

    def step(self, action):
        """Execute ``action`` against the current snapshot and advance one step.

        Args:
            action: 1 to buy, 2 to sell; any other value holds.

        Returns:
            ``(obs, reward, terminated, truncated, info)``. ``reward`` is the
            change in mark-to-mid portfolio value. ``terminated`` is set when
            the last snapshot is reached; ``truncated`` is set when the
            portfolio value is no longer positive. ``info`` is an empty dict.
        """
        # 1. Trade against the book that was observed before the action.
        current_row = self.data[self.current_step]
        depth = self.book_depth
        bid_prices = current_row[0:depth * 2:2]
        bid_volumes = current_row[1:depth * 2:2]
        ask_prices = current_row[depth * 2:depth * 4:2]
        ask_volumes = current_row[(depth * 2) + 1:depth * 4:2]

        if action == 1:
            self._market_buy(ask_prices, ask_volumes)
        elif action == 2:
            self._market_sell(bid_prices, bid_volumes)

        # 2. Advance to the next snapshot.
        self.current_step += 1
        obs = self._get_obs()

        # 3. Mark the portfolio to the new midpoint. Prices are read from the
        #    host-side row so no device synchronisation is needed.
        next_row = self.data[self.current_step]
        midpoint = (next_row[depth * 2].item() + next_row[0].item()) / 2
        new_total_balance = self.cash + (self.inventory * midpoint)
        reward = new_total_balance - self.total_balance
        self.total_balance = new_total_balance

        # 4. Episode-end flags, evaluated on the updated balance.
        if self.current_step >= self.max_steps:
            terminated = True
            truncated = False
        elif self.total_balance <= 0:
            terminated = False
            truncated = True
        else:
            terminated = False
            truncated = False

        return obs, reward, terminated, truncated, {}

In [73]:
# Pick the best available accelerator, falling back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'mps' if torch.backends.mps.is_available() else 'cpu'
env = LOBEnv(example_LOB, device, 3)

obs, info = env.reset()
print(obs)

# Smoke test: submit a buy order (action 1) at every step until the episode ends.
action = 1
for i in range(env.max_steps + 1):
    obs, reward, terminated, truncated, info = env.step(action)
    print(obs)

    if terminated or truncated:
        break

tensor([ 5.5800e+01,  2.0000e+00,  5.5760e+01,  6.0000e+00,  5.5700e+01,
         4.0000e+00,  5.5820e+01,  8.0000e+00,  5.5880e+01,  2.0000e+00,
         5.5890e+01,  4.0000e+00,  8.0000e-01,  5.5780e+01,  0.0000e+00,
         1.2000e+01,  1.4000e+01,  6.6896e+02,  7.8188e+02, -7.6923e-02,
         2.0000e-02,  5.5810e+01,  5.5802e+01,  1.0000e+00,  0.0000e+00,
         1.0000e+00])
tensor([5.5780e+01, 1.0000e+01, 5.5730e+01, 1.2000e+01, 5.5710e+01, 8.0000e+00,
        5.5810e+01, 8.0000e+00, 5.5840e+01, 1.2000e+01, 5.5870e+01, 8.0000e+00,
        6.5000e-01, 5.5830e+01, 1.0000e+00, 3.0000e+01, 2.8000e+01, 1.6722e+03,
        1.5635e+03, 3.4483e-02, 3.0003e-02, 5.5795e+01, 5.5789e+01, 9.9218e-01,
        1.4000e+01, 1.0000e+00])
tensor([5.5780e+01, 1.0000e+01, 5.5740e+01, 3.0000e+00, 5.5730e+01, 1.2000e+01,
        5.5810e+01, 8.0000e+00, 5.5840e+01, 1.2000e+01, 5.5890e+01, 4.0000e+00,
        6.0000e-01, 5.5820e+01, 1.0000e+00, 2.5000e+01, 2.4000e+01, 1.3938e+03,
        1.3401e+03, 

In [68]:
# Inspect the action space. In LOBEnv.step, action 1 buys and action 2 sells;
# any other action leaves the portfolio unchanged.
env.action_space

Discrete(3)

In [11]:
# Walk through the engineered features on the first snapshot.
book_depth = 3
snapshot = example_LOB[0]

# Columns come in (price, volume) pairs: bid levels first, then ask levels.
# Strided slices pick prices and volumes without rebuilding tensors element by element.
bid_prices = snapshot[0:book_depth*2:2]
bid_volumes = snapshot[1:book_depth*2:2]

ask_prices = snapshot[book_depth*2:book_depth*4:2]
ask_volumes = snapshot[(book_depth*2)+1:book_depth*4:2]

print(bid_prices, bid_volumes)
print(bid_volumes.sum().item())

print(ask_prices, ask_volumes)
print(ask_volumes.sum().item())

cumu_bid_volume = bid_volumes.sum()
cumu_ask_volume = ask_volumes.sum()
orderbook_imbalance = (cumu_bid_volume - cumu_ask_volume) / (cumu_bid_volume + cumu_ask_volume)

print(orderbook_imbalance)

# Best ask minus best bid; the best ask sits at the first ask column.
spread = snapshot[book_depth*2] - snapshot[0]
print(spread)

# Price-weighted volume on each side of the book.
bid_liquidity = (bid_prices * bid_volumes).sum()
ask_liquidity = (ask_prices * ask_volumes).sum()

print(bid_liquidity)
print(ask_liquidity)

tensor([55.8000, 55.7600, 55.7000]) tensor([2., 6., 4.])
12.0
tensor([55.8200, 55.8800, 55.8900]) tensor([8., 2., 4.])
14.0
tensor(-0.0769)
tensor(0.0200)
tensor(668.9600)
tensor(781.8800)


In [12]:
# Budget for a single trade, expressed as a fraction of the portfolio value.
portfolio_value = 100_000
trade_fraction = 0.01  # 1% of portfolio value
max_trade_amount = portfolio_value * trade_fraction
max_trade_amount

1000.0

In [13]:
# Check that strided slicing over the first six columns separates the values
# at even positions from the values at odd positions.
first_row = example_LOB[0]
even_columns = first_row[0:6:2]
odd_columns = first_row[1:6:2]
print(even_columns)
print(odd_columns)

tensor([55.8000, 55.7600, 55.7000])
tensor([2., 6., 4.])


In [14]:
# Columns book_depth*2 .. book_depth*4 hold the ask side of the book, so store
# the slices under ask_* names instead of overwriting the bid tensors.
ask_prices = example_LOB[0][book_depth*2:book_depth*4:2]
ask_volumes = example_LOB[0][(book_depth*2)+1:book_depth*4:2]

In [15]:
# Show the ask-side prices and volumes of the first snapshot.
print(ask_prices)
print(ask_volumes)

tensor([55.8200, 55.8800, 55.8900])
tensor([8., 2., 4.])


In [None]:
# Scratch expression: builds a one-element tensor from a Python list.
# NOTE(review): this cell has no execution count and nothing else in the
# notebook uses its result - consider removing it.
torch.tensor([1,])

In [65]:
# Volume-weighted average price over every visible level on both sides.
# Relies on the liquidity and cumulative-volume values computed in the
# feature walk-through cell.
total_liquidity = bid_liquidity + ask_liquidity
total_volume = cumu_bid_volume + cumu_ask_volume
microprice = total_liquidity / total_volume
microprice

tensor(55.8015)