<a href="https://colab.research.google.com/github/Raj-Shah-20/ML-Researcher-Task-BlockHouse/blob/main/Final_Implementation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Market Order Implementation

In [2]:
!pip install stable-baselines3
!pip install gymnasium>=0.28.1
!pip install shimmy==0.2.1

Collecting stable-baselines3
  Downloading stable_baselines3-2.3.2-py3-none-any.whl.metadata (5.1 kB)
Collecting gymnasium<0.30,>=0.28.1 (from stable-baselines3)
  Downloading gymnasium-0.29.1-py3-none-any.whl.metadata (10 kB)
Collecting farama-notifications>=0.0.1 (from gymnasium<0.30,>=0.28.1->stable-baselines3)
  Downloading Farama_Notifications-0.0.4-py3-none-any.whl.metadata (558 bytes)
Downloading stable_baselines3-2.3.2-py3-none-any.whl (182 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m182.3/182.3 kB[0m [31m15.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading gymnasium-0.29.1-py3-none-any.whl (953 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m953.9/953.9 kB[0m [31m55.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading Farama_Notifications-0.0.4-py3-none-any.whl (2.5 kB)
Installing collected packages: farama-notifications, gymnasium, stable-baselines3
Successfully installed farama-notifications-0.0.4 gymnasium-0.29.1 stable-baselines3-2.

#### SAC

In [3]:
import pandas as pd
import numpy as np
import json
from stable_baselines3 import SAC, PPO
import gym
from gym import spaces

# Read the quote snapshots from the Colab working directory.
file_path = '/content/AAPL_Quotes_Data.csv'
data = pd.read_csv(file_path)

# Parse the timestamps (unparseable values become NaT), forward-fill every
# missing cell in file order, and finally put the rows in chronological order.
# NOTE(review): the fill runs before the sort, so gaps are filled from the
# previous row of the file rather than the previous row in time - confirm intended.
data = (
    data
    .assign(timestamp=lambda frame: pd.to_datetime(frame['timestamp'], errors='coerce'))
    .ffill()
    .sort_values(by='timestamp')
)

  from jax import xla_computation as _xla_computation


In [4]:
class Benchmark:
    """Baseline schedules (TWAP / VWAP) and per-trade cost components.

    All calculations read the bid side of a five-level book. `data` must expose
    the columns `timestamp`, `bid_price_1..5` and `bid_size_1..5`; rows are
    addressed by position (`iloc`).
    """

    # Column names of the five bid-side depth levels.
    _BID_SIZE_COLS = ['bid_size_1', 'bid_size_2', 'bid_size_3', 'bid_size_4', 'bid_size_5']
    # Column order of the schedule frames returned by the get_*_trades methods.
    _TRADE_COLS = ['timestamp', 'step', 'price', 'shares', 'inventory']

    def __init__(self, data):
        """Keep a reference to the quote frame used by the cost calculations."""
        self.data = data

    def get_twap_trades(self, data, initial_inventory, preferred_timeframe=390):
        """Build an equal-slice (TWAP) schedule.

        Each step trades `initial_inventory / preferred_timeframe` shares, capped
        by what is left. The schedule covers `min(len(data), preferred_timeframe)`
        steps, so it only liquidates fully when `data` spans the whole timeframe.

        Returns a DataFrame with columns timestamp, step, price (bid_price_1),
        shares and inventory (shares left after the trade). It is empty when
        there are no steps to schedule.
        """
        n_steps = max(min(len(data), preferred_timeframe), 0)
        # Guard the division so a zero timeframe yields an empty schedule.
        twap_shares_per_step = initial_inventory / preferred_timeframe if preferred_timeframe > 0 else 0
        remaining_inventory = initial_inventory
        trades = []
        for step in range(n_steps):
            row = data.iloc[step]
            size_of_slice = min(twap_shares_per_step, remaining_inventory)
            # Deduct exactly what is recorded as traded; deducting ceil(slice)
            # exhausted the inventory early and left part of it unscheduled.
            remaining_inventory -= size_of_slice
            trades.append({
                'timestamp': row['timestamp'],
                'step': step,
                'price': row['bid_price_1'],
                'shares': size_of_slice,
                'inventory': remaining_inventory,
            })
        return pd.DataFrame(trades, columns=self._TRADE_COLS)

    def get_vwap_trades(self, data, initial_inventory, preferred_timeframe=390):
        """Build a volume-proportional (VWAP) schedule.

        Each step's slice is `initial_inventory` times that step's share of the
        total five-level bid volume *inside the scheduled window*, so the slices
        add up to the full inventory. Steps trade nothing when the window has no
        volume at all.

        Returns a DataFrame with the same columns as `get_twap_trades`.
        """
        n_steps = max(min(len(data), preferred_timeframe), 0)
        # Per-step depth, restricted to the rows that are actually scheduled.
        window_volume = data.iloc[:n_steps][self._BID_SIZE_COLS].sum(axis=1)
        total_volume = window_volume.sum()
        remaining_inventory = initial_inventory
        trades = []
        for step in range(n_steps):
            row = data.iloc[step]
            if total_volume > 0:
                size_of_slice = (window_volume.iloc[step] / total_volume) * initial_inventory
            else:
                size_of_slice = 0.0
            size_of_slice = min(size_of_slice, remaining_inventory)
            # Keep the inventory column consistent with the recorded slice.
            remaining_inventory -= size_of_slice
            trades.append({
                'timestamp': row['timestamp'],
                'step': step,
                'price': row['bid_price_1'],
                'shares': size_of_slice,
                'inventory': remaining_inventory,
            })
        return pd.DataFrame(trades, columns=self._TRADE_COLS)

    def calculate_vwap(self, idx, shares):
        """Average price obtained by consuming `shares` from the bid levels of row `idx`.

        Levels are consumed best-first and each level is weighted by the quantity
        actually taken from it (the last level touched may be partially filled).
        When `shares` exceeds the visible depth, the average covers the depth that
        exists. Returns `bid_price_1` for a non-positive `shares` and NaN when the
        book has no size at all.
        """
        bid_prices = [self.data[f'bid_price_{i}'].iloc[idx] for i in range(1, 6)]
        bid_sizes = [self.data[f'bid_size_{i}'].iloc[idx] for i in range(1, 6)]
        if shares <= 0:
            # Nothing is executed: report the touch price so slippage is zero.
            return float(bid_prices[0])
        unfilled = shares
        notional = 0.0
        filled = 0.0
        for price, size in zip(bid_prices, bid_sizes):
            take = min(size, unfilled)
            notional += price * take
            filled += take
            unfilled -= take
            if unfilled <= 0:
                break
        return notional / filled if filled > 0 else np.nan

    def compute_components(self, alpha, shares, idx):
        """Return `np.array([slippage, market_impact])` for trading `shares` at row `idx`.

        slippage      = (bid_price_1 - book-walk average price) * shares
        market_impact = alpha * sqrt(shares)
        """
        bid_price_1 = self.data['bid_price_1'].iloc[idx]
        actual_price = self.calculate_vwap(idx, shares)
        Slippage = (bid_price_1 - actual_price) * shares
        Market_Impact = alpha * np.sqrt(shares)
        return np.array([float(Slippage), float(Market_Impact)])

    def simulate_strategy(self, trades, data, preferred_timeframe, alpha=4.439584265535017e-06):
        """Compute per-trade slippage and market impact for a schedule.

        The i-th row of `trades` is priced against the i-th row of `self.data`.
        `data` and `preferred_timeframe` are accepted for interface compatibility
        but are not used. `alpha` is the market-impact coefficient.

        Returns `(slippage, market_impact)` as two lists of floats.
        """
        slippage = []
        market_impact = []
        for idx, shares in enumerate(trades['shares']):
            components = self.compute_components(alpha, shares, idx)
            slippage.append(components[0])
            market_impact.append(components[1])
        return slippage, market_impact

class TradingEnvWithOrders(gym.Env):
    """Environment in which an agent works down `total_shares` with market orders.

    Observation (float32, shape (3,)): [epoch seconds of the current row,
    bid_price_1, shares remaining].
    Action (float32, shape (1,)): fraction in [0, 1] of the remaining shares to
    trade this step; the trade is capped by the summed five-level bid size.
    Uses the classic gym API: `reset()` returns an observation and `step()`
    returns `(obs, reward, done, info)`.
    """

    def __init__(self, data, total_shares=1000, trading_horizon=390, benchmark=None):
        """Store the quote frame (re-indexed from 0), the inventory and horizon, and declare the spaces.

        `benchmark`, when given, must provide `compute_components(alpha, shares, idx)`;
        its slippage and market impact are added to the per-step cost.
        """
        super(TradingEnvWithOrders, self).__init__()
        self.data = data.reset_index(drop=True)
        self.total_shares = total_shares
        self.trading_horizon = trading_horizon
        self.current_step = 0
        self.shares_remaining = self.total_shares
        self.benchmark = benchmark
        self.done = False
        self.action_space = spaces.Box(low=0, high=1, shape=(1,), dtype=np.float32)
        self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(3,), dtype=np.float32)

    def reset(self):
        """Rewind to the first row with the full inventory and return the first observation."""
        self.current_step = 0
        self.shares_remaining = self.total_shares
        self.done = False
        return self._get_observation()

    def _get_observation(self):
        """Return the observation for the current row, or zeros once the data is exhausted."""
        if self.current_step >= len(self.data):
            # Match the float32 dtype declared by observation_space.
            return np.zeros(self.observation_space.shape, dtype=np.float32)
        row = self.data.iloc[self.current_step]
        # NOTE(review): epoch seconds stored as float32 keep only ~24 bits of
        # precision, so nearby timestamps may collapse to the same value -
        # consider a relative time feature instead.
        return np.array([
            row['timestamp'].timestamp(),
            row['bid_price_1'],
            self.shares_remaining
        ], dtype=np.float32)

    def step(self, action):
        """Execute one market order and advance one row.

        The cost is 0.1% of traded notional at bid_price_1 plus, when a benchmark
        is attached, its slippage and market impact (alpha=0.001). The reward is
        the negative cost. The episode ends when the inventory is gone, the
        horizon is reached, or the data runs out.
        """
        row = self.data.iloc[self.current_step]
        # Keep the fraction inside the declared action bounds.
        trade_size_fraction = float(np.clip(action[0], 0.0, 1.0))
        current_volume = row[['bid_size_1', 'bid_size_2', 'bid_size_3', 'bid_size_4', 'bid_size_5']].sum()
        trade_size = int(min(trade_size_fraction * self.shares_remaining, current_volume))
        current_price = row['bid_price_1']
        transaction_cost = trade_size * current_price * 0.001
        if self.benchmark is not None:
            slippage, market_impact = self.benchmark.compute_components(alpha=0.001, shares=trade_size, idx=self.current_step)
            transaction_cost += slippage + market_impact
        transaction_cost = float(transaction_cost)
        self.shares_remaining -= trade_size
        reward = -transaction_cost
        self.current_step += 1
        # Also stop when the quote data is shorter than the horizon; otherwise
        # the next step would index past the last row.
        self.done = (
            self.shares_remaining <= 0
            or self.current_step >= self.trading_horizon
            or self.current_step >= len(self.data)
        )
        if self.done:
            next_state = np.zeros(self.observation_space.shape, dtype=np.float32)
        else:
            next_state = self._get_observation()
        info = {
            "timestamp": row['timestamp'],  # row the trade executed against
            "trade_size": trade_size,
            "order_type": "market",
            "remaining_shares": self.shares_remaining,
            "transaction_cost": transaction_cost
        }
        return next_state, reward, self.done, info

# Attach the cost model to the environment, then fit a SAC agent on it.
benchmark = Benchmark(data)
env_with_orders = TradingEnvWithOrders(
    data,
    benchmark=benchmark,
)
model = SAC(
    'MlpPolicy',
    env_with_orders,
    verbose=1,
)
model.learn(total_timesteps=10000)

# Roll the trained policy through one episode and record every trade.
trade_schedule = []
obs = env_with_orders.reset()
done = False
while not done:
    action, _ = model.predict(obs, deterministic=True)
    # Remember the row the order executes against: step() advances
    # current_step, so reading it afterwards would price the trade on the
    # following row (and can index past the end of the data).
    executed_step = env_with_orders.current_step
    obs, reward, done, info = env_with_orders.step(action)
    trade_size = info["trade_size"]
    slippage, market_impact = benchmark.compute_components(alpha=0.001, shares=trade_size, idx=executed_step)
    transaction_cost = slippage + market_impact
    # Cast to built-in numbers so json.dump never meets a numpy scalar.
    trade_schedule.append({
        "timestamp": str(info["timestamp"]),
        "trade_size": int(trade_size),
        "order_type": info["order_type"],
        "remaining_shares": int(env_with_orders.shares_remaining),
        "slippage": float(slippage),
        "market_impact": float(market_impact),
        "transaction_cost": float(transaction_cost)
    })
output_file = "/content/trade_schedule1.json"
with open(output_file, 'w') as f:
    json.dump(trade_schedule, f, indent=4)

print(f"Trade schedule saved to {output_file}")

# TWAP baseline: schedule, per-trade cost components, total cost.
twap_trades = benchmark.get_twap_trades(data, initial_inventory=1000)
twap_slippage, twap_market_impact = benchmark.simulate_strategy(
    twap_trades, data, preferred_timeframe=390
)
twap_total_cost = sum(twap_slippage) + sum(twap_market_impact)

# VWAP baseline: same pipeline on the volume-weighted schedule.
vwap_trades = benchmark.get_vwap_trades(data, initial_inventory=1000)
vwap_slippage, vwap_market_impact = benchmark.simulate_strategy(
    vwap_trades, data, preferred_timeframe=390
)
vwap_total_cost = sum(vwap_slippage) + sum(vwap_market_impact)

print(f'TWAP Total Cost: {twap_total_cost}')
print(f'VWAP Total Cost: {vwap_total_cost}')

  and should_run_async(code)


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
| rollout/           |          |
|    ep_len_mean     | 2        |
|    ep_rew_mean     | -213     |
| time/              |          |
|    episodes        | 3776     |
|    fps             | 63       |
|    time_elapsed    | 120      |
|    total_timesteps | 7651     |
| train/             |          |
|    actor_loss      | 1.36e+04 |
|    critic_loss     | 4.61e+11 |
|    ent_coef        | 9.63     |
|    ent_coef_loss   | -72.2    |
|    learning_rate   | 0.0003   |
|    n_updates       | 7550     |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 2        |
|    ep_rew_mean     | -213     |
| time/              |          |
|    episodes        | 3780     |
|    fps             | 63       |
|    time_elapsed    | 120      |
|    total_timesteps | 7659     |
| train/             |          |
|    actor_loss      | 1.74e+05 |
|    critic_loss 

#### PPO

In [6]:
class Benchmark:
    """Baseline schedules (TWAP / VWAP) and per-trade cost components.

    All calculations read the bid side of a five-level book. `data` must expose
    the columns `timestamp`, `bid_price_1..5` and `bid_size_1..5`; rows are
    addressed by position (`iloc`).
    """

    # Column names of the five bid-side depth levels.
    _BID_SIZE_COLS = ['bid_size_1', 'bid_size_2', 'bid_size_3', 'bid_size_4', 'bid_size_5']
    # Column order of the schedule frames returned by the get_*_trades methods.
    _TRADE_COLS = ['timestamp', 'step', 'price', 'shares', 'inventory']

    def __init__(self, data):
        """Keep a reference to the quote frame used by the cost calculations."""
        self.data = data

    def get_twap_trades(self, data, initial_inventory, preferred_timeframe=390):
        """Build an equal-slice (TWAP) schedule.

        Each step trades `initial_inventory / preferred_timeframe` shares, capped
        by what is left. The schedule covers `min(len(data), preferred_timeframe)`
        steps, so it only liquidates fully when `data` spans the whole timeframe.

        Returns a DataFrame with columns timestamp, step, price (bid_price_1),
        shares and inventory (shares left after the trade). It is empty when
        there are no steps to schedule.
        """
        n_steps = max(min(len(data), preferred_timeframe), 0)
        # Guard the division so a zero timeframe yields an empty schedule.
        twap_shares_per_step = initial_inventory / preferred_timeframe if preferred_timeframe > 0 else 0
        remaining_inventory = initial_inventory
        trades = []
        for step in range(n_steps):
            row = data.iloc[step]
            size_of_slice = min(twap_shares_per_step, remaining_inventory)
            # Deduct exactly what is recorded as traded; deducting ceil(slice)
            # exhausted the inventory early and left part of it unscheduled.
            remaining_inventory -= size_of_slice
            trades.append({
                'timestamp': row['timestamp'],
                'step': step,
                'price': row['bid_price_1'],
                'shares': size_of_slice,
                'inventory': remaining_inventory,
            })
        return pd.DataFrame(trades, columns=self._TRADE_COLS)

    def get_vwap_trades(self, data, initial_inventory, preferred_timeframe=390):
        """Build a volume-proportional (VWAP) schedule.

        Each step's slice is `initial_inventory` times that step's share of the
        total five-level bid volume *inside the scheduled window*, so the slices
        add up to the full inventory. Steps trade nothing when the window has no
        volume at all.

        Returns a DataFrame with the same columns as `get_twap_trades`.
        """
        n_steps = max(min(len(data), preferred_timeframe), 0)
        # Per-step depth, restricted to the rows that are actually scheduled.
        window_volume = data.iloc[:n_steps][self._BID_SIZE_COLS].sum(axis=1)
        total_volume = window_volume.sum()
        remaining_inventory = initial_inventory
        trades = []
        for step in range(n_steps):
            row = data.iloc[step]
            if total_volume > 0:
                size_of_slice = (window_volume.iloc[step] / total_volume) * initial_inventory
            else:
                size_of_slice = 0.0
            size_of_slice = min(size_of_slice, remaining_inventory)
            # Keep the inventory column consistent with the recorded slice.
            remaining_inventory -= size_of_slice
            trades.append({
                'timestamp': row['timestamp'],
                'step': step,
                'price': row['bid_price_1'],
                'shares': size_of_slice,
                'inventory': remaining_inventory,
            })
        return pd.DataFrame(trades, columns=self._TRADE_COLS)

    def calculate_vwap(self, idx, shares):
        """Average price obtained by consuming `shares` from the bid levels of row `idx`.

        Levels are consumed best-first and each level is weighted by the quantity
        actually taken from it (the last level touched may be partially filled).
        When `shares` exceeds the visible depth, the average covers the depth that
        exists. Returns `bid_price_1` for a non-positive `shares` and NaN when the
        book has no size at all.
        """
        bid_prices = [self.data[f'bid_price_{i}'].iloc[idx] for i in range(1, 6)]
        bid_sizes = [self.data[f'bid_size_{i}'].iloc[idx] for i in range(1, 6)]
        if shares <= 0:
            # Nothing is executed: report the touch price so slippage is zero.
            return float(bid_prices[0])
        unfilled = shares
        notional = 0.0
        filled = 0.0
        for price, size in zip(bid_prices, bid_sizes):
            take = min(size, unfilled)
            notional += price * take
            filled += take
            unfilled -= take
            if unfilled <= 0:
                break
        return notional / filled if filled > 0 else np.nan

    def compute_components(self, alpha, shares, idx):
        """Return `np.array([slippage, market_impact])` for trading `shares` at row `idx`.

        slippage      = (bid_price_1 - book-walk average price) * shares
        market_impact = alpha * sqrt(shares)
        """
        bid_price_1 = self.data['bid_price_1'].iloc[idx]
        actual_price = self.calculate_vwap(idx, shares)
        Slippage = (bid_price_1 - actual_price) * shares
        Market_Impact = alpha * np.sqrt(shares)
        return np.array([float(Slippage), float(Market_Impact)])

    def simulate_strategy(self, trades, data, preferred_timeframe, alpha=4.439584265535017e-06):
        """Compute per-trade slippage and market impact for a schedule.

        The i-th row of `trades` is priced against the i-th row of `self.data`.
        `data` and `preferred_timeframe` are accepted for interface compatibility
        but are not used. `alpha` is the market-impact coefficient.

        Returns `(slippage, market_impact)` as two lists of floats.
        """
        slippage = []
        market_impact = []
        for idx, shares in enumerate(trades['shares']):
            components = self.compute_components(alpha, shares, idx)
            slippage.append(components[0])
            market_impact.append(components[1])
        return slippage, market_impact


class TradingEnvWithOrders(gym.Env):
    """Market-order environment with a scaled action and a progress bonus in the reward.

    Observation (float32, shape (3,)): [epoch seconds of the current row,
    bid_price_1, shares remaining].
    Action (float32, shape (1,)): multiplied by 10 and floored at 0.01 before
    being applied as a fraction of the remaining shares, so any action >= 0.1
    requests the whole remaining inventory (subject to book depth).
    Uses the classic gym API: `reset()` returns an observation and `step()`
    returns `(obs, reward, done, info)`.
    """

    def __init__(self, data, total_shares=1000, trading_horizon=390, benchmark=None):
        """Store the quote frame (re-indexed from 0), the inventory and horizon, and declare the spaces.

        `benchmark`, when given, must provide `compute_components(alpha, shares, idx)`;
        its slippage and market impact are added to the per-step cost.
        """
        super(TradingEnvWithOrders, self).__init__()
        self.data = data.reset_index(drop=True)
        self.total_shares = total_shares
        self.trading_horizon = trading_horizon
        self.current_step = 0
        self.shares_remaining = self.total_shares
        self.benchmark = benchmark
        self.done = False
        self.action_space = spaces.Box(low=0, high=1, shape=(1,), dtype=np.float32)
        self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(3,), dtype=np.float32)

    def reset(self):
        """Rewind to the first row with the full inventory and return the first observation."""
        self.current_step = 0
        self.shares_remaining = self.total_shares
        self.done = False
        return self._get_observation()

    def _get_observation(self):
        """Return the observation for the current row, or zeros once the data is exhausted."""
        if self.current_step >= len(self.data):
            # Match the float32 dtype declared by observation_space.
            return np.zeros(self.observation_space.shape, dtype=np.float32)
        row = self.data.iloc[self.current_step]
        # NOTE(review): epoch seconds stored as float32 keep only ~24 bits of
        # precision, so nearby timestamps may collapse to the same value -
        # consider a relative time feature instead.
        return np.array([
            row['timestamp'].timestamp(),
            row['bid_price_1'],
            self.shares_remaining
        ], dtype=np.float32)

    def step(self, action):
        """Execute one market order and advance one row.

        At least one share is requested per step; the trade is capped by the
        summed five-level bid size and by the remaining inventory. The cost is
        0.1% of traded notional at bid_price_1 plus, when a benchmark is attached,
        its slippage and market impact (alpha=0.001). The reward is the negative
        cost plus 0.05 per share liquidated so far in the episode. The episode
        ends when the inventory is gone, the horizon is reached, or the data runs
        out.
        """
        row = self.data.iloc[self.current_step]
        trade_size_fraction = max(action[0] * 10, 0.01)
        current_volume = row[['bid_size_1', 'bid_size_2', 'bid_size_3', 'bid_size_4', 'bid_size_5']].sum()

        trade_size = int(max(trade_size_fraction * self.shares_remaining, 1))
        # min() may hand back the numpy volume scalar; force a built-in int so
        # shares_remaining and the info dict stay JSON-serialisable.
        trade_size = int(min(trade_size, current_volume, self.shares_remaining))

        current_price = row['bid_price_1']
        transaction_cost = trade_size * current_price * 0.001

        if self.benchmark is not None:
            slippage, market_impact = self.benchmark.compute_components(alpha=0.001, shares=trade_size, idx=self.current_step)
            transaction_cost += slippage + market_impact
        transaction_cost = float(transaction_cost)
        self.shares_remaining -= trade_size
        reward = -transaction_cost + 0.05 * (self.total_shares - self.shares_remaining)

        self.current_step += 1
        # Also stop when the quote data is shorter than the horizon; otherwise
        # the next step would index past the last row.
        self.done = (
            self.shares_remaining <= 0
            or self.current_step >= self.trading_horizon
            or self.current_step >= len(self.data)
        )
        if self.done:
            next_state = np.zeros(self.observation_space.shape, dtype=np.float32)
        else:
            next_state = self._get_observation()

        info = {
            "timestamp": row['timestamp'],  # row the trade executed against
            "trade_size": trade_size,
            "order_type": "market",
            "remaining_shares": self.shares_remaining,
            "transaction_cost": transaction_cost
        }
        return next_state, reward, self.done, info


# Attach the cost model to the environment, then fit a PPO agent on it.
benchmark = Benchmark(data)
env_with_orders = TradingEnvWithOrders(
    data,
    benchmark=benchmark,
)

model = PPO(
    'MlpPolicy',
    env_with_orders,
    verbose=1,
    n_steps=2048,
)
model.learn(
    total_timesteps=50000,
    log_interval=10,
)

# Roll the trained policy through one episode and record every trade.
# Actions are sampled (deterministic=False), so the schedule differs between runs.
trade_schedule = []
obs = env_with_orders.reset()
done = False
while not done:
    action, _ = model.predict(obs, deterministic=False)
    # Remember the row the order executes against: step() advances
    # current_step, so reading it afterwards would price the trade on the
    # following row (and can index past the end of the data).
    executed_step = env_with_orders.current_step
    obs, reward, done, info = env_with_orders.step(action)

    trade_size = info["trade_size"]
    slippage, market_impact = benchmark.compute_components(alpha=0.001, shares=trade_size, idx=executed_step)
    transaction_cost = slippage + market_impact

    # Cast to built-in numbers so json.dump never meets a numpy scalar.
    trade_schedule.append({
        "timestamp": str(info["timestamp"]),
        "trade_size": int(trade_size),
        "order_type": info["order_type"],
        "remaining_shares": int(env_with_orders.shares_remaining),
        "slippage": float(slippage),
        "market_impact": float(market_impact),
        "transaction_cost": float(transaction_cost)
    })

output_file = "/content/trade_schedule2.json"
with open(output_file, 'w') as f:
    json.dump(trade_schedule, f, indent=4)

print(f"Trade schedule saved to {output_file}")

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.




-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 17.8        |
|    ep_rew_mean          | 146         |
| time/                   |             |
|    fps                  | 303         |
|    iterations           | 10          |
|    time_elapsed         | 67          |
|    total_timesteps      | 20480       |
| train/                  |             |
|    approx_kl            | 0.006965261 |
|    clip_fraction        | 0.0469      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.32       |
|    explained_variance   | 0           |
|    learning_rate        | 0.0003      |
|    loss                 | 1.61e+04    |
|    n_updates            | 90          |
|    policy_gradient_loss | -0.009      |
|    std                  | 0.902       |
|    value_loss           | 3.9e+04     |
-----------------------------------------
------------------------------------------
| rollout/                |      

### Limit Order Implementation

In [7]:
# Defining the trading environment with market and limit orders
class TradingEnvWithOrders(gym.Env):
    """Environment whose agent chooses between a market order and a limit order each step.

    Action (float32, shape (3,)): [trade_size_fraction in [0, 1],
    order_type in [0, 1] (< 0.5 means market, otherwise limit),
    limit_price_offset in [-0.05, 0.05]].
    Observation (float32, shape (12,)): epoch seconds, the top quote levels read
    in `_get_observation`, and the shares remaining.
    Uses the classic gym API: `reset()` returns an observation and `step()`
    returns `(obs, reward, done, info)`.
    """

    def __init__(self, data, total_shares=1000, trading_horizon=390, benchmark = None):
        """Store the quote frame (re-indexed from 0), the inventory and horizon, and declare the spaces.

        `benchmark`, when given, must provide `compute_components(alpha, shares, idx)`;
        its slippage and market impact are added to the cost of executed trades.
        """
        super(TradingEnvWithOrders, self).__init__()
        self.data = data.reset_index(drop=True)
        self.total_shares = total_shares
        self.trading_horizon = trading_horizon
        self.current_step = 0
        self.shares_remaining = self.total_shares
        self.benchmark = benchmark
        self.done = False

        # Action space: [trade_size_fraction, order_type, limit_price_offset]
        self.action_space = spaces.Box(low=np.array([0, 0, -0.05], dtype=np.float32),
                                       high=np.array([1, 1, 0.05], dtype=np.float32),
                                       dtype=np.float32)

        # Observation space: 12 features related to market data and shares remaining
        self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(12,), dtype=np.float32)

    def reset(self):
        """Rewind to the first row with the full inventory and return the first observation."""
        self.current_step = 0
        self.shares_remaining = self.total_shares
        self.done = False
        return self._get_observation()

    def _get_observation(self):
        """Return the 12-feature observation for the current row, or zeros once the data is exhausted."""
        if self.current_step >= len(self.data):
            # Match the float32 dtype declared by observation_space.
            return np.zeros(self.observation_space.shape, dtype=np.float32)

        row = self.data.iloc[self.current_step]

        # Drop any timezone info so the subtraction against a naive epoch works.
        timestamp = row['timestamp'].replace(tzinfo=None)
        epoch_timestamp = (timestamp - pd.Timestamp("1970-01-01")).total_seconds()

        return np.array([epoch_timestamp, row['bid_price_1'], row['bid_size_1'], row['ask_price_1'], row['ask_size_1'],
                         row['bid_price_2'], row['bid_size_2'], row['ask_price_2'], row['ask_size_2'],
                         row['bid_price_3'], row['ask_price_3'], self.shares_remaining], dtype=np.float32)

    def step(self, action):
        """Submit one order and advance one row.

        The requested size is `int(trade_size_fraction * shares_remaining)`. The
        inventory only shrinks when the order fills, and benchmark costs
        (slippage and market impact, alpha=0.001) are charged only on executed
        size. The reward is the negative cost. The episode ends when the
        inventory is gone, the horizon is reached, or the data runs out.

        `info["trade_size"]` is the requested size; `info["executed_size"]` and
        `info["filled"]` report what actually happened.
        """
        row = self.data.iloc[self.current_step]
        trade_size_fraction, order_type_index, price_offset = action
        trade_size = int(trade_size_fraction * self.shares_remaining)
        is_market_order = bool(order_type_index < 0.5)

        # Executing the appropriate order type
        if is_market_order:
            transaction_cost, fill_status = self._execute_market_order(trade_size)
        else:
            transaction_cost, fill_status = self._execute_limit_order(trade_size, price_offset)

        executed_size = trade_size if fill_status else 0

        # Benchmark costs apply to executed shares only: an unfilled limit order
        # trades nothing, so it must not incur slippage or market impact.
        if self.benchmark is not None and executed_size > 0:
            slippage, market_impact = self.benchmark.compute_components(alpha=0.001, shares=executed_size, idx=self.current_step)
            transaction_cost += slippage + market_impact

        # Updating the inventory and computing the reward from transaction costs
        self.shares_remaining -= executed_size
        reward = -transaction_cost  # Negative reward as cost incurred

        self.current_step += 1
        # Also stop when the quote data is shorter than the horizon; otherwise
        # the next step would index past the last row.
        self.done = (
            self.shares_remaining <= 0
            or self.current_step >= self.trading_horizon
            or self.current_step >= len(self.data)
        )

        if self.done:
            next_state = np.zeros(self.observation_space.shape, dtype=np.float32)
        else:
            next_state = self._get_observation()

        # Information dictionary
        info = {
            "timestamp": row['timestamp'],  # row the order was evaluated against
            "trade_size": trade_size,
            "order_type": "market" if is_market_order else "limit",
            "executed_size": executed_size,
            "filled": bool(fill_status)
        }

        return next_state, reward, self.done, info

    def _execute_market_order(self, trade_size):
        """Return `(cost, True)` where cost is `trade_size * ask_price_1` of the current row.

        NOTE(review): the cost is the full notional at the ask, while the rest of
        the notebook prices trades on the bid side - confirm which side is intended.
        """
        current_price = self.data.iloc[self.current_step]['ask_price_1']
        transaction_cost = trade_size * current_price
        return transaction_cost, True

    def _execute_limit_order(self, trade_size, price_offset):
        """Try a limit order at `bid_price_1 * (1 + price_offset)`.

        The order fills in full when that price is at or above `ask_price_1`,
        returning `(trade_size * limit_price, True)`; otherwise nothing fills and
        `(0, False)` is returned.
        """
        row = self.data.iloc[self.current_step]
        adjusted_price = row['bid_price_1'] * (1 + price_offset)
        ask_price = row['ask_price_1']
        if adjusted_price >= ask_price:
            transaction_cost = trade_size * adjusted_price
            return transaction_cost, True
        else:
            return 0, False