## Main structure of CLOB
- deal with each order and try to match a successful trade
- search the matching prices from best to worst, then within each price from earliest to latest order (price-time priority)
- place in a proper position if not matched / partly matched
- update the CLOB
- record in the trade book (trade_df)

In [84]:
import logging
import os
import pandas as pd
import warnings
warnings.filterwarnings("ignore")

# Logging setup: a dedicated logger whose messages are written, unadorned,
# to the file named by log_path_to_clob (opened in overwrite mode).
log_path_to_clob = "clob.log"

logger = logging.getLogger('CLOBLogger')
logger.setLevel(logging.INFO)

# Attach the file handler only when the logger has none yet, so re-running
# this cell does not stack duplicate handlers on the same logger.
if len(logger.handlers) == 0:
    formatter = logging.Formatter('%(message)s')
    file_handler = logging.FileHandler(log_path_to_clob, mode='w')
    file_handler.setFormatter(formatter)
    logger.addHandler(file_handler)

class CLOB():
    """Central limit order book (CLOB) simulator.

    Orders are replayed one at a time. Each incoming order is matched against
    the opposite side of the book from the best price to the worst and, within
    a price level, from the earliest resting order to the latest. Any
    unmatched remainder rests in the book and every fill is recorded in
    ``trade_df``.

    Book layout: ``self.clob[side][price]`` is a list of resting-order dicts
    (keys ``"time"``, ``"order id"``, ``"size"``, ``"group id"``) kept in
    arrival order. Fully filled orders stay in their list with ``size == 0``,
    so every reader has to skip zero-size entries.
    """

    _TRADE_COLUMNS = ["trade_time", "buy_group_id", "sell_group_id", "price", "size"]
    _AGGREGATED_COLUMNS = ["Size", "Bid Price", "Ask Price", "Size"]
    _ORDER_LINE = "  Order ID: {:<4} Group ID: {:<4} Size: {:<4} Time: {}\n"

    def __init__(self, orders):
        """
        Initialize the CLOB with given orders.

        Args:
            orders (pd.DataFrame): DataFrame containing order details. Rows are
                read by label ("side", "price", "size", "order id", "group id",
                "timestamp") when matching, and by position (0 = timestamp,
                1 = group id, 2 = side, 3 = price) when recording trades.
        """
        self.orders = orders
        self.restart()

    def restart(self):
        """Reset CLOB and trade data"""
        self.clob = {"buy": {}, "sell": {}}
        self.trade_df = pd.DataFrame(columns=self._TRADE_COLUMNS)
        self.new_df = pd.DataFrame(columns=self.trade_df.columns)
        self.aggregated_clob = pd.DataFrame(columns=self._AGGREGATED_COLUMNS)

    # ------------------------------------------------------------------
    # private helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _banner(title, indent, width=80):
        """Return the three-line header (rule, indented title, rule)."""
        rule = "-" * width + "\n"
        return rule + " " * indent + title + "\n" + rule

    @staticmethod
    def _append_rows(frame, batch):
        """Return ``frame`` with the rows of ``batch`` appended.

        An empty ``frame`` is replaced by a copy of ``batch`` instead of being
        concatenated, which keeps the column dtypes of ``batch``.
        """
        if frame.empty:
            return batch.copy()
        return pd.concat([frame, batch], ignore_index=True)

    def _best_price(self, side):
        """Return the best price on ``side`` that still has open size, or None.

        "Best" is the highest price for "buy" and the lowest for "sell".
        Levels whose orders are all filled are ignored, so a side that only
        holds exhausted levels yields None instead of raising on an empty
        max()/min().
        """
        live_prices = [p for p in self.clob[side] if self.has_size_given_price(p, side)]
        if not live_prices:
            return None
        return max(live_prices) if side == "buy" else min(live_prices)

    def _aggregated_levels(self, side):
        """Return ``(price, total open size)`` per live level, best price first."""
        levels = [
            (price, sum(resting['size'] for resting in resting_orders))
            for price, resting_orders in self.clob[side].items()
            if self.has_size_given_price(price, side)
        ]
        levels.sort(key=lambda level: level[0], reverse=(side == "buy"))
        return levels

    def _order_lines(self, resting_orders):
        """Return one formatted line per order that still has open size."""
        return "".join(
            self._ORDER_LINE.format(
                resting['order id'], resting.get('group id', 'N/A'), resting['size'], resting['time'])
            for resting in resting_orders
            if resting['size'] > 0
        )

    # ------------------------------------------------------------------
    # printing / logging
    # ------------------------------------------------------------------
    def print_aggregated_clob(self):
        """Display the current aggregated state of the order book (buy/sell).

        Also rebuilds ``self.aggregated_clob`` with one row per displayed
        line; the shorter side is padded with empty strings.
        """
        row_format = "{:<16} {:<16} {:<16} {:<16}\n"
        bids = self._aggregated_levels("buy")    # descending by price
        asks = self._aggregated_levels("sell")   # ascending by price

        def fmt_price(price):
            # "" marks a padded cell; compare explicitly so 0.0 still prints.
            return f"{price:.2f}" if price != "" else ""

        log_message = self._banner("Current Aggregated CLOB state: ", 15, width=60)
        log_message += row_format.format("Size", "Bid Price", "Ask Price", "Size")

        rows = []
        for i in range(max(len(bids), len(asks))):
            buy_price, buy_size = bids[i] if i < len(bids) else ("", "")
            sell_price, sell_size = asks[i] if i < len(asks) else ("", "")
            log_message += row_format.format(buy_size, fmt_price(buy_price), fmt_price(sell_price), sell_size)
            rows.append([buy_size, buy_price, sell_price, sell_size])
        # Build the frame once instead of concatenating row by row.
        self.aggregated_clob = pd.DataFrame(rows, columns=self._AGGREGATED_COLUMNS)

        log_message += "-"*60
        log_message += "\n"*3
        print(log_message)

    def print_top_orders(self):
        """Print the highest bid and lowest ask orders."""
        highest_bid = self._best_price("buy")
        lowest_ask = self._best_price("sell")

        log_message = self._banner("Top Orders: ", 35)

        if highest_bid is not None:
            log_message += f"Highest Bid: {round(highest_bid, 2):.2f}\n"
            log_message += self._order_lines(self.clob["buy"][highest_bid])
        else:
            log_message += "No bids available.\n"

        if lowest_ask is not None:
            log_message += f"Lowest Ask: {round(lowest_ask, 2):.2f}\n"
            log_message += self._order_lines(self.clob["sell"][lowest_ask])
        else:
            log_message += "No asks available.\n"

        log_message += "-"*80
        log_message += "\n"*3
        print(log_message)

    def print_trade(self):
        """Print the current trade state."""
        log_message = self._banner("Current Trade state: ", 30)
        if self.trade_df.empty:
            log_message += "No trade happened"
        else:
            log_message += self.trade_df.to_string()
        log_message += "\n"
        log_message += "-"*80
        log_message += "\n"*3
        print(log_message)

    def log_clob(self, verbose=False,is_print=False):
        """Log the current CLOB state.

        Args:
            verbose (bool, optional): If True, write the state to the
                module-level ``logger``.
            is_print (bool, optional): If True, print the state to stdout.
        """
        log_message = self._banner("Current CLOB state: ", 30)
        for side in ["buy", "sell"]:
            log_message += "Bid:\n" if side == "buy" else "Ask:\n"
            for price, resting_orders in self.clob[side].items():
                if self.has_size_given_price(price, side):
                    log_message += f"Price: {price:.2f}\n"
                    log_message += self._order_lines(resting_orders)
            if side == "buy":
                log_message += "\n"
        log_message += "-"*80
        if is_print:
            print(log_message)
        if verbose:
            logger.info(log_message)

    def log_trade(self,verbose=False):
        """Log the trades matched since the previous call, then clear them.

        ``self.new_df`` (the pending batch) is always reset, whether or not
        anything is written to the logger.
        """
        log_message = self._banner("Current Trade Done: ", 30)
        if self.new_df.empty:
            log_message += "No trade happened"
        else:
            log_message += self.new_df.to_string()
        self.new_df = pd.DataFrame(columns=self.trade_df.columns)
        if verbose:
            logger.info(log_message)

    # ------------------------------------------------------------------
    # book / trade updates
    # ------------------------------------------------------------------
    def upload_clob_info(self, order, outstanding=""):
        """
        Add order to the CLOB.

        Args:
            order (dict): Order details.
            outstanding (int, optional): Outstanding size. Defaults to order's size.
        """
        if not outstanding:
            outstanding = order["size"]
        side = order["side"]
        price0 = float(order["price"])
        order_dic = {
            "time": order["timestamp"],
            "order id": int(order["order id"]),
            "size": int(outstanding),
            "group id": int(order["group id"]),
        }
        book_side = self.clob[side]
        if price0 in book_side:
            # Existing level: appending keeps time priority and key order.
            book_side[price0].append(order_dic)
            return
        book_side[price0] = [order_dic]
        # A new level was added: keep bids descending and asks ascending.
        self.clob[side] = dict(sorted(book_side.items(), reverse=(side == "buy")))

    def upload_trade_info(self, trades):
        """
        Record the details of all trades matched.

        Each call appends exactly the given trades to both ``new_df`` (the
        batch pending for ``log_trade``) and ``trade_df`` (the full history),
        so repeated calls never duplicate earlier trades.

        Args:
            trades (list): List of matched trade dictionaries with keys
                "pre-existing" and "latecomer" (1-based order ids that are
                used as row positions in ``self.orders``) and "trade size".
        """
        if not trades:
            return
        rows = []
        for trade in trades:
            late = trade["latecomer"] - 1
            early = trade["pre-existing"] - 1
            # Map each order's side (column 2) to its group id (column 1).
            group_by_side = {
                self.orders.iloc[late, 2]: self.orders.iloc[late, 1],
                self.orders.iloc[early, 2]: self.orders.iloc[early, 1],
            }
            rows.append([
                self.orders.iloc[late, 0],    # trade time = latecomer's timestamp
                group_by_side["buy"],         # buy_group_id
                group_by_side["sell"],        # sell_group_id
                self.orders.iloc[early, 3],   # trade prints at the resting order's price
                trade["trade size"],          # size
            ])
        batch = pd.DataFrame(rows, columns=self._TRADE_COLUMNS)
        self.new_df = self._append_rows(self.new_df, batch)
        self.trade_df = self._append_rows(self.trade_df, batch)

    def has_size_given_price(self, price, side):
        """
        Check if there is any outstanding size for a given price and side.

        Args:
            price (float): The price level.
            side (str): 'buy' or 'sell'.

        Returns:
            bool: True if there is outstanding size, False otherwise (also
            when the price level does not exist, in which case a message is
            printed).
        """
        price = float(price)
        try:
            resting_orders = self.clob[side][price]
        except KeyError:
            print("Error: no such price")
            return False
        return sum(resting['size'] for resting in resting_orders) > 0

    def match_trade(self, order):
        """
        Match an incoming order with existing orders in the CLOB.

        Resting orders that get filled are updated in place (their size is
        reduced, down to 0 when fully filled).

        Args:
            order (dict): Incoming order details.

        Returns:
            tuple: (outstanding_size, matched_trades)
        """
        matched_trades = []

        cur_side, cur_price, cur_outstanding = order["side"], float(order["price"]), int(order["size"])
        opp_side = "buy" if cur_side == "sell" else "sell"
        incoming_id = int(order["order id"])

        if cur_side == "buy":
            # A buy can lift any ask priced at or below its limit; cheapest first.
            good_prices = sorted(
                p for p in self.clob[opp_side]
                if p <= cur_price and self.has_size_given_price(p, opp_side))
        else:
            # A sell can hit any bid priced at or above its limit; highest first.
            good_prices = sorted(
                (p for p in self.clob[opp_side]
                 if p >= cur_price and self.has_size_given_price(p, opp_side)),
                reverse=True)

        for good_price in good_prices:
            # Each level's list is in arrival order, which gives time priority.
            for opp_order in self.clob[opp_side][good_price]:
                opp_size = opp_order["size"]
                if opp_size == 0:
                    continue
                if opp_size >= cur_outstanding:
                    # The resting order absorbs the whole remainder.
                    matched_trades.append({"pre-existing": int(opp_order["order id"]),
                                           "latecomer": incoming_id,
                                           "trade size": cur_outstanding})
                    opp_order["size"] -= cur_outstanding
                    return 0, matched_trades
                # The resting order is consumed; keep walking the book.
                cur_outstanding -= opp_size
                opp_order["size"] = 0
                matched_trades.append({"pre-existing": int(opp_order["order id"]),
                                       "latecomer": incoming_id,
                                       "trade size": opp_size})
        return cur_outstanding, matched_trades

    def get_bid_ask_metrix(self):
        """
        Get the bid-ask spread and top-of-book depth metrics.

        Returns:
            tuple: ``(spread_dollars, spread_ticks, spread_bps,
            avg_market_depth)``, each rounded to 2 decimals, or four ``None``
            values when either side has no price level with open size.
        """
        highest_bid = self._best_price("buy")
        lowest_ask = self._best_price("sell")
        # A zero price is treated like a missing quote here: the basis-point
        # spread below divides by the best bid.
        if not highest_bid or not lowest_ask:
            return None, None, None, None

        spread_dollars = round(lowest_ask - highest_bid, 2)
        spread_ticks = round(spread_dollars / 0.01, 2)  # one tick is $0.01
        spread_bps = round((spread_dollars / highest_bid) * 10000, 2)  # basis points of the best bid

        best_bid_quantity = sum(resting['size'] for resting in self.clob["buy"][highest_bid])
        best_ask_quantity = sum(resting['size'] for resting in self.clob["sell"][lowest_ask])
        best_bid_value = best_bid_quantity * highest_bid
        best_ask_value = best_ask_quantity * lowest_ask
        avg_market_depth = round((best_bid_value + best_ask_value) / 2, 2)
        return spread_dollars, spread_ticks, spread_bps, avg_market_depth

    # ------------------------------------------------------------------
    # simulation drivers
    # ------------------------------------------------------------------
    def step_run(self, order, verbose=False,is_print_step=True,is_print_matrix=True):
        """
        Process a single order.

        Args:
            order (dict): Order details.
            verbose (bool, optional): If True, log detailed information. Defaults to False.
            is_print_step (bool, optional): If True, print the incoming order.
            is_print_matrix (bool, optional): If True, print the spread and
                market-depth metrics computed after the order was processed.
        """
        outstanding, matched_trades = self.match_trade(order)

        if matched_trades:
            self.upload_trade_info(matched_trades)
        if outstanding > 0:
            self.upload_clob_info(order, outstanding)

        spread_dollars, spread_ticks, spread_bps, average_market_depth = self.get_bid_ask_metrix()

        self.log_trade(verbose)
        self.log_clob(verbose)

        order_line = f"Coming Order ID {order['order id']}: {order['side']} {order['size']} @ {order['price']} by Group {order['group id']}"
        metrics_line = f"Spread: ${spread_dollars}, Ticks: {spread_ticks}, BPS: {spread_bps}, Average Market Depth: ${average_market_depth}"
        if is_print_step:
            print(order_line)
        if is_print_matrix:
            print(metrics_line)
        if verbose:
            logger.info(order_line + "\n" + metrics_line)
            logger.info("-"*80+"\n"*3)

    def run(self, verbose=False,is_print_top=False,is_print_aggregated=False,is_print_trade=False,is_print_step=True,is_print_matrix=False): 
        """Run the CLOB simulation for all orders.

        After each order the optional reports are printed in this order: top
        of book, aggregated book, cumulative trades.
        """
        for _, order in self.orders.iterrows():
            self.step_run(order,verbose,is_print_step,is_print_matrix)
            if is_print_top:
                self.print_top_orders()
            if is_print_aggregated:
                self.print_aggregated_clob()
            if is_print_trade:
                self.print_trade()
        print('Done. Please check the log file (clob.log) for the details of the CLOB and trades.')

# Load and initialize data

In [85]:
import pandas as pd

# Read the raw order feed, parse its timestamps and rename the columns to
# the labels the CLOB class reads ("side", "price", "size", ...).
orders = pd.read_csv('orders.csv')
orders["Timestamp"] = pd.to_datetime(orders["Timestamp"])
orders.columns = ['timestamp', 'group id', 'side', 'price', 'size']

# Enforce the numeric dtypes in a single pass.
orders = orders.astype({"group id": "int", "price": "float", "size": "int"})

# Order ids are 1-based and follow the row index.
orders["order id"] = orders.index + 1

# Quick sanity check: first rows, then summary statistics.
for summary in (orders.head(), orders.describe()):
    print(summary)

clob = CLOB(orders)

            timestamp  group id  side  price  size  order id
0 2024-09-30 20:26:01         5   buy    8.0    50         1
1 2024-09-30 20:26:11        14  sell   11.0    20         2
2 2024-09-30 20:26:14        10  sell   11.0   100         3
3 2024-09-30 20:26:15        12  sell   10.0    50         4
4 2024-09-30 20:26:16         7   buy    5.0    50         5
         group id       price        size    order id
count  100.000000  100.000000  100.000000  100.000000
mean    10.370000   11.368300   49.390000   50.500000
std      5.004352    9.508723   63.500242   29.011492
min      1.000000    5.000000    5.000000    1.000000
25%      6.000000    9.175000   13.750000   25.750000
50%     10.500000   10.000000   30.000000   50.500000
75%     14.000000   11.000000   60.000000   75.250000
max     20.000000  100.000000  500.000000  100.000000


## Order Book State

In [86]:
# Replay every order and, after each one, print the highest bid and lowest
# ask together with the open orders resting at those prices.
clob.run(verbose=False,is_print_top=True)

Coming Order ID 1: buy 50 @ 8.0 by Group 5
--------------------------------------------------------------------------------
                                   Top Orders: 
--------------------------------------------------------------------------------
Highest Bid: 8.00
  Order ID: 1    Group ID: 5    Size: 50   Time: 2024-09-30 20:26:01
No asks available.
--------------------------------------------------------------------------------



Coming Order ID 2: sell 20 @ 11.0 by Group 14
--------------------------------------------------------------------------------
                                   Top Orders: 
--------------------------------------------------------------------------------
Highest Bid: 8.00
  Order ID: 1    Group ID: 5    Size: 50   Time: 2024-09-30 20:26:01
Lowest Ask: 11.00
  Order ID: 2    Group ID: 14   Size: 20   Time: 2024-09-30 20:26:11
--------------------------------------------------------------------------------



Coming Order ID 3: sell 100 @ 11.0 by Group

- Implement the function to print the full order book for both bid and ask price levels, with the aggregated size for each level. (1.5%)

In [87]:
# Start from an empty book and replay all orders, printing the aggregated
# book after each one; clob.aggregated_clob then holds the last printed
# snapshot, which is saved to CSV.
clob.restart()
clob.run(verbose=False,is_print_aggregated=True)
clob.aggregated_clob.to_csv("final_clob.csv", index=False)

Coming Order ID 1: buy 50 @ 8.0 by Group 5
------------------------------------------------------------
               Current Aggregated CLOB state: 
------------------------------------------------------------
Size             Bid Price        Ask Price        Size            
50               8.00                                              
------------------------------------------------------------



Coming Order ID 2: sell 20 @ 11.0 by Group 14
------------------------------------------------------------
               Current Aggregated CLOB state: 
------------------------------------------------------------
Size             Bid Price        Ask Price        Size            
50               8.00             11.00            20              
------------------------------------------------------------



Coming Order ID 3: sell 100 @ 11.0 by Group 10
------------------------------------------------------------
               Current Aggregated CLOB state: 
------------------

# Show the order book state after the 5th order, the 20th order, and the last order.

In [88]:
# Replay the order stream silently from an empty book and print the
# aggregated book at the chosen row labels (the 5th, 20th and last orders).
clob.restart()
index = [4, 19, len(orders) - 1]
for j, order in orders.iterrows():
    clob.step_run(order, verbose=False, is_print_step=False, is_print_matrix=False)
    if j not in index:
        continue
    print(f"Order {j+1}")
    clob.print_aggregated_clob()

TypeError: step_run() got an unexpected keyword argument 'is_print_step'

In [None]:
# Replay the order stream silently from an empty book and print the
# per-order book state at the chosen row labels (the 5th, 20th and last
# orders).
clob.restart()
index = [4, 19, len(orders) - 1]
for j, order in orders.iterrows():
    clob.step_run(order, verbose=False, is_print_step=False, is_print_matrix=False)
    if j not in index:
        continue
    print(f"Order {j+1}")
    # Print only; do not write this snapshot to the log file.
    clob.log_clob(verbose=False, is_print=True)

Order 5
--------------------------------------------------------------------------------
                              Current CLOB state: 
--------------------------------------------------------------------------------
Bid:
Price: 8.00
  Order ID: 1    Group ID: 5    Size: 50   Time: 2024-09-30 20:26:01
Price: 5.00
  Order ID: 5    Group ID: 7    Size: 50   Time: 2024-09-30 20:26:16

Ask:
Price: 10.00
  Order ID: 4    Group ID: 12   Size: 50   Time: 2024-09-30 20:26:15
Price: 11.00
  Order ID: 2    Group ID: 14   Size: 20   Time: 2024-09-30 20:26:11
  Order ID: 3    Group ID: 10   Size: 100  Time: 2024-09-30 20:26:14
--------------------------------------------------------------------------------
Order 20
--------------------------------------------------------------------------------
                              Current CLOB state: 
--------------------------------------------------------------------------------
Bid:
Price: 10.00
  Order ID: 15   Group ID: 3    Size: 40   Time: 202

## Trade Log
- Record the details of all trades matched with all the necessary information (2%).

In [None]:
# Replay all orders from an empty book, printing the cumulative trade table
# after each order.
clob.restart()
clob.run(verbose=False,is_print_trade=True)
# Convert the trade timestamps to strings, then export the trade log to Excel.
clob.trade_df["trade_time"] = clob.trade_df["trade_time"].astype(str)
clob.trade_df.to_excel("final_trade.xlsx", index=False)

Coming Order ID 1: buy 50 @ 8.0 by Group 5
Spread: $None, Ticks: None, BPS: None, Average Market Depth: $None
--------------------------------------------------------------------------------
                              Current Trade state: 
--------------------------------------------------------------------------------
No trade happened
--------------------------------------------------------------------------------



Coming Order ID 2: sell 20 @ 11.0 by Group 14
Spread: $3.0, Ticks: 300.0, BPS: 3750.0, Average Market Depth: $310.0
--------------------------------------------------------------------------------
                              Current Trade state: 
--------------------------------------------------------------------------------
No trade happened
--------------------------------------------------------------------------------



Coming Order ID 3: sell 100 @ 11.0 by Group 10
Spread: $3.0, Ticks: 300.0, BPS: 3750.0, Average Market Depth: $860.0
-------------------------

# Spread and Market Depth

In [None]:
# Replay all orders from an empty book with logging to the CLOB logger
# enabled, printing the top-of-book orders and the spread / market-depth
# metrics after each order.
clob.restart()
clob.run(verbose=True,is_print_top=True,is_print_matrix=True)

TypeError: run() got an unexpected keyword argument 'is_print_matrix'