## Load and describe orders data

In [1]:
import pandas as pd

# Read the raw order feed and parse its timestamp column.
orders = pd.read_csv('orders.csv')
orders["Timestamp"] = pd.to_datetime(orders["Timestamp"])

# Switch to the lowercase column names used by the rest of the notebook.
orders.columns = ['timestamp', 'group id', 'side', 'price', 'size']

# Cast the numeric columns in a single pass.
orders = orders.astype({"group id": 'int', "price": 'float', "size": 'int'})

# Order ids are 1-based and follow the row order of the file.
orders["order id"] = orders.index + 1
orders.head()

Unnamed: 0,timestamp,group id,side,price,size,order id
0,2024-09-30 20:26:01,5,buy,8.0,50,1
1,2024-09-30 20:26:11,14,sell,11.0,20,2
2,2024-09-30 20:26:14,10,sell,11.0,100,3
3,2024-09-30 20:26:15,12,sell,10.0,50,4
4,2024-09-30 20:26:16,7,buy,5.0,50,5


In [2]:
# Summary statistics for the timestamp and numeric columns of the order feed.
orders.describe()

Unnamed: 0,timestamp,group id,price,size,order id
count,100,100.0,100.0,100.0,100.0
mean,2024-09-30 20:36:43.260000768,10.37,11.3683,49.39,50.5
min,2024-09-30 20:26:01,1.0,5.0,5.0,1.0
25%,2024-09-30 20:32:24.500000,6.0,9.175,13.75,25.75
50%,2024-09-30 20:40:15,10.5,10.0,30.0,50.5
75%,2024-09-30 20:41:38.249999872,14.0,11.0,60.0,75.25
max,2024-09-30 20:43:14,20.0,100.0,500.0,100.0
std,,5.004352,9.508723,63.500242,29.011492


## Main structure of CLOB
- deal with each order and try to match a successful trade
- search the opposite side for acceptable prices, from best to worst, and within each price from the earliest order to the latest
- place in a proper position if not matched / partly matched
- update the CLOB
- record in the trade book (trade_df)

In [3]:
import logging
import os
import warnings
warnings.filterwarnings("ignore")

class CLOB():
    """Central limit order book that replays a table of orders.

    Incoming orders are matched against resting orders on the opposite side,
    best price first and, within one price level, in the order in which the
    resting orders were added. Any unfilled remainder rests in the book.

    Attributes:
        orders: DataFrame of orders; rows are looked up by ``order id - 1``.
        clob: ``{"buy": {price: [order, ...]}, "sell": {...}}`` where each
            resting order is a dict with keys "time", "order id", "size" and
            "group id". Fully filled orders stay in the lists with size 0.
        trade_df: every trade recorded since the last ``restart()``.
        new_df: trades recorded since the last ``log_trade()`` call.
        aggregated_clob: snapshot built by the last ``print_aggregated_clob()``.
    """

    # Column layout of the trade log (trade_df / new_df).
    TRADE_COLUMNS = ["trade_time", "buy_group_id", "sell_group_id", "price", "size"]
    # Column layout of the aggregated snapshot; "Size" appears twice (bid size, ask size).
    BOOK_COLUMNS = ["Size", "Bid Price", "Ask Price", "Size"]
    # One line per resting order in the textual dumps.
    _ORDER_LINE = "  Order ID: {:<4} Group ID: {:<4} Size: {:<4} Time: {}\n"

    def __init__(self, orders):
        """Configure logging to ``clob.log`` and start with an empty book.

        Args:
            orders: DataFrame with columns "timestamp", "group id", "side",
                "price", "size" and "order id".
        """
        log_path_to_clob = "clob.log"
        logging.basicConfig(filename=log_path_to_clob, level=logging.INFO, format='%(message)s', filemode='w')
        # rename to bid & ask in the final display
        self.orders = orders
        self.restart()

    def restart(self):
        """Drop all resting orders, recorded trades and the aggregated snapshot."""
        self.clob = {"buy": {}, "sell": {}}
        self.trade_df = pd.DataFrame(columns=self.TRADE_COLUMNS)
        self.new_df = pd.DataFrame(columns=self.TRADE_COLUMNS)
        self.aggregated_clob = pd.DataFrame(columns=self.BOOK_COLUMNS)

    def _best_price(self, side):
        """Return the best price on ``side`` that still has size, or None.

        "Best" is the highest price for "buy" and the lowest for "sell".
        Levels whose orders are all filled (size 0) are ignored, so a side
        made only of exhausted levels yields None instead of raising.
        """
        live_prices = [p for p in self.clob[side] if self.has_size_given_price(p, side)]
        if not live_prices:
            return None
        return max(live_prices) if side == "buy" else min(live_prices)

    def _aggregate_side(self, side):
        """Return ``[(price, total resting size), ...]`` for levels with size > 0."""
        return [
            (price, sum(order['size'] for order in resting))
            for price, resting in self.clob[side].items()
            if self.has_size_given_price(price, side)
        ]

    def _format_resting_orders(self, resting):
        """Render one text line per order in ``resting`` that still has size."""
        return "".join(
            self._ORDER_LINE.format(order['order id'], order.get('group id', 'N/A'), order['size'], order['time'])
            for order in resting
            if order['size'] > 0
        )

    def print_aggregated_clob(self):
        """Print the book aggregated by price level and store it in ``aggregated_clob``.

        Bids are listed from highest to lowest price and asks from lowest to
        highest; the shorter side is padded with empty strings.
        """
        row_format = "{:<16} {:<16} {:<16} {:<16}\n"

        log_message =  "-"*60 + "\n"
        log_message += " "*15 + "Current Aggregated CLOB state: \n"
        log_message += "-"*60 + "\n"
        log_message += row_format.format("Size", "Bid Price", "Ask Price", "Size")

        buy_orders = self._aggregate_side("buy")
        sell_orders = self._aggregate_side("sell")
        buy_orders.sort(key=lambda x: x[0], reverse=True)
        sell_orders.sort(key=lambda x: x[0])

        rows = []
        for i in range(max(len(buy_orders), len(sell_orders))):
            buy_price, buy_size = buy_orders[i] if i < len(buy_orders) else ("", "")
            sell_price, sell_size = sell_orders[i] if i < len(sell_orders) else ("", "")
            # Compare against the padding value so a price of 0 is still shown.
            bid_text = "" if buy_price == "" else f"{buy_price:.2f}"
            ask_text = "" if sell_price == "" else f"{sell_price:.2f}"
            log_message += row_format.format(buy_size, bid_text, ask_text, sell_size)
            rows.append([buy_size, buy_price, sell_price, sell_size])

        # Build the snapshot once instead of concatenating one row at a time.
        self.aggregated_clob = pd.DataFrame(rows, columns=self.BOOK_COLUMNS)

        log_message += "-"*60
        log_message += "\n"*3
        print(log_message)

    def print_top_orders(self):
        """Print the resting orders at the highest bid and at the lowest ask."""
        highest_bid = self._best_price("buy")
        lowest_ask = self._best_price("sell")

        log_message = "-"*80 + "\n"
        log_message += " "*35 + "Top Orders: \n"
        log_message += "-"*80 + "\n"

        if highest_bid is not None:
            log_message += f"Highest Bid: {round(highest_bid, 2):.2f}\n"
            log_message += self._format_resting_orders(self.clob["buy"][highest_bid])
        else:
            log_message += "No bids available.\n"

        if lowest_ask is not None:
            log_message += f"Lowest Ask: {round(lowest_ask, 2):.2f}\n"
            log_message += self._format_resting_orders(self.clob["sell"][lowest_ask])
        else:
            log_message += "No asks available.\n"

        log_message += "-"*80
        log_message += "\n"*3
        print(log_message)

    def print_trade(self):
        """Print every trade recorded so far (``trade_df``)."""
        log_message = "-"*80 + "\n"
        log_message += " "*30 + "Current Trade state: \n"
        log_message += "-"*80 + "\n"
        if self.trade_df.empty:
            log_message += "No trade happened"
        else:
            log_message += self.trade_df.to_string()
        log_message += "\n"
        log_message += "-"*80
        log_message += "\n"*3
        print(log_message)

    def log_clob(self):
        """Write every price level that still has size, with its orders, to the log."""
        log_message =  "-"*80 + "\n"
        log_message += " "*30 + "Current CLOB state: \n"
        log_message += "-"*80 + "\n"
        for side in ["buy", "sell"]:
            log_message += "Bid:\n" if side == "buy" else "Ask:\n"
            for price, resting in self.clob[side].items():
                if self.has_size_given_price(price, side):
                    log_message += f"Price: {price:.2f}\n"
                    log_message += self._format_resting_orders(resting)
            if side == "buy":
                log_message += "\n"
        log_message += "-"*80
        logging.info(log_message)

    # Record the details of all trades matched with all the necessary information (2%).
    def log_trade(self):
        """Write the trades recorded since the previous call to the log, then clear them."""
        log_message = "-"*80 + "\n"
        log_message += " "*30 + "Current Trade Done: \n"
        log_message += "-"*80 + "\n"
        if self.new_df.empty:
            log_message += "No trade happened"
        else:
            log_message += self.new_df.to_string()
        self.new_df = pd.DataFrame(columns=self.TRADE_COLUMNS)
        logging.info(log_message)

    def upload_clob_info(self, order, outstanding=""):
        """Add ``order`` to the book as a resting order.

        Args:
            order: mapping with "timestamp", "order id", "group id", "side",
                "price" and "size".
            outstanding: size left to rest; a falsy value means the order's
                full size.
        """
        if not outstanding:
            outstanding = order["size"]
        side = order["side"]
        price0 = float(order["price"])
        order_dic = {
            "time": order["timestamp"],
            "order id": int(order["order id"]),
            "size": int(outstanding),
            "group id": int(order["group id"])
            }
        self.clob[side].setdefault(price0, []).append(order_dic)

        # Keep levels ordered best-first: descending for bids, ascending for asks.
        self.clob[side] = dict(sorted(self.clob[side].items(), reverse=(side == "buy")))

    def upload_trade_info(self, trades):
        """Append matched trades to ``new_df`` and ``trade_df``.

        Each trade takes its time from the incoming order and its price from
        the resting order it hit.

        Args:
            trades: list of dicts with keys "pre-existing" and "latecomer"
                (order ids) and "trade size", as returned by ``match_trade``.
        """
        rows = []
        for trade in trades:
            # Order ids are used as 1-based row positions in self.orders.
            latecomer = self.orders.iloc[trade["latecomer"] - 1]
            resting = self.orders.iloc[trade["pre-existing"] - 1]
            group_by_side = {latecomer["side"]: latecomer["group id"],
                             resting["side"]: resting["group id"]}
            rows.append([latecomer["timestamp"],
                         group_by_side["buy"],
                         group_by_side["sell"],
                         resting["price"],
                         trade["trade size"]])
        if not rows:
            return
        batch = pd.DataFrame(rows, columns=self.TRADE_COLUMNS)
        self.new_df = pd.concat([self.new_df, batch], ignore_index=True)
        # Only this batch goes into the full log: new_df may still hold trades
        # from earlier steps when log_trade() has not been called in between,
        # and appending it would record those trades twice.
        self.trade_df = pd.concat([self.trade_df, batch], ignore_index=True)

    def has_size_given_price(self, price, side):
        """Return True if the level ``price`` on ``side`` has total size > 0.

        An unknown price prints an error message and returns False.
        """
        price = float(price)
        try:
            order_sizes = [order['size'] for order in self.clob[side][price]]
            return sum(order_sizes) > 0
        except KeyError:
            print("Error: no such price")
            return False

    def match_trade(self, order):
        """Match ``order`` against the opposite side of the book.

        Resting orders that are hit have their size reduced in place.

        Args:
            order: mapping with "side" ("buy" or "sell"), "price", "size" and
                "order id".

        Returns:
            ``(outstanding, matched_trades)``: the unfilled size and a list of
            dicts with keys "pre-existing", "latecomer" and "trade size".

        Raises:
            ValueError: if the order side is neither "buy" nor "sell".
        """
        matched_trades = []

        cur_side, cur_price, cur_outstanding = order["side"], float(order["price"]), int(order["size"])
        if cur_side not in ("buy", "sell"):
            raise ValueError(f"Unknown order side: {cur_side!r}")
        if cur_outstanding <= 0:
            # Nothing to fill; avoid recording zero-size or negative trades.
            return cur_outstanding, matched_trades
        opp_side = "buy" if cur_side == "sell" else "sell"

        if cur_side == "buy":
            # A buy accepts asks at or below its price, cheapest first.
            good_prices = sorted(p for p in self.clob[opp_side]
                                 if p <= cur_price and self.has_size_given_price(p, opp_side))
        else:
            # A sell accepts bids at or above its price, highest first.
            good_prices = sorted((p for p in self.clob[opp_side]
                                  if p >= cur_price and self.has_size_given_price(p, opp_side)),
                                 reverse=True)

        for good_price in good_prices:
            # Orders within a level are stored in arrival order.
            for opp_order in self.clob[opp_side][good_price]:
                opp_size = opp_order["size"]
                if opp_size == 0:
                    continue
                traded = min(opp_size, cur_outstanding)
                opp_order["size"] -= traded  # updates the resting order inside the book
                cur_outstanding -= traded
                matched_trades.append({"pre-existing": int(opp_order["order id"]),
                                       "latecomer": int(order["order id"]),
                                       "trade size": traded})
                if cur_outstanding == 0:
                    return 0, matched_trades
        return cur_outstanding, matched_trades

    # Print the bid-ask spread in the loop
    def get_bid_ask_spread(self):
        """Return lowest ask minus highest bid rounded to 2 decimals, or None if a side is empty."""
        highest_bid = self._best_price("buy")
        lowest_ask = self._best_price("sell")
        if highest_bid is None or lowest_ask is None:
            return None
        return round(lowest_ask - highest_bid, 2)

    def step_run(self, order, verbose=False):
        """Process one order: match it, record trades and rest any remainder.

        Args:
            order: one row of the orders table.
            verbose: when True, also write the new trades, the book and the
                spread to the log.
        """
        outstanding, matched_trades = self.match_trade(order)

        if matched_trades:
            self.upload_trade_info(matched_trades)
        if outstanding > 0:
            self.upload_clob_info(order, outstanding)

        if verbose:
            bid_ask_spread = self.get_bid_ask_spread()
            self.log_trade()
            self.log_clob()
            logging.info(f"Coming Order ID {order['order id']}: {order['side']} {order['size']} @ {order['price']} by Group {order['group id']}, Spread: {bid_ask_spread}")
            logging.info("-"*80+"\n"*3)

    def run(self):
        """Process every order held by this instance, in row order."""
        # Iterate the instance's own table rather than a notebook-level global.
        for index, order in self.orders.iterrows():
            self.step_run(order)
        print('Done. Please check the log file (clob.log) for the details of the CLOB and trades.')

In [4]:
# Create the order book over the loaded orders; this also configures logging to clob.log.
clob = CLOB(orders)

## Order Book State
- After each order submission, print the updated order book showing the top buy and sell orders (1%).

In [5]:
# Start from an empty book so that re-running this cell does not replay the
# orders on top of the state left by a previous run.
clob.restart()
# Feed the orders one at a time, logging each step to clob.log and printing
# the top of the book after every order.
for index, order in orders.iterrows():
    print(f"Coming Order ID {order['order id']}: {order['side']} {order['size']} @ {order['price']} by Group {order['group id']}")
    clob.step_run(order, verbose=True)
    clob.print_top_orders()

Coming Order ID 1: buy 50 @ 8.0 by Group 5
--------------------------------------------------------------------------------
                                   Top Orders: 
--------------------------------------------------------------------------------
Highest Bid: 8.00
  Order ID: 1    Group ID: 5    Size: 50   Time: 2024-09-30 20:26:01
No asks available.
--------------------------------------------------------------------------------



Coming Order ID 2: sell 20 @ 11.0 by Group 14
--------------------------------------------------------------------------------
                                   Top Orders: 
--------------------------------------------------------------------------------
Highest Bid: 8.00
  Order ID: 1    Group ID: 5    Size: 50   Time: 2024-09-30 20:26:01
Lowest Ask: 11.00
  Order ID: 2    Group ID: 14   Size: 20   Time: 2024-09-30 20:26:11
--------------------------------------------------------------------------------



Coming Order ID 3: sell 100 @ 11.0 by Group

- Implement a method to print the full order book for both bid and ask price levels with aggregated size for each level (2%).

In [6]:
# Replay all orders on an empty book, printing the aggregated book after each order.
clob.restart()
for index, order in orders.iterrows():
    print(f"Coming Order ID {order['order id']}: {order['side']} {order['size']} @ {order['price']} by Group {order['group id']}")
    clob.step_run(order)
    clob.print_aggregated_clob()
# aggregated_clob holds the snapshot built by the last print_aggregated_clob() call,
# i.e. the book after the final order.
clob.aggregated_clob.to_csv("final_clob.csv", index=False)

Coming Order ID 1: buy 50 @ 8.0 by Group 5
------------------------------------------------------------
               Current Aggregated CLOB state: 
------------------------------------------------------------
Size             Bid Price        Ask Price        Size            
50               8.00                                              
------------------------------------------------------------



Coming Order ID 2: sell 20 @ 11.0 by Group 14
------------------------------------------------------------
               Current Aggregated CLOB state: 
------------------------------------------------------------
Size             Bid Price        Ask Price        Size            
50               8.00             11.00            20              
------------------------------------------------------------



Coming Order ID 3: sell 100 @ 11.0 by Group 10
------------------------------------------------------------
               Current Aggregated CLOB state: 
------------------

## Trade Log
- Record the details of all trades matched with all the necessary information (2%).

In [7]:
# Replay all orders on an empty book, printing the cumulative trade log after each order.
clob.restart()
for index, order in orders.iterrows():
    print(f"Coming Order ID {order['order id']}: {order['side']} {order['size']} @ {order['price']} by Group {order['group id']}")
    clob.step_run(order)
    clob.print_trade()
# Timestamps are converted to strings before the Excel export
# (presumably so they are written as plain text — TODO confirm).
clob.trade_df["trade_time"] = clob.trade_df["trade_time"].astype(str)
clob.trade_df.to_excel("final_trade.xlsx", index=False)

Coming Order ID 1: buy 50 @ 8.0 by Group 5
--------------------------------------------------------------------------------
                              Current Trade state: 
--------------------------------------------------------------------------------
No trade happened
--------------------------------------------------------------------------------



Coming Order ID 2: sell 20 @ 11.0 by Group 14
--------------------------------------------------------------------------------
                              Current Trade state: 
--------------------------------------------------------------------------------
No trade happened
--------------------------------------------------------------------------------



Coming Order ID 3: sell 100 @ 11.0 by Group 10
--------------------------------------------------------------------------------
                              Current Trade state: 
--------------------------------------------------------------------------------
No trade happened


In [8]:
# trade_df (for now uses a one-row-per-trade design that does not show order ids; order ids are currently only used to quickly look up trade information: price, group id)

## Spread and Market Depth