In [1]:
import pandas as pd
import numpy as np
import datetime as dt

## Global Variables
# Column name -> position of that field inside one quote / trade row.
# The tuples list the columns in row order, so enumerate() yields the index.
quote_name = {col: pos for pos, col in enumerate(('TIME_M', 'BID', 'BIDSIZ', 'ASK', 'ASKSIZ'))}
trade_name = {col: pos for pos, col in enumerate(('TIME_M', 'SIZE', 'PRICE'))}


def t_s(time):
    """Convert a colon-separated "H:M:S" time string to seconds.

    The first three colon-separated fields are parsed with ``float`` (so the
    seconds field may carry a fractional part) and combined as
    ``hours * 3600 + minutes * 60 + seconds``. Any further fields are ignored.

    Raises IndexError if fewer than three fields are present and ValueError if
    a field is not a valid float.
    """
    fields = time.split(":")
    hours = float(fields[0])
    minutes = float(fields[1])
    seconds = float(fields[2])
    return hours * 3600 + minutes * 60 + seconds


class OrderBook:
    """Price-level order book rebuilt from a merged stream of quotes and trades.

    Each side of the book is a dict mapping price -> size. A sorted list of
    the prices on each side (best price first) is cached in ``ask_prices`` /
    ``bid_prices`` and refreshed by ``update_bp`` after every event.

    Rows passed to ``each_quote`` / ``each_trade`` are indexed through the
    module-level ``quote_name`` / ``trade_name`` column maps.

    Parameters
    ----------
    depth : int
        Number of price levels per side reported by ``show_orderbook``.
    verbose : bool
        When True (the default, matching the historical behaviour) every
        processed event is echoed with ``print``. Pass False to replay a long
        stream quietly.
    lot_size : number
        How many trade-size units make up one quote-size unit. Trade sizes are
        divided by this value before being netted against a level. The default
        of 1 keeps the historical behaviour of treating both sizes as the same
        unit.
        NOTE(review): the sample data suggests quote sizes are round lots and
        trade sizes are shares -- confirm against the data spec and pass
        ``lot_size=100`` if so.
    """

    def __init__(self, depth=5, verbose=True, lot_size=1):
        self.depth = depth
        self.verbose = verbose
        self.lot_size = lot_size
        self.bids = {}        # bid price -> size
        self.asks = {}        # ask price -> size
        self.bid_prices = []  # bid prices, highest (best) first
        self.ask_prices = []  # ask prices, lowest (best) first
        self.time = 0         # timestamp of the last processed event

    def update_bp(self):
        """Refresh the cached, best-first price lists from the level dicts."""
        self.ask_prices = sorted(self.asks)
        self.bid_prices = sorted(self.bids, reverse=True)

    def each_quote(self, quote):
        """Apply one quote row to the book.

        The quoted bid/ask levels are inserted (or overwritten) with the
        quoted sizes. Any resting bid priced above the new bid, and any
        resting ask priced below the new ask, is removed.
        """
        if self.verbose:
            print(str(quote[1:]))
        self.time = quote[quote_name['TIME_M']]

        # Bids: set the quoted level, then drop everything priced above it.
        bid = quote[quote_name['BID']]
        self.bids[bid] = quote[quote_name['BIDSIZ']]
        for price in [p for p in self.bids if p > bid]:
            del self.bids[price]

        # Asks: set the quoted level, then drop everything priced below it.
        ask = quote[quote_name['ASK']]
        self.asks[ask] = quote[quote_name['ASKSIZ']]
        for price in [p for p in self.asks if p < ask]:
            del self.asks[price]

        self.update_bp()

    def _trade_in_quote_units(self, trade_size):
        """Express a trade size in the unit used for quote sizes."""
        # Skip the division for the default so sizes keep their original type.
        if self.lot_size == 1:
            return trade_size
        return trade_size / self.lot_size

    def _consume_levels(self, levels, trade_price, trade_size, ascending):
        """Remove the liquidity that a trade at ``trade_price`` takes from one side.

        ``levels`` is walked best price first (ascending for asks, descending
        for bids). Levels strictly better than the trade price are removed and
        their sizes accumulated. The level at the trade price is reduced by
        whatever part of the trade the removed levels did not cover, and
        removed if nothing is left. Worse-priced levels are untouched.
        """
        traded = self._trade_in_quote_units(trade_size)
        swept = 0
        for price in sorted(levels, reverse=not ascending):
            better = price < trade_price if ascending else price > trade_price
            if better:
                swept += levels[price]
                del levels[price]
            elif price == trade_price:
                # Only touch this level if the better levels did not already
                # account for the whole trade.
                if swept < traded:
                    remain = levels[price] + swept - traded
                    if remain > 0:
                        levels[price] = remain
                    else:
                        del levels[price]
            else:
                break

    def sell_trade_update(self, trade_price, trade_size):
        """Update the ask side after a trade executed against resting sell orders."""
        self._consume_levels(self.asks, trade_price, trade_size, ascending=True)

    def buy_trade_update(self, trade_price, trade_size):
        """Update the bid side after a trade executed against resting buy orders."""
        self._consume_levels(self.bids, trade_price, trade_size, ascending=False)

    def each_trade(self, trade):
        """Apply one trade row to the book and return its inferred direction.

        Returns
        -------
        -1   the trade price is at or above the best ask (resting sell limit
             orders were executed; the ask side is updated),
        1    the trade price is at or below the best bid (resting buy limit
             orders were executed; the bid side is updated),
        None the trade printed strictly inside the spread, or the relevant
             side of the book is empty; the book is left unchanged.
        """
        if self.verbose:
            print(str(trade[1:]))
        self.time = trade[trade_name['TIME_M']]

        direct = None
        trade_price = trade[trade_name['PRICE']]
        trade_size = trade[trade_name['SIZE']]
        if len(self.ask_prices) > 0 and trade_price >= self.ask_prices[0]:
            direct = -1
            self.sell_trade_update(trade_price, trade_size)
        elif len(self.bid_prices) > 0 and trade_price <= self.bid_prices[0]:
            direct = 1
            self.buy_trade_update(trade_price, trade_size)
        elif self.verbose:
            print('Trade at midprice')
        self.update_bp()
        return direct

    def show_orderbook(self):
        """Return a flat snapshot of the top ``depth`` levels.

        Layout: ``[time, ask_p1, ask_s1, bid_p1, bid_s1, ask_p2, ...]`` with
        ``1 + 4 * depth`` entries. Missing levels are reported as price 0 and
        size 0.
        """
        def pad(prices):
            # New list, right-padded with zeros up to `depth` entries.
            return prices + [0] * (self.depth - len(prices))

        asks = pad(self.ask_prices)
        bids = pad(self.bid_prices)
        row = [self.time]
        for level in range(self.depth):
            ask_p, bid_p = asks[level], bids[level]
            row.extend([ask_p, self.asks.get(ask_p, 0), bid_p, self.bids.get(bid_p, 0)])
        return row


In [2]:
Quote_dir = 'quote_intc_110816.csv'
Trade_dir = 'trade_intc_110816.csv'

# --- Read the raw quote and trade files ------------------------------------
current = dt.datetime.now()
print('Begin Read')
df_quote = pd.read_csv(Quote_dir)
df_trade = pd.read_csv(Trade_dir)

# Column name -> position within one row of the arrays built below.
quote_name = {'TIME_M': 0, 'BID': 1, 'BIDSIZ': 2, 'ASK': 3, 'ASKSIZ': 4}
trade_name = {'TIME_M': 0, 'SIZE': 1, 'PRICE': 2}
# Select the columns in the order given by the maps, so the array layout and
# the index maps cannot drift apart, then drop down to plain NumPy arrays.
df_quote = df_quote[sorted(quote_name, key=quote_name.get)].values
df_trade = df_trade[sorted(trade_name, key=trade_name.get)].values
print('Finish Read', (dt.datetime.now() - current).total_seconds())

# --- Convert the time column from "H:M:S" strings to seconds ---------------
current = dt.datetime.now()
print('Begin Time Process')
## Timestamp processing
# otypes is given explicitly: without it np.vectorize must call t_s on the
# first element to infer the output type and raises ValueError on an empty
# array (e.g. a file with a header but no rows).
vt_s = np.vectorize(t_s, otypes=[float])
df_quote[:, quote_name['TIME_M']] = vt_s(df_quote[:, quote_name['TIME_M']])
df_trade[:, trade_name['TIME_M']] = vt_s(df_trade[:, trade_name['TIME_M']])

## Given start and end time, cut the trade and quote data
def time_selection(data, start="09:30:00", end="16:00:00"):
    """Keep only the rows whose timestamp lies in the window ``(start, end]``.

    Parameters
    ----------
    data : 2-D array
        Rows of events; column 0 must hold the timestamp in the same unit
        that ``t_s`` returns (seconds).
    start, end : str
        Window bounds as "H:M:S" strings, converted with ``t_s``. The start
        bound is exclusive and the end bound inclusive. The defaults
        reproduce the previously hard-coded 09:30:00-16:00:00 window.

    Returns
    -------
    The rows of ``data`` selected by a boolean mask, in their original order.
    """
    start_time = t_s(start)
    end_time = t_s(end)
    time_line = data[:, 0]
    return data[(time_line > start_time) & (time_line <= end_time)]

# Restrict both event streams to the selected time window (quotes first,
# then trades) and record how many events remain in each.
df_quote, df_trade = map(time_selection, (df_quote, df_trade))
n_trade, n_quote = len(df_trade), len(df_quote)
elapsed = dt.datetime.now() - current
print('Finish Time process', elapsed.total_seconds())

# Start from an empty order book that reports five levels per side.
orderbook = OrderBook(depth=5)

# Judge the data is quote or trade
def judge_quote(trade_index, quote_index):
    """Return True when the pending quote should be processed before the pending trade.

    Compares column 0 (the timestamp) of ``df_trade[trade_index]`` and
    ``df_quote[quote_index]``. The quote goes first only when its timestamp is
    strictly earlier than the trade's; on a tie the trade goes first.
    """
    trade_time = df_trade[trade_index][0]
    quote_time = df_quote[quote_index][0]
    return bool(trade_time > quote_time)

# Replay quotes and trades in timestamp order (two-pointer merge over the two
# time-sorted arrays), printing the book after every event.
MAX_EVENTS = 200  # cap on replayed events so the printed preview stays short
dummy = 0         # number of events replayed so far
trade_index = 0
quote_index = 0
print(orderbook.show_orderbook())
# Continue while either stream still has events. The previous `!= n - 1`
# test skipped the last row of each stream, indexed past the end of an empty
# stream, and dropped the tail of one stream once the other ran out.
while (trade_index < n_trade or quote_index < n_quote) and dummy < MAX_EVENTS:
    if trade_index >= n_trade:
        next_is_quote = True        # only quotes left
    elif quote_index >= n_quote:
        next_is_quote = False       # only trades left
    else:
        next_is_quote = judge_quote(trade_index, quote_index)

    if next_is_quote:
        orderbook.each_quote(df_quote[quote_index])
        quote_index += 1
    else:
        orderbook.each_trade(df_trade[trade_index])
        trade_index += 1
    # Drop the leading timestamp; show only the price/size levels.
    print(orderbook.show_orderbook()[1:])
    dummy += 1

Begin Read
Finish Read 2.036848
Begin Time Process
Finish Time process 1.383676
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[34.6 7 34.7 4]
[34.7, 4, 34.6, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[34.6 7 34.7 4]
[34.7, 4, 34.6, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[69 34.69]
Trade at midprice
[34.7, 4, 34.6, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[69 34.69]
Trade at midprice
[34.7, 4, 34.6, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[34.53 3 34.75 3]
[34.75, 3, 34.53, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[34.6 7 34.7 4]
[34.7, 4, 34.6, 7, 34.75, 3, 34.53, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[34.6 4 34.7 4]
[34.7, 4, 34.6, 4, 34.75, 3, 34.53, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[300 34.6]
[34.7, 4, 34.53, 3, 34.75, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[100 34.66]
Trade at midprice
[34.7, 4, 34.53, 3, 34.75, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[34.59 7 34.7 4]
[34.7, 4, 34.59, 7, 34.75, 