In [97]:
import json
import time
from collections import defaultdict
from typing import Sequence
import numpy as np
import pandas as pd
from bisect import bisect_left
from tqdm import tqdm

In [98]:
def _load_json(path):
    """Read a UTF-8 encoded JSON file and return the parsed object."""
    with open(path, 'r', encoding='utf-8') as json_file:
        return json.load(json_file)


# Raw inputs: depth diffs, trades and the initial order-book snapshot.
diffs = _load_json('diffs.json')
trades = _load_json('trades.json')
init_lob = _load_json('init_lob.json')

# Convert both sides of the snapshot to float arrays.
for _book_side in ('bids', 'asks'):
    init_lob[_book_side] = np.array(init_lob[_book_side]).astype(float)

In [100]:
# Drop every diff whose final update id ('u') is not newer than the snapshot;
# those changes are already reflected in `init_lob`.
snapshot_update_id = init_lob['lastUpdateId']
first_new_diff = next(
    (idx for idx, diff_event in enumerate(diffs) if diff_event['u'] > snapshot_update_id),
    len(diffs),  # nothing is newer: slice to an empty list instead of running off the end
)
diffs = diffs[first_new_diff:]
if not diffs:
    raise ValueError('no diff is newer than the snapshot lastUpdateId; cannot synchronise the book')

# Drop every trade whose event time ('E') is not later than the first kept diff.
first_diff_time = diffs[0]['E']
first_new_trade = next(
    (idx for idx, trade_event in enumerate(trades) if trade_event['E'] > first_diff_time),
    len(trades),  # every trade is too old: keep none rather than raising IndexError
)
trades = trades[first_new_trade:]

In [101]:
# Trader identifiers: 0 is the historical market flow, 1 is the market maker (MM).
MARKET_ID, MM_ID = 0, 1

# Order sides.
BUY, SELL = 0, 1

# Number of decimal places passed to round() for prices and amounts
# (these are digit counts, not tick sizes).
PRICE_TICK, AMOUNT_TICK = 2, 8

class LimitOrder:
    """A single limit order: price, amount, side and the id of its owner.

    Price and amount are rounded to PRICE_TICK / AMOUNT_TICK decimal places
    before being validated and stored.
    """

    def __init__(self, price: float, amount: float, side: int, trader_id: int):
        rounded_price = round(price, PRICE_TICK)
        rounded_amount = round(amount, AMOUNT_TICK)
        # Validation happens on the rounded values, so an amount that rounds
        # to zero is rejected.
        assert rounded_amount > 0
        assert rounded_price >= 0

        self.price, self.amount = rounded_price, rounded_amount
        self.side, self.trader_id = side, trader_id

    def __repr__(self):
        return f'LimitOrder({self.price}, {self.amount}, {self.side}, {self.trader_id})'


class PriceLevel:
    """All resting orders that share one price on one side of the book.

    Attributes:
        price: price of the level (taken from the first order).
        amount: total resting amount, kept rounded to AMOUNT_TICK decimals.
        side: side of the level (taken from the first order).
        traders_order: queue of LimitOrder objects; new orders are appended at
            the back and executions consume from the front.
    """

    def __init__(self, first_limit_order: LimitOrder):
        self.price = first_limit_order.price
        self.amount = first_limit_order.amount
        self.side = first_limit_order.side
        self.traders_order = [first_limit_order]

    def __repr__(self):
        return f'PriceLevel({self.price}, {self.amount}, {self.side})'

    def add_limit_order(self, limit_order: LimitOrder): # trader_id: 0 - market, 1 - MM
        """Queue ``limit_order`` at the back of this level.

        If the last queued order belongs to the same trader the two are merged
        into one entry; otherwise the order is appended as a new entry.
        """
        assert limit_order.price == self.price
        assert limit_order.side == self.side

        self.amount = round(self.amount + limit_order.amount, AMOUNT_TICK)
        if self.traders_order and limit_order.trader_id == self.traders_order[-1].trader_id:
            last_order = self.traders_order[-1]
            last_order.amount = round(last_order.amount + limit_order.amount, AMOUNT_TICK)
        else:
            self.traders_order.append(limit_order)

    def execute_limit_order(self, amount: float):
        """Execute up to ``amount`` against the queued orders, front first.

        Returns:
            (remain_amount, match_info): the part of ``amount`` that could not
            be filled at this level, and a mapping trader_id -> amount filled.
            Only orders that were actually touched appear in ``match_info``.
        """
        remain_amount = round(amount, AMOUNT_TICK)
        match_info = defaultdict(int) # trader_id - amount_sold

        consumed = 0  # number of orders at the head of the queue that were fully filled
        for limit_order in self.traders_order:
            if remain_amount <= 0:
                # Request exhausted: do not touch (or credit a zero fill to) the next order.
                break

            filled = min(limit_order.amount, remain_amount)
            match_info[limit_order.trader_id] += filled
            self.amount -= filled
            # Round after every step so float residue can neither leave a
            # ghost order with ~0 amount nor a ~0 remaining request.
            remain_amount = round(remain_amount - filled, AMOUNT_TICK)
            limit_order.amount = round(limit_order.amount - filled, AMOUNT_TICK)

            if limit_order.amount > 0:
                # Partially filled: it stays at the head of the queue.
                break
            consumed += 1

        self.traders_order = self.traders_order[consumed:]
        self.amount = round(self.amount, AMOUNT_TICK)
        if self.amount == 0:
            self.traders_order = []

        return remain_amount, match_info

    def history_update(self, limit_order: LimitOrder):
        """Replace the whole queue with ``limit_order`` and set the level amount to its amount.

        Price and side of the level are left unchanged.
        """
        self.amount = limit_order.amount
        self.traders_order = [limit_order]

In [114]:
class MatchingEngine:
    """Stateless helper that executes an amount against a sequence of price levels."""

    def __init__(self):
        pass

    @staticmethod
    def match_orders(price_levels: Sequence["PriceLevel"], amount: float):
        """Execute ``amount`` against ``price_levels`` in the given order.

        Each level is executed in place through its ``execute_limit_order``.

        Returns:
            (matches_info, remain_orders, remain_amount) where
            - matches_info maps trader_id -> [filled amount, sum of amount * price],
            - remain_orders lists the levels that still hold liquidity once the
              request is exhausted (empty if every level was walked without
              exhausting it),
            - remain_amount is the part of ``amount`` left unfilled.
        """
        remain_amount = amount
        remain_orders = []
        matches_info = defaultdict(list) # trader_info - [amount, price]

        for i, price_level in enumerate(price_levels):
            level_price: float = price_level.price
            remain_amount, level_fills = price_level.execute_limit_order(remain_amount)

            for trader_id, filled in level_fills.items():
                totals = matches_info[trader_id]  # defaultdict creates [] on first sight
                if totals:
                    totals[0] += filled
                    totals[1] += filled * level_price
                else:
                    totals.extend((filled, filled * level_price))

            if not remain_amount:
                # Request exhausted: keep the current level only if it still
                # has liquidity, then every deeper, untouched level.
                if price_level.amount:
                    remain_orders.append(price_level)
                remain_orders.extend(price_levels[i + 1:])
                break

        return matches_info, remain_orders, remain_amount


class OrderBook:
    """Limit order book made of two sorted lists of PriceLevel objects.

    ``bids`` is kept sorted by descending price and ``asks`` by ascending
    price, so index 0 is always the best level of each side.
    """

    # The list defaults are never mutated: sorted() always builds a new list.
    def __init__(self, bids: Sequence[PriceLevel] = [], asks: Sequence[PriceLevel] = [], 
                 matching_engine: MatchingEngine = MatchingEngine()):
        self.bids = sorted(bids, reverse=True, key=lambda x: x.price)
        self.asks = sorted(asks, key=lambda x: x.price)
        self.matching_engine = matching_engine
    
    def __repr__(self):
        """Render bids (left column, padded to 36 chars) next to asks, best level first."""
        ob_repr = ''

        min_len = min(len(self.bids), len(self.asks))
        for i in range(min_len):
            bid_str = repr(self.bids[i])
            ob_repr += bid_str + (36 - len(bid_str)) * ' ' + repr(self.asks[i]) + '\n'
        
        # Whichever side is longer is printed alone, in its own column.
        if len(self.bids) > min_len:
            remain_price_levels = self.bids[min_len:]
            indent = 0
        else:
            remain_price_levels = self.asks[min_len:]
            indent = 36
        
        for p_l in remain_price_levels:
            ob_repr += indent * ' ' + repr(p_l) + '\n'
        
        return ob_repr
    
    def get_state(self):
        """Return the (bids, asks) lists themselves (not copies)."""
        return self.bids, self.asks
    
    def get_bids(self):
        """Return the bid levels, best (highest) price first."""
        return self.bids
    
    def get_asks(self):
        """Return the ask levels, best (lowest) price first."""
        return self.asks
    
    def bid_price(self):
        """Best bid price; raises IndexError when there are no bids."""
        return self.bids[0].price

    def ask_price(self):
        """Best ask price; raises IndexError when there are no asks."""
        return self.asks[0].price

    def mid_price(self):
        """Average of the best bid and best ask prices."""
        return (self.bids[0].price + self.asks[0].price) / 2

    def bid_ask_spread(self):
        """Best ask price minus best bid price."""
        return self.asks[0].price - self.bids[0].price

    def market_depth(self):
        """Price of the deepest ask level minus price of the deepest bid level."""
        return self.asks[-1].price - self.bids[-1].price

    def _apply_level_updates(self, levels, updates, side, descending):
        """Apply absolute-amount updates for one side of the book, in place.

        Args:
            levels: ``self.bids`` or ``self.asks``; mutated in place.
            updates: iterable of rows whose first two items are (price, new amount).
            side: BUY or SELL, used for every level created or overwritten here.
            descending: True when ``levels`` is sorted by descending price.

        Returns:
            List of [price, amount change] for levels that changed or were
            inserted inside the known price range.
        """
        # bisect needs ascending keys, so descending lists are searched on -price.
        sign = -1 if descending else 1
        changes = []

        for update in updates:
            price = round(update[0], PRICE_TICK)
            amount = round(update[1], AMOUNT_TICK)
            index = bisect_left(levels, sign * price, key=lambda level: sign * level.price)

            if index == len(levels):
                # Price lies beyond the deepest known level.
                # NOTE(review): such levels are added without being reported in
                # the returned changes - presumably intentional; confirm.
                if amount > 0:
                    levels.append(PriceLevel(LimitOrder(price, amount, side, MARKET_ID)))
            elif levels[index].price == price:
                delta = round(amount - levels[index].amount, AMOUNT_TICK)
                if delta != 0:
                    changes.append([price, delta])
                if amount > 0:
                    levels[index].history_update(LimitOrder(price, amount, side, MARKET_ID))
                else:
                    del levels[index]
            elif amount > 0:
                levels.insert(index, PriceLevel(LimitOrder(price, amount, side, MARKET_ID)))
                changes.append([price, amount])

        return changes
    
    def track_diff(self, diff):
        """Overwrite book levels with the absolute amounts carried by ``diff``.

        Args:
            diff: tuple (id/timestamp, bid updates, ask updates); each update
                row starts with (price, new amount), amount 0 removing the level.

        Returns:
            (diff[0], bid changes, ask changes), where each change is
            [price, new amount - amount currently in the book].
        """
        bids_diff_new = self._apply_level_updates(self.bids, diff[1], BUY, descending=True)
        # Ask levels must carry side SELL; otherwise a later
        # PriceLevel.add_limit_order of a SELL order fails its side assertion.
        asks_diff_new = self._apply_level_updates(self.asks, diff[2], SELL, descending=False)

        return (diff[0], bids_diff_new, asks_diff_new)


    def set_ask_order(self, limit_order: LimitOrder):
        """Match a sell order against the bids, then rest any remainder on the asks.

        Returns the ``matches_info`` mapping produced by the matching engine.
        """
        # Bids are sorted by descending price: everything before the first
        # bid priced below the order's limit can be matched.
        index = next(
            (i for i, price_level in enumerate(self.bids) if price_level.price < limit_order.price),
            len(self.bids),
        )
        eligible_bids = self.bids[:index]
        ineligible_bids = self.bids[index:]

        matches_info, remain_bids, remain_amount = self.matching_engine.match_orders(eligible_bids, limit_order.amount)
        new_bids: Sequence[PriceLevel] = remain_bids + ineligible_bids
        new_asks: Sequence[PriceLevel] = self.asks

        if remain_amount > 0:
            new_limit_order = LimitOrder(limit_order.price, remain_amount, 
                                         limit_order.side, limit_order.trader_id)

            # First ask level priced at or above the order's limit.
            index = next(
                (i for i, price_level in enumerate(new_asks) if price_level.price >= limit_order.price),
                len(new_asks),
            )

            if index == len(new_asks):
                new_asks.append(PriceLevel(new_limit_order))
            elif new_asks[index].price != limit_order.price:
                new_asks.insert(index, PriceLevel(new_limit_order))
            else:
                new_asks[index].add_limit_order(new_limit_order)
        
        self.bids = new_bids
        self.asks = new_asks

        return matches_info
    
    def set_bid_order(self, limit_order: LimitOrder):
        """Match a buy order against the asks, then rest any remainder on the bids.

        Returns the ``matches_info`` mapping produced by the matching engine.
        """
        # Asks are sorted by ascending price: everything before the first
        # ask priced above the order's limit can be matched.
        index = next(
            (i for i, price_level in enumerate(self.asks) if price_level.price > limit_order.price),
            len(self.asks),
        )
        eligible_asks = self.asks[:index]
        ineligible_asks = self.asks[index:]

        matches_info, remain_asks, remain_amount = self.matching_engine.match_orders(eligible_asks, limit_order.amount)
        new_asks: Sequence[PriceLevel] = remain_asks + ineligible_asks
        new_bids: Sequence[PriceLevel] = self.bids

        if remain_amount > 0:
            new_limit_order = LimitOrder(limit_order.price, remain_amount, 
                                         limit_order.side, limit_order.trader_id)

            # First bid level priced at or below the order's limit.
            index = next(
                (i for i, price_level in enumerate(new_bids) if price_level.price <= limit_order.price),
                len(new_bids),
            )
            
            if index == len(new_bids):
                new_bids.append(PriceLevel(new_limit_order))
            elif new_bids[index].price != limit_order.price:
                new_bids.insert(index, PriceLevel(new_limit_order))
            else:
                new_bids[index].add_limit_order(new_limit_order)
        
        self.bids = new_bids
        self.asks = new_asks

        return matches_info

    def set_order(self, limit_order: LimitOrder):
        """Dispatch ``limit_order`` to set_ask_order / set_bid_order by its side.

        Raises:
            Exception: if the side is neither SELL nor BUY.
        """
        if limit_order.side == SELL:
            matches_info = self.set_ask_order(limit_order)
        elif limit_order.side == BUY:
            matches_info = self.set_bid_order(limit_order)
        else:
            raise Exception("WRONG SIDE!")
        
        return matches_info

def create_lob_init(lob_state: dict):
    """Build an OrderBook from a snapshot dict.

    ``lob_state['bids']`` and ``lob_state['asks']`` are iterated as rows whose
    first two items are (price, amount); every row becomes a one-order
    PriceLevel owned by MARKET_ID.
    """
    raw_bids = lob_state['bids']
    raw_asks = lob_state['asks']

    bid_levels = [
        PriceLevel(LimitOrder(row[0], row[1], BUY, MARKET_ID)) for row in raw_bids
    ]
    ask_levels = [
        PriceLevel(LimitOrder(row[0], row[1], SELL, MARKET_ID)) for row in raw_asks
    ]

    return OrderBook(bid_levels, ask_levels)

In [115]:
# Bucket trades by the diff that follows them: trades_by_diff[k] holds the
# trades whose trade time ('T') is not later than the event time ('E') of
# diffs[k + 1] and that were not already assigned to an earlier bucket.
trades_by_diff = []

trades_index = 0
trades_count = len(trades)
for diff_event in diffs[1:]:
    time_to = diff_event['E']
    trades_after_diff = []
    # The bound check stops cleanly once the trades list is exhausted.
    while trades_index < trades_count and trades[trades_index]['T'] <= time_to:
        cur_trade = trades[trades_index]
        # list.append takes a single argument, so the record is stored as one
        # (time, price, amount) tuple.
        trades_after_diff.append((cur_trade['T'], float(cur_trade['p']), float(cur_trade['q'])))
        trades_index += 1
    trades_by_diff.append(trades_after_diff)

# Diffs as (event time, bid updates, ask updates) with float arrays.
new_diffs = [
    (diff_event['E'],
     np.array(diff_event['b']).astype(float),
     np.array(diff_event['a']).astype(float))
    for diff_event in diffs
]

In [143]:
# Build the book from the snapshot and apply the first diff on top of it.
order_book = create_lob_init(init_lob)
# Trailing semicolon keeps the returned tuple out of the cell output.
order_book.track_diff(new_diffs[0]);

In [144]:
# Flatten the synchronised book into [price, amount] rows per side.
bids_prepared = [[level.price, level.amount] for level in order_book.bids]
asks_prepared = [[level.price, level.amount] for level in order_book.asks]

# NOTE(review): 'lastUpdateId' is filled with the first element of
# new_diffs[0], which is built from the diff's 'E' field - confirm that
# consumers expect this value here.
init_lob_prepared = {
    'lastUpdateId': new_diffs[0][0],
    'bids': bids_prepared,
    'asks': asks_prepared,
}

with open("init_lob_prepared.json", "w") as out_file:
    json.dump(init_lob_prepared, out_file)

In [145]:
# Replay: for every diff after the first, apply the trades bucketed before it
# to the book, then apply the diff itself and record what it changed.
order_book = create_lob_init(init_lob)
order_book.track_diff(new_diffs[0])
trades_prepared = []
diffs_prepared = []

for i, diff in enumerate(tqdm(new_diffs[1:])):
    cur_trades = trades_by_diff[i]
    for trade in cur_trades:
        trade_price = trade[1]
        if trade_price >= order_book.ask_price():
            side = BUY
        elif trade_price <= order_book.bid_price():
            side = SELL
        else:
            # Price strictly inside the spread: neither test above fires, which
            # used to leave `side` unbound (first trade) or silently reuse the
            # previous trade's side. Classify against the mid price instead.
            # NOTE(review): such an order will rest in the book rather than
            # match - confirm this is the desired handling.
            side = BUY if trade_price >= order_book.mid_price() else SELL
        order_book.set_order(LimitOrder(trade_price, trade[2], side, MARKET_ID))
        trades_prepared.append([trade[0], trade_price, trade[2], side])
    diffs_prepared.append(order_book.track_diff(diff))

100%|██████████| 76/76 [00:00<00:00, 544.96it/s]


In [146]:
# Persist the classified trades as CSV and the per-diff book changes as JSON.
trade_columns = ['timestamp', 'price', 'amount', 'side']
trades_prepared = pd.DataFrame(trades_prepared, columns=trade_columns)
trades_prepared.to_csv('trades_prepared.csv')

with open("diffs_prepared.json", "w") as out_file:
    json.dump(diffs_prepared, out_file)