In [1]:
import gzip
import json

def messages_parse(data):
    data_with_no_binary = str(data).split("'")[1:-1][0]
    data_list =[i for i in data_with_no_binary.split('\\n') if i != '']
    messages_data = [json.loads(D) for D in data_list]
    return messages_data 

def messages_read(file="../data/coinbase_BTC-USD_20_10_06_000000-010000.json.gz"):
    file_object = gzip.open(file, "r")
    data = file_object.read()
    messages_data = messages_parse(data)
    return messages_data 

def snapshot_read(file="../data/coinbase_BTC-USD_20_10_06_00_00.json"):
    snaphsot_data = json.loads(open(file,'r').read())
    return snaphsot_data

def messages_filter(messages_data,initial_clob,final_clob):
    messages_data_filtered = [message_dict for message_dict in messages_data if message_dict['sequence'] >= initial_clob['sequence'] and message_dict['sequence'] <= final_clob['sequence']]
    return messages_data_filtered
            
messages_data = messages_read()
initial_clob = snapshot_read(file="../data/coinbase_BTC-USD_20_10_06_00_00.json")
final_clob = snapshot_read(file="../data/coinbase_BTC-USD_20_10_06_00_15.json")
messages_data_filtered = messages_filter(messages_data,initial_clob,final_clob)


In [35]:
import logging
import queue
#import data_load


logging.basicConfig(level=20)
logger = logging.getLogger()

class OrderDictTable:
    """
        Convert Dataset into Dictionary
    """
    def __init__(self, order_items):
        self.order_dict = {} 
        self.assign_orders_dict(order_items)
        
    def assign_orders_dict(self,order_items):
        keys = [subli[2] for subli in order_items]#map(lambda price,size,order_id: order_id,order_items)
        self.order_dict = dict([(order_id,order_item) for (order_id,order_item) in zip(keys,order_items) ])
    
    def get_values(self):
        return self.order_dict.values()

    def get_obj(self):
        return self.order_dict


class CLOB:
    def __init__(self,clob):
        self.bids_obj = OrderDictTable(clob['bids']).get_obj()
        self.asks_obj = OrderDictTable(clob['asks']).get_obj()
        
    def order_remove(self,order_obj,order_id):
        order_obj.pop(order_id,None)
        
    def order_add(self,order_obj,price,size,order_id):
        order_obj[order_id] = [price,size,order_id]
    
    def order_size_decrease(self,order_obj,order_id,size):
        previous_size = order_obj[order_id][1]
        new_size = str(float(previous_size) - float(size))
        price = order_obj[order_id][0]
        order_obj[order_id] = [price,new_size,order_id]
        
    def update_from_message(self,message_obj):
        message_type = message_obj['type']
        message_side = message_obj['side']
        if message_side == 'sell':
            order_obj = self.asks_obj
        elif message_side == 'buy' :
            order_obj = self.bids_obj
        
        if message_type == 'done':
            self.order_remove(order_obj,message_obj['order_id'])
        elif message_type == 'open':
            self.order_add(order_obj,message_obj['price'],message_obj['remaining_size'],message_obj['order_id'])
        elif message_type == 'match' and message_side == 'buy':
            self.order_size_decrease(order_obj,message_obj['maker_order_id'],message_obj['size'])
          
        elif message_type == 'match' and message_side == 'sell':
            self.order_size_decrease(order_obj,message_obj['maker_order_id'],message_obj['size'])
            
            
    def get_clob(self):
        return {
            'bids':self.bids_obj.values(),
            'asks':self.asks_obj.values()
        }


class CLOBSync:
    def clob_sync(self,initial_clob,messages_data_filtered):
        clob = CLOB(initial_clob)
        q = queue.Queue()
        messages_queue_data = sorted(messages_data_filtered,key=lambda i: i['sequence'])

        list(map(q.put,messages_queue_data ))
        while not q.empty():
            message_obj = q.get()
            clob.update_from_message(message_obj)
        final_clob_processed = clob.get_clob()
        return final_clob_processed

    
updated_clob = CLOBSync().clob_sync(initial_clob,messages_data_filtered)


In [18]:

def messages_from_order_id(messages_data,order_id):
    return [D for D in messages_data if 'order_id' in D.keys() and D['order_id'] == order_id]

def match_messages_from_order_id(messages_data,order_id):
    maker_orders = [D for D in messages_data if 'maker_order_id' in D.keys()  and D['maker_order_id'] == order_id]
    taker_orders = [D for D in messages_data if 'taker_order_id' in D.keys()  and D['taker_order_id'] == order_id]
    return maker_orders + taker_orders 

def orders_from_order_id(orders,order_id):
    return [subli for subli in orders if subli[2] == order_id]

def clob_length_test(final_clob,updated_clob,node='bids'):
    final_count = len(final_clob[node])
    updated_count = len(updated_clob[node])
    is_equal = final_count == updated_count
    return is_equal

def list_difference_find(l1,l2):
    return [order_subli for order_subli in l1 if order_subli not in l2]

def clob_difference_test(final_clob_orders,updated_clob_orders):
    difference_list_1 = list_difference_find(final_clob_orders,updated_clob_orders)
    difference_list_2 = list_difference_find(updated_clob_orders,final_clob_orders)
    return difference_list_1 + difference_list_2

def clob_difference_test_messages_debug_base(l1,l2,messages_data):
    difference_orders = list_difference_find(l1,l2)
    difference_orders_id = [order_subli[2] for order_subli in difference_orders]
    order_messages = [messages_from_order_id(messages_data,order_id) for order_id in difference_orders_id]
    order_messages_match = [messages_from_order_id(messages_data,order_id) for order_id in difference_orders_id]

    return order_messages + order_messages_match

def clob_difference_test_messages_debug(final_clob_orders,updated_clob_orders,messages_data):
    missing_order_messages = clob_difference_test_messages_debug_base(final_clob_orders,updated_clob_orders,messages_data)
    return missing_order_messages
    
def run_tests(final_clob,updated_clob):
    bids_difference = clob_difference_test(final_clob['bids'],updated_clob['bids'])
    asks_difference = clob_difference_test(final_clob['asks'],updated_clob['asks'])

    bids_length_equal = clob_length_test(final_clob,updated_clob,node='bids')
    asks_length_equal= clob_length_test(final_clob,updated_clob,node='asks')
    
    #logger.info('bids_difference: %s', str(bids_difference))
    #logger.info('asks_difference: %s', str(asks_difference))

    logger.info('bids_length_equal: %s', str(bids_length_equal))
    logger.info('asks_length_equal: %s', str(asks_length_equal))

    
#missing_order_messages = clob_difference_test_messages_debug(final_clob['bids'],updated_clob['bids'],messages_data_filtered)

#run_tests(final_clob,updated_clob)

In [6]:
run_tests(final_clob,updated_clob)

INFO:root:bids_difference: [['10788.93', '2.80522372', '4c53de3f-b8e4-4538-9efb-11d324615719'], ['10788.8', '3.08320458', '52c9f4ec-9a87-4676-beb1-cc628e578fb9'], ['10800.05', '0.001', '9ee2c3ef-0309-406d-a0a0-c0e1599cd9b6'], ['10850', '0.1', '6f4dbd88-e19a-4fec-aec4-efd433acb663'], ['10800', '0.01909259', '2301bb39-3679-41f8-8e64-bfdf1aca94d2'], ['10900', '0.2', '5d862b33-a4b8-469f-9366-d535a15a3ed1'], ['10800', '0.00102408', '93301e06-0b0b-458b-a927-bc143e5133f0'], ['10819.55', '0.004', '6c16977f-0d40-417a-a1d7-a21ac15571f7'], ['10800', '0.006', '80a272cd-c667-451e-9d7e-209a8c60315a'], ['10800', '0.026', 'e3dd375b-565a-4f50-943f-5c63fd0dae69'], ['10808.6', '4.4', '4d1555bf-6129-4be1-bf3d-44833dfa0ad7'], ['10810', '0.09', '84af99b1-497f-45eb-bb57-e4bbba303aeb'], ['10802.78', '0.00368648', '5041b1fe-dde0-45ce-a8f7-15a88bfce5d1'], ['10803.13', '0.00368647', 'eacb1091-f1fb-412d-8339-62f0f1cd202e'], ['10812.45', '0.00368633', 'd7ea2861-26e8-48da-a214-2c8cfb19ff20'], ['10802.88', '0.003686

INFO:root:asks_difference: [['10793', '1.95938089', '8871aea3-f344-4b73-829c-509fbbde35e3'], ['10800', '5.84856151', 'c6be4839-3d3b-418e-b91a-b70ca6813d3a'], ['10800', '25', 'c6be4839-3d3b-418e-b91a-b70ca6813d3a'], ['10772.29', '0.0066', '17a2a1cc-f172-4dc6-8b80-d8f9dc440314'], ['10769.14', '0.0066', 'b136a373-2a67-4d9a-94c5-7060aa59723e'], ['10761.25', '0.0066', '9a91dbe9-69be-48c7-9d51-f6f8ac3b6df5'], ['10747.07', '0.0066', 'ec6df847-219f-47e8-a127-a6b76586144d'], ['10731.33', '0.0066', '2214dbea-5014-4e96-94db-fb599264864b'], ['10715.62', '0.0066', '795d8b7b-af03-4f17-9d4f-57537f2b1dd3'], ['10699.93', '0.0066', 'bd69509e-692a-4825-bb2c-eeb8640f52b5'], ['10684.27', '0.0066', '2f610b20-e7fc-4b9a-b464-a2e745df5882'], ['10120', '0.00245807', '38f95011-ba99-406a-ae2f-5dbbb1750ece'], ['5600', '0.03554014', 'ac6946a5-a1f1-4dd8-922b-8b2faa0eb335'], ['10772.63', '5.1', '7aaf31a8-6857-401c-ba8f-05e081aa31a4'], ['10551.24', '0.013', '6abc5142-2dad-4f61-8081-6cfca2580caa'], ['9910', '1', '0b17d

INFO:root:bids_length_equal: False
INFO:root:asks_length_equal: False


In [12]:
run_tests(final_clob,updated_clob)

INFO:root:bids_length_equal: True
INFO:root:asks_length_equal: True


In [13]:
missing_order_messages = clob_difference_test_messages_debug(final_clob['bids'],updated_clob['bids'],messages_data_filtered)
print(missing_order_messages[0])

[{'type': 'open', 'side': 'buy', 'product_id': 'BTC-USD', 'time': '2020-10-06T00:14:28.127788Z', 'sequence': 16713045832, 'price': '10788.93', 'order_id': '4c53de3f-b8e4-4538-9efb-11d324615719', 'remaining_size': '3.1'}, {'type': 'received', 'side': 'buy', 'product_id': 'BTC-USD', 'time': '2020-10-06T00:14:28.127788Z', 'sequence': 16713045831, 'order_id': '4c53de3f-b8e4-4538-9efb-11d324615719', 'order_type': 'limit', 'size': '3.1', 'price': '10788.93', 'client_oid': '20201006-0000-0000-0002-000000003995'}]


In [14]:
for order_message in missing_order_messages[0]:
    print (order_message)

{'type': 'open', 'side': 'buy', 'product_id': 'BTC-USD', 'time': '2020-10-06T00:14:28.127788Z', 'sequence': 16713045832, 'price': '10788.93', 'order_id': '4c53de3f-b8e4-4538-9efb-11d324615719', 'remaining_size': '3.1'}
{'type': 'received', 'side': 'buy', 'product_id': 'BTC-USD', 'time': '2020-10-06T00:14:28.127788Z', 'sequence': 16713045831, 'order_id': '4c53de3f-b8e4-4538-9efb-11d324615719', 'order_type': 'limit', 'size': '3.1', 'price': '10788.93', 'client_oid': '20201006-0000-0000-0002-000000003995'}


In [None]:
16713045831
16713045832

In [15]:
orders_from_order_id(updated_clob['bids'],'4c53de3f-b8e4-4538-9efb-11d324615719')


[['10788.93', '3.1', '4c53de3f-b8e4-4538-9efb-11d324615719']]

In [16]:
orders_from_order_id(final_clob['bids'],'4c53de3f-b8e4-4538-9efb-11d324615719')


[['10788.93', '2.80522372', '4c53de3f-b8e4-4538-9efb-11d324615719']]

In [19]:
match_messages_from_order_id(messages_data_filtered,'4c53de3f-b8e4-4538-9efb-11d324615719')

[{'type': 'match',
  'side': 'buy',
  'product_id': 'BTC-USD',
  'time': '2020-10-06T00:14:28.196746Z',
  'sequence': 16713045873,
  'trade_id': 105018264,
  'maker_order_id': '4c53de3f-b8e4-4538-9efb-11d324615719',
  'taker_order_id': '89b45d96-99a8-445e-880b-8e1be67fd662',
  'size': '0.01608628',
  'price': '10788.93'},
 {'type': 'match',
  'side': 'buy',
  'product_id': 'BTC-USD',
  'time': '2020-10-06T00:14:28.420307Z',
  'sequence': 16713045977,
  'trade_id': 105018266,
  'maker_order_id': '4c53de3f-b8e4-4538-9efb-11d324615719',
  'taker_order_id': '77ac1ba2-dc82-4325-b499-b6e67a8210da',
  'size': '0.11753',
  'price': '10788.93'},
 {'type': 'match',
  'side': 'buy',
  'product_id': 'BTC-USD',
  'time': '2020-10-06T00:14:28.400747Z',
  'sequence': 16713045963,
  'trade_id': 105018265,
  'maker_order_id': '4c53de3f-b8e4-4538-9efb-11d324615719',
  'taker_order_id': '5efffd65-cb3f-4206-af9e-0fe4de4e5b71',
  'size': '0.13953',
  'price': '10788.93'},
 {'type': 'match',
  'side': 'buy'

New Tests

In [40]:
missing_order_messages = clob_difference_test_messages_debug(updated_clob['bids'],final_clob['bids'],messages_data_filtered)
for order_message in missing_order_messages[0]:
    print (order_message)

IndexError: list index out of range

In [34]:
match_messages_from_order_id(messages_data_filtered,'8871aea3-f344-4b73-829c-509fbbde35e3')

[{'type': 'match',
  'side': 'sell',
  'product_id': 'BTC-USD',
  'time': '2020-10-06T00:15:06.689787Z',
  'sequence': 16713057477,
  'trade_id': 105018305,
  'maker_order_id': '8871aea3-f344-4b73-829c-509fbbde35e3',
  'taker_order_id': '2da1a6fe-96b6-4a77-bfdb-e3588f5c484d',
  'size': '0.04061911',
  'price': '10793'}]

In [36]:
orders_from_order_id(updated_clob['asks'],'8871aea3-f344-4b73-829c-509fbbde35e3')


[['10793', '1.95938089', '8871aea3-f344-4b73-829c-509fbbde35e3']]

In [37]:
orders_from_order_id(final_clob['asks'],'8871aea3-f344-4b73-829c-509fbbde35e3')


[['10793', '1.95938089', '8871aea3-f344-4b73-829c-509fbbde35e3']]