In [None]:
# Re-shape every depth snapshot into a flat {price: volume} mapping, which is
# easier to work with than the wide bp*/bv*/ap*/av* column layout.
depth_frame = data['DEPTH']['BTCUSDT']
depth_index = depth_frame.index

# (price column, volume column) pairs: bids first from level 10 down to level 1,
# then asks from level 1 up to level 10.
bid_columns = [(f'bp{level}', f'bv{level}') for level in range(10, 0, -1)]
ask_columns = [(f'ap{level}', f'av{level}') for level in range(1, 11)]
odb_tup_keys = bid_columns + ask_columns

odb_dict = depth_frame.to_dict(orient='records')
for i, record in enumerate(tqdm(odb_dict)):
    levels = {record[price_col]: record[volume_col] for price_col, volume_col in odb_tup_keys}
    # the snapshot's index label is stored last, under the key 't'
    odb_dict[i] = {**levels, 't': depth_index[i]}

In [None]:
# Re-shape every trade into {price: quantity, 'mm_buy': flag, 't': index label}
# (in that key order) so it can be handled like the depth snapshots.
trade_frame = data['TRADE']['BTCUSDT']
trade_index = trade_frame.index

trade_dict = trade_frame.to_dict(orient='records')
for i, record in enumerate(tqdm(trade_dict)):
    trade_dict[i] = {
        record['p']: record['q'],
        'mm_buy': record['mm_buy'],
        't': trade_index[i],
    }

In [None]:
# Replay the depth snapshots (odb_dict) and the trades (trade_dict) as one
# time-ordered stream of events and record the best bid / best ask after every
# event. Between two snapshots the book is not observed, so its state is
# inferred from the trades:
#   * a trade at a price worse than resting levels implies those better levels
#     were cancelled before the trade;
#   * a trade larger than the resting quantity (or at an unknown price) implies
#     new limit orders were added before the trade.
# When a snapshot and a trade share the same 't', the snapshot is processed first.
odb_ptr, trade_ptr = 0, 0
temp_odb = dict() # working copy of the latest snapshot; accounts for cancelled orders that appear in between two orderbook depth snapshots [TO THE BEST OF KNOWLEDGE]
temp_limits = dict() # accounts for new limit orders that appear in between two orderbook depth snapshots [TO THE BEST OF KNOWLEDGE]
temp_best_bid, temp_best_ask = -1, -1 # -1 means undefined
temp_best_bid_estimate, temp_best_ask_estimate = -1, -1 # in case one side of the orderbook is wiped. This stores the trade price on the missing side as the best price estimate

# store the best prices
best_prices = { 'timestamp': list(), 'bp1': list(), 'ap1': list() }

# every iteration consumes exactly one event (either a snapshot or a trade)
for _ in tqdm(range(len(odb_dict) + len(trade_dict))):

    # A snapshot is the next event only if one is left AND either no trade is left
    # or the snapshot is not later than the next trade. The explicit bound check on
    # odb_ptr prevents an IndexError when trades occur after the last snapshot.
    take_snapshot = odb_ptr < len(odb_dict) and (
        trade_ptr >= len(trade_dict)
        or odb_dict[odb_ptr]['t'] <= trade_dict[trade_ptr]['t']
    )

    if take_snapshot:

        # stores the current state of the orderbook depth
        # (a shallow copy is enough: the snapshot is a flat dict of scalar values)
        temp_odb = dict(odb_dict[odb_ptr])
        del temp_odb['t']

        # keys are ordered bp10..bp1, ap1..ap10, so positions 9 and 10 are bp1 and ap1
        # NOTE(review): assumes all 20 price levels are distinct; duplicate prices
        # would collapse into one key and shift these positions - TODO confirm
        temp_odb_list = list(temp_odb)
        temp_best_bid = temp_odb_list[9]
        temp_best_ask = temp_odb_list[10]

        best_prices['timestamp'].append(odb_dict[odb_ptr]['t'])
        best_prices['ap1'].append(temp_best_ask if temp_best_ask != -1 else temp_best_ask_estimate)
        best_prices['bp1'].append(temp_best_bid if temp_best_bid != -1 else temp_best_bid_estimate)

        # limit orders inferred since the previous snapshot are now part of the new snapshot
        temp_limits = dict()
        odb_ptr += 1

    else:

        trade = trade_dict[trade_ptr]
        # the first item of a trade record is its price -> quantity pair
        p, q = next(iter(trade.items()))
        # direction of trade
        mm_buy = trade['mm_buy']

        # if it is a MM buy
        if mm_buy:
            # try to find better prices only when temp_best_bid is defined
            # o.w. most likely the bid side of the orderbook upto 10 limits which we keep track of is missing/lifted
            if temp_best_bid != -1:
                # iterate over a copy of the keys because levels are deleted inside the loop
                for price in list(temp_odb.keys()):
                    # check if there are better bid prices than p
                    if price <= temp_best_bid and price > p:
                        # if there are that means they are cancelled before this trade happened
                        del temp_odb[price]
                        # if the price which we deleted was the temp_best_bid
                        if price == temp_best_bid:
                            # then set it to p
                            temp_best_bid = p
        else:
            # do the same thing but for the sell side
            # try to find better prices only when temp_best_ask is defined
            # o.w. most likely the sell side of the orderbook upto 10 limits which we keep track of is missing/lifted
            if temp_best_ask != -1:
                for price in list(temp_odb.keys()):
                    # check if there are better offer prices than p
                    if price >= temp_best_ask and price < p:
                        # if there are that means they are cancelled before this trade happened
                        del temp_odb[price]
                        # if the price which we deleted was the temp_best_ask
                        if price == temp_best_ask:
                            # then set it to p
                            temp_best_ask = p

        # if the price is present in the temp_orderbook we are maintaining
        if p in temp_odb:

            # check if the quantity traded at this price is possible
            # o.w. it will mean new limit orders were added at this price level before this trade occured
            if temp_odb[p] < q:
                # Add this to the temp_limits dictionary to keep track of this inferred new limit order from this trade
                # we are sure that there is atleast this much quantity of limit order added here (maybe even more, no way of being sure)
                temp_limits[p] = temp_limits.get(p, 0) + q - temp_odb[p]
            else:
                # the quantity of the trade is possible

                # if temp_limits does not have this price level then we can safely subtract the quantity as no new limit order was added at this price level
                if not temp_limits.get(p, 0):
                    temp_odb[p] -= q

                    # if all the quantity at this price level is exhausted remove the level
                    # NOTE(review): exact comparison with 0; if quantities are floats, rounding
                    # may leave a tiny non-zero residue and keep the level alive - TODO confirm
                    if temp_odb[p] == 0:
                        del temp_odb[p]

                        # set the temp_best_bid/temp_best_ask accordingly
                        if mm_buy == 1 and p == temp_best_bid:
                            worse_prices = np.array([price for price in temp_odb.keys() if price < p])
                            temp_best_bid = max(worse_prices) if worse_prices.shape[0] else -1
                            # the best estimate of temp_best_bid
                            if not worse_prices.shape[0]:
                                temp_best_bid_estimate = p

                        if mm_buy == 0 and p == temp_best_ask:
                            worse_prices = np.array([price for price in temp_odb.keys() if price > p])
                            temp_best_ask = min(worse_prices) if worse_prices.shape[0] else -1
                            # the best estimate of temp_best_ask
                            if not worse_prices.shape[0]:
                                temp_best_ask_estimate = p

        # if the price is not present in the temp_orderbook we are maintaining
        else:

            # This means that new limit orders had been waiting in the orderbook when the trade occurred
            # Check if we have already accounted for this new trade
            # if we have done so, then we can now say that the agg quantity at this new level was more than what we inferred before
            # o.w. we have info that there are new limit orders at this new level with agg quantity at least that of the trade
            temp_limits[p] = temp_limits.get(p, 0) + q

            # in case of missing side of orderbook we infer the trade price to happen at the best bid/best ask on the resp side
            if temp_best_ask == -1 and mm_buy == 0:
                temp_best_ask_estimate = p

            if temp_best_bid == -1 and mm_buy == 1:
                temp_best_bid_estimate = p

        best_prices['timestamp'].append(trade['t'])
        best_prices['ap1'].append(temp_best_ask if temp_best_ask != -1 else temp_best_ask_estimate)
        best_prices['bp1'].append(temp_best_bid if temp_best_bid != -1 else temp_best_bid_estimate)

        trade_ptr += 1

# create a dataframe out of best_prices, indexed by event timestamp
best_prices = pd.DataFrame(best_prices).set_index('timestamp')
best_prices

In [None]:
# compute the bid-ask spreads
bid_ask_spreads = { crypto_pair: best_prices.ap1 - best_prices.bp1 for crypto_pair in CRYPTO_PAIRS if crypto_pair == 'BTCUSDT' }

# compute the reference price based on https://arxiv.org/pdf/1312.0563 section 2.2.2
# start from the mid price; rows with an even spread are adjusted below
reference_prices = { crypto_pair: (best_prices.bp1 + best_prices.ap1)/2 for crypto_pair in CRYPTO_PAIRS if crypto_pair == 'BTCUSDT' }

# for crypto_pair in CRYPTO_PAIRS:
for crypto_pair in ['BTCUSDT']:
    # positions of the rows whose spread is even
    # NOTE(review): the `% 2` test and the +/-0.5 offsets assume prices are expressed
    # in units of one tick - TODO confirm against the data
    even_bid_ask_spread_idx = np.flatnonzero((bid_ask_spreads[crypto_pair] % 2 == 0).to_numpy())
    if even_bid_ask_spread_idx.size == 0:
        # nothing to adjust
        continue

    # snapshot of the mid prices, taken before any value is overwritten
    mid_prices = reference_prices[crypto_pair].to_numpy(copy=True)
    selected_mids = mid_prices[even_bid_ask_spread_idx]
    # two candidates per selected row: mid - 0.5 (column 0) and mid + 0.5 (column 1)
    candidate_reference_prices = np.column_stack([selected_mids - 0.5, selected_mids + 0.5])

    # price of the row just before each selected row. The position is clamped at 0
    # so that row 0 does not wrap around to the LAST row (index -1); row 0 then
    # compares against its own mid, which is a tie resolved to the lower candidate.
    # NOTE(review): this uses the previous row's mid price, not its already-adjusted
    # reference price - confirm this approximation is intended.
    prev_reference_prices = mid_prices[np.maximum(even_bid_ask_spread_idx - 1, 0)]

    # pick, per row, the candidate closest to the previous price (ties -> column 0)
    closest_candidate = np.argmin(np.abs(candidate_reference_prices - prev_reference_prices[:, None]), axis=1)
    reference_prices[crypto_pair].iloc[even_bid_ask_spread_idx] = candidate_reference_prices[np.arange(even_bid_ask_spread_idx.size), closest_candidate]

In [None]:
# display the reference price series stored for BTCUSDT
reference_prices['BTCUSDT']