In [1]:
import json
import numpy as np
import pandas as pd
import polars as pl
from tqdm import tqdm
from copy import deepcopy

from lobio.lob.limit_order import LimitOrder, PRICE_TICK, AMOUNT_TICK
from lobio.lob.price_level import Side
from lobio.lob.order_book import OrderBookPrep, TraderId

%load_ext autoreload
%autoreload 2

In [2]:
# Load the three raw dumps: depth diffs, aggregated trades and the book snapshot.
with open("../data/diffs.json", "r", encoding="utf-8") as diffs_file:
    diffs = json.load(diffs_file)

with open("../data/aggtrades.json", "r", encoding="utf-8") as aggtrades_file:
    aggtrades = json.load(aggtrades_file)

with open("../data/init_lob.json", "r", encoding="utf-8") as snapshot_file:
    init_lob = json.load(snapshot_file)

# Convert both sides of the snapshot into float arrays.
for book_side in ("bids", "asks"):
    init_lob[book_side] = np.array(init_lob[book_side]).astype(float)

In [3]:
# prev_T = 0
# prev_t = 0
# for trade in trades:
#     assert trade['T'] >= prev_T
#     assert trade['t'] >= prev_t
#     prev_T = trade['T']
#     prev_t = trade['t']

In [4]:
# Validate the aggregated-trade stream: 'T' never decreases, and each record's
# 'f' is exactly one past the previous record's 'l'.
prev_T, prev_l = 0, aggtrades[0]['f'] - 1
for aggtrade in aggtrades:
    assert aggtrade['T'] >= prev_T
    assert aggtrade['f'] == prev_l + 1
    prev_T, prev_l = aggtrade['T'], aggtrade['l']

In [5]:
# Validate the depth-diff stream: 'T' never decreases, each diff's 'U' lies
# strictly after the previous diff's 'u', and 'pu' equals the previous 'u'.
prev_T = diffs[0]['T']
prev_u = diffs[0]['u']
# start=1 so that j is the failing diff's actual position in `diffs`.
for j, diff in enumerate(diffs[1:], start=1):
    assert diff['T'] >= prev_T, f"diff {j}: T={diff['T']} is earlier than previous T={prev_T}"
    assert diff['U'] >= prev_u + 1, f"diff {j}: U={diff['U']} does not follow previous u={prev_u}"
    assert diff['pu'] == prev_u, f"diff {j}: pu={diff['pu']} does not match previous u={prev_u}"
    prev_u = diff['u']
    prev_T = diff['T']

In [6]:
# prev_u = 0
# for ticker in tickers:
#     assert ticker['u'] > prev_u
#     prev_u = ticker['u']

In [7]:
# Align the diff stream with the snapshot: skip every diff whose 'u' is below
# the snapshot's lastUpdateId; the first diff kept must satisfy
# U <= lastUpdateId <= u, otherwise the data is rejected.
snapshot_id = init_lob["lastUpdateId"]

i = 0
# Bounded scan: if no diff reaches the snapshot id we fall through to the
# explicit error below instead of failing with an IndexError.
while i < len(diffs) and diffs[i]["u"] < snapshot_id:
    i += 1

if i >= len(diffs) or diffs[i]["U"] > snapshot_id or diffs[i]["u"] < snapshot_id:
    raise Exception("CORRUPTED DATA. DIFF. DEPTH are not consistent.")
diffs = diffs[i:]

# Drop aggregated trades stamped at or before the first retained diff.
# Bounded as well: if every trade is that old, the result is an empty list.
i = 0
while i < len(aggtrades) and aggtrades[i]['T'] <= diffs[0]["T"]:
    i += 1
aggtrades = aggtrades[i:]

In [8]:
# Reduce each raw diff to (event time, bids, asks), with the level lists
# converted to float arrays.
new_diffs = [
    (
        raw_diff["T"],
        np.array(raw_diff["b"]).astype(float),
        np.array(raw_diff["a"]).astype(float),
    )
    for raw_diff in diffs
]

In [9]:
# Build the working order book from the snapshot, then apply the first retained diff.
order_book = OrderBookPrep.create_lob_init(init_lob)
order_book.track_diff(new_diffs[0])
# Drop the diff that was just applied so later cells only see the remaining ones.
# NOTE(review): this rebinds new_diffs, so re-running this cell on its own
# discards one more diff each time.
new_diffs = new_diffs[1:]

In [10]:
# Flatten the diffs into (time, level[0], level[1], side) rows. Within each
# diff the bid levels are emitted first, in reversed order, followed by the
# ask levels in their stored order.
diff_sequence = []
for ts, bids, asks in new_diffs:
    diff_sequence.extend((ts, level[0], level[1], Side.BUY) for level in bids[::-1])
    diff_sequence.extend((ts, level[0], level[1], Side.SELL) for level in asks)

In [11]:
# Scale 'base' and 'quote' by their tick exponents, round, and store every
# column with a compact integer dtype.
diffs_prepared = pl.DataFrame(
    diff_sequence, ('T', 'base', 'quote', 'side')
).with_columns(
    (pl.col('base') * 10**PRICE_TICK).round().cast(pl.UInt32),
    (pl.col('quote') * 10**AMOUNT_TICK).round().cast(pl.UInt64),
    pl.col('side').cast(pl.Int8),
    pl.col('T').cast(pl.UInt64),
)

In [12]:
# Persist the prepared diff rows as a Parquet file.
diffs_prepared.write_parquet("../data/diffs_prepared.parquet")

In [31]:
# Take [base, quote] pairs from both sides of the book, truncated to the
# depth of the shorter side so the two lists have equal length.
min_len = min(len(order_book.bids), len(order_book.asks))
bids_prepared = [[level.base, level.quote] for level in order_book.bids[:min_len]]
asks_prepared = [[level.base, level.quote] for level in order_book.asks[:min_len]]

In [32]:
# One row per depth level; columns are bid base, bid quote, ask base, ask quote.
init_lob_data = np.concatenate([bids_prepared, asks_prepared], axis=1)
# Scale the base columns (0, 2) and the quote columns (1, 3) by their tick exponents.
init_lob_data[:, [0, 2]] *= 10**PRICE_TICK
init_lob_data[:, [1, 3]] *= 10**AMOUNT_TICK
init_lob_data = np.round(init_lob_data)
# Append a final row that repeats the first retained diff's 'T' in every column.
timestamp_row = [[diffs[0]['T']] * init_lob_data.shape[1]]
# Cast to an explicit 64-bit integer: a bare `int` maps to a 32-bit type on
# Windows with NumPy < 2, which cannot hold large values such as
# epoch-millisecond timestamps (assumed unit of 'T' — TODO confirm).
init_lob_data = np.append(init_lob_data, timestamp_row, axis=0).astype(np.int64)

In [16]:
# Persist the prepared snapshot array in NumPy's binary .npy format.
with open('../data/init_lob_prepared.npy', 'wb') as npy_file:
    np.save(npy_file, init_lob_data)

In [50]:
# One (T, p, q, side) row per aggregated trade, with 'p' and 'q' parsed as
# floats. A truthy 'm' flag maps to Side.BUY, anything else to Side.SELL.
aggtrades_raw = [
    (
        trade['T'],
        float(trade['p']),
        float(trade['q']),
        Side.BUY if trade['m'] else Side.SELL,
    )
    for trade in aggtrades
]

In [51]:
# Turn the trade rows into a frame, then scale 'base' and 'quote' by their tick
# exponents, round, and store every column with a compact integer dtype.
aggtrades_raw = pl.DataFrame(aggtrades_raw, ('T', 'base', 'quote', 'side'))

base_scaled = (pl.col('base') * 10**PRICE_TICK).round().cast(pl.UInt32)
quote_scaled = (pl.col('quote') * 10**AMOUNT_TICK).round().cast(pl.UInt64)
side_code = pl.col('side').cast(pl.Int8)
event_time = pl.col('T').cast(pl.UInt64)

aggtrades_raw = aggtrades_raw.with_columns(base_scaled, quote_scaled, side_code, event_time)

In [52]:
# Persist the prepared aggregated-trade rows as a Parquet file.
aggtrades_raw.write_parquet("../data/aggtrades_raw.parquet")

In [84]:
# i = 0
# for j, diff in enumerate(new_diffs[1:]):
#     order_book.track_diff(diff)
#     while (i < len(tickers)) and tickers[i]['u'] <= diffs[j+1]['u']:
#         i += 1
#     ob_best_bids = order_book.bids[0]
#     ob_best_asks = order_book.asks[0]
#     assert float(tickers[i-1]['b']) == ob_best_bids.base
#     assert float(tickers[i-1]['B']) == ob_best_bids.quote
#     assert float(tickers[i-1]['a']) == ob_best_asks.base
#     assert float(tickers[i-1]['A']) == ob_best_asks.quote