In [1]:
import numpy as np
import pandas as pd
from tqdm import tqdm
import json
from functools import reduce
import os 
from lobio.lob.order_book import OrderBook

%load_ext autoreload
%autoreload 2

In [2]:
# Locations of the prepared inputs, relative to the notebook's working directory.
diffs_file = "../data/diffs_only_prepared.json"
init_lob_file = "../data/init_lob_prepared.json"
trades_file = "../data/trades_prepared.csv"


def _read_json(path):
    """Parse and return the UTF-8 encoded JSON document stored at ``path``."""
    with open(path, "r", encoding="utf-8") as handle:
        return json.load(handle)


# Both JSON inputs are read fully into memory; the trades come from CSV via pandas.
diffs = _read_json(diffs_file)
init_lob = _read_json(init_lob_file)

trades = pd.read_csv(trades_file)

In [3]:
# Create the order book object from the parsed initial snapshot (`init_lob`).
# NOTE(review): `OrderBook.create_lob_init` comes from lobio; the schema it expects
# for `init_lob` is not visible in this notebook — confirm against the library.
order_book = OrderBook.create_lob_init(init_lob)

In [4]:
# Record the top LOB_DEPTH levels of the book once per diff, then apply that diff.
# Each recorded row is flat, four values per level starting at level 0:
#   asks[level].base, asks[level].quote, bids[level].base, bids[level].quote
# NOTE(review): `apply_historical_update` appears to advance `order_book` in place,
# so re-running this cell without re-creating the book first would replay the diffs
# on an already-advanced book — confirm, and re-run the book-creation cell before this one.
lob_snapshots = []
diff_timestamps = []
LOB_DEPTH = 10

for diff in tqdm(diffs):
    # Assumes both sides of the book hold at least LOB_DEPTH levels; levels are
    # read by position, in the same order as the original row layout.
    lob_snapshots.append([
        value
        for level in range(LOB_DEPTH)
        for value in (
            order_book.asks[level].base,
            order_book.asks[level].quote,
            order_book.bids[level].base,
            order_book.bids[level].quote,
        )
    ])
    # The row above was captured before this diff is applied, so the value stored
    # here (diff[0]) labels the book state just prior to that update.
    diff_timestamps.append(diff[0])

    order_book.apply_historical_update(diff)

  8%|▊         | 769/9904 [00:00<00:01, 7684.57it/s]

100%|██████████| 9904/9904 [00:01<00:00, 6300.29it/s]


In [5]:
# Column labels for the flattened snapshot rows: four per level, in the order
# ask price, ask amount, bid price, bid amount, for levels 0 .. LOB_DEPTH - 1.
# Built directly as one flat list: unlike reduce() without an initializer, this
# yields [] (instead of raising TypeError) when LOB_DEPTH is 0, and it avoids
# re-copying the accumulated list for every level.
# NOTE(review): the snapshot loop fills these columns from `.base` and `.quote`
# (in that order), so this labelling presumes base == price and quote == amount —
# verify against lobio's price-level definition.
book_column_names = [
    name
    for i in range(LOB_DEPTH)
    for name in (f'ask[{i}].price', f'ask[{i}].amount', f'bid[{i}].price', f'bid[{i}].amount')
]

In [6]:
# Order-book table: one row per recorded snapshot, labelled with the value collected
# alongside it in `diff_timestamps`; the index is named 'timestamp'.
book_data = pd.DataFrame(
    np.array(lob_snapshots),
    index=diff_timestamps,
    columns=book_column_names,
).rename_axis('timestamp')

# Trades table re-keyed by its own 'timestamp' column.
trades_data = trades.set_index('timestamp')

In [7]:
# Make sure the output directory exists before the CSV files are written.
# exist_ok=True keeps the cell safe to re-run and removes the gap between a
# separate isdir() check and the mkdir() call; a non-directory at this path
# still raises FileExistsError, as before.
os.makedirs("./exchange_data", exist_ok=True)

In [8]:
# Write the order-book snapshot table as comma-separated CSV; the 'timestamp'
# index is written as the first column (pandas writes the index by default).
book_data.to_csv('./exchange_data/book.csv', sep=',')

In [9]:
# Write the trades table as comma-separated CSV; its 'timestamp' index is
# written as the first column (pandas writes the index by default).
trades_data.to_csv('./exchange_data/trades.csv', sep=',')