In [2]:
import json
import os
import itertools
import pandas as pd

# get files
def get_files(path):
    """Split the entries of ``path`` into snapshot files and update files.

    An entry whose name contains "ob" is treated as an order book snapshot;
    every other entry is treated as an updates file.

    Args:
        path: Directory to list. A trailing path separator is optional.

    Returns:
        Tuple ``(snapshot_files, updates_files)``. Each element is a sorted
        list of paths built by joining ``path`` with the entry name.
    """
    snapshot_files, updates_files = [], []
    for name in os.listdir(path):
        # os.path.join only inserts a separator when ``path`` lacks one, so
        # both "dir" and "dir/" produce "dir/<name>".
        target = snapshot_files if "ob" in name else updates_files
        target.append(os.path.join(path, name))
    snapshot_files.sort()
    updates_files.sort()
    return snapshot_files, updates_files

# get orderbook
def get_orderbook(snapshot_file):
    """Load an order book snapshot from a JSON file.

    The JSON document must provide "lastUpdateId", "bids" and "asks"; each
    bid/ask entry is indexable with the price at position 0 and the quantity
    at position 1.

    Args:
        snapshot_file: Path of the snapshot JSON file.

    Returns:
        Dict with keys "u" (the snapshot's "lastUpdateId"), "E" (always 0 for
        a snapshot), and "bids" / "asks" (dicts mapping float price to float
        quantity, in file order).
    """
    # Context manager so the file handle is closed even if parsing fails.
    with open(snapshot_file) as f:
        orderbook_raw = json.load(f)
    return {
        "u": orderbook_raw["lastUpdateId"],
        "E": 0,
        "bids": {float(b[0]): float(b[1]) for b in orderbook_raw["bids"]},
        "asks": {float(a[0]): float(a[1]) for a in orderbook_raw["asks"]},
    }

# get updates
def get_updates(updates_file):
    """Load a list of order book updates from a JSON file.

    The file must contain a JSON array whose items provide "u", "E", "b" and
    "a"; each "b"/"a" entry is indexable with the price at position 0 and the
    quantity at position 1.

    Args:
        updates_file: Path of the updates JSON file.

    Returns:
        List of dicts, one per update and in file order, with keys "u", "E",
        "bids" and "asks". "bids"/"asks" map float price to float quantity.
    """
    # Context manager so the file handle is closed even if parsing fails.
    with open(updates_file) as f:
        updates_raw = json.load(f)
    return [
        {
            "u": u["u"],
            "E": u["E"],
            "bids": {float(b[0]): float(b[1]) for b in u["b"]},
            "asks": {float(a[0]): float(a[1]) for a in u["a"]},
        }
        for u in updates_raw
    ]

# update orderbook
def update_side(updates, orderbook, side):
    """Apply price-level changes to one side of the book and re-sort it.

    Args:
        updates: Dict mapping price to new quantity. A quantity of 0.0 means
            the level is removed (if present); any other value overwrites or
            inserts the level.
        orderbook: Dict mapping price to quantity for this side. It is
            modified in place.
        side: "bids" sorts prices in descending order; any other value sorts
            them in ascending order.

    Returns:
        A new dict holding the updated levels in sorted order.
    """
    for price, quantity in updates.items():
        if quantity == 0.0:
            # Removing a level that is not in the book is a no-op.
            orderbook.pop(price, None)
        else:
            orderbook[price] = quantity
    descending = side == "bids"
    return dict(sorted(orderbook.items(), reverse=descending))

def update_orderbook(updates, orderbook):
    """Apply a single update to the order book.

    Copies the update's "u" and "E" values onto the book, then replaces the
    "bids" and "asks" sides with the result of ``update_side``.

    Args:
        updates: Dict with keys "u", "E", "bids" and "asks".
        orderbook: Dict with keys "bids" and "asks"; modified in place.

    Returns:
        The same ``orderbook`` object that was passed in.
    """
    for key in ("u", "E"):
        orderbook[key] = updates[key]
    for side in ("bids", "asks"):
        orderbook[side] = update_side(updates[side], orderbook[side], side)
    return orderbook

# create and update pandas dataframe
def get_processed_orderbook(rows, depth):
    """Wrap already-built rows in a DataFrame labelled by ``get_columns(depth)``.

    NOTE(review): a second ``get_processed_orderbook(path, depth)`` is defined
    further down in this cell and rebinds the name, so this version is
    unreachable once the cell has run. Rename or remove one of the two.

    Args:
        rows: Sequence of row lists.
        depth: Number of price levels per side, used to build the column names.

    Returns:
        ``pandas.DataFrame`` built from ``rows``.
    """
    return pd.DataFrame(rows, columns=get_columns(depth))

def get_columns(depth):
    """Build the column names for a processed order book row.

    Args:
        depth: Number of price levels per side.

    Returns:
        List starting with "u" and "E", followed by "b<i>", "vb<i>" for each
        level i = 1..depth, then "a<i>", "va<i>" for each level i = 1..depth.
    """
    columns = ["u", "E"]
    for side in ("b", "a"):
        for level in range(1, depth + 1):
            columns.append(f"{side}{level}")
            columns.append(f"v{side}{level}")
    return columns

def get_row(orderbook, depth):
    """Flatten the first ``depth`` levels of each side into one row.

    Args:
        orderbook: Dict with keys "u", "E", "bids" and "asks"; the two sides
            are dicts mapping price to quantity. Levels are taken in the
            dicts' iteration order.
        depth: Number of price levels to take from each side.

    Returns:
        List ``[u, E, bid price 1, bid qty 1, ..., ask price 1, ask qty 1, ...]``
        of length ``2 + 4 * depth``. When a side holds fewer than ``depth``
        levels, its missing slots are filled with NaN so that ask values never
        shift into bid columns and every row has the same length.
    """
    def top_levels(side):
        flat = []
        for price, quantity in itertools.islice(orderbook[side].items(), depth):
            flat += [price, quantity]
        # Pad a shallow side up to its fixed width of 2 * depth entries.
        flat += [float("nan")] * (2 * depth - len(flat))
        return flat

    return [orderbook["u"], orderbook["E"]] + top_levels("bids") + top_levels("asks")

def get_processed_orderbook(path, depth):
    """Replay all updates found in ``path`` and record the book after each one.

    The book starts from the first snapshot file (in sorted order). Every
    updates file is then read in sorted order and its updates are applied one
    at a time; the top ``depth`` levels are recorded after each update.

    NOTE(review): updates are applied without comparing their "u" against the
    snapshot's "u" -- confirm the update files only contain events that come
    after the snapshot.

    Args:
        path: Directory holding the snapshot and updates files.
        depth: Number of price levels per side to keep in each row.

    Returns:
        ``pandas.DataFrame`` with one row for the initial snapshot followed by
        one row per applied update, using the columns from ``get_columns``.
    """
    snapshot_files, updates_files = get_files(path)
    book = get_orderbook(snapshot_files[0])

    # The first record is the snapshot itself, before any update is applied.
    records = [get_row(book, depth)]
    for updates_file in updates_files:
        for update in get_updates(updates_file):
            book = update_orderbook(update, book)
            records.append(get_row(book, depth))

    return pd.DataFrame(records, columns=get_columns(depth))


# Number of price levels kept per side in each row.
depth = 2
# Directory (relative path) containing the snapshot and updates files.
path = "../data/btcusdt/20231112/take_1/orderbook/"
# Full replay: the snapshot row followed by one row per applied update.
orderbook = get_processed_orderbook(path, depth)


In [7]:
# Rows whose update id "u" is greater than 40293208782.
orderbook[orderbook["u"] > 40293208782]

Unnamed: 0,u,E,b1,vb1,b2,vb2,a1,va1,a2,va2
4100,40293208790,1699814224324,37174.99,8.24679,37174.89,0.02693,37175.00,3.25955,37175.25,0.00067
4101,40293208798,1699814224424,37174.99,8.24679,37174.89,0.02693,37175.00,3.25955,37175.25,0.00067
4102,40293208810,1699814224525,37174.99,8.24611,37174.89,0.02693,37175.00,3.25955,37175.25,0.00067
4103,40293208812,1699814224625,37174.99,8.24611,37174.89,0.02693,37175.00,3.25955,37175.25,0.00067
4104,40293208819,1699814224725,37174.99,8.24611,37174.89,0.02693,37175.00,3.25955,37175.25,0.00067
...,...,...,...,...,...,...,...,...,...,...
8188,40293275611,1699814646332,37155.00,0.41480,37154.99,0.00500,37155.01,9.98543,37155.50,0.24518
8189,40293275615,1699814646432,37155.00,0.41480,37154.99,0.00500,37155.01,9.98543,37155.50,0.24518
8190,40293275634,1699814646532,37155.00,0.41422,37154.99,0.00500,37155.01,10.29222,37155.50,0.24518
8191,40293275652,1699814646632,37155.00,0.40741,37154.99,0.00500,37155.01,10.28725,37155.50,0.24518


In [15]:
# Last two rows whose update id "u" is at most 40293244692.
orderbook[orderbook["u"] <= 40293244692].iloc[-2:]

Unnamed: 0,u,E,b1,vb1,b2,vb2,a1,va1,a2,va2
6146,40293244690,1699814433126,37199.99,2.00277,37199.21,0.00054,37200.0,19.7082,37200.03,0.00067
6147,40293244692,1699814433226,37199.99,2.00277,37199.21,0.00054,37200.0,19.7082,37200.03,0.00067


In [16]:
# First two rows whose update id "u" is at least 40293244692.
orderbook[orderbook["u"] >= 40293244692].iloc[:2]

Unnamed: 0,u,E,b1,vb1,b2,vb2,a1,va1,a2,va2
6147,40293244692,1699814433226,37199.99,2.00277,37199.21,0.00054,37200.0,19.7082,37200.03,0.00067
6148,40293244696,1699814433326,37199.99,2.00277,37199.21,0.00054,37200.0,19.7082,37200.03,0.00067
