In [54]:
import json
import os
import itertools
import pandas as pd

# get files
def get_files(path):
    """Split the entries of a directory into snapshot and update file paths.

    An entry whose name contains the substring "ob" is treated as an
    orderbook snapshot; every other entry is treated as an updates file.

    Args:
        path: Directory to list. A trailing path separator is optional.

    Returns:
        A ``(snapshot_files, updates_files)`` tuple of lists of paths, each
        sorted in ascending lexicographic order.

    Raises:
        OSError: Propagated from ``os.listdir`` (e.g. missing directory).
    """
    snapshot_files = []
    updates_files = []
    for name in os.listdir(path):
        # os.path.join inserts a separator only when `path` lacks one, so a
        # directory passed without a trailing slash still gives valid paths.
        full_path = os.path.join(path, name)
        if "ob" in name:
            snapshot_files.append(full_path)
        else:
            updates_files.append(full_path)
    snapshot_files.sort()
    updates_files.sort()
    return snapshot_files, updates_files

# get orderbook
def get_orderbook(snapshot_file):
    """Load an orderbook snapshot from a JSON file.

    The file must contain an object with the keys ``"lastUpdateId"``,
    ``"bids"`` and ``"asks"``; each side is a sequence of entries whose first
    two items are the price and the quantity (anything ``float()`` accepts).

    Args:
        snapshot_file: Path of the snapshot JSON file.

    Returns:
        A dict with ``"u"`` (the snapshot's ``lastUpdateId``) and ``"bids"`` /
        ``"asks"`` mappings of ``float`` price to ``float`` quantity, in the
        order the levels appear in the file.
    """
    # Context manager closes the handle even if JSON parsing fails.
    with open(snapshot_file) as f:
        orderbook_raw = json.load(f)
    orderbook = {
        "u" : orderbook_raw["lastUpdateId"],
        "bids" : {float(b[0]) : float(b[1]) for b in orderbook_raw["bids"]},
        "asks" : {float(a[0]) : float(a[1]) for a in orderbook_raw["asks"]}
    }
    return orderbook

# get updates
def get_updates(updates_file):
    """Load a list of orderbook updates from a JSON file.

    The file must contain a JSON array of objects with the keys ``"u"``,
    ``"b"`` (bid changes) and ``"a"`` (ask changes); each change is an entry
    whose first two items are the price and the quantity.

    Args:
        updates_file: Path of the updates JSON file.

    Returns:
        A list, in file order, of dicts with ``"u"`` and ``"bids"`` / ``"asks"``
        mappings of ``float`` price to ``float`` quantity. If a price repeats
        within one update, the last occurrence wins.
    """
    # Context manager closes the handle even if JSON parsing fails.
    with open(updates_file) as f:
        updates_raw = json.load(f)
    return [
        {
            "u" : u["u"],
            "bids" : {float(b[0]) : float(b[1]) for b in u["b"]},
            "asks" : {float(a[0]) : float(a[1]) for a in u["a"]}
        }
        for u in updates_raw
    ]

# update orderbook
def update_side(updates, orderbook, side):
    """Apply price-level changes to one side of the book.

    A non-zero quantity sets (or overwrites) the level at that price; a zero
    quantity removes the level if it is present. ``orderbook`` is modified in
    place, and a freshly sorted copy is returned.

    Args:
        updates: Mapping of price to new quantity.
        orderbook: Mapping of price to quantity for this side (mutated).
        side: ``"bids"`` sorts prices descending; any other value ascending.

    Returns:
        A new dict holding the updated levels in sorted price order.
    """
    for price, quantity in updates.items():
        if quantity != 0.0:
            orderbook[price] = quantity
        else:
            # Zero quantity means "remove"; absent prices are ignored.
            orderbook.pop(price, None)
    descending = side == "bids"
    return dict(sorted(orderbook.items(), reverse=descending))

def update_orderbook(updates, orderbook):
    """Apply a single update to the orderbook and return the same dict.

    Overwrites ``orderbook["u"]`` with the update's ``"u"``, then replaces the
    ``"bids"`` and ``"asks"`` entries with the result of ``update_side``.

    Args:
        updates: Dict with keys ``"u"``, ``"bids"`` and ``"asks"``.
        orderbook: Dict with the same keys; modified in place.

    Returns:
        The ``orderbook`` object that was passed in.
    """
    orderbook["u"] = updates["u"]
    for book_side in ("bids", "asks"):
        changes = updates[book_side]
        current_levels = orderbook[book_side]
        orderbook[book_side] = update_side(changes, current_levels, book_side)
    return orderbook

# create and update pandas dataframe
def get_df(rows, depth):
    """Build a DataFrame from the collected rows.

    Args:
        rows: Sequence of row lists, one per recorded orderbook state.
        depth: Number of price levels per side; determines the column names
            via ``get_columns``.

    Returns:
        A ``pandas.DataFrame`` whose columns are ``get_columns(depth)``.
    """
    return pd.DataFrame(rows, columns=get_columns(depth))

def get_columns(depth):
    """Return the column names for a table with ``depth`` levels per side.

    The layout is ``"ts"``, then for bids (prefix ``"b"``) and asks (prefix
    ``"a"``) a price/volume pair per level: ``b1, vb1, b2, vb2, ..., a1, va1, ...``.

    Args:
        depth: Number of price levels per side.

    Returns:
        A list of ``1 + 4 * depth`` column names.
    """
    columns = ["ts"]
    for prefix in ("b", "a"):
        for level in range(1, depth + 1):
            columns.append(f"{prefix}{level}")
            columns.append(f"v{prefix}{level}")
    return columns

def get_row(orderbook, depth):
    """Flatten the first ``depth`` levels of each side into one table row.

    The row is ``[u, bid_price_1, bid_qty_1, ..., ask_price_1, ask_qty_1, ...]``.
    Levels are taken in the iteration order of each side's dict.
    NOTE(review): this presumably relies on both sides being stored
    best-price-first; confirm for the initial snapshot.

    Each side always contributes exactly ``2 * depth`` cells. When a side holds
    fewer than ``depth`` levels, the missing cells are filled with NaN so the
    ask values cannot shift left into the bid columns.

    Args:
        orderbook: Dict with keys ``"u"``, ``"bids"`` and ``"asks"``; the two
            sides map price to quantity.
        depth: Number of levels to take from each side.

    Returns:
        A list of length ``1 + 4 * depth``.
    """
    def top_levels(levels):
        # Interleave price and quantity for the first `depth` levels.
        cells = []
        for price, quantity in itertools.islice(levels.items(), depth):
            cells += [price, quantity]
        # Pad a thin side to fixed width (no-op when enough levels exist).
        cells += [float("nan")] * (2 * depth - len(cells))
        return cells

    return [orderbook["u"]] + top_levels(orderbook["bids"]) + top_levels(orderbook["asks"])

# number of price levels per side recorded in each row
depth = 2

# initialize orderbook from the first (lexicographically smallest) snapshot file
path = "../data/btcusdt/20231112/take_1/orderbook/"
snapshot_files, updates_files = get_files(path)
orderbook = get_orderbook(snapshot_files[0])

# store first orderbook
rows = []
rows.append(get_row(orderbook, depth))

# replay every update in sorted file order, recording the book after each one
# NOTE(review): updates are applied without comparing their "u" against the
# snapshot's "u" - confirm the update files begin after the snapshot
for file in updates_files:
    updates = get_updates(file)
    for u in updates:
        orderbook = update_orderbook(u, orderbook)
        rows.append(get_row(orderbook, depth))

# build the dataframe once from the collected rows; the "ts" column holds
# each state's "u" value (see get_row), then display it
df = get_df(rows, depth)
df


Unnamed: 0,ts,b1,vb1,b2,vb2,a1,va1,a2,va2
0,40293046957,37074.01,12.6843,37074.0,0.0045,37074.02,0.41662,37074.03,0.00067
1,40293046961,37074.01,12.6843,37074.0,0.0045,37074.02,0.41662,37074.03,0.00067
2,40293046967,37074.01,12.6843,37074.0,0.0045,37074.02,0.41301,37074.03,0.00067
3,40293046970,37074.01,12.6843,37074.0,0.0045,37074.02,0.41301,37074.03,0.00067
4,40293046975,37074.01,12.68455,37074.0,0.0045,37074.02,0.41301,37074.03,0.00067
5,40293046979,37074.01,12.68455,37074.0,0.0045,37074.02,0.40887,37074.03,0.00067
6,40293046983,37074.01,12.68455,37074.0,0.0045,37074.02,0.40887,37074.03,0.00067
7,40293046997,37074.01,12.68455,37074.0,0.0045,37074.02,0.40887,37074.03,0.00067
8,40293047018,37074.01,12.74996,37074.0,0.0045,37074.02,0.17924,37074.03,0.00067
9,40293047026,37074.01,12.74996,37074.0,0.0045,37074.02,0.17924,37074.03,0.00067
