# In [None]:
import pandas as pd
import numpy as np


# Default minimum price increment; used whenever the caller does not pass ``tick``.
tick_size = 0.01


def load_mbp10_features(df: pd.DataFrame, *, tick: "float | None" = None) -> pd.DataFrame:
    """Add top-of-book, depth, order-flow and time-of-day features to ``df``.

    The frame is modified in place and the same object is returned.

    Columns read: ``bid_px_00``, ``ask_px_00`` and ``bid_sz_00`` .. ``bid_sz_09``
    / ``ask_sz_00`` .. ``ask_sz_09``.

    Columns written:
      * ``best_bid``, ``best_ask``, ``mid``, ``spread``, ``spread_ticks``,
        ``rel_spread_bp``
      * ``bid_sz_sum_0{k}``, ``ask_sz_sum_0{k}``, ``imbalance_0{k}`` for
        k in (1, 3, 5, 10)
      * ``ofi_0`` and its rolling sums ``ofi_0_1s``, ``ofi_0_5s``, ``ofi_0_10s``
      * ``mid_ret_1`` and ``roll_mid_vol_1s`` / ``_5s`` / ``_30s``
      * ``microprice``, ``microprice_delta``, ``microprice_imb``
      * ``ofi_norm_depth``, ``ofi_norm_vol``, ``ofi_0_sq``
      * ``sec_of_day``, ``tod_sin``, ``tod_cos``

    NOTE: the depth column suffix is the literal ``0`` followed by ``k``, so
    the ten-level columns are named ``..._010`` (not ``..._10``). The names are
    kept as they are so existing consumers of the frame keep working.

    Args:
        df: Book snapshots. The index must be a ``DatetimeIndex`` (or
            ``PeriodIndex``) because the time-based rolling windows and the
            time-of-day features are derived from it.
        tick: Price increment used for ``spread_ticks``. When omitted, the
            module-level ``tick_size`` is read at call time.

    Returns:
        The same ``df`` object with the feature columns added.

    Raises:
        ValueError: If the index is not datetime-like or ``tick`` is not a
            positive number.
    """
    # Fail before touching the caller's frame: the offset-based rolling windows
    # and the ``.hour`` / ``.minute`` / ``.second`` accessors used below need a
    # datetime-like index.
    if not isinstance(df.index, (pd.DatetimeIndex, pd.PeriodIndex)):
        raise ValueError(
            "load_mbp10_features requires a datetime-like index "
            f"(got {type(df.index).__name__})"
        )

    tick = tick_size if tick is None else tick
    if not tick > 0:
        raise ValueError(f"tick must be a positive number (got {tick!r})")

    # =========== TOP OF BOOK ===========

    df["best_bid"] = df["bid_px_00"]
    df["best_ask"] = df["ask_px_00"]

    df["mid"] = 0.5 * (df["best_bid"] + df["best_ask"])
    df["spread"] = df["best_ask"] - df["best_bid"]

    df["spread_ticks"] = (df["spread"] / tick).round().astype("float32")
    df["rel_spread_bp"] = ((df["spread"] / df["mid"]) * 1e4).astype("float32")

    # =========== DEPTH IMBALANCE FEATURES ===========

    for k in (1, 3, 5, 10):
        bid_cols = [f"bid_sz_{j:02d}" for j in range(k)]
        ask_cols = [f"ask_sz_{j:02d}" for j in range(k)]

        # Convert to float64 before subtracting: with unsigned-integer size
        # columns ``bid - ask`` would silently wrap around whenever the ask
        # side is the larger one.
        bid_sums = df[bid_cols].sum(axis=1).astype("float64")
        ask_sums = df[ask_cols].sum(axis=1).astype("float64")

        df[f'bid_sz_sum_0{k}'] = bid_sums.astype("float32")
        df[f'ask_sz_sum_0{k}'] = ask_sums.astype("float32")

        total = bid_sums + ask_sums
        # An empty book on both sides has no defined imbalance -> NaN.
        df[f'imbalance_0{k}'] = np.where(
            total > 0, (bid_sums - ask_sums) / total, np.nan
        ).astype("float32")

    # =========== OFI FEATURES ===========

    bid_px = df["best_bid"]
    ask_px = df["best_ask"]
    # Float sizes keep all size arithmetic below free of unsigned overflow.
    bid_sz = df["bid_sz_00"].astype("float64")
    ask_sz = df["ask_sz_00"].astype("float64")

    prev_bid_px = bid_px.shift()
    prev_ask_px = ask_px.shift()
    prev_bid_sz = bid_sz.shift()
    prev_ask_sz = ask_sz.shift()

    # Per-update order-flow imbalance at the best level:
    #   bid: price up -> +new size, unchanged -> +size change, down -> -old size
    #   ask: price down -> -new size, unchanged -> -size change, up -> +old size
    # The first row has no predecessor; every comparison is False there, so it
    # contributes 0. Rows with missing values are zeroed by ``fillna``.
    ofi = (
        bid_sz.where(bid_px > prev_bid_px, 0.0)
        + (bid_sz - prev_bid_sz).where(bid_px == prev_bid_px, 0.0)
        - prev_bid_sz.where(bid_px < prev_bid_px, 0.0)
        - ask_sz.where(ask_px < prev_ask_px, 0.0)
        - (ask_sz - prev_ask_sz).where(ask_px == prev_ask_px, 0.0)
        + prev_ask_sz.where(ask_px > prev_ask_px, 0.0)
    ).fillna(0.0).astype("float32")

    df["ofi_0"] = ofi
    for window in ("1s", "5s", "10s"):
        df[f"ofi_0_{window}"] = df["ofi_0"].rolling(window).sum().astype("float32")

    # =========== RETURN VOL FEATURES ===========

    df["mid_ret_1"] = df["mid"].pct_change(fill_method=None).fillna(0).astype("float32")

    # A window holding a single observation has no standard deviation; those
    # NaNs are reported as 0.
    for window in ("1s", "5s", "30s"):
        rolling_std = df["mid_ret_1"].rolling(window).std()
        df[f"roll_mid_vol_{window}"] = rolling_std.fillna(0).astype("float32")

    # =========== MICROPRICE FEATURES ===========

    top_total = bid_sz + ask_sz
    # Size-weighted mid; falls back to the plain mid when the top level is empty.
    microprice = pd.Series(
        np.where(
            top_total > 0,
            (bid_px * ask_sz + ask_px * bid_sz) / top_total,
            0.5 * (bid_px + ask_px),
        ),
        index=df.index,
    )

    # Derive the delta and the spread-normalised offset from the full-precision
    # series: float32 rounding of a price level is of the same order as these
    # sub-tick quantities. Only the stored columns are down-cast.
    df["microprice"] = microprice.astype("float32")
    df["microprice_delta"] = microprice.diff().fillna(0).astype("float32")

    spread = df["spread"].replace(0, np.nan)  # locked book -> NaN instead of inf
    df["microprice_imb"] = ((microprice - df["mid"]) / spread).astype("float32")

    # =========== NORMALIZED OFI ===========

    depth_ref = (df["bid_sz_sum_03"] + df["ask_sz_sum_03"]).replace(0, np.nan)
    df["ofi_norm_depth"] = (df["ofi_0"] / depth_ref).astype("float32")

    vol_ref = df["roll_mid_vol_5s"].replace(0, np.nan)
    ofi_norm_vol = (df["ofi_0"] / vol_ref).replace([np.inf, -np.inf], np.nan)
    df["ofi_norm_vol"] = ofi_norm_vol.astype("float32")

    df["ofi_0_sq"] = (df["ofi_0"] ** 2).astype("float32")

    # =========== TIME OF DAY ===========

    stamps = df.index
    sec_of_day = (stamps.hour * 3600 + stamps.minute * 60 + stamps.second).astype("int32")
    df["sec_of_day"] = sec_of_day

    # Cyclical encoding so that 23:59:59 and 00:00:00 end up close together.
    angle = 2 * np.pi * sec_of_day / 86400.0
    df["tod_sin"] = np.sin(angle).astype("float32")
    df["tod_cos"] = np.cos(angle).astype("float32")

    return df