In [1]:
# Chemin relatif depuis notebooks/
import sys, os
sys.path.append(os.path.abspath(".."))

import pandas as pd
import numpy as np
from src.feature_extraction import compute_midprice, compute_order_imbalance

# Load the pre-processed 1-minute BTC data. The first line of the file is
# skipped, so the second line is the one parsed as the header row.
# "Date" / "Open" are renamed to the names used by the rest of the script,
# timestamps are parsed, and rows are put in chronological order.
df = (
    pd.read_csv("../data/btc_2025_1min.csv", skiprows=1)
    .rename(columns={"Date": "timestamp", "Open": "price"})
    .assign(timestamp=lambda frame: pd.to_datetime(frame["timestamp"]))
    .sort_values("timestamp")
)

# Simulate a level-1 order book: the bid and the ask sit symmetrically
# around the price, half of the fixed spread on each side.
spread = 1.0
half_spread = spread / 2
df["bid1"] = df["price"] - half_spread
df["ask1"] = df["price"] + half_spread

# Synthetic top-of-book sizes, one uniform draw in [1, 5) per row.
# The global NumPy RNG is seeded so the draws are reproducible; bid sizes
# are drawn before ask sizes, and that order determines the values.
np.random.seed(42)
n_rows = len(df)
df["bid_size1"] = np.random.uniform(low=1, high=5, size=n_rows)
df["ask_size1"] = np.random.uniform(low=1, high=5, size=n_rows)

# Base features: the frame goes through compute_midprice first, and its
# result is handed to compute_order_imbalance.
# NOTE(review): the code below reads "midprice" and "ofi" columns, presumably
# added by these two helpers -- confirm in src/feature_extraction.
df = compute_order_imbalance(compute_midprice(df))

# Derived features.
# Relative change of the mid price over 1 row and over 5 rows.
df["return_1"] = df["midprice"].pct_change()
df["return_5"] = df["midprice"].pct_change(5)
# Rolling standard deviation of the 1-row returns over a 5-row window.
df["volatility_5"] = df["return_1"].rolling(5).std()

# Rolling z-score of "ofi" over a 20-row window. The window object is built
# once and reused for both the mean and the standard deviation.
ofi_window = df["ofi"].rolling(20)
ofi_std = ofi_window.std()
# A flat window has a standard deviation of 0, which would turn the division
# into +/-inf (or NaN). Mask non-positive values to NaN so the score is
# simply missing there and can be removed by NaN cleanup, instead of leaking
# infinities into the saved features.
df["zscore_ofi"] = (df["ofi"] - ofi_window.mean()) / ofi_std.where(ofi_std > 0)

# Lagged copies of "ofi": ofi_lag1, ofi_lag2 and ofi_lag3 hold the value
# from 1, 2 and 3 rows earlier respectively.
ofi_series = df["ofi"]
for lag in range(1, 4):
    df[f"ofi_lag{lag}"] = ofi_series.shift(periods=lag)

# Cleanup: discard every row that has a missing value in any column
# (this includes the leading rows where rolling/shifted features are NaN).
df = df.dropna(how="any")

# Save the result for the modelling step; the row index is not written.
df.to_csv("../data/processed_orderbook.csv", index=False)
