In [None]:
from pathlib import Path
import pandas as pd
import matplotlib.pyplot as plt

# Locate the cleaned 1-minute BTC parquet file.
# The project root is taken to be the parent of the current working directory,
# so this cell expects the kernel to be started one level below the root.
PROJECT_ROOT = Path.cwd().resolve().parents[0]
data_path = PROJECT_ROOT.joinpath("data", "processed", "clean_BTC_1min.parquet")

# Read the file into a DataFrame and preview the first rows
df = pd.read_parquet(data_path)
df.head()


In [None]:
# Sanity check, part 1: column dtypes, non-null counts and memory usage
df.info()
# Sanity check, part 2: summary statistics, transposed so each described column
# is a row; only the first 20 of those rows are shown
df.describe().transpose().head(20)

In [None]:
# Midpoint and spread over time, drawn as two stacked panels sharing one x-axis
sample = df.iloc[:5000]  # only the first 5000 rows are plotted; change the slice to widen the window

fig, axes = plt.subplots(2, 1, figsize=(10, 6), sharex=True)
mid_ax, spread_ax = axes

# Top panel: system_time vs. midpoint
mid_ax.plot(sample["system_time"], sample["midpoint"])
mid_ax.set(ylabel="Midpoint Price", title="BTC 1min – Midpoint")

# Bottom panel: system_time vs. spread; it also carries the x-axis label
spread_ax.plot(sample["system_time"], sample["spread"])
spread_ax.set(ylabel="Spread", xlabel="Time")

# Let matplotlib tidy the spacing between the panels, then render the figure
fig.tight_layout()
plt.show()


In [None]:
# Buy/sell imbalance: (buys - sells) / (buys + sells), plotted over time.
# The section needs both the "buys" and "sells" columns; when either one is
# absent it is skipped, and the reason is reported instead of leaving the
# cell output silently empty.
required_cols = ("buys", "sells")
missing_cols = [col for col in required_cols if col not in df.columns]

if missing_cols:
    print(f"Skipping buy/sell imbalance plot: missing column(s) {missing_cols}")
else:
    # Adds the column to df in place (re-running the cell just overwrites it).
    # The 1e-9 in the denominator avoids a division by zero on rows where
    # buys + sells == 0; such rows come out as 0.
    df["buy_sell_imbalance"] = (df["buys"] - df["sells"]) / (df["buys"] + df["sells"] + 1e-9)

    sample = df.iloc[:5000]  # same first-5000-rows window as a quick look; adjust as needed

    # Explicit figure/axes handles rather than the implicit pyplot state
    imb_fig, imb_ax = plt.subplots(figsize=(10, 4))
    imb_ax.plot(sample["system_time"], sample["buy_sell_imbalance"])
    # Dashed reference line at 0, i.e. where buys and sells are equal
    imb_ax.axhline(0, linestyle="--")
    imb_ax.set_ylabel("Buy/Sell Imbalance")
    imb_ax.set_xlabel("Time")
    imb_ax.set_title("BTC 1min – Buy vs Sell Imbalance")
    imb_fig.tight_layout()
    plt.show()
