Trying to automate everything in one place

In [1]:
import re
import json
import pandas as pd

def parse_trade_history(lines):
    """
    Extract the JSON array that follows the 'Trade History:' marker.

    Parameters
    ----------
    lines : list of str
        Raw lines of the log file (e.g. the result of ``readlines()``).

    Returns
    -------
    pandas.DataFrame
        One row per trade, restricted to whichever of the columns
        timestamp, buyer, seller, product, currency, price, quantity
        are present, and sorted by timestamp when that column exists.
        An empty DataFrame is returned when the marker is missing, the
        payload is not a JSON array, or the array contains no trades.
    """
    start_idx = next(
        (i for i, line in enumerate(lines) if "Trade History:" in line),
        None,
    )
    if start_idx is None:
        return pd.DataFrame()  # no trades section at all

    # Everything after the marker is treated as the candidate JSON payload.
    json_str = "".join(lines[start_idx + 1:]).strip()

    # In case there's extraneous text before '[', start at the first '['.
    bracket_index = json_str.find("[")
    if bracket_index != -1:
        json_str = json_str[bracket_index:]

    # raw_decode stops at the end of the first JSON value, so text that
    # follows the array (e.g. another section) no longer discards the trades.
    try:
        trade_list, _ = json.JSONDecoder().raw_decode(json_str)
    except json.JSONDecodeError:
        return pd.DataFrame()

    # Anything other than a non-empty array carries no trade rows.
    if not isinstance(trade_list, list) or not trade_list:
        return pd.DataFrame()

    df = pd.DataFrame(trade_list)

    # Rename 'symbol' -> 'product' if needed
    if "symbol" in df.columns and "product" not in df.columns:
        df = df.rename(columns={"symbol": "product"})

    # Normalize buyer/seller: "" becomes "BOT", "SUBMISSION" becomes "ME".
    def fix_name(x):
        if x == "":
            return "BOT"
        elif x == "SUBMISSION":
            return "ME"
        return x

    for party in ("buyer", "seller"):
        if party in df.columns:
            df[party] = df[party].apply(fix_name)

    # Keep a standard set of columns, in a fixed order.
    desired_cols = ["timestamp", "buyer", "seller", "product", "currency", "price", "quantity"]
    df = df[[c for c in desired_cols if c in df.columns]]

    # Sorting is only possible when the payload carried timestamps; a
    # stable sort keeps trades sharing a timestamp in their logged order.
    if "timestamp" in df.columns:
        df = df.sort_values("timestamp", kind="stable")
    return df

def parse_sandbox_orders(lines):
    """
    Extract the orders logged in the 'Sandbox logs:' section.

    The section is scanned for flat JSON objects ``{...}``. Each object's
    'lambdaLog' string is expected to look like:
      timestamp: 0
      BUY Order: 10 x KELP at 2028
      SELL Order: 5 x KELP at 2029

    Parameters
    ----------
    lines : list of str
        Raw lines of the log file (e.g. the result of ``readlines()``).

    Returns
    -------
    pandas.DataFrame
        Columns timestamp, product, side, quantity, price, sorted by
        timestamp. When the section exists but no order line is found the
        frame is empty but still carries those columns. When the
        'Sandbox logs:' marker is missing a bare empty DataFrame is returned.
    """
    order_columns = ["timestamp", "product", "side", "quantity", "price"]
    order_pattern = re.compile(r'^(BUY|SELL)\s+Order:\s+(\d+)\s+x\s+(\S+)\s+at\s+(\d+)$')

    start_idx = next(
        (i for i, line in enumerate(lines) if "Sandbox logs:" in line),
        None,
    )
    if start_idx is None:
        return pd.DataFrame()  # no sandbox logs

    # Collect lines until one that consists only of '}}' (ignoring whitespace).
    collected = []
    for line in lines[start_idx + 1:]:
        if line.strip() == "}}":
            break
        collected.append(line)
    text_block = "".join(collected)

    orders = []
    # Non-greedy match: each hit runs from a '{' to the next '}'.
    for obj_str in re.findall(r'\{.*?\}', text_block, flags=re.DOTALL):
        try:
            data = json.loads(obj_str)
        except json.JSONDecodeError:
            continue
        lambda_log = data.get("lambdaLog", "")
        # Skip empty logs and anything that is not text (cannot be split).
        if not isinstance(lambda_log, str) or not lambda_log:
            continue

        lines_log = lambda_log.strip().split("\n")

        # First line: "timestamp: X"; stays None when it cannot be parsed.
        current_ts = None
        if lines_log[0].startswith("timestamp:"):
            parts = lines_log[0].split(":")
            if len(parts) == 2:
                try:
                    current_ts = int(parts[1].strip())
                except ValueError:
                    pass

        # Subsequent lines: e.g. "BUY Order: 10 x KELP at 2028"
        for ln in lines_log[1:]:
            m = order_pattern.match(ln.strip())
            if m is None:
                continue
            side, qty, prod, price = m.groups()
            orders.append({
                "timestamp": current_ts,
                "product": prod,
                "side": side,
                "quantity": int(qty),
                "price": int(price),
            })

    # Passing the columns explicitly keeps the schema when no order matched,
    # so the sort below (and callers) never hit a missing 'timestamp' column.
    df = pd.DataFrame(orders, columns=order_columns)
    if df.empty:
        return df
    return df.sort_values("timestamp", kind="stable")

def run_all(csv_file="1.csv", txt_file="1.txt", output_xlsx="merged_output.xlsx"):
    """
    Merge market data, trades and own orders into one Excel workbook.

    Reads:
      - CSV market data (semicolon-separated) from 'csv_file'
      - A single TXT file containing both "Trade History:" (JSON array of trades)
        and "Sandbox logs:" (JSON objects for your orders)
    Writes 'output_xlsx' with one sheet per product.
    - All trades at a timestamp => separate rows
    - Orders at a timestamp => aggregated into 'my_orders' in the first row only

    Parameters
    ----------
    csv_file : str
        Path of the semicolon-separated market data file.
    txt_file : str
        Path of the UTF-8 log file holding the trade and sandbox sections.
    output_xlsx : str
        Path of the workbook to create.
    """
    merge_keys = ["timestamp", "product"]

    # 1) Read the CSV (semicolon-separated) for market data
    df_book = pd.read_csv(csv_file, sep=';')
    df_book.columns = [c.strip() for c in df_book.columns]

    # 2) Read all lines from the TXT file
    with open(txt_file, "r", encoding="utf-8") as f:
        all_lines = f.readlines()

    # 3) Parse trades & orders from the same TXT
    df_trades = parse_trade_history(all_lines)      # columns: timestamp,buyer,seller,product,currency,price,quantity
    df_orders = parse_sandbox_orders(all_lines)     # columns: timestamp,product,side,quantity,price

    # A log without trades yields a frame with no columns at all, which
    # cannot be merged on the keys; substitute an empty frame with the
    # expected schema so the book rows still come through.
    if df_trades.empty:
        df_trades = pd.DataFrame(
            columns=["timestamp", "buyer", "seller", "product", "currency", "price", "quantity"]
        )

    # 4) Merge the CSV with trades
    #    We'll get multiple rows if multiple trades at the same timestamp.
    #    No "clear_duplicates" => we keep all trades.
    df_merged_trades = pd.merge(
        df_book,
        df_trades,
        on=merge_keys,
        how="left",
        suffixes=("", "_trade")
    )

    # 5) Aggregate orders by (timestamp, product) into a single string "my_orders"
    #    Example: "BUY 10@2028, SELL 5@2029"
    if not df_orders.empty:
        # Build "BUY 10@2028" per order row, then join the labels per group.
        order_labels = (
            df_orders["side"].astype(str) + " "
            + df_orders["quantity"].astype(str) + "@"
            + df_orders["price"].astype(str)
        )
        df_orders_agg = (
            df_orders[merge_keys]
            .assign(my_orders=order_labels)
            .groupby(merge_keys, as_index=False)["my_orders"]
            .agg(", ".join)
        )
    else:
        df_orders_agg = pd.DataFrame(columns=merge_keys + ["my_orders"])

    # 6) Merge the aggregated orders onto df_merged_trades
    df_merged_all = pd.merge(
        df_merged_trades,
        df_orders_agg,
        on=merge_keys,
        how="left"
    )

    # 7) We want 'my_orders' only in the first row of each (timestamp, product)
    #    group, blank in others. Every row after the first of its group is a
    #    duplicate on the keys, so a mask replaces the per-group apply.
    df_merged_all["my_orders"] = df_merged_all["my_orders"].astype(object)
    repeated_rows = df_merged_all.duplicated(subset=merge_keys, keep="first")
    df_merged_all.loc[repeated_rows, "my_orders"] = ""

    # 8) Final columns in the order you wanted, plus "my_orders"
    DESIRED_COLS = [
        "day", "timestamp", "product",
        "bid_price_1", "bid_volume_1",
        "bid_price_2", "bid_volume_2",
        "bid_price_3", "bid_volume_3",
        "ask_price_1", "ask_volume_1",
        "ask_price_2", "ask_volume_2",
        "ask_price_3", "ask_volume_3",
        "mid_price",
        "my_orders",      # <--- new column for your orders
        "buyer", "seller", "currency", "price", "quantity"
    ]
    # Keep only existing columns
    final_cols = [c for c in DESIRED_COLS if c in df_merged_all.columns]
    df_merged_all = df_merged_all[final_cols]

    # 9) Write one sheet per product
    products = df_merged_all["product"].unique()
    with pd.ExcelWriter(output_xlsx) as writer:
        for prod in products:
            df_prod = df_merged_all[df_merged_all["product"] == prod]
            # Sort by day, then timestamp if both exist. The stable sort
            # keeps the row carrying 'my_orders' first within its timestamp.
            sort_cols = [c for c in ("day", "timestamp") if c in df_prod.columns]
            if sort_cols:
                df_prod = df_prod.sort_values(sort_cols, kind="stable")

            # Excel rejects \ / * ? : [ ] in sheet names and caps them at 31 chars.
            sheet_name = re.sub(r'[\\/*?:\[\]]', "_", str(prod))[:31]
            df_prod.to_excel(writer, sheet_name=sheet_name, index=False)

    print(f"Done! Created '{output_xlsx}' with one sheet per product.")

# --- Single-cell usage ---
if __name__ == "__main__":
    # Script entry point: merge the sample CSV and log into the workbook.
    run_all(
        csv_file="1.csv.csv",
        txt_file="1.txt.log",
        output_xlsx="merged_output.xlsx",
    )


ModuleNotFoundError: No module named 'pandas'

Final piece


In [11]:
import re
import numpy as np
import pandas as pd
import dash
from dash import dcc, html, Input, Output
import plotly.graph_objs as go
from hmmlearn.hmm import GaussianHMM

# ----------------------------------------------------------------
# 1) LOAD THE MERGED DATA
#    We assume merged_output.xlsx has two sheets: "KELP" and "RAINFOREST_RESIN".
# ----------------------------------------------------------------
# Read the two product sheets in a fixed order: KELP first, then RAINFOREST_RESIN.
df_kelp, df_resin = (
    pd.read_excel("merged_output.xlsx", sheet_name=sheet)
    for sheet in ("KELP", "RAINFOREST_RESIN")
)

# Stack both products into one frame, ordered by timestamp with a fresh index.
df_combined = (
    pd.concat([df_kelp, df_resin], ignore_index=True)
    .sort_values("timestamp")
    .reset_index(drop=True)
)

# Distinct product names, used to populate the product dropdown.
PRODUCTS = df_combined["product"].unique()

# ----------------------------------------------------------------
# 2) DEFINE THE DASH APP
# ----------------------------------------------------------------
app = dash.Dash(__name__)

# Page layout: the controls (product, lines, trade filters, volume mode)
# come first, followed by the five graphs filled in by the callback.
# Style dictionaries use camelCased keys, as Dash expects for inline styles.
app.layout = html.Div([
    html.H1("Enhanced Visualization: Book, Trades, Orders & Position"),

    # Product selection
    html.Label("Select Product:"),
    dcc.Dropdown(
        id='product-dropdown',
        options=[{'label': p, 'value': p} for p in PRODUCTS],
        value=PRODUCTS[0] if len(PRODUCTS) else None,
        style={'width': '300px', 'marginBottom': '10px'}
    ),

    # Checklist for lines to display on the price graph
    html.Label("Show Lines:"),
    dcc.Checklist(
        id='lines-checklist',
        options=[
            {'label': 'Mid Price', 'value': 'mid_price'},
            {'label': 'VWAP', 'value': 'vwap'},
            {'label': 'Bid1', 'value': 'bid1'},
            {'label': 'Ask1', 'value': 'ask1'},
            {'label': 'Bid2', 'value': 'bid2'},
            {'label': 'Ask2', 'value': 'ask2'},
            {'label': 'Bid3', 'value': 'bid3'},
            {'label': 'Ask3', 'value': 'ask3'},
            {'label': 'My Orders', 'value': 'my_orders'},
            {'label': 'Theo Highest', 'value': 'theo_highest'}
        ],
        value=['mid_price'],
        inline=True,
        style={'marginBottom': '10px'}
    ),

    # Trade type filter
    html.Label("Trade Type Filter:"),
    dcc.Dropdown(
        id='trade-type-filter',
        options=[
            {'label': 'All Trades', 'value': 'all'},
            {'label': 'My Trades Only', 'value': 'my'},
            {'label': 'Bot vs Bot Only', 'value': 'botbot'}
        ],
        value='all',
        style={'width': '200px', 'marginBottom': '10px'}
    ),

    # Trade volume range filter
    html.Label("Trade Volume Range:"),
    dcc.RangeSlider(
        id='volume-range',
        min=0,
        max=50,
        step=1,
        value=[0, 50],
        marks={i: str(i) for i in range(0, 51, 10)},
        allowCross=False,
        tooltip={"placement": "bottom", "always_visible": True}
    ),

    # Volume plot mode options, including the MM Spread variants
    html.Label("Volume Plot:"),
    dcc.RadioItems(
        id='volume-mode',
        options=[
            {'label': 'Bars (bid vs ask)', 'value': 'bars'},
            {'label': 'Ratio (bid/ask)', 'value': 'ratio'},
            {'label': 'Difference (bid - ask)', 'value': 'difference'},
            {'label': 'MM Quote Volume', 'value': 'mm'},
            {'label': 'Rolling MM (3 timesteps)', 'value': 'mm_roll_3'},
            {'label': 'Rolling MM (5 timesteps)', 'value': 'mm_roll_5'},
            {'label': 'MM Spread', 'value': 'mm_spread'},
            {'label': 'Rolling MM Spread (3 timesteps)', 'value': 'mm_spread_roll_3'},
            {'label': 'Rolling MM Spread (5 timesteps)', 'value': 'mm_spread_roll_5'}
        ],
        value='bars',
        inline=True,
        style={'marginBottom': '20px'}
    ),

    # Graphs: Price on top, then Volume & Position side by side,
    # followed by the orders graph and the HMM graph.
    dcc.Graph(id='price-graph'),
    html.Div([
        html.Div([dcc.Graph(id='volume-graph')], style={'width': '50%', 'display': 'inline-block'}),
        html.Div([dcc.Graph(id='position-graph')], style={'width': '50%', 'display': 'inline-block'})
    ]),
    dcc.Graph(id='orders-graph'),
    dcc.Graph(id='hmm-graph')
])

# ----------------------------------------------------------------
# 3) HELPER FUNCTIONS
# ----------------------------------------------------------------
def parse_my_orders(order_str):
    """
    Turn an aggregated order string into a list of order dicts.

    Example: "BUY 10@2026, SELL 10@2027" becomes
    [{"side": "BUY", "qty": 10, "price": 2026},
     {"side": "SELL", "qty": 10, "price": 2027}].

    Non-string or blank input gives an empty list; comma-separated pieces
    that do not match the "<SIDE> <qty>@<price>" shape are ignored.
    """
    if not (isinstance(order_str, str) and order_str.strip()):
        return []

    parsed = []
    for chunk in order_str.split(","):
        hit = re.match(r"^(BUY|SELL)\s+(\d+)@(\d+)$", chunk.strip())
        if hit is None:
            continue
        side, qty_text, price_text = hit.groups()
        parsed.append({"side": side, "qty": int(qty_text), "price": int(price_text)})
    return parsed

def compute_theo_highest(row):
    """
    'Theo Highest' = average of the highest-volume bid level and highest-volume ask level.

    Parameters
    ----------
    row : mapping with ``.get`` (e.g. a DataFrame row or a dict)
        May hold bid_price_N / bid_volume_N / ask_price_N / ask_volume_N
        for N in 1..3; missing keys count as volume 0 / price NaN.

    Returns
    -------
    float
        Mean of the price at the largest-volume bid level and the price at
        the largest-volume ask level, or NaN when either side has no volume.
        On equal volumes the lowest-numbered level wins.
    """
    levels = (1, 2, 3)

    def level_volume(key):
        # A NaN (or None) volume means the level is absent: count it as 0 so
        # it cannot poison max() and hide real volume on deeper levels.
        vol = row.get(key, 0)
        return vol if pd.notna(vol) else 0

    bid_vols = [level_volume(f'bid_volume_{lvl}') for lvl in levels]
    bid_prices = [row.get(f'bid_price_{lvl}', np.nan) for lvl in levels]
    ask_vols = [level_volume(f'ask_volume_{lvl}') for lvl in levels]
    ask_prices = [row.get(f'ask_price_{lvl}', np.nan) for lvl in levels]

    top_bid_vol = max(bid_vols)
    top_ask_vol = max(ask_vols)
    if top_bid_vol == 0 or top_ask_vol == 0:
        return np.nan

    bid_idx = bid_vols.index(top_bid_vol)
    ask_idx = ask_vols.index(top_ask_vol)
    return (bid_prices[bid_idx] + ask_prices[ask_idx]) / 2

def safe_vol(v):
    """Map a missing volume (as judged by pandas) to 0; pass anything else through."""
    if pd.isna(v):
        return 0
    return v

# ----------------------------------------------------------------
# 4) DASH CALLBACK
# ----------------------------------------------------------------
@app.callback(
    Output('price-graph', 'figure'),
    Output('volume-graph', 'figure'),
    Output('position-graph', 'figure'),
    Output('orders-graph', 'figure'),
    Output('hmm-graph', 'figure'),
    Input('product-dropdown', 'value'),
    Input('lines-checklist', 'value'),
    Input('volume-mode', 'value'),
    Input('trade-type-filter', 'value'),
    Input('volume-range', 'value')
)
def update_plots(selected_product, selected_lines, volume_mode, trade_type_filter, vol_range):
    if not selected_product:
        return go.Figure(), go.Figure(), go.Figure()
    
    # Filter data for selected product
    filtered_df = df_combined[df_combined['product'] == selected_product].copy()

    if 'mid_price'in filtered_df.columns:
        # ===================== PREPARE DATA FOR HMM =====================
        # Calculate log returns for HMM features
        filtered_df['log_return'] = np.log(filtered_df['mid_price'] / filtered_df['mid_price'].shift(1)).fillna(0)

        # Calculate rolling volatility
        filtered_df['volatility'] = filtered_df['log_return'].rolling(window=10).std().fillna(0)

        # Use log return and volatility as features
        observations = filtered_df[['log_return', 'volatility']].values

        # ===================== TRAIN HMM =====================
        # Define and fit the Gaussian HMM with 3 states
        hmm_model = GaussianHMM(n_components=3, covariance_type='diag', n_iter=100, random_state=42)
        hmm_model.fit(observations)

        # Predict the hidden states
        states = hmm_model.predict(observations)
        filtered_df['state'] = states

        # ===================== HMM STATE VISUALIZATION =====================
        fig_hmm = go.Figure()

        # Add price trace
        fig_hmm.add_trace(go.Scatter(
            x=filtered_df['timestamp'],
            y=filtered_df['mid_price'],
            mode='lines',
            name='Mid Price',
            line=dict(color='white'),
            hovertemplate="Timestamp: %{x}<br>Mid Price: %{y}<extra></extra>"
        ))

        # Add state traces
        state_colors = ['blue', 'red', 'green']
        for state in range(hmm_model.n_components):
            fig_hmm.add_trace(go.Scatter(
                x=filtered_df[filtered_df['state'] == state]['timestamp'],
                y=filtered_df[filtered_df['state'] == state]['mid_price'],
                mode='markers',
                name=f"State {state}",
                marker=dict(color=state_colors[state]),
                hovertemplate="Timestamp: %{x}<br>Mid Price: %{y}<br>State: %{text}<extra></extra>",
                text=[f"State {state}" for _ in range(len(filtered_df[filtered_df['state'] == state]))]
            ))

        fig_hmm.update_layout(
            title=f"{selected_product}: Hidden Markov Model States",
            xaxis_title="Timestamp",
            yaxis_title="Mid Price",
            hovermode="x unified"
        )

    else:
        fig_hmm = go.Figure()

    # ================= EXTRACT ORDERS PLACED VS FILLED =================
    # Orders placed are derived from 'my_orders' column
    orders_placed_df = filtered_df[['timestamp', 'my_orders']].copy()
    orders_placed_df['placed_quantity'] = orders_placed_df['my_orders'].apply(
        lambda x: sum(o['qty'] for o in parse_my_orders(x)))
    # Group placed quantities per timestamp
    orders_placed_df = orders_placed_df.groupby('timestamp', as_index=False)['placed_quantity'].sum()

    # Orders filled are derived from trades executed (buyer='ME' or seller='ME')
    trades_filled_df = filtered_df[(filtered_df['buyer'] == 'ME') | (filtered_df['seller'] == 'ME')][
        ['timestamp', 'quantity']].copy()
    trades_filled_df = trades_filled_df.groupby('timestamp', as_index=False)['quantity'].sum().rename(
        columns={'quantity': 'filled_quantity'})

    # Merge placed and filled dataframes
    orders_merged = pd.merge(orders_placed_df, trades_filled_df, on='timestamp', how='outer').fillna(0)
    orders_merged.sort_values('timestamp', inplace=True)

    # ================= BUILD THE ORDERS GRAPH =================
    fig_orders = go.Figure()

    # Add orders placed trace
    fig_orders.add_trace(go.Scatter(
        x=orders_merged['timestamp'],
        y=orders_merged['placed_quantity'],
        mode='lines+markers',
        name='Orders Placed',
        line=dict(color='blue'),
        marker=dict(symbol='circle', size=6, color='blue'),
        hovertemplate="Timestamp: %{x}<br>Orders Placed: %{y}<extra></extra>"
    ))

    # Add orders filled trace
    fig_orders.add_trace(go.Scatter(
        x=orders_merged['timestamp'],
        y=orders_merged['filled_quantity'],
        mode='lines+markers',
        name='Orders Filled',
        line=dict(color='green'),
        marker=dict(symbol='circle', size=6, color='green'),
        hovertemplate="Timestamp: %{x}<br>Orders Filled: %{y}<extra></extra>"
    ))

    fig_orders.update_layout(
        title=f"{selected_product}: Orders Placed vs Orders Filled",
        xaxis_title="Timestamp",
        yaxis_title="Order Quantity",
        hovermode="closest"
    )
    
    # Compute MM quote volumes: use safe_vol to ensure volumes are not NaN.
    filtered_df["mm_bid_vol"] = filtered_df[["bid_volume_1", "bid_volume_2", "bid_volume_3"]].fillna(0).max(axis=1)
    filtered_df["mm_ask_vol"] = filtered_df[["ask_volume_1", "ask_volume_2", "ask_volume_3"]].fillna(0).max(axis=1)
    filtered_df["mm_volume"] = filtered_df["mm_bid_vol"] + filtered_df["mm_ask_vol"]
    filtered_df["vwap"] = (
        (filtered_df["price"] * filtered_df["quantity"]).groupby(filtered_df["timestamp"]).transform("sum") /
        filtered_df["quantity"].groupby(filtered_df["timestamp"]).transform("sum")
    )

    
    # Compute MM bid price: choose the price corresponding to the highest bid volume.
    filtered_df["mm_bid_price"] = filtered_df.apply(
        lambda row: row["bid_price_1"] if safe_vol(row["bid_volume_1"]) >= safe_vol(row["bid_volume_2"]) and safe_vol(row["bid_volume_1"]) >= safe_vol(row["bid_volume_3"])
        else (row["bid_price_2"] if safe_vol(row["bid_volume_2"]) >= safe_vol(row["bid_volume_3"]) else row["bid_price_3"]),
        axis=1
    )
    # Compute MM ask price: choose the price corresponding to the highest ask volume.
    filtered_df["mm_ask_price"] = filtered_df.apply(
        lambda row: row["ask_price_1"] if safe_vol(row["ask_volume_1"]) >= safe_vol(row["ask_volume_2"]) and safe_vol(row["ask_volume_1"]) >= safe_vol(row["ask_volume_3"])
        else (row["ask_price_2"] if safe_vol(row["ask_volume_2"]) >= safe_vol(row["ask_volume_3"]) else row["ask_price_3"]),
        axis=1
    )
    # MM Spread = MM bid price - MM ask price (as per your instruction)
    filtered_df["mm_spread"] = filtered_df["mm_bid_price"] - filtered_df["mm_ask_price"]
    
    # ------------------ FILTER TRADES ------------------
    df_trades = filtered_df.dropna(subset=['price', 'quantity']).copy()
    min_vol, max_vol = vol_range
    df_trades = df_trades[(df_trades['quantity'] >= min_vol) & (df_trades['quantity'] <= max_vol)]
    if trade_type_filter == 'my':
        df_trades = df_trades[(df_trades['buyer'] == 'ME') | (df_trades['seller'] == 'ME')]
    elif trade_type_filter == 'botbot':
        df_trades = df_trades[(df_trades['buyer'] == 'BOT') & (df_trades['seller'] == 'BOT')]
    
    # ================= PRICE GRAPH =================
    fig_price = go.Figure()
    
    # Plot Mid Price trace with MM bid/ask volumes and MM spread in hover.
    # Note: we use customdata to pass [mm_bid_vol, mm_ask_vol, mm_spread]
    customdata = np.column_stack((filtered_df["mm_bid_vol"], filtered_df["mm_ask_vol"], filtered_df["mm_spread"]))
    if 'mid_price' in selected_lines and 'mid_price' in filtered_df.columns:
        fig_price.add_trace(go.Scatter(
            x=filtered_df['timestamp'],
            y=filtered_df['mid_price'],
            mode='lines+markers',
            name='Mid Price',
            line=dict(color='blue'),
            marker=dict(symbol='circle', size=6, color='blue'),
            connectgaps=True,
            customdata=customdata,
            hovertemplate=(
                "Timestamp: %{x}<br>Price: %{y}<br>" +
                "MM Bid Vol: %{customdata[0]}<br>" +
                "MM Ask Vol: %{customdata[1]}<br>" +
                "MM Spread: %{customdata[2]}<extra></extra>"
            )
        ))

    # Plot VWAP trace
    if 'vwap' in selected_lines and 'vwap' in filtered_df.columns:
        fig_price.add_trace(go.Scatter(
            x=filtered_df['timestamp'],
            y=filtered_df['vwap'],
            mode='lines+markers',
            name='VWAP',
            line=dict(color='purple'),
            marker=dict(symbol='square', size=6, color='purple'),
            connectgaps=True,
            hovertemplate="Timestamp: %{x}<br>VWAP: %{y}<extra></extra>"
        ))
    
    # Helper to add bid/ask lines (unchanged)
    def add_line_with_volume(df_local, price_col, vol_col, label, color, dash='dot'):
        if price_col in df_local.columns and df_local[price_col].notna().any():
            customdata = None
            hovertemplate = "Timestamp: %{x}<br>Price: %{y}"
            if vol_col in df_local.columns and df_local[vol_col].notna().any():
                customdata = df_local[vol_col]
                hovertemplate += "<br>Volume: %{customdata}"
            hovertemplate += "<extra></extra>"
            fig_price.add_trace(go.Scatter(
                x=df_local['timestamp'],
                y=df_local[price_col],
                mode='lines+markers',
                name=label,
                line=dict(color=color, dash=dash),
                marker=dict(symbol='circle', size=6, color=color),
                connectgaps=True,
                customdata=customdata,
                hovertemplate=hovertemplate
            ))
    
    if 'bid1' in selected_lines:
        add_line_with_volume(filtered_df, 'bid_price_1', 'bid_volume_1', 'Bid1', 'green')
    if 'ask1' in selected_lines:
        add_line_with_volume(filtered_df, 'ask_price_1', 'ask_volume_1', 'Ask1', 'red')
    if 'bid2' in selected_lines and 'bid_price_2' in filtered_df.columns:
        cdata = filtered_df.get('bid_volume_2')
        fig_price.add_trace(go.Scatter(
            x=filtered_df['timestamp'],
            y=filtered_df['bid_price_2'],
            mode='lines+markers',
            name='Bid2',
            line=dict(color='darkgreen', dash='dot'),
            marker=dict(symbol='star', size=8, color='darkgreen'),
            connectgaps=True,
            customdata=cdata,
            hovertemplate="Timestamp: %{x}<br>Price: %{y}<extra></extra>"
        ))
    if 'ask2' in selected_lines and 'ask_price_2' in filtered_df.columns:
        cdata = filtered_df.get('ask_volume_2')
        fig_price.add_trace(go.Scatter(
            x=filtered_df['timestamp'],
            y=filtered_df['ask_price_2'],
            mode='lines+markers',
            name='Ask2',
            line=dict(color='darkred', dash='dot'),
            marker=dict(symbol='star', size=8, color='darkred'),
            connectgaps=True,
            customdata=cdata,
            hovertemplate="Timestamp: %{x}<br>Price: %{y}<extra></extra>"
        ))
    if 'bid3' in selected_lines and 'bid_price_3' in filtered_df.columns:
        cdata = filtered_df.get('bid_volume_3')
        fig_price.add_trace(go.Scatter(
            x=filtered_df['timestamp'],
            y=filtered_df['bid_price_3'],
            mode='lines+markers',
            name='Bid3',
            line=dict(color='lightgreen', dash='dot'),
            marker=dict(symbol='diamond', size=8, color='lightgreen'),
            connectgaps=True,
            customdata=cdata,
            hovertemplate="Timestamp: %{x}<br>Price: %{y}<extra></extra>"
        ))
    if 'ask3' in selected_lines and 'ask_price_3' in filtered_df.columns:
        cdata = filtered_df.get('ask_volume_3')
        fig_price.add_trace(go.Scatter(
            x=filtered_df['timestamp'],
            y=filtered_df['ask_price_3'],
            mode='lines+markers',
            name='Ask3',
            line=dict(color='lightcoral', dash='dot'),
            marker=dict(symbol='diamond', size=8, color='lightcoral'),
            connectgaps=True,
            customdata=cdata,
            hovertemplate="Timestamp: %{x}<br>Price: %{y}<extra></extra>"
        ))
    
    if 'theo_highest' in selected_lines:
        filtered_df['theo_highest'] = filtered_df.apply(compute_theo_highest, axis=1)
        fig_price.add_trace(go.Scatter(
            x=filtered_df['timestamp'],
            y=filtered_df['theo_highest'],
            mode='lines+markers',
            name='Theo Highest',
            line=dict(color='brown', dash='dash'),
            marker=dict(symbol='star-triangle-up', size=8, color='brown'),
            connectgaps=True,
            hovertemplate="Timestamp: %{x}<br>Price: %{y}<extra></extra>"
        ))
    
    # ----------------- Plot Filtered Trades with Jitter -----------------
    df_trades['trade_idx'] = df_trades.groupby('timestamp').cumcount()
    df_trades['timestamp_jitter'] = df_trades['timestamp'] + 0.1 * df_trades['trade_idx']
    
    df_me_buys = df_trades[df_trades['buyer'] == 'ME']
    fig_price.add_trace(go.Scatter(
        x=df_me_buys['timestamp_jitter'],
        y=df_me_buys['price'],
        mode='markers',
        marker=dict(color='green', symbol='circle', size=8),
        name='My Buys',
        customdata=df_me_buys['quantity'],
        hovertemplate="Timestamp: %{x}<br>Price: %{y}<br>Volume: %{customdata}<extra></extra>"
    ))
    df_me_sells = df_trades[df_trades['seller'] == 'ME']
    fig_price.add_trace(go.Scatter(
        x=df_me_sells['timestamp_jitter'],
        y=df_me_sells['price'],
        mode='markers',
        marker=dict(color='red', symbol='circle', size=8),
        name='My Sells',
        customdata=df_me_sells['quantity'],
        hovertemplate="Timestamp: %{x}<br>Price: %{y}<br>Volume: %{customdata}<extra></extra>"
    ))
    df_bot_bot = df_trades[(df_trades['buyer'] == 'BOT') & (df_trades['seller'] == 'BOT')]
    fig_price.add_trace(go.Scatter(
        x=df_bot_bot['timestamp_jitter'],
        y=df_bot_bot['price'],
        mode='markers',
        marker=dict(color='white', symbol='x', size=8),
        name='Bot vs Bot',
        customdata=df_bot_bot['quantity'],
        hovertemplate="Timestamp: %{x}<br>Price: %{y}<br>Volume: %{customdata}<extra></extra>"
    ))
    
    if 'my_orders' in selected_lines and 'my_orders' in filtered_df.columns:
        order_points = []
        for _, row in filtered_df.iterrows():
            orders_list = parse_my_orders(row['my_orders'])
            for i, o in enumerate(orders_list):
                x_val = row['timestamp'] + 0.02 * i
                order_points.append({
                    'x': x_val,
                    'y': o['price'],
                    'side': o['side'],
                    'qty': o['qty']
                })
        buys_orders = [p for p in order_points if p['side'] == 'BUY']
        sells_orders = [p for p in order_points if p['side'] == 'SELL']
        fig_price.add_trace(go.Scatter(
            x=[b['x'] for b in buys_orders],
            y=[b['y'] for b in buys_orders],
            mode='markers',
            marker=dict(color='purple', symbol='triangle-up', size=10),
            name='My Orders (BUY)',
            text=[f"Qty={b['qty']}" for b in buys_orders],
            hovertemplate="Timestamp: %{x}<br>Price: %{y}<br>%{text}<extra></extra>"
        ))
        fig_price.add_trace(go.Scatter(
            x=[s['x'] for s in sells_orders],
            y=[s['y'] for s in sells_orders],
            mode='markers',
            marker=dict(color='purple', symbol='triangle-down', size=10),
            name='My Orders (SELL)',
            text=[f"Qty={s['qty']}" for s in sells_orders],
            hovertemplate="Timestamp: %{x}<br>Price: %{y}<br>%{text}<extra></extra>"
        ))
    
    fig_price.update_layout(
        title=f"{selected_product}: Book, Trades, Orders & Theo Highest",
        xaxis_title="Timestamp",
        yaxis_title="Price",
        hovermode="closest"
    )
    
    # ================= VOLUME GRAPH =================
    bid_vol_total = filtered_df[['bid_volume_1', 'bid_volume_2', 'bid_volume_3']].fillna(0).sum(axis=1)
    ask_vol_total = filtered_df[['ask_volume_1', 'ask_volume_2', 'ask_volume_3']].fillna(0).sum(axis=1)
    total_volume = bid_vol_total + ask_vol_total
    mm_volume = filtered_df["mm_volume"]
    mm_spread = filtered_df["mm_spread"]

    fig_vol = go.Figure()
    if volume_mode == 'bars':
        fig_vol.add_trace(go.Bar(
            x=filtered_df['timestamp'],
            y=bid_vol_total,
            name="Total Bid Volume",
            marker_color='green'
        ))
        fig_vol.add_trace(go.Bar(
            x=filtered_df['timestamp'],
            y=ask_vol_total,
            name="Total Ask Volume",
            marker_color='red'
        ))
        fig_vol.update_layout(
            title="Total Bid vs Ask Volume",
            xaxis_title="Timestamp",
            yaxis_title="Volume",
            barmode='group',
            hovermode="x"
        )
    elif volume_mode == 'ratio':
        ratio = np.where(ask_vol_total > 0, bid_vol_total / ask_vol_total, np.nan)
        fig_vol.add_trace(go.Scatter(
            x=filtered_df['timestamp'],
            y=ratio,
            mode='lines+markers',
            name="Bid/Ask Ratio",
            line=dict(color='purple')
        ))
        fig_vol.update_layout(
            title="Bid/Ask Volume Ratio",
            xaxis_title="Timestamp",
            yaxis_title="Ratio",
            hovermode="x"
        )
    elif volume_mode == 'difference':
        diff = bid_vol_total - ask_vol_total
        fig_vol.add_trace(go.Scatter(
            x=filtered_df['timestamp'],
            y=diff,
            mode='lines+markers',
            name="Bid - Ask Volume",
            line=dict(color='orange')
        ))
        fig_vol.update_layout(
            title="Bid - Ask Volume Difference",
            xaxis_title="Timestamp",
            yaxis_title="Difference (Bid - Ask)",
            hovermode="x"
        )
    elif volume_mode == 'mm':
        fig_vol.add_trace(go.Scatter(
            x=filtered_df['timestamp'],
            y=mm_volume,
            mode='lines+markers',
            name="MM Quote Volume",
            line=dict(color='darkmagenta')
        ))
        fig_vol.update_layout(
            title="MM Quote Volume",
            xaxis_title="Timestamp",
            yaxis_title="Volume",
            hovermode="x"
        )
    elif volume_mode == 'mm_roll_3':
        roll_mm_3 = mm_volume.rolling(window=3, min_periods=1).mean()
        fig_vol.add_trace(go.Scatter(
            x=filtered_df['timestamp'],
            y=roll_mm_3,
            mode='lines+markers',
            name="Rolling MM (3 timesteps)",
            line=dict(color='darkmagenta')
        ))
        fig_vol.update_layout(
            title="Rolling Mean of MM Quote Volume (3 timesteps)",
            xaxis_title="Timestamp",
            yaxis_title="Volume (Rolling Mean)",
            hovermode="x"
        )
    elif volume_mode == 'mm_roll_5':
        roll_mm_5 = mm_volume.rolling(window=5, min_periods=1).mean()
        fig_vol.add_trace(go.Scatter(
            x=filtered_df['timestamp'],
            y=roll_mm_5,
            mode='lines+markers',
            name="Rolling MM (5 timesteps)",
            line=dict(color='darkmagenta')
        ))
        fig_vol.update_layout(
            title="Rolling Mean of MM Quote Volume (5 timesteps)",
            xaxis_title="Timestamp",
            yaxis_title="Volume (Rolling Mean)",
            hovermode="x"
        )
    elif volume_mode == 'mm_spread':
        fig_vol.add_trace(go.Scatter(
            x=filtered_df['timestamp'],
            y=mm_spread,
            mode='lines+markers',
            name="MM Spread",
            line=dict(color='darkorange')
        ))
        fig_vol.update_layout(
            title="MM Spread",
            xaxis_title="Timestamp",
            yaxis_title="Spread",
            hovermode="x"
        )
    elif volume_mode == 'mm_spread_roll_3':
        roll_spread_3 = mm_spread.rolling(window=3, min_periods=1).mean()
        fig_vol.add_trace(go.Scatter(
            x=filtered_df['timestamp'],
            y=roll_spread_3,
            mode='lines+markers',
            name="Rolling MM Spread (3 timesteps)",
            line=dict(color='darkorange')
        ))
        fig_vol.update_layout(
            title="Rolling Mean of MM Spread (3 timesteps)",
            xaxis_title="Timestamp",
            yaxis_title="Spread (Rolling Mean)",
            hovermode="x"
        )
    elif volume_mode == 'mm_spread_roll_5':
        roll_spread_5 = mm_spread.rolling(window=5, min_periods=1).mean()
        fig_vol.add_trace(go.Scatter(
            x=filtered_df['timestamp'],
            y=roll_spread_5,
            mode='lines+markers',
            name="Rolling MM Spread (5 timesteps)",
            line=dict(color='darkorange')
        ))
        fig_vol.update_layout(
            title="Rolling Mean of MM Spread (5 timesteps)",
            xaxis_title="Timestamp",
            yaxis_title="Spread (Rolling Mean)",
            hovermode="x"
        )
    
    # ================= POSITION GRAPH =================
    df_my_trades = df_trades[(df_trades['buyer'] == 'ME') | (df_trades['seller'] == 'ME')].copy()
    df_my_trades['net_change'] = 0
    df_my_trades.loc[df_my_trades['buyer'] == 'ME', 'net_change'] = df_my_trades.loc[df_my_trades['buyer'] == 'ME', 'quantity']
    df_my_trades.loc[df_my_trades['seller'] == 'ME', 'net_change'] = -df_my_trades.loc[df_my_trades['seller'] == 'ME', 'quantity']
    grouped = df_my_trades.groupby('timestamp')['net_change'].sum().reset_index().sort_values('timestamp')
    unique_ts = sorted(filtered_df['timestamp'].unique())
    pos_df = pd.DataFrame({'timestamp': unique_ts})
    pos_df = pos_df.merge(grouped, on='timestamp', how='left')
    pos_df['net_change'] = pos_df['net_change'].fillna(0)
    pos_df['position'] = pos_df['net_change'].shift(fill_value=0).cumsum()
    pos_df['position_percent'] = (pos_df['position'] / 50) * 100

    fig_pos = go.Figure()
    fig_pos.add_trace(go.Scatter(
        x=pos_df['timestamp'],
        y=pos_df['position_percent'],
        mode='lines+markers',
        name='Position (%)',
        connectgaps=True
    ))
    fig_pos.update_layout(
        title="My Position Over Time (% of ±50)",
        xaxis_title="Timestamp",
        yaxis_title="Position (%)",
        yaxis=dict(range=[-100, 100])
    )
    
    return fig_price, fig_vol, fig_pos, fig_orders, fig_hmm

# ----------------------------------------------------------------
# 5) RUN THE APP ON PORT 8055
# ----------------------------------------------------------------
if __name__ == "__main__":
    # Start the Dash development server for this cell's app in debug mode.
    app.run_server(port=8055, debug=True)


In [12]:
import re
import numpy as np
import pandas as pd
import dash
from dash import dcc, html, Input, Output
import plotly.graph_objs as go

# ----------------------------------------------------------------
# 1) LOAD THE MERGED DATA
#    We assume merged_output.xlsx has two sheets: "KELP" and "RAINFOREST_RESIN".
# ----------------------------------------------------------------
# Read both product sheets in one pass over the workbook: passing a list to
# `sheet_name` makes read_excel return a dict of DataFrames keyed by sheet name,
# so the file is opened and parsed only once.
_sheets = pd.read_excel("merged_output.xlsx", sheet_name=["KELP", "RAINFOREST_RESIN"])
df_kelp = _sheets["KELP"]
df_resin = _sheets["RAINFOREST_RESIN"]

# Stack the two products and order by timestamp. A stable sort (mergesort) keeps
# rows that share a timestamp in their concatenation order, which makes the row
# order -- and therefore the first entry of PRODUCTS -- deterministic.
df_combined = pd.concat([df_kelp, df_resin], ignore_index=True)
df_combined = df_combined.sort_values("timestamp", kind="mergesort").reset_index(drop=True)

# Distinct product names in order of first appearance; drives the dropdown options.
PRODUCTS = df_combined["product"].unique()

# ----------------------------------------------------------------
# 2) DEFINE THE DASH APP
# ----------------------------------------------------------------
app = dash.Dash(__name__)

# Page layout: controls on top, then the price graph, the volume/position graphs
# side by side, and finally the orders and HMM graphs. The component ids below
# are the ones referenced by the callback's Input/Output declarations.
# Style dictionaries use camelCased CSS property names (marginBottom), which is
# the form Dash documents for `style`.
app.layout = html.Div([
    html.H1("Enhanced Visualization: Book, Trades, Orders & Position"),

    # Product selection
    html.Label("Select Product:"),
    dcc.Dropdown(
        id='product-dropdown',
        options=[{'label': p, 'value': p} for p in PRODUCTS],
        value=PRODUCTS[0] if len(PRODUCTS) else None,
        style={'width': '300px', 'marginBottom': '10px'}
    ),

    # Checklist for lines to display on the price graph
    html.Label("Show Lines:"),
    dcc.Checklist(
        id='lines-checklist',
        options=[
            {'label': 'Mid Price', 'value': 'mid_price'},
            {'label': 'Bid1', 'value': 'bid1'},
            {'label': 'Ask1', 'value': 'ask1'},
            {'label': 'Bid2', 'value': 'bid2'},
            {'label': 'Ask2', 'value': 'ask2'},
            {'label': 'Bid3', 'value': 'bid3'},
            {'label': 'Ask3', 'value': 'ask3'},
            {'label': 'My Orders', 'value': 'my_orders'},
            {'label': 'Theo Highest', 'value': 'theo_highest'}
        ],
        value=['mid_price'],
        inline=True,
        style={'marginBottom': '10px'}
    ),

    # Trade type filter
    html.Label("Trade Type Filter:"),
    dcc.Dropdown(
        id='trade-type-filter',
        options=[
            {'label': 'All Trades', 'value': 'all'},
            {'label': 'My Trades Only', 'value': 'my'},
            {'label': 'Bot vs Bot Only', 'value': 'botbot'}
        ],
        value='all',
        style={'width': '200px', 'marginBottom': '10px'}
    ),

    # Trade volume range filter (inclusive bounds on trade quantity)
    html.Label("Trade Volume Range:"),
    dcc.RangeSlider(
        id='volume-range',
        min=0,
        max=50,
        step=1,
        value=[0, 50],
        marks={i: str(i) for i in range(0, 51, 10)},
        allowCross=False,
        tooltip={"placement": "bottom", "always_visible": True}
    ),

    # Volume plot mode options, including the MM Spread variants
    html.Label("Volume Plot:"),
    dcc.RadioItems(
        id='volume-mode',
        options=[
            {'label': 'Bars (bid vs ask)', 'value': 'bars'},
            {'label': 'Ratio (bid/ask)', 'value': 'ratio'},
            {'label': 'Difference (bid - ask)', 'value': 'difference'},
            {'label': 'MM Quote Volume', 'value': 'mm'},
            {'label': 'Rolling MM (3 timesteps)', 'value': 'mm_roll_3'},
            {'label': 'Rolling MM (5 timesteps)', 'value': 'mm_roll_5'},
            {'label': 'MM Spread', 'value': 'mm_spread'},
            {'label': 'Rolling MM Spread (3 timesteps)', 'value': 'mm_spread_roll_3'},
            {'label': 'Rolling MM Spread (5 timesteps)', 'value': 'mm_spread_roll_5'}
        ],
        value='bars',
        inline=True,
        style={'marginBottom': '20px'}
    ),

    # Graphs: Price on top, then Volume & Position side by side, then Orders and HMM
    dcc.Graph(id='price-graph'),
    html.Div([
        html.Div([dcc.Graph(id='volume-graph')], style={'width': '50%', 'display': 'inline-block'}),
        html.Div([dcc.Graph(id='position-graph')], style={'width': '50%', 'display': 'inline-block'})
    ]),
    dcc.Graph(id='orders-graph'),
    dcc.Graph(id='hmm-graph')
])

# ----------------------------------------------------------------
# 3) HELPER FUNCTIONS
# ----------------------------------------------------------------
def parse_my_orders(order_str):
    """
    Turn an order summary such as "BUY 10@2026, SELL 10@2027" into a list of
    {"side", "qty", "price"} dicts (qty and price as ints).

    Anything that is not a non-blank string yields an empty list, and
    comma-separated pieces that do not look like "<BUY|SELL> <qty>@<price>"
    are silently dropped.
    """
    if not (isinstance(order_str, str) and order_str.strip()):
        return []

    order_pattern = r"^(BUY|SELL)\s+(\d+)@(\d+)$"
    candidates = (re.match(order_pattern, chunk.strip()) for chunk in order_str.split(","))
    return [
        {"side": hit.group(1), "qty": int(hit.group(2)), "price": int(hit.group(3))}
        for hit in candidates
        if hit
    ]

def compute_theo_highest(row):
    """
    'Theo Highest' = average of the highest-volume bid level and highest-volume ask level.

    `row` is anything exposing `.get(key, default)` (a DataFrame row or a dict)
    with the keys bid_/ask_ volume_/price_ 1..3. Missing or NaN volumes count
    as 0 so that an empty level can never be picked as the "highest volume"
    level. Ties go to the lowest level number. Returns NaN when either side
    has no positive-or-non-zero maximum volume (i.e. its best volume is 0).
    """
    def _vol(value):
        # row.get() only applies its default when the key is absent; a key
        # that is present but NaN/None must be zeroed explicitly.
        return value if pd.notna(value) else 0

    levels = (1, 2, 3)
    bid_vols = [_vol(row.get(f'bid_volume_{k}', 0)) for k in levels]
    bid_prices = [row.get(f'bid_price_{k}', np.nan) for k in levels]
    ask_vols = [_vol(row.get(f'ask_volume_{k}', 0)) for k in levels]
    ask_prices = [row.get(f'ask_price_{k}', np.nan) for k in levels]

    best_bid_vol = max(bid_vols)
    best_ask_vol = max(ask_vols)
    if best_bid_vol == 0 or best_ask_vol == 0:
        return np.nan

    bid_idx = bid_vols.index(best_bid_vol)
    ask_idx = ask_vols.index(best_ask_vol)
    return (bid_prices[bid_idx] + ask_prices[ask_idx]) / 2

def safe_vol(v):
    """Map a missing volume (NaN/None) to 0; hand any other value back untouched."""
    if pd.isna(v):
        return 0
    return v

# ----------------------------------------------------------------
# 4) DASH CALLBACK
# ----------------------------------------------------------------
@app.callback(
    Output('price-graph', 'figure'),
    Output('volume-graph', 'figure'),
    Output('position-graph', 'figure'),
    Output('orders-graph', 'figure'),
    Output('hmm-graph', 'figure'),
    Input('product-dropdown', 'value'),
    Input('lines-checklist', 'value'),
    Input('volume-mode', 'value'),
    Input('trade-type-filter', 'value'),
    Input('volume-range', 'value')
)
def update_plots(selected_product, selected_lines, volume_mode, trade_type_filter, vol_range):
    """
    Rebuild all five dashboard figures from the current control values.

    selected_product:  product name; rows of df_combined are filtered on it.
    selected_lines:    list of checklist values choosing the price-graph traces.
    volume_mode:       radio value choosing the series shown in the volume graph.
    trade_type_filter: 'all', 'my' (buyer or seller is 'ME') or 'botbot'.
    vol_range:         [min, max] inclusive bounds on trade quantity.

    Returns (fig_price, fig_vol, fig_pos, fig_orders, fig_hmm), matching the
    order of the Outputs declared on the callback.
    """
    if not selected_product:
        # The callback declares five Outputs, so five figures must come back
        # even when there is nothing to plot.
        return tuple(go.Figure() for _ in range(5))

    # Filter data for selected product
    filtered_df = df_combined[df_combined['product'] == selected_product].copy()

    if 'mid_price' in filtered_df.columns:
        # ===================== PREPARE DATA FOR HMM =====================
        # Features: log return of the mid price and its 10-row rolling std.
        filtered_df['log_return'] = np.log(filtered_df['mid_price'] / filtered_df['mid_price'].shift(1)).fillna(0)
        filtered_df['volatility'] = filtered_df['log_return'].rolling(window=10).std().fillna(0)
        observations = filtered_df[['log_return', 'volatility']].values

        # ===================== TRAIN HMM =====================
        # NOTE(review): GaussianHMM is not imported in this cell; presumably it
        # comes from an earlier cell (hmmlearn?) -- confirm.
        # The 3-state model is re-fitted on every callback invocation.
        hmm_model = GaussianHMM(n_components=3, covariance_type='diag', n_iter=100, random_state=42)
        hmm_model.fit(observations)
        filtered_df['state'] = hmm_model.predict(observations)

        # ===================== HMM STATE VISUALIZATION =====================
        fig_hmm = go.Figure()
        fig_hmm.add_trace(go.Scatter(
            x=filtered_df['timestamp'],
            y=filtered_df['mid_price'],
            mode='lines',
            name='Mid Price',
            line=dict(color='white'),
            hovertemplate="Timestamp: %{x}<br>Mid Price: %{y}<extra></extra>"
        ))

        # One marker trace per hidden state, coloured by state index.
        state_colors = ['blue', 'red', 'green']
        for state in range(hmm_model.n_components):
            state_rows = filtered_df[filtered_df['state'] == state]
            fig_hmm.add_trace(go.Scatter(
                x=state_rows['timestamp'],
                y=state_rows['mid_price'],
                mode='markers',
                name=f"State {state}",
                marker=dict(color=state_colors[state]),
                hovertemplate="Timestamp: %{x}<br>Mid Price: %{y}<br>State: %{text}<extra></extra>",
                text=[f"State {state}"] * len(state_rows)
            ))

        fig_hmm.update_layout(
            title=f"{selected_product}: Hidden Markov Model States",
            xaxis_title="Timestamp",
            yaxis_title="Mid Price",
            hovermode="x unified"
        )
    else:
        fig_hmm = go.Figure()

    # ================= EXTRACT ORDERS PLACED VS ORDERS FILLED =================
    # Placed quantity per timestamp: sum of all quantities parsed from 'my_orders'.
    orders_placed_df = filtered_df[['timestamp', 'my_orders']].copy()
    orders_placed_df['placed_quantity'] = orders_placed_df['my_orders'].apply(
        lambda x: sum(o['qty'] for o in parse_my_orders(x)))
    orders_placed_df = orders_placed_df.groupby('timestamp', as_index=False)['placed_quantity'].sum()

    # Filled quantity per timestamp: trades where buyer or seller is 'ME'.
    is_mine = (filtered_df['buyer'] == 'ME') | (filtered_df['seller'] == 'ME')
    trades_filled_df = filtered_df[is_mine][['timestamp', 'quantity']].copy()
    trades_filled_df = trades_filled_df.groupby('timestamp', as_index=False)['quantity'].sum().rename(
        columns={'quantity': 'filled_quantity'})

    # Outer merge so timestamps with only placements or only fills are kept (missing side -> 0).
    orders_merged = pd.merge(orders_placed_df, trades_filled_df, on='timestamp', how='outer').fillna(0)
    orders_merged.sort_values('timestamp', inplace=True)

    # ================= BUILD ORDERS GRAPH =================
    fig_orders = go.Figure()
    for column, label, color in (
        ('placed_quantity', 'Orders Placed', 'blue'),
        ('filled_quantity', 'Orders Filled', 'green'),
    ):
        fig_orders.add_trace(go.Scatter(
            x=orders_merged['timestamp'],
            y=orders_merged[column],
            mode='lines+markers',
            name=label,
            line=dict(color=color),
            marker=dict(symbol='circle', size=6, color=color),
            hovertemplate="Timestamp: %{x}<br>" + label + ": %{y}<extra></extra>"
        ))
    fig_orders.update_layout(
        title=f"{selected_product}: Orders Placed vs Orders Filled",
        xaxis_title="Timestamp",
        yaxis_title="Order Quantity",
        hovermode="closest"
    )

    # ================= MARKET-MAKER (MM) QUOTE METRICS =================
    # MM quote volume per side = largest volume among the three levels (NaN treated as 0).
    filtered_df["mm_bid_vol"] = filtered_df[["bid_volume_1", "bid_volume_2", "bid_volume_3"]].fillna(0).max(axis=1)
    filtered_df["mm_ask_vol"] = filtered_df[["ask_volume_1", "ask_volume_2", "ask_volume_3"]].fillna(0).max(axis=1)
    filtered_df["mm_volume"] = filtered_df["mm_bid_vol"] + filtered_df["mm_ask_vol"]

    def mm_price(row, side):
        # Price of the level with the highest volume; ties go to the lower level number.
        v1, v2, v3 = (safe_vol(row[f"{side}_volume_{k}"]) for k in (1, 2, 3))
        if v1 >= v2 and v1 >= v3:
            return row[f"{side}_price_1"]
        if v2 >= v3:
            return row[f"{side}_price_2"]
        return row[f"{side}_price_3"]

    filtered_df["mm_bid_price"] = filtered_df.apply(lambda row: mm_price(row, "bid"), axis=1)
    filtered_df["mm_ask_price"] = filtered_df.apply(lambda row: mm_price(row, "ask"), axis=1)
    # MM Spread = MM bid price - MM ask price (as per your instruction)
    filtered_df["mm_spread"] = filtered_df["mm_bid_price"] - filtered_df["mm_ask_price"]

    # ------------------ FILTER TRADES ------------------
    # Trade rows are those carrying both a price and a quantity.
    df_trades = filtered_df.dropna(subset=['price', 'quantity'])
    min_vol, max_vol = vol_range
    keep = (df_trades['quantity'] >= min_vol) & (df_trades['quantity'] <= max_vol)
    if trade_type_filter == 'my':
        keep &= (df_trades['buyer'] == 'ME') | (df_trades['seller'] == 'ME')
    elif trade_type_filter == 'botbot':
        keep &= (df_trades['buyer'] == 'BOT') & (df_trades['seller'] == 'BOT')
    # Explicit copy: columns are added to df_trades further down.
    df_trades = df_trades[keep].copy()

    # ================= PRICE GRAPH =================
    fig_price = go.Figure()

    # Mid price, with [mm_bid_vol, mm_ask_vol, mm_spread] exposed in the hover via customdata.
    if 'mid_price' in selected_lines and 'mid_price' in filtered_df.columns:
        mid_customdata = np.column_stack(
            (filtered_df["mm_bid_vol"], filtered_df["mm_ask_vol"], filtered_df["mm_spread"]))
        fig_price.add_trace(go.Scatter(
            x=filtered_df['timestamp'],
            y=filtered_df['mid_price'],
            mode='lines+markers',
            name='Mid Price',
            line=dict(color='blue'),
            marker=dict(symbol='circle', size=6, color='blue'),
            connectgaps=True,
            customdata=mid_customdata,
            hovertemplate=(
                "Timestamp: %{x}<br>Price: %{y}<br>" +
                "MM Bid Vol: %{customdata[0]}<br>" +
                "MM Ask Vol: %{customdata[1]}<br>" +
                "MM Spread: %{customdata[2]}<extra></extra>"
            )
        ))

    def add_line_with_volume(df_local, price_col, vol_col, label, color, dash='dot'):
        # Level-1 lines: drawn only when the price column has data; the volume
        # is added to the hover when the volume column has data too.
        if price_col in df_local.columns and df_local[price_col].notna().any():
            customdata = None
            hovertemplate = "Timestamp: %{x}<br>Price: %{y}"
            if vol_col in df_local.columns and df_local[vol_col].notna().any():
                customdata = df_local[vol_col]
                hovertemplate += "<br>Volume: %{customdata}"
            hovertemplate += "<extra></extra>"
            fig_price.add_trace(go.Scatter(
                x=df_local['timestamp'],
                y=df_local[price_col],
                mode='lines+markers',
                name=label,
                line=dict(color=color, dash=dash),
                marker=dict(symbol='circle', size=6, color=color),
                connectgaps=True,
                customdata=customdata,
                hovertemplate=hovertemplate
            ))

    if 'bid1' in selected_lines:
        add_line_with_volume(filtered_df, 'bid_price_1', 'bid_volume_1', 'Bid1', 'green')
    if 'ask1' in selected_lines:
        add_line_with_volume(filtered_df, 'ask_price_1', 'ask_volume_1', 'Ask1', 'red')

    # Deeper book levels: (checklist key, price column, volume column, label, colour, marker symbol).
    deep_levels = (
        ('bid2', 'bid_price_2', 'bid_volume_2', 'Bid2', 'darkgreen', 'star'),
        ('ask2', 'ask_price_2', 'ask_volume_2', 'Ask2', 'darkred', 'star'),
        ('bid3', 'bid_price_3', 'bid_volume_3', 'Bid3', 'lightgreen', 'diamond'),
        ('ask3', 'ask_price_3', 'ask_volume_3', 'Ask3', 'lightcoral', 'diamond'),
    )
    for key, price_col, vol_col, label, color, symbol in deep_levels:
        if key in selected_lines and price_col in filtered_df.columns:
            level_volume = filtered_df.get(vol_col)
            level_hover = "Timestamp: %{x}<br>Price: %{y}"
            if level_volume is not None:
                # Show the volume that is already attached as customdata,
                # consistent with the Bid1/Ask1 hover.
                level_hover += "<br>Volume: %{customdata}"
            level_hover += "<extra></extra>"
            fig_price.add_trace(go.Scatter(
                x=filtered_df['timestamp'],
                y=filtered_df[price_col],
                mode='lines+markers',
                name=label,
                line=dict(color=color, dash='dot'),
                marker=dict(symbol=symbol, size=8, color=color),
                connectgaps=True,
                customdata=level_volume,
                hovertemplate=level_hover
            ))

    if 'theo_highest' in selected_lines:
        filtered_df['theo_highest'] = filtered_df.apply(compute_theo_highest, axis=1)
        fig_price.add_trace(go.Scatter(
            x=filtered_df['timestamp'],
            y=filtered_df['theo_highest'],
            mode='lines+markers',
            name='Theo Highest',
            line=dict(color='brown', dash='dash'),
            marker=dict(symbol='star-triangle-up', size=8, color='brown'),
            connectgaps=True,
            hovertemplate="Timestamp: %{x}<br>Price: %{y}<extra></extra>"
        ))

    # ----------------- Plot Filtered Trades with Jitter -----------------
    # Trades sharing a timestamp are nudged right by 0.1 per trade so markers do not overlap.
    df_trades['trade_idx'] = df_trades.groupby('timestamp').cumcount()
    df_trades['timestamp_jitter'] = df_trades['timestamp'] + 0.1 * df_trades['trade_idx']

    trade_groups = (
        (df_trades['buyer'] == 'ME', 'green', 'circle', 'My Buys'),
        (df_trades['seller'] == 'ME', 'red', 'circle', 'My Sells'),
        ((df_trades['buyer'] == 'BOT') & (df_trades['seller'] == 'BOT'), 'white', 'x', 'Bot vs Bot'),
    )
    for group_mask, color, symbol, label in trade_groups:
        group = df_trades[group_mask]
        fig_price.add_trace(go.Scatter(
            x=group['timestamp_jitter'],
            y=group['price'],
            mode='markers',
            marker=dict(color=color, symbol=symbol, size=8),
            name=label,
            customdata=group['quantity'],
            hovertemplate="Timestamp: %{x}<br>Price: %{y}<br>Volume: %{customdata}<extra></extra>"
        ))

    if 'my_orders' in selected_lines and 'my_orders' in filtered_df.columns:
        # One marker per parsed order; the i-th order of a row is shifted by 0.02 * i on the x axis.
        order_points = [
            {'x': row['timestamp'] + 0.02 * i, 'y': o['price'], 'side': o['side'], 'qty': o['qty']}
            for _, row in filtered_df.iterrows()
            for i, o in enumerate(parse_my_orders(row['my_orders']))
        ]
        for side, symbol, label in (
            ('BUY', 'triangle-up', 'My Orders (BUY)'),
            ('SELL', 'triangle-down', 'My Orders (SELL)'),
        ):
            side_points = [p for p in order_points if p['side'] == side]
            fig_price.add_trace(go.Scatter(
                x=[p['x'] for p in side_points],
                y=[p['y'] for p in side_points],
                mode='markers',
                marker=dict(color='purple', symbol=symbol, size=10),
                name=label,
                text=[f"Qty={p['qty']}" for p in side_points],
                hovertemplate="Timestamp: %{x}<br>Price: %{y}<br>%{text}<extra></extra>"
            ))

    fig_price.update_layout(
        title=f"{selected_product}: Book, Trades, Orders & Theo Highest",
        xaxis_title="Timestamp",
        yaxis_title="Price",
        hovermode="closest"
    )

    # ================= VOLUME GRAPH =================
    bid_vol_total = filtered_df[['bid_volume_1', 'bid_volume_2', 'bid_volume_3']].fillna(0).sum(axis=1)
    ask_vol_total = filtered_df[['ask_volume_1', 'ask_volume_2', 'ask_volume_3']].fillna(0).sum(axis=1)
    mm_volume = filtered_df["mm_volume"]
    mm_spread = filtered_df["mm_spread"]

    def rolling_mean(series, window):
        return series.rolling(window=window, min_periods=1).mean()

    # Single-line modes: mode -> (series factory, trace name, colour, title, y-axis title).
    # Factories are lazy so only the selected series is computed.
    line_modes = {
        'ratio': (lambda: np.where(ask_vol_total > 0, bid_vol_total / ask_vol_total, np.nan),
                  "Bid/Ask Ratio", 'purple',
                  "Bid/Ask Volume Ratio", "Ratio"),
        'difference': (lambda: bid_vol_total - ask_vol_total,
                       "Bid - Ask Volume", 'orange',
                       "Bid - Ask Volume Difference", "Difference (Bid - Ask)"),
        'mm': (lambda: mm_volume,
               "MM Quote Volume", 'darkmagenta',
               "MM Quote Volume", "Volume"),
        'mm_roll_3': (lambda: rolling_mean(mm_volume, 3),
                      "Rolling MM (3 timesteps)", 'darkmagenta',
                      "Rolling Mean of MM Quote Volume (3 timesteps)", "Volume (Rolling Mean)"),
        'mm_roll_5': (lambda: rolling_mean(mm_volume, 5),
                      "Rolling MM (5 timesteps)", 'darkmagenta',
                      "Rolling Mean of MM Quote Volume (5 timesteps)", "Volume (Rolling Mean)"),
        'mm_spread': (lambda: mm_spread,
                      "MM Spread", 'darkorange',
                      "MM Spread", "Spread"),
        'mm_spread_roll_3': (lambda: rolling_mean(mm_spread, 3),
                             "Rolling MM Spread (3 timesteps)", 'darkorange',
                             "Rolling Mean of MM Spread (3 timesteps)", "Spread (Rolling Mean)"),
        'mm_spread_roll_5': (lambda: rolling_mean(mm_spread, 5),
                             "Rolling MM Spread (5 timesteps)", 'darkorange',
                             "Rolling Mean of MM Spread (5 timesteps)", "Spread (Rolling Mean)"),
    }

    fig_vol = go.Figure()
    if volume_mode == 'bars':
        fig_vol.add_trace(go.Bar(
            x=filtered_df['timestamp'],
            y=bid_vol_total,
            name="Total Bid Volume",
            marker_color='green'
        ))
        fig_vol.add_trace(go.Bar(
            x=filtered_df['timestamp'],
            y=ask_vol_total,
            name="Total Ask Volume",
            marker_color='red'
        ))
        fig_vol.update_layout(
            title="Total Bid vs Ask Volume",
            xaxis_title="Timestamp",
            yaxis_title="Volume",
            barmode='group',
            hovermode="x"
        )
    elif volume_mode in line_modes:
        make_series, trace_name, color, title, y_title = line_modes[volume_mode]
        fig_vol.add_trace(go.Scatter(
            x=filtered_df['timestamp'],
            y=make_series(),
            mode='lines+markers',
            name=trace_name,
            line=dict(color=color)
        ))
        fig_vol.update_layout(
            title=title,
            xaxis_title="Timestamp",
            yaxis_title=y_title,
            hovermode="x"
        )
    # Any other mode leaves the volume figure empty.

    # ================= POSITION GRAPH =================
    # NOTE(review): the position is derived from df_trades *after* the
    # volume-range / trade-type filters, so e.g. 'botbot' gives a flat line --
    # confirm this is intended.
    mine = (df_trades['buyer'] == 'ME') | (df_trades['seller'] == 'ME')
    df_my_trades = df_trades[mine].copy()
    # Every remaining row involves 'ME': selling reduces the position, otherwise it is a buy.
    df_my_trades['net_change'] = np.where(
        df_my_trades['seller'] == 'ME', -df_my_trades['quantity'], df_my_trades['quantity'])
    grouped = df_my_trades.groupby('timestamp')['net_change'].sum().reset_index().sort_values('timestamp')

    pos_df = pd.DataFrame({'timestamp': sorted(filtered_df['timestamp'].unique())})
    pos_df = pos_df.merge(grouped, on='timestamp', how='left')
    pos_df['net_change'] = pos_df['net_change'].fillna(0)
    # Shift by one step so each point shows the position held before that timestamp's fills.
    pos_df['position'] = pos_df['net_change'].shift(fill_value=0).cumsum()
    # Expressed as a percentage of 50 (the limit quoted in the figure title).
    pos_df['position_percent'] = (pos_df['position'] / 50) * 100

    fig_pos = go.Figure()
    fig_pos.add_trace(go.Scatter(
        x=pos_df['timestamp'],
        y=pos_df['position_percent'],
        mode='lines+markers',
        name='Position (%)',
        connectgaps=True
    ))
    fig_pos.update_layout(
        title="My Position Over Time (% of ±50)",
        xaxis_title="Timestamp",
        yaxis_title="Position (%)",
        yaxis=dict(range=[-100, 100])
    )

    return fig_price, fig_vol, fig_pos, fig_orders, fig_hmm

# ----------------------------------------------------------------
# 5) RUN THE APP ON PORT 8052
# ----------------------------------------------------------------
# Starts the Dash development server in debug mode for the app defined in this cell.
if __name__ == "__main__":
    app.run_server(debug=True, port=8052)
