# BTC 15-minute EDA

Connect to Postgres using the `DATABASE_URL` from the root `.env`, then explore BTC tick data aggregated at 15-minute intervals.



In [52]:
import os
import sys
import subprocess
import importlib.util
from pathlib import Path

# Install lightweight dependencies once per environment

def ensure_package(pkg_name: str) -> None:
    if importlib.util.find_spec(pkg_name) is None:
        subprocess.check_call([sys.executable, "-m", "pip", "install", pkg_name])

# Core dependencies + parallel computing libraries
for pkg in ("pandas", "sqlalchemy", "python-dotenv", "psycopg2-binary", "plotly", 
            "joblib", "numba", "tqdm"):
    ensure_package(pkg)

import pandas as pd
import sqlalchemy as sa
from sqlalchemy.engine import make_url
from dotenv import load_dotenv

project_root = Path.cwd()
if project_root.name == "notebooks":
    project_root = project_root.parent

load_dotenv(project_root / ".env")

DATABASE_URL = os.getenv("DATABASE_URL")
if not DATABASE_URL:
    raise RuntimeError("DATABASE_URL is missing. Set it in the root .env file.")

# Normalize the driver name: SQLAlchemy expects "postgresql+psycopg2"
url = make_url(DATABASE_URL)
if url.drivername in {"postgres", "postgresql"}:
    url = url.set(drivername="postgresql+psycopg2")
engine = sa.create_engine(url)

with engine.connect() as conn:
    conn.exec_driver_sql("SELECT 1")

print("Connected to Postgres via DATABASE_URL from .env")
engine


Connected to Postgres via DATABASE_URL from .env


Engine(postgresql+psycopg2://lightspeed:***@localhost:5432/lightspeed)

In [53]:
# Calculate total captured data duration (accounting for gaps between market intervals)

duration_query = """
WITH market_spans AS (
    SELECT 
        market_instance_id,
        MIN(event_timestamp_ms) AS min_ts,
        MAX(event_timestamp_ms) AS max_ts,
        (MAX(event_timestamp_ms) - MIN(event_timestamp_ms)) AS span_ms
    FROM polymarket_orderbook_events
    GROUP BY market_instance_id
)
SELECT 
    COUNT(*) AS num_markets,
    SUM(span_ms) AS total_captured_ms,
    MIN(min_ts) AS earliest_ts,
    MAX(max_ts) AS latest_ts,
    (MAX(max_ts) - MIN(min_ts)) AS wall_clock_ms
FROM market_spans;
"""

duration_stats = pd.read_sql(duration_query, engine).iloc[0]

# Also get BTC tick duration
btc_duration_query = """
SELECT 
    COUNT(*) AS num_ticks,
    MIN(event_timestamp_ms) AS min_ts,
    MAX(event_timestamp_ms) AS max_ts,
    (MAX(event_timestamp_ms) - MIN(event_timestamp_ms)) AS span_ms
FROM btc_ticks;
"""
btc_stats = pd.read_sql(btc_duration_query, engine).iloc[0]

def format_duration(ms):
    """Convert milliseconds to human-readable duration."""
    total_seconds = ms / 1000
    hours = int(total_seconds // 3600)
    minutes = int((total_seconds % 3600) // 60)
    seconds = total_seconds % 60
    if hours > 0:
        return f"{hours}h {minutes}m {seconds:.1f}s"
    elif minutes > 0:
        return f"{minutes}m {seconds:.1f}s"
    else:
        return f"{seconds:.1f}s"

print("=" * 60)
print("DATA CAPTURE DURATION SUMMARY")
print("=" * 60)
print()
print("POLYMARKET ORDERBOOK EVENTS:")
print(f"  Market intervals captured:     {int(duration_stats['num_markets']):,}")
print(f"  Total captured duration:       {format_duration(duration_stats['total_captured_ms'])}")
print(f"  Wall-clock span (with gaps):   {format_duration(duration_stats['wall_clock_ms'])}")
print(f"  Data coverage:                 {100 * duration_stats['total_captured_ms'] / duration_stats['wall_clock_ms']:.1f}%")
print()
print("BTC TICKS:")
print(f"  Total ticks:                   {int(btc_stats['num_ticks']):,}")
print(f"  Time span:                     {format_duration(btc_stats['span_ms'])}")
print()
print(f"Earliest data: {pd.to_datetime(duration_stats['earliest_ts'], unit='ms', utc=True)}")
print(f"Latest data:   {pd.to_datetime(duration_stats['latest_ts'], unit='ms', utc=True)}")


DATA CAPTURE DURATION SUMMARY

POLYMARKET ORDERBOOK EVENTS:
  Market intervals captured:     159
  Total captured duration:       36h 6m 20.5s
  Wall-clock span (with gaps):   161h 47m 39.8s
  Data coverage:                 22.3%

BTC TICKS:
  Total ticks:                   68,494
  Time span:                     161h 48m 49.3s

Earliest data: 2025-12-25 20:57:23.621000+00:00
Latest data:   2026-01-01 14:45:03.447000+00:00


## Complete markets (≥15 minutes)
Markets where the span between min and max `event_timestamp_ms` is at least 15 minutes.



In [54]:
span_ms = 14 * 60 * 1000

query = f"""
WITH stats AS (
    SELECT market, market_instance_id,
           MIN(event_timestamp_ms) AS min_ts,
           MAX(event_timestamp_ms) AS max_ts,
           count(distinct event_timestamp_ms) AS num_events
    FROM polymarket_orderbook_events
    GROUP BY market, market_instance_id
)
SELECT market_instance_id as market,
       min_ts,
       max_ts,
       (max_ts - min_ts) AS span_ms,
       (max_ts - min_ts) / 1000.0 AS span_seconds,
       num_events
FROM stats
WHERE (max_ts - min_ts) >= {span_ms}
ORDER BY span_ms DESC;
"""

complete_markets = pd.read_sql(query, engine)
print(f"{len(complete_markets)} markets with ≥15 minute span")
complete_markets.head()



101 markets with ≥15 minute span


Unnamed: 0,market,min_ts,max_ts,span_ms,span_seconds,num_events
0,btc-updown-15m-1767131100,1767131102695,1767132003512,900817,900.817,5642
1,btc-updown-15m-1767276000,1767276004998,1767276905553,900555,900.555,6555
2,btc-updown-15m-1767176100,1767176102260,1767177002385,900125,900.125,8586
3,btc-updown-15m-1767140100,1767140102446,1767141002470,900024,900.024,5782
4,btc-updown-15m-1767154500,1767154502347,1767155402176,899829,899.829,4807


In [55]:
import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from IPython.display import display

# Pick one market to inspect (use a value from complete_markets['market'])
market_to_plot = complete_markets.iloc[0]["market"]
print(f"Plotting market: {market_to_plot}")

bid_ask_query = """
SELECT event_timestamp_ms, side,
       best_bid_price, best_bid_qty,
       best_ask_price, best_ask_qty
FROM polymarket_orderbook_events
WHERE market_instance_id = %(market)s
ORDER BY event_timestamp_ms;
"""

data = pd.read_sql(bid_ask_query, engine, params={"market": market_to_plot})
if data.empty:
    raise RuntimeError("No orderbook data for selected market")

# Convert to ISO strings so Plotly reliably treats x as dates.
data["ts"] = pd.to_datetime(data["event_timestamp_ms"], unit="ms", utc=True).dt.strftime("%Y-%m-%d %H:%M:%S.%f")
side_label = {"UP": "YES", "DOWN": "NO"}
data["outcome"] = data["side"].map(side_label).fillna(data["side"])

# Get time range from orderbook data to query BTC ticks
min_ts_ms = data["event_timestamp_ms"].min()
max_ts_ms = data["event_timestamp_ms"].max()

btc_query = """
SELECT event_timestamp_ms, price
FROM btc_ticks
WHERE event_timestamp_ms BETWEEN %(min_ts)s AND %(max_ts)s
ORDER BY event_timestamp_ms;
"""
btc_data = pd.read_sql(btc_query, engine, params={"min_ts": int(min_ts_ms), "max_ts": int(max_ts_ms)})
if not btc_data.empty:
    btc_data["ts"] = pd.to_datetime(btc_data["event_timestamp_ms"], unit="ms", utc=True).dt.strftime("%Y-%m-%d %H:%M:%S.%f")
print(f"Loaded {len(btc_data)} BTC ticks for the market time range")


def attach_dynamic_y(figw: go.FigureWidget, price_trace_idxs, btc_trace_idxs):
    """Rescale y-axes when the x-axis range changes (via rangeslider or zoom)."""

    def compute_range(idxs, x0, x1):
        ymin, ymax = None, None
        for idx in idxs:
            tr = figw.data[idx]
            x = pd.to_datetime(np.asarray(tr.x))
            y = np.asarray(tr.y, dtype=float)
            if x0 is not None and x1 is not None:
                mask = (x >= x0) & (x <= x1)
                y = y[mask]
            y = y[np.isfinite(y)]
            if y.size == 0:
                continue
            if ymin is None:
                ymin, ymax = float(y.min()), float(y.max())
            else:
                ymin = min(ymin, float(y.min()))
                ymax = max(ymax, float(y.max()))
        return ymin, ymax

    def on_xaxis_range_change(layout, xaxis_range):
        if xaxis_range is None or len(xaxis_range) != 2:
            x0, x1 = None, None
        else:
            x0 = pd.to_datetime(xaxis_range[0])
            x1 = pd.to_datetime(xaxis_range[1])

        # Polymarket price axis (left)
        pmin, pmax = compute_range(price_trace_idxs, x0, x1)
        if pmin is not None:
            span = max(pmax - pmin, 1e-9)
            pad = span * 0.05
            figw.layout.yaxis.range = [pmin - pad, pmax + pad]
        # BTC price axis (right)
        bmin, bmax = compute_range(btc_trace_idxs, x0, x1)
        if bmin is not None:
            span = max(bmax - bmin, 1e-9)
            pad = span * 0.05
            figw.layout.yaxis2.range = [bmin - pad, bmax + pad]

    # Register callback for xaxis.range changes
    figw.layout.on_change(on_xaxis_range_change, "xaxis.range")
    return figw


def build_bid_ask_fig(side_value: str, label: str, bid_color: str, ask_color: str) -> go.Figure:
    side_df = data[data["side"] == side_value].copy()
    if side_df.empty:
        fig = go.Figure()
        fig.add_annotation(text=f"No data for {label}", showarrow=False)
        return fig

    base_fig = make_subplots(specs=[[{"secondary_y": True}]])
    price_traces = []
    btc_traces = []

    def add_price_trace(price_col: str, qty_col: str, name: str, color: str) -> None:
        sub = side_df.dropna(subset=[price_col])
        if sub.empty:
            return
        # Include qty in customdata for hover
        qty_vals = sub[qty_col].fillna(0).values
        trace = go.Scatter(
            x=sub["ts"],
            y=sub[price_col],
            mode="lines",
            name=name,
            line=dict(color=color, width=2),
            customdata=qty_vals,
            hovertemplate=(
                "<b>%{text}</b><br>"
                "%{x|%Y-%m-%d %H:%M:%S.%L} UTC<br>"
                "price=%{y:.4f}<br>"
                "qty=%{customdata:,.0f}<extra></extra>"
            ),
            text=[label] * len(sub),
        )
        base_fig.add_trace(trace, secondary_y=False)
        price_traces.append(len(base_fig.data) - 1)

    # Add price lines with qty in hover
    add_price_trace("best_bid_price", "best_bid_qty", f"{label} bid", bid_color)
    add_price_trace("best_ask_price", "best_ask_qty", f"{label} ask", ask_color)

    # Add BTC price on secondary y-axis
    if not btc_data.empty:
        btc_trace = go.Scatter(
            x=btc_data["ts"],
            y=btc_data["price"],
            mode="lines",
            name="BTC price",
            line=dict(color="#f7931a", width=1.5),  # Bitcoin orange
            hovertemplate=(
                "<b>BTC</b><br>"
                "%{x|%Y-%m-%d %H:%M:%S.%L} UTC<br>"
                "price=$%{y:,.2f}<extra></extra>"
            ),
        )
        base_fig.add_trace(btc_trace, secondary_y=True)
        btc_traces.append(len(base_fig.data) - 1)

    base_fig.update_layout(
        title=f"{label} best bid/ask vs BTC price",
        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
        hovermode="x unified",
        margin=dict(l=40, r=20, t=50, b=40),
        template="plotly_white",
    )
    base_fig.update_xaxes(
        title="Event time (UTC)",
        rangeslider_visible=True,
        type="date",
        tickformat="%H:%M",
        hoverformat="%Y-%m-%d %H:%M:%S.%L",
    )
    base_fig.update_yaxes(title="Polymarket Price (USDC)", secondary_y=False)
    base_fig.update_yaxes(title="BTC Price (USD)", secondary_y=True, showgrid=False)

    figw = go.FigureWidget(base_fig)
    figw = attach_dynamic_y(figw, price_traces, btc_traces)
    return figw

fig_yes = build_bid_ask_fig("UP", "YES", "#2ca02c", "#ff7f0e")
fig_no = build_bid_ask_fig("DOWN", "NO", "#1f77b4", "#d62728")

display(fig_yes)
display(fig_no)



Plotting market: btc-updown-15m-1767131100
Loaded 445 BTC ticks for the market time range


FigureWidget({
    'data': [{'customdata': {'bdata': ('w/UoXI9CX0BxPQrXo5BxQOF6FK5HoW' ... 'XUihNBXI/C9fyKE0Fcj8L1kIoTQQ=='),
                             'dtype': 'f8'},
              'hovertemplate': ('<b>%{text}</b><br>%{x|%Y-%m-%d' ... 'ustomdata:,.0f}<extra></extra>'),
              'line': {'color': '#2ca02c', 'width': 2},
              'mode': 'lines',
              'name': 'YES bid',
              'text': [YES, YES, YES, ..., YES, YES, YES],
              'type': 'scatter',
              'uid': 'c11538a5-e3f2-4f61-85be-e9da62acefb7',
              'x': array(['2025-12-30 21:45:02.695000', '2025-12-30 21:45:02.733000',
                          '2025-12-30 21:45:02.888000', ..., '2025-12-30 22:00:02.621000',
                          '2025-12-30 22:00:03.310000', '2025-12-30 22:00:03.512000'],
                         shape=(5642,), dtype=object),
              'xaxis': 'x',
              'y': {'bdata': ('zczMzMzM3D/NzMzMzMzcP83MzMzMzN' ... 'oUru8/rkfhehSu7z+uR+F6FK7vPw=='),
   

FigureWidget({
    'data': [{'customdata': {'bdata': ('rkfhehTpk0CuR+F6FI2RQK5H4XoUjZ' ... 'gehevlc0CkcD0K10trQKRwPQrXq2pA'),
                             'dtype': 'f8'},
              'hovertemplate': ('<b>%{text}</b><br>%{x|%Y-%m-%d' ... 'ustomdata:,.0f}<extra></extra>'),
              'line': {'color': '#1f77b4', 'width': 2},
              'mode': 'lines',
              'name': 'NO bid',
              'text': [NO, NO, NO, ..., NO, NO, NO],
              'type': 'scatter',
              'uid': '9453e905-1d29-436b-81b3-126af38003e3',
              'x': array(['2025-12-30 21:45:02.695000', '2025-12-30 21:45:02.733000',
                          '2025-12-30 21:45:02.888000', ..., '2025-12-30 21:59:59.971000',
                          '2025-12-30 22:00:00.010000', '2025-12-30 22:00:00.014000'],
                         shape=(5631,), dtype=object),
              'xaxis': 'x',
              'y': {'bdata': ('SOF6FK5H4T9I4XoUrkfhP0jhehSuR+' ... 'SuR+F6hD97FK5H4XqEP3sUrkfheoQ/'),
          

In [56]:
# Analyze BTC changes vs Polymarket changes using PROPER MARKOUT
# 
# CRITICAL: When going long, you BUY at ASK and SELL at BID
# Therefore, true profit (markout) = bid(t) - ask(entry)
#
# REALISTIC TIMING: Entry happens at btc_ts + MIN_DELAY_MS (network delay)
# So entry_ask is the ask price AFTER the delay, not at the BTC tick time
#
# OPTIMIZED: Uses parallel processing with joblib + vectorized operations
#
# For each BTC change, find:
#   - close_change: first markout where bid(t) - ask(entry) != 0
#   - close_delay: time delay (ms) to that point
#   - best_change: maximum markout before adverse threshold triggers
#   - best_delay: time delay (ms) to the best markout
# IMPORTANT: Only match within the SAME 15-min market interval (market_instance_id)
# FILTERS:
#   - Only use COMPLETE markets (span >= 14 minutes)
#   - Stop analysis 2 minutes before market end (to avoid end-of-market effects)

import time
from joblib import Parallel, delayed
from tqdm.auto import tqdm
import warnings
warnings.filterwarnings('ignore', category=pd.errors.PerformanceWarning)

# Configuration
MIN_MARKET_SPAN_MS = 14 * 60 * 1000  # Only use markets with >= 14 minute span
START_BUFFER_MS = 1 * 60 * 1000      # Skip first 1 minute of each market (initialization noise)
END_BUFFER_MS = 2 * 60 * 1000        # Stop analysis 2 minutes before market end
MIN_DELAY_MS = 60  # Minimum delay (ms) before looking for PM changes
ADVERSE_THRESHOLD = 0.02  # Stop tracking best if markout goes this far against us

# Parallel processing config
N_JOBS = -1  # Use all available cores (-1 = auto-detect)
BATCH_SIZE = 1000  # Process BTC ticks in batches for progress tracking

start_time = time.time()

# Query ALL BTC ticks from database (including volume)
all_btc_query = """
SELECT event_timestamp_ms, price, volume
FROM btc_ticks
ORDER BY event_timestamp_ms;
"""
all_btc = pd.read_sql(all_btc_query, engine)
all_btc["ts"] = pd.to_datetime(all_btc["event_timestamp_ms"], unit="ms", utc=True).dt.strftime("%Y-%m-%d %H:%M:%S.%f")
all_btc["price_change"] = all_btc["price"].diff()
print(f"Loaded {len(all_btc)} total BTC ticks from database (with volume)")

# Query ALL Polymarket orderbook events WITH market_instance_id
all_pm_query = """
SELECT event_timestamp_ms, side, market_instance_id, best_bid_price, best_ask_price
FROM polymarket_orderbook_events
ORDER BY event_timestamp_ms;
"""
all_pm = pd.read_sql(all_pm_query, engine)
print(f"Loaded {len(all_pm)} total Polymarket events from database")

# Get market interval time ranges (min/max timestamp per market_instance_id)
market_ranges = all_pm.groupby("market_instance_id")["event_timestamp_ms"].agg(["min", "max"]).reset_index()
market_ranges.columns = ["market_instance_id", "market_start_ms", "market_end_ms"]
market_ranges["span_ms"] = market_ranges["market_end_ms"] - market_ranges["market_start_ms"]

# Filter to only COMPLETE markets
complete_market_ranges = market_ranges[market_ranges["span_ms"] >= MIN_MARKET_SPAN_MS].copy()
complete_market_ranges["analysis_start_ms"] = complete_market_ranges["market_start_ms"] + START_BUFFER_MS
complete_market_ranges["analysis_end_ms"] = complete_market_ranges["market_end_ms"] - END_BUFFER_MS

print(f"Found {len(market_ranges)} total market intervals")
print(f"Using {len(complete_market_ranges)} complete markets (span >= {MIN_MARKET_SPAN_MS/1000/60:.0f} min)")
print(f"Analysis window: skip first {START_BUFFER_MS/1000/60:.0f} min, stop {END_BUFFER_MS/1000/60:.0f} min before end")

# Use UP/YES side from COMPLETE markets only
yes_data = all_pm[
    (all_pm["side"] == "UP") & 
    (all_pm["market_instance_id"].isin(complete_market_ranges["market_instance_id"]))
].copy()
yes_data = yes_data.dropna(subset=["best_bid_price"]).reset_index(drop=True)
yes_data["ts_ms"] = yes_data["event_timestamp_ms"]

# Prepare BTC data
btc_analysis = all_btc.dropna(subset=["price_change"]).copy().reset_index(drop=True)
btc_analysis["ts_ms"] = btc_analysis["event_timestamp_ms"]
btc_analysis["direction"] = np.sign(btc_analysis["price_change"])

# ============================================================================
# OPTIMIZATION 1: Vectorized market assignment using IntervalIndex
# ============================================================================
print("\n[OPTIMIZATION] Vectorized market assignment...")
market_start_end = complete_market_ranges[["market_instance_id", "analysis_start_ms", "analysis_end_ms"]].values
market_ids = complete_market_ranges["market_instance_id"].values
analysis_ends = complete_market_ranges["analysis_end_ms"].values

# Build IntervalIndex for fast interval lookup
intervals = pd.IntervalIndex.from_arrays(
    complete_market_ranges["analysis_start_ms"], 
    complete_market_ranges["analysis_end_ms"],
    closed='both'
)

# Vectorized assignment using IntervalIndex
def assign_markets_vectorized(ts_values):
    """Vectorized market assignment using pd.cut with IntervalIndex."""
    result_ids = np.full(len(ts_values), None, dtype=object)
    result_ends = np.full(len(ts_values), np.nan)
    
    for idx, (start, end, mid, aend) in enumerate(zip(
        complete_market_ranges["analysis_start_ms"].values,
        complete_market_ranges["analysis_end_ms"].values,
        market_ids,
        analysis_ends
    )):
        mask = (ts_values >= start) & (ts_values <= end)
        result_ids[mask] = mid
        result_ends[mask] = aend
    
    return result_ids, result_ends

btc_ts_values = btc_analysis["ts_ms"].values
market_id_results, analysis_end_results = assign_markets_vectorized(btc_ts_values)
btc_analysis["market_instance_id"] = market_id_results
btc_analysis["analysis_end_ms"] = analysis_end_results

btc_in_market = btc_analysis["market_instance_id"].notna().sum()
print(f"BTC ticks within analysis window: {btc_in_market} / {len(btc_analysis)}")

# ============================================================================
# OPTIMIZATION 2: Pre-index PM data by market for O(1) lookup
# ============================================================================
print("[OPTIMIZATION] Pre-indexing PM data by market...")
pm_by_market = {}
for market_id in complete_market_ranges["market_instance_id"].values:
    market_pm = yes_data[yes_data["market_instance_id"] == market_id].copy()
    market_pm = market_pm.sort_values("ts_ms").reset_index(drop=True)
    pm_by_market[market_id] = {
        "ts_ms": market_pm["ts_ms"].values,
        "bid": market_pm["best_bid_price"].values,
        "ask": market_pm["best_ask_price"].values,
    }
print(f"Indexed {len(pm_by_market)} markets")

# ============================================================================
# OPTIMIZATION 3: Numba-compatible processing function (runs in pure Python for compatibility)
# ============================================================================
def process_btc_tick(btc_ts, btc_dir, btc_market, btc_cutoff, pm_data, min_delay, adverse_thresh):
    """Process a single BTC tick and return markout results."""
    if btc_dir == 0 or btc_market is None or pd.isna(btc_market):
        return None
    
    if btc_market not in pm_data:
        return None
    
    market_pm = pm_data[btc_market]
    pm_ts = market_pm["ts_ms"]
    pm_bid = market_pm["bid"]
    pm_ask = market_pm["ask"]
    
    # Binary search for first PM event after delay
    min_ts = btc_ts + min_delay
    start_idx = np.searchsorted(pm_ts, min_ts, side='right')
    
    if start_idx >= len(pm_ts):
        return None
    
    # Filter to cutoff
    end_idx = np.searchsorted(pm_ts, btc_cutoff, side='right')
    if start_idx >= end_idx:
        return None
    
    # Entry price is the ask at first available PM event
    entry_ask = pm_ask[start_idx]
    if np.isnan(entry_ask):
        return None
    
    # Compute markouts
    markouts = pm_bid[start_idx:end_idx] - entry_ask
    timestamps = pm_ts[start_idx:end_idx]
    
    # For shorts, flip the markout sign
    if btc_dir < 0:
        markouts = -markouts
    
    # Find first non-zero markout and track best/worst
    result = {
        "entry_ask": entry_ask,
        "close_change": np.nan,
        "close_delay": np.nan,
        "best_change": np.nan,
        "best_delay": np.nan,
        "opp_change": np.nan,
        "opp_delay": np.nan,
        "worst_change": np.nan,
        "worst_delay": np.nan,
    }
    
    close_found = False
    opp_found = False
    best_markout = None
    best_ts = None
    worst_markout = None
    worst_ts = None
    
    for k in range(len(markouts)):
        markout = markouts[k]
        pm_ts_k = timestamps[k]
        
        if np.isnan(markout):
            continue
        
        if not close_found and not opp_found:
            if markout > 0:
                result["close_change"] = markout if btc_dir > 0 else -markout
                result["close_delay"] = pm_ts_k - btc_ts
                close_found = True
                best_markout = markout
                best_ts = pm_ts_k
            elif markout < 0:
                result["opp_change"] = markout if btc_dir > 0 else -markout
                result["opp_delay"] = pm_ts_k - btc_ts
                opp_found = True
                worst_markout = markout
                worst_ts = pm_ts_k
            continue
        
        if close_found:
            if markout > best_markout:
                best_markout = markout
                best_ts = pm_ts_k
            if markout < -adverse_thresh:
                break
        elif opp_found:
            if markout < worst_markout:
                worst_markout = markout
                worst_ts = pm_ts_k
            if markout > adverse_thresh:
                break
    
    if best_markout is not None:
        result["best_change"] = best_markout if btc_dir > 0 else -best_markout
        result["best_delay"] = best_ts - btc_ts
    
    if worst_markout is not None:
        result["worst_change"] = worst_markout if btc_dir > 0 else -worst_markout
        result["worst_delay"] = worst_ts - btc_ts
    
    return result

# ============================================================================
# OPTIMIZATION 4: Parallel processing with joblib
# ============================================================================
print(f"\n[OPTIMIZATION] Parallel processing with {N_JOBS} workers...")
print(f"Processing {len(btc_analysis)} BTC ticks...")

# Filter to valid ticks only for parallel processing
valid_mask = (btc_analysis["market_instance_id"].notna()) & (btc_analysis["direction"] != 0)
valid_indices = btc_analysis[valid_mask].index.tolist()
print(f"Valid ticks to process: {len(valid_indices)}")

# Prepare data for parallel processing
btc_data_for_parallel = [
    (
        btc_analysis.at[i, "ts_ms"],
        btc_analysis.at[i, "direction"],
        btc_analysis.at[i, "market_instance_id"],
        btc_analysis.at[i, "analysis_end_ms"],
    )
    for i in valid_indices
]

# Run parallel processing with progress bar
def process_batch(batch_data, pm_data, min_delay, adverse_thresh):
    """Process a batch of BTC ticks."""
    results = []
    for btc_ts, btc_dir, btc_market, btc_cutoff in batch_data:
        res = process_btc_tick(btc_ts, btc_dir, btc_market, btc_cutoff, pm_data, min_delay, adverse_thresh)
        results.append(res)
    return results

# Split into batches for progress tracking
n_batches = (len(btc_data_for_parallel) + BATCH_SIZE - 1) // BATCH_SIZE
batches = [btc_data_for_parallel[i*BATCH_SIZE:(i+1)*BATCH_SIZE] for i in range(n_batches)]

print(f"Processing {n_batches} batches of ~{BATCH_SIZE} ticks each...")

# Parallel execution
results_nested = Parallel(n_jobs=N_JOBS, verbose=0)(
    delayed(process_batch)(batch, pm_by_market, MIN_DELAY_MS, ADVERSE_THRESHOLD)
    for batch in tqdm(batches, desc="Analyzing markouts")
)

# Flatten results
results = [r for batch_results in results_nested for r in batch_results]

# Initialize result columns
for col in ["entry_ask", "close_change", "close_delay", "best_change", "best_delay",
            "opp_change", "opp_delay", "worst_change", "worst_delay"]:
    btc_analysis[col] = np.nan

# Apply results back to dataframe
for i, result in zip(valid_indices, results):
    if result is not None:
        for key, value in result.items():
            btc_analysis.at[i, key] = value

elapsed = time.time() - start_time
print(f"\n✓ Completed in {elapsed:.1f} seconds")
print(f"Analyzed {len(btc_analysis)} total BTC ticks")
print(f"BTC ticks in analysis window: {btc_in_market}")
print(f"Found close_change (profitable first): {btc_analysis['close_change'].notna().sum()} ticks")
print(f"Found opp_change (adverse first):      {btc_analysis['opp_change'].notna().sum()} ticks")

btc_analysis[[
    "ts", "market_instance_id", "price_change", "direction", "entry_ask",
    "close_change", "close_delay", "best_change", "best_delay",
    "opp_change", "opp_delay", "worst_change", "worst_delay"
]].dropna(subset=["market_instance_id"]).head(20)


Loaded 68497 total BTC ticks from database (with volume)
Loaded 1283497 total Polymarket events from database
Found 159 total market intervals
Using 101 complete markets (span >= 14 min)
Analysis window: skip first 1 min, stop 2 min before end

[OPTIMIZATION] Vectorized market assignment...
BTC ticks within analysis window: 35868 / 68496
[OPTIMIZATION] Pre-indexing PM data by market...
Indexed 101 markets

[OPTIMIZATION] Parallel processing with -1 workers...
Processing 68496 BTC ticks...
Valid ticks to process: 20785
Processing 21 batches of ~1000 ticks each...


Analyzing markouts:   0%|          | 0/21 [00:00<?, ?it/s]


✓ Completed in 22.9 seconds
Analyzed 68496 total BTC ticks
BTC ticks in analysis window: 35868
Found close_change (profitable first): 9898 ticks
Found opp_change (adverse first):      10446 ticks


Unnamed: 0,ts,market_instance_id,price_change,direction,entry_ask,close_change,close_delay,best_change,best_delay,opp_change,opp_delay,worst_change,worst_delay
110,2025-12-25 21:01:06.812000,btc-updown-15m-1766696400,0.1,1.0,0.44,,,,,-0.02,85.0,-0.08,70549.0
111,2025-12-25 21:01:09.378000,btc-updown-15m-1766696400,0.0,0.0,,,,,,,,,
112,2025-12-25 21:01:10.716000,btc-updown-15m-1766696400,-0.1,-1.0,0.44,-0.02,722.0,-0.08,66645.0,,,,
113,2025-12-25 21:01:13.036000,btc-updown-15m-1766696400,0.0,0.0,,,,,,,,,
114,2025-12-25 21:01:15.107000,btc-updown-15m-1766696400,0.0,0.0,,,,,,,,,
115,2025-12-25 21:01:18.015000,btc-updown-15m-1766696400,0.1,1.0,0.46,,,,,-0.04,741.0,-0.1,59346.0
116,2025-12-25 21:01:19.762000,btc-updown-15m-1766696400,-0.1,-1.0,0.46,-0.02,651.0,-0.1,57599.0,,,,
117,2025-12-25 21:01:21.647000,btc-updown-15m-1766696400,0.1,1.0,0.44,,,,,-0.01,401.0,-0.08,55714.0
118,2025-12-25 21:01:23.951000,btc-updown-15m-1766696400,-0.1,-1.0,0.44,-0.01,1920.0,-0.08,53410.0,,,,
119,2025-12-25 21:01:25.932000,btc-updown-15m-1766696400,0.1,1.0,0.44,,,,,-0.01,337.0,-0.08,51429.0


In [57]:
# Filter: only BTC ticks within analysis window AND non-zero price change
# Analysis window = complete markets only, skip first 1 min, stop 2 min before end
btc_in_market_nonzero = btc_analysis[
    (btc_analysis["market_instance_id"].notna()) & 
    (btc_analysis["price_change"] != 0)
].copy()

total = len(btc_in_market_nonzero)
has_close = btc_in_market_nonzero["close_change"].notna().sum()
has_opp = btc_in_market_nonzero["opp_change"].notna().sum()
neither = total - has_close - has_opp

print("=" * 60)
print("BTC Price Changes Summary (PROPER MARKOUT)")
print("  markout = bid(exit) - ask(entry)")
print(f"  (skip 1st min, stop 2 min before market end)")
print("=" * 60)
print(f"Total BTC price changes:           {total:,}")
print(f"Profitable first (close):          {has_close:,} ({100*has_close/total:.1f}%)")
print(f"Adverse first (opp):               {has_opp:,} ({100*has_opp/total:.1f}%)")
print(f"No markout change:                 {neither:,} ({100*neither/total:.1f}%)")
print()
print("=" * 60)
print("PROFITABLE FIRST Statistics (close_change, best_change)")
print("  entry_ask = ask at (btc_ts + network_delay) -- realistic entry price")
print("  close_change = first bid(t) - ask(entry) > 0")
print("  best_change = max bid(t) - ask(entry) before adverse threshold")
print("=" * 60)
print("Delay Statistics (ms):")
print("-" * 60)
print(btc_in_market_nonzero[["close_delay", "best_delay"]].describe().to_string(float_format="{:,.0f}".format))
print()
print("Markout Statistics (USDC):")
print("-" * 60)
print(btc_in_market_nonzero[["close_change", "best_change"]].describe().to_string(float_format="{:,.4f}".format))
print()
print("=" * 60)
print("ADVERSE FIRST Statistics (opp_change, worst_change)")
print("  opp_change = first bid(t) - ask(entry) < 0")
print("  worst_change = min bid(t) - ask(entry) before recovery")
print("=" * 60)
print("Delay Statistics (ms):")
print("-" * 60)
print(btc_in_market_nonzero[["opp_delay", "worst_delay"]].describe().to_string(float_format="{:,.0f}".format))
print()
print("Markout Statistics (USDC):")
print("-" * 60)
print(btc_in_market_nonzero[["opp_change", "worst_change"]].describe().to_string(float_format="{:,.4f}".format))


BTC Price Changes Summary (PROPER MARKOUT)
  markout = bid(exit) - ask(entry)
  (skip 1st min, stop 2 min before market end)
Total BTC price changes:           20,785
Profitable first (close):          9,898 (47.6%)
Adverse first (opp):               10,446 (50.3%)
No markout change:                 441 (2.1%)

PROFITABLE FIRST Statistics (close_change, best_change)
  entry_ask = ask at (btc_ts + network_delay) -- realistic entry price
  close_change = first bid(t) - ask(entry) > 0
  best_change = max bid(t) - ask(entry) before adverse threshold
Delay Statistics (ms):
------------------------------------------------------------
       close_delay  best_delay
count        9,898       9,898
mean           498      68,267
std          2,116     128,128
min             61          61
25%            123         823
50%            233      10,302
75%            452      62,384
max         64,660     716,925

Markout Statistics (USDC):
---------------------------------------------------------

In [58]:
# Build a dataframe with bid/ask changes at multiple time horizons after each BTC change
# OPTIMIZED: Uses parallel processing + binary search for fast lookups
# Time horizons to measure (in milliseconds)
HORIZONS_MS = [50, 100, 200, 300, 500, 700, 1000, 1500, 2000, 2500, 3000, 3500, 4000, 4500, 5000]

start_time = time.time()

# Start with BTC ticks that are in valid analysis window and have non-zero price change
btc_valid = btc_analysis[
    (btc_analysis["market_instance_id"].notna()) & 
    (btc_analysis["price_change"] != 0)
].copy()

print(f"Processing {len(btc_valid)} BTC ticks with valid price changes...")

# Use pre-indexed PM data from previous cell (pm_by_market)
# If not available, create it
if 'pm_by_market' not in dir():
    print("Building PM index...")
    pm_by_market = {}
    for market_id in complete_market_ranges["market_instance_id"].values:
        market_pm = yes_data[yes_data["market_instance_id"] == market_id].copy()
        market_pm = market_pm.sort_values("ts_ms").reset_index(drop=True)
        pm_by_market[market_id] = {
            "ts_ms": market_pm["ts_ms"].values,
            "bid": market_pm["best_bid_price"].values,
            "ask": market_pm["best_ask_price"].values,
        }

def process_horizon_tick(btc_ts, btc_change, btc_volume, btc_ts_str, btc_market, btc_cutoff, pm_data, horizons):
    """Process a single BTC tick for all horizons using binary search."""
    if btc_market not in pm_data:
        return None
    
    market_pm = pm_data[btc_market]
    pm_ts = market_pm["ts_ms"]
    pm_bid = market_pm["bid"]
    pm_ask = market_pm["ask"]
    
    # Find entry point: closest PM event AT OR BEFORE btc_ts
    entry_idx = np.searchsorted(pm_ts, btc_ts, side='right') - 1
    if entry_idx < 0:
        return None
    
    entry_ts = pm_ts[entry_idx]
    entry_bid = pm_bid[entry_idx]
    entry_ask = pm_ask[entry_idx]
    
    if np.isnan(entry_bid) or np.isnan(entry_ask):
        return None
    
    row = {
        "btc_ts": btc_ts_str,
        "btc_ts_ms": btc_ts,
        "market_instance_id": btc_market,
        "btc_volume": btc_volume,
        "btc_change": btc_change,
        "entry_ts_ms": entry_ts,
        "entry_bid": entry_bid,
        "entry_ask": entry_ask,
    }
    
    # For each horizon, use binary search
    for horizon in horizons:
        target_ts = btc_ts + horizon
        
        # Find first PM event at or after target_ts
        horizon_idx = np.searchsorted(pm_ts, target_ts, side='left')
        
        # Check if within cutoff
        if horizon_idx >= len(pm_ts) or pm_ts[horizon_idx] > btc_cutoff:
            row[f"bid_{horizon}ms"] = np.nan
            row[f"ask_{horizon}ms"] = np.nan
            row[f"bid_chg_{horizon}ms"] = np.nan
            row[f"ask_chg_{horizon}ms"] = np.nan
        else:
            horizon_bid = pm_bid[horizon_idx]
            horizon_ask = pm_ask[horizon_idx]
            
            row[f"bid_{horizon}ms"] = horizon_bid
            row[f"ask_{horizon}ms"] = horizon_ask
            row[f"bid_chg_{horizon}ms"] = horizon_bid - entry_bid if not np.isnan(horizon_bid) else np.nan
            row[f"ask_chg_{horizon}ms"] = horizon_ask - entry_ask if not np.isnan(horizon_ask) else np.nan
    
    return row

def process_horizon_batch(batch_data, pm_data, horizons):
    """Process a batch of BTC ticks."""
    results = []
    for btc_ts, btc_change, btc_volume, btc_ts_str, btc_market, btc_cutoff in batch_data:
        res = process_horizon_tick(btc_ts, btc_change, btc_volume, btc_ts_str, btc_market, btc_cutoff, pm_data, horizons)
        if res is not None:
            results.append(res)
    return results

# Prepare data for parallel processing (including volume)
btc_data_for_parallel = [
    (
        row["ts_ms"],
        row["price_change"],
        row["volume"],
        row["ts"],
        row["market_instance_id"],
        row["analysis_end_ms"],
    )
    for _, row in btc_valid.iterrows()
]

# Split into batches
n_batches = (len(btc_data_for_parallel) + BATCH_SIZE - 1) // BATCH_SIZE
batches = [btc_data_for_parallel[i*BATCH_SIZE:(i+1)*BATCH_SIZE] for i in range(n_batches)]

print(f"Processing {n_batches} batches with parallel execution...")

# Parallel execution
results_nested = Parallel(n_jobs=N_JOBS, verbose=0)(
    delayed(process_horizon_batch)(batch, pm_by_market, HORIZONS_MS)
    for batch in tqdm(batches, desc="Building horizon data")
)

# Flatten results
results = [r for batch_results in results_nested for r in batch_results]

# Create the dataframe
horizon_df = pd.DataFrame(results)
elapsed = time.time() - start_time
print(f"✓ Created dataframe with {len(horizon_df)} rows in {elapsed:.1f} seconds")
print(f"Columns: {list(horizon_df.columns)}")
horizon_df.head(10)


Processing 20785 BTC ticks with valid price changes...
Processing 21 batches with parallel execution...


Building horizon data:   0%|          | 0/21 [00:00<?, ?it/s]

✓ Created dataframe with 20558 rows in 4.6 seconds
Columns: ['btc_ts', 'btc_ts_ms', 'market_instance_id', 'btc_volume', 'btc_change', 'entry_ts_ms', 'entry_bid', 'entry_ask', 'bid_50ms', 'ask_50ms', 'bid_chg_50ms', 'ask_chg_50ms', 'bid_100ms', 'ask_100ms', 'bid_chg_100ms', 'ask_chg_100ms', 'bid_200ms', 'ask_200ms', 'bid_chg_200ms', 'ask_chg_200ms', 'bid_300ms', 'ask_300ms', 'bid_chg_300ms', 'ask_chg_300ms', 'bid_500ms', 'ask_500ms', 'bid_chg_500ms', 'ask_chg_500ms', 'bid_700ms', 'ask_700ms', 'bid_chg_700ms', 'ask_chg_700ms', 'bid_1000ms', 'ask_1000ms', 'bid_chg_1000ms', 'ask_chg_1000ms', 'bid_1500ms', 'ask_1500ms', 'bid_chg_1500ms', 'ask_chg_1500ms', 'bid_2000ms', 'ask_2000ms', 'bid_chg_2000ms', 'ask_chg_2000ms', 'bid_2500ms', 'ask_2500ms', 'bid_chg_2500ms', 'ask_chg_2500ms', 'bid_3000ms', 'ask_3000ms', 'bid_chg_3000ms', 'ask_chg_3000ms', 'bid_3500ms', 'ask_3500ms', 'bid_chg_3500ms', 'ask_chg_3500ms', 'bid_4000ms', 'ask_4000ms', 'bid_chg_4000ms', 'ask_chg_4000ms', 'bid_4500ms', 'ask_45

Unnamed: 0,btc_ts,btc_ts_ms,market_instance_id,btc_volume,btc_change,entry_ts_ms,entry_bid,entry_ask,bid_50ms,ask_50ms,...,bid_chg_4000ms,ask_chg_4000ms,bid_4500ms,ask_4500ms,bid_chg_4500ms,ask_chg_4500ms,bid_5000ms,ask_5000ms,bid_chg_5000ms,ask_chg_5000ms
0,2025-12-25 21:01:06.812000,1766696466812,btc-updown-15m-1766696400,52099.896,0.1,1766696466628,0.43,0.44,0.42,0.44,...,-0.01,0.0,0.42,0.44,-0.01,0.0,0.42,0.44,-0.01,0.0
1,2025-12-25 21:01:10.716000,1766696470716,btc-updown-15m-1766696400,52100.115,-0.1,1766696469582,0.42,0.44,0.42,0.44,...,0.0,0.01,0.42,0.44,0.0,0.0,0.42,0.45,0.0,0.01
2,2025-12-25 21:01:18.015000,1766696478015,btc-updown-15m-1766696400,52100.309,0.1,1766696477466,0.42,0.44,0.42,0.46,...,0.01,0.0,0.43,0.44,0.01,0.0,0.43,0.44,0.01,0.0
3,2025-12-25 21:01:19.762000,1766696479762,btc-updown-15m-1766696400,52100.338,-0.1,1766696479702,0.44,0.46,0.44,0.46,...,-0.01,-0.02,0.43,0.44,-0.01,-0.02,0.43,0.44,-0.01,-0.02
4,2025-12-25 21:01:21.647000,1766696481647,btc-updown-15m-1766696400,52101.29,0.1,1766696481574,0.43,0.46,0.43,0.44,...,0.0,-0.02,0.43,0.44,0.0,-0.02,0.43,0.44,0.0,-0.02
5,2025-12-25 21:01:23.951000,1766696483951,btc-updown-15m-1766696400,52101.476,-0.1,1766696483417,0.43,0.44,0.43,0.44,...,0.0,0.0,0.43,0.44,0.0,0.0,0.43,0.45,0.0,0.01
6,2025-12-25 21:01:25.932000,1766696485932,btc-updown-15m-1766696400,52101.5,0.1,1766696485871,0.43,0.44,0.43,0.44,...,0.0,0.01,0.43,0.45,0.0,0.01,0.43,0.45,0.0,0.01
7,2025-12-25 21:01:27.418000,1766696487418,btc-updown-15m-1766696400,52101.633,-0.1,1766696486936,0.43,0.44,0.43,0.44,...,0.0,0.01,0.43,0.45,0.0,0.01,0.43,0.45,0.0,0.01
8,2025-12-25 21:01:31.889000,1766696491889,btc-updown-15m-1766696400,52101.81,0.1,1766696491233,0.43,0.45,0.43,0.45,...,0.0,0.0,0.43,0.45,0.0,0.0,0.44,0.45,0.01,0.0
9,2025-12-25 21:01:33.082000,1766696493082,btc-updown-15m-1766696400,52102.34,-0.1,1766696492925,0.43,0.45,0.43,0.45,...,0.01,0.0,0.43,0.45,0.0,0.0,0.45,0.46,0.02,0.01


In [59]:
# Summary statistics for bid/ask changes at each horizon
print("=" * 70)
print("BID CHANGE at each horizon (relative to entry_bid)")
print("=" * 70)
bid_chg_cols = [f"bid_chg_{h}ms" for h in HORIZONS_MS]
print(horizon_df[bid_chg_cols].describe().to_string(float_format="{:.4f}".format))

print()
print("=" * 70)
print("ASK CHANGE at each horizon (relative to entry_ask)")
print("=" * 70)
ask_chg_cols = [f"ask_chg_{h}ms" for h in HORIZONS_MS]
print(horizon_df[ask_chg_cols].describe().to_string(float_format="{:.4f}".format))


BID CHANGE at each horizon (relative to entry_bid)
       bid_chg_50ms  bid_chg_100ms  bid_chg_200ms  bid_chg_300ms  bid_chg_500ms  bid_chg_700ms  bid_chg_1000ms  bid_chg_1500ms  bid_chg_2000ms  bid_chg_2500ms  bid_chg_3000ms  bid_chg_3500ms  bid_chg_4000ms  bid_chg_4500ms  bid_chg_5000ms
count    20348.0000     20345.0000     20341.0000     20338.0000     20335.0000     20330.0000      20318.0000      20307.0000      20294.0000      20285.0000      20273.0000      20260.0000      20250.0000      20236.0000      20226.0000
mean         0.0005         0.0005         0.0005         0.0005         0.0004         0.0005          0.0005          0.0006          0.0007          0.0006          0.0006          0.0007          0.0008          0.0007          0.0006
std          0.0069         0.0081         0.0102         0.0121         0.0161         0.0196          0.0217          0.0241          0.0265          0.0291          0.0316          0.0336          0.0355          0.0375          

In [60]:
# Mean bid/ask changes by BTC direction
horizon_df["btc_direction"] = np.sign(horizon_df["btc_change"])

print("=" * 70)
print("MEAN BID CHANGE by BTC direction")
print("  (Positive = bid increased, Negative = bid decreased)")
print("=" * 70)
bid_chg_cols = [f"bid_chg_{h}ms" for h in HORIZONS_MS]
print(horizon_df.groupby("btc_direction")[bid_chg_cols].mean().to_string(float_format="{:.4f}".format))

print()
print("=" * 70)
print("MEAN ASK CHANGE by BTC direction")
print("  (Positive = ask increased, Negative = ask decreased)")
print("=" * 70)
ask_chg_cols = [f"ask_chg_{h}ms" for h in HORIZONS_MS]
print(horizon_df.groupby("btc_direction")[ask_chg_cols].mean().to_string(float_format="{:.4f}".format))

print()
print("=" * 70)
print("Sample sizes by direction:")
print(horizon_df.groupby("btc_direction").size())


MEAN BID CHANGE by BTC direction
  (Positive = bid increased, Negative = bid decreased)
               bid_chg_50ms  bid_chg_100ms  bid_chg_200ms  bid_chg_300ms  bid_chg_500ms  bid_chg_700ms  bid_chg_1000ms  bid_chg_1500ms  bid_chg_2000ms  bid_chg_2500ms  bid_chg_3000ms  bid_chg_3500ms  bid_chg_4000ms  bid_chg_4500ms  bid_chg_5000ms
btc_direction                                                                                                                                                                                                                                         
-1.0                -0.0005        -0.0008        -0.0014        -0.0020        -0.0031        -0.0043         -0.0048         -0.0051         -0.0050         -0.0053         -0.0054         -0.0053         -0.0052         -0.0053         -0.0054
 1.0                 0.0014         0.0018         0.0024         0.0029         0.0038         0.0052          0.0056          0.0060          0.0061          0.0062     

In [61]:
# Compute "aligned" changes: positive = market moved WITH BTC direction
# For BTC up: aligned = raw change (positive is good)
# For BTC down: aligned = -raw change (negative raw = positive aligned)

for horizon in HORIZONS_MS:
    horizon_df[f"bid_aligned_{horizon}ms"] = horizon_df[f"bid_chg_{horizon}ms"] * horizon_df["btc_direction"]
    horizon_df[f"ask_aligned_{horizon}ms"] = horizon_df[f"ask_chg_{horizon}ms"] * horizon_df["btc_direction"]

print("=" * 70)
print("ALIGNED BID CHANGE at each horizon")
print("  (Positive = bid moved WITH BTC direction)")
print("=" * 70)
aligned_bid_cols = [f"bid_aligned_{h}ms" for h in HORIZONS_MS]
print(horizon_df[aligned_bid_cols].describe().to_string(float_format="{:.4f}".format))

print()
print("=" * 70)
print("ALIGNED ASK CHANGE at each horizon")
print("  (Positive = ask moved WITH BTC direction)")
print("=" * 70)
aligned_ask_cols = [f"ask_aligned_{h}ms" for h in HORIZONS_MS]
print(horizon_df[aligned_ask_cols].describe().to_string(float_format="{:.4f}".format))

print()
print("=" * 70)
print("% of times market moved WITH BTC direction:")
print("=" * 70)
for horizon in HORIZONS_MS:
    bid_with = (horizon_df[f"bid_aligned_{horizon}ms"] > 0).sum()
    bid_total = horizon_df[f"bid_aligned_{horizon}ms"].notna().sum()
    ask_with = (horizon_df[f"ask_aligned_{horizon}ms"] > 0).sum()
    ask_total = horizon_df[f"ask_aligned_{horizon}ms"].notna().sum()
    print(f"  {horizon:4d}ms: bid={100*bid_with/bid_total:.1f}%, ask={100*ask_with/ask_total:.1f}%")


ALIGNED BID CHANGE at each horizon
  (Positive = bid moved WITH BTC direction)
       bid_aligned_50ms  bid_aligned_100ms  bid_aligned_200ms  bid_aligned_300ms  bid_aligned_500ms  bid_aligned_700ms  bid_aligned_1000ms  bid_aligned_1500ms  bid_aligned_2000ms  bid_aligned_2500ms  bid_aligned_3000ms  bid_aligned_3500ms  bid_aligned_4000ms  bid_aligned_4500ms  bid_aligned_5000ms
count        20348.0000         20345.0000         20341.0000         20338.0000         20335.0000         20330.0000          20318.0000          20307.0000          20294.0000          20285.0000          20273.0000          20260.0000          20250.0000          20236.0000          20226.0000
mean             0.0010             0.0013             0.0019             0.0024             0.0035             0.0048              0.0052              0.0055              0.0056              0.0057              0.0059              0.0058              0.0058              0.0059              0.0059
std              0.0069 

In [62]:
# Export horizon_df to Excel
# Install openpyxl if not available (required for Excel export)
ensure_package("openpyxl")

output_path = project_root / "horizon_analysis.xlsx"
horizon_df.to_excel(output_path, index=False, engine="openpyxl")
print(f"Saved horizon_df to: {output_path}")
print(f"Rows: {len(horizon_df)}, Columns: {len(horizon_df.columns)}")


Saved horizon_df to: d:\Investing\lightspeed-15min\horizon_analysis.xlsx
Rows: 20558, Columns: 99


## Optimal Entry/Exit Window Analysis

For finding optimal trading parameters, we calculate realistic profit:
- **BTC UP → Buy YES**: profit = YES_bid(exit) - YES_ask(entry)
- **BTC DOWN → Buy NO**: profit = NO_bid(exit) - NO_ask(entry)

We compute a profit matrix for all entry (Y ms) and exit (X ms) window combinations where X > Y.


In [63]:
# Distribution of BTC price changes to help decide threshold cutoff
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Get valid BTC ticks (in analysis window, non-zero change)
btc_for_dist = btc_analysis[
    (btc_analysis["market_instance_id"].notna()) & 
    (btc_analysis["price_change"] != 0)
].copy()

btc_for_dist["abs_change"] = btc_for_dist["price_change"].abs()

print("=" * 70)
print("BTC PRICE CHANGE DISTRIBUTION")
print("=" * 70)
print(f"Total BTC ticks with non-zero change: {len(btc_for_dist)}")
print()
print("Absolute Change Statistics:")
print(btc_for_dist["abs_change"].describe().to_string())
print()

# Percentiles to help decide threshold
percentiles = [50, 75, 90, 95, 99]
print("Percentiles (absolute change):")
for p in percentiles:
    val = btc_for_dist["abs_change"].quantile(p/100)
    count_above = (btc_for_dist["abs_change"] >= val).sum()
    print(f"  {p}th percentile: ${val:.2f} ({count_above} ticks >= this)")

# Count by threshold
print()
print("Sample sizes by threshold:")
thresholds = [0, 1, 2, 3, 5, 7, 10, 15, 20]
for t in thresholds:
    count = (btc_for_dist["abs_change"] >= t).sum()
    pct = 100 * count / len(btc_for_dist)
    print(f"  >= ${t:2d}: {count:5d} ticks ({pct:5.1f}%)")

# Create histogram
fig = make_subplots(rows=2, cols=1, 
                    subplot_titles=("BTC Price Change Distribution (All)", 
                                   "Absolute BTC Price Change (Zoomed to $0-$30)"))

# Full distribution (signed)
fig.add_trace(
    go.Histogram(x=btc_for_dist["price_change"], nbinsx=100, name="Signed Change"),
    row=1, col=1
)

# Absolute change (zoomed)
fig.add_trace(
    go.Histogram(x=btc_for_dist["abs_change"], nbinsx=60, name="Absolute Change",
                marker_color="coral"),
    row=2, col=1
)

# Add vertical lines for common thresholds
for t, color in [(5, "green"), (10, "orange"), (20, "red")]:
    fig.add_vline(x=t, line_dash="dash", line_color=color, 
                  annotation_text=f"${t}", row=2, col=1)

fig.update_xaxes(title_text="BTC Price Change ($)", row=1, col=1)
fig.update_xaxes(title_text="Absolute BTC Price Change ($)", range=[0, 30], row=2, col=1)
fig.update_yaxes(title_text="Count", row=1, col=1)
fig.update_yaxes(title_text="Count", row=2, col=1)

fig.update_layout(
    height=700,
    width=900,
    title_text="BTC Price Change Distribution for Threshold Selection",
    showlegend=False,
)
fig.show()


BTC PRICE CHANGE DISTRIBUTION
Total BTC ticks with non-zero change: 20785

Absolute Change Statistics:
count    20785.000000
mean         2.715338
std          5.492573
min          0.100000
25%          0.100000
50%          0.100000
75%          3.500000
max        171.100000

Percentiles (absolute change):
  50th percentile: $0.10 (15803 ticks >= this)
  75th percentile: $3.50 (5238 ticks >= this)
  90th percentile: $9.00 (2090 ticks >= this)
  95th percentile: $13.30 (1048 ticks >= this)
  99th percentile: $24.60 (208 ticks >= this)

Sample sizes by threshold:
  >= $ 0: 20785 ticks (100.0%)
  >= $ 1:  6803 ticks ( 32.7%)
  >= $ 2:  6248 ticks ( 30.1%)
  >= $ 3:  5569 ticks ( 26.8%)
  >= $ 5:  4186 ticks ( 20.1%)
  >= $ 7:  2977 ticks ( 14.3%)
  >= $10:  1768 ticks (  8.5%)
  >= $15:   815 ticks (  3.9%)
  >= $20:   385 ticks (  1.9%)


In [64]:
# Analyze mean profit and total profit across different BTC thresholds
# OPTIMIZED: Uses parallel processing + binary search
# This builds the full horizon dataframe ONCE then filters by threshold

start_time = time.time()
print("Building full horizon dataframe for threshold analysis (OPTIMIZED)...")

# Time horizons
HORIZONS_MS_ANALYSIS = [50, 100, 200, 300, 500, 700, 1000, 1500, 2000, 2500, 3000, 3500, 4000, 4500, 5000]

# Get NO (DOWN) side data from complete markets
no_data_analysis = all_pm[
    (all_pm["side"] == "DOWN") & 
    (all_pm["market_instance_id"].isin(complete_market_ranges["market_instance_id"]))
].copy()
no_data_analysis = no_data_analysis.dropna(subset=["best_bid_price"]).reset_index(drop=True)
no_data_analysis["ts_ms"] = no_data_analysis["event_timestamp_ms"]

# Pre-index NO data by market (similar to YES data)
print("[OPTIMIZATION] Pre-indexing NO data by market...")
no_by_market = {}
for market_id in complete_market_ranges["market_instance_id"].values:
    market_no = no_data_analysis[no_data_analysis["market_instance_id"] == market_id].copy()
    market_no = market_no.sort_values("ts_ms").reset_index(drop=True)
    no_by_market[market_id] = {
        "ts_ms": market_no["ts_ms"].values,
        "bid": market_no["best_bid_price"].values,
        "ask": market_no["best_ask_price"].values,
    }
print(f"Indexed {len(no_by_market)} markets for NO side")

# Start with ALL valid BTC ticks (no threshold filter)
btc_all = btc_analysis[
    (btc_analysis["market_instance_id"].notna()) & 
    (btc_analysis["price_change"] != 0)
].copy()

def process_full_horizon_tick(btc_ts, btc_change, btc_volume, btc_market, btc_cutoff, yes_data_dict, no_data_dict, horizons):
    """Process a single BTC tick for both YES and NO sides with all horizons."""
    if btc_market not in yes_data_dict or btc_market not in no_data_dict:
        return None
    
    yes_pm = yes_data_dict[btc_market]
    no_pm = no_data_dict[btc_market]
    
    row = {
        "btc_ts_ms": btc_ts,
        "btc_change": btc_change,
        "btc_volume": btc_volume,
        "abs_btc_change": abs(btc_change),
        "btc_direction": np.sign(btc_change),
    }
    
    for horizon in horizons:
        target_ts = btc_ts + horizon
        
        # YES side using binary search
        yes_idx = np.searchsorted(yes_pm["ts_ms"], target_ts, side='left')
        if yes_idx < len(yes_pm["ts_ms"]) and yes_pm["ts_ms"][yes_idx] <= btc_cutoff:
            row[f"yes_bid_{horizon}ms"] = yes_pm["bid"][yes_idx]
            row[f"yes_ask_{horizon}ms"] = yes_pm["ask"][yes_idx]
        else:
            row[f"yes_bid_{horizon}ms"] = np.nan
            row[f"yes_ask_{horizon}ms"] = np.nan
        
        # NO side using binary search
        no_idx = np.searchsorted(no_pm["ts_ms"], target_ts, side='left')
        if no_idx < len(no_pm["ts_ms"]) and no_pm["ts_ms"][no_idx] <= btc_cutoff:
            row[f"no_bid_{horizon}ms"] = no_pm["bid"][no_idx]
            row[f"no_ask_{horizon}ms"] = no_pm["ask"][no_idx]
        else:
            row[f"no_bid_{horizon}ms"] = np.nan
            row[f"no_ask_{horizon}ms"] = np.nan
    
    return row

def process_full_horizon_batch(batch_data, yes_data_dict, no_data_dict, horizons):
    """Process a batch of BTC ticks."""
    results = []
    for btc_ts, btc_change, btc_volume, btc_market, btc_cutoff in batch_data:
        res = process_full_horizon_tick(btc_ts, btc_change, btc_volume, btc_market, btc_cutoff, 
                                        yes_data_dict, no_data_dict, horizons)
        if res is not None:
            results.append(res)
    return results

# Prepare data for parallel processing (including volume)
btc_data_for_parallel = [
    (row["ts_ms"], row["price_change"], row["volume"], row["market_instance_id"], row["analysis_end_ms"])
    for _, row in btc_all.iterrows()
]

# Split into batches
n_batches = (len(btc_data_for_parallel) + BATCH_SIZE - 1) // BATCH_SIZE
batches = [btc_data_for_parallel[i*BATCH_SIZE:(i+1)*BATCH_SIZE] for i in range(n_batches)]

print(f"Processing {len(btc_all)} BTC ticks in {n_batches} parallel batches...")

# Parallel execution
results_nested = Parallel(n_jobs=N_JOBS, verbose=0)(
    delayed(process_full_horizon_batch)(batch, pm_by_market, no_by_market, HORIZONS_MS_ANALYSIS)
    for batch in tqdm(batches, desc="Building threshold horizon data")
)

# Flatten results
results_all = [r for batch_results in results_nested for r in batch_results]

full_df_all = pd.DataFrame(results_all)
print(f"Built dataframe with {len(full_df_all)} rows")

# OPTIMIZED: Calculate ALL profit columns at once using vectorized operations
print("[OPTIMIZATION] Computing profit columns with vectorized operations...")
profit_data = {}
for entry_h in HORIZONS_MS_ANALYSIS:
    for exit_h in HORIZONS_MS_ANALYSIS:
        if exit_h <= entry_h:
            continue
        col = f"profit_{entry_h}_{exit_h}ms"
        
        # Vectorized computation
        up_mask = full_df_all["btc_direction"] > 0
        down_mask = full_df_all["btc_direction"] < 0
        
        profit_col = np.full(len(full_df_all), np.nan)
        profit_col[up_mask] = (
            full_df_all.loc[up_mask, f"yes_bid_{exit_h}ms"].values - 
            full_df_all.loc[up_mask, f"yes_ask_{entry_h}ms"].values
        )
        profit_col[down_mask] = (
            full_df_all.loc[down_mask, f"no_bid_{exit_h}ms"].values - 
            full_df_all.loc[down_mask, f"no_ask_{entry_h}ms"].values
        )
        profit_data[col] = profit_col

# Add all profit columns at once (avoids fragmentation warning)
profit_df = pd.DataFrame(profit_data)
full_df_all = pd.concat([full_df_all, profit_df], axis=1)

profit_cols_all = [c for c in full_df_all.columns if c.startswith("profit_")]
elapsed = time.time() - start_time
print(f"✓ Created {len(profit_cols_all)} profit columns in {elapsed:.1f} seconds")

# Now analyze by threshold
thresholds = list(range(1, 21))
threshold_results = []

for thresh in thresholds:
    df_thresh = full_df_all[full_df_all["abs_btc_change"] >= thresh]
    n_trades = len(df_thresh)
    
    if n_trades < 10:
        continue
    
    # Find best entry/exit combination for this threshold
    best_mean = -np.inf
    best_total = -np.inf
    best_combo_mean = None
    best_combo_total = None
    
    all_profits = []
    for col in profit_cols_all:
        valid = df_thresh[col].dropna()
        if len(valid) > 0:
            all_profits.extend(valid.tolist())
            mean_p = valid.mean()
            total_p = valid.sum()
            if mean_p > best_mean:
                best_mean = mean_p
                best_combo_mean = col
            if total_p > best_total:
                best_total = total_p
                best_combo_total = col
    
    # Overall stats across all combos
    if all_profits:
        overall_mean = np.mean(all_profits)
        overall_total = np.sum(all_profits)
        overall_win_rate = 100 * sum(1 for p in all_profits if p > 0) / len(all_profits)
    else:
        overall_mean = overall_total = overall_win_rate = np.nan
    
    threshold_results.append({
        "threshold": thresh,
        "n_btc_ticks": n_trades,
        "overall_mean_profit": overall_mean,
        "overall_total_profit": overall_total,
        "overall_win_rate": overall_win_rate,
        "best_mean_profit": best_mean,
        "best_mean_combo": best_combo_mean,
        "best_total_profit": best_total,
        "best_total_combo": best_combo_total,
    })

threshold_df = pd.DataFrame(threshold_results)

# Display table
print()
print("=" * 100)
print("PROFIT BY BTC THRESHOLD (All Entry/Exit Combinations Averaged)")
print("=" * 100)
print(threshold_df[["threshold", "n_btc_ticks", "overall_mean_profit", "overall_total_profit", "overall_win_rate"]].to_string(
    index=False, 
    float_format=lambda x: f"{x:.4f}" if abs(x) < 100 else f"{x:.2f}"
))

print()
print("=" * 100)
print("BEST ENTRY/EXIT COMBO BY THRESHOLD")
print("=" * 100)
print(threshold_df[["threshold", "n_btc_ticks", "best_mean_profit", "best_mean_combo", "best_total_profit", "best_total_combo"]].to_string(
    index=False,
    float_format=lambda x: f"{x:.4f}" if abs(x) < 100 else f"{x:.2f}"
))

# Create visualization
fig = make_subplots(rows=2, cols=2,
                    subplot_titles=("Mean Profit by Threshold", "Total Profit by Threshold",
                                   "Win Rate by Threshold", "Sample Size by Threshold"))

# Mean profit
fig.add_trace(
    go.Bar(x=threshold_df["threshold"], y=threshold_df["overall_mean_profit"],
           marker_color=["green" if x > 0 else "red" for x in threshold_df["overall_mean_profit"]],
           name="Mean Profit"),
    row=1, col=1
)
fig.add_hline(y=0, line_dash="dash", line_color="gray", row=1, col=1)

# Total profit
fig.add_trace(
    go.Bar(x=threshold_df["threshold"], y=threshold_df["overall_total_profit"],
           marker_color=["green" if x > 0 else "red" for x in threshold_df["overall_total_profit"]],
           name="Total Profit"),
    row=1, col=2
)
fig.add_hline(y=0, line_dash="dash", line_color="gray", row=1, col=2)

# Win rate
fig.add_trace(
    go.Bar(x=threshold_df["threshold"], y=threshold_df["overall_win_rate"],
           marker_color="steelblue", name="Win Rate %"),
    row=2, col=1
)
fig.add_hline(y=50, line_dash="dash", line_color="gray", row=2, col=1)

# Sample size
fig.add_trace(
    go.Bar(x=threshold_df["threshold"], y=threshold_df["n_btc_ticks"],
           marker_color="coral", name="# Trades"),
    row=2, col=2
)

fig.update_xaxes(title_text="BTC Threshold ($)", row=1, col=1)
fig.update_xaxes(title_text="BTC Threshold ($)", row=1, col=2)
fig.update_xaxes(title_text="BTC Threshold ($)", row=2, col=1)
fig.update_xaxes(title_text="BTC Threshold ($)", row=2, col=2)
fig.update_yaxes(title_text="Mean Profit ($)", row=1, col=1)
fig.update_yaxes(title_text="Total Profit ($)", row=1, col=2)
fig.update_yaxes(title_text="Win Rate (%)", row=2, col=1)
fig.update_yaxes(title_text="# BTC Ticks", row=2, col=2)

fig.update_layout(
    height=700,
    width=1000,
    title_text="Profit Analysis by BTC Change Threshold",
    showlegend=False,
)
fig.show()


Building full horizon dataframe for threshold analysis (OPTIMIZED)...
[OPTIMIZATION] Pre-indexing NO data by market...
Indexed 101 markets for NO side
Processing 20785 BTC ticks in 21 parallel batches...


Building threshold horizon data:   0%|          | 0/21 [00:00<?, ?it/s]

Built dataframe with 20785 rows
[OPTIMIZATION] Computing profit columns with vectorized operations...
✓ Created 105 profit columns in 13.2 seconds

PROFIT BY BTC THRESHOLD (All Entry/Exit Combinations Averaged)
 threshold  n_btc_ticks  overall_mean_profit  overall_total_profit  overall_win_rate
         1         6803              -0.0077              -5362.00           16.2657
         2         6248              -0.0075              -4798.37           16.6518
         3         5569              -0.0074              -4192.81           17.0832
         4         4902              -0.0071              -3586.04           17.5636
         5         4186              -0.0068              -2906.02           18.3042
         6         3511              -0.0065              -2334.38           18.6538
         7         2977              -0.0061              -1865.66           19.1352
         8         2514              -0.0057              -1481.26           19.6425
         9         2090 

In [65]:
# Build comprehensive horizon dataframe with BOTH YES and NO prices
# OPTIMIZED: Uses parallel processing + binary search
# For BTC UP: Buy YES (profit = YES_bid_exit - YES_ask_entry)
# For BTC DOWN: Buy NO (profit = NO_bid_exit - NO_ask_entry)

start_time = time.time()

# ============================================================================
# CONFIGURATION: Set minimum absolute BTC price change threshold
# ============================================================================
BTC_CHANGE_THRESHOLD = 5  # Minimum |btc_change| to include (0 = all changes, 5 = only changes >= $5)

# Time horizons for entry and exit windows (in milliseconds)
HORIZONS_MS = [50, 100, 200, 300, 500, 700, 1000, 1500, 2000, 2500, 3000, 3500, 4000, 4500, 5000]
# ============================================================================

# Get NO (DOWN) side data from complete markets
no_data = all_pm[
    (all_pm["side"] == "DOWN") & 
    (all_pm["market_instance_id"].isin(complete_market_ranges["market_instance_id"]))
].copy()
no_data = no_data.dropna(subset=["best_bid_price"]).reset_index(drop=True)
no_data["ts_ms"] = no_data["event_timestamp_ms"]
print(f"Loaded {len(no_data)} NO (DOWN) side events from complete markets")

# Pre-index NO data if not already done
if 'no_by_market' not in dir():
    print("[OPTIMIZATION] Pre-indexing NO data by market...")
    no_by_market = {}
    for market_id in complete_market_ranges["market_instance_id"].values:
        market_no = no_data[no_data["market_instance_id"] == market_id].copy()
        market_no = market_no.sort_values("ts_ms").reset_index(drop=True)
        no_by_market[market_id] = {
            "ts_ms": market_no["ts_ms"].values,
            "bid": market_no["best_bid_price"].values,
            "ask": market_no["best_ask_price"].values,
        }
    print(f"Indexed {len(no_by_market)} markets")

# Start with valid BTC ticks (in analysis window, non-zero change, above threshold)
btc_valid = btc_analysis[
    (btc_analysis["market_instance_id"].notna()) & 
    (btc_analysis["price_change"] != 0) &
    (btc_analysis["price_change"].abs() >= BTC_CHANGE_THRESHOLD)
].copy()

print(f"BTC change threshold: >= ${BTC_CHANGE_THRESHOLD}")
print(f"Processing {len(btc_valid)} BTC ticks (filtered)...")

def process_comprehensive_tick(btc_ts, btc_ts_str, btc_change, btc_volume, btc_market, btc_cutoff, yes_data_dict, no_data_dict, horizons):
    """Process a single BTC tick for comprehensive horizon analysis."""
    if btc_market not in yes_data_dict or btc_market not in no_data_dict:
        return None
    
    yes_pm = yes_data_dict[btc_market]
    no_pm = no_data_dict[btc_market]
    
    row = {
        "btc_ts": btc_ts_str,
        "btc_ts_ms": btc_ts,
        "market_instance_id": btc_market,
        "btc_change": btc_change,
        "btc_volume": btc_volume,
        "btc_direction": np.sign(btc_change),
    }
    
    for horizon in horizons:
        target_ts = btc_ts + horizon
        
        # YES side using binary search
        yes_idx = np.searchsorted(yes_pm["ts_ms"], target_ts, side='left')
        if yes_idx < len(yes_pm["ts_ms"]) and yes_pm["ts_ms"][yes_idx] <= btc_cutoff:
            row[f"yes_bid_{horizon}ms"] = yes_pm["bid"][yes_idx]
            row[f"yes_ask_{horizon}ms"] = yes_pm["ask"][yes_idx]
        else:
            row[f"yes_bid_{horizon}ms"] = np.nan
            row[f"yes_ask_{horizon}ms"] = np.nan
        
        # NO side using binary search
        no_idx = np.searchsorted(no_pm["ts_ms"], target_ts, side='left')
        if no_idx < len(no_pm["ts_ms"]) and no_pm["ts_ms"][no_idx] <= btc_cutoff:
            row[f"no_bid_{horizon}ms"] = no_pm["bid"][no_idx]
            row[f"no_ask_{horizon}ms"] = no_pm["ask"][no_idx]
        else:
            row[f"no_bid_{horizon}ms"] = np.nan
            row[f"no_ask_{horizon}ms"] = np.nan
    
    return row

def process_comprehensive_batch(batch_data, yes_data_dict, no_data_dict, horizons):
    """Process a batch of BTC ticks."""
    results = []
    for btc_ts, btc_ts_str, btc_change, btc_volume, btc_market, btc_cutoff in batch_data:
        res = process_comprehensive_tick(btc_ts, btc_ts_str, btc_change, btc_volume, btc_market, btc_cutoff,
                                         yes_data_dict, no_data_dict, horizons)
        if res is not None:
            results.append(res)
    return results

# Prepare data for parallel processing (including volume)
btc_data_for_parallel = [
    (row["ts_ms"], row["ts"], row["price_change"], row["volume"], row["market_instance_id"], row["analysis_end_ms"])
    for _, row in btc_valid.iterrows()
]

# Split into batches
n_batches = (len(btc_data_for_parallel) + BATCH_SIZE - 1) // BATCH_SIZE
batches = [btc_data_for_parallel[i*BATCH_SIZE:(i+1)*BATCH_SIZE] for i in range(n_batches)]

print(f"Processing {n_batches} batches with parallel execution...")

# Parallel execution
results_nested = Parallel(n_jobs=N_JOBS, verbose=0)(
    delayed(process_comprehensive_batch)(batch, pm_by_market, no_by_market, HORIZONS_MS)
    for batch in tqdm(batches, desc="Building comprehensive horizon data")
)

# Flatten results
results = [r for batch_results in results_nested for r in batch_results]

# Create comprehensive horizon dataframe
full_horizon_df = pd.DataFrame(results)
elapsed = time.time() - start_time
print(f"✓ Created comprehensive dataframe with {len(full_horizon_df)} rows in {elapsed:.1f} seconds")
print(f"Columns: {len(full_horizon_df.columns)}")
full_horizon_df.head()


Loaded 450157 NO (DOWN) side events from complete markets
BTC change threshold: >= $5
Processing 4186 BTC ticks (filtered)...
Processing 5 batches with parallel execution...


Building comprehensive horizon data:   0%|          | 0/5 [00:00<?, ?it/s]

✓ Created comprehensive dataframe with 4186 rows in 2.8 seconds
Columns: 66


Unnamed: 0,btc_ts,btc_ts_ms,market_instance_id,btc_change,btc_volume,btc_direction,yes_bid_50ms,yes_ask_50ms,no_bid_50ms,no_ask_50ms,...,no_bid_4000ms,no_ask_4000ms,yes_bid_4500ms,yes_ask_4500ms,no_bid_4500ms,no_ask_4500ms,yes_bid_5000ms,yes_ask_5000ms,no_bid_5000ms,no_ask_5000ms
0,2025-12-25 21:02:12.506000,1766696532506,btc-updown-15m-1766696400,-6.2,52073.399,-1.0,0.41,0.42,0.58,0.59,...,0.6,0.61,0.37,0.38,0.62,0.63,0.36,0.37,0.63,0.64
1,2025-12-25 21:02:16.485000,1766696536485,btc-updown-15m-1766696400,-6.8,52075.561,-1.0,0.39,0.4,0.6,0.61,...,0.63,0.64,0.36,0.38,0.62,0.64,0.37,0.38,0.62,0.63
2,2025-12-25 21:02:54.672000,1766696574672,btc-updown-15m-1766696400,27.5,52101.89,1.0,0.38,0.39,0.61,0.62,...,0.52,0.53,0.47,0.49,0.51,0.53,0.48,0.49,0.51,0.52
3,2025-12-25 21:03:02.312000,1766696582312,btc-updown-15m-1766696400,12.2,52067.818,1.0,0.57,0.6,0.4,0.43,...,0.46,0.49,0.51,0.52,0.48,0.49,0.51,0.52,0.48,0.49
4,2025-12-25 21:03:42.487000,1766696622487,btc-updown-15m-1766696400,-40.2,52088.378,-1.0,0.27,0.29,0.71,0.73,...,0.72,0.74,0.25,0.26,0.74,0.75,0.24,0.26,0.74,0.76


In [66]:
# Calculate profit for each entry/exit window combination
# OPTIMIZED: Build all profit columns at once to avoid fragmentation
# Entry at Y ms, exit at X ms (where X > Y)
# 
# Profit calculation (REALISTIC):
#   BTC UP  → Buy YES: profit = yes_bid(X) - yes_ask(Y)
#   BTC DOWN → Buy NO:  profit = no_bid(X) - no_ask(Y)
#
# This correctly accounts for:
#   1. Buying at ask (what you pay)
#   2. Selling at bid (what you receive)
#   3. Different assets for different directions

start_time = time.time()
print("Computing profit columns with vectorized operations...")

# Pre-compute masks
btc_up_mask = full_horizon_df["btc_direction"] > 0
btc_down_mask = full_horizon_df["btc_direction"] < 0

# Build all profit columns in a dictionary first (avoids fragmentation)
profit_data = {}

for entry_h in HORIZONS_MS:
    for exit_h in HORIZONS_MS:
        if exit_h <= entry_h:
            continue  # Only consider exit > entry
        
        col_name = f"profit_{entry_h}_{exit_h}ms"
        
        # Vectorized computation
        profit_col = np.full(len(full_horizon_df), np.nan)
        
        # BTC UP: Buy YES at ask(entry), sell YES at bid(exit)
        profit_col[btc_up_mask] = (
            full_horizon_df.loc[btc_up_mask, f"yes_bid_{exit_h}ms"].values - 
            full_horizon_df.loc[btc_up_mask, f"yes_ask_{entry_h}ms"].values
        )
        
        # BTC DOWN: Buy NO at ask(entry), sell NO at bid(exit)
        profit_col[btc_down_mask] = (
            full_horizon_df.loc[btc_down_mask, f"no_bid_{exit_h}ms"].values - 
            full_horizon_df.loc[btc_down_mask, f"no_ask_{entry_h}ms"].values
        )
        
        profit_data[col_name] = profit_col

# Add all profit columns at once (avoids DataFrame fragmentation)
profit_df = pd.DataFrame(profit_data)
full_horizon_df = pd.concat([full_horizon_df, profit_df], axis=1)

# List all profit columns created
profit_cols = [c for c in full_horizon_df.columns if c.startswith("profit_")]
elapsed = time.time() - start_time
print(f"✓ Created {len(profit_cols)} profit columns in {elapsed:.1f} seconds")
print("Sample profit columns:", profit_cols[:5], "...")
full_horizon_df[["btc_change", "btc_direction"] + profit_cols[:5]].head(10)


Computing profit columns with vectorized operations...
✓ Created 105 profit columns in 0.1 seconds
Sample profit columns: ['profit_50_100ms', 'profit_50_200ms', 'profit_50_300ms', 'profit_50_500ms', 'profit_50_700ms'] ...


Unnamed: 0,btc_change,btc_direction,profit_50_100ms,profit_50_200ms,profit_50_300ms,profit_50_500ms,profit_50_700ms
0,-6.2,-1.0,-0.01,-0.01,-0.01,0.0,0.0
1,-6.8,-1.0,-0.01,-0.01,-0.01,0.01,0.01
2,27.5,1.0,0.01,0.02,0.02,0.02,0.05
3,12.2,1.0,-0.03,-0.03,-0.04,-0.09,-0.13
4,-40.2,-1.0,-0.02,-0.02,-0.03,-0.03,-0.04
5,-8.9,-1.0,-0.01,-0.01,-0.01,-0.01,-0.01
6,16.0,1.0,-0.01,0.01,0.02,0.05,0.05
7,8.7,1.0,-0.06,-0.06,-0.06,-0.04,0.0
8,7.1,1.0,-0.01,-0.01,0.0,0.0,0.0
9,-18.7,-1.0,-0.01,-0.01,-0.01,-0.01,0.05


In [67]:
# Build PROFIT MATRIX: mean profit for each (entry_horizon, exit_horizon) pair
# Rows = entry time (Y ms), Columns = exit time (X ms)
# Only valid where exit > entry

profit_matrix = pd.DataFrame(index=HORIZONS_MS, columns=HORIZONS_MS, dtype=float)
profit_matrix.index.name = "Entry (ms)"
profit_matrix.columns.name = "Exit (ms)"

count_matrix = pd.DataFrame(index=HORIZONS_MS, columns=HORIZONS_MS, dtype=int)
win_rate_matrix = pd.DataFrame(index=HORIZONS_MS, columns=HORIZONS_MS, dtype=float)

for entry_h in HORIZONS_MS:
    for exit_h in HORIZONS_MS:
        if exit_h <= entry_h:
            profit_matrix.loc[entry_h, exit_h] = np.nan
            count_matrix.loc[entry_h, exit_h] = 0
            win_rate_matrix.loc[entry_h, exit_h] = np.nan
        else:
            col_name = f"profit_{entry_h}_{exit_h}ms"
            valid_data = full_horizon_df[col_name].dropna()
            profit_matrix.loc[entry_h, exit_h] = valid_data.mean()
            count_matrix.loc[entry_h, exit_h] = len(valid_data)
            win_rate_matrix.loc[entry_h, exit_h] = (valid_data > 0).mean() * 100

print("=" * 80)
print("PROFIT MATRIX (Mean Profit per Trade)")
print(f"  BTC change threshold: >= ${BTC_CHANGE_THRESHOLD}")
print(f"  Sample size: {len(full_horizon_df)} trades")
print("  Rows = Entry window (ms after BTC change)")
print("  Cols = Exit window (ms after BTC change)")
print("  BTC UP → buy YES, BTC DOWN → buy NO")
print("  Profit = bid(exit) - ask(entry)")
print("=" * 80)
print()
print(profit_matrix.to_string(float_format="{:.4f}".format, na_rep="-"))

print()
print("=" * 80)
print("SAMPLE SIZE MATRIX (# of trades per cell)")
print("=" * 80)
print()
print(count_matrix.to_string())


PROFIT MATRIX (Mean Profit per Trade)
  BTC change threshold: >= $5
  Sample size: 4186 trades
  Rows = Entry window (ms after BTC change)
  Cols = Exit window (ms after BTC change)
  BTC UP → buy YES, BTC DOWN → buy NO
  Profit = bid(exit) - ask(entry)

Exit (ms)   50      100     200     300     500     700     1000    1500    2000    2500    3000    3500    4000    4500    5000
Entry (ms)                                                                                                                      
50             - -0.0124 -0.0103 -0.0080 -0.0036  0.0009  0.0027  0.0033  0.0037  0.0036  0.0035  0.0034  0.0034  0.0034  0.0031
100            -       - -0.0114 -0.0091 -0.0048 -0.0002  0.0016  0.0022  0.0026  0.0024  0.0024  0.0023  0.0023  0.0023  0.0020
200            -       -       - -0.0114 -0.0071 -0.0025 -0.0008 -0.0001  0.0003  0.0001  0.0001  0.0000  0.0000 -0.0000 -0.0003
300            -       -       -       - -0.0095 -0.0050 -0.0032 -0.0026 -0.0022 -0.0023 -0.0024 -0.

In [68]:
# WIN RATE MATRIX and find OPTIMAL parameters
print("=" * 80)
print("WIN RATE MATRIX (% of profitable trades)")
print(f"  BTC change threshold: >= ${BTC_CHANGE_THRESHOLD}, Sample size: {len(full_horizon_df)}")
print("=" * 80)
print()
print(win_rate_matrix.to_string(float_format="{:.1f}%".format, na_rep="-"))

# Find optimal entry/exit combinations
# Flatten the profit matrix to find best combinations
best_combos = []
for entry_h in HORIZONS_MS:
    for exit_h in HORIZONS_MS:
        if exit_h <= entry_h:
            continue
        col_name = f"profit_{entry_h}_{exit_h}ms"
        valid_data = full_horizon_df[col_name].dropna()
        #if len(valid_data) < 100:  # Require minimum sample size
        #    continue
        
        mean_profit = valid_data.mean()
        win_rate = (valid_data > 0).mean() * 100
        total_profit = valid_data.sum()
        
        best_combos.append({
            "entry_ms": entry_h,
            "exit_ms": exit_h,
            "hold_time_ms": exit_h - entry_h,
            "mean_profit": mean_profit,
            "win_rate": win_rate,
            "total_profit": total_profit,
            "n_trades": len(valid_data),
        })

best_df = pd.DataFrame(best_combos)
best_df = best_df.sort_values("mean_profit", ascending=False)

print()
print("=" * 80)
print("TOP 10 ENTRY/EXIT COMBINATIONS by Mean Profit")
print("  (minimum 100 trades)")
print("=" * 80)
print(best_df.head(10).to_string(index=False, float_format="{:.4f}".format))

print()
print("=" * 80)
print("TOP 10 ENTRY/EXIT COMBINATIONS by Win Rate")
print("=" * 80)
print(best_df.sort_values("win_rate", ascending=False).head(10).to_string(index=False, float_format="{:.4f}".format))

print()
print("=" * 80)
print("TOP 10 ENTRY/EXIT COMBINATIONS by Total Profit")
print("=" * 80)
print(best_df.sort_values("total_profit", ascending=False).head(10).to_string(index=False, float_format="{:.4f}".format))


WIN RATE MATRIX (% of profitable trades)
  BTC change threshold: >= $5, Sample size: 4186

      50    100   200   300   500   700   1000  1500  2000  2500  3000  3500  4000  4500  5000
50       -  1.1%  4.9%  9.3% 18.8% 28.0% 30.6% 32.4% 34.2% 34.9% 34.9% 35.3% 35.8% 36.2% 36.2%
100      -     -  2.9%  7.1% 16.5% 25.8% 28.6% 30.4% 32.3% 33.3% 33.5% 33.9% 34.6% 34.9% 35.4%
200      -     -     -  2.9% 12.3% 21.7% 24.9% 27.1% 28.9% 30.0% 30.6% 31.1% 32.0% 32.3% 32.8%
300      -     -     -     -  7.5% 17.2% 20.9% 23.4% 25.5% 26.8% 27.6% 28.2% 29.0% 29.8% 30.3%
500      -     -     -     -     -  7.5% 12.2% 16.2% 18.3% 20.2% 21.3% 22.3% 23.3% 24.4% 25.1%
700      -     -     -     -     -     -  3.3%  8.0% 11.2% 13.4% 14.8% 16.3% 17.5% 18.4% 19.5%
1000     -     -     -     -     -     -     -  4.5%  8.2% 10.7% 12.2% 13.8% 15.0% 16.7% 17.8%
1500     -     -     -     -     -     -     -     -  3.9%  6.6%  9.0% 10.5% 12.4% 14.5% 15.7%
2000     -     -     -     -     -     -     -     -  

In [69]:
# Visualize profit matrix as heatmap
import plotly.express as px

# Convert profit matrix to long format for heatmap
profit_long = []
for entry_h in HORIZONS_MS:
    for exit_h in HORIZONS_MS:
        if exit_h > entry_h:
            profit_long.append({
                "Entry (ms)": entry_h,
                "Exit (ms)": exit_h,
                "Mean Profit": profit_matrix.loc[entry_h, exit_h],
                "Win Rate %": win_rate_matrix.loc[entry_h, exit_h],
            })

profit_long_df = pd.DataFrame(profit_long)

# Pivot for heatmap
profit_pivot = profit_long_df.pivot(index="Entry (ms)", columns="Exit (ms)", values="Mean Profit")

threshold_label = f"BTC change ≥ ${BTC_CHANGE_THRESHOLD}" if BTC_CHANGE_THRESHOLD > 0 else "All BTC changes"

fig = px.imshow(
    profit_pivot,
    labels=dict(x="Exit Time (ms)", y="Entry Time (ms)", color="Mean Profit"),
    title=f"Profit Matrix: Entry vs Exit Window ({threshold_label}, n={len(full_horizon_df)})<br><sup>BTC UP → buy YES, BTC DOWN → buy NO | Profit = bid(exit) - ask(entry)</sup>",
    color_continuous_scale="RdYlGn",
    color_continuous_midpoint=0,
    aspect="auto",
)
fig.update_layout(
    width=800,
    height=600,
)
fig.show()

# Win rate heatmap
winrate_pivot = profit_long_df.pivot(index="Entry (ms)", columns="Exit (ms)", values="Win Rate %")

fig2 = px.imshow(
    winrate_pivot,
    labels=dict(x="Exit Time (ms)", y="Entry Time (ms)", color="Win Rate %"),
    title=f"Win Rate Matrix: Entry vs Exit Window ({threshold_label}, n={len(full_horizon_df)})<br><sup>% of trades with positive profit</sup>",
    color_continuous_scale="RdYlGn",
    color_continuous_midpoint=50,
    aspect="auto",
)
fig2.update_layout(
    width=800,
    height=600,
)
fig2.show()


In [70]:
# Export comprehensive horizon dataframe with profits to Excel
# Includes both YES and NO prices, and profit for each entry/exit combination

# Include threshold in filename if set
if BTC_CHANGE_THRESHOLD > 0:
    output_path = project_root / f"profit_analysis_btc{BTC_CHANGE_THRESHOLD}.xlsx"
else:
    output_path = project_root / "profit_analysis.xlsx"

# Create Excel writer for multiple sheets
with pd.ExcelWriter(output_path, engine="openpyxl") as writer:
    # Full data
    full_horizon_df.to_excel(writer, sheet_name="All Trades", index=False)
    
    # Split by direction
    btc_up_df = full_horizon_df[full_horizon_df["btc_direction"] > 0]
    btc_down_df = full_horizon_df[full_horizon_df["btc_direction"] < 0]
    
    btc_up_df.to_excel(writer, sheet_name="BTC UP (buy YES)", index=False)
    btc_down_df.to_excel(writer, sheet_name="BTC DOWN (buy NO)", index=False)
    
    # Profit matrix
    profit_matrix.to_excel(writer, sheet_name="Profit Matrix")
    win_rate_matrix.to_excel(writer, sheet_name="Win Rate Matrix")
    count_matrix.to_excel(writer, sheet_name="Sample Size Matrix")
    
    # Best combinations
    best_df.to_excel(writer, sheet_name="Best Combinations", index=False)

print(f"Saved comprehensive profit analysis to: {output_path}")
print(f"BTC change threshold: >= ${BTC_CHANGE_THRESHOLD}")
print(f"Sheets: All Trades, BTC UP (buy YES), BTC DOWN (buy NO), Profit Matrix, Win Rate Matrix, Sample Size Matrix, Best Combinations")
print(f"Total rows: {len(full_horizon_df)}, BTC UP: {len(btc_up_df)}, BTC DOWN: {len(btc_down_df)}")


PermissionError: [Errno 13] Permission denied: 'd:\\Investing\\lightspeed-15min\\profit_analysis_btc5.xlsx'

In [None]:
# Analyze profit by BTC direction separately
# Compare: BTC UP (buy YES) vs BTC DOWN (buy NO)

print("=" * 80)
print("PROFIT ANALYSIS BY DIRECTION")
print(f"  BTC change threshold: >= ${BTC_CHANGE_THRESHOLD}")
print("=" * 80)

for direction, label, asset in [(1, "BTC UP", "YES"), (-1, "BTC DOWN", "NO")]:
    dir_df = full_horizon_df[full_horizon_df["btc_direction"] == direction]
    print(f"\n{'='*40}")
    print(f"{label} → Buy {asset}")
    print(f"{'='*40}")
    print(f"Sample size: {len(dir_df)} trades")
    print()
    
    # Find best combinations for this direction
    dir_combos = []
    for entry_h in HORIZONS_MS:
        for exit_h in HORIZONS_MS:
            if exit_h <= entry_h:
                continue
            col_name = f"profit_{entry_h}_{exit_h}ms"
            valid_data = dir_df[col_name].dropna()
            if len(valid_data) < 50:  # Lower threshold per direction
                continue
            
            dir_combos.append({
                "entry_ms": entry_h,
                "exit_ms": exit_h,
                "mean_profit": valid_data.mean(),
                "win_rate": (valid_data > 0).mean() * 100,
                "n_trades": len(valid_data),
            })
    
    dir_best = pd.DataFrame(dir_combos).sort_values("mean_profit", ascending=False)
    print("Top 5 combinations by mean profit:")
    print(dir_best.head(5).to_string(index=False, float_format="{:.4f}".format))
    
    print()
    print("Top 5 combinations by win rate:")
    print(dir_best.sort_values("win_rate", ascending=False).head(5).to_string(index=False, float_format="{:.4f}".format))


## Stop Loss Strategy Analysis

This section tests a stop loss strategy that:
1. Triggers AFTER a configurable time mark (e.g., 200ms) - giving the trade time to develop
2. Exits if the loss exceeds a configurable threshold (e.g., 0.05 USDC)
3. Compares results WITH and WITHOUT stop loss to measure impact

**Logic:**
- Entry: Buy at ask price at entry horizon
- Monitor: Track bid price at each subsequent horizon
- Stop Loss: If `ask(entry) - bid(current) >= stop_loss_threshold` AND `current_time >= stop_loss_activation_ms`, exit immediately
- Otherwise: Exit at the normal exit horizon


In [None]:
# ============================================================================
# STOP LOSS CONFIGURATION
# ============================================================================

# Stop loss threshold in USDC (exit if loss exceeds this amount)
STOP_LOSS_THRESHOLD = 0.09

# Time (ms) after entry before stop loss can be triggered
# This gives the trade time to develop before we start monitoring for stop loss
STOP_LOSS_ACTIVATION_MS = 200

# Entry and exit horizons to test (same as before)
ENTRY_HORIZON_MS = 50  # Enter at this many ms after BTC change
EXIT_HORIZON_MS = 2000  # Target exit at this many ms (if stop loss not triggered)

# BTC change threshold (reuse from earlier or set here)
BTC_CHANGE_THRESHOLD_SL = 5  # Only consider BTC changes >= this value

print("=" * 70)
print("STOP LOSS CONFIGURATION")
print("=" * 70)
print(f"Stop Loss Threshold:      {STOP_LOSS_THRESHOLD} USDC")
print(f"Stop Loss Activation:     {STOP_LOSS_ACTIVATION_MS} ms after entry")
print(f"Entry Horizon:            {ENTRY_HORIZON_MS} ms after BTC change")
print(f"Target Exit Horizon:      {EXIT_HORIZON_MS} ms after BTC change")
print(f"BTC Change Threshold:     >= ${BTC_CHANGE_THRESHOLD_SL}")
print("=" * 70)


STOP LOSS CONFIGURATION
Stop Loss Threshold:      0.09 USDC
Stop Loss Activation:     200 ms after entry
Entry Horizon:            50 ms after BTC change
Target Exit Horizon:      2000 ms after BTC change
BTC Change Threshold:     >= $5


In [None]:
# ============================================================================
# STOP LOSS ANALYSIS: Compare WITH and WITHOUT stop loss
# ============================================================================
# Uses the full_df_all dataframe built earlier (or rebuilds if needed)
# Processes each trade and applies stop loss logic

import time
start_time = time.time()

# Define all horizons we'll check for stop loss (must be dense enough)
ALL_HORIZONS_MS = [50, 100, 200, 300, 500, 700, 1000, 1500, 2000, 2500, 3000, 3500, 4000, 4500, 5000]

# Filter horizons between entry and exit
MONITOR_HORIZONS = [h for h in ALL_HORIZONS_MS if ENTRY_HORIZON_MS < h <= EXIT_HORIZON_MS]
print(f"Monitoring horizons for stop loss: {MONITOR_HORIZONS}")

# Ensure we have the full dataframe with all thresholds
if 'full_df_all' not in dir() or full_df_all is None:
    print("full_df_all not found - please run cell 15 first (threshold analysis)")
else:
    # Filter to our threshold
    df_sl = full_df_all[full_df_all["abs_btc_change"] >= BTC_CHANGE_THRESHOLD_SL].copy()
    print(f"Processing {len(df_sl)} trades with |BTC change| >= ${BTC_CHANGE_THRESHOLD_SL}")
    
    # Results storage
    results = []
    
    for idx, row in df_sl.iterrows():
        btc_dir = row["btc_direction"]
        
        # Determine which side we're trading
        if btc_dir > 0:
            side_prefix = "yes"
        else:
            side_prefix = "no"
        
        # Entry price (ask at entry horizon)
        entry_ask_col = f"{side_prefix}_ask_{ENTRY_HORIZON_MS}ms"
        entry_ask = row.get(entry_ask_col, np.nan)
        
        if pd.isna(entry_ask):
            continue
        
        # === WITHOUT STOP LOSS: Simply exit at target horizon ===
        exit_bid_col = f"{side_prefix}_bid_{EXIT_HORIZON_MS}ms"
        exit_bid_no_sl = row.get(exit_bid_col, np.nan)
        profit_no_sl = exit_bid_no_sl - entry_ask if not pd.isna(exit_bid_no_sl) else np.nan
        
        # === WITH STOP LOSS: Check each monitoring horizon ===
        exit_bid_sl = np.nan
        exit_horizon_sl = EXIT_HORIZON_MS  # Default to target exit
        stop_loss_triggered = False
        
        for check_h in MONITOR_HORIZONS:
            # Only activate stop loss after the activation period
            time_since_entry = check_h - ENTRY_HORIZON_MS
            if time_since_entry < STOP_LOSS_ACTIVATION_MS:
                continue
            
            bid_col = f"{side_prefix}_bid_{check_h}ms"
            current_bid = row.get(bid_col, np.nan)
            
            if pd.isna(current_bid):
                continue
            
            # Calculate current loss (positive = loss)
            current_loss = entry_ask - current_bid
            
            # Check if stop loss should trigger
            if current_loss >= STOP_LOSS_THRESHOLD:
                exit_bid_sl = current_bid
                exit_horizon_sl = check_h
                stop_loss_triggered = True
                break
        
        # If stop loss didn't trigger, exit at target horizon
        if not stop_loss_triggered:
            exit_bid_sl = exit_bid_no_sl
        
        profit_sl = exit_bid_sl - entry_ask if not pd.isna(exit_bid_sl) else np.nan
        
        results.append({
            "btc_ts_ms": row["btc_ts_ms"],
            "btc_change": row["btc_change"],
            "btc_direction": btc_dir,
            "side": "YES" if btc_dir > 0 else "NO",
            "entry_ask": entry_ask,
            "exit_bid_no_sl": exit_bid_no_sl,
            "profit_no_sl": profit_no_sl,
            "exit_bid_sl": exit_bid_sl,
            "exit_horizon_sl": exit_horizon_sl,
            "stop_loss_triggered": stop_loss_triggered,
            "profit_sl": profit_sl,
        })
    
    # Create results dataframe
    sl_results_df = pd.DataFrame(results)
    elapsed = time.time() - start_time
    
    print(f"\n✓ Processed {len(sl_results_df)} trades in {elapsed:.2f} seconds")
    print(f"Stop loss triggered: {sl_results_df['stop_loss_triggered'].sum()} times "
          f"({100*sl_results_df['stop_loss_triggered'].mean():.1f}%)")
    
    sl_results_df.head(10)


Monitoring horizons for stop loss: [100, 200, 300, 500, 700, 1000, 1500, 2000]
Processing 3994 trades with |BTC change| >= $5

✓ Processed 3905 trades in 0.64 seconds
Stop loss triggered: 25 times (0.6%)


In [None]:
# ============================================================================
# COMPARISON: WITH vs WITHOUT Stop Loss
# ============================================================================

# Filter to valid trades (have both profits)
valid_sl = sl_results_df.dropna(subset=["profit_no_sl", "profit_sl"]).copy()

print("=" * 80)
print("STOP LOSS STRATEGY COMPARISON")
print("=" * 80)
print(f"Configuration:")
print(f"  Entry Horizon:          {ENTRY_HORIZON_MS} ms")
print(f"  Target Exit Horizon:    {EXIT_HORIZON_MS} ms")
print(f"  Stop Loss Threshold:    {STOP_LOSS_THRESHOLD} USDC")
print(f"  Stop Loss Activation:   {STOP_LOSS_ACTIVATION_MS} ms after entry")
print(f"  BTC Change Threshold:   >= ${BTC_CHANGE_THRESHOLD_SL}")
print(f"  Total Trades Analyzed:  {len(valid_sl)}")
print("=" * 80)

# Summary statistics
print("\n" + "-" * 80)
print("SUMMARY STATISTICS")
print("-" * 80)

no_sl_stats = valid_sl["profit_no_sl"].describe()
sl_stats = valid_sl["profit_sl"].describe()

print(f"{'Metric':<25} {'Without SL':>15} {'With SL':>15} {'Difference':>15}")
print("-" * 80)

# Key metrics
metrics = [
    ("Mean Profit", valid_sl["profit_no_sl"].mean(), valid_sl["profit_sl"].mean()),
    ("Median Profit", valid_sl["profit_no_sl"].median(), valid_sl["profit_sl"].median()),
    ("Std Dev", valid_sl["profit_no_sl"].std(), valid_sl["profit_sl"].std()),
    ("Min Profit", valid_sl["profit_no_sl"].min(), valid_sl["profit_sl"].min()),
    ("Max Profit", valid_sl["profit_no_sl"].max(), valid_sl["profit_sl"].max()),
    ("Total Profit", valid_sl["profit_no_sl"].sum(), valid_sl["profit_sl"].sum()),
    ("Win Rate %", 100*(valid_sl["profit_no_sl"] > 0).mean(), 100*(valid_sl["profit_sl"] > 0).mean()),
]

for name, no_sl_val, sl_val in metrics:
    diff = sl_val - no_sl_val
    diff_str = f"{diff:+.4f}" if abs(diff) < 100 else f"{diff:+.2f}"
    print(f"{name:<25} {no_sl_val:>15.4f} {sl_val:>15.4f} {diff_str:>15}")

# Stop loss impact
print("\n" + "-" * 80)
print("STOP LOSS TRIGGER ANALYSIS")
print("-" * 80)
sl_triggered = valid_sl[valid_sl["stop_loss_triggered"]]
sl_not_triggered = valid_sl[~valid_sl["stop_loss_triggered"]]

print(f"Stop loss triggered:     {len(sl_triggered):>6} trades ({100*len(sl_triggered)/len(valid_sl):.1f}%)")
print(f"Stop loss NOT triggered: {len(sl_not_triggered):>6} trades ({100*len(sl_not_triggered)/len(valid_sl):.1f}%)")

if len(sl_triggered) > 0:
    print(f"\nWhen stop loss TRIGGERED:")
    print(f"  Mean profit (with SL):      {sl_triggered['profit_sl'].mean():.4f} USDC")
    print(f"  Mean profit (without SL):   {sl_triggered['profit_no_sl'].mean():.4f} USDC")
    print(f"  Mean savings from SL:       {(sl_triggered['profit_sl'] - sl_triggered['profit_no_sl']).mean():.4f} USDC")
    print(f"  Exit horizons used:         {sl_triggered['exit_horizon_sl'].value_counts().to_dict()}")

if len(sl_not_triggered) > 0:
    print(f"\nWhen stop loss NOT triggered:")
    print(f"  Mean profit:                {sl_not_triggered['profit_sl'].mean():.4f} USDC")


STOP LOSS STRATEGY COMPARISON
Configuration:
  Entry Horizon:          50 ms
  Target Exit Horizon:    2000 ms
  Stop Loss Threshold:    0.09 USDC
  Stop Loss Activation:   200 ms after entry
  BTC Change Threshold:   >= $5
  Total Trades Analyzed:  3894

--------------------------------------------------------------------------------
SUMMARY STATISTICS
--------------------------------------------------------------------------------
Metric                         Without SL         With SL      Difference
--------------------------------------------------------------------------------
Mean Profit                        0.0034          0.0032         -0.0002
Median Profit                     -0.0100         -0.0100         +0.0000
Std Dev                            0.0367          0.0371         +0.0004
Min Profit                        -0.3400         -0.3100         +0.0300
Max Profit                         0.3000          0.3000         +0.0000
Total Profit                      13.3

In [None]:
# ============================================================================
# VISUALIZATION: Stop Loss Impact
# ============================================================================

from plotly.subplots import make_subplots
import plotly.graph_objects as go

fig = make_subplots(
    rows=2, cols=2,
    subplot_titles=(
        "Profit Distribution: Without vs With Stop Loss",
        "Cumulative Profit Over Trades",
        "Profit Difference (SL - No SL) per Trade",
        "Exit Horizon Distribution (When SL Triggered)"
    )
)

# 1. Profit Distribution Comparison (histogram)
fig.add_trace(
    go.Histogram(
        x=valid_sl["profit_no_sl"], 
        name="Without SL", 
        opacity=0.7,
        marker_color="red",
        nbinsx=50
    ),
    row=1, col=1
)
fig.add_trace(
    go.Histogram(
        x=valid_sl["profit_sl"], 
        name="With SL", 
        opacity=0.7,
        marker_color="green",
        nbinsx=50
    ),
    row=1, col=1
)

# 2. Cumulative Profit Over Trades
valid_sl_sorted = valid_sl.sort_values("btc_ts_ms").reset_index(drop=True)
fig.add_trace(
    go.Scatter(
        x=list(range(len(valid_sl_sorted))),
        y=valid_sl_sorted["profit_no_sl"].cumsum(),
        name="Cumulative (No SL)",
        line=dict(color="red", width=2)
    ),
    row=1, col=2
)
fig.add_trace(
    go.Scatter(
        x=list(range(len(valid_sl_sorted))),
        y=valid_sl_sorted["profit_sl"].cumsum(),
        name="Cumulative (With SL)",
        line=dict(color="green", width=2)
    ),
    row=1, col=2
)

# 3. Profit Difference per Trade (scatter)
valid_sl["profit_diff"] = valid_sl["profit_sl"] - valid_sl["profit_no_sl"]
fig.add_trace(
    go.Scatter(
        x=list(range(len(valid_sl))),
        y=valid_sl["profit_diff"].values,
        mode="markers",
        name="Profit Diff",
        marker=dict(
            color=["green" if d > 0 else "red" for d in valid_sl["profit_diff"]],
            size=4,
            opacity=0.5
        )
    ),
    row=2, col=1
)
fig.add_hline(y=0, line_dash="dash", line_color="gray", row=2, col=1)

# 4. Exit Horizon Distribution (when SL triggered)
if len(sl_triggered) > 0:
    exit_counts = sl_triggered["exit_horizon_sl"].value_counts().sort_index()
    fig.add_trace(
        go.Bar(
            x=[str(h) + "ms" for h in exit_counts.index],
            y=exit_counts.values,
            name="Exit Horizon",
            marker_color="steelblue"
        ),
        row=2, col=2
    )

# Layout
fig.update_layout(
    height=700,
    width=1100,
    title_text=f"Stop Loss Analysis: Threshold={STOP_LOSS_THRESHOLD} USDC, Activation={STOP_LOSS_ACTIVATION_MS}ms",
    showlegend=True,
    barmode="overlay"
)

fig.update_xaxes(title_text="Profit (USDC)", row=1, col=1)
fig.update_xaxes(title_text="Trade #", row=1, col=2)
fig.update_xaxes(title_text="Trade #", row=2, col=1)
fig.update_xaxes(title_text="Exit Horizon", row=2, col=2)

fig.update_yaxes(title_text="Count", row=1, col=1)
fig.update_yaxes(title_text="Cumulative Profit", row=1, col=2)
fig.update_yaxes(title_text="Profit Diff (SL - No SL)", row=2, col=1)
fig.update_yaxes(title_text="Count", row=2, col=2)

fig.show()


In [None]:
# ============================================================================
# PARAMETER SWEEP: Test multiple stop loss thresholds and activation times
# ============================================================================
# This helps find the optimal stop loss configuration

# Thresholds to test
SL_THRESHOLDS_TO_TEST = [0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.10, 0.15, 0.20]
# Activation times to test (ms after entry)
SL_ACTIVATIONS_TO_TEST = [100, 200, 300, 500, 700, 1000]

# Use the filtered dataset
df_sweep = full_df_all[full_df_all["abs_btc_change"] >= BTC_CHANGE_THRESHOLD_SL].copy()

print(f"Running parameter sweep on {len(df_sweep)} trades...")
print(f"Entry: {ENTRY_HORIZON_MS}ms, Target Exit: {EXIT_HORIZON_MS}ms")
print(f"Testing {len(SL_THRESHOLDS_TO_TEST)} thresholds x {len(SL_ACTIVATIONS_TO_TEST)} activation times")

sweep_results = []

for sl_threshold in tqdm(SL_THRESHOLDS_TO_TEST, desc="Testing thresholds"):
    for sl_activation in SL_ACTIVATIONS_TO_TEST:
        # Skip if activation is too close to exit (no room to monitor)
        if ENTRY_HORIZON_MS + sl_activation >= EXIT_HORIZON_MS:
            continue
        
        # Monitoring horizons for this activation
        monitor_horizons = [h for h in ALL_HORIZONS_MS 
                          if (h - ENTRY_HORIZON_MS) >= sl_activation and h <= EXIT_HORIZON_MS]
        
        if not monitor_horizons:
            continue
        
        total_profit_sl = 0
        total_profit_no_sl = 0
        n_valid = 0
        n_sl_triggered = 0
        n_winners_sl = 0
        n_winners_no_sl = 0
        
        for idx, row in df_sweep.iterrows():
            btc_dir = row["btc_direction"]
            side_prefix = "yes" if btc_dir > 0 else "no"
            
            entry_ask = row.get(f"{side_prefix}_ask_{ENTRY_HORIZON_MS}ms", np.nan)
            exit_bid_no_sl = row.get(f"{side_prefix}_bid_{EXIT_HORIZON_MS}ms", np.nan)
            
            if pd.isna(entry_ask) or pd.isna(exit_bid_no_sl):
                continue
            
            profit_no_sl = exit_bid_no_sl - entry_ask
            total_profit_no_sl += profit_no_sl
            if profit_no_sl > 0:
                n_winners_no_sl += 1
            
            # Check for stop loss
            exit_bid_sl = exit_bid_no_sl
            sl_hit = False
            
            for check_h in monitor_horizons:
                current_bid = row.get(f"{side_prefix}_bid_{check_h}ms", np.nan)
                if pd.isna(current_bid):
                    continue
                current_loss = entry_ask - current_bid
                if current_loss >= sl_threshold:
                    exit_bid_sl = current_bid
                    sl_hit = True
                    break
            
            profit_sl = exit_bid_sl - entry_ask
            total_profit_sl += profit_sl
            if profit_sl > 0:
                n_winners_sl += 1
            if sl_hit:
                n_sl_triggered += 1
            n_valid += 1
        
        if n_valid > 0:
            sweep_results.append({
                "sl_threshold": sl_threshold,
                "sl_activation_ms": sl_activation,
                "n_trades": n_valid,
                "total_profit_no_sl": total_profit_no_sl,
                "total_profit_sl": total_profit_sl,
                "profit_improvement": total_profit_sl - total_profit_no_sl,
                "mean_profit_no_sl": total_profit_no_sl / n_valid,
                "mean_profit_sl": total_profit_sl / n_valid,
                "win_rate_no_sl": 100 * n_winners_no_sl / n_valid,
                "win_rate_sl": 100 * n_winners_sl / n_valid,
                "sl_trigger_rate": 100 * n_sl_triggered / n_valid,
            })

sweep_df = pd.DataFrame(sweep_results)
sweep_df = sweep_df.sort_values("profit_improvement", ascending=False)

print("\n" + "=" * 100)
print("PARAMETER SWEEP RESULTS (sorted by profit improvement)")
print("=" * 100)
print(sweep_df.head(20).to_string(index=False, float_format="{:.4f}".format))


Running parameter sweep on 3994 trades...
Entry: 50ms, Target Exit: 2000ms
Testing 12 thresholds x 6 activation times


Testing thresholds:   0%|          | 0/12 [00:00<?, ?it/s]


PARAMETER SWEEP RESULTS (sorted by profit improvement)
 sl_threshold  sl_activation_ms  n_trades  total_profit_no_sl  total_profit_sl  profit_improvement  mean_profit_no_sl  mean_profit_sl  win_rate_no_sl  win_rate_sl  sl_trigger_rate
       0.2000              1000      3894             13.3410          13.3710              0.0300             0.0034          0.0034         33.7442      33.7442           0.0257
       0.1500              1000      3894             13.3410          13.2210             -0.1200             0.0034          0.0034         33.7442      33.7442           0.0514
       0.0800              1000      3894             13.3410          13.2110             -0.1300             0.0034          0.0034         33.7442      33.7442           0.8475
       0.0900              1000      3894             13.3410          13.2110             -0.1300             0.0034          0.0034         33.7442      33.7442           0.5393
       0.1000              1000      3894   

In [None]:
# ============================================================================
# VISUALIZATION: Parameter Sweep Results
# ============================================================================

# Heatmap of profit improvement by threshold and activation time
pivot_improvement = sweep_df.pivot(
    index="sl_threshold", 
    columns="sl_activation_ms", 
    values="profit_improvement"
)

fig_heatmap = px.imshow(
    pivot_improvement,
    labels=dict(x="Activation Time (ms)", y="Stop Loss Threshold (USDC)", color="Profit Improvement"),
    title=f"Stop Loss Profit Improvement Heatmap<br><sup>Entry={ENTRY_HORIZON_MS}ms, Exit={EXIT_HORIZON_MS}ms, BTC≥${BTC_CHANGE_THRESHOLD_SL}</sup>",
    color_continuous_scale="RdYlGn",
    color_continuous_midpoint=0,
    aspect="auto",
)
fig_heatmap.update_layout(width=800, height=500)
fig_heatmap.show()

# Line chart: profit improvement by threshold for each activation time
fig_lines = go.Figure()
for activation in sorted(sweep_df["sl_activation_ms"].unique()):
    subset = sweep_df[sweep_df["sl_activation_ms"] == activation].sort_values("sl_threshold")
    fig_lines.add_trace(go.Scatter(
        x=subset["sl_threshold"],
        y=subset["profit_improvement"],
        mode="lines+markers",
        name=f"Activation: {activation}ms"
    ))

fig_lines.add_hline(y=0, line_dash="dash", line_color="gray")
fig_lines.update_layout(
    title="Profit Improvement vs Stop Loss Threshold (by Activation Time)",
    xaxis_title="Stop Loss Threshold (USDC)",
    yaxis_title="Profit Improvement (USDC)",
    width=900,
    height=500,
    legend_title="Activation Time"
)
fig_lines.show()

# Best configuration summary
best_config = sweep_df.iloc[0]
print("\n" + "=" * 70)
print("BEST STOP LOSS CONFIGURATION")
print("=" * 70)
print(f"Stop Loss Threshold:    {best_config['sl_threshold']:.2f} USDC")
print(f"Activation Time:        {best_config['sl_activation_ms']:.0f} ms after entry")
print(f"Profit Improvement:     {best_config['profit_improvement']:.4f} USDC total")
print(f"Mean Profit (with SL):  {best_config['mean_profit_sl']:.4f} USDC")
print(f"Mean Profit (no SL):    {best_config['mean_profit_no_sl']:.4f} USDC")
print(f"Win Rate (with SL):     {best_config['win_rate_sl']:.1f}%")
print(f"Win Rate (no SL):       {best_config['win_rate_no_sl']:.1f}%")
print(f"SL Trigger Rate:        {best_config['sl_trigger_rate']:.1f}%")
print("=" * 70)



BEST STOP LOSS CONFIGURATION
Stop Loss Threshold:    0.20 USDC
Activation Time:        1000 ms after entry
Profit Improvement:     0.0300 USDC total
Mean Profit (with SL):  0.0034 USDC
Mean Profit (no SL):    0.0034 USDC
Win Rate (with SL):     33.7%
Win Rate (no SL):       33.7%
SL Trigger Rate:        0.0%


In [None]:
# ============================================================================
# ENTRY/EXIT SWEEP WITH OPTIMAL STOP LOSS
# ============================================================================
# Test various entry/exit combinations using the best stop loss config

# Use best stop loss config from sweep
BEST_SL_THRESHOLD = best_config["sl_threshold"]
BEST_SL_ACTIVATION = int(best_config["sl_activation_ms"])

print(f"Using best SL config: threshold={BEST_SL_THRESHOLD}, activation={BEST_SL_ACTIVATION}ms")

# Entry and exit horizons to test
ENTRY_HORIZONS = [50, 100, 200, 300]
EXIT_HORIZONS = [500, 700, 1000, 1500, 2000, 2500, 3000, 3500, 4000, 5000]

entry_exit_results = []

for entry_h in tqdm(ENTRY_HORIZONS, desc="Entry horizons"):
    for exit_h in EXIT_HORIZONS:
        if exit_h <= entry_h:
            continue
        
        # Monitoring horizons
        monitor_horizons = [h for h in ALL_HORIZONS_MS 
                          if (h - entry_h) >= BEST_SL_ACTIVATION and h <= exit_h]
        
        total_profit_sl = 0
        total_profit_no_sl = 0
        n_valid = 0
        n_sl_triggered = 0
        n_winners_sl = 0
        n_winners_no_sl = 0
        
        for idx, row in df_sweep.iterrows():
            btc_dir = row["btc_direction"]
            side_prefix = "yes" if btc_dir > 0 else "no"
            
            entry_ask = row.get(f"{side_prefix}_ask_{entry_h}ms", np.nan)
            exit_bid_no_sl = row.get(f"{side_prefix}_bid_{exit_h}ms", np.nan)
            
            if pd.isna(entry_ask) or pd.isna(exit_bid_no_sl):
                continue
            
            profit_no_sl = exit_bid_no_sl - entry_ask
            total_profit_no_sl += profit_no_sl
            if profit_no_sl > 0:
                n_winners_no_sl += 1
            
            # Check for stop loss
            exit_bid_sl = exit_bid_no_sl
            sl_hit = False
            
            for check_h in monitor_horizons:
                current_bid = row.get(f"{side_prefix}_bid_{check_h}ms", np.nan)
                if pd.isna(current_bid):
                    continue
                current_loss = entry_ask - current_bid
                if current_loss >= BEST_SL_THRESHOLD:
                    exit_bid_sl = current_bid
                    sl_hit = True
                    break
            
            profit_sl = exit_bid_sl - entry_ask
            total_profit_sl += profit_sl
            if profit_sl > 0:
                n_winners_sl += 1
            if sl_hit:
                n_sl_triggered += 1
            n_valid += 1
        
        if n_valid > 0:
            entry_exit_results.append({
                "entry_ms": entry_h,
                "exit_ms": exit_h,
                "n_trades": n_valid,
                "mean_profit_no_sl": total_profit_no_sl / n_valid,
                "mean_profit_sl": total_profit_sl / n_valid,
                "total_profit_no_sl": total_profit_no_sl,
                "total_profit_sl": total_profit_sl,
                "improvement": total_profit_sl - total_profit_no_sl,
                "win_rate_no_sl": 100 * n_winners_no_sl / n_valid,
                "win_rate_sl": 100 * n_winners_sl / n_valid,
                "sl_trigger_rate": 100 * n_sl_triggered / n_valid,
            })

entry_exit_df = pd.DataFrame(entry_exit_results)
entry_exit_df = entry_exit_df.sort_values("mean_profit_sl", ascending=False)

print("\n" + "=" * 110)
print(f"ENTRY/EXIT COMPARISON WITH STOP LOSS (SL={BEST_SL_THRESHOLD}, Activation={BEST_SL_ACTIVATION}ms)")
print("=" * 110)
print(entry_exit_df.to_string(index=False, float_format="{:.4f}".format))


Using best SL config: threshold=0.2, activation=1000ms


Entry horizons:   0%|          | 0/4 [00:00<?, ?it/s]


ENTRY/EXIT COMPARISON WITH STOP LOSS (SL=0.2, Activation=1000ms)
 entry_ms  exit_ms  n_trades  mean_profit_no_sl  mean_profit_sl  total_profit_no_sl  total_profit_sl  improvement  win_rate_no_sl  win_rate_sl  sl_trigger_rate
       50     2000      3894             0.0034          0.0034             13.3410          13.3710       0.0300         33.7442      33.7442           0.0257
       50     2500      3893             0.0032          0.0033             12.6420          12.6920       0.0500         34.2666      34.2666           0.0257
       50     3000      3892             0.0032          0.0032             12.5850          12.6450       0.0600         34.4296      34.4296           0.0257
       50     3500      3889             0.0032          0.0032             12.5150          12.5650       0.0500         34.9190      34.9190           0.0514
       50     4000      3887             0.0032          0.0032             12.4350          12.4650       0.0300         35.4258     

In [None]:
# ============================================================================
# VISUALIZATION: Entry/Exit Comparison WITH vs WITHOUT Stop Loss
# ============================================================================

from plotly.subplots import make_subplots

# Create pivot tables for heatmaps
pivot_no_sl = entry_exit_df.pivot(index="entry_ms", columns="exit_ms", values="mean_profit_no_sl")
pivot_sl = entry_exit_df.pivot(index="entry_ms", columns="exit_ms", values="mean_profit_sl")
pivot_improvement = entry_exit_df.pivot(index="entry_ms", columns="exit_ms", values="improvement")

# Find min/max for consistent color scales
vmin = min(pivot_no_sl.min().min(), pivot_sl.min().min())
vmax = max(pivot_no_sl.max().max(), pivot_sl.max().max())

fig = make_subplots(
    rows=1, cols=3,
    subplot_titles=(
        "Mean Profit WITHOUT Stop Loss",
        "Mean Profit WITH Stop Loss",
        "Improvement (SL - No SL)"
    ),
    horizontal_spacing=0.08
)

# Heatmap 1: Without Stop Loss
fig.add_trace(
    go.Heatmap(
        z=pivot_no_sl.values,
        x=[str(c) for c in pivot_no_sl.columns],
        y=[str(i) for i in pivot_no_sl.index],
        colorscale="RdYlGn",
        zmid=0,
        zmin=vmin, zmax=vmax,
        text=[[f"{v:.4f}" for v in row] for row in pivot_no_sl.values],
        texttemplate="%{text}",
        showscale=False,
        name="No SL"
    ),
    row=1, col=1
)

# Heatmap 2: With Stop Loss
fig.add_trace(
    go.Heatmap(
        z=pivot_sl.values,
        x=[str(c) for c in pivot_sl.columns],
        y=[str(i) for i in pivot_sl.index],
        colorscale="RdYlGn",
        zmid=0,
        zmin=vmin, zmax=vmax,
        text=[[f"{v:.4f}" for v in row] for row in pivot_sl.values],
        texttemplate="%{text}",
        showscale=False,
        name="With SL"
    ),
    row=1, col=2
)

# Heatmap 3: Improvement
fig.add_trace(
    go.Heatmap(
        z=pivot_improvement.values,
        x=[str(c) for c in pivot_improvement.columns],
        y=[str(i) for i in pivot_improvement.index],
        colorscale="RdYlGn",
        zmid=0,
        text=[[f"{v:.4f}" for v in row] for row in pivot_improvement.values],
        texttemplate="%{text}",
        colorbar=dict(title="USDC", x=1.02),
        name="Improvement"
    ),
    row=1, col=3
)

fig.update_xaxes(title_text="Exit (ms)", row=1, col=1)
fig.update_xaxes(title_text="Exit (ms)", row=1, col=2)
fig.update_xaxes(title_text="Exit (ms)", row=1, col=3)
fig.update_yaxes(title_text="Entry (ms)", row=1, col=1)

fig.update_layout(
    height=400,
    width=1200,
    title_text=f"Entry/Exit Mean Profit Comparison (SL={BEST_SL_THRESHOLD}, Activation={BEST_SL_ACTIVATION}ms, BTC≥${BTC_CHANGE_THRESHOLD_SL})"
)

fig.show()

# Summary
print("\n" + "=" * 70)
print("BEST ENTRY/EXIT WITH STOP LOSS")
print("=" * 70)
best_ee = entry_exit_df.iloc[0]
print(f"Entry:              {best_ee['entry_ms']:.0f} ms")
print(f"Exit:               {best_ee['exit_ms']:.0f} ms")
print(f"Mean Profit (SL):   {best_ee['mean_profit_sl']:.4f} USDC")
print(f"Mean Profit (NoSL): {best_ee['mean_profit_no_sl']:.4f} USDC")
print(f"Win Rate (SL):      {best_ee['win_rate_sl']:.1f}%")
print(f"Win Rate (NoSL):    {best_ee['win_rate_no_sl']:.1f}%")
print(f"SL Trigger Rate:    {best_ee['sl_trigger_rate']:.1f}%")
print("=" * 70)



BEST ENTRY/EXIT WITH STOP LOSS
Entry:              50 ms
Exit:               2000 ms
Mean Profit (SL):   0.0034 USDC
Mean Profit (NoSL): 0.0034 USDC
Win Rate (SL):      33.7%
Win Rate (NoSL):    33.7%
SL Trigger Rate:    0.0%


## Price Ceiling Filter Strategy

This section tests a strategy that avoids entering trades when the entry price is too high:
- **Skip trades where ask > threshold** (e.g., 0.95)
- Rationale: If ask is already at 0.95, max profit is only 0.05 (5 cents) but downside can be much larger
- This limits entries to cases with more upside potential

**Logic:**
- For BTC UP (buy YES): Skip if YES ask > threshold
- For BTC DOWN (buy NO): Skip if NO ask > threshold


In [None]:
# ============================================================================
# PRICE CEILING FILTER CONFIGURATION
# ============================================================================

# Maximum ask price to enter a trade (skip if ask > this)
PRICE_CEILING = 0.75

# Entry and exit horizons
ENTRY_HORIZON_CEILING = 50  # ms after BTC change
EXIT_HORIZON_CEILING = 2000  # ms after BTC change

# BTC change threshold (reuse from earlier)
BTC_CHANGE_THRESHOLD_CEILING = 5  # Only consider BTC changes >= this value

print("=" * 70)
print("PRICE CEILING FILTER CONFIGURATION")
print("=" * 70)
print(f"Price Ceiling:           {PRICE_CEILING} USDC (skip if ask > this)")
print(f"Entry Horizon:           {ENTRY_HORIZON_CEILING} ms after BTC change")
print(f"Exit Horizon:            {EXIT_HORIZON_CEILING} ms after BTC change")
print(f"BTC Change Threshold:    >= ${BTC_CHANGE_THRESHOLD_CEILING}")
print("=" * 70)


PRICE CEILING FILTER CONFIGURATION
Price Ceiling:           0.75 USDC (skip if ask > this)
Entry Horizon:           50 ms after BTC change
Exit Horizon:            2000 ms after BTC change
BTC Change Threshold:    >= $5


In [None]:
# ============================================================================
# PRICE CEILING ANALYSIS: Compare WITH and WITHOUT filter
# ============================================================================

import time
start_time = time.time()

# Use the full dataframe from earlier analysis
if 'full_df_all' not in dir() or full_df_all is None:
    print("full_df_all not found - please run cell 15 first (threshold analysis)")
else:
    # Filter to our BTC threshold
    df_ceiling = full_df_all[full_df_all["abs_btc_change"] >= BTC_CHANGE_THRESHOLD_CEILING].copy()
    print(f"Total trades with |BTC change| >= ${BTC_CHANGE_THRESHOLD_CEILING}: {len(df_ceiling)}")
    
    # Results storage
    results_no_filter = []
    results_with_filter = []
    
    for idx, row in df_ceiling.iterrows():
        btc_dir = row["btc_direction"]
        
        # Determine which side we're trading
        if btc_dir > 0:
            side_prefix = "yes"
        else:
            side_prefix = "no"
        
        # Entry and exit prices
        entry_ask_col = f"{side_prefix}_ask_{ENTRY_HORIZON_CEILING}ms"
        exit_bid_col = f"{side_prefix}_bid_{EXIT_HORIZON_CEILING}ms"
        
        entry_ask = row.get(entry_ask_col, np.nan)
        exit_bid = row.get(exit_bid_col, np.nan)
        
        if pd.isna(entry_ask) or pd.isna(exit_bid):
            continue
        
        profit = exit_bid - entry_ask
        
        trade_info = {
            "btc_ts_ms": row["btc_ts_ms"],
            "btc_change": row["btc_change"],
            "btc_direction": btc_dir,
            "side": "YES" if btc_dir > 0 else "NO",
            "entry_ask": entry_ask,
            "exit_bid": exit_bid,
            "profit": profit,
        }
        
        # Without filter: include all trades
        results_no_filter.append(trade_info.copy())
        
        # With filter: only include if entry_ask <= PRICE_CEILING
        if entry_ask <= PRICE_CEILING:
            trade_info["filtered"] = False
            results_with_filter.append(trade_info.copy())
    
    # Create dataframes
    df_no_filter = pd.DataFrame(results_no_filter)
    df_with_filter = pd.DataFrame(results_with_filter)
    
    elapsed = time.time() - start_time
    
    print(f"\n✓ Processed in {elapsed:.2f} seconds")
    print(f"Trades WITHOUT filter: {len(df_no_filter)}")
    print(f"Trades WITH filter (ask <= {PRICE_CEILING}): {len(df_with_filter)}")
    print(f"Trades FILTERED OUT (ask > {PRICE_CEILING}): {len(df_no_filter) - len(df_with_filter)}")
    
    # Show sample of filtered trades
    print(f"\nEntry ask price distribution:")
    print(df_no_filter["entry_ask"].describe())


Total trades with |BTC change| >= $5: 3994

✓ Processed in 0.44 seconds
Trades WITHOUT filter: 3894
Trades WITH filter (ask <= 0.75): 2743
Trades FILTERED OUT (ask > 0.75): 1151

Entry ask price distribution:
count    3894.000000
mean        0.524206
std         0.303514
min         0.003000
25%         0.250000
50%         0.540000
75%         0.800000
max         0.999000
Name: entry_ask, dtype: float64


In [None]:
# ============================================================================
# COMPARISON: WITH vs WITHOUT Price Ceiling Filter
# ============================================================================

print("=" * 80)
print("PRICE CEILING FILTER COMPARISON")
print("=" * 80)
print(f"Configuration:")
print(f"  Price Ceiling:          {PRICE_CEILING} USDC (skip if ask > this)")
print(f"  Entry Horizon:          {ENTRY_HORIZON_CEILING} ms")
print(f"  Exit Horizon:           {EXIT_HORIZON_CEILING} ms")
print(f"  BTC Change Threshold:   >= ${BTC_CHANGE_THRESHOLD_CEILING}")
print("=" * 80)

# Summary statistics
print("\n" + "-" * 80)
print("SUMMARY STATISTICS")
print("-" * 80)

print(f"{'Metric':<25} {'No Filter':>15} {'With Filter':>15} {'Difference':>15}")
print("-" * 80)

# Key metrics
n_no = len(df_no_filter)
n_with = len(df_with_filter)

metrics = [
    ("# Trades", n_no, n_with),
    ("Mean Profit", df_no_filter["profit"].mean(), df_with_filter["profit"].mean() if n_with > 0 else np.nan),
    ("Median Profit", df_no_filter["profit"].median(), df_with_filter["profit"].median() if n_with > 0 else np.nan),
    ("Std Dev", df_no_filter["profit"].std(), df_with_filter["profit"].std() if n_with > 0 else np.nan),
    ("Min Profit", df_no_filter["profit"].min(), df_with_filter["profit"].min() if n_with > 0 else np.nan),
    ("Max Profit", df_no_filter["profit"].max(), df_with_filter["profit"].max() if n_with > 0 else np.nan),
    ("Total Profit", df_no_filter["profit"].sum(), df_with_filter["profit"].sum() if n_with > 0 else np.nan),
    ("Win Rate %", 100*(df_no_filter["profit"] > 0).mean(), 100*(df_with_filter["profit"] > 0).mean() if n_with > 0 else np.nan),
    ("Mean Entry Ask", df_no_filter["entry_ask"].mean(), df_with_filter["entry_ask"].mean() if n_with > 0 else np.nan),
]

for name, no_val, with_val in metrics:
    if pd.isna(with_val):
        diff_str = "N/A"
    else:
        diff = with_val - no_val
        diff_str = f"{diff:+.4f}" if abs(diff) < 100 else f"{diff:+.0f}"
    print(f"{name:<25} {no_val:>15.4f} {with_val:>15.4f} {diff_str:>15}")

# Analyze the filtered-out trades
print("\n" + "-" * 80)
print("ANALYSIS OF FILTERED-OUT TRADES (ask > " + str(PRICE_CEILING) + ")")
print("-" * 80)

# Get trades that were filtered out
filtered_out_mask = df_no_filter["entry_ask"] > PRICE_CEILING
filtered_out = df_no_filter[filtered_out_mask]

if len(filtered_out) > 0:
    print(f"Number of trades filtered out:  {len(filtered_out)}")
    print(f"Mean profit of filtered trades: {filtered_out['profit'].mean():.4f} USDC")
    print(f"Total profit of filtered trades: {filtered_out['profit'].sum():.4f} USDC")
    print(f"Win rate of filtered trades:    {100*(filtered_out['profit'] > 0).mean():.1f}%")
    print(f"Mean entry ask (filtered):      {filtered_out['entry_ask'].mean():.4f}")
    print(f"\nBy keeping these trades OUT, we {'AVOIDED' if filtered_out['profit'].sum() < 0 else 'MISSED'} "
          f"{abs(filtered_out['profit'].sum()):.4f} USDC")
else:
    print("No trades were filtered out at this threshold.")


PRICE CEILING FILTER COMPARISON
Configuration:
  Price Ceiling:          0.75 USDC (skip if ask > this)
  Entry Horizon:          50 ms
  Exit Horizon:           2000 ms
  BTC Change Threshold:   >= $5

--------------------------------------------------------------------------------
SUMMARY STATISTICS
--------------------------------------------------------------------------------
Metric                          No Filter     With Filter      Difference
--------------------------------------------------------------------------------
# Trades                        3894.0000       2743.0000           -1151
Mean Profit                        0.0034          0.0072         +0.0037
Median Profit                     -0.0100          0.0000         +0.0100
Std Dev                            0.0367          0.0411         +0.0044
Min Profit                        -0.3400         -0.3400         +0.0000
Max Profit                         0.3000          0.3000         +0.0000
Total Profit     

In [None]:
# ============================================================================
# VISUALIZATION: Price Ceiling Filter Impact
# ============================================================================

from plotly.subplots import make_subplots
import plotly.graph_objects as go

fig = make_subplots(
    rows=2, cols=2,
    subplot_titles=(
        "Entry Ask Price Distribution",
        "Profit by Entry Ask Price",
        "Profit Distribution: With vs Without Filter",
        "Cumulative Profit Over Trades"
    )
)

# 1. Entry Ask Price Distribution with threshold line
fig.add_trace(
    go.Histogram(
        x=df_no_filter["entry_ask"], 
        name="Entry Ask", 
        nbinsx=30,
        marker_color="steelblue"
    ),
    row=1, col=1
)
fig.add_vline(x=PRICE_CEILING, line_dash="dash", line_color="red", 
              annotation_text=f"Ceiling: {PRICE_CEILING}", row=1, col=1)

# 2. Profit by Entry Ask Price (scatter)
fig.add_trace(
    go.Scatter(
        x=df_no_filter["entry_ask"],
        y=df_no_filter["profit"],
        mode="markers",
        name="Profit vs Ask",
        marker=dict(
            color=["green" if p > 0 else "red" for p in df_no_filter["profit"]],
            size=5,
            opacity=0.5
        )
    ),
    row=1, col=2
)
fig.add_vline(x=PRICE_CEILING, line_dash="dash", line_color="red", row=1, col=2)
fig.add_hline(y=0, line_dash="dash", line_color="gray", row=1, col=2)

# 3. Profit Distribution Comparison
fig.add_trace(
    go.Histogram(
        x=df_no_filter["profit"], 
        name="No Filter", 
        opacity=0.6,
        marker_color="red",
        nbinsx=40
    ),
    row=2, col=1
)
if len(df_with_filter) > 0:
    fig.add_trace(
        go.Histogram(
            x=df_with_filter["profit"], 
            name="With Filter", 
            opacity=0.6,
            marker_color="green",
            nbinsx=40
        ),
        row=2, col=1
    )

# 4. Cumulative Profit Over Trades
df_no_sorted = df_no_filter.sort_values("btc_ts_ms").reset_index(drop=True)
fig.add_trace(
    go.Scatter(
        x=list(range(len(df_no_sorted))),
        y=df_no_sorted["profit"].cumsum(),
        name="Cumulative (No Filter)",
        line=dict(color="red", width=2)
    ),
    row=2, col=2
)

if len(df_with_filter) > 0:
    df_with_sorted = df_with_filter.sort_values("btc_ts_ms").reset_index(drop=True)
    fig.add_trace(
        go.Scatter(
            x=list(range(len(df_with_sorted))),
            y=df_with_sorted["profit"].cumsum(),
            name="Cumulative (With Filter)",
            line=dict(color="green", width=2)
        ),
        row=2, col=2
    )

# Layout
fig.update_layout(
    height=700,
    width=1100,
    title_text=f"Price Ceiling Filter Analysis (Ceiling = {PRICE_CEILING})",
    showlegend=True,
    barmode="overlay"
)

fig.update_xaxes(title_text="Entry Ask Price", row=1, col=1)
fig.update_xaxes(title_text="Entry Ask Price", row=1, col=2)
fig.update_xaxes(title_text="Profit (USDC)", row=2, col=1)
fig.update_xaxes(title_text="Trade #", row=2, col=2)

fig.update_yaxes(title_text="Count", row=1, col=1)
fig.update_yaxes(title_text="Profit (USDC)", row=1, col=2)
fig.update_yaxes(title_text="Count", row=2, col=1)
fig.update_yaxes(title_text="Cumulative Profit", row=2, col=2)

fig.show()


In [None]:
# ============================================================================
# PARAMETER SWEEP: Test multiple price ceiling thresholds
# ============================================================================

# Thresholds to test (skip if ask > threshold)
CEILING_THRESHOLDS = [0.50, 0.55, 0.60, 0.65, 0.70, 0.75, 0.80, 0.85, 0.90, 0.92, 0.94, 0.95, 0.96, 0.98, 1.00]

print(f"Testing {len(CEILING_THRESHOLDS)} price ceiling thresholds...")
print(f"Entry: {ENTRY_HORIZON_CEILING}ms, Exit: {EXIT_HORIZON_CEILING}ms, BTC >= ${BTC_CHANGE_THRESHOLD_CEILING}")

ceiling_results = []

# Baseline (no filter = ceiling of 1.0)
baseline_profit = df_no_filter["profit"].sum()
baseline_mean = df_no_filter["profit"].mean()
baseline_win_rate = 100 * (df_no_filter["profit"] > 0).mean()
baseline_n = len(df_no_filter)

for ceiling in tqdm(CEILING_THRESHOLDS, desc="Testing ceilings"):
    # Filter trades
    filtered = df_no_filter[df_no_filter["entry_ask"] <= ceiling]
    
    n_trades = len(filtered)
    if n_trades == 0:
        continue
    
    total_profit = filtered["profit"].sum()
    mean_profit = filtered["profit"].mean()
    win_rate = 100 * (filtered["profit"] > 0).mean()
    trades_skipped = baseline_n - n_trades
    
    # What profit did we avoid by skipping high-ask trades?
    skipped_trades = df_no_filter[df_no_filter["entry_ask"] > ceiling]
    skipped_profit = skipped_trades["profit"].sum() if len(skipped_trades) > 0 else 0
    
    ceiling_results.append({
        "ceiling": ceiling,
        "n_trades": n_trades,
        "trades_skipped": trades_skipped,
        "pct_trades_kept": 100 * n_trades / baseline_n,
        "total_profit": total_profit,
        "mean_profit": mean_profit,
        "win_rate": win_rate,
        "skipped_profit": skipped_profit,
        "profit_vs_baseline": total_profit - baseline_profit,
        "mean_vs_baseline": mean_profit - baseline_mean,
    })

ceiling_df = pd.DataFrame(ceiling_results)

print("\n" + "=" * 120)
print("PRICE CEILING SWEEP RESULTS")
print("=" * 120)
print(f"Baseline (no filter): {baseline_n} trades, total profit = {baseline_profit:.4f}, mean = {baseline_mean:.4f}, win rate = {baseline_win_rate:.1f}%")
print("-" * 120)
print(ceiling_df.to_string(index=False, float_format="{:.4f}".format))


Testing 15 price ceiling thresholds...
Entry: 50ms, Exit: 2000ms, BTC >= $5


Testing ceilings:   0%|          | 0/15 [00:00<?, ?it/s]


PRICE CEILING SWEEP RESULTS
Baseline (no filter): 3894 trades, total profit = 13.3410, mean = 0.0034, win rate = 33.7%
------------------------------------------------------------------------------------------------------------------------
 ceiling  n_trades  trades_skipped  pct_trades_kept  total_profit  mean_profit  win_rate  skipped_profit  profit_vs_baseline  mean_vs_baseline
  0.5000      1829            2065          46.9697       10.8210       0.0059   34.2810          2.5200             -2.5200            0.0025
  0.5500      1971            1923          50.6163       13.2410       0.0067   36.0731          0.1000             -0.1000            0.0033
  0.6000      2143            1751          55.0334       15.0110       0.0070   37.0509         -1.6700              1.6700            0.0036
  0.6500      2363            1531          60.6831       17.4110       0.0074   38.5950         -4.0700              4.0700            0.0039
  0.7000      2536            1358          

In [None]:
# ============================================================================
# VISUALIZATION: Price Ceiling Sweep Results
# ============================================================================

fig = make_subplots(
    rows=2, cols=2,
    subplot_titles=(
        "Total Profit by Ceiling Threshold",
        "Mean Profit by Ceiling Threshold",
        "Win Rate by Ceiling Threshold",
        "# Trades by Ceiling Threshold"
    )
)

# 1. Total Profit
fig.add_trace(
    go.Scatter(
        x=ceiling_df["ceiling"],
        y=ceiling_df["total_profit"],
        mode="lines+markers",
        name="Total Profit",
        line=dict(color="green", width=2),
        marker=dict(size=8)
    ),
    row=1, col=1
)
fig.add_hline(y=baseline_profit, line_dash="dash", line_color="red", 
              annotation_text="Baseline", row=1, col=1)

# 2. Mean Profit
fig.add_trace(
    go.Scatter(
        x=ceiling_df["ceiling"],
        y=ceiling_df["mean_profit"],
        mode="lines+markers",
        name="Mean Profit",
        line=dict(color="blue", width=2),
        marker=dict(size=8)
    ),
    row=1, col=2
)
fig.add_hline(y=baseline_mean, line_dash="dash", line_color="red", row=1, col=2)

# 3. Win Rate
fig.add_trace(
    go.Scatter(
        x=ceiling_df["ceiling"],
        y=ceiling_df["win_rate"],
        mode="lines+markers",
        name="Win Rate",
        line=dict(color="purple", width=2),
        marker=dict(size=8)
    ),
    row=2, col=1
)
fig.add_hline(y=baseline_win_rate, line_dash="dash", line_color="red", row=2, col=1)
fig.add_hline(y=50, line_dash="dot", line_color="gray", row=2, col=1)

# 4. Number of Trades
fig.add_trace(
    go.Bar(
        x=ceiling_df["ceiling"],
        y=ceiling_df["n_trades"],
        name="# Trades",
        marker_color="steelblue"
    ),
    row=2, col=2
)
fig.add_hline(y=baseline_n, line_dash="dash", line_color="red", row=2, col=2)

fig.update_xaxes(title_text="Price Ceiling", row=1, col=1)
fig.update_xaxes(title_text="Price Ceiling", row=1, col=2)
fig.update_xaxes(title_text="Price Ceiling", row=2, col=1)
fig.update_xaxes(title_text="Price Ceiling", row=2, col=2)

fig.update_yaxes(title_text="Total Profit (USDC)", row=1, col=1)
fig.update_yaxes(title_text="Mean Profit (USDC)", row=1, col=2)
fig.update_yaxes(title_text="Win Rate (%)", row=2, col=1)
fig.update_yaxes(title_text="# Trades", row=2, col=2)

fig.update_layout(
    height=700,
    width=1100,
    title_text="Price Ceiling Threshold Analysis (Red dashed = baseline/no filter)",
    showlegend=False
)

fig.show()

# Find best ceiling
best_total_idx = ceiling_df["total_profit"].idxmax()
best_mean_idx = ceiling_df["mean_profit"].idxmax()

print("\n" + "=" * 70)
print("BEST PRICE CEILING CONFIGURATIONS")
print("=" * 70)
print(f"\nBest by TOTAL PROFIT:")
best_total = ceiling_df.loc[best_total_idx]
print(f"  Ceiling:        {best_total['ceiling']:.2f}")
print(f"  Total Profit:   {best_total['total_profit']:.4f} USDC")
print(f"  vs Baseline:    {best_total['profit_vs_baseline']:+.4f} USDC")
print(f"  Trades:         {best_total['n_trades']:.0f} ({best_total['pct_trades_kept']:.1f}% of baseline)")
print(f"  Win Rate:       {best_total['win_rate']:.1f}%")

print(f"\nBest by MEAN PROFIT:")
best_mean = ceiling_df.loc[best_mean_idx]
print(f"  Ceiling:        {best_mean['ceiling']:.2f}")
print(f"  Mean Profit:    {best_mean['mean_profit']:.4f} USDC")
print(f"  vs Baseline:    {best_mean['mean_vs_baseline']:+.4f} USDC")
print(f"  Trades:         {best_mean['n_trades']:.0f} ({best_mean['pct_trades_kept']:.1f}% of baseline)")
print(f"  Win Rate:       {best_mean['win_rate']:.1f}%")
print("=" * 70)



BEST PRICE CEILING CONFIGURATIONS

Best by TOTAL PROFIT:
  Ceiling:        0.75
  Total Profit:   19.6610 USDC
  vs Baseline:    +6.3200 USDC
  Trades:         2743 (70.4% of baseline)
  Win Rate:       39.4%

Best by MEAN PROFIT:
  Ceiling:        0.65
  Mean Profit:    0.0074 USDC
  vs Baseline:    +0.0039 USDC
  Trades:         2363 (60.7% of baseline)
  Win Rate:       38.6%


In [None]:
# ============================================================================
# COMBINED ANALYSIS: Price Ceiling + Entry/Exit Optimization
# ============================================================================
# Test price ceiling filter across all entry/exit combinations

# Use the best ceiling from the sweep (or override)
BEST_CEILING = best_total["ceiling"]
print(f"Using price ceiling: {BEST_CEILING}")

# Entry and exit horizons to test
ENTRY_HORIZONS_C = [50, 100, 200, 300]
EXIT_HORIZONS_C = [500, 700, 1000, 1500, 2000, 2500, 3000, 3500, 4000, 5000]

combined_results = []

for entry_h in tqdm(ENTRY_HORIZONS_C, desc="Entry horizons"):
    for exit_h in EXIT_HORIZONS_C:
        if exit_h <= entry_h:
            continue
        
        # Process each trade
        total_no_filter = 0
        total_with_filter = 0
        n_no_filter = 0
        n_with_filter = 0
        wins_no_filter = 0
        wins_with_filter = 0
        
        for idx, row in df_ceiling.iterrows():
            btc_dir = row["btc_direction"]
            side_prefix = "yes" if btc_dir > 0 else "no"
            
            entry_ask = row.get(f"{side_prefix}_ask_{entry_h}ms", np.nan)
            exit_bid = row.get(f"{side_prefix}_bid_{exit_h}ms", np.nan)
            
            if pd.isna(entry_ask) or pd.isna(exit_bid):
                continue
            
            profit = exit_bid - entry_ask
            
            # Without filter
            total_no_filter += profit
            n_no_filter += 1
            if profit > 0:
                wins_no_filter += 1
            
            # With filter
            if entry_ask <= BEST_CEILING:
                total_with_filter += profit
                n_with_filter += 1
                if profit > 0:
                    wins_with_filter += 1
        
        if n_no_filter > 0 and n_with_filter > 0:
            combined_results.append({
                "entry_ms": entry_h,
                "exit_ms": exit_h,
                "n_trades_no_filter": n_no_filter,
                "n_trades_with_filter": n_with_filter,
                "mean_no_filter": total_no_filter / n_no_filter,
                "mean_with_filter": total_with_filter / n_with_filter,
                "total_no_filter": total_no_filter,
                "total_with_filter": total_with_filter,
                "improvement": total_with_filter - total_no_filter,
                "win_rate_no_filter": 100 * wins_no_filter / n_no_filter,
                "win_rate_with_filter": 100 * wins_with_filter / n_with_filter,
            })

combined_df = pd.DataFrame(combined_results)
combined_df = combined_df.sort_values("mean_with_filter", ascending=False)

print("\n" + "=" * 130)
print(f"ENTRY/EXIT COMPARISON WITH PRICE CEILING FILTER (ceiling = {BEST_CEILING})")
print("=" * 130)
print(combined_df.head(15).to_string(index=False, float_format="{:.4f}".format))

# Best overall
best_combo = combined_df.iloc[0]
print("\n" + "=" * 70)
print("BEST ENTRY/EXIT WITH PRICE CEILING FILTER")
print("=" * 70)
print(f"Price Ceiling:     {BEST_CEILING}")
print(f"Entry:             {best_combo['entry_ms']:.0f} ms")
print(f"Exit:              {best_combo['exit_ms']:.0f} ms")
print(f"Mean Profit:       {best_combo['mean_with_filter']:.4f} USDC (vs {best_combo['mean_no_filter']:.4f} without filter)")
print(f"Total Profit:      {best_combo['total_with_filter']:.4f} USDC (vs {best_combo['total_no_filter']:.4f} without filter)")
print(f"Win Rate:          {best_combo['win_rate_with_filter']:.1f}% (vs {best_combo['win_rate_no_filter']:.1f}% without filter)")
print(f"Trades:            {best_combo['n_trades_with_filter']:.0f} (vs {best_combo['n_trades_no_filter']:.0f} without filter)")
print("=" * 70)


Using price ceiling: 0.75


Entry horizons:   0%|          | 0/4 [00:00<?, ?it/s]


ENTRY/EXIT COMPARISON WITH PRICE CEILING FILTER (ceiling = 0.75)
 entry_ms  exit_ms  n_trades_no_filter  n_trades_with_filter  mean_no_filter  mean_with_filter  total_no_filter  total_with_filter  improvement  win_rate_no_filter  win_rate_with_filter
       50     4000                3887                  2737          0.0032            0.0073          12.4350            19.9190       7.4840             35.4258               41.2130
       50     2000                3894                  2743          0.0034            0.0072          13.3410            19.6610       6.3200             33.7442               39.3729
       50     3000                3892                  2741          0.0032            0.0072          12.5850            19.6000       7.0150             34.4296               39.8030
       50     3500                3889                  2738          0.0032            0.0071          12.5150            19.5590       7.0440             34.9190               40.5771
    

In [None]:
# ============================================================================
# VISUALIZATION: Entry/Exit Heatmaps WITH vs WITHOUT Price Ceiling
# ============================================================================

# Create pivot tables
pivot_no_filter = combined_df.pivot(index="entry_ms", columns="exit_ms", values="mean_no_filter")
pivot_with_filter = combined_df.pivot(index="entry_ms", columns="exit_ms", values="mean_with_filter")
pivot_improvement = combined_df.pivot(index="entry_ms", columns="exit_ms", values="improvement")

# Find min/max for consistent color scales
vmin = min(pivot_no_filter.min().min(), pivot_with_filter.min().min())
vmax = max(pivot_no_filter.max().max(), pivot_with_filter.max().max())

fig = make_subplots(
    rows=1, cols=3,
    subplot_titles=(
        "Mean Profit WITHOUT Filter",
        f"Mean Profit WITH Filter (ceiling={BEST_CEILING})",
        "Total Profit Improvement"
    ),
    horizontal_spacing=0.08
)

# Heatmap 1: Without Filter
fig.add_trace(
    go.Heatmap(
        z=pivot_no_filter.values,
        x=[str(c) for c in pivot_no_filter.columns],
        y=[str(i) for i in pivot_no_filter.index],
        colorscale="RdYlGn",
        zmid=0,
        zmin=vmin, zmax=vmax,
        text=[[f"{v:.4f}" for v in row] for row in pivot_no_filter.values],
        texttemplate="%{text}",
        showscale=False,
        name="No Filter"
    ),
    row=1, col=1
)

# Heatmap 2: With Filter
fig.add_trace(
    go.Heatmap(
        z=pivot_with_filter.values,
        x=[str(c) for c in pivot_with_filter.columns],
        y=[str(i) for i in pivot_with_filter.index],
        colorscale="RdYlGn",
        zmid=0,
        zmin=vmin, zmax=vmax,
        text=[[f"{v:.4f}" for v in row] for row in pivot_with_filter.values],
        texttemplate="%{text}",
        showscale=False,
        name="With Filter"
    ),
    row=1, col=2
)

# Heatmap 3: Improvement
fig.add_trace(
    go.Heatmap(
        z=pivot_improvement.values,
        x=[str(c) for c in pivot_improvement.columns],
        y=[str(i) for i in pivot_improvement.index],
        colorscale="RdYlGn",
        zmid=0,
        text=[[f"{v:.2f}" for v in row] for row in pivot_improvement.values],
        texttemplate="%{text}",
        colorbar=dict(title="USDC", x=1.02),
        name="Improvement"
    ),
    row=1, col=3
)

fig.update_xaxes(title_text="Exit (ms)", row=1, col=1)
fig.update_xaxes(title_text="Exit (ms)", row=1, col=2)
fig.update_xaxes(title_text="Exit (ms)", row=1, col=3)
fig.update_yaxes(title_text="Entry (ms)", row=1, col=1)

fig.update_layout(
    height=400,
    width=1200,
    title_text=f"Entry/Exit Mean Profit: WITH vs WITHOUT Price Ceiling Filter (ceiling={BEST_CEILING}, BTC≥${BTC_CHANGE_THRESHOLD_CEILING})"
)

fig.show()


## BTC Threshold Analysis WITH Price Ceiling Filter (0.75)

Redo the BTC change threshold analysis from earlier, but now with the 0.75 price ceiling filter applied.
This filters out trades where the entry ask price is > 0.75, limiting entries to cases with more upside potential.


In [None]:
# ============================================================================
# BTC THRESHOLD ANALYSIS WITH PRICE CEILING FILTER
# ============================================================================
# Redo the threshold analysis with 0.75 price ceiling applied

PRICE_CEILING_THRESH = 0.75  # Only enter if ask <= this

# Time horizons (same as before)
HORIZONS_MS_THRESH = [50, 100, 200, 300, 500, 700, 1000, 1500, 2000, 2500, 3000, 3500, 4000, 4500, 5000]

# BTC thresholds to test
BTC_THRESHOLDS = list(range(1, 21))

print("=" * 100)
print(f"BTC THRESHOLD ANALYSIS WITH PRICE CEILING = {PRICE_CEILING_THRESH}")
print("=" * 100)

# Use full_df_all from earlier
if 'full_df_all' not in dir() or full_df_all is None:
    print("full_df_all not found - please run cell 15 first")
else:
    # Get profit columns
    profit_cols = [c for c in full_df_all.columns if c.startswith("profit_")]
    
    threshold_results_filtered = []
    threshold_results_unfiltered = []
    
    for thresh in tqdm(BTC_THRESHOLDS, desc="Testing thresholds"):
        # Filter by BTC threshold
        df_thresh = full_df_all[full_df_all["abs_btc_change"] >= thresh].copy()
        n_total = len(df_thresh)
        
        if n_total < 10:
            continue
        
        # For each trade, determine if it passes the ceiling filter
        # We need to check the entry ask at each entry horizon
        
        # UNFILTERED analysis (same as before)
        all_profits_unfiltered = []
        for col in profit_cols:
            valid = df_thresh[col].dropna()
            if len(valid) > 0:
                all_profits_unfiltered.extend(valid.tolist())
        
        if all_profits_unfiltered:
            mean_unfiltered = np.mean(all_profits_unfiltered)
            total_unfiltered = np.sum(all_profits_unfiltered)
            win_rate_unfiltered = 100 * sum(1 for p in all_profits_unfiltered if p > 0) / len(all_profits_unfiltered)
            n_trades_unfiltered = len(all_profits_unfiltered)
        else:
            mean_unfiltered = total_unfiltered = win_rate_unfiltered = n_trades_unfiltered = np.nan
        
        threshold_results_unfiltered.append({
            "threshold": thresh,
            "n_btc_ticks": n_total,
            "n_trades": n_trades_unfiltered,
            "mean_profit": mean_unfiltered,
            "total_profit": total_unfiltered,
            "win_rate": win_rate_unfiltered,
        })
        
        # FILTERED analysis (with price ceiling)
        all_profits_filtered = []
        n_trades_filtered = 0
        
        for entry_h in HORIZONS_MS_THRESH:
            for exit_h in HORIZONS_MS_THRESH:
                if exit_h <= entry_h:
                    continue
                
                col = f"profit_{entry_h}_{exit_h}ms"
                if col not in df_thresh.columns:
                    continue
                
                for idx, row in df_thresh.iterrows():
                    btc_dir = row["btc_direction"]
                    side_prefix = "yes" if btc_dir > 0 else "no"
                    
                    # Check entry ask
                    entry_ask_col = f"{side_prefix}_ask_{entry_h}ms"
                    entry_ask = row.get(entry_ask_col, np.nan)
                    
                    if pd.isna(entry_ask):
                        continue
                    
                    # Apply ceiling filter
                    if entry_ask > PRICE_CEILING_THRESH:
                        continue
                    
                    profit = row.get(col, np.nan)
                    if not pd.isna(profit):
                        all_profits_filtered.append(profit)
        
        if all_profits_filtered:
            mean_filtered = np.mean(all_profits_filtered)
            total_filtered = np.sum(all_profits_filtered)
            win_rate_filtered = 100 * sum(1 for p in all_profits_filtered if p > 0) / len(all_profits_filtered)
            n_trades_filtered = len(all_profits_filtered)
        else:
            mean_filtered = total_filtered = win_rate_filtered = n_trades_filtered = np.nan
        
        threshold_results_filtered.append({
            "threshold": thresh,
            "n_btc_ticks": n_total,
            "n_trades": n_trades_filtered,
            "mean_profit": mean_filtered,
            "total_profit": total_filtered,
            "win_rate": win_rate_filtered,
        })
    
    df_unfiltered = pd.DataFrame(threshold_results_unfiltered)
    df_filtered = pd.DataFrame(threshold_results_filtered)
    
    print(f"\n{'='*100}")
    print("UNFILTERED RESULTS (No price ceiling)")
    print(f"{'='*100}")
    print(df_unfiltered.to_string(index=False, float_format="{:.4f}".format))
    
    print(f"\n{'='*100}")
    print(f"FILTERED RESULTS (Price ceiling = {PRICE_CEILING_THRESH})")
    print(f"{'='*100}")
    print(df_filtered.to_string(index=False, float_format="{:.4f}".format))


BTC THRESHOLD ANALYSIS WITH PRICE CEILING = 0.75


Testing thresholds:   0%|          | 0/20 [00:00<?, ?it/s]


UNFILTERED RESULTS (No price ceiling)
 threshold  n_btc_ticks  n_trades  mean_profit  total_profit  win_rate
         1         6385    653827      -0.0077    -5021.5940   16.1061
         2         5877    601771      -0.0075    -4512.9290   16.4441
         3         5266    538742      -0.0074    -3964.5540   16.8639
         4         4659    477424      -0.0072    -3418.8050   17.3320
         5         3994    408812      -0.0068    -2770.0570   18.1139
         6         3358    344457      -0.0065    -2233.2890   18.4894
         7         2850    292610      -0.0062    -1799.8560   18.9672
         8         2419    248542      -0.0058    -1447.4450   19.4639
         9         2018    207494      -0.0054    -1120.4580   20.1278
        10         1714    176948      -0.0053     -933.4860   20.3981
        11         1443    149017      -0.0051     -760.8210   20.6218
        12         1254    129591      -0.0050     -652.0910   20.6403
        13         1076    111396     

In [None]:
# ============================================================================
# COMPARISON: Filtered vs Unfiltered by BTC Threshold
# ============================================================================

# Merge the two dataframes for comparison
comparison_df = df_unfiltered.merge(
    df_filtered, 
    on=["threshold", "n_btc_ticks"], 
    suffixes=("_unfiltered", "_filtered")
)

# Calculate improvements
comparison_df["mean_improvement"] = comparison_df["mean_profit_filtered"] - comparison_df["mean_profit_unfiltered"]
comparison_df["total_improvement"] = comparison_df["total_profit_filtered"] - comparison_df["total_profit_unfiltered"]
comparison_df["win_rate_improvement"] = comparison_df["win_rate_filtered"] - comparison_df["win_rate_unfiltered"]
comparison_df["trades_reduction_pct"] = 100 * (1 - comparison_df["n_trades_filtered"] / comparison_df["n_trades_unfiltered"])

print("=" * 140)
print(f"COMPARISON: Unfiltered vs Filtered (ceiling = {PRICE_CEILING_THRESH})")
print("=" * 140)

display_cols = [
    "threshold", "n_btc_ticks",
    "mean_profit_unfiltered", "mean_profit_filtered", "mean_improvement",
    "win_rate_unfiltered", "win_rate_filtered", "win_rate_improvement",
    "n_trades_unfiltered", "n_trades_filtered", "trades_reduction_pct"
]

print(comparison_df[display_cols].to_string(index=False, float_format="{:.4f}".format))

# Summary
print("\n" + "=" * 80)
print("SUMMARY")
print("=" * 80)
positive_mean_improvement = (comparison_df["mean_improvement"] > 0).sum()
positive_total_improvement = (comparison_df["total_improvement"] > 0).sum()

print(f"Thresholds with IMPROVED mean profit:  {positive_mean_improvement} / {len(comparison_df)}")
print(f"Thresholds with IMPROVED total profit: {positive_total_improvement} / {len(comparison_df)}")
print(f"Average trades reduction:              {comparison_df['trades_reduction_pct'].mean():.1f}%")
print(f"Average mean profit improvement:       {comparison_df['mean_improvement'].mean():.4f} USDC")
print(f"Average win rate improvement:          {comparison_df['win_rate_improvement'].mean():.2f}%")


COMPARISON: Unfiltered vs Filtered (ceiling = 0.75)
 threshold  n_btc_ticks  mean_profit_unfiltered  mean_profit_filtered  mean_improvement  win_rate_unfiltered  win_rate_filtered  win_rate_improvement  n_trades_unfiltered  n_trades_filtered  trades_reduction_pct
         1         6385                 -0.0077               -0.0066            0.0010              16.1061            18.6256                2.5195               653827             462466               29.2678
         2         5877                 -0.0075               -0.0064            0.0011              16.4441            19.0447                2.6006               601771             425599               29.2756
         3         5266                 -0.0074               -0.0062            0.0012              16.8639            19.5783                2.7143               538742             378864               29.6762
         4         4659                 -0.0072               -0.0060            0.0012             

In [None]:
# ============================================================================
# VISUALIZATION: BTC Threshold Analysis - Filtered vs Unfiltered
# ============================================================================

fig = make_subplots(
    rows=2, cols=2,
    subplot_titles=(
        f"Mean Profit by BTC Threshold (Ceiling = {PRICE_CEILING_THRESH})",
        f"Total Profit by BTC Threshold (Ceiling = {PRICE_CEILING_THRESH})",
        f"Win Rate by BTC Threshold (Ceiling = {PRICE_CEILING_THRESH})",
        "# Trades by BTC Threshold"
    )
)

# 1. Mean Profit
fig.add_trace(
    go.Scatter(
        x=comparison_df["threshold"],
        y=comparison_df["mean_profit_unfiltered"],
        mode="lines+markers",
        name="Unfiltered",
        line=dict(color="red", width=2),
        marker=dict(size=6)
    ),
    row=1, col=1
)
fig.add_trace(
    go.Scatter(
        x=comparison_df["threshold"],
        y=comparison_df["mean_profit_filtered"],
        mode="lines+markers",
        name="Filtered (0.75)",
        line=dict(color="green", width=2),
        marker=dict(size=6)
    ),
    row=1, col=1
)
fig.add_hline(y=0, line_dash="dash", line_color="gray", row=1, col=1)

# 2. Total Profit
fig.add_trace(
    go.Scatter(
        x=comparison_df["threshold"],
        y=comparison_df["total_profit_unfiltered"],
        mode="lines+markers",
        name="Unfiltered",
        line=dict(color="red", width=2),
        marker=dict(size=6),
        showlegend=False
    ),
    row=1, col=2
)
fig.add_trace(
    go.Scatter(
        x=comparison_df["threshold"],
        y=comparison_df["total_profit_filtered"],
        mode="lines+markers",
        name="Filtered (0.75)",
        line=dict(color="green", width=2),
        marker=dict(size=6),
        showlegend=False
    ),
    row=1, col=2
)
fig.add_hline(y=0, line_dash="dash", line_color="gray", row=1, col=2)

# 3. Win Rate
fig.add_trace(
    go.Scatter(
        x=comparison_df["threshold"],
        y=comparison_df["win_rate_unfiltered"],
        mode="lines+markers",
        name="Unfiltered",
        line=dict(color="red", width=2),
        marker=dict(size=6),
        showlegend=False
    ),
    row=2, col=1
)
fig.add_trace(
    go.Scatter(
        x=comparison_df["threshold"],
        y=comparison_df["win_rate_filtered"],
        mode="lines+markers",
        name="Filtered (0.75)",
        line=dict(color="green", width=2),
        marker=dict(size=6),
        showlegend=False
    ),
    row=2, col=1
)
fig.add_hline(y=50, line_dash="dash", line_color="gray", row=2, col=1)

# 4. Number of Trades
fig.add_trace(
    go.Bar(
        x=comparison_df["threshold"] - 0.2,
        y=comparison_df["n_trades_unfiltered"],
        name="Unfiltered",
        marker_color="red",
        width=0.4,
        showlegend=False
    ),
    row=2, col=2
)
fig.add_trace(
    go.Bar(
        x=comparison_df["threshold"] + 0.2,
        y=comparison_df["n_trades_filtered"],
        name="Filtered",
        marker_color="green",
        width=0.4,
        showlegend=False
    ),
    row=2, col=2
)

fig.update_xaxes(title_text="BTC Threshold ($)", row=1, col=1)
fig.update_xaxes(title_text="BTC Threshold ($)", row=1, col=2)
fig.update_xaxes(title_text="BTC Threshold ($)", row=2, col=1)
fig.update_xaxes(title_text="BTC Threshold ($)", row=2, col=2)

fig.update_yaxes(title_text="Mean Profit (USDC)", row=1, col=1)
fig.update_yaxes(title_text="Total Profit (USDC)", row=1, col=2)
fig.update_yaxes(title_text="Win Rate (%)", row=2, col=1)
fig.update_yaxes(title_text="# Trades", row=2, col=2)

fig.update_layout(
    height=700,
    width=1100,
    title_text=f"BTC Threshold Analysis: Unfiltered (Red) vs Price Ceiling {PRICE_CEILING_THRESH} (Green)",
    showlegend=True,
    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1)
)

fig.show()


In [None]:
# ============================================================================
# BEST ENTRY/EXIT BY BTC THRESHOLD (with 0.75 ceiling)
# ============================================================================
# Find the best entry/exit combination for each BTC threshold with ceiling applied

print("=" * 120)
print(f"BEST ENTRY/EXIT COMBINATIONS BY BTC THRESHOLD (Price Ceiling = {PRICE_CEILING_THRESH})")
print("=" * 120)

best_combos_by_threshold = []

for thresh in tqdm(BTC_THRESHOLDS, desc="Finding best combos"):
    df_thresh = full_df_all[full_df_all["abs_btc_change"] >= thresh].copy()
    
    if len(df_thresh) < 10:
        continue
    
    best_mean = -np.inf
    best_combo = None
    
    for entry_h in HORIZONS_MS_THRESH:
        for exit_h in HORIZONS_MS_THRESH:
            if exit_h <= entry_h:
                continue
            
            col = f"profit_{entry_h}_{exit_h}ms"
            if col not in df_thresh.columns:
                continue
            
            # Filter trades by ceiling
            profits_filtered = []
            
            for idx, row in df_thresh.iterrows():
                btc_dir = row["btc_direction"]
                side_prefix = "yes" if btc_dir > 0 else "no"
                
                entry_ask_col = f"{side_prefix}_ask_{entry_h}ms"
                entry_ask = row.get(entry_ask_col, np.nan)
                
                if pd.isna(entry_ask) or entry_ask > PRICE_CEILING_THRESH:
                    continue
                
                profit = row.get(col, np.nan)
                if not pd.isna(profit):
                    profits_filtered.append(profit)
            
            if len(profits_filtered) >= 10:
                mean_profit = np.mean(profits_filtered)
                if mean_profit > best_mean:
                    best_mean = mean_profit
                    best_combo = {
                        "btc_threshold": thresh,
                        "entry_ms": entry_h,
                        "exit_ms": exit_h,
                        "n_trades": len(profits_filtered),
                        "mean_profit": mean_profit,
                        "total_profit": np.sum(profits_filtered),
                        "win_rate": 100 * sum(1 for p in profits_filtered if p > 0) / len(profits_filtered),
                    }
    
    if best_combo:
        best_combos_by_threshold.append(best_combo)

best_combos_df = pd.DataFrame(best_combos_by_threshold)
print(best_combos_df.to_string(index=False, float_format="{:.4f}".format))

# Find overall best
if len(best_combos_df) > 0:
    overall_best = best_combos_df.loc[best_combos_df["mean_profit"].idxmax()]
    print("\n" + "=" * 70)
    print("OVERALL BEST CONFIGURATION")
    print("=" * 70)
    print(f"BTC Threshold:    >= ${overall_best['btc_threshold']:.0f}")
    print(f"Entry:            {overall_best['entry_ms']:.0f} ms")
    print(f"Exit:             {overall_best['exit_ms']:.0f} ms")
    print(f"Price Ceiling:    {PRICE_CEILING_THRESH}")
    print(f"Mean Profit:      {overall_best['mean_profit']:.4f} USDC")
    print(f"Total Profit:     {overall_best['total_profit']:.4f} USDC")
    print(f"Win Rate:         {overall_best['win_rate']:.1f}%")
    print(f"# Trades:         {overall_best['n_trades']:.0f}")
    print("=" * 70)


BEST ENTRY/EXIT COMBINATIONS BY BTC THRESHOLD (Price Ceiling = 0.75)


Finding best combos:   0%|          | 0/20 [00:00<?, ?it/s]

 btc_threshold  entry_ms  exit_ms  n_trades  mean_profit  total_profit  win_rate
             1        50     4000      4453       0.0032       14.1120   36.1779
             2        50     4000      4101       0.0038       15.6490   36.9422
             3        50     4000      3655       0.0046       16.9100   37.9754
             4        50     4000      3222       0.0055       17.6470   39.1682
             5        50     4000      2737       0.0073       19.9190   41.2130
             6        50     4000      2282       0.0088       20.0120   42.5066
             7        50     3000      1930       0.0103       19.8790   42.6425
             8        50     3000      1632       0.0118       19.2940   43.8725
             9        50     3000      1351       0.0138       18.6760   45.6699
            10        50     3000      1145       0.0146       16.7630   46.6376
            11        50     2000       956       0.0157       15.0430   46.8619
            12        50    

In [None]:
# ============================================================================
# VISUALIZATION: Best Combos by BTC Threshold (with 0.75 ceiling)
# ============================================================================

fig = make_subplots(
    rows=2, cols=2,
    subplot_titles=(
        "Mean Profit by BTC Threshold (Best Combo)",
        "Win Rate by BTC Threshold (Best Combo)",
        "Best Entry/Exit Horizons by Threshold",
        "# Trades by BTC Threshold"
    )
)

# 1. Mean Profit
fig.add_trace(
    go.Bar(
        x=best_combos_df["btc_threshold"],
        y=best_combos_df["mean_profit"],
        marker_color=["green" if p > 0 else "red" for p in best_combos_df["mean_profit"]],
        name="Mean Profit"
    ),
    row=1, col=1
)
fig.add_hline(y=0, line_dash="dash", line_color="gray", row=1, col=1)

# 2. Win Rate
fig.add_trace(
    go.Bar(
        x=best_combos_df["btc_threshold"],
        y=best_combos_df["win_rate"],
        marker_color="steelblue",
        name="Win Rate"
    ),
    row=1, col=2
)
fig.add_hline(y=50, line_dash="dash", line_color="gray", row=1, col=2)

# 3. Entry/Exit Horizons (stacked or grouped)
fig.add_trace(
    go.Scatter(
        x=best_combos_df["btc_threshold"],
        y=best_combos_df["entry_ms"],
        mode="lines+markers",
        name="Entry (ms)",
        line=dict(color="blue", width=2),
        marker=dict(size=8)
    ),
    row=2, col=1
)
fig.add_trace(
    go.Scatter(
        x=best_combos_df["btc_threshold"],
        y=best_combos_df["exit_ms"],
        mode="lines+markers",
        name="Exit (ms)",
        line=dict(color="orange", width=2),
        marker=dict(size=8)
    ),
    row=2, col=1
)

# 4. Number of Trades
fig.add_trace(
    go.Bar(
        x=best_combos_df["btc_threshold"],
        y=best_combos_df["n_trades"],
        marker_color="coral",
        name="# Trades",
        showlegend=False
    ),
    row=2, col=2
)

fig.update_xaxes(title_text="BTC Threshold ($)", row=1, col=1)
fig.update_xaxes(title_text="BTC Threshold ($)", row=1, col=2)
fig.update_xaxes(title_text="BTC Threshold ($)", row=2, col=1)
fig.update_xaxes(title_text="BTC Threshold ($)", row=2, col=2)

fig.update_yaxes(title_text="Mean Profit (USDC)", row=1, col=1)
fig.update_yaxes(title_text="Win Rate (%)", row=1, col=2)
fig.update_yaxes(title_text="Horizon (ms)", row=2, col=1)
fig.update_yaxes(title_text="# Trades", row=2, col=2)

fig.update_layout(
    height=700,
    width=1100,
    title_text=f"Best Entry/Exit by BTC Threshold (Price Ceiling = {PRICE_CEILING_THRESH})",
    showlegend=True,
    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1)
)

fig.show()

# Show the data in a nice table format
print("\n" + "=" * 90)
print(f"BEST CONFIGURATIONS SUMMARY (Price Ceiling = {PRICE_CEILING_THRESH})")
print("=" * 90)
print(f"{'BTC>=':<8} {'Entry':<8} {'Exit':<8} {'Trades':<10} {'Mean':>12} {'Win Rate':>12}")
print("-" * 90)
for _, row in best_combos_df.iterrows():
    print(f"${row['btc_threshold']:<7.0f} {row['entry_ms']:<8.0f} {row['exit_ms']:<8.0f} {row['n_trades']:<10.0f} "
          f"{row['mean_profit']:>12.4f} {row['win_rate']:>11.1f}%")
print("=" * 90)



BEST CONFIGURATIONS SUMMARY (Price Ceiling = 0.75)
BTC>=    Entry    Exit     Trades             Mean     Win Rate
------------------------------------------------------------------------------------------
$1       50       4000     4453             0.0032        36.2%
$2       50       4000     4101             0.0038        36.9%
$3       50       4000     3655             0.0046        38.0%
$4       50       4000     3222             0.0055        39.2%
$5       50       4000     2737             0.0073        41.2%
$6       50       4000     2282             0.0088        42.5%
$7       50       3000     1930             0.0103        42.6%
$8       50       3000     1632             0.0118        43.9%
$9       50       3000     1351             0.0138        45.7%
$10      50       3000     1145             0.0146        46.6%
$11      50       2000     956              0.0157        46.9%
$12      50       2000     823              0.0163        47.4%
$13      50       2000   

In [None]:
# Filter by minimum absolute BTC price change threshold
BTC_CHANGE_THRESHOLD = 5  # Only include changes >= this value

btc_filtered = btc_in_market_nonzero[btc_in_market_nonzero["price_change"].abs() >= BTC_CHANGE_THRESHOLD].copy()

total = len(btc_filtered)
if total == 0:
    print(f"No BTC changes with |price_change| >= {BTC_CHANGE_THRESHOLD}")
else:
    has_close = btc_filtered["close_change"].notna().sum()
    has_opp = btc_filtered["opp_change"].notna().sum()
    neither = total - has_close - has_opp

    print("=" * 60)
    print(f"BTC Price Changes Summary (|change| >= {BTC_CHANGE_THRESHOLD})")
    print("  PROPER MARKOUT: bid(exit) - ask(entry)")
    print("=" * 60)
    print(f"Total BTC price changes:           {total:,}")
    print(f"Profitable first (close):          {has_close:,} ({100*has_close/total:.1f}%)")
    print(f"Adverse first (opp):               {has_opp:,} ({100*has_opp/total:.1f}%)")
    print(f"No markout change:                 {neither:,} ({100*neither/total:.1f}%)")
    print()
    print("=" * 60)
    print("PROFITABLE FIRST Statistics")
    print("=" * 60)
    print("Delay (ms):")
    print(btc_filtered[["close_delay", "best_delay"]].describe().to_string(float_format="{:,.0f}".format))
    print()
    print("Markout (USDC):")
    print(btc_filtered[["close_change", "best_change"]].describe().to_string(float_format="{:,.4f}".format))
    print()
    print("=" * 60)
    print("ADVERSE FIRST Statistics")
    print("=" * 60)
    print("Delay (ms):")
    print(btc_filtered[["opp_delay", "worst_delay"]].describe().to_string(float_format="{:,.0f}".format))
    print()
    print("Markout (USDC):")
    print(btc_filtered[["opp_change", "worst_change"]].describe().to_string(float_format="{:,.4f}".format))


BTC Price Changes Summary (|change| >= 5)
  PROPER MARKOUT: bid(exit) - ask(entry)
Total BTC price changes:           3,994
Profitable first (close):          1,894 (47.4%)
Adverse first (opp):               2,007 (50.3%)
No markout change:                 93 (2.3%)

PROFITABLE FIRST Statistics
Delay (ms):
       close_delay  best_delay
count        1,894       1,894
mean           430      71,599
std          2,913     129,620
min             61          61
25%             92         726
50%            153      10,458
75%            283      73,744
max         60,840     712,925

Markout (USDC):
       close_change  best_change
count    1,894.0000   1,894.0000
mean        -0.0135      -0.1187
std          0.0080       0.1502
min         -0.1000      -0.8800
25%         -0.0100      -0.1500
50%         -0.0100      -0.0600
75%         -0.0100      -0.0200
max         -0.0010      -0.0060

ADVERSE FIRST Statistics
Delay (ms):
       opp_delay  worst_delay
count      2,007        2,007
m

In [None]:
# Display sample of markout data: close_change = first markout, best_change = max markout
btc_filtered[['price_change', 'entry_ask', 'close_change', 'best_change']]