# Spread Compression Tool — Demo

This notebook demonstrates `SpreadCompressionTool`, which computes four bid-ask
spread metrics from local JSONL snapshot data:
1. **mean_spread** — average (ask − bid)
2. **spread_std** — standard deviation of spread series
3. **spread_trend** — last spread − first spread
4. **compression_ratio** — last_spread / mean_spread

In [None]:
import sys, json
from pathlib import Path
from datetime import datetime, timezone

# The project root is taken to be two directory levels above the current
# working directory; it is put at the front of sys.path (once) so that the
# `prediction_agent` package can be imported by the cells below.
notebook_dir = Path.cwd()
PROJECT_ROOT = notebook_dir.parent.parent
root_entry = str(PROJECT_ROOT)
if root_entry not in sys.path:
    sys.path.insert(0, root_entry)

# Locations of the package directory and of the snapshot file read below.
PREDICTION_AGENT_DIR = PROJECT_ROOT.joinpath("prediction_agent")
JSONL_PATH = PREDICTION_AGENT_DIR.joinpath("outputs", "market_snapshots.jsonl")
print(f"JSONL: {JSONL_PATH}")

## 1. Load dataset and pick a market

In [None]:
from collections import Counter

# Parse the snapshot file: one JSON object per non-blank line.
# The encoding is pinned to UTF-8 so parsing does not depend on the
# platform's locale default.
with open(JSONL_PATH, encoding="utf-8") as f:
    rows = [json.loads(text) for text in map(str.strip, f) if text]

# Fail with a clear message instead of an IndexError from most_common()
# when the file contains no snapshots.
if not rows:
    raise ValueError(f"No snapshot rows found in {JSONL_PATH}")

# Pick the market with the most snapshots as the demo target.
counts = Counter(r["market_id"] for r in rows)
target = counts.most_common(1)[0][0]
print(f"Total rows: {len(rows)}")
print(f"Selected market: {target} ({counts[target]} rows)")

## 2. Run the tool

In [None]:
from prediction_agent.schemas import EventInput
from prediction_agent.tools.spread_compression_tool import SpreadCompressionTool

# Placeholder event: only market_id comes from the dataset (the market
# selected above); the remaining fields are fixed demo values.
event = EventInput(
    event_id="demo",
    market_id=target,
    market_title="Demo",
    current_price=0.50,
)
tool = SpreadCompressionTool(jsonl_path=JSONL_PATH)
# NOTE(review): the very large window is presumably meant to include every
# snapshot in the file — confirm against the tool's window semantics.
result = tool.run(event, window_minutes=999_999)

# Raw tool output.
print("=== Tool Output ===")
print(f"tool_name    : {result.tool_name}")
print(f"output_vector: {result.output_vector}")
print(f"confidence   : {result.metadata['confidence']}")
print(f"sample_count : {result.metadata['sample_count']}")

# Pair each vector component with its metric name, in vector order.
labels = ["mean_spread", "spread_std", "spread_trend", "compression_ratio"]
print("\nBreakdown:")
for metric_name, metric_value in zip(labels, result.output_vector):
    print(f"  {metric_name:22s} = {metric_value}")

## 3. Plot spread series over time

In [None]:
import matplotlib
matplotlib.use("Agg")  # select the Agg backend before pyplot is imported
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

# Snapshots belonging to the selected market only.
market_rows = [snap for snap in rows if snap["market_id"] == target]

# Build (timestamp, spread) points, skipping snapshots that lack either
# quote, and order them chronologically. Tuples sort element-wise, so equal
# timestamps are ordered by spread.
pairs = sorted(
    (datetime.fromisoformat(snap["timestamp"]), snap["yes_ask"] - snap["yes_bid"])
    for snap in market_rows
    if snap.get("yes_bid") is not None and snap.get("yes_ask") is not None
)
timestamps = [when for when, _ in pairs]
spreads = [width for _, width in pairs]

# Spread time series.
fig, ax = plt.subplots(figsize=(12, 5))
ax.plot(timestamps, spreads, color="#dc2626", marker="o", markersize=4, linewidth=1.5)

# Titles, axis labels and grid.
ax.set_title(f"Bid-Ask Spread vs Time — {target}", fontsize=14)
ax.set_xlabel("Timestamp (UTC)")
ax.set_ylabel("Spread (ask − bid)")
ax.grid(True, alpha=0.3)

# Show tick labels as hour:minute and let matplotlib rotate them.
ax.xaxis.set_major_formatter(mdates.DateFormatter("%H:%M"))
fig.autofmt_xdate()

# Horizontal reference line at the first component of the tool's output
# vector (labelled "mean_spread" in the breakdown cell).
ms = result.output_vector[0]
ax.axhline(y=ms, color="gray", linestyle="--", alpha=0.6, label=f"mean={ms:.4f}")
ax.legend()

# Write the figure next to the snapshot data, then show it.
plt.tight_layout()
plot_path = PREDICTION_AGENT_DIR / "outputs" / "spread_vs_time.png"
plt.savefig(str(plot_path), dpi=150)
plt.show()
print("Plot saved.")