OpenAdaptAI · abrichr · Mar 2, 2026 · Mar 2, 2026 · Mar 2, 2026 · Mar 2, 2026
diff --git a/.beads/issues.jsonl b/.beads/issues.jsonl
@@ -13,5 +13,5 @@
 {"id":"openadapt-evals-hvm","title":"VL model fix PR #18 ready to merge","notes":"2026-02-08: openadapt-ml PR #18 was already merged on 2026-01-29. VL model fix is done.","status":"closed","priority":0,"issue_type":"task","owner":"richard.abrich@gmail.com","created_at":"2026-01-29T16:17:03.491938-05:00","created_by":"Richard Abrich","updated_at":"2026-02-08T12:55:19.233249-05:00","closed_at":"2026-02-08T12:55:19.233249-05:00","close_reason":"PR #18 already merged 2026-01-29"}
 {"id":"openadapt-evals-mx8","title":"Analyze evaluation results and publish findings","description":"After demo-conditioned evaluation completes, analyze results: success rates, failure modes, demo impact. Create data-driven roadmap for improvements.","notes":"wright repo (OpenAdaptAI/wright) scaffolding underway. Herald + consilium repos transferred to OpenAdaptAI org. Wright will be the orchestration layer for eval pipeline.","status":"open","priority":1,"issue_type":"task","owner":"richard.abrich@gmail.com","created_at":"2026-02-14T12:23:06.328838-05:00","created_by":"Richard Abrich","updated_at":"2026-03-02T00:08:08.422633-05:00"}
 {"id":"openadapt-evals-sz4","title":"RCA: Windows product key prompt recurring issue","status":"closed","priority":0,"issue_type":"task","owner":"richard.abrich@gmail.com","created_at":"2026-01-20T18:59:36.266286-05:00","created_by":"Richard Abrich","updated_at":"2026-01-20T20:32:06.493102-05:00","closed_at":"2026-01-20T20:32:06.493102-05:00","close_reason":"RCA complete - root cause is VERSION mismatch (CLI=11, Dockerfile=11e). Fix documented in RECURRING_ISSUES.md and WINDOWS_PRODUCT_KEY_RCA.md"}
-{"id":"openadapt-evals-vcb","title":"Run demo-conditioned WAA evaluation","description":"Once demos are recorded, run WAA evaluation with demo-conditioned agents (RetrievalAugmentedAgent with real demos). Target: measure improvement over zero-shot baseline. Requires real demos from recording task.","notes":"wright repo created (OpenAdaptAI/wright), scaffolding in progress. Herald + consilium transferred to OpenAdaptAI org.","status":"open","priority":0,"issue_type":"task","owner":"richard.abrich@gmail.com","created_at":"2026-02-14T12:23:04.624305-05:00","created_by":"Richard Abrich","updated_at":"2026-03-02T00:08:08.423284-05:00"}
+{"id":"openadapt-evals-vcb","title":"Run demo-conditioned WAA evaluation","description":"Once demos are recorded, run WAA evaluation with demo-conditioned agents (RetrievalAugmentedAgent with real demos). Target: measure improvement over zero-shot baseline. Requires real demos from recording task.","notes":"2026-03-01: GPU grant applications reviewed and rewritten (11 files). Writing done, blocked on eval results (DC signal on harder tasks). Detailed status tracked in openadapt-internal (private repo).","status":"open","priority":0,"issue_type":"task","owner":"richard.abrich@gmail.com","created_at":"2026-02-14T12:23:04.624305-05:00","created_by":"Richard Abrich","updated_at":"2026-03-02T01:14:44.513125-05:00"}
 {"id":"openadapt-evals-wis","title":"Add pre-flight check to detect Windows install issues","status":"closed","priority":1,"issue_type":"task","owner":"richard.abrich@gmail.com","created_at":"2026-01-20T18:59:36.865052-05:00","created_by":"Richard Abrich","updated_at":"2026-01-20T20:32:06.757261-05:00","closed_at":"2026-01-20T20:32:06.757261-05:00","close_reason":"Duplicate of openadapt-evals-0dt"}
diff --git a/docs/artifacts/full/step_00_after.png b/docs/artifacts/full/step_00_after.png
diff --git a/docs/artifacts/full/step_00_before.png b/docs/artifacts/full/step_00_before.png
diff --git a/docs/artifacts/full/step_01_after.png b/docs/artifacts/full/step_01_after.png
diff --git a/docs/artifacts/full/step_01_before.png b/docs/artifacts/full/step_01_before.png
diff --git a/docs/artifacts/full/step_02_after.png b/docs/artifacts/full/step_02_after.png
diff --git a/docs/artifacts/full/step_02_before.png b/docs/artifacts/full/step_02_before.png
diff --git a/docs/artifacts/full/step_03_after.png b/docs/artifacts/full/step_03_after.png
diff --git a/docs/artifacts/full/step_03_before.png b/docs/artifacts/full/step_03_before.png
diff --git a/docs/artifacts/full/step_04_after.png b/docs/artifacts/full/step_04_after.png
diff --git a/docs/artifacts/full/step_04_before.png b/docs/artifacts/full/step_04_before.png
diff --git a/docs/artifacts/full/step_05_after.png b/docs/artifacts/full/step_05_after.png
diff --git a/docs/artifacts/full/step_05_before.png b/docs/artifacts/full/step_05_before.png
diff --git a/docs/artifacts/full/step_06_after.png b/docs/artifacts/full/step_06_after.png
diff --git a/docs/artifacts/full/step_06_before.png b/docs/artifacts/full/step_06_before.png
diff --git a/docs/artifacts/full/step_07_after.png b/docs/artifacts/full/step_07_after.png
diff --git a/docs/artifacts/full/step_07_before.png b/docs/artifacts/full/step_07_before.png
diff --git a/docs/artifacts/full/step_08_after.png b/docs/artifacts/full/step_08_after.png
diff --git a/docs/artifacts/full/step_08_before.png b/docs/artifacts/full/step_08_before.png
diff --git a/docs/artifacts/full/step_09_after.png b/docs/artifacts/full/step_09_after.png
diff --git a/docs/artifacts/full/step_09_before.png b/docs/artifacts/full/step_09_before.png
diff --git a/docs/artifacts/full/step_10_after.png b/docs/artifacts/full/step_10_after.png
diff --git a/docs/artifacts/full/step_10_before.png b/docs/artifacts/full/step_10_before.png
diff --git a/docs/artifacts/full/step_11_after.png b/docs/artifacts/full/step_11_after.png
diff --git a/docs/artifacts/full/step_11_before.png b/docs/artifacts/full/step_11_before.png
diff --git a/docs/artifacts/full/step_12_after.png b/docs/artifacts/full/step_12_after.png
diff --git a/docs/artifacts/full/step_12_before.png b/docs/artifacts/full/step_12_before.png
diff --git a/docs/artifacts/full/step_13_after.png b/docs/artifacts/full/step_13_after.png
diff --git a/docs/artifacts/full/step_13_before.png b/docs/artifacts/full/step_13_before.png
diff --git a/docs/artifacts/full/step_14_after.png b/docs/artifacts/full/step_14_after.png
diff --git a/docs/artifacts/full/step_14_before.png b/docs/artifacts/full/step_14_before.png
diff --git a/docs/artifacts/full/step_15_after.png b/docs/artifacts/full/step_15_after.png
diff --git a/docs/artifacts/full/step_15_before.png b/docs/artifacts/full/step_15_before.png
diff --git a/docs/artifacts/full/step_16_after.png b/docs/artifacts/full/step_16_after.png
diff --git a/docs/artifacts/full/step_16_before.png b/docs/artifacts/full/step_16_before.png
diff --git a/docs/artifacts/full/step_17_after.png b/docs/artifacts/full/step_17_after.png
diff --git a/docs/artifacts/full/step_17_before.png b/docs/artifacts/full/step_17_before.png
diff --git a/docs/artifacts/full/step_18_after.png b/docs/artifacts/full/step_18_after.png
diff --git a/docs/artifacts/full/step_18_before.png b/docs/artifacts/full/step_18_before.png
diff --git a/docs/artifacts/full/step_19_after.png b/docs/artifacts/full/step_19_after.png
diff --git a/docs/artifacts/full/step_19_before.png b/docs/artifacts/full/step_19_before.png
diff --git a/docs/artifacts/full/step_20_after.png b/docs/artifacts/full/step_20_after.png
diff --git a/docs/artifacts/full/step_20_before.png b/docs/artifacts/full/step_20_before.png
diff --git a/docs/artifacts/thumbnails/step_00_after.png b/docs/artifacts/thumbnails/step_00_after.png
diff --git a/docs/artifacts/thumbnails/step_00_before.png b/docs/artifacts/thumbnails/step_00_before.png
diff --git a/docs/artifacts/thumbnails/step_01_after.png b/docs/artifacts/thumbnails/step_01_after.png
diff --git a/docs/artifacts/thumbnails/step_01_before.png b/docs/artifacts/thumbnails/step_01_before.png
diff --git a/docs/artifacts/thumbnails/step_02_after.png b/docs/artifacts/thumbnails/step_02_after.png
diff --git a/docs/artifacts/thumbnails/step_02_before.png b/docs/artifacts/thumbnails/step_02_before.png
diff --git a/docs/artifacts/thumbnails/step_03_after.png b/docs/artifacts/thumbnails/step_03_after.png
diff --git a/docs/artifacts/thumbnails/step_03_before.png b/docs/artifacts/thumbnails/step_03_before.png
diff --git a/docs/artifacts/thumbnails/step_04_after.png b/docs/artifacts/thumbnails/step_04_after.png
diff --git a/docs/artifacts/thumbnails/step_04_before.png b/docs/artifacts/thumbnails/step_04_before.png
diff --git a/docs/artifacts/thumbnails/step_05_after.png b/docs/artifacts/thumbnails/step_05_after.png
diff --git a/docs/artifacts/thumbnails/step_05_before.png b/docs/artifacts/thumbnails/step_05_before.png
diff --git a/docs/artifacts/thumbnails/step_06_after.png b/docs/artifacts/thumbnails/step_06_after.png
diff --git a/docs/artifacts/thumbnails/step_06_before.png b/docs/artifacts/thumbnails/step_06_before.png
diff --git a/docs/artifacts/thumbnails/step_07_after.png b/docs/artifacts/thumbnails/step_07_after.png
diff --git a/docs/artifacts/thumbnails/step_07_before.png b/docs/artifacts/thumbnails/step_07_before.png
diff --git a/docs/artifacts/thumbnails/step_08_after.png b/docs/artifacts/thumbnails/step_08_after.png
diff --git a/docs/artifacts/thumbnails/step_08_before.png b/docs/artifacts/thumbnails/step_08_before.png
diff --git a/docs/artifacts/thumbnails/step_09_after.png b/docs/artifacts/thumbnails/step_09_after.png
diff --git a/docs/artifacts/thumbnails/step_09_before.png b/docs/artifacts/thumbnails/step_09_before.png
diff --git a/docs/artifacts/thumbnails/step_10_after.png b/docs/artifacts/thumbnails/step_10_after.png
diff --git a/docs/artifacts/thumbnails/step_10_before.png b/docs/artifacts/thumbnails/step_10_before.png
diff --git a/docs/artifacts/thumbnails/step_11_after.png b/docs/artifacts/thumbnails/step_11_after.png
diff --git a/docs/artifacts/thumbnails/step_11_before.png b/docs/artifacts/thumbnails/step_11_before.png
diff --git a/docs/artifacts/thumbnails/step_12_after.png b/docs/artifacts/thumbnails/step_12_after.png
diff --git a/docs/artifacts/thumbnails/step_12_before.png b/docs/artifacts/thumbnails/step_12_before.png
diff --git a/docs/artifacts/thumbnails/step_13_after.png b/docs/artifacts/thumbnails/step_13_after.png
diff --git a/docs/artifacts/thumbnails/step_13_before.png b/docs/artifacts/thumbnails/step_13_before.png
diff --git a/docs/artifacts/thumbnails/step_14_after.png b/docs/artifacts/thumbnails/step_14_after.png
diff --git a/docs/artifacts/thumbnails/step_14_before.png b/docs/artifacts/thumbnails/step_14_before.png
diff --git a/docs/artifacts/thumbnails/step_15_after.png b/docs/artifacts/thumbnails/step_15_after.png
diff --git a/docs/artifacts/thumbnails/step_15_before.png b/docs/artifacts/thumbnails/step_15_before.png
diff --git a/docs/artifacts/thumbnails/step_16_after.png b/docs/artifacts/thumbnails/step_16_after.png
diff --git a/docs/artifacts/thumbnails/step_16_before.png b/docs/artifacts/thumbnails/step_16_before.png
diff --git a/docs/artifacts/thumbnails/step_17_after.png b/docs/artifacts/thumbnails/step_17_after.png
diff --git a/docs/artifacts/thumbnails/step_17_before.png b/docs/artifacts/thumbnails/step_17_before.png
diff --git a/docs/artifacts/thumbnails/step_18_after.png b/docs/artifacts/thumbnails/step_18_after.png
diff --git a/docs/artifacts/thumbnails/step_18_before.png b/docs/artifacts/thumbnails/step_18_before.png
diff --git a/docs/artifacts/thumbnails/step_19_after.png b/docs/artifacts/thumbnails/step_19_after.png
diff --git a/docs/artifacts/thumbnails/step_19_before.png b/docs/artifacts/thumbnails/step_19_before.png
diff --git a/docs/artifacts/thumbnails/step_20_after.png b/docs/artifacts/thumbnails/step_20_after.png
diff --git a/docs/artifacts/thumbnails/step_20_before.png b/docs/artifacts/thumbnails/step_20_before.png
diff --git a/docs/demo_review.md b/docs/demo_review.md
diff --git a/scripts/generate_demo_review.py b/scripts/generate_demo_review.py
@@ -0,0 +1,301 @@
+#!/usr/bin/env python3
+"""Generate a markdown review artifact for the demo recording pipeline.
+
+Reads a WAA recording (meta.json + screenshots), creates thumbnail images,
+and produces a markdown file showing the pipeline output for each step.
+The markdown is suitable for embedding in docs or PR descriptions and
+renders on GitHub with relative image paths. Thumbnails link to full-resolution
+originals when available.
+
+Usage:
+    python scripts/generate_demo_review.py \
+        --recording waa_recordings/04d9aeaf-7bed-4024-bedb-e10e6f00eb7f-WOS \
+        --text-demo demo_prompts/04d9aeaf-7bed-4024-bedb-e10e6f00eb7f-WOS.txt \
+        --vlm-demo demo_prompts_vlm/04d9aeaf-7bed-4024-bedb-e10e6f00eb7f-WOS.txt \
+        --output docs/demo_review.md
+"""
+from __future__ import annotations
+
+import json
+import re
+import shutil
+import sys
+from pathlib import Path
+
+from PIL import Image
+
+
+THUMBNAIL_WIDTH = 600  # default thumbnail width in pixels
+
+
+def _parse_demo_steps(demo_text: str) -> dict[int, str]:
+    """Split demo text into per-step blocks keyed by step number.
+
+    A step begins at a line consisting solely of "Step N:" and extends to the
+    next such line (or the end of the text); text before the first header is
+    dropped. Works for both the text-only and the VLM-enriched demo formats.
+    """
+    # The capture group makes re.split keep each header, so the result
+    # interleaves as [preamble, "Step 1:", body, "Step 2:", body, ...].
+    pieces = re.split(r'^(Step \d+:)\s*$', demo_text, flags=re.MULTILINE)
+
+    parsed: dict[int, str] = {}
+    # Pair every header (odd index) with the body that follows it.
+    for header, body in zip(pieces[1::2], pieces[2::2]):
+        number = int(re.search(r'\d+', header).group())
+        # Trim surrounding whitespace only; internal layout is preserved.
+        body = body.strip()
+        # Drop a trailing "---" separator left at the end of the block.
+        if body.endswith("---"):
+            body = body[:-3].strip()
+        parsed[number] = body
+
+    return parsed
+
+
+def _create_thumbnail(src: Path, dst: Path, width: int = THUMBNAIL_WIDTH) -> None:
+    """Write a copy of src to dst at most `width` px wide (aspect preserved)."""
+    with Image.open(src) as img:
+        if img.width <= width:
+            shutil.copy2(src, dst)  # never upscale; reuse the original as-is
+            return
+        ratio = width / img.width
+        new_height = max(1, int(img.height * ratio))  # very wide images must keep >= 1 px
+        resized = img.resize((width, new_height), Image.LANCZOS)
+        resized.save(dst, optimize=True)
+
+
+def _relpath(target: Path, start: Path) -> str:
+    """Return target relative to start as a "/"-separated path for markdown links."""
+    try:
+        return target.resolve().relative_to(start.resolve()).as_posix()
+    except ValueError:  # target is outside start, so the path needs ".." parts
+        import os
+        return Path(os.path.relpath(target.resolve(), start.resolve())).as_posix()
+
+
+def _escape_md(text: str) -> str:
+    """Make text safe for one markdown table cell: escape "|", newline -> <br>."""
+    return "<br>".join(text.replace("|", "\\|").split("\n"))
+
+
+def _indent_block(text: str, prefix: str = "> ") -> str:
+    """Prepend prefix to each newline-separated line of text (blank ones too)."""
+    return prefix + text.replace("\n", "\n" + prefix)
+
+
+def main(
+    recording: str,
+    text_demo: str | None = None,
+    vlm_demo: str | None = None,
+    output: str = "docs/demo_review.md",
+    thumbnail_width: int = THUMBNAIL_WIDTH,
+) -> None:
+    """Write the markdown review plus thumbnail and full-size image copies.
+
+    Args:
+        recording: Path to the recording directory (contains meta.json + PNGs).
+        text_demo: Path to the text-only demo .txt file.
+        vlm_demo: Path to the VLM-enriched demo .txt file.
+        output: Output path for the generated markdown file.
+        thumbnail_width: Width in pixels for thumbnail images.
+    """
+    recording_dir = Path(recording)
+    output_path = Path(output)
+
+    # --- Validate inputs (exits with status 1 when meta.json is missing) ---
+    meta_path = recording_dir / "meta.json"
+    if not meta_path.exists():
+        print(f"Error: meta.json not found in {recording_dir}", file=sys.stderr)
+        sys.exit(1)
+
+    meta = json.loads(meta_path.read_text(encoding="utf-8"))
+    task_id = meta["task_id"]
+    instruction = meta["instruction"]
+    num_steps = meta.get("num_steps", len(meta.get("steps", [])))
+    steps = meta.get("steps", [])
+    recorded_at = meta.get("recorded_at", "unknown")
+
+    # --- Parse demo files (both optional; a missing file only warns) ---
+    text_steps: dict[int, str] = {}
+    vlm_steps: dict[int, str] = {}
+
+    if text_demo:
+        text_demo_path = Path(text_demo)
+        if text_demo_path.exists():
+            text_steps = _parse_demo_steps(
+                text_demo_path.read_text(encoding="utf-8")
+            )
+        else:
+            print(f"Warning: text demo not found at {text_demo_path}", file=sys.stderr)
+
+    if vlm_demo:
+        vlm_demo_path = Path(vlm_demo)
+        if vlm_demo_path.exists():
+            vlm_steps = _parse_demo_steps(
+                vlm_demo_path.read_text(encoding="utf-8")
+            )
+        else:
+            print(f"Warning: VLM demo not found at {vlm_demo_path}", file=sys.stderr)
+
+    # --- Create thumbnails and copy full-res originals ---
+    thumb_dir = output_path.parent / "artifacts" / "thumbnails"
+    thumb_dir.mkdir(parents=True, exist_ok=True)
+
+    full_dir = output_path.parent / "artifacts" / "full"
+    full_dir.mkdir(parents=True, exist_ok=True)
+
+    thumbnail_map: dict[str, Path] = {}
+    full_map: dict[str, Path] = {}
+    for i in range(num_steps):
+        for suffix in ("before", "after"):
+            name = f"step_{i:02d}_{suffix}"
+            src = recording_dir / f"{name}.png"
+            if src.exists():
+                dst = thumb_dir / f"{name}.png"
+                _create_thumbnail(src, dst, width=thumbnail_width)
+                thumbnail_map[name] = dst
+                # Copy full-resolution original
+                full_dst = full_dir / f"{name}.png"
+                shutil.copy2(src, full_dst)
+                full_map[name] = full_dst
+
+    print(f"Created {len(thumbnail_map)} thumbnails in {thumb_dir}")
+    if full_map:
+        print(f"Copied {len(full_map)} full-resolution images to {full_dir}")
+
+    # --- Build markdown ---
+    md_dir = output_path.parent
+    md_dir.mkdir(parents=True, exist_ok=True)
+    lines: list[str] = []
+
+    # Header
+    lines.append("# Demo Pipeline Review")
+    lines.append("")
+    lines.append(f"**Task ID:** `{task_id}`")
+    lines.append("")
+    lines.append(f"**Instruction:** {instruction}")
+    lines.append("")
+    lines.append(f"**Steps:** {num_steps}")
+    lines.append("")
+    lines.append(f"**Recorded at:** {recorded_at}")
+    lines.append("")
+
+    # --- Comparison table (first 3 steps) ---
+    compare_count = min(3, num_steps)
+    if text_steps or vlm_steps:
+        lines.append("## Text vs VLM Comparison (First 3 Steps)")
+        lines.append("")
+        lines.append(
+            "| Step | Ground Truth | Text-Only Demo | VLM-Enriched Demo |"
+        )
+        lines.append("|------|-------------|----------------|-------------------|")
+
+        for i in range(compare_count):
+            step_num = i + 1
+            gt = steps[i].get("suggested_step", "") if i < len(steps) else ""
+            text_content = _escape_md(text_steps.get(step_num, "*(not available)*"))
+            vlm_content = _escape_md(vlm_steps.get(step_num, "*(not available)*"))
+            gt_escaped = _escape_md(gt)
+            lines.append(
+                f"| {step_num} | {gt_escaped} | {text_content} | {vlm_content} |"
+            )
+
+        lines.append("")
+
+    # --- Per-step details (expanded, not collapsed) ---
+    for i in range(num_steps):
+        step_num = i + 1
+        gt = steps[i].get("suggested_step", f"(step {step_num})") if i < len(steps) else f"(step {step_num})"
+
+        lines.append(f"### Step {step_num}: {gt}")
+        lines.append("")
+
+        # Screenshots — thumbnails that link to full-resolution when available
+        before_key = f"step_{i:02d}_before"
+        after_key = f"step_{i:02d}_after"
+        has_before = before_key in thumbnail_map
+        has_after = after_key in thumbnail_map
+
+        if has_before or has_after:
+            if has_before and has_after:
+                before_thumb = _relpath(thumbnail_map[before_key], md_dir)
+                after_thumb = _relpath(thumbnail_map[after_key], md_dir)
+                if before_key in full_map and after_key in full_map:
+                    before_full = _relpath(full_map[before_key], md_dir)
+                    after_full = _relpath(full_map[after_key], md_dir)
+                    lines.append(
+                        f"[![before]({before_thumb})]({before_full}) "
+                        f"[![after]({after_thumb})]({after_full})"
+                    )
+                else:
+                    lines.append(
+                        f"![before]({before_thumb}) "
+                        f"![after]({after_thumb})"
+                    )
+            elif has_before:
+                before_thumb = _relpath(thumbnail_map[before_key], md_dir)
+                if before_key in full_map:
+                    before_full = _relpath(full_map[before_key], md_dir)
+                    lines.append(f"[![before]({before_thumb})]({before_full})")
+                else:
+                    lines.append(f"![before]({before_thumb})")
+            elif has_after:
+                after_thumb = _relpath(thumbnail_map[after_key], md_dir)
+                if after_key in full_map:
+                    after_full = _relpath(full_map[after_key], md_dir)
+                    lines.append(f"[![after]({after_thumb})]({after_full})")
+                else:
+                    lines.append(f"![after]({after_thumb})")
+
+            lines.append("")
+
+        # Ground truth
+        lines.append(f"**Ground truth:** {gt}")
+        lines.append("")
+
+        # Text-only demo output
+        if text_steps:
+            text_content = text_steps.get(step_num)
+            if text_content:
+                lines.append(f"**Text demo:** {text_content}")
+            else:
+                lines.append("**Text demo:** *(not available)*")
+            lines.append("")
+
+        # VLM-enriched demo output
+        if vlm_steps:
+            vlm_content = vlm_steps.get(step_num)
+            if vlm_content:
+                lines.append("**VLM demo:**")
+                lines.append("")
+                lines.append(_indent_block(vlm_content))
+            else:
+                lines.append("**VLM demo:** *(not available)*")
+            lines.append("")
+
+        lines.append("---")
+        lines.append("")
+
+    # --- Footer ---
+    lines.append(
+        f"*Generated by `scripts/generate_demo_review.py` from recording "
+        f"`{recording_dir.name}`*"
+    )
+    lines.append("")
+
+    # Write output
+    md_text = "\n".join(lines)
+    output_path.write_text(md_text, encoding="utf-8")
+    print(f"Wrote {len(md_text)} bytes to {output_path}")
+    print(f"  {num_steps} steps, {len(thumbnail_map)} thumbnails")
+    if text_steps:
+        print(f"  Text-only demo: {len(text_steps)} steps parsed")
+    if vlm_steps:
+        print(f"  VLM-enriched demo: {len(vlm_steps)} steps parsed")
+
+
+if __name__ == "__main__":
+    # python-fire exposes main()'s parameters as command-line flags.
+    from fire import Fire
+    Fire(main)
diff --git a/scripts/record_waa_demos.py b/scripts/record_waa_demos.py
@@ -1356,9 +1356,10 @@ def _auto_start_socat(vm_ip: str) -> bool:
         capture_output=True, text=True, timeout=30,
     )
     if result.returncode != 0:
-        print(f"  ERROR: socat proxy setup failed: {result.stderr.strip()}")
-        return False
-    print("  Socat proxy established (VM:5051 -> container:5050).")
+        print(f"  WARNING: socat setup returned non-zero: {result.stderr.strip()}")
+        # Not fatal — socat may already be running
+    else:
+        print("  Socat proxy established (VM:5051 -> container:5050).")
     return True