# S1 Load & Visualize Recorded Session

`RecordedSession` loads OpenAI content blocks directly:
- `context.json` → context blocks (text + images with base64 inline)
- `actions.json` → action blocks (text + images with base64 inline)

In [None]:
import io
import time
import uuid
import json
import base64
import requests
from pathlib import Path
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from PIL import Image as PILImage
from IPython.display import display, Markdown, Image

# Base URL of the inference server. The inference cell below appends
# /health, /clipboard, /action and /predict to it.
# NOTE(review): this is plain http on port 443 — confirm that is intended.
URL = "http://209.20.159.6:443"
# Random client-side session id (UUID4 as a string). The inference cell
# overwrites it with the "session_id" returned by the /clipboard call.
SESSION_ID = str(uuid.uuid4())


class RecordedSession:
    def __init__(self, path: str):
        self.path = Path(path)
        with open(self.path / "context.json") as f:
            self.context_blocks = json.load(f)
        with open(self.path / "actions.json") as f:
            self.actions_blocks = json.load(f)

    def show_blocks(
        self,
        blocks: list[dict],
        start: int = 0,
        end: int | None = None,
        max_width: int = 800,
    ):
        for block in blocks[start:end]:
            if block["type"] == "text":
                display(Markdown(block["text"]))
            elif block["type"] == "image_url":
                display(Image(url=block["image_url"]["url"], width=max_width))

    def show_context_blocks(self, start: int = 0, end: int | None = None):
        self.show_blocks(self.context_blocks, start, end)

    def show_action_blocks(self, start: int = 0, end: int | None = None):
        self.show_blocks(self.actions_blocks, start, end)

    def search(self, query: str) -> list[int]:
        return [
            i
            for i, b in enumerate(self.actions_blocks)
            if b["type"] == "text" and query.lower() in b["text"].lower()
        ]

    def get_image(self, label: str) -> str | None:
        """Find image by label (e.g., 'c1', 'a1') in blocks."""
        all_blocks = self.context_blocks + self.actions_blocks
        for i, block in enumerate(all_blocks):
            if block["type"] == "text" and block["text"].strip() == f"[{label}]":
                if i + 1 < len(all_blocks) and all_blocks[i + 1]["type"] == "image_url":
                    return all_blocks[i + 1]["image_url"]["url"]
        return None


def show_image_with_bbox(img_b64: str | None, bbox: list[int] | None):
    if img_b64 is None or bbox is None:
        return
    b64_data = img_b64.split(",")[1]
    img = PILImage.open(io.BytesIO(base64.b64decode(b64_data)))
    _, ax = plt.subplots(figsize=(10, 7))
    ax.imshow(img)
    w, h = img.size
    x1, y1 = bbox[0] * w / 1000, bbox[1] * h / 1000
    x2, y2 = bbox[2] * w / 1000, bbox[3] * h / 1000
    pos, h, w = (x1, y1), y2 - y1, x2 - x1
    rect = patches.Rectangle(pos, w, h, linewidth=3, edgecolor="red", facecolor="none")
    ax.add_patch(rect)
    ax.axis("off")
    plt.show()

## Load Data & Visualize

In [None]:
# Load a recorded session from its directory. The path is relative, so it is
# resolved against the notebook's current working directory.
# Alternative recording (swap the comment to use it):
# recorded = RecordedSession("../../data/session-2026-01-31T04-24-33")
recorded = RecordedSession("../../data/session-2026-02-02T01-05-49")

# recorded.show_context_blocks()
# recorded.show_action_blocks(start=0, end=10)

# # Find all Cmd+E triggers
# cmd_e_indices = recorded.search("Command + e")
# print(f"Found {len(cmd_e_indices)} Cmd+E triggers at indices: {cmd_e_indices}")

# for idx in cmd_e_indices:
#     recorded.show_action_blocks(start=idx, end=idx + 2)
#     display(Markdown("---"))

## Inference

In [None]:
# Replay the recorded session against the inference server: upload the
# context blocks once, stream the action blocks two at a time, and request a
# prediction whenever a streamed pair contains a "Command + e" trigger.

# (connect, read) timeout in seconds for every HTTP call, so an unreachable
# or stalled server cannot hang the notebook forever.
# NOTE(review): the read timeout is a guess — tune it to the slowest
# expected /clipboard or /predict call.
REQUEST_TIMEOUT = (10, 300)


def _post_json(endpoint: str, payload: dict) -> tuple[dict, float]:
    """POST *payload* as JSON to ``URL + endpoint``.

    Returns the decoded JSON body and the elapsed wall time in seconds.
    Raises ``requests.HTTPError`` on a 4xx/5xx response.
    """
    started = time.perf_counter()  # monotonic, unlike time.time()
    resp = requests.post(f"{URL}{endpoint}", json=payload, timeout=REQUEST_TIMEOUT)
    # Fail with the HTTP status here rather than with a KeyError later when
    # an error body lacks the expected fields.
    resp.raise_for_status()
    body = resp.json()
    return body, time.perf_counter() - started


# Check health
health_resp = requests.get(url=f"{URL}/health", timeout=REQUEST_TIMEOUT)
health_resp.raise_for_status()
health = health_resp.json()
print(f"Health: {health}")

# Set clipboard (renamed from context)
print(f"Setting clipboard for session {SESSION_ID}")
resp_json, latency = _post_json(
    "/clipboard",
    {"session_id": SESSION_ID, "blocks": recorded.context_blocks},
)
# Continue with whatever session id the server reports back.
SESSION_ID = resp_json["session_id"]
print(f"Clipboard set: {resp_json['prompt_tokens']:,} tokens ({latency:.2f}s)")

# Find Cmd+E indices
cmd_e_indices = set(recorded.search("Command + e"))
print(f"Cmd+E triggers at: {cmd_e_indices}")

# Add actions one by one (two blocks per request), predict at Cmd+E
for idx in range(0, len(recorded.actions_blocks), 2):
    action_block = recorded.actions_blocks[idx:idx + 2]
    recorded.show_action_blocks(start=idx, end=idx + 2)

    # Add action
    resp_json, latency = _post_json(
        "/action",
        {"session_id": SESSION_ID, "blocks": action_block},
    )
    display(Markdown(f"**Action added:** {resp_json['prompt_tokens']:,} tokens ({latency:.2f}s)"))

    # Predict at Cmd+E triggers. Check both indices of the pair just sent:
    # testing only `idx` would miss a trigger stored at the odd position.
    if not cmd_e_indices.isdisjoint(range(idx, idx + 2)):
        resp_json, latency = _post_json("/predict", {"session_id": SESSION_ID})
        display(Markdown(f"**Prediction:** ({latency:.2f}s)"))

        # Read "raw" once with .get() so the error path below cannot raise
        # a second time when the field is missing.
        raw = resp_json.get("raw")
        try:
            prediction = json.loads(raw)
            suggestion = prediction["suggestion"]
            source = prediction.get("source")
            bbox = prediction.get("bbox")

            display(Markdown(f"**Suggestion:** `{suggestion}`"))
            display(Markdown(f"**Source:** `{source}` | **BBox:** `{bbox}`"))

            if source and bbox:
                source_image = recorded.get_image(source)
                show_image_with_bbox(source_image, bbox)
        except Exception as exc:
            # Deliberately broad: one malformed prediction should not stop
            # the replay. Show the cause instead of discarding it.
            display(Markdown(f"**Error parsing:** {raw} ({type(exc).__name__}: {exc})"))

    display(Markdown("---"))

print("✓ Completed")