# Runner Demo (stream and run_episode)

This notebook shows how to use the UI-agnostic runner to stream per-step events and to run a full episode, using the compose layer to build an environment and policy.

In [None]:
# Ensure repo 'src' is on sys.path when running from notebooks/
import sys
import pathlib

# Treat the working directory as the repo root when it contains src/;
# otherwise fall back to its parent (e.g. the kernel started in notebooks/).
repo_root = pathlib.Path.cwd()
if not (repo_root / "src").exists():
    repo_root = repo_root.parent

# Keep src/ at the front of sys.path, but exactly once: a plain insert(0, ...)
# would stack another duplicate entry every time this cell is re-run.
src_dir = str(repo_root / "src")
sys.path[:] = [src_dir] + [entry for entry in sys.path if entry != src_dir]
print("Using repo root:", repo_root)

In [None]:
import numpy as np
import matplotlib.pyplot as plt

import plume_nav_sim as pns
from plume_nav_sim.runner import runner as r
from plume_nav_sim.compose import SimulationSpec, PolicySpec, prepare

# setup inline notebook plotting
%matplotlib inline

## Build env + policy via compose

We use a deterministic temporal-derivative policy for repeatable results.

In [None]:
# Collect the simulation settings in one mapping, then expand it into the spec.
spec_kwargs = {
    "grid_size": (32, 32),
    # start away from center for clearer gradient climb
    "start_location": (4, 28),
    "max_steps": 500,
    "render": True,
    "policy": PolicySpec(builtin="greedy_td"),
    "seed": 123,
}
spec = SimulationSpec(**spec_kwargs)

# prepare() returns the environment and the policy built from the spec.
env, policy = prepare(spec)

# The attribute may be absent on the returned env, hence the None fallback.
env_max_steps = getattr(env, "max_steps", None)
print("env.max_steps:", env_max_steps)
env

## Stream a few steps and display frames

In [None]:
# Number of step events to pull from the stream before stopping.
PREVIEW_STEPS = 10

events, frames = [], []
for step_count, event in enumerate(
    r.stream(env, policy, seed=spec.seed, render=True), start=1
):
    events.append(event)
    # Only keep frames that are actual image arrays.
    if isinstance(event.frame, np.ndarray):
        frames.append(event.frame)
    if step_count >= PREVIEW_STEPS:
        break

# Summary: number of frames, then the step index of the first and last event.
len(frames), events[0].t, events[-1].t

In [None]:
# Show the collected frames in a grid
# Each panel is titled with the step of the event that produced its frame.
# Frames are re-paired with their own events here: indexing `events` by the
# frame's position would mislabel panels whenever an event had no ndarray frame.
framed_events = [ev for ev in events if isinstance(ev.frame, np.ndarray)]

cols = 5
if framed_events:
    rows = int(np.ceil(len(framed_events) / cols))
    # squeeze=False keeps `axes` two-dimensional even for a single row.
    fig, axes = plt.subplots(
        rows, cols, figsize=(cols * 2.5, rows * 2.5), squeeze=False
    )
    for ax, ev in zip(axes.flat, framed_events):
        ax.imshow(ev.frame)
        ax.set_title(f"step={ev.t}")
    # Hide axis decorations everywhere, including unused trailing panels.
    for ax in axes.flat:
        ax.axis("off")
    fig.tight_layout()
    plt.show()
else:
    # Zero frames would otherwise request a zero-height figure.
    print("No frames were collected; nothing to plot.")

## Run a full episode with callbacks

Use `run_episode` to execute until termination or truncation while collecting summary results.

In [None]:
# Per-step records filled in by the callback below; reset on every cell run.
step_rewards, flags = [], []


def on_step(ev):
    """Record one step event's reward and its (terminated, truncated) pair."""
    step_rewards.append(ev.reward)
    done_pair = (ev.terminated, ev.truncated)
    flags.append(done_pair)


episode_options = dict(
    max_steps=1000,
    seed=spec.seed,
    on_step=on_step,
    render=True,
)
result = r.run_episode(env, policy, **episode_options)

# Summary: the result object, number of recorded steps, and the final flags
# (None when the callback was never invoked).
final_flags = flags[-1] if flags else None
result, len(step_rewards), final_flags

In [None]:
# Full-episode trajectory overlay on final frame (headless-safe)
from IPython.display import display

# Stream one episode from the same seed, recording the position reported in
# each event's info dict and keeping the most recent rendered frame.
positions = []
last_frame = None
for ev in r.stream(env, policy, seed=spec.seed, render=True):
    pos = ev.info.get("agent_xy") if isinstance(ev.info, dict) else None
    if pos is not None:
        positions.append(tuple(pos))
    if isinstance(ev.frame, np.ndarray):
        last_frame = ev.frame
    if ev.terminated or ev.truncated:
        break

if last_frame is None:
    raise RuntimeError(
        "No frame rendered; ensure render=True and env supports rgb_array"
    )

# Plot background image with grid extents to match agent_xy coords.
# With origin="upper" the first image row is drawn at the top, so the extent
# must run top-to-bottom (bottom > top) for the y axis to grow downward like
# the image rows; the previous [0, w, 0, h] extent made y grow upward, which
# mirrors the trajectory vertically relative to the frame. The half-cell offset
# places integer grid coordinates at cell centres instead of cell corners.
# NOTE(review): assumes agent_xy uses the same (column, row) indexing as the
# rendered frame, i.e. y grows downward — confirm against the env's renderer.
grid_w, grid_h = getattr(env, "grid_size", (last_frame.shape[1], last_frame.shape[0]))
fig, ax = plt.subplots(figsize=(6, 6))
ax.imshow(
    last_frame,
    extent=[-0.5, grid_w - 0.5, grid_h - 0.5, -0.5],
    origin="upper",
)
if positions:
    xs = [p[0] for p in positions]
    ys = [p[1] for p in positions]
    ax.plot(xs, ys, "-o", color="yellow", markersize=2, linewidth=1, alpha=0.9)
    ax.scatter([xs[0]], [ys[0]], c="lime", s=30, label="start")
    ax.scatter([xs[-1]], [ys[-1]], c="red", s=30, label="end")
    ax.legend(loc="upper right")
ax.set_title("Final frame with agent trajectory")

# Display explicitly, then close so the inline backend does not render the
# figure a second time at the end of the cell.
display(fig)
plt.close(fig)