# Runner Demo (stream and run_episode)

This notebook shows how to use the UI-agnostic runner to stream per-step events and to run a full episode, using the compose layer to build an environment and policy.

In [None]:
# Ensure repo 'src' is on sys.path when running from notebooks/
import pathlib
import sys

# Treat the working directory as the repo root when it contains src/;
# otherwise fall back to its parent directory.
repo_root = pathlib.Path.cwd()
if not (repo_root / "src").exists():
    repo_root = repo_root.parent

# Insert only when absent so re-running this cell does not stack duplicate
# entries at the front of sys.path.
src_dir = str(repo_root / "src")
if src_dir not in sys.path:
    sys.path.insert(0, src_dir)
print("Using repo root:", repo_root)

In [None]:
import numpy as np
import matplotlib.pyplot as plt

import plume_nav_sim as pns
from plume_nav_sim.runner import runner as r
from plume_nav_sim.compose import SimulationSpec, PolicySpec, prepare

## Build env + policy via compose

We use a deterministic temporal-derivative policy for repeatable results.

In [None]:
# Policy selection: the built-in "deterministic_td" option, described on its
# own so the simulation settings below read as a flat list.
policy_spec = PolicySpec(builtin="deterministic_td")

# Simulation settings; spec.seed is reused by the streaming and episode cells.
spec = SimulationSpec(
    grid_size=(16, 16),
    max_steps=100,
    render=True,
    policy=policy_spec,
    seed=123,
)

# prepare() hands back the environment and policy built from the spec.
env, policy = prepare(spec)
env  # last expression: show the environment as the cell output

## Stream a few steps and display frames

In [None]:
# Collect the first 10 streamed events. zip() checks range(10) first, so the
# stream is never advanced past the tenth event.
events, frames = [], []
for step_idx, ev in zip(range(10), r.stream(env, policy, seed=spec.seed, render=True)):
    events.append(ev)
    # Keep only frames that arrived as NumPy arrays.
    if isinstance(ev.frame, np.ndarray):
        frames.append(ev.frame)
len(frames), events[0].t, events[-1].t

In [None]:
# Show the collected frames in a grid, five per row.
# Titles come from the same event that supplied each image, so an event
# without an ndarray frame cannot shift the labels of the frames after it.
framed_events = [ev for ev in events if isinstance(ev.frame, np.ndarray)]
cols = 5
if framed_events:
    rows = (len(framed_events) + cols - 1) // cols  # ceiling division
    fig = plt.figure(figsize=(cols * 2.5, rows * 2.5))
    for i, ev in enumerate(framed_events):
        ax = fig.add_subplot(rows, cols, i + 1)
        ax.imshow(ev.frame)
        ax.set_title(f"step={ev.t}")
        ax.axis("off")
    fig.tight_layout()
    plt.show()
else:
    # With no frames the grid would have zero rows (a zero-height figure with
    # nothing to draw), so report that instead of plotting.
    print("No frames were captured; nothing to display.")

## Run a full episode with callbacks

Use `run_episode` to execute until termination or truncation while collecting summary results.

In [None]:
# Per-step accumulators filled by the callback below; rebinding them here
# means re-running this cell starts from empty lists.
step_rewards, flags = [], []


def on_step(ev):
    """Record one step event's reward and its (terminated, truncated) pair."""
    step_rewards.append(ev.reward)
    end_state = (ev.terminated, ev.truncated)
    flags.append(end_state)


# Keyword options for the episode run: the step limit passed here (50) is
# lower than the spec's max_steps (100), and the seed matches the stream cell.
episode_options = dict(
    max_steps=50,
    seed=spec.seed,
    on_step=on_step,
    render=True,
)
result = r.run_episode(env, policy, **episode_options)

# Final (terminated, truncated) pair, or None when no step was recorded.
last_flags = flags[-1] if flags else None
result, len(step_rewards), last_flags