# Runner Demo (stream and run_episode)

This notebook shows how to use the UI-agnostic runner to stream per-step events and to run a full episode, using the compose layer to build an environment and policy.

In [None]:
# Ensure repo 'src' is on sys.path when running from notebooks/
import sys
import pathlib

# Use the current directory when it contains 'src'; otherwise fall back to its
# parent (the case when the kernel is started inside a subdirectory).
repo_root = pathlib.Path.cwd()
if not (repo_root / "src").exists():
    repo_root = repo_root.parent

# Keep exactly one entry, at the front of the search path: re-running this cell
# must not pile up duplicates, and 'src' should still take precedence.
src_path = str(repo_root / "src")
sys.path[:] = [entry for entry in sys.path if entry != src_path]
sys.path.insert(0, src_path)
print("Using repo root:", repo_root)

In [None]:
import numpy as np
import matplotlib.pyplot as plt

import plume_nav_sim as pns
from plume_nav_sim.runner import runner as r
from plume_nav_sim.compose import SimulationSpec, PolicySpec, prepare

# No extra inline-plotting setup is done here: figures are shown explicitly with display(...) in the cells below.

## Build env + policy via compose

We use a deterministic temporal-derivative policy for repeatable results.

In [None]:
# Policy selection is kept separate so it is easy to swap out.
policy_spec = PolicySpec(builtin="greedy_td")

# Simulation configuration; the start is placed away from the center for a
# clearer gradient climb.
spec = SimulationSpec(
    seed=123,
    grid_size=(32, 32),
    start_location=(4, 28),
    max_steps=500,
    render=True,
    policy=policy_spec,
)

# `prepare` turns the spec into the (env, policy) pair used by every cell below.
env, policy = prepare(spec)
print("env.max_steps:", getattr(env, "max_steps", None))
env

## Stream a few steps and display frames

In [None]:
# Collect the first 10 streamed events; `frames` keeps only the events whose
# frame is an ndarray, so it can be shorter than `events`.
events, frames = [], []
# range() is listed first so zip stops after 10 events without pulling an 11th.
for step_idx, ev in zip(range(10), r.stream(env, policy, seed=spec.seed, render=True)):
    events.append(ev)
    if isinstance(ev.frame, np.ndarray):
        frames.append(ev.frame)
len(frames), events[0].t, events[-1].t

In [None]:
from IPython.display import display

# Show the collected frames in a grid.
# Each frame is paired with the step index of the event that produced it.
# `frames` skips events without an ndarray frame, so indexing `events` by a
# frame's position could attach the wrong step number to a panel.
framed_steps = [(ev.t, ev.frame) for ev in events if isinstance(ev.frame, np.ndarray)]

if not framed_steps:
    # Zero frames would give rows == 0 and a zero-height figure.
    print("No frames were rendered; nothing to display.")
else:
    cols = 5
    rows = int(np.ceil(len(framed_steps) / cols))
    fig = plt.figure(figsize=(cols * 2.5, rows * 2.5))
    for panel, (t, img) in enumerate(framed_steps, start=1):
        ax = fig.add_subplot(rows, cols, panel)
        ax.imshow(img)
        ax.set_title(f"step={t}")
        ax.axis("off")
    fig.tight_layout()
    display(fig)
    plt.close(fig)  # avoid a second, automatic rendering of the same figure

## Run a full episode with callbacks

Use `run_episode` to execute until termination or truncation while collecting summary results.

In [None]:
# Accumulators filled by the callback; re-created here so re-running the cell
# starts from empty lists.
step_rewards, flags = [], []


def on_step(ev):
    """Record one step event: its reward and its (terminated, truncated) pair."""
    reward = ev.reward
    step_rewards.append(reward)
    status = (ev.terminated, ev.truncated)
    flags.append(status)


# Run one full episode, forwarding every step event to `on_step`.
# NOTE(review): max_steps=1000 here is larger than the spec's max_steps=500;
# presumably the env truncates first — confirm which limit is intended.
episode_kwargs = dict(max_steps=1000, seed=spec.seed, on_step=on_step, render=True)
result = r.run_episode(env, policy, **episode_kwargs)

# Summary: result object, number of recorded steps, and the last
# (terminated, truncated) pair (None when no step was recorded).
result, len(step_rewards), (flags or [None])[-1]

In [None]:
# Diagnostic: 50-step trace of c, dc (moving reference), action, distance to source
import math

action_names = {0: "F", 1: "L", 2: "R"}
prev_moving = None  # reference concentration; unset until the first event arrives
for steps, ev in enumerate(r.stream(env, policy, seed=spec.seed, render=False)):
    c = float(ev.obs[0])
    is_first = prev_moving is None
    # The first event has no reference yet, so its dc is reported as 0.
    dc = 0.0 if is_first else c - prev_moving
    if is_first:
        prev_moving = c

    info = ev.info
    pos = info.get("agent_xy") if isinstance(info, dict) else None
    if pos is None:
        dist = float("nan")
    else:
        # Looked up per step: the source location is read from the live env.
        sx, sy = getattr(env, "source_location", (0, 0))
        dist = math.hypot(pos[0] - sx, pos[1] - sy)

    a = int(ev.action)
    print(
        f"{steps:03d} c={c:.4f} dc={dc:.4f} a={action_names.get(a,a)} dist={dist:.2f}"
    )
    # The moving reference is refreshed only after action 0 ("F").
    # NOTE(review): if the policy also refreshes its reference on turn probes,
    # this trace will diverge from it — confirm against the policy.
    if a == 0:
        prev_moving = c
    # `steps` is zero-based, so steps + 1 is the number of events printed so far.
    if steps + 1 >= 50 or ev.terminated or ev.truncated:
        break

In [None]:
from IPython.display import display

# Distance to source over time for current policy
import math
import matplotlib.pyplot as plt

# One Euclidean distance per event that reports an "agent_xy" position.
dists = []
for ev in r.stream(env, policy, seed=spec.seed, render=False):
    info = ev.info
    pos = info.get("agent_xy") if isinstance(info, dict) else None
    if pos is not None:
        # Read per step from the live env rather than cached before the stream starts.
        sx, sy = getattr(env, "source_location", (0, 0))
        dx, dy = pos[0] - sx, pos[1] - sy
        dists.append(math.hypot(dx, dy))
    if ev.terminated or ev.truncated:
        break

fig, ax = plt.subplots(figsize=(6, 3))
ax.plot(range(len(dists)), dists, "-k")
ax.set_xlabel("step")
ax.set_ylabel("distance to source")
ax.set_title("Distance to source over time")
fig.tight_layout()
display(fig)
plt.close(fig)

In [None]:
from IPython.display import display

# Action distribution conditioned on dc sign (using moving reference)
import numpy as np
import matplotlib.pyplot as plt

# Per-action tallies, split by whether the concentration rose relative to the
# moving reference (dc > 0) or not (dc <= 0; the first event, with dc = 0, lands here).
pos_counts = {0: 0, 1: 0, 2: 0}
neg_counts = {0: 0, 1: 0, 2: 0}
prev_moving = None
for ev in r.stream(env, policy, seed=spec.seed, render=False):
    c = float(ev.obs[0])
    if prev_moving is None:
        dc = 0.0
        prev_moving = c
    else:
        dc = c - prev_moving
    a = int(ev.action)
    bucket = pos_counts if dc > 0 else neg_counts
    bucket[a] = bucket.get(a, 0) + 1
    # The reference is refreshed only after action 0 ("F").
    if a == 0:
        prev_moving = c
    if ev.terminated or ev.truncated:
        break

labels = ["F", "L", "R"]
bar_colors = ["#4caf50", "#2196f3", "#ff9800"]
x = np.arange(len(labels))
fig, axes = plt.subplots(1, 2, figsize=(8, 3))
panels = ((pos_counts, "dc > 0"), (neg_counts, "dc ≤ 0"))
for ax, (counts, title) in zip(axes, panels):
    heights = [counts.get(i, 0) for i in range(len(labels))]
    ax.bar(x, heights, color=bar_colors)
    ax.set_xticks(x, labels)
    ax.set_title(title)
axes[0].set_ylabel("step count")
fig.tight_layout()
display(fig)
plt.close(fig)

In [None]:
# Final-episode trajectory overlay (aligned to grid coords)
from IPython.display import display

# Replay the episode, keeping every reported agent position and the most
# recent rendered frame.
positions = []
last_frame = None
for ev in r.stream(env, policy, seed=spec.seed, render=True):
    pos = ev.info.get("agent_xy") if isinstance(ev.info, dict) else None
    if pos is not None:
        positions.append(tuple(pos))
    if isinstance(ev.frame, np.ndarray):
        last_frame = ev.frame
    if ev.terminated or ev.truncated:
        break
if last_frame is None:
    raise RuntimeError(
        "No frame rendered; ensure render=True and env supports rgb_array"
    )

# Fall back to the frame's own width/height when the env exposes no grid_size.
frame_h, frame_w = last_frame.shape[:2]
grid_w, grid_h = getattr(env, "grid_size", (frame_w, frame_h))
source_xy = getattr(env, "source_location", (grid_w // 2, grid_h // 2))

# Map the image onto grid coordinates with cell centres at integer (x, y) and
# row 0 at the top. This keeps the overlay aligned whatever the render
# resolution and avoids clipping half of the first column/row.
# NOTE(review): assumes agent_xy / source_location are (column, row) cell
# indices and that the frame covers exactly the grid — confirm against the env.
grid_extent = (-0.5, grid_w - 0.5, grid_h - 0.5, -0.5)

fig, ax = plt.subplots(figsize=(6, 6))
ax.imshow(last_frame, extent=grid_extent)
if positions:
    xs = [p[0] for p in positions]
    ys = [p[1] for p in positions]
    ax.plot(xs, ys, "-o", color="yellow", markersize=2, linewidth=1, alpha=0.9)
    ax.scatter([xs[0]], [ys[0]], c="lime", s=36, marker="^", label="start")
    ax.scatter([xs[-1]], [ys[-1]], c="magenta", s=30, label="end")
sx, sy = source_xy
ax.scatter(
    [sx],
    [sy],
    marker="s",
    s=60,
    facecolors="none",
    edgecolors="red",
    linewidths=1.5,
    label="source",
)
# Pin the view to the grid after plotting so markers cannot rescale the axes.
ax.set_xlim(grid_extent[0], grid_extent[1])
ax.set_ylim(grid_extent[2], grid_extent[3])
ax.legend(loc="upper right")
ax.set_title("Final frame with agent trajectory")
display(fig)
plt.close(fig)