# Run/Tumble Temporal-Derivative Demo

This notebook demonstrates the run-and-tumble temporal-derivative policy on the oriented run/tumble action space. Here dC denotes the change in observed concentration from one step to the next.

- RUN (0): keep the current heading and move forward when dC ≥ threshold.
- TUMBLE (1): reset the heading to a uniformly random direction and move forward, both within a single step, when dC < threshold.


In [None]:
# Make the repository's 'src' directory importable. If the current working
# directory has no 'src' child (e.g. the kernel was started in notebooks/),
# fall back to the parent directory.
import pathlib
import sys

repo_root = pathlib.Path.cwd()
repo_root = repo_root if (repo_root / "src").exists() else repo_root.parent
sys.path.insert(0, str(repo_root / "src"))

# Numerical and plotting stack used by the cells below.
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib_inline.backend_inline import set_matplotlib_formats

# Select the inline backend explicitly and request PNG figure output, then
# import `display`, which the plotting cells call on each finished figure.
mpl.use("module://matplotlib_inline.backend_inline")
set_matplotlib_formats("png")
from IPython.display import display

# Project imports come last; presumably they resolve from the repo's 'src'
# directory that was put on sys.path earlier in this cell.
import plume_nav_sim as pns
from plume_nav_sim.policies.run_tumble_td import RunTumbleTemporalDerivativePolicy

In [None]:
# Configure environment (matches other demos)
grid_size = (64, 64)
source_location = (48, 48)
start_location = (16, 16)
max_steps = 500
seed = 123

env = pns.make_env(
    grid_size=grid_size,
    source_location=source_location,
    start_location=start_location,
    max_steps=max_steps,
    action_type="run_tumble",
    observation_type="concentration",
    reward_type="step_penalty",
    render_mode="rgb_array",
)

# Per-step logs: `actions`, `concs` and `dcs` get one entry per executed step;
# `positions` gets one entry per step whose info dict carries "agent_xy".
positions = []
actions = []
concs = []
dcs = []
total_reward = 0.0

# The rollout and the render call run inside try/finally so the environment
# is closed even if one of them raises.
try:
    policy = RunTumbleTemporalDerivativePolicy(threshold=1e-6, eps_seed=seed)
    obs, info = env.reset(seed=seed)
    policy.reset(seed=seed)

    # Seed the previous concentration from the reset observation so that the
    # first logged dC is a real difference rather than a placeholder 0.0.
    last_c = float(obs[0])

    # Fall back to the configured step budget if the env object does not
    # expose `max_steps` directly.
    for _ in range(getattr(env, "max_steps", max_steps)):
        a = int(policy.select_action(obs))
        actions.append(a)
        obs, reward, term, trunc, step_info = env.step(a)
        total_reward += float(reward)

        # Log the post-step concentration and its change since the last one.
        c = float(obs[0])
        concs.append(c)
        dcs.append(c - last_c)
        last_c = c

        pos = step_info.get("agent_xy") if isinstance(step_info, dict) else None
        if pos is not None:
            positions.append(tuple(pos))
        if term or trunc:
            break

    print("Steps:", len(actions), "Total reward:", total_reward)

    # Final frame, plus the geometry needed for the overlay. When the env
    # does not expose these attributes, use the values it was configured
    # with instead of guessing from the frame shape or the grid centre.
    frame = env.render("rgb_array")
    grid_w, grid_h = getattr(env, "grid_size", grid_size)
    sx, sy = getattr(env, "source_location", source_location)
finally:
    env.close()

# Final frame and overlay
fig, ax = plt.subplots(figsize=(6, 6))
ax.imshow(frame)
ax.set_xlim(0, grid_w)
ax.set_ylim(grid_h, 0)  # inverted y-limits: row 0 is drawn at the top
if positions:
    xs = [p[0] for p in positions]
    ys = [p[1] for p in positions]
    ax.plot(xs, ys, "-o", color="yellow", markersize=2, linewidth=1)
    ax.scatter([xs[0]], [ys[0]], c="lime", s=36, marker="^", label="start")
    ax.scatter([xs[-1]], [ys[-1]], c="magenta", s=30, label="end")
ax.scatter(
    [sx],
    [sy],
    marker="s",
    s=60,
    facecolors="none",
    edgecolors="red",
    linewidths=1.5,
    label="source",
)
ax.legend(loc="upper right")
ax.set_title("Run/Tumble TD: final frame with trajectory")
display(fig)
plt.close(fig)

In [None]:
# Summarise the rollout: RUN/TUMBLE tallies next to the logged
# concentration (c) and concentration-change (dc) traces.
import numpy as np

action_log = np.asarray(actions)
run_count = int(np.count_nonzero(action_log == 0))
tumble_count = int(np.count_nonzero(action_log == 1))
print("RUN:", run_count, "TUMBLE:", tumble_count)

fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(10, 3))
counts_ax, series_ax = ax

# Left panel: how often each action was taken.
counts_ax.bar(
    ["RUN", "TUMBLE"],
    [run_count, tumble_count],
    color=["#4caf50", "#ff9800"],
)
counts_ax.set_title("Action counts")

# Right panel: both series against the step index.
ts = np.arange(len(concs))
for series, name in ((concs, "c"), (dcs, "dc")):
    series_ax.plot(ts, series, label=name)
series_ax.set_title("Concentration and dC")
series_ax.legend()

fig.tight_layout()
display(fig)
plt.close(fig)