# Temporal-Gradient Agent Demo (Oriented Control)

This notebook demonstrates a simple oriented agent that:
- Keeps a one-back odor history to estimate a temporal derivative (dC/dt)
- Surges FORWARD while odor rises by at least `eps`; otherwise casts by TURNing, then takes a FORWARD probe step (turning alone does not change the reading)
- Uses a Gaussian plume environment with a step-penalty reward (sparse goal bonus + small per-step penalty)


In [None]:
import os

# Remove any MPLBACKEND override from the environment (no error if it is unset);
# presumably so the inline magic on the next line decides the backend - confirm.
os.environ.pop("MPLBACKEND", None)
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
from dataclasses import dataclass

import plume_nav_sim

# Oriented action ids returned by the agent and handed to env.step().
# NOTE(review): assumes 0/1/2 match plume_nav_sim's "oriented" action encoding - confirm.
FORWARD, TURN_LEFT, TURN_RIGHT = 0, 1, 2

In [None]:
@dataclass
class TemporalGradientAgent:
    cast_right_first: bool = True
    eps: float = 1e-6  # avoid noise-driven flips

    prev_moving: float | None = None  # last concentration after FORWARD
    cast_right: bool = True
    last_action: int | None = None

    def __post_init__(self):
        self.cast_right = self.cast_right_first

    def act(self, obs: np.ndarray) -> int:
        """Oriented surge/cast using temporal gradient.
        Forces a FORWARD probe after turns because rotations don't change concentration.
        """
        c = float(obs[0])

        if self.prev_moving is None:
            self.prev_moving = c
            self.last_action = FORWARD
            return FORWARD

        if self.last_action in (TURN_LEFT, TURN_RIGHT):
            self.last_action = FORWARD
            return FORWARD

        dc = c - self.prev_moving
        if dc >= self.eps:
            self.prev_moving = c
            self.last_action = FORWARD
            return FORWARD

        # Negative trend: alternate cast direction
        self.cast_right = not self.cast_right
        action = TURN_RIGHT if self.cast_right else TURN_LEFT
        self.last_action = action
        return action

In [None]:
# Environment configuration; these names are reused by the later cells
grid_size = (64, 64)
source_location = (48, 48)
start_location = (16, 16)
goal_radius = 1.0
max_steps = 500
seed = 123

# Collect the keyword arguments first so the full configuration can be
# inspected in one place before the environment is built.
env_kwargs = {
    "grid_size": grid_size,
    "source_location": source_location,
    "start_location": start_location,
    "goal_radius": goal_radius,
    "max_steps": max_steps,
    "plume_sigma": 20.0,
    "action_type": "oriented",
    "observation_type": "concentration",
    "reward_type": "step_penalty",
    "render_mode": None,
}
env = plume_nav_sim.make_env(**env_kwargs)

# Reach through private attributes to get the plume array for plotting.
# Each lookup defaults to None, so a missing attribute anywhere along the
# chain ends up as field_array being None and trips the assertion below.
core_env = getattr(env, "_core_env", None)
concentration_field = getattr(core_env, "_concentration_field", None)
field_array = getattr(concentration_field, "field_array", None)
assert field_array is not None, "Could not access plume field for visualization"

In [None]:
# Run one episode
def _agent_position(info_dict):
    """Return the agent position stored in an info dict, as a tuple.

    Reads "agent_position" and falls back to "agent_xy". The same lookup is
    used for the reset info and for every step info, and it tests for None
    explicitly so an array-valued position is never evaluated for truthiness.

    Raises:
        KeyError: If neither key holds a value.
    """
    for key in ("agent_position", "agent_xy"):
        value = info_dict.get(key)
        if value is not None:
            return tuple(value)
    raise KeyError("info has neither 'agent_position' nor 'agent_xy'")


obs, info = env.reset(seed=seed)
agent = TemporalGradientAgent()

# Per-step logs. positions/concentrations/totals/distances also hold the
# initial state, so they end up one entry longer than rewards/actions.
positions = [_agent_position(info)]
concentrations = [float(obs[0])]
rewards = []
totals = [float(info.get("total_reward", 0.0))]
distances = [float(info.get("distance_to_goal", np.nan))]
actions = []

terminated = truncated = False
for t in range(max_steps):
    action = agent.act(obs)
    actions.append(action)
    obs, reward, terminated, truncated, step_info = env.step(action)
    rewards.append(float(reward))
    positions.append(_agent_position(step_info))
    concentrations.append(float(obs[0]))

    # Use the reported running total when there is one; otherwise accumulate
    # it here so the series still reflects this step's reward.
    running_total = step_info.get("total_reward")
    if running_total is None:
        running_total = totals[-1] + float(reward)
    totals.append(float(running_total))

    distances.append(float(step_info.get("distance_to_goal", np.nan)))
    if terminated or truncated:
        break

print(
    f"Finished: steps={len(rewards)}, terminated={terminated}, truncated={truncated}, total_reward={totals[-1]:.3f}"
)

In [None]:
# Two panels: plume field with the trajectory (left), recorded signals (right)
fig, ax = plt.subplots(1, 2, figsize=(12, 5))
field_ax, signal_ax = ax

# Left panel: grayscale field with the path drawn on top.
# Each position is read as (x, y): x runs along the width, y along the height.
field_ax.imshow(field_array, cmap="gray", origin="lower")
xs = [pos[0] for pos in positions]
ys = [pos[1] for pos in positions]
field_ax.plot(xs, ys, color="cyan", linewidth=2, label="path")
src_x, src_y = source_location
field_ax.scatter([src_x], [src_y], c="red", s=40, label="source")
field_ax.legend(loc="upper right")
field_ax.set(
    title="Plume Field + Agent Path",
    xlim=(0, grid_size[0] - 1),
    ylim=(0, grid_size[1] - 1),
)

# Right panel: concentration, its step-to-step difference, and rewards.
# Rewards exist only from step 1 on, so their time axis starts at 1.
ts = np.arange(len(concentrations))
dc = np.diff(concentrations, prepend=concentrations[0])
reward_ts = np.arange(1, len(rewards) + 1)
signal_ax.plot(ts, concentrations, label="concentration")
signal_ax.plot(ts, dc, label="dC (temporal)")
signal_ax.step(reward_ts, rewards, where="post", label="step reward")
signal_ax.plot(ts, totals, label="total reward")
signal_ax.set_title("Signals over time")
signal_ax.legend()

plt.tight_layout()

In [None]:
# Cleanup: close the environment once the episode has been run and plotted
env.close()