<a href="https://colab.research.google.com/github/OneFineStarstuff/Cosmic-Brilliance/blob/main/unified_ai_sim_envs_line_seek_py.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# unified_ai/sim/envs/line_seek.py
from __future__ import annotations
from dataclasses import dataclass
import random
import math
import torch

@dataclass
class LineSeekConfig:
    """
    Settings consumed by LineSeekEnv.

    Attributes:
        line_length: Number of integer cells on the line. LineSeekEnv asserts
            that it is odd; positions then span [-line_length // 2, +line_length // 2].
        obs_noise_std: Multiplier applied to a standard-normal sample that is
            added to the distance component of every observation.
        success_reward: Amount added to the step reward when the agent is on
            the target.
        step_penalty: Base term of every step reward. It is added as-is, so a
            negative value is what actually penalises a step.
        steps_per_episode: Step budget; the episode is reported as done once
            this many steps have been taken.
    """

    line_length: int
    obs_noise_std: float
    success_reward: float
    step_penalty: float
    steps_per_episode: int

class LineSeekEnv:
    """
    1D line world on the integer positions [-half, +half], half = line_length // 2.

    The agent starts at position 0. On every reset() the target is drawn
    uniformly from the integer positions of the line (it may coincide with
    the start position).

    Observation: float32 tensor of shape (2,) on `device`:
        [pos / scale, (target - pos) / scale + noise]
    where scale = max(1, half) and noise is a standard-normal sample times
    cfg.obs_noise_std.
    Actions: intended to be {-1, 0, +1}. The action is cast with int() and the
    resulting position is clamped to the line.
    Reward: step_penalty - |target - pos| / scale; success adds success_reward.
    Episode ends on success or when step budget is exhausted.
    """

    def __init__(self, cfg: LineSeekConfig, device: torch.device):
        """
        Store the configuration and place agent and target at 0.

        Args:
            cfg: Environment settings; cfg.line_length must be odd so that
                the line is symmetric around 0.
            device: Device on which noise is sampled and observations are
                allocated.

        Raises:
            AssertionError: If cfg.line_length is not odd.
        """
        assert cfg.line_length % 2 == 1, "line_length must be odd"
        self.cfg = cfg
        self.device = device
        self.half = cfg.line_length // 2
        self.pos = 0
        self.target = 0
        self.t = 0

    @property
    def _scale(self) -> int:
        """Normalisation divisor shared by rewards and observations.

        Clamped to at least 1 so that a one-cell line (half == 0) does not
        divide by zero.
        """
        return max(1, self.half)

    def reset(self, seed: int | None = None) -> torch.Tensor:
        """
        Start a new episode and return the first observation.

        Args:
            seed: When given, seeds the *global* `random` and `torch` RNGs
                before the target is drawn, which makes the episode
                reproducible but also affects other users of those RNGs.

        Returns:
            The initial observation (see the class docstring for its layout).
        """
        if seed is not None:
            random.seed(seed)
            torch.manual_seed(seed)
        self.pos = 0
        self.target = random.randint(-self.half, self.half)
        self.t = 0
        return self._obs()

    def step(self, action: int) -> tuple[torch.Tensor, float, bool, dict]:
        """
        Advance the episode by one step.

        Args:
            action: Displacement to apply to the agent; cast with int().

        Returns:
            (obs, reward, done, info) where info holds "success" (bool),
            "dist" (signed, un-normalised target - pos) and "t" (steps taken).
        """
        self.t += 1
        # Clamp so the agent can never leave the line.
        self.pos = max(-self.half, min(self.half, self.pos + int(action)))
        dist = self.target - self.pos
        success = self.pos == self.target
        done = success or (self.t >= self.cfg.steps_per_episode)
        # Same divisor as the observation, so both stay defined when half == 0.
        reward = self.cfg.step_penalty - abs(dist) / self._scale
        if success:
            reward += self.cfg.success_reward
        obs = self._obs()
        info = {"success": success, "dist": dist, "t": self.t}
        return obs, reward, done, info

    def _obs(self) -> torch.Tensor:
        """Build the observation [normalised position, noisy normalised distance]."""
        scale = self._scale
        dist = (self.target - self.pos) / scale
        # One sample is drawn on every call, even when obs_noise_std is 0.
        noise = torch.randn(1, device=self.device).item() * self.cfg.obs_noise_std
        return torch.tensor(
            [self.pos / scale, dist + noise],
            device=self.device,
            dtype=torch.float32,
        )