From c269b92d63e7fb198b250038fe367edee3e253a3 Mon Sep 17 00:00:00 2001 From: Coldaine <158332486+Coldaine@users.noreply.github.com> Date: Mon, 23 Feb 2026 01:51:43 -0600 Subject: [PATCH 1/2] Harden replay harness validation and time patching --- CHANGELOG.md | 6 + agent_orchestrator/replay/__init__.py | 0 agent_orchestrator/replay/mock_device.py | 153 +++++++++++++++++++++ agent_orchestrator/replay/time_control.py | 32 +++++ agent_orchestrator/test_login_replay.py | 121 ++++++++++++++++ alas_wrapped/dev_tools/record_scenario.py | 160 ++++++++++++++++++++++ docs/ARCHITECTURE.md | 10 ++ docs/ROADMAP.md | 1 + docs/dev/testing.md | 11 ++ 9 files changed, 494 insertions(+) create mode 100644 agent_orchestrator/replay/__init__.py create mode 100644 agent_orchestrator/replay/mock_device.py create mode 100644 agent_orchestrator/replay/time_control.py create mode 100644 agent_orchestrator/test_login_replay.py create mode 100644 alas_wrapped/dev_tools/record_scenario.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 08b357b006..cd587f0aba 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,12 @@ All notable changes to the ALAS AI Agent project. ## [Unreleased] - 2026-02-17 ### Added +- **Deterministic replay harness scaffold**: + - Added `alas_wrapped/dev_tools/record_scenario.py` for fixture capture (screenshots + action manifest). + - Added `agent_orchestrator/replay/mock_device.py` with manifest-driven replay + deviation assertions. + - Added `agent_orchestrator/replay/time_control.py` for simulated clock patching (`time.time`, `time.sleep`, and ALAS timer aliases). + - Added `agent_orchestrator/test_login_replay.py` covering fast-forward replay and deviation detection. + - **Local VLM Setup Plan**: Added `docs/plans/local_vlm_setup.md` - comprehensive primer for serving a vision-language model locally on GeForce 5090: - Model selection (Qwen3-VL-8B, MiniCPM-V 4.5, Qwen3-VL-32B) - llama.cpp vs Ollama comparison with setup instructions diff --git a/agent_orchestrator/replay/__init__.py b/agent_orchestrator/replay/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/agent_orchestrator/replay/mock_device.py b/agent_orchestrator/replay/mock_device.py new file mode 100644 index 0000000000..4f74d7ec40 --- /dev/null +++ b/agent_orchestrator/replay/mock_device.py @@ -0,0 +1,153 @@ +from __future__ import annotations + +import json +from dataclasses import dataclass +from datetime import datetime +from pathlib import Path +from typing import Any + +import numpy as np +from PIL import Image + + +class ReplayDeviationError(AssertionError): + """Raised when replay execution diverges from the recorded manifest.""" + + +@dataclass +class SimulatedClock: + """Logical clock controlled by replay events.""" + + current_ts: float + + @classmethod + def from_timestamp(cls, ts: float) -> "SimulatedClock": + return cls(current_ts=float(ts)) + + def set(self, ts: float) -> None: + self.current_ts = float(ts) + + def advance(self, seconds: float) -> None: + self.current_ts += float(seconds) + + def time(self) -> float: + return self.current_ts + + def now(self) -> datetime: + return datetime.fromtimestamp(self.current_ts) + + +class ReplayManifest: + def __init__(self, fixture_dir: Path): + self.fixture_dir = Path(fixture_dir) + self.images_dir = self.fixture_dir / "images" + self.events = self._load_events() + if not self.events: + raise ValueError(f"Fixture manifest is empty: {self.fixture_dir}") + + def _load_events(self) -> list[dict[str, Any]]: + manifest_path = self.fixture_dir / "manifest.jsonl" + if not manifest_path.exists(): + raise FileNotFoundError(f"Missing manifest: {manifest_path}") + + events: list[dict[str, Any]] = [] + with manifest_path.open("r", encoding="utf-8") as handle: + for line in handle: + line = line.strip() + if line: + events.append(json.loads(line)) + return events + + +class MockDevice: + """Replay-only device that enforces recorded screenshot/action ordering.""" + + def __init__(self, fixture_dir: str | Path, clock: SimulatedClock): + self.manifest = ReplayManifest(Path(fixture_dir)) + self.clock = clock + self._index = 0 + + def _peek(self) -> dict[str, Any]: + if self._index >= len(self.manifest.events): + raise ReplayDeviationError("Replay requested past end of manifest") + return self.manifest.events[self._index] + + def _consume(self, expected_type: str) -> dict[str, Any]: + event = self._peek() + actual_type = event.get("event") + if actual_type != expected_type: + raise ReplayDeviationError( + f"Replay divergence at event #{self._index + 1}: expected {expected_type}, found {actual_type}." + ) + self._index += 1 + return event + + def screenshot(self) -> np.ndarray: + event = self._consume(expected_type="screenshot") + self.clock.set(float(event["timestamp"])) + image_path = self.manifest.images_dir / event["image"] + if not image_path.exists(): + raise ReplayDeviationError(f"Missing recorded frame: {image_path}") + with Image.open(image_path) as image: + return np.array(image) + + def click(self, target: Any) -> None: + event = self._consume(expected_type="action") + if event.get("action") != "click": + raise ReplayDeviationError(f"Expected click action, found {event.get('action')}") + + expected_target = event.get("target") + actual_target = str(target) + if expected_target and expected_target != actual_target: + raise ReplayDeviationError(f"Expected click target {expected_target}, found {actual_target}") + + area = event.get("area") + if not area: + raise ReplayDeviationError("Click action in manifest missing `area` bounds") + + x, y = self._extract_click_point(target) + x1, y1, x2, y2 = area + if not (x1 <= x <= x2 and y1 <= y <= y2): + raise ReplayDeviationError( + f"Click out of expected area: ({x}, {y}) not in [{x1}, {y1}, {x2}, {y2}]" + ) + + def swipe(self, p1: tuple[int, int], p2: tuple[int, int]) -> None: + event = self._consume(expected_type="action") + if event.get("action") != "swipe": + raise ReplayDeviationError(f"Expected swipe action, found {event.get('action')}") + + start_area = event["start_area"] + end_area = event["end_area"] + if not self._point_in_area(p1, start_area): + raise ReplayDeviationError(f"Swipe start out of expected area: {p1} not in {start_area}") + if not self._point_in_area(p2, end_area): + raise ReplayDeviationError(f"Swipe end out of expected area: {p2} not in {end_area}") + + @staticmethod + def _point_in_area(point: tuple[int, int], area: list[int]) -> bool: + x, y = point + x1, y1, x2, y2 = area + return x1 <= x <= x2 and y1 <= y <= y2 + + @staticmethod + def _extract_click_point(target: Any) -> tuple[int, int]: + if isinstance(target, (tuple, list)) and len(target) >= 2: + return int(target[0]), int(target[1]) + + if hasattr(target, "button"): + button_area = getattr(target, "button") + if button_area and len(button_area) == 4: + x1, y1, x2, y2 = map(int, button_area) + return (x1 + x2) // 2, (y1 + y2) // 2 + + if hasattr(target, "area"): + area = getattr(target, "area") + if area and len(area) == 4: + x1, y1, x2, y2 = map(int, area) + return (x1 + x2) // 2, (y1 + y2) // 2 + + raise ReplayDeviationError(f"Unable to extract click coordinates from target={target!r}") + + def is_manifest_exhausted(self) -> bool: + return self._index >= len(self.manifest.events) diff --git a/agent_orchestrator/replay/time_control.py b/agent_orchestrator/replay/time_control.py new file mode 100644 index 0000000000..5b8f656982 --- /dev/null +++ b/agent_orchestrator/replay/time_control.py @@ -0,0 +1,32 @@ +from __future__ import annotations + +import time +from contextlib import ExitStack, contextmanager +from datetime import datetime +from unittest.mock import patch + +from replay.mock_device import SimulatedClock + + +@contextmanager +def patched_time(clock: SimulatedClock): + """Patch clock consumers so replay runs at CPU speed with deterministic time.""" + + class _TimerDatetime: + @staticmethod + def now() -> datetime: + return datetime.fromtimestamp(clock.time()) + + with ExitStack() as stack: + stack.enter_context(patch("time.time", side_effect=clock.time)) + stack.enter_context(patch("time.sleep", side_effect=lambda seconds: clock.advance(seconds))) + + # ALAS timer module imports time/datetime directly; patch aliases when available. + try: + stack.enter_context(patch("module.base.timer.time", side_effect=clock.time)) + stack.enter_context(patch("module.base.timer.sleep", side_effect=lambda seconds: clock.advance(seconds))) + stack.enter_context(patch("module.base.timer.datetime", _TimerDatetime)) + except ModuleNotFoundError: + pass + + yield diff --git a/agent_orchestrator/test_login_replay.py b/agent_orchestrator/test_login_replay.py new file mode 100644 index 0000000000..3129f99f54 --- /dev/null +++ b/agent_orchestrator/test_login_replay.py @@ -0,0 +1,121 @@ +from __future__ import annotations + +import json +import time +from pathlib import Path + +import numpy as np +import pytest +from PIL import Image + +from replay.mock_device import MockDevice, ReplayDeviationError, SimulatedClock +from replay.time_control import patched_time + + +class _ButtonStub: + def __init__(self, area): + self.button = area + + def __str__(self): + return "LOGIN_CHECK" + + +class _WrongButtonStub(_ButtonStub): + def __str__(self): + return "WRONG_TARGET" + + +class _FakeLoginFlow: + def __init__(self, device): + self.device = device + + def handle_app_login(self): + _ = self.device.screenshot() + self.device.click(_ButtonStub((100, 100, 140, 140))) + _ = self.device.screenshot() + self.device.swipe((200, 200), (240, 240)) + _ = self.device.screenshot() + return True + + +def _write_fixture(base: Path) -> Path: + fixture_dir = base / "login_success" + images_dir = fixture_dir / "images" + images_dir.mkdir(parents=True) + + for idx in range(1, 4): + image = np.full((10, 10, 3), idx * 30, dtype=np.uint8) + Image.fromarray(image).save(images_dir / f"{idx:04d}.png") + + events = [ + {"index": 1, "event": "screenshot", "timestamp": 1708600000.100, "frame": 1, "image": "0001.png"}, + { + "index": 2, + "event": "action", + "timestamp": 1708600000.101, + "action": "click", + "target": "LOGIN_CHECK", + "area": [90, 90, 150, 150], + }, + {"index": 3, "event": "screenshot", "timestamp": 1708600001.100, "frame": 2, "image": "0002.png"}, + { + "index": 4, + "event": "action", + "timestamp": 1708600001.101, + "action": "swipe", + "target": "SWIPE", + "start_area": [190, 190, 210, 210], + "end_area": [230, 230, 250, 250], + }, + {"index": 5, "event": "screenshot", "timestamp": 1708600002.100, "frame": 3, "image": "0003.png"}, + ] + + with (fixture_dir / "manifest.jsonl").open("w", encoding="utf-8") as handle: + for event in events: + handle.write(json.dumps(event) + "\n") + + return fixture_dir + + +def test_login_replay_fast_forward(tmp_path): + fixture_dir = _write_fixture(tmp_path) + clock = SimulatedClock.from_timestamp(1708599999.0) + mock_device = MockDevice(fixture_dir=fixture_dir, clock=clock) + + fake_flow = _FakeLoginFlow(device=mock_device) + + with patched_time(clock): + assert fake_flow.handle_app_login() is True + + assert mock_device.is_manifest_exhausted() is True + assert clock.time() == pytest.approx(1708600002.100) + + +def test_replay_deviation_raises(tmp_path): + fixture_dir = _write_fixture(tmp_path) + clock = SimulatedClock.from_timestamp(1708599999.0) + mock_device = MockDevice(fixture_dir=fixture_dir, clock=clock) + + _ = mock_device.screenshot() + with pytest.raises(ReplayDeviationError): + mock_device.screenshot() + + +def test_replay_target_mismatch_raises(tmp_path): + fixture_dir = _write_fixture(tmp_path) + clock = SimulatedClock.from_timestamp(1708599999.0) + mock_device = MockDevice(fixture_dir=fixture_dir, clock=clock) + + _ = mock_device.screenshot() + with pytest.raises(ReplayDeviationError): + mock_device.click(_WrongButtonStub((100, 100, 140, 140))) + + +def test_patched_time_advances_sleep_without_wait(): + clock = SimulatedClock.from_timestamp(10.0) + with patched_time(clock): + start = time.time() + time.sleep(2.5) + end = time.time() + + assert end - start == pytest.approx(2.5) diff --git a/alas_wrapped/dev_tools/record_scenario.py b/alas_wrapped/dev_tools/record_scenario.py new file mode 100644 index 0000000000..27c5310cbb --- /dev/null +++ b/alas_wrapped/dev_tools/record_scenario.py @@ -0,0 +1,160 @@ +from __future__ import annotations + +import argparse +import json +import time +from pathlib import Path +from types import MethodType +from typing import Any + +from PIL import Image + +from alas import AzurLaneAutoScript + + +class ScenarioRecorder: + def __init__(self, scenario_name: str, base_dir: Path | None = None): + base = base_dir or Path("tests/fixtures") + self.fixture_dir = base / scenario_name + self.images_dir = self.fixture_dir / "images" + self.manifest_path = self.fixture_dir / "manifest.jsonl" + self.images_dir.mkdir(parents=True, exist_ok=True) + for old_frame in self.images_dir.glob("*.png"): + old_frame.unlink() + + self._event_index = 0 + self._frame_index = 0 + + def write_event(self, payload: dict[str, Any]) -> None: + self._event_index += 1 + payload["index"] = self._event_index + with self.manifest_path.open("a", encoding="utf-8") as handle: + handle.write(json.dumps(payload) + "\n") + + def save_frame(self, image_array) -> str: + self._frame_index += 1 + frame_name = f"{self._frame_index:04d}.png" + frame_path = self.images_dir / frame_name + Image.fromarray(image_array).save(frame_path) + return frame_name + + @staticmethod + def _extract_button(button: Any) -> tuple[str, list[int] | None]: + target_name = str(button) + area = None + if hasattr(button, "button") and getattr(button, "button"): + area = [int(v) for v in getattr(button, "button")] + elif hasattr(button, "area") and getattr(button, "area"): + area = [int(v) for v in getattr(button, "area")] + + if not area and isinstance(button, (tuple, list)) and len(button) >= 2: + x, y = int(button[0]), int(button[1]) + area = [x, y, x, y] + + return target_name, area + + +class DevicePatchSession: + def __init__(self, device, recorder: ScenarioRecorder): + self.device = device + self.recorder = recorder + self.original_screenshot = device.screenshot + self.original_click = device.click + self.original_swipe = device.swipe + + def __enter__(self): + def wrapped_screenshot(instance, *args, **kwargs): + image = self.original_screenshot(*args, **kwargs) + ts = time.time() + frame_name = self.recorder.save_frame(image) + self.recorder.write_event( + { + "event": "screenshot", + "timestamp": ts, + "frame": self.recorder._frame_index, + "image": frame_name, + } + ) + return image + + def wrapped_click(instance, button, *args, **kwargs): + target, area = self.recorder._extract_button(button) + if area is None: + raise ValueError(f"Unable to infer click area for target={target}") + + self.recorder.write_event( + { + "event": "action", + "timestamp": time.time(), + "action": "click", + "target": target, + "area": area, + } + ) + return self.original_click(button, *args, **kwargs) + + def wrapped_swipe(instance, p1, p2, *args, **kwargs): + self.recorder.write_event( + { + "event": "action", + "timestamp": time.time(), + "action": "swipe", + "target": kwargs.get("name", "SWIPE"), + "start_area": [int(p1[0]), int(p1[1]), int(p1[0]), int(p1[1])], + "end_area": [int(p2[0]), int(p2[1]), int(p2[0]), int(p2[1])], + } + ) + return self.original_swipe(p1, p2, *args, **kwargs) + + self.device.screenshot = MethodType(wrapped_screenshot, self.device) + self.device.click = MethodType(wrapped_click, self.device) + self.device.swipe = MethodType(wrapped_swipe, self.device) + return self + + def __exit__(self, exc_type, exc, tb): + self.device.screenshot = self.original_screenshot + self.device.click = self.original_click + self.device.swipe = self.original_swipe + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser(description="Record ALAS screenshot/action sequence for deterministic replay") + parser.add_argument("scenario", help="Fixture scenario name, saved under tests/fixtures/") + parser.add_argument("--config", default="alas", help="ALAS config name") + parser.add_argument( + "--method", + default="handle_app_login", + help="Device method to invoke while recording (default: handle_app_login)", + ) + parser.add_argument( + "--fixtures-root", + default="tests/fixtures", + help="Fixture root directory (default: tests/fixtures)", + ) + return parser.parse_args() + + +def main() -> int: + args = parse_args() + recorder = ScenarioRecorder(args.scenario, base_dir=Path(args.fixtures_root)) + + if recorder.manifest_path.exists(): + recorder.manifest_path.unlink() + + script = AzurLaneAutoScript(config_name=args.config) + device = script.device + + if not hasattr(device, args.method): + raise AttributeError(f"Device has no method '{args.method}'") + + call = getattr(device, args.method) + with DevicePatchSession(device=device, recorder=recorder): + result = call() + + print(f"Recorded scenario '{args.scenario}' at {recorder.fixture_dir}") + print(f"Events: {recorder._event_index}, frames: {recorder._frame_index}, result: {result}") + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md index 01a54dc645..da59532400 100644 --- a/docs/ARCHITECTURE.md +++ b/docs/ARCHITECTURE.md @@ -76,6 +76,16 @@ - **Key Doc**: [state_machine/README.md](./state_machine/README.md) - **Implementation**: Wired into `AzurLaneAutoScript` via `cached_property`. + +### Deterministic Replay Harness +- **Status**: 🛠️ In Progress +- **Summary**: Offline fixture record/replay loop for validating state-machine regressions without emulator runtime. +- **Components**: + - `alas_wrapped/dev_tools/record_scenario.py` records screenshots + click/swipe events into JSONL manifests. + - `agent_orchestrator/replay/mock_device.py` replays fixture images and enforces event-order/action-area assertions. + - `agent_orchestrator/replay/time_control.py` patches ALAS timer/sleep calls to a simulated clock for fast-forward deterministic execution. +- **Depends on**: State Machine, ALAS Device interface + ### Vision Integration - **Status**: ⏳ Planned - **Summary**: Vision for both building deterministic pipelines AND runtime recovery. diff --git a/docs/ROADMAP.md b/docs/ROADMAP.md index 8b53f41893..ee89a82855 100644 --- a/docs/ROADMAP.md +++ b/docs/ROADMAP.md @@ -50,6 +50,7 @@ All new tools should return: ### Success Criteria - [ ] At least one complete workflow works end-to-end using only deterministic tools (start with login). +- [x] Deterministic replay harness scaffolded (fixture recorder + mock device + simulated clock patches + pytest replay test). - [ ] Tools have documented preconditions/postconditions via `expected_state`/`observed_state`. - [ ] Dashboard tools expose game state (oil, gold, gems, AP, task queue). diff --git a/docs/dev/testing.md b/docs/dev/testing.md index 9f14affff9..fb83dc8161 100644 --- a/docs/dev/testing.md +++ b/docs/dev/testing.md @@ -30,3 +30,14 @@ Gemini Flash is "cheap enough for live testing": ## Implementation *Detailed test infrastructure docs will be added as testing is implemented.* + + +## Deterministic Replay Harness (Current) + +The repository now includes a deterministic replay scaffold for state-machine regression checks: + +- **Recorder**: `alas_wrapped/dev_tools/record_scenario.py` patches ALAS `screenshot/click/swipe` calls and writes a fixture directory with PNG frames + `manifest.jsonl`. +- **Replay Device**: `agent_orchestrator/replay/mock_device.py` replays the manifest stream and raises `ReplayDeviationError` on ordering or coordinate mismatches. +- **Clock Control**: `agent_orchestrator/replay/time_control.py` patches `time.time`, `time.sleep`, and `module.base.timer` aliases so replay runs at CPU speed while preserving exact recorded timestamps. + +This approach gives deterministic verification of timeout-driven behavior without emulator/ADB dependencies during test execution. From c9e3008c4acceee463c654626106dac4ba7a2ea6 Mon Sep 17 00:00:00 2001 From: Patrick MacLyman Date: Mon, 23 Feb 2026 02:19:53 -0600 Subject: [PATCH 2/2] fix: update clock on action events and validate area shape in MockDevice Addresses Qodo review findings: - click()/swipe() now set SimulatedClock from manifest timestamps - area/start_area/end_area validated for type and length before unpacking - _point_in_area() also validates defensively --- agent_orchestrator/replay/mock_device.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/agent_orchestrator/replay/mock_device.py b/agent_orchestrator/replay/mock_device.py index 4f74d7ec40..6e48801f8e 100644 --- a/agent_orchestrator/replay/mock_device.py +++ b/agent_orchestrator/replay/mock_device.py @@ -93,6 +93,8 @@ def screenshot(self) -> np.ndarray: def click(self, target: Any) -> None: event = self._consume(expected_type="action") + if "timestamp" in event: + self.clock.set(float(event["timestamp"])) if event.get("action") != "click": raise ReplayDeviationError(f"Expected click action, found {event.get('action')}") @@ -104,6 +106,8 @@ def click(self, target: Any) -> None: area = event.get("area") if not area: raise ReplayDeviationError("Click action in manifest missing `area` bounds") + if not isinstance(area, (list, tuple)) or len(area) != 4: + raise ReplayDeviationError(f"Click `area` must be [x1,y1,x2,y2], got {area!r}") x, y = self._extract_click_point(target) x1, y1, x2, y2 = area @@ -114,11 +118,17 @@ def click(self, target: Any) -> None: def swipe(self, p1: tuple[int, int], p2: tuple[int, int]) -> None: event = self._consume(expected_type="action") + if "timestamp" in event: + self.clock.set(float(event["timestamp"])) if event.get("action") != "swipe": raise ReplayDeviationError(f"Expected swipe action, found {event.get('action')}") - start_area = event["start_area"] - end_area = event["end_area"] + start_area = event.get("start_area") + end_area = event.get("end_area") + if not start_area or not isinstance(start_area, (list, tuple)) or len(start_area) != 4: + raise ReplayDeviationError(f"Swipe `start_area` must be [x1,y1,x2,y2], got {start_area!r}") + if not end_area or not isinstance(end_area, (list, tuple)) or len(end_area) != 4: + raise ReplayDeviationError(f"Swipe `end_area` must be [x1,y1,x2,y2], got {end_area!r}") if not self._point_in_area(p1, start_area): raise ReplayDeviationError(f"Swipe start out of expected area: {p1} not in {start_area}") if not self._point_in_area(p2, end_area): @@ -127,6 +137,8 @@ def swipe(self, p1: tuple[int, int], p2: tuple[int, int]) -> None: @staticmethod def _point_in_area(point: tuple[int, int], area: list[int]) -> bool: x, y = point + if not isinstance(area, (list, tuple)) or len(area) != 4: + raise ReplayDeviationError(f"Area must be [x1,y1,x2,y2], got {area!r}") x1, y1, x2, y2 = area return x1 <= x <= x2 and y1 <= y <= y2