-
Notifications
You must be signed in to change notification settings - Fork 0
Harden deterministic replay harness validation and clock patching #31
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| @@ -0,0 +1,165 @@ | ||||||||||||||||||||||||||||||||
| from __future__ import annotations | ||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||
| import json | ||||||||||||||||||||||||||||||||
| from dataclasses import dataclass | ||||||||||||||||||||||||||||||||
| from datetime import datetime | ||||||||||||||||||||||||||||||||
| from pathlib import Path | ||||||||||||||||||||||||||||||||
| from typing import Any | ||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||
| import numpy as np | ||||||||||||||||||||||||||||||||
| from PIL import Image | ||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||
| class ReplayDeviationError(AssertionError): | ||||||||||||||||||||||||||||||||
| """Raised when replay execution diverges from the recorded manifest.""" | ||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||
| @dataclass | ||||||||||||||||||||||||||||||||
| class SimulatedClock: | ||||||||||||||||||||||||||||||||
| """Logical clock controlled by replay events.""" | ||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||
| current_ts: float | ||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||
| @classmethod | ||||||||||||||||||||||||||||||||
| def from_timestamp(cls, ts: float) -> "SimulatedClock": | ||||||||||||||||||||||||||||||||
| return cls(current_ts=float(ts)) | ||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||
| def set(self, ts: float) -> None: | ||||||||||||||||||||||||||||||||
| self.current_ts = float(ts) | ||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||
| def advance(self, seconds: float) -> None: | ||||||||||||||||||||||||||||||||
| self.current_ts += float(seconds) | ||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||
| def time(self) -> float: | ||||||||||||||||||||||||||||||||
| return self.current_ts | ||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||
| def now(self) -> datetime: | ||||||||||||||||||||||||||||||||
| return datetime.fromtimestamp(self.current_ts) | ||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||
| class ReplayManifest: | ||||||||||||||||||||||||||||||||
| def __init__(self, fixture_dir: Path): | ||||||||||||||||||||||||||||||||
| self.fixture_dir = Path(fixture_dir) | ||||||||||||||||||||||||||||||||
| self.images_dir = self.fixture_dir / "images" | ||||||||||||||||||||||||||||||||
| self.events = self._load_events() | ||||||||||||||||||||||||||||||||
| if not self.events: | ||||||||||||||||||||||||||||||||
| raise ValueError(f"Fixture manifest is empty: {self.fixture_dir}") | ||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||
| def _load_events(self) -> list[dict[str, Any]]: | ||||||||||||||||||||||||||||||||
| manifest_path = self.fixture_dir / "manifest.jsonl" | ||||||||||||||||||||||||||||||||
| if not manifest_path.exists(): | ||||||||||||||||||||||||||||||||
| raise FileNotFoundError(f"Missing manifest: {manifest_path}") | ||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||
| events: list[dict[str, Any]] = [] | ||||||||||||||||||||||||||||||||
| with manifest_path.open("r", encoding="utf-8") as handle: | ||||||||||||||||||||||||||||||||
| for line in handle: | ||||||||||||||||||||||||||||||||
| line = line.strip() | ||||||||||||||||||||||||||||||||
| if line: | ||||||||||||||||||||||||||||||||
| events.append(json.loads(line)) | ||||||||||||||||||||||||||||||||
| return events | ||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||
| class MockDevice: | ||||||||||||||||||||||||||||||||
| """Replay-only device that enforces recorded screenshot/action ordering.""" | ||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||
| def __init__(self, fixture_dir: str | Path, clock: SimulatedClock): | ||||||||||||||||||||||||||||||||
| self.manifest = ReplayManifest(Path(fixture_dir)) | ||||||||||||||||||||||||||||||||
| self.clock = clock | ||||||||||||||||||||||||||||||||
| self._index = 0 | ||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||
| def _peek(self) -> dict[str, Any]: | ||||||||||||||||||||||||||||||||
| if self._index >= len(self.manifest.events): | ||||||||||||||||||||||||||||||||
| raise ReplayDeviationError("Replay requested past end of manifest") | ||||||||||||||||||||||||||||||||
| return self.manifest.events[self._index] | ||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||
| def _consume(self, expected_type: str) -> dict[str, Any]: | ||||||||||||||||||||||||||||||||
| event = self._peek() | ||||||||||||||||||||||||||||||||
| actual_type = event.get("event") | ||||||||||||||||||||||||||||||||
| if actual_type != expected_type: | ||||||||||||||||||||||||||||||||
| raise ReplayDeviationError( | ||||||||||||||||||||||||||||||||
| f"Replay divergence at event #{self._index + 1}: expected {expected_type}, found {actual_type}." | ||||||||||||||||||||||||||||||||
| ) | ||||||||||||||||||||||||||||||||
| self._index += 1 | ||||||||||||||||||||||||||||||||
| return event | ||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||
| def screenshot(self) -> np.ndarray: | ||||||||||||||||||||||||||||||||
| event = self._consume(expected_type="screenshot") | ||||||||||||||||||||||||||||||||
| self.clock.set(float(event["timestamp"])) | ||||||||||||||||||||||||||||||||
| image_path = self.manifest.images_dir / event["image"] | ||||||||||||||||||||||||||||||||
| if not image_path.exists(): | ||||||||||||||||||||||||||||||||
| raise ReplayDeviationError(f"Missing recorded frame: {image_path}") | ||||||||||||||||||||||||||||||||
| with Image.open(image_path) as image: | ||||||||||||||||||||||||||||||||
| return np.array(image) | ||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||
| def click(self, target: Any) -> None: | ||||||||||||||||||||||||||||||||
| event = self._consume(expected_type="action") | ||||||||||||||||||||||||||||||||
| if "timestamp" in event: | ||||||||||||||||||||||||||||||||
| self.clock.set(float(event["timestamp"])) | ||||||||||||||||||||||||||||||||
| if event.get("action") != "click": | ||||||||||||||||||||||||||||||||
| raise ReplayDeviationError(f"Expected click action, found {event.get('action')}") | ||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||
| expected_target = event.get("target") | ||||||||||||||||||||||||||||||||
| actual_target = str(target) | ||||||||||||||||||||||||||||||||
| if expected_target and expected_target != actual_target: | ||||||||||||||||||||||||||||||||
|
Comment on lines
+102
to
+103
|
||||||||||||||||||||||||||||||||
| actual_target = str(target) | |
| if expected_target and expected_target != actual_target: | |
| if not expected_target: | |
| raise ReplayDeviationError("Click action in manifest missing `target`") | |
| actual_target = str(target) | |
| if expected_target != actual_target: |
Copilot
AI
Feb 23, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Manifest schema issues can currently raise generic exceptions instead of ReplayDeviationError (e.g., x1, y1, x2, y2 = area will throw ValueError if area isn't length 4). Since this is a validation harness, consider validating area shape/types explicitly and raising ReplayDeviationError with a clear message for malformed data.
| x, y = self._extract_click_point(target) | |
| x1, y1, x2, y2 = area | |
| # Validate area shape and contents to avoid generic unpacking/type errors | |
| if not isinstance(area, (list, tuple)) or len(area) != 4: | |
| raise ReplayDeviationError( | |
| f"Click action in manifest has malformed `area`; expected sequence of 4 numbers, got: {area!r}" | |
| ) | |
| try: | |
| x1, y1, x2, y2 = [float(v) for v in area] | |
| except (TypeError, ValueError): | |
| raise ReplayDeviationError( | |
| f"Click action in manifest has non-numeric `area` bounds: {area!r}" | |
| ) | |
| x, y = self._extract_click_point(target) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
5. Replay skips action timestamps 🐞 Bug ✓ Correctness
MockDevice sets the simulated clock only when consuming screenshot events and ignores timestamps on click/swipe action events. This makes time.time()/sleep (patched to the simulated clock) diverge from the manifest between screenshots and leaves recorded action timestamps unused.
Agent Prompt
### Issue description
Action events in the manifest contain `timestamp`, but `MockDevice.click()` / `MockDevice.swipe()` do not update the `SimulatedClock` from it. Since `patched_time()` routes `time.time()` through this clock, time can be stale between screenshots.
### Issue Context
`screenshot()` already sets the clock from the manifest timestamp; action events should do the same for consistency.
### Fix Focus Areas
- agent_orchestrator/replay/mock_device.py[85-126]
ⓘ Copy this prompt and use it to remediate the issue with your preferred AI generation tools
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,32 @@ | ||
| from __future__ import annotations | ||
|
|
||
| import time | ||
| from contextlib import ExitStack, contextmanager | ||
| from datetime import datetime | ||
| from unittest.mock import patch | ||
|
|
||
| from replay.mock_device import SimulatedClock | ||
|
|
||
|
|
||
| @contextmanager | ||
| def patched_time(clock: SimulatedClock): | ||
| """Patch clock consumers so replay runs at CPU speed with deterministic time.""" | ||
|
|
||
| class _TimerDatetime: | ||
| @staticmethod | ||
| def now() -> datetime: | ||
| return datetime.fromtimestamp(clock.time()) | ||
|
|
||
| with ExitStack() as stack: | ||
| stack.enter_context(patch("time.time", side_effect=clock.time)) | ||
| stack.enter_context(patch("time.sleep", side_effect=lambda seconds: clock.advance(seconds))) | ||
|
|
||
| # ALAS timer module imports time/datetime directly; patch aliases when available. | ||
| try: | ||
| stack.enter_context(patch("module.base.timer.time", side_effect=clock.time)) | ||
| stack.enter_context(patch("module.base.timer.sleep", side_effect=lambda seconds: clock.advance(seconds))) | ||
| stack.enter_context(patch("module.base.timer.datetime", _TimerDatetime)) | ||
| except ModuleNotFoundError: | ||
| pass | ||
|
Comment on lines
+24
to
+30
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 2. patched_time patches module.base.timer agent_orchestrator/replay/time_control.py conditionally patches ALAS internals via module.base.timer.*, which relies on importing module.* at runtime. Per the layering rule, code that imports/depends on ALAS internals should not live under agent_orchestrator/. Agent Prompt
|
||
|
|
||
| yield | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,121 @@ | ||
| from __future__ import annotations | ||
|
|
||
| import json | ||
| import time | ||
| from pathlib import Path | ||
|
|
||
| import numpy as np | ||
| import pytest | ||
| from PIL import Image | ||
|
|
||
| from replay.mock_device import MockDevice, ReplayDeviationError, SimulatedClock | ||
| from replay.time_control import patched_time | ||
|
|
||
|
|
||
| class _ButtonStub: | ||
| def __init__(self, area): | ||
| self.button = area | ||
|
|
||
| def __str__(self): | ||
| return "LOGIN_CHECK" | ||
|
|
||
|
|
||
| class _WrongButtonStub(_ButtonStub): | ||
| def __str__(self): | ||
| return "WRONG_TARGET" | ||
|
|
||
|
|
||
| class _FakeLoginFlow: | ||
| def __init__(self, device): | ||
| self.device = device | ||
|
|
||
| def handle_app_login(self): | ||
| _ = self.device.screenshot() | ||
| self.device.click(_ButtonStub((100, 100, 140, 140))) | ||
| _ = self.device.screenshot() | ||
| self.device.swipe((200, 200), (240, 240)) | ||
| _ = self.device.screenshot() | ||
| return True | ||
|
|
||
|
|
||
| def _write_fixture(base: Path) -> Path: | ||
| fixture_dir = base / "login_success" | ||
| images_dir = fixture_dir / "images" | ||
| images_dir.mkdir(parents=True) | ||
|
|
||
| for idx in range(1, 4): | ||
| image = np.full((10, 10, 3), idx * 30, dtype=np.uint8) | ||
| Image.fromarray(image).save(images_dir / f"{idx:04d}.png") | ||
|
|
||
| events = [ | ||
| {"index": 1, "event": "screenshot", "timestamp": 1708600000.100, "frame": 1, "image": "0001.png"}, | ||
| { | ||
| "index": 2, | ||
| "event": "action", | ||
| "timestamp": 1708600000.101, | ||
| "action": "click", | ||
| "target": "LOGIN_CHECK", | ||
| "area": [90, 90, 150, 150], | ||
| }, | ||
| {"index": 3, "event": "screenshot", "timestamp": 1708600001.100, "frame": 2, "image": "0002.png"}, | ||
| { | ||
| "index": 4, | ||
| "event": "action", | ||
| "timestamp": 1708600001.101, | ||
| "action": "swipe", | ||
| "target": "SWIPE", | ||
| "start_area": [190, 190, 210, 210], | ||
| "end_area": [230, 230, 250, 250], | ||
| }, | ||
| {"index": 5, "event": "screenshot", "timestamp": 1708600002.100, "frame": 3, "image": "0003.png"}, | ||
| ] | ||
|
|
||
| with (fixture_dir / "manifest.jsonl").open("w", encoding="utf-8") as handle: | ||
| for event in events: | ||
| handle.write(json.dumps(event) + "\n") | ||
|
|
||
| return fixture_dir | ||
|
|
||
|
|
||
| def test_login_replay_fast_forward(tmp_path): | ||
| fixture_dir = _write_fixture(tmp_path) | ||
| clock = SimulatedClock.from_timestamp(1708599999.0) | ||
| mock_device = MockDevice(fixture_dir=fixture_dir, clock=clock) | ||
|
|
||
| fake_flow = _FakeLoginFlow(device=mock_device) | ||
|
|
||
| with patched_time(clock): | ||
| assert fake_flow.handle_app_login() is True | ||
|
|
||
| assert mock_device.is_manifest_exhausted() is True | ||
| assert clock.time() == pytest.approx(1708600002.100) | ||
|
|
||
|
|
||
| def test_replay_deviation_raises(tmp_path): | ||
| fixture_dir = _write_fixture(tmp_path) | ||
| clock = SimulatedClock.from_timestamp(1708599999.0) | ||
| mock_device = MockDevice(fixture_dir=fixture_dir, clock=clock) | ||
|
|
||
| _ = mock_device.screenshot() | ||
| with pytest.raises(ReplayDeviationError): | ||
| mock_device.screenshot() | ||
|
|
||
|
|
||
| def test_replay_target_mismatch_raises(tmp_path): | ||
| fixture_dir = _write_fixture(tmp_path) | ||
| clock = SimulatedClock.from_timestamp(1708599999.0) | ||
| mock_device = MockDevice(fixture_dir=fixture_dir, clock=clock) | ||
|
|
||
| _ = mock_device.screenshot() | ||
| with pytest.raises(ReplayDeviationError): | ||
| mock_device.click(_WrongButtonStub((100, 100, 140, 140))) | ||
|
|
||
|
|
||
| def test_patched_time_advances_sleep_without_wait(): | ||
| clock = SimulatedClock.from_timestamp(10.0) | ||
| with patched_time(clock): | ||
| start = time.time() | ||
| time.sleep(2.5) | ||
| end = time.time() | ||
|
|
||
| assert end - start == pytest.approx(2.5) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Replay clock is only set on
screenshot()events. Because action events in the manifest also carrytimestamp, anytime.time()calls that occur between screenshots during replay will see a stale time value. Consider settingself.clockfromevent["timestamp"]for action events as well (click/swipe) to preserve the recorded timeline.