Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,12 @@ All notable changes to the ALAS AI Agent project.
## [Unreleased] - 2026-02-17

### Added
- **Deterministic replay harness scaffold**:
- Added `alas_wrapped/dev_tools/record_scenario.py` for fixture capture (screenshots + action manifest).
- Added `agent_orchestrator/replay/mock_device.py` with manifest-driven replay + deviation assertions.
- Added `agent_orchestrator/replay/time_control.py` for simulated clock patching (`time.time`, `time.sleep`, and ALAS timer aliases).
- Added `agent_orchestrator/test_login_replay.py` covering fast-forward replay and deviation detection.

- **Local VLM Setup Plan**: Added `docs/plans/local_vlm_setup.md` - comprehensive primer for serving a vision-language model locally on GeForce 5090:
- Model selection (Qwen3-VL-8B, MiniCPM-V 4.5, Qwen3-VL-32B)
- llama.cpp vs Ollama comparison with setup instructions
Expand Down
Empty file.
165 changes: 165 additions & 0 deletions agent_orchestrator/replay/mock_device.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
from __future__ import annotations

import json
from dataclasses import dataclass
from datetime import datetime
from pathlib import Path
from typing import Any

import numpy as np
from PIL import Image


class ReplayDeviationError(AssertionError):
"""Raised when replay execution diverges from the recorded manifest."""


@dataclass
class SimulatedClock:
"""Logical clock controlled by replay events."""

current_ts: float

@classmethod
def from_timestamp(cls, ts: float) -> "SimulatedClock":
return cls(current_ts=float(ts))

def set(self, ts: float) -> None:
self.current_ts = float(ts)

def advance(self, seconds: float) -> None:
self.current_ts += float(seconds)

def time(self) -> float:
return self.current_ts

def now(self) -> datetime:
return datetime.fromtimestamp(self.current_ts)


class ReplayManifest:
def __init__(self, fixture_dir: Path):
self.fixture_dir = Path(fixture_dir)
self.images_dir = self.fixture_dir / "images"
self.events = self._load_events()
if not self.events:
raise ValueError(f"Fixture manifest is empty: {self.fixture_dir}")

def _load_events(self) -> list[dict[str, Any]]:
manifest_path = self.fixture_dir / "manifest.jsonl"
if not manifest_path.exists():
raise FileNotFoundError(f"Missing manifest: {manifest_path}")

events: list[dict[str, Any]] = []
with manifest_path.open("r", encoding="utf-8") as handle:
for line in handle:
line = line.strip()
if line:
events.append(json.loads(line))
return events


class MockDevice:
"""Replay-only device that enforces recorded screenshot/action ordering."""

def __init__(self, fixture_dir: str | Path, clock: SimulatedClock):
self.manifest = ReplayManifest(Path(fixture_dir))
self.clock = clock
self._index = 0

def _peek(self) -> dict[str, Any]:
if self._index >= len(self.manifest.events):
raise ReplayDeviationError("Replay requested past end of manifest")
return self.manifest.events[self._index]

def _consume(self, expected_type: str) -> dict[str, Any]:
event = self._peek()
actual_type = event.get("event")
if actual_type != expected_type:
raise ReplayDeviationError(
f"Replay divergence at event #{self._index + 1}: expected {expected_type}, found {actual_type}."
)
self._index += 1
return event

def screenshot(self) -> np.ndarray:
event = self._consume(expected_type="screenshot")
self.clock.set(float(event["timestamp"]))
image_path = self.manifest.images_dir / event["image"]
if not image_path.exists():
raise ReplayDeviationError(f"Missing recorded frame: {image_path}")
with Image.open(image_path) as image:
return np.array(image)

def click(self, target: Any) -> None:
event = self._consume(expected_type="action")
Copy link

Copilot AI Feb 23, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Replay clock is only set on screenshot() events. Because action events in the manifest also carry timestamp, any time.time() calls that occur between screenshots during replay will see a stale time value. Consider setting self.clock from event["timestamp"] for action events as well (click/swipe) to preserve the recorded timeline.

Suggested change
event = self._consume(expected_type="action")
event = self._consume(expected_type="action")
timestamp = event.get("timestamp")
if timestamp is not None:
self.clock.set(float(timestamp))

Copilot uses AI. Check for mistakes.
if "timestamp" in event:
self.clock.set(float(event["timestamp"]))
if event.get("action") != "click":
raise ReplayDeviationError(f"Expected click action, found {event.get('action')}")

expected_target = event.get("target")
actual_target = str(target)
if expected_target and expected_target != actual_target:
Comment on lines +102 to +103
Copy link

Copilot AI Feb 23, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

expected_target = event.get("target") is treated as optional (if expected_target and ...). If the manifest is missing target (or has an empty value), replay will silently skip target validation, which weakens the harness' "strict" semantics. Consider requiring a non-empty target for click actions and raising ReplayDeviationError when it's missing.

Suggested change
actual_target = str(target)
if expected_target and expected_target != actual_target:
if not expected_target:
raise ReplayDeviationError("Click action in manifest missing `target`")
actual_target = str(target)
if expected_target != actual_target:

Copilot uses AI. Check for mistakes.
raise ReplayDeviationError(f"Expected click target {expected_target}, found {actual_target}")

area = event.get("area")
if not area:
raise ReplayDeviationError("Click action in manifest missing `area` bounds")
if not isinstance(area, (list, tuple)) or len(area) != 4:
raise ReplayDeviationError(f"Click `area` must be [x1,y1,x2,y2], got {area!r}")

x, y = self._extract_click_point(target)
x1, y1, x2, y2 = area
Comment on lines +112 to +113
Copy link

Copilot AI Feb 23, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Manifest schema issues can currently raise generic exceptions instead of ReplayDeviationError (e.g., x1, y1, x2, y2 = area will throw ValueError if area isn't length 4). Since this is a validation harness, consider validating area shape/types explicitly and raising ReplayDeviationError with a clear message for malformed data.

Suggested change
x, y = self._extract_click_point(target)
x1, y1, x2, y2 = area
# Validate area shape and contents to avoid generic unpacking/type errors
if not isinstance(area, (list, tuple)) or len(area) != 4:
raise ReplayDeviationError(
f"Click action in manifest has malformed `area`; expected sequence of 4 numbers, got: {area!r}"
)
try:
x1, y1, x2, y2 = [float(v) for v in area]
except (TypeError, ValueError):
raise ReplayDeviationError(
f"Click action in manifest has non-numeric `area` bounds: {area!r}"
)
x, y = self._extract_click_point(target)

Copilot uses AI. Check for mistakes.
if not (x1 <= x <= x2 and y1 <= y <= y2):
raise ReplayDeviationError(
f"Click out of expected area: ({x}, {y}) not in [{x1}, {y1}, {x2}, {y2}]"
)

def swipe(self, p1: tuple[int, int], p2: tuple[int, int]) -> None:
event = self._consume(expected_type="action")
if "timestamp" in event:
self.clock.set(float(event["timestamp"]))
if event.get("action") != "swipe":
raise ReplayDeviationError(f"Expected swipe action, found {event.get('action')}")

start_area = event.get("start_area")
end_area = event.get("end_area")
if not start_area or not isinstance(start_area, (list, tuple)) or len(start_area) != 4:
raise ReplayDeviationError(f"Swipe `start_area` must be [x1,y1,x2,y2], got {start_area!r}")
if not end_area or not isinstance(end_area, (list, tuple)) or len(end_area) != 4:
raise ReplayDeviationError(f"Swipe `end_area` must be [x1,y1,x2,y2], got {end_area!r}")
if not self._point_in_area(p1, start_area):
raise ReplayDeviationError(f"Swipe start out of expected area: {p1} not in {start_area}")
if not self._point_in_area(p2, end_area):
raise ReplayDeviationError(f"Swipe end out of expected area: {p2} not in {end_area}")

Comment on lines +94 to +136

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Action required

5. Replay skips action timestamps 🐞 Bug ✓ Correctness

MockDevice sets the simulated clock only when consuming screenshot events and ignores timestamps on
click/swipe action events. This makes time.time()/sleep (patched to the simulated clock) diverge
from the manifest between screenshots and leaves recorded action timestamps unused.
Agent Prompt
### Issue description
Action events in the manifest contain `timestamp`, but `MockDevice.click()` / `MockDevice.swipe()` do not update the `SimulatedClock` from it. Since `patched_time()` routes `time.time()` through this clock, time can be stale between screenshots.

### Issue Context
`screenshot()` already sets the clock from the manifest timestamp; action events should do the same for consistency.

### Fix Focus Areas
- agent_orchestrator/replay/mock_device.py[85-126]

ⓘ Copy this prompt and use it to remediate the issue with your preferred AI generation tools

@staticmethod
def _point_in_area(point: tuple[int, int], area: list[int]) -> bool:
x, y = point
if not isinstance(area, (list, tuple)) or len(area) != 4:
raise ReplayDeviationError(f"Area must be [x1,y1,x2,y2], got {area!r}")
x1, y1, x2, y2 = area
return x1 <= x <= x2 and y1 <= y <= y2

@staticmethod
def _extract_click_point(target: Any) -> tuple[int, int]:
if isinstance(target, (tuple, list)) and len(target) >= 2:
return int(target[0]), int(target[1])

if hasattr(target, "button"):
button_area = getattr(target, "button")
if button_area and len(button_area) == 4:
x1, y1, x2, y2 = map(int, button_area)
return (x1 + x2) // 2, (y1 + y2) // 2

if hasattr(target, "area"):
area = getattr(target, "area")
if area and len(area) == 4:
x1, y1, x2, y2 = map(int, area)
return (x1 + x2) // 2, (y1 + y2) // 2

raise ReplayDeviationError(f"Unable to extract click coordinates from target={target!r}")

def is_manifest_exhausted(self) -> bool:
return self._index >= len(self.manifest.events)
32 changes: 32 additions & 0 deletions agent_orchestrator/replay/time_control.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
from __future__ import annotations

import time
from contextlib import ExitStack, contextmanager
from datetime import datetime
from unittest.mock import patch

from replay.mock_device import SimulatedClock


@contextmanager
def patched_time(clock: SimulatedClock):
"""Patch clock consumers so replay runs at CPU speed with deterministic time."""

class _TimerDatetime:
@staticmethod
def now() -> datetime:
return datetime.fromtimestamp(clock.time())

with ExitStack() as stack:
stack.enter_context(patch("time.time", side_effect=clock.time))
stack.enter_context(patch("time.sleep", side_effect=lambda seconds: clock.advance(seconds)))

# ALAS timer module imports time/datetime directly; patch aliases when available.
try:
stack.enter_context(patch("module.base.timer.time", side_effect=clock.time))
stack.enter_context(patch("module.base.timer.sleep", side_effect=lambda seconds: clock.advance(seconds)))
stack.enter_context(patch("module.base.timer.datetime", _TimerDatetime))
except ModuleNotFoundError:
pass
Comment on lines +24 to +30

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Action required

2. patched_time patches module.base.timer 📘 Rule violation ✓ Correctness

agent_orchestrator/replay/time_control.py conditionally patches ALAS internals via
module.base.timer.*, which relies on importing module.* at runtime. Per the layering rule, code
that imports/depends on ALAS internals should not live under agent_orchestrator/.
Agent Prompt
## Issue description
`agent_orchestrator/replay/time_control.py` depends on ALAS internals (`module.base.timer.*`) via runtime patching, violating the directory-based layering rule.

## Issue Context
Even though the code is guarded by `try/except`, `patch("module.base.timer.*")` requires importing `module.*` when present, which is still an ALAS-internal dependency.

## Fix Focus Areas
- agent_orchestrator/replay/time_control.py[24-30]

ⓘ Copy this prompt and use it to remediate the issue with your preferred AI generation tools


yield
121 changes: 121 additions & 0 deletions agent_orchestrator/test_login_replay.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
from __future__ import annotations

import json
import time
from pathlib import Path

import numpy as np
import pytest
from PIL import Image

from replay.mock_device import MockDevice, ReplayDeviationError, SimulatedClock
from replay.time_control import patched_time


class _ButtonStub:
def __init__(self, area):
self.button = area

def __str__(self):
return "LOGIN_CHECK"


class _WrongButtonStub(_ButtonStub):
def __str__(self):
return "WRONG_TARGET"


class _FakeLoginFlow:
def __init__(self, device):
self.device = device

def handle_app_login(self):
_ = self.device.screenshot()
self.device.click(_ButtonStub((100, 100, 140, 140)))
_ = self.device.screenshot()
self.device.swipe((200, 200), (240, 240))
_ = self.device.screenshot()
return True


def _write_fixture(base: Path) -> Path:
fixture_dir = base / "login_success"
images_dir = fixture_dir / "images"
images_dir.mkdir(parents=True)

for idx in range(1, 4):
image = np.full((10, 10, 3), idx * 30, dtype=np.uint8)
Image.fromarray(image).save(images_dir / f"{idx:04d}.png")

events = [
{"index": 1, "event": "screenshot", "timestamp": 1708600000.100, "frame": 1, "image": "0001.png"},
{
"index": 2,
"event": "action",
"timestamp": 1708600000.101,
"action": "click",
"target": "LOGIN_CHECK",
"area": [90, 90, 150, 150],
},
{"index": 3, "event": "screenshot", "timestamp": 1708600001.100, "frame": 2, "image": "0002.png"},
{
"index": 4,
"event": "action",
"timestamp": 1708600001.101,
"action": "swipe",
"target": "SWIPE",
"start_area": [190, 190, 210, 210],
"end_area": [230, 230, 250, 250],
},
{"index": 5, "event": "screenshot", "timestamp": 1708600002.100, "frame": 3, "image": "0003.png"},
]

with (fixture_dir / "manifest.jsonl").open("w", encoding="utf-8") as handle:
for event in events:
handle.write(json.dumps(event) + "\n")

return fixture_dir


def test_login_replay_fast_forward(tmp_path):
fixture_dir = _write_fixture(tmp_path)
clock = SimulatedClock.from_timestamp(1708599999.0)
mock_device = MockDevice(fixture_dir=fixture_dir, clock=clock)

fake_flow = _FakeLoginFlow(device=mock_device)

with patched_time(clock):
assert fake_flow.handle_app_login() is True

assert mock_device.is_manifest_exhausted() is True
assert clock.time() == pytest.approx(1708600002.100)


def test_replay_deviation_raises(tmp_path):
fixture_dir = _write_fixture(tmp_path)
clock = SimulatedClock.from_timestamp(1708599999.0)
mock_device = MockDevice(fixture_dir=fixture_dir, clock=clock)

_ = mock_device.screenshot()
with pytest.raises(ReplayDeviationError):
mock_device.screenshot()


def test_replay_target_mismatch_raises(tmp_path):
fixture_dir = _write_fixture(tmp_path)
clock = SimulatedClock.from_timestamp(1708599999.0)
mock_device = MockDevice(fixture_dir=fixture_dir, clock=clock)

_ = mock_device.screenshot()
with pytest.raises(ReplayDeviationError):
mock_device.click(_WrongButtonStub((100, 100, 140, 140)))


def test_patched_time_advances_sleep_without_wait():
clock = SimulatedClock.from_timestamp(10.0)
with patched_time(clock):
start = time.time()
time.sleep(2.5)
end = time.time()

assert end - start == pytest.approx(2.5)
Loading