Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 20 additions & 4 deletions Project_Docs/testing/EVIDENCE_HARNESS.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ The public matrix is intentionally conservative. It covers:
- release manifest generation;
- harness self-tests;
- scenario contract validation;
- software-only scenario replay generation;
- public scenario envelope checks;
- Python regression tests;
- targeted Rust contract tests.
Expand Down Expand Up @@ -51,10 +52,25 @@ Each run writes both JSON and Markdown reports. The default output directory is
under `target/`, so generated evidence stays out of source control unless a
maintainer intentionally promotes a report into release notes.

Generate a deterministic scenario replay plus a browser-viewable HTML canvas:

```bash
python scripts/strix_sim_replay.py \
--scenario sim/scenarios/gps_denied_recon.yaml \
--output target/strix-replays/gps_denied_recon.json \
--html target/strix-replays/gps_denied_recon.html
```

The replay harness is intentionally described as a deterministic kinematic
public replay. It is useful for visual inspection, regression evidence, seeded
event playback, and pre-field behavior review. It is not a hardware, RF,
sensor-fidelity, or field-readiness simulator.

## Next Capabilities

The next useful expansion is scenario-family regression: every public scenario
The next useful expansion is scenario-family batch replay: every public scenario
already declares a seed, metric set, and `pass_envelope`; the next step is to
compare observed metrics against that envelope. After that, add statistical
Monte Carlo sweeps and integration checks for criticality, contagion, and
quorum-style confirmation loops.
run every scenario through replay and compare observed metrics against that
envelope. After that, add statistical Monte Carlo sweeps, richer trace exports,
and integration checks for criticality, contagion, and quorum-style
confirmation loops.
24 changes: 24 additions & 0 deletions Project_Docs/testing/public_test_matrix.json
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,30 @@
"expected_exit": 0,
"timeout_s": 30
},
{
"id": "software_replay_gps_denied",
"name": "Software-only visual replay generation",
"tags": [
"smoke",
"scenario",
"replay",
"visual"
],
"command": [
"python",
"scripts/strix_sim_replay.py",
"--scenario",
"sim/scenarios/gps_denied_recon.yaml",
"--output",
"target/strix-replays/gps_denied_recon.json",
"--html",
"target/strix-replays/gps_denied_recon.html",
"--tick-s",
"10"
],
"expected_exit": 0,
"timeout_s": 30
},
{
"id": "scenario_schema_contract",
"name": "Scenario envelope contract tests",
Expand Down
14 changes: 14 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,20 @@ pip install -e .

**Requirements**: Rust 1.75+, Python 3.11+, maturin 1.11+

## Software-Only Replay

STRIX includes a public-safe deterministic replay harness for inspecting
scenario behavior before hardware or field validation:

```bash
python scripts/strix_sim_replay.py --scenario sim/scenarios/gps_denied_recon.yaml
```

The command writes a JSON timeline and a self-contained HTML canvas under
`target/strix-replays/` by default. It is useful for seeded behavior review,
scenario regression evidence, and visual inspection of agent reactions. It is
not a substitute for hardware, RF, sensor, or field validation.

## Project Structure

```text
Expand Down
11 changes: 11 additions & 0 deletions demo/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,14 @@
The public `demo/` tree contains only lightweight examples and placeholders.

Evaluator-facing collateral, narrated demo scripts, and richer presentation assets are not maintained as part of the public repository. Public examples should stay focused on generic orchestration, simulation, and developer-facing integration.

For a public-safe visual replay, generate a self-contained HTML view from one of
the public scenarios:

```bash
python scripts/strix_sim_replay.py --scenario sim/scenarios/gps_denied_recon.yaml --output target/strix-replays/gps_denied_recon.json --html target/strix-replays/gps_denied_recon.html
```

Open the generated HTML file locally to inspect agent movement, event timing,
constraint avoidance, energy, and replay metrics. Generated replay assets live
under `target/` by default and are not committed.
132 changes: 132 additions & 0 deletions python/tests/test_strix_sim_replay.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
# SPDX-License-Identifier: Apache-2.0

from __future__ import annotations

import importlib.util
import json
import sys
from pathlib import Path


def _load_module():
    """Import ``scripts/strix_sim_replay.py`` by file path and return the module.

    The script is located two directory levels above this test file's
    directory, under ``scripts/``. It is loaded under the module name
    ``strix_sim_replay`` and registered in ``sys.modules`` before its code is
    executed.
    """
    script_path = Path(__file__).resolve().parents[2] / "scripts" / "strix_sim_replay.py"
    module_spec = importlib.util.spec_from_file_location("strix_sim_replay", script_path)
    assert module_spec is not None
    replay_module = importlib.util.module_from_spec(module_spec)
    assert module_spec.loader is not None
    # Registered ahead of execution; presumably so code in the script that
    # resolves its own module through sys.modules during import works — confirm.
    sys.modules[module_spec.name] = replay_module
    module_spec.loader.exec_module(replay_module)
    return replay_module


def _write_scenario(path: Path) -> None:
    """Write the fixed YAML scenario fixture used by the replay tests to ``path``.

    The document declares scenario id ``replay_case``, seed 77, a 30-second
    duration, two drones with explicit initial positions, one ``gps_loss``
    event at time 10, and a ``pass_envelope`` entry for ``area_coverage_pct``
    with min 0 and max 100. The file is written as UTF-8.

    NOTE(review): the nested YAML keys carry no indentation in this literal as
    viewed here; confirm the intended nesting against the checked-in file.
    """
    path.write_text(
        """
scenario_id: replay_case
seed: 77
name: Replay Case
description: Public replay test case
duration_seconds: 30
drones:
count: 2
initial_positions:
- [0, 0, -50]
- [20, 0, -50]
max_speed_ms: 10
endurance_s: 300
environment:
gps_available: false
mission:
type: recon
area:
center: [0, 0, 0]
radius: 100
events:
- time: 10
type: gps_loss
metrics:
- area_coverage_pct
pass_envelope:
area_coverage_pct:
min: 0
max: 100
""",
        encoding="utf-8",
    )


def test_replay_is_deterministic_for_same_seed(tmp_path):
    """Building the replay twice from one scenario file yields identical frames.

    Also checks that the scenario seed (77) is carried into the replay, that
    both agents are reported active, and that the envelope status is
    ``passed``.
    """
    replay_mod = _load_module()
    scenario_file = tmp_path / "scenario.yaml"
    _write_scenario(scenario_file)

    run_a = replay_mod.build_replay(scenario_file, tick_s=10)
    run_b = replay_mod.build_replay(scenario_file, tick_s=10)

    # Frame-by-frame equality is the determinism check.
    assert run_a["frames"] == run_b["frames"]
    assert run_a["scenario"]["seed"] == 77
    assert run_a["metrics"]["active_agents"] == 2
    assert run_a["envelope"]["status"] == "passed"


def test_replay_outputs_public_safe_paths(tmp_path):
    """The replay must not expose the scenario's absolute location.

    For a scenario stored under ``tmp_path`` the recorded path is expected to
    be the placeholder ``<external>/scenario.yaml``.
    """
    replay_mod = _load_module()
    scenario_file = tmp_path / "scenario.yaml"
    _write_scenario(scenario_file)

    recorded_path = replay_mod.build_replay(scenario_file, tick_s=10)["scenario"]["path"]

    assert str(tmp_path) not in recorded_path
    assert recorded_path == "<external>/scenario.yaml"


def test_replay_html_embeds_visualizer_data(tmp_path):
    """Rendered HTML carries the title, the disclaimer text and the scenario id.

    The temporary directory path must not appear anywhere in the markup.
    """
    replay_mod = _load_module()
    scenario_file = tmp_path / "scenario.yaml"
    _write_scenario(scenario_file)

    page = replay_mod.render_html(replay_mod.build_replay(scenario_file, tick_s=10))

    assert "STRIX Replay" in page
    assert "Software-only deterministic kinematic replay" in page
    assert "replay_case" in page
    assert str(tmp_path) not in page


def test_write_replay_creates_json_and_html(tmp_path):
    """``write_replay`` produces a JSON file and an HTML file at the given paths.

    The JSON document must have ``kind`` equal to ``software_replay`` and the
    HTML file must contain the ``STRIX Replay`` title.
    """
    replay_mod = _load_module()
    scenario_file = tmp_path / "scenario.yaml"
    _write_scenario(scenario_file)
    json_path = tmp_path / "replay.json"
    html_path = tmp_path / "replay.html"
    built = replay_mod.build_replay(scenario_file, tick_s=10)

    replay_mod.write_replay(built, json_path, html_path)

    written = json.loads(json_path.read_text(encoding="utf-8"))
    assert written["kind"] == "software_replay"
    assert "STRIX Replay" in html_path.read_text(encoding="utf-8")


def test_replay_handles_zero_index_attrition(tmp_path):
    """An attrition entry for ``drone_id: 0`` takes agent index 0 offline.

    The base scenario is extended with an ``attrition_schedule`` at time 10.
    The replay must then report one active and one offline agent, and the
    second frame must contain an ``agent_offline`` event for agent index 0.
    """
    replay_mod = _load_module()
    scenario_file = tmp_path / "scenario.yaml"
    _write_scenario(scenario_file)

    # Appended verbatim to the base fixture text.
    attrition_yaml = """
attrition_schedule:
- time: 10
drone_id: 0
cause: public_test_event
"""
    base_yaml = scenario_file.read_text(encoding="utf-8")
    scenario_file.write_text(base_yaml + attrition_yaml, encoding="utf-8")

    built = replay_mod.build_replay(scenario_file, tick_s=10)

    assert built["metrics"]["active_agents"] == 1
    assert built["metrics"]["offline_agents"] == 1
    went_offline = []
    for entry in built["frames"][1]["events"]:
        if entry["type"] == "agent_offline":
            went_offline.append(entry)
    assert went_offline[0]["agent_index"] == 0
Loading
Loading