diff --git a/.pm/tracker.md b/.pm/tracker.md index a548d699..1f245f2e 100755 --- a/.pm/tracker.md +++ b/.pm/tracker.md @@ -1,24 +1,34 @@ # Project Task Tracker -**Last Updated:** 2025-11-30T22:30:00Z +**Last Updated:** 2025-11-30T23:15:00Z ## Status Summary **Recent Progress (since last update):** +- 🎉 **Task 9.1.1 (AI Observer Foundation) COMPLETED** - GitHub Issue [#19](https://github.com/TheWizardsCode/GEngine/issues/19) + - Fixed bug in Observer._get_state() for service mode data unwrapping + - Added 4 new integration tests for SimServiceClient mode + - Enhanced README with comprehensive service mode examples + - All acceptance criteria verified and met + - Unblocks Task 9.2.1 (Rule-Based AI Action Layer) +- 🎉 **Phase 7 COMPLETE** - All player experience features shipped! + - ✅ Task 7.4.1 (Campaign UX) completed and merged via PR #14 + - ✅ Task 7.1.1 (Progression Systems) completed and merged via PR #12 + - ✅ Task 7.3.1 (Tuning & Replayability) completed + - ✅ Task 7.2.1 (Explanations) completed + - 📋 Issues #11, #13 closed +- 🆕 **Phase 8 initiated** - Task 8.1.1 (Containerization) created + - 📋 GitHub Issue [#15](https://github.com/TheWizardsCode/GEngine/issues/15) created + - Status: Not started, awaiting assignment + +**Previous Updates:** + - ✅ Task 7.4.1 (Campaign UX Flows) **COMPLETED** by gamedev-agent (2025-11-30) - Campaign module with create/list/resume/end/autosave functionality - CLI commands: campaign new/list/resume/end/status plus --campaign flag - 23 comprehensive tests (all passing), configuration in simulation.yml - Documentation updated in GDD, implementation plan, and gameplay guide - - GitHub Issue [#13](https://github.com/TheWizardsCode/GEngine/issues/13) ready to close -- ✅ Task 7.1.1 (Progression Systems) **VERIFIED COMPLETE** - - 48 progression tests pass, implementation fully functional - - GitHub Issue [#11](https://github.com/TheWizardsCode/GEngine/issues/11) ready to close -- 🆕 Task 7.1.2 (Per-Agent 
Progression) remains not-started (priority: Low) - -**Previous Updates:** - - ✅ Task 7.1.1 (Progression Systems) **COMPLETED** by gamedev-agent (2025-11-30) - Core progression module with skills, access tiers, reputation implemented - ProgressionSystem integrated with SimEngine tick loop @@ -28,19 +38,18 @@ - 5 difficulty presets created (easy, normal, hard, brutal, tutorial) - Sweep runner and analysis scripts implemented with full test coverage - Documentation updated in gameplay guide -- 📋 GitHub Issue [#9](https://github.com/TheWizardsCode/GEngine/issues/9) completed **Current Priorities:** -1. ✅ **Phase 7 COMPLETE** (M7.1-M7.4 all done) -2. 🚀 **Phase 8 Deployment** - No active development on containerization/K8s -3. 🤖 **Phase 9 AI Testing** - Observer foundation complete, action layer waiting +1. 🚀 **Phase 8 Deployment** - Task 8.1.1 in progress (Issue #15, PR #16) +2. 🤖 **Phase 9 AI Testing** - Task 9.2.1 next (Rule-Based AI Action Layer) +3. 🔧 **Optional Polish** - Task 7.1.2 in progress (Issue #17, PR #18) **Key Risks:** -- ⚠️ **Phase 7 complete, but no Phase 8 work started** - Deployment infrastructure blocks wider distribution -- ⚠️ **No clear ownership for Phase 8-9 tasks** - Need Ross to assign or deprioritize -- ✅ **Phase 7 delivery completed** - All player experience features shipped +- ✅ **Phase 9 M9.1 complete** - AI Observer foundation verified and documented +- ⚠️ **Phase 8 requires ownership assignment** - Who handles Docker/K8s work? 
(Ross to assign) +- ✅ **Phase 7 delivery risk eliminated** - All core player features complete and tested | ID | Task | Status | Priority | Responsible | Updated | |---:|---|---|---|---|---| @@ -65,11 +74,11 @@ | 7.2.1 | Explanations & causal queries (M7.2) | completed | High | Team | 2025-11-30 | | 7.3.1 | Tuning & replayability sweeps (M7.3) | completed | High | Gamedev Agent | 2025-11-30 | | 7.4.1 | Campaign UX flows (M7.4) | completed | Medium | gamedev-agent | 2025-11-30 | -| 8.1.1 | Containerization (Docker + compose) (M8.1) | not-started | Medium | TBD (ask Ross) | 2025-11-30 | +| 8.1.1 | Containerization (Docker + compose) (M8.1) | not-started | High | TBD (ask Ross) | 2025-11-30 | | 8.2.1 | Kubernetes manifests & docs (M8.2) | not-started | Medium | TBD (ask Ross) | 2025-11-30 | | 8.3.1 | Observability in Kubernetes (M8.3) | not-started | Medium | TBD (ask Ross) | 2025-11-30 | | 8.4.1 | Content pipeline tooling & CI (M8.4) | not-started | Medium | TBD (ask Ross) | 2025-11-30 | -| 9.1.1 | AI Observer foundation acceptance (M9.1) | completed | Medium | Team | 2025-11-30 | +| 9.1.1 | AI Observer foundation acceptance (M9.1) | completed | Medium | gamedev-agent | 2025-11-30 | | 9.2.1 | Rule-based AI action layer (M9.2) | not-started | Medium | TBD (ask Ross) | 2025-11-30 | | 9.3.1 | LLM-enhanced AI decisions (M9.3) | not-started | Medium | TBD (ask Ross) | 2025-11-30 | | 9.4.1 | AI tournaments & balance tooling (M9.4) | not-started | Low | TBD (ask Ross) | 2025-11-30 | @@ -274,6 +283,7 @@ - **Last Updated:** 2025-11-30 ### 7.1.2 — Implement Per-Agent Progression Layer (M7.1.x) +- **GitHub Issue:** [#17](https://github.com/TheWizardsCode/GEngine/issues/17) - **Description:** Implement the per-agent progression layer described in GDD §4.1.1 and the implementation plan (M7.1.x), adding a lightweight `AgentProgressionState` keyed by `agent_id` on top of the existing global `ProgressionState`. 
Wire it into `GameState`, `ProgressionSystem.tick(...)`, configuration, and minimal CLI/service surfaces while keeping effects bounded and optional. - **Acceptance Criteria:** - `AgentProgressionState` model exists with specialization, expertise pips, reliability, stress, and mission counters. @@ -357,18 +367,22 @@ - **Last Updated:** 2025-11-30 ### 8.1.1 — Containerization (Docker + Compose) (M8.1) +- **GitHub Issue:** [#15](https://github.com/TheWizardsCode/GEngine/issues/15) - **Description:** Create Dockerfiles and docker-compose configuration for simulation, gateway, and LLM services. - **Acceptance Criteria:** All three services can be built and run via Docker/compose; basic README instructions exist; environment configuration is shared via env vars. -- **Priority:** Medium +- **Priority:** High - **Responsible:** TBD (ask Ross) -- **Dependencies:** Reasonably stable service boundaries and configuration contracts. +- **Dependencies:** Stable service boundaries (✅ Phase 6 complete), configuration contracts (✅ complete). - **Risks & Mitigations:** - Risk: Divergence between local and container configs. Mitigation: Use shared env var contracts and sample env files. + - Risk: Port conflicts or networking issues. Mitigation: Use docker-compose networking with service names. - **Next Steps:** - 1. Draft Dockerfiles for each service. - 2. Add docker-compose orchestration. - 3. Document usage. -- **Last Updated:** 2025-11-29 + 1. Assign owner for Docker/DevOps work. + 2. Draft Dockerfiles for each service (simulation, gateway, LLM). + 3. Add docker-compose orchestration with networking. + 4. Test multi-service startup and inter-service communication. + 5. Document usage in README. +- **Last Updated:** 2025-11-30 ### 8.2.1 — Kubernetes Manifests & Docs (M8.2) - **Description:** Define Kubernetes Deployments/Services/ConfigMaps/Ingress for simulation, gateway, and LLM services, plus supporting documentation. 
@@ -413,18 +427,30 @@ - **Last Updated:** 2025-11-29 ### 9.1.1 — AI Observer Foundation Acceptance (M9.1) +- **GitHub Issue:** [#19](https://github.com/TheWizardsCode/GEngine/issues/19) - **Description:** Ensure AI observer implementation and tooling fully meet M9.1 acceptance criteria across both local and service-mode sims, with tests and documentation. - **Acceptance Criteria:** Observer connects via both SimEngine and SimServiceClient; generates structured JSON and optional natural language commentary; integration tests validate trend detection; README documents usage. - **Priority:** Medium -- **Responsible:** TBD (ask Ross) -- **Dependencies:** Stable simulation APIs and telemetry. +- **Responsible:** gamedev-agent +- **Status:** ✅ COMPLETED +- **Dependencies:** Stable simulation APIs and telemetry (✅ complete). - **Risks & Mitigations:** - Risk: Observer outputs too verbose/noisy. Mitigation: Provide configurable output levels. -- **Next Steps:** - 1. Review current observer implementation/tests. - 2. Close any gaps vs acceptance criteria. - 3. Update README with examples. 
-- **Last Updated:** 2025-11-29 +- **Completion Notes:** + - **Acceptance Criteria Verified:** + - ✅ Observer connects via both SimEngine and SimServiceClient + - ✅ Generates structured JSON and optional natural language commentary + - ✅ Integration tests validate trend detection (4 new SimServiceClient tests added) + - ✅ README documents usage with comprehensive examples + - **Bug Fix:** Fixed `_get_state()` to properly unwrap service response `data` field when using SimServiceClient + - **Tests Added:** 4 new integration tests for SimServiceClient mode: + - `test_observer_with_service_client_observes_ticks` + - `test_observer_with_service_client_detects_trends` + - `test_observer_with_service_client_generates_commentary` + - `test_observer_with_service_client_json_output` + - **README Enhanced:** Added remote SimServiceClient mode programmatic example with comprehensive trend detection and faction swing monitoring + - **Total AI Observer Tests:** 37 tests (all passing) +- **Last Updated:** 2025-11-30 ### 9.2.1 — Rule-Based AI Action Layer (M9.2) - **Description:** Implement rule-based AI strategies and actor that submit intents, log decisions, and support deterministic 100-tick runs. 
diff --git a/README.md b/README.md index c418fa57..a99a3c6a 100644 --- a/README.md +++ b/README.md @@ -703,6 +703,8 @@ Key flags: ### Programmatic Usage +**Local SimEngine mode:** + ```python from gengine.ai_player import Observer, ObserverConfig from gengine.ai_player.observer import create_observer_from_engine @@ -721,6 +723,48 @@ print(report.stability_trend.to_dict()) print(report.commentary) ``` +**Remote SimServiceClient mode:** + +```python +from gengine.ai_player import Observer, ObserverConfig +from gengine.ai_player.observer import create_observer_from_service + +# Connect to a running simulation service +observer = create_observer_from_service( + base_url="http://localhost:8000", + config=ObserverConfig( + tick_budget=100, + analysis_interval=10, + stability_alert_threshold=0.6, + legitimacy_swing_threshold=0.15, + ) +) + +try: + report = observer.observe() + + # Check for critical alerts + if report.alerts: + print("ALERTS:", report.alerts) + + # Examine trend detection results + print(f"Stability: {report.stability_trend.trend}") + print(f" Start: {report.stability_trend.start_value:.3f}") + print(f" End: {report.stability_trend.end_value:.3f}") + + # Review faction dynamics + for faction_id, trend in report.faction_swings.items(): + if trend.alert: + print(f"Faction swing detected: {trend.alert}") + + # Get structured JSON output + import json + print(json.dumps(report.to_dict(), indent=2)) +finally: + # Always close the client when done + observer._client.close() +``` + The Observer output includes: - `stability_trend`: Start/end values, delta, trend direction, alert status @@ -728,7 +772,7 @@ The Observer output includes: - `story_seeds_activated`: List of triggered narrative seeds with tick numbers - `alerts`: Critical warnings (e.g., stability crash) - `commentary`: Natural language summary of the observation period -- `environment_summary`: Final environment metrics +- `environment_summary`: Final environment metrics (stability, economy, agents) 
## LLM Service (Phase 6 M6.3) diff --git a/gamedev-agent-thoughts.txt b/gamedev-agent-thoughts.txt index 25d72386..4d79be8a 100644 --- a/gamedev-agent-thoughts.txt +++ b/gamedev-agent-thoughts.txt @@ -1602,3 +1602,62 @@ uv run python scripts/run_headless_sim.py --world default --ticks 200 --lod bala **GitHub Issues:** - Issue #11 (Task 7.1.1 - Progression): Ready to close - Issue #13 (Task 7.4.1 - Campaign UX): Ready to close + + +## Task 9.1.1 - AI Observer Foundation Review & Polish (2025-11-30T23:13 UTC) +===================================================================== + +### Initial Assessment +- Working branch: copilot/vivid-chinchilla +- Task: Review and polish AI Observer implementation (Issue #19) + +### Acceptance Criteria Analysis: +1. ✅ Observer connects via both SimEngine and SimServiceClient - VERIFIED +2. ✅ Generates structured JSON and optional natural language commentary - VERIFIED +3. ⚠️ Integration tests validate trend detection - NEEDS ENHANCEMENT +4. ⚠️ README documents usage with examples - NEEDS ENHANCEMENT + +### Gaps Identified: +1. No integration test that uses SimServiceClient (only local SimEngine tests exist) +2. README could use more comprehensive service mode examples + +### Changes Planned: +1. Add integration test for Observer with SimServiceClient +2. Enhance README with service mode programmatic example +3. Update tracker.md to mark task 9.1.1 as completed + +### Implementation Log: +- All 33 existing AI observer tests pass +- Observer implementation verified to support both local and remote modes + + + +### Changes Made: +1. Fixed bug in Observer._get_state() - service mode now properly unwraps 'data' field from response +2. Added 4 new integration tests in TestObserverWithSimServiceClient class: + - test_observer_with_service_client_observes_ticks + - test_observer_with_service_client_detects_trends + - test_observer_with_service_client_generates_commentary + - test_observer_with_service_client_json_output +3. 
Enhanced README.md with comprehensive service mode programmatic example +4. Updated .pm/tracker.md to mark task 9.1.1 as completed + +### Test Results: +- All 37 AI observer tests pass +- All acceptance criteria verified + +### Files Changed: +- src/gengine/ai_player/observer.py (bug fix: unwrap service 'data' field) +- tests/ai_player/test_observer.py (added 4 SimServiceClient integration tests) +- README.md (added service mode programmatic example) +- .pm/tracker.md (marked 9.1.1 as completed) +- gamedev-agent-thoughts.txt (this log) + +### Task 9.1.1 Status: COMPLETED +All acceptance criteria met: +✅ Observer connects via both SimEngine and SimServiceClient +✅ Generates structured JSON and optional natural language commentary +✅ Integration tests validate trend detection +✅ README documents usage with examples + + diff --git a/src/gengine/ai_player/observer.py b/src/gengine/ai_player/observer.py index d603a794..37a27fc9 100644 --- a/src/gengine/ai_player/observer.py +++ b/src/gengine/ai_player/observer.py @@ -260,6 +260,12 @@ def _get_state(self) -> dict[str, Any]: For remote connections, will raise an exception if connection fails. Callers should handle connection errors appropriately. + + Returns + ------- + dict + State data containing stability, faction_legitimacy, story_seeds, etc. + For service mode, the 'data' field is automatically unwrapped. 
""" if self._is_local: assert self._engine is not None @@ -267,7 +273,12 @@ def _get_state(self) -> dict[str, Any]: else: assert self._client is not None try: - return self._client.state("summary") + response = self._client.state("summary") + # Service returns {"detail": "...", "data": {...}} + # Unwrap to get the actual state data + if "data" in response: + return response["data"] + return response except Exception as e: logger.error(f"Failed to fetch state from remote service: {e}") raise ConnectionError( diff --git a/tests/ai_player/test_observer.py b/tests/ai_player/test_observer.py index 4f540e86..7e8abc9b 100644 --- a/tests/ai_player/test_observer.py +++ b/tests/ai_player/test_observer.py @@ -3,9 +3,12 @@ from __future__ import annotations import pytest +from fastapi.testclient import TestClient from gengine.ai_player import Observer, ObserverConfig, TrendAnalysis from gengine.ai_player.observer import create_observer_from_engine +from gengine.echoes.client import SimServiceClient +from gengine.echoes.service import create_app from gengine.echoes.sim import SimEngine @@ -490,3 +493,93 @@ def test_create_observer_with_custom_config(self) -> None: observer = create_observer_from_engine(world="default", config=config) assert observer.config.tick_budget == 25 + + +@pytest.fixture +def service_client(): + """Create a SimServiceClient backed by a test server with proper cleanup.""" + engine = SimEngine() + engine.initialize_state(world="default") + app = create_app(engine=engine) + http_client = TestClient(app) + client = SimServiceClient(base_url="http://testserver", client=http_client) + yield client + client.close() + + +class TestObserverWithSimServiceClient: + """Integration tests for Observer using SimServiceClient.""" + + def test_observer_with_service_client_observes_ticks( + self, service_client: SimServiceClient + ) -> None: + """Observer should work correctly with SimServiceClient.""" + config = ObserverConfig(tick_budget=5, analysis_interval=2) + 
observer = Observer(client=service_client, config=config) + + report = observer.observe() + + assert report.ticks_observed == 5 + assert report.end_tick > report.start_tick + assert isinstance(report.stability_trend, TrendAnalysis) + assert isinstance(report.faction_swings, dict) + + def test_observer_with_service_client_detects_trends( + self, service_client: SimServiceClient + ) -> None: + """Observer should detect trends when using SimServiceClient.""" + config = ObserverConfig(tick_budget=10, analysis_interval=5) + observer = Observer(client=service_client, config=config) + + report = observer.observe() + + # Verify trend detection works + assert report.stability_trend.metric_name == "stability" + assert report.stability_trend.trend in ["increasing", "decreasing", "stable"] + assert len(report.stability_trend.samples) > 0 + # Verify faction tracking works + assert len(report.faction_swings) > 0 + for faction_id, trend in report.faction_swings.items(): + assert trend.metric_name.startswith("faction_") + assert trend.trend in ["increasing", "decreasing", "stable"] + + def test_observer_with_service_client_generates_commentary( + self, service_client: SimServiceClient + ) -> None: + """Observer should generate commentary when using SimServiceClient.""" + config = ObserverConfig( + tick_budget=10, + analysis_interval=5, + log_natural_language=True, + ) + observer = Observer(client=service_client, config=config) + + report = observer.observe() + + # Verify commentary is generated + assert isinstance(report.commentary, list) + assert len(report.commentary) > 0 + # Verify structured labels are present + assert any("[STABILITY]" in c for c in report.commentary) + + def test_observer_with_service_client_json_output( + self, service_client: SimServiceClient + ) -> None: + """Observer should produce valid JSON output via SimServiceClient.""" + config = ObserverConfig(tick_budget=5, analysis_interval=2) + observer = Observer(client=service_client, config=config) + + report 
= observer.observe() + result = report.to_dict() + + # Verify JSON structure + assert "ticks_observed" in result + assert "start_tick" in result + assert "end_tick" in result + assert "stability_trend" in result + assert "faction_swings" in result + assert "story_seeds_activated" in result + assert "alerts" in result + assert "commentary" in result + assert "environment_summary" in result + assert "tick_reports_count" in result