From b148cb538ebb80a5e823c5b9211a524b0ec47bf3 Mon Sep 17 00:00:00 2001 From: Simon Strandgaard Date: Wed, 8 Apr 2026 16:24:55 +0200 Subject: [PATCH 1/7] refactor: remove number prefixes from pipeline output filenames MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Strip NNN-N- prefixes (e.g. 001-2-plan.txt → plan.txt, 030-report.html → report.html). Template filenames now place the placeholder after the name (expert_criticism_{}_raw.json). Updates filenames.py enum values and all 27 files referencing them across code, tests, and documentation. Co-Authored-By: Claude Opus 4.6 (1M context) --- docs/mcp/autonomous_agent_guide.md | 10 +- docs/mcp/mcp_details.md | 4 +- docs/mcp/planexe_mcp_interface.md | 4 +- docs/proposals/06-adopt-on-the-fly.md | 4 +- .../101-luigi-resume-enhancements.md | 8 +- docs/proposals/107-domain-aware-normalizer.md | 6 +- docs/proposals/112-end-to-end-test-plan.md | 4 +- .../114-mcp-interface-feedback-stress-test.md | 2 +- docs/proposals/117-system-prompt-optimizer.md | 12 +- docs/proposals/133-dag-and-rca.md | 26 +- .../66-post-plan-enrichment-swarm.md | 104 +++---- docs/proposals/87-plan-resume-mcp-tool.md | 2 +- .../97-drift-measurement-pseudocode-impl.md | 4 +- .../plans/2026-04-03-extract-constraints.md | 6 +- docs/superpowers/plans/2026-04-05-rca.md | 78 +++--- .../2026-04-03-extract-constraints-design.md | 4 +- .../specs/2026-04-05-rca-design.md | 20 +- worker_plan/worker_plan_api/filenames.py | 258 +++++++++--------- .../plan/run_plan_pipeline.py | 2 +- .../worker_plan_internal/rca/AGENTS.md | 2 +- .../worker_plan_internal/rca/README.md | 20 +- .../worker_plan_internal/rca/__main__.py | 2 +- .../rca/tests/test_output.py | 14 +- .../rca/tests/test_prompts.py | 12 +- .../rca/tests/test_registry.py | 18 +- .../rca/tests/test_tracer.py | 88 +++--- .../utils/tests/test_purge_old_runs.py | 4 +- 27 files changed, 359 insertions(+), 359 deletions(-) diff --git a/docs/mcp/autonomous_agent_guide.md 
b/docs/mcp/autonomous_agent_guide.md index 706349805..f60e13b4a 100644 --- a/docs/mcp/autonomous_agent_guide.md +++ b/docs/mcp/autonomous_agent_guide.md @@ -123,11 +123,11 @@ An advanced pattern: use PlanExe to plan the agent's own work. 4. Agent executes the plan step by step, tracking progress against the WBS Key files in the zip for agent consumption: -- `018-2-wbs_level1.json` — High-level work packages -- `018-5-wbs_level2.json` — Detailed tasks within each package -- `023-2-wbs_level3.json` — Sub-tasks with effort estimates -- `004-2-pre_project_assessment.json` — Feasibility assessment -- `003-6-distill_assumptions_raw.json` — Key assumptions to validate +- `wbs_level1.json` — High-level work packages +- `wbs_level2.json` — Detailed tasks within each package +- `wbs_level3.json` — Sub-tasks with effort estimates +- `pre_project_assessment.json` — Feasibility assessment +- `distill_assumptions_raw.json` — Key assumptions to validate ## Prompt writing tips for agents diff --git a/docs/mcp/mcp_details.md b/docs/mcp/mcp_details.md index a275c4372..2fed4a33a 100644 --- a/docs/mcp/mcp_details.md +++ b/docs/mcp/mcp_details.md @@ -223,7 +223,7 @@ curl -H "X-API-Key: pex_0123456789abcdef" -O "https://mcp.planexe.org/download/2 Download report: ```bash -curl -H "X-API-Key: pex_0123456789abcdef" -O "https://mcp.planexe.org/download/2d57a448-1b09-45aa-ad37-e69891ff6ec7/030-report.html" +curl -H "X-API-Key: pex_0123456789abcdef" -O "https://mcp.planexe.org/download/2d57a448-1b09-45aa-ad37-e69891ff6ec7/report.html" ``` ## Tool Catalog, `mcp_local` @@ -248,7 +248,7 @@ Example call: - Save directory is `PLANEXE_PATH`, or current working directory if unset. - Non-existing directories are created automatically. - If `PLANEXE_PATH` points to a file, download fails. -- Filename is prefixed with plan id (for example `<plan_id>-030-report.html`). +- Filename is prefixed with plan id (for example `<plan_id>-report.html`). - Response includes `saved_path` with the exact local file location. 
## Minimal error-handling contract diff --git a/docs/mcp/planexe_mcp_interface.md b/docs/mcp/planexe_mcp_interface.md index e441f8be3..53efc0244 100644 --- a/docs/mcp/planexe_mcp_interface.md +++ b/docs/mcp/planexe_mcp_interface.md @@ -522,7 +522,7 @@ Use `plan_resume` when `plan_status` shows `failed` or `stopped` and plan genera **Required semantics** -- The MCP tool only accepts plans in `failed` state. However, the underlying Luigi mechanism is more general: Luigi skips any task whose output file already exists and re-executes any task whose output file is missing. This means a completed plan can be partially re-run by deleting `999-pipeline_complete.txt` and the output files of the tasks you want to regenerate — Luigi will re-execute those tasks and all their downstream dependents. The MCP API does not yet expose this capability; it is available when running the pipeline locally via `run_plan_pipeline.py`. +- The MCP tool only accepts plans in `failed` state. However, the underlying Luigi mechanism is more general: Luigi skips any task whose output file already exists and re-executes any task whose output file is missing. This means a completed plan can be partially re-run by deleting `pipeline_complete.txt` and the output files of the tasks you want to regenerate — Luigi will re-execute those tasks and all their downstream dependents. The MCP API does not yet expose this capability; it is available when running the pipeline locally via `run_plan_pipeline.py`. - On success, the same plan_id is reset to `pending` and requeued. - Prior artifacts are **preserved** — the worker restores the output directory from the stored zip snapshot. - `resume_count` tracks how many times the plan has been resumed. @@ -577,7 +577,7 @@ Bump `PIPELINE_VERSION` whenever the pipeline changes in a way that would break - Save directory is `PLANEXE_PATH`. - If `PLANEXE_PATH` is unset, save to current working directory. 
- If `PLANEXE_PATH` points to a file (not a directory), return an error. -- Filenames are `<plan_id>-030-report.html` or `<plan_id>-run.zip`. +- Filenames are `<plan_id>-report.html` or `<plan_id>-run.zip`. - If a filename already exists, append `-1`, `-2`, ... before extension. - Successful responses include `saved_path`. diff --git a/docs/proposals/06-adopt-on-the-fly.md b/docs/proposals/06-adopt-on-the-fly.md index 15ccf31fc..6ef375f1f 100644 --- a/docs/proposals/06-adopt-on-the-fly.md +++ b/docs/proposals/06-adopt-on-the-fly.md @@ -6,9 +6,9 @@ This is a concrete implementation plan for making PlanExe's agent behavior adapt PlanExe already has multiple "early classification" concepts and quality gates that we can build on: -- **Purpose classification (business/personal/other)**: `worker_plan/worker_plan_internal/assume/identify_purpose.py` produces `002-6-identify_purpose.md` and is already used downstream (e.g., SWOT prompt selection). +- **Purpose classification (business/personal/other)**: `worker_plan/worker_plan_internal/assume/identify_purpose.py` produces `identify_purpose.md` and is already used downstream (e.g., SWOT prompt selection). -- **Plan type classification (digital/physical)**: `worker_plan/worker_plan_internal/assume/identify_plan_type.py` produces `002-8-plan_type.md`. Note: it intentionally labels most software development as "physical" (because it assumes a physical workspace/devices). +- **Plan type classification (digital/physical)**: `worker_plan/worker_plan_internal/assume/identify_plan_type.py` produces `plan_type.md`. Note: it intentionally labels most software development as "physical" (because it assumes a physical workspace/devices). - **Levers pipeline**: `worker_plan/worker_plan_internal/lever/*` produces potential levers -> deduped -> enriched -> "vital few" -> scenarios/strategic decisions. 
diff --git a/docs/proposals/101-luigi-resume-enhancements.md b/docs/proposals/101-luigi-resume-enhancements.md index ec46be863..77e9bd31a 100644 --- a/docs/proposals/101-luigi-resume-enhancements.md +++ b/docs/proposals/101-luigi-resume-enhancements.md @@ -85,9 +85,9 @@ Behavior: ``` $ planexe invalidate SelectScenarioTask --run-dir ./run/Qwen_Clean_v1 Would delete: - run/Qwen_Clean_v1/002-17-selected_scenario_raw.json - run/Qwen_Clean_v1/002-18-selected_scenario.json - run/Qwen_Clean_v1/002-19-scenarios.md + run/Qwen_Clean_v1/selected_scenario_raw.json + run/Qwen_Clean_v1/selected_scenario.json + run/Qwen_Clean_v1/scenarios.md Proceed? [y/N] ``` @@ -101,7 +101,7 @@ Tonight we needed to re-run `SelectScenarioTask` after applying a fix. Without k ### The problem -The input plan (`001-2-plan.txt`) is locked in at run start. If a user wants to refine the plan description mid-run — clarify scope, correct a factual error, tighten the framing — there is no supported path. The only option is start a new run from scratch. +The input plan (`plan.txt`) is locked in at run start. If a user wants to refine the plan description mid-run — clarify scope, correct a factual error, tighten the framing — there is no supported path. The only option is start a new run from scratch. ### What we want diff --git a/docs/proposals/107-domain-aware-normalizer.md b/docs/proposals/107-domain-aware-normalizer.md index 1435559ed..17a57ad9f 100644 --- a/docs/proposals/107-domain-aware-normalizer.md +++ b/docs/proposals/107-domain-aware-normalizer.md @@ -401,9 +401,9 @@ MakeAssumptions → [QuantifiedAssumptionExtractor] → [FermiSanityCheck] → [ The three new tasks (in brackets) are inserted between the existing MakeAssumptions and DistillAssumptions tasks. 
Each produces output files following PlanExe's standard naming convention: -- `003-12-fermi_sanity_check_report.json` — detailed per-assumption verdicts -- `003-13-fermi_sanity_check_summary.md` — human-readable summary of findings -- `003-14-normalized_assumptions.json` — all assumptions in standard representation +- `fermi_sanity_check_report.json` — detailed per-assumption verdicts +- `fermi_sanity_check_summary.md` — human-readable summary of findings +- `normalized_assumptions.json` — all assumptions in standard representation The FermiSanityCheck report includes a section on ethical flags, making it visible to both the downstream pipeline tasks and human reviewers. diff --git a/docs/proposals/112-end-to-end-test-plan.md b/docs/proposals/112-end-to-end-test-plan.md index 9eed4a2f1..73c11a5a3 100644 --- a/docs/proposals/112-end-to-end-test-plan.md +++ b/docs/proposals/112-end-to-end-test-plan.md @@ -25,7 +25,7 @@ These tests exercise the MCP server, database, and worker interactions without i **Variant — worker-side check:** 1. Bypass the MCP-layer check (e.g. manually set `parameters["pipeline_version"]` to match current). -2. But ensure the `001-3-planexe_metadata.json` in the zip snapshot has a different version. +2. But ensure the `planexe_metadata.json` in the zip snapshot has a different version. 3. Let the worker pick up the resumed plan. 4. Assert: worker sets plan to failed with progress_message containing "Not resumable". @@ -87,7 +87,7 @@ These tests invoke real LLMs and are non-deterministic, slow (~10-20 min per pla 4. Call `plan_file_info` with `artifact: "report"` — assert `download_url` is present. 5. Call `plan_file_info` with `artifact: "zip"` — assert `download_url` is present. 6. Download the report and verify it is valid HTML containing expected sections. -7. Download the zip and verify `001-3-planexe_metadata.json` is present with correct `pipeline_version`. +7. 
Download the zip and verify `planexe_metadata.json` is present with correct `pipeline_version`. ### 7. Resume after mid-generation failure diff --git a/docs/proposals/114-mcp-interface-feedback-stress-test.md b/docs/proposals/114-mcp-interface-feedback-stress-test.md index c86057192..e056e8318 100644 --- a/docs/proposals/114-mcp-interface-feedback-stress-test.md +++ b/docs/proposals/114-mcp-interface-feedback-stress-test.md @@ -507,7 +507,7 @@ No stale error information leaked between states. ### Files list ordering fix -The files list in `plan_status` now shows the most recent 10 files instead of the first 10. When the plan completed, the agent saw `029-2-self_audit.md`, `030-report.html`, `999-pipeline_complete.txt` etc. instead of the same early pipeline files every time. Much more useful for monitoring progress. +The files list in `plan_status` now shows the most recent 10 files instead of the first 10. When the plan completed, the agent saw `self_audit.md`, `report.html`, `pipeline_complete.txt` etc. instead of the same early pipeline files every time. Much more useful for monitoring progress. ### Agent-server capability mismatch (systemic observation) diff --git a/docs/proposals/117-system-prompt-optimizer.md b/docs/proposals/117-system-prompt-optimizer.md index 84c1957ff..51bcfb3d2 100644 --- a/docs/proposals/117-system-prompt-optimizer.md +++ b/docs/proposals/117-system-prompt-optimizer.md @@ -303,10 +303,10 @@ populate_baseline.py # script to populate baseline from zip files baseline/ # current outputs (extracted from dataset zips) train/ 20260310_hong_kong_game/ - 001-1-start_time.json - 001-2-plan.txt + start_time.json + plan.txt ... - 030-report.html + report.html 20250329_gta_game/ ... 
20250321_silo/ @@ -338,8 +338,8 @@ history/ # captured output, global run coun outputs.jsonl outputs/ 20250321_silo/ - 002-9-potential_levers_raw.json - 002-10-potential_levers.json + potential_levers_raw.json + potential_levers.json activity_overview.json usage_metrics.jsonl 20260310_hong_kong_game/ @@ -382,7 +382,7 @@ scores/ # longitudinal tracking full_plan_comparisons/ # Stage 3 periodic full-plan regenerations 2026-03-20/ hong_kong_game/ - 030-report.html + report.html kpi_comparison.json ``` diff --git a/docs/proposals/133-dag-and-rca.md b/docs/proposals/133-dag-and-rca.md index db5968b0b..e8c8f9a59 100644 --- a/docs/proposals/133-dag-and-rca.md +++ b/docs/proposals/133-dag-and-rca.md @@ -81,7 +81,7 @@ Example: { "id": "executive_summary_markdown", - "path": "025-2-executive_summary.md", + "path": "executive_summary.md", "format": "md", "role": "summary_markdown" } @@ -132,7 +132,7 @@ A stronger format could allow fields like: { "from_node": "executive_summary", - "artifact_path": "025-2-executive_summary.md", + "artifact_path": "executive_summary.md", "used_for": "decision-maker summary section" } @@ -143,7 +143,7 @@ How RCA can work with the current format Goal The goal of RCA is to answer questions like: - • Why is a false claim shown in 030-report.html? + • Why is a false claim shown in report.html? • Which upstream artifact first contained it? • Which node likely introduced it? • Which source file should be inspected first? @@ -153,7 +153,7 @@ Investigation strategy Step 1: Start from the final artifact Begin with the final output artifact, such as: - • 030-report.html + • report.html Find the node that produces it. @@ -210,15 +210,15 @@ Suppose the final report contains the false claim: The project requires 12 full-time engineers. A practical investigation would look like this: - 1. search 030-report.html for the claim + 1. search report.html for the claim 2. inspect the report node inputs - 3. search 025-2-executive_summary.md - 4. 
search 024-2-review_plan.md - 5. search 013-team.md - 6. if the claim appears in 013-team.md, inspect the team_markdown node + 3. search executive_summary.md + 4. search review_plan.md + 5. search team.md + 6. if the claim appears in team.md, inspect the team_markdown node 7. inspect that node’s inputs: - • 011-2-enrich_team_members_environment_info.json - • 012-review_team_raw.json + • enrich_team_members_environment_info.json + • review_team_raw.json 8. search those artifacts for the same claim or the numeric value 9. continue upstream until the earliest occurrence is found 10. inspect the producing node’s source_files @@ -255,7 +255,7 @@ Example: { "id": "review_plan_markdown", - "path": "024-2-review_plan.md", + "path": "review_plan.md", "format": "md", "role": "review_output" } @@ -266,7 +266,7 @@ Example: { "from_node": "review_plan", - "artifact_path": "024-2-review_plan.md", + "artifact_path": "review_plan.md", "used_for": "quality review section" } diff --git a/docs/proposals/66-post-plan-enrichment-swarm.md b/docs/proposals/66-post-plan-enrichment-swarm.md index a88693c1d..5d6a87f6a 100644 --- a/docs/proposals/66-post-plan-enrichment-swarm.md +++ b/docs/proposals/66-post-plan-enrichment-swarm.md @@ -39,15 +39,15 @@ This is not a modification to PlanExe. The planning pipeline is untouched. The s A PlanExe run for even a simple plan (e.g. a small Connecticut egg farm) produces: -- `017-5-identified_documents_to_find.json` — a structured list of real documents that need to be located (zoning ordinances, predator population data, health codes, etc.) -- `017-6-identified_documents_to_create.json` — a structured list of internal documents to draft (Project Charter, Risk Register, Communication Plan, etc.) 
-- `023-4-wbs_project_level1_and_level2_and_level3.csv` — a full Level 1–4 WBS with task UUIDs -- `026-3-schedule_gantt_machai.csv` — project schedule with start/end dates per task -- `021-task_dependencies_raw.json` — dependency graph -- `003-11-consolidate_assumptions_short.md` — key assumptions in plain Markdown -- `013-team.md` — role roster with contract types -- `005-2-project_plan.md` — master plan with resources, permits, and budget narrative -- `002-21-physical_locations.md` — jurisdiction(s) where the project will run +- `identified_documents_to_find.json` — a structured list of real documents that need to be located (zoning ordinances, predator population data, health codes, etc.) +- `identified_documents_to_create.json` — a structured list of internal documents to draft (Project Charter, Risk Register, Communication Plan, etc.) +- `wbs_project_level1_and_level2_and_level3.csv` — a full Level 1–4 WBS with task UUIDs +- `schedule_gantt_machai.csv` — project schedule with start/end dates per task +- `task_dependencies_raw.json` — dependency graph +- `consolidate_assumptions_short.md` — key assumptions in plain Markdown +- `team.md` — role roster with contract types +- `project_plan.md` — master plan with resources, permits, and budget narrative +- `physical_locations.md` — jurisdiction(s) where the project will run All of these are machine-readable and semantically rich. Without enrichment they just sit in a folder. @@ -57,10 +57,10 @@ All of these are machine-readable and semantically rich. Without enrichment they ### Trigger -PlanExe writes `999-pipeline_complete.txt` to the run directory when the pipeline finishes. This file is the trigger. +PlanExe writes `pipeline_complete.txt` to the run directory when the pipeline finishes. This file is the trigger. 
``` -/run/29131a8e-95d1-4f43-9891-920fae2b90ef/999-pipeline_complete.txt +/run/29131a8e-95d1-4f43-9891-920fae2b90ef/pipeline_complete.txt ``` An OpenClaw file-watch hook (or a simple `inotifywait` wrapper on the run root) detects the file and fires the enrichment swarm: @@ -132,8 +132,8 @@ steps: planexe-enrich-agent document-executor --run-dir "$run_dir" --plan-repo "$plan_repo" - --find-list "017-5-identified_documents_to_find.json" - --create-list "017-6-identified_documents_to_create.json" + --find-list "identified_documents_to_find.json" + --create-list "identified_documents_to_create.json" --output-dir "docs/" --json condition: "! git -C $plan_repo ls-files --error-unmatch docs/project-charter.md 2>/dev/null" @@ -160,9 +160,9 @@ steps: planexe-enrich-agent project-board --run-dir "$run_dir" --github-repo "$github_repo" - --wbs-csv "023-4-wbs_project_level1_and_level2_and_level3.csv" - --gantt-csv "026-3-schedule_gantt_machai.csv" - --deps-json "021-task_dependencies_raw.json" + --wbs-csv "wbs_project_level1_and_level2_and_level3.csv" + --gantt-csv "schedule_gantt_machai.csv" + --deps-json "task_dependencies_raw.json" --json condition: $commit_documents.exitcode == 0 @@ -181,9 +181,9 @@ steps: planexe-enrich-agent project-board --run-dir "$run_dir" --github-repo "$github_repo" - --wbs-csv "023-4-wbs_project_level1_and_level2_and_level3.csv" - --gantt-csv "026-3-schedule_gantt_machai.csv" - --deps-json "021-task_dependencies_raw.json" + --wbs-csv "wbs_project_level1_and_level2_and_level3.csv" + --gantt-csv "schedule_gantt_machai.csv" + --deps-json "task_dependencies_raw.json" --apply --json condition: $approve_board.approved @@ -194,7 +194,7 @@ steps: planexe-enrich-agent assumption-validator --run-dir "$run_dir" --plan-repo "$plan_repo" - --assumptions-md "003-11-consolidate_assumptions_short.md" + --assumptions-md "consolidate_assumptions_short.md" --output "validation/assumptions-check.md" --json condition: "! 
git -C $plan_repo ls-files --error-unmatch validation/assumptions-check.md 2>/dev/null" @@ -221,8 +221,8 @@ steps: planexe-enrich-agent team-sourcer --run-dir "$run_dir" --plan-repo "$plan_repo" - --team-md "013-team.md" - --project-plan-md "005-2-project_plan.md" + --team-md "team.md" + --project-plan-md "project_plan.md" --output-dir "sourcing/" --json condition: "! git -C $plan_repo ls-files --error-unmatch sourcing/team-leads.md 2>/dev/null" @@ -249,8 +249,8 @@ steps: planexe-enrich-agent compliance-researcher --run-dir "$run_dir" --plan-repo "$plan_repo" - --project-plan-md "005-2-project_plan.md" - --locations-md "002-21-physical_locations.md" + --project-plan-md "project_plan.md" + --locations-md "physical_locations.md" --output "compliance/requirements.md" --json condition: "! git -C $plan_repo ls-files --error-unmatch compliance/requirements.md 2>/dev/null" @@ -295,8 +295,8 @@ lobster resume --approve **Inputs:** ``` -{run_dir}/017-5-identified_documents_to_find.json -{run_dir}/017-6-identified_documents_to_create.json +{run_dir}/identified_documents_to_find.json +{run_dir}/identified_documents_to_create.json ``` **Example input item (find-list):** @@ -353,8 +353,8 @@ docs/ planexe-enrich-agent document-executor \ --run-dir /run/UUID \ --plan-repo /repos/my-plan \ - --find-list 017-5-identified_documents_to_find.json \ - --create-list 017-6-identified_documents_to_create.json \ + --find-list identified_documents_to_find.json \ + --create-list identified_documents_to_create.json \ --output-dir docs/ \ --json # stdout: { "created": 12, "fetched": 8, "skipped": 0, "errors": [] } @@ -368,9 +368,9 @@ planexe-enrich-agent document-executor \ **Inputs:** ``` -{run_dir}/023-4-wbs_project_level1_and_level2_and_level3.csv -{run_dir}/026-3-schedule_gantt_machai.csv -{run_dir}/021-task_dependencies_raw.json +{run_dir}/wbs_project_level1_and_level2_and_level3.csv +{run_dir}/schedule_gantt_machai.csv +{run_dir}/task_dependencies_raw.json ``` **Example WBS row:** @@ 
-391,7 +391,7 @@ project_key,project_name,project_start_date,project_end_date,... 1. Parse WBS CSV: Level 2 rows → GitHub milestones (with start/end from Gantt CSV). 2. Parse WBS CSV: Level 3 rows → GitHub issues (assigned to milestone, labelled with Level 1). 3. Parse WBS CSV: Level 4 rows → GitHub sub-issues (linked to parent Level 3 issue). -4. Parse `021-task_dependencies_raw.json` → add "Depends on: #N" lines to issue bodies. +4. Parse `task_dependencies_raw.json` → add "Depends on: #N" lines to issue bodies. 5. In dry-run mode (`--json` only): emit the plan as JSON without creating anything. 6. In apply mode (`--apply`): call `gh api` to create milestones and issues. @@ -416,9 +416,9 @@ GitHub project: acme/egg-farm-ct planexe-enrich-agent project-board \ --run-dir /run/UUID \ --github-repo owner/repo \ - --wbs-csv 023-4-wbs_project_level1_and_level2_and_level3.csv \ - --gantt-csv 026-3-schedule_gantt_machai.csv \ - --deps-json 021-task_dependencies_raw.json \ + --wbs-csv wbs_project_level1_and_level2_and_level3.csv \ + --gantt-csv schedule_gantt_machai.csv \ + --deps-json task_dependencies_raw.json \ --json # stdout: { "milestones": 6, "issues": 42, "sub_issues": 127, "plan": [...] } @@ -435,7 +435,7 @@ planexe-enrich-agent project-board ... --apply --json **Input:** ``` -{run_dir}/003-11-consolidate_assumptions_short.md +{run_dir}/consolidate_assumptions_short.md ``` **Example assumption (from actual egg-farm run):** @@ -483,7 +483,7 @@ Notes: Plan-specific; no external data source applies. 
planexe-enrich-agent assumption-validator \ --run-dir /run/UUID \ --plan-repo /repos/my-plan \ - --assumptions-md 003-11-consolidate_assumptions_short.md \ + --assumptions-md consolidate_assumptions_short.md \ --output validation/assumptions-check.md \ --json # stdout: { "confirmed": 1, "uncertain": 1, "contradicted": 0, "unverifiable": 1 } @@ -497,8 +497,8 @@ planexe-enrich-agent assumption-validator \ **Inputs:** ``` -{run_dir}/013-team.md (role roster with contract types) -{run_dir}/005-2-project_plan.md (resources section) +{run_dir}/team.md (role roster with contract types) +{run_dir}/project_plan.md (resources section) ``` **Example input (from actual egg-farm run):** @@ -511,10 +511,10 @@ planexe-enrich-agent assumption-validator \ ``` **Actions:** -1. Parse role names and contract types from `013-team.md`. -2. Extract location from `002-21-physical_locations.md` (e.g. "Litchfield County, CT"). +1. Parse role names and contract types from `team.md`. +2. Extract location from `physical_locations.md` (e.g. "Litchfield County, CT"). 3. For each role: run targeted web searches for real professionals/vendors in that location. -4. For each resource mentioned in `005-2-project_plan.md`: find actual local suppliers. +4. For each resource mentioned in `project_plan.md`: find actual local suppliers. 5. Compile findings with names, contact info, and notes. 
**Outputs committed to plan repo:** @@ -542,8 +542,8 @@ Example `team-leads.md`: planexe-enrich-agent team-sourcer \ --run-dir /run/UUID \ --plan-repo /repos/my-plan \ - --team-md 013-team.md \ - --project-plan-md 005-2-project_plan.md \ + --team-md team.md \ + --project-plan-md project_plan.md \ --output-dir sourcing/ \ --json # stdout: { "roles_sourced": 8, "vendors_found": 5, "errors": [] } @@ -557,8 +557,8 @@ planexe-enrich-agent team-sourcer \ **Inputs:** ``` -{run_dir}/005-2-project_plan.md (permits mentioned) -{run_dir}/002-21-physical_locations.md (jurisdiction) +{run_dir}/project_plan.md (permits mentioned) +{run_dir}/physical_locations.md (jurisdiction) ``` **Example location (actual run):** @@ -569,8 +569,8 @@ A small farm in Litchfield County, CT ``` **Actions:** -1. Extract jurisdiction (state, county, municipality) from `002-21-physical_locations.md`. -2. Extract permit types and regulatory domains mentioned in `005-2-project_plan.md`. +1. Extract jurisdiction (state, county, municipality) from `physical_locations.md`. +2. Extract permit types and regulatory domains mentioned in `project_plan.md`. 3. For each regulatory requirement: search for actual permit names, forms, fees, and filing procedures. 4. Compile into a structured compliance report. 
@@ -608,8 +608,8 @@ Example output: planexe-enrich-agent compliance-researcher \ --run-dir /run/UUID \ --plan-repo /repos/my-plan \ - --project-plan-md 005-2-project_plan.md \ - --locations-md 002-21-physical_locations.md \ + --project-plan-md project_plan.md \ + --locations-md physical_locations.md \ --output compliance/requirements.md \ --json # stdout: { "requirements_found": 8, "jurisdictions_searched": 3, "errors": [] } @@ -653,8 +653,8 @@ skills/ git_state.py # Shared: idempotency checks, commit helpers tests/ fixtures/ - 017-5-identified_documents_to_find.json # Sample from real run - 023-4-wbs_project_level1_and_level2_and_level3.csv + identified_documents_to_find.json # Sample from real run + wbs_project_level1_and_level2_and_level3.csv test_document_executor.py test_project_board_setup.py test_assumption_validator.py diff --git a/docs/proposals/87-plan-resume-mcp-tool.md b/docs/proposals/87-plan-resume-mcp-tool.md index ebfec308e..39462b9bb 100644 --- a/docs/proposals/87-plan-resume-mcp-tool.md +++ b/docs/proposals/87-plan-resume-mcp-tool.md @@ -72,7 +72,7 @@ From the module docstring: ``` In order to resume an unfinished run. Insert the run_id_dir of the thing you want to resume. -If it's an already finished run, then remove the "999-pipeline_complete.txt" file. +If it's an already finished run, then remove the "pipeline_complete.txt" file. PROMPT> ./planexe create_plan --run-id-dir /absolute/path/to/PlanExe_20250216_150332 ``` diff --git a/docs/proposals/97-drift-measurement-pseudocode-impl.md b/docs/proposals/97-drift-measurement-pseudocode-impl.md index 1077fa68a..2ff9bd113 100644 --- a/docs/proposals/97-drift-measurement-pseudocode-impl.md +++ b/docs/proposals/97-drift-measurement-pseudocode-impl.md @@ -34,7 +34,7 @@ The task runs **after** the pipeline has fully completed. 
It is an optional post ``` Inputs required: -- `001-2-initial_plan.txt` — the original user prompt (already exists) +- `plan.txt` — the original user prompt (already exists) - `final-report.md` or equivalent final plan artifact (already exists) Output: @@ -219,7 +219,7 @@ class DriftEvaluationTask(PlanTask): def requires(self): return { - 'prompt': self.clone(SetupTask), # 001-2-initial_plan.txt + 'prompt': self.clone(SetupTask), # plan.txt 'report': self.clone(FinalReportTask), # or equivalent final artifact } diff --git a/docs/superpowers/plans/2026-04-03-extract-constraints.md b/docs/superpowers/plans/2026-04-03-extract-constraints.md index 5584d4c1f..e72efbb08 100644 --- a/docs/superpowers/plans/2026-04-03-extract-constraints.md +++ b/docs/superpowers/plans/2026-04-03-extract-constraints.md @@ -573,14 +573,14 @@ git commit -m "Add unit and LLM integration tests for extract_constraints" In `worker_plan/worker_plan_api/filenames.py`, insert after line 8 (`SCREEN_PLANNING_PROMPT_MARKDOWN`): ```python - EXTRACT_CONSTRAINTS_RAW = "002-0-extract_constraints_raw.json" - EXTRACT_CONSTRAINTS_MARKDOWN = "002-0-extract_constraints.md" + EXTRACT_CONSTRAINTS_RAW = "extract_constraints_raw.json" + EXTRACT_CONSTRAINTS_MARKDOWN = "extract_constraints.md" ``` - [ ] **Step 2: Verify syntax** Run: `cd worker_plan && /opt/homebrew/bin/python3.11 -c "from worker_plan_api.filenames import FilenameEnum; print(FilenameEnum.EXTRACT_CONSTRAINTS_RAW.value)"` -Expected: `002-0-extract_constraints_raw.json` +Expected: `extract_constraints_raw.json` - [ ] **Step 3: Commit** diff --git a/docs/superpowers/plans/2026-04-05-rca.md b/docs/superpowers/plans/2026-04-05-rca.md index 8b9770497..de5b8e79a 100644 --- a/docs/superpowers/plans/2026-04-05-rca.md +++ b/docs/superpowers/plans/2026-04-05-rca.md @@ -87,22 +87,22 @@ class TestNodeInfo(unittest.TestCase): class TestFindStageByFilename(unittest.TestCase): def test_find_report(self): - stage = find_node_by_filename("030-report.html") + stage = 
find_node_by_filename("report.html") self.assertIsNotNone(stage) self.assertEqual(node.name, "report") def test_find_potential_levers_clean(self): - stage = find_node_by_filename("002-10-potential_levers.json") + stage = find_node_by_filename("potential_levers.json") self.assertIsNotNone(stage) self.assertEqual(node.name, "potential_levers") def test_find_potential_levers_raw(self): - stage = find_node_by_filename("002-9-potential_levers_raw.json") + stage = find_node_by_filename("potential_levers_raw.json") self.assertIsNotNone(stage) self.assertEqual(node.name, "potential_levers") def test_find_executive_summary(self): - stage = find_node_by_filename("025-2-executive_summary.md") + stage = find_node_by_filename("executive_summary.md") self.assertIsNotNone(stage) self.assertEqual(node.name, "executive_summary") @@ -121,10 +121,10 @@ class TestGetUpstreamFiles(unittest.TestCase): with TemporaryDirectory() as d: output_dir = Path(d) # Create the expected upstream files on disk - (output_dir / "001-2-plan.txt").write_text("plan", encoding="utf-8") - (output_dir / "002-6-identify_purpose.md").write_text("purpose", encoding="utf-8") - (output_dir / "002-8-plan_type.md").write_text("type", encoding="utf-8") - (output_dir / "002-0-extract_constraints.md").write_text("constraints", encoding="utf-8") + (output_dir / "plan.txt").write_text("plan", encoding="utf-8") + (output_dir / "identify_purpose.md").write_text("purpose", encoding="utf-8") + (output_dir / "plan_type.md").write_text("type", encoding="utf-8") + (output_dir / "extract_constraints.md").write_text("constraints", encoding="utf-8") result = get_upstream_files("potential_levers", output_dir) node_names = [name for name, _ in result] @@ -137,7 +137,7 @@ class TestGetUpstreamFiles(unittest.TestCase): with TemporaryDirectory() as d: output_dir = Path(d) # Only create one of the upstream files - (output_dir / "001-2-plan.txt").write_text("plan", encoding="utf-8") + (output_dir / "plan.txt").write_text("plan", 
encoding="utf-8") result = get_upstream_files("potential_levers", output_dir) node_names = [name for name, _ in result] @@ -211,8 +211,8 @@ NODES: tuple[NodeInfo, ...] = _build_registry() # see registry.py # Example NodeInfo: # NodeInfo( # name="potential_levers", -# output_files=("002-9-potential_levers_raw.json", "002-10-potential_levers.json"), -# inputs=(NodeInput(from_node="setup", artifact_path="001-2-plan.txt"), ...), +# output_files=("potential_levers_raw.json", "potential_levers.json"), +# inputs=(NodeInput(from_node="setup", artifact_path="plan.txt"), ...), # source_code_files=("worker_plan_internal/plan/nodes/potential_levers.py", ...), # ) @@ -326,7 +326,7 @@ class TestPydanticModels(unittest.TestCase): class TestBuildProblemIdentificationMessages(unittest.TestCase): def test_returns_chat_messages(self): messages = build_problem_identification_messages( - filename="030-report.html", + filename="report.html", file_content="report content", user_problem_description="budget is wrong", ) @@ -337,12 +337,12 @@ class TestBuildProblemIdentificationMessages(unittest.TestCase): def test_user_message_contains_inputs(self): messages = build_problem_identification_messages( - filename="025-2-executive_summary.md", + filename="executive_summary.md", file_content="# Summary\nBudget: 500k", user_problem_description="fabricated budget", ) user_content = messages[1].content - self.assertIn("025-2-executive_summary.md", user_content) + self.assertIn("executive_summary.md", user_content) self.assertIn("# Summary", user_content) self.assertIn("fabricated budget", user_content) @@ -352,7 +352,7 @@ class TestBuildUpstreamCheckMessages(unittest.TestCase): messages = build_upstream_check_messages( problem_description="Budget is fabricated", evidence_quote="CZK 500,000", - upstream_filename="005-2-project_plan.md", + upstream_filename="project_plan.md", upstream_file_content="# Project Plan\nBudget: 500k", ) self.assertIsInstance(messages, list) @@ -362,13 +362,13 @@ class 
TestBuildUpstreamCheckMessages(unittest.TestCase): messages = build_upstream_check_messages( problem_description="Missing market sizing", evidence_quote="growing Czech market", - upstream_filename="003-5-make_assumptions.md", + upstream_filename="make_assumptions.md", upstream_file_content="# Assumptions\nMarket is growing", ) user_content = messages[1].content self.assertIn("Missing market sizing", user_content) self.assertIn("growing Czech market", user_content) - self.assertIn("003-5-make_assumptions.md", user_content) + self.assertIn("make_assumptions.md", user_content) class TestBuildSourceCodeAnalysisMessages(unittest.TestCase): @@ -576,13 +576,13 @@ def _make_executor(responses: list[str]) -> LLMExecutor: class TestRCAResult(unittest.TestCase): def test_dataclass_creation(self): result = RCAResult( - starting_file="030-report.html", + starting_file="report.html", problem_description="test", output_dir="/tmp/test", problems=[], llm_calls_made=0, ) - self.assertEqual(result.starting_file, "030-report.html") + self.assertEqual(result.starting_file, "report.html") self.assertEqual(len(result.problems), 0) @@ -594,10 +594,10 @@ class TestRootCauseAnalyzerPhase1(unittest.TestCase): with TemporaryDirectory() as d: output_dir = Path(d) # Create a minimal output file - report_file = output_dir / "025-2-executive_summary.md" + report_file = output_dir / "executive_summary.md" report_file.write_text("# Summary\nBudget: CZK 500,000", encoding="utf-8") # Create upstream file so trace can proceed - (output_dir / "005-2-project_plan.md").write_text("# Plan", encoding="utf-8") + (output_dir / "project_plan.md").write_text("# Plan", encoding="utf-8") # Mock LLM response for problem identification (Phase 1) problem_response = json.dumps({ @@ -631,7 +631,7 @@ class TestRootCauseAnalyzerPhase1(unittest.TestCase): max_depth=15, verbose=False, ) - result = tracer.trace("025-2-executive_summary.md", "budget is unvalidated") + result = tracer.trace("executive_summary.md", "budget 
is unvalidated") self.assertIsInstance(result, RCAResult) self.assertGreaterEqual(len(result.problems), 1) @@ -647,13 +647,13 @@ class TestRootCauseAnalyzerUpstreamTrace(unittest.TestCase): with TemporaryDirectory() as d: output_dir = Path(d) # Create files for a simple chain: executive_summary -> project_plan -> setup - (output_dir / "025-2-executive_summary.md").write_text("Budget: CZK 500,000", encoding="utf-8") - (output_dir / "005-2-project_plan.md").write_text("Budget: CZK 500,000", encoding="utf-8") - (output_dir / "001-2-plan.txt").write_text("Open a tea shop", encoding="utf-8") + (output_dir / "executive_summary.md").write_text("Budget: CZK 500,000", encoding="utf-8") + (output_dir / "project_plan.md").write_text("Budget: CZK 500,000", encoding="utf-8") + (output_dir / "plan.txt").write_text("Open a tea shop", encoding="utf-8") # Create other upstream files that executive_summary depends on - (output_dir / "002-14-strategic_decisions.md").write_text("decisions", encoding="utf-8") - (output_dir / "002-19-scenarios.md").write_text("scenarios", encoding="utf-8") - (output_dir / "003-10-consolidate_assumptions_full.md").write_text("assumptions", encoding="utf-8") + (output_dir / "strategic_decisions.md").write_text("decisions", encoding="utf-8") + (output_dir / "scenarios.md").write_text("scenarios", encoding="utf-8") + (output_dir / "consolidate_assumptions_full.md").write_text("assumptions", encoding="utf-8") responses = [ # Phase 1: identify problems in executive_summary @@ -687,7 +687,7 @@ class TestRootCauseAnalyzerUpstreamTrace(unittest.TestCase): max_depth=15, verbose=False, ) - result = tracer.trace("025-2-executive_summary.md", "budget is fabricated") + result = tracer.trace("executive_summary.md", "budget is fabricated") self.assertEqual(len(result.problems), 1) problem = result.problems[0] @@ -703,7 +703,7 @@ class TestRootCauseAnalyzerMaxDepth(unittest.TestCase): def test_respects_max_depth(self): with TemporaryDirectory() as d: output_dir = 
Path(d) - (output_dir / "025-2-executive_summary.md").write_text("Budget: 500k", encoding="utf-8") + (output_dir / "executive_summary.md").write_text("Budget: 500k", encoding="utf-8") responses = [ json.dumps({"problems": [{"description": "test problem", "evidence": "500k", "severity": "LOW"}]}), @@ -717,7 +717,7 @@ class TestRootCauseAnalyzerMaxDepth(unittest.TestCase): max_depth=0, # zero depth = no upstream tracing verbose=False, ) - result = tracer.trace("025-2-executive_summary.md", "test") + result = tracer.trace("executive_summary.md", "test") self.assertEqual(len(result.problems), 1) # With max_depth=0, no upstream tracing happens @@ -1055,7 +1055,7 @@ from worker_plan_internal.rca.output import write_json_report, write_markdown_re def _make_sample_result() -> RCAResult: """Create a sample RCAResult for testing.""" return RCAResult( - starting_file="025-2-executive_summary.md", + starting_file="executive_summary.md", problem_description="Budget is unvalidated", output_dir="/tmp/test_output", problems=[ @@ -1065,14 +1065,14 @@ def _make_sample_result() -> RCAResult: severity="HIGH", starting_evidence="CZK 500,000", trace=[ - TraceEntry(stage="executive_summary", file="025-2-executive_summary.md", evidence="CZK 500,000", is_origin=False), - TraceEntry(stage="project_plan", file="005-2-project_plan.md", evidence="Budget: 500k", is_origin=False), - TraceEntry(stage="make_assumptions", file="003-5-make_assumptions.md", evidence="Assume budget of 500k", is_origin=True), + TraceEntry(stage="executive_summary", file="executive_summary.md", evidence="CZK 500,000", is_origin=False), + TraceEntry(stage="project_plan", file="project_plan.md", evidence="Budget: 500k", is_origin=False), + TraceEntry(stage="make_assumptions", file="make_assumptions.md", evidence="Assume budget of 500k", is_origin=True), ], origin_stage="make_assumptions", origin=OriginInfo( stage="make_assumptions", - file="003-5-make_assumptions.md", + file="make_assumptions.md", 
source_code_files=["make_assumptions.py"], likely_cause="Prompt generates budget without data", suggestion="Add validation step", @@ -1085,7 +1085,7 @@ def _make_sample_result() -> RCAResult: severity="MEDIUM", starting_evidence="growing Czech market", trace=[ - TraceEntry(stage="executive_summary", file="025-2-executive_summary.md", evidence="growing Czech market", is_origin=True), + TraceEntry(stage="executive_summary", file="executive_summary.md", evidence="growing Czech market", is_origin=True), ], origin_stage="executive_summary", depth=1, @@ -1168,7 +1168,7 @@ class TestWriteMarkdownReport(unittest.TestCase): with TemporaryDirectory() as d: output_path = Path(d) / "root_cause_analysis.md" result = RCAResult( - starting_file="030-report.html", + starting_file="report.html", problem_description="test", output_dir="/tmp", problems=[], @@ -1348,7 +1348,7 @@ git commit -m "feat: add rca JSON and markdown report generation" Usage: python -m worker_plan_internal.rca \ --dir /path/to/output \ - --file 030-report.html \ + --file report.html \ --problem "The budget appears unvalidated..." 
\ --output-dir /path/to/output \ --max-depth 15 \ diff --git a/docs/superpowers/specs/2026-04-03-extract-constraints-design.md b/docs/superpowers/specs/2026-04-03-extract-constraints-design.md index 649c07ad4..073a51539 100644 --- a/docs/superpowers/specs/2026-04-03-extract-constraints-design.md +++ b/docs/superpowers/specs/2026-04-03-extract-constraints-design.md @@ -72,8 +72,8 @@ Output: **FilenameEnum entries:** ```python -EXTRACT_CONSTRAINTS_RAW = "002-0-extract_constraints_raw.json" -EXTRACT_CONSTRAINTS_MARKDOWN = "002-0-extract_constraints.md" +EXTRACT_CONSTRAINTS_RAW = "extract_constraints_raw.json" +EXTRACT_CONSTRAINTS_MARKDOWN = "extract_constraints.md" ``` **Pipeline registration** in `full_plan_pipeline.py`: diff --git a/docs/superpowers/specs/2026-04-05-rca-design.md b/docs/superpowers/specs/2026-04-05-rca-design.md index 49d4a0deb..04b60c5b7 100644 --- a/docs/superpowers/specs/2026-04-05-rca-design.md +++ b/docs/superpowers/specs/2026-04-05-rca-design.md @@ -35,7 +35,7 @@ A static Python data structure mapping the full pipeline topology. 
Each entry de @dataclass class NodeInfo: name: str # e.g., "potential_levers" - output_files: list[str] # e.g., ["002-9-potential_levers_raw.json", "002-10-potential_levers.json"] + output_files: list[str] # e.g., ["potential_levers_raw.json", "potential_levers.json"] inputs: list[str] # e.g., ["setup", "identify_purpose", "plan_type", "extract_constraints"] source_code_files: list[str] # Relative to worker_plan/, e.g., ["worker_plan_internal/plan/stages/potential_levers.py", "worker_plan_internal/lever/identify_potential_levers.py"] ``` @@ -156,7 +156,7 @@ Writes the full trace as JSON: ```json { "input": { - "starting_file": "030-report.html", + "starting_file": "report.html", "problem_description": "...", "output_dir": "/path/to/output", "timestamp": "2026-04-05T14:30:00Z" @@ -170,20 +170,20 @@ Writes the full trace as JSON: "trace": [ { "node": "executive_summary", - "file": "025-2-executive_summary.md", + "file": "executive_summary.md", "evidence": "...", "is_origin": false }, { "node": "make_assumptions", - "file": "003-5-make_assumptions.md", + "file": "make_assumptions.md", "evidence": "...", "is_origin": true } ], "origin": { "node": "make_assumptions", - "file": "003-5-make_assumptions.md", + "file": "make_assumptions.md", "source_code_files": ["stages/make_assumptions.py", "assumption/make_assumptions.py"], "likely_cause": "The prompt asks the LLM to...", "suggestion": "Add a validation step that..." @@ -207,7 +207,7 @@ Writes a human-readable report: ```markdown # Root Cause Analysis Report -**Input:** 030-report.html +**Input:** report.html **Problems found:** 3 **Deepest origin:** make_assumptions (depth 3) @@ -219,9 +219,9 @@ Writes a human-readable report: | Node | File | Evidence | |------|------|----------| -| executive_summary | 025-2-executive_summary.md | "The budget is CZK 500,000..." | -| project_plan | 005-2-project_plan.md | "Estimated budget: CZK 500,000..." | -| **make_assumptions** | 003-5-make_assumptions.md | "Assume total budget..." 
| +| executive_summary | executive_summary.md | "The budget is CZK 500,000..." | +| project_plan | project_plan.md | "Estimated budget: CZK 500,000..." | +| **make_assumptions** | make_assumptions.md | "Assume total budget..." | **Root cause:** The prompt asks the LLM to generate budget assumptions without requiring external data sources... @@ -236,7 +236,7 @@ Problems are sorted by depth (deepest origin first) so the most upstream root ca ``` python -m worker_plan_internal.rca \ --dir /path/to/output \ - --file 030-report.html \ + --file report.html \ --problem "The budget is CZK 500,000 but this number appears unvalidated..." \ --output-dir /path/to/output \ --max-depth 15 \ diff --git a/worker_plan/worker_plan_api/filenames.py b/worker_plan/worker_plan_api/filenames.py index b346675fb..161cb7832 100644 --- a/worker_plan/worker_plan_api/filenames.py +++ b/worker_plan/worker_plan_api/filenames.py @@ -1,135 +1,135 @@ from enum import Enum class FilenameEnum(str, Enum): - START_TIME = "001-1-start_time.json" - INITIAL_PLAN = "001-2-plan.txt" - PLANEXE_METADATA = "001-3-planexe_metadata.json" - SCREEN_PLANNING_PROMPT_RAW = "002-0-screen_planning_prompt.json" - SCREEN_PLANNING_PROMPT_MARKDOWN = "002-0-screen_planning_prompt.md" - EXTRACT_CONSTRAINTS_RAW = "002-0-extract_constraints_raw.json" - EXTRACT_CONSTRAINTS_MARKDOWN = "002-0-extract_constraints.md" - REDLINE_GATE_RAW = "002-1-redline_gate.json" - REDLINE_GATE_MARKDOWN = "002-2-redline_gate.md" - PREMISE_ATTACK_RAW = "002-3-premise_attack.json" - PREMISE_ATTACK_MARKDOWN = "002-4-premise_attack.md" - IDENTIFY_PURPOSE_RAW = "002-5-identify_purpose_raw.json" - IDENTIFY_PURPOSE_MARKDOWN = "002-6-identify_purpose.md" - PLAN_TYPE_RAW = "002-7-plan_type_raw.json" - PLAN_TYPE_MARKDOWN = "002-8-plan_type.md" - POTENTIAL_LEVERS_RAW = "002-9-potential_levers_raw.json" - POTENTIAL_LEVERS_CLEAN = "002-10-potential_levers.json" - POTENTIAL_LEVERS_CONSTRAINT = "002-10-potential_levers_constraint.json" - DEDUPLICATED_LEVERS_RAW 
= "002-11-deduplicated_levers_raw.json" - DEDUPLICATED_LEVERS_CONSTRAINT = "002-11-deduplicated_levers_constraint.json" - ENRICHED_LEVERS_RAW = "002-12-enriched_levers_raw.json" - ENRICHED_LEVERS_CONSTRAINT = "002-12-enriched_levers_constraint.json" - VITAL_FEW_LEVERS_RAW = "002-13-vital_few_levers_raw.json" - VITAL_FEW_LEVERS_CONSTRAINT = "002-13-vital_few_levers_constraint.json" - STRATEGIC_DECISIONS_MARKDOWN = "002-14-strategic_decisions.md" - CANDIDATE_SCENARIOS_RAW = "002-15-candidate_scenarios_raw.json" - CANDIDATE_SCENARIOS_CLEAN = "002-16-candidate_scenarios.json" - CANDIDATE_SCENARIOS_CONSTRAINT = "002-16-candidate_scenarios_constraint.json" - SELECTED_SCENARIO_RAW = "002-17-selected_scenario_raw.json" - SELECTED_SCENARIO_CLEAN = "002-18-selected_scenario.json" - SELECTED_SCENARIO_CONSTRAINT = "002-18-selected_scenario_constraint.json" - SCENARIOS_MARKDOWN = "002-19-scenarios.md" - PHYSICAL_LOCATIONS_RAW = "002-20-physical_locations_raw.json" - PHYSICAL_LOCATIONS_MARKDOWN = "002-21-physical_locations.md" - CURRENCY_STRATEGY_RAW = "002-22-currency_strategy_raw.json" - CURRENCY_STRATEGY_MARKDOWN = "002-23-currency_strategy.md" - IDENTIFY_RISKS_RAW = "003-1-identify_risks_raw.json" - IDENTIFY_RISKS_MARKDOWN = "003-2-identify_risks.md" - MAKE_ASSUMPTIONS_RAW = "003-3-make_assumptions_raw.json" - MAKE_ASSUMPTIONS_CLEAN = "003-4-make_assumptions.json" - MAKE_ASSUMPTIONS_MARKDOWN = "003-5-make_assumptions.md" - DISTILL_ASSUMPTIONS_RAW = "003-6-distill_assumptions_raw.json" - DISTILL_ASSUMPTIONS_MARKDOWN = "003-7-distill_assumptions.md" - REVIEW_ASSUMPTIONS_RAW = "003-8-review_assumptions_raw.json" - REVIEW_ASSUMPTIONS_MARKDOWN = "003-9-review_assumptions.md" - CONSOLIDATE_ASSUMPTIONS_FULL_MARKDOWN = "003-10-consolidate_assumptions_full.md" - CONSOLIDATE_ASSUMPTIONS_SHORT_MARKDOWN = "003-11-consolidate_assumptions_short.md" - PRE_PROJECT_ASSESSMENT_RAW = "004-1-pre_project_assessment_raw.json" - PRE_PROJECT_ASSESSMENT = "004-2-pre_project_assessment.json" - 
PROJECT_PLAN_RAW = "005-1-project_plan_raw.json" - PROJECT_PLAN_MARKDOWN = "005-2-project_plan.md" - GOVERNANCE_PHASE1_AUDIT_RAW = "006-1-governance_phase1_audit_raw.json" - GOVERNANCE_PHASE1_AUDIT_MARKDOWN = "006-2-governance_phase1_audit.md" - GOVERNANCE_PHASE2_BODIES_RAW = "006-3-governance_phase2_bodies_raw.json" - GOVERNANCE_PHASE2_BODIES_MARKDOWN = "006-4-governance_phase2_bodies.md" - GOVERNANCE_PHASE3_IMPL_PLAN_RAW = "006-5-governance_phase3_impl_plan_raw.json" - GOVERNANCE_PHASE3_IMPL_PLAN_MARKDOWN = "006-6-governance_phase3_impl_plan.md" - GOVERNANCE_PHASE4_DECISION_ESCALATION_MATRIX_RAW = "006-7-governance_phase4_decision_escalation_matrix_raw.json" - GOVERNANCE_PHASE4_DECISION_ESCALATION_MATRIX_MARKDOWN = "006-8-governance_phase4_decision_escalation_matrix.md" - GOVERNANCE_PHASE5_MONITORING_PROGRESS_RAW = "006-9-governance_phase5_monitoring_progress_raw.json" - GOVERNANCE_PHASE5_MONITORING_PROGRESS_MARKDOWN = "006-10-governance_phase5_monitoring_progress.md" - GOVERNANCE_PHASE6_EXTRA_RAW = "006-11-governance_phase6_extra_raw.json" - GOVERNANCE_PHASE6_EXTRA_MARKDOWN = "006-12-governance_phase6_extra.md" - CONSOLIDATE_GOVERNANCE_MARKDOWN = "006-13-consolidate_governance.md" - RELATED_RESOURCES_RAW = "007-1-related_resources_raw.json" - RELATED_RESOURCES_MARKDOWN = "007-8-related_resources.md" - FIND_TEAM_MEMBERS_RAW = "008-1-find_team_members_raw.json" - FIND_TEAM_MEMBERS_CLEAN = "008-2-find_team_members.json" - ENRICH_TEAM_MEMBERS_CONTRACT_TYPE_RAW = "009-1-enrich_team_members_contract_type_raw.json" - ENRICH_TEAM_MEMBERS_CONTRACT_TYPE_CLEAN = "009-2-enrich_team_members_contract_type.json" - ENRICH_TEAM_MEMBERS_BACKGROUND_STORY_RAW = "010-1-enrich_team_members_background_story_raw.json" - ENRICH_TEAM_MEMBERS_BACKGROUND_STORY_CLEAN = "010-2-enrich_team_members_background_story.json" - ENRICH_TEAM_MEMBERS_ENVIRONMENT_INFO_RAW = "011-1-enrich_team_members_environment_info_raw.json" - ENRICH_TEAM_MEMBERS_ENVIRONMENT_INFO_CLEAN = 
"011-2-enrich_team_members_environment_info.json" - REVIEW_TEAM_RAW = "012-review_team_raw.json" - TEAM_MARKDOWN = "013-team.md" - SWOT_RAW = "014-1-swot_analysis_raw.json" - SWOT_MARKDOWN = "014-2-swot_analysis.md" - EXPERTS_RAW = "015-1-experts_raw.json" - EXPERTS_CLEAN = "015-2-experts.json" - EXPERT_CRITICISM_RAW_TEMPLATE = "016-1-{}-expert_criticism_raw.json" - EXPERT_CRITICISM_MARKDOWN = "016-2-expert_criticism.md" - DATA_COLLECTION_RAW = "017-1-data_collection_raw.json" - DATA_COLLECTION_MARKDOWN = "017-2-data_collection.md" - IDENTIFIED_DOCUMENTS_RAW = "017-3-identified_documents_raw.json" - IDENTIFIED_DOCUMENTS_MARKDOWN = "017-4-identified_documents.md" - IDENTIFIED_DOCUMENTS_TO_FIND_JSON = "017-5-identified_documents_to_find.json" - IDENTIFIED_DOCUMENTS_TO_CREATE_JSON = "017-6-identified_documents_to_create.json" - FILTER_DOCUMENTS_TO_FIND_RAW = "017-7-filter_documents_to_find_raw.json" - FILTER_DOCUMENTS_TO_FIND_CLEAN = "017-8-filter_documents_to_find_clean.json" - FILTER_DOCUMENTS_TO_CREATE_RAW = "017-9-filter_documents_to_create_raw.json" - FILTER_DOCUMENTS_TO_CREATE_CLEAN = "017-10-filter_documents_to_create_clean.json" - DRAFT_DOCUMENTS_TO_FIND_RAW_TEMPLATE = "017-11-{}-draft_documents_to_find_raw.json" - DRAFT_DOCUMENTS_TO_FIND_CONSOLIDATED = "017-12-draft_documents_to_find.json" - DRAFT_DOCUMENTS_TO_CREATE_RAW_TEMPLATE = "017-13-{}-draft_documents_to_create_raw.json" - DRAFT_DOCUMENTS_TO_CREATE_CONSOLIDATED = "017-14-draft_documents_to_create.json" - DOCUMENTS_TO_CREATE_AND_FIND_MARKDOWN = "017-15-documents_to_create_and_find.md" - WBS_LEVEL1_RAW = "018-1-wbs_level1_raw.json" - WBS_LEVEL1 = "018-2-wbs_level1.json" - WBS_LEVEL1_PROJECT_TITLE = "018-3-wbs_level1_project_title.json" - WBS_LEVEL2_RAW = "018-4-wbs_level2_raw.json" - WBS_LEVEL2 = "018-5-wbs_level2.json" - WBS_PROJECT_LEVEL1_AND_LEVEL2 = "019-wbs_project_level1_and_level2.json" - PITCH_RAW = "020-1-pitch_raw.json" - PITCH_CONVERT_TO_MARKDOWN_RAW = "020-2-pitch_to_markdown_raw.json" - 
PITCH_MARKDOWN = "020-3-pitch.md" - TASK_DEPENDENCIES_RAW = "021-task_dependencies_raw.json" - TASK_DURATIONS_RAW_TEMPLATE = "022-1-{}-task_durations_raw.json" - TASK_DURATIONS = "022-2-task_durations.json" - WBS_LEVEL3_RAW_TEMPLATE = "023-1-{}-wbs_level3_raw.json" - WBS_LEVEL3 = "023-2-wbs_level3.json" - WBS_PROJECT_LEVEL1_AND_LEVEL2_AND_LEVEL3_FULL = "023-3-wbs_project_level1_and_level2_and_level3.json" - WBS_PROJECT_LEVEL1_AND_LEVEL2_AND_LEVEL3_CSV = "023-4-wbs_project_level1_and_level2_and_level3.csv" - REVIEW_PLAN_RAW = "024-1-review_plan_raw.json" - REVIEW_PLAN_MARKDOWN = "024-2-review_plan.md" - EXECUTIVE_SUMMARY_RAW = "025-1-executive_summary_raw.json" - EXECUTIVE_SUMMARY_MARKDOWN = "025-2-executive_summary.md" - SCHEDULE_GANTT_MERMAID_HTML = "026-1-schedule_gantt_mermaid.html" - SCHEDULE_GANTT_DHTMLX_HTML = "026-2-schedule_gantt_dhtmlx.html" - SCHEDULE_GANTT_MACHAI_CSV = "026-3-schedule_gantt_machai.csv" - QUESTIONS_AND_ANSWERS_RAW = "027-1-questions_and_answers_raw.json" - QUESTIONS_AND_ANSWERS_MARKDOWN = "027-2-questions_and_answers.md" - QUESTIONS_AND_ANSWERS_HTML = "027-3-questions_and_answers.html" - PREMORTEM_RAW = "028-1-premortem_raw.json" - PREMORTEM_MARKDOWN = "028-2-premortem.md" - SELF_AUDIT_RAW = "029-1-self_audit_raw.json" - SELF_AUDIT_MARKDOWN = "029-2-self_audit.md" - REPORT = "030-report.html" - PIPELINE_COMPLETE = "999-pipeline_complete.txt" + START_TIME = "start_time.json" + INITIAL_PLAN = "plan.txt" + PLANEXE_METADATA = "planexe_metadata.json" + SCREEN_PLANNING_PROMPT_RAW = "screen_planning_prompt.json" + SCREEN_PLANNING_PROMPT_MARKDOWN = "screen_planning_prompt.md" + EXTRACT_CONSTRAINTS_RAW = "extract_constraints_raw.json" + EXTRACT_CONSTRAINTS_MARKDOWN = "extract_constraints.md" + REDLINE_GATE_RAW = "redline_gate.json" + REDLINE_GATE_MARKDOWN = "redline_gate.md" + PREMISE_ATTACK_RAW = "premise_attack.json" + PREMISE_ATTACK_MARKDOWN = "premise_attack.md" + IDENTIFY_PURPOSE_RAW = "identify_purpose_raw.json" + IDENTIFY_PURPOSE_MARKDOWN = 
"identify_purpose.md" + PLAN_TYPE_RAW = "plan_type_raw.json" + PLAN_TYPE_MARKDOWN = "plan_type.md" + POTENTIAL_LEVERS_RAW = "potential_levers_raw.json" + POTENTIAL_LEVERS_CLEAN = "potential_levers.json" + POTENTIAL_LEVERS_CONSTRAINT = "potential_levers_constraint.json" + DEDUPLICATED_LEVERS_RAW = "deduplicated_levers_raw.json" + DEDUPLICATED_LEVERS_CONSTRAINT = "deduplicated_levers_constraint.json" + ENRICHED_LEVERS_RAW = "enriched_levers_raw.json" + ENRICHED_LEVERS_CONSTRAINT = "enriched_levers_constraint.json" + VITAL_FEW_LEVERS_RAW = "vital_few_levers_raw.json" + VITAL_FEW_LEVERS_CONSTRAINT = "vital_few_levers_constraint.json" + STRATEGIC_DECISIONS_MARKDOWN = "strategic_decisions.md" + CANDIDATE_SCENARIOS_RAW = "candidate_scenarios_raw.json" + CANDIDATE_SCENARIOS_CLEAN = "candidate_scenarios.json" + CANDIDATE_SCENARIOS_CONSTRAINT = "candidate_scenarios_constraint.json" + SELECTED_SCENARIO_RAW = "selected_scenario_raw.json" + SELECTED_SCENARIO_CLEAN = "selected_scenario.json" + SELECTED_SCENARIO_CONSTRAINT = "selected_scenario_constraint.json" + SCENARIOS_MARKDOWN = "scenarios.md" + PHYSICAL_LOCATIONS_RAW = "physical_locations_raw.json" + PHYSICAL_LOCATIONS_MARKDOWN = "physical_locations.md" + CURRENCY_STRATEGY_RAW = "currency_strategy_raw.json" + CURRENCY_STRATEGY_MARKDOWN = "currency_strategy.md" + IDENTIFY_RISKS_RAW = "identify_risks_raw.json" + IDENTIFY_RISKS_MARKDOWN = "identify_risks.md" + MAKE_ASSUMPTIONS_RAW = "make_assumptions_raw.json" + MAKE_ASSUMPTIONS_CLEAN = "make_assumptions.json" + MAKE_ASSUMPTIONS_MARKDOWN = "make_assumptions.md" + DISTILL_ASSUMPTIONS_RAW = "distill_assumptions_raw.json" + DISTILL_ASSUMPTIONS_MARKDOWN = "distill_assumptions.md" + REVIEW_ASSUMPTIONS_RAW = "review_assumptions_raw.json" + REVIEW_ASSUMPTIONS_MARKDOWN = "review_assumptions.md" + CONSOLIDATE_ASSUMPTIONS_FULL_MARKDOWN = "consolidate_assumptions_full.md" + CONSOLIDATE_ASSUMPTIONS_SHORT_MARKDOWN = "consolidate_assumptions_short.md" + PRE_PROJECT_ASSESSMENT_RAW = 
"pre_project_assessment_raw.json" + PRE_PROJECT_ASSESSMENT = "pre_project_assessment.json" + PROJECT_PLAN_RAW = "project_plan_raw.json" + PROJECT_PLAN_MARKDOWN = "project_plan.md" + GOVERNANCE_PHASE1_AUDIT_RAW = "governance_phase1_audit_raw.json" + GOVERNANCE_PHASE1_AUDIT_MARKDOWN = "governance_phase1_audit.md" + GOVERNANCE_PHASE2_BODIES_RAW = "governance_phase2_bodies_raw.json" + GOVERNANCE_PHASE2_BODIES_MARKDOWN = "governance_phase2_bodies.md" + GOVERNANCE_PHASE3_IMPL_PLAN_RAW = "governance_phase3_impl_plan_raw.json" + GOVERNANCE_PHASE3_IMPL_PLAN_MARKDOWN = "governance_phase3_impl_plan.md" + GOVERNANCE_PHASE4_DECISION_ESCALATION_MATRIX_RAW = "governance_phase4_decision_escalation_matrix_raw.json" + GOVERNANCE_PHASE4_DECISION_ESCALATION_MATRIX_MARKDOWN = "governance_phase4_decision_escalation_matrix.md" + GOVERNANCE_PHASE5_MONITORING_PROGRESS_RAW = "governance_phase5_monitoring_progress_raw.json" + GOVERNANCE_PHASE5_MONITORING_PROGRESS_MARKDOWN = "governance_phase5_monitoring_progress.md" + GOVERNANCE_PHASE6_EXTRA_RAW = "governance_phase6_extra_raw.json" + GOVERNANCE_PHASE6_EXTRA_MARKDOWN = "governance_phase6_extra.md" + CONSOLIDATE_GOVERNANCE_MARKDOWN = "consolidate_governance.md" + RELATED_RESOURCES_RAW = "related_resources_raw.json" + RELATED_RESOURCES_MARKDOWN = "related_resources.md" + FIND_TEAM_MEMBERS_RAW = "find_team_members_raw.json" + FIND_TEAM_MEMBERS_CLEAN = "find_team_members.json" + ENRICH_TEAM_MEMBERS_CONTRACT_TYPE_RAW = "enrich_team_members_contract_type_raw.json" + ENRICH_TEAM_MEMBERS_CONTRACT_TYPE_CLEAN = "enrich_team_members_contract_type.json" + ENRICH_TEAM_MEMBERS_BACKGROUND_STORY_RAW = "enrich_team_members_background_story_raw.json" + ENRICH_TEAM_MEMBERS_BACKGROUND_STORY_CLEAN = "enrich_team_members_background_story.json" + ENRICH_TEAM_MEMBERS_ENVIRONMENT_INFO_RAW = "enrich_team_members_environment_info_raw.json" + ENRICH_TEAM_MEMBERS_ENVIRONMENT_INFO_CLEAN = "enrich_team_members_environment_info.json" + REVIEW_TEAM_RAW = 
"review_team_raw.json" + TEAM_MARKDOWN = "team.md" + SWOT_RAW = "swot_analysis_raw.json" + SWOT_MARKDOWN = "swot_analysis.md" + EXPERTS_RAW = "experts_raw.json" + EXPERTS_CLEAN = "experts.json" + EXPERT_CRITICISM_RAW_TEMPLATE = "expert_criticism_{}_raw.json" + EXPERT_CRITICISM_MARKDOWN = "expert_criticism.md" + DATA_COLLECTION_RAW = "data_collection_raw.json" + DATA_COLLECTION_MARKDOWN = "data_collection.md" + IDENTIFIED_DOCUMENTS_RAW = "identified_documents_raw.json" + IDENTIFIED_DOCUMENTS_MARKDOWN = "identified_documents.md" + IDENTIFIED_DOCUMENTS_TO_FIND_JSON = "identified_documents_to_find.json" + IDENTIFIED_DOCUMENTS_TO_CREATE_JSON = "identified_documents_to_create.json" + FILTER_DOCUMENTS_TO_FIND_RAW = "filter_documents_to_find_raw.json" + FILTER_DOCUMENTS_TO_FIND_CLEAN = "filter_documents_to_find_clean.json" + FILTER_DOCUMENTS_TO_CREATE_RAW = "filter_documents_to_create_raw.json" + FILTER_DOCUMENTS_TO_CREATE_CLEAN = "filter_documents_to_create_clean.json" + DRAFT_DOCUMENTS_TO_FIND_RAW_TEMPLATE = "draft_documents_to_find_{}_raw.json" + DRAFT_DOCUMENTS_TO_FIND_CONSOLIDATED = "draft_documents_to_find.json" + DRAFT_DOCUMENTS_TO_CREATE_RAW_TEMPLATE = "draft_documents_to_create_{}_raw.json" + DRAFT_DOCUMENTS_TO_CREATE_CONSOLIDATED = "draft_documents_to_create.json" + DOCUMENTS_TO_CREATE_AND_FIND_MARKDOWN = "documents_to_create_and_find.md" + WBS_LEVEL1_RAW = "wbs_level1_raw.json" + WBS_LEVEL1 = "wbs_level1.json" + WBS_LEVEL1_PROJECT_TITLE = "wbs_level1_project_title.json" + WBS_LEVEL2_RAW = "wbs_level2_raw.json" + WBS_LEVEL2 = "wbs_level2.json" + WBS_PROJECT_LEVEL1_AND_LEVEL2 = "wbs_project_level1_and_level2.json" + PITCH_RAW = "pitch_raw.json" + PITCH_CONVERT_TO_MARKDOWN_RAW = "pitch_to_markdown_raw.json" + PITCH_MARKDOWN = "pitch.md" + TASK_DEPENDENCIES_RAW = "task_dependencies_raw.json" + TASK_DURATIONS_RAW_TEMPLATE = "task_durations_{}_raw.json" + TASK_DURATIONS = "task_durations.json" + WBS_LEVEL3_RAW_TEMPLATE = "wbs_level3_{}_raw.json" + WBS_LEVEL3 = 
"wbs_level3.json" + WBS_PROJECT_LEVEL1_AND_LEVEL2_AND_LEVEL3_FULL = "wbs_project_level1_and_level2_and_level3.json" + WBS_PROJECT_LEVEL1_AND_LEVEL2_AND_LEVEL3_CSV = "wbs_project_level1_and_level2_and_level3.csv" + REVIEW_PLAN_RAW = "review_plan_raw.json" + REVIEW_PLAN_MARKDOWN = "review_plan.md" + EXECUTIVE_SUMMARY_RAW = "executive_summary_raw.json" + EXECUTIVE_SUMMARY_MARKDOWN = "executive_summary.md" + SCHEDULE_GANTT_MERMAID_HTML = "schedule_gantt_mermaid.html" + SCHEDULE_GANTT_DHTMLX_HTML = "schedule_gantt_dhtmlx.html" + SCHEDULE_GANTT_MACHAI_CSV = "schedule_gantt_machai.csv" + QUESTIONS_AND_ANSWERS_RAW = "questions_and_answers_raw.json" + QUESTIONS_AND_ANSWERS_MARKDOWN = "questions_and_answers.md" + QUESTIONS_AND_ANSWERS_HTML = "questions_and_answers.html" + PREMORTEM_RAW = "premortem_raw.json" + PREMORTEM_MARKDOWN = "premortem.md" + SELF_AUDIT_RAW = "self_audit_raw.json" + SELF_AUDIT_MARKDOWN = "self_audit.md" + REPORT = "report.html" + PIPELINE_COMPLETE = "pipeline_complete.txt" class ExtraFilenameEnum(str, Enum): LOG_TXT = "log.txt" diff --git a/worker_plan/worker_plan_internal/plan/run_plan_pipeline.py b/worker_plan/worker_plan_internal/plan/run_plan_pipeline.py index c8d7713b0..2218d3349 100644 --- a/worker_plan/worker_plan_internal/plan/run_plan_pipeline.py +++ b/worker_plan/worker_plan_internal/plan/run_plan_pipeline.py @@ -3,7 +3,7 @@ In order to resume an unfinished run. Insert the run_id_dir of the thing you want to resume. -If it's an already finished run, then remove the "999-pipeline_complete.txt" file. +If it's an already finished run, then remove the "pipeline_complete.txt" file. 
PROMPT> RUN_ID_DIR=/absolute/path/to/PlanExe_20250216_150332 python -m worker_plan_internal.plan.run_plan_pipeline """ from dataclasses import dataclass, field diff --git a/worker_plan/worker_plan_internal/rca/AGENTS.md b/worker_plan/worker_plan_internal/rca/AGENTS.md index f7ec4f645..4033bee90 100644 --- a/worker_plan/worker_plan_internal/rca/AGENTS.md +++ b/worker_plan/worker_plan_internal/rca/AGENTS.md @@ -84,7 +84,7 @@ The category classification (`prompt_fixable` / `domain_complexity` / `missing_i The tool is diagnostic, not prescriptive. It tells you *where* a problem originated and *why*, but someone still has to decide what to do. It can't catch problems that don't leave textual evidence — timing issues, model-specific quirks, or structural DAG problems are invisible. -Starting from `029-2-self_audit.md` is the sweet spot. That file already contains identified issues, so the tracer is tracing known problems upstream rather than discovering problems from scratch. +Starting from `self_audit.md` is the sweet spot. That file already contains identified issues, so the tracer is tracing known problems upstream rather than discovering problems from scratch. Before relying on this for automated decisions (e.g., in the self-improve loop), it needs more diverse test runs (10+ plans) and reproducibility testing. diff --git a/worker_plan/worker_plan_internal/rca/README.md b/worker_plan/worker_plan_internal/rca/README.md index df83c481d..15932f9cc 100644 --- a/worker_plan/worker_plan_internal/rca/README.md +++ b/worker_plan/worker_plan_internal/rca/README.md @@ -47,7 +47,7 @@ Basic usage: ```bash /opt/homebrew/bin/python3.11 -m worker_plan_internal.rca \ --dir /path/to/output \ - --file 030-report.html \ + --file report.html \ --problem "Description of the problem you observed" \ --verbose ``` @@ -69,11 +69,11 @@ You can start from any intermediary artifact. 
Common starting points: | File | What it is | |------|------------| -| `030-report.html` | The final HTML report (largest, most problems to find) | -| `029-2-self_audit.md` | Self-audit (already identifies issues — good for tracing them back) | -| `025-2-executive_summary.md` | Executive summary | -| `024-2-review_plan.md` | Plan review | -| `028-2-premortem.md` | Premortem analysis | +| `report.html` | The final HTML report (largest, most problems to find) | +| `self_audit.md` | Self-audit (already identifies issues — good for tracing them back) | +| `executive_summary.md` | Executive summary | +| `review_plan.md` | Plan review | +| `premortem.md` | Premortem analysis | ### Examples @@ -82,7 +82,7 @@ Trace a problem from the self-audit: ```bash /opt/homebrew/bin/python3.11 -m worker_plan_internal.rca \ --dir /path/to/output/20250101_india_census \ - --file 029-2-self_audit.md \ + --file self_audit.md \ --problem "No Real-World Proof. The plan combines a digital census with caste enumeration at an unprecedented scale, lacking independent evidence of success." \ --output-dir /tmp/rca-analysis \ --verbose @@ -93,7 +93,7 @@ Trace a zoning/permits problem: ```bash /opt/homebrew/bin/python3.11 -m worker_plan_internal.rca \ --dir /path/to/output/20251016_minecraft_escape \ - --file 029-2-self_audit.md \ + --file self_audit.md \ --problem "Infeasible Constraints Rated MEDIUM because the plan mentions zoning and permits but lacks specifics for the Shanghai location." \ --output-dir /tmp/rca-analysis2 \ --verbose @@ -123,7 +123,7 @@ A typical run finds 2-3 focused problems and makes 15-30 LLM calls. The tool implements the investigation strategy described in `docs/proposals/133-dag-and-rca.md`: -1. Start from the final artifact (e.g., `030-report.html`) +1. Start from the final artifact (e.g., `report.html`) 2. Inspect direct input artifacts to the producing node 3. Search those artifacts for the false claim or problem 4. 
When found upstream, recurse into that node's inputs @@ -133,7 +133,7 @@ The tool implements the investigation strategy described in `docs/proposals/133- ## Tips -- **Start from `029-2-self_audit.md`.** This file already contains identified issues, so you're tracing *known* problems upstream rather than asking the LLM to find problems from scratch. +- **Start from `self_audit.md`.** This file already contains identified issues, so you're tracing *known* problems upstream rather than asking the LLM to find problems from scratch. - **Trust the trace chains more than the suggestions.** The upstream path (which nodes the problem passed through) is mechanically grounded in the DAG. The suggestions are LLM opinions — useful starting points, not patches. - **Check the category before acting.** If the origin is `domain_complexity`, don't spend time tweaking the prompt. If it's `prompt_fixable`, the suggestion is likely actionable. - **Results are non-deterministic.** This is LLM judging LLM output. Two runs on the same input may produce slightly different traces. If a finding matters, run it twice. diff --git a/worker_plan/worker_plan_internal/rca/__main__.py b/worker_plan/worker_plan_internal/rca/__main__.py index aea03cf94..9f4f86c6e 100644 --- a/worker_plan/worker_plan_internal/rca/__main__.py +++ b/worker_plan/worker_plan_internal/rca/__main__.py @@ -4,7 +4,7 @@ Usage: python -m worker_plan_internal.rca \ --dir /path/to/output \ - --file 030-report.html \ + --file report.html \ --problem "The budget appears unvalidated..." 
\ --output-dir /path/to/output \ --max-depth 15 \ diff --git a/worker_plan/worker_plan_internal/rca/tests/test_output.py b/worker_plan/worker_plan_internal/rca/tests/test_output.py index 315b4a409..bb784e4fa 100644 --- a/worker_plan/worker_plan_internal/rca/tests/test_output.py +++ b/worker_plan/worker_plan_internal/rca/tests/test_output.py @@ -16,7 +16,7 @@ def _make_sample_result() -> RCAResult: """Create a sample RCAResult for testing.""" return RCAResult( - starting_file="025-2-executive_summary.md", + starting_file="executive_summary.md", problem_description="Budget is unvalidated", output_dir="/tmp/test_output", problems=[ @@ -26,14 +26,14 @@ def _make_sample_result() -> RCAResult: severity="HIGH", starting_evidence="CZK 500,000", trace=[ - TraceEntry(node="executive_summary", file="025-2-executive_summary.md", evidence="CZK 500,000", is_origin=False), - TraceEntry(node="project_plan", file="005-2-project_plan.md", evidence="Budget: 500k", is_origin=False), - TraceEntry(node="make_assumptions", file="003-5-make_assumptions.md", evidence="Assume budget of 500k", is_origin=True), + TraceEntry(node="executive_summary", file="executive_summary.md", evidence="CZK 500,000", is_origin=False), + TraceEntry(node="project_plan", file="project_plan.md", evidence="Budget: 500k", is_origin=False), + TraceEntry(node="make_assumptions", file="make_assumptions.md", evidence="Assume budget of 500k", is_origin=True), ], origin_node="make_assumptions", origin=OriginInfo( node="make_assumptions", - file="003-5-make_assumptions.md", + file="make_assumptions.md", source_code_files=["make_assumptions.py"], category="prompt_fixable", likely_cause="Prompt generates budget without data", @@ -47,7 +47,7 @@ def _make_sample_result() -> RCAResult: severity="MEDIUM", starting_evidence="growing Czech market", trace=[ - TraceEntry(node="executive_summary", file="025-2-executive_summary.md", evidence="growing Czech market", is_origin=True), + TraceEntry(node="executive_summary", 
file="executive_summary.md", evidence="growing Czech market", is_origin=True), ], origin_node="executive_summary", depth=1, @@ -130,7 +130,7 @@ def test_empty_result_produces_valid_markdown(self): with TemporaryDirectory() as d: output_path = Path(d) / "root_cause_analysis.md" result = RCAResult( - starting_file="030-report.html", + starting_file="report.html", problem_description="test", output_dir="/tmp", problems=[], diff --git a/worker_plan/worker_plan_internal/rca/tests/test_prompts.py b/worker_plan/worker_plan_internal/rca/tests/test_prompts.py index 45fb4fae8..4de5ce643 100644 --- a/worker_plan/worker_plan_internal/rca/tests/test_prompts.py +++ b/worker_plan/worker_plan_internal/rca/tests/test_prompts.py @@ -67,7 +67,7 @@ def test_source_code_analysis_rejects_invalid_category(self): class TestBuildProblemIdentificationMessages(unittest.TestCase): def test_returns_chat_messages(self): messages = build_problem_identification_messages( - filename="030-report.html", + filename="report.html", file_content="report content", user_problem_description="budget is wrong", ) @@ -78,12 +78,12 @@ def test_returns_chat_messages(self): def test_user_message_contains_inputs(self): messages = build_problem_identification_messages( - filename="025-2-executive_summary.md", + filename="executive_summary.md", file_content="# Summary\nBudget: 500k", user_problem_description="fabricated budget", ) user_content = messages[1].content - self.assertIn("025-2-executive_summary.md", user_content) + self.assertIn("executive_summary.md", user_content) self.assertIn("# Summary", user_content) self.assertIn("fabricated budget", user_content) @@ -93,7 +93,7 @@ def test_returns_chat_messages(self): messages = build_upstream_check_messages( problem_description="Budget is fabricated", evidence_quote="CZK 500,000", - upstream_filename="005-2-project_plan.md", + upstream_filename="project_plan.md", upstream_file_content="# Project Plan\nBudget: 500k", ) self.assertIsInstance(messages, list) @@ 
-103,13 +103,13 @@ def test_user_message_contains_problem_and_upstream(self): messages = build_upstream_check_messages( problem_description="Missing market sizing", evidence_quote="growing Czech market", - upstream_filename="003-5-make_assumptions.md", + upstream_filename="make_assumptions.md", upstream_file_content="# Assumptions\nMarket is growing", ) user_content = messages[1].content self.assertIn("Missing market sizing", user_content) self.assertIn("growing Czech market", user_content) - self.assertIn("003-5-make_assumptions.md", user_content) + self.assertIn("make_assumptions.md", user_content) class TestBuildSourceCodeAnalysisMessages(unittest.TestCase): diff --git a/worker_plan/worker_plan_internal/rca/tests/test_registry.py b/worker_plan/worker_plan_internal/rca/tests/test_registry.py index 1ca79cf4a..5f7d14a1f 100644 --- a/worker_plan/worker_plan_internal/rca/tests/test_registry.py +++ b/worker_plan/worker_plan_internal/rca/tests/test_registry.py @@ -36,22 +36,22 @@ def test_input_references_are_valid(self): class TestFindNodeByFilename(unittest.TestCase): def test_find_report(self): - node = find_node_by_filename("030-report.html") + node = find_node_by_filename("report.html") self.assertIsNotNone(node) self.assertEqual(node.name, "report") def test_find_potential_levers_clean(self): - node = find_node_by_filename("002-10-potential_levers.json") + node = find_node_by_filename("potential_levers.json") self.assertIsNotNone(node) self.assertEqual(node.name, "potential_levers") def test_find_potential_levers_raw(self): - node = find_node_by_filename("002-9-potential_levers_raw.json") + node = find_node_by_filename("potential_levers_raw.json") self.assertIsNotNone(node) self.assertEqual(node.name, "potential_levers") def test_find_executive_summary(self): - node = find_node_by_filename("025-2-executive_summary.md") + node = find_node_by_filename("executive_summary.md") self.assertIsNotNone(node) self.assertEqual(node.name, "executive_summary") @@ -70,10 
+70,10 @@ def test_potential_levers_upstream(self): with TemporaryDirectory() as d: output_dir = Path(d) # Create the expected upstream files on disk - (output_dir / "001-2-plan.txt").write_text("plan", encoding="utf-8") - (output_dir / "002-6-identify_purpose.md").write_text("purpose", encoding="utf-8") - (output_dir / "002-8-plan_type.md").write_text("type", encoding="utf-8") - (output_dir / "002-0-extract_constraints.md").write_text("constraints", encoding="utf-8") + (output_dir / "plan.txt").write_text("plan", encoding="utf-8") + (output_dir / "identify_purpose.md").write_text("purpose", encoding="utf-8") + (output_dir / "plan_type.md").write_text("type", encoding="utf-8") + (output_dir / "extract_constraints.md").write_text("constraints", encoding="utf-8") result = get_upstream_files("potential_levers", output_dir) node_names = [name for name, _ in result] @@ -86,7 +86,7 @@ def test_missing_files_are_skipped(self): with TemporaryDirectory() as d: output_dir = Path(d) # Only create one of the upstream files - (output_dir / "001-2-plan.txt").write_text("plan", encoding="utf-8") + (output_dir / "plan.txt").write_text("plan", encoding="utf-8") result = get_upstream_files("potential_levers", output_dir) node_names = [name for name, _ in result] diff --git a/worker_plan/worker_plan_internal/rca/tests/test_tracer.py b/worker_plan/worker_plan_internal/rca/tests/test_tracer.py index aca22d407..0410c7fd1 100644 --- a/worker_plan/worker_plan_internal/rca/tests/test_tracer.py +++ b/worker_plan/worker_plan_internal/rca/tests/test_tracer.py @@ -48,13 +48,13 @@ def _make_tracer(output_dir: Path, max_depth: int = 15, verbose: bool = False) - class TestRCAResult(unittest.TestCase): def test_dataclass_creation(self): result = RCAResult( - starting_file="030-report.html", + starting_file="report.html", problem_description="test", output_dir="/tmp/test", problems=[], llm_calls_made=0, ) - self.assertEqual(result.starting_file, "030-report.html") + 
self.assertEqual(result.starting_file, "report.html") self.assertEqual(len(result.problems), 0) self.assertEqual(result.llm_calls_made, 0) @@ -100,7 +100,7 @@ def test_identify_problems(self): with TemporaryDirectory() as d: output_dir = Path(d) # Create a minimal output file - report_file = output_dir / "025-2-executive_summary.md" + report_file = output_dir / "executive_summary.md" report_file.write_text("# Summary\nBudget: CZK 500,000", encoding="utf-8") tracer = _make_tracer(output_dir) @@ -118,7 +118,7 @@ def test_identify_problems(self): with patch.object(tracer, '_identify_problems', return_value=mock_identification), \ patch.object(tracer, '_analyze_source_code') as mock_analyze: - result = tracer.trace("025-2-executive_summary.md", "budget is unvalidated") + result = tracer.trace("executive_summary.md", "budget is unvalidated") self.assertIsInstance(result, RCAResult) self.assertGreaterEqual(len(result.problems), 1) @@ -141,13 +141,13 @@ def test_traces_problem_upstream(self): with TemporaryDirectory() as d: output_dir = Path(d) # Create files for a chain: executive_summary -> project_plan -> setup - (output_dir / "025-2-executive_summary.md").write_text("Budget: CZK 500,000", encoding="utf-8") - (output_dir / "005-2-project_plan.md").write_text("Budget: CZK 500,000", encoding="utf-8") - (output_dir / "001-2-plan.txt").write_text("Open a tea shop", encoding="utf-8") + (output_dir / "executive_summary.md").write_text("Budget: CZK 500,000", encoding="utf-8") + (output_dir / "project_plan.md").write_text("Budget: CZK 500,000", encoding="utf-8") + (output_dir / "plan.txt").write_text("Open a tea shop", encoding="utf-8") # Create other upstream files that executive_summary depends on - (output_dir / "002-14-strategic_decisions.md").write_text("decisions", encoding="utf-8") - (output_dir / "002-19-scenarios.md").write_text("scenarios", encoding="utf-8") - (output_dir / "003-10-consolidate_assumptions_full.md").write_text("assumptions", encoding="utf-8") + 
(output_dir / "strategic_decisions.md").write_text("decisions", encoding="utf-8") + (output_dir / "scenarios.md").write_text("scenarios", encoding="utf-8") + (output_dir / "consolidate_assumptions_full.md").write_text("assumptions", encoding="utf-8") tracer = _make_tracer(output_dir) @@ -186,7 +186,7 @@ def mock_check_upstream(problem_desc, evidence, upstream_filename, upstream_cont with patch.object(tracer, '_identify_problems', return_value=mock_identification), \ patch.object(tracer, '_check_upstream', side_effect=mock_check_upstream), \ patch.object(tracer, '_analyze_source_code'): - result = tracer.trace("025-2-executive_summary.md", "budget is fabricated") + result = tracer.trace("executive_summary.md", "budget is fabricated") self.assertEqual(len(result.problems), 1) problem = result.problems[0] @@ -204,12 +204,12 @@ def test_deduplication_works(self): # executive_summary depends on strategic_decisions_markdown, scenarios_markdown, etc. # project_plan also depends on strategic_decisions_markdown, scenarios_markdown. # When we trace through project_plan, those shared upstreams should be skipped. 
- (output_dir / "025-2-executive_summary.md").write_text("Budget: 500k", encoding="utf-8") - (output_dir / "005-2-project_plan.md").write_text("Budget: 500k", encoding="utf-8") - (output_dir / "001-2-plan.txt").write_text("Open a tea shop", encoding="utf-8") - (output_dir / "002-14-strategic_decisions.md").write_text("decisions", encoding="utf-8") - (output_dir / "002-19-scenarios.md").write_text("scenarios", encoding="utf-8") - (output_dir / "003-10-consolidate_assumptions_full.md").write_text("assumptions", encoding="utf-8") + (output_dir / "executive_summary.md").write_text("Budget: 500k", encoding="utf-8") + (output_dir / "project_plan.md").write_text("Budget: 500k", encoding="utf-8") + (output_dir / "plan.txt").write_text("Open a tea shop", encoding="utf-8") + (output_dir / "strategic_decisions.md").write_text("decisions", encoding="utf-8") + (output_dir / "scenarios.md").write_text("scenarios", encoding="utf-8") + (output_dir / "consolidate_assumptions_full.md").write_text("assumptions", encoding="utf-8") tracer = _make_tracer(output_dir) @@ -230,7 +230,7 @@ def mock_check_upstream(problem_desc, evidence, upstream_filename, upstream_cont with patch.object(tracer, '_identify_problems', return_value=mock_identification), \ patch.object(tracer, '_check_upstream', side_effect=mock_check_upstream), \ patch.object(tracer, '_analyze_source_code'): - result = tracer.trace("025-2-executive_summary.md", "budget fabricated") + result = tracer.trace("executive_summary.md", "budget fabricated") # Count unique filenames checked — dedup should prevent re-checking # strategic_decisions and scenarios at the project_plan level @@ -245,7 +245,7 @@ def test_respects_max_depth_zero(self): """With max_depth=0, no upstream tracing happens.""" with TemporaryDirectory() as d: output_dir = Path(d) - (output_dir / "025-2-executive_summary.md").write_text("Budget: 500k", encoding="utf-8") + (output_dir / "executive_summary.md").write_text("Budget: 500k", encoding="utf-8") tracer = 
_make_tracer(output_dir, max_depth=0) @@ -258,7 +258,7 @@ def test_respects_max_depth_zero(self): with patch.object(tracer, '_identify_problems', return_value=mock_identification), \ patch.object(tracer, '_check_upstream') as mock_check, \ patch.object(tracer, '_analyze_source_code'): - result = tracer.trace("025-2-executive_summary.md", "test") + result = tracer.trace("executive_summary.md", "test") self.assertEqual(len(result.problems), 1) # With max_depth=0, no upstream tracing happens @@ -270,12 +270,12 @@ def test_max_depth_limits_recursion(self): """With max_depth=1, tracing should stop after one level of upstream.""" with TemporaryDirectory() as d: output_dir = Path(d) - (output_dir / "025-2-executive_summary.md").write_text("Budget: 500k", encoding="utf-8") - (output_dir / "005-2-project_plan.md").write_text("Budget: 500k", encoding="utf-8") - (output_dir / "001-2-plan.txt").write_text("plan", encoding="utf-8") - (output_dir / "002-14-strategic_decisions.md").write_text("decisions", encoding="utf-8") - (output_dir / "002-19-scenarios.md").write_text("scenarios", encoding="utf-8") - (output_dir / "003-10-consolidate_assumptions_full.md").write_text("assumptions", encoding="utf-8") + (output_dir / "executive_summary.md").write_text("Budget: 500k", encoding="utf-8") + (output_dir / "project_plan.md").write_text("Budget: 500k", encoding="utf-8") + (output_dir / "plan.txt").write_text("plan", encoding="utf-8") + (output_dir / "strategic_decisions.md").write_text("decisions", encoding="utf-8") + (output_dir / "scenarios.md").write_text("scenarios", encoding="utf-8") + (output_dir / "consolidate_assumptions_full.md").write_text("assumptions", encoding="utf-8") tracer = _make_tracer(output_dir, max_depth=1) @@ -291,7 +291,7 @@ def always_found(problem_desc, evidence, upstream_filename, upstream_content): with patch.object(tracer, '_identify_problems', return_value=mock_identification), \ patch.object(tracer, '_check_upstream', side_effect=always_found), \ 
patch.object(tracer, '_analyze_source_code'): - result = tracer.trace("025-2-executive_summary.md", "test") + result = tracer.trace("executive_summary.md", "test") self.assertEqual(len(result.problems), 1) problem = result.problems[0] @@ -305,7 +305,7 @@ class TestRootCauseAnalyzerSourceCodeAnalysis(unittest.TestCase): def test_source_code_analysis_called_at_origin(self): with TemporaryDirectory() as d: output_dir = Path(d) - (output_dir / "025-2-executive_summary.md").write_text("Budget: 500k", encoding="utf-8") + (output_dir / "executive_summary.md").write_text("Budget: 500k", encoding="utf-8") tracer = _make_tracer(output_dir) @@ -317,7 +317,7 @@ def test_source_code_analysis_called_at_origin(self): with patch.object(tracer, '_identify_problems', return_value=mock_identification), \ patch.object(tracer, '_analyze_source_code') as mock_analyze: - result = tracer.trace("025-2-executive_summary.md", "test") + result = tracer.trace("executive_summary.md", "test") # _analyze_source_code should have been called once for the origin mock_analyze.assert_called_once() @@ -330,12 +330,12 @@ def test_source_code_analysis_called_at_deep_origin(self): with TemporaryDirectory() as d: output_dir = Path(d) # Create files for a chain: executive_summary -> project_plan (origin) - (output_dir / "025-2-executive_summary.md").write_text("Budget: 500k", encoding="utf-8") - (output_dir / "005-2-project_plan.md").write_text("Budget: 500k", encoding="utf-8") - (output_dir / "001-2-plan.txt").write_text("Open a tea shop", encoding="utf-8") - (output_dir / "002-14-strategic_decisions.md").write_text("decisions", encoding="utf-8") - (output_dir / "002-19-scenarios.md").write_text("scenarios", encoding="utf-8") - (output_dir / "003-10-consolidate_assumptions_full.md").write_text("assumptions", encoding="utf-8") + (output_dir / "executive_summary.md").write_text("Budget: 500k", encoding="utf-8") + (output_dir / "project_plan.md").write_text("Budget: 500k", encoding="utf-8") + (output_dir / 
"plan.txt").write_text("Open a tea shop", encoding="utf-8") + (output_dir / "strategic_decisions.md").write_text("decisions", encoding="utf-8") + (output_dir / "scenarios.md").write_text("scenarios", encoding="utf-8") + (output_dir / "consolidate_assumptions_full.md").write_text("assumptions", encoding="utf-8") tracer = _make_tracer(output_dir) @@ -356,7 +356,7 @@ def mock_check_upstream(problem_desc, evidence, upstream_filename, upstream_cont with patch.object(tracer, '_identify_problems', return_value=mock_identification), \ patch.object(tracer, '_check_upstream', side_effect=mock_check_upstream), \ patch.object(tracer, '_analyze_source_code') as mock_analyze: - result = tracer.trace("025-2-executive_summary.md", "budget fabricated") + result = tracer.trace("executive_summary.md", "budget fabricated") # Phase 3 should have been called at the deep origin (project_plan) mock_analyze.assert_called_once() @@ -371,7 +371,7 @@ class TestRootCauseAnalyzerMultipleProblems(unittest.TestCase): def test_traces_multiple_problems(self): with TemporaryDirectory() as d: output_dir = Path(d) - (output_dir / "025-2-executive_summary.md").write_text("Budget: 500k\nTimeline: 2 months", encoding="utf-8") + (output_dir / "executive_summary.md").write_text("Budget: 500k\nTimeline: 2 months", encoding="utf-8") tracer = _make_tracer(output_dir) @@ -384,7 +384,7 @@ def test_traces_multiple_problems(self): with patch.object(tracer, '_identify_problems', return_value=mock_identification), \ patch.object(tracer, '_analyze_source_code'): - result = tracer.trace("025-2-executive_summary.md", "multiple issues") + result = tracer.trace("executive_summary.md", "multiple issues") self.assertEqual(len(result.problems), 2) descriptions = {f.description for f in result.problems} @@ -401,11 +401,11 @@ class TestRootCauseAnalyzerSortsByDepth(unittest.TestCase): def test_problems_sorted_by_depth_descending(self): with TemporaryDirectory() as d: output_dir = Path(d) - (output_dir / 
"025-2-executive_summary.md").write_text("content", encoding="utf-8") - (output_dir / "005-2-project_plan.md").write_text("content", encoding="utf-8") - (output_dir / "002-14-strategic_decisions.md").write_text("content", encoding="utf-8") - (output_dir / "002-19-scenarios.md").write_text("content", encoding="utf-8") - (output_dir / "003-10-consolidate_assumptions_full.md").write_text("content", encoding="utf-8") + (output_dir / "executive_summary.md").write_text("content", encoding="utf-8") + (output_dir / "project_plan.md").write_text("content", encoding="utf-8") + (output_dir / "strategic_decisions.md").write_text("content", encoding="utf-8") + (output_dir / "scenarios.md").write_text("content", encoding="utf-8") + (output_dir / "consolidate_assumptions_full.md").write_text("content", encoding="utf-8") tracer = _make_tracer(output_dir) @@ -429,7 +429,7 @@ def mock_check_upstream(problem_desc, evidence, upstream_filename, upstream_cont with patch.object(tracer, '_identify_problems', return_value=mock_identification), \ patch.object(tracer, '_check_upstream', side_effect=mock_check_upstream), \ patch.object(tracer, '_analyze_source_code'): - result = tracer.trace("025-2-executive_summary.md", "test") + result = tracer.trace("executive_summary.md", "test") self.assertEqual(len(result.problems), 2) # Deepest origin should be first diff --git a/worker_plan/worker_plan_internal/utils/tests/test_purge_old_runs.py b/worker_plan/worker_plan_internal/utils/tests/test_purge_old_runs.py index 3c5352218..289ce22b1 100644 --- a/worker_plan/worker_plan_internal/utils/tests/test_purge_old_runs.py +++ b/worker_plan/worker_plan_internal/utils/tests/test_purge_old_runs.py @@ -41,10 +41,10 @@ def _create_run_dir(self, dirname: str, hours_old: float, with_start: bool, with path = os.path.join(self.test_run_dir, dirname) os.makedirs(path, exist_ok=True) if with_start: - with open(os.path.join(path, "001-1-start_time.json"), "w", encoding="utf-8") as f: + with open(os.path.join(path, 
"start_time.json"), "w", encoding="utf-8") as f: f.write("{}") if with_plan: - with open(os.path.join(path, "001-2-plan.txt"), "w", encoding="utf-8") as f: + with open(os.path.join(path, "plan.txt"), "w", encoding="utf-8") as f: f.write("plan") self._set_mtime(path, hours_old) From 827aeb98b5a11f0132ad2ec74e8e1b7ac9a0ddd2 Mon Sep 17 00:00:00 2001 From: Simon Strandgaard Date: Wed, 8 Apr 2026 16:26:35 +0200 Subject: [PATCH 2/7] refactor: remove number prefixes from standalone script filenames Covers run_hire_team.py, create_wbs_level3.py, estimate_wbs_task_durations.py, identify_wbs_task_dependencies.py, expert_cost.py, and create_wsb_table_csv.py. Co-Authored-By: Claude Opus 4.6 (1M context) --- .../plan/create_wbs_level3.py | 8 +++---- .../plan/estimate_wbs_task_durations.py | 4 ++-- .../worker_plan_internal/plan/expert_cost.py | 12 +++++----- .../plan/identify_wbs_task_dependencies.py | 4 ++-- .../team/run_hire_team.py | 22 +++++++++---------- .../wbs/create_wsb_table_csv.py | 2 +- 6 files changed, 26 insertions(+), 26 deletions(-) diff --git a/worker_plan/worker_plan_internal/plan/create_wbs_level3.py b/worker_plan/worker_plan_internal/plan/create_wbs_level3.py index bbc564757..b9c5d0ce1 100644 --- a/worker_plan/worker_plan_internal/plan/create_wbs_level3.py +++ b/worker_plan/worker_plan_internal/plan/create_wbs_level3.py @@ -205,10 +205,10 @@ def load_json(relative_path: str) -> dict: the_json = json.load(f) return the_json - plan_json = load_json('002-project_plan.json') - wbs_level1_json = load_json('006-wbs_level1.json') - wbs_level2_json = load_json('008-wbs_level2.json') - wbs_level2_task_durations_json = load_json('012-task_durations.json') + plan_json = load_json('project_plan.json') + wbs_level1_json = load_json('wbs_level1.json') + wbs_level2_json = load_json('wbs_level2.json') + wbs_level2_task_durations_json = load_json('task_durations.json') decompose_task_id = "1c690f4a-ae8e-493d-9e47-6da58ef5b24c" query = CreateWBSLevel3.format_query(plan_json, 
wbs_level1_json, wbs_level2_json, wbs_level2_task_durations_json, decompose_task_id) diff --git a/worker_plan/worker_plan_internal/plan/estimate_wbs_task_durations.py b/worker_plan/worker_plan_internal/plan/estimate_wbs_task_durations.py index d8054d1ac..88bbf2599 100644 --- a/worker_plan/worker_plan_internal/plan/estimate_wbs_task_durations.py +++ b/worker_plan/worker_plan_internal/plan/estimate_wbs_task_durations.py @@ -149,8 +149,8 @@ def load_json(relative_path: str) -> dict: the_json = json.load(f) return the_json - plan_json = load_json('002-project_plan.json') - wbs_level2_json = load_json('006-wbs_level2.json') + plan_json = load_json('project_plan.json') + wbs_level2_json = load_json('wbs_level2.json') task_ids = [ "c6a249af-b8d3-4d4c-b3ef-8a5caa8793d4", diff --git a/worker_plan/worker_plan_internal/plan/expert_cost.py b/worker_plan/worker_plan_internal/plan/expert_cost.py index 65e76baf1..4b47c3c59 100644 --- a/worker_plan/worker_plan_internal/plan/expert_cost.py +++ b/worker_plan/worker_plan_internal/plan/expert_cost.py @@ -247,20 +247,20 @@ def load_text(relative_path: str) -> dict: the_text = f.read() return the_text - plan_txt = load_text('001-plan.txt') + plan_txt = load_text('plan.txt') document_plan = Document(name="vague_plan_description.txt", content=plan_txt) - project_plan_json = load_json('002-project_plan.json') + project_plan_json = load_json('project_plan.json') project_plan = format_json_for_use_in_query(project_plan_json) document_project_plan = Document(name="project_plan.json", content=project_plan) - swot_analysis_md = load_text('004-swot_analysis.md') + swot_analysis_md = load_text('swot_analysis.md') document_swot_analysis = Document(name="swot_analysis.md", content=swot_analysis_md) - expert_list_json = load_json('006-experts.json') + expert_list_json = load_json('experts.json') - path_wbs_table_csv = os.path.join(basepath, '016-wbs_table.csv') - path_wbs_project_json = os.path.join(basepath, '016-wbs_project.json') + 
path_wbs_table_csv = os.path.join(basepath, 'wbs_table.csv') + path_wbs_project_json = os.path.join(basepath, 'wbs_project.json') wbs_table = WBSTableForCostEstimation.create(path_wbs_table_csv, path_wbs_project_json) wbs_df = wbs_table.wbs_table_df.copy() diff --git a/worker_plan/worker_plan_internal/plan/identify_wbs_task_dependencies.py b/worker_plan/worker_plan_internal/plan/identify_wbs_task_dependencies.py index fdc981948..ddb973759 100644 --- a/worker_plan/worker_plan_internal/plan/identify_wbs_task_dependencies.py +++ b/worker_plan/worker_plan_internal/plan/identify_wbs_task_dependencies.py @@ -147,8 +147,8 @@ def load_json(relative_path: str) -> dict: the_json = json.load(f) return the_json - plan_json = load_json('002-project_plan.json') - wbs_json = load_json('005-wbs_level2.json') + plan_json = load_json('project_plan.json') + wbs_json = load_json('wbs_level2.json') query = IdentifyWBSTaskDependencies.format_query(plan_json, wbs_json) diff --git a/worker_plan/worker_plan_internal/team/run_hire_team.py b/worker_plan/worker_plan_internal/team/run_hire_team.py index be7e5e2c9..678fff0f6 100644 --- a/worker_plan/worker_plan_internal/team/run_hire_team.py +++ b/worker_plan/worker_plan_internal/team/run_hire_team.py @@ -28,18 +28,18 @@ # Create the output folder if it doesn't exist os.makedirs(run_dir, exist_ok=True) -plan_prompt_file = f'{run_dir}/001-plan.txt' +plan_prompt_file = f'{run_dir}/plan.txt' with open(plan_prompt_file, 'w') as f: f.write(plan_prompt) print("Finding team members for this task...") find_team_members = FindTeamMembers.execute(llm, plan_prompt) -team_members_raw_file = f'{run_dir}/002-team_members_raw.json' +team_members_raw_file = f'{run_dir}/team_members_raw.json' with open(team_members_raw_file, 'w') as f: f.write(json.dumps(find_team_members.to_dict(), indent=2)) team_members_list = find_team_members.team_member_list -team_members_list_file = f'{run_dir}/003-team_members_list.json' +team_members_list_file = 
f'{run_dir}/team_members_list.json' with open(team_members_list_file, 'w') as f: f.write(json.dumps(team_members_list, indent=2)) @@ -51,12 +51,12 @@ enrich_team_members_with_contract_type_query = EnrichTeamMembersWithContractType.format_query(plan_prompt, team_members_list) enrich_team_members_with_contract_type = EnrichTeamMembersWithContractType.execute(llm, enrich_team_members_with_contract_type_query, team_members_list) enrich_team_members_with_contract_type_raw_dict = enrich_team_members_with_contract_type.to_dict() -enrich_team_members_with_contract_type_raw_file = f'{run_dir}/004-enrich_team_members_with_contract_type_raw.json' +enrich_team_members_with_contract_type_raw_file = f'{run_dir}/enrich_team_members_with_contract_type_raw.json' with open(enrich_team_members_with_contract_type_raw_file, 'w') as f: f.write(json.dumps(enrich_team_members_with_contract_type_raw_dict, indent=2)) enrich_team_members_with_contract_type_list = enrich_team_members_with_contract_type.team_member_list -enrich_team_members_with_contract_type_list_file = f'{run_dir}/005-enrich_team_members_with_contract_type_list.json' +enrich_team_members_with_contract_type_list_file = f'{run_dir}/enrich_team_members_with_contract_type_list.json' with open(enrich_team_members_with_contract_type_list_file, 'w') as f: f.write(json.dumps(enrich_team_members_with_contract_type_list, indent=2)) print("Step A: Done enriching team members.") @@ -65,12 +65,12 @@ enrich_team_members_with_background_story_query = EnrichTeamMembersWithBackgroundStory.format_query(plan_prompt, enrich_team_members_with_contract_type_list) enrich_team_members_with_background_story = EnrichTeamMembersWithBackgroundStory.execute(llm, enrich_team_members_with_background_story_query, enrich_team_members_with_contract_type_list) enrich_team_members_with_background_story_raw_dict = enrich_team_members_with_background_story.to_dict() -enrich_team_members_with_background_story_raw_file = 
f'{run_dir}/006-enriched_team_members_with_background_story_raw.json' +enrich_team_members_with_background_story_raw_file = f'{run_dir}/enriched_team_members_with_background_story_raw.json' with open(enrich_team_members_with_background_story_raw_file, 'w') as f: f.write(json.dumps(enrich_team_members_with_background_story_raw_dict, indent=2)) enrich_team_members_with_background_story_list = enrich_team_members_with_background_story.team_member_list -enrich_team_members_with_background_story_list_file = f'{run_dir}/007-enrich_team_members_with_background_story_list.json' +enrich_team_members_with_background_story_list_file = f'{run_dir}/enrich_team_members_with_background_story_list.json' with open(enrich_team_members_with_background_story_list_file, 'w') as f: f.write(json.dumps(enrich_team_members_with_background_story_list, indent=2)) print("Step B: Done enriching team members.") @@ -79,12 +79,12 @@ enrich_team_members_with_environment_info_query = EnrichTeamMembersWithEnvironmentInfo.format_query(plan_prompt, enrich_team_members_with_background_story_list) enrich_team_members_with_environment_info = EnrichTeamMembersWithEnvironmentInfo.execute(llm, enrich_team_members_with_environment_info_query, enrich_team_members_with_background_story_list) enrich_team_members_with_environment_info_raw_dict = enrich_team_members_with_environment_info.to_dict() -enrich_team_members_with_environment_info_raw_file = f'{run_dir}/008-enrich_team_members_with_environment_info_raw.json' +enrich_team_members_with_environment_info_raw_file = f'{run_dir}/enrich_team_members_with_environment_info_raw.json' with open(enrich_team_members_with_environment_info_raw_file, 'w') as f: f.write(json.dumps(enrich_team_members_with_environment_info_raw_dict, indent=2)) enrich_team_members_with_environment_info_list = enrich_team_members_with_environment_info.team_member_list -enrich_team_members_with_environment_info_list_file = f'{run_dir}/009-enrich_team_members_with_environment_info_list.json' 
+enrich_team_members_with_environment_info_list_file = f'{run_dir}/enrich_team_members_with_environment_info_list.json' with open(enrich_team_members_with_environment_info_list_file, 'w') as f: f.write(json.dumps(enrich_team_members_with_environment_info_list, indent=2)) print("Step C: Done enriching team members.") @@ -95,7 +95,7 @@ review_team_query = ReviewTeam.format_query(plan_prompt, builder1.to_string()) review_team = ReviewTeam.execute(llm, review_team_query) review_team_raw_dict = review_team.to_dict() -review_team_raw_file = f'{run_dir}/010-review_team_raw.json' +review_team_raw_file = f'{run_dir}/review_team_raw.json' with open(review_team_raw_file, 'w') as f: f.write(json.dumps(review_team_raw_dict, indent=2)) print("Step D: Reviewing team.") @@ -107,6 +107,6 @@ builder2.append_roles(enrich_team_members_with_environment_info_list) builder2.append_separator() builder2.append_full_review(review_team.response) -output_file = f'{run_dir}/011-team.md' +output_file = f'{run_dir}/team.md' builder2.write_to_file(output_file) print("Done creating Markdown document.") diff --git a/worker_plan/worker_plan_internal/wbs/create_wsb_table_csv.py b/worker_plan/worker_plan_internal/wbs/create_wsb_table_csv.py index afdacd016..a78c35b73 100644 --- a/worker_plan/worker_plan_internal/wbs/create_wsb_table_csv.py +++ b/worker_plan/worker_plan_internal/wbs/create_wsb_table_csv.py @@ -47,7 +47,7 @@ def to_csv_string(self): if __name__ == "__main__": # TODO: Eliminate hardcoded paths - path = '/Users/neoneye/Desktop/planexe_data/011-wbs_project.json' + path = '/Users/neoneye/Desktop/planexe_data/wbs_project.json' print(f"loading file: {path}") with open(path, 'r', encoding='utf-8') as f: From 3adc6439291a16a2e9f6c9ab9dbde67b2457a942 Mon Sep 17 00:00:00 2001 From: Simon Strandgaard Date: Wed, 8 Apr 2026 16:34:42 +0200 Subject: [PATCH 3/7] docs: remove filename prefixes from proposal docs Co-Authored-By: Claude Opus 4.6 (1M context) --- 
docs/proposals/114-mcp-interface-feedback-stress-test.md | 4 ++-- docs/proposals/66-post-plan-enrichment-swarm.md | 2 +- docs/proposals/70-mcp-interface-evaluation-and-roadmap.md | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/proposals/114-mcp-interface-feedback-stress-test.md b/docs/proposals/114-mcp-interface-feedback-stress-test.md index e056e8318..dbc6e3dc4 100644 --- a/docs/proposals/114-mcp-interface-feedback-stress-test.md +++ b/docs/proposals/114-mcp-interface-feedback-stress-test.md @@ -68,7 +68,7 @@ During the stress test, Plan 1 (20f1cfac) stalled at 5.5% with zero diagnostic i "state": "failed", "error": { "failure_reason": "generation_error", - "failed_step": "016-expert_criticism", + "failed_step": "expert_criticism", "message": "LLM provider returned 503", "recoverable": true } @@ -248,7 +248,7 @@ This is a trust gap: the agent cannot confidently tell the user "your plan is re "sections_complete": 108, "sections_partial": 2, "partial_details": [ - {"step": "016-expert_criticism", "note": "2/8 experts provided feedback"} + {"step": "expert_criticism", "note": "2/8 experts provided feedback"} ] } ``` diff --git a/docs/proposals/66-post-plan-enrichment-swarm.md b/docs/proposals/66-post-plan-enrichment-swarm.md index 5d6a87f6a..c1dde48fb 100644 --- a/docs/proposals/66-post-plan-enrichment-swarm.md +++ b/docs/proposals/66-post-plan-enrichment-swarm.md @@ -67,7 +67,7 @@ An OpenClaw file-watch hook (or a simple `inotifywait` wrapper on the run root) ```bash # Example hook: watch for completion signal -inotifywait -m -r /run --include '999-pipeline_complete\.txt' -e create \ +inotifywait -m -r /run --include 'pipeline_complete\.txt' -e create \ | while read dir event file; do planexe-enrich "$dir" done diff --git a/docs/proposals/70-mcp-interface-evaluation-and-roadmap.md b/docs/proposals/70-mcp-interface-evaluation-and-roadmap.md index a36e9dae6..59d8edab2 100644 --- a/docs/proposals/70-mcp-interface-evaluation-and-roadmap.md +++ 
b/docs/proposals/70-mcp-interface-evaluation-and-roadmap.md @@ -205,7 +205,7 @@ After `plan_create`, there is no indication of credits consumed or remaining. Fo ### 5.7 Complete files array in `plan_status` for completed plans -**Status:** Partially addressed. The `files` array now returns the **most recent 10** files instead of the first 10, so agents see what was just produced (e.g. `019-swot_analysis.md`) rather than always the same early pipeline files (`001-start_time.json`). `files_count` gives the total. Full manifest support for completed plans remains open. +**Status:** Partially addressed. The `files` array now returns the **most recent 10** files instead of the first 10, so agents see what was just produced (e.g. `swot_analysis.md`) rather than always the same early pipeline files (`start_time.json`). `files_count` gives the total. Full manifest support for completed plans remains open. **Source:** Claude Code agent feedback (2026-03-02). From a4244260c54e55192b07a5034c42f1aedfd582d6 Mon Sep 17 00:00:00 2001 From: Simon Strandgaard Date: Wed, 8 Apr 2026 16:39:41 +0200 Subject: [PATCH 4/7] refactor: rename deduplicate_levers to triage_levers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The step classifies levers as primary/secondary/remove — triage describes this more accurately than deduplication. Renames files, classes (DeduplicateLevers→TriageLevers), FilenameEnum entries, JSON keys, and all references. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- worker_plan/worker_plan_api/filenames.py | 4 +-- .../lever/enrich_potential_levers.py | 6 ++-- .../lever/identify_potential_levers.py | 8 ++--- ...deduplicate_levers.py => triage_levers.py} | 30 +++++++++---------- .../plan/nodes/constraint_checker_stages.py | 12 ++++---- .../plan/nodes/enrich_levers.py | 8 ++--- .../plan/nodes/full_plan_pipeline.py | 8 ++--- ...deduplicate_levers.py => triage_levers.py} | 12 ++++---- 8 files changed, 44 insertions(+), 44 deletions(-) rename worker_plan/worker_plan_internal/lever/{deduplicate_levers.py => triage_levers.py} (92%) rename worker_plan/worker_plan_internal/plan/nodes/{deduplicate_levers.py => triage_levers.py} (82%) diff --git a/worker_plan/worker_plan_api/filenames.py b/worker_plan/worker_plan_api/filenames.py index 161cb7832..9cda919eb 100644 --- a/worker_plan/worker_plan_api/filenames.py +++ b/worker_plan/worker_plan_api/filenames.py @@ -19,8 +19,8 @@ class FilenameEnum(str, Enum): POTENTIAL_LEVERS_RAW = "potential_levers_raw.json" POTENTIAL_LEVERS_CLEAN = "potential_levers.json" POTENTIAL_LEVERS_CONSTRAINT = "potential_levers_constraint.json" - DEDUPLICATED_LEVERS_RAW = "deduplicated_levers_raw.json" - DEDUPLICATED_LEVERS_CONSTRAINT = "deduplicated_levers_constraint.json" + TRIAGED_LEVERS_RAW = "triaged_levers_raw.json" + TRIAGED_LEVERS_CONSTRAINT = "triaged_levers_constraint.json" ENRICHED_LEVERS_RAW = "enriched_levers_raw.json" ENRICHED_LEVERS_CONSTRAINT = "enriched_levers_constraint.json" VITAL_FEW_LEVERS_RAW = "vital_few_levers_raw.json" diff --git a/worker_plan/worker_plan_internal/lever/enrich_potential_levers.py b/worker_plan/worker_plan_internal/lever/enrich_potential_levers.py index aa584d957..a3c1c656a 100644 --- a/worker_plan/worker_plan_internal/lever/enrich_potential_levers.py +++ b/worker_plan/worker_plan_internal/lever/enrich_potential_levers.py @@ -36,7 +36,7 @@ pipeline inside run_plan_pipeline.py: 1. 
IdentifyPotentialLevers — brainstorms 15-20 raw levers - 2. DeduplicateLevers — removes near-duplicate levers + 2. TriageLevers — removes near-duplicate levers 3. EnrichLevers ← you are here 4. FocusOnVitalFewLevers — filters down to 4-6 high-impact levers 5. ScenarioGeneration — builds 3 scenarios (aggressive, medium, safe) @@ -360,8 +360,8 @@ def save_raw(self, file_path: str) -> None: raise ValueError("Prompt item not found.") project_plan = prompt_item.prompt - # This file is created by deduplicate_levers.py - input_file = os.path.join(os.path.dirname(__file__), 'test_data', f'deduplicate_levers_{prompt_id}.json') + # This file is created by triage_levers.py + input_file = os.path.join(os.path.dirname(__file__), 'test_data', f'triage_levers_{prompt_id}.json') output_file = f"enrich_potential_levers_{prompt_id}.json" if not os.path.exists(input_file): diff --git a/worker_plan/worker_plan_internal/lever/identify_potential_levers.py b/worker_plan/worker_plan_internal/lever/identify_potential_levers.py index 1cda80e6b..5ce347d7e 100644 --- a/worker_plan/worker_plan_internal/lever/identify_potential_levers.py +++ b/worker_plan/worker_plan_internal/lever/identify_potential_levers.py @@ -6,7 +6,7 @@ It’s more important that the quality of the text content of the levers are getting improved on. The output contains near duplicates, these have to be deduplicated. A few lever names appear twice. -The deduplication is done in the deduplicate_levers.py script. +The triage is done in the triage_levers.py script. PROMPT> python -m worker_plan_internal.lever.identify_potential_levers """ @@ -34,7 +34,7 @@ exploration pipeline inside run_plan_pipeline.py: 1. IdentifyPotentialLevers ← you are here - 2. DeduplicateLevers — removes near-duplicate levers + 2. TriageLevers — removes near-duplicate levers 3. EnrichLevers — adds description, synergy, and conflict text 4. FocusOnVitalFewLevers — filters down to 4-6 high-impact levers 5. 
ScenarioGeneration — builds 3 scenarios (aggressive, medium, safe) @@ -214,7 +214,7 @@ class DocumentDetails(BaseModel): description="A concise strategic analysis (around 100 words) of the project's core tensions and trade-offs. This rationale must JUSTIFY why the selected levers are the most critical levers for decision-making. For example, explain how the chosen levers navigate the fundamental conflicts between speed, cost, scope, and quality." ) # No max_length constraint: if a model returns more than 7 levers, the downstream - # DeduplicateLeversTask handles extras. A hard cap would discard the entire response + # TriageLeversTask handles extras. A hard cap would discard the entire response # and waste tokens retrying. levers: list[Lever] = Field( min_length=5, @@ -384,7 +384,7 @@ def execute(cls, llm_executor: LLMExecutor, user_prompt: str, constraints_markdo ) # Adaptive loop: keep calling until we have enough levers. - # Over-generation is fine — DeduplicateLeversTask handles extras. + # Over-generation is fine — TriageLeversTask handles extras. 
min_levers = 15 max_calls = 5 responses: list[DocumentDetails] = [] diff --git a/worker_plan/worker_plan_internal/lever/deduplicate_levers.py b/worker_plan/worker_plan_internal/lever/triage_levers.py similarity index 92% rename from worker_plan/worker_plan_internal/lever/deduplicate_levers.py rename to worker_plan/worker_plan_internal/lever/triage_levers.py index 6b3115413..2ada463fe 100644 --- a/worker_plan/worker_plan_internal/lever/deduplicate_levers.py +++ b/worker_plan/worker_plan_internal/lever/triage_levers.py @@ -7,7 +7,7 @@ secondary — useful but supporting, kept remove — redundant or overlapping, discarded -PROMPT> python -m worker_plan_internal.lever.deduplicate_levers +PROMPT> python -m worker_plan_internal.lever.triage_levers """ import json @@ -31,11 +31,11 @@ Pipeline context ---------------- -This step (DeduplicateLevers) is part of a 6-step solution-space +This step (TriageLevers) is part of a 6-step solution-space exploration pipeline inside run_plan_pipeline.py: 1. IdentifyPotentialLevers — brainstorms 15-20 raw levers - 2. DeduplicateLevers ← you are here + 2. TriageLevers ← you are here 3. EnrichLevers — adds description, synergy, and conflict text 4. FocusOnVitalFewLevers — filters down to 4-6 high-impact levers 5. ScenarioGeneration — builds 3 scenarios (aggressive, medium, safe) @@ -145,16 +145,16 @@ class OutputLever(InputLever): @dataclass -class DeduplicateLevers: +class TriageLevers: """Holds the results of the deduplication.""" user_prompt: str system_prompt: str response: List[LeverDecision] - deduplicated_levers: List[OutputLever] + triaged_levers: List[OutputLever] metadata: List[Dict[str, Any]] @classmethod - def execute(cls, llm_executor: LLMExecutor, project_context: str, raw_levers_list: List[dict]) -> 'DeduplicateLevers': + def execute(cls, llm_executor: LLMExecutor, project_context: str, raw_levers_list: List[dict]) -> 'TriageLevers': """ Executes the deduplication process using a single batch LLM call. 
@@ -272,16 +272,16 @@ def execute_function(llm: LLM) -> dict: user_prompt=project_context, system_prompt=system_prompt, response=decisions, - deduplicated_levers=output_levers, + triaged_levers=output_levers, metadata=metadata_list, ) - def to_dict(self, include_response=True, include_deduplicated_levers=True, include_metadata=True, include_system_prompt=True, include_user_prompt=True) -> dict: + def to_dict(self, include_response=True, include_triaged_levers=True, include_metadata=True, include_system_prompt=True, include_user_prompt=True) -> dict: d = {} if include_response: d["response"] = [item.model_dump() for item in self.response] - if include_deduplicated_levers: - d['deduplicated_levers'] = [lever.model_dump() for lever in self.deduplicated_levers] + if include_triaged_levers: + d['triaged_levers'] = [lever.model_dump() for lever in self.triaged_levers] if include_metadata: d['metadata'] = self.metadata if include_system_prompt: @@ -295,7 +295,7 @@ def save_raw(self, file_path: str) -> None: def save_clean(self, file_path: Path) -> None: """Saves the final, deduplicated list of levers to a JSON file.""" - output_data = [lever.model_dump() for lever in self.deduplicated_levers] + output_data = [lever.model_dump() for lever in self.triaged_levers] try: with open(file_path, 'w', encoding='utf-8') as f: json.dump(output_data, f, indent=2) @@ -324,23 +324,23 @@ def save_clean(self, file_path: Path) -> None: with open(input_file, 'r', encoding='utf-8') as f: raw_levers_data = json.load(f) - output_file = f"deduplicate_levers_{prompt_id}.json" + output_file = f"triage_levers_{prompt_id}.json" model_names = ["ollama-llama3.1"] llm_models = LLMModelFromName.from_names(model_names) llm_executor = LLMExecutor(llm_models=llm_models) # --- Run Deduplication --- - result = DeduplicateLevers.execute( + result = TriageLevers.execute( llm_executor=llm_executor, project_context=project_context, raw_levers_list=raw_levers_data ) - d = result.to_dict(include_response=True, 
include_deduplicated_levers=True, include_metadata=True, include_system_prompt=False, include_user_prompt=False) + d = result.to_dict(include_response=True, include_triaged_levers=True, include_metadata=True, include_system_prompt=False, include_user_prompt=False) d_json = json.dumps(d, indent=2) logger.info(f"Deduplication result: {d_json}") - logger.info(f"Lever count after deduplication: {len(result.deduplicated_levers)}.") + logger.info(f"Lever count after triage: {len(result.triaged_levers)}.") # --- Save Output --- result.save_clean(output_file) diff --git a/worker_plan/worker_plan_internal/plan/nodes/constraint_checker_stages.py b/worker_plan/worker_plan_internal/plan/nodes/constraint_checker_stages.py index 8a72ad2c3..ff02477a5 100644 --- a/worker_plan/worker_plan_internal/plan/nodes/constraint_checker_stages.py +++ b/worker_plan/worker_plan_internal/plan/nodes/constraint_checker_stages.py @@ -10,7 +10,7 @@ from worker_plan_api.filenames import FilenameEnum from worker_plan_internal.plan.nodes.extract_constraints import ExtractConstraintsTask from worker_plan_internal.plan.nodes.potential_levers import PotentialLeversTask -from worker_plan_internal.plan.nodes.deduplicate_levers import DeduplicateLeversTask +from worker_plan_internal.plan.nodes.triage_levers import TriageLeversTask from worker_plan_internal.plan.nodes.enrich_levers import EnrichLeversTask from worker_plan_internal.plan.nodes.focus_on_vital_few_levers import FocusOnVitalFewLeversTask from worker_plan_internal.plan.nodes.candidate_scenarios import CandidateScenariosTask @@ -48,22 +48,22 @@ def run_with_llm(self, llm: LLM) -> None: result.save_raw(self.output().path) -class DeduplicatedLeversConstraintTask(PlanTask): +class TriagedLeversConstraintTask(PlanTask): """Guardrail: verify triaged levers still respect the user's constraints.""" def requires(self): return { 'extract_constraints': self.clone(ExtractConstraintsTask), - 'deduplicate_levers': self.clone(DeduplicateLeversTask), + 
'triage_levers': self.clone(TriageLeversTask), } def output(self): - return self.local_target(FilenameEnum.DEDUPLICATED_LEVERS_CONSTRAINT) + return self.local_target(FilenameEnum.TRIAGED_LEVERS_CONSTRAINT) def run_with_llm(self, llm: LLM) -> None: constraints_json = _read_constraints_json(self) - with self.input()['deduplicate_levers']['raw'].open("r") as f: + with self.input()['triage_levers']['raw'].open("r") as f: stage_output_json = f.read() - result = ConstraintChecker.execute(llm, constraints_json, stage_output_json, "deduplicated_levers") + result = ConstraintChecker.execute(llm, constraints_json, stage_output_json, "triaged_levers") result.save_raw(self.output().path) diff --git a/worker_plan/worker_plan_internal/plan/nodes/enrich_levers.py b/worker_plan/worker_plan_internal/plan/nodes/enrich_levers.py index 3469e2bac..071d35ed6 100644 --- a/worker_plan/worker_plan_internal/plan/nodes/enrich_levers.py +++ b/worker_plan/worker_plan_internal/plan/nodes/enrich_levers.py @@ -7,7 +7,7 @@ from worker_plan_internal.plan.nodes.setup import SetupTask from worker_plan_internal.plan.nodes.identify_purpose import IdentifyPurposeTask from worker_plan_internal.plan.nodes.plan_type import PlanTypeTask -from worker_plan_internal.plan.nodes.deduplicate_levers import DeduplicateLeversTask +from worker_plan_internal.plan.nodes.triage_levers import TriageLeversTask class EnrichLeversTask(PlanTask): @@ -17,7 +17,7 @@ def requires(self): 'setup': self.clone(SetupTask), 'identify_purpose': self.clone(IdentifyPurposeTask), 'plan_type': self.clone(PlanTypeTask), - 'deduplicate_levers': self.clone(DeduplicateLeversTask), + 'triage_levers': self.clone(TriageLeversTask), } def output(self): @@ -35,9 +35,9 @@ def run_inner(self): identify_purpose_markdown = f.read() with self.input()['plan_type']['markdown'].open("r") as f: plan_type_markdown = f.read() - with self.input()['deduplicate_levers']['raw'].open("r") as f: + with self.input()['triage_levers']['raw'].open("r") as f: json_dict 
= json.load(f) - lever_item_list = json_dict["deduplicated_levers"] + lever_item_list = json_dict["triaged_levers"] query = ( f"File 'plan.txt':\n{plan_prompt}\n\n" diff --git a/worker_plan/worker_plan_internal/plan/nodes/full_plan_pipeline.py b/worker_plan/worker_plan_internal/plan/nodes/full_plan_pipeline.py index 028351251..ed51e3bd0 100644 --- a/worker_plan/worker_plan_internal/plan/nodes/full_plan_pipeline.py +++ b/worker_plan/worker_plan_internal/plan/nodes/full_plan_pipeline.py @@ -14,7 +14,7 @@ # Phase 3 from worker_plan_internal.plan.nodes.potential_levers import PotentialLeversTask -from worker_plan_internal.plan.nodes.deduplicate_levers import DeduplicateLeversTask +from worker_plan_internal.plan.nodes.triage_levers import TriageLeversTask from worker_plan_internal.plan.nodes.enrich_levers import EnrichLeversTask from worker_plan_internal.plan.nodes.focus_on_vital_few_levers import FocusOnVitalFewLeversTask from worker_plan_internal.plan.nodes.strategic_decisions_markdown import StrategicDecisionsMarkdownTask @@ -23,7 +23,7 @@ from worker_plan_internal.plan.nodes.scenarios_markdown import ScenariosMarkdownTask from worker_plan_internal.plan.nodes.constraint_checker_stages import ( PotentialLeversConstraintTask, - DeduplicatedLeversConstraintTask, + TriagedLeversConstraintTask, EnrichedLeversConstraintTask, VitalFewLeversConstraintTask, CandidateScenariosConstraintTask, @@ -103,7 +103,7 @@ def requires(self): 'identify_purpose': self.clone(IdentifyPurposeTask), 'plan_type': self.clone(PlanTypeTask), 'potential_levers': self.clone(PotentialLeversTask), - 'deduplicate_levers': self.clone(DeduplicateLeversTask), + 'triage_levers': self.clone(TriageLeversTask), 'enriched_levers': self.clone(EnrichLeversTask), 'focus_on_vital_few_levers': self.clone(FocusOnVitalFewLeversTask), 'strategic_decisions_markdown': self.clone(StrategicDecisionsMarkdownTask), @@ -111,7 +111,7 @@ def requires(self): 'select_scenario': self.clone(SelectScenarioTask), 
'scenarios_markdown': self.clone(ScenariosMarkdownTask), 'potential_levers_constraint': self.clone(PotentialLeversConstraintTask), - 'deduplicated_levers_constraint': self.clone(DeduplicatedLeversConstraintTask), + 'triaged_levers_constraint': self.clone(TriagedLeversConstraintTask), 'enriched_levers_constraint': self.clone(EnrichedLeversConstraintTask), 'vital_few_levers_constraint': self.clone(VitalFewLeversConstraintTask), 'candidate_scenarios_constraint': self.clone(CandidateScenariosConstraintTask), diff --git a/worker_plan/worker_plan_internal/plan/nodes/deduplicate_levers.py b/worker_plan/worker_plan_internal/plan/nodes/triage_levers.py similarity index 82% rename from worker_plan/worker_plan_internal/plan/nodes/deduplicate_levers.py rename to worker_plan/worker_plan_internal/plan/nodes/triage_levers.py index db4974cac..dfcbe2232 100644 --- a/worker_plan/worker_plan_internal/plan/nodes/deduplicate_levers.py +++ b/worker_plan/worker_plan_internal/plan/nodes/triage_levers.py @@ -1,7 +1,7 @@ -"""DeduplicateLeversTask - The potential levers usually have some redundant levers.""" +"""TriageLeversTask - The potential levers usually have some redundant levers.""" import json from worker_plan_internal.plan.run_plan_pipeline import PlanTask -from worker_plan_internal.lever.deduplicate_levers import DeduplicateLevers +from worker_plan_internal.lever.triage_levers import TriageLevers from worker_plan_api.filenames import FilenameEnum from worker_plan_internal.llm_util.llm_executor import LLMExecutor from worker_plan_internal.plan.nodes.setup import SetupTask @@ -10,7 +10,7 @@ from worker_plan_internal.plan.nodes.potential_levers import PotentialLeversTask -class DeduplicateLeversTask(PlanTask): +class TriageLeversTask(PlanTask): """Triage levers into primary, secondary, or remove.""" def requires(self): return { @@ -22,7 +22,7 @@ def requires(self): def output(self): return { - 'raw': self.local_target(FilenameEnum.DEDUPLICATED_LEVERS_RAW) + 'raw': 
self.local_target(FilenameEnum.TRIAGED_LEVERS_RAW) } def run_inner(self): @@ -44,7 +44,7 @@ def run_inner(self): f"File 'plan_type.md':\n{plan_type_markdown}" ) - deduplicate_levers = DeduplicateLevers.execute( + triage_levers = TriageLevers.execute( llm_executor, project_context=query, raw_levers_list=lever_item_list @@ -52,4 +52,4 @@ def run_inner(self): # Write the result to disk. output_raw_path = self.output()['raw'].path - deduplicate_levers.save_raw(str(output_raw_path)) + triage_levers.save_raw(str(output_raw_path)) From 1a29ec59c32caae2acd86f120021b19d59ee1dcd Mon Sep 17 00:00:00 2001 From: Simon Strandgaard Date: Wed, 8 Apr 2026 16:41:35 +0200 Subject: [PATCH 5/7] chore: bump PIPELINE_VERSION to 2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Filenames and task names changed — old snapshots are incompatible. Co-Authored-By: Claude Opus 4.6 (1M context) --- worker_plan/worker_plan_api/pipeline_version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/worker_plan/worker_plan_api/pipeline_version.py b/worker_plan/worker_plan_api/pipeline_version.py index 70f34a769..ce9e0635c 100644 --- a/worker_plan/worker_plan_api/pipeline_version.py +++ b/worker_plan/worker_plan_api/pipeline_version.py @@ -13,4 +13,4 @@ differs from the current value with error code PIPELINE_VERSION_MISMATCH. 
""" -PIPELINE_VERSION: int = 1 +PIPELINE_VERSION: int = 2 From dcc5958adea7772d22206fa6e3397360283c53f5 Mon Sep 17 00:00:00 2001 From: Simon Strandgaard Date: Wed, 8 Apr 2026 16:42:42 +0200 Subject: [PATCH 6/7] refactor: rename expected_filenames1 to expected_filenames Co-Authored-By: Claude Opus 4.6 (1M context) --- worker_plan/worker_plan_api/filenames.py | 2 +- .../worker_plan_internal/plan/run_plan_pipeline.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/worker_plan/worker_plan_api/filenames.py b/worker_plan/worker_plan_api/filenames.py index 9cda919eb..fab30488d 100644 --- a/worker_plan/worker_plan_api/filenames.py +++ b/worker_plan/worker_plan_api/filenames.py @@ -133,7 +133,7 @@ class FilenameEnum(str, Enum): class ExtraFilenameEnum(str, Enum): LOG_TXT = "log.txt" - EXPECTED_FILENAMES1_JSON = "expected_filenames1.json" + EXPECTED_FILENAMES_JSON = "expected_filenames.json" PIPELINE_STOP_REQUESTED_FLAG = "pipeline_stop_requested.txt" TRACK_ACTIVITY_JSONL = "track_activity.jsonl" ACTIVITY_OVERVIEW_JSON = "activity_overview.json" diff --git a/worker_plan/worker_plan_internal/plan/run_plan_pipeline.py b/worker_plan/worker_plan_internal/plan/run_plan_pipeline.py index 2218d3349..009997530 100644 --- a/worker_plan/worker_plan_internal/plan/run_plan_pipeline.py +++ b/worker_plan/worker_plan_internal/plan/run_plan_pipeline.py @@ -301,7 +301,7 @@ def get_progress_percentage(self) -> PipelineProgress: logger.warning(f"Could not list files in {run_id_dir}: {e}") ignore_files = [ - ExtraFilenameEnum.EXPECTED_FILENAMES1_JSON.value, + ExtraFilenameEnum.EXPECTED_FILENAMES_JSON.value, ExtraFilenameEnum.LOG_TXT.value, ExtraFilenameEnum.PIPELINE_STOP_REQUESTED_FLAG.value, ExtraFilenameEnum.USAGE_METRICS_JSONL.value, @@ -311,7 +311,7 @@ def get_progress_percentage(self) -> PipelineProgress: # logger.debug(f"Files in run_id_dir for {job.run_id}: {files}") # Debug, can be noisy # logger.debug(f"Number of files in run_id_dir for {job.run_id}: 
{len(files)}") # Debug - # Determine the progress, by comparing the generated files with the expected_filenames1.json + # Determine the progress, by comparing the generated files with the expected_filenames.json set_files = set(files) set_expected_files = set(self.all_expected_filenames) intersection_files = set_files & set_expected_files @@ -392,8 +392,8 @@ def run(self): set_usage_metrics_path(usage_metrics_path) logger.info(f"Usage metrics will be written to {usage_metrics_path}") - # create a json file with the expected filenames. Save it to the run/run_id/expected_filenames1.json - expected_filenames_path = self.run_id_dir / ExtraFilenameEnum.EXPECTED_FILENAMES1_JSON.value + # create a json file with the expected filenames. Save it to the run/run_id/expected_filenames.json + expected_filenames_path = self.run_id_dir / ExtraFilenameEnum.EXPECTED_FILENAMES_JSON.value with open(expected_filenames_path, "w") as f: json.dump(self.all_expected_filenames, f, indent=2) logger.info(f"Saved {len(self.all_expected_filenames)} expected filenames to {expected_filenames_path}") From ef81be1bca0ca280c45608890e62398d2a13f4d8 Mon Sep 17 00:00:00 2001 From: Simon Strandgaard Date: Wed, 8 Apr 2026 16:45:28 +0200 Subject: [PATCH 7/7] fix: add _raw suffix to redline_gate and premise_attack filenames Consistent with the naming convention used by all other raw JSON outputs. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- worker_plan/worker_plan_api/filenames.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/worker_plan/worker_plan_api/filenames.py b/worker_plan/worker_plan_api/filenames.py index fab30488d..fc7ba6240 100644 --- a/worker_plan/worker_plan_api/filenames.py +++ b/worker_plan/worker_plan_api/filenames.py @@ -8,9 +8,9 @@ class FilenameEnum(str, Enum): SCREEN_PLANNING_PROMPT_MARKDOWN = "screen_planning_prompt.md" EXTRACT_CONSTRAINTS_RAW = "extract_constraints_raw.json" EXTRACT_CONSTRAINTS_MARKDOWN = "extract_constraints.md" - REDLINE_GATE_RAW = "redline_gate.json" + REDLINE_GATE_RAW = "redline_gate_raw.json" REDLINE_GATE_MARKDOWN = "redline_gate.md" - PREMISE_ATTACK_RAW = "premise_attack.json" + PREMISE_ATTACK_RAW = "premise_attack_raw.json" PREMISE_ATTACK_MARKDOWN = "premise_attack.md" IDENTIFY_PURPOSE_RAW = "identify_purpose_raw.json" IDENTIFY_PURPOSE_MARKDOWN = "identify_purpose.md"