AVADSA25 · AVADSA25 · May 22, 2026 · May 22, 2026
diff --git a/AGENTS.md b/AGENTS.md
@@ -386,7 +386,7 @@ Rule-based compressor for memory writes that need shrinking. Entity abbreviation
 ### Injection points
 | File:line | What gets injected |
 |---|---|
-| `codec.py:358-362` | Voice-mode prompt suffix (boot ctx + facts + memory) |
+| `codec._build_voice_system_prompt(task)` | Voice-mode prompt suffix (boot ctx + facts + memory) — extracted from `_dispatch_inner` in PR-3D-b/A-5 |
 | `codec_dashboard.py:1827-1862` | Chat handler before LLM call |
 | `codec_dashboard.py:1851-1886` | Same handler, separate channel |
 | `codec_voice.py:288-320` | VAD speech-start preload |

diff --git a/codec.py b/codec.py
@@ -263,6 +263,81 @@ def dispatch(task):
         _dispatch_cooldown = time.time() + 1.5
         _dispatch_lock.release()
 
+def _build_voice_system_prompt(task) -> str:
+    """A-5 (PR-3D-b): build the voice system prompt for `task`, in this order:
+    dated CODEC_VOICE_PROMPT + boot identity + active facts + get_memory(5) +
+    targeted/recent CodecMemory context. The two store lookups are guarded (warn
+    and skip on failure); get_memory(5) is not. Extracted from _dispatch_inner."""
+    mem = get_memory(5)  # unguarded: an exception here propagates to the caller
+    mem_ctx = ""
+    boot_ctx = ""
+    facts_ctx = ""
+    try:
+        from codec_memory import CodecMemory
+        cm = CodecMemory()
+        targeted = cm.get_context(task, n=5)
+        if targeted:
+            mem_ctx += f"\n\n[MEMORY — RELEVANT PAST CONVERSATIONS]\n{targeted}\n[END MEMORY]"
+        recent = cm.search_recent(days=3, limit=5)
+        if recent:
+            lines = ["[RECENT MEMORY — LAST 3 DAYS]"]
+            for r in recent:
+                ts = r["timestamp"][:16].replace("T", " ")  # presumably ISO-8601 -> "YYYY-MM-DD HH:MM" — confirm
+                snippet = r["content"][:200].replace("\n", " ")  # cap at 200 chars, flatten newlines
+                lines.append(f"  [{ts}] {r['role'].upper()}: {snippet}")
+            lines.append("[END RECENT MEMORY]")
+            mem_ctx += "\n\n" + "\n".join(lines)
+    except Exception as e:  # best-effort: keep whatever mem_ctx was built before the failure
+        log.warning("Memory context retrieval failed: %s", e)
+
+    # ── Memory upgrade: L0/L1 identity + active temporal facts ──────────
+    try:
+        from codec_memory_upgrade import load_identity, query_valid_facts, compress_rule_based
+        identity = load_identity()
+        if identity:
+            boot_ctx = f"\n\n[IDENTITY — BOOT PAYLOAD]\n{identity}\n[END IDENTITY]"
+        facts = query_valid_facts(limit=20)
+        if facts:
+            lines = ["[ACTIVE FACTS]"]
+            for f in facts:
+                lines.append(f"  {f['key']} = {f['value']}")
+            lines.append("[END FACTS]")
+            facts_ctx = "\n\n" + "\n".join(lines)
+        # Compress the recalled memory block to save tokens (identity+facts stay verbatim)
+        if mem_ctx:
+            mem_ctx = compress_rule_based(mem_ctx)
+    except Exception as e:  # a failure anywhere above also skips the compression step
+        log.warning("Memory upgrade injection failed: %s", e)
+
+    # 2026-04-29 prompt rewrite: CODEC_VOICE_PROMPT now contains a {date}
+    # placeholder. Format it before use so the LLM doesn't see literal '{date}'.
+    sys_p = CODEC_VOICE_PROMPT.format(date=datetime.now().strftime("%A, %B %d, %Y"))
+    if boot_ctx: sys_p += boot_ctx
+    if facts_ctx: sys_p += facts_ctx
+    if mem: sys_p += "\n\n" + mem  # separator added here; boot_ctx/facts_ctx carry their own leading "\n\n"
+    if mem_ctx: sys_p += mem_ctx
+    return sys_p
+
+
+def _persist_voice_turn(task, answer, rid):
+    """A-5 (PR-3D-b): persist a completed voice turn — append the assistant
+    message to the in-memory session, bump turn_count, write the response to the
+    session DB (WAL helper), and save the exchange to shared CodecMemory.
+    Extracted verbatim from _dispatch_inner's quick-reply block."""
+    voice_session["messages"].append({"role": "assistant", "content": answer})
+    voice_session["turn_count"] += 1
+    # Save response to DB (A-20: codec_core helper, WAL + busy_timeout).
+    update_session_response(rid, answer[:500])
+    # Save to shared memory (same store as Chat)
+    try:
+        from codec_memory import CodecMemory
+        cm = CodecMemory()
+        cm.save("voice", "user", task)
+        cm.save("voice", "assistant", answer)
+    except Exception as e:
+        log.warning(f"[CODEC] Memory save failed after LLM: {e}")
+
+
 def _dispatch_inner(task):
     app = focused_app()
     log_event("wake_dispatch", "open-codec",
@@ -329,55 +404,8 @@ def _post_skill_screenshot():
 
     rid = save_task(task, app)
 
-    # ── Build system prompt with memory ─────────────────────────────────
-    mem = get_memory(5)
-    mem_ctx = ""
-    boot_ctx = ""
-    facts_ctx = ""
-    try:
-        from codec_memory import CodecMemory
-        cm = CodecMemory()
-        targeted = cm.get_context(task, n=5)
-        if targeted:
-            mem_ctx += f"\n\n[MEMORY — RELEVANT PAST CONVERSATIONS]\n{targeted}\n[END MEMORY]"
-        recent = cm.search_recent(days=3, limit=5)
-        if recent:
-            lines = ["[RECENT MEMORY — LAST 3 DAYS]"]
-            for r in recent:
-                ts = r["timestamp"][:16].replace("T", " ")
-                snippet = r["content"][:200].replace("\n", " ")
-                lines.append(f"  [{ts}] {r['role'].upper()}: {snippet}")
-            lines.append("[END RECENT MEMORY]")
-            mem_ctx += "\n\n" + "\n".join(lines)
-    except Exception as e:
-        log.warning("Memory context retrieval failed: %s", e)
-
-    # ── Memory upgrade: L0/L1 identity + active temporal facts ──────────
-    try:
-        from codec_memory_upgrade import load_identity, query_valid_facts, compress_rule_based
-        identity = load_identity()
-        if identity:
-            boot_ctx = f"\n\n[IDENTITY — BOOT PAYLOAD]\n{identity}\n[END IDENTITY]"
-        facts = query_valid_facts(limit=20)
-        if facts:
-            lines = ["[ACTIVE FACTS]"]
-            for f in facts:
-                lines.append(f"  {f['key']} = {f['value']}")
-            lines.append("[END FACTS]")
-            facts_ctx = "\n\n" + "\n".join(lines)
-        # Compress the recalled memory block to save tokens (identity+facts stay verbatim)
-        if mem_ctx:
-            mem_ctx = compress_rule_based(mem_ctx)
-    except Exception as e:
-        log.warning("Memory upgrade injection failed: %s", e)
-
-    # 2026-04-29 prompt rewrite: CODEC_VOICE_PROMPT now contains a {date}
-    # placeholder. Format it before use so the LLM doesn't see literal '{date}'.
-    sys_p = CODEC_VOICE_PROMPT.format(date=datetime.now().strftime("%A, %B %d, %Y"))
-    if boot_ctx: sys_p += boot_ctx
-    if facts_ctx: sys_p += facts_ctx
-    if mem: sys_p += "\n\n" + mem
-    if mem_ctx: sys_p += mem_ctx
+    # ── Build system prompt with memory (A-5: extracted helper) ─────────
+    sys_p = _build_voice_system_prompt(task)
     safe_sys = sys_p.replace('\n', ' ')
 
     # ── Open terminal session (the real CODEC session window) ───────────
@@ -424,18 +452,8 @@ def _post_skill_screenshot():
             log_event("tts_speak", "open-codec",
                       f"TTS: {answer[:60]}",
                       extra={"text_len": len(answer)})
-            # Add assistant response to session history
-            voice_session["messages"].append({"role": "assistant", "content": answer})
-            voice_session["turn_count"] += 1
-            # Save response to DB (A-20: codec_core helper, WAL + busy_timeout).
-            update_session_response(rid, answer[:500])
-            # Save to shared memory (same store as Chat)
-            try:
-                cm = CodecMemory()
-                cm.save("voice", "user", task)
-                cm.save("voice", "assistant", answer)
-            except Exception as e:
-                log.warning(f"[CODEC] Memory save failed after LLM: {e}")
+            # Persist the turn (A-5: extracted to _persist_voice_turn)
+            _persist_voice_turn(task, answer, rid)
             _last_tts_text = answer[:200]
             speak_text(answer)
             _safe_ans = answer[:80].replace('\\', '\\\\').replace('"', '\\"')

diff --git a/docs/PR3D-MONOLITH-EXTRACT-DESIGN.md b/docs/PR3D-MONOLITH-EXTRACT-DESIGN.md
@@ -1,6 +1,10 @@
 # PR-3D — extract helpers from the 3 monolith functions (A-5/6/7) (DESIGN)
 
-**Status:** IN PROGRESS — split into 3 sub-PRs. **3D-a (A-7 `Agent.run`) IMPLEMENTED** (extracted `_parse_action`, `_validate_tool_call`, `_execute_tool_with_hooks`; `run()` 230 → 177 LOC; 13 new unit tests + 112 agent regression tests green; zero new suite failures; zero net-new ruff). **A-5 + A-6 pending** as 3D-b / 3D-c.
+**Status:** IN PROGRESS — split into 3 sub-PRs.
+- **3D-a (A-7 `Agent.run`) IMPLEMENTED** — extracted `_parse_action`, `_validate_tool_call`, `_execute_tool_with_hooks`; `run()` 230 → 177 LOC; 13 unit + 112 regression tests green.
+- **3D-b (A-5 `_dispatch_inner`) IMPLEMENTED** — extracted `_build_voice_system_prompt(task)` + `_persist_voice_turn(task, answer, rid)`; `_dispatch_inner` 188 → 131 LOC; 7 unit tests; zero new suite failures; zero net-new ruff. (Faithfulness note: `_persist_voice_turn` does its own `from codec_memory import CodecMemory` — the original relied on the build block's local import being in `_dispatch_inner`'s scope, which the extraction removed.)
+- **3D-c (A-6 `chat_completion` / `SkillTagBuffer`) pending.**
+
 **Findings:** A-5 (`_dispatch_inner`, 188 LOC), A-6 (`chat_completion`, 466 LOC), A-7 (`Agent.run`, 230 LOC) — all MEDIUM.
 **Wave:** 3 (complexity reduction). These are the **three hottest functions in the repo** (voice dispatch · chat handler · agent loop), so: split into one-PR-per-function + behavior-preserving extractions with tests. **No big-bang.**
 

diff --git a/docs/audits/PHASE-1-CODE-QUALITY.md b/docs/audits/PHASE-1-CODE-QUALITY.md
@@ -65,6 +65,9 @@ Both scan `SKILLS_DIR` independently, so a skill file is loaded twice in differe
 **Effort:** medium (touches 3 files + needs a careful test pass on the voice-skill code path)
 
 ### A-5 — `_dispatch_inner` is a 200-LOC monolithic function with mixed concerns [MEDIUM]
+
+> **Closed by PR-3D-b.** Behavior-preserving extraction of the two recommended pure-ish helpers: `_build_voice_system_prompt(task) -> str` (the memory/identity/facts injection + prompt assembly) and `_persist_voice_turn(task, answer, rid)` (session-message append + `update_session_response` + `CodecMemory` save). `_dispatch_inner` is now a flow of named calls (188 → 131 LOC); the LLM call was already `codec_llm.call` (A-12 tranche 1). Faithfulness detail: `_persist_voice_turn` carries its own `from codec_memory import CodecMemory` — the original persist block relied on the build block's local import being in scope, which extraction removed. Pinned by `tests/test_dispatch_inner_helpers.py` (7 tests). See `docs/PR3D-MONOLITH-EXTRACT-DESIGN.md`. **A-6 (`chat_completion`/`SkillTagBuffer`) follows as PR-3D-c.** (The 3 duplicate memory-injection sites the impact note flags — voice / `_enrich_messages` / `codec_voice.generate_response` — are a separate consolidation, not in this extraction's scope.)
+
 **Location:** `codec.py:545-744`
 **Description:** Single function does: skill dispatch loop, draft detection + queueing, memory injection (5 distinct injection points), system-prompt build, voice-session state mutation, LLM HTTP call (with inline `import requests as _llm_req`), inline SQLite UPDATE, two follow-up calls into `CodecMemory`, TTS dispatch, AppleScript notification dispatch, error handling for all of the above. 200 lines, ≥4 levels of nesting in places, single try/except wrapping the LLM call (line 689-743). Pre-audit P-5 understated this as "~50 LOC" — the actual count is ~200.
 **Impact:** Hard to test, hard to modify safely, hard to read. The 5 different memory injection mechanisms (lines 615-649) duplicate logic that already lives in `_enrich_messages` (`codec_dashboard.py:1930-2093`) and `codec_voice.generate_response` (`codec_voice.py:716-754`). Three independent implementations of "build a system prompt with memory context" — bugs fixed in one don't reach the others.

diff --git a/docs/audits/PHASE-1-CONSOLIDATED-TRIAGE.md b/docs/audits/PHASE-1-CONSOLIDATED-TRIAGE.md
@@ -243,7 +243,7 @@ Mirror the Intake Phase 3 wave pattern. 7 waves planned; sizes are PR-counts, NO
 - A-4: skill-loader unification ✅ (branch `fix/pr3-a4-skill-loader-unification`, design-first per §11 → `docs/A4-SKILL-LOADER-UNIFICATION-DESIGN.md`). Deleted legacy `codec_core.{loaded_skills,load_skills,run_skill}`; codec.py + cortex_skills now use canonical `codec_dispatch` registry. Closed a real **security gap** (legacy path skipped the PR-1A AST gate) + a **hooks bypass** (voice path now fires run_with_hooks). Option A: `custom_triggers.json` now honored everywhere via SkillRegistry. 10 tests; full suite 1376 passing.
 - PR-3D: A-5 + A-6 + A-7 — extract helpers from the 3 monolithic functions (`_dispatch_inner`, `chat_completion`, `Agent.run`). Split into 3 sub-PRs (one per function — too risky to do all three on these hot paths in one diff). Design → `docs/PR3D-MONOLITH-EXTRACT-DESIGN.md`.
   - PR-3D-a: A-7 `Agent.run` ✅ (branch `fix/pr3d-extract-monolith-helpers`). Behavior-preserving extraction of `_parse_action` (pure protocol parse), `_validate_tool_call` (pure — 4 guards → 1 rejection msg), `_execute_tool_with_hooks` (copy_context + run_with_hooks + veto executor). `run()` 230 → 177 LOC; stuck detection kept inline post-`tool_result`-audit for exact parity. 13 unit tests (`tests/test_agent_run_helpers.py`) + 112 agent/crew regression tests green; zero net-new ruff; full suite 1444 passing, zero new failures.
-  - PR-3D-b: A-5 `_dispatch_inner` (voice helpers `_build_voice_system_prompt` + `_persist_voice_turn`) — pending.
+  - PR-3D-b: A-5 `_dispatch_inner` ✅ (branch `fix/pr3d-b-dispatch-inner`). Behavior-preserving extraction of `_build_voice_system_prompt(task)` (memory/identity/facts + prompt assembly) + `_persist_voice_turn(task, answer, rid)` (session + DB + CodecMemory save). `_dispatch_inner` 188 → 131 LOC. Faithfulness: `_persist_voice_turn` carries its own `from codec_memory import CodecMemory` (original relied on the build block's local import being in scope). 7 unit tests (`tests/test_dispatch_inner_helpers.py`); zero net-new ruff; full suite 1451 passing, zero new failures.
   - PR-3D-c: A-6 `chat_completion` (extract `SkillTagBuffer` → `codec_chat_stream.py`; also unblocks the A-12 dashboard-stream migration) — pending.
 - PR-3E: A-11 + A-12 — unify vision + `chat/completions` ✅ (branch `fix/pr3e-llm-vision-dedup`, design-first per §11 → `docs/PR3E-LLM-VISION-DEDUP-DESIGN.md`; **Option 2** chosen by Mickael). **A-11 fully closed**: new `codec_vision.py` (sync+async, Gemini→local fallback, live config); all 3 consumers (codec.py/voice/session) delegate; session gains a Gemini fallback it lacked. **A-12 first tranche**: discovered `codec_llm_proxy` is a *queue*, not an HTTP caller — built genuinely-new `codec_llm.py` (`call()` + `strip_think`/`extract_content`, retry, never-raises) and migrated codec.py voice-reply chat + `codec_session.qwen_call`. **Deferred to phased follow-ons**: `qwen_stream` SSE (needs `codec_llm.stream()`) + ~40 remaining sites (dashboard/voice/agents/bridges/misc), each its own tranche. 19 tests (`tests/test_llm_vision_dedup.py`); full suite zero new failures.
 - PR-3E-2: A-12 tranche 2 ✅ (branch `fix/pr3-a12-tranche2-stream`, design-first → `docs/PR3E2-LLM-STREAM-TRANCHE2-DESIGN.md`; **Option 1** chosen). Built streaming keystone `codec_llm.stream()` (sync generator, raw deltas, never-raises) + shared `_build_request`; migrated `codec_session.qwen_stream` (proof) + non-streaming trivials `codec_compaction` + `codec_dictate`. Read-the-source moved `codec_textassist` + `regen_skill_descriptions` to **2c** (raise-on-failure contract — never-raise would paste empty over the user's selection / write empty descriptions). 14 tests (`tests/test_llm_stream.py`); zero net-new ruff; full suite 1409 passing, zero new failures. **Remaining A-12 tranches:** 2c (raise-mode: textassist/regen/agent_plan/agent_runner), bridges (telegram/imessage), dashboard (non-stream + the stream tag-machine), voice `_stream_qwen` + agents (async `astream()` + queue), skills tranche.