From fe05412b91d883579ffb178985733958e1dd442b Mon Sep 17 00:00:00 2001
From: Mickael Farina <farina.mickael@gmail.com>
Date: Fri, 22 May 2026 16:24:26 +0200
Subject: [PATCH] refactor(voice): extract _dispatch_inner helpers (A-5,
 PR-3D-b)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Second of the PR-3D split. Behavior-preserving extraction of the voice-dispatch
monolith codec._dispatch_inner into the two helpers the audit recommended:

- _build_voice_system_prompt(task) -> str — the memory/identity/facts injection
  + CODEC_VOICE_PROMPT assembly (get_memory + CodecMemory context + memory_upgrade
  identity/facts + compress). Caller derives safe_sys.
- _persist_voice_turn(task, answer, rid) — append the assistant message to the
  session, bump turn_count, update_session_response (WAL helper), and save the
  exchange to CodecMemory.

_dispatch_inner is now a flow of named calls (188 -> 131 LOC). The LLM call was
already codec_llm.call (A-12 tranche 1).

Faithfulness detail: _persist_voice_turn carries its own
`from codec_memory import CodecMemory`. The original persist block had no import
of its own — it reused the name bound by the build block's
`from codec_memory import CodecMemory`, which was in scope only because both
blocks lived inside _dispatch_inner. Moving the build block into its own helper
took that binding with it, so the persist helper now imports CodecMemory itself.
No other behavior change: prompt assembly, memory markers, DB-write truncation
(answer[:500]), and the skill/draft/terminal branches are untouched.

Tests: tests/test_dispatch_inner_helpers.py (7 tests — base prompt / get_memory /
facts / identity injection; persist appends+increments+writes+saves; DB-write
truncation; source invariant). Full suite: 1451 passing, 23 known-baseline
failures, zero new failures. Zero net-new ruff findings. No skills/ touched.

A-6 (chat_completion / SkillTagBuffer) follows as PR-3D-c.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 AGENTS.md                                  |   2 +-
 codec.py                                   | 140 ++++++++++++---------
 docs/PR3D-MONOLITH-EXTRACT-DESIGN.md       |   5 +-
 docs/audits/PHASE-1-CODE-QUALITY.md        |   2 +
 docs/audits/PHASE-1-CONSOLIDATED-TRIAGE.md |   2 +-
 tests/test_dispatch_inner_helpers.py       | 126 +++++++++++++++++++
 6 files changed, 213 insertions(+), 64 deletions(-)
 create mode 100644 tests/test_dispatch_inner_helpers.py

diff --git a/AGENTS.md b/AGENTS.md
index 0f5210f..1c5f4ec 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -386,7 +386,7 @@ Rule-based compressor for memory writes that need shrinking. Entity abbreviation
 ### Injection points
 | File:line | What gets injected |
 |---|---|
-| `codec.py:358-362` | Voice-mode prompt suffix (boot ctx + facts + memory) |
+| `codec._build_voice_system_prompt(task)` | Voice-mode prompt suffix (boot ctx + facts + memory) — extracted from `_dispatch_inner` in PR-3D-b/A-5 |
 | `codec_dashboard.py:1827-1862` | Chat handler before LLM call |
 | `codec_dashboard.py:1851-1886` | Same handler, separate channel |
 | `codec_voice.py:288-320` | VAD speech-start preload |
diff --git a/codec.py b/codec.py
index 2ae8960..ffd5384 100644
--- a/codec.py
+++ b/codec.py
@@ -263,6 +263,81 @@ def dispatch(task):
         _dispatch_cooldown = time.time() + 1.5
         _dispatch_lock.release()
 
+def _build_voice_system_prompt(task):
+    """A-5 (PR-3D-b): assemble the voice system prompt — CODEC_VOICE_PROMPT +
+    boot identity + active temporal facts + recent memory + targeted/recent
+    conversation context. Reads the memory stores (each guarded); returns the
+    assembled system-prompt string. Extracted verbatim from _dispatch_inner."""
+    mem = get_memory(5)
+    mem_ctx = ""
+    boot_ctx = ""
+    facts_ctx = ""
+    try:
+        from codec_memory import CodecMemory
+        cm = CodecMemory()
+        targeted = cm.get_context(task, n=5)
+        if targeted:
+            mem_ctx += f"\n\n[MEMORY — RELEVANT PAST CONVERSATIONS]\n{targeted}\n[END MEMORY]"
+        recent = cm.search_recent(days=3, limit=5)
+        if recent:
+            lines = ["[RECENT MEMORY — LAST 3 DAYS]"]
+            for r in recent:
+                ts = r["timestamp"][:16].replace("T", " ")
+                snippet = r["content"][:200].replace("\n", " ")
+                lines.append(f"  [{ts}] {r['role'].upper()}: {snippet}")
+            lines.append("[END RECENT MEMORY]")
+            mem_ctx += "\n\n" + "\n".join(lines)
+    except Exception as e:
+        log.warning("Memory context retrieval failed: %s", e)
+
+    # ── Memory upgrade: L0/L1 identity + active temporal facts ──────────
+    try:
+        from codec_memory_upgrade import load_identity, query_valid_facts, compress_rule_based
+        identity = load_identity()
+        if identity:
+            boot_ctx = f"\n\n[IDENTITY — BOOT PAYLOAD]\n{identity}\n[END IDENTITY]"
+        facts = query_valid_facts(limit=20)
+        if facts:
+            lines = ["[ACTIVE FACTS]"]
+            for f in facts:
+                lines.append(f"  {f['key']} = {f['value']}")
+            lines.append("[END FACTS]")
+            facts_ctx = "\n\n" + "\n".join(lines)
+        # Compress the recalled memory block to save tokens (identity+facts stay verbatim)
+        if mem_ctx:
+            mem_ctx = compress_rule_based(mem_ctx)
+    except Exception as e:
+        log.warning("Memory upgrade injection failed: %s", e)
+
+    # 2026-04-29 prompt rewrite: CODEC_VOICE_PROMPT now contains a {date}
+    # placeholder. Format it before use so the LLM doesn't see literal '{date}'.
+    sys_p = CODEC_VOICE_PROMPT.format(date=datetime.now().strftime("%A, %B %d, %Y"))
+    if boot_ctx: sys_p += boot_ctx
+    if facts_ctx: sys_p += facts_ctx
+    if mem: sys_p += "\n\n" + mem
+    if mem_ctx: sys_p += mem_ctx
+    return sys_p
+
+
+def _persist_voice_turn(task, answer, rid):
+    """A-5 (PR-3D-b): persist a completed voice turn — append the assistant
+    message to the in-memory session, bump turn_count, write the response to the
+    session DB (WAL helper), and save the exchange to shared CodecMemory.
+    Extracted from _dispatch_inner's quick-reply block, plus a local CodecMemory import."""
+    voice_session["messages"].append({"role": "assistant", "content": answer})
+    voice_session["turn_count"] += 1
+    # Save response to DB (A-20: codec_core helper, WAL + busy_timeout).
+    update_session_response(rid, answer[:500])
+    # Save to shared memory (same store as Chat)
+    try:
+        from codec_memory import CodecMemory
+        cm = CodecMemory()
+        cm.save("voice", "user", task)
+        cm.save("voice", "assistant", answer)
+    except Exception as e:
+        log.warning(f"[CODEC] Memory save failed after LLM: {e}")
+
+
 def _dispatch_inner(task):
     app = focused_app()
     log_event("wake_dispatch", "open-codec",
@@ -329,55 +404,8 @@ def _post_skill_screenshot():
 
     rid = save_task(task, app)
 
-    # ── Build system prompt with memory ─────────────────────────────────
-    mem = get_memory(5)
-    mem_ctx = ""
-    boot_ctx = ""
-    facts_ctx = ""
-    try:
-        from codec_memory import CodecMemory
-        cm = CodecMemory()
-        targeted = cm.get_context(task, n=5)
-        if targeted:
-            mem_ctx += f"\n\n[MEMORY — RELEVANT PAST CONVERSATIONS]\n{targeted}\n[END MEMORY]"
-        recent = cm.search_recent(days=3, limit=5)
-        if recent:
-            lines = ["[RECENT MEMORY — LAST 3 DAYS]"]
-            for r in recent:
-                ts = r["timestamp"][:16].replace("T", " ")
-                snippet = r["content"][:200].replace("\n", " ")
-                lines.append(f"  [{ts}] {r['role'].upper()}: {snippet}")
-            lines.append("[END RECENT MEMORY]")
-            mem_ctx += "\n\n" + "\n".join(lines)
-    except Exception as e:
-        log.warning("Memory context retrieval failed: %s", e)
-
-    # ── Memory upgrade: L0/L1 identity + active temporal facts ──────────
-    try:
-        from codec_memory_upgrade import load_identity, query_valid_facts, compress_rule_based
-        identity = load_identity()
-        if identity:
-            boot_ctx = f"\n\n[IDENTITY — BOOT PAYLOAD]\n{identity}\n[END IDENTITY]"
-        facts = query_valid_facts(limit=20)
-        if facts:
-            lines = ["[ACTIVE FACTS]"]
-            for f in facts:
-                lines.append(f"  {f['key']} = {f['value']}")
-            lines.append("[END FACTS]")
-            facts_ctx = "\n\n" + "\n".join(lines)
-        # Compress the recalled memory block to save tokens (identity+facts stay verbatim)
-        if mem_ctx:
-            mem_ctx = compress_rule_based(mem_ctx)
-    except Exception as e:
-        log.warning("Memory upgrade injection failed: %s", e)
-
-    # 2026-04-29 prompt rewrite: CODEC_VOICE_PROMPT now contains a {date}
-    # placeholder. Format it before use so the LLM doesn't see literal '{date}'.
-    sys_p = CODEC_VOICE_PROMPT.format(date=datetime.now().strftime("%A, %B %d, %Y"))
-    if boot_ctx: sys_p += boot_ctx
-    if facts_ctx: sys_p += facts_ctx
-    if mem: sys_p += "\n\n" + mem
-    if mem_ctx: sys_p += mem_ctx
+    # ── Build system prompt with memory (A-5: extracted helper) ─────────
+    sys_p = _build_voice_system_prompt(task)
     safe_sys = sys_p.replace('\n', ' ')
 
     # ── Open terminal session (the real CODEC session window) ───────────
@@ -424,18 +452,8 @@ def _post_skill_screenshot():
             log_event("tts_speak", "open-codec",
                       f"TTS: {answer[:60]}",
                       extra={"text_len": len(answer)})
-            # Add assistant response to session history
-            voice_session["messages"].append({"role": "assistant", "content": answer})
-            voice_session["turn_count"] += 1
-            # Save response to DB (A-20: codec_core helper, WAL + busy_timeout).
-            update_session_response(rid, answer[:500])
-            # Save to shared memory (same store as Chat)
-            try:
-                cm = CodecMemory()
-                cm.save("voice", "user", task)
-                cm.save("voice", "assistant", answer)
-            except Exception as e:
-                log.warning(f"[CODEC] Memory save failed after LLM: {e}")
+            # Persist the turn (A-5: extracted to _persist_voice_turn)
+            _persist_voice_turn(task, answer, rid)
             _last_tts_text = answer[:200]
             speak_text(answer)
             _safe_ans = answer[:80].replace('\\', '\\\\').replace('"', '\\"')
diff --git a/docs/PR3D-MONOLITH-EXTRACT-DESIGN.md b/docs/PR3D-MONOLITH-EXTRACT-DESIGN.md
index 779e4d1..c8b6d51 100644
--- a/docs/PR3D-MONOLITH-EXTRACT-DESIGN.md
+++ b/docs/PR3D-MONOLITH-EXTRACT-DESIGN.md
@@ -1,6 +1,9 @@
 # PR-3D — extract helpers from the 3 monolith functions (A-5/6/7) (DESIGN)
 
-**Status:** IN PROGRESS — split into 3 sub-PRs. **3D-a (A-7 `Agent.run`) IMPLEMENTED** (extracted `_parse_action`, `_validate_tool_call`, `_execute_tool_with_hooks`; `run()` 230 → 177 LOC; 13 new unit tests + 112 agent regression tests green; zero new suite failures; zero net-new ruff). **A-5 + A-6 pending** as 3D-b / 3D-c.
+**Status:** IN PROGRESS — split into 3 sub-PRs.
+- **3D-a (A-7 `Agent.run`) IMPLEMENTED** — extracted `_parse_action`, `_validate_tool_call`, `_execute_tool_with_hooks`; `run()` 230 → 177 LOC; 13 unit + 112 regression tests green.
+- **3D-b (A-5 `_dispatch_inner`) IMPLEMENTED** — extracted `_build_voice_system_prompt(task)` + `_persist_voice_turn(task, answer, rid)`; `_dispatch_inner` 188 → 131 LOC; 7 unit tests; zero new suite failures; zero net-new ruff. (Faithfulness note: `_persist_voice_turn` does its own `from codec_memory import CodecMemory` — the original relied on the build block's local import being in `_dispatch_inner`'s scope, which the extraction removed.)
+- **3D-c (A-6 `chat_completion` / `SkillTagBuffer`) pending.**
 **Findings:** A-5 (`_dispatch_inner`, 188 LOC), A-6 (`chat_completion`, 466 LOC), A-7 (`Agent.run`, 230 LOC) — all MEDIUM.
 **Wave:** 3 (complexity reduction). These are the **three hottest functions in the repo** (voice dispatch · chat handler · agent loop), so: split into one-PR-per-function + behavior-preserving extractions with tests. **No big-bang.**
 
diff --git a/docs/audits/PHASE-1-CODE-QUALITY.md b/docs/audits/PHASE-1-CODE-QUALITY.md
index e9919fc..54f6560 100644
--- a/docs/audits/PHASE-1-CODE-QUALITY.md
+++ b/docs/audits/PHASE-1-CODE-QUALITY.md
@@ -65,6 +65,8 @@ Both scan `SKILLS_DIR` independently, so a skill file is loaded twice in differe
 **Effort:** medium (touches 3 files + needs a careful test pass on the voice-skill code path)
 
 ### A-5 — `_dispatch_inner` is a 200-LOC monolithic function with mixed concerns [MEDIUM]
+> **Closed by PR-3D-b.** Behavior-preserving extraction of the two recommended pure-ish helpers: `_build_voice_system_prompt(task) -> str` (the memory/identity/facts injection + prompt assembly) and `_persist_voice_turn(task, answer, rid)` (session-message append + `update_session_response` + `CodecMemory` save). `_dispatch_inner` is now a flow of named calls (188 → 131 LOC); the LLM call was already `codec_llm.call` (A-12 tranche 1). Faithfulness detail: `_persist_voice_turn` carries its own `from codec_memory import CodecMemory` — the original persist block relied on the build block's local import being in scope, which extraction removed. Pinned by `tests/test_dispatch_inner_helpers.py` (7). See `docs/PR3D-MONOLITH-EXTRACT-DESIGN.md`. **A-6 (`chat_completion`/`SkillTagBuffer`) follows as PR-3D-c.** (The 3 duplicate memory-injection sites the impact note flags — voice / `_enrich_messages` / `codec_voice.generate_response` — are a separate consolidation, not in this extraction's scope.)
+
 **Location:** `codec.py:545-744`
 **Description:** Single function does: skill dispatch loop, draft detection + queueing, memory injection (5 distinct injection points), system-prompt build, voice-session state mutation, LLM HTTP call (with inline `import requests as _llm_req`), inline SQLite UPDATE, two follow-up calls into `CodecMemory`, TTS dispatch, AppleScript notification dispatch, error handling for all of the above. 200 lines, ≥4 levels of nesting in places, single try/except wrapping the LLM call (line 689-743). Pre-audit P-5 understated this as "~50 LOC" — the actual count is ~200.
 **Impact:** Hard to test, hard to modify safely, hard to read. The 5 different memory injection mechanisms (lines 615-649) duplicate logic that already lives in `_enrich_messages` (`codec_dashboard.py:1930-2093`) and `codec_voice.generate_response` (`codec_voice.py:716-754`). Three independent implementations of "build a system prompt with memory context" — bugs fixed in one don't reach the others.
diff --git a/docs/audits/PHASE-1-CONSOLIDATED-TRIAGE.md b/docs/audits/PHASE-1-CONSOLIDATED-TRIAGE.md
index ca70804..668f2e2 100644
--- a/docs/audits/PHASE-1-CONSOLIDATED-TRIAGE.md
+++ b/docs/audits/PHASE-1-CONSOLIDATED-TRIAGE.md
@@ -243,7 +243,7 @@ Mirror the Intake Phase 3 wave pattern. 7 waves planned; sizes are PR-counts, NO
 - A-4: skill-loader unification ✅ (branch `fix/pr3-a4-skill-loader-unification`, design-first per §11 → `docs/A4-SKILL-LOADER-UNIFICATION-DESIGN.md`). Deleted legacy `codec_core.{loaded_skills,load_skills,run_skill}`; codec.py + cortex_skills now use canonical `codec_dispatch` registry. Closed a real **security gap** (legacy path skipped the PR-1A AST gate) + a **hooks bypass** (voice path now fires run_with_hooks). Option A: `custom_triggers.json` now honored everywhere via SkillRegistry. 10 tests; full suite 1376 passing.
 - PR-3D: A-5 + A-6 + A-7 — extract helpers from the 3 monolithic functions (`_dispatch_inner`, `chat_completion`, `Agent.run`). Split into 3 sub-PRs (one per function — too risky to do all three on these hot paths in one diff). Design → `docs/PR3D-MONOLITH-EXTRACT-DESIGN.md`.
   - PR-3D-a: A-7 `Agent.run` ✅ (branch `fix/pr3d-extract-monolith-helpers`). Behavior-preserving extraction of `_parse_action` (pure protocol parse), `_validate_tool_call` (pure — 4 guards → 1 rejection msg), `_execute_tool_with_hooks` (copy_context + run_with_hooks + veto executor). `run()` 230 → 177 LOC; stuck detection kept inline post-`tool_result`-audit for exact parity. 13 unit tests (`tests/test_agent_run_helpers.py`) + 112 agent/crew regression tests green; zero net-new ruff; full suite 1444 passing, zero new failures.
-  - PR-3D-b: A-5 `_dispatch_inner` (voice helpers `_build_voice_system_prompt` + `_persist_voice_turn`) — pending.
+  - PR-3D-b: A-5 `_dispatch_inner` ✅ (branch `fix/pr3d-b-dispatch-inner`). Behavior-preserving extraction of `_build_voice_system_prompt(task)` (memory/identity/facts + prompt assembly) + `_persist_voice_turn(task, answer, rid)` (session + DB + CodecMemory save). `_dispatch_inner` 188 → 131 LOC. Faithfulness: `_persist_voice_turn` carries its own `from codec_memory import CodecMemory` (original relied on the build block's local import being in scope). 7 unit tests (`tests/test_dispatch_inner_helpers.py`); zero net-new ruff; full suite 1451 passing, zero new failures.
   - PR-3D-c: A-6 `chat_completion` (extract `SkillTagBuffer` → `codec_chat_stream.py`; also unblocks the A-12 dashboard-stream migration) — pending.
 - PR-3E: A-11 + A-12 — unify vision + `chat/completions` ✅ (branch `fix/pr3e-llm-vision-dedup`, design-first per §11 → `docs/PR3E-LLM-VISION-DEDUP-DESIGN.md`; **Option 2** chosen by Mickael). **A-11 fully closed**: new `codec_vision.py` (sync+async, Gemini→local fallback, live config); all 3 consumers (codec.py/voice/session) delegate; session gains a Gemini fallback it lacked. **A-12 first tranche**: discovered `codec_llm_proxy` is a *queue*, not an HTTP caller — built genuinely-new `codec_llm.py` (`call()` + `strip_think`/`extract_content`, retry, never-raises) and migrated codec.py voice-reply chat + `codec_session.qwen_call`. **Deferred to phased follow-ons**: `qwen_stream` SSE (needs `codec_llm.stream()`) + ~40 remaining sites (dashboard/voice/agents/bridges/misc), each its own tranche. 19 tests (`tests/test_llm_vision_dedup.py`); full suite zero new failures.
 - PR-3E-2: A-12 tranche 2 ✅ (branch `fix/pr3-a12-tranche2-stream`, design-first → `docs/PR3E2-LLM-STREAM-TRANCHE2-DESIGN.md`; **Option 1** chosen). Built streaming keystone `codec_llm.stream()` (sync generator, raw deltas, never-raises) + shared `_build_request`; migrated `codec_session.qwen_stream` (proof) + non-streaming trivials `codec_compaction` + `codec_dictate`. Read-the-source moved `codec_textassist` + `regen_skill_descriptions` to **2c** (raise-on-failure contract — never-raise would paste empty over the user's selection / write empty descriptions). 14 tests (`tests/test_llm_stream.py`); zero net-new ruff; full suite 1409 passing, zero new failures. **Remaining A-12 tranches:** 2c (raise-mode: textassist/regen/agent_plan/agent_runner), bridges (telegram/imessage), dashboard (non-stream + the stream tag-machine), voice `_stream_qwen` + agents (async `astream()` + queue), skills tranche.
diff --git a/tests/test_dispatch_inner_helpers.py b/tests/test_dispatch_inner_helpers.py
new file mode 100644
index 0000000..7b0cd8f
--- /dev/null
+++ b/tests/test_dispatch_inner_helpers.py
@@ -0,0 +1,126 @@
+"""Tests for PR-3D-b (A-5) — helpers extracted from codec._dispatch_inner.
+
+Behavior-preserving extraction of the voice-dispatch monolith:
+- codec._build_voice_system_prompt(task) -> str   (memory/identity/facts assembly)
+- codec._persist_voice_turn(task, answer, rid)     (session + DB + CodecMemory save)
+
+The voice path is exercised end-to-end elsewhere; these pin the extracted units.
+Reference: docs/PR3D-MONOLITH-EXTRACT-DESIGN.md.
+"""
+from __future__ import annotations
+
+import sys
+from datetime import datetime
+from pathlib import Path
+
+REPO = Path(__file__).resolve().parent.parent
+sys.path.insert(0, str(REPO))
+
+import codec  # noqa: E402
+
+
+class _FakeCM:
+    def get_context(self, task, n=5):
+        return ""
+
+    def search_recent(self, days=3, limit=5):
+        return []
+
+    def save(self, *a):
+        pass
+
+
+def _silence_memory(monkeypatch):
+    """Make every memory source empty so _build_voice_system_prompt returns just
+    the base prompt — individual tests then re-enable one source."""
+    import codec_memory
+    import codec_memory_upgrade
+    monkeypatch.setattr(codec, "get_memory", lambda n=5: "")
+    monkeypatch.setattr(codec_memory, "CodecMemory", _FakeCM)
+    monkeypatch.setattr(codec_memory_upgrade, "load_identity", lambda: "")
+    monkeypatch.setattr(codec_memory_upgrade, "query_valid_facts", lambda limit=20: [])
+    monkeypatch.setattr(codec_memory_upgrade, "compress_rule_based", lambda x: x)
+
+
+# ── _build_voice_system_prompt ────────────────────────────────────────────────
+
+
+def test_build_voice_system_prompt_base(monkeypatch):
+    _silence_memory(monkeypatch)
+    out = codec._build_voice_system_prompt("hi")
+    expected = codec.CODEC_VOICE_PROMPT.format(
+        date=datetime.now().strftime("%A, %B %d, %Y"))
+    assert out == expected
+
+
+def test_build_voice_system_prompt_includes_get_memory(monkeypatch):
+    _silence_memory(monkeypatch)
+    monkeypatch.setattr(codec, "get_memory", lambda n=5: "MEMBLOCK123")
+    assert "MEMBLOCK123" in codec._build_voice_system_prompt("hi")
+
+
+def test_build_voice_system_prompt_includes_facts(monkeypatch):
+    _silence_memory(monkeypatch)
+    import codec_memory_upgrade
+    monkeypatch.setattr(codec_memory_upgrade, "query_valid_facts",
+                        lambda limit=20: [{"key": "city", "value": "Marbella"}])
+    out = codec._build_voice_system_prompt("hi")
+    assert "[ACTIVE FACTS]" in out and "city = Marbella" in out
+
+
+def test_build_voice_system_prompt_includes_identity(monkeypatch):
+    _silence_memory(monkeypatch)
+    import codec_memory_upgrade
+    monkeypatch.setattr(codec_memory_upgrade, "load_identity", lambda: "I am CODEC")
+    out = codec._build_voice_system_prompt("hi")
+    assert "[IDENTITY — BOOT PAYLOAD]" in out and "I am CODEC" in out
+
+
+# ── _persist_voice_turn ───────────────────────────────────────────────────────
+
+
+def test_persist_voice_turn(monkeypatch):
+    import codec_memory
+    monkeypatch.setattr(codec, "voice_session",
+                        {"messages": [], "turn_count": 0, "started": None})
+    recorded = {}
+    monkeypatch.setattr(codec, "update_session_response",
+                        lambda rid, txt: recorded.update(rid=rid, txt=txt))
+    saved = []
+
+    class _CM:
+        def save(self, *a):
+            saved.append(a)
+
+    monkeypatch.setattr(codec_memory, "CodecMemory", lambda: _CM())
+
+    codec._persist_voice_turn("mytask", "myanswer", 42)
+
+    assert codec.voice_session["messages"][-1] == {"role": "assistant", "content": "myanswer"}
+    assert codec.voice_session["turn_count"] == 1
+    assert recorded == {"rid": 42, "txt": "myanswer"}
+    assert ("voice", "user", "mytask") in saved
+    assert ("voice", "assistant", "myanswer") in saved
+
+
+def test_persist_voice_turn_truncates_db_write(monkeypatch):
+    import codec_memory
+    monkeypatch.setattr(codec, "voice_session",
+                        {"messages": [], "turn_count": 0, "started": None})
+    recorded = {}
+    monkeypatch.setattr(codec, "update_session_response",
+                        lambda rid, txt: recorded.update(rid=rid, txt=txt))
+    monkeypatch.setattr(codec_memory, "CodecMemory",
+                        lambda: type("CM", (), {"save": lambda self, *a: None})())
+    long_answer = "z" * 900
+    codec._persist_voice_turn("t", long_answer, 7)
+    assert len(recorded["txt"]) == 500   # update_session_response gets answer[:500]
+
+
+# ── source-level migration invariant ──────────────────────────────────────────
+
+
+def test_dispatch_inner_uses_helpers():
+    # Helper defs precede _dispatch_inner, so search only from its def onward.
+    src = (REPO / "codec.py").read_text().split("def _dispatch_inner(", 1)[1]
+    assert "_build_voice_system_prompt(" in src and "_persist_voice_turn(" in src