Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion AGENTS.md
Original file line number Diff line number Diff line change
Expand Up @@ -386,7 +386,7 @@ Rule-based compressor for memory writes that need shrinking. Entity abbreviation
### Injection points
| File:line | What gets injected |
|---|---|
| `codec.py:358-362` | Voice-mode prompt suffix (boot ctx + facts + memory) |
| `codec._build_voice_system_prompt(task)` | Voice-mode prompt suffix (boot ctx + facts + memory) — extracted from `_dispatch_inner` in PR-3D-b/A-5 |
| `codec_dashboard.py:1827-1862` | Chat handler before LLM call |
| `codec_dashboard.py:1851-1886` | Same handler, separate channel |
| `codec_voice.py:288-320` | VAD speech-start preload |
Expand Down
140 changes: 79 additions & 61 deletions codec.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,81 @@ def dispatch(task):
_dispatch_cooldown = time.time() + 1.5
_dispatch_lock.release()

def _build_voice_system_prompt(task):
    """Assemble the system prompt for one voice-mode turn (A-5 / PR-3D-b).

    The prompt is CODEC_VOICE_PROMPT with its ``{date}`` placeholder filled
    in, followed by every non-empty section below, in this order:

      1. boot identity payload,
      2. active facts,
      3. the text returned by ``get_memory(5)``,
      4. recalled conversation context (hits for ``task`` plus entries from
         the last three days), passed through ``compress_rule_based``.

    Both lookup stages are best-effort: an exception inside either one is
    logged as a warning and whatever was gathered before it is kept.

    Args:
        task: the user's request; used to look up relevant past context.

    Returns:
        The assembled system-prompt string.
    """
    base_memory = get_memory(5)
    recalled = ""
    identity_block = ""
    facts_block = ""

    # ── Recalled conversations: targeted hits + the last three days ─────
    try:
        from codec_memory import CodecMemory
        store = CodecMemory()
        relevant = store.get_context(task, n=5)
        if relevant:
            recalled += f"\n\n[MEMORY — RELEVANT PAST CONVERSATIONS]\n{relevant}\n[END MEMORY]"
        latest = store.search_recent(days=3, limit=5)
        if latest:
            rendered = []
            for row in latest:
                # Trim to "YYYY-MM-DD HH:MM" and keep each entry on one line.
                stamp = row["timestamp"][:16].replace("T", " ")
                excerpt = row["content"][:200].replace("\n", " ")
                rendered.append(f" [{stamp}] {row['role'].upper()}: {excerpt}")
            recalled += "\n\n" + "\n".join(
                ["[RECENT MEMORY — LAST 3 DAYS]", *rendered, "[END RECENT MEMORY]"]
            )
    except Exception as exc:
        log.warning("Memory context retrieval failed: %s", exc)

    # ── Memory upgrade: L0/L1 identity + active temporal facts ──────────
    try:
        from codec_memory_upgrade import load_identity, query_valid_facts, compress_rule_based
        who = load_identity()
        if who:
            identity_block = f"\n\n[IDENTITY — BOOT PAYLOAD]\n{who}\n[END IDENTITY]"
        active = query_valid_facts(limit=20)
        if active:
            fact_rows = [f" {fact['key']} = {fact['value']}" for fact in active]
            facts_block = "\n\n" + "\n".join(["[ACTIVE FACTS]", *fact_rows, "[END FACTS]"])
        # Only the recalled block is compressed, to save tokens;
        # identity and facts are injected verbatim.
        if recalled:
            recalled = compress_rule_based(recalled)
    except Exception as exc:
        log.warning("Memory upgrade injection failed: %s", exc)

    # 2026-04-29 prompt rewrite: CODEC_VOICE_PROMPT carries a {date}
    # placeholder, so it must be formatted or the LLM sees a literal '{date}'.
    today = datetime.now().strftime("%A, %B %d, %Y")
    prompt = CODEC_VOICE_PROMPT.format(date=today)
    sections = (
        identity_block,
        facts_block,
        base_memory and "\n\n" + base_memory,
        recalled,
    )
    # Empty sections are dropped; the rest are appended in the order above.
    return prompt + "".join(part for part in sections if part)


def _persist_voice_turn(task, answer, rid):
    """Persist a completed voice turn (A-5 / PR-3D-b).

    Performs, in order:

      1. appends the assistant message to ``voice_session["messages"]`` and
         increments ``voice_session["turn_count"]``;
      2. passes the first 500 characters of ``answer`` to
         ``update_session_response`` for ``rid``;
      3. saves the user task and the assistant answer through
         ``CodecMemory.save`` with ``"voice"`` as the first argument.

    Step 3 is best-effort: any exception is logged as a warning and
    swallowed, so a memory-store failure does not interrupt the voice reply.

    Args:
        task: the user's request for this turn.
        answer: the assistant's reply text.
        rid: identifier passed to ``update_session_response`` for this turn.
    """
    voice_session["messages"].append({"role": "assistant", "content": answer})
    voice_session["turn_count"] += 1
    # Save response to DB (A-20: codec_core helper, WAL + busy_timeout).
    update_session_response(rid, answer[:500])
    # Save to shared memory (same store as Chat).
    try:
        # Imported here because this helper cannot rely on an import made
        # inside another function's scope.
        from codec_memory import CodecMemory
        cm = CodecMemory()
        cm.save("voice", "user", task)
        cm.save("voice", "assistant", answer)
    except Exception as e:
        # Lazy %-style arguments, matching the other log.warning calls in
        # this file; the emitted message text is unchanged.
        log.warning("[CODEC] Memory save failed after LLM: %s", e)


def _dispatch_inner(task):
app = focused_app()
log_event("wake_dispatch", "open-codec",
Expand Down Expand Up @@ -329,55 +404,8 @@ def _post_skill_screenshot():

rid = save_task(task, app)

# ── Build system prompt with memory ─────────────────────────────────
mem = get_memory(5)
mem_ctx = ""
boot_ctx = ""
facts_ctx = ""
try:
from codec_memory import CodecMemory
cm = CodecMemory()
targeted = cm.get_context(task, n=5)
if targeted:
mem_ctx += f"\n\n[MEMORY — RELEVANT PAST CONVERSATIONS]\n{targeted}\n[END MEMORY]"
recent = cm.search_recent(days=3, limit=5)
if recent:
lines = ["[RECENT MEMORY — LAST 3 DAYS]"]
for r in recent:
ts = r["timestamp"][:16].replace("T", " ")
snippet = r["content"][:200].replace("\n", " ")
lines.append(f" [{ts}] {r['role'].upper()}: {snippet}")
lines.append("[END RECENT MEMORY]")
mem_ctx += "\n\n" + "\n".join(lines)
except Exception as e:
log.warning("Memory context retrieval failed: %s", e)

# ── Memory upgrade: L0/L1 identity + active temporal facts ──────────
try:
from codec_memory_upgrade import load_identity, query_valid_facts, compress_rule_based
identity = load_identity()
if identity:
boot_ctx = f"\n\n[IDENTITY — BOOT PAYLOAD]\n{identity}\n[END IDENTITY]"
facts = query_valid_facts(limit=20)
if facts:
lines = ["[ACTIVE FACTS]"]
for f in facts:
lines.append(f" {f['key']} = {f['value']}")
lines.append("[END FACTS]")
facts_ctx = "\n\n" + "\n".join(lines)
# Compress the recalled memory block to save tokens (identity+facts stay verbatim)
if mem_ctx:
mem_ctx = compress_rule_based(mem_ctx)
except Exception as e:
log.warning("Memory upgrade injection failed: %s", e)

# 2026-04-29 prompt rewrite: CODEC_VOICE_PROMPT now contains a {date}
# placeholder. Format it before use so the LLM doesn't see literal '{date}'.
sys_p = CODEC_VOICE_PROMPT.format(date=datetime.now().strftime("%A, %B %d, %Y"))
if boot_ctx: sys_p += boot_ctx
if facts_ctx: sys_p += facts_ctx
if mem: sys_p += "\n\n" + mem
if mem_ctx: sys_p += mem_ctx
# ── Build system prompt with memory (A-5: extracted helper) ─────────
sys_p = _build_voice_system_prompt(task)
safe_sys = sys_p.replace('\n', ' ')

# ── Open terminal session (the real CODEC session window) ───────────
Expand Down Expand Up @@ -424,18 +452,8 @@ def _post_skill_screenshot():
log_event("tts_speak", "open-codec",
f"TTS: {answer[:60]}",
extra={"text_len": len(answer)})
# Add assistant response to session history
voice_session["messages"].append({"role": "assistant", "content": answer})
voice_session["turn_count"] += 1
# Save response to DB (A-20: codec_core helper, WAL + busy_timeout).
update_session_response(rid, answer[:500])
# Save to shared memory (same store as Chat)
try:
cm = CodecMemory()
cm.save("voice", "user", task)
cm.save("voice", "assistant", answer)
except Exception as e:
log.warning(f"[CODEC] Memory save failed after LLM: {e}")
# Persist the turn (A-5: extracted to _persist_voice_turn)
_persist_voice_turn(task, answer, rid)
_last_tts_text = answer[:200]
speak_text(answer)
_safe_ans = answer[:80].replace('\\', '\\\\').replace('"', '\\"')
Expand Down
5 changes: 4 additions & 1 deletion docs/PR3D-MONOLITH-EXTRACT-DESIGN.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
# PR-3D — extract helpers from the 3 monolith functions (A-5/6/7) (DESIGN)

**Status:** IN PROGRESS — split into 3 sub-PRs. **3D-a (A-7 `Agent.run`) IMPLEMENTED** (extracted `_parse_action`, `_validate_tool_call`, `_execute_tool_with_hooks`; `run()` 230 → 177 LOC; 13 new unit tests + 112 agent regression tests green; zero new suite failures; zero net-new ruff). **A-5 + A-6 pending** as 3D-b / 3D-c.
**Status:** IN PROGRESS — split into 3 sub-PRs.
- **3D-a (A-7 `Agent.run`) IMPLEMENTED** — extracted `_parse_action`, `_validate_tool_call`, `_execute_tool_with_hooks`; `run()` 230 → 177 LOC; 13 unit + 112 regression tests green.
- **3D-b (A-5 `_dispatch_inner`) IMPLEMENTED** — extracted `_build_voice_system_prompt(task)` + `_persist_voice_turn(task, answer, rid)`; `_dispatch_inner` 188 → 131 LOC; 7 unit tests; zero new suite failures; zero net-new ruff. (Faithfulness note: `_persist_voice_turn` does its own `from codec_memory import CodecMemory` — the original relied on the build block's local import being in `_dispatch_inner`'s scope, which the extraction removed.)
- **3D-c (A-6 `chat_completion` / `SkillTagBuffer`) pending.**
**Findings:** A-5 (`_dispatch_inner`, 188 LOC), A-6 (`chat_completion`, 466 LOC), A-7 (`Agent.run`, 230 LOC) — all MEDIUM.
**Wave:** 3 (complexity reduction). These are the **three hottest functions in the repo** (voice dispatch · chat handler · agent loop), so: split into one-PR-per-function + behavior-preserving extractions with tests. **No big-bang.**

Expand Down
2 changes: 2 additions & 0 deletions docs/audits/PHASE-1-CODE-QUALITY.md
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,8 @@ Both scan `SKILLS_DIR` independently, so a skill file is loaded twice in differe
**Effort:** medium (touches 3 files + needs a careful test pass on the voice-skill code path)

### A-5 — `_dispatch_inner` is a 200-LOC monolithic function with mixed concerns [MEDIUM]

> **Closed by PR-3D-b.** Behavior-preserving extraction of the two recommended pure-ish helpers: `_build_voice_system_prompt(task) -> str` (the memory/identity/facts injection + prompt assembly) and `_persist_voice_turn(task, answer, rid)` (session-message append + `update_session_response` + `CodecMemory` save). `_dispatch_inner` is now a flow of named calls (188 → 131 LOC); the LLM call was already `codec_llm.call` (A-12 tranche 1). Faithfulness detail: `_persist_voice_turn` carries its own `from codec_memory import CodecMemory` — the original persist block relied on the build block's local import being in scope, which extraction removed. Pinned by `tests/test_dispatch_inner_helpers.py` (7 tests). See `docs/PR3D-MONOLITH-EXTRACT-DESIGN.md`. **A-6 (`chat_completion`/`SkillTagBuffer`) follows as PR-3D-c.** (The 3 duplicate memory-injection sites the impact note flags — voice / `_enrich_messages` / `codec_voice.generate_response` — are a separate consolidation, not in this extraction's scope.)

**Location:** `codec.py:545-744`
**Description:** Single function does: skill dispatch loop, draft detection + queueing, memory injection (5 distinct injection points), system-prompt build, voice-session state mutation, LLM HTTP call (with inline `import requests as _llm_req`), inline SQLite UPDATE, two follow-up calls into `CodecMemory`, TTS dispatch, AppleScript notification dispatch, error handling for all of the above. 200 lines, ≥4 levels of nesting in places, single try/except wrapping the LLM call (line 689-743). Pre-audit P-5 understated this as "~50 LOC" — the actual count is ~200.
**Impact:** Hard to test, hard to modify safely, hard to read. The 5 different memory injection mechanisms (lines 615-649) duplicate logic that already lives in `_enrich_messages` (`codec_dashboard.py:1930-2093`) and `codec_voice.generate_response` (`codec_voice.py:716-754`). Three independent implementations of "build a system prompt with memory context" — bugs fixed in one don't reach the others.
Expand Down
2 changes: 1 addition & 1 deletion docs/audits/PHASE-1-CONSOLIDATED-TRIAGE.md
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,7 @@ Mirror the Intake Phase 3 wave pattern. 7 waves planned; sizes are PR-counts, NO
- A-4: skill-loader unification ✅ (branch `fix/pr3-a4-skill-loader-unification`, design-first per §11 → `docs/A4-SKILL-LOADER-UNIFICATION-DESIGN.md`). Deleted legacy `codec_core.{loaded_skills,load_skills,run_skill}`; codec.py + cortex_skills now use canonical `codec_dispatch` registry. Closed a real **security gap** (legacy path skipped the PR-1A AST gate) + a **hooks bypass** (voice path now fires run_with_hooks). Option A: `custom_triggers.json` now honored everywhere via SkillRegistry. 10 tests; full suite 1376 passing.
- PR-3D: A-5 + A-6 + A-7 — extract helpers from the 3 monolithic functions (`_dispatch_inner`, `chat_completion`, `Agent.run`). Split into 3 sub-PRs (one per function — too risky to do all three on these hot paths in one diff). Design → `docs/PR3D-MONOLITH-EXTRACT-DESIGN.md`.
- PR-3D-a: A-7 `Agent.run` ✅ (branch `fix/pr3d-extract-monolith-helpers`). Behavior-preserving extraction of `_parse_action` (pure protocol parse), `_validate_tool_call` (pure — 4 guards → 1 rejection msg), `_execute_tool_with_hooks` (copy_context + run_with_hooks + veto executor). `run()` 230 → 177 LOC; stuck detection kept inline post-`tool_result`-audit for exact parity. 13 unit tests (`tests/test_agent_run_helpers.py`) + 112 agent/crew regression tests green; zero net-new ruff; full suite 1444 passing, zero new failures.
- PR-3D-b: A-5 `_dispatch_inner` (voice helpers `_build_voice_system_prompt` + `_persist_voice_turn`) — pending.
- PR-3D-b: A-5 `_dispatch_inner` ✅ (branch `fix/pr3d-b-dispatch-inner`). Behavior-preserving extraction of `_build_voice_system_prompt(task)` (memory/identity/facts + prompt assembly) + `_persist_voice_turn(task, answer, rid)` (session + DB + CodecMemory save). `_dispatch_inner` 188 → 131 LOC. Faithfulness: `_persist_voice_turn` carries its own `from codec_memory import CodecMemory` (original relied on the build block's local import being in scope). 7 unit tests (`tests/test_dispatch_inner_helpers.py`); zero net-new ruff; full suite 1451 passing, zero new failures.
- PR-3D-c: A-6 `chat_completion` (extract `SkillTagBuffer` → `codec_chat_stream.py`; also unblocks the A-12 dashboard-stream migration) — pending.
- PR-3E: A-11 + A-12 — unify vision + `chat/completions` ✅ (branch `fix/pr3e-llm-vision-dedup`, design-first per §11 → `docs/PR3E-LLM-VISION-DEDUP-DESIGN.md`; **Option 2** chosen by Mickael). **A-11 fully closed**: new `codec_vision.py` (sync+async, Gemini→local fallback, live config); all 3 consumers (codec.py/voice/session) delegate; session gains a Gemini fallback it lacked. **A-12 first tranche**: discovered `codec_llm_proxy` is a *queue*, not an HTTP caller — built genuinely-new `codec_llm.py` (`call()` + `strip_think`/`extract_content`, retry, never-raises) and migrated codec.py voice-reply chat + `codec_session.qwen_call`. **Deferred to phased follow-ons**: `qwen_stream` SSE (needs `codec_llm.stream()`) + ~40 remaining sites (dashboard/voice/agents/bridges/misc), each its own tranche. 19 tests (`tests/test_llm_vision_dedup.py`); full suite zero new failures.
- PR-3E-2: A-12 tranche 2 ✅ (branch `fix/pr3-a12-tranche2-stream`, design-first → `docs/PR3E2-LLM-STREAM-TRANCHE2-DESIGN.md`; **Option 1** chosen). Built streaming keystone `codec_llm.stream()` (sync generator, raw deltas, never-raises) + shared `_build_request`; migrated `codec_session.qwen_stream` (proof) + non-streaming trivials `codec_compaction` + `codec_dictate`. Read-the-source moved `codec_textassist` + `regen_skill_descriptions` to **2c** (raise-on-failure contract — never-raise would paste empty over the user's selection / write empty descriptions). 14 tests (`tests/test_llm_stream.py`); zero net-new ruff; full suite 1409 passing, zero new failures. **Remaining A-12 tranches:** 2c (raise-mode: textassist/regen/agent_plan/agent_runner), bridges (telegram/imessage), dashboard (non-stream + the stream tag-machine), voice `_stream_qwen` + agents (async `astream()` + queue), skills tranche.
Expand Down
Loading
Loading