CambrianTech · joelteply · May 8, 2026 · May 8, 2026
diff --git a/src/system/user/server/modules/PersonaResponseGenerator.ts b/src/system/user/server/modules/PersonaResponseGenerator.ts
@@ -91,62 +91,6 @@ function synthesizeDeterministicUuid(msg: LLMMessage): string {
   return `${h.slice(0, 8)}-${h.slice(8, 12)}-${h.slice(12, 16)}-${h.slice(16, 20)}-${h.slice(20, 32)}`;
 }
 
-/**
- * Strip leaked tool-invocation markup from a persona's response text before
- * it lands in the chat log.
- *
- * Why this exists (Joel 2026-05-03, chat-probe runaway): until cognition's
- * tool agent loop fully migrates to Rust (see header comment about Joel's
- * 2026-04-20 "REMOVE THESE FUCKING FALLBACKS" instruction), Rust returns
- * the model's raw text — INCLUDING any `<tool_use>...</tool_use>` XML the
- * model emitted as part of its response. The TS shim does no parsing and
- * posts that text verbatim, so users see a wall of `<tool_use><tool_name>
- * collaboration/decision/vote</tool_name>...` markup interleaved with the
- * persona's actual prose. With multiple personas in a room replying to
- * each other, the leaked block becomes the dominant pattern in history,
- * personas treat it as a continuation example, and the room collapses
- * into an echo loop of identical templated tool-use ghosts (200+ msgs
- * observed inside 10 minutes on a fresh Mac install).
- *
- * Interim fix: silently drop the leaked blocks here. The tool itself is
- * a no-op anyway (Rust isn't executing it yet); stripping the markup
- * leaves the persona's actual prose intact, which is the only thing the
- * user wanted to see. When Rust's cognition::tool_executor takes over
- * the tool agent loop, the model's `<tool_use>` will be consumed before
- * the response text reaches this shim and this function becomes a no-op
- * — at which point it can be deleted.
- *
- * Also strips `<tool_result>` blocks (model can echo a previous result
- * back into its turn) and `<thinking>...</thinking>` blocks (some models
- * leak their chain-of-thought when prompted with one-shot examples that
- * contain a thinking block — same shape of leak, same fix).
- *
- * 2026-05-03 follow-up (codex-b741, observed on canary E2E test post-#1024):
- * with `<tool_use>` blocks now stripped, models still emit the inner
- * `<tool_name>` + `<parameters>` shape WITHOUT the outer `<tool_use>`
- * wrapper. Example: `'code/shell/execute'<parameters>{cmd: cargo test ...}
- * </parameters>`. The original strip regex anchored on `<tool_use>` so
- * these escaped. Strip them too — same justification (no Rust executor
- * yet, so the markup is dead noise that pollutes prose + history).
- */
-function stripLeakedToolMarkup(text: string): string {
-  return text
-    .replace(/<tool_use\b[^>]*>[\s\S]*?<\/tool_use>/gi, '')
-    .replace(/<tool_result\b[^>]*>[\s\S]*?<\/tool_result>/gi, '')
-    .replace(/<thinking\b[^>]*>[\s\S]*?<\/thinking>/gi, '')
-    // Inner shapes that escape when the outer <tool_use> wrapper is missing.
-    .replace(/<tool_name\b[^>]*>[\s\S]*?<\/tool_name>/gi, '')
-    .replace(/<parameters\b[^>]*>[\s\S]*?<\/parameters>/gi, '')
-    .replace(/<arguments\b[^>]*>[\s\S]*?<\/arguments>/gi, '')
-    // Quoted bare tool refs left over after stripping (e.g. `'code/shell/execute'`).
-    // Conservative: only strip when followed by trailing whitespace + EOL or
-    // another stripped marker — avoids false-positives on prose mentioning a
-    // command name in quotes.
-    .replace(/['"`][a-z][a-z0-9_-]*\/[a-z0-9_/-]+['"`](?=\s*$)/gim, '')
-    .replace(/\n{3,}/g, '\n\n')
-    .trim();
-}
-
 export interface ResponseGenerationResult {
   success: boolean;
   messageId?: UUID;
@@ -566,21 +510,11 @@ export class PersonaResponseGenerator {
       // FALLBACKS". Tool calling will be re-added inside Rust as part
       // of the cognition migration; until then a persona's spoken text
       // is exactly what Rust returned.
-      const rawText = response.text.trim();
-      const finalText = stripLeakedToolMarkup(rawText);
+      const finalText = response.text.trim();
       if (!finalText) {
-        // Either Rust returned empty, OR everything was leaked tool markup
-        // that we just stripped. Either way, nothing post-worthy.
-        if (rawText && !finalText) {
-          this.log(`⚠️ ${this.personaName}: Response was 100% leaked tool markup (${rawText.length} chars stripped) — skipping post to avoid echo loop`);
-        } else {
-          this.log(`⚠️ ${this.personaName}: Rust returned empty text — skipping post`);
-        }
+        this.log(`⚠️ ${this.personaName}: Rust returned empty text — skipping post`);
         return { success: false, error: 'Empty response from Rust', storedToolResultIds: allStoredResultIds };
       }
-      if (rawText.length !== finalText.length) {
-        this.log(`🧹 ${this.personaName}: Stripped ${rawText.length - finalText.length} chars of leaked tool markup`);
-      }
 
       const phase35Start = Date.now();
       const postedMessageId = await this.postResponse(

diff --git a/src/workers/continuum-core/src/persona/response.rs b/src/workers/continuum-core/src/persona/response.rs
@@ -33,6 +33,7 @@
 use crate::cognition::tool_executor::types::MediaItemLite;
 use crate::cognition::{AnalysisInput, PersonaSlot, RecentMessage, SharedAnalysis, analyze};
 use serde::{Deserialize, Serialize};
+use std::sync::LazyLock;
 use std::time::SystemTime;
 use ts_rs::TS;
 use uuid::Uuid;
@@ -238,17 +239,19 @@ pub async fn respond(input: RespondInput) -> Result<PersonaResponse, String> {
     );
 
     let post_start = now_ms();
-    let (visible_text, think_count) = strip_thinks_emit_events(
+    let (think_stripped_text, think_count) = strip_thinks_emit_events(
         &raw_response.text,
         input.persona.persona_id,
         input.message_id,
     );
+    let visible_text = strip_leaked_tool_markup(&think_stripped_text);
     trace.record(
         SEAM_POST_PROCESS,
         post_start,
         now_ms().saturating_sub(post_start),
         serde_json::json!({
             "think_blocks": think_count,
+            "leaked_markup_chars_stripped": think_stripped_text.len().saturating_sub(visible_text.len()),
             "visible_chars": visible_text.len(),
         }),
     );
@@ -636,6 +639,62 @@ fn strip_thinks_emit_events(raw: &str, persona_id: Uuid, message_id: Uuid) -> (S
     (visible.trim().to_string(), count)
 }
 
+static TOOL_USE_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
+    regex::Regex::new(r"(?is)<tool_use\b[^>]*>.*?</tool_use>").expect("tool_use regex")
+});
+static TOOL_RESULT_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
+    regex::Regex::new(r"(?is)<tool_result\b[^>]*>.*?</tool_result>").expect("tool_result regex")
+});
+static THINKING_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
+    regex::Regex::new(r"(?is)<thinking\b[^>]*>.*?</thinking>").expect("thinking regex")
+});
+static TOOL_NAME_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
+    regex::Regex::new(r"(?is)<tool_name\b[^>]*>.*?</tool_name>").expect("tool_name regex")
+});
+static PARAMETERS_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
+    regex::Regex::new(r"(?is)<parameters\b[^>]*>.*?</parameters>").expect("parameters regex")
+});
+static ARGUMENTS_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
+    regex::Regex::new(r"(?is)<arguments\b[^>]*>.*?</arguments>").expect("arguments regex")
+});
+static BARE_TOOL_REF_LINE_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
+    regex::Regex::new(r#"^\s*['"`][a-z][a-z0-9_-]*/[a-z0-9_/-]+['"`]\s*$"#)
+        .expect("bare tool ref line regex")
+});
+static EXCESS_BLANK_LINES_RE: LazyLock<regex::Regex> =
+    LazyLock::new(|| regex::Regex::new(r"\n{3,}").expect("blank lines regex"));
+
+/// Strip dead tool-invocation markup from text before the host posts it.
+///
+/// Tool execution belongs in Rust cognition, not in the TS chat shim.
+/// Until every generated tool call is consumed by the Rust executor,
+/// local models can leak `<tool_use>` / `<parameters>` fragments as
+/// visible prose. Posting those fragments poisons room history and
+/// drives echo loops. Keep the cleanup Rust-side so every host surface
+/// (TS, CLI, future native apps) receives the same post-processed text.
+fn strip_leaked_tool_markup(text: &str) -> String {
+    let mut cleaned = text.to_string();
+    for re in [
+        &*TOOL_USE_RE,
+        &*TOOL_RESULT_RE,
+        &*THINKING_RE,
+        &*TOOL_NAME_RE,
+        &*PARAMETERS_RE,
+        &*ARGUMENTS_RE,
+    ] {
+        cleaned = re.replace_all(&cleaned, "").into_owned();
+    }
+    cleaned = cleaned
+        .lines()
+        .filter(|line| !BARE_TOOL_REF_LINE_RE.is_match(line))
+        .collect::<Vec<_>>()
+        .join("\n");
+    EXCESS_BLANK_LINES_RE
+        .replace_all(&cleaned, "\n\n")
+        .trim()
+        .to_string()
+}
+
 fn find_at(haystack: &[u8], from: usize, needle: &[u8]) -> Option<usize> {
     if from >= haystack.len() {
         return None;
@@ -722,6 +781,55 @@ mod tests {
         assert_eq!(count, 0);
     }
 
+    /// What this catches: the exact runaway shape observed in chat
+    /// where local models emitted XML tool calls as visible prose.
+    /// Rust must remove the dead invocation before TS posts the
+    /// message, or the room history becomes tool-markup training data.
+    #[test]
+    fn strip_leaked_tool_markup_removes_full_tool_blocks() {
+        let raw = "Before <tool_use><tool_name>code/shell/execute</tool_name><parameters>{\"cmd\":\"cargo test\"}</parameters></tool_use> after";
+        let visible = strip_leaked_tool_markup(raw);
+        assert_eq!(visible, "Before  after");
+        assert!(!visible.contains("tool_use"));
+        assert!(!visible.contains("cargo test"));
+    }
+
+    /// What this catches: models sometimes drop the outer
+    /// `<tool_use>` wrapper but still leak the inner tag pair. The
+    /// scrubber must handle that partial shape too.
+    #[test]
+    fn strip_leaked_tool_markup_removes_wrapperless_inner_shapes() {
+        let raw = "Answer.\n<tool_name>code/shell/execute</tool_name>\n<arguments>{\"cmd\":\"npm test\"}</arguments>\nDone.";
+        let visible = strip_leaked_tool_markup(raw);
+        assert_eq!(visible, "Answer.\n\nDone.");
+        assert!(!visible.contains("code/shell/execute"));
+        assert!(!visible.contains("npm test"));
+    }
+
+    /// What this catches: `<thinking>` is a separate leak shape from
+    /// the normal `<think>` blocks handled by `strip_thinks_emit_events`.
+    /// It should not reach chat output.
+    #[test]
+    fn strip_leaked_tool_markup_removes_thinking_blocks() {
+        let raw = "<thinking>private chain</thinking>\nVisible.";
+        let visible = strip_leaked_tool_markup(raw);
+        assert_eq!(visible, "Visible.");
+    }
+
+    /// What this catches: the bare tool-ref cleanup is intentionally
+    /// conservative. Inline prose that mentions a command in quotes
+    /// should remain; only dangling quoted tool refs at line end are
+    /// stripped.
+    #[test]
+    fn strip_leaked_tool_markup_keeps_inline_tool_reference_prose() {
+        let raw = "The command 'code/shell/execute' is not available here.\n'code/shell/execute'";
+        let visible = strip_leaked_tool_markup(raw);
+        assert_eq!(
+            visible,
+            "The command 'code/shell/execute' is not available here."
+        );
+    }
+
     // ─── Native multimodal helper tests ─────────────────────────────
     //
     // build_messages_with_media is the convergence point for sensory