Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 2 additions & 68 deletions src/system/user/server/modules/PersonaResponseGenerator.ts
Original file line number Diff line number Diff line change
Expand Up @@ -91,62 +91,6 @@ function synthesizeDeterministicUuid(msg: LLMMessage): string {
return `${h.slice(0, 8)}-${h.slice(8, 12)}-${h.slice(12, 16)}-${h.slice(16, 20)}-${h.slice(20, 32)}`;
}

/**
* Strip leaked tool-invocation markup from a persona's response text before
* it lands in the chat log.
*
* Why this exists (Joel 2026-05-03, chat-probe runaway): until cognition's
* tool agent loop fully migrates to Rust (see header comment about Joel's
* 2026-04-20 "REMOVE THESE FUCKING FALLBACKS" instruction), Rust returns
* the model's raw text — INCLUDING any `<tool_use>...</tool_use>` XML the
* model emitted as part of its response. The TS shim does no parsing and
* posts that text verbatim, so users see a wall of `<tool_use><tool_name>
* collaboration/decision/vote</tool_name>...` markup interleaved with the
* persona's actual prose. With multiple personas in a room replying to
* each other, the leaked block becomes the dominant pattern in history,
* personas treat it as a continuation example, and the room collapses
* into an echo loop of identical templated tool-use ghosts (200+ msgs
* observed inside 10 minutes on a fresh Mac install).
*
* Interim fix: silently drop the leaked blocks here. The tool itself is
* a no-op anyway (Rust isn't executing it yet); stripping the markup
* leaves the persona's actual prose intact, which is the only thing the
* user wanted to see. When Rust's cognition::tool_executor takes over
* the tool agent loop, the model's `<tool_use>` will be consumed before
* the response text reaches this shim and this function becomes a no-op
* — at which point it can be deleted.
*
* Also strips `<tool_result>` blocks (model can echo a previous result
* back into its turn) and `<thinking>...</thinking>` blocks (some models
* leak their chain-of-thought when prompted with one-shot examples that
* contain a thinking block — same shape of leak, same fix).
*
* 2026-05-03 follow-up (codex-b741, observed on canary E2E test post-#1024):
* with `<tool_use>` blocks now stripped, models still emit the inner
* `<tool_name>` + `<parameters>` shape WITHOUT the outer `<tool_use>`
* wrapper. Example: `'code/shell/execute'<parameters>{cmd: cargo test ...}
* </parameters>`. The original strip regex anchored on `<tool_use>` so
* these escaped. Strip them too — same justification (no Rust executor
* yet, so the markup is dead noise that pollutes prose + history).
*/
function stripLeakedToolMarkup(text: string): string {
  // Paired tags removed together with everything between them. The last three
  // are the inner shapes that escape when the outer <tool_use> wrapper is
  // missing. Order matters only in that wrappers go before their inner tags.
  const pairedTagPatterns: RegExp[] = [
    /<tool_use\b[^>]*>[\s\S]*?<\/tool_use>/gi,
    /<tool_result\b[^>]*>[\s\S]*?<\/tool_result>/gi,
    /<thinking\b[^>]*>[\s\S]*?<\/thinking>/gi,
    /<tool_name\b[^>]*>[\s\S]*?<\/tool_name>/gi,
    /<parameters\b[^>]*>[\s\S]*?<\/parameters>/gi,
    /<arguments\b[^>]*>[\s\S]*?<\/arguments>/gi,
  ];

  let scrubbed = text;
  for (const pattern of pairedTagPatterns) {
    scrubbed = scrubbed.replace(pattern, '');
  }

  // Quoted bare tool refs left behind by the pass above (e.g.
  // `'code/shell/execute'`). Deliberately narrow: the lookahead only accepts
  // a ref followed by optional whitespace and end-of-line, so a quoted
  // command name in the middle of a sentence is left alone.
  const withoutBareRefs = scrubbed.replace(
    /['"`][a-z][a-z0-9_-]*\/[a-z0-9_/-]+['"`](?=\s*$)/gim,
    '',
  );

  // Removing blocks can leave runs of empty lines; cap them at one blank line.
  const collapsed = withoutBareRefs.replace(/\n{3,}/g, '\n\n');
  return collapsed.trim();
}

export interface ResponseGenerationResult {
success: boolean;
messageId?: UUID;
Expand Down Expand Up @@ -566,21 +510,11 @@ export class PersonaResponseGenerator {
// FALLBACKS". Tool calling will be re-added inside Rust as part
// of the cognition migration; until then a persona's spoken text
// is exactly what Rust returned.
const rawText = response.text.trim();
const finalText = stripLeakedToolMarkup(rawText);
const finalText = response.text.trim();
if (!finalText) {
// Either Rust returned empty, OR everything was leaked tool markup
// that we just stripped. Either way, nothing post-worthy.
if (rawText && !finalText) {
this.log(`⚠️ ${this.personaName}: Response was 100% leaked tool markup (${rawText.length} chars stripped) — skipping post to avoid echo loop`);
} else {
this.log(`⚠️ ${this.personaName}: Rust returned empty text — skipping post`);
}
this.log(`⚠️ ${this.personaName}: Rust returned empty text — skipping post`);
return { success: false, error: 'Empty response from Rust', storedToolResultIds: allStoredResultIds };
}
if (rawText.length !== finalText.length) {
this.log(`🧹 ${this.personaName}: Stripped ${rawText.length - finalText.length} chars of leaked tool markup`);
}

const phase35Start = Date.now();
const postedMessageId = await this.postResponse(
Expand Down
110 changes: 109 additions & 1 deletion src/workers/continuum-core/src/persona/response.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
use crate::cognition::tool_executor::types::MediaItemLite;
use crate::cognition::{AnalysisInput, PersonaSlot, RecentMessage, SharedAnalysis, analyze};
use serde::{Deserialize, Serialize};
use std::sync::LazyLock;
use std::time::SystemTime;
use ts_rs::TS;
use uuid::Uuid;
Expand Down Expand Up @@ -238,17 +239,19 @@ pub async fn respond(input: RespondInput) -> Result<PersonaResponse, String> {
);

let post_start = now_ms();
let (visible_text, think_count) = strip_thinks_emit_events(
let (think_stripped_text, think_count) = strip_thinks_emit_events(
&raw_response.text,
input.persona.persona_id,
input.message_id,
);
let visible_text = strip_leaked_tool_markup(&think_stripped_text);
trace.record(
SEAM_POST_PROCESS,
post_start,
now_ms().saturating_sub(post_start),
serde_json::json!({
"think_blocks": think_count,
"leaked_markup_chars_stripped": think_stripped_text.len().saturating_sub(visible_text.len()),
"visible_chars": visible_text.len(),
}),
);
Expand Down Expand Up @@ -636,6 +639,62 @@ fn strip_thinks_emit_events(raw: &str, persona_id: Uuid, message_id: Uuid) -> (S
(visible.trim().to_string(), count)
}

/// Compiles one scrubber pattern. `label` becomes the panic message if the
/// pattern fails to compile; every pattern passed here is a fixed literal.
fn compile_scrub_re(pattern: &str, label: &str) -> regex::Regex {
    regex::Regex::new(pattern).expect(label)
}

// Paired-tag patterns. `(?is)` makes each one case-insensitive and lets `.`
// cross newlines; `.*?` is lazy, so a match ends at the first closing tag.

/// `<tool_use ...> ... </tool_use>` including the payload.
static TOOL_USE_RE: LazyLock<regex::Regex> =
    LazyLock::new(|| compile_scrub_re(r"(?is)<tool_use\b[^>]*>.*?</tool_use>", "tool_use regex"));

/// `<tool_result ...> ... </tool_result>` including the payload.
static TOOL_RESULT_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
    compile_scrub_re(r"(?is)<tool_result\b[^>]*>.*?</tool_result>", "tool_result regex")
});

/// `<thinking ...> ... </thinking>` including the payload.
static THINKING_RE: LazyLock<regex::Regex> =
    LazyLock::new(|| compile_scrub_re(r"(?is)<thinking\b[^>]*>.*?</thinking>", "thinking regex"));

/// `<tool_name ...> ... </tool_name>` when it appears without a `<tool_use>` wrapper.
static TOOL_NAME_RE: LazyLock<regex::Regex> =
    LazyLock::new(|| compile_scrub_re(r"(?is)<tool_name\b[^>]*>.*?</tool_name>", "tool_name regex"));

/// `<parameters ...> ... </parameters>` when it appears without a `<tool_use>` wrapper.
static PARAMETERS_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
    compile_scrub_re(r"(?is)<parameters\b[^>]*>.*?</parameters>", "parameters regex")
});

/// `<arguments ...> ... </arguments>` when it appears without a `<tool_use>` wrapper.
static ARGUMENTS_RE: LazyLock<regex::Regex> =
    LazyLock::new(|| compile_scrub_re(r"(?is)<arguments\b[^>]*>.*?</arguments>", "arguments regex"));

/// A string that is nothing but a quoted, slash-separated tool path with
/// optional surrounding whitespace. Anchored at both ends and carrying no
/// flags, so it is case-sensitive and meant to be tested one line at a time.
static BARE_TOOL_REF_LINE_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
    compile_scrub_re(
        r#"^\s*['"`][a-z][a-z0-9_-]*/[a-z0-9_/-]+['"`]\s*$"#,
        "bare tool ref line regex",
    )
});

/// Three or more consecutive newlines.
static EXCESS_BLANK_LINES_RE: LazyLock<regex::Regex> =
    LazyLock::new(|| compile_scrub_re(r"\n{3,}", "blank lines regex"));

/// Remove leaked tool-call markup from model text before a host posts it.
///
/// Local models can emit `<tool_use>` / `<parameters>` style fragments as
/// visible prose while the Rust executor does not yet consume every
/// generated tool call. Posting those fragments poisons room history and
/// drives echo loops, so the cleanup lives here rather than in the TS chat
/// shim: every host surface (TS, CLI, future native apps) then receives
/// the same post-processed text.
///
/// Steps, in order:
/// 1. delete every paired `tool_use`, `tool_result`, `thinking`,
///    `tool_name`, `parameters` and `arguments` block with its contents;
/// 2. drop each line that consists solely of a quoted tool path;
/// 3. collapse runs of three or more newlines to exactly two;
/// 4. trim leading and trailing whitespace.
fn strip_leaked_tool_markup(text: &str) -> String {
    let tag_patterns = [
        &*TOOL_USE_RE,
        &*TOOL_RESULT_RE,
        &*THINKING_RE,
        &*TOOL_NAME_RE,
        &*PARAMETERS_RE,
        &*ARGUMENTS_RE,
    ];
    let without_tags = tag_patterns
        .iter()
        .fold(text.to_string(), |acc, pattern| {
            pattern.replace_all(&acc, "").into_owned()
        });

    // Rebuild the text line by line, skipping lines that are only a quoted
    // tool reference. Kept lines are re-joined with a single '\n'.
    let mut rejoined = String::with_capacity(without_tags.len());
    let mut wrote_line = false;
    for line in without_tags.lines() {
        if BARE_TOOL_REF_LINE_RE.is_match(line) {
            continue;
        }
        if wrote_line {
            rejoined.push('\n');
        }
        rejoined.push_str(line);
        wrote_line = true;
    }

    let collapsed = EXCESS_BLANK_LINES_RE.replace_all(&rejoined, "\n\n");
    collapsed.trim().to_string()
}

fn find_at(haystack: &[u8], from: usize, needle: &[u8]) -> Option<usize> {
if from >= haystack.len() {
return None;
Expand Down Expand Up @@ -722,6 +781,55 @@ mod tests {
assert_eq!(count, 0);
}

/// What this catches: the exact runaway shape observed in chat
/// where local models emitted XML tool calls as visible prose.
/// Rust must remove the dead invocation before TS posts the
/// message, or the room history becomes tool-markup training data.
#[test]
fn strip_leaked_tool_markup_removes_full_tool_blocks() {
    let raw = "Before <tool_use><tool_name>code/shell/execute</tool_name><parameters>{\"cmd\":\"cargo test\"}</parameters></tool_use> after";
    let visible = strip_leaked_tool_markup(raw);
    // Only the tagged span is deleted. The single space on each side of it
    // survives, and the scrubber never collapses spaces, so the result has
    // two adjacent spaces between the words.
    assert_eq!(visible, "Before  after");
    assert!(!visible.contains("tool_use"));
    assert!(!visible.contains("cargo test"));
}

/// Guards the partial leak shape: the model omits the outer
/// `<tool_use>` wrapper yet still emits the inner `<tool_name>` and
/// `<arguments>` pairs. Both must vanish, payload included, and the
/// blank lines they leave behind must collapse to a single one.
#[test]
fn strip_leaked_tool_markup_removes_wrapperless_inner_shapes() {
    let scrubbed = strip_leaked_tool_markup(
        "Answer.\n<tool_name>code/shell/execute</tool_name>\n<arguments>{\"cmd\":\"npm test\"}</arguments>\nDone.",
    );
    assert_eq!(scrubbed, "Answer.\n\nDone.");
    for leaked in ["code/shell/execute", "npm test"] {
        assert!(!scrubbed.contains(leaked));
    }
}

/// Guards the `<thinking>` leak shape, which is distinct from the
/// `<think>` blocks that `strip_thinks_emit_events` handles. The block
/// and its contents must not reach chat output.
#[test]
fn strip_leaked_tool_markup_removes_thinking_blocks() {
    let scrubbed = strip_leaked_tool_markup("<thinking>private chain</thinking>\nVisible.");
    assert_eq!(scrubbed, "Visible.");
}

/// Guards the deliberately narrow bare tool-ref cleanup. A quoted
/// command mentioned inside a sentence must survive; only a line that
/// consists entirely of a quoted tool path (optionally padded with
/// whitespace) is dropped.
#[test]
fn strip_leaked_tool_markup_keeps_inline_tool_reference_prose() {
    let scrubbed = strip_leaked_tool_markup(
        "The command 'code/shell/execute' is not available here.\n'code/shell/execute'",
    );
    let expected = "The command 'code/shell/execute' is not available here.";
    assert_eq!(scrubbed, expected);
}

// ─── Native multimodal helper tests ─────────────────────────────
//
// build_messages_with_media is the convergence point for sensory
Expand Down
Loading