From 7d8261a67a1e1bf99b848b4dab3686ec526c7d4e Mon Sep 17 00:00:00 2001 From: Burak Yigit Kaya Date: Thu, 14 May 2026 16:46:21 +0000 Subject: [PATCH] fix: address review findings from 3-block system prompt PR MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Critical: - OpenAI/Responses API upstreams now receive LTM in system prompt (previously silently dropped — only Anthropic path got LTM) - vectorSearch() now accepts excludeCategories filter, preventing preference entries from consuming vector search slots when context- bound entries are requested separately Medium: - textDiffRatio rewritten to sample across full string length instead of only prefix+suffix — catches interior content changes that the old heuristic missed - Context-bound LTM budget floor: guaranteed 50% of total LTM budget even when preferences are large, preventing preference starvation Low: - KnowledgeCategory type alias for well-known categories with autocomplete - ForSessionOptions doc clarifies categories/excludeCategories mutual exclusion - Tests for excludeCategories on cross-project entries and empty array edge case --- packages/core/src/embedding.ts | 15 +++++++- packages/core/src/ltm.ts | 16 ++++++-- packages/core/test/ltm.test.ts | 61 +++++++++++++++++++++++++++++++ packages/gateway/src/pipeline.ts | 63 +++++++++++++++++++++----------- 4 files changed, 128 insertions(+), 27 deletions(-) diff --git a/packages/core/src/embedding.ts b/packages/core/src/embedding.ts index d2bdd33..ab4057a 100644 --- a/packages/core/src/embedding.ts +++ b/packages/core/src/embedding.ts @@ -706,14 +706,25 @@ type VectorHit = { id: string; similarity: number }; * Search all knowledge entries with embeddings by cosine similarity. * Returns top-k entries sorted by similarity descending. * Pure brute-force — fine for <100 entries (microseconds). + * + * @param excludeCategories Optional category names to exclude from results. + * Useful when preferences are injected in a separate system block and + * shouldn't compete for vector search slots with context-bound entries. */ export function vectorSearch( queryEmbedding: Float32Array, limit = 10, + excludeCategories?: string[], ): VectorHit[] { + let sql = "SELECT id, embedding FROM knowledge WHERE embedding IS NOT NULL AND confidence > 0.2"; + const params: string[] = []; + if (excludeCategories?.length) { + sql += ` AND category NOT IN (${excludeCategories.map(() => "?").join(",")})`; + params.push(...excludeCategories); + } const rows = db() - .query("SELECT id, embedding FROM knowledge WHERE embedding IS NOT NULL AND confidence > 0.2") - .all() as Array<{ id: string; embedding: Buffer }>; + .query(sql) + .all(...params) as Array<{ id: string; embedding: Buffer }>; const scored: VectorHit[] = []; for (const row of rows) { diff --git a/packages/core/src/ltm.ts b/packages/core/src/ltm.ts index 76f48b2..2089eed 100644 --- a/packages/core/src/ltm.ts +++ b/packages/core/src/ltm.ts @@ -355,16 +355,24 @@ function scoreEntriesFTS(sessionContext: string): Map { } } +/** + * Well-known knowledge entry categories managed by the curator. + * The DB column is a free-form string, but these are the standard values. + */ +export type KnowledgeCategory = "decision" | "pattern" | "preference" | "architecture" | "gotcha"; + /** Options for `forSession()` to control entry selection. */ export type ForSessionOptions = { /** Caller-provided context (e.g., user's current message) for relevance * scoring when no session context exists in the DB yet. */ contextHint?: string; /** Restrict to these categories (e.g., `['preference']` for turn 1). */ - categories?: string[]; + categories?: (KnowledgeCategory | (string & {}))[]; /** Exclude these categories (e.g., `['preference']` for context-bound - * entries when preferences are already injected in a separate block). */ - excludeCategories?: string[]; + * entries when preferences are already injected in a separate block). + * Mutually exclusive with `categories` — if both are provided, + * `categories` (include) wins. */ + excludeCategories?: (KnowledgeCategory | (string & {}))[]; }; /** @@ -473,7 +481,7 @@ export async function forSession( let vectorScores: Map; try { const [contextVec] = await embedding.embed([sessionContext], "query"); - const hits = embedding.vectorSearch(contextVec, 50); + const hits = embedding.vectorSearch(contextVec, 50, excludeFilter); vectorScores = new Map(hits.map((h) => [h.id, h.similarity])); } catch (err) { log.warn("Vector scoring failed, falling back to FTS5:", err); diff --git a/packages/core/test/ltm.test.ts b/packages/core/test/ltm.test.ts index 352e99a..68fd1ab 100644 --- a/packages/core/test/ltm.test.ts +++ b/packages/core/test/ltm.test.ts @@ -747,6 +747,67 @@ describe("ltm.forSession", () => { } }); + test("excludeCategories filters cross-project entries too", async () => { + ltm.create({ + projectPath: PROJ, + category: "preference", + title: "Cross-project pref to exclude", + content: "This cross-project preference should not appear", + scope: "project", + crossProject: true, + }); + ltm.create({ + projectPath: PROJ, + category: "gotcha", + title: "Cross-project gotcha to include", + content: "This cross-project gotcha should appear", + scope: "project", + crossProject: true, + }); + ltm.create({ + projectPath: PROJ, + category: "architecture", + title: "Local arch entry to include", + content: "This local architecture entry should appear", + scope: "project", + crossProject: false, + }); + + const result = await ltm.forSession(PROJ, SESSION, 10_000, { + excludeCategories: ["preference"], + }); + expect(result.length).toBeGreaterThan(0); + for (const entry of result) { + expect(entry.category).not.toBe("preference"); + } + }); + + test("empty excludeCategories array has no effect", async () => { + ltm.create({ + projectPath: PROJ, + category: "preference", + title: "Pref for empty-exclude test", + content: "Should appear when excludeCategories is empty array", + scope: "project", + crossProject: false, + }); + ltm.create({ + projectPath: PROJ, + category: "gotcha", + title: "Gotcha for empty-exclude test", + content: "Should also appear when excludeCategories is empty", + scope: "project", + crossProject: false, + }); + + const result = await ltm.forSession(PROJ, SESSION, 10_000, { + excludeCategories: [], + }); + const categories = new Set(result.map((e) => e.category)); + // Both categories should be present — empty exclude means no filtering + expect(categories.size).toBeGreaterThanOrEqual(2); + }); + test("contextHint provides relevance signal when no session context exists", async () => { // Create entries about different topics ltm.create({ diff --git a/packages/gateway/src/pipeline.ts b/packages/gateway/src/pipeline.ts index 6d29639..bfb93f6 100644 --- a/packages/gateway/src/pipeline.ts +++ b/packages/gateway/src/pipeline.ts @@ -279,31 +279,35 @@ const stableLtmCache = new Map< /** * Measure character-level difference between two strings as a ratio (0..1). - * Uses a simple length + common-prefix heuristic — not a full diff, but - * sufficient to detect "substantially the same" vs "meaningfully different". + * Samples characters at regular intervals across the full string length to + * detect interior changes, not just prefix/suffix differences. + * + * For short strings (≤1000 chars) compares every character. For longer + * strings samples up to 1000 evenly-spaced positions for O(1) cost. */ function textDiffRatio(a: string, b: string): number { if (a === b) return 0; if (!a || !b) return 1; - // Common prefix length - const minLen = Math.min(a.length, b.length); const maxLen = Math.max(a.length, b.length); - let common = 0; - for (let i = 0; i < minLen; i++) { - if (a[i] === b[i]) common++; - else break; - } + const minLen = Math.min(a.length, b.length); + + // Length difference accounts for part of the diff + const lengthDiff = maxLen - minLen; - // Common suffix length (non-overlapping with prefix) - let suffix = 0; - for (let i = 0; i < minLen - common; i++) { - if (a[a.length - 1 - i] === b[b.length - 1 - i]) suffix++; - else break; + // Sample up to 1000 positions across the overlapping region + const sampleCount = Math.min(minLen, 1000); + const step = sampleCount < minLen ? minLen / sampleCount : 1; + let mismatches = 0; + for (let i = 0; i < sampleCount; i++) { + const idx = Math.floor(i * step); + if (a[idx] !== b[idx]) mismatches++; } - const matched = common + suffix; - return 1 - matched / maxLen; + // Extrapolate mismatch rate to the full overlapping region + length diff + const mismatchRate = sampleCount > 0 ? mismatches / sampleCount : 0; + const estimatedMismatches = mismatchRate * minLen + lengthDiff; + return Math.min(1, estimatedMismatches / maxLen); } /** Cached LLM client for background workers. */ @@ -930,12 +934,24 @@ async function forwardToUpstream( : config.upstreamOpenAI); if (effectiveProtocol === "openai-responses") { - const result = buildOpenAIResponsesUpstreamRequest(req, effectiveUpstreamBase); + // Inject LTM into system prompt for non-Anthropic paths. + // Anthropic handles LTM via separate system blocks in buildAnthropicRequest; + // OpenAI paths receive a single system string, so we concatenate here. + const ltmParts = [cache?.stableLtmSystem, cache?.ltmSystem].filter(Boolean); + const reqWithLtm = ltmParts.length + ? { ...req, system: [req.system, ...ltmParts].filter(Boolean).join("\n\n") } + : req; + const result = buildOpenAIResponsesUpstreamRequest(reqWithLtm, effectiveUpstreamBase); url = result.url; headers = result.headers; body = result.body; } else if (effectiveProtocol === "openai") { - const result = buildOpenAIUpstreamRequest(req, effectiveUpstreamBase); + // Inject LTM into system prompt (see comment above for openai-responses). + const ltmParts = [cache?.stableLtmSystem, cache?.ltmSystem].filter(Boolean); + const reqWithLtm = ltmParts.length + ? { ...req, system: [req.system, ...ltmParts].filter(Boolean).join("\n\n") } + : req; + const result = buildOpenAIUpstreamRequest(reqWithLtm, effectiveUpstreamBase); url = result.url; headers = result.headers; body = result.body; @@ -2587,9 +2603,13 @@ async function handleConversationTurn( let cached = ltmSessionCache.get(sessionID); if (!cached) { - // Reserve budget for stable LTM already injected in system[1] + // Reserve budget for stable LTM already injected in system[1]. + // Guarantee at least 50% of the total budget for context-bound + // entries — preferences are useful but gotchas/patterns are more + // critical for correctness during active work. const stableTokens = stable?.tokenCount ?? 0; - const contextBudget = Math.max(0, ltmBudget - stableTokens); + const minContextBudget = Math.floor(ltmBudget * 0.5); + const contextBudget = Math.max(minContextBudget, ltmBudget - stableTokens); // Exclude preferences — they're already in system[1] const contextEntries = await ltm.forSession(projectPath, sessionID, contextBudget, { excludeCategories: ["preference"], @@ -2704,7 +2724,8 @@ async function handleConversationTurn( const ltmFraction = cfg.budget.ltm; const ltmBudget = getLtmBudget(ltmFraction); const stableTokens = stableLtmCache.get(sessionID)?.tokenCount ?? 0; - const contextBudget = Math.max(0, ltmBudget - stableTokens); + const minContextBudget = Math.floor(ltmBudget * 0.5); + const contextBudget = Math.max(minContextBudget, ltmBudget - stableTokens); const contextHint = lastUserTextTrimmed(req); const contextEntries = await ltm.forSession(projectPath, sessionID, contextBudget, { excludeCategories: ["preference"],