From 7d8261a67a1e1bf99b848b4dab3686ec526c7d4e Mon Sep 17 00:00:00 2001
From: Burak Yigit Kaya <ben@byk.im>
Date: Thu, 14 May 2026 16:46:21 +0000
Subject: [PATCH] fix: address review findings from 3-block system prompt PR
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Critical:
- OpenAI/Responses API upstreams now receive LTM in system prompt
  (previously silently dropped — only Anthropic path got LTM)
- vectorSearch() now accepts excludeCategories filter, preventing
  preference entries from consuming vector search slots when context-
  bound entries are requested separately

Medium:
- textDiffRatio rewritten to sample across full string length instead
  of only prefix+suffix — catches interior content changes that the
  old heuristic missed
- Context-bound LTM budget floor: guaranteed 50% of total LTM budget
  even when preferences are large, preventing preference starvation

Low:
- KnowledgeCategory type alias for well-known categories with autocomplete
- ForSessionOptions doc clarifies categories/excludeCategories mutual exclusion
- Tests for excludeCategories on cross-project entries and empty array edge case
---
 packages/core/src/embedding.ts   | 15 +++++++-
 packages/core/src/ltm.ts         | 16 ++++++--
 packages/core/test/ltm.test.ts   | 61 +++++++++++++++++++++++++++++++
 packages/gateway/src/pipeline.ts | 63 +++++++++++++++++++++-----------
 4 files changed, 128 insertions(+), 27 deletions(-)

diff --git a/packages/core/src/embedding.ts b/packages/core/src/embedding.ts
index d2bdd33..ab4057a 100644
--- a/packages/core/src/embedding.ts
+++ b/packages/core/src/embedding.ts
@@ -706,14 +706,25 @@ type VectorHit = { id: string; similarity: number };
  * Search all knowledge entries with embeddings by cosine similarity.
  * Returns top-k entries sorted by similarity descending.
  * Pure brute-force — fine for <100 entries (microseconds).
+ *
+ * @param excludeCategories  Optional category names to exclude from results.
+ *   Useful when preferences are injected in a separate system block and
+ *   shouldn't compete for vector search slots with context-bound entries.
  */
 export function vectorSearch(
   queryEmbedding: Float32Array,
   limit = 10,
+  excludeCategories?: string[],
 ): VectorHit[] {
+  let sql = "SELECT id, embedding FROM knowledge WHERE embedding IS NOT NULL AND confidence > 0.2";
+  const params: string[] = [];
+  if (excludeCategories?.length) {
+    sql += ` AND category NOT IN (${excludeCategories.map(() => "?").join(",")})`;
+    params.push(...excludeCategories);
+  }
   const rows = db()
-    .query("SELECT id, embedding FROM knowledge WHERE embedding IS NOT NULL AND confidence > 0.2")
-    .all() as Array<{ id: string; embedding: Buffer }>;
+    .query(sql)
+    .all(...params) as Array<{ id: string; embedding: Buffer }>;
 
   const scored: VectorHit[] = [];
   for (const row of rows) {
diff --git a/packages/core/src/ltm.ts b/packages/core/src/ltm.ts
index 76f48b2..2089eed 100644
--- a/packages/core/src/ltm.ts
+++ b/packages/core/src/ltm.ts
@@ -355,16 +355,24 @@ function scoreEntriesFTS(sessionContext: string): Map<string, number> {
   }
 }
 
+/**
+ * Well-known knowledge entry categories managed by the curator.
+ * The DB column is a free-form string, but these are the standard values.
+ */
+export type KnowledgeCategory = "decision" | "pattern" | "preference" | "architecture" | "gotcha";
+
 /** Options for `forSession()` to control entry selection. */
 export type ForSessionOptions = {
   /** Caller-provided context (e.g., user's current message) for relevance
    *  scoring when no session context exists in the DB yet. */
   contextHint?: string;
   /** Restrict to these categories (e.g., `['preference']` for turn 1). */
-  categories?: string[];
+  categories?: (KnowledgeCategory | (string & {}))[];
   /** Exclude these categories (e.g., `['preference']` for context-bound
-   *  entries when preferences are already injected in a separate block). */
-  excludeCategories?: string[];
+   *  entries when preferences are already injected in a separate block).
+   *  Mutually exclusive with `categories` — if both are provided,
+   *  `categories` (include) wins. */
+  excludeCategories?: (KnowledgeCategory | (string & {}))[];
 };
 
 /**
@@ -473,7 +481,7 @@ export async function forSession(
     let vectorScores: Map<string, number>;
     try {
       const [contextVec] = await embedding.embed([sessionContext], "query");
-      const hits = embedding.vectorSearch(contextVec, 50);
+      const hits = embedding.vectorSearch(contextVec, 50, excludeFilter);
       vectorScores = new Map(hits.map((h) => [h.id, h.similarity]));
     } catch (err) {
       log.warn("Vector scoring failed, falling back to FTS5:", err);
diff --git a/packages/core/test/ltm.test.ts b/packages/core/test/ltm.test.ts
index 352e99a..68fd1ab 100644
--- a/packages/core/test/ltm.test.ts
+++ b/packages/core/test/ltm.test.ts
@@ -747,6 +747,67 @@ describe("ltm.forSession", () => {
     }
   });
 
+  test("excludeCategories filters cross-project entries too", async () => {
+    ltm.create({
+      projectPath: PROJ,
+      category: "preference",
+      title: "Cross-project pref to exclude",
+      content: "This cross-project preference should not appear",
+      scope: "project",
+      crossProject: true,
+    });
+    ltm.create({
+      projectPath: PROJ,
+      category: "gotcha",
+      title: "Cross-project gotcha to include",
+      content: "This cross-project gotcha should appear",
+      scope: "project",
+      crossProject: true,
+    });
+    ltm.create({
+      projectPath: PROJ,
+      category: "architecture",
+      title: "Local arch entry to include",
+      content: "This local architecture entry should appear",
+      scope: "project",
+      crossProject: false,
+    });
+
+    const result = await ltm.forSession(PROJ, SESSION, 10_000, {
+      excludeCategories: ["preference"],
+    });
+    expect(result.length).toBeGreaterThan(0);
+    for (const entry of result) {
+      expect(entry.category).not.toBe("preference");
+    }
+  });
+
+  test("empty excludeCategories array has no effect", async () => {
+    ltm.create({
+      projectPath: PROJ,
+      category: "preference",
+      title: "Pref for empty-exclude test",
+      content: "Should appear when excludeCategories is empty array",
+      scope: "project",
+      crossProject: false,
+    });
+    ltm.create({
+      projectPath: PROJ,
+      category: "gotcha",
+      title: "Gotcha for empty-exclude test",
+      content: "Should also appear when excludeCategories is empty",
+      scope: "project",
+      crossProject: false,
+    });
+
+    const result = await ltm.forSession(PROJ, SESSION, 10_000, {
+      excludeCategories: [],
+    });
+    const categories = new Set(result.map((e) => e.category));
+    // Both categories should be present — empty exclude means no filtering
+    expect(categories.size).toBeGreaterThanOrEqual(2);
+  });
+
   test("contextHint provides relevance signal when no session context exists", async () => {
     // Create entries about different topics
     ltm.create({
diff --git a/packages/gateway/src/pipeline.ts b/packages/gateway/src/pipeline.ts
index 6d29639..bfb93f6 100644
--- a/packages/gateway/src/pipeline.ts
+++ b/packages/gateway/src/pipeline.ts
@@ -279,31 +279,35 @@ const stableLtmCache = new Map<
 
 /**
  * Measure character-level difference between two strings as a ratio (0..1).
- * Uses a simple length + common-prefix heuristic — not a full diff, but
- * sufficient to detect "substantially the same" vs "meaningfully different".
+ * Samples characters at regular intervals across the full string length to
+ * detect interior changes, not just prefix/suffix differences.
+ *
+ * For short strings (≤1000 chars) compares every character. For longer
+ * strings samples up to 1000 evenly-spaced positions for O(1) cost.
  */
 function textDiffRatio(a: string, b: string): number {
   if (a === b) return 0;
   if (!a || !b) return 1;
 
-  // Common prefix length
-  const minLen = Math.min(a.length, b.length);
   const maxLen = Math.max(a.length, b.length);
-  let common = 0;
-  for (let i = 0; i < minLen; i++) {
-    if (a[i] === b[i]) common++;
-    else break;
-  }
+  const minLen = Math.min(a.length, b.length);
+
+  // Length difference accounts for part of the diff
+  const lengthDiff = maxLen - minLen;
 
-  // Common suffix length (non-overlapping with prefix)
-  let suffix = 0;
-  for (let i = 0; i < minLen - common; i++) {
-    if (a[a.length - 1 - i] === b[b.length - 1 - i]) suffix++;
-    else break;
+  // Sample up to 1000 positions across the overlapping region
+  const sampleCount = Math.min(minLen, 1000);
+  const step = sampleCount < minLen ? minLen / sampleCount : 1;
+  let mismatches = 0;
+  for (let i = 0; i < sampleCount; i++) {
+    const idx = Math.floor(i * step);
+    if (a[idx] !== b[idx]) mismatches++;
   }
 
-  const matched = common + suffix;
-  return 1 - matched / maxLen;
+  // Extrapolate mismatch rate to the full overlapping region + length diff
+  const mismatchRate = sampleCount > 0 ? mismatches / sampleCount : 0;
+  const estimatedMismatches = mismatchRate * minLen + lengthDiff;
+  return Math.min(1, estimatedMismatches / maxLen);
 }
 
 /** Cached LLM client for background workers. */
@@ -930,12 +934,24 @@ async function forwardToUpstream(
       : config.upstreamOpenAI);
 
   if (effectiveProtocol === "openai-responses") {
-    const result = buildOpenAIResponsesUpstreamRequest(req, effectiveUpstreamBase);
+    // Inject LTM into system prompt for non-Anthropic paths.
+    // Anthropic handles LTM via separate system blocks in buildAnthropicRequest;
+    // OpenAI paths receive a single system string, so we concatenate here.
+    const ltmParts = [cache?.stableLtmSystem, cache?.ltmSystem].filter(Boolean);
+    const reqWithLtm = ltmParts.length
+      ? { ...req, system: [req.system, ...ltmParts].filter(Boolean).join("\n\n") }
+      : req;
+    const result = buildOpenAIResponsesUpstreamRequest(reqWithLtm, effectiveUpstreamBase);
     url = result.url;
     headers = result.headers;
     body = result.body;
   } else if (effectiveProtocol === "openai") {
-    const result = buildOpenAIUpstreamRequest(req, effectiveUpstreamBase);
+    // Inject LTM into system prompt (see comment above for openai-responses).
+    const ltmParts = [cache?.stableLtmSystem, cache?.ltmSystem].filter(Boolean);
+    const reqWithLtm = ltmParts.length
+      ? { ...req, system: [req.system, ...ltmParts].filter(Boolean).join("\n\n") }
+      : req;
+    const result = buildOpenAIUpstreamRequest(reqWithLtm, effectiveUpstreamBase);
     url = result.url;
     headers = result.headers;
     body = result.body;
@@ -2587,9 +2603,13 @@ async function handleConversationTurn(
         let cached = ltmSessionCache.get(sessionID);
 
         if (!cached) {
-          // Reserve budget for stable LTM already injected in system[1]
+          // Reserve budget for stable LTM already injected in system[1].
+          // Guarantee at least 50% of the total budget for context-bound
+          // entries — preferences are useful but gotchas/patterns are more
+          // critical for correctness during active work.
           const stableTokens = stable?.tokenCount ?? 0;
-          const contextBudget = Math.max(0, ltmBudget - stableTokens);
+          const minContextBudget = Math.floor(ltmBudget * 0.5);
+          const contextBudget = Math.max(minContextBudget, ltmBudget - stableTokens);
           // Exclude preferences — they're already in system[1]
           const contextEntries = await ltm.forSession(projectPath, sessionID, contextBudget, {
             excludeCategories: ["preference"],
@@ -2704,7 +2724,8 @@ async function handleConversationTurn(
       const ltmFraction = cfg.budget.ltm;
       const ltmBudget = getLtmBudget(ltmFraction);
       const stableTokens = stableLtmCache.get(sessionID)?.tokenCount ?? 0;
-      const contextBudget = Math.max(0, ltmBudget - stableTokens);
+      const minContextBudget = Math.floor(ltmBudget * 0.5);
+      const contextBudget = Math.max(minContextBudget, ltmBudget - stableTokens);
       const contextHint = lastUserTextTrimmed(req);
       const contextEntries = await ltm.forSession(projectPath, sessionID, contextBudget, {
         excludeCategories: ["preference"],