Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 13 additions & 2 deletions packages/core/src/embedding.ts
Original file line number Diff line number Diff line change
Expand Up @@ -706,14 +706,25 @@ type VectorHit = { id: string; similarity: number };
* Search all knowledge entries with embeddings by cosine similarity.
* Returns top-k entries sorted by similarity descending.
* Pure brute-force — fine for <100 entries (microseconds).
*
* @param excludeCategories Optional category names to exclude from results.
* Useful when preferences are injected in a separate system block and
* shouldn't compete for vector search slots with context-bound entries.
*/
export function vectorSearch(
queryEmbedding: Float32Array,
limit = 10,
excludeCategories?: string[],
): VectorHit[] {
let sql = "SELECT id, embedding FROM knowledge WHERE embedding IS NOT NULL AND confidence > 0.2";
const params: string[] = [];
if (excludeCategories?.length) {
sql += ` AND category NOT IN (${excludeCategories.map(() => "?").join(",")})`;
params.push(...excludeCategories);
}
const rows = db()
.query("SELECT id, embedding FROM knowledge WHERE embedding IS NOT NULL AND confidence > 0.2")
.all() as Array<{ id: string; embedding: Buffer }>;
.query(sql)
.all(...params) as Array<{ id: string; embedding: Buffer }>;

const scored: VectorHit[] = [];
for (const row of rows) {
Expand Down
16 changes: 12 additions & 4 deletions packages/core/src/ltm.ts
Original file line number Diff line number Diff line change
Expand Up @@ -355,16 +355,24 @@ function scoreEntriesFTS(sessionContext: string): Map<string, number> {
}
}

/**
 * The standard category names that the curator assigns to knowledge entries.
 *
 * The underlying DB column is a free-form string, so other values can occur;
 * this union only enumerates the well-known ones.
 */
export type KnowledgeCategory =
  | "decision"
  | "pattern"
  | "preference"
  | "architecture"
  | "gotcha";

/**
 * Options for `forSession()` to control entry selection.
 *
 * Both category lists accept the well-known `KnowledgeCategory` names as well
 * as arbitrary strings; the `(string & {})` arm keeps editor completion for
 * the known literals while still allowing any other string value.
 */
export type ForSessionOptions = {
  /** Caller-provided context (e.g., user's current message) for relevance
   *  scoring when no session context exists in the DB yet. */
  contextHint?: string;
  /** Restrict to these categories (e.g., `['preference']` for turn 1). */
  categories?: (KnowledgeCategory | (string & {}))[];
  /** Exclude these categories (e.g., `['preference']` for context-bound
   *  entries when preferences are already injected in a separate block).
   *  Mutually exclusive with `categories` — if both are provided,
   *  `categories` (include) wins. */
  excludeCategories?: (KnowledgeCategory | (string & {}))[];
};

/**
Expand Down Expand Up @@ -473,7 +481,7 @@ export async function forSession(
let vectorScores: Map<string, number>;
try {
const [contextVec] = await embedding.embed([sessionContext], "query");
const hits = embedding.vectorSearch(contextVec, 50);
const hits = embedding.vectorSearch(contextVec, 50, excludeFilter);
vectorScores = new Map(hits.map((h) => [h.id, h.similarity]));
} catch (err) {
log.warn("Vector scoring failed, falling back to FTS5:", err);
Expand Down
61 changes: 61 additions & 0 deletions packages/core/test/ltm.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -747,6 +747,67 @@ describe("ltm.forSession", () => {
}
});

test("excludeCategories filters cross-project entries too", async () => {
  // Seed one cross-project preference (must be filtered out), one
  // cross-project gotcha and one local architecture entry (both allowed).
  const seeds = [
    {
      category: "preference",
      title: "Cross-project pref to exclude",
      content: "This cross-project preference should not appear",
      crossProject: true,
    },
    {
      category: "gotcha",
      title: "Cross-project gotcha to include",
      content: "This cross-project gotcha should appear",
      crossProject: true,
    },
    {
      category: "architecture",
      title: "Local arch entry to include",
      content: "This local architecture entry should appear",
      crossProject: false,
    },
  ] as const;

  for (const seed of seeds) {
    ltm.create({
      projectPath: PROJ,
      category: seed.category,
      title: seed.title,
      content: seed.content,
      scope: "project",
      crossProject: seed.crossProject,
    });
  }

  const entries = await ltm.forSession(PROJ, SESSION, 10_000, {
    excludeCategories: ["preference"],
  });

  // Something must come back, and none of it may be a preference.
  expect(entries.length).toBeGreaterThan(0);
  entries.forEach((entry) => {
    expect(entry.category).not.toBe("preference");
  });
});

test("empty excludeCategories array has no effect", async () => {
  // Two local entries in different categories.
  const seeds = [
    {
      category: "preference",
      title: "Pref for empty-exclude test",
      content: "Should appear when excludeCategories is empty array",
    },
    {
      category: "gotcha",
      title: "Gotcha for empty-exclude test",
      content: "Should also appear when excludeCategories is empty",
    },
  ] as const;

  for (const seed of seeds) {
    ltm.create({
      projectPath: PROJ,
      category: seed.category,
      title: seed.title,
      content: seed.content,
      scope: "project",
      crossProject: false,
    });
  }

  const entries = await ltm.forSession(PROJ, SESSION, 10_000, {
    excludeCategories: [],
  });

  // Both categories should be present — empty exclude means no filtering
  const seenCategories = new Set<unknown>();
  for (const entry of entries) {
    seenCategories.add(entry.category);
  }
  expect(seenCategories.size).toBeGreaterThanOrEqual(2);
});

test("contextHint provides relevance signal when no session context exists", async () => {
// Create entries about different topics
ltm.create({
Expand Down
63 changes: 42 additions & 21 deletions packages/gateway/src/pipeline.ts
Original file line number Diff line number Diff line change
Expand Up @@ -279,31 +279,35 @@ const stableLtmCache = new Map<

/**
 * Estimate the character-level difference between two strings as a ratio
 * in the range 0..1 (0 = identical, 1 = completely different).
 *
 * Characters are compared position-by-position at evenly spaced sample
 * points across the overlapping region, so interior changes are detected,
 * not just prefix/suffix differences. Any length difference between the two
 * strings counts fully as mismatch.
 *
 * For short strings (≤1000 chars of overlap) every position is compared.
 * For longer strings at most 1000 positions are sampled, so the comparison
 * loop is bounded regardless of input size.
 *
 * This is a heuristic, not a real diff: because the comparison is
 * positional, an insertion or deletion shifts every later character and is
 * reported as a large difference.
 *
 * @param a First string.
 * @param b Second string.
 * @returns Estimated fraction of differing characters, clamped to at most 1.
 */
function textDiffRatio(a: string, b: string): number {
  if (a === b) return 0;
  // Exactly one side is empty here (both-empty was caught above).
  if (!a || !b) return 1;

  const maxSamples = 1000;
  const minLen = Math.min(a.length, b.length);
  const maxLen = Math.max(a.length, b.length);

  // Characters beyond the shorter string have nothing to match against.
  const lengthDiff = maxLen - minLen;

  // Sample up to `maxSamples` positions across the overlapping region.
  const sampleCount = Math.min(minLen, maxSamples);
  const step = sampleCount < minLen ? minLen / sampleCount : 1;
  let mismatches = 0;
  for (let i = 0; i < sampleCount; i++) {
    const idx = Math.floor(i * step);
    if (a[idx] !== b[idx]) mismatches++;
  }

  // Extrapolate the sampled mismatch rate to the full overlapping region,
  // then add the length difference.
  const mismatchRate = sampleCount > 0 ? mismatches / sampleCount : 0;
  const estimatedMismatches = mismatchRate * minLen + lengthDiff;
  return Math.min(1, estimatedMismatches / maxLen);
}

/** Cached LLM client for background workers. */
Expand Down Expand Up @@ -930,12 +934,24 @@ async function forwardToUpstream(
: config.upstreamOpenAI);

if (effectiveProtocol === "openai-responses") {
const result = buildOpenAIResponsesUpstreamRequest(req, effectiveUpstreamBase);
// Inject LTM into system prompt for non-Anthropic paths.
// Anthropic handles LTM via separate system blocks in buildAnthropicRequest;
// OpenAI paths receive a single system string, so we concatenate here.
const ltmParts = [cache?.stableLtmSystem, cache?.ltmSystem].filter(Boolean);
const reqWithLtm = ltmParts.length
? { ...req, system: [req.system, ...ltmParts].filter(Boolean).join("\n\n") }
: req;
const result = buildOpenAIResponsesUpstreamRequest(reqWithLtm, effectiveUpstreamBase);
url = result.url;
headers = result.headers;
body = result.body;
} else if (effectiveProtocol === "openai") {
const result = buildOpenAIUpstreamRequest(req, effectiveUpstreamBase);
// Inject LTM into system prompt (see comment above for openai-responses).
const ltmParts = [cache?.stableLtmSystem, cache?.ltmSystem].filter(Boolean);
const reqWithLtm = ltmParts.length
? { ...req, system: [req.system, ...ltmParts].filter(Boolean).join("\n\n") }
: req;
const result = buildOpenAIUpstreamRequest(reqWithLtm, effectiveUpstreamBase);
url = result.url;
headers = result.headers;
body = result.body;
Expand Down Expand Up @@ -2587,9 +2603,13 @@ async function handleConversationTurn(
let cached = ltmSessionCache.get(sessionID);

if (!cached) {
// Reserve budget for stable LTM already injected in system[1]
// Reserve budget for stable LTM already injected in system[1].
// Guarantee at least 50% of the total budget for context-bound
// entries — preferences are useful but gotchas/patterns are more
// critical for correctness during active work.
const stableTokens = stable?.tokenCount ?? 0;
const contextBudget = Math.max(0, ltmBudget - stableTokens);
const minContextBudget = Math.floor(ltmBudget * 0.5);
const contextBudget = Math.max(minContextBudget, ltmBudget - stableTokens);
// Exclude preferences — they're already in system[1]
const contextEntries = await ltm.forSession(projectPath, sessionID, contextBudget, {
excludeCategories: ["preference"],
Expand Down Expand Up @@ -2704,7 +2724,8 @@ async function handleConversationTurn(
const ltmFraction = cfg.budget.ltm;
const ltmBudget = getLtmBudget(ltmFraction);
const stableTokens = stableLtmCache.get(sessionID)?.tokenCount ?? 0;
const contextBudget = Math.max(0, ltmBudget - stableTokens);
const minContextBudget = Math.floor(ltmBudget * 0.5);
const contextBudget = Math.max(minContextBudget, ltmBudget - stableTokens);
const contextHint = lastUserTextTrimmed(req);
const contextEntries = await ltm.forSession(projectPath, sessionID, contextBudget, {
excludeCategories: ["preference"],
Expand Down
Loading