RobertLD · RobertLD · Mar 16, 2026 · Mar 11, 2026 · Mar 12, 2026 · Mar 12, 2026
diff --git a/src/cli/index.ts b/src/cli/index.ts
@@ -6,7 +6,7 @@ import { getDatabase, runMigrations, createVectorTable, closeDatabase } from "..
 import { createEmbeddingProvider, type EmbeddingProvider } from "../providers/index.js";
 import { indexDocument, indexFile } from "../core/indexing.js";
 import { getSupportedExtensions } from "../core/parsers/index.js";
-import { searchDocuments } from "../core/search.js";
+import { searchDocuments, getRelatedChunks } from "../core/search.js";
 import { askQuestion, createLlmProvider } from "../core/rag.js";
 import { getDocumentRatings, listRatings } from "../core/ratings.js";
 import { createTopic, listTopics } from "../core/topics.js";
@@ -452,6 +452,93 @@ program
     },
   );
 
+// related
+program
+  .command("related <chunkId>")
+  .description("Find chunks related to a given chunk by vector similarity")
+  .option("--limit <n>", "Number of results", "10")
+  .option("--topic <topic>", "Filter by topic")
+  .option("--library <lib>", "Filter by library")
+  .option("--min-score <n>", "Minimum similarity score (0-1)")
+  .option("--tags <tags>", "Comma-separated tags to filter by")
+  .action(
+    (
+      chunkId: string,
+      opts: {
+        limit: string;
+        topic?: string;
+        library?: string;
+        minScore?: string;
+        tags?: string;
+      },
+    ) => {
+      const { db } = initializeApp();
+      try {
+        const limit = parseIntOption(opts.limit, "--limit");
+
+        // Number() rejects trailing garbage such as "0.5abc" that parseFloat would
+        // accept; the range check keeps NaN from silently disabling the filter.
+        let minScore: number | undefined;
+        if (opts.minScore !== undefined) {
+          minScore = opts.minScore.trim() === "" ? Number.NaN : Number(opts.minScore);
+          if (!Number.isFinite(minScore) || minScore < 0 || minScore > 1) {
+            console.error(
+              `Error: --min-score must be a number between 0 and 1 (got "${opts.minScore}")`,
+            );
+            process.exitCode = 1;
+            return;
+          }
+        }
+
+        // Drop empty entries so "a,,b" or a trailing comma never yields an empty tag.
+        const parsedTags = opts.tags
+          ?.split(",")
+          .map((t) => t.trim())
+          .filter((t) => t.length > 0);
+        const tags = parsedTags !== undefined && parsedTags.length > 0 ? parsedTags : undefined;
+
+        let result;
+        try {
+          result = getRelatedChunks(db, {
+            chunkId,
+            ...(limit !== undefined && { limit }),
+            ...(opts.topic !== undefined && { topic: opts.topic }),
+            ...(opts.library !== undefined && { library: opts.library }),
+            ...(tags !== undefined && { tags }),
+            ...(minScore !== undefined && { minScore }),
+          });
+        } catch (err) {
+          const message = err instanceof Error ? err.message : String(err);
+          console.error(`Error: ${message}`);
+          // process.exit() would terminate before the finally block below runs,
+          // leaving the database unclosed; set the exit code and return instead.
+          process.exitCode = 1;
+          return;
+        }
+
+        const { sourceChunk, chunks } = result;
+        console.log(
+          `\nRelated to chunk: ${sourceChunk.id} from document ${sourceChunk.documentId}`,
+        );
+
+        if (chunks.length === 0) {
+          console.log("No related chunks found.");
+        } else {
+          console.log(`\nShowing ${chunks.length} related chunks:\n`);
+          for (const r of chunks) {
+            console.log(`\n── ${r.title} (score: ${r.score.toFixed(2)}) ──`);
+            console.log(`  Chunk ID: ${r.chunkId}`);
+            if (r.library) console.log(`  Library: ${r.library}`);
+            if (r.url) console.log(`  Source: ${r.url}`);
+            console.log(`  ${r.content.slice(0, 200)}${r.content.length > 200 ? "..." : ""}`);
+          }
+        }
+      } finally {
+        closeDatabase();
+      }
+    },
+  );
+
 // saved searches
 const searchesCmd = program.command("searches").description("Manage saved searches");
 

diff --git a/src/core/search.ts b/src/core/search.ts
@@ -102,6 +102,27 @@ export interface SearchResult {
   contextAfter?: ContextChunk[] | undefined;
 }
 
+export interface RelatedChunksOptions {
+  chunkId: string; // ID of the source chunk whose stored embedding seeds the search
+  limit?: number; // default 10
+  excludeDocumentId?: string; // document whose chunks are excluded (default: the source chunk's document)
+  topic?: string; // restrict results to documents with this topic_id
+  library?: string; // restrict results to documents in this library
+  tags?: string[]; // tag filter, applied through buildTagFilter
+  minScore?: number; // vector results scoring below this are dropped; default 0.0 (no filtering)
+  includeLinkedDocuments?: boolean; // also add the first chunk of each document linked via document_links (default false)
+}
+
+export interface RelatedChunksResult {
+  chunks: SearchResult[]; // related chunks, trimmed to the requested limit
+  sourceChunk: { // the chunk the lookup was seeded from
+    id: string;
+    documentId: string; // from chunks.document_id
+    content: string;
+    chunkIndex: number; // from chunks.chunk_index
+  };
+}
+
 // ---------------------------------------------------------------------------
 // Title boost multiplier: chunks whose document title contains any query word
 // receive this multiplicative boost to their final score.
@@ -820,6 +841,259 @@ function attachRatings(db: Database.Database, results: SearchResult[]): SearchRe
   return results.map((r) => ({ ...r, avgRating: ratingMap.get(r.documentId) ?? null }));
 }
 
+/**
+ * Find chunks related to a given chunk by vector similarity.
+ *
+ * Looks up the source chunk's stored embedding, searches `chunk_embeddings` for
+ * the closest embeddings, then filters the candidates by document, library,
+ * topic and tags. The source chunk itself is never returned, and chunks from the
+ * source document are excluded unless `excludeDocumentId` names another document.
+ * Returns synchronously.
+ *
+ * @param db - Open database handle.
+ * @param options - Source chunk ID plus optional limit, filters and blending flag.
+ * @returns The source chunk and at most `limit` related chunks, highest score first.
+ * @throws Error if the chunk does not exist or has no stored embedding.
+ */
+export function getRelatedChunks(
+  db: Database.Database,
+  options: RelatedChunksOptions,
+): RelatedChunksResult {
+  const { chunkId } = options;
+  // Truncate and clamp the limit: a fractional or NaN value would otherwise flow
+  // into the SQL LIMIT bindings and the final slice().
+  const requestedLimit = Math.trunc(options.limit ?? 10);
+  const limit = Number.isNaN(requestedLimit) ? 10 : Math.max(1, Math.min(requestedLimit, 1000));
+  const minScore = options.minScore ?? 0.0;
+
+  // Look up the source chunk
+  const SourceChunkSchema = z.object({
+    id: z.string(),
+    document_id: z.string(),
+    content: z.string(),
+    chunk_index: z.number(),
+  });
+  const sourceChunkRow = validateRow(
+    SourceChunkSchema.optional(),
+    db
+      .prepare(`SELECT id, document_id, content, chunk_index FROM chunks WHERE id = ?`)
+      .get(chunkId),
+    "getRelatedChunks.sourceChunk",
+  );
+  if (!sourceChunkRow) {
+    throw new Error(`Chunk not found: ${chunkId}`);
+  }
+
+  const sourceChunk = {
+    id: sourceChunkRow.id,
+    documentId: sourceChunkRow.document_id,
+    content: sourceChunkRow.content,
+    chunkIndex: sourceChunkRow.chunk_index,
+  };
+
+  const excludeDocumentId = options.excludeDocumentId ?? sourceChunkRow.document_id;
+
+  // Fetch the embedding for the source chunk
+  const EmbeddingRowSchema = z.object({ embedding: z.instanceof(Buffer) });
+  const embeddingRow = validateRow(
+    EmbeddingRowSchema.optional(),
+    db.prepare(`SELECT embedding FROM chunk_embeddings WHERE chunk_id = ?`).get(chunkId),
+    "getRelatedChunks.embedding",
+  );
+  if (!embeddingRow) {
+    throw new Error(`No embedding found for chunk: ${chunkId}`);
+  }
+
+  const vecBuffer = embeddingRow.embedding;
+
+  // Build SQL: vector ANN search excluding the source document. The inner query
+  // over-fetches (limit * 10) because the document/library/topic/tag filters are
+  // applied only after the nearest neighbours have been selected.
+  const tagFilter = buildTagFilter(options.tags, "d");
+
+  let sql = `
+    SELECT
+      candidates.chunk_id,
+      candidates.distance,
+      c.document_id,
+      c.content AS chunk_content,
+      d.title,
+      d.source_type,
+      d.library,
+      d.version,
+      d.topic_id,
+      d.url
+    FROM (
+      SELECT chunk_id, distance
+      FROM chunk_embeddings
+      WHERE embedding MATCH ?
+      ORDER BY distance
+      LIMIT ?
+    ) candidates
+    JOIN chunks c ON c.id = candidates.chunk_id
+    JOIN documents d ON d.id = c.document_id
+    WHERE c.document_id != ?
+      AND c.id != ?
+  `;
+
+  // The source chunk is excluded by ID as well: when the caller overrides
+  // excludeDocumentId, the document filter alone would return the seed chunk
+  // as its own nearest neighbour.
+  const params: unknown[] = [vecBuffer, limit * 10, excludeDocumentId, chunkId];
+
+  if (options.library) {
+    sql += ` AND d.library = ?`;
+    params.push(options.library);
+  }
+  if (options.topic) {
+    sql += ` AND d.topic_id = ?`;
+    params.push(options.topic);
+  }
+  sql += tagFilter.clause;
+  params.push(...tagFilter.params);
+
+  sql += ` ORDER BY candidates.distance LIMIT ?`;
+  params.push(limit * 2); // over-fetch to allow minScore filtering
+
+  const RelatedRowSchema = z.object({
+    chunk_id: z.string(),
+    distance: z.number(),
+    document_id: z.string(),
+    chunk_content: z.string(),
+    title: z.string(),
+    source_type: z.string(),
+    library: z.string().nullable(),
+    version: z.string().nullable(),
+    topic_id: z.string().nullable(),
+    url: z.string().nullable(),
+  });
+
+  const rows = validateRows(
+    RelatedRowSchema,
+    db.prepare(sql).all(...params),
+    "getRelatedChunks.rows",
+  );
+
+  let results: SearchResult[] = rows.map((row) => {
+    const similarity = 1 - row.distance;
+    return {
+      documentId: row.document_id,
+      chunkId: row.chunk_id,
+      title: row.title,
+      content: row.chunk_content,
+      sourceType: row.source_type,
+      library: row.library,
+      version: row.version,
+      topicId: row.topic_id,
+      url: row.url,
+      score: similarity,
+      avgRating: null,
+      scoreExplanation: {
+        method: "vector" as SearchMethod,
+        rawScore: row.distance,
+        boostFactors: [],
+        details: `Vector similarity: distance=${row.distance.toFixed(4)}, similarity=${similarity.toFixed(4)}`,
+      },
+    };
+  });
+
+  // Apply minScore filter
+  if (minScore > 0) {
+    results = results.filter((r) => r.score >= minScore);
+  }
+
+  // Optional: blend in explicitly linked documents. These bypass minScore and
+  // the library/topic/tag filters and enter with a fixed score of 0.6.
+  if (options.includeLinkedDocuments) {
+    const LinkedDocSchema = z.object({ linked_doc_id: z.string() });
+    const linkedDocs = validateRows(
+      LinkedDocSchema,
+      db
+        .prepare(
+          `SELECT DISTINCT
+            CASE WHEN source_id = ? THEN target_id ELSE source_id END AS linked_doc_id
+          FROM document_links
+          WHERE source_id = ? OR target_id = ?`,
+        )
+        .all(sourceChunk.documentId, sourceChunk.documentId, sourceChunk.documentId),
+      "getRelatedChunks.linkedDocs",
+    );
+
+    const LinkedChunkSchema = z.object({
+      id: z.string(),
+      document_id: z.string(),
+      content: z.string(),
+      chunk_index: z.number(),
+      title: z.string(),
+      source_type: z.string(),
+      library: z.string().nullable(),
+      version: z.string().nullable(),
+      topic_id: z.string().nullable(),
+      url: z.string().nullable(),
+    });
+
+    // Prepared once and reused for every linked document.
+    const firstChunkStmt = db.prepare(
+      `SELECT c.id, c.document_id, c.content, c.chunk_index,
+              d.title, d.source_type, d.library, d.version, d.topic_id, d.url
+       FROM chunks c
+       JOIN documents d ON d.id = c.document_id
+       WHERE c.document_id = ?
+       ORDER BY c.chunk_index ASC
+       LIMIT 1`,
+    );
+
+    const presentDocIds = new Set(results.map((r) => r.documentId));
+    for (const { linked_doc_id } of linkedDocs) {
+      // Skip documents that already have a result, and never re-introduce the
+      // source or excluded document (e.g. through a self-referencing link).
+      if (
+        presentDocIds.has(linked_doc_id) ||
+        linked_doc_id === sourceChunk.documentId ||
+        linked_doc_id === excludeDocumentId
+      ) {
+        continue;
+      }
+
+      const linkedChunk = validateRow(
+        LinkedChunkSchema.optional(),
+        firstChunkStmt.get(linked_doc_id),
+        "getRelatedChunks.linkedChunk",
+      );
+      if (!linkedChunk) {
+        continue;
+      }
+
+      presentDocIds.add(linked_doc_id);
+      results.push({
+        documentId: linkedChunk.document_id,
+        chunkId: linkedChunk.id,
+        title: linkedChunk.title,
+        content: linkedChunk.content,
+        sourceType: linkedChunk.source_type,
+        library: linkedChunk.library,
+        version: linkedChunk.version,
+        topicId: linkedChunk.topic_id,
+        url: linkedChunk.url,
+        score: 0.6,
+        avgRating: null,
+        scoreExplanation: {
+          method: "vector" as SearchMethod,
+          rawScore: 0.6,
+          boostFactors: ["linked_document"],
+          details: "Explicitly linked document",
+        },
+      });
+    }
+    results.sort((a, b) => b.score - a.score);
+  }
+
+  // Trim to requested limit
+  results = results.slice(0, limit);
+
+  return { chunks: results, sourceChunk };
+}
+
 /** FTS5-based full-text search with BM25 ranking. Uses AND logic by default. */
 function fts5Search(
   db: Database.Database,

diff --git a/src/mcp/server.ts b/src/mcp/server.ts
@@ -5,7 +5,7 @@ import { loadConfig } from "../config.js";
 import { getDatabase, runMigrations, createVectorTable } from "../db/index.js";
 import { getActiveWorkspace, getWorkspacePath } from "../core/workspace.js";
 import { createEmbeddingProvider } from "../providers/index.js";
-import { searchDocuments } from "../core/search.js";
+import { searchDocuments, getRelatedChunks } from "../core/search.js";
 import {
   askQuestion,
   createLlmProvider,
@@ -177,6 +177,58 @@ async function main(): Promise<void> {
     }),
   );
 
+  // Tool: get-related
+  server.tool(
+    "get-related",
+    "Find chunks semantically similar to a given chunk (more-like-this). Returns related content seeded from an existing chunk's stored embedding without requiring a text query.",
+    {
+      chunkId: z.string().describe("ID of the source chunk to find related content for"),
+      // Integer-only: the limit is multiplied and bound to SQL LIMIT parameters.
+      limit: z
+        .number()
+        .int()
+        .min(1)
+        .max(50)
+        .optional()
+        .describe("Number of results to return (default 10)"),
+      topic: z.string().optional().describe("Filter results to a specific topic"),
+      library: z.string().optional().describe("Filter results to a specific library"),
+      tags: z.array(z.string()).optional().describe("Filter results to documents with these tags"),
+      minScore: z
+        .number()
+        .min(0)
+        .max(1)
+        .optional()
+        .describe("Minimum similarity score threshold (0-1)"),
+      includeLinkedDocuments: z
+        .boolean()
+        .optional()
+        .describe("Also include explicitly linked documents even if below similarity threshold"),
+    },
+    withErrorHandling(
+      ({ chunkId, limit, topic, library, tags, minScore, includeLinkedDocuments }) => {
+        // Forward only the options that were supplied so getRelatedChunks applies its own defaults.
+        const result = getRelatedChunks(db, {
+          chunkId,
+          ...(limit !== undefined && { limit }),
+          ...(topic !== undefined && { topic }),
+          ...(library !== undefined && { library }),
+          ...(tags !== undefined && { tags }),
+          ...(minScore !== undefined && { minScore }),
+          ...(includeLinkedDocuments !== undefined && { includeLinkedDocuments }),
+        });
+        return {
+          content: [
+            {
+              type: "text" as const,
+              text: JSON.stringify(result, null, 2),
+            },
+          ],
+        };
+      },
+    ),
+  );
+
   // Tool: get-document
   server.tool(
     "get-document",