Merged
9 changes: 8 additions & 1 deletion docs/.vitepress/config.ts
@@ -54,6 +54,13 @@ export default defineConfig({
{ text: "Knowledge Packs", link: "/guide/knowledge-packs" },
],
},
{
text: "Deep Dives",
items: [
{ text: "How Search Works", link: "/guide/how-search-works" },
{ text: "Troubleshooting", link: "/guide/troubleshooting" },
],
},
],
"/reference/": [
{
@@ -83,7 +90,7 @@ export default defineConfig({
},

footer: {
message: "Released under the MIT License.",
message: "Released under the Business Source License 1.1.",
copyright: "Copyright © 2026 RobertLD",
},
},
64 changes: 64 additions & 0 deletions docs/guide/how-search-works.md
@@ -0,0 +1,64 @@
# How Search Works

LibScope uses a hybrid search strategy combining vector (semantic) search with full-text search (FTS5), merged via Reciprocal Rank Fusion (RRF).

## Search Pipeline

### 1. Query Embedding
Your search query is converted to a vector embedding using the configured embedding provider (local model or OpenAI). This captures the semantic meaning of your query.

### 2. Vector Search (ANN)
The query vector is matched against all indexed chunk embeddings using approximate nearest-neighbour (ANN) search via `sqlite-vec`. Results are ranked by cosine similarity — chunks semantically related to your query rank highest, even if they use different words.

### 3. Full-Text Search (FTS5)
Simultaneously, SQLite's FTS5 (BM25 ranking) searches for chunks containing your query terms. LibScope first tries AND logic (all terms must match) for precision, then falls back to OR logic if no results are found.
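The AND-first, OR-fallback behaviour can be sketched as follows (an illustrative standalone function, not LibScope's actual query builder):

```typescript
// Build the two FTS5 match expressions: strict AND first, OR as fallback.
// Illustrative only; the real query construction may differ.
function buildFtsQueries(query: string): { strict: string; fallback: string } {
  const terms = query.trim().split(/\s+/).filter(Boolean);
  return {
    strict: terms.join(" AND "), // all terms must match (precision)
    fallback: terms.join(" OR "), // any term may match (recall)
  };
}
```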

### 4. Hybrid Fusion (RRF)
Vector and FTS5 results are merged using **Reciprocal Rank Fusion (RRF)** — a technique that combines ranked lists without needing calibrated scores:

```
RRF_score(chunk) = Σ 1 / (k + rank_in_list)
```

where `k = 60` (standard constant). Chunks that rank well in *both* vector and FTS5 lists get the highest fused scores.
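The fusion formula above can be sketched in TypeScript (a self-contained illustration of RRF, not LibScope's implementation):

```typescript
// Merge ranked lists of chunk IDs with Reciprocal Rank Fusion.
// k = 60 is the conventional damping constant.
function rrfFuse(rankedLists: string[][], k = 60): Array<[string, number]> {
  const scores = new Map<string, number>();
  for (const list of rankedLists) {
    list.forEach((id, index) => {
      const rank = index + 1; // ranks are 1-based
      scores.set(id, (scores.get(id) ?? 0) + 1 / (k + rank));
    });
  }
  // Highest fused score first.
  return [...scores.entries()].sort((a, b) => b[1] - a[1]);
}
```

A chunk ranked first in both lists scores 2/61, beating a chunk ranked first in only one list (1/61), which is why agreement between vector and FTS5 rankings dominates.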

### 5. Title Boost
Chunks whose document title contains any query word receive a 1.5× score multiplier, lifting exact-title matches to the top.
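A minimal sketch of this boost (assumed helper name and case-insensitive substring matching; the actual implementation may differ):

```typescript
// Apply the 1.5x multiplier when any query word appears in the title.
function applyTitleBoost(score: number, title: string, query: string): number {
  const t = title.toLowerCase();
  const words = query.toLowerCase().split(/\s+/).filter(Boolean);
  return words.some((w) => t.includes(w)) ? score * 1.5 : score;
}
```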

### 6. Pagination & Deduplication
Results are optionally deduplicated by document (`maxChunksPerDocument`) and paginated. Use `offset` and `limit` in your search options for pagination.

## Search Methods

| Method | When Used | Best For |
|--------|-----------|---------|
| `hybrid` | sqlite-vec available + FTS5 match | Most queries — best precision & recall |
| `vector` | sqlite-vec available, FTS5 returns nothing | Conceptual/semantic queries |
| `fts5` | Part of hybrid pipeline | Keyword-heavy queries |
| `keyword` | sqlite-vec unavailable | Fallback — exact word matching only |

The active method is returned in each result's `scoreExplanation.method` field.

## Score Explanation

Every search result includes a `scoreExplanation` object:

```typescript
{
method: "hybrid" | "vector" | "fts5" | "keyword",
rawScore: number, // raw score before boosts
boostFactors: string[], // e.g. ["title_match:x1.5"]
details: string // human-readable scoring breakdown
}
```

## Tuning Search

| Option | Default | Effect |
|--------|---------|--------|
| `limit` | 10 | Results per page |
| `offset` | 0 | Pagination offset |
| `maxChunksPerDocument` | unlimited | Max chunks returned per document |
| `contextChunks` | 0 | Adjacent chunks to include for context (max 2) |
| `minRating` | none | Filter by minimum avg document rating |
| `tags` | none | Filter by document tags (AND logic) |
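As a sketch, the options above might be combined like this (hypothetical object shape; consult the API reference for the actual search entry point):

```typescript
// Hypothetical search options combining the tuning table above.
const options = {
  limit: 5, // five results per page
  offset: 5, // second page
  maxChunksPerDocument: 2, // at most two chunks from any one document
  contextChunks: 1, // include adjacent chunks for context
  minRating: 3, // only documents with an average rating of 3 or higher
  tags: ["sqlite", "performance"], // both tags required (AND logic)
};
```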
92 changes: 92 additions & 0 deletions docs/guide/troubleshooting.md
@@ -0,0 +1,92 @@
# Troubleshooting

## Installation Issues

### `no such module: vec0` or `sqlite-vec` not loading

**Symptom:** Errors mentioning `vec0`, `vec_each`, or `no such module` when indexing or searching.

**Cause:** `sqlite-vec` requires a native Node.js addon that must be built for your platform.

**Fix:**
```bash
npm rebuild sqlite-vec
# or
npm install --force
```

If you're on an unsupported platform, LibScope will fall back to keyword-only search automatically.

---

### Model download fails or hangs

**Symptom:** First run hangs at "Downloading model..." or fails with a network error.

**Cause:** The local embedding model (~80MB) downloads from Hugging Face on first use.

**Fix:**
- Check your internet connection
- If behind a proxy, set `HTTPS_PROXY` environment variable
- To use OpenAI embeddings instead (no download required): `libscope config set embedding.provider openai`

---

### Embedding dimension mismatch after switching providers

**Symptom:** Error like `expected N dimensions, got M` when searching after changing the embedding provider.

**Cause:** Changing embedding providers produces vectors of different dimensions. Existing embeddings in the database are incompatible.

**Fix:** Re-index all documents after switching providers:
```bash
libscope db reset # clears all indexed content
libscope pack install ... # re-install packs
```

---

## Search Issues

### Search returns no results

1. Check that documents are indexed: `libscope list`
2. Try a simpler query — FTS5 AND logic requires all terms to match
3. Check your filters — `--library`, `--topic`, `--tags` may be too restrictive
4. Run `libscope search "test" --limit 5` to verify basic search works

### Results seem irrelevant

- The local embedding model is smaller and less accurate than OpenAI — consider switching: `libscope config set embedding.provider openai`
- Ensure documents were indexed after the embedding provider was configured
- Try adding more context to your query

---

## API / MCP Issues

### `401 Unauthorized` from API

The REST API requires an `X-API-Key` header. Find your key: `libscope config show`

### MCP tools not appearing in Claude / Cursor

1. Verify the MCP server is running: `libscope mcp start`
2. Check the MCP config path in your client settings points to the libscope server
3. Restart your AI client after adding the MCP server

---

## Database Issues

### Database locked errors

LibScope uses SQLite's WAL (write-ahead logging) mode, which supports concurrent reads but only one writer. If you see lock errors:
- Ensure only one libscope process is running
- Check for stuck processes: `ps aux | grep libscope`

### How to reset the database

```bash
libscope db reset # removes all indexed content (keeps config)
```
2 changes: 1 addition & 1 deletion docs/index.md
@@ -19,7 +19,7 @@ features:
details: Vector similarity search powered by sqlite-vec, with FTS5 full-text fallback. Find what you need, even when you don't know the exact words.
- icon: 🤖
title: MCP Integration
details: 17 tools for AI assistants out of the box. Works with Claude, Cursor, VS Code, and any MCP-compatible client.
details: 26 tools for AI assistants out of the box. Works with Claude, Cursor, VS Code, and any MCP-compatible client.
- icon: 🔗
title: Connectors
details: Pull in docs from Obsidian, Notion, Confluence, Slack, OneNote, and GitHub/GitLab. Keep everything in one place.
23 changes: 23 additions & 0 deletions docs/reference/mcp-tools.md
@@ -16,6 +16,17 @@ Semantic search across your knowledge base.
| `limit` | number | | Max results (default: 10) |
| `offset` | number | | Pagination offset |

**Search results** include a `scoreExplanation` object on each result:

```typescript
{
method: "hybrid" | "vector" | "fts5" | "keyword",
rawScore: number, // raw score before boosts
boostFactors: string[], // e.g. ["title_match:x1.5"]
details: string // human-readable scoring breakdown
}
```

## get-document

Retrieve a document by its ID, including ratings and metadata.
@@ -45,6 +56,18 @@ Index a new document. You can provide content directly, or a URL to fetch automa
| `version` | string | | Library version |
| `topic` | string | | Topic to categorize under |
| `sourceType` | string | | `library`, `topic`, `manual`, or `model-generated` |
| `dedup` | string | | Duplicate detection behaviour (see below) |
| `dedupOptions` | object | | Fine-tune duplicate detection (see below) |

**`dedup`** *(optional)*: Controls duplicate detection behaviour.
- `"skip"` — If a duplicate is detected, return the existing document without re-indexing
- `"warn"` — Log a warning about the duplicate but index anyway
- `"force"` — Skip duplicate checking entirely and always index
- *(omitted)* — Default behaviour: reject exact duplicates by title+content-length, allow similar content

**`dedupOptions`** *(optional)*: Fine-tune duplicate detection.
- `threshold` *(number, 0–1)*: Similarity threshold for semantic dedup (default 0.95)
- `strategy` *(string)*: `"exact"` (hash-based) or `"semantic"` (embedding-based)
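For example, an `index-document` call that skips re-indexing near-duplicates might pass arguments like these (hypothetical values):

```typescript
// Hypothetical tool arguments: skip indexing when a semantically
// similar document (similarity >= 0.9) already exists.
const args = {
  title: "React Hooks Guide",
  content: "...",
  dedup: "skip",
  dedupOptions: { threshold: 0.9, strategy: "semantic" },
};
```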

## update-document

3 changes: 2 additions & 1 deletion src/api/server.ts
@@ -9,6 +9,7 @@ import { ConnectorScheduler, loadScheduleEntries } from "../core/scheduler.js";
export interface ApiServerOptions {
port?: number | undefined;
host?: string | undefined;
/** Allowed CORS origins. Defaults to ["http://localhost", "http://localhost:3000"]. */
corsOrigins?: string[] | undefined;
enableScheduler?: boolean | undefined;
}
@@ -25,7 +26,7 @@
const log = getLogger();
const port = options?.port ?? 3378;
const host = options?.host ?? "localhost";
const corsOrigins = options?.corsOrigins ?? ["*"];
const corsOrigins = options?.corsOrigins ?? ["http://localhost", "http://localhost:3000"];

const server = createServer((req, res) => {
// Rate limiting
2 changes: 1 addition & 1 deletion src/connectors/confluence.ts
@@ -321,7 +321,7 @@ export function convertConfluenceStorage(html: string): string {

// ri:attachment → [attached: filename] as span
processed = processed.replace(
/<ri:attachment\s+ri:filename="([^"]*)"[^>]*\/?>/gi,
/<ri:attachment\s+ri:filename="([^"]*)"[^>]{0,500}\/?>/gi,
(_match, filename: string) => `<span>[attached: ${filename}]</span>`,
);

33 changes: 22 additions & 11 deletions src/core/analytics.ts
@@ -73,12 +73,23 @@ export function logSearch(db: Database.Database, entry: SearchLogEntry): string

/** Return overview stats for the knowledge base. */
export function getStats(db: Database.Database, dbPath?: string): OverviewStats {
const docs = db.prepare("SELECT COUNT(*) AS cnt FROM documents").get() as { cnt: number };
const chunks = db.prepare("SELECT COUNT(*) AS cnt FROM chunks").get() as { cnt: number };
const topics = db.prepare("SELECT COUNT(*) AS cnt FROM topics").get() as { cnt: number };
const searches = db.prepare("SELECT COUNT(*) AS cnt FROM search_log").get() as { cnt: number };
const latency = db.prepare("SELECT AVG(latency_ms) AS avg FROM search_log").get() as {
avg: number | null;
const row = db
.prepare(
`
SELECT
(SELECT COUNT(*) FROM documents) AS doc_count,
(SELECT COUNT(*) FROM chunks) AS chunk_count,
(SELECT COUNT(*) FROM topics) AS topic_count,
(SELECT COUNT(*) FROM search_log) AS search_count,
(SELECT AVG(latency_ms) FROM search_log) AS avg_latency
`,
)
.get() as {
doc_count: number;
chunk_count: number;
topic_count: number;
search_count: number;
avg_latency: number | null;
};

let databaseSizeBytes = 0;
@@ -91,12 +102,12 @@ export function getStats(db: Database.Database, dbPath?: string): OverviewStats
}

return {
totalDocuments: docs.cnt,
totalChunks: chunks.cnt,
totalTopics: topics.cnt,
totalDocuments: row.doc_count,
totalChunks: row.chunk_count,
totalTopics: row.topic_count,
databaseSizeBytes,
totalSearches: searches.cnt,
avgLatencyMs: Math.round(latency.avg ?? 0),
totalSearches: row.search_count,
avgLatencyMs: Math.round(row.avg_latency ?? 0),
};
}
