RobertLD · RobertLD · Mar 2, 2026 · Mar 2, 2026 · Mar 2, 2026 · Mar 2, 2026
diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md
@@ -12,4 +12,5 @@ Refer to [agents.md](../agents.md) at the repository root for full architecture,
 - sqlite-vec is loaded via `createRequire` — do not change this to an ESM import.
 - Database migrations are versioned — never modify existing migrations, only add new ones.
 - Tests use `MockEmbeddingProvider` and in-memory SQLite (no sqlite-vec in tests).
-- Run `npm run typecheck && npm test && npm run lint` before considering work complete.
+- Run `npm run typecheck && npm run test:coverage && npm run lint` before considering work complete. Use `test:coverage` (not `test`) — CI enforces coverage thresholds (statements ≥ 75%, branches ≥ 74%, functions ≥ 75%, lines ≥ 75%) and will reject PRs that drop below them.
+- Before creating a PR, use a `code-review` sub-agent to self-review your diff. Fix any issues it finds before opening the PR.
diff --git a/agents.md b/agents.md
@@ -177,7 +177,9 @@ tests/
 - **Use `MockEmbeddingProvider`** from `tests/fixtures/mock-provider.ts` for all tests that need embeddings. It returns deterministic 4D vectors — no model download, no network.
 - **Use `createTestDb()`** from `tests/fixtures/test-db.ts` for an in-memory SQLite instance with all migrations applied.
 - **sqlite-vec is NOT available in tests.** The test DB is plain SQLite. Vector search tests exercise the FTS5/LIKE fallback path. This is by design.
-- **Coverage threshold is 80%** for statements, branches, functions, and lines (enforced in `vitest.config.ts`). CLI code (`src/cli/`) is excluded from coverage.
+- **Coverage thresholds** (enforced in `vitest.config.ts`): statements ≥ 75%, branches ≥ 74%, functions ≥ 75%, lines ≥ 75%. CLI code (`src/cli/`) is excluded from coverage.
+- **Always run `npm run test:coverage`** (not just `npm test`) before pushing. CI runs `test:coverage`, which fails if any threshold is missed. `npm test` alone does NOT check coverage.
+- When adding new source files, ensure adequate test coverage so global thresholds are not violated. New files with many uncovered branches will drag the overall percentage down.
 - Tests should be fast (< 1 second total), deterministic, and not depend on ordering.
 
 ### Common Gotcha
@@ -252,8 +254,12 @@ git worktree remove ../libscope-<branch-name>
 3. Expose via MCP tool in `src/mcp/server.ts` and/or CLI command in `src/cli/index.ts`.
 4. Write unit tests in `tests/unit/` using `MockEmbeddingProvider` and `createTestDb()`.
 5. Add integration coverage in `tests/integration/workflow.test.ts` if it's a core flow.
-6. Run `npm run typecheck && npm test && npm run lint` — all must pass.
+6. Run `npm run typecheck && npm run test:coverage && npm run lint` — all must pass. **Use `test:coverage`, not `test`** — CI enforces coverage thresholds and will fail if new code drops coverage below the configured minimums (see `vitest.config.ts` thresholds).
 7. **Update documentation** — see the Documentation section below.
+8. **PR description must match implementation.** Don't describe features that aren't implemented yet — only document what actually ships in the PR. If scope is reduced, update the description before opening the PR.
+9. **Verify HTTP error handling.** When writing code that calls external services (fetch, HTTP clients), always check response status codes — `fetch()` resolves on 4xx/5xx and rejects only when the request itself fails (e.g. a network error), so check `resp.ok` or `resp.status`. Never treat a resolved `fetch()` as a success without checking its status.
+10. **Don't expose secrets in API responses.** If a model stores sensitive fields (tokens, secrets, keys), redact them from API/MCP response payloads.
+11. **Self-review before creating a PR.** Before opening a pull request, use a `code-review` sub-agent to review your own diff (`git diff main...HEAD`). Fix any issues it finds. Do not rely on the automated GitHub review — catch problems before the PR is created, not after.
 
 ## Documentation
 

diff --git a/src/config.ts b/src/config.ts
@@ -149,6 +149,7 @@ export function loadConfig(): LibScopeConfig {
       ...DEFAULT_CONFIG.indexing,
       ...userConfig.indexing,
       ...projectConfig.indexing,
+      ...envOverrides.indexing,
     },
     logging: {
       ...DEFAULT_CONFIG.logging,

diff --git a/tests/unit/db-validation.test.ts b/tests/unit/db-validation.test.ts
@@ -0,0 +1,85 @@
+import { describe, it, expect } from "vitest";
+import { validateRow, validateCountRow } from "../../src/utils/db-validation.js";
+import { DatabaseError } from "../../src/errors.js";
+
+describe("validateRow", () => {
+  it("returns the row when all required keys are present", () => {
+    const row = { name: "test", age: 42 };
+    const result = validateRow<{ name: string; age: number }>(row, ["name", "age"], "test");
+    expect(result).toEqual({ name: "test", age: 42 }); // same contents as the input row
+  });
+
+  it("throws DatabaseError for null input", () => {
+    expect(() => validateRow(null, ["id"], "test")).toThrow(DatabaseError); // pins the error class
+    expect(() => validateRow(null, ["id"], "test")).toThrow("Expected a row object"); // pins a message fragment
+  });
+
+  it("throws DatabaseError for undefined input", () => {
+    expect(() => validateRow(undefined, ["id"], "test")).toThrow(DatabaseError);
+    expect(() => validateRow(undefined, ["id"], "test")).toThrow("Expected a row object"); // same fragment as the null case
+  });
+
+  it("throws DatabaseError for non-object input (string)", () => {
+    expect(() => validateRow("not an object", ["id"], "test")).toThrow(DatabaseError);
+    expect(() => validateRow("not an object", ["id"], "test")).toThrow("got string"); // fragment matches typeof of the input
+  });
+
+  it("throws DatabaseError for non-object input (number)", () => {
+    expect(() => validateRow(42, ["id"], "test")).toThrow(DatabaseError);
+    expect(() => validateRow(42, ["id"], "test")).toThrow("got number"); // fragment matches typeof of the input
+  });
+
+  it("throws DatabaseError when a required key is missing", () => {
+    const row = { name: "test" }; // "age" is deliberately absent
+    expect(() => validateRow(row, ["name", "age"], "user row")).toThrow(DatabaseError);
+    expect(() => validateRow(row, ["name", "age"], "user row")).toThrow(
+      "Missing expected column 'age'", // the absent key is named in the message
+    );
+  });
+
+  it("includes context in error message", () => {
+    expect(() => validateRow(null, ["id"], "my-context")).toThrow("my-context"); // third argument surfaces in the message
+  });
+
+  it("succeeds with empty required keys", () => {
+    const row = { a: 1 };
+    const result = validateRow(row, [], "test"); // no keys required, so no key can be missing
+    expect(result).toEqual({ a: 1 });
+  });
+});
+
+describe("validateCountRow", () => {
+  it("returns the count when cnt is a number", () => {
+    const result = validateCountRow({ cnt: 42 }, "test count");
+    expect(result).toBe(42); // the bare number is returned, not the row
+  });
+
+  it("returns zero when cnt is 0", () => {
+    const result = validateCountRow({ cnt: 0 }, "test count"); // 0 is falsy but still a valid count
+    expect(result).toBe(0);
+  });
+
+  it("throws DatabaseError when cnt is a string", () => {
+    expect(() => validateCountRow({ cnt: "42" }, "test count")).toThrow(DatabaseError); // numeric-looking strings are rejected, not coerced
+    expect(() => validateCountRow({ cnt: "42" }, "test count")).toThrow("Expected numeric count");
+  });
+
+  it("throws DatabaseError when cnt is null", () => {
+    expect(() => validateCountRow({ cnt: null }, "test count")).toThrow(DatabaseError);
+    expect(() => validateCountRow({ cnt: null }, "test count")).toThrow("got object"); // typeof null is "object"
+  });
+
+  it("throws DatabaseError when cnt is undefined", () => {
+    expect(() => validateCountRow({ cnt: undefined }, "test count")).toThrow(DatabaseError); // key exists, value is undefined
+    expect(() => validateCountRow({ cnt: undefined }, "test count")).toThrow("got undefined");
+  });
+
+  it("throws DatabaseError for null row input", () => {
+    expect(() => validateCountRow(null, "test")).toThrow(DatabaseError); // the row itself is null, not just cnt
+  });
+
+  it("throws DatabaseError for row missing cnt key", () => {
+    expect(() => validateCountRow({ other: 1 }, "test")).toThrow(DatabaseError);
+    expect(() => validateCountRow({ other: 1 }, "test")).toThrow("Missing expected column 'cnt'"); // same wording as the validateRow missing-key message
+  });
+});
diff --git a/tests/unit/export.test.ts b/tests/unit/export.test.ts
@@ -181,4 +181,72 @@ describe("export/backup", () => {
       newDb.close();
     });
   });
+
+  describe("importFromBackup — validation errors", () => {
+    it("should throw when backup is not an object", () => {
+      const path = join(tempDir, "bad.json");
+      writeFileSync(path, '"just a string"', "utf-8"); // valid JSON, but a string rather than an object
+      expect(() => importFromBackup(db, path)).toThrow("expected an object");
+    });
+
+    it("should throw when required keys are missing", () => {
+      const path = join(tempDir, "bad.json");
+      writeFileSync(path, JSON.stringify({ metadata: {} }), "utf-8"); // only metadata present; topics, documents, chunks, ratings absent
+      expect(() => importFromBackup(db, path)).toThrow("missing topics");
+    });
+
+    it("should throw when documents is not an array", () => {
+      const path = join(tempDir, "bad.json");
+      writeFileSync(
+        path,
+        JSON.stringify({
+          metadata: {},
+          topics: [],
+          documents: "not-array", // string where the test title expects an array
+          chunks: [],
+          ratings: [],
+        }),
+        "utf-8",
+      );
+      expect(() => importFromBackup(db, path)).toThrow("Failed to import");
+    });
+
+    it("should throw when a document lacks id or title", () => {
+      const path = join(tempDir, "bad.json");
+      writeFileSync(
+        path,
+        JSON.stringify({
+          metadata: {},
+          topics: [],
+          documents: [{ noId: true }], // entry has neither id nor title
+          chunks: [],
+          ratings: [],
+        }),
+        "utf-8",
+      );
+      expect(() => importFromBackup(db, path)).toThrow("Failed to import");
+    });
+
+    it("should throw when metadata version is missing", () => {
+      const path = join(tempDir, "bad.json");
+      writeFileSync(
+        path,
+        JSON.stringify({
+          metadata: {}, // no version field
+          topics: [],
+          documents: [{ id: "d1", title: "t1" }], // otherwise well-formed document entry
+          chunks: [],
+          ratings: [],
+        }),
+        "utf-8",
+      );
+      expect(() => importFromBackup(db, path)).toThrow("missing metadata");
+    });
+
+    it("should wrap non-DatabaseError exceptions", () => {
+      const path = join(tempDir, "bad.json");
+      writeFileSync(path, "NOT VALID JSON", "utf-8"); // not parseable as JSON; presumably the parse error is what gets wrapped — confirm
+      expect(() => importFromBackup(db, path)).toThrow("Failed to import");
+    });
+  });
 });
diff --git a/tests/unit/graph.test.ts b/tests/unit/graph.test.ts
@@ -164,6 +164,21 @@ describe("buildKnowledgeGraph", () => {
     expect(docNodes[0]!.label).toBe("Tagged Doc");
   });
 
+  it("averages multiple chunk embeddings per document", async () => {
+    insertDocument(db, "d1", "Multi Chunk Doc", null);
+    insertDocument(db, "d2", "Other Doc", null);
+    // d1 has two chunks whose element-wise mean is [1, 0, 0, 0] (the +0.5 / -0.5 components cancel)
+    insertChunkWithEmbedding(db, "c1a", "d1", [1, 0.5, 0, 0]);
+    insertChunkWithEmbedding(db, "c1b", "d1", [1, -0.5, 0, 0]);
+    // d2 has a single chunk whose embedding equals that mean exactly
+    insertChunkWithEmbedding(db, "c2", "d2", [1, 0, 0, 0]);
+
+    const graph = await buildKnowledgeGraph(db, { similarityThreshold: 0.9 }); // NOTE(review): if the metric is cosine, one d1 chunk alone scores ~0.894 against d2 (below 0.9) — confirm
+
+    const simEdges = graph.edges.filter((e) => e.type === "similar_to");
+    expect(simEdges.length).toBeGreaterThanOrEqual(1); // at least one d1–d2 similarity edge must survive the threshold
+  });
+
   it("applies threshold filtering for similarity edges", async () => {
     insertDocument(db, "d1", "Doc One", null);
     insertDocument(db, "d2", "Doc Two", null);

diff --git a/tests/unit/packs.test.ts b/tests/unit/packs.test.ts
@@ -270,6 +270,134 @@ describe("knowledge packs", () => {
 
       await expect(installPack(db, provider, packPath)).rejects.toThrow(/not an object/);
     });
+
+    it("should reject missing version", async () => {
+      const bad = {
+        name: "x",
+        version: "", // empty string rather than an absent key
+        description: "y",
+        documents: [],
+        metadata: { author: "a", license: "MIT", createdAt: "2024-01-01" },
+      };
+      const packPath = join(tempDir, "no-version.json");
+      writeFileSync(packPath, JSON.stringify(bad), "utf-8");
+
+      await expect(installPack(db, provider, packPath)).rejects.toThrow(
+        /missing or invalid 'version'/,
+      );
+    });
+
+    it("should reject non-string description", async () => {
+      const bad = {
+        name: "x",
+        version: "1.0.0",
+        description: 42, // number where the test title expects a string
+        documents: [],
+        metadata: { author: "a", license: "MIT", createdAt: "2024-01-01" },
+      };
+      const packPath = join(tempDir, "bad-desc.json");
+      writeFileSync(packPath, JSON.stringify(bad), "utf-8");
+
+      await expect(installPack(db, provider, packPath)).rejects.toThrow(
+        /missing or invalid 'description'/,
+      );
+    });
+
+    it("should reject non-array documents", async () => {
+      const bad = {
+        name: "x",
+        version: "1.0.0",
+        description: "y",
+        documents: "not-array", // string where an array is required
+        metadata: { author: "a", license: "MIT", createdAt: "2024-01-01" },
+      };
+      const packPath = join(tempDir, "bad-docs-type.json");
+      writeFileSync(packPath, JSON.stringify(bad), "utf-8");
+
+      await expect(installPack(db, provider, packPath)).rejects.toThrow(
+        /'documents' must be an array/,
+      );
+    });
+
+    it("should reject document missing source", async () => {
+      const bad = {
+        name: "x",
+        version: "1.0.0",
+        description: "y",
+        documents: [{ title: "t", content: "c" }], // entry has title and content but no source
+        metadata: { author: "a", license: "MIT", createdAt: "2024-01-01" },
+      };
+      const packPath = join(tempDir, "no-source.json");
+      writeFileSync(packPath, JSON.stringify(bad), "utf-8");
+
+      await expect(installPack(db, provider, packPath)).rejects.toThrow(
+        /missing or invalid 'source'/,
+      );
+    });
+
+    it("should reject metadata missing license", async () => {
+      const bad = {
+        name: "x",
+        version: "1.0.0",
+        description: "y",
+        documents: [],
+        metadata: { author: "a", createdAt: "2024-01-01" }, // license omitted
+      };
+      const packPath = join(tempDir, "no-license.json");
+      writeFileSync(packPath, JSON.stringify(bad), "utf-8");
+
+      await expect(installPack(db, provider, packPath)).rejects.toThrow(
+        /metadata missing 'license'/,
+      );
+    });
+
+    it("should reject metadata missing createdAt", async () => {
+      const bad = {
+        name: "x",
+        version: "1.0.0",
+        description: "y",
+        documents: [],
+        metadata: { author: "a", license: "MIT" }, // createdAt omitted
+      };
+      const packPath = join(tempDir, "no-created.json");
+      writeFileSync(packPath, JSON.stringify(bad), "utf-8");
+
+      await expect(installPack(db, provider, packPath)).rejects.toThrow(
+        /metadata missing 'createdAt'/,
+      );
+    });
+
+    it("should reject metadata missing author", async () => {
+      const bad = {
+        name: "x",
+        version: "1.0.0",
+        description: "y",
+        documents: [],
+        metadata: { license: "MIT", createdAt: "2024-01-01" }, // author omitted
+      };
+      const packPath = join(tempDir, "no-author.json");
+      writeFileSync(packPath, JSON.stringify(bad), "utf-8");
+
+      await expect(installPack(db, provider, packPath)).rejects.toThrow(
+        /metadata missing 'author'/,
+      );
+    });
+
+    it("should reject null metadata", async () => {
+      const bad = {
+        name: "x",
+        version: "1.0.0",
+        description: "y",
+        documents: [],
+        metadata: null, // null instead of a metadata object
+      };
+      const packPath = join(tempDir, "null-meta.json");
+      writeFileSync(packPath, JSON.stringify(bad), "utf-8");
+
+      await expect(installPack(db, provider, packPath)).rejects.toThrow(
+        /missing or invalid 'metadata'/,
+      );
+    });
   });
 
   describe("security validations", () => {