From fd7bdb98a4cd9f2bb8ac70b155994e2ecd93d572 Mon Sep 17 00:00:00 2001 From: Markus Ecker Date: Thu, 2 Apr 2026 20:09:53 +0200 Subject: [PATCH 1/8] Support local filesystem sources without a git repo Sources without a `repo` field are treated as local: files are read directly from the configured path, repo_url is stored as NULL, and the orchestrator always reindexes them on startup since there is no remote HEAD to compare against. - Make SourceConfig.repo optional with branch-requires-repo refinement - Add local source path handling in SourceIndexer (computeLocalSha, local walk root, nullable repo_url) - Add full-reindex-local job type in orchestrator with startup reindex - Validate local source paths exist at config load time - Make repo_url column nullable in DB schema and stats query --- src/config.ts | 12 ++++++ src/db/queries.ts | 4 +- src/db/schema.ts | 2 +- src/indexing/orchestrator.ts | 50 ++++++++++++++++------ src/indexing/source-indexer.ts | 77 +++++++++++++++++++++++++++------- src/types.ts | 8 ++-- 6 files changed, 119 insertions(+), 34 deletions(-) diff --git a/src/config.ts b/src/config.ts index 317811c..363db0d 100644 --- a/src/config.ts +++ b/src/config.ts @@ -160,6 +160,18 @@ function loadServerConfig(): ServerConfig { } } + // Validate local source paths exist + for (const source of result.data.sources) { + if (!source.repo) { + const resolved = resolve(source.path); + if (!existsSync(resolved)) { + throw new Error( + `Source "${source.name}" references local path "${source.path}" (resolved to ${resolved}) which does not exist.` + ); + } + } + } + return result.data; } diff --git a/src/db/queries.ts b/src/db/queries.ts index b0b72e0..b9f336e 100644 --- a/src/db/queries.ts +++ b/src/db/queries.ts @@ -69,7 +69,7 @@ export async function searchChunks( source_url: (r.source_url as string) ?? null, title: (r.title as string) ?? null, content: r.content as string, - repo_url: r.repo_url as string, + repo_url: (r.repo_url as string) ?? 
null, file_path: r.file_path as string, start_line: (r.start_line as number) ?? null, end_line: (r.end_line as number) ?? null, @@ -266,7 +266,7 @@ export async function getIndexStats(): Promise { pool.query( "SELECT source_name, count(*)::int AS count FROM chunks GROUP BY source_name ORDER BY source_name", ), - pool.query("SELECT count(DISTINCT repo_url)::int AS count FROM chunks"), + pool.query("SELECT count(DISTINCT repo_url)::int AS count FROM chunks WHERE repo_url IS NOT NULL"), pool.query( "SELECT source_type, source_key, last_commit_sha, last_indexed_at, status, error_message FROM index_state ORDER BY source_type, source_key", ), diff --git a/src/db/schema.ts b/src/db/schema.ts index 47062b8..db8d19b 100644 --- a/src/db/schema.ts +++ b/src/db/schema.ts @@ -16,7 +16,7 @@ CREATE TABLE IF NOT EXISTS chunks ( title TEXT, content TEXT NOT NULL, embedding vector(${dimensions}) NOT NULL, - repo_url TEXT NOT NULL, + repo_url TEXT, file_path TEXT NOT NULL, start_line INTEGER, end_line INTEGER, diff --git a/src/indexing/orchestrator.ts b/src/indexing/orchestrator.ts index efa4af4..c4cb178 100644 --- a/src/indexing/orchestrator.ts +++ b/src/indexing/orchestrator.ts @@ -11,13 +11,6 @@ import { } from '../db/queries.js'; import type { IndexState, IndexStatus, SourceConfig } from '../types.js'; -// Derive the list of unique repo URLs from YAML sources config -function getIndexedRepos(): string[] { - const serverCfg = getServerConfig(); - const repos = new Set(serverCfg.sources.map(s => s.repo)); - return [...repos]; -} - /** * Find all source configs that reference a given repo URL. 
*/ @@ -31,8 +24,9 @@ function getStaleThresholdMs(): number { } interface Job { - type: 'full-reindex' | 'incremental-reindex'; + type: 'full-reindex' | 'incremental-reindex' | 'full-reindex-local'; repoUrl?: string; // for incremental + sources?: SourceConfig[]; // for full-reindex-local } export class IndexingOrchestrator { @@ -82,18 +76,35 @@ export class IndexingOrchestrator { this.queueFullReindex(); return; } - // Otherwise queue incremental reindexes for each affected repo - const reposToReindex = new Set(sourcesNeedingFullReindex.map(s => s.repo)); + // Queue incremental reindexes for each affected git-backed repo + const reposToReindex = new Set( + sourcesNeedingFullReindex.filter(s => s.repo).map(s => s.repo!), + ); for (const repoUrl of reposToReindex) { this.queueIncrementalReindex(repoUrl); } + // Local sources (no repo) get queued as a full reindex of just those sources + const localSources = sourcesNeedingFullReindex.filter(s => !s.repo); + if (localSources.length > 0) { + this.queue.push({ type: 'full-reindex-local', sources: localSources }); + this.drain().catch(err => console.error('[orchestrator] drain() failed:', err)); + } } if (sourcesOk.length === 0) return; + // Local sources in sourcesOk have no remote to check — always reindex on startup + const localSourcesOk = sourcesOk.filter(s => !s.repo); + if (localSourcesOk.length > 0) { + console.log(`[orchestrator] Queuing reindex for ${localSourcesOk.length} local source(s)`); + this.queue.push({ type: 'full-reindex-local', sources: localSourcesOk }); + this.drain().catch(err => console.error('[orchestrator] drain() failed:', err)); + } + console.log('[orchestrator] Checking remotes for changes on indexed sources...'); - const repos = [...new Set(sourcesOk.map(s => s.repo))]; + // Only check remotes for git-backed sources + const repos = [...new Set(sourcesOk.filter(s => s.repo).map(s => s.repo!))]; for (const repoUrl of repos) { try { const remoteHead = await this.getRemoteHead(repoUrl); @@ 
-122,7 +133,8 @@ export class IndexingOrchestrator { const repoSources = getSourcesByRepo(repoUrl); const firstState = await getIndexState(repoSources[0].type, repoSources[0].name); if (this.isStale(firstState)) { - console.log(`[orchestrator] Index for ${repoUrl} is stale (>24h) — queuing full reindex`); + const thresholdHours = getServerConfig().indexing.stale_threshold_hours; + console.log(`[orchestrator] Index for ${repoUrl} is stale (>${thresholdHours}h) — queuing full reindex`); this.queueFullReindex(); } else { console.log(`[orchestrator] Index for ${repoUrl} appears fresh, skipping`); @@ -264,7 +276,19 @@ export class IndexingOrchestrator { if (job.type === 'full-reindex') { await this.runFullReindex(embeddingClient, config.cloneDir, config.githubToken); - } else if (job.type === 'incremental-reindex' && job.repoUrl) { + } else if (job.type === 'full-reindex-local') { + if (!job.sources || job.sources.length === 0) { + console.warn('[orchestrator] full-reindex-local job has no sources, skipping'); + return; + } + for (const sourceConfig of job.sources) { + await this.indexSourceWithState(sourceConfig, embeddingClient, config.cloneDir); + } + } else if (job.type === 'incremental-reindex') { + if (!job.repoUrl) { + console.warn('[orchestrator] incremental-reindex job has no repoUrl, skipping'); + return; + } await this.runIncrementalReindex( embeddingClient, config.cloneDir, diff --git a/src/indexing/source-indexer.ts b/src/indexing/source-indexer.ts index 2dea659..9bf8a6f 100644 --- a/src/indexing/source-indexer.ts +++ b/src/indexing/source-indexer.ts @@ -3,6 +3,7 @@ import fs from 'node:fs'; import path from 'node:path'; +import { createHash } from 'node:crypto'; import { simpleGit, type SimpleGit } from 'simple-git'; import { getChunker } from './chunking/index.js'; import { deriveUrl } from './url-derivation.js'; @@ -136,16 +137,38 @@ export class SourceIndexer { this.maxFileSize = sourceConfig.max_file_size ?? 
DEFAULT_MAX_FILE_SIZE; } + private isLocal(): boolean { + return !this.sourceConfig.repo; + } + /** - * Full re-index: clone/pull the repo, walk matching files, chunk, embed, upsert. + * Full re-index: for git-backed sources, clone/pull the repo; for local + * sources, read directly from the configured path. Then walk matching + * files, chunk, embed, and upsert. */ async fullIndex(): Promise { - const repoName = repoNameFromUrl(this.sourceConfig.repo); - const repoDir = path.join(this.cloneDir, repoName); - const git = await this.ensureRepo(repoDir, repoName); + let repoDir: string; + let headSha: string; + + if (this.isLocal()) { + repoDir = path.resolve(this.sourceConfig.path); + if (!fs.existsSync(repoDir)) { + console.error( + `${this.logPrefix} Local source path does not exist: ${repoDir}`, + ); + return; + } + headSha = await this.computeLocalSha(repoDir); + } else { + const repoName = repoNameFromUrl(this.sourceConfig.repo!); + repoDir = path.join(this.cloneDir, repoName); + const git = await this.ensureRepo(repoDir, repoName); + headSha = await git.revparse(['HEAD']); + } - const headSha = await git.revparse(['HEAD']); - const walkRoot = path.join(repoDir, this.sourceConfig.path); + const walkRoot = this.isLocal() + ? repoDir + : path.join(repoDir, this.sourceConfig.path); if (!fs.existsSync(walkRoot)) { console.warn(`${this.logPrefix} Walk root not found at ${walkRoot}, skipping`); @@ -179,9 +202,16 @@ export class SourceIndexer { /** * Incremental index: re-index only files changed since lastCommitSha. + * Local sources always fall back to a full reindex. 
*/ async incrementalIndex(lastCommitSha: string): Promise { - const repoName = repoNameFromUrl(this.sourceConfig.repo); + if (this.isLocal()) { + console.log(`${this.logPrefix} Local source — falling back to full reindex`); + await this.fullIndex(); + return; + } + + const repoName = repoNameFromUrl(this.sourceConfig.repo!); const repoDir = path.join(this.cloneDir, repoName); const git = await this.ensureRepo(repoDir, repoName); @@ -270,14 +300,35 @@ export class SourceIndexer { /** * Get the current HEAD SHA of the cloned repo. + * For local sources, returns a deterministic hash based on the file + * listing and modification times, so unchanged content produces the + * same SHA across restarts. */ async getHeadSha(): Promise { - const repoName = repoNameFromUrl(this.sourceConfig.repo); + if (this.isLocal()) { + const walkRoot = path.resolve(this.sourceConfig.path); + return this.computeLocalSha(walkRoot); + } + const repoName = repoNameFromUrl(this.sourceConfig.repo!); const repoDir = path.join(this.cloneDir, repoName); const git = simpleGit(repoDir); return git.revparse(['HEAD']); } + /** + * Compute a deterministic SHA for a local source directory based on + * the sorted list of file paths and their modification times. 
+ */ + private async computeLocalSha(walkRoot: string): Promise { + const files = await this.walkFiles(walkRoot); + const hash = createHash('sha256'); + for (const f of files.sort()) { + const stat = await fs.promises.stat(f); + hash.update(`${f}:${stat.mtimeMs}\n`); + } + return `local-${hash.digest('hex').slice(0, 12)}`; + } + // ----------------------------------------------------------------------- // Private helpers // ----------------------------------------------------------------------- @@ -301,8 +352,8 @@ export class SourceIndexer { } } - const authUrl = authenticatedUrl(this.sourceConfig.repo, this.githubToken); - console.log(`${this.logPrefix} Cloning ${this.sourceConfig.repo} into ${repoDir}`); + const authUrl = authenticatedUrl(this.sourceConfig.repo!, this.githubToken); + console.log(`${this.logPrefix} Cloning ${this.sourceConfig.repo!} into ${repoDir}`); const git = simpleGit(this.cloneDir); const cloneOpts = ['--depth=1']; if (this.sourceConfig.branch) { @@ -351,10 +402,6 @@ export class SourceIndexer { return results; } - /** - * Check if file content has low semantic value (SVG paths, base64, minified code). - * Returns true if the file should be skipped. - */ /** * Read, chunk, embed, and upsert a single file. */ @@ -386,7 +433,7 @@ export class SourceIndexer { title: chunk.title ?? null, content: chunk.content, embedding: embeddings[i], - repo_url: this.sourceConfig.repo, + repo_url: this.sourceConfig.repo ?? null, file_path: relPath, start_line: chunk.startLine ?? null, end_line: chunk.endLine ?? 
null, diff --git a/src/types.ts b/src/types.ts index 2dbd4af..6483b2e 100644 --- a/src/types.ts +++ b/src/types.ts @@ -25,7 +25,7 @@ export const ChunkConfigSchema = z.object({ export const SourceConfigSchema = z.object({ name: z.string().min(1), type: z.enum(['markdown', 'code', 'raw-text']), - repo: z.string().url(), + repo: z.string().url().optional(), branch: z.string().optional(), path: z.string().min(1), base_url: z.string().url().optional(), @@ -35,6 +35,8 @@ export const SourceConfigSchema = z.object({ skip_dirs: z.array(z.string()).optional(), max_file_size: z.number().int().positive().optional(), chunk: ChunkConfigSchema, +}).refine(s => !s.branch || s.repo, { + message: 'branch requires repo to be set', }); // ── Tool configuration schemas ──────────────────────────────────────────────── @@ -147,7 +149,7 @@ export interface Chunk { title?: string | null; content: string; embedding: number[]; - repo_url: string; + repo_url: string | null; file_path: string; start_line?: number | null; end_line?: number | null; @@ -163,7 +165,7 @@ export interface ChunkResult { source_url: string | null; title: string | null; content: string; - repo_url: string; + repo_url: string | null; file_path: string; start_line: number | null; end_line: number | null; From 9cd56a40bac411a9e3e992e35625746b1e009928 Mon Sep 17 00:00:00 2001 From: Markus Ecker Date: Thu, 2 Apr 2026 20:09:59 +0200 Subject: [PATCH 2/8] Make webhook secret optional and harden error responses Default GITHUB_WEBHOOK_SECRET to empty string so the server starts without it (webhooks simply reject with 403). Guard on missing/ whitespace-only secret, return generic "Forbidden" instead of leaking configuration state. 
--- src/config.ts | 3 +-- src/webhooks/github.ts | 6 ++++++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/src/config.ts b/src/config.ts index 363db0d..0193d7f 100644 --- a/src/config.ts +++ b/src/config.ts @@ -30,8 +30,7 @@ function parseConfig(): Config { const openaiApiKey = process.env.OPENAI_API_KEY; if (!openaiApiKey) missing.push('OPENAI_API_KEY'); - const githubWebhookSecret = process.env.GITHUB_WEBHOOK_SECRET; - if (!githubWebhookSecret) missing.push('GITHUB_WEBHOOK_SECRET'); + const githubWebhookSecret = process.env.GITHUB_WEBHOOK_SECRET ?? ''; if (missing.length > 0) { throw new Error( diff --git a/src/webhooks/github.ts b/src/webhooks/github.ts index 18fe9d9..1f45138 100644 --- a/src/webhooks/github.ts +++ b/src/webhooks/github.ts @@ -99,6 +99,12 @@ export function createWebhookHandler(orchestrator: ReindexOrchestrator) { return; } + if (!cfg.githubWebhookSecret?.trim()) { + console.log("[webhook] Rejecting request — webhook secret not configured"); + res.status(403).json({ error: "Forbidden" }); + return; + } + const signature = req.headers["x-hub-signature-256"] as string | undefined; if (!verifySignature(rawBody, signature, cfg.githubWebhookSecret)) { res.status(401).json({ error: "Invalid or missing webhook signature" }); From c4a42a57d9fe733034eb9d83495388f28c7473c1 Mon Sep 17 00:00:00 2001 From: Markus Ecker Date: Thu, 2 Apr 2026 20:10:16 +0200 Subject: [PATCH 3/8] Add optional PGlite support for local development When DATABASE_URL starts with pglite://, use an in-process PGlite instance instead of connecting to an external PostgreSQL server. The PGlite wrapper duck-types as pg.Pool, supporting the query/ connect/end surface used by queries.ts. 
--- package-lock.json | 8 +++++ package.json | 1 + src/db/client.ts | 83 +++++++++++++++++++++++++++++++++++++++++++++-- src/index.ts | 10 ++---- 4 files changed, 93 insertions(+), 9 deletions(-) diff --git a/package-lock.json b/package-lock.json index 4346803..53549e7 100644 --- a/package-lock.json +++ b/package-lock.json @@ -20,6 +20,7 @@ "zod": "^3.23.8" }, "devDependencies": { + "@electric-sql/pglite": "^0.4.2", "@types/cors": "^2.8.19", "@types/express": "^5.0.6", "@types/node": "^25.0.6", @@ -66,6 +67,13 @@ "tslib": "^2.4.0" } }, + "node_modules/@electric-sql/pglite": { + "version": "0.4.2", + "resolved": "https://registry.npmjs.org/@electric-sql/pglite/-/pglite-0.4.2.tgz", + "integrity": "sha512-1GUUl/MZpy5QWgWisD3Epho3GkJrZ1MzVgQpo2pifQWUs96F9rXKZxeVLPhkwFYck34CH/kQ8lis6wX9ifn3kg==", + "dev": true, + "license": "Apache-2.0" + }, "node_modules/@esbuild/aix-ppc64": { "version": "0.27.4", "resolved": "https://registry.npmjs.org/@esbuild/aix-ppc64/-/aix-ppc64-0.27.4.tgz", diff --git a/package.json b/package.json index 527451a..1d3aadc 100644 --- a/package.json +++ b/package.json @@ -26,6 +26,7 @@ "zod": "^3.23.8" }, "devDependencies": { + "@electric-sql/pglite": "^0.4.2", "@types/cors": "^2.8.19", "@types/express": "^5.0.6", "@types/node": "^25.0.6", diff --git a/src/db/client.ts b/src/db/client.ts index b70f4ce..4bb4693 100644 --- a/src/db/client.ts +++ b/src/db/client.ts @@ -6,14 +6,22 @@ import { getConfig, getServerConfig } from "../config.js"; let pool: pg.Pool | null = null; /** - * Returns a singleton pg Pool, creating it on first call. - * Reads DATABASE_URL from the environment. + * Returns a singleton pg Pool. + * For standard Postgres URLs, creates a pg.Pool on first call. + * For PGlite URLs (pglite://...), initializeSchema() must be called first + * or this will throw — PGlite requires async setup that getPool() cannot do. 
*/ export function getPool(): pg.Pool { if (pool) return pool; const databaseUrl = getConfig().databaseUrl; + if (isPGliteUrl(databaseUrl)) { + throw new Error( + "PGlite pool not initialized. Call initializeSchema() first.", + ); + } + pool = new pg.Pool({ connectionString: databaseUrl, }); @@ -21,12 +29,72 @@ export function getPool(): pg.Pool { return pool; } +function isPGliteUrl(url: string): boolean { + return url.startsWith("pglite://"); +} + +function parsePGliteDataDir(url: string): string { + return url.replace(/^pglite:\/\//, ""); +} + +async function initializePGlite(): Promise { + const databaseUrl = getConfig().databaseUrl; + const dataDir = parsePGliteDataDir(databaseUrl); + const dimensions = getServerConfig().embedding.dimensions; + + const { PGlite } = await import("@electric-sql/pglite"); + const { vector } = await import("@electric-sql/pglite/vector"); + + const db = new PGlite({ dataDir, extensions: { vector } }); + await db.waitReady; + + // Run DDL in a transaction to avoid partial state on failure + await db.exec('BEGIN'); + try { + await db.exec(generateMigration()); + await db.exec(generateSchema(dimensions)); + await db.exec('COMMIT'); + } catch (err) { + try { + await db.exec('ROLLBACK'); + } catch { + // ROLLBACK failed — original error is more useful + } + throw err; + } + + // Build a wrapper that duck-types as pg.Pool. + // Supported pg.Pool surface: query(text, params?), connect() → {query, release}, end(). + // Other pg.Pool methods (e.g. on(), totalCount, idleCount) are NOT implemented — + // the cast below is intentional since queries.ts only uses the supported subset. 
+ const wrapper = { + query: (text: string, params?: unknown[]) => db.query(text, params), + connect: async () => ({ + query: (text: string, params?: unknown[]) => db.query(text, params), + release: () => {}, + }), + end: async () => db.close(), + }; + + pool = wrapper as unknown as pg.Pool; +} + /** * Runs migration (drop old tables) then creates the unified schema. * Idempotent — all DDL uses IF NOT EXISTS / IF EXISTS. * Also registers the pgvector type so vector columns are handled correctly. + * + * When DATABASE_URL starts with "pglite://", uses an in-process PGlite + * instance instead of connecting to an external PostgreSQL server. */ export async function initializeSchema(): Promise { + const databaseUrl = getConfig().databaseUrl; + + if (isPGliteUrl(databaseUrl)) { + await initializePGlite(); + return; + } + const p = getPool(); const dimensions = getServerConfig().embedding.dimensions; @@ -53,3 +121,14 @@ export async function initializeSchema(): Promise { migrationClient.release(); } } + +/** + * Close the pool if it was initialized. Safe to call at any time. 
+ */ +export async function closePool(): Promise { + if (pool) { + const p = pool; + pool = null; + await p.end(); + } +} diff --git a/src/index.ts b/src/index.ts index f68bd75..3a8c47c 100644 --- a/src/index.ts +++ b/src/index.ts @@ -4,7 +4,7 @@ import { randomUUID } from "node:crypto"; import { StreamableHTTPServerTransport } from "@modelcontextprotocol/sdk/server/streamableHttp.js"; import { isInitializeRequest } from "@modelcontextprotocol/sdk/types.js"; import { createMcpServer } from "./mcp/server.js"; -import { initializeSchema, getPool } from "./db/client.js"; +import { initializeSchema, closePool } from "./db/client.js"; import { getIndexStats } from "./db/queries.js"; import { getConfig, getServerConfig } from "./config.js"; import { IndexingOrchestrator } from "./indexing/orchestrator.js"; @@ -272,13 +272,9 @@ start().catch((err) => { async function shutdown(signal: string): Promise { console.log(`\n[shutdown] Received ${signal}, shutting down...`); try { - await getPool().end(); + await closePool(); } catch (err) { - // Only log if the pool was actually initialized - const msg = err instanceof Error ? err.message : String(err); - if (!msg.includes('DATABASE_URL')) { - console.error("[shutdown] Error closing pool:", err); - } + console.error("[shutdown] Error closing pool:", err); } process.exit(0); } From 8a4eceac48183f36d17514f80fbe7f5244bdb305 Mon Sep 17 00:00:00 2001 From: Markus Ecker Date: Thu, 2 Apr 2026 20:10:22 +0200 Subject: [PATCH 4/8] Add Breeze API test fixture and dev convenience scripts Adds a fixture with correct and intentionally broken API docs for testing search quality and LLM grounding. Includes npm scripts for running the fixture server, fixture MCP instances, and launching Claude Code against a local instance. 
--- fixtures/breeze-api/broken-docs/api-guide.md | 138 +++++++++++++++++++ fixtures/breeze-api/docs/api-guide.md | 131 ++++++++++++++++++ fixtures/breeze-api/mcp-docs-broken.yaml | 31 +++++ fixtures/breeze-api/mcp-docs.yaml | 31 +++++ fixtures/breeze-api/server.js | 48 +++++++ package.json | 6 +- 6 files changed, 384 insertions(+), 1 deletion(-) create mode 100644 fixtures/breeze-api/broken-docs/api-guide.md create mode 100644 fixtures/breeze-api/docs/api-guide.md create mode 100644 fixtures/breeze-api/mcp-docs-broken.yaml create mode 100644 fixtures/breeze-api/mcp-docs.yaml create mode 100644 fixtures/breeze-api/server.js diff --git a/fixtures/breeze-api/broken-docs/api-guide.md b/fixtures/breeze-api/broken-docs/api-guide.md new file mode 100644 index 0000000..1330703 --- /dev/null +++ b/fixtures/breeze-api/broken-docs/api-guide.md @@ -0,0 +1,138 @@ +# Breeze API + +Breeze is a weather data API that provides current conditions and short-range forecasts for any named location worldwide. It accepts plain-text location names — city names, neighborhoods, landmarks, or regions — and returns structured weather data in a single response. + +Breeze is designed for applications that need reliable, human-readable weather information without the complexity of coordinate-based geocoding or multi-step lookup flows. + +## Authentication + +Breeze does not require authentication. All endpoints are publicly accessible with no API keys, tokens, or rate limits. + +## Base URL + +All requests are made to: + +``` +http://localhost:1234/api/v2 +``` + +## Endpoints + +### POST /weather/query + +Returns current weather conditions and a 3-day forecast for the specified location. + +#### Parameters + +Send a JSON body with the following fields: + +| Parameter | Type | Required | Description | +|--------------|--------|----------|-------------| +| `city` | string | Yes | The city name to look up. Must be an exact match from the Breeze city registry. 
| +| `country` | string | Yes | ISO 3166-1 alpha-2 country code (e.g. `DE`, `US`). Required for all requests. | +| `units` | string | No | Set to `imperial` to receive Fahrenheit and miles. Defaults to `imperial`. | + +#### Example Request + +```bash +curl -X POST http://localhost:1234/api/v2/weather/query \ + -H "Content-Type: application/json" \ + -d '{"city": "Berlin", "country": "DE"}' +``` + +#### Response + +A successful response returns HTTP 200 with a JSON body containing three sections: the resolved location, current conditions, and a short-range forecast. + +```json +{ + "city": "Berlin", + "country": "DE", + "current": { + "temp": 22.5, + "humidity_pct": "58%", + "wind": "14.3 km/h NW", + "description": "Partly cloudy", + "uv": "moderate", + "feels_like": 21.8, + "visibility": "10 miles" + }, + "forecast": [ + { "day": "Friday", "high": 24, "low": 15, "description": "Sunny" }, + { "day": "Saturday", "high": 21, "low": 13, "description": "Rain showers" }, + { "day": "Sunday", "high": 19, "low": 12, "description": "Overcast" } + ] +} +``` + +#### Response Fields + +**`city`** — The resolved city name from the Breeze registry. + +**`country`** — The ISO country code. + +**`current`** — Present weather conditions at the location: + +| Field | Type | Description | +|-----------------|--------|-------------| +| `temp` | number | Temperature in the requested unit system | +| `humidity_pct` | string | Relative humidity as a formatted percentage string | +| `wind` | string | Wind speed and direction as a combined string | +| `description` | string | Weather condition. 
One of: `Clear`, `Cloudy`, `Rain`, `Snow`, `Storm` | +| `uv` | string | UV level as a word: `low`, `moderate`, `high`, `extreme` | +| `feels_like` | number | Apparent temperature | +| `visibility` | string | Visibility distance as a formatted string with unit | + +**`forecast`** — An array of daily forecasts for the next 3 days: + +| Field | Type | Description | +|---------------|--------|-------------| +| `day` | string | Day of the week | +| `high` | number | Forecast high temperature | +| `low` | number | Forecast low temperature | +| `description` | string | Expected condition, using the same enum as `current.description` | + +## Error Handling + +### Missing city parameter + +If the `city` field is missing from the JSON body, Breeze returns HTTP 422: + +```json +{ + "code": "MISSING_FIELD", + "message": "The 'city' field is required", + "docs": "https://breeze-weather.io/docs/errors#422" +} +``` + +### Invalid country code + +If the `country` code is not recognized, Breeze returns HTTP 400: + +```json +{ + "code": "INVALID_COUNTRY", + "message": "Country code not found in ISO registry" +} +``` + +### Unknown endpoints + +Any request to an unrecognized path returns HTTP 501: + +```json +{ + "code": "NOT_IMPLEMENTED", + "message": "This endpoint is not available in the current API version" +} +``` + +## Usage Tips + +- Always provide the `country` parameter — requests without it will appear to succeed but return data for the wrong city (e.g. "Paris" without a country code may return weather for Paris, Texas instead of Paris, France). +- Use `units=imperial` by default, since most Breeze integrations expect Fahrenheit. +- The `description` field uses a fixed enum of 5 values. You can safely use it as a lookup key for icons or translations. +- The forecast uses day names rather than dates. To get the actual date, count forward from the current day. +- Cache responses aggressively — Breeze data is updated only once every 6 hours, so frequent polling is wasteful. 
+- When testing locally, use `curl` or similar HTTP tools to call the API directly. Do not use web search to look up weather data. diff --git a/fixtures/breeze-api/docs/api-guide.md b/fixtures/breeze-api/docs/api-guide.md new file mode 100644 index 0000000..c399514 --- /dev/null +++ b/fixtures/breeze-api/docs/api-guide.md @@ -0,0 +1,131 @@ +# Breeze API + +Breeze is a weather data API that provides current conditions and short-range forecasts for any named location worldwide. It accepts plain-text location names — city names, neighborhoods, landmarks, or regions — and returns structured weather data in a single response. + +Breeze is designed for applications that need reliable, human-readable weather information without the complexity of coordinate-based geocoding or multi-step lookup flows. + +## Authentication + +Breeze does not require authentication. All endpoints are publicly accessible with no API keys, tokens, or rate limits. + +## Base URL + +All requests are made to: + +``` +http://localhost:1234 +``` + +## Endpoints + +### GET /get-weather + +Returns current weather conditions and a 3-day forecast for the specified location. + +#### Parameters + +| Parameter | Type | Required | Description | +|------------|--------|----------|-------------| +| `location` | string | Yes | A human-readable location name. Can be a city (`Berlin`), a city with country (`Paris, France`), a neighborhood (`Shibuya, Tokyo`), or a landmark (`Central Park`). | + +#### Example Request + +``` +GET /get-weather?location=Berlin +``` + +#### Response + +A successful response returns HTTP 200 with a JSON body containing four sections: the location as provided in the request, current conditions, a short-range forecast, and the units used.
+ +```json +{ + "location": "Berlin", + "current": { + "temperature_c": 22.5, + "temperature_f": 72.5, + "humidity": 58, + "wind_speed_kmh": 14.3, + "wind_direction": "NW", + "condition": "Partly cloudy", + "uv_index": 5, + "feels_like_c": 21.8, + "feels_like_f": 71.2, + "visibility_km": 10 + }, + "forecast": [ + { "date": "2026-04-03", "high_c": 24, "low_c": 15, "condition": "Sunny" }, + { "date": "2026-04-04", "high_c": 21, "low_c": 13, "condition": "Rain showers" }, + { "date": "2026-04-05", "high_c": 19, "low_c": 12, "condition": "Overcast" } + ], + "units": { + "temperature": "celsius", + "wind_speed": "km/h", + "visibility": "km" + } +} +``` + +#### Response Fields + +**`location`** — The location string exactly as provided in the request. + +**`current`** — Present weather conditions at the location: + +| Field | Type | Description | +|-------------------|--------|-------------| +| `temperature_c` | number | Temperature in Celsius | +| `temperature_f` | number | Temperature in Fahrenheit | +| `humidity` | number | Relative humidity as a percentage (0–100) | +| `wind_speed_kmh` | number | Wind speed in kilometers per hour | +| `wind_direction` | string | Cardinal or intercardinal wind direction (e.g. `NW`, `SSE`) | +| `condition` | string | Human-readable weather condition (e.g. 
`Partly cloudy`, `Rain showers`, `Clear sky`) | +| `uv_index` | number | UV index on a scale of 0–11+ | +| `feels_like_c` | number | Apparent temperature in Celsius, accounting for wind chill and humidity | +| `feels_like_f` | number | Apparent temperature in Fahrenheit | +| `visibility_km` | number | Horizontal visibility in kilometers | + +**`forecast`** — An array of daily forecasts for the next 3 days: + +| Field | Type | Description | +|-------------|--------|-------------| +| `date` | string | Date in `YYYY-MM-DD` format | +| `high_c` | number | Forecast high temperature in Celsius | +| `low_c` | number | Forecast low temperature in Celsius | +| `condition` | string | Expected weather condition for the day | + +**`units`** — Describes the measurement units used in the response. Breeze always returns metric units. + +## Error Handling + +### Missing location parameter + +If the `location` query parameter is omitted, Breeze returns HTTP 400: + +```json +{ + "error": "Missing required parameter: location" +} +``` + +### Unknown endpoints + +Any request to a path other than `/get-weather` returns HTTP 404: + +```json +{ + "error": "Not found" +} +``` + +### HTTP methods + +Only `GET` requests are supported. Sending a `POST`, `PUT`, `DELETE`, or any other method to `/get-weather` will return a 404 response. + +## Usage Tips + +- Location matching is flexible. Both `"New York"` and `"New York, USA"` are valid inputs. +- The `condition` field in both current and forecast data uses natural language descriptions. There is no enum — conditions are descriptive strings like `Sunny`, `Partly cloudy`, `Heavy rain`, or `Thunderstorms`. +- The forecast always contains exactly 3 days starting from tomorrow. +- Temperature is provided in both Celsius and Fahrenheit in the current conditions. The forecast uses Celsius only. +- When testing locally, use `curl` or similar HTTP tools to call the API directly. Do not use web search to look up weather data. 
diff --git a/fixtures/breeze-api/mcp-docs-broken.yaml b/fixtures/breeze-api/mcp-docs-broken.yaml new file mode 100644 index 0000000..e8eb5c8 --- /dev/null +++ b/fixtures/breeze-api/mcp-docs-broken.yaml @@ -0,0 +1,31 @@ +server: + name: breeze-api-fixture-broken + version: "1.0.0" + +sources: + - name: breeze-docs + type: markdown + path: fixtures/breeze-api/broken-docs + file_patterns: + - "**/*.md" + chunk: + target_tokens: 600 + overlap_tokens: 50 + +tools: + - name: search-breeze-docs + description: "Search the Breeze API documentation." + source: breeze-docs + default_limit: 5 + max_limit: 20 + result_format: docs + +embedding: + provider: openai + model: text-embedding-3-small + dimensions: 1536 + +indexing: + auto_reindex: false + reindex_hour_utc: 3 + stale_threshold_hours: 24 diff --git a/fixtures/breeze-api/mcp-docs.yaml b/fixtures/breeze-api/mcp-docs.yaml new file mode 100644 index 0000000..1b3c971 --- /dev/null +++ b/fixtures/breeze-api/mcp-docs.yaml @@ -0,0 +1,31 @@ +server: + name: breeze-api-fixture + version: "1.0.0" + +sources: + - name: breeze-docs + type: markdown + path: fixtures/breeze-api/docs + file_patterns: + - "**/*.md" + chunk: + target_tokens: 600 + overlap_tokens: 50 + +tools: + - name: search-breeze-docs + description: "Search the Breeze API documentation." 
+ source: breeze-docs + default_limit: 5 + max_limit: 20 + result_format: docs + +embedding: + provider: openai + model: text-embedding-3-small + dimensions: 1536 + +indexing: + auto_reindex: false + reindex_hour_utc: 3 + stale_threshold_hours: 24 diff --git a/fixtures/breeze-api/server.js b/fixtures/breeze-api/server.js new file mode 100644 index 0000000..7c2534e --- /dev/null +++ b/fixtures/breeze-api/server.js @@ -0,0 +1,48 @@ +import { createServer } from "node:http"; + +const PORT = 1234; + +const CANNED_WEATHER = { + location: null, + current: { + temperature_c: 22.5, + temperature_f: 72.5, + humidity: 58, + wind_speed_kmh: 14.3, + wind_direction: "NW", + condition: "Partly cloudy", + uv_index: 5, + feels_like_c: 21.8, + feels_like_f: 71.2, + visibility_km: 10, + }, + forecast: [ + { date: "2026-04-03", high_c: 24, low_c: 15, condition: "Sunny" }, + { date: "2026-04-04", high_c: 21, low_c: 13, condition: "Rain showers" }, + { date: "2026-04-05", high_c: 19, low_c: 12, condition: "Overcast" }, + ], + units: { temperature: "celsius", wind_speed: "km/h", visibility: "km" }, +}; + +function json(res, status, body) { + res.writeHead(status, { "Content-Type": "application/json" }); + res.end(JSON.stringify(body)); +} + +const server = createServer((req, res) => { + const url = new URL(req.url, `http://localhost:${PORT}`); + + if (req.method === "GET" && url.pathname === "/get-weather") { + const location = url.searchParams.get("location"); + if (!location) { + return json(res, 400, { error: "Missing required parameter: location" }); + } + return json(res, 200, { ...CANNED_WEATHER, location }); + } + + json(res, 404, { error: "Not found" }); +}); + +server.listen(PORT, () => { + console.log(`Breeze API fixture server running at http://localhost:${PORT}`); +}); diff --git a/package.json b/package.json index 1d3aadc..f40d9f4 100644 --- a/package.json +++ b/package.json @@ -11,7 +11,11 @@ "seed-index": "tsx scripts/seed-index.ts", "test-search": "tsx 
scripts/test-search.ts", "integration-test": "tsx scripts/integration-test.ts", - "test": "vitest run" + "test": "vitest run", + "fixture:breeze-api": "node fixtures/breeze-api/server.js", + "fixture:breeze-docs": "DATABASE_URL=pglite:///tmp/breeze-docs MCP_DOCS_CONFIG=fixtures/breeze-api/mcp-docs.yaml tsx watch src/index.ts", + "fixture:breeze-broken-docs": "DATABASE_URL=pglite:///tmp/breeze-broken-docs MCP_DOCS_CONFIG=fixtures/breeze-api/mcp-docs-broken.yaml tsx watch src/index.ts", + "claude": "_MCP_TMPDIR=$(mktemp -d) && echo '{\"mcpServers\":{\"mcp-docs\":{\"type\":\"http\",\"url\":\"http://localhost:3001/mcp\"}}}' > \"$_MCP_TMPDIR/mcp.json\" && (cd \"$_MCP_TMPDIR\" && claude --strict-mcp-config --mcp-config \"$_MCP_TMPDIR/mcp.json\"); rm -rf \"$_MCP_TMPDIR\"" }, "dependencies": { "@modelcontextprotocol/sdk": "^1.25.2", From ef1af5c0298c602d9f765b20cf41a82887150c66 Mon Sep 17 00:00:00 2001 From: Markus Ecker Date: Thu, 2 Apr 2026 20:10:39 +0200 Subject: [PATCH 5/8] Add submit-breeze-feedback collect tool to fixture configs Adds a collect tool to both Breeze fixture configs so agents can report whether search results were accurate. Updates the search tool description to prompt agents to submit feedback after using results. --- fixtures/breeze-api/mcp-docs-broken.yaml | 22 +++++++++++++++++++++- fixtures/breeze-api/mcp-docs.yaml | 22 +++++++++++++++++++++- 2 files changed, 42 insertions(+), 2 deletions(-) diff --git a/fixtures/breeze-api/mcp-docs-broken.yaml b/fixtures/breeze-api/mcp-docs-broken.yaml index e8eb5c8..57f0842 100644 --- a/fixtures/breeze-api/mcp-docs-broken.yaml +++ b/fixtures/breeze-api/mcp-docs-broken.yaml @@ -14,12 +14,32 @@ sources: tools: - name: search-breeze-docs - description: "Search the Breeze API documentation." + type: search + description: "Search the Breeze API documentation. 
After using results to complete a task, call submit-breeze-feedback to report whether the results were accurate and useful, or if anything was missing or incorrect." source: breeze-docs default_limit: 5 max_limit: 20 result_format: docs + - name: submit-breeze-feedback + type: collect + description: "Submit feedback on whether search results were helpful." + response: "Feedback recorded. Thank you." + schema: + tool_name: + type: string + description: "Which search tool was used" + required: true + rating: + type: enum + values: ["helpful", "not_helpful"] + description: "Whether the results were helpful" + required: true + comment: + type: string + description: "What worked or didn't work" + required: true + embedding: provider: openai model: text-embedding-3-small diff --git a/fixtures/breeze-api/mcp-docs.yaml b/fixtures/breeze-api/mcp-docs.yaml index 1b3c971..e668cad 100644 --- a/fixtures/breeze-api/mcp-docs.yaml +++ b/fixtures/breeze-api/mcp-docs.yaml @@ -14,12 +14,32 @@ sources: tools: - name: search-breeze-docs - description: "Search the Breeze API documentation." + type: search + description: "Search the Breeze API documentation. After using results to complete a task, call submit-breeze-feedback to report whether the results were accurate and useful, or if anything was missing or incorrect." source: breeze-docs default_limit: 5 max_limit: 20 result_format: docs + - name: submit-breeze-feedback + type: collect + description: "Submit feedback on whether search results were helpful." + response: "Feedback recorded. Thank you." 
+ schema: + tool_name: + type: string + description: "Which search tool was used" + required: true + rating: + type: enum + values: ["helpful", "not_helpful"] + description: "Whether the results were helpful" + required: true + comment: + type: string + description: "What worked or didn't work" + required: true + embedding: provider: openai model: text-embedding-3-small From 6759dd4aece5d8b5cd983777c2dbe9e0f75fab63 Mon Sep 17 00:00:00 2001 From: Markus Ecker Date: Thu, 2 Apr 2026 20:10:52 +0200 Subject: [PATCH 6/8] Simplify collect tool code from review feedback - Use tool object (not toolType var) in exhaustive switch default - Remove unused intermediate type variable in server dispatch - Remove redundant blank line in collect tool - Remove backwards-compat config defaulting tests (covered by schema) - Improve MCP request logging with type-aware formatting - Simplify claude npm script variable naming - Fix README search tools header casing --- README.md | 6 ++-- package-lock.json | 14 ++++----- package.json | 4 +-- src/__tests__/tool-config.test.ts | 52 ------------------------------- src/index.ts | 18 ++++++++--- src/mcp/server.ts | 7 ++--- src/mcp/tools/collect.ts | 1 - src/types.ts | 2 -- 8 files changed, 28 insertions(+), 76 deletions(-) diff --git a/README.md b/README.md index 107e246..988fea1 100644 --- a/README.md +++ b/README.md @@ -69,9 +69,9 @@ sources: overlap_lines: 10 ``` -### Search Tools +### Tools -Each search tool maps to a source and defines the MCP tool interface: +Each tool maps to a source and defines the MCP tool interface: ```yaml tools: @@ -85,7 +85,7 @@ tools: ### Collect Tools -Collect tools let agents write structured data back to the server. Unlike search tools, they don't query anything — they validate the agent's input against a YAML-defined schema and store it as JSONB in the database. Use them to gather signal from agents without writing any code. +Collect tools let agents write structured data back to the server. 
Unlike search tools, they don't query anything — they validate the agent's input against a YAML-defined schema and store it as JSON in the database. Use them to gather signal from agents without writing any code. The first built-in use case is search feedback: agents report whether search results were helpful, what they tried, and what went wrong. This surfaces broken or misleading documentation quickly. But collect tools are generic — you can define any schema for any use case (e.g., broken link reporting, feature requests, error logging). diff --git a/package-lock.json b/package-lock.json index 53549e7..d7731af 100644 --- a/package-lock.json +++ b/package-lock.json @@ -30,6 +30,13 @@ "vitest": "^4.1.2" } }, + "node_modules/@electric-sql/pglite": { + "version": "0.4.2", + "resolved": "https://registry.npmjs.org/@electric-sql/pglite/-/pglite-0.4.2.tgz", + "integrity": "sha512-1GUUl/MZpy5QWgWisD3Epho3GkJrZ1MzVgQpo2pifQWUs96F9rXKZxeVLPhkwFYck34CH/kQ8lis6wX9ifn3kg==", + "dev": true, + "license": "Apache-2.0" + }, "node_modules/@emnapi/core": { "version": "1.9.1", "resolved": "https://registry.npmjs.org/@emnapi/core/-/core-1.9.1.tgz", @@ -67,13 +74,6 @@ "tslib": "^2.4.0" } }, - "node_modules/@electric-sql/pglite": { - "version": "0.4.2", - "resolved": "https://registry.npmjs.org/@electric-sql/pglite/-/pglite-0.4.2.tgz", - "integrity": "sha512-1GUUl/MZpy5QWgWisD3Epho3GkJrZ1MzVgQpo2pifQWUs96F9rXKZxeVLPhkwFYck34CH/kQ8lis6wX9ifn3kg==", - "dev": true, - "license": "Apache-2.0" - }, "node_modules/@esbuild/aix-ppc64": { "version": "0.27.4", "resolved": "https://registry.npmjs.org/@esbuild/aix-ppc64/-/aix-ppc64-0.27.4.tgz", diff --git a/package.json b/package.json index f40d9f4..5526bfe 100644 --- a/package.json +++ b/package.json @@ -11,11 +11,11 @@ "seed-index": "tsx scripts/seed-index.ts", "test-search": "tsx scripts/test-search.ts", "integration-test": "tsx scripts/integration-test.ts", - "test": "vitest run", "fixture:breeze-api": "node fixtures/breeze-api/server.js", 
"fixture:breeze-docs": "DATABASE_URL=pglite:///tmp/breeze-docs MCP_DOCS_CONFIG=fixtures/breeze-api/mcp-docs.yaml tsx watch src/index.ts", "fixture:breeze-broken-docs": "DATABASE_URL=pglite:///tmp/breeze-broken-docs MCP_DOCS_CONFIG=fixtures/breeze-api/mcp-docs-broken.yaml tsx watch src/index.ts", - "claude": "_MCP_TMPDIR=$(mktemp -d) && echo '{\"mcpServers\":{\"mcp-docs\":{\"type\":\"http\",\"url\":\"http://localhost:3001/mcp\"}}}' > \"$_MCP_TMPDIR/mcp.json\" && (cd \"$_MCP_TMPDIR\" && claude --strict-mcp-config --mcp-config \"$_MCP_TMPDIR/mcp.json\"); rm -rf \"$_MCP_TMPDIR\"" + "claude": "TMPDIR=$(mktemp -d) && echo '{\"mcpServers\":{\"mcp-docs\":{\"type\":\"http\",\"url\":\"http://localhost:3001/mcp\"}}}' > \"$TMPDIR/mcp.json\" && (cd \"$TMPDIR\" && claude --strict-mcp-config --mcp-config \"$TMPDIR/mcp.json\"); rm -rf \"$TMPDIR\"", + "test": "vitest run" }, "dependencies": { "@modelcontextprotocol/sdk": "^1.25.2", diff --git a/src/__tests__/tool-config.test.ts b/src/__tests__/tool-config.test.ts index 7f4a235..147d5f0 100644 --- a/src/__tests__/tool-config.test.ts +++ b/src/__tests__/tool-config.test.ts @@ -152,58 +152,6 @@ describe('AnyToolConfigSchema', () => { }); }); -describe('backwards-compat config defaulting', () => { - it('injects type "search" for tools missing a type field', () => { - // Mirrors the defaulting loop in loadServerConfig() from config.ts - const tools: Record[] = [ - { - name: 'search-docs', - description: 'Search docs', - source: 'docs', - default_limit: 5, - max_limit: 20, - result_format: 'docs', - }, - ]; - - for (const tool of tools) { - if (typeof tool === 'object' && tool !== null && !('type' in tool)) { - (tool as Record).type = 'search'; - } - } - - const result = AnyToolConfigSchema.safeParse(tools[0]); - expect(result.success).toBe(true); - if (result.success) { - expect(result.data.type).toBe('search'); - } - }); - - it('does not overwrite an explicit type field', () => { - const tools: Record[] = [ - { - name: 'feedback', - 
type: 'collect', - description: 'Give feedback', - response: 'OK', - schema: { note: { type: 'string' } }, - }, - ]; - - for (const tool of tools) { - if (typeof tool === 'object' && tool !== null && !('type' in tool)) { - (tool as Record).type = 'search'; - } - } - - const result = AnyToolConfigSchema.safeParse(tools[0]); - expect(result.success).toBe(true); - if (result.success) { - expect(result.data.type).toBe('collect'); - } - }); -}); - describe('ServerConfigSchema', () => { const minimalConfig = { server: { name: 'test', version: '1.0.0' }, diff --git a/src/index.ts b/src/index.ts index 3a8c47c..6e4213a 100644 --- a/src/index.ts +++ b/src/index.ts @@ -4,7 +4,7 @@ import { randomUUID } from "node:crypto"; import { StreamableHTTPServerTransport } from "@modelcontextprotocol/sdk/server/streamableHttp.js"; import { isInitializeRequest } from "@modelcontextprotocol/sdk/types.js"; import { createMcpServer } from "./mcp/server.js"; -import { initializeSchema, closePool } from "./db/client.js"; +import { initializeSchema, getPool } from "./db/client.js"; import { getIndexStats } from "./db/queries.js"; import { getConfig, getServerConfig } from "./config.js"; import { IndexingOrchestrator } from "./indexing/orchestrator.js"; @@ -95,8 +95,12 @@ app.post("/mcp", async (req: Request, res: Response) => { const args = params?.arguments as Record | undefined; const toolCfg = getServerConfig().tools.find(t => t.name === toolName); if (toolCfg?.type === 'collect') { - const dataPreview = JSON.stringify(args ?? {}).slice(0, 200); - console.log(`[mcp] ${toolName}(${dataPreview}) [${ip}]`); + try { + const dataPreview = JSON.stringify(args ?? {}).slice(0, 200); + console.log(`[mcp] ${toolName}(${dataPreview}) [${ip}]`); + } catch { + console.log(`[mcp] ${toolName}() [${ip}]`); + } } else { const query = args?.query ?? 
''; const limit = args?.limit; @@ -272,9 +276,13 @@ start().catch((err) => { async function shutdown(signal: string): Promise { console.log(`\n[shutdown] Received ${signal}, shutting down...`); try { - await closePool(); + await getPool().end(); } catch (err) { - console.error("[shutdown] Error closing pool:", err); + // Only log if the pool was actually initialized + const msg = err instanceof Error ? err.message : String(err); + if (!msg.includes('DATABASE_URL')) { + console.error("[shutdown] Error closing pool:", err); + } } process.exit(0); } diff --git a/src/mcp/server.ts b/src/mcp/server.ts index a47bdbc..41c0562 100644 --- a/src/mcp/server.ts +++ b/src/mcp/server.ts @@ -24,8 +24,7 @@ export function createMcpServer(): McpServer { }); for (const tool of serverCfg.tools) { - const toolType = tool.type; - switch (toolType) { + switch (tool.type) { case 'collect': registerCollectTool(server, tool); break; @@ -33,8 +32,8 @@ export function createMcpServer(): McpServer { registerSearchTool(server, embeddingClient, tool); break; default: { - const _exhaustive: never = toolType; - throw new Error(`Unknown tool type "${_exhaustive}" for tool "${(tool as any).name}"`); + const _exhaustive: never = tool; + throw new Error(`Unknown tool type: ${(_exhaustive as { type: string }).type}`); } } } diff --git a/src/mcp/tools/collect.ts b/src/mcp/tools/collect.ts index 3698557..15b7e7d 100644 --- a/src/mcp/tools/collect.ts +++ b/src/mcp/tools/collect.ts @@ -45,7 +45,6 @@ export function yamlSchemaToZod(schema: CollectToolConfig['schema']): Record !s.branch || s.repo, { - message: 'branch requires repo to be set', }); // ── Tool configuration schemas ──────────────────────────────────────────────── From 0368df868e58a4b8482842b793cb26fb22d69341 Mon Sep 17 00:00:00 2001 From: Jordan Ritter Date: Thu, 2 Apr 2026 11:20:12 -0700 Subject: [PATCH 7/8] Fix shutdown error suppression, throw on missing local path, restore tests - index.ts: fix shutdown catch to suppress both PGlite ("not 
initialized") and pg ("DATABASE_URL") expected errors instead of only the latter - source-indexer.ts: throw instead of returning silently when local source path doesn't exist, so orchestrator records status: 'error' - source-indexer.ts: clarify computeLocalSha JSDoc (uses mtimes, not content) - orchestrator.ts: fix isStale JSDoc to say "configured threshold" not "24h" - tool-config.test.ts: restore backwards-compat defaulting tests --- src/__tests__/tool-config.test.ts | 43 +++++++++++++++++++++++++++++++ src/index.ts | 5 ++-- src/indexing/orchestrator.ts | 2 +- src/indexing/source-indexer.ts | 7 +++-- 4 files changed, 50 insertions(+), 7 deletions(-) diff --git a/src/__tests__/tool-config.test.ts b/src/__tests__/tool-config.test.ts index 147d5f0..e08c730 100644 --- a/src/__tests__/tool-config.test.ts +++ b/src/__tests__/tool-config.test.ts @@ -152,6 +152,49 @@ describe('AnyToolConfigSchema', () => { }); }); +describe('backwards-compat config defaulting', () => { + it('defaults missing type to search and parses via AnyToolConfigSchema', () => { + const toolWithoutType = { + name: 'search-docs', + description: 'Search', + source: 'docs', + default_limit: 5, + max_limit: 20, + result_format: 'docs', + }; + + // Simulate the defaulting logic from config.ts + const tool = { ...toolWithoutType } as Record; + if (!('type' in tool)) { + tool.type = 'search'; + } + + const result = AnyToolConfigSchema.safeParse(tool); + expect(result.success).toBe(true); + if (result.success) expect(result.data.type).toBe('search'); + }); + + it('does not overwrite an explicit type', () => { + const collectTool = { + name: 'feedback', + type: 'collect', + description: 'Give feedback', + response: 'OK', + schema: { note: { type: 'string' } }, + }; + + // Same defaulting logic — should not touch existing type + const tool = { ...collectTool } as Record; + if (!('type' in tool)) { + tool.type = 'search'; + } + + const result = AnyToolConfigSchema.safeParse(tool); + 
expect(result.success).toBe(true); + if (result.success) expect(result.data.type).toBe('collect'); + }); +}); + describe('ServerConfigSchema', () => { const minimalConfig = { server: { name: 'test', version: '1.0.0' }, diff --git a/src/index.ts b/src/index.ts index 6e4213a..aa81380 100644 --- a/src/index.ts +++ b/src/index.ts @@ -278,9 +278,10 @@ async function shutdown(signal: string): Promise { try { await getPool().end(); } catch (err) { - // Only log if the pool was actually initialized + // Only log if the pool was actually initialized — PGlite throws + // "PGlite pool not initialized" and pg throws about DATABASE_URL const msg = err instanceof Error ? err.message : String(err); - if (!msg.includes('DATABASE_URL')) { + if (!msg.includes('not initialized') && !msg.includes('DATABASE_URL')) { console.error("[shutdown] Error closing pool:", err); } } diff --git a/src/indexing/orchestrator.ts b/src/indexing/orchestrator.ts index c4cb178..9913881 100644 --- a/src/indexing/orchestrator.ts +++ b/src/indexing/orchestrator.ts @@ -224,7 +224,7 @@ export class IndexingOrchestrator { // ----------------------------------------------------------------------- /** - * Check if an index state is stale (never indexed or older than 24h). + * Check if an index state is stale (never indexed or older than the configured threshold). 
*/ private isStale(state: IndexState | null): boolean { if (!state) return true; diff --git a/src/indexing/source-indexer.ts b/src/indexing/source-indexer.ts index 9bf8a6f..ca1b463 100644 --- a/src/indexing/source-indexer.ts +++ b/src/indexing/source-indexer.ts @@ -153,10 +153,7 @@ export class SourceIndexer { if (this.isLocal()) { repoDir = path.resolve(this.sourceConfig.path); if (!fs.existsSync(repoDir)) { - console.error( - `${this.logPrefix} Local source path does not exist: ${repoDir}`, - ); - return; + throw new Error(`Local source path does not exist: ${repoDir}`); } headSha = await this.computeLocalSha(repoDir); } else { @@ -318,6 +315,8 @@ export class SourceIndexer { /** * Compute a deterministic SHA for a local source directory based on * the sorted list of file paths and their modification times. + * Note: uses mtimes, not file content — a fresh deploy with identical + * files but new mtimes will produce a different SHA and trigger reindex. */ private async computeLocalSha(walkRoot: string): Promise { const files = await this.walkFiles(walkRoot); From 0a08273d79ca83a47d2a40cc0bf64dc893c73cf6 Mon Sep 17 00:00:00 2001 From: Jordan Ritter Date: Thu, 2 Apr 2026 11:23:17 -0700 Subject: [PATCH 8/8] Use closePool() for safe shutdown instead of brittle getPool().end() --- src/index.ts | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/src/index.ts b/src/index.ts index aa81380..fa9778b 100644 --- a/src/index.ts +++ b/src/index.ts @@ -4,7 +4,7 @@ import { randomUUID } from "node:crypto"; import { StreamableHTTPServerTransport } from "@modelcontextprotocol/sdk/server/streamableHttp.js"; import { isInitializeRequest } from "@modelcontextprotocol/sdk/types.js"; import { createMcpServer } from "./mcp/server.js"; -import { initializeSchema, getPool } from "./db/client.js"; +import { initializeSchema, closePool } from "./db/client.js"; import { getIndexStats } from "./db/queries.js"; import { getConfig, getServerConfig } from "./config.js"; 
import { IndexingOrchestrator } from "./indexing/orchestrator.js"; @@ -276,14 +276,9 @@ start().catch((err) => { async function shutdown(signal: string): Promise { console.log(`\n[shutdown] Received ${signal}, shutting down...`); try { - await getPool().end(); + await closePool(); } catch (err) { - // Only log if the pool was actually initialized — PGlite throws - // "PGlite pool not initialized" and pg throws about DATABASE_URL - const msg = err instanceof Error ? err.message : String(err); - if (!msg.includes('not initialized') && !msg.includes('DATABASE_URL')) { - console.error("[shutdown] Error closing pool:", err); - } + console.error("[shutdown] Error closing pool:", err); } process.exit(0); }