diff --git a/CHANGELOG.md b/CHANGELOG.md index ecc2cbd..b48a7fd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,18 +2,28 @@ ## Unreleased +## [2.1.0](https://github.com/PatrickSys/codebase-context/compare/v1.9.0...v2.1.0) (2026-04-13) + ### Features +- **search:** surface chunk intelligence directly in `search_codebase` results, including symbol identity, scope, signature preview, and compact/full response budgeting +- **map:** upgrade the conventions map with structural skeleton sections and add `map --export` so the compact map can be written to `CODEBASE_MAP.md` - **mcp:** rework multi-project routing so one MCP server can serve multiple projects instead of one hardcoded server entry per repo - **mcp:** keep explicit `project` as the fallback when the client does not provide enough project context - **mcp:** accept repo paths, subproject paths, and file paths as `project` selectors when routing is ambiguous ### Bug Fixes +- **metadata:** require real dependency evidence plus multiple framework indicators before labeling a repo as Next.js or another specialized framework +- **reranker:** auto-heal corrupted cross-encoder cache entries and surface degraded reranker state in `searchQuality.rerankerStatus` +- **benchmarks:** harden comparator lanes for cross-platform execution and keep setup failures explicit instead of silently turning them into claims - **search:** auto-heal on corrupted index now triggers a background rebuild instead of blocking the search response ### Documentation +- publish the v2.1.0 discovery benchmark rerun with the current gate output: `pending_evidence`, `claimAllowed: false`, `24` frozen tasks, `0.75` average usefulness, and `1822.25` average estimated tokens +- document the current comparator truth instead of stale assumptions: the public proof still has setup failures plus near-empty comparator outputs on this host, so benchmark win claims remain blocked +- note the new `searchQuality.tokenEstimate` advisory contract: estimates are 
based on the final serialized response payload and warnings only appear above the 4K-token threshold - simplify the setup story around a roots-first contract: roots-capable multi-project sessions, single-project fallback, and explicit `project` retries - clarify that issue #63 fixed the architecture and workspace-aware workflow, but issue #2 is still only partially solved when the client does not provide roots or active-project context - remove the repo-local `init` / marker-file story from the public setup guidance diff --git a/results/comparator-evidence.json b/results/comparator-evidence.json index 70efa3c..ac47bfe 100644 --- a/results/comparator-evidence.json +++ b/results/comparator-evidence.json @@ -1,7 +1,379 @@ { "codebase-memory-mcp": { - "status": "setup_failed", - "reason": "codebase-memory-mcp install failed. Run: curl -fsSL https://raw.githubusercontent.com/DeusData/codebase-memory-mcp/main/install.sh | sh" + "averageUsefulness": 0, + "averagePayloadBytes": 19, + "averageEstimatedTokens": 5, + "averageFirstRelevantHit": null, + "bestExampleUsefulnessRate": null, + "averageToolCallCount": 1, + "averageElapsedMs": 0.3333333333333333, + "status": "ok", + "taskResults": [ + { + "taskId": "as-map-01", + "job": "map", + "surface": "codebase://context", + "usefulnessScore": 0, + "matchedSignals": [], + "missingSignals": [ + "libraries actually used", + "patterns", + "generated:" + ], + "payloadBytes": 19, + "estimatedTokens": 5, + "toolCallCount": 1, + "elapsedMs": 2 + }, + { + "taskId": "as-map-02", + "job": "map", + "surface": "get_codebase_metadata", + "usefulnessScore": 0, + "matchedSignals": [], + "missingSignals": [ + "framework", + "architecture", + "statistics" + ], + "payloadBytes": 19, + "estimatedTokens": 5, + "toolCallCount": 1, + "elapsedMs": 1 + }, + { + "taskId": "as-map-03", + "job": "map", + "surface": "codebase://context", + "usefulnessScore": 0, + "matchedSignals": [], + "missingSignals": [ + "state", + "patterns", + "libraries actually 
used" + ], + "payloadBytes": 19, + "estimatedTokens": 5, + "toolCallCount": 1, + "elapsedMs": 0 + }, + { + "taskId": "as-map-04", + "job": "map", + "surface": "codebase://context", + "usefulnessScore": 0, + "matchedSignals": [], + "missingSignals": [ + "import aliases", + "tsconfig" + ], + "payloadBytes": 19, + "estimatedTokens": 5, + "toolCallCount": 1, + "elapsedMs": 0 + }, + { + "taskId": "as-find-01", + "job": "find", + "surface": "get_team_patterns", + "usefulnessScore": 0, + "matchedSignals": [], + "missingSignals": [ + "dependencyInjection" + ], + "payloadBytes": 19, + "estimatedTokens": 5, + "toolCallCount": 1, + "elapsedMs": 0 + }, + { + "taskId": "as-find-02", + "job": "find", + "surface": "get_team_patterns", + "usefulnessScore": 0, + "matchedSignals": [], + "missingSignals": [ + "stateManagement" + ], + "payloadBytes": 19, + "estimatedTokens": 5, + "toolCallCount": 1, + "elapsedMs": 1 + }, + { + "taskId": "as-find-03", + "job": "find", + "surface": "search_codebase", + "usefulnessScore": 0, + "matchedSignals": [], + "missingSignals": [ + "preflight", + "bestExample", + "patterns" + ], + "payloadBytes": 19, + "estimatedTokens": 5, + "toolCallCount": 1, + "elapsedMs": 0 + }, + { + "taskId": "as-find-04", + "job": "find", + "surface": "get_team_patterns", + "usefulnessScore": 0, + "matchedSignals": [], + "missingSignals": [ + "unitTestFramework", + "test" + ], + "payloadBytes": 19, + "estimatedTokens": 5, + "toolCallCount": 1, + "elapsedMs": 1 + }, + { + "taskId": "as-search-01", + "job": "search", + "surface": "search_codebase", + "usefulnessScore": 0, + "matchedSignals": [], + "missingSignals": [ + "results", + "searchQuality" + ], + "payloadBytes": 19, + "estimatedTokens": 5, + "toolCallCount": 1, + "elapsedMs": 0 + }, + { + "taskId": "as-search-02", + "job": "search", + "surface": "search_codebase", + "usefulnessScore": 0, + "matchedSignals": [], + "missingSignals": [ + "results", + "searchQuality" + ], + "payloadBytes": 19, + "estimatedTokens": 5, + 
"toolCallCount": 1, + "elapsedMs": 0 + }, + { + "taskId": "as-search-03", + "job": "search", + "surface": "search_codebase", + "usefulnessScore": 0, + "matchedSignals": [], + "missingSignals": [ + "results", + "searchQuality" + ], + "payloadBytes": 19, + "estimatedTokens": 5, + "toolCallCount": 1, + "elapsedMs": 0 + }, + { + "taskId": "as-search-04", + "job": "search", + "surface": "search_codebase", + "usefulnessScore": 0, + "matchedSignals": [], + "missingSignals": [ + "results", + "searchQuality" + ], + "payloadBytes": 19, + "estimatedTokens": 5, + "toolCallCount": 1, + "elapsedMs": 0 + }, + { + "taskId": "ex-map-01", + "job": "map", + "surface": "get_codebase_metadata", + "usefulnessScore": 0, + "matchedSignals": [], + "missingSignals": [ + "framework", + "architecture", + "statistics" + ], + "payloadBytes": 19, + "estimatedTokens": 5, + "toolCallCount": 1, + "elapsedMs": 0 + }, + { + "taskId": "ex-map-02", + "job": "map", + "surface": "codebase://context", + "usefulnessScore": 0, + "matchedSignals": [], + "missingSignals": [ + "codebase intelligence", + "libraries actually used", + "patterns" + ], + "payloadBytes": 19, + "estimatedTokens": 5, + "toolCallCount": 1, + "elapsedMs": 1 + }, + { + "taskId": "ex-map-03", + "job": "map", + "surface": "codebase://context", + "usefulnessScore": 0, + "matchedSignals": [], + "missingSignals": [ + "import aliases", + "tsconfig" + ], + "payloadBytes": 19, + "estimatedTokens": 5, + "toolCallCount": 1, + "elapsedMs": 0 + }, + { + "taskId": "ex-map-04", + "job": "map", + "surface": "codebase://context", + "usefulnessScore": 0, + "matchedSignals": [], + "missingSignals": [ + "patterns", + "libraries actually used", + "generated:" + ], + "payloadBytes": 19, + "estimatedTokens": 5, + "toolCallCount": 1, + "elapsedMs": 0 + }, + { + "taskId": "ex-find-01", + "job": "find", + "surface": "get_team_patterns", + "usefulnessScore": 0, + "matchedSignals": [], + "missingSignals": [ + "stateManagement" + ], + "payloadBytes": 19, + 
"estimatedTokens": 5, + "toolCallCount": 1, + "elapsedMs": 0 + }, + { + "taskId": "ex-find-02", + "job": "find", + "surface": "search_codebase", + "usefulnessScore": 0, + "matchedSignals": [], + "missingSignals": [ + "preflight", + "bestExample", + "patterns" + ], + "payloadBytes": 19, + "estimatedTokens": 5, + "toolCallCount": 1, + "elapsedMs": 0 + }, + { + "taskId": "ex-find-03", + "job": "find", + "surface": "get_team_patterns", + "usefulnessScore": 0, + "matchedSignals": [], + "missingSignals": [ + "test", + "framework" + ], + "payloadBytes": 19, + "estimatedTokens": 5, + "toolCallCount": 1, + "elapsedMs": 1 + }, + { + "taskId": "ex-find-04", + "job": "find", + "surface": "get_team_patterns", + "usefulnessScore": 0, + "matchedSignals": [], + "missingSignals": [ + "dependencyInjection" + ], + "payloadBytes": 19, + "estimatedTokens": 5, + "toolCallCount": 1, + "elapsedMs": 0 + }, + { + "taskId": "ex-search-01", + "job": "search", + "surface": "search_codebase", + "usefulnessScore": 0, + "matchedSignals": [], + "missingSignals": [ + "results", + "searchQuality" + ], + "payloadBytes": 19, + "estimatedTokens": 5, + "toolCallCount": 1, + "elapsedMs": 0 + }, + { + "taskId": "ex-search-02", + "job": "search", + "surface": "search_codebase", + "usefulnessScore": 0, + "matchedSignals": [], + "missingSignals": [ + "results", + "searchQuality" + ], + "payloadBytes": 19, + "estimatedTokens": 5, + "toolCallCount": 1, + "elapsedMs": 0 + }, + { + "taskId": "ex-search-03", + "job": "search", + "surface": "search_codebase", + "usefulnessScore": 0, + "matchedSignals": [], + "missingSignals": [ + "results", + "searchQuality" + ], + "payloadBytes": 19, + "estimatedTokens": 5, + "toolCallCount": 1, + "elapsedMs": 1 + }, + { + "taskId": "ex-search-04", + "job": "search", + "surface": "search_codebase", + "usefulnessScore": 0, + "matchedSignals": [], + "missingSignals": [ + "results", + "searchQuality" + ], + "payloadBytes": 19, + "estimatedTokens": 5, + "toolCallCount": 1, + 
"elapsedMs": 0 + } + ] }, "jCodeMunch": { "status": "setup_failed", @@ -16,7 +388,379 @@ "reason": "MCP error -32000: Connection closed" }, "raw Claude Code": { - "status": "setup_failed", - "reason": "raw Claude Code baseline requires the Claude Code CLI (claude) to be installed and authenticated. This is the manual-log-capture baseline — record as pending_evidence if claude CLI is unavailable." + "averageUsefulness": 0, + "averagePayloadBytes": 71.54166666666667, + "averageEstimatedTokens": 18.5, + "averageFirstRelevantHit": null, + "bestExampleUsefulnessRate": null, + "averageToolCallCount": null, + "averageElapsedMs": 9590.208333333334, + "status": "ok", + "taskResults": [ + { + "taskId": "as-map-01", + "job": "map", + "surface": "codebase://context", + "usefulnessScore": 0, + "matchedSignals": [], + "missingSignals": [ + "libraries actually used", + "patterns", + "generated:" + ], + "payloadBytes": 65, + "estimatedTokens": 17, + "toolCallCount": null, + "elapsedMs": 12461 + }, + { + "taskId": "as-map-02", + "job": "map", + "surface": "get_codebase_metadata", + "usefulnessScore": 0, + "matchedSignals": [], + "missingSignals": [ + "framework", + "architecture", + "statistics" + ], + "payloadBytes": 65, + "estimatedTokens": 17, + "toolCallCount": null, + "elapsedMs": 9390 + }, + { + "taskId": "as-map-03", + "job": "map", + "surface": "codebase://context", + "usefulnessScore": 0, + "matchedSignals": [], + "missingSignals": [ + "state", + "patterns", + "libraries actually used" + ], + "payloadBytes": 65, + "estimatedTokens": 17, + "toolCallCount": null, + "elapsedMs": 9836 + }, + { + "taskId": "as-map-04", + "job": "map", + "surface": "codebase://context", + "usefulnessScore": 0, + "matchedSignals": [], + "missingSignals": [ + "import aliases", + "tsconfig" + ], + "payloadBytes": 65, + "estimatedTokens": 17, + "toolCallCount": null, + "elapsedMs": 10098 + }, + { + "taskId": "as-find-01", + "job": "find", + "surface": "get_team_patterns", + "usefulnessScore": 0, + 
"matchedSignals": [], + "missingSignals": [ + "dependencyInjection" + ], + "payloadBytes": 70, + "estimatedTokens": 18, + "toolCallCount": null, + "elapsedMs": 8937 + }, + { + "taskId": "as-find-02", + "job": "find", + "surface": "get_team_patterns", + "usefulnessScore": 0, + "matchedSignals": [], + "missingSignals": [ + "stateManagement" + ], + "payloadBytes": 75, + "estimatedTokens": 19, + "toolCallCount": null, + "elapsedMs": 8747 + }, + { + "taskId": "as-find-03", + "job": "find", + "surface": "search_codebase", + "usefulnessScore": 0, + "matchedSignals": [], + "missingSignals": [ + "preflight", + "bestExample", + "patterns" + ], + "payloadBytes": 65, + "estimatedTokens": 17, + "toolCallCount": null, + "elapsedMs": 8747 + }, + { + "taskId": "as-find-04", + "job": "find", + "surface": "get_team_patterns", + "usefulnessScore": 0, + "matchedSignals": [], + "missingSignals": [ + "unitTestFramework", + "test" + ], + "payloadBytes": 65, + "estimatedTokens": 17, + "toolCallCount": null, + "elapsedMs": 9351 + }, + { + "taskId": "as-search-01", + "job": "search", + "surface": "search_codebase", + "usefulnessScore": 0, + "matchedSignals": [], + "missingSignals": [ + "results", + "searchQuality" + ], + "payloadBytes": 73, + "estimatedTokens": 19, + "toolCallCount": null, + "elapsedMs": 9376 + }, + { + "taskId": "as-search-02", + "job": "search", + "surface": "search_codebase", + "usefulnessScore": 0, + "matchedSignals": [], + "missingSignals": [ + "results", + "searchQuality" + ], + "payloadBytes": 70, + "estimatedTokens": 18, + "toolCallCount": null, + "elapsedMs": 9891 + }, + { + "taskId": "as-search-03", + "job": "search", + "surface": "search_codebase", + "usefulnessScore": 0, + "matchedSignals": [], + "missingSignals": [ + "results", + "searchQuality" + ], + "payloadBytes": 65, + "estimatedTokens": 17, + "toolCallCount": null, + "elapsedMs": 11377 + }, + { + "taskId": "as-search-04", + "job": "search", + "surface": "search_codebase", + "usefulnessScore": 0, + 
"matchedSignals": [], + "missingSignals": [ + "results", + "searchQuality" + ], + "payloadBytes": 65, + "estimatedTokens": 17, + "toolCallCount": null, + "elapsedMs": 8972 + }, + { + "taskId": "ex-map-01", + "job": "map", + "surface": "get_codebase_metadata", + "usefulnessScore": 0, + "matchedSignals": [], + "missingSignals": [ + "framework", + "architecture", + "statistics" + ], + "payloadBytes": 65, + "estimatedTokens": 17, + "toolCallCount": null, + "elapsedMs": 10195 + }, + { + "taskId": "ex-map-02", + "job": "map", + "surface": "codebase://context", + "usefulnessScore": 0, + "matchedSignals": [], + "missingSignals": [ + "codebase intelligence", + "libraries actually used", + "patterns" + ], + "payloadBytes": 65, + "estimatedTokens": 17, + "toolCallCount": null, + "elapsedMs": 8753 + }, + { + "taskId": "ex-map-03", + "job": "map", + "surface": "codebase://context", + "usefulnessScore": 0, + "matchedSignals": [], + "missingSignals": [ + "import aliases", + "tsconfig" + ], + "payloadBytes": 71, + "estimatedTokens": 18, + "toolCallCount": null, + "elapsedMs": 8860 + }, + { + "taskId": "ex-map-04", + "job": "map", + "surface": "codebase://context", + "usefulnessScore": 0, + "matchedSignals": [], + "missingSignals": [ + "patterns", + "libraries actually used", + "generated:" + ], + "payloadBytes": 75, + "estimatedTokens": 19, + "toolCallCount": null, + "elapsedMs": 8623 + }, + { + "taskId": "ex-find-01", + "job": "find", + "surface": "get_team_patterns", + "usefulnessScore": 0, + "matchedSignals": [], + "missingSignals": [ + "stateManagement" + ], + "payloadBytes": 150, + "estimatedTokens": 38, + "toolCallCount": null, + "elapsedMs": 12098 + }, + { + "taskId": "ex-find-02", + "job": "find", + "surface": "search_codebase", + "usefulnessScore": 0, + "matchedSignals": [], + "missingSignals": [ + "preflight", + "bestExample", + "patterns" + ], + "payloadBytes": 65, + "estimatedTokens": 17, + "toolCallCount": null, + "elapsedMs": 8783 + }, + { + "taskId": "ex-find-03", + 
"job": "find", + "surface": "get_team_patterns", + "usefulnessScore": 0, + "matchedSignals": [], + "missingSignals": [ + "test", + "framework" + ], + "payloadBytes": 65, + "estimatedTokens": 17, + "toolCallCount": null, + "elapsedMs": 8785 + }, + { + "taskId": "ex-find-04", + "job": "find", + "surface": "get_team_patterns", + "usefulnessScore": 0, + "matchedSignals": [], + "missingSignals": [ + "dependencyInjection" + ], + "payloadBytes": 83, + "estimatedTokens": 21, + "toolCallCount": null, + "elapsedMs": 8912 + }, + { + "taskId": "ex-search-01", + "job": "search", + "surface": "search_codebase", + "usefulnessScore": 0, + "matchedSignals": [], + "missingSignals": [ + "results", + "searchQuality" + ], + "payloadBytes": 65, + "estimatedTokens": 17, + "toolCallCount": null, + "elapsedMs": 8043 + }, + { + "taskId": "ex-search-02", + "job": "search", + "surface": "search_codebase", + "usefulnessScore": 0, + "matchedSignals": [], + "missingSignals": [ + "results", + "searchQuality" + ], + "payloadBytes": 75, + "estimatedTokens": 19, + "toolCallCount": null, + "elapsedMs": 8755 + }, + { + "taskId": "ex-search-03", + "job": "search", + "surface": "search_codebase", + "usefulnessScore": 0, + "matchedSignals": [], + "missingSignals": [ + "results", + "searchQuality" + ], + "payloadBytes": 65, + "estimatedTokens": 17, + "toolCallCount": null, + "elapsedMs": 12373 + }, + { + "taskId": "ex-search-04", + "job": "search", + "surface": "search_codebase", + "usefulnessScore": 0, + "matchedSignals": [], + "missingSignals": [ + "results", + "searchQuality" + ], + "payloadBytes": 65, + "estimatedTokens": 17, + "toolCallCount": null, + "elapsedMs": 8802 + } + ] } } \ No newline at end of file diff --git a/results/gate-evaluation.json b/results/gate-evaluation.json index d38f01d..d3e5788 100644 --- a/results/gate-evaluation.json +++ b/results/gate-evaluation.json @@ -1,14 +1,14 @@ { "totalTasks": 24, "averageUsefulness": 0.75, - "averagePayloadBytes": 3613.6666666666665, - 
"averageEstimatedTokens": 903.7083333333334, + "averagePayloadBytes": 7287.625, + "averageEstimatedTokens": 1822.25, "searchTasks": 8, "findTasks": 8, "mapTasks": 8, "averageFirstRelevantHit": null, "bestExampleUsefulnessRate": 0.125, - "averageElapsedMs": 282.625, + "averageElapsedMs": 546.75, "averageToolCallCount": 1, "results": [ { @@ -25,9 +25,9 @@ "generated:" ], "forbiddenHits": [], - "payloadBytes": 8548, - "estimatedTokens": 2137, - "elapsedMs": 20, + "payloadBytes": 23720, + "estimatedTokens": 5930, + "elapsedMs": 74, "toolCallCount": 1 }, { @@ -45,7 +45,7 @@ "forbiddenHits": [], "payloadBytes": 5751, "estimatedTokens": 1438, - "elapsedMs": 26, + "elapsedMs": 29, "toolCallCount": 1 }, { @@ -62,9 +62,9 @@ "libraries actually used" ], "forbiddenHits": [], - "payloadBytes": 8548, - "estimatedTokens": 2137, - "elapsedMs": 6, + "payloadBytes": 23720, + "estimatedTokens": 5930, + "elapsedMs": 18, "toolCallCount": 1 }, { @@ -79,9 +79,9 @@ "tsconfig" ], "forbiddenHits": [], - "payloadBytes": 8548, - "estimatedTokens": 2137, - "elapsedMs": 4, + "payloadBytes": 23720, + "estimatedTokens": 5930, + "elapsedMs": 13, "toolCallCount": 1 }, { @@ -98,7 +98,7 @@ "payloadBytes": 1802, "estimatedTokens": 451, "bestExampleUseful": true, - "elapsedMs": 3, + "elapsedMs": 4, "toolCallCount": 1 }, { @@ -131,10 +131,10 @@ ], "missingSignals": [], "forbiddenHits": [], - "payloadBytes": 4014, - "estimatedTokens": 1004, + "payloadBytes": 4960, + "estimatedTokens": 1240, "bestExampleUseful": false, - "elapsedMs": 1015, + "elapsedMs": 6310, "toolCallCount": 1 }, { @@ -167,9 +167,9 @@ ], "missingSignals": [], "forbiddenHits": [], - "payloadBytes": 2843, - "estimatedTokens": 711, - "elapsedMs": 99, + "payloadBytes": 3695, + "estimatedTokens": 924, + "elapsedMs": 130, "toolCallCount": 1 }, { @@ -184,9 +184,9 @@ ], "missingSignals": [], "forbiddenHits": [], - "payloadBytes": 3262, - "estimatedTokens": 816, - "elapsedMs": 158, + "payloadBytes": 4627, + "estimatedTokens": 1157, + 
"elapsedMs": 378, "toolCallCount": 1 }, { @@ -201,9 +201,9 @@ ], "missingSignals": [], "forbiddenHits": [], - "payloadBytes": 3156, - "estimatedTokens": 789, - "elapsedMs": 286, + "payloadBytes": 3981, + "estimatedTokens": 996, + "elapsedMs": 303, "toolCallCount": 1 }, { @@ -218,9 +218,9 @@ ], "missingSignals": [], "forbiddenHits": [], - "payloadBytes": 3136, - "estimatedTokens": 784, - "elapsedMs": 169, + "payloadBytes": 4402, + "estimatedTokens": 1101, + "elapsedMs": 187, "toolCallCount": 1 }, { @@ -239,7 +239,7 @@ "forbiddenHits": [], "payloadBytes": 4268, "estimatedTokens": 1067, - "elapsedMs": 66, + "elapsedMs": 148, "toolCallCount": 1 }, { @@ -256,9 +256,9 @@ "libraries actually used" ], "forbiddenHits": [], - "payloadBytes": 4711, - "estimatedTokens": 1178, - "elapsedMs": 10, + "payloadBytes": 15329, + "estimatedTokens": 3833, + "elapsedMs": 63, "toolCallCount": 1 }, { @@ -273,9 +273,9 @@ "tsconfig" ], "forbiddenHits": [], - "payloadBytes": 4711, - "estimatedTokens": 1178, - "elapsedMs": 14, + "payloadBytes": 15329, + "estimatedTokens": 3833, + "elapsedMs": 52, "toolCallCount": 1 }, { @@ -292,9 +292,9 @@ "generated:" ], "forbiddenHits": [], - "payloadBytes": 4711, - "estimatedTokens": 1178, - "elapsedMs": 8, + "payloadBytes": 15329, + "estimatedTokens": 3833, + "elapsedMs": 48, "toolCallCount": 1 }, { @@ -311,7 +311,7 @@ "payloadBytes": 298, "estimatedTokens": 75, "bestExampleUseful": false, - "elapsedMs": 4, + "elapsedMs": 3, "toolCallCount": 1 }, { @@ -328,10 +328,10 @@ "bestExample" ], "forbiddenHits": [], - "payloadBytes": 3593, - "estimatedTokens": 899, + "payloadBytes": 4570, + "estimatedTokens": 1143, "bestExampleUseful": false, - "elapsedMs": 884, + "elapsedMs": 1018, "toolCallCount": 1 }, { @@ -349,7 +349,7 @@ "payloadBytes": 1615, "estimatedTokens": 404, "bestExampleUseful": false, - "elapsedMs": 3, + "elapsedMs": 4, "toolCallCount": 1 }, { @@ -381,9 +381,9 @@ ], "missingSignals": [], "forbiddenHits": [], - "payloadBytes": 2861, - 
"estimatedTokens": 716, - "elapsedMs": 934, + "payloadBytes": 4033, + "estimatedTokens": 1009, + "elapsedMs": 920, "toolCallCount": 1 }, { @@ -398,9 +398,9 @@ ], "missingSignals": [], "forbiddenHits": [], - "payloadBytes": 2459, - "estimatedTokens": 615, - "elapsedMs": 1254, + "payloadBytes": 3440, + "estimatedTokens": 860, + "elapsedMs": 1369, "toolCallCount": 1 }, { @@ -415,9 +415,9 @@ ], "missingSignals": [], "forbiddenHits": [], - "payloadBytes": 2968, - "estimatedTokens": 742, - "elapsedMs": 1116, + "payloadBytes": 4391, + "estimatedTokens": 1098, + "elapsedMs": 1269, "toolCallCount": 1 }, { @@ -432,9 +432,9 @@ ], "missingSignals": [], "forbiddenHits": [], - "payloadBytes": 2609, - "estimatedTokens": 653, - "elapsedMs": 697, + "payloadBytes": 3607, + "estimatedTokens": 902, + "elapsedMs": 775, "toolCallCount": 1 } ], @@ -446,25 +446,25 @@ "status": "pending_evidence", "payloadMetric": "averageEstimatedTokens", "payloadMetricPassed": false, - "beatenUsefulnessMetrics": [], + "beatenUsefulnessMetrics": [ + "averageUsefulness" + ], "missingMetrics": [ - "averageEstimatedTokens", - "averageUsefulness", "averageFirstRelevantHit", "bestExampleUsefulnessRate" ], "comparisons": [ { "metric": "averageEstimatedTokens", - "comparatorValue": null, - "actualValue": 903.7083333333334, + "comparatorValue": 18.5, + "actualValue": 1822.25, "passes": false }, { "metric": "averageUsefulness", - "comparatorValue": null, + "comparatorValue": 0, "actualValue": 0.75, - "passes": false + "passes": true }, { "metric": "averageFirstRelevantHit", @@ -546,16 +546,15 @@ "status": "pending_evidence", "tolerancePercent": 15, "missingMetrics": [ - "averageUsefulness", "averageFirstRelevantHit", "bestExampleUsefulnessRate" ], "comparisons": [ { "metric": "averageUsefulness", - "comparatorValue": null, + "comparatorValue": 0, "actualValue": 0.75, - "passes": false + "passes": true }, { "metric": "averageFirstRelevantHit", diff --git a/src/index.ts b/src/index.ts index 8bc2d38..a4d7c73 100644 
--- a/src/index.ts +++ b/src/index.ts @@ -50,6 +50,7 @@ import { } from './utils/project-discovery.js'; import { readIndexMeta, validateIndexArtifacts } from './core/index-meta.js'; import { TOOLS, dispatchTool, type ToolContext, type ToolResponse } from './tools/index.js'; +import { finalizeSearchPayloadText } from './tools/search-payload-budget.js'; import type { ProjectDescriptor, ToolPaths } from './tools/types.js'; import { getOrCreateProject, @@ -119,6 +120,22 @@ type ProjectResolution = | { ok: true; project: ProjectState } | { ok: false; response: ToolResponse }; +function finalizeJsonTextPayload(payload: Record<string, unknown>): string { + const mode = + typeof payload.budget === 'object' && + payload.budget !== null && + 'mode' in payload.budget && + (payload.budget.mode === 'compact' || payload.budget.mode === 'full') + ? payload.budget.mode + : undefined; + + if (!mode) { + return JSON.stringify(payload); + } + + return finalizeSearchPayloadText(payload, { mode }); +} + function registerKnownRoot(rootPath: string): string { const resolvedRootPath = path.resolve(rootPath); knownRoots.set(normalizeRootKey(resolvedRootPath), { rootPath: resolvedRootPath }); @@ -941,7 +958,7 @@ export function registerHandlers(target: Server): void { const parsed = JSON.parse(result.content[0].text); result.content[0] = { type: 'text', - text: JSON.stringify({ + text: finalizeJsonTextPayload({ ...parsed, index: indexSignal, project: buildProjectDescriptor(project.rootPath) @@ -955,7 +972,10 @@ export function registerHandlers(target: Server): void { const parsed = JSON.parse(result.content[0].text); result.content[0] = { type: 'text', - text: JSON.stringify({ ...parsed, project: buildProjectDescriptor(project.rootPath) }) + text: finalizeJsonTextPayload({ + ...parsed, + project: buildProjectDescriptor(project.rootPath) + }) }; } catch { /* response wasn't JSON, skip injection */ diff --git a/src/tools/search-codebase.ts b/src/tools/search-codebase.ts index 7f4efaf..84e8711 100644 --- 
a/src/tools/search-codebase.ts +++ b/src/tools/search-codebase.ts @@ -26,6 +26,7 @@ import type { MemoryWithConfidence } from '../memory/store.js'; import { InternalFileGraph } from '../utils/usage-tracker.js'; import type { FileExport } from '../utils/usage-tracker.js'; import { RELATIONSHIPS_FILENAME } from '../constants/codebase-context.js'; +import { finalizeSearchPayloadText } from './search-payload-budget.js'; // Stop words for compact-mode memory relevance filter (mirrors QUERY_STOP_WORDS in search.ts) const COMPACT_STOP_WORDS = new Set([ @@ -1045,6 +1046,22 @@ export async function handle( ...(rerankerStatus === 'unavailable' && { rerankerStatus: 'unavailable' }) }; + type SearchResponsePayload = { + status: 'success'; + searchQuality: typeof searchQualityBlock & { + tokenEstimate?: number; + warning?: string; + }; + budget: { mode: 'compact' | 'full'; resultCount: number }; + preflight?: typeof preflightPayload; + patternSummary?: string; + bestExample?: string; + nextHops?: Array<{ tool: string; why: string; args?: Record }>; + results: Array>; + totalResults?: number; + relatedMemories?: string[]; + }; + // Compact mode (default): bounded response with light graph context const isCompact = mode !== 'full'; @@ -1054,122 +1071,123 @@ export async function handle( const patternSummary = buildPatternSummary(); const bestExample = getBestExample(compactResults); const nextHops = buildNextHops(compactResults, searchQuality); + const payloadText = finalizeSearchPayloadText( + { + status: 'success', + searchQuality: searchQualityBlock, + budget: { mode: 'compact', resultCount: compactResults.length }, + ...(preflightPayload && { preflight: preflightPayload }), + ...(patternSummary && { patternSummary }), + ...(bestExample && { bestExample }), + ...(nextHops.length > 0 && { nextHops }), + results: compactResults.map((r) => { + const importedByCount = getImportedByCount(r); + const topExports = getTopExports(r.filePath); + const scope = 
buildScopeHeader(r.metadata); + // First 3 lines of chunk content as a lightweight signature preview + const signaturePreview = r.snippet + ? r.snippet + .replace(/^\r?\n+/, '') + .split('\n') + .slice(0, 3) + .join('\n') + .trim() || undefined + : undefined; + return { + file: `${r.filePath}:${r.startLine}-${r.endLine}`, + summary: r.summary, + score: Math.round(r.score * 100) / 100, + ...(r.relevanceReason && { relevanceReason: r.relevanceReason }), + ...(r.componentType && + r.layer && + r.layer !== 'unknown' && { type: `${r.componentType}:${r.layer}` }), + ...(r.trend && r.trend !== 'Stable' && { trend: r.trend }), + ...(r.patternWarning && { patternWarning: r.patternWarning }), + importedByCount, + ...(topExports.length > 0 && { topExports }), + ...(r.layer && r.layer !== 'unknown' && { layer: r.layer }), + // Structural metadata: surface AST intelligence already computed at index time + ...(r.metadata?.symbolName && { symbol: r.metadata.symbolName }), + ...(r.metadata?.symbolKind && { symbolKind: r.metadata.symbolKind }), + ...(scope && { scope }), + ...(signaturePreview && { signaturePreview }) + }; + }), + ...(strongMemories.length > 0 && { + relatedMemories: strongMemories.map((m) => `${m.memory} (${m.effectiveConfidence})`) + }) + }, + { mode: 'compact', pretty: true, transportAware: true } + ); return { content: [ { type: 'text', - text: JSON.stringify( - { - status: 'success', - searchQuality: searchQualityBlock, - budget: { mode: 'compact', resultCount: compactResults.length }, - ...(preflightPayload && { preflight: preflightPayload }), - ...(patternSummary && { patternSummary }), - ...(bestExample && { bestExample }), - ...(nextHops.length > 0 && { nextHops }), - results: compactResults.map((r) => { - const importedByCount = getImportedByCount(r); - const topExports = getTopExports(r.filePath); - const scope = buildScopeHeader(r.metadata); - // First 3 lines of chunk content as a lightweight signature preview - const signaturePreview = r.snippet - ? 
r.snippet - .replace(/^\r?\n+/, '') - .split('\n') - .slice(0, 3) - .join('\n') - .trim() || undefined - : undefined; - return { - file: `${r.filePath}:${r.startLine}-${r.endLine}`, - summary: r.summary, - score: Math.round(r.score * 100) / 100, - ...(r.relevanceReason && { relevanceReason: r.relevanceReason }), - ...(r.componentType && - r.layer && - r.layer !== 'unknown' && { type: `${r.componentType}:${r.layer}` }), - ...(r.trend && r.trend !== 'Stable' && { trend: r.trend }), - ...(r.patternWarning && { patternWarning: r.patternWarning }), - importedByCount, - ...(topExports.length > 0 && { topExports }), - ...(r.layer && r.layer !== 'unknown' && { layer: r.layer }), - // Structural metadata: surface AST intelligence already computed at index time - ...(r.metadata?.symbolName && { symbol: r.metadata.symbolName }), - ...(r.metadata?.symbolKind && { symbolKind: r.metadata.symbolKind }), - ...(scope && { scope }), - ...(signaturePreview && { signaturePreview }) - }; - }), - ...(strongMemories.length > 0 && { - relatedMemories: strongMemories.map((m) => `${m.memory} (${m.effectiveConfidence})`) - }) - }, - null, - 2 - ) + text: payloadText } ] }; } // Full mode: today's response shape + budget + relevanceReason; consumers removed + const payloadText = finalizeSearchPayloadText( + { + status: 'success', + searchQuality: searchQualityBlock, + budget: { mode: 'full', resultCount: results.length }, + ...(preflightPayload && { preflight: preflightPayload }), + results: results.map((r) => { + const relationshipsAndHints = buildRelationshipHints(r); + const enrichedSnippet = includeSnippets + ? 
enrichSnippetWithScope(r.snippet, r.metadata, r.filePath, r.startLine) + : undefined; + const scope = buildScopeHeader(r.metadata); + // Chunk-level imports/exports (top 5 each) + complexity + const chunkImports = (r as unknown as { imports?: string[] }).imports?.slice(0, 5); + const chunkExports = (r as unknown as { exports?: string[] }).exports?.slice(0, 5); + + return { + file: `${r.filePath}:${r.startLine}-${r.endLine}`, + summary: r.summary, + score: Math.round(r.score * 100) / 100, + ...(r.relevanceReason && { relevanceReason: r.relevanceReason }), + ...(r.componentType && + r.layer && + r.layer !== 'unknown' && { type: `${r.componentType}:${r.layer}` }), + ...(r.trend && r.trend !== 'Stable' && { trend: r.trend }), + ...(r.patternWarning && { patternWarning: r.patternWarning }), + ...(relationshipsAndHints.relationships && { + relationships: relationshipsAndHints.relationships + }), + ...(relationshipsAndHints.hints && { hints: relationshipsAndHints.hints }), + ...(enrichedSnippet && { snippet: enrichedSnippet }), + // Structural metadata + ...(r.metadata?.symbolName && { symbol: r.metadata.symbolName }), + ...(r.metadata?.symbolKind && { symbolKind: r.metadata.symbolKind }), + ...(scope && { scope }), + ...(chunkImports && chunkImports.length > 0 && { imports: chunkImports }), + ...(chunkExports && chunkExports.length > 0 && { exports: chunkExports }), + ...(r.metadata?.cyclomaticComplexity && { + complexity: r.metadata.cyclomaticComplexity + }) + }; + }), + totalResults: results.length, + ...(relatedMemories.length > 0 && { + relatedMemories: relatedMemories + .slice(0, 3) + .map((m) => `${m.memory} (${m.effectiveConfidence})`) + }) + }, + { mode: 'full', pretty: true, transportAware: true } + ); + return { content: [ { type: 'text', - text: JSON.stringify( - { - status: 'success', - searchQuality: searchQualityBlock, - budget: { mode: 'full', resultCount: results.length }, - ...(preflightPayload && { preflight: preflightPayload }), - results: 
results.map((r) => { - const relationshipsAndHints = buildRelationshipHints(r); - const enrichedSnippet = includeSnippets - ? enrichSnippetWithScope(r.snippet, r.metadata, r.filePath, r.startLine) - : undefined; - const scope = buildScopeHeader(r.metadata); - // Chunk-level imports/exports (top 5 each) + complexity - const chunkImports = (r as unknown as { imports?: string[] }).imports?.slice(0, 5); - const chunkExports = (r as unknown as { exports?: string[] }).exports?.slice(0, 5); - - return { - file: `${r.filePath}:${r.startLine}-${r.endLine}`, - summary: r.summary, - score: Math.round(r.score * 100) / 100, - ...(r.relevanceReason && { relevanceReason: r.relevanceReason }), - ...(r.componentType && - r.layer && - r.layer !== 'unknown' && { type: `${r.componentType}:${r.layer}` }), - ...(r.trend && r.trend !== 'Stable' && { trend: r.trend }), - ...(r.patternWarning && { patternWarning: r.patternWarning }), - ...(relationshipsAndHints.relationships && { - relationships: relationshipsAndHints.relationships - }), - ...(relationshipsAndHints.hints && { hints: relationshipsAndHints.hints }), - ...(enrichedSnippet && { snippet: enrichedSnippet }), - // Structural metadata - ...(r.metadata?.symbolName && { symbol: r.metadata.symbolName }), - ...(r.metadata?.symbolKind && { symbolKind: r.metadata.symbolKind }), - ...(scope && { scope }), - ...(chunkImports && chunkImports.length > 0 && { imports: chunkImports }), - ...(chunkExports && chunkExports.length > 0 && { exports: chunkExports }), - ...(r.metadata?.cyclomaticComplexity && { - complexity: r.metadata.cyclomaticComplexity - }) - }; - }), - totalResults: results.length, - ...(relatedMemories.length > 0 && { - relatedMemories: relatedMemories - .slice(0, 3) - .map((m) => `${m.memory} (${m.effectiveConfidence})`) - }) - }, - null, - 2 - ) + text: payloadText } ] }; diff --git a/src/tools/search-payload-budget.ts b/src/tools/search-payload-budget.ts new file mode 100644 index 0000000..74ee28a --- /dev/null +++ 
/** Response shape the search payload is being rendered for. */
type SearchPayloadMode = 'compact' | 'full';

/** Estimated token count above which a size warning is attached to the payload. */
const LARGE_PAYLOAD_TOKEN_THRESHOLD = 4000;

/** Characters-per-token divisor used for the advisory estimate. */
const CHARS_PER_TOKEN = 4;

/**
 * Maximum number of render passes. Embedding the estimate (and the warning that
 * quotes it) changes the payload length, so the text is re-rendered until the
 * embedded numbers match the rendered size or this bound is reached.
 */
const MAX_RENDER_ATTEMPTS = 5;

/** Narrows `value` to a non-null, non-array object. */
function isPlainRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === 'object' && value !== null && !Array.isArray(value);
}

/**
 * Builds the size warning for an estimated token count.
 *
 * @param tokenEstimate - Estimated token count of the rendered payload.
 * @param mode - Response mode; compact responses get filter-only guidance,
 *   full responses are additionally pointed at compact mode.
 * @returns The warning text, or `undefined` when the estimate is at or below
 *   the threshold.
 */
function buildWarning(tokenEstimate: number, mode: SearchPayloadMode): string | undefined {
  if (tokenEstimate <= LARGE_PAYLOAD_TOKEN_THRESHOLD) {
    return undefined;
  }

  if (mode === 'compact') {
    return `Large search payload: estimated ${tokenEstimate} tokens. Try tighter filters (e.g. layer=, language=) to reduce payload size.`;
  }

  return `Large search payload: estimated ${tokenEstimate} tokens. Prefer compact mode or tighter filters before pasting into an agent.`;
}

/**
 * Serializes a search payload to JSON and writes a self-consistent
 * `searchQuality.tokenEstimate` (rendered length / 4, rounded up) into it,
 * together with a size warning when the estimate exceeds the threshold.
 *
 * If `payload.searchQuality` is not a plain object the payload is serialized
 * unchanged. A `warning` already present in `searchQuality` is kept when the
 * estimate stays at or below the threshold and is replaced by the size warning
 * otherwise.
 *
 * @param payload - Response object to serialize.
 * @param options - `mode` selects the warning wording; `pretty` switches to
 *   2-space indentation; `transportAware` counts each newline as CRLF on win32
 *   when estimating (the returned text itself keeps `\n`).
 * @returns The JSON text. If the estimate has not stabilized after
 *   `MAX_RENDER_ATTEMPTS` passes, the last rendering is returned as-is.
 */
export function finalizeSearchPayloadText(
  payload: Record<string, unknown>,
  options: {
    mode: SearchPayloadMode;
    pretty?: boolean;
    transportAware?: boolean;
  }
): string {
  const indent = options.pretty ? 2 : undefined;
  const searchQuality = payload.searchQuality;

  if (!isPlainRecord(searchQuality)) {
    return JSON.stringify(payload, null, indent);
  }

  // Seed the iteration with whatever the caller pre-computed, if anything.
  let tokenEstimate =
    typeof searchQuality.tokenEstimate === 'number' ? searchQuality.tokenEstimate : 0;
  let warning = typeof searchQuality.warning === 'string' ? searchQuality.warning : undefined;
  let renderedPayload = '';

  for (let attempt = 0; attempt < MAX_RENDER_ATTEMPTS; attempt += 1) {
    renderedPayload = JSON.stringify(
      {
        ...payload,
        searchQuality: {
          ...searchQuality,
          ...(warning ? { warning } : {}),
          tokenEstimate
        }
      },
      null,
      indent
    );

    // Only the measured length is adjusted here; the returned text is untouched.
    // NOTE(review): presumably the win32 transport emits CRLF — confirm.
    const estimatedTransportPayload =
      options.transportAware && process.platform === 'win32'
        ? renderedPayload.replace(/\n/g, '\r\n')
        : renderedPayload;
    const nextTokenEstimate = Math.ceil(estimatedTransportPayload.length / CHARS_PER_TOKEN);
    const nextWarning = buildWarning(nextTokenEstimate, options.mode);

    // Fixed point: the numbers embedded in the text describe that same text.
    if (nextTokenEstimate === tokenEstimate && nextWarning === warning) {
      return renderedPayload;
    }

    tokenEstimate = nextTokenEstimate;
    warning = nextWarning;
  }

  return renderedPayload;
}
searchMocks.search.mockResolvedValueOnce([ + makeResult({ + summary: oversizedSummary + }) + ]); + + const { server } = await import('../src/index.js'); + const handler = ( + server as { + _requestHandlers?: Map< + string, + (r: unknown) => Promise<{ content: Array<{ type: string; text: string }> }> + >; + } + )._requestHandlers?.get('tools/call'); + if (!handler) throw new Error('Expected tools/call handler'); + + const response = await handler({ + jsonrpc: '2.0', + id: 1, + method: 'tools/call', + params: { name: 'search_codebase', arguments: { query: 'auth service' } } + }); + + const payload = JSON.parse(response.content[0].text) as { + searchQuality: { + tokenEstimate: number; + warning?: string; + }; + }; + + expect(payload.searchQuality.tokenEstimate).toBe(Math.ceil(response.content[0].text.length / 4)); + expect(payload.searchQuality.tokenEstimate).toBeGreaterThan(4000); + expect(payload.searchQuality.warning).toBe( + `Large search payload: estimated ${payload.searchQuality.tokenEstimate} tokens. Try tighter filters (e.g. 
layer=, language=) to reduce payload size.` + ); + }); + // Test 5: Full mode returns hints arrays and all memories + budget it('full mode returns hints object with callers/tests and budget metadata', async () => { searchMocks.search.mockResolvedValueOnce([makeResult()]); @@ -362,6 +439,56 @@ describe('search_codebase compact/full mode', () => { expect(Array.isArray(hints.callers)).toBe(true); }); + it('adds a warning only when the final full payload exceeds the compact budget threshold', async () => { + const oversizedSummary = 'Token-heavy summary '.repeat(1200); + const oversizedSnippet = 'const token = authService.getToken();\n'.repeat(600); + searchMocks.search.mockResolvedValueOnce([ + makeResult({ + summary: oversizedSummary, + snippet: oversizedSnippet + }) + ]); + + const { server } = await import('../src/index.js'); + const handler = ( + server as { + _requestHandlers?: Map< + string, + (r: unknown) => Promise<{ content: Array<{ type: string; text: string }> }> + >; + } + )._requestHandlers?.get('tools/call'); + if (!handler) throw new Error('Expected tools/call handler'); + + const response = await handler({ + jsonrpc: '2.0', + id: 1, + method: 'tools/call', + params: { + name: 'search_codebase', + arguments: { query: 'auth service', mode: 'full', includeSnippets: true } + } + }); + + const payload = JSON.parse(response.content[0].text) as { + searchQuality: { + status: string; + confidence: string; + tokenEstimate: number; + warning?: string; + hint?: string; + rerankerStatus?: string; + }; + [key: string]: unknown; + }; + + expect(payload.searchQuality.tokenEstimate).toBe(Math.ceil(response.content[0].text.length / 4)); + expect(payload.searchQuality.tokenEstimate).toBeGreaterThan(4000); + expect(payload.searchQuality.warning).toBe( + `Large search payload: estimated ${payload.searchQuality.tokenEstimate} tokens. 
Prefer compact mode or tighter filters before pasting into an agent.` + ); + }); + // Test 6: relevanceReason appears in results in both modes it('relevanceReason is included in results for both compact and full modes', async () => { searchMocks.search.mockResolvedValueOnce([