From 887a65fdd770e1c4d3760e2d5b0f775685647ac6 Mon Sep 17 00:00:00 2001 From: Kurt Overmier Date: Sat, 23 May 2026 02:45:04 -0500 Subject: [PATCH 1/2] fix(score): filter URLs, env-vars, Windows paths, and HTTP routes from grounding checker The path reference extractor was producing false positives for: - Env-var assignments in code blocks (FOO=http://...) - Windows drive-letter absolute paths (C:/Users/...) - HTTP API route paths in tables (/api/v1/users) - hostname:port strings (localhost:3000) - Windows path fragments captured by the raw prose regex Three changes: 1. `looksLikePath()`: add guards for env-var `=`, Windows drive letters, multi-segment `/`-prefixed routes with no extension, and hostname:port. 2. `extractPathCandidates()`: tighten the raw prose regex extractor to only include candidates that have an explicit `./`/`../` prefix or a known file extension, eliminating Windows path fragments and cross-repo noise. 3. `resolveReferencedPath()`: use `path.isAbsolute()` alongside `startsWith('/')` for cross-platform correctness. Adds a regression test covering all false-positive categories. 
Closes #164, closes #163 Co-Authored-By: Claude Sonnet 4.6 --- packages/cli/src/__tests__/score.test.ts | 37 ++++++++++++++++++++++++ packages/cli/src/commands/score.ts | 28 ++++++++++++++++-- 2 files changed, 63 insertions(+), 2 deletions(-) diff --git a/packages/cli/src/__tests__/score.test.ts b/packages/cli/src/__tests__/score.test.ts index 4691df6..2c25081 100644 --- a/packages/cli/src/__tests__/score.test.ts +++ b/packages/cli/src/__tests__/score.test.ts @@ -228,6 +228,43 @@ DEFAULT_LOAD: expect(report.signals.agentConfig.manifest.path).toBe('.ai/manifest.adf'); expect(findCategory(report, 'architecture')?.summary).not.toContain('no ADF manifest'); }); + + it('grounding checker does not count URLs, env-var assignments, Windows paths, or HTTP routes as broken file references', async () => { + const tmp = createTempRepo(); + process.chdir(tmp); + + fs.writeFileSync(path.join(tmp, 'package.json'), JSON.stringify({ name: 'fp-test', version: '1.0.0' }, null, 2)); + + // CLAUDE.md intentionally contains every false-positive category from issues #163/#164. + // None of these strings should appear in the broken-path list. 
+ fs.writeFileSync(path.join(tmp, 'CLAUDE.md'), `# Rules + +Env var assignment in a code block: \`AEGIS_LOCAL_URL=http://localhost:11434\` +Windows absolute path: \`C:/Users/kover/Documents/aegis-daemon\` +Bare Windows path fragment in prose: Files/Git/mingw64/bin/git-credential-manager.exe + +HTTP API routes (from a routes table): +| /chat | Chat endpoint | +| /api/runs | Run list | +| /webhooks/voice/complete | Voice webhook | + +URL-only link: [docs](https://example.com/guide) + +Hostname with port: localhost:3000 +`); + + const { report } = await captureJson(() => scoreCommand(baseOptions, [])); + + const broken: string[] = report.signals.grounding.pathReferences.broken; + + // None of the false-positive patterns should appear as broken references + expect(broken.some((b: string) => b.includes('localhost:11434'))).toBe(false); + expect(broken.some((b: string) => b.includes('AEGIS_LOCAL_URL'))).toBe(false); + expect(broken.some((b: string) => /^[A-Za-z]:[\\/]/.test(b) || b.startsWith('C:/'))).toBe(false); + expect(broken.some((b: string) => b === '/chat' || b === '/api/runs' || b.startsWith('/webhooks/'))).toBe(false); + expect(broken.some((b: string) => b.includes('example.com'))).toBe(false); + expect(broken.some((b: string) => b === 'localhost:3000')).toBe(false); + }); }); function createTempRepo(): string { diff --git a/packages/cli/src/commands/score.ts b/packages/cli/src/commands/score.ts index f94abda..fa60001 100644 --- a/packages/cli/src/commands/score.ts +++ b/packages/cli/src/commands/score.ts @@ -980,18 +980,25 @@ function isSubstantiveInstruction(content: string): boolean { function extractPathCandidates(content: string): string[] { const candidates = new Set(); + // Markdown link targets: [text](path) for (const match of content.matchAll(/\[[^\]]+\]\(([^)]+)\)/g)) { const target = normalizePathCandidate(match[1]); if (looksLikePath(target)) candidates.add(target); } + // Backtick inline code: `path` for (const match of 
content.matchAll(/`([^`\n]+)`/g)) { const candidate = normalizePathCandidate(match[1]); if (looksLikePath(candidate)) candidates.add(candidate); } + // Raw path patterns in prose/tables — require explicit relative prefix (./ ../) or a known + // file extension. This avoids false positives from Windows path fragments, cross-repo refs, + // and HTTP route tables captured in freeform text. for (const match of content.matchAll(/(^|[\s(])((?:\.{1,2}\/)?(?:[A-Za-z0-9._-]+\/)*[A-Za-z0-9._-]+(?:\.[A-Za-z0-9._-]+)?)/gm)) { const candidate = normalizePathCandidate(match[2]); + const ext = path.posix.extname(candidate).toLowerCase(); + if (!candidate.startsWith('./') && !candidate.startsWith('../') && !KNOWN_PATH_EXTENSIONS.has(ext)) continue; if (looksLikePath(candidate)) candidates.add(candidate); } @@ -1009,7 +1016,8 @@ function normalizePathCandidate(raw: string): string { } function resolveReferencedPath(sourceFile: string, candidate: string): ResolvedPathReference { - if (candidate.startsWith('/')) { + // path.isAbsolute is platform-dependent: it recognises Windows drive-letter paths (C:\foo, C:/foo) only when running on Windows; the startsWith('/') check keeps Unix-style /foo covered on every platform. 
+ if (path.isAbsolute(candidate) || candidate.startsWith('/')) { return { source: sourceFile, candidate, @@ -1030,10 +1038,26 @@ function resolveReferencedPath(sourceFile: string, candidate: string): ResolvedP function looksLikePath(candidate: string): boolean { if (!candidate) return false; - if (candidate.startsWith('http://') || candidate.startsWith('https://') || candidate.startsWith('mailto:')) return false; + // URL schemes + if (candidate.startsWith('http://') || candidate.startsWith('https://') || candidate.startsWith('mailto:') || candidate.startsWith('ftp://')) return false; + // CLI flags and fragment anchors if (candidate.startsWith('#') || candidate.startsWith('--')) return false; + // Template expressions, globs, angle-bracket types if (candidate.includes('<') || candidate.includes('>') || candidate.includes('*') || candidate.includes('${')) return false; + // Spaces mean prose, not a path if (candidate.includes(' ')) return false; + // Env-var assignments pulled from code blocks (DATABASE_URL=postgres://...) + if (candidate.includes('=')) return false; + // Windows drive-letter absolute paths (C:\..., D:/...) 
+ if (/^[A-Za-z]:[\\/]/.test(candidate)) return false; + // hostname:port patterns (localhost:3000, api.example.com:8080) + if (/^[a-zA-Z0-9][a-zA-Z0-9.-]*:[0-9]+$/.test(candidate)) return false; + // HTTP API route paths (/api/v1/users) — starts with slash, multiple path segments, no known extension + if ( + candidate.startsWith('/') && + !KNOWN_PATH_EXTENSIONS.has(path.posix.extname(candidate).toLowerCase()) && + candidate.split('/').filter(Boolean).length > 1 + ) return false; if (KNOWN_PATH_FILENAMES.has(candidate) || KNOWN_PATH_FILENAMES.has(path.posix.basename(candidate))) return true; if (candidate.includes('/')) return true; return KNOWN_PATH_EXTENSIONS.has(path.posix.extname(candidate).toLowerCase()); From e9044cf12ce7fa2e3ddf5a70263fc08b1847eea5 Mon Sep 17 00:00:00 2001 From: Kurt Overmier Date: Sat, 23 May 2026 03:02:26 -0500 Subject: [PATCH 2/2] fix(score): check raw match for ./ prefix before normalizePathCandidate strips it MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit normalizePathCandidate() removes the leading ./ from path candidates. The gate added in the previous commit checked the already-normalized string, so ./docs/runbooks (no extension) became docs/runbooks, failed the startsWith('./') check, and was silently dropped — never flagged as a broken reference even though the intent was unambiguous. Fix: capture the raw regex match before normalization and use it for the ./ / ../ prefix predicate; apply normalization only for the looksLikePath check and set insertion. Adds a regression test: ./docs/runbooks (no extension, missing file) must appear in the broken-paths list. 
Co-Authored-By: Claude Sonnet 4.6 --- packages/cli/src/__tests__/score.test.ts | 17 +++++++++++++++++ packages/cli/src/commands/score.ts | 7 +++++-- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/packages/cli/src/__tests__/score.test.ts b/packages/cli/src/__tests__/score.test.ts index 2c25081..5a5fdc8 100644 --- a/packages/cli/src/__tests__/score.test.ts +++ b/packages/cli/src/__tests__/score.test.ts @@ -265,6 +265,23 @@ Hostname with port: localhost:3000 expect(broken.some((b: string) => b.includes('example.com'))).toBe(false); expect(broken.some((b: string) => b === 'localhost:3000')).toBe(false); }); + + it('grounding checker correctly tracks ./path-without-extension as a broken reference', async () => { + const tmp = createTempRepo(); + process.chdir(tmp); + + fs.writeFileSync(path.join(tmp, 'package.json'), JSON.stringify({ name: 'rel-test', version: '1.0.0' }, null, 2)); + + // ./docs/runbooks has no file extension — it must still be tracked because the explicit + // ./ prefix makes the intent unambiguous. Regression for the normalizePathCandidate + // stripping ./ before the prefix check ran. + fs.writeFileSync(path.join(tmp, 'CLAUDE.md'), 'See ./docs/runbooks for on-call procedures.\n'); + + const { report } = await captureJson(() => scoreCommand(baseOptions, [])); + + const broken: string[] = report.signals.grounding.pathReferences.broken; + expect(broken).toContain('docs/runbooks'); + }); }); function createTempRepo(): string { diff --git a/packages/cli/src/commands/score.ts b/packages/cli/src/commands/score.ts index fa60001..a47115b 100644 --- a/packages/cli/src/commands/score.ts +++ b/packages/cli/src/commands/score.ts @@ -995,10 +995,13 @@ function extractPathCandidates(content: string): string[] { // Raw path patterns in prose/tables — require explicit relative prefix (./ ../) or a known // file extension. This avoids false positives from Windows path fragments, cross-repo refs, // and HTTP route tables captured in freeform text. 
+ // NOTE: check `raw` (pre-normalization) for the ./ prefix — normalizePathCandidate strips it, + // so checking the already-normalized candidate would silently drop ./path-without-extension refs. for (const match of content.matchAll(/(^|[\s(])((?:\.{1,2}\/)?(?:[A-Za-z0-9._-]+\/)*[A-Za-z0-9._-]+(?:\.[A-Za-z0-9._-]+)?)/gm)) { - const candidate = normalizePathCandidate(match[2]); + const raw = match[2]; + const candidate = normalizePathCandidate(raw); const ext = path.posix.extname(candidate).toLowerCase(); - if (!candidate.startsWith('./') && !candidate.startsWith('../') && !KNOWN_PATH_EXTENSIONS.has(ext)) continue; + if (!raw.startsWith('./') && !raw.startsWith('../') && !KNOWN_PATH_EXTENSIONS.has(ext)) continue; if (looksLikePath(candidate)) candidates.add(candidate); }