diff --git a/packages/cli/src/__tests__/score.test.ts b/packages/cli/src/__tests__/score.test.ts index 4691df6..5a5fdc8 100644 --- a/packages/cli/src/__tests__/score.test.ts +++ b/packages/cli/src/__tests__/score.test.ts @@ -228,6 +228,60 @@ DEFAULT_LOAD: expect(report.signals.agentConfig.manifest.path).toBe('.ai/manifest.adf'); expect(findCategory(report, 'architecture')?.summary).not.toContain('no ADF manifest'); }); + + it('grounding checker does not count URLs, env-var assignments, Windows paths, or HTTP routes as broken file references', async () => { + const tmp = createTempRepo(); + process.chdir(tmp); + + fs.writeFileSync(path.join(tmp, 'package.json'), JSON.stringify({ name: 'fp-test', version: '1.0.0' }, null, 2)); + + // CLAUDE.md intentionally contains every false-positive category from issues #163/#164. + // None of these strings should appear in the broken-path list. + fs.writeFileSync(path.join(tmp, 'CLAUDE.md'), `# Rules + +Env var assignment in a code block: \`AEGIS_LOCAL_URL=http://localhost:11434\` +Windows absolute path: \`C:/Users/kover/Documents/aegis-daemon\` +Bare Windows path fragment in prose: Files/Git/mingw64/bin/git-credential-manager.exe + +HTTP API routes (from a routes table): +| /chat | Chat endpoint | +| /api/runs | Run list | +| /webhooks/voice/complete | Voice webhook | + +URL-only link: [docs](https://example.com/guide) + +Hostname with port: localhost:3000 +`); + + const { report } = await captureJson(() => scoreCommand(baseOptions, [])); + + const broken: string[] = report.signals.grounding.pathReferences.broken; + + // None of the false-positive patterns should appear as broken references + expect(broken.some((b: string) => b.includes('localhost:11434'))).toBe(false); + expect(broken.some((b: string) => b.includes('AEGIS_LOCAL_URL'))).toBe(false); + expect(broken.some((b: string) => /^[A-Za-z]:[\\/]/.test(b) || b.startsWith('C:/'))).toBe(false); + expect(broken.some((b: string) => b === '/chat' || b === '/api/runs' || 
b.startsWith('/webhooks/'))).toBe(false); + expect(broken.some((b: string) => b.includes('example.com'))).toBe(false); + expect(broken.some((b: string) => b === 'localhost:3000')).toBe(false); + }); + + it('grounding checker correctly tracks ./path-without-extension as a broken reference', async () => { + const tmp = createTempRepo(); + process.chdir(tmp); + + fs.writeFileSync(path.join(tmp, 'package.json'), JSON.stringify({ name: 'rel-test', version: '1.0.0' }, null, 2)); + + // ./docs/runbooks has no file extension — it must still be tracked because the explicit + // ./ prefix makes the intent unambiguous. Regression for the normalizePathCandidate + // stripping ./ before the prefix check ran. + fs.writeFileSync(path.join(tmp, 'CLAUDE.md'), 'See ./docs/runbooks for on-call procedures.\n'); + + const { report } = await captureJson(() => scoreCommand(baseOptions, [])); + + const broken: string[] = report.signals.grounding.pathReferences.broken; + expect(broken).toContain('docs/runbooks'); + }); }); function createTempRepo(): string { diff --git a/packages/cli/src/commands/score.ts b/packages/cli/src/commands/score.ts index f94abda..a47115b 100644 --- a/packages/cli/src/commands/score.ts +++ b/packages/cli/src/commands/score.ts @@ -980,18 +980,28 @@ function isSubstantiveInstruction(content: string): boolean { function extractPathCandidates(content: string): string[] { const candidates = new Set(); + // Markdown link targets: [text](path) for (const match of content.matchAll(/\[[^\]]+\]\(([^)]+)\)/g)) { const target = normalizePathCandidate(match[1]); if (looksLikePath(target)) candidates.add(target); } + // Backtick inline code: `path` for (const match of content.matchAll(/`([^`\n]+)`/g)) { const candidate = normalizePathCandidate(match[1]); if (looksLikePath(candidate)) candidates.add(candidate); } + // Raw path patterns in prose/tables — require explicit relative prefix (./ ../) or a known + // file extension. 
This avoids false positives from Windows path fragments, cross-repo refs, + // and HTTP route tables captured in freeform text. + // NOTE: check `raw` (pre-normalization) for the ./ prefix — normalizePathCandidate strips it, + // so checking the already-normalized candidate would silently drop ./path-without-extension refs. for (const match of content.matchAll(/(^|[\s(])((?:\.{1,2}\/)?(?:[A-Za-z0-9._-]+\/)*[A-Za-z0-9._-]+(?:\.[A-Za-z0-9._-]+)?)/gm)) { - const candidate = normalizePathCandidate(match[2]); + const raw = match[2]; + const candidate = normalizePathCandidate(raw); + const ext = path.posix.extname(candidate).toLowerCase(); + if (!raw.startsWith('./') && !raw.startsWith('../') && !KNOWN_PATH_EXTENSIONS.has(ext)) continue; if (looksLikePath(candidate)) candidates.add(candidate); } @@ -1009,7 +1019,8 @@ function normalizePathCandidate(raw: string): string { } function resolveReferencedPath(sourceFile: string, candidate: string): ResolvedPathReference { - if (candidate.startsWith('/')) { + // path.isAbsolute is platform-specific: it matches /foo everywhere, but recognizes Windows drive paths (C:\foo, C:/foo) only when running on Windows. 
+ if (path.isAbsolute(candidate) || candidate.startsWith('/')) { return { source: sourceFile, candidate, @@ -1030,10 +1041,26 @@ function resolveReferencedPath(sourceFile: string, candidate: string): ResolvedP function looksLikePath(candidate: string): boolean { if (!candidate) return false; - if (candidate.startsWith('http://') || candidate.startsWith('https://') || candidate.startsWith('mailto:')) return false; + // URL schemes + if (candidate.startsWith('http://') || candidate.startsWith('https://') || candidate.startsWith('mailto:') || candidate.startsWith('ftp://')) return false; + // CLI flags and fragment anchors if (candidate.startsWith('#') || candidate.startsWith('--')) return false; + // Template expressions, globs, angle-bracket types if (candidate.includes('<') || candidate.includes('>') || candidate.includes('*') || candidate.includes('${')) return false; + // Spaces mean prose, not a path if (candidate.includes(' ')) return false; + // Env-var assignments pulled from code blocks (DATABASE_URL=postgres://...) + if (candidate.includes('=')) return false; + // Windows drive-letter absolute paths (C:\..., D:/...) + if (/^[A-Za-z]:[\\/]/.test(candidate)) return false; + // hostname:port patterns (localhost:3000, api.example.com:8080) + if (/^[a-zA-Z0-9][a-zA-Z0-9.-]*:[0-9]+$/.test(candidate)) return false; + // HTTP API route paths (/api/v1/users) — starts with slash, multiple path segments, no known extension + if ( + candidate.startsWith('/') && + !KNOWN_PATH_EXTENSIONS.has(path.posix.extname(candidate).toLowerCase()) && + candidate.split('/').filter(Boolean).length > 1 + ) return false; if (KNOWN_PATH_FILENAMES.has(candidate) || KNOWN_PATH_FILENAMES.has(path.posix.basename(candidate))) return true; if (candidate.includes('/')) return true; return KNOWN_PATH_EXTENSIONS.has(path.posix.extname(candidate).toLowerCase());