Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 54 additions & 3 deletions src/utils/error-lookup.ts
Original file line number Diff line number Diff line change
Expand Up @@ -369,6 +369,50 @@ function getAllEntries(): ErrorEntry[] {
return [...STATIC_ERROR_CATALOG, ...getDynamicEntries()];
}

// ---------------------------------------------------------------------------
// Code-reference filtering
// ---------------------------------------------------------------------------

const TEST_PATH_RE = /\.(test|spec|e2e)\.ts$/i;
// Segment-bounded: the named segment must be a full path component
// (preceded by a path boundary — start-of-string or ``/`` — and
// followed by ``/``). So ``/test/`` and ``tests/foo.ts`` match, but
// ``/contest/``, ``/latest/``, ``/testdata/``, ``/attestations/`` do
// NOT — they're real source dirs that incidentally contain those
// substrings.
const TEST_DIR_RE = /(?:^|\/)(?:tests?|__tests__|e2e|fixtures?|mocks?)\//i;

/**
* Drop OBVIOUSLY content-thin lines: very long single-line content
* dominated by a hex run with no whitespace breaks. Tuned to catch
* generated bytecode and hex blobs while leaving regex literals,
* long template strings, and selector maps alone — those still help
* a human navigate to the right file.
*/
export function isMinifiedShape(content: string): boolean {
const trimmed = content.trim();
if (trimmed.length < 400) return false;
const longestHexRun = (trimmed.match(/[0-9a-fA-F]{200,}/g) ?? [])
.reduce((m, t) => Math.max(m, t.length), 0);
return longestHexRun >= 200;
}

/**
* Heuristic: is this `lookup_error` code reference likely to actually
* help the user understand or fix their error? Drops obvious test
* files and minified bytecode lines; keeps everything else.
*
* Path-based exclusions are the primary signal; the content-shape
* filter is deliberately narrow to avoid dropping legitimate long
* source lines.
*/
export function isUsefulCodeRef(r: SearchResult): boolean {
if (TEST_PATH_RE.test(r.file)) return false;
if (TEST_DIR_RE.test(r.file)) return false;
if (isMinifiedShape(r.content)) return false;
return true;
}

// ---------------------------------------------------------------------------
// Matching algorithm
// ---------------------------------------------------------------------------
Expand Down Expand Up @@ -466,15 +510,22 @@ export function lookupError(
matches.sort((a, b) => b.score - a.score);
const catalogMatches = matches.slice(0, maxResults);

// Fallback: if fewer than 3 catalog matches, search code
// Fallback: if fewer than 3 catalog matches, search code. Over-fetch
// (RAW_CODE_LIMIT) so the post-filter has headroom — if all of the
// top hits are tests / fixtures / minified noise, slicing the raw
// result to 2 would otherwise return zero useful refs even when good
// ones exist deeper in the result set.
const RAW_CODE_LIMIT = 20;
const KEEP_CODE_LIMIT = 2;
let codeMatches: SearchResult[] = [];
if (catalogMatches.length < 3) {
try {
codeMatches = searchCode(query, {
const raw = searchCode(query, {
filePattern: "*.ts",
repo: "aztec-packages",
maxResults: Math.min(maxResults, 5),
maxResults: RAW_CODE_LIMIT,
});
codeMatches = raw.filter(isUsefulCodeRef).slice(0, KEEP_CODE_LIMIT);
} catch {
// Repos may not be cloned — that's fine
}
Expand Down
41 changes: 21 additions & 20 deletions src/utils/search.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
* Search utilities for finding content in cloned repositories
*/

import { execSync } from "child_process";
import { execFileSync } from "child_process";
import { existsSync, readFileSync } from "fs";
import { join, relative, extname } from "path";
import { globbySync } from "globby";
Expand Down Expand Up @@ -43,28 +43,37 @@ export function searchCode(
}

try {
// Try ripgrep first (fast)
const rgFlags = [
caseSensitive ? "" : "-i",
"-n", // line numbers
// Build argv for ripgrep. Using execFileSync (NOT execSync) skips
// the shell layer, so glob patterns like ``*.{nr,ts}`` reach rg
// verbatim instead of being brace-expanded by ``/bin/sh`` and then
// glob-expanded against the node-process cwd. The query is passed
// via ``-e`` so a query starting with a dash isn't reparsed as an
// rg flag, and ``--`` ends flag parsing before the search path.
const rgArgs: string[] = [];
if (!caseSensitive) rgArgs.push("-i");
rgArgs.push(
"-n",
"--no-heading",
"-g",
filePattern,
"-m",
String(maxResults * 2), // Get more, then trim
]
.filter(Boolean)
.join(" ");
String(maxResults * 2),
"-e",
query,
"--",
searchPath,
);

const result = execSync(`rg ${rgFlags} "${escapeShell(query)}" "${searchPath}"`, {
const result = execFileSync("rg", rgArgs, {
encoding: "utf-8",
maxBuffer: 10 * 1024 * 1024,
timeout: 30000,
});

return parseRgOutput(result, maxResults);
} catch (error) {
// Ripgrep not found or no matches, fall back to manual search
} catch {
// Ripgrep not found, no matches (rg exits 1), or other failure:
// fall back to the in-process globby + readFile loop.
return manualSearch(query, searchPath, filePattern, maxResults, caseSensitive);
}
}
Expand Down Expand Up @@ -207,14 +216,6 @@ export function getResultPriority(result: SearchResult): number {

// --- Helper functions ---

/**
* Escape a string for safe use inside double quotes in a shell command.
* Preserves regex syntax (|, *, +, etc.) while preventing shell injection.
*/
function escapeShell(str: string): string {
return str.replace(/["$`\\!]/g, "\\$&");
}

function parseRgOutput(output: string, maxResults: number): SearchResult[] {
const results: SearchResult[] = [];
const lines = output.split("\n").filter(Boolean);
Expand Down
183 changes: 182 additions & 1 deletion tests/utils/error-lookup.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ vi.mock("fs", () => ({
}));

vi.mock("child_process", () => ({
execSync: vi.fn(),
execFileSync: vi.fn(),
}));

vi.mock("globby", () => ({
Expand All @@ -21,6 +21,7 @@ vi.mock("../../src/utils/git.js", () => ({
isRepoCloned: vi.fn(() => false),
}));

import { execFileSync } from "child_process";
import { existsSync, readFileSync } from "fs";
import {
parseSolidityErrors,
Expand All @@ -29,10 +30,13 @@ import {
parseOperatorFaq,
lookupError,
clearErrorCache,
isUsefulCodeRef,
isMinifiedShape,
} from "../../src/utils/error-lookup.js";

const mockExistsSync = vi.mocked(existsSync);
const mockReadFileSync = vi.mocked(readFileSync);
const mockExecFileSync = vi.mocked(execFileSync);

beforeEach(() => {
vi.clearAllMocks();
Expand Down Expand Up @@ -260,3 +264,180 @@ describe("lookupError", () => {
}
});
});

// ---------------------------------------------------------------------------
// Code-reference filter — unit
// ---------------------------------------------------------------------------

describe("isUsefulCodeRef", () => {
const ok = (file: string, content = "export const FOO = 1;") => ({
file,
content,
repo: "aztec-packages",
line: 1,
});

it("keeps a normal source line", () => {
expect(isUsefulCodeRef(ok("yarn-project/foo.ts"))).toBe(true);
});

it("drops .test.ts files", () => {
expect(isUsefulCodeRef(ok("yarn-project/foo.test.ts"))).toBe(false);
});

it("drops .spec.ts files", () => {
expect(isUsefulCodeRef(ok("yarn-project/foo.spec.ts"))).toBe(false);
});

it("drops .e2e.ts files", () => {
expect(isUsefulCodeRef(ok("yarn-project/foo.e2e.ts"))).toBe(false);
});

it("drops files inside __tests__ directories", () => {
expect(isUsefulCodeRef(ok("yarn-project/__tests__/foo.ts"))).toBe(false);
});

it("drops files inside /test/ and /tests/ directories", () => {
expect(isUsefulCodeRef(ok("yarn-project/test/foo.ts"))).toBe(false);
expect(isUsefulCodeRef(ok("yarn-project/tests/foo.ts"))).toBe(false);
});

it("drops files inside /e2e/ and /fixtures/ and /mocks/", () => {
expect(isUsefulCodeRef(ok("yarn-project/e2e/foo.ts"))).toBe(false);
expect(isUsefulCodeRef(ok("yarn-project/fixtures/foo.ts"))).toBe(false);
expect(isUsefulCodeRef(ok("yarn-project/mocks/foo.ts"))).toBe(false);
});

it("does NOT drop on incidental substrings (latest, contest, attestations, testdata)", () => {
// The dir regex is segment-bounded — these are real source dirs
// that incidentally contain ``test``/``latest``/``contest`` as
// substrings, NOT as full path segments. They must survive.
expect(isUsefulCodeRef(ok("yarn-project/latest/foo.ts"))).toBe(true);
expect(isUsefulCodeRef(ok("yarn-project/contest/foo.ts"))).toBe(true);
expect(isUsefulCodeRef(ok("yarn-project/attestations/foo.ts"))).toBe(true);
expect(isUsefulCodeRef(ok("yarn-project/testdata/foo.ts"))).toBe(true);
});

it("drops paths that begin with a test segment (no leading slash)", () => {
// Ripgrep returns relative-from-REPOS_DIR paths, but a relative
// path could in theory start with a test segment if the repo
// itself is named ``tests`` or similar. The leading-boundary in
// TEST_DIR_RE handles this; locking it in here.
expect(isUsefulCodeRef(ok("tests/foo.ts"))).toBe(false);
expect(isUsefulCodeRef(ok("__tests__/foo.ts"))).toBe(false);
});

it("drops minified-shape lines (long pure-hex run)", () => {
// 600 chars of hex — clearly bytecode.
const hex = "deadbeef".repeat(75);
expect(isUsefulCodeRef(ok("yarn-project/foo.ts", hex))).toBe(false);
});

it("keeps long regex literals (no continuous hex run)", () => {
// A regex literal that's long but not hex-shaped.
const line =
"const re = /[A-Za-z0-9_]{200}.*([gimsuy]|[A-Z]+){50}/.test(input);";
expect(isUsefulCodeRef(ok("yarn-project/foo.ts", line))).toBe(true);
});

it("keeps short lines that contain hex literals", () => {
const line = "const sig = 0xdeadbeefcafebabe; // selector";
expect(isUsefulCodeRef(ok("yarn-project/foo.ts", line))).toBe(true);
});
});

describe("isMinifiedShape", () => {
it("returns false for short content", () => {
expect(isMinifiedShape("a".repeat(399))).toBe(false);
});

it("returns true only when a 200+ char hex run is present", () => {
const hex = "deadbeef".repeat(25); // 200 chars of hex
const padded = "x".repeat(200) + hex;
expect(padded.length).toBeGreaterThanOrEqual(400);
expect(isMinifiedShape(padded)).toBe(true);
});

it("returns false when long but no contiguous hex run", () => {
// 500 chars of mixed alphanumeric, but no 200+ hex run.
const line = ("aZ0_".repeat(125)).slice(0, 500);
expect(isMinifiedShape(line)).toBe(false);
});

it("returns false for a long generated-looking source line without hex blob", () => {
// Realistic noisy generated TypeScript: long but full of identifiers,
// commas, brackets — semantic content a human can still navigate.
// No 200-char contiguous hex run, so the heuristic must keep it.
const line =
"export const TX_ERROR_CODES = { " +
Array.from({ length: 60 }, (_, i) => `ERR_${i}: 'message ${i}'`).join(", ") +
" };";
expect(line.length).toBeGreaterThan(400);
expect(isMinifiedShape(line)).toBe(false);
});
});

// ---------------------------------------------------------------------------
// Code-reference filter — integration via lookupError
// ---------------------------------------------------------------------------

describe("lookupError code-ref over-fetch + filter + cap", () => {
beforeEach(() => {
// searchCode (called from lookupError) checks existsSync and shells
// out to ripgrep via execFileSync — the integration test pipes a
// synthetic rg stdout through the parser.
mockExistsSync.mockReturnValue(true);
mockExecFileSync.mockReset();
});

it("survives a top-of-result-set full of tests/minified — keeps deeper real refs", () => {
// Build a synthetic ripgrep stdout: 5 test files + 1 minified line +
// 2 real source lines. Pre-filter slice to 5 (the old behaviour)
// would have produced ZERO useful refs; over-fetch (RAW_CODE_LIMIT
// = 20) plus filter must surface the two real ones.
const minified = "deadbeef".repeat(75); // 600 hex chars
const lines = [
"/fake/repos/aztec-packages/yarn-project/foo.test.ts:1:test 1",
"/fake/repos/aztec-packages/yarn-project/__tests__/bar.ts:2:test 2",
"/fake/repos/aztec-packages/yarn-project/baz.spec.ts:3:test 3",
"/fake/repos/aztec-packages/yarn-project/qux.e2e.ts:4:test 4",
"/fake/repos/aztec-packages/yarn-project/mocks/zap.ts:5:test 5",
`/fake/repos/aztec-packages/yarn-project/abi.ts:6:${minified}`,
"/fake/repos/aztec-packages/yarn-project/real-one.ts:7:export const REAL = 1;",
"/fake/repos/aztec-packages/yarn-project/real-two.ts:8:export function realTwo() {}",
].join("\n");
mockExecFileSync.mockReturnValue(lines);

// Use a query that won't strongly hit the static catalog so the
// fallback (codeMatches) path is taken.
const result = lookupError("xyzzy_unknown_query_for_test_only");

expect(result.codeMatches).toHaveLength(2);
expect(result.codeMatches.map((m) => m.file)).toEqual([
"aztec-packages/yarn-project/real-one.ts",
"aztec-packages/yarn-project/real-two.ts",
]);

// Lock in the over-fetch contract: the raw cap must be wide enough
// for the filter to have headroom. searchCode passes
// ``String(maxResults * 2)`` to rg via -m, so RAW_CODE_LIMIT (20)
// surfaces as ``-m 40`` on the wire.
const calls = mockExecFileSync.mock.calls;
expect(calls.length).toBeGreaterThan(0);
const args = calls[calls.length - 1][1] as string[];
const mIdx = args.indexOf("-m");
expect(mIdx).toBeGreaterThanOrEqual(0);
expect(args[mIdx + 1]).toBe("40");
});

it("caps to 2 even when many useful refs are returned", () => {
const lines = Array.from({ length: 10 }, (_, i) =>
`/fake/repos/aztec-packages/yarn-project/foo${i}.ts:${i}:export const F${i} = ${i};`,
).join("\n");
mockExecFileSync.mockReturnValue(lines);

const result = lookupError("xyzzy_unknown_query_for_test_only_2");

expect(result.codeMatches).toHaveLength(2);
});
});
Loading
Loading