From 0ecfff5713a7a70e99d834a577a6cd626b167501 Mon Sep 17 00:00:00 2001 From: Christopher Tso Date: Wed, 13 May 2026 00:24:49 +0200 Subject: [PATCH 1/2] feat(skill-search): multi-query merge matching upstream gh skill MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The single content-search query missed skills like `plugins/cargowise/skills/...` whose SKILL.md text doesn't contain the literal "cargowise" — the directory name is part of the path, not the body. Upstream `gh skill search` solves this by dispatching up to four parallel Code Search queries with descending priority and merging by priority. Implements the same approach. - `buildSearchQueries(query, owner)` returns up to four `SkillSearchQuery` entries with explicit priorities: P1 `filename:SKILL.md path:` (+ optional user clause) P2 `filename:SKILL.md ` (only when query has spaces, i.e. pathTerm differs from query) P3 `filename:SKILL.md user:` (only when no explicit `--owner` AND `couldBeOwner(query)`) P4 `filename:SKILL.md ` (+ optional user clause) pathTerm is `query.replace(/ /g, '-')`. - `couldBeOwner(query)` exported helper — matches GitHub's login pattern `^[a-zA-Z0-9]([a-zA-Z0-9-]{0,37}[a-zA-Z0-9])?$`. - `searchSkills` now dispatches all queries in parallel with `Promise.allSettled`. Non-primary failures are logged (configurable `deps.logger`, defaults to stderr) and the surviving buckets still merge; only a P4 failure throws. Items are concatenated in priority order so path-bucket hits sort ahead of primary-bucket hits, then deduped by `repo + qualifiedName` keeping the first (higher-priority) occurrence. `total` reports the final deduped count rather than summing upstream totals (which would multi-count across queries). - Test suite reorganised around a small `makeFakeFetch` helper that dispatches per Code Search query string, so the multi-query path is exercised realistically. 15 new cases: `couldBeOwner` (5), `buildSearchQueries` (7), and merge/dedup behaviour for the cargowise scenario, priority ordering, P1-wins-over-P4 dedup, and the hyphen-bucket placement for multi-word queries. The pre-existing single- call tests are migrated to the flat-array shorthand that the helper applies to every bucket. `bun test`: 1232 pass / 0 fail. Focused: 31 pass / 0 fail. Closes #392 --- src/core/skill-search.ts | 243 ++++++++---- tests/unit/core/skill-search.test.ts | 541 +++++++++++++++++++-------- 2 files changed, 565 insertions(+), 219 deletions(-) diff --git a/src/core/skill-search.ts b/src/core/skill-search.ts index 3298531..b6aa9e0 100644 --- a/src/core/skill-search.ts +++ b/src/core/skill-search.ts @@ -1,9 +1,12 @@ /** * GitHub Code Search wrapper for `allagents skill search`. * - * Hits `GET /search/code` with a `path:SKILL.md filename:SKILL.md ` - * pattern, ranks results by relevance, and maps rate-limit errors to a - * single actionable message so callers never see a raw 403 / HTML dump. + * Mirrors upstream `gh skill search`: runs up to four parallel Code Search + * queries with descending priority (path → hyphenated content → query-as-owner + * → primary content) and merges the results. This is what surfaces skills + * that live at `plugins//skills/...` but whose SKILL.md content never + * mentions `` — the path-targeted query finds them when the bare + * content query can't. * * Auth comes from `GITHUB_TOKEN` when present; unauthenticated requests are * subject to the public Code Search rate limit (10 req/min). See cli/cli @@ -12,6 +15,13 @@ const OWNER_REGEX = /^[A-Za-z0-9-]{1,39}$/; +/** + * GitHub username pattern: starts and ends with alphanumeric, may contain + * single hyphens in between, 1–39 chars total. Matches what GitHub allows + * for user/org logins. + */ +const COULD_BE_OWNER_REGEX = /^[a-zA-Z0-9]([a-zA-Z0-9-]{0,37}[a-zA-Z0-9])?$/; + export interface SkillSearchItem { /** Skill folder name (parent directory of SKILL.md). */ name: string; @@ -77,6 +87,73 @@ export function validateSkillSearchArgs( } } +/** + * Whether `query` could plausibly be a GitHub user/org login (so it's worth + * speculatively searching `user:` for skills owned by that account). + * + * Matches the same rules GitHub enforces for new logins. + */ +export function couldBeOwner(query: string): boolean { + return COULD_BE_OWNER_REGEX.test(query); +} + +/** + * One Code Search query, with its merge priority. Lower priority numbers + * sort earlier in the merged result list. + */ +export interface SkillSearchQuery { + /** 1 = path, 2 = hyphenated, 3 = query-as-owner, 4 = primary content. */ + priority: 1 | 2 | 3 | 4; + /** Short label used in failure logging. */ + label: 'path' | 'hyphen' | 'owner' | 'primary'; + /** The `q=` querystring value (no URL encoding). */ + q: string; +} + +/** + * Build the parallel Code Search query set, ordered by priority. + * + * Always emits: + * - Priority 1 — `filename:SKILL.md path:` (+ optional user filter) + * - Priority 4 — `filename:SKILL.md ` (+ optional user filter) + * + * Conditionally emits: + * - Priority 2 — `filename:SKILL.md ` (only when the hyphenated + * pathTerm differs from the bare query, i.e. when the query has spaces) + * - Priority 3 — `filename:SKILL.md user:` (only when no explicit + * `--owner` is set AND the query itself could plausibly be a GitHub + * login) + * + * The path query is what surfaces e.g. CargoWise skills at + * `plugins/cargowise/skills/...` even when the SKILL.md content doesn't + * mention "cargowise" — `path:cargowise` matches the directory name. + */ +export function buildSearchQueries( + query: string, + owner: string | undefined, +): SkillSearchQuery[] { + const trimmed = query.trim(); + const pathTerm = trimmed.replace(/ /g, '-'); + const userClause = owner ? `user:${owner}` : ''; + const join = (...parts: string[]) => parts.filter(Boolean).join(' '); + + const queries: SkillSearchQuery[] = [ + { priority: 1, label: 'path', q: join('filename:SKILL.md', `path:${pathTerm}`, userClause) }, + ]; + + if (pathTerm !== trimmed) { + queries.push({ priority: 2, label: 'hyphen', q: join('filename:SKILL.md', pathTerm, userClause) }); + } + + if (!owner && couldBeOwner(trimmed)) { + queries.push({ priority: 3, label: 'owner', q: `filename:SKILL.md user:${trimmed}` }); + } + + queries.push({ priority: 4, label: 'primary', q: join('filename:SKILL.md', trimmed, userClause) }); + + return queries; +} + /** * Map a GitHub API response to a SkillSearchError. The 403 / rate-limit body * has a distinctive `documentation_url` and `message` shape; everything else @@ -104,20 +181,10 @@ function classifyApiError(status: number, body: unknown): SkillSearchError { ); } -/** - * Build the `q=` querystring value: ` filename:SKILL.md path:SKILL.md [user:]`. - */ -function buildQueryString(query: string, owner: string | undefined): string { - const parts = [query.trim(), 'filename:SKILL.md', 'path:SKILL.md']; - if (owner) parts.push(`user:${owner}`); - return parts.join(' '); -} - /** * Render the namespace-qualified skill name (`/`) when a - * namespace is set, or just `` otherwise. Used both for ranking - * (so `kynan/commit` matches the query "kynan/commit") and as the dedup - * key together with the repo full name. + * namespace is set, or just `` otherwise. Used as the dedup key + * together with the repo full name. */ export function qualifiedName(item: Pick): string { return item.namespace ? `${item.namespace}/${item.name}` : item.name; @@ -172,25 +239,24 @@ function parseSkillPath( } /** - * Run the GitHub Code Search request. Network/auth comes from the environment - * via fetch + GITHUB_TOKEN; no extra deps. - * - * Items are returned in upstream relevance order, then re-ranked by - * qualified-name match against the query and deduped by - * `repo + qualifiedName`. Dedup matters because Code Search will sometimes - * return the same skill folder twice via different match contexts. + * Result of running a single Code Search query. */ -export async function searchSkills( - query: string, - options: SkillSearchOptions = {}, - deps: { fetch?: typeof fetch } = {}, -): Promise { - validateSkillSearchArgs(query, options); - const fetchFn = deps.fetch ?? fetch; +interface QueryRunResult { + items: SkillSearchItem[]; + total: number; + truncated: boolean; +} - const page = options.page ?? 1; - const limit = options.limit ?? 30; - const q = buildQueryString(query, options.owner); +/** + * Issue one Code Search request and parse the response into SkillSearchItem. + */ +async function runOneQuery( + q: string, + page: number, + limit: number, + token: string | undefined, + fetchFn: typeof fetch, +): Promise { const url = new URL('https://api.github.com/search/code'); url.searchParams.set('q', q); url.searchParams.set('per_page', String(limit)); @@ -201,7 +267,6 @@ export async function searchSkills( 'X-GitHub-Api-Version': '2022-11-28', 'User-Agent': 'allagents-cli', }; - const token = process.env.GITHUB_TOKEN || process.env.GH_TOKEN; if (token) headers.Authorization = `token ${token}`; const response = await fetchFn(url.toString(), { headers }); @@ -215,7 +280,6 @@ export async function searchSkills( throw classifyApiError(response.status, body); } - // GitHub Code Search response shape: { total_count, incomplete_results, items: [...] } const parsed = body as { total_count?: number; incomplete_results?: boolean; @@ -241,23 +305,90 @@ export async function searchSkills( }; }); - const deduped = dedupeItems(items); - return { - query, - items: rankItems(deduped, query), - total: parsed.total_count ?? deduped.length, + items, + total: parsed.total_count ?? items.length, truncated: Boolean(parsed.incomplete_results), }; } /** - * Drop duplicate hits by `repo + qualifiedName`. The Code Search API can - * return the same skill folder more than once when multiple lines in - * SKILL.md match the query; the user only cares about the folder. + * Run the multi-query Code Search and merge results. + * + * Behaviour: + * - Up to four queries are built via `buildSearchQueries` and dispatched in + * parallel with `Promise.allSettled`. + * - The primary (priority 4) result is required: if it fails, the error + * propagates. Other queries are advisory — failures are logged and the + * surviving buckets still merge. + * - Items are concatenated in priority order (1 → 4), then deduped by + * `repo + qualifiedName` keeping the first occurrence. That makes the + * path bucket win over the content bucket when both match the same skill. * - * Preserves the first occurrence so upstream relevance order is respected - * before re-ranking. + * Network/auth come from `process.env.GITHUB_TOKEN` / `GH_TOKEN` when set; + * unauthenticated requests share the public 10 req/min Code Search rate limit. + */ +export async function searchSkills( + query: string, + options: SkillSearchOptions = {}, + deps: { fetch?: typeof fetch; logger?: (msg: string) => void } = {}, +): Promise { + validateSkillSearchArgs(query, options); + const fetchFn = deps.fetch ?? fetch; + const logger = deps.logger ?? ((msg: string) => process.stderr.write(`${msg}\n`)); + + const page = options.page ?? 1; + const limit = options.limit ?? 30; + const token = process.env.GITHUB_TOKEN || process.env.GH_TOKEN; + + const queries = buildSearchQueries(query, options.owner); + const settled = await Promise.allSettled( + queries.map((entry) => runOneQuery(entry.q, page, limit, token, fetchFn)), + ); + + // Locate the primary (priority 4) result. If it failed, surface its error. + const primaryIdx = queries.findIndex((q) => q.priority === 4); + const primarySettled = settled[primaryIdx]; + if (primarySettled?.status === 'rejected') { + throw primarySettled.reason; + } + + // Collect successful buckets paired with their queries; log failures. + type Bucket = { priority: number; result: QueryRunResult }; + const buckets: Bucket[] = []; + for (let i = 0; i < queries.length; i++) { + const entry = queries[i]; + const outcome = settled[i]; + if (!entry || !outcome) continue; + if (outcome.status === 'fulfilled') { + buckets.push({ priority: entry.priority, result: outcome.value }); + } else { + // Non-primary failure — log and continue. Primary failures are handled above. + const reason = + outcome.reason instanceof Error ? outcome.reason.message : String(outcome.reason); + logger(`Warning: skill search "${entry.label}" query failed: ${reason}`); + } + } + + // Concatenate items in priority order (lower priority number first). + buckets.sort((a, b) => a.priority - b.priority); + const mergedItems = buckets.flatMap((b) => b.result.items); + + const deduped = dedupeItems(mergedItems); + + return { + query, + items: deduped, + total: deduped.length, + truncated: buckets.some((b) => b.result.truncated), + }; +} + +/** + * Drop duplicate hits by `repo + qualifiedName`. Same folder surfaced by + * multiple query buckets (e.g. both `path:` and content match) collapses to + * one entry, with the higher-priority bucket's occurrence winning because + * items are merged in priority order before this runs. */ function dedupeItems(items: SkillSearchItem[]): SkillSearchItem[] { const seen = new Set(); @@ -270,27 +401,3 @@ function dedupeItems(items: SkillSearchItem[]): SkillSearchItem[] { } return out; } - -/** - * Rank items: exact qualified-name match first, then prefix-match, then - * substring, then upstream order. Using `qualifiedName` means a query like - * `kynan/commit` matches the namespaced skill exactly while a query like - * `commit` still falls through to substring matches on both - * `kynan/commit` and `will/commit`. - */ -function rankItems(items: SkillSearchItem[], query: string): SkillSearchItem[] { - const q = query.toLowerCase(); - return [...items].sort((a, b) => { - const aScore = score(a, q); - const bScore = score(b, q); - return bScore - aScore; - }); -} - -function score(item: SkillSearchItem, q: string): number { - const qn = qualifiedName(item).toLowerCase(); - if (qn === q) return 3; - if (qn.startsWith(q)) return 2; - if (qn.includes(q)) return 1; - return 0; -} diff --git a/tests/unit/core/skill-search.test.ts b/tests/unit/core/skill-search.test.ts index ca82e6e..b79ad7d 100644 --- a/tests/unit/core/skill-search.test.ts +++ b/tests/unit/core/skill-search.test.ts @@ -1,11 +1,120 @@ import { describe, expect, it } from 'bun:test'; import { SkillSearchError, + buildSearchQueries, + couldBeOwner, qualifiedName, searchSkills, validateSkillSearchArgs, } from '../../../src/core/skill-search.js'; +/** + * Helper: build a fake `fetch` that dispatches per Code Search query. + * + * Each handler is matched by substring against the `q=` querystring value. + * The first matching handler wins; unmatched queries return an empty page. + * Pass a single string → returns the canned items for every query (legacy + * helper, used by tests that don't care about per-query behaviour). + */ +function makeFakeFetch( + handlers: + | Array<{ + match: (q: string) => boolean; + items: Array<{ + path: string; + sha: string; + repository: { full_name: string; description?: string }; + }>; + totalCount?: number; + incompleteResults?: boolean; + status?: number; + message?: string; + }> + | Array<{ + path: string; + sha: string; + repository: { full_name: string; description?: string }; + }>, + fallback?: { status: number; message: string }, +): typeof fetch { + // Detect the "single items array, applied to every query" form. + const isFlat = + Array.isArray(handlers) && + handlers.length > 0 && + 'path' in (handlers[0] as object); + + const fn = (async (url: string) => { + const u = new URL(url); + const q = u.searchParams.get('q') ?? ''; + + if (isFlat) { + const items = handlers as Array<{ + path: string; + sha: string; + repository: { full_name: string; description?: string }; + }>; + return { + ok: true, + status: 200, + json: async () => ({ + total_count: items.length, + incomplete_results: false, + items, + }), + }; + } + + const cases = handlers as Array<{ + match: (q: string) => boolean; + items: Array<{ + path: string; + sha: string; + repository: { full_name: string; description?: string }; + }>; + totalCount?: number; + incompleteResults?: boolean; + status?: number; + message?: string; + }>; + + for (const handler of cases) { + if (!handler.match(q)) continue; + if (handler.status && handler.status >= 400) { + return { + ok: false, + status: handler.status, + json: async () => ({ message: handler.message ?? '' }), + }; + } + return { + ok: true, + status: 200, + json: async () => ({ + total_count: handler.totalCount ?? handler.items.length, + incomplete_results: handler.incompleteResults ?? false, + items: handler.items, + }), + }; + } + + if (fallback) { + return { + ok: false, + status: fallback.status, + json: async () => ({ message: fallback.message }), + }; + } + return { + ok: true, + status: 200, + json: async () => ({ total_count: 0, incomplete_results: false, items: [] }), + }; + }) as unknown as typeof fetch; + return fn; +} + +const silentLogger = () => {}; + describe('validateSkillSearchArgs', () => { it('rejects queries under 2 chars', () => { expect(() => validateSkillSearchArgs('a', {})).toThrow(SkillSearchError); @@ -40,16 +149,97 @@ describe('qualifiedName', () => { }); }); +describe('couldBeOwner', () => { + it('accepts plain logins', () => { + expect(couldBeOwner('octocat')).toBe(true); + expect(couldBeOwner('WiseTechGlobal')).toBe(true); + expect(couldBeOwner('a')).toBe(true); + }); + + it('accepts logins with internal hyphens', () => { + expect(couldBeOwner('the-owner')).toBe(true); + }); + + it('rejects logins with leading or trailing hyphens', () => { + expect(couldBeOwner('-bad')).toBe(false); + expect(couldBeOwner('bad-')).toBe(false); + }); + + it('rejects logins containing slashes, spaces, or other punctuation', () => { + expect(couldBeOwner('owner/repo')).toBe(false); + expect(couldBeOwner('the owner')).toBe(false); + expect(couldBeOwner('owner.name')).toBe(false); + }); + + it('rejects logins longer than 39 chars', () => { + expect(couldBeOwner('a'.repeat(40))).toBe(false); + expect(couldBeOwner('a'.repeat(39))).toBe(true); + }); +}); + +describe('buildSearchQueries', () => { + it('emits path (P1) + primary (P4) for a single-word query without owner', () => { + const queries = buildSearchQueries('cargowise', undefined); + expect(queries.map((q) => q.label)).toEqual(['path', 'owner', 'primary']); + const labels = new Map(queries.map((q) => [q.label, q.q])); + expect(labels.get('path')).toContain('filename:SKILL.md'); + expect(labels.get('path')).toContain('path:cargowise'); + expect(labels.get('primary')).toBe('filename:SKILL.md cargowise'); + }); + + it('threads --owner onto path and primary, but skips the query-as-owner (P3) query', () => { + const queries = buildSearchQueries('cargowise', 'WiseTechGlobal'); + expect(queries.map((q) => q.label)).toEqual(['path', 'primary']); + expect(queries.find((q) => q.label === 'path')?.q).toBe( + 'filename:SKILL.md path:cargowise user:WiseTechGlobal', + ); + expect(queries.find((q) => q.label === 'primary')?.q).toBe( + 'filename:SKILL.md cargowise user:WiseTechGlobal', + ); + }); + + it('adds the hyphen (P2) query when the query has spaces', () => { + const queries = buildSearchQueries('build worker', undefined); + const path = queries.find((q) => q.label === 'path'); + const hyphen = queries.find((q) => q.label === 'hyphen'); + expect(path?.q).toContain('path:build-worker'); + expect(hyphen?.q).toBe('filename:SKILL.md build-worker'); + }); + + it('skips P3 when --owner is explicitly set', () => { + const queries = buildSearchQueries('cargowise', 'WiseTechGlobal'); + expect(queries.find((q) => q.label === 'owner')).toBeUndefined(); + }); + + it('skips P3 when the query is not a valid GitHub login', () => { + const queries = buildSearchQueries('kynan/commit', undefined); + expect(queries.find((q) => q.label === 'owner')).toBeUndefined(); + }); + + it('includes P3 when no owner is set and the query could be an owner', () => { + const queries = buildSearchQueries('octocat', undefined); + const owner = queries.find((q) => q.label === 'owner'); + expect(owner?.q).toBe('filename:SKILL.md user:octocat'); + }); + + it('orders queries by priority ascending', () => { + const queries = buildSearchQueries('build worker', undefined); + const priorities = queries.map((q) => q.priority); + expect(priorities).toEqual([...priorities].sort((a, b) => a - b)); + }); +}); + describe('searchSkills error mapping', () => { - it('maps 403 rate-limit to a SkillSearchError with kind "rate-limit"', async () => { - const fakeFetch = (async () => ({ - ok: false, - status: 403, - json: async () => ({ message: 'API rate limit exceeded for ...' }), - })) as unknown as typeof fetch; + it('maps 403 rate-limit on the primary query to a SkillSearchError with kind "rate-limit"', async () => { + const fakeFetch = makeFakeFetch( + [ + // Every query returns 403 with rate-limit body. + { match: () => true, items: [], status: 403, message: 'API rate limit exceeded for ...' }, + ], + ); try { - await searchSkills('docs', {}, { fetch: fakeFetch }); + await searchSkills('docs', {}, { fetch: fakeFetch, logger: silentLogger }); throw new Error('should have thrown'); } catch (error) { expect(error).toBeInstanceOf(SkillSearchError); @@ -59,15 +249,13 @@ describe('searchSkills error mapping', () => { } }); - it('maps 422 to a SkillSearchError with kind "api"', async () => { - const fakeFetch = (async () => ({ - ok: false, - status: 422, - json: async () => ({ message: 'Validation Failed' }), - })) as unknown as typeof fetch; + it('maps 422 on the primary query to a SkillSearchError with kind "api"', async () => { + const fakeFetch = makeFakeFetch([ + { match: () => true, items: [], status: 422, message: 'Validation Failed' }, + ]); try { - await searchSkills('docs', {}, { fetch: fakeFetch }); + await searchSkills('docs', {}, { fetch: fakeFetch, logger: silentLogger }); throw new Error('should have thrown'); } catch (error) { expect(error).toBeInstanceOf(SkillSearchError); @@ -75,34 +263,61 @@ describe('searchSkills error mapping', () => { } }); - it('returns normalized items on a successful response', async () => { - const fakeFetch = (async () => ({ - ok: true, - status: 200, - json: async () => ({ - total_count: 2, - incomplete_results: false, + it('logs and continues when a non-primary query fails but the primary succeeds', async () => { + const messages: string[] = []; + const fakeFetch = makeFakeFetch([ + // Path query (P1) fails with 422. + { + match: (q) => q.includes('path:'), + items: [], + status: 422, + message: 'Path query rejected', + }, + // Owner query (P3) returns nothing. + { + match: (q) => q.startsWith('filename:SKILL.md user:'), + items: [], + }, + // Primary query succeeds with one hit. + { + match: () => true, items: [ { path: 'skills/docs-writer/SKILL.md', - sha: 'abc', - repository: { full_name: 'org/repo', description: 'Some skills' }, - }, - { - path: 'skills/api-docs/SKILL.md', - sha: 'def', - repository: { full_name: 'org2/repo2', description: '' }, + sha: 'p1', + repository: { full_name: 'org/repo' }, }, ], - }), - })) as unknown as typeof fetch; + }, + ]); + + const result = await searchSkills('docs', {}, { + fetch: fakeFetch, + logger: (msg) => messages.push(msg), + }); + expect(result.items.length).toBe(1); + expect(result.items[0]?.name).toBe('docs-writer'); + expect(messages.some((m) => m.includes('path') && m.includes('failed'))).toBe(true); + }); + + it('returns normalized items on a successful response (all buckets identical)', async () => { + const fakeFetch = makeFakeFetch([ + { + path: 'skills/docs-writer/SKILL.md', + sha: 'abc', + repository: { full_name: 'org/repo', description: 'Some skills' }, + }, + { + path: 'skills/api-docs/SKILL.md', + sha: 'def', + repository: { full_name: 'org2/repo2', description: '' }, + }, + ]); - const result = await searchSkills('docs', { limit: 5 }, { fetch: fakeFetch }); - expect(result.total).toBe(2); + const result = await searchSkills('docs', { limit: 5 }, { fetch: fakeFetch, logger: silentLogger }); expect(result.items.length).toBe(2); expect(result.items.map((i) => i.name)).toContain('docs-writer'); expect(result.items.map((i) => i.name)).toContain('api-docs'); - // Non-namespaced paths get empty namespace strings. for (const item of result.items) { expect(item.namespace).toBe(''); } @@ -111,173 +326,197 @@ describe('searchSkills error mapping', () => { describe('namespace extraction', () => { it('parses skills///SKILL.md into namespace + name', async () => { - const fakeFetch = (async () => ({ - ok: true, - status: 200, - json: async () => ({ - total_count: 2, - incomplete_results: false, - items: [ - { - path: 'skills/kynan/commit/SKILL.md', - sha: 'a1', - repository: { full_name: 'org/skills', description: '' }, - }, - { - path: 'skills/will/commit/SKILL.md', - sha: 'b2', - repository: { full_name: 'org/skills', description: '' }, - }, - ], - }), - })) as unknown as typeof fetch; + const fakeFetch = makeFakeFetch([ + { + path: 'skills/kynan/commit/SKILL.md', + sha: 'a1', + repository: { full_name: 'org/skills', description: '' }, + }, + { + path: 'skills/will/commit/SKILL.md', + sha: 'b2', + repository: { full_name: 'org/skills', description: '' }, + }, + ]); - const result = await searchSkills('commit', {}, { fetch: fakeFetch }); + const result = await searchSkills('commit', {}, { fetch: fakeFetch, logger: silentLogger }); expect(result.items.length).toBe(2); const byKey = new Map(result.items.map((i) => [`${i.namespace}/${i.name}`, i])); expect(byKey.has('kynan/commit')).toBe(true); expect(byKey.has('will/commit')).toBe(true); - expect(byKey.get('kynan/commit')?.namespace).toBe('kynan'); - expect(byKey.get('kynan/commit')?.name).toBe('commit'); }); it('parses non-namespaced skills//SKILL.md with empty namespace', async () => { - const fakeFetch = (async () => ({ - ok: true, - status: 200, - json: async () => ({ - total_count: 1, - incomplete_results: false, - items: [ - { - path: 'skills/brainstorming/SKILL.md', - sha: 'cc', - repository: { full_name: 'obra/superpowers', description: '' }, - }, - ], - }), - })) as unknown as typeof fetch; + const fakeFetch = makeFakeFetch([ + { + path: 'skills/brainstorming/SKILL.md', + sha: 'cc', + repository: { full_name: 'obra/superpowers', description: '' }, + }, + ]); - const result = await searchSkills('brainstorming', {}, { fetch: fakeFetch }); + const result = await searchSkills('brainstorming', {}, { fetch: fakeFetch, logger: silentLogger }); expect(result.items[0]?.namespace).toBe(''); expect(result.items[0]?.name).toBe('brainstorming'); }); it('parses nested plugin layouts like plugins/foo/skills///SKILL.md', async () => { - const fakeFetch = (async () => ({ - ok: true, - status: 200, - json: async () => ({ - total_count: 1, - incomplete_results: false, + const fakeFetch = makeFakeFetch([ + { + path: 'plugins/foo/skills/team-a/deploy/SKILL.md', + sha: 'cc', + repository: { full_name: 'org/repo', description: '' }, + }, + ]); + + const result = await searchSkills('deploy', {}, { fetch: fakeFetch, logger: silentLogger }); + expect(result.items[0]?.namespace).toBe('team-a'); + expect(result.items[0]?.name).toBe('deploy'); + }); +}); + +describe('multi-query merge + dedup', () => { + it('returns a path-only hit even when content has no mention of the query (cargowise case)', async () => { + const fakeFetch = makeFakeFetch([ + // P1 path query: finds CargoWise skill at plugins/cargowise/skills/... + { + match: (q) => q.includes('path:cargowise'), items: [ { - path: 'plugins/foo/skills/team-a/deploy/SKILL.md', - sha: 'cc', - repository: { full_name: 'org/repo', description: '' }, + path: 'plugins/cargowise/skills/cw-deploy/SKILL.md', + sha: 'p1-hit', + repository: { full_name: 'WiseTechGlobal/skills', description: '' }, }, ], - }), - })) as unknown as typeof fetch; + }, + // P4 primary content: returns nothing because SKILL.md doesn't mention "cargowise" + { match: () => true, items: [] }, + ]); - const result = await searchSkills('deploy', {}, { fetch: fakeFetch }); - expect(result.items[0]?.namespace).toBe('team-a'); - expect(result.items[0]?.name).toBe('deploy'); + const result = await searchSkills('cargowise', { owner: 'WiseTechGlobal' }, { + fetch: fakeFetch, + logger: silentLogger, + }); + expect(result.items.length).toBe(1); + expect(result.items[0]?.path).toBe('plugins/cargowise/skills/cw-deploy/SKILL.md'); }); -}); -describe('dedup + ranking', () => { - it('deduplicates hits by repo + qualifiedName', async () => { - const fakeFetch = (async () => ({ - ok: true, - status: 200, - json: async () => ({ - total_count: 3, - incomplete_results: false, + it('merges in priority order — path (P1) ahead of primary (P4) when both match', async () => { + const fakeFetch = makeFakeFetch([ + // Path query → only the kynan/commit folder (path contains the literal "kynan/commit"). + { + match: (q) => q.includes('path:kynan/commit'), items: [ { path: 'skills/kynan/commit/SKILL.md', - sha: 'first-match', - repository: { full_name: 'org/skills', description: '' }, + sha: 'path-hit', + repository: { full_name: 'org/skills' }, }, + ], + }, + // Primary content query → both folders mention "kynan/commit" in their content. + { + match: () => true, + items: [ { - path: 'skills/kynan/commit/SKILL.md', - sha: 'duplicate-match', - repository: { full_name: 'org/skills', description: '' }, + path: 'skills/will/commit-helper/SKILL.md', + sha: 'content-hit-a', + repository: { full_name: 'org/skills' }, }, { - path: 'skills/will/commit/SKILL.md', - sha: 'distinct', - repository: { full_name: 'org/skills', description: '' }, + path: 'skills/kynan/commit/SKILL.md', + sha: 'content-hit-b', + repository: { full_name: 'org/skills' }, }, ], - }), - })) as unknown as typeof fetch; + }, + ]); - const result = await searchSkills('commit', {}, { fetch: fakeFetch }); + const result = await searchSkills('kynan/commit', {}, { fetch: fakeFetch, logger: silentLogger }); expect(result.items.length).toBe(2); - const qnames = result.items.map((i) => `${i.namespace}/${i.name}`); - expect(qnames).toContain('kynan/commit'); - expect(qnames).toContain('will/commit'); - // First-occurrence preservation: the surviving kynan/commit entry has the - // first match's sha. - const kynan = result.items.find((i) => i.namespace === 'kynan' && i.name === 'commit'); - expect(kynan?.sha).toBe('first-match'); + expect(result.items[0]?.namespace).toBe('kynan'); + expect(result.items[0]?.name).toBe('commit'); + // Dedup keeps the higher-priority (path) bucket's occurrence. + expect(result.items[0]?.sha).toBe('path-hit'); + expect(result.items[1]?.name).toBe('commit-helper'); }); - it('keeps same-name skills in different repos as separate hits', async () => { - const fakeFetch = (async () => ({ - ok: true, - status: 200, - json: async () => ({ - total_count: 2, - incomplete_results: false, + it('deduplicates hits across buckets by repo + qualifiedName', async () => { + const fakeFetch = makeFakeFetch([ + // Both buckets return the same skill — should collapse to one. + { + match: () => true, items: [ { - path: 'skills/commit/SKILL.md', - sha: 'r1', - repository: { full_name: 'orgA/repo', description: '' }, - }, - { - path: 'skills/commit/SKILL.md', - sha: 'r2', - repository: { full_name: 'orgB/repo', description: '' }, + path: 'skills/kynan/commit/SKILL.md', + sha: 'same', + repository: { full_name: 'org/skills' }, }, ], - }), - })) as unknown as typeof fetch; + }, + ]); + + const result = await searchSkills('commit', {}, { fetch: fakeFetch, logger: silentLogger }); + expect(result.items.length).toBe(1); + }); + + it('keeps same-name skills in different repos as separate hits', async () => { + const fakeFetch = makeFakeFetch([ + { + path: 'skills/commit/SKILL.md', + sha: 'r1', + repository: { full_name: 'orgA/repo' }, + }, + { + path: 'skills/commit/SKILL.md', + sha: 'r2', + repository: { full_name: 'orgB/repo' }, + }, + ]); - const result = await searchSkills('commit', {}, { fetch: fakeFetch }); + const result = await searchSkills('commit', {}, { fetch: fakeFetch, logger: silentLogger }); expect(result.items.length).toBe(2); expect(new Set(result.items.map((i) => i.repo)).size).toBe(2); }); - it('ranks an exact qualified-name match above a substring match', async () => { - const fakeFetch = (async () => ({ - ok: true, - status: 200, - json: async () => ({ - total_count: 2, - incomplete_results: false, + it('places the hyphen (P2) bucket between path (P1) and primary (P4) for multi-word queries', async () => { + const fakeFetch = makeFakeFetch([ + // P1 path query (`path:build-worker`) → path-only hit + { + match: (q) => q.includes('path:build-worker'), items: [ - // Substring match on the bare name. { - path: 'skills/will/commit-helper/SKILL.md', - sha: 's1', - repository: { full_name: 'org/skills', description: '' }, + path: 'plugins/build-worker/skills/run/SKILL.md', + sha: 'p1', + repository: { full_name: 'org/repo' }, }, - // Exact match on the qualified name. + ], + }, + // P2 hyphen content query (`build-worker`) → distinct hit + { + match: (q) => q.includes('build-worker') && !q.includes('path:'), + items: [ { - path: 'skills/kynan/commit/SKILL.md', - sha: 's2', - repository: { full_name: 'org/skills', description: '' }, + path: 'skills/build-worker-utils/SKILL.md', + sha: 'p2', + repository: { full_name: 'org/repo' }, }, ], - }), - })) as unknown as typeof fetch; + }, + // P4 primary content (`build worker` with space) → another distinct hit + { + match: (q) => q.includes('build worker'), + items: [ + { + path: 'skills/spaced/SKILL.md', + sha: 'p4', + repository: { full_name: 'org/repo' }, + }, + ], + }, + ]); - const result = await searchSkills('kynan/commit', {}, { fetch: fakeFetch }); - expect(result.items[0]?.namespace).toBe('kynan'); - expect(result.items[0]?.name).toBe('commit'); + const result = await searchSkills('build worker', {}, { fetch: fakeFetch, logger: silentLogger }); + expect(result.items.map((i) => i.sha)).toEqual(['p1', 'p2', 'p4']); }); }); From 9a8aab67b523bf8b37f2c2cf773d915dc7c361a0 Mon Sep 17 00:00:00 2001 From: Christopher Tso Date: Wed, 13 May 2026 00:41:14 +0200 Subject: [PATCH 2/2] fix(skill-search): use in:path qualifier for P1, not path: MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Live verification against GitHub's real Code Search API showed that the spec's `filename:SKILL.md path:` query returns zero results for the cargowise case — `path:` is a prefix-match qualifier on path components, so `path:cargowise` only matches files whose path starts with `cargowise/`, not files at `plugins/cargowise/skills/...`. The right qualifier for the substring semantics the issue actually wanted is `in:path `. Probing seven path-qualifier variants against `--owner WiseTechGlobal`: ``` path:cargowise → 0 path:plugins/cargowise → 38 (prefix works when given the full prefix) path:**/cargowise/** → 0 (no glob support) in:path cargowise → 103 ← substring match, what we want ``` After flipping P1 from `path:` → `in:path `, the cargowise --owner WiseTechGlobal search returns 23 unique skills (up from 8 with the old query), including entire trees the content-only P4 missed: - `WiseTechGlobal/WTG.AI.Prompts plugins/cargowise/skills/cw-gui` (+9 more cargowise plugin skills) - `WiseTechGlobal/WZG.Playbook.Content plugins/cargowise-customs/skills/*` (4 skills under a cargowise-customs subtree the content query didn't surface at all) - `src/core/skill-search.ts`: change P1's query string from `path:${pathTerm}` to `in:path ${pathTerm}`. Doc-comment updated to explain the difference between the two qualifiers. - `tests/unit/core/skill-search.test.ts`: assert the new `in:path ` form everywhere we previously asserted `path:`. The `logs-and-continues` and merge/dedup matchers that dispatched on `q.includes('path:')` now dispatch on `q.includes('in:path')`. Added a negative assertion (`not.toContain('path:cargowise')`) on the no-owner cargowise case to lock in the qualifier choice. Builds clean, full test suite 1232 pass / 0 fail, focused suite 31 pass. --- src/core/skill-search.ts | 17 ++++++++++------- tests/unit/core/skill-search.test.ts | 25 ++++++++++++++++--------- 2 files changed, 26 insertions(+), 16 deletions(-) diff --git a/src/core/skill-search.ts b/src/core/skill-search.ts index b6aa9e0..06a59e7 100644 --- a/src/core/skill-search.ts +++ b/src/core/skill-search.ts @@ -114,8 +114,8 @@ export interface SkillSearchQuery { * Build the parallel Code Search query set, ordered by priority. * * Always emits: - * - Priority 1 — `filename:SKILL.md path:` (+ optional user filter) - * - Priority 4 — `filename:SKILL.md ` (+ optional user filter) + * - Priority 1 — `filename:SKILL.md in:path ` (+ optional user filter) + * - Priority 4 — `filename:SKILL.md ` (+ optional user filter) * * Conditionally emits: * - Priority 2 — `filename:SKILL.md ` (only when the hyphenated @@ -124,9 +124,12 @@ export interface SkillSearchQuery { * `--owner` is set AND the query itself could plausibly be a GitHub * login) * - * The path query is what surfaces e.g. CargoWise skills at - * `plugins/cargowise/skills/...` even when the SKILL.md content doesn't - * mention "cargowise" — `path:cargowise` matches the directory name. + * P1 uses `in:path ` rather than `path:`. The two qualifiers + * differ on the live API: `path:foo` is prefix-on-path-components (so + * `path:cargowise` doesn't match `plugins/cargowise/skills/...`), while + * `in:path foo` matches the term anywhere in the path. The substring form + * is what surfaces nested skills like `plugins/cargowise/skills/cw-yard/SKILL.md` + * when the SKILL.md body itself doesn't mention "cargowise". */ export function buildSearchQueries( query: string, @@ -138,7 +141,7 @@ export function buildSearchQueries( const join = (...parts: string[]) => parts.filter(Boolean).join(' '); const queries: SkillSearchQuery[] = [ - { priority: 1, label: 'path', q: join('filename:SKILL.md', `path:${pathTerm}`, userClause) }, + { priority: 1, label: 'path', q: join('filename:SKILL.md', `in:path ${pathTerm}`, userClause) }, ]; if (pathTerm !== trimmed) { @@ -386,7 +389,7 @@ export async function searchSkills( /** * Drop duplicate hits by `repo + qualifiedName`. Same folder surfaced by - * multiple query buckets (e.g. both `path:` and content match) collapses to + * multiple query buckets (e.g. both `in:path` and content match) collapses to * one entry, with the higher-priority bucket's occurrence winning because * items are merged in priority order before this runs. */ diff --git a/tests/unit/core/skill-search.test.ts b/tests/unit/core/skill-search.test.ts index b79ad7d..144875f 100644 --- a/tests/unit/core/skill-search.test.ts +++ b/tests/unit/core/skill-search.test.ts @@ -183,7 +183,11 @@ describe('buildSearchQueries', () => { expect(queries.map((q) => q.label)).toEqual(['path', 'owner', 'primary']); const labels = new Map(queries.map((q) => [q.label, q.q])); expect(labels.get('path')).toContain('filename:SKILL.md'); - expect(labels.get('path')).toContain('path:cargowise'); + // P1 uses `in:path ` so the term substring-matches anywhere in + // the path. (GitHub's `path:` qualifier is prefix-only and + // misses nested layouts like `plugins/cargowise/skills/...`.) + expect(labels.get('path')).toContain('in:path cargowise'); + expect(labels.get('path')).not.toContain('path:cargowise'); expect(labels.get('primary')).toBe('filename:SKILL.md cargowise'); }); @@ -191,7 +195,7 @@ describe('buildSearchQueries', () => { const queries = buildSearchQueries('cargowise', 'WiseTechGlobal'); expect(queries.map((q) => q.label)).toEqual(['path', 'primary']); expect(queries.find((q) => q.label === 'path')?.q).toBe( - 'filename:SKILL.md path:cargowise user:WiseTechGlobal', + 'filename:SKILL.md in:path cargowise user:WiseTechGlobal', ); expect(queries.find((q) => q.label === 'primary')?.q).toBe( 'filename:SKILL.md cargowise user:WiseTechGlobal', @@ -202,7 +206,7 @@ describe('buildSearchQueries', () => { const queries = buildSearchQueries('build worker', undefined); const path = queries.find((q) => q.label === 'path'); const hyphen = queries.find((q) => q.label === 'hyphen'); - expect(path?.q).toContain('path:build-worker'); + expect(path?.q).toContain('in:path build-worker'); expect(hyphen?.q).toBe('filename:SKILL.md build-worker'); }); @@ -268,7 +272,7 @@ describe('searchSkills error mapping', () => { const fakeFetch = makeFakeFetch([ // Path query (P1) fails with 422. { - match: (q) => q.includes('path:'), + match: (q) => q.includes('in:path'), items: [], status: 422, message: 'Path query rejected', @@ -379,8 +383,11 @@ describe('multi-query merge + dedup', () => { it('returns a path-only hit even when content has no mention of the query (cargowise case)', async () => { const fakeFetch = makeFakeFetch([ // P1 path query: finds CargoWise skill at plugins/cargowise/skills/... + // The `in:path cargowise` qualifier substring-matches the term anywhere + // in the file path, so it picks up the nested `plugins/cargowise/...` + // layout even though the SKILL.md body has no mention of "cargowise". { - match: (q) => q.includes('path:cargowise'), + match: (q) => q.includes('in:path cargowise'), items: [ { path: 'plugins/cargowise/skills/cw-deploy/SKILL.md', @@ -405,7 +412,7 @@ describe('multi-query merge + dedup', () => { const fakeFetch = makeFakeFetch([ // Path query → only the kynan/commit folder (path contains the literal "kynan/commit"). { - match: (q) => q.includes('path:kynan/commit'), + match: (q) => q.includes('in:path kynan/commit'), items: [ { path: 'skills/kynan/commit/SKILL.md', @@ -481,9 +488,9 @@ describe('multi-query merge + dedup', () => { it('places the hyphen (P2) bucket between path (P1) and primary (P4) for multi-word queries', async () => { const fakeFetch = makeFakeFetch([ - // P1 path query (`path:build-worker`) → path-only hit + // P1 path query (`in:path build-worker`) → path-only hit { - match: (q) => q.includes('path:build-worker'), + match: (q) => q.includes('in:path build-worker'), items: [ { path: 'plugins/build-worker/skills/run/SKILL.md', @@ -494,7 +501,7 @@ describe('multi-query merge + dedup', () => { }, // P2 hyphen content query (`build-worker`) → distinct hit { - match: (q) => q.includes('build-worker') && !q.includes('path:'), + match: (q) => q.includes('build-worker') && !q.includes('in:path'), items: [ { path: 'skills/build-worker-utils/SKILL.md',