Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 50 additions & 0 deletions apps/cli/src/commands/workspace/deps.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import path from 'node:path';
import { command, flag, restPositionals, string } from 'cmd-ts';

import { scanRepoDeps } from '@agentv/core';

import { resolveEvalPaths } from '../eval/shared.js';

/**
 * `agentv workspace deps` — scans the given eval files and prints a JSON
 * manifest of the git repositories referenced by their workspaces.
 *
 * Per-file scan failures are written to stderr as warnings; the manifest
 * goes to stdout so it can be piped into other tooling.
 */
export const depsCommand = command({
  name: 'deps',
  description: 'Scan eval files and list git repo dependencies needed by workspaces',
  args: {
    evalPaths: restPositionals({
      type: string,
      displayName: 'eval-paths',
      description: 'Path(s) or glob(s) to evaluation .yaml file(s)',
    }),
    usedBy: flag({
      long: 'used-by',
      description: 'Include list of eval files that reference each repo',
    }),
  },
  handler: async ({ evalPaths, usedBy }) => {
    // At least one positional path is required.
    if (evalPaths.length === 0) {
      console.error('Usage: agentv workspace deps <eval-paths...>');
      process.exit(1);
    }

    const baseDir = process.cwd();
    const toRelative = (target: string): string => path.relative(baseDir, target);

    const scan = await scanRepoDeps(await resolveEvalPaths(evalPaths, baseDir));

    // Scan failures are reported on stderr and do not abort the command.
    for (const failure of scan.errors) {
      console.error(`warning: ${toRelative(failure.file)}: ${failure.message}`);
    }

    // Build the manifest (snake_case keys per wire format convention),
    // leaving out any field that is undefined.
    const repos = scan.repos.map((dep) => {
      const entry: Record<string, unknown> = { url: dep.url };
      if (dep.ref !== undefined) entry.ref = dep.ref;
      if (dep.clone !== undefined) entry.clone = dep.clone;
      if (dep.checkout !== undefined) entry.checkout = dep.checkout;
      if (usedBy) entry.used_by = dep.usedBy.map((file) => toRelative(file));
      return entry;
    });

    console.log(JSON.stringify({ repos }, null, 2));
  },
});
2 changes: 2 additions & 0 deletions apps/cli/src/commands/workspace/index.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import { subcommands } from 'cmd-ts';

import { cleanCommand } from './clean.js';
import { depsCommand } from './deps.js';
import { listCommand } from './list.js';

export const workspaceCommand = subcommands({
Expand All @@ -9,5 +10,6 @@ export const workspaceCommand = subcommands({
cmds: {
list: listCommand,
clean: cleanCommand,
deps: depsCommand,
},
});
4 changes: 4 additions & 0 deletions apps/web/src/content/docs/docs/guides/workspace-pool.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,10 @@ agentv workspace clean

# Remove only pools for a specific repo
agentv workspace clean --repo github.com/org/my-repo

# Scan eval files and output a JSON manifest of required git repos.
# Useful in CI to determine what to clone before running evals.
# Quote the glob so it reaches agentv unexpanded (many shells do not expand `**` recursively).
agentv workspace deps 'evals/**/*.eval.yaml'
```

## External workspace config
Expand Down
1 change: 1 addition & 0 deletions apps/web/src/content/docs/docs/targets/configuration.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,7 @@ Similar to GitHub Actions checkout conventions, `source` answers "where does thi
Pool management commands:
- `agentv workspace list` — list all pool entries with size and repo info
- `agentv workspace clean` — remove all pool entries
- `agentv workspace deps <eval-paths>` — scan eval files and output a JSON manifest of required git repos (for CI pre-cloning)

**Common patterns:**

Expand Down
176 changes: 176 additions & 0 deletions packages/core/src/evaluation/workspace/deps-scanner.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
/**
* Lightweight scanner that extracts git repo dependencies from eval YAML files
* without performing full test/grader parsing.
*
* Used by `agentv workspace deps` to determine which repos CI needs to clone
* before running evals.
*
* How it works:
* 1. Reads each eval YAML file and parses it
* 2. Extracts `workspace.repos` at suite-level and per-test level
* 3. Resolves external workspace file references (string → file path)
* 4. Deduplicates git repos by (url, ref)
* 5. Returns a flat list of unique repo dependencies
*
* To extend: add support for new workspace source types by adding a branch
* in `extractReposFromWorkspaceRaw()`.
*/
import { readFile } from 'node:fs/promises';
import path from 'node:path';
import { parse } from 'yaml';

import { interpolateEnv } from '../interpolation.js';
import type { RepoCheckout, RepoClone, RepoSource } from '../types.js';
import { parseRepoCheckout, parseRepoClone, parseRepoSource } from './repo-config-parser.js';

/**
 * A single git repo dependency discovered from eval files.
 *
 * One entry is produced per unique (normalized URL, ref) pair by `scanRepoDeps`.
 */
export interface RepoDep {
/** Git clone URL, exactly as written in the first eval file that referenced it. */
readonly url: string;
/** Checkout ref taken from the repo's `checkout.ref`; undefined when none was given. */
readonly ref: string | undefined;
/** Clone options (depth, filter, sparse) — first-wins on dedup collision */
readonly clone: RepoClone | undefined;
/**
 * Checkout options other than `ref` (resolve, ancestor) — first-wins on dedup
 * collision. Undefined when the checkout config carries nothing besides `ref`.
 */
readonly checkout: Omit<RepoCheckout, 'ref'> | undefined;
/** Eval file paths (as passed to `scanRepoDeps`) that reference this repo */
readonly usedBy: string[];
}

/** Full output of the deps scanner, as returned by `scanRepoDeps`. */
export interface DepsScanResult {
/** Unique git repo dependencies, in order of first discovery. */
readonly repos: readonly RepoDep[];
/**
 * Files whose scan threw (non-fatal). `message` is the thrown error's
 * message, or its string form when the thrown value is not an `Error`.
 */
readonly errors: readonly { file: string; message: string }[];
}

/**
 * Normalize a git URL so equivalent spellings of the same remote share one
 * dedup key.
 *
 * - strips trailing slashes, then a trailing `.git`
 * - lowercases the host of parseable URLs (`https://`, `ssh://`, ...)
 * - lowercases the host of scp-style remotes (`user@Host:org/repo`)
 *
 * The path portion keeps its case. Anything that matches neither form is
 * returned with only the suffix stripping applied.
 *
 * @param url Git remote as written in the eval file.
 * @returns The normalized form, intended for use as a comparison key only.
 */
function normalizeGitUrl(url: string): string {
  // Trailing slashes must go first, otherwise "repo.git/" keeps its ".git"
  // suffix and fails to dedup against "repo".
  let normalized = url.replace(/\/+$/, '').replace(/\.git$/, '');
  try {
    const parsed = new URL(normalized);
    parsed.hostname = parsed.hostname.toLowerCase();
    // Serialization may add a trailing "/" for an empty path; drop it again.
    normalized = parsed.toString().replace(/\/$/, '');
  } catch {
    // Not parseable as a URL. Handle scp-style "user@host:path" by lowercasing
    // just the host; leave every other shape untouched.
    const scpLike = /^([^@/\s]+@)([^:/\s]+)(:.*)$/.exec(normalized);
    if (scpLike) {
      const [, user = '', host = '', rest = ''] = scpLike;
      normalized = `${user}${host.toLowerCase()}${rest}`;
    }
  }
  return normalized;
}

/**
 * Scan eval YAML files and collect unique git repo dependencies.
 *
 * Files are processed sequentially. Any error thrown while reading or parsing
 * a file is recorded in `errors` and scanning continues with the next file.
 * Repos whose source is not of type `git` are ignored.
 *
 * Dedup strategy: repos are keyed by (normalized URL, ref). On collision,
 * clone/checkout options from the first occurrence win — this is intentional
 * since the manifest is advisory (CI can override clone options).
 *
 * @param evalFilePaths Paths of the eval files to scan.
 * @returns Unique repos in order of first discovery, plus per-file errors.
 *   Each repo's `usedBy` lists every referencing file exactly once.
 */
export async function scanRepoDeps(evalFilePaths: readonly string[]): Promise<DepsScanResult> {
  const seen = new Map<string, RepoDep & { usedBy: string[] }>();
  const errors: { file: string; message: string }[] = [];

  for (const filePath of evalFilePaths) {
    try {
      const repos = await extractReposFromEvalFile(filePath);
      for (const repo of repos) {
        if (repo.source.type !== 'git') continue;

        const ref = repo.checkout?.ref;
        // NUL cannot appear in a URL or ref, so it is a safe key separator.
        const key = `${normalizeGitUrl(repo.source.url)}\0${ref ?? ''}`;

        const existing = seen.get(key);
        if (existing) {
          // A file may reference the same repo several times (suite-level and
          // per-test workspaces); record each file only once.
          if (!existing.usedBy.includes(filePath)) {
            existing.usedBy.push(filePath);
          }
          continue;
        }

        // `ref` is reported as its own field, so keep only the remaining
        // checkout options and drop the object entirely when none are left.
        const { ref: _ref, ...checkoutRest } = repo.checkout ?? {};
        const hasCheckout = Object.keys(checkoutRest).length > 0;
        seen.set(key, {
          url: repo.source.url,
          ref,
          clone: repo.clone,
          checkout: hasCheckout ? checkoutRest : undefined,
          usedBy: [filePath],
        });
      }
    } catch (err) {
      errors.push({
        file: filePath,
        message: err instanceof Error ? err.message : String(err),
      });
    }
  }

  return { repos: [...seen.values()], errors };
}

// ---------------------------------------------------------------------------
// Internal helpers — lightweight YAML extraction, no full test parsing
// ---------------------------------------------------------------------------

/**
 * A repo entry as extracted from a workspace's `repos` list, before dedup.
 * `source` may be of any supported type; `scanRepoDeps` keeps only `git` sources.
 */
interface RawRepo {
/** Parsed `source` of the repo entry (entries without a valid source are dropped). */
source: RepoSource;
/** Parsed `checkout` options, if any were recognized. */
checkout?: RepoCheckout;
/** Parsed `clone` options, if any were recognized. */
clone?: RepoClone;
}

/**
 * Read one eval file and gather every repo entry declared in its workspaces.
 *
 * The suite-level `workspace` is visited first, followed by the `workspace`
 * of each entry in `tests`, in file order. Relative references to external
 * workspace files are resolved against the eval file's directory.
 *
 * @param filePath Path to the eval YAML file.
 * @returns All repo entries found (not deduplicated); empty when the
 *   document root is not a mapping.
 */
async function extractReposFromEvalFile(filePath: string): Promise<RawRepo[]> {
  const yamlText = await readFile(filePath, 'utf8');
  const doc = interpolateEnv(parse(yamlText), process.env);

  const rootIsMapping = typeof doc === 'object' && doc !== null && !Array.isArray(doc);
  if (!rootIsMapping) return [];

  const root = doc as Record<string, unknown>;
  const baseDir = path.dirname(path.resolve(filePath));

  // Suite-level workspace comes first.
  const collected: RawRepo[] = [...(await extractReposFromWorkspaceRaw(root.workspace, baseDir))];

  // Then each test's own workspace, skipping entries that are not mappings.
  if (Array.isArray(root.tests)) {
    for (const entry of root.tests) {
      if (!entry || typeof entry !== 'object' || Array.isArray(entry)) continue;
      const testCase = entry as Record<string, unknown>;
      collected.push(...(await extractReposFromWorkspaceRaw(testCase.workspace, baseDir)));
    }
  }

  return collected;
}

/**
 * Pull repo entries out of a raw `workspace` value.
 *
 * A string is treated as a path (relative to `evalFileDir`) to an external
 * workspace YAML file, which is read, parsed and env-interpolated. A mapping
 * is used directly. Any other value — or an external file whose root is not a
 * mapping — yields no repos.
 *
 * @param raw The untyped `workspace` value from the eval document.
 * @param evalFileDir Directory of the eval file, used to resolve `raw` when it is a path.
 */
async function extractReposFromWorkspaceRaw(raw: unknown, evalFileDir: string): Promise<RawRepo[]> {
  let workspace: unknown = raw;

  if (typeof raw === 'string') {
    // External workspace file reference: load it in place of the inline value.
    const externalText = await readFile(path.resolve(evalFileDir, raw), 'utf8');
    workspace = interpolateEnv(parse(externalText), process.env);
  }

  const isMapping =
    typeof workspace === 'object' && workspace !== null && !Array.isArray(workspace);
  return isMapping ? extractReposFromObject(workspace as Record<string, unknown>) : [];
}

/**
 * Convert the `repos` list of a workspace mapping into parsed repo entries.
 *
 * Entries that are not mappings, or whose `source` cannot be parsed, are
 * skipped. A missing or non-array `repos` yields an empty list.
 */
function extractReposFromObject(obj: Record<string, unknown>): RawRepo[] {
  if (!Array.isArray(obj.repos)) return [];

  return obj.repos.flatMap((entry: unknown): RawRepo[] => {
    if (typeof entry !== 'object' || entry === null || Array.isArray(entry)) return [];

    const candidate = entry as Record<string, unknown>;
    const source = parseRepoSource(candidate.source);
    if (!source) return [];

    return [
      {
        source,
        checkout: parseRepoCheckout(candidate.checkout),
        clone: parseRepoClone(candidate.clone),
      },
    ];
  });
}
1 change: 1 addition & 0 deletions packages/core/src/evaluation/workspace/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,4 @@ export {
type AcquireWorkspaceOptions,
type PoolSlot,
} from './pool-manager.js';
export { scanRepoDeps, type RepoDep, type DepsScanResult } from './deps-scanner.js';
66 changes: 66 additions & 0 deletions packages/core/src/evaluation/workspace/repo-config-parser.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
/**
* Shared parsers for repo configuration objects (source, checkout, clone).
*
* Used by both the full YAML parser (yaml-parser.ts) and the lightweight
* deps scanner (deps-scanner.ts) to avoid duplicating parsing logic.
*/
import type { RepoCheckout, RepoClone, RepoConfig, RepoSource } from '../types.js';
import { isJsonObject } from '../types.js';

/**
 * Parse a raw `source` value into a typed repo source.
 *
 * Recognizes `{ type: 'git', url }` and `{ type: 'local', path }` where the
 * url/path is a string. Only those fields are copied into the result.
 *
 * @returns The parsed source, or undefined when `raw` matches neither shape.
 */
export function parseRepoSource(raw: unknown): RepoSource | undefined {
  if (!isJsonObject(raw)) return undefined;

  const { type, url, path: localPath } = raw as Record<string, unknown>;
  switch (type) {
    case 'git':
      return typeof url === 'string' ? { type: 'git', url } : undefined;
    case 'local':
      return typeof localPath === 'string' ? { type: 'local', path: localPath } : undefined;
    default:
      return undefined;
  }
}

/**
 * Parse a raw `checkout` value into typed checkout options.
 *
 * Recognized fields:
 * - `ref`: non-empty string
 * - `resolve`: `'remote'` or `'local'`
 * - `ancestor`: number
 *
 * Fields of the wrong type are ignored. An empty-string `ref` is treated as
 * absent everywhere. Previously it was ignored when deciding whether any
 * option was set, yet still emitted as `ref: ''` alongside other options.
 *
 * @returns The recognized options, or undefined when none are present.
 */
export function parseRepoCheckout(raw: unknown): RepoCheckout | undefined {
  if (!isJsonObject(raw)) return undefined;
  const obj = raw as Record<string, unknown>;

  const ref = typeof obj.ref === 'string' && obj.ref !== '' ? obj.ref : undefined;
  const resolve = obj.resolve === 'remote' || obj.resolve === 'local' ? obj.resolve : undefined;
  const ancestor = typeof obj.ancestor === 'number' ? obj.ancestor : undefined;

  if (ref === undefined && resolve === undefined && ancestor === undefined) return undefined;

  // Only include keys that were actually provided.
  return {
    ...(ref !== undefined && { ref }),
    ...(resolve !== undefined && { resolve }),
    ...(ancestor !== undefined && { ancestor }),
  };
}

/**
 * Parse a raw `clone` value into typed clone options.
 *
 * Recognized fields:
 * - `depth`: number
 * - `filter`: non-empty string
 * - `sparse`: array; non-string elements are dropped
 *
 * Fields of the wrong type are ignored. An empty-string `filter` is treated
 * as absent everywhere. Previously it was ignored when deciding whether any
 * option was set, yet still emitted as `filter: ''` alongside other options.
 *
 * @returns The recognized options, or undefined when none are present.
 */
export function parseRepoClone(raw: unknown): RepoClone | undefined {
  if (!isJsonObject(raw)) return undefined;
  const obj = raw as Record<string, unknown>;

  const depth = typeof obj.depth === 'number' ? obj.depth : undefined;
  const filter = typeof obj.filter === 'string' && obj.filter !== '' ? obj.filter : undefined;
  const sparse = Array.isArray(obj.sparse)
    ? obj.sparse.filter((s): s is string => typeof s === 'string')
    : undefined;

  if (depth === undefined && filter === undefined && sparse === undefined) return undefined;

  // Only include keys that were actually provided.
  return {
    ...(depth !== undefined && { depth }),
    ...(filter !== undefined && { filter }),
    ...(sparse !== undefined && { sparse }),
  };
}

/**
 * Parse a raw repo entry into a full repo config.
 *
 * Requires a non-empty string `path` and a parseable `source`. `checkout`
 * and `clone` are optional and only appear in the result when their parsers
 * return a value.
 *
 * @returns The parsed config, or undefined when `raw` is not an object or a
 *   required field is missing or invalid.
 */
export function parseRepoConfig(raw: unknown): RepoConfig | undefined {
  if (!isJsonObject(raw)) return undefined;
  const fields = raw as Record<string, unknown>;

  const repoPath = fields.path;
  const source = parseRepoSource(fields.source);
  if (typeof repoPath !== 'string' || repoPath.length === 0 || !source) return undefined;

  const checkout = parseRepoCheckout(fields.checkout);
  const clone = parseRepoClone(fields.clone);

  return {
    path: repoPath,
    source,
    ...(checkout ? { checkout } : {}),
    ...(clone ? { clone } : {}),
  };
}
Loading
Loading