From fb1a62361889eada3b387f99575bc847eccb8c90 Mon Sep 17 00:00:00 2001 From: Christopher Date: Wed, 20 May 2026 09:21:49 +1000 Subject: [PATCH 1/3] feat(core): support eval test vars templating Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../cli/src/commands/import/promptfoo.test.ts | 11 +- apps/cli/src/commands/import/promptfoo.ts | 39 +- .../docs/docs/evaluation/eval-files.mdx | 28 + .../src/content/docs/docs/tools/import.mdx | 1 + packages/core/src/evaluation/interpolation.ts | 75 + .../evaluation/validation/eval-file.schema.ts | 5 +- packages/core/src/evaluation/yaml-parser.ts | 120 +- .../test/evaluation/interpolation.test.ts | 30 +- .../test/evaluation/suite-level-input.test.ts | 74 + .../references/eval-schema.json | 3085 +++++++++++++---- 10 files changed, 2797 insertions(+), 671 deletions(-) diff --git a/apps/cli/src/commands/import/promptfoo.test.ts b/apps/cli/src/commands/import/promptfoo.test.ts index 1e31df426..39b1e8892 100644 --- a/apps/cli/src/commands/import/promptfoo.test.ts +++ b/apps/cli/src/commands/import/promptfoo.test.ts @@ -51,7 +51,8 @@ tests: expect(suite.tests[0]).toMatchObject({ id: 'capital', criteria: 'Capital answer stays deterministic', - input: 'Answer clearly: What is the capital of France?', + input: 'Answer clearly: {{question}}', + vars: { question: 'What is the capital of France?' }, assertions: [{ type: 'equals', value: 'Paris' }], metadata: { promptfoo: { @@ -95,7 +96,8 @@ tests: file://./tests.jsonl const yaml = await convertPromptfooToAgentvYaml(configPath); expect(yaml).toContain('# Converted from promptfoo config:'); expect(yaml).toContain('id: math'); - expect(yaml).toContain('input: "Please answer: What is 2 + 2?"'); + expect(yaml).toContain('input: "Please answer: {{question}}"'); + expect(yaml).toContain('vars:'); expect(yaml).toContain('type: equals'); }); @@ -129,7 +131,10 @@ tests: file://./tests.csv expect(suite.tests[0]).toMatchObject({ id: 'capital-question', criteria: 'Capital question', - input: 'Question: What is the capital of France?', + input: 'Question: {{question}}', + vars: { + question: 'What is the capital of France?', + }, assertions: [ { type: 'equals', value: 'Paris' }, { type: 'contains', value: 'Paris' }, diff --git a/apps/cli/src/commands/import/promptfoo.ts b/apps/cli/src/commands/import/promptfoo.ts index 968a733e9..db7b45d28 100644 --- a/apps/cli/src/commands/import/promptfoo.ts +++ b/apps/cli/src/commands/import/promptfoo.ts @@ -74,6 +74,7 @@ interface AgentvAssertion { interface AgentvTest { readonly id: string; readonly input: AgentvInput; + readonly vars?: Record; readonly assertions?: readonly AgentvAssertion[]; readonly [key: string]: unknown; } @@ -825,7 +826,8 @@ async function buildAgentvTests(options: { } for (const prompt of promptSelection) { - const renderedInput = renderPrompt(prompt, effectiveVars, testOptions); + const importedVars = testOptions.disableVarExpansion ? undefined : effectiveVars; + const templatedInput = buildPromptTemplate(prompt, testOptions); const promptSuffix = promptSelection.length > 1 ? `--${sanitizeName(prompt.key || prompt.label)}` : ''; const metadata = buildPromptfooMetadata(rawTest, effectiveVars, prompt, effectiveTargets); @@ -838,7 +840,8 @@ async function buildAgentvTests(options: { const test: AgentvTest = { id: `${explicitId ?? baseId}${promptSuffix}`, ...(typeof rawTest.description === 'string' ? { criteria: rawTest.description } : {}), - input: renderedInput, + input: templatedInput, + ...(importedVars && Object.keys(importedVars).length > 0 ? { vars: importedVars } : {}), ...(convertedCaseAssertions.length > 0 ? { assertions: convertedCaseAssertions } : {}), ...(metadata ? { metadata } : {}), ...(execution ? { execution } : {}), @@ -970,52 +973,30 @@ function filterProviders( return matched.map((provider) => provider.targetName); } -function renderPrompt( +function buildPromptTemplate( prompt: PromptfooPrompt, - vars: Record, testOptions: PromptfooTestOptions, ): AgentvInput { const prefix = testOptions.prefix ?? ''; const suffix = testOptions.suffix ?? ''; if (typeof prompt.content === 'string') { - return `${prefix}${renderTemplate(prompt.content, vars)}${suffix}`; + return `${prefix}${preserveTemplate(prompt.content)}${suffix}`; } return prompt.content.map((message, index, allMessages) => ({ role: message.role, - content: `${index === 0 ? prefix : ''}${renderTemplate(message.content, vars)}${index === allMessages.length - 1 ? suffix : ''}`, + content: `${index === 0 ? prefix : ''}${preserveTemplate(message.content)}${index === allMessages.length - 1 ? suffix : ''}`, })); } -function renderTemplate(template: string, vars: Record) { +function preserveTemplate(template: string) { if (template.includes('{%') || template.includes('{#') || /\{\{[^}]*\|/.test(template)) { throw new Error( `Unsupported Nunjucks syntax in prompt '${template.slice(0, 80)}'. Use simple {{var}} templates or migrate manually`, ); } - - return template.replace(/\{\{\s*([^}]+?)\s*\}\}/g, (_match, expression: string) => { - const value = lookupPath(vars, expression.trim()); - if (value === undefined) { - return ''; - } - if (typeof value === 'string') return value; - return JSON.stringify(value); - }); -} - -function lookupPath( - value: JsonValue | Record, - expression: string, -): JsonValue | undefined { - if (!expression) return undefined; - return expression.split('.').reduce((current, part) => { - if (!current || typeof current !== 'object' || Array.isArray(current)) { - return undefined; - } - return (current as Record)[part]; - }, value as JsonValue); + return template; } function buildPromptfooMetadata( diff --git a/apps/web/src/content/docs/docs/evaluation/eval-files.mdx b/apps/web/src/content/docs/docs/evaluation/eval-files.mdx index dac72a078..c2b6e6817 100644 --- a/apps/web/src/content/docs/docs/evaluation/eval-files.mdx +++ b/apps/web/src/content/docs/docs/evaluation/eval-files.mdx @@ -264,6 +264,34 @@ For local sources, omit `checkout.resolve`. If you need to pin the local clone t MY_REPO_LOCAL_PATH=/home/dev/repos/my-repo ``` +## Per-Test Template Variables + +Eval YAML also supports per-test `vars` for data-driven prompt templates. Use `{{name}}` placeholders in test-facing text fields, and AgentV resolves them when the suite loads. + +```yaml +input: "Answer clearly: {{question}}" + +tests: + - id: capital + vars: + question: What is the capital of France? + expected_answer: Paris + criteria: "Answers {{question}} correctly" + input: + - role: user + content: "Question: {{question}}" + expected_output: "{{expected_answer}}" +``` + +### Behavior + +- `vars` is defined per test as an object +- `{{name}}` and dotted paths like `{{ user.name }}` are supported +- Substitution applies to suite-level `input`, test `input`, `input_files`, `criteria`, `expected_output`, and conversation turn `input` / `expected_output` +- When the whole string is a single placeholder, the original JSON value is preserved +- Missing variables are left unchanged, so unrelated template syntax is not silently blanked out +- `vars` interpolation is separate from environment interpolation: `{{question}}` uses test data, `${{ PROJECT_NAME }}` uses environment variables + ## JSONL Format For large-scale evaluations, AgentV supports JSONL (JSON Lines) format. Each line is a single test: diff --git a/apps/web/src/content/docs/docs/tools/import.mdx b/apps/web/src/content/docs/docs/tools/import.mdx index 7655eadd9..cece3d0fe 100644 --- a/apps/web/src/content/docs/docs/tools/import.mdx +++ b/apps/web/src/content/docs/docs/tools/import.mdx @@ -46,6 +46,7 @@ Default output: `EVAL.yaml` beside the promptfoo config file. - inline tests and external YAML / JSON / JSONL / CSV test files - `defaultTest.assert` promoted to suite-level `assertions` - per-test `vars`, `description`, `threshold`, `metadata`, prompt filters, and provider filters +- simple prompt templates are preserved as AgentV `{{var}}` input templates instead of being eagerly flattened - deterministic assertions that map directly to AgentV: `equals`, `contains`, `icontains`, `regex`, `starts-with`, `ends-with`, `contains-any`, `contains-all`, `icontains-any`, `icontains-all`, `is-json`, `latency`, `cost` - rubric-style assertions mapped to `llm-grader`: `llm-rubric`, `g-eval`, `factuality`, `context-faithfulness`, `context-recall` diff --git a/packages/core/src/evaluation/interpolation.ts b/packages/core/src/evaluation/interpolation.ts index 7bd2dbc17..e8b396224 100644 --- a/packages/core/src/evaluation/interpolation.ts +++ b/packages/core/src/evaluation/interpolation.ts @@ -1,6 +1,8 @@ import type { EnvLookup } from './providers/types.js'; const ENV_VAR_PATTERN = /\$\{\{\s*([A-Za-z_][A-Za-z0-9_]*)\s*\}\}/g; +const TEMPLATE_VAR_PATTERN = /\{\{\s*([A-Za-z_][A-Za-z0-9_.]*)\s*\}\}/g; +const WHOLE_TEMPLATE_VAR_PATTERN = /^\{\{\s*([A-Za-z_][A-Za-z0-9_.]*)\s*\}\}$/; /** * Regex that matches a string consisting of exactly one `${{ VAR }}` reference @@ -29,6 +31,42 @@ function coercePrimitive(value: string): unknown { return value; } +function isPlainObject(value: unknown): value is Record { + return typeof value === 'object' && value !== null && !Array.isArray(value); +} + +function cloneTemplateValue(value: unknown): unknown { + if (Array.isArray(value)) { + return value.map((item) => cloneTemplateValue(item)); + } + if (isPlainObject(value)) { + const result: Record = {}; + for (const [key, nested] of Object.entries(value)) { + result[key] = cloneTemplateValue(nested); + } + return result; + } + return value; +} + +function stringifyTemplateValue(value: unknown): string { + if (typeof value === 'string') return value; + return JSON.stringify(value); +} + +function lookupTemplateVar( + vars: Readonly>, + expression: string, +): unknown | undefined { + if (!expression) return undefined; + return expression.split('.').reduce((current, segment) => { + if (!isPlainObject(current)) { + return undefined; + } + return current[segment]; + }, vars); +} + /** * Recursively interpolate `${{ VAR }}` references in all string values. * Missing variables resolve to empty string. @@ -71,3 +109,40 @@ export function interpolateEnv(value: unknown, env: EnvLookup): unknown { } return value; } + +/** + * Recursively interpolate `{{ var }}` references in string values using per-test vars. + * Missing variables are left unchanged so unrelated template syntaxes remain intact. + * When the whole string is a single variable reference, the original JSON value is preserved. + */ +export function interpolateTemplateVars( + value: unknown, + vars: Readonly>, +): unknown { + if (typeof value === 'string') { + const wholeMatch = WHOLE_TEMPLATE_VAR_PATTERN.exec(value); + if (wholeMatch) { + const resolved = lookupTemplateVar(vars, wholeMatch[1] as string); + return resolved === undefined ? value : cloneTemplateValue(resolved); + } + + return value.replace(TEMPLATE_VAR_PATTERN, (match, expression: string) => { + const resolved = lookupTemplateVar(vars, expression); + return resolved === undefined ? match : stringifyTemplateValue(resolved); + }); + } + + if (Array.isArray(value)) { + return value.map((item) => interpolateTemplateVars(item, vars)); + } + + if (isPlainObject(value)) { + const result: Record = {}; + for (const [key, nested] of Object.entries(value)) { + result[key] = interpolateTemplateVars(nested, vars); + } + return result; + } + + return value; +} diff --git a/packages/core/src/evaluation/validation/eval-file.schema.ts b/packages/core/src/evaluation/validation/eval-file.schema.ts index 669463237..81c2e59ca 100644 --- a/packages/core/src/evaluation/validation/eval-file.schema.ts +++ b/packages/core/src/evaluation/validation/eval-file.schema.ts @@ -25,11 +25,13 @@ const MessageSchema = z.object({ content: MessageContentSchema, }); +const JsonObjectSchema = z.object({}).catchall(z.unknown()); + /** Input: string shorthand or message array */ const InputSchema = z.union([z.string(), z.array(MessageSchema)]); /** Expected output: string, object, or message array */ -const ExpectedOutputSchema = z.union([z.string(), z.record(z.unknown()), z.array(MessageSchema)]); +const ExpectedOutputSchema = z.union([z.string(), JsonObjectSchema, z.array(MessageSchema)]); // --------------------------------------------------------------------------- // Grader schemas (YAML input format) @@ -389,6 +391,7 @@ const ConversationTurnSchema = z.object({ const EvalTestSchema = z.object({ id: z.string().min(1), + vars: JsonObjectSchema.optional(), criteria: z.string().optional(), input: InputSchema.optional(), input_files: z.array(z.string()).optional(), diff --git a/packages/core/src/evaluation/yaml-parser.ts b/packages/core/src/evaluation/yaml-parser.ts index ba25e993a..474cf276b 100644 --- a/packages/core/src/evaluation/yaml-parser.ts +++ b/packages/core/src/evaluation/yaml-parser.ts @@ -3,7 +3,7 @@ import path from 'node:path'; import micromatch from 'micromatch'; import { collectResolvedInputFilePaths } from './input-message-utils.js'; -import { interpolateEnv } from './interpolation.js'; +import { interpolateEnv, interpolateTemplateVars } from './interpolation.js'; import { loadTestsFromAgentSkills } from './loaders/agent-skills-parser.js'; import { expandFileReferences, @@ -129,6 +129,7 @@ type RawTestSuite = JsonObject & { type RawEvalCase = JsonObject & { readonly id?: JsonValue; + readonly vars?: JsonValue; readonly conversation_id?: JsonValue; readonly criteria?: JsonValue; /** @deprecated Use `criteria` instead */ @@ -160,6 +161,37 @@ function resolveTests(suite: RawTestSuite): JsonValue | undefined { return undefined; } +function interpolateCaseField( + value: T, + vars: JsonObject | undefined, +): T { + if (!vars || value === undefined) { + return value; + } + return interpolateTemplateVars(value, vars as Record) as T; +} + +function interpolateCaseTurns( + turns: JsonValue | undefined, + vars: JsonObject | undefined, +): JsonValue | undefined { + if (!vars || !Array.isArray(turns)) { + return turns; + } + + return turns.map((rawTurn) => { + if (!isJsonObject(rawTurn)) { + return rawTurn; + } + + return { + ...rawTurn, + input: interpolateCaseField(rawTurn.input, vars), + expected_output: interpolateCaseField(rawTurn.expected_output, vars), + } satisfies JsonObject; + }); +} + /** * Read metadata from a test suite file (like target name). * This is a convenience function for CLI tools that need metadata without loading all tests. @@ -366,11 +398,8 @@ async function loadTestsFromYaml( // Merged into each case's `metadata.governance` via mergeSuiteMetadataPayload. const suiteGovernance = extractSuiteGovernance(suite); - // Resolve suite-level input (prepended to each test's input messages) - const suiteInputMessages = expandInputShorthand(suite.input); - - // Suite-level input_files: passed to resolveInputMessages for each test - const suiteInputFiles = suite.input_files; + const rawSuiteInput = suite.input; + const rawSuiteInputFiles = suite.input_files; // Extract global target from execution.target (or legacy root-level target) const rawGlobalExecution = isJsonObject(suite.execution) ? suite.execution : undefined; @@ -403,21 +432,30 @@ async function loadTestsFromYaml( continue; } - const conversationId = asString(testCaseConfig.conversation_id); - let outcome = asString(testCaseConfig.criteria); - if (!outcome && testCaseConfig.expected_outcome !== undefined) { - outcome = asString(testCaseConfig.expected_outcome); + const caseVars = isJsonObject(testCaseConfig.vars) ? testCaseConfig.vars : undefined; + const renderedCase = { + ...testCaseConfig, + criteria: interpolateCaseField(testCaseConfig.criteria, caseVars), + expected_outcome: interpolateCaseField(testCaseConfig.expected_outcome, caseVars), + input: interpolateCaseField(testCaseConfig.input, caseVars), + input_files: interpolateCaseField(testCaseConfig.input_files, caseVars), + expected_output: interpolateCaseField(testCaseConfig.expected_output, caseVars), + turns: interpolateCaseTurns(testCaseConfig.turns, caseVars), + } satisfies RawEvalCase; + + const conversationId = asString(renderedCase.conversation_id); + let outcome = asString(renderedCase.criteria); + if (!outcome && renderedCase.expected_outcome !== undefined) { + outcome = asString(renderedCase.expected_outcome); if (outcome) { logWarning( - `Test '${asString(testCaseConfig.id) ?? 'unknown'}': 'expected_outcome' is deprecated. Use 'criteria' instead.`, + `Test '${asString(renderedCase.id) ?? 'unknown'}': 'expected_outcome' is deprecated. Use 'criteria' instead.`, ); } } // Extract per-case execution config early (reused below for skip_defaults) - const caseExecution = isJsonObject(testCaseConfig.execution) - ? testCaseConfig.execution - : undefined; + const caseExecution = isJsonObject(renderedCase.execution) ? renderedCase.execution : undefined; const skipDefaults = caseExecution?.skip_defaults === true; const caseThreshold = typeof caseExecution?.threshold === 'number' && @@ -427,18 +465,21 @@ async function loadTestsFromYaml( : undefined; // Resolve input with shorthand support (pass suite-level input_files for merge) - const effectiveSuiteInputFiles = suiteInputFiles && !skipDefaults ? suiteInputFiles : undefined; - const testInputMessages = resolveInputMessages(testCaseConfig, effectiveSuiteInputFiles); + const effectiveSuiteInputFiles = + rawSuiteInputFiles && !skipDefaults + ? interpolateCaseField(rawSuiteInputFiles, caseVars) + : undefined; + const testInputMessages = resolveInputMessages(renderedCase, effectiveSuiteInputFiles); // Resolve expected_output with shorthand support - const expectedMessages = resolveExpectedMessages(testCaseConfig) ?? []; + const expectedMessages = resolveExpectedMessages(renderedCase) ?? []; // A test is complete when it has id, input, and at least one of: criteria, expected_output, assertions, or turns (conversation mode) const hasEvaluationSpec = !!outcome || expectedMessages.length > 0 || - testCaseConfig.assertions !== undefined || - testCaseConfig.assert !== undefined || - (Array.isArray(testCaseConfig.turns) && testCaseConfig.turns.length > 0); + renderedCase.assertions !== undefined || + renderedCase.assert !== undefined || + (Array.isArray(renderedCase.turns) && renderedCase.turns.length > 0); if (!id || !hasEvaluationSpec || !testInputMessages || testInputMessages.length === 0) { logError( `Skipping incomplete test: ${id ?? 'unknown'}. Missing required fields: id, input, and at least one of criteria/expected_output/assertions/turns`, @@ -447,8 +488,9 @@ async function loadTestsFromYaml( } // Prepend suite-level input to test input (respecting skip_defaults) - const effectiveSuiteInputMessages = - suiteInputMessages && !skipDefaults ? suiteInputMessages : undefined; + const effectiveSuiteInputValue = + rawSuiteInput && !skipDefaults ? interpolateCaseField(rawSuiteInput, caseVars) : undefined; + const effectiveSuiteInputMessages = expandInputShorthand(effectiveSuiteInputValue); // expected_output is optional - for outcome-only evaluation const hasExpectedMessages = expectedMessages.length > 0; @@ -513,11 +555,11 @@ async function loadTestsFromYaml( .filter((part) => part.length > 0) .join(' '); - const testCaseEvaluatorKind = coerceEvaluator(testCaseConfig.evaluator, id) ?? globalEvaluator; + const testCaseEvaluatorKind = coerceEvaluator(renderedCase.evaluator, id) ?? globalEvaluator; let evaluators: Awaited>; try { evaluators = await parseGraders( - testCaseConfig, + renderedCase, globalExecution, searchRoots, id ?? 'unknown', @@ -531,7 +573,7 @@ async function loadTestsFromYaml( } // Handle inline rubrics field (deprecated: use assertions: [{type: rubrics, criteria: [...]}] instead) - const inlineRubrics = testCaseConfig.rubrics; + const inlineRubrics = renderedCase.rubrics; if (inlineRubrics !== undefined && Array.isArray(inlineRubrics)) { const rubricEvaluator = parseInlineRubrics(inlineRubrics); if (rubricEvaluator) { @@ -545,28 +587,28 @@ async function loadTestsFromYaml( const userFilePaths = collectResolvedInputFilePaths(inputMessages); // Parse per-case workspace config and merge with suite-level - const caseWorkspace = await resolveWorkspaceConfig(testCaseConfig.workspace, evalFileDir); + const caseWorkspace = await resolveWorkspaceConfig(renderedCase.workspace, evalFileDir); const mergedWorkspace = mergeWorkspaceConfigs(suiteWorkspace, caseWorkspace); // Parse per-case metadata, then merge suite-level metadata payload. // Arrays concatenate (suite-first, deduplicated), scalars on the case win. - const rawCaseMetadata = isJsonObject(testCaseConfig.metadata) - ? (testCaseConfig.metadata as Record) + const rawCaseMetadata = isJsonObject(renderedCase.metadata) + ? (renderedCase.metadata as Record) : undefined; const suitePayload = suiteGovernance !== undefined ? { governance: suiteGovernance } : undefined; const metadata = mergeSuiteMetadataPayload(rawCaseMetadata, suitePayload); // Extract per-test targets override (matrix evaluation) - const caseTargets = extractTargetsFromTestCase(testCaseConfig as JsonObject); + const caseTargets = extractTargetsFromTestCase(renderedCase as JsonObject); // Extract dependency fields - const dependsOn = Array.isArray(testCaseConfig.depends_on) - ? (testCaseConfig.depends_on as readonly string[]).filter( + const dependsOn = Array.isArray(renderedCase.depends_on) + ? (renderedCase.depends_on as readonly string[]).filter( (v): v is string => typeof v === 'string', ) : undefined; - const onDependencyFailureRaw = asString(testCaseConfig.on_dependency_failure); + const onDependencyFailureRaw = asString(renderedCase.on_dependency_failure); const onDependencyFailure = onDependencyFailureRaw === 'skip' || onDependencyFailureRaw === 'fail' || @@ -575,23 +617,23 @@ async function loadTestsFromYaml( : undefined; // Extract conversation mode fields - const modeRaw = asString(testCaseConfig.mode); + const modeRaw = asString(renderedCase.mode); const mode: ConversationMode | undefined = modeRaw === 'conversation' ? 'conversation' : undefined; - const turns = Array.isArray(testCaseConfig.turns) - ? parseTurns(testCaseConfig.turns as readonly unknown[]) + const turns = Array.isArray(renderedCase.turns) + ? parseTurns(renderedCase.turns as readonly unknown[]) : undefined; - const aggregationRaw = asString(testCaseConfig.aggregation); + const aggregationRaw = asString(renderedCase.aggregation); const aggregation: ConversationAggregation | undefined = aggregationRaw === 'mean' || aggregationRaw === 'min' || aggregationRaw === 'max' ? aggregationRaw : undefined; - const onTurnFailureRaw = asString(testCaseConfig.on_turn_failure); + const onTurnFailureRaw = asString(renderedCase.on_turn_failure); const onTurnFailure: TurnFailurePolicy | undefined = onTurnFailureRaw === 'continue' || onTurnFailureRaw === 'stop' ? onTurnFailureRaw : undefined; const windowSize = - typeof testCaseConfig.window_size === 'number' && testCaseConfig.window_size >= 1 - ? (testCaseConfig.window_size as number) + typeof renderedCase.window_size === 'number' && renderedCase.window_size >= 1 + ? (renderedCase.window_size as number) : undefined; const testCase: EvalTest = { diff --git a/packages/core/test/evaluation/interpolation.test.ts b/packages/core/test/evaluation/interpolation.test.ts index d416f7d0f..ecaccf299 100644 --- a/packages/core/test/evaluation/interpolation.test.ts +++ b/packages/core/test/evaluation/interpolation.test.ts @@ -1,5 +1,5 @@ import { describe, expect, it } from 'vitest'; -import { interpolateEnv } from '../../src/evaluation/interpolation.js'; +import { interpolateEnv, interpolateTemplateVars } from '../../src/evaluation/interpolation.js'; describe('interpolateEnv', () => { const env = { HOME: '/home/user', PROJECT: 'agentv', EMPTY: '' }; @@ -135,3 +135,31 @@ describe('interpolateEnv', () => { expect(interpolateEnv('${{ MY_VAR_2 }}', envWithSpecial)).toBe('value'); }); }); + +describe('interpolateTemplateVars', () => { + const vars = { + question: 'What is 2 + 2?', + nested: { topic: 'math' }, + expected: { answer: '4' }, + }; + + it('replaces {{ var }} in strings', () => { + expect(interpolateTemplateVars('Answer clearly: {{question}}', vars)).toBe( + 'Answer clearly: What is 2 + 2?', + ); + }); + + it('supports dotted paths', () => { + expect(interpolateTemplateVars('Topic: {{ nested.topic }}', vars)).toBe('Topic: math'); + }); + + it('preserves missing variables instead of blanking them out', () => { + expect(interpolateTemplateVars('Answer clearly: {{missing}}', vars)).toBe( + 'Answer clearly: {{missing}}', + ); + }); + + it('returns the original JSON value for whole-value substitutions', () => { + expect(interpolateTemplateVars('{{expected}}', vars)).toEqual({ answer: '4' }); + }); +}); diff --git a/packages/core/test/evaluation/suite-level-input.test.ts b/packages/core/test/evaluation/suite-level-input.test.ts index d23a77fe9..eebf53f82 100644 --- a/packages/core/test/evaluation/suite-level-input.test.ts +++ b/packages/core/test/evaluation/suite-level-input.test.ts @@ -191,4 +191,78 @@ tests: expect(tests[0].input[2]).toEqual({ role: 'assistant', content: 'I understand.' }); expect(tests[0].input[3]).toEqual({ role: 'user', content: 'Follow-up question' }); }); + + it('applies per-test vars to suite and test input templates', async () => { + await writeFile( + path.join(tempDir, 'templated-input.eval.yaml'), + `input: "Answer clearly: {{question}}" +tests: + - id: templated + vars: + question: "What is the capital of France?" + criteria: "Answers {{question}} correctly" + input: + - role: user + content: "Question: {{question}}" + - role: assistant + content: "Thinking about {{question}}" + - role: user + content: "Final answer only." + expected_output: "{{expected_answer}}" + metadata: + untouched: "{{question}}" +`, + ); + + const tests = await loadTests(path.join(tempDir, 'templated-input.eval.yaml'), tempDir); + + expect(tests).toHaveLength(1); + expect(tests[0].criteria).toBe('Answers What is the capital of France? correctly'); + expect(tests[0].question).toContain('Answer clearly: What is the capital of France?'); + expect(tests[0].input[0]).toEqual({ + role: 'user', + content: 'Answer clearly: What is the capital of France?', + }); + expect(tests[0].input[1]).toEqual({ + role: 'user', + content: 'Question: What is the capital of France?', + }); + expect(tests[0].input[2]).toEqual({ + role: 'assistant', + content: 'Thinking about What is the capital of France?', + }); + expect(tests[0].expected_output).toEqual([ + { role: 'assistant', content: '{{expected_answer}}' }, + ]); + expect(tests[0].metadata).toEqual({ untouched: '{{question}}' }); + }); + + it('applies per-test vars inside conversation turns', async () => { + await writeFile( + path.join(tempDir, 'templated-turns.eval.yaml'), + `tests: + - id: conversation + vars: + bug: parser null check + mode: conversation + input: "Fix {{bug}}" + turns: + - input: "Fix {{bug}}" + expected_output: "Fixed {{bug}}" + assertions: + - "Mentions {{bug}}" +`, + ); + + const tests = await loadTests(path.join(tempDir, 'templated-turns.eval.yaml'), tempDir); + + expect(tests).toHaveLength(1); + expect(tests[0].turns).toEqual([ + { + input: 'Fix parser null check', + expected_output: 'Fixed parser null check', + assertions: ['Mentions {{bug}}'], + }, + ]); + }); }); diff --git a/skills-data/agentv-eval-writer/references/eval-schema.json b/skills-data/agentv-eval-writer/references/eval-schema.json index 2f6fd88a9..10bca50f4 100644 --- a/skills-data/agentv-eval-writer/references/eval-schema.json +++ b/skills-data/agentv-eval-writer/references/eval-schema.json @@ -56,7 +56,12 @@ "properties": { "role": { "type": "string", - "enum": ["system", "user", "assistant", "tool"] + "enum": [ + "system", + "user", + "assistant", + "tool" + ] }, "content": { "anyOf": [ @@ -70,20 +75,30 @@ "properties": { "type": { "type": "string", - "enum": ["text", "file", "image"] + "enum": [ + "text", + "file", + "image" + ] }, "value": { "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false } } ] } }, - "required": ["role", "content"], + "required": [ + "role", + "content" + ], "additionalProperties": false } } @@ -106,6 +121,11 @@ "type": "string", "minLength": 1 }, + "vars": { + "type": "object", + "properties": {}, + "additionalProperties": {} + }, "criteria": { "type": "string" }, @@ -121,7 +141,12 @@ "properties": { "role": { "type": "string", - "enum": ["system", "user", "assistant", "tool"] + "enum": [ + "system", + "user", + "assistant", + "tool" + ] }, "content": { "anyOf": [ @@ -135,20 +160,30 @@ "properties": { "type": { "type": "string", - "enum": ["text", "file", "image"] + "enum": [ + "text", + "file", + "image" + ] }, "value": { "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false } } ] } }, - "required": ["role", "content"], + "required": [ + "role", + "content" + ], "additionalProperties": false } } @@ -167,6 +202,7 @@ }, { "type": "object", + "properties": {}, "additionalProperties": {} }, { @@ -176,7 +212,12 @@ "properties": { "role": { "type": "string", - "enum": ["system", "user", "assistant", "tool"] + "enum": [ + "system", + "user", + "assistant", + "tool" + ] }, "content": { "anyOf": [ @@ -190,20 +231,30 @@ "properties": { "type": { "type": "string", - "enum": ["text", "file", "image"] + "enum": [ + "text", + "file", + "image" + ] }, "value": { "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false } } ] } }, - "required": ["role", "content"], + "required": [ + "role", + "content" + ], "additionalProperties": false } } @@ -247,7 +298,10 @@ }, "type": { "type": "string", - "enum": ["code-grader", "code_grader"] + "enum": [ + "code-grader", + "code_grader" + ] }, "command": { "anyOf": [ @@ -321,12 +375,18 @@ ] } }, - "required": ["type", "command"], + "required": [ + "type", + "command" + ], "additionalProperties": false } } }, - "required": ["type", "command"], + "required": [ + "type", + "command" + ], "additionalProperties": false }, { @@ -363,7 +423,10 @@ }, "type": { "type": "string", - "enum": ["llm-grader", "llm_grader"] + "enum": [ + "llm-grader", + "llm_grader" + ] }, "prompt": { "anyOf": [ @@ -458,7 +521,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -509,12 +575,17 @@ ] } }, - "required": ["type", "command"], + "required": [ + "type", + "command" + ], "additionalProperties": false } } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -525,7 +596,9 @@ "minLength": 1 } }, - "required": ["include"], + "required": [ + "include" + ], "additionalProperties": false }, { @@ -588,7 +661,9 @@ } } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -604,7 +679,10 @@ "maximum": 1 } }, - "required": ["type", "threshold"], + "required": [ + "type", + "threshold" + ], "additionalProperties": false }, { @@ -621,7 +699,10 @@ "type": "string" } }, - "required": ["type", "path"], + "required": [ + "type", + "path" + ], "additionalProperties": false }, { @@ -638,13 +719,18 @@ "type": "string" } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false } ] } }, - "required": ["type", "aggregator"], + "required": [ + "type", + "aggregator" + ], "additionalProperties": false }, { @@ -681,11 +767,20 @@ }, "type": { "type": "string", - "enum": ["tool-trajectory", "tool_trajectory"] + "enum": [ + "tool-trajectory", + "tool_trajectory" + ] }, "mode": { "type": "string", - "enum": ["any_order", "in_order", "exact", "subset", "superset"] + "enum": [ + "any_order", + "in_order", + "exact", + "subset", + "superset" + ] }, "minimums": { "type": "object", @@ -726,7 +821,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -740,7 +840,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -751,7 +856,9 @@ ] } }, - "required": ["tool"], + "required": [ + "tool" + ], "additionalProperties": false } }, @@ -759,7 +866,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -773,7 +885,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -784,7 +901,10 @@ ] } }, - "required": ["type", "mode"], + "required": [ + "type", + "mode" + ], "additionalProperties": false }, { @@ -821,7 +941,10 @@ }, "type": { "type": "string", - "enum": ["field-accuracy", "field_accuracy"] + "enum": [ + "field-accuracy", + "field_accuracy" + ] }, "fields": { "type": "array", @@ -833,7 +956,11 @@ }, "match": { "type": "string", - "enum": ["exact", "numeric_tolerance", "date"] + "enum": [ + "exact", + "numeric_tolerance", + "date" + ] }, "required": { "type": "boolean" @@ -855,17 +982,26 @@ } } }, - "required": ["path", "match"], + "required": [ + "path", + "match" + ], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": ["weighted_average", "all_or_nothing"] + "enum": [ + "weighted_average", + "all_or_nothing" + ] } }, - "required": ["type", "fields"], + "required": [ + "type", + "fields" + ], "additionalProperties": false }, { @@ -909,7 +1045,10 @@ "minimum": 0 } }, - "required": ["type", "threshold"], + "required": [ + "type", + "threshold" + ], "additionalProperties": false }, { @@ -953,7 +1092,10 @@ "minimum": 0 } }, - "required": ["type", "budget"], + "required": [ + "type", + "budget" + ], "additionalProperties": false }, { @@ -990,7 +1132,10 @@ }, "type": { "type": "string", - "enum": ["token-usage", "token_usage"] + "enum": [ + "token-usage", + "token_usage" + ] }, "max_total": { "type": "number", @@ -1005,7 +1150,9 @@ "minimum": 0 } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -1042,7 +1189,10 @@ }, "type": { "type": "string", - "enum": ["execution-metrics", "execution_metrics"] + "enum": [ + "execution-metrics", + "execution_metrics" + ] }, "max_tool_calls": { "type": "number", @@ -1074,7 +1224,9 @@ "minimum": 0 } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -1117,7 +1269,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -1160,7 +1315,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -1197,10 +1355,15 @@ }, "type": { "type": "string", - "enum": ["is-json", "is_json"] + "enum": [ + "is-json", + "is_json" + ] } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -1243,7 +1406,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -1332,7 +1498,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -1342,7 +1511,10 @@ "minItems": 1 } }, - "required": ["type", "criteria"], + "required": [ + "type", + "criteria" + ], "additionalProperties": false } ] @@ -1386,7 +1558,10 @@ }, "type": { "type": "string", - "enum": ["code-grader", "code_grader"] + "enum": [ + "code-grader", + "code_grader" + ] }, "command": { "anyOf": [ @@ -1460,12 +1635,18 @@ ] } }, - "required": ["type", "command"], + "required": [ + "type", + "command" + ], "additionalProperties": false } } }, - "required": ["type", "command"], + "required": [ + "type", + "command" + ], "additionalProperties": false }, { @@ -1502,7 +1683,10 @@ }, "type": { "type": "string", - "enum": ["llm-grader", "llm_grader"] + "enum": [ + "llm-grader", + "llm_grader" + ] }, "prompt": { "anyOf": [ @@ -1597,7 +1781,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -1648,12 +1835,17 @@ ] } }, - "required": ["type", "command"], + "required": [ + "type", + "command" + ], "additionalProperties": false } } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -1664,7 +1856,9 @@ "minLength": 1 } }, - "required": ["include"], + "required": [ + "include" + ], "additionalProperties": false }, { @@ -1727,7 +1921,9 @@ } } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -1743,7 +1939,10 @@ "maximum": 1 } }, - "required": ["type", "threshold"], + "required": [ + "type", + "threshold" + ], "additionalProperties": false }, { @@ -1760,7 +1959,10 @@ "type": "string" } }, - "required": ["type", "path"], + "required": [ + "type", + "path" + ], "additionalProperties": false }, { @@ -1777,13 +1979,18 @@ "type": "string" } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false } ] } }, - "required": ["type", "aggregator"], + "required": [ + "type", + "aggregator" + ], "additionalProperties": false }, { @@ -1820,11 +2027,20 @@ }, "type": { "type": "string", - "enum": ["tool-trajectory", "tool_trajectory"] + "enum": [ + "tool-trajectory", + "tool_trajectory" + ] }, "mode": { "type": "string", - "enum": ["any_order", "in_order", "exact", "subset", "superset"] + "enum": [ + "any_order", + "in_order", + "exact", + "subset", + "superset" + ] }, "minimums": { "type": "object", @@ -1865,7 +2081,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -1879,7 +2100,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -1890,7 +2116,9 @@ ] } }, - "required": ["tool"], + "required": [ + "tool" + ], "additionalProperties": false } }, @@ -1898,7 +2126,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -1912,7 +2145,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -1923,7 +2161,10 @@ ] } }, - "required": ["type", "mode"], + "required": [ + "type", + "mode" + ], "additionalProperties": false }, { @@ -1960,7 +2201,10 @@ }, "type": { "type": "string", - "enum": ["field-accuracy", "field_accuracy"] + "enum": [ + "field-accuracy", + "field_accuracy" + ] }, "fields": { "type": "array", @@ -1972,7 +2216,11 @@ }, "match": { "type": "string", - "enum": ["exact", "numeric_tolerance", "date"] + "enum": [ + "exact", + "numeric_tolerance", + "date" + ] }, "required": { "type": "boolean" @@ -1994,17 +2242,26 @@ } } }, - "required": ["path", "match"], + "required": [ + "path", + "match" + ], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": ["weighted_average", "all_or_nothing"] + "enum": [ + "weighted_average", + "all_or_nothing" + ] } }, - "required": ["type", "fields"], + "required": [ + "type", + "fields" + ], "additionalProperties": false }, { @@ -2048,7 +2305,10 @@ "minimum": 0 } }, - "required": ["type", "threshold"], + "required": [ + "type", + "threshold" + ], "additionalProperties": false }, { @@ -2092,7 +2352,10 @@ "minimum": 0 } }, - "required": ["type", "budget"], + "required": [ + "type", + "budget" + ], "additionalProperties": false }, { @@ -2129,7 +2392,10 @@ }, "type": { "type": "string", - "enum": ["token-usage", "token_usage"] + "enum": [ + "token-usage", + "token_usage" + ] }, "max_total": { "type": "number", @@ -2144,7 +2410,9 @@ "minimum": 0 } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -2181,7 +2449,10 @@ }, "type": { "type": "string", - "enum": ["execution-metrics", "execution_metrics"] + "enum": [ + "execution-metrics", + "execution_metrics" + ] }, "max_tool_calls": { "type": "number", @@ -2213,7 +2484,9 @@ "minimum": 0 } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -2256,7 +2529,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -2299,7 +2575,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -2336,10 +2615,15 @@ }, "type": { "type": "string", - "enum": ["is-json", "is_json"] + "enum": [ + "is-json", + "is_json" + ] } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -2382,7 +2666,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -2471,7 +2758,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -2481,7 +2771,10 @@ "minItems": 1 } }, - "required": ["type", "criteria"], + "required": [ + "type", + "criteria" + ], "additionalProperties": false } ] @@ -2553,7 +2846,11 @@ }, "reset": { "type": "string", - "enum": ["none", "fast", "strict"] + "enum": [ + "none", + "fast", + "strict" + ] } }, "additionalProperties": false @@ -2598,7 +2895,11 @@ }, "reset": { "type": "string", - "enum": ["none", "fast", "strict"] + "enum": [ + "none", + "fast", + "strict" + ] } }, "additionalProperties": false @@ -2643,7 +2944,11 @@ }, "reset": { "type": "string", - "enum": ["none", "fast", "strict"] + "enum": [ + "none", + "fast", + "strict" + ] } }, "additionalProperties": false @@ -2688,7 +2993,11 @@ }, "reset": { "type": "string", - "enum": ["none", "fast", "strict"] + "enum": [ + "none", + "fast", + "strict" + ] } }, "additionalProperties": false @@ -2697,7 +3006,9 @@ "additionalProperties": false } }, - "required": ["name"], + "required": [ + "name" + ], "additionalProperties": false } ] @@ -2746,7 +3057,10 @@ }, "type": { "type": "string", - "enum": ["code-grader", "code_grader"] + "enum": [ + "code-grader", + "code_grader" + ] }, "command": { "anyOf": [ @@ -2820,12 +3134,18 @@ ] } }, - "required": ["type", "command"], + "required": [ + "type", + "command" + ], "additionalProperties": false } } }, - "required": ["type", "command"], + "required": [ + "type", + "command" + ], "additionalProperties": false }, { @@ -2862,7 +3182,10 @@ }, "type": { "type": "string", - "enum": ["llm-grader", "llm_grader"] + "enum": [ + "llm-grader", + "llm_grader" + ] }, "prompt": { "anyOf": [ @@ -2957,7 +3280,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -3008,12 +3334,17 @@ ] } }, - "required": ["type", "command"], + "required": [ + "type", + "command" + ], "additionalProperties": false } } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -3024,7 +3355,9 @@ "minLength": 1 } }, - "required": ["include"], + "required": [ + "include" + ], "additionalProperties": false }, { @@ -3087,7 +3420,9 @@ } } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -3103,7 +3438,10 @@ "maximum": 1 } }, - "required": ["type", "threshold"], + "required": [ + "type", + "threshold" + ], "additionalProperties": false }, { @@ -3120,7 +3458,10 @@ "type": "string" } }, - "required": ["type", "path"], + "required": [ + "type", + "path" + ], "additionalProperties": false }, { @@ -3137,13 +3478,18 @@ "type": "string" } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false } ] } }, - "required": ["type", "aggregator"], + "required": [ + "type", + "aggregator" + ], "additionalProperties": false }, { @@ -3180,11 +3526,20 @@ }, "type": { "type": "string", - "enum": ["tool-trajectory", "tool_trajectory"] + "enum": [ + "tool-trajectory", + "tool_trajectory" + ] }, "mode": { "type": "string", - "enum": ["any_order", "in_order", "exact", "subset", "superset"] + "enum": [ + "any_order", + "in_order", + "exact", + "subset", + "superset" + ] }, "minimums": { "type": "object", @@ -3225,7 +3580,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -3239,7 +3599,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -3250,7 +3615,9 @@ ] } }, - "required": ["tool"], + "required": [ + "tool" + ], "additionalProperties": false } }, @@ -3258,7 +3625,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -3272,7 +3644,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -3283,7 +3660,10 @@ ] } }, - "required": ["type", "mode"], + "required": [ + "type", + "mode" + ], "additionalProperties": false }, { @@ -3320,7 +3700,10 @@ }, "type": { "type": "string", - "enum": ["field-accuracy", "field_accuracy"] + "enum": [ + "field-accuracy", + "field_accuracy" + ] }, "fields": { "type": "array", @@ -3332,7 +3715,11 @@ }, "match": { "type": "string", - "enum": ["exact", "numeric_tolerance", "date"] + "enum": [ + "exact", + "numeric_tolerance", + "date" + ] }, "required": { "type": "boolean" @@ -3354,17 +3741,26 @@ } } }, - "required": ["path", "match"], + "required": [ + "path", + "match" + ], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": ["weighted_average", "all_or_nothing"] + "enum": [ + "weighted_average", + "all_or_nothing" + ] } }, - "required": ["type", "fields"], + "required": [ + "type", + "fields" + ], "additionalProperties": false }, { @@ -3408,7 +3804,10 @@ "minimum": 0 } }, - "required": ["type", "threshold"], + "required": [ + "type", + "threshold" + ], "additionalProperties": false }, { @@ -3452,7 +3851,10 @@ "minimum": 0 } }, - "required": ["type", "budget"], + "required": [ + "type", + "budget" + ], "additionalProperties": false }, { @@ -3489,7 +3891,10 @@ }, "type": { "type": "string", - "enum": ["token-usage", "token_usage"] + "enum": [ + "token-usage", + "token_usage" + ] }, "max_total": { "type": "number", @@ -3504,7 +3909,9 @@ "minimum": 0 } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -3541,7 +3948,10 @@ }, "type": { "type": "string", - "enum": ["execution-metrics", "execution_metrics"] + "enum": [ + "execution-metrics", + "execution_metrics" + ] }, "max_tool_calls": { "type": "number", @@ -3573,7 +3983,9 @@ "minimum": 0 } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -3616,7 +4028,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -3659,7 +4074,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -3696,10 +4114,15 @@ }, "type": { "type": "string", - "enum": ["is-json", "is_json"] + "enum": [ + "is-json", + "is_json" + ] } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -3742,7 +4165,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -3831,7 +4257,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -3841,7 +4270,10 @@ "minItems": 1 } }, - "required": ["type", "criteria"], + "required": [ + "type", + "criteria" + ], "additionalProperties": false } ] @@ -3885,7 +4317,10 @@ }, "type": { "type": "string", - "enum": ["code-grader", "code_grader"] + "enum": [ + "code-grader", + "code_grader" + ] }, "command": { "anyOf": [ @@ -3959,12 +4394,18 @@ ] } }, - "required": ["type", "command"], + "required": [ + "type", + "command" + ], "additionalProperties": false } } }, - "required": ["type", "command"], + "required": [ + "type", + "command" + ], "additionalProperties": false }, { @@ -4001,7 +4442,10 @@ }, "type": { "type": "string", - "enum": ["llm-grader", "llm_grader"] + "enum": [ + "llm-grader", + "llm_grader" + ] }, "prompt": { "anyOf": [ @@ -4096,7 +4540,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -4147,12 +4594,17 @@ ] } }, - "required": ["type", "command"], + "required": [ + "type", + "command" + ], "additionalProperties": false } } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -4163,7 +4615,9 @@ "minLength": 1 } }, - "required": ["include"], + "required": [ + "include" + ], "additionalProperties": false }, { @@ -4226,7 +4680,9 @@ } } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -4242,7 +4698,10 @@ "maximum": 1 } }, - "required": ["type", "threshold"], + "required": [ + "type", + "threshold" + ], "additionalProperties": false }, { @@ -4259,7 +4718,10 @@ "type": "string" } }, - "required": ["type", "path"], + "required": [ + "type", + "path" + ], "additionalProperties": false }, { @@ -4276,13 +4738,18 @@ "type": "string" } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false } ] } }, - "required": ["type", "aggregator"], + "required": [ + "type", + "aggregator" + ], "additionalProperties": false }, { @@ -4319,11 +4786,20 @@ }, "type": { "type": "string", - "enum": ["tool-trajectory", "tool_trajectory"] + "enum": [ + "tool-trajectory", + "tool_trajectory" + ] }, "mode": { "type": "string", - "enum": ["any_order", "in_order", "exact", "subset", "superset"] + "enum": [ + "any_order", + "in_order", + "exact", + "subset", + "superset" + ] }, "minimums": { "type": "object", @@ -4364,7 +4840,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -4378,7 +4859,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -4389,7 +4875,9 @@ ] } }, - "required": ["tool"], + "required": [ + "tool" + ], "additionalProperties": false } }, @@ -4397,7 +4885,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -4411,7 +4904,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -4422,7 +4920,10 @@ ] } }, - "required": ["type", "mode"], + "required": [ + "type", + "mode" + ], "additionalProperties": false }, { @@ -4459,7 +4960,10 @@ }, "type": { "type": "string", - "enum": ["field-accuracy", "field_accuracy"] + "enum": [ + "field-accuracy", + "field_accuracy" + ] }, "fields": { "type": "array", @@ -4471,7 +4975,11 @@ }, "match": { "type": "string", - "enum": ["exact", "numeric_tolerance", "date"] + "enum": [ + "exact", + "numeric_tolerance", + "date" + ] }, "required": { "type": "boolean" @@ -4493,17 +5001,26 @@ } } }, - "required": ["path", "match"], + "required": [ + "path", + "match" + ], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": ["weighted_average", "all_or_nothing"] + "enum": [ + "weighted_average", + "all_or_nothing" + ] } }, - "required": ["type", "fields"], + "required": [ + "type", + "fields" + ], "additionalProperties": false }, { @@ -4547,7 +5064,10 @@ "minimum": 0 } }, - "required": ["type", "threshold"], + "required": [ + "type", + "threshold" + ], "additionalProperties": false }, { @@ -4591,7 +5111,10 @@ "minimum": 0 } }, - "required": ["type", "budget"], + "required": [ + "type", + "budget" + ], "additionalProperties": false }, { @@ -4628,7 +5151,10 @@ }, "type": { "type": "string", - "enum": ["token-usage", "token_usage"] + "enum": [ + "token-usage", + "token_usage" + ] }, "max_total": { "type": "number", @@ -4643,7 +5169,9 @@ "minimum": 0 } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -4680,7 +5208,10 @@ }, "type": { "type": "string", - "enum": ["execution-metrics", "execution_metrics"] + "enum": [ + "execution-metrics", + "execution_metrics" + ] }, "max_tool_calls": { "type": "number", @@ -4712,7 +5243,9 @@ "minimum": 0 } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -4755,7 +5288,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -4798,7 +5334,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -4835,10 +5374,15 @@ }, "type": { "type": "string", - "enum": ["is-json", "is_json"] + "enum": [ + "is-json", + "is_json" + ] } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -4881,7 +5425,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -4970,7 +5517,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -4980,7 +5530,10 @@ "minItems": 1 } }, - "required": ["type", "criteria"], + "required": [ + "type", + "criteria" + ], "additionalProperties": false } ] @@ -5001,7 +5554,11 @@ }, "strategy": { "type": "string", - "enum": ["pass_at_k", "mean", "confidence_interval"] + "enum": [ + "pass_at_k", + "mean", + "confidence_interval" + ] }, "cost_limit_usd": { "type": "number", @@ -5012,7 +5569,9 @@ "minimum": 0 } }, - "required": ["count"], + "required": [ + "count" + ], "additionalProperties": false }, "budget_usd": { @@ -5045,7 +5604,10 @@ }, "isolation": { "type": "string", - "enum": ["shared", "per_test"] + "enum": [ + "shared", + "per_test" + ] }, "repos": { "type": "array", @@ -5069,7 +5631,10 @@ "format": "uri" } }, - "required": ["type", "url"], + "required": [ + "type", + "url" + ], "additionalProperties": false }, { @@ -5083,7 +5648,10 @@ "type": "string" } }, - "required": ["type", "path"], + "required": [ + "type", + "path" + ], "additionalProperties": false } ] @@ -5100,7 +5668,10 @@ }, "resolve": { "type": "string", - "enum": ["remote", "local"] + "enum": [ + "remote", + "local" + ] }, "ancestor": { "type": "integer", @@ -5178,7 +5749,11 @@ }, "reset": { "type": "string", - "enum": ["none", "fast", "strict"] + "enum": [ + "none", + "fast", + "strict" + ] } }, "additionalProperties": false @@ -5223,7 +5798,11 @@ }, "reset": { "type": "string", - "enum": ["none", "fast", "strict"] + "enum": [ + "none", + "fast", + "strict" + ] } }, "additionalProperties": false @@ -5268,7 +5847,11 @@ }, "reset": { "type": "string", - "enum": ["none", "fast", "strict"] + "enum": [ + "none", + "fast", + "strict" + ] } }, "additionalProperties": false @@ -5313,7 +5896,11 @@ }, "reset": { "type": "string", - "enum": ["none", "fast", "strict"] + "enum": [ + "none", + "fast", + "strict" + ] } }, "additionalProperties": false @@ -5323,7 +5910,11 @@ }, "mode": { "type": "string", - "enum": ["pooled", "temp", "static"] + "enum": [ + "pooled", + "temp", + "static" + ] }, "path": { "type": "string" @@ -5346,7 +5937,9 @@ "minimum": 0.1 } }, - "required": ["image"], + "required": [ + "image" + ], "additionalProperties": false } }, @@ -5370,11 +5963,17 @@ }, "on_dependency_failure": { "type": "string", - "enum": ["skip", "fail", "run"] + "enum": [ + "skip", + "fail", + "run" + ] }, "mode": { "type": "string", - "enum": ["conversation"] + "enum": [ + "conversation" + ] }, "turns": { "type": "array", @@ -5398,13 +5997,20 @@ "properties": { "type": { "type": "string", - "enum": ["text", "file", "image"] + "enum": [ + "text", + "file", + "image" + ] }, "value": { "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false } } @@ -5429,13 +6035,20 @@ "properties": { "type": { "type": "string", - "enum": ["text", "file", "image"] + "enum": [ + "text", + "file", + "image" + ] }, "value": { "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false } } @@ -5486,7 +6099,10 @@ }, "type": { "type": "string", - "enum": ["code-grader", "code_grader"] + "enum": [ + "code-grader", + "code_grader" + ] }, "command": { "anyOf": [ @@ -5560,12 +6176,18 @@ ] } }, - "required": ["type", "command"], + "required": [ + "type", + "command" + ], "additionalProperties": false } } }, - "required": ["type", "command"], + "required": [ + "type", + "command" + ], "additionalProperties": false }, { @@ -5602,7 +6224,10 @@ }, "type": { "type": "string", - "enum": ["llm-grader", "llm_grader"] + "enum": [ + "llm-grader", + "llm_grader" + ] }, "prompt": { "anyOf": [ @@ -5697,7 +6322,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -5748,12 +6376,17 @@ ] } }, - "required": ["type", "command"], + "required": [ + "type", + "command" + ], "additionalProperties": false } } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -5764,7 +6397,9 @@ "minLength": 1 } }, - "required": ["include"], + "required": [ + "include" + ], "additionalProperties": false }, { @@ -5827,7 +6462,9 @@ } } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -5843,7 +6480,10 @@ "maximum": 1 } }, - "required": ["type", "threshold"], + "required": [ + "type", + "threshold" + ], "additionalProperties": false }, { @@ -5860,7 +6500,10 @@ "type": "string" } }, - "required": ["type", "path"], + "required": [ + "type", + "path" + ], "additionalProperties": false }, { @@ -5877,13 +6520,18 @@ "type": "string" } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false } ] } }, - "required": ["type", "aggregator"], + "required": [ + "type", + "aggregator" + ], "additionalProperties": false }, { @@ -5920,7 +6568,10 @@ }, "type": { "type": "string", - "enum": ["tool-trajectory", "tool_trajectory"] + "enum": [ + "tool-trajectory", + "tool_trajectory" + ] }, "mode": { "type": "string", @@ -5971,7 +6622,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -5985,7 +6641,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -5996,7 +6657,9 @@ ] } }, - "required": ["tool"], + "required": [ + "tool" + ], "additionalProperties": false } }, @@ -6004,7 +6667,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -6018,7 +6686,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -6029,7 +6702,10 @@ ] } }, - "required": ["type", "mode"], + "required": [ + "type", + "mode" + ], "additionalProperties": false }, { @@ -6066,7 +6742,10 @@ }, "type": { "type": "string", - "enum": ["field-accuracy", "field_accuracy"] + "enum": [ + "field-accuracy", + "field_accuracy" + ] }, "fields": { "type": "array", @@ -6078,7 +6757,11 @@ }, "match": { "type": "string", - "enum": ["exact", "numeric_tolerance", "date"] + "enum": [ + "exact", + "numeric_tolerance", + "date" + ] }, "required": { "type": "boolean" @@ -6100,17 +6783,26 @@ } } }, - "required": ["path", "match"], + "required": [ + "path", + "match" + ], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": ["weighted_average", "all_or_nothing"] + "enum": [ + "weighted_average", + "all_or_nothing" + ] } }, - "required": ["type", "fields"], + "required": [ + "type", + "fields" + ], "additionalProperties": false }, { @@ -6154,7 +6846,10 @@ "minimum": 0 } }, - "required": ["type", "threshold"], + "required": [ + "type", + "threshold" + ], "additionalProperties": false }, { @@ -6198,7 +6893,10 @@ "minimum": 0 } }, - "required": ["type", "budget"], + "required": [ + "type", + "budget" + ], "additionalProperties": false }, { @@ -6235,7 +6933,10 @@ }, "type": { "type": "string", - "enum": ["token-usage", "token_usage"] + "enum": [ + "token-usage", + "token_usage" + ] }, "max_total": { "type": "number", @@ -6250,7 +6951,9 @@ "minimum": 0 } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -6287,7 +6990,10 @@ }, "type": { "type": "string", - "enum": ["execution-metrics", "execution_metrics"] + "enum": [ + "execution-metrics", + "execution_metrics" + ] }, "max_tool_calls": { "type": "number", @@ -6319,7 +7025,9 @@ "minimum": 0 } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -6362,7 +7070,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -6405,7 +7116,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -6442,10 +7156,15 @@ }, "type": { "type": "string", - "enum": ["is-json", "is_json"] + "enum": [ + "is-json", + "is_json" + ] } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -6488,7 +7207,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -6577,7 +7299,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -6587,7 +7312,10 @@ "minItems": 1 } }, - "required": ["type", "criteria"], + "required": [ + "type", + "criteria" + ], "additionalProperties": false } ] @@ -6596,25 +7324,36 @@ } } }, - "required": ["input"], + "required": [ + "input" + ], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": ["mean", "min", "max"] + "enum": [ + "mean", + "min", + "max" + ] }, "on_turn_failure": { "type": "string", - "enum": ["continue", "stop"] + "enum": [ + "continue", + "stop" + ] }, "window_size": { "type": "integer", "minimum": 1 } }, - "required": ["id"], + "required": [ + "id" + ], "additionalProperties": false } }, @@ -6634,6 +7373,11 @@ "type": "string", "minLength": 1 }, + "vars": { + "type": "object", + "properties": {}, + "additionalProperties": {} + }, "criteria": { "type": "string" }, @@ -6649,7 +7393,12 @@ "properties": { "role": { "type": "string", - "enum": ["system", "user", "assistant", "tool"] + "enum": [ + "system", + "user", + "assistant", + "tool" + ] }, "content": { "anyOf": [ @@ -6663,20 +7412,30 @@ "properties": { "type": { "type": "string", - "enum": ["text", "file", "image"] + "enum": [ + "text", + "file", + "image" + ] }, "value": { "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false } } ] } }, - "required": ["role", "content"], + "required": [ + "role", + "content" + ], "additionalProperties": false } } @@ -6695,6 +7454,7 @@ }, { "type": "object", + "properties": {}, "additionalProperties": {} }, { @@ -6704,7 +7464,12 @@ "properties": { "role": { "type": "string", - "enum": ["system", "user", "assistant", "tool"] + "enum": [ + "system", + "user", + "assistant", + "tool" + ] }, "content": { "anyOf": [ @@ -6718,20 +7483,30 @@ "properties": { "type": { "type": "string", - "enum": ["text", "file", "image"] + "enum": [ + "text", + "file", + "image" + ] }, "value": { "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false } } ] } }, - "required": ["role", "content"], + "required": [ + "role", + "content" + ], "additionalProperties": false } } @@ -6775,7 +7550,10 @@ }, "type": { "type": "string", - "enum": ["code-grader", "code_grader"] + "enum": [ + "code-grader", + "code_grader" + ] }, "command": { "anyOf": [ @@ -6849,12 +7627,18 @@ ] } }, - "required": ["type", "command"], + "required": [ + "type", + "command" + ], "additionalProperties": false } } }, - "required": ["type", "command"], + "required": [ + "type", + "command" + ], "additionalProperties": false }, { @@ -6891,7 +7675,10 @@ }, "type": { "type": "string", - "enum": ["llm-grader", "llm_grader"] + "enum": [ + "llm-grader", + "llm_grader" + ] }, "prompt": { "anyOf": [ @@ -6986,7 +7773,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -7037,12 +7827,17 @@ ] } }, - "required": ["type", "command"], + "required": [ + "type", + "command" + ], "additionalProperties": false } } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -7053,7 +7848,9 @@ "minLength": 1 } }, - "required": ["include"], + "required": [ + "include" + ], "additionalProperties": false }, { @@ -7116,7 +7913,9 @@ } } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -7132,7 +7931,10 @@ "maximum": 1 } }, - "required": ["type", "threshold"], + "required": [ + "type", + "threshold" + ], "additionalProperties": false }, { @@ -7149,7 +7951,10 @@ "type": "string" } }, - "required": ["type", "path"], + "required": [ + "type", + "path" + ], "additionalProperties": false }, { @@ -7166,13 +7971,18 @@ "type": "string" } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false } ] } }, - "required": ["type", "aggregator"], + "required": [ + "type", + "aggregator" + ], "additionalProperties": false }, { @@ -7209,11 +8019,20 @@ }, "type": { "type": "string", - "enum": ["tool-trajectory", "tool_trajectory"] + "enum": [ + "tool-trajectory", + "tool_trajectory" + ] }, "mode": { "type": "string", - "enum": ["any_order", "in_order", "exact", "subset", "superset"] + "enum": [ + "any_order", + "in_order", + "exact", + "subset", + "superset" + ] }, "minimums": { "type": "object", @@ -7254,7 +8073,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -7268,7 +8092,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -7279,7 +8108,9 @@ ] } }, - "required": ["tool"], + "required": [ + "tool" + ], "additionalProperties": false } }, @@ -7287,7 +8118,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -7301,7 +8137,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -7312,7 +8153,10 @@ ] } }, - "required": ["type", "mode"], + "required": [ + "type", + "mode" + ], "additionalProperties": false }, { @@ -7349,7 +8193,10 @@ }, "type": { "type": "string", - "enum": ["field-accuracy", "field_accuracy"] + "enum": [ + "field-accuracy", + "field_accuracy" + ] }, "fields": { "type": "array", @@ -7361,7 +8208,11 @@ }, "match": { "type": "string", - "enum": ["exact", "numeric_tolerance", "date"] + "enum": [ + "exact", + "numeric_tolerance", + "date" + ] }, "required": { "type": "boolean" @@ -7383,17 +8234,26 @@ } } }, - "required": ["path", "match"], + "required": [ + "path", + "match" + ], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": ["weighted_average", "all_or_nothing"] + "enum": [ + "weighted_average", + "all_or_nothing" + ] } }, - "required": ["type", "fields"], + "required": [ + "type", + "fields" + ], "additionalProperties": false }, { @@ -7437,7 +8297,10 @@ "minimum": 0 } }, - "required": ["type", "threshold"], + "required": [ + "type", + "threshold" + ], "additionalProperties": false }, { @@ -7481,7 +8344,10 @@ "minimum": 0 } }, - "required": ["type", "budget"], + "required": [ + "type", + "budget" + ], "additionalProperties": false }, { @@ -7518,7 +8384,10 @@ }, "type": { "type": "string", - "enum": ["token-usage", "token_usage"] + "enum": [ + "token-usage", + "token_usage" + ] }, "max_total": { "type": "number", @@ -7533,7 +8402,9 @@ "minimum": 0 } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -7570,7 +8441,10 @@ }, "type": { "type": "string", - "enum": ["execution-metrics", "execution_metrics"] + "enum": [ + "execution-metrics", + "execution_metrics" + ] }, "max_tool_calls": { "type": "number", @@ -7602,7 +8476,9 @@ "minimum": 0 } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -7645,7 +8521,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -7688,7 +8567,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -7725,10 +8607,15 @@ }, "type": { "type": "string", - "enum": ["is-json", "is_json"] + "enum": [ + "is-json", + "is_json" + ] } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -7771,7 +8658,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -7860,7 +8750,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -7870,7 +8763,10 @@ "minItems": 1 } }, - "required": ["type", "criteria"], + "required": [ + "type", + "criteria" + ], "additionalProperties": false } ] @@ -7914,7 +8810,10 @@ }, "type": { "type": "string", - "enum": ["code-grader", "code_grader"] + "enum": [ + "code-grader", + "code_grader" + ] }, "command": { "anyOf": [ @@ -7988,12 +8887,18 @@ ] } }, - "required": ["type", "command"], + "required": [ + "type", + "command" + ], "additionalProperties": false } } }, - "required": ["type", "command"], + "required": [ + "type", + "command" + ], "additionalProperties": false }, { @@ -8030,7 +8935,10 @@ }, "type": { "type": "string", - "enum": ["llm-grader", "llm_grader"] + "enum": [ + "llm-grader", + "llm_grader" + ] }, "prompt": { "anyOf": [ @@ -8125,7 +9033,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -8176,12 +9087,17 @@ ] } }, - "required": ["type", "command"], + "required": [ + "type", + "command" + ], "additionalProperties": false } } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -8192,7 +9108,9 @@ "minLength": 1 } }, - "required": ["include"], + "required": [ + "include" + ], "additionalProperties": false }, { @@ -8255,7 +9173,9 @@ } } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -8271,7 +9191,10 @@ "maximum": 1 } }, - "required": ["type", "threshold"], + "required": [ + "type", + "threshold" + ], "additionalProperties": false }, { @@ -8288,7 +9211,10 @@ "type": "string" } }, - "required": ["type", "path"], + "required": [ + "type", + "path" + ], "additionalProperties": false }, { @@ -8305,13 +9231,18 @@ "type": "string" } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false } ] } }, - "required": ["type", "aggregator"], + "required": [ + "type", + "aggregator" + ], "additionalProperties": false }, { @@ -8348,11 +9279,20 @@ }, "type": { "type": "string", - "enum": ["tool-trajectory", "tool_trajectory"] + "enum": [ + "tool-trajectory", + "tool_trajectory" + ] }, "mode": { "type": "string", - "enum": ["any_order", "in_order", "exact", "subset", "superset"] + "enum": [ + "any_order", + "in_order", + "exact", + "subset", + "superset" + ] }, "minimums": { "type": "object", @@ -8393,7 +9333,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -8407,7 +9352,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -8418,7 +9368,9 @@ ] } }, - "required": ["tool"], + "required": [ + "tool" + ], "additionalProperties": false } }, @@ -8426,7 +9378,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -8440,7 +9397,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -8451,7 +9413,10 @@ ] } }, - "required": ["type", "mode"], + "required": [ + "type", + "mode" + ], "additionalProperties": false }, { @@ -8488,7 +9453,10 @@ }, "type": { "type": "string", - "enum": ["field-accuracy", "field_accuracy"] + "enum": [ + "field-accuracy", + "field_accuracy" + ] }, "fields": { "type": "array", @@ -8500,7 +9468,11 @@ }, "match": { "type": "string", - "enum": ["exact", "numeric_tolerance", "date"] + "enum": [ + "exact", + "numeric_tolerance", + "date" + ] }, "required": { "type": "boolean" @@ -8522,17 +9494,26 @@ } } }, - "required": ["path", "match"], + "required": [ + "path", + "match" + ], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": ["weighted_average", "all_or_nothing"] + "enum": [ + "weighted_average", + "all_or_nothing" + ] } }, - "required": ["type", "fields"], + "required": [ + "type", + "fields" + ], "additionalProperties": false }, { @@ -8576,7 +9557,10 @@ "minimum": 0 } }, - "required": ["type", "threshold"], + "required": [ + "type", + "threshold" + ], "additionalProperties": false }, { @@ -8620,7 +9604,10 @@ "minimum": 0 } }, - "required": ["type", "budget"], + "required": [ + "type", + "budget" + ], "additionalProperties": false }, { @@ -8657,7 +9644,10 @@ }, "type": { "type": "string", - "enum": ["token-usage", "token_usage"] + "enum": [ + "token-usage", + "token_usage" + ] }, "max_total": { "type": "number", @@ -8672,7 +9662,9 @@ "minimum": 0 } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -8709,7 +9701,10 @@ }, "type": { "type": "string", - "enum": ["execution-metrics", "execution_metrics"] + "enum": [ + "execution-metrics", + "execution_metrics" + ] }, "max_tool_calls": { "type": "number", @@ -8741,7 +9736,9 @@ "minimum": 0 } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -8784,7 +9781,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -8827,7 +9827,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -8864,10 +9867,15 @@ }, "type": { "type": "string", - "enum": ["is-json", "is_json"] + "enum": [ + "is-json", + "is_json" + ] } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -8910,7 +9918,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -8999,7 +10010,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -9009,7 +10023,10 @@ "minItems": 1 } }, - "required": ["type", "criteria"], + "required": [ + "type", + "criteria" + ], "additionalProperties": false } ] @@ -9081,7 +10098,11 @@ }, "reset": { "type": "string", - "enum": ["none", "fast", "strict"] + "enum": [ + "none", + "fast", + "strict" + ] } }, "additionalProperties": false @@ -9126,7 +10147,11 @@ }, "reset": { "type": "string", - "enum": ["none", "fast", "strict"] + "enum": [ + "none", + "fast", + "strict" + ] } }, "additionalProperties": false @@ -9171,7 +10196,11 @@ }, "reset": { "type": "string", - "enum": ["none", "fast", "strict"] + "enum": [ + "none", + "fast", + "strict" + ] } }, "additionalProperties": false @@ -9216,7 +10245,11 @@ }, "reset": { "type": "string", - "enum": ["none", "fast", "strict"] + "enum": [ + "none", + "fast", + "strict" + ] } }, "additionalProperties": false @@ -9225,7 +10258,9 @@ "additionalProperties": false } }, - "required": ["name"], + "required": [ + "name" + ], "additionalProperties": false } ] @@ -9274,7 +10309,10 @@ }, "type": { "type": "string", - "enum": ["code-grader", "code_grader"] + "enum": [ + "code-grader", + "code_grader" + ] }, "command": { "anyOf": [ @@ -9348,12 +10386,18 @@ ] } }, - "required": ["type", "command"], + "required": [ + "type", + "command" + ], "additionalProperties": false } } }, - "required": ["type", "command"], + "required": [ + "type", + "command" + ], "additionalProperties": false }, { @@ -9390,7 +10434,10 @@ }, "type": { "type": "string", - "enum": ["llm-grader", "llm_grader"] + "enum": [ + "llm-grader", + "llm_grader" + ] }, "prompt": { "anyOf": [ @@ -9485,7 +10532,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -9536,12 +10586,17 @@ ] } }, - "required": ["type", "command"], + "required": [ + "type", + "command" + ], "additionalProperties": false } } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -9552,7 +10607,9 @@ "minLength": 1 } }, - "required": ["include"], + "required": [ + "include" + ], "additionalProperties": false }, { @@ -9615,7 +10672,9 @@ } } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -9631,7 +10690,10 @@ "maximum": 1 } }, - "required": ["type", "threshold"], + "required": [ + "type", + "threshold" + ], "additionalProperties": false }, { @@ -9648,7 +10710,10 @@ "type": "string" } }, - "required": ["type", "path"], + "required": [ + "type", + "path" + ], "additionalProperties": false }, { @@ -9665,13 +10730,18 @@ "type": "string" } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false } ] } }, - "required": ["type", "aggregator"], + "required": [ + "type", + "aggregator" + ], "additionalProperties": false }, { @@ -9708,11 +10778,20 @@ }, "type": { "type": "string", - "enum": ["tool-trajectory", "tool_trajectory"] + "enum": [ + "tool-trajectory", + "tool_trajectory" + ] }, "mode": { "type": "string", - "enum": ["any_order", "in_order", "exact", "subset", "superset"] + "enum": [ + "any_order", + "in_order", + "exact", + "subset", + "superset" + ] }, "minimums": { "type": "object", @@ -9753,7 +10832,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -9767,7 +10851,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -9778,7 +10867,9 @@ ] } }, - "required": ["tool"], + "required": [ + "tool" + ], "additionalProperties": false } }, @@ -9786,7 +10877,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -9800,7 +10896,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -9811,7 +10912,10 @@ ] } }, - "required": ["type", "mode"], + "required": [ + "type", + "mode" + ], "additionalProperties": false }, { @@ -9848,7 +10952,10 @@ }, "type": { "type": "string", - "enum": ["field-accuracy", "field_accuracy"] + "enum": [ + "field-accuracy", + "field_accuracy" + ] }, "fields": { "type": "array", @@ -9860,7 +10967,11 @@ }, "match": { "type": "string", - "enum": ["exact", "numeric_tolerance", "date"] + "enum": [ + "exact", + "numeric_tolerance", + "date" + ] }, "required": { "type": "boolean" @@ -9882,17 +10993,26 @@ } } }, - "required": ["path", "match"], + "required": [ + "path", + "match" + ], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": ["weighted_average", "all_or_nothing"] + "enum": [ + "weighted_average", + "all_or_nothing" + ] } }, - "required": ["type", "fields"], + "required": [ + "type", + "fields" + ], "additionalProperties": false }, { @@ -9936,7 +11056,10 @@ "minimum": 0 } }, - "required": ["type", "threshold"], + "required": [ + "type", + "threshold" + ], "additionalProperties": false }, { @@ -9980,7 +11103,10 @@ "minimum": 0 } }, - "required": ["type", "budget"], + "required": [ + "type", + "budget" + ], "additionalProperties": false }, { @@ -10017,7 +11143,10 @@ }, "type": { "type": "string", - "enum": ["token-usage", "token_usage"] + "enum": [ + "token-usage", + "token_usage" + ] }, "max_total": { "type": "number", @@ -10032,7 +11161,9 @@ "minimum": 0 } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -10069,7 +11200,10 @@ }, "type": { "type": "string", - "enum": ["execution-metrics", "execution_metrics"] + "enum": [ + "execution-metrics", + "execution_metrics" + ] }, "max_tool_calls": { "type": "number", @@ -10101,7 +11235,9 @@ "minimum": 0 } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -10144,7 +11280,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -10187,7 +11326,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -10224,10 +11366,15 @@ }, "type": { "type": "string", - "enum": ["is-json", "is_json"] + "enum": [ + "is-json", + "is_json" + ] } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -10270,7 +11417,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -10359,7 +11509,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -10369,7 +11522,10 @@ "minItems": 1 } }, - "required": ["type", "criteria"], + "required": [ + "type", + "criteria" + ], "additionalProperties": false } ] @@ -10413,7 +11569,10 @@ }, "type": { "type": "string", - "enum": ["code-grader", "code_grader"] + "enum": [ + "code-grader", + "code_grader" + ] }, "command": { "anyOf": [ @@ -10487,12 +11646,18 @@ ] } }, - "required": ["type", "command"], + "required": [ + "type", + "command" + ], "additionalProperties": false } } }, - "required": ["type", "command"], + "required": [ + "type", + "command" + ], "additionalProperties": false }, { @@ -10529,7 +11694,10 @@ }, "type": { "type": "string", - "enum": ["llm-grader", "llm_grader"] + "enum": [ + "llm-grader", + "llm_grader" + ] }, "prompt": { "anyOf": [ @@ -10624,7 +11792,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -10675,12 +11846,17 @@ ] } }, - "required": ["type", "command"], + "required": [ + "type", + "command" + ], "additionalProperties": false } } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -10691,7 +11867,9 @@ "minLength": 1 } }, - "required": ["include"], + "required": [ + "include" + ], "additionalProperties": false }, { @@ -10754,7 +11932,9 @@ } } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -10770,7 +11950,10 @@ "maximum": 1 } }, - "required": ["type", "threshold"], + "required": [ + "type", + "threshold" + ], "additionalProperties": false }, { @@ -10787,7 +11970,10 @@ "type": "string" } }, - "required": ["type", "path"], + "required": [ + "type", + "path" + ], "additionalProperties": false }, { @@ -10804,13 +11990,18 @@ "type": "string" } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false } ] } }, - "required": ["type", "aggregator"], + "required": [ + "type", + "aggregator" + ], "additionalProperties": false }, { @@ -10847,11 +12038,20 @@ }, "type": { "type": "string", - "enum": ["tool-trajectory", "tool_trajectory"] + "enum": [ + "tool-trajectory", + "tool_trajectory" + ] }, "mode": { "type": "string", - "enum": ["any_order", "in_order", "exact", "subset", "superset"] + "enum": [ + "any_order", + "in_order", + "exact", + "subset", + "superset" + ] }, "minimums": { "type": "object", @@ -10892,7 +12092,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -10906,7 +12111,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -10917,7 +12127,9 @@ ] } }, - "required": ["tool"], + "required": [ + "tool" + ], "additionalProperties": false } }, @@ -10925,7 +12137,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -10939,7 +12156,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -10950,7 +12172,10 @@ ] } }, - "required": ["type", "mode"], + "required": [ + "type", + "mode" + ], "additionalProperties": false }, { @@ -10987,7 +12212,10 @@ }, "type": { "type": "string", - "enum": ["field-accuracy", "field_accuracy"] + "enum": [ + "field-accuracy", + "field_accuracy" + ] }, "fields": { "type": "array", @@ -10999,7 +12227,11 @@ }, "match": { "type": "string", - "enum": ["exact", "numeric_tolerance", "date"] + "enum": [ + "exact", + "numeric_tolerance", + "date" + ] }, "required": { "type": "boolean" @@ -11021,17 +12253,26 @@ } } }, - "required": ["path", "match"], + "required": [ + "path", + "match" + ], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": ["weighted_average", "all_or_nothing"] + "enum": [ + "weighted_average", + "all_or_nothing" + ] } }, - "required": ["type", "fields"], + "required": [ + "type", + "fields" + ], "additionalProperties": false }, { @@ -11075,7 +12316,10 @@ "minimum": 0 } }, - "required": ["type", "threshold"], + "required": [ + "type", + "threshold" + ], "additionalProperties": false }, { @@ -11119,7 +12363,10 @@ "minimum": 0 } }, - "required": ["type", "budget"], + "required": [ + "type", + "budget" + ], "additionalProperties": false }, { @@ -11156,7 +12403,10 @@ }, "type": { "type": "string", - "enum": ["token-usage", "token_usage"] + "enum": [ + "token-usage", + "token_usage" + ] }, "max_total": { "type": "number", @@ -11171,7 +12421,9 @@ "minimum": 0 } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -11208,7 +12460,10 @@ }, "type": { "type": "string", - "enum": ["execution-metrics", "execution_metrics"] + "enum": [ + "execution-metrics", + "execution_metrics" + ] }, "max_tool_calls": { "type": "number", @@ -11240,7 +12495,9 @@ "minimum": 0 } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -11283,7 +12540,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -11326,7 +12586,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -11363,10 +12626,15 @@ }, "type": { "type": "string", - "enum": ["is-json", "is_json"] + "enum": [ + "is-json", + "is_json" + ] } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -11409,7 +12677,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -11498,7 +12769,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -11508,7 +12782,10 @@ "minItems": 1 } }, - "required": ["type", "criteria"], + "required": [ + "type", + "criteria" + ], "additionalProperties": false } ] @@ -11529,7 +12806,11 @@ }, "strategy": { "type": "string", - "enum": ["pass_at_k", "mean", "confidence_interval"] + "enum": [ + "pass_at_k", + "mean", + "confidence_interval" + ] }, "cost_limit_usd": { "type": "number", @@ -11540,7 +12821,9 @@ "minimum": 0 } }, - "required": ["count"], + "required": [ + "count" + ], "additionalProperties": false }, "budget_usd": { @@ -11573,7 +12856,10 @@ }, "isolation": { "type": "string", - "enum": ["shared", "per_test"] + "enum": [ + "shared", + "per_test" + ] }, "repos": { "type": "array", @@ -11597,7 +12883,10 @@ "format": "uri" } }, - "required": ["type", "url"], + "required": [ + "type", + "url" + ], "additionalProperties": false }, { @@ -11611,7 +12900,10 @@ "type": "string" } }, - "required": ["type", "path"], + "required": [ + "type", + "path" + ], "additionalProperties": false } ] @@ -11628,7 +12920,10 @@ }, "resolve": { "type": "string", - "enum": ["remote", "local"] + "enum": [ + "remote", + "local" + ] }, "ancestor": { "type": "integer", @@ -11706,7 +13001,11 @@ }, "reset": { "type": "string", - "enum": ["none", "fast", "strict"] + "enum": [ + "none", + "fast", + "strict" + ] } }, "additionalProperties": false @@ -11751,7 +13050,11 @@ }, "reset": { "type": "string", - "enum": ["none", "fast", "strict"] + "enum": [ + "none", + "fast", + "strict" + ] } }, "additionalProperties": false @@ -11796,7 +13099,11 @@ }, "reset": { "type": "string", - "enum": ["none", "fast", "strict"] + "enum": [ + "none", + "fast", + "strict" + ] } }, "additionalProperties": false @@ -11841,7 +13148,11 @@ }, "reset": { "type": "string", - "enum": ["none", "fast", "strict"] + "enum": [ + "none", + "fast", + "strict" + ] } }, "additionalProperties": false @@ -11851,7 +13162,11 @@ }, "mode": { "type": "string", - "enum": ["pooled", "temp", "static"] + "enum": [ + "pooled", + "temp", + "static" + ] }, "path": { "type": "string" @@ -11874,7 +13189,9 @@ "minimum": 0.1 } }, - "required": ["image"], + "required": [ + "image" + ], "additionalProperties": false } }, @@ -11898,11 +13215,17 @@ }, "on_dependency_failure": { "type": "string", - "enum": ["skip", "fail", "run"] + "enum": [ + "skip", + "fail", + "run" + ] }, "mode": { "type": "string", - "enum": ["conversation"] + "enum": [ + "conversation" + ] }, "turns": { "type": "array", @@ -11926,13 +13249,20 @@ "properties": { "type": { "type": "string", - "enum": ["text", "file", "image"] + "enum": [ + "text", + "file", + "image" + ] }, "value": { "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false } } @@ -11957,13 +13287,20 @@ "properties": { "type": { "type": "string", - "enum": ["text", "file", "image"] + "enum": [ + "text", + "file", + "image" + ] }, "value": { "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false } } @@ -12014,7 +13351,10 @@ }, "type": { "type": "string", - "enum": ["code-grader", "code_grader"] + "enum": [ + "code-grader", + "code_grader" + ] }, "command": { "anyOf": [ @@ -12088,12 +13428,18 @@ ] } }, - "required": ["type", "command"], + "required": [ + "type", + "command" + ], "additionalProperties": false } } }, - "required": ["type", "command"], + "required": [ + "type", + "command" + ], "additionalProperties": false }, { @@ -12130,7 +13476,10 @@ }, "type": { "type": "string", - "enum": ["llm-grader", "llm_grader"] + "enum": [ + "llm-grader", + "llm_grader" + ] }, "prompt": { "anyOf": [ @@ -12225,7 +13574,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -12276,12 +13628,17 @@ ] } }, - "required": ["type", "command"], + "required": [ + "type", + "command" + ], "additionalProperties": false } } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -12292,7 +13649,9 @@ "minLength": 1 } }, - "required": ["include"], + "required": [ + "include" + ], "additionalProperties": false }, { @@ -12355,7 +13714,9 @@ } } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -12371,7 +13732,10 @@ "maximum": 1 } }, - "required": ["type", "threshold"], + "required": [ + "type", + "threshold" + ], "additionalProperties": false }, { @@ -12388,7 +13752,10 @@ "type": "string" } }, - "required": ["type", "path"], + "required": [ + "type", + "path" + ], "additionalProperties": false }, { @@ -12405,13 +13772,18 @@ "type": "string" } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false } ] } }, - "required": ["type", "aggregator"], + "required": [ + "type", + "aggregator" + ], "additionalProperties": false }, { @@ -12448,7 +13820,10 @@ }, "type": { "type": "string", - "enum": ["tool-trajectory", "tool_trajectory"] + "enum": [ + "tool-trajectory", + "tool_trajectory" + ] }, "mode": { "type": "string", @@ -12499,7 +13874,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -12513,7 +13893,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -12524,7 +13909,9 @@ ] } }, - "required": ["tool"], + "required": [ + "tool" + ], "additionalProperties": false } }, @@ -12532,7 +13919,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -12546,7 +13938,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -12557,7 +13954,10 @@ ] } }, - "required": ["type", "mode"], + "required": [ + "type", + "mode" + ], "additionalProperties": false }, { @@ -12594,7 +13994,10 @@ }, "type": { "type": "string", - "enum": ["field-accuracy", "field_accuracy"] + "enum": [ + "field-accuracy", + "field_accuracy" + ] }, "fields": { "type": "array", @@ -12606,7 +14009,11 @@ }, "match": { "type": "string", - "enum": ["exact", "numeric_tolerance", "date"] + "enum": [ + "exact", + "numeric_tolerance", + "date" + ] }, "required": { "type": "boolean" @@ -12628,17 +14035,26 @@ } } }, - "required": ["path", "match"], + "required": [ + "path", + "match" + ], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": ["weighted_average", "all_or_nothing"] + "enum": [ + "weighted_average", + "all_or_nothing" + ] } }, - "required": ["type", "fields"], + "required": [ + "type", + "fields" + ], "additionalProperties": false }, { @@ -12682,7 +14098,10 @@ "minimum": 0 } }, - "required": ["type", "threshold"], + "required": [ + "type", + "threshold" + ], "additionalProperties": false }, { @@ -12726,7 +14145,10 @@ "minimum": 0 } }, - "required": ["type", "budget"], + "required": [ + "type", + "budget" + ], "additionalProperties": false }, { @@ -12763,7 +14185,10 @@ }, "type": { "type": "string", - "enum": ["token-usage", "token_usage"] + "enum": [ + "token-usage", + "token_usage" + ] }, "max_total": { "type": "number", @@ -12778,7 +14203,9 @@ "minimum": 0 } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -12815,7 +14242,10 @@ }, "type": { "type": "string", - "enum": ["execution-metrics", "execution_metrics"] + "enum": [ + "execution-metrics", + "execution_metrics" + ] }, "max_tool_calls": { "type": "number", @@ -12847,7 +14277,9 @@ "minimum": 0 } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -12890,7 +14322,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -12933,7 +14368,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -12970,10 +14408,15 @@ }, "type": { "type": "string", - "enum": ["is-json", "is_json"] + "enum": [ + "is-json", + "is_json" + ] } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -13016,7 +14459,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -13105,7 +14551,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -13115,7 +14564,10 @@ "minItems": 1 } }, - "required": ["type", "criteria"], + "required": [ + "type", + "criteria" + ], "additionalProperties": false } ] @@ -13124,25 +14576,36 @@ } } }, - "required": ["input"], + "required": [ + "input" + ], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": ["mean", "min", "max"] + "enum": [ + "mean", + "min", + "max" + ] }, "on_turn_failure": { "type": "string", - "enum": ["continue", "stop"] + "enum": [ + "continue", + "stop" + ] }, "window_size": { "type": "integer", "minimum": 1 } }, - "required": ["id"], + "required": [ + "id" + ], "additionalProperties": false } }, @@ -13220,7 +14683,11 @@ }, "reset": { "type": "string", - "enum": ["none", "fast", "strict"] + "enum": [ + "none", + "fast", + "strict" + ] } }, "additionalProperties": false @@ -13265,7 +14732,11 @@ }, "reset": { "type": "string", - "enum": ["none", "fast", "strict"] + "enum": [ + "none", + "fast", + "strict" + ] } }, "additionalProperties": false @@ -13310,7 +14781,11 @@ }, "reset": { "type": "string", - "enum": ["none", "fast", "strict"] + "enum": [ + "none", + "fast", + "strict" + ] } }, "additionalProperties": false @@ -13355,7 +14830,11 @@ }, "reset": { "type": "string", - "enum": ["none", "fast", "strict"] + "enum": [ + "none", + "fast", + "strict" + ] } }, "additionalProperties": false @@ -13364,7 +14843,9 @@ "additionalProperties": false } }, - "required": ["name"], + "required": [ + "name" + ], "additionalProperties": false } ] @@ -13413,7 +14894,10 @@ }, "type": { "type": "string", - "enum": ["code-grader", "code_grader"] + "enum": [ + "code-grader", + "code_grader" + ] }, "command": { "anyOf": [ @@ -13487,12 +14971,18 @@ ] } }, - "required": ["type", "command"], + "required": [ + "type", + "command" + ], "additionalProperties": false } } }, - "required": ["type", "command"], + "required": [ + "type", + "command" + ], "additionalProperties": false }, { @@ -13529,7 +15019,10 @@ }, "type": { "type": "string", - "enum": ["llm-grader", "llm_grader"] + "enum": [ + "llm-grader", + "llm_grader" + ] }, "prompt": { "anyOf": [ @@ -13624,7 +15117,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -13675,12 +15171,17 @@ ] } }, - "required": ["type", "command"], + "required": [ + "type", + "command" + ], "additionalProperties": false } } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -13691,7 +15192,9 @@ "minLength": 1 } }, - "required": ["include"], + "required": [ + "include" + ], "additionalProperties": false }, { @@ -13754,7 +15257,9 @@ } } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -13770,7 +15275,10 @@ "maximum": 1 } }, - "required": ["type", "threshold"], + "required": [ + "type", + "threshold" + ], "additionalProperties": false }, { @@ -13787,7 +15295,10 @@ "type": "string" } }, - "required": ["type", "path"], + "required": [ + "type", + "path" + ], "additionalProperties": false }, { @@ -13804,13 +15315,18 @@ "type": "string" } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false } ] } }, - "required": ["type", "aggregator"], + "required": [ + "type", + "aggregator" + ], "additionalProperties": false }, { @@ -13847,11 +15363,20 @@ }, "type": { "type": "string", - "enum": ["tool-trajectory", "tool_trajectory"] + "enum": [ + "tool-trajectory", + "tool_trajectory" + ] }, "mode": { "type": "string", - "enum": ["any_order", "in_order", "exact", "subset", "superset"] + "enum": [ + "any_order", + "in_order", + "exact", + "subset", + "superset" + ] }, "minimums": { "type": "object", @@ -13892,7 +15417,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -13906,7 +15436,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -13917,7 +15452,9 @@ ] } }, - "required": ["tool"], + "required": [ + "tool" + ], "additionalProperties": false } }, @@ -13925,7 +15462,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -13939,7 +15481,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -13950,7 +15497,10 @@ ] } }, - "required": ["type", "mode"], + "required": [ + "type", + "mode" + ], "additionalProperties": false }, { @@ -13987,7 +15537,10 @@ }, "type": { "type": "string", - "enum": ["field-accuracy", "field_accuracy"] + "enum": [ + "field-accuracy", + "field_accuracy" + ] }, "fields": { "type": "array", @@ -13999,7 +15552,11 @@ }, "match": { "type": "string", - "enum": ["exact", "numeric_tolerance", "date"] + "enum": [ + "exact", + "numeric_tolerance", + "date" + ] }, "required": { "type": "boolean" @@ -14021,17 +15578,26 @@ } } }, - "required": ["path", "match"], + "required": [ + "path", + "match" + ], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": ["weighted_average", "all_or_nothing"] + "enum": [ + "weighted_average", + "all_or_nothing" + ] } }, - "required": ["type", "fields"], + "required": [ + "type", + "fields" + ], "additionalProperties": false }, { @@ -14075,7 +15641,10 @@ "minimum": 0 } }, - "required": ["type", "threshold"], + "required": [ + "type", + "threshold" + ], "additionalProperties": false }, { @@ -14119,7 +15688,10 @@ "minimum": 0 } }, - "required": ["type", "budget"], + "required": [ + "type", + "budget" + ], "additionalProperties": false }, { @@ -14156,7 +15728,10 @@ }, "type": { "type": "string", - "enum": ["token-usage", "token_usage"] + "enum": [ + "token-usage", + "token_usage" + ] }, "max_total": { "type": "number", @@ -14171,7 +15746,9 @@ "minimum": 0 } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -14208,7 +15785,10 @@ }, "type": { "type": "string", - "enum": ["execution-metrics", "execution_metrics"] + "enum": [ + "execution-metrics", + "execution_metrics" + ] }, "max_tool_calls": { "type": "number", @@ -14240,7 +15820,9 @@ "minimum": 0 } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -14283,7 +15865,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -14326,7 +15911,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -14363,10 +15951,15 @@ }, "type": { "type": "string", - "enum": ["is-json", "is_json"] + "enum": [ + "is-json", + "is_json" + ] } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -14409,7 +16002,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -14498,7 +16094,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -14508,7 +16107,10 @@ "minItems": 1 } }, - "required": ["type", "criteria"], + "required": [ + "type", + "criteria" + ], "additionalProperties": false } ] @@ -14552,7 +16154,10 @@ }, "type": { "type": "string", - "enum": ["code-grader", "code_grader"] + "enum": [ + "code-grader", + "code_grader" + ] }, "command": { "anyOf": [ @@ -14626,12 +16231,18 @@ ] } }, - "required": ["type", "command"], + "required": [ + "type", + "command" + ], "additionalProperties": false } } }, - "required": ["type", "command"], + "required": [ + "type", + "command" + ], "additionalProperties": false }, { @@ -14668,7 +16279,10 @@ }, "type": { "type": "string", - "enum": ["llm-grader", "llm_grader"] + "enum": [ + "llm-grader", + "llm_grader" + ] }, "prompt": { "anyOf": [ @@ -14763,7 +16377,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -14814,12 +16431,17 @@ ] } }, - "required": ["type", "command"], + "required": [ + "type", + "command" + ], "additionalProperties": false } } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -14830,7 +16452,9 @@ "minLength": 1 } }, - "required": ["include"], + "required": [ + "include" + ], "additionalProperties": false }, { @@ -14893,7 +16517,9 @@ } } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -14909,7 +16535,10 @@ "maximum": 1 } }, - "required": ["type", "threshold"], + "required": [ + "type", + "threshold" + ], "additionalProperties": false }, { @@ -14926,7 +16555,10 @@ "type": "string" } }, - "required": ["type", "path"], + "required": [ + "type", + "path" + ], "additionalProperties": false }, { @@ -14943,13 +16575,18 @@ "type": "string" } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false } ] } }, - "required": ["type", "aggregator"], + "required": [ + "type", + "aggregator" + ], "additionalProperties": false }, { @@ -14986,11 +16623,20 @@ }, "type": { "type": "string", - "enum": ["tool-trajectory", "tool_trajectory"] + "enum": [ + "tool-trajectory", + "tool_trajectory" + ] }, "mode": { "type": "string", - "enum": ["any_order", "in_order", "exact", "subset", "superset"] + "enum": [ + "any_order", + "in_order", + "exact", + "subset", + "superset" + ] }, "minimums": { "type": "object", @@ -15031,7 +16677,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -15045,7 +16696,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -15056,7 +16712,9 @@ ] } }, - "required": ["tool"], + "required": [ + "tool" + ], "additionalProperties": false } }, @@ -15064,7 +16722,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -15078,7 +16741,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -15089,7 +16757,10 @@ ] } }, - "required": ["type", "mode"], + "required": [ + "type", + "mode" + ], "additionalProperties": false }, { @@ -15126,7 +16797,10 @@ }, "type": { "type": "string", - "enum": ["field-accuracy", "field_accuracy"] + "enum": [ + "field-accuracy", + "field_accuracy" + ] }, "fields": { "type": "array", @@ -15138,7 +16812,11 @@ }, "match": { "type": "string", - "enum": ["exact", "numeric_tolerance", "date"] + "enum": [ + "exact", + "numeric_tolerance", + "date" + ] }, "required": { "type": "boolean" @@ -15160,17 +16838,26 @@ } } }, - "required": ["path", "match"], + "required": [ + "path", + "match" + ], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": ["weighted_average", "all_or_nothing"] + "enum": [ + "weighted_average", + "all_or_nothing" + ] } }, - "required": ["type", "fields"], + "required": [ + "type", + "fields" + ], "additionalProperties": false }, { @@ -15214,7 +16901,10 @@ "minimum": 0 } }, - "required": ["type", "threshold"], + "required": [ + "type", + "threshold" + ], "additionalProperties": false }, { @@ -15258,7 +16948,10 @@ "minimum": 0 } }, - "required": ["type", "budget"], + "required": [ + "type", + "budget" + ], "additionalProperties": false }, { @@ -15295,7 +16988,10 @@ }, "type": { "type": "string", - "enum": ["token-usage", "token_usage"] + "enum": [ + "token-usage", + "token_usage" + ] }, "max_total": { "type": "number", @@ -15310,7 +17006,9 @@ "minimum": 0 } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -15347,7 +17045,10 @@ }, "type": { "type": "string", - "enum": ["execution-metrics", "execution_metrics"] + "enum": [ + "execution-metrics", + "execution_metrics" + ] }, "max_tool_calls": { "type": "number", @@ -15379,7 +17080,9 @@ "minimum": 0 } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -15422,7 +17125,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -15465,7 +17171,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -15502,10 +17211,15 @@ }, "type": { "type": "string", - "enum": ["is-json", "is_json"] + "enum": [ + "is-json", + "is_json" + ] } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -15548,7 +17262,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -15637,7 +17354,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -15647,7 +17367,10 @@ "minItems": 1 } }, - "required": ["type", "criteria"], + "required": [ + "type", + "criteria" + ], "additionalProperties": false } ] @@ -15668,7 +17391,11 @@ }, "strategy": { "type": "string", - "enum": ["pass_at_k", "mean", "confidence_interval"] + "enum": [ + "pass_at_k", + "mean", + "confidence_interval" + ] }, "cost_limit_usd": { "type": "number", @@ -15679,7 +17406,9 @@ "minimum": 0 } }, - "required": ["count"], + "required": [ + "count" + ], "additionalProperties": false }, "budget_usd": { @@ -15742,7 +17471,10 @@ }, "type": { "type": "string", - "enum": ["code-grader", "code_grader"] + "enum": [ + "code-grader", + "code_grader" + ] }, "command": { "anyOf": [ @@ -15816,12 +17548,18 @@ ] } }, - "required": ["type", "command"], + "required": [ + "type", + "command" + ], "additionalProperties": false } } }, - "required": ["type", "command"], + "required": [ + "type", + "command" + ], "additionalProperties": false }, { @@ -15858,7 +17596,10 @@ }, "type": { "type": "string", - "enum": ["llm-grader", "llm_grader"] + "enum": [ + "llm-grader", + "llm_grader" + ] }, "prompt": { "anyOf": [ @@ -15953,7 +17694,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -16004,12 +17748,17 @@ ] } }, - "required": ["type", "command"], + "required": [ + "type", + "command" + ], "additionalProperties": false } } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -16020,7 +17769,9 @@ "minLength": 1 } }, - "required": ["include"], + "required": [ + "include" + ], "additionalProperties": false }, { @@ -16083,7 +17834,9 @@ } } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -16099,7 +17852,10 @@ "maximum": 1 } }, - "required": ["type", "threshold"], + "required": [ + "type", + "threshold" + ], "additionalProperties": false }, { @@ -16116,7 +17872,10 @@ "type": "string" } }, - "required": ["type", "path"], + "required": [ + "type", + "path" + ], "additionalProperties": false }, { @@ -16133,13 +17892,18 @@ "type": "string" } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false } ] } }, - "required": ["type", "aggregator"], + "required": [ + "type", + "aggregator" + ], "additionalProperties": false }, { @@ -16176,11 +17940,20 @@ }, "type": { "type": "string", - "enum": ["tool-trajectory", "tool_trajectory"] + "enum": [ + "tool-trajectory", + "tool_trajectory" + ] }, "mode": { "type": "string", - "enum": ["any_order", "in_order", "exact", "subset", "superset"] + "enum": [ + "any_order", + "in_order", + "exact", + "subset", + "superset" + ] }, "minimums": { "type": "object", @@ -16221,7 +17994,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -16235,7 +18013,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -16246,7 +18029,9 @@ ] } }, - "required": ["tool"], + "required": [ + "tool" + ], "additionalProperties": false } }, @@ -16254,7 +18039,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -16268,7 +18058,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -16279,7 +18074,10 @@ ] } }, - "required": ["type", "mode"], + "required": [ + "type", + "mode" + ], "additionalProperties": false }, { @@ -16316,7 +18114,10 @@ }, "type": { "type": "string", - "enum": ["field-accuracy", "field_accuracy"] + "enum": [ + "field-accuracy", + "field_accuracy" + ] }, "fields": { "type": "array", @@ -16328,7 +18129,11 @@ }, "match": { "type": "string", - "enum": ["exact", "numeric_tolerance", "date"] + "enum": [ + "exact", + "numeric_tolerance", + "date" + ] }, "required": { "type": "boolean" @@ -16350,17 +18155,26 @@ } } }, - "required": ["path", "match"], + "required": [ + "path", + "match" + ], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": ["weighted_average", "all_or_nothing"] + "enum": [ + "weighted_average", + "all_or_nothing" + ] } }, - "required": ["type", "fields"], + "required": [ + "type", + "fields" + ], "additionalProperties": false }, { @@ -16404,7 +18218,10 @@ "minimum": 0 } }, - "required": ["type", "threshold"], + "required": [ + "type", + "threshold" + ], "additionalProperties": false }, { @@ -16448,7 +18265,10 @@ "minimum": 0 } }, - "required": ["type", "budget"], + "required": [ + "type", + "budget" + ], "additionalProperties": false }, { @@ -16485,7 +18305,10 @@ }, "type": { "type": "string", - "enum": ["token-usage", "token_usage"] + "enum": [ + "token-usage", + "token_usage" + ] }, "max_total": { "type": "number", @@ -16500,7 +18323,9 @@ "minimum": 0 } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -16537,7 +18362,10 @@ }, "type": { "type": "string", - "enum": ["execution-metrics", "execution_metrics"] + "enum": [ + "execution-metrics", + "execution_metrics" + ] }, "max_tool_calls": { "type": "number", @@ -16569,7 +18397,9 @@ "minimum": 0 } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -16612,7 +18442,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -16655,7 +18488,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -16692,10 +18528,15 @@ }, "type": { "type": "string", - "enum": ["is-json", "is_json"] + "enum": [ + "is-json", + "is_json" + ] } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -16738,7 +18579,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -16827,7 +18671,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -16837,7 +18684,10 @@ "minItems": 1 } }, - "required": ["type", "criteria"], + "required": [ + "type", + "criteria" + ], "additionalProperties": false } ] @@ -16866,7 +18716,10 @@ ] } }, - "required": ["type", "command"], + "required": [ + "type", + "command" + ], "additionalProperties": false } }, @@ -16880,7 +18733,10 @@ }, "isolation": { "type": "string", - "enum": ["shared", "per_test"] + "enum": [ + "shared", + "per_test" + ] }, "repos": { "type": "array", @@ -16904,7 +18760,10 @@ "format": "uri" } }, - "required": ["type", "url"], + "required": [ + "type", + "url" + ], "additionalProperties": false }, { @@ -16918,7 +18777,10 @@ "type": "string" } }, - "required": ["type", "path"], + "required": [ + "type", + "path" + ], "additionalProperties": false } ] @@ -16935,7 +18797,10 @@ }, "resolve": { "type": "string", - "enum": ["remote", "local"] + "enum": [ + "remote", + "local" + ] }, "ancestor": { "type": "integer", @@ -17013,7 +18878,11 @@ }, "reset": { "type": "string", - "enum": ["none", "fast", "strict"] + "enum": [ + "none", + "fast", + "strict" + ] } }, "additionalProperties": false @@ -17058,7 +18927,11 @@ }, "reset": { "type": "string", - "enum": ["none", "fast", "strict"] + "enum": [ + "none", + "fast", + "strict" + ] } }, "additionalProperties": false @@ -17103,7 +18976,11 @@ }, "reset": { "type": "string", - "enum": ["none", "fast", "strict"] + "enum": [ + "none", + "fast", + "strict" + ] } }, "additionalProperties": false @@ -17148,7 +19025,11 @@ }, "reset": { "type": "string", - "enum": ["none", "fast", "strict"] + "enum": [ + "none", + "fast", + "strict" + ] } }, "additionalProperties": false @@ -17158,7 +19039,11 @@ }, "mode": { "type": "string", - "enum": ["pooled", "temp", "static"] + "enum": [ + "pooled", + "temp", + "static" + ] }, "path": { "type": "string" @@ -17181,7 +19066,9 @@ "minimum": 0.1 } }, - "required": ["image"], + "required": [ + "image" + ], "additionalProperties": false } }, @@ -17193,7 +19080,9 @@ ] } }, - "required": ["tests"], + "required": [ + "tests" + ], "additionalProperties": false } } From 2151efbb01c49d483fbca27cb27c724d7e2fa515 Mon Sep 17 00:00:00 2001 From: Christopher Date: Wed, 20 May 2026 09:24:51 +1000 Subject: [PATCH 2/3] fix(core): tighten eval vars loader typing Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- packages/core/src/evaluation/yaml-parser.ts | 40 ++++++++++++++++----- 1 file changed, 31 insertions(+), 9 deletions(-) diff --git a/packages/core/src/evaluation/yaml-parser.ts b/packages/core/src/evaluation/yaml-parser.ts index 474cf276b..18c863cc8 100644 --- a/packages/core/src/evaluation/yaml-parser.ts +++ b/packages/core/src/evaluation/yaml-parser.ts @@ -138,6 +138,7 @@ type RawEvalCase = JsonObject & { /** Shorthand: list of file paths to prepend as type:file content blocks in the user message. */ readonly input_files?: JsonValue; readonly expected_output?: JsonValue; + readonly evaluator?: JsonValue; readonly execution?: JsonValue; readonly evaluators?: JsonValue; readonly assertions?: JsonValue; @@ -146,6 +147,13 @@ type RawEvalCase = JsonObject & { readonly rubrics?: JsonValue; readonly workspace?: JsonValue; readonly metadata?: JsonValue; + readonly depends_on?: JsonValue; + readonly on_dependency_failure?: JsonValue; + readonly mode?: JsonValue; + readonly turns?: JsonValue; + readonly aggregation?: JsonValue; + readonly on_turn_failure?: JsonValue; + readonly window_size?: JsonValue; }; function resolveTests(suite: RawTestSuite): JsonValue | undefined { @@ -192,6 +200,28 @@ function interpolateCaseTurns( }); } +function interpolateRawEvalCase(raw: RawEvalCase, vars: JsonObject | undefined): RawEvalCase { + if (!vars) { + return raw; + } + + return { + ...raw, + ...(raw.criteria !== undefined ? { criteria: interpolateCaseField(raw.criteria, vars) } : {}), + ...(raw.expected_outcome !== undefined + ? { expected_outcome: interpolateCaseField(raw.expected_outcome, vars) } + : {}), + ...(raw.input !== undefined ? { input: interpolateCaseField(raw.input, vars) } : {}), + ...(raw.input_files !== undefined + ? { input_files: interpolateCaseField(raw.input_files, vars) } + : {}), + ...(raw.expected_output !== undefined + ? { expected_output: interpolateCaseField(raw.expected_output, vars) } + : {}), + ...(raw.turns !== undefined ? { turns: interpolateCaseTurns(raw.turns, vars) } : {}), + }; +} + /** * Read metadata from a test suite file (like target name). * This is a convenience function for CLI tools that need metadata without loading all tests. @@ -433,15 +463,7 @@ async function loadTestsFromYaml( } const caseVars = isJsonObject(testCaseConfig.vars) ? testCaseConfig.vars : undefined; - const renderedCase = { - ...testCaseConfig, - criteria: interpolateCaseField(testCaseConfig.criteria, caseVars), - expected_outcome: interpolateCaseField(testCaseConfig.expected_outcome, caseVars), - input: interpolateCaseField(testCaseConfig.input, caseVars), - input_files: interpolateCaseField(testCaseConfig.input_files, caseVars), - expected_output: interpolateCaseField(testCaseConfig.expected_output, caseVars), - turns: interpolateCaseTurns(testCaseConfig.turns, caseVars), - } satisfies RawEvalCase; + const renderedCase = interpolateRawEvalCase(testCaseConfig, caseVars); const conversationId = asString(renderedCase.conversation_id); let outcome = asString(renderedCase.criteria); From ae13b942d4f61f6164bf348dc0305da04aca0aba Mon Sep 17 00:00:00 2001 From: Christopher Date: Wed, 20 May 2026 09:25:59 +1000 Subject: [PATCH 3/3] style(schema): format generated eval schema Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../references/eval-schema.json | 3073 ++++------------- 1 file changed, 598 insertions(+), 2475 deletions(-) diff --git a/skills-data/agentv-eval-writer/references/eval-schema.json b/skills-data/agentv-eval-writer/references/eval-schema.json index 10bca50f4..3a26739e6 100644 --- a/skills-data/agentv-eval-writer/references/eval-schema.json +++ b/skills-data/agentv-eval-writer/references/eval-schema.json @@ -56,12 +56,7 @@ "properties": { "role": { "type": "string", - "enum": [ - "system", - "user", - "assistant", - "tool" - ] + "enum": ["system", "user", "assistant", "tool"] }, "content": { "anyOf": [ @@ -75,30 +70,20 @@ "properties": { "type": { "type": "string", - "enum": [ - "text", - "file", - "image" - ] + "enum": ["text", "file", "image"] }, "value": { "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false } } ] } }, - "required": [ - "role", - "content" - ], + "required": ["role", "content"], "additionalProperties": false } } @@ -141,12 +126,7 @@ "properties": { "role": { "type": "string", - "enum": [ - "system", - "user", - "assistant", - "tool" - ] + "enum": ["system", "user", "assistant", "tool"] }, "content": { "anyOf": [ @@ -160,30 +140,20 @@ "properties": { "type": { "type": "string", - "enum": [ - "text", - "file", - "image" - ] + "enum": ["text", "file", "image"] }, "value": { "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false } } ] } }, - "required": [ - "role", - "content" - ], + "required": ["role", "content"], "additionalProperties": false } } @@ -212,12 +182,7 @@ "properties": { "role": { "type": "string", - "enum": [ - "system", - "user", - "assistant", - "tool" - ] + "enum": ["system", "user", "assistant", "tool"] }, "content": { "anyOf": [ @@ -231,30 +196,20 @@ "properties": { "type": { "type": "string", - "enum": [ - "text", - "file", - "image" - ] + "enum": ["text", "file", "image"] }, "value": { "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false } } ] } }, - "required": [ - "role", - "content" - ], + "required": ["role", "content"], "additionalProperties": false } } @@ -298,10 +253,7 @@ }, "type": { "type": "string", - "enum": [ - "code-grader", - "code_grader" - ] + "enum": ["code-grader", "code_grader"] }, "command": { "anyOf": [ @@ -375,18 +327,12 @@ ] } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false } } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false }, { @@ -423,10 +369,7 @@ }, "type": { "type": "string", - "enum": [ - "llm-grader", - "llm_grader" - ] + "enum": ["llm-grader", "llm_grader"] }, "prompt": { "anyOf": [ @@ -521,10 +464,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -575,17 +515,12 @@ ] } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false } } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -596,9 +531,7 @@ "minLength": 1 } }, - "required": [ - "include" - ], + "required": ["include"], "additionalProperties": false }, { @@ -661,9 +594,7 @@ } } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -679,10 +610,7 @@ "maximum": 1 } }, - "required": [ - "type", - "threshold" - ], + "required": ["type", "threshold"], "additionalProperties": false }, { @@ -699,10 +627,7 @@ "type": "string" } }, - "required": [ - "type", - "path" - ], + "required": ["type", "path"], "additionalProperties": false }, { @@ -719,18 +644,13 @@ "type": "string" } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false } ] } }, - "required": [ - "type", - "aggregator" - ], + "required": ["type", "aggregator"], "additionalProperties": false }, { @@ -767,20 +687,11 @@ }, "type": { "type": "string", - "enum": [ - "tool-trajectory", - "tool_trajectory" - ] + "enum": ["tool-trajectory", "tool_trajectory"] }, "mode": { "type": "string", - "enum": [ - "any_order", - "in_order", - "exact", - "subset", - "superset" - ] + "enum": ["any_order", "in_order", "exact", "subset", "superset"] }, "minimums": { "type": "object", @@ -821,12 +732,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -840,12 +746,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -856,9 +757,7 @@ ] } }, - "required": [ - "tool" - ], + "required": ["tool"], "additionalProperties": false } }, @@ -866,12 +765,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -885,12 +779,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -901,10 +790,7 @@ ] } }, - "required": [ - "type", - "mode" - ], + "required": ["type", "mode"], "additionalProperties": false }, { @@ -941,10 +827,7 @@ }, "type": { "type": "string", - "enum": [ - "field-accuracy", - "field_accuracy" - ] + "enum": ["field-accuracy", "field_accuracy"] }, "fields": { "type": "array", @@ -956,11 +839,7 @@ }, "match": { "type": "string", - "enum": [ - "exact", - "numeric_tolerance", - "date" - ] + "enum": ["exact", "numeric_tolerance", "date"] }, "required": { "type": "boolean" @@ -982,26 +861,17 @@ } } }, - "required": [ - "path", - "match" - ], + "required": ["path", "match"], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": [ - "weighted_average", - "all_or_nothing" - ] + "enum": ["weighted_average", "all_or_nothing"] } }, - "required": [ - "type", - "fields" - ], + "required": ["type", "fields"], "additionalProperties": false }, { @@ -1045,10 +915,7 @@ "minimum": 0 } }, - "required": [ - "type", - "threshold" - ], + "required": ["type", "threshold"], "additionalProperties": false }, { @@ -1092,10 +959,7 @@ "minimum": 0 } }, - "required": [ - "type", - "budget" - ], + "required": ["type", "budget"], "additionalProperties": false }, { @@ -1132,10 +996,7 @@ }, "type": { "type": "string", - "enum": [ - "token-usage", - "token_usage" - ] + "enum": ["token-usage", "token_usage"] }, "max_total": { "type": "number", @@ -1150,9 +1011,7 @@ "minimum": 0 } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -1189,10 +1048,7 @@ }, "type": { "type": "string", - "enum": [ - "execution-metrics", - "execution_metrics" - ] + "enum": ["execution-metrics", "execution_metrics"] }, "max_tool_calls": { "type": "number", @@ -1224,9 +1080,7 @@ "minimum": 0 } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -1269,10 +1123,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -1315,10 +1166,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -1355,15 +1203,10 @@ }, "type": { "type": "string", - "enum": [ - "is-json", - "is_json" - ] + "enum": ["is-json", "is_json"] } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -1406,10 +1249,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -1498,10 +1338,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -1511,10 +1348,7 @@ "minItems": 1 } }, - "required": [ - "type", - "criteria" - ], + "required": ["type", "criteria"], "additionalProperties": false } ] @@ -1558,10 +1392,7 @@ }, "type": { "type": "string", - "enum": [ - "code-grader", - "code_grader" - ] + "enum": ["code-grader", "code_grader"] }, "command": { "anyOf": [ @@ -1635,18 +1466,12 @@ ] } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false } } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false }, { @@ -1683,10 +1508,7 @@ }, "type": { "type": "string", - "enum": [ - "llm-grader", - "llm_grader" - ] + "enum": ["llm-grader", "llm_grader"] }, "prompt": { "anyOf": [ @@ -1781,10 +1603,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -1835,17 +1654,12 @@ ] } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false } } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -1856,9 +1670,7 @@ "minLength": 1 } }, - "required": [ - "include" - ], + "required": ["include"], "additionalProperties": false }, { @@ -1921,9 +1733,7 @@ } } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -1939,10 +1749,7 @@ "maximum": 1 } }, - "required": [ - "type", - "threshold" - ], + "required": ["type", "threshold"], "additionalProperties": false }, { @@ -1959,10 +1766,7 @@ "type": "string" } }, - "required": [ - "type", - "path" - ], + "required": ["type", "path"], "additionalProperties": false }, { @@ -1979,18 +1783,13 @@ "type": "string" } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false } ] } }, - "required": [ - "type", - "aggregator" - ], + "required": ["type", "aggregator"], "additionalProperties": false }, { @@ -2027,20 +1826,11 @@ }, "type": { "type": "string", - "enum": [ - "tool-trajectory", - "tool_trajectory" - ] + "enum": ["tool-trajectory", "tool_trajectory"] }, "mode": { "type": "string", - "enum": [ - "any_order", - "in_order", - "exact", - "subset", - "superset" - ] + "enum": ["any_order", "in_order", "exact", "subset", "superset"] }, "minimums": { "type": "object", @@ -2081,12 +1871,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -2100,12 +1885,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -2116,9 +1896,7 @@ ] } }, - "required": [ - "tool" - ], + "required": ["tool"], "additionalProperties": false } }, @@ -2126,12 +1904,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -2145,12 +1918,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -2161,10 +1929,7 @@ ] } }, - "required": [ - "type", - "mode" - ], + "required": ["type", "mode"], "additionalProperties": false }, { @@ -2201,10 +1966,7 @@ }, "type": { "type": "string", - "enum": [ - "field-accuracy", - "field_accuracy" - ] + "enum": ["field-accuracy", "field_accuracy"] }, "fields": { "type": "array", @@ -2216,11 +1978,7 @@ }, "match": { "type": "string", - "enum": [ - "exact", - "numeric_tolerance", - "date" - ] + "enum": ["exact", "numeric_tolerance", "date"] }, "required": { "type": "boolean" @@ -2242,26 +2000,17 @@ } } }, - "required": [ - "path", - "match" - ], + "required": ["path", "match"], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": [ - "weighted_average", - "all_or_nothing" - ] + "enum": ["weighted_average", "all_or_nothing"] } }, - "required": [ - "type", - "fields" - ], + "required": ["type", "fields"], "additionalProperties": false }, { @@ -2305,10 +2054,7 @@ "minimum": 0 } }, - "required": [ - "type", - "threshold" - ], + "required": ["type", "threshold"], "additionalProperties": false }, { @@ -2352,10 +2098,7 @@ "minimum": 0 } }, - "required": [ - "type", - "budget" - ], + "required": ["type", "budget"], "additionalProperties": false }, { @@ -2392,10 +2135,7 @@ }, "type": { "type": "string", - "enum": [ - "token-usage", - "token_usage" - ] + "enum": ["token-usage", "token_usage"] }, "max_total": { "type": "number", @@ -2410,9 +2150,7 @@ "minimum": 0 } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -2449,10 +2187,7 @@ }, "type": { "type": "string", - "enum": [ - "execution-metrics", - "execution_metrics" - ] + "enum": ["execution-metrics", "execution_metrics"] }, "max_tool_calls": { "type": "number", @@ -2484,9 +2219,7 @@ "minimum": 0 } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -2529,10 +2262,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -2575,10 +2305,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -2615,15 +2342,10 @@ }, "type": { "type": "string", - "enum": [ - "is-json", - "is_json" - ] + "enum": ["is-json", "is_json"] } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -2666,10 +2388,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -2758,10 +2477,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -2771,10 +2487,7 @@ "minItems": 1 } }, - "required": [ - "type", - "criteria" - ], + "required": ["type", "criteria"], "additionalProperties": false } ] @@ -2846,11 +2559,7 @@ }, "reset": { "type": "string", - "enum": [ - "none", - "fast", - "strict" - ] + "enum": ["none", "fast", "strict"] } }, "additionalProperties": false @@ -2895,11 +2604,7 @@ }, "reset": { "type": "string", - "enum": [ - "none", - "fast", - "strict" - ] + "enum": ["none", "fast", "strict"] } }, "additionalProperties": false @@ -2944,11 +2649,7 @@ }, "reset": { "type": "string", - "enum": [ - "none", - "fast", - "strict" - ] + "enum": ["none", "fast", "strict"] } }, "additionalProperties": false @@ -2993,11 +2694,7 @@ }, "reset": { "type": "string", - "enum": [ - "none", - "fast", - "strict" - ] + "enum": ["none", "fast", "strict"] } }, "additionalProperties": false @@ -3006,9 +2703,7 @@ "additionalProperties": false } }, - "required": [ - "name" - ], + "required": ["name"], "additionalProperties": false } ] @@ -3057,10 +2752,7 @@ }, "type": { "type": "string", - "enum": [ - "code-grader", - "code_grader" - ] + "enum": ["code-grader", "code_grader"] }, "command": { "anyOf": [ @@ -3134,18 +2826,12 @@ ] } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false } } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false }, { @@ -3182,10 +2868,7 @@ }, "type": { "type": "string", - "enum": [ - "llm-grader", - "llm_grader" - ] + "enum": ["llm-grader", "llm_grader"] }, "prompt": { "anyOf": [ @@ -3280,10 +2963,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -3334,17 +3014,12 @@ ] } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false } } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -3355,9 +3030,7 @@ "minLength": 1 } }, - "required": [ - "include" - ], + "required": ["include"], "additionalProperties": false }, { @@ -3420,9 +3093,7 @@ } } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -3438,10 +3109,7 @@ "maximum": 1 } }, - "required": [ - "type", - "threshold" - ], + "required": ["type", "threshold"], "additionalProperties": false }, { @@ -3458,10 +3126,7 @@ "type": "string" } }, - "required": [ - "type", - "path" - ], + "required": ["type", "path"], "additionalProperties": false }, { @@ -3478,18 +3143,13 @@ "type": "string" } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false } ] } }, - "required": [ - "type", - "aggregator" - ], + "required": ["type", "aggregator"], "additionalProperties": false }, { @@ -3526,20 +3186,11 @@ }, "type": { "type": "string", - "enum": [ - "tool-trajectory", - "tool_trajectory" - ] + "enum": ["tool-trajectory", "tool_trajectory"] }, "mode": { "type": "string", - "enum": [ - "any_order", - "in_order", - "exact", - "subset", - "superset" - ] + "enum": ["any_order", "in_order", "exact", "subset", "superset"] }, "minimums": { "type": "object", @@ -3580,12 +3231,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -3599,12 +3245,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -3615,9 +3256,7 @@ ] } }, - "required": [ - "tool" - ], + "required": ["tool"], "additionalProperties": false } }, @@ -3625,12 +3264,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -3644,12 +3278,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -3660,10 +3289,7 @@ ] } }, - "required": [ - "type", - "mode" - ], + "required": ["type", "mode"], "additionalProperties": false }, { @@ -3700,10 +3326,7 @@ }, "type": { "type": "string", - "enum": [ - "field-accuracy", - "field_accuracy" - ] + "enum": ["field-accuracy", "field_accuracy"] }, "fields": { "type": "array", @@ -3715,11 +3338,7 @@ }, "match": { "type": "string", - "enum": [ - "exact", - "numeric_tolerance", - "date" - ] + "enum": ["exact", "numeric_tolerance", "date"] }, "required": { "type": "boolean" @@ -3741,26 +3360,17 @@ } } }, - "required": [ - "path", - "match" - ], + "required": ["path", "match"], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": [ - "weighted_average", - "all_or_nothing" - ] + "enum": ["weighted_average", "all_or_nothing"] } }, - "required": [ - "type", - "fields" - ], + "required": ["type", "fields"], "additionalProperties": false }, { @@ -3804,10 +3414,7 @@ "minimum": 0 } }, - "required": [ - "type", - "threshold" - ], + "required": ["type", "threshold"], "additionalProperties": false }, { @@ -3851,10 +3458,7 @@ "minimum": 0 } }, - "required": [ - "type", - "budget" - ], + "required": ["type", "budget"], "additionalProperties": false }, { @@ -3891,10 +3495,7 @@ }, "type": { "type": "string", - "enum": [ - "token-usage", - "token_usage" - ] + "enum": ["token-usage", "token_usage"] }, "max_total": { "type": "number", @@ -3909,9 +3510,7 @@ "minimum": 0 } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -3948,10 +3547,7 @@ }, "type": { "type": "string", - "enum": [ - "execution-metrics", - "execution_metrics" - ] + "enum": ["execution-metrics", "execution_metrics"] }, "max_tool_calls": { "type": "number", @@ -3983,9 +3579,7 @@ "minimum": 0 } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -4028,10 +3622,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -4074,10 +3665,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -4114,15 +3702,10 @@ }, "type": { "type": "string", - "enum": [ - "is-json", - "is_json" - ] + "enum": ["is-json", "is_json"] } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -4165,10 +3748,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -4257,10 +3837,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -4270,10 +3847,7 @@ "minItems": 1 } }, - "required": [ - "type", - "criteria" - ], + "required": ["type", "criteria"], "additionalProperties": false } ] @@ -4317,10 +3891,7 @@ }, "type": { "type": "string", - "enum": [ - "code-grader", - "code_grader" - ] + "enum": ["code-grader", "code_grader"] }, "command": { "anyOf": [ @@ -4394,18 +3965,12 @@ ] } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false } } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false }, { @@ -4442,10 +4007,7 @@ }, "type": { "type": "string", - "enum": [ - "llm-grader", - "llm_grader" - ] + "enum": ["llm-grader", "llm_grader"] }, "prompt": { "anyOf": [ @@ -4540,10 +4102,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -4594,17 +4153,12 @@ ] } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false } } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -4615,9 +4169,7 @@ "minLength": 1 } }, - "required": [ - "include" - ], + "required": ["include"], "additionalProperties": false }, { @@ -4680,9 +4232,7 @@ } } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -4698,10 +4248,7 @@ "maximum": 1 } }, - "required": [ - "type", - "threshold" - ], + "required": ["type", "threshold"], "additionalProperties": false }, { @@ -4718,10 +4265,7 @@ "type": "string" } }, - "required": [ - "type", - "path" - ], + "required": ["type", "path"], "additionalProperties": false }, { @@ -4738,18 +4282,13 @@ "type": "string" } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false } ] } }, - "required": [ - "type", - "aggregator" - ], + "required": ["type", "aggregator"], "additionalProperties": false }, { @@ -4786,20 +4325,11 @@ }, "type": { "type": "string", - "enum": [ - "tool-trajectory", - "tool_trajectory" - ] + "enum": ["tool-trajectory", "tool_trajectory"] }, "mode": { "type": "string", - "enum": [ - "any_order", - "in_order", - "exact", - "subset", - "superset" - ] + "enum": ["any_order", "in_order", "exact", "subset", "superset"] }, "minimums": { "type": "object", @@ -4840,12 +4370,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -4859,12 +4384,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -4875,9 +4395,7 @@ ] } }, - "required": [ - "tool" - ], + "required": ["tool"], "additionalProperties": false } }, @@ -4885,12 +4403,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -4904,12 +4417,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -4920,10 +4428,7 @@ ] } }, - "required": [ - "type", - "mode" - ], + "required": ["type", "mode"], "additionalProperties": false }, { @@ -4960,10 +4465,7 @@ }, "type": { "type": "string", - "enum": [ - "field-accuracy", - "field_accuracy" - ] + "enum": ["field-accuracy", "field_accuracy"] }, "fields": { "type": "array", @@ -4975,11 +4477,7 @@ }, "match": { "type": "string", - "enum": [ - "exact", - "numeric_tolerance", - "date" - ] + "enum": ["exact", "numeric_tolerance", "date"] }, "required": { "type": "boolean" @@ -5001,26 +4499,17 @@ } } }, - "required": [ - "path", - "match" - ], + "required": ["path", "match"], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": [ - "weighted_average", - "all_or_nothing" - ] + "enum": ["weighted_average", "all_or_nothing"] } }, - "required": [ - "type", - "fields" - ], + "required": ["type", "fields"], "additionalProperties": false }, { @@ -5064,10 +4553,7 @@ "minimum": 0 } }, - "required": [ - "type", - "threshold" - ], + "required": ["type", "threshold"], "additionalProperties": false }, { @@ -5111,10 +4597,7 @@ "minimum": 0 } }, - "required": [ - "type", - "budget" - ], + "required": ["type", "budget"], "additionalProperties": false }, { @@ -5151,10 +4634,7 @@ }, "type": { "type": "string", - "enum": [ - "token-usage", - "token_usage" - ] + "enum": ["token-usage", "token_usage"] }, "max_total": { "type": "number", @@ -5169,9 +4649,7 @@ "minimum": 0 } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -5208,10 +4686,7 @@ }, "type": { "type": "string", - "enum": [ - "execution-metrics", - "execution_metrics" - ] + "enum": ["execution-metrics", "execution_metrics"] }, "max_tool_calls": { "type": "number", @@ -5243,9 +4718,7 @@ "minimum": 0 } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -5288,10 +4761,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -5334,10 +4804,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -5374,15 +4841,10 @@ }, "type": { "type": "string", - "enum": [ - "is-json", - "is_json" - ] + "enum": ["is-json", "is_json"] } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -5425,10 +4887,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -5517,10 +4976,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -5530,10 +4986,7 @@ "minItems": 1 } }, - "required": [ - "type", - "criteria" - ], + "required": ["type", "criteria"], "additionalProperties": false } ] @@ -5554,11 +5007,7 @@ }, "strategy": { "type": "string", - "enum": [ - "pass_at_k", - "mean", - "confidence_interval" - ] + "enum": ["pass_at_k", "mean", "confidence_interval"] }, "cost_limit_usd": { "type": "number", @@ -5569,9 +5018,7 @@ "minimum": 0 } }, - "required": [ - "count" - ], + "required": ["count"], "additionalProperties": false }, "budget_usd": { @@ -5604,10 +5051,7 @@ }, "isolation": { "type": "string", - "enum": [ - "shared", - "per_test" - ] + "enum": ["shared", "per_test"] }, "repos": { "type": "array", @@ -5631,10 +5075,7 @@ "format": "uri" } }, - "required": [ - "type", - "url" - ], + "required": ["type", "url"], "additionalProperties": false }, { @@ -5648,10 +5089,7 @@ "type": "string" } }, - "required": [ - "type", - "path" - ], + "required": ["type", "path"], "additionalProperties": false } ] @@ -5668,10 +5106,7 @@ }, "resolve": { "type": "string", - "enum": [ - "remote", - "local" - ] + "enum": ["remote", "local"] }, "ancestor": { "type": "integer", @@ -5749,11 +5184,7 @@ }, "reset": { "type": "string", - "enum": [ - "none", - "fast", - "strict" - ] + "enum": ["none", "fast", "strict"] } }, "additionalProperties": false @@ -5798,11 +5229,7 @@ }, "reset": { "type": "string", - "enum": [ - "none", - "fast", - "strict" - ] + "enum": ["none", "fast", "strict"] } }, "additionalProperties": false @@ -5847,11 +5274,7 @@ }, "reset": { "type": "string", - "enum": [ - "none", - "fast", - "strict" - ] + "enum": ["none", "fast", "strict"] } }, "additionalProperties": false @@ -5896,11 +5319,7 @@ }, "reset": { "type": "string", - "enum": [ - "none", - "fast", - "strict" - ] + "enum": ["none", "fast", "strict"] } }, "additionalProperties": false @@ -5910,11 +5329,7 @@ }, "mode": { "type": "string", - "enum": [ - "pooled", - "temp", - "static" - ] + "enum": ["pooled", "temp", "static"] }, "path": { "type": "string" @@ -5937,9 +5352,7 @@ "minimum": 0.1 } }, - "required": [ - "image" - ], + "required": ["image"], "additionalProperties": false } }, @@ -5963,17 +5376,11 @@ }, "on_dependency_failure": { "type": "string", - "enum": [ - "skip", - "fail", - "run" - ] + "enum": ["skip", "fail", "run"] }, "mode": { "type": "string", - "enum": [ - "conversation" - ] + "enum": ["conversation"] }, "turns": { "type": "array", @@ -5997,20 +5404,13 @@ "properties": { "type": { "type": "string", - "enum": [ - "text", - "file", - "image" - ] + "enum": ["text", "file", "image"] }, "value": { "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false } } @@ -6035,20 +5435,13 @@ "properties": { "type": { "type": "string", - "enum": [ - "text", - "file", - "image" - ] + "enum": ["text", "file", "image"] }, "value": { "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false } } @@ -6099,10 +5492,7 @@ }, "type": { "type": "string", - "enum": [ - "code-grader", - "code_grader" - ] + "enum": ["code-grader", "code_grader"] }, "command": { "anyOf": [ @@ -6176,18 +5566,12 @@ ] } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false } } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false }, { @@ -6224,10 +5608,7 @@ }, "type": { "type": "string", - "enum": [ - "llm-grader", - "llm_grader" - ] + "enum": ["llm-grader", "llm_grader"] }, "prompt": { "anyOf": [ @@ -6322,10 +5703,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -6376,17 +5754,12 @@ ] } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false } } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -6397,9 +5770,7 @@ "minLength": 1 } }, - "required": [ - "include" - ], + "required": ["include"], "additionalProperties": false }, { @@ -6462,9 +5833,7 @@ } } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -6480,10 +5849,7 @@ "maximum": 1 } }, - "required": [ - "type", - "threshold" - ], + "required": ["type", "threshold"], "additionalProperties": false }, { @@ -6500,10 +5866,7 @@ "type": "string" } }, - "required": [ - "type", - "path" - ], + "required": ["type", "path"], "additionalProperties": false }, { @@ -6520,18 +5883,13 @@ "type": "string" } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false } ] } }, - "required": [ - "type", - "aggregator" - ], + "required": ["type", "aggregator"], "additionalProperties": false }, { @@ -6568,10 +5926,7 @@ }, "type": { "type": "string", - "enum": [ - "tool-trajectory", - "tool_trajectory" - ] + "enum": ["tool-trajectory", "tool_trajectory"] }, "mode": { "type": "string", @@ -6622,12 +5977,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -6641,12 +5991,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -6657,9 +6002,7 @@ ] } }, - "required": [ - "tool" - ], + "required": ["tool"], "additionalProperties": false } }, @@ -6667,12 +6010,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -6686,12 +6024,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -6702,10 +6035,7 @@ ] } }, - "required": [ - "type", - "mode" - ], + "required": ["type", "mode"], "additionalProperties": false }, { @@ -6742,10 +6072,7 @@ }, "type": { "type": "string", - "enum": [ - "field-accuracy", - "field_accuracy" - ] + "enum": ["field-accuracy", "field_accuracy"] }, "fields": { "type": "array", @@ -6757,11 +6084,7 @@ }, "match": { "type": "string", - "enum": [ - "exact", - "numeric_tolerance", - "date" - ] + "enum": ["exact", "numeric_tolerance", "date"] }, "required": { "type": "boolean" @@ -6783,26 +6106,17 @@ } } }, - "required": [ - "path", - "match" - ], + "required": ["path", "match"], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": [ - "weighted_average", - "all_or_nothing" - ] + "enum": ["weighted_average", "all_or_nothing"] } }, - "required": [ - "type", - "fields" - ], + "required": ["type", "fields"], "additionalProperties": false }, { @@ -6846,10 +6160,7 @@ "minimum": 0 } }, - "required": [ - "type", - "threshold" - ], + "required": ["type", "threshold"], "additionalProperties": false }, { @@ -6893,10 +6204,7 @@ "minimum": 0 } }, - "required": [ - "type", - "budget" - ], + "required": ["type", "budget"], "additionalProperties": false }, { @@ -6933,10 +6241,7 @@ }, "type": { "type": "string", - "enum": [ - "token-usage", - "token_usage" - ] + "enum": ["token-usage", "token_usage"] }, "max_total": { "type": "number", @@ -6951,9 +6256,7 @@ "minimum": 0 } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -6990,10 +6293,7 @@ }, "type": { "type": "string", - "enum": [ - "execution-metrics", - "execution_metrics" - ] + "enum": ["execution-metrics", "execution_metrics"] }, "max_tool_calls": { "type": "number", @@ -7025,9 +6325,7 @@ "minimum": 0 } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -7070,10 +6368,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -7116,10 +6411,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -7156,15 +6448,10 @@ }, "type": { "type": "string", - "enum": [ - "is-json", - "is_json" - ] + "enum": ["is-json", "is_json"] } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -7207,10 +6494,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -7299,10 +6583,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -7312,10 +6593,7 @@ "minItems": 1 } }, - "required": [ - "type", - "criteria" - ], + "required": ["type", "criteria"], "additionalProperties": false } ] @@ -7324,36 +6602,25 @@ } } }, - "required": [ - "input" - ], + "required": ["input"], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": [ - "mean", - "min", - "max" - ] + "enum": ["mean", "min", "max"] }, "on_turn_failure": { "type": "string", - "enum": [ - "continue", - "stop" - ] + "enum": ["continue", "stop"] }, "window_size": { "type": "integer", "minimum": 1 } }, - "required": [ - "id" - ], + "required": ["id"], "additionalProperties": false } }, @@ -7393,12 +6660,7 @@ "properties": { "role": { "type": "string", - "enum": [ - "system", - "user", - "assistant", - "tool" - ] + "enum": ["system", "user", "assistant", "tool"] }, "content": { "anyOf": [ @@ -7412,30 +6674,20 @@ "properties": { "type": { "type": "string", - "enum": [ - "text", - "file", - "image" - ] + "enum": ["text", "file", "image"] }, "value": { "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false } } ] } }, - "required": [ - "role", - "content" - ], + "required": ["role", "content"], "additionalProperties": false } } @@ -7464,12 +6716,7 @@ "properties": { "role": { "type": "string", - "enum": [ - "system", - "user", - "assistant", - "tool" - ] + "enum": ["system", "user", "assistant", "tool"] }, "content": { "anyOf": [ @@ -7483,30 +6730,20 @@ "properties": { "type": { "type": "string", - "enum": [ - "text", - "file", - "image" - ] + "enum": ["text", "file", "image"] }, "value": { "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false } } ] } }, - "required": [ - "role", - "content" - ], + "required": ["role", "content"], "additionalProperties": false } } @@ -7550,10 +6787,7 @@ }, "type": { "type": "string", - "enum": [ - "code-grader", - "code_grader" - ] + "enum": ["code-grader", "code_grader"] }, "command": { "anyOf": [ @@ -7627,18 +6861,12 @@ ] } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false } } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false }, { @@ -7675,10 +6903,7 @@ }, "type": { "type": "string", - "enum": [ - "llm-grader", - "llm_grader" - ] + "enum": ["llm-grader", "llm_grader"] }, "prompt": { "anyOf": [ @@ -7773,10 +6998,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -7827,17 +7049,12 @@ ] } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false } } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -7848,9 +7065,7 @@ "minLength": 1 } }, - "required": [ - "include" - ], + "required": ["include"], "additionalProperties": false }, { @@ -7913,9 +7128,7 @@ } } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -7931,10 +7144,7 @@ "maximum": 1 } }, - "required": [ - "type", - "threshold" - ], + "required": ["type", "threshold"], "additionalProperties": false }, { @@ -7951,10 +7161,7 @@ "type": "string" } }, - "required": [ - "type", - "path" - ], + "required": ["type", "path"], "additionalProperties": false }, { @@ -7971,18 +7178,13 @@ "type": "string" } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false } ] } }, - "required": [ - "type", - "aggregator" - ], + "required": ["type", "aggregator"], "additionalProperties": false }, { @@ -8019,20 +7221,11 @@ }, "type": { "type": "string", - "enum": [ - "tool-trajectory", - "tool_trajectory" - ] + "enum": ["tool-trajectory", "tool_trajectory"] }, "mode": { "type": "string", - "enum": [ - "any_order", - "in_order", - "exact", - "subset", - "superset" - ] + "enum": ["any_order", "in_order", "exact", "subset", "superset"] }, "minimums": { "type": "object", @@ -8073,12 +7266,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -8092,12 +7280,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -8108,9 +7291,7 @@ ] } }, - "required": [ - "tool" - ], + "required": ["tool"], "additionalProperties": false } }, @@ -8118,12 +7299,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -8137,12 +7313,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -8153,10 +7324,7 @@ ] } }, - "required": [ - "type", - "mode" - ], + "required": ["type", "mode"], "additionalProperties": false }, { @@ -8193,10 +7361,7 @@ }, "type": { "type": "string", - "enum": [ - "field-accuracy", - "field_accuracy" - ] + "enum": ["field-accuracy", "field_accuracy"] }, "fields": { "type": "array", @@ -8208,11 +7373,7 @@ }, "match": { "type": "string", - "enum": [ - "exact", - "numeric_tolerance", - "date" - ] + "enum": ["exact", "numeric_tolerance", "date"] }, "required": { "type": "boolean" @@ -8234,26 +7395,17 @@ } } }, - "required": [ - "path", - "match" - ], + "required": ["path", "match"], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": [ - "weighted_average", - "all_or_nothing" - ] + "enum": ["weighted_average", "all_or_nothing"] } }, - "required": [ - "type", - "fields" - ], + "required": ["type", "fields"], "additionalProperties": false }, { @@ -8297,10 +7449,7 @@ "minimum": 0 } }, - "required": [ - "type", - "threshold" - ], + "required": ["type", "threshold"], "additionalProperties": false }, { @@ -8344,10 +7493,7 @@ "minimum": 0 } }, - "required": [ - "type", - "budget" - ], + "required": ["type", "budget"], "additionalProperties": false }, { @@ -8384,10 +7530,7 @@ }, "type": { "type": "string", - "enum": [ - "token-usage", - "token_usage" - ] + "enum": ["token-usage", "token_usage"] }, "max_total": { "type": "number", @@ -8402,9 +7545,7 @@ "minimum": 0 } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -8441,10 +7582,7 @@ }, "type": { "type": "string", - "enum": [ - "execution-metrics", - "execution_metrics" - ] + "enum": ["execution-metrics", "execution_metrics"] }, "max_tool_calls": { "type": "number", @@ -8476,9 +7614,7 @@ "minimum": 0 } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -8521,10 +7657,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -8567,10 +7700,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -8607,15 +7737,10 @@ }, "type": { "type": "string", - "enum": [ - "is-json", - "is_json" - ] + "enum": ["is-json", "is_json"] } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -8658,10 +7783,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -8750,10 +7872,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -8763,10 +7882,7 @@ "minItems": 1 } }, - "required": [ - "type", - "criteria" - ], + "required": ["type", "criteria"], "additionalProperties": false } ] @@ -8810,10 +7926,7 @@ }, "type": { "type": "string", - "enum": [ - "code-grader", - "code_grader" - ] + "enum": ["code-grader", "code_grader"] }, "command": { "anyOf": [ @@ -8887,18 +8000,12 @@ ] } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false } } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false }, { @@ -8935,10 +8042,7 @@ }, "type": { "type": "string", - "enum": [ - "llm-grader", - "llm_grader" - ] + "enum": ["llm-grader", "llm_grader"] }, "prompt": { "anyOf": [ @@ -9033,10 +8137,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -9087,17 +8188,12 @@ ] } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false } } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -9108,9 +8204,7 @@ "minLength": 1 } }, - "required": [ - "include" - ], + "required": ["include"], "additionalProperties": false }, { @@ -9173,9 +8267,7 @@ } } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -9191,10 +8283,7 @@ "maximum": 1 } }, - "required": [ - "type", - "threshold" - ], + "required": ["type", "threshold"], "additionalProperties": false }, { @@ -9211,10 +8300,7 @@ "type": "string" } }, - "required": [ - "type", - "path" - ], + "required": ["type", "path"], "additionalProperties": false }, { @@ -9231,18 +8317,13 @@ "type": "string" } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false } ] } }, - "required": [ - "type", - "aggregator" - ], + "required": ["type", "aggregator"], "additionalProperties": false }, { @@ -9279,20 +8360,11 @@ }, "type": { "type": "string", - "enum": [ - "tool-trajectory", - "tool_trajectory" - ] + "enum": ["tool-trajectory", "tool_trajectory"] }, "mode": { "type": "string", - "enum": [ - "any_order", - "in_order", - "exact", - "subset", - "superset" - ] + "enum": ["any_order", "in_order", "exact", "subset", "superset"] }, "minimums": { "type": "object", @@ -9333,12 +8405,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -9352,12 +8419,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -9368,9 +8430,7 @@ ] } }, - "required": [ - "tool" - ], + "required": ["tool"], "additionalProperties": false } }, @@ -9378,12 +8438,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -9397,12 +8452,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -9413,10 +8463,7 @@ ] } }, - "required": [ - "type", - "mode" - ], + "required": ["type", "mode"], "additionalProperties": false }, { @@ -9453,10 +8500,7 @@ }, "type": { "type": "string", - "enum": [ - "field-accuracy", - "field_accuracy" - ] + "enum": ["field-accuracy", "field_accuracy"] }, "fields": { "type": "array", @@ -9468,11 +8512,7 @@ }, "match": { "type": "string", - "enum": [ - "exact", - "numeric_tolerance", - "date" - ] + "enum": ["exact", "numeric_tolerance", "date"] }, "required": { "type": "boolean" @@ -9494,26 +8534,17 @@ } } }, - "required": [ - "path", - "match" - ], + "required": ["path", "match"], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": [ - "weighted_average", - "all_or_nothing" - ] + "enum": ["weighted_average", "all_or_nothing"] } }, - "required": [ - "type", - "fields" - ], + "required": ["type", "fields"], "additionalProperties": false }, { @@ -9557,10 +8588,7 @@ "minimum": 0 } }, - "required": [ - "type", - "threshold" - ], + "required": ["type", "threshold"], "additionalProperties": false }, { @@ -9604,10 +8632,7 @@ "minimum": 0 } }, - "required": [ - "type", - "budget" - ], + "required": ["type", "budget"], "additionalProperties": false }, { @@ -9644,10 +8669,7 @@ }, "type": { "type": "string", - "enum": [ - "token-usage", - "token_usage" - ] + "enum": ["token-usage", "token_usage"] }, "max_total": { "type": "number", @@ -9662,9 +8684,7 @@ "minimum": 0 } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -9701,10 +8721,7 @@ }, "type": { "type": "string", - "enum": [ - "execution-metrics", - "execution_metrics" - ] + "enum": ["execution-metrics", "execution_metrics"] }, "max_tool_calls": { "type": "number", @@ -9736,9 +8753,7 @@ "minimum": 0 } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -9781,10 +8796,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -9827,10 +8839,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -9867,15 +8876,10 @@ }, "type": { "type": "string", - "enum": [ - "is-json", - "is_json" - ] + "enum": ["is-json", "is_json"] } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -9918,10 +8922,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -10010,10 +9011,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -10023,10 +9021,7 @@ "minItems": 1 } }, - "required": [ - "type", - "criteria" - ], + "required": ["type", "criteria"], "additionalProperties": false } ] @@ -10098,11 +9093,7 @@ }, "reset": { "type": "string", - "enum": [ - "none", - "fast", - "strict" - ] + "enum": ["none", "fast", "strict"] } }, "additionalProperties": false @@ -10147,11 +9138,7 @@ }, "reset": { "type": "string", - "enum": [ - "none", - "fast", - "strict" - ] + "enum": ["none", "fast", "strict"] } }, "additionalProperties": false @@ -10196,11 +9183,7 @@ }, "reset": { "type": "string", - "enum": [ - "none", - "fast", - "strict" - ] + "enum": ["none", "fast", "strict"] } }, "additionalProperties": false @@ -10245,11 +9228,7 @@ }, "reset": { "type": "string", - "enum": [ - "none", - "fast", - "strict" - ] + "enum": ["none", "fast", "strict"] } }, "additionalProperties": false @@ -10258,9 +9237,7 @@ "additionalProperties": false } }, - "required": [ - "name" - ], + "required": ["name"], "additionalProperties": false } ] @@ -10309,10 +9286,7 @@ }, "type": { "type": "string", - "enum": [ - "code-grader", - "code_grader" - ] + "enum": ["code-grader", "code_grader"] }, "command": { "anyOf": [ @@ -10386,18 +9360,12 @@ ] } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false } } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false }, { @@ -10434,10 +9402,7 @@ }, "type": { "type": "string", - "enum": [ - "llm-grader", - "llm_grader" - ] + "enum": ["llm-grader", "llm_grader"] }, "prompt": { "anyOf": [ @@ -10532,10 +9497,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -10586,17 +9548,12 @@ ] } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false } } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -10607,9 +9564,7 @@ "minLength": 1 } }, - "required": [ - "include" - ], + "required": ["include"], "additionalProperties": false }, { @@ -10672,9 +9627,7 @@ } } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -10690,10 +9643,7 @@ "maximum": 1 } }, - "required": [ - "type", - "threshold" - ], + "required": ["type", "threshold"], "additionalProperties": false }, { @@ -10710,10 +9660,7 @@ "type": "string" } }, - "required": [ - "type", - "path" - ], + "required": ["type", "path"], "additionalProperties": false }, { @@ -10730,18 +9677,13 @@ "type": "string" } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false } ] } }, - "required": [ - "type", - "aggregator" - ], + "required": ["type", "aggregator"], "additionalProperties": false }, { @@ -10778,20 +9720,11 @@ }, "type": { "type": "string", - "enum": [ - "tool-trajectory", - "tool_trajectory" - ] + "enum": ["tool-trajectory", "tool_trajectory"] }, "mode": { "type": "string", - "enum": [ - "any_order", - "in_order", - "exact", - "subset", - "superset" - ] + "enum": ["any_order", "in_order", "exact", "subset", "superset"] }, "minimums": { "type": "object", @@ -10832,12 +9765,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -10851,12 +9779,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -10867,9 +9790,7 @@ ] } }, - "required": [ - "tool" - ], + "required": ["tool"], "additionalProperties": false } }, @@ -10877,12 +9798,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -10896,12 +9812,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -10912,10 +9823,7 @@ ] } }, - "required": [ - "type", - "mode" - ], + "required": ["type", "mode"], "additionalProperties": false }, { @@ -10952,10 +9860,7 @@ }, "type": { "type": "string", - "enum": [ - "field-accuracy", - "field_accuracy" - ] + "enum": ["field-accuracy", "field_accuracy"] }, "fields": { "type": "array", @@ -10967,11 +9872,7 @@ }, "match": { "type": "string", - "enum": [ - "exact", - "numeric_tolerance", - "date" - ] + "enum": ["exact", "numeric_tolerance", "date"] }, "required": { "type": "boolean" @@ -10993,26 +9894,17 @@ } } }, - "required": [ - "path", - "match" - ], + "required": ["path", "match"], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": [ - "weighted_average", - "all_or_nothing" - ] + "enum": ["weighted_average", "all_or_nothing"] } }, - "required": [ - "type", - "fields" - ], + "required": ["type", "fields"], "additionalProperties": false }, { @@ -11056,10 +9948,7 @@ "minimum": 0 } }, - "required": [ - "type", - "threshold" - ], + "required": ["type", "threshold"], "additionalProperties": false }, { @@ -11103,10 +9992,7 @@ "minimum": 0 } }, - "required": [ - "type", - "budget" - ], + "required": ["type", "budget"], "additionalProperties": false }, { @@ -11143,10 +10029,7 @@ }, "type": { "type": "string", - "enum": [ - "token-usage", - "token_usage" - ] + "enum": ["token-usage", "token_usage"] }, "max_total": { "type": "number", @@ -11161,9 +10044,7 @@ "minimum": 0 } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -11200,10 +10081,7 @@ }, "type": { "type": "string", - "enum": [ - "execution-metrics", - "execution_metrics" - ] + "enum": ["execution-metrics", "execution_metrics"] }, "max_tool_calls": { "type": "number", @@ -11235,9 +10113,7 @@ "minimum": 0 } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -11280,10 +10156,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -11326,10 +10199,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -11366,15 +10236,10 @@ }, "type": { "type": "string", - "enum": [ - "is-json", - "is_json" - ] + "enum": ["is-json", "is_json"] } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -11417,10 +10282,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -11509,10 +10371,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -11522,10 +10381,7 @@ "minItems": 1 } }, - "required": [ - "type", - "criteria" - ], + "required": ["type", "criteria"], "additionalProperties": false } ] @@ -11569,10 +10425,7 @@ }, "type": { "type": "string", - "enum": [ - "code-grader", - "code_grader" - ] + "enum": ["code-grader", "code_grader"] }, "command": { "anyOf": [ @@ -11646,18 +10499,12 @@ ] } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false } } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false }, { @@ -11694,10 +10541,7 @@ }, "type": { "type": "string", - "enum": [ - "llm-grader", - "llm_grader" - ] + "enum": ["llm-grader", "llm_grader"] }, "prompt": { "anyOf": [ @@ -11792,10 +10636,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -11846,17 +10687,12 @@ ] } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false } } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -11867,9 +10703,7 @@ "minLength": 1 } }, - "required": [ - "include" - ], + "required": ["include"], "additionalProperties": false }, { @@ -11932,9 +10766,7 @@ } } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -11950,10 +10782,7 @@ "maximum": 1 } }, - "required": [ - "type", - "threshold" - ], + "required": ["type", "threshold"], "additionalProperties": false }, { @@ -11970,10 +10799,7 @@ "type": "string" } }, - "required": [ - "type", - "path" - ], + "required": ["type", "path"], "additionalProperties": false }, { @@ -11990,18 +10816,13 @@ "type": "string" } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false } ] } }, - "required": [ - "type", - "aggregator" - ], + "required": ["type", "aggregator"], "additionalProperties": false }, { @@ -12038,20 +10859,11 @@ }, "type": { "type": "string", - "enum": [ - "tool-trajectory", - "tool_trajectory" - ] + "enum": ["tool-trajectory", "tool_trajectory"] }, "mode": { "type": "string", - "enum": [ - "any_order", - "in_order", - "exact", - "subset", - "superset" - ] + "enum": ["any_order", "in_order", "exact", "subset", "superset"] }, "minimums": { "type": "object", @@ -12092,12 +10904,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -12111,12 +10918,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -12127,9 +10929,7 @@ ] } }, - "required": [ - "tool" - ], + "required": ["tool"], "additionalProperties": false } }, @@ -12137,12 +10937,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -12156,12 +10951,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -12172,10 +10962,7 @@ ] } }, - "required": [ - "type", - "mode" - ], + "required": ["type", "mode"], "additionalProperties": false }, { @@ -12212,10 +10999,7 @@ }, "type": { "type": "string", - "enum": [ - "field-accuracy", - "field_accuracy" - ] + "enum": ["field-accuracy", "field_accuracy"] }, "fields": { "type": "array", @@ -12227,11 +11011,7 @@ }, "match": { "type": "string", - "enum": [ - "exact", - "numeric_tolerance", - "date" - ] + "enum": ["exact", "numeric_tolerance", "date"] }, "required": { "type": "boolean" @@ -12253,26 +11033,17 @@ } } }, - "required": [ - "path", - "match" - ], + "required": ["path", "match"], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": [ - "weighted_average", - "all_or_nothing" - ] + "enum": ["weighted_average", "all_or_nothing"] } }, - "required": [ - "type", - "fields" - ], + "required": ["type", "fields"], "additionalProperties": false }, { @@ -12316,10 +11087,7 @@ "minimum": 0 } }, - "required": [ - "type", - "threshold" - ], + "required": ["type", "threshold"], "additionalProperties": false }, { @@ -12363,10 +11131,7 @@ "minimum": 0 } }, - "required": [ - "type", - "budget" - ], + "required": ["type", "budget"], "additionalProperties": false }, { @@ -12403,10 +11168,7 @@ }, "type": { "type": "string", - "enum": [ - "token-usage", - "token_usage" - ] + "enum": ["token-usage", "token_usage"] }, "max_total": { "type": "number", @@ -12421,9 +11183,7 @@ "minimum": 0 } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -12460,10 +11220,7 @@ }, "type": { "type": "string", - "enum": [ - "execution-metrics", - "execution_metrics" - ] + "enum": ["execution-metrics", "execution_metrics"] }, "max_tool_calls": { "type": "number", @@ -12495,9 +11252,7 @@ "minimum": 0 } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -12540,10 +11295,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -12586,10 +11338,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -12626,15 +11375,10 @@ }, "type": { "type": "string", - "enum": [ - "is-json", - "is_json" - ] + "enum": ["is-json", "is_json"] } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -12677,10 +11421,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -12769,10 +11510,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -12782,10 +11520,7 @@ "minItems": 1 } }, - "required": [ - "type", - "criteria" - ], + "required": ["type", "criteria"], "additionalProperties": false } ] @@ -12806,11 +11541,7 @@ }, "strategy": { "type": "string", - "enum": [ - "pass_at_k", - "mean", - "confidence_interval" - ] + "enum": ["pass_at_k", "mean", "confidence_interval"] }, "cost_limit_usd": { "type": "number", @@ -12821,9 +11552,7 @@ "minimum": 0 } }, - "required": [ - "count" - ], + "required": ["count"], "additionalProperties": false }, "budget_usd": { @@ -12856,10 +11585,7 @@ }, "isolation": { "type": "string", - "enum": [ - "shared", - "per_test" - ] + "enum": ["shared", "per_test"] }, "repos": { "type": "array", @@ -12883,10 +11609,7 @@ "format": "uri" } }, - "required": [ - "type", - "url" - ], + "required": ["type", "url"], "additionalProperties": false }, { @@ -12900,10 +11623,7 @@ "type": "string" } }, - "required": [ - "type", - "path" - ], + "required": ["type", "path"], "additionalProperties": false } ] @@ -12920,10 +11640,7 @@ }, "resolve": { "type": "string", - "enum": [ - "remote", - "local" - ] + "enum": ["remote", "local"] }, "ancestor": { "type": "integer", @@ -13001,11 +11718,7 @@ }, "reset": { "type": "string", - "enum": [ - "none", - "fast", - "strict" - ] + "enum": ["none", "fast", "strict"] } }, "additionalProperties": false @@ -13050,11 +11763,7 @@ }, "reset": { "type": "string", - "enum": [ - "none", - "fast", - "strict" - ] + "enum": ["none", "fast", "strict"] } }, "additionalProperties": false @@ -13099,11 +11808,7 @@ }, "reset": { "type": "string", - "enum": [ - "none", - "fast", - "strict" - ] + "enum": ["none", "fast", "strict"] } }, "additionalProperties": false @@ -13148,11 +11853,7 @@ }, "reset": { "type": "string", - "enum": [ - "none", - "fast", - "strict" - ] + "enum": ["none", "fast", "strict"] } }, "additionalProperties": false @@ -13162,11 +11863,7 @@ }, "mode": { "type": "string", - "enum": [ - "pooled", - "temp", - "static" - ] + "enum": ["pooled", "temp", "static"] }, "path": { "type": "string" @@ -13189,9 +11886,7 @@ "minimum": 0.1 } }, - "required": [ - "image" - ], + "required": ["image"], "additionalProperties": false } }, @@ -13215,17 +11910,11 @@ }, "on_dependency_failure": { "type": "string", - "enum": [ - "skip", - "fail", - "run" - ] + "enum": ["skip", "fail", "run"] }, "mode": { "type": "string", - "enum": [ - "conversation" - ] + "enum": ["conversation"] }, "turns": { "type": "array", @@ -13249,20 +11938,13 @@ "properties": { "type": { "type": "string", - "enum": [ - "text", - "file", - "image" - ] + "enum": ["text", "file", "image"] }, "value": { "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false } } @@ -13287,20 +11969,13 @@ "properties": { "type": { "type": "string", - "enum": [ - "text", - "file", - "image" - ] + "enum": ["text", "file", "image"] }, "value": { "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false } } @@ -13351,10 +12026,7 @@ }, "type": { "type": "string", - "enum": [ - "code-grader", - "code_grader" - ] + "enum": ["code-grader", "code_grader"] }, "command": { "anyOf": [ @@ -13428,18 +12100,12 @@ ] } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false } } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false }, { @@ -13476,10 +12142,7 @@ }, "type": { "type": "string", - "enum": [ - "llm-grader", - "llm_grader" - ] + "enum": ["llm-grader", "llm_grader"] }, "prompt": { "anyOf": [ @@ -13574,10 +12237,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -13628,17 +12288,12 @@ ] } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false } } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -13649,9 +12304,7 @@ "minLength": 1 } }, - "required": [ - "include" - ], + "required": ["include"], "additionalProperties": false }, { @@ -13714,9 +12367,7 @@ } } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -13732,10 +12383,7 @@ "maximum": 1 } }, - "required": [ - "type", - "threshold" - ], + "required": ["type", "threshold"], "additionalProperties": false }, { @@ -13752,10 +12400,7 @@ "type": "string" } }, - "required": [ - "type", - "path" - ], + "required": ["type", "path"], "additionalProperties": false }, { @@ -13772,18 +12417,13 @@ "type": "string" } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false } ] } }, - "required": [ - "type", - "aggregator" - ], + "required": ["type", "aggregator"], "additionalProperties": false }, { @@ -13820,10 +12460,7 @@ }, "type": { "type": "string", - "enum": [ - "tool-trajectory", - "tool_trajectory" - ] + "enum": ["tool-trajectory", "tool_trajectory"] }, "mode": { "type": "string", @@ -13874,12 +12511,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -13893,12 +12525,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -13909,9 +12536,7 @@ ] } }, - "required": [ - "tool" - ], + "required": ["tool"], "additionalProperties": false } }, @@ -13919,12 +12544,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -13938,12 +12558,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -13954,10 +12569,7 @@ ] } }, - "required": [ - "type", - "mode" - ], + "required": ["type", "mode"], "additionalProperties": false }, { @@ -13994,10 +12606,7 @@ }, "type": { "type": "string", - "enum": [ - "field-accuracy", - "field_accuracy" - ] + "enum": ["field-accuracy", "field_accuracy"] }, "fields": { "type": "array", @@ -14009,11 +12618,7 @@ }, "match": { "type": "string", - "enum": [ - "exact", - "numeric_tolerance", - "date" - ] + "enum": ["exact", "numeric_tolerance", "date"] }, "required": { "type": "boolean" @@ -14035,26 +12640,17 @@ } } }, - "required": [ - "path", - "match" - ], + "required": ["path", "match"], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": [ - "weighted_average", - "all_or_nothing" - ] + "enum": ["weighted_average", "all_or_nothing"] } }, - "required": [ - "type", - "fields" - ], + "required": ["type", "fields"], "additionalProperties": false }, { @@ -14098,10 +12694,7 @@ "minimum": 0 } }, - "required": [ - "type", - "threshold" - ], + "required": ["type", "threshold"], "additionalProperties": false }, { @@ -14145,10 +12738,7 @@ "minimum": 0 } }, - "required": [ - "type", - "budget" - ], + "required": ["type", "budget"], "additionalProperties": false }, { @@ -14185,10 +12775,7 @@ }, "type": { "type": "string", - "enum": [ - "token-usage", - "token_usage" - ] + "enum": ["token-usage", "token_usage"] }, "max_total": { "type": "number", @@ -14203,9 +12790,7 @@ "minimum": 0 } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -14242,10 +12827,7 @@ }, "type": { "type": "string", - "enum": [ - "execution-metrics", - "execution_metrics" - ] + "enum": ["execution-metrics", "execution_metrics"] }, "max_tool_calls": { "type": "number", @@ -14277,9 +12859,7 @@ "minimum": 0 } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -14322,10 +12902,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -14368,10 +12945,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -14408,15 +12982,10 @@ }, "type": { "type": "string", - "enum": [ - "is-json", - "is_json" - ] + "enum": ["is-json", "is_json"] } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -14459,10 +13028,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -14551,10 +13117,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -14564,10 +13127,7 @@ "minItems": 1 } }, - "required": [ - "type", - "criteria" - ], + "required": ["type", "criteria"], "additionalProperties": false } ] @@ -14576,36 +13136,25 @@ } } }, - "required": [ - "input" - ], + "required": ["input"], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": [ - "mean", - "min", - "max" - ] + "enum": ["mean", "min", "max"] }, "on_turn_failure": { "type": "string", - "enum": [ - "continue", - "stop" - ] + "enum": ["continue", "stop"] }, "window_size": { "type": "integer", "minimum": 1 } }, - "required": [ - "id" - ], + "required": ["id"], "additionalProperties": false } }, @@ -14683,11 +13232,7 @@ }, "reset": { "type": "string", - "enum": [ - "none", - "fast", - "strict" - ] + "enum": ["none", "fast", "strict"] } }, "additionalProperties": false @@ -14732,11 +13277,7 @@ }, "reset": { "type": "string", - "enum": [ - "none", - "fast", - "strict" - ] + "enum": ["none", "fast", "strict"] } }, "additionalProperties": false @@ -14781,11 +13322,7 @@ }, "reset": { "type": "string", - "enum": [ - "none", - "fast", - "strict" - ] + "enum": ["none", "fast", "strict"] } }, "additionalProperties": false @@ -14830,11 +13367,7 @@ }, "reset": { "type": "string", - "enum": [ - "none", - "fast", - "strict" - ] + "enum": ["none", "fast", "strict"] } }, "additionalProperties": false @@ -14843,9 +13376,7 @@ "additionalProperties": false } }, - "required": [ - "name" - ], + "required": ["name"], "additionalProperties": false } ] @@ -14894,10 +13425,7 @@ }, "type": { "type": "string", - "enum": [ - "code-grader", - "code_grader" - ] + "enum": ["code-grader", "code_grader"] }, "command": { "anyOf": [ @@ -14971,18 +13499,12 @@ ] } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false } } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false }, { @@ -15019,10 +13541,7 @@ }, "type": { "type": "string", - "enum": [ - "llm-grader", - "llm_grader" - ] + "enum": ["llm-grader", "llm_grader"] }, "prompt": { "anyOf": [ @@ -15117,10 +13636,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -15171,17 +13687,12 @@ ] } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false } } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -15192,9 +13703,7 @@ "minLength": 1 } }, - "required": [ - "include" - ], + "required": ["include"], "additionalProperties": false }, { @@ -15257,9 +13766,7 @@ } } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -15275,10 +13782,7 @@ "maximum": 1 } }, - "required": [ - "type", - "threshold" - ], + "required": ["type", "threshold"], "additionalProperties": false }, { @@ -15295,10 +13799,7 @@ "type": "string" } }, - "required": [ - "type", - "path" - ], + "required": ["type", "path"], "additionalProperties": false }, { @@ -15315,18 +13816,13 @@ "type": "string" } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false } ] } }, - "required": [ - "type", - "aggregator" - ], + "required": ["type", "aggregator"], "additionalProperties": false }, { @@ -15363,20 +13859,11 @@ }, "type": { "type": "string", - "enum": [ - "tool-trajectory", - "tool_trajectory" - ] + "enum": ["tool-trajectory", "tool_trajectory"] }, "mode": { "type": "string", - "enum": [ - "any_order", - "in_order", - "exact", - "subset", - "superset" - ] + "enum": ["any_order", "in_order", "exact", "subset", "superset"] }, "minimums": { "type": "object", @@ -15417,12 +13904,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -15436,12 +13918,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -15452,9 +13929,7 @@ ] } }, - "required": [ - "tool" - ], + "required": ["tool"], "additionalProperties": false } }, @@ -15462,12 +13937,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -15481,12 +13951,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -15497,10 +13962,7 @@ ] } }, - "required": [ - "type", - "mode" - ], + "required": ["type", "mode"], "additionalProperties": false }, { @@ -15537,10 +13999,7 @@ }, "type": { "type": "string", - "enum": [ - "field-accuracy", - "field_accuracy" - ] + "enum": ["field-accuracy", "field_accuracy"] }, "fields": { "type": "array", @@ -15552,11 +14011,7 @@ }, "match": { "type": "string", - "enum": [ - "exact", - "numeric_tolerance", - "date" - ] + "enum": ["exact", "numeric_tolerance", "date"] }, "required": { "type": "boolean" @@ -15578,26 +14033,17 @@ } } }, - "required": [ - "path", - "match" - ], + "required": ["path", "match"], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": [ - "weighted_average", - "all_or_nothing" - ] + "enum": ["weighted_average", "all_or_nothing"] } }, - "required": [ - "type", - "fields" - ], + "required": ["type", "fields"], "additionalProperties": false }, { @@ -15641,10 +14087,7 @@ "minimum": 0 } }, - "required": [ - "type", - "threshold" - ], + "required": ["type", "threshold"], "additionalProperties": false }, { @@ -15688,10 +14131,7 @@ "minimum": 0 } }, - "required": [ - "type", - "budget" - ], + "required": ["type", "budget"], "additionalProperties": false }, { @@ -15728,10 +14168,7 @@ }, "type": { "type": "string", - "enum": [ - "token-usage", - "token_usage" - ] + "enum": ["token-usage", "token_usage"] }, "max_total": { "type": "number", @@ -15746,9 +14183,7 @@ "minimum": 0 } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -15785,10 +14220,7 @@ }, "type": { "type": "string", - "enum": [ - "execution-metrics", - "execution_metrics" - ] + "enum": ["execution-metrics", "execution_metrics"] }, "max_tool_calls": { "type": "number", @@ -15820,9 +14252,7 @@ "minimum": 0 } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -15865,10 +14295,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -15911,10 +14338,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -15951,15 +14375,10 @@ }, "type": { "type": "string", - "enum": [ - "is-json", - "is_json" - ] + "enum": ["is-json", "is_json"] } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -16002,10 +14421,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -16094,10 +14510,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -16107,10 +14520,7 @@ "minItems": 1 } }, - "required": [ - "type", - "criteria" - ], + "required": ["type", "criteria"], "additionalProperties": false } ] @@ -16154,10 +14564,7 @@ }, "type": { "type": "string", - "enum": [ - "code-grader", - "code_grader" - ] + "enum": ["code-grader", "code_grader"] }, "command": { "anyOf": [ @@ -16231,18 +14638,12 @@ ] } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false } } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false }, { @@ -16279,10 +14680,7 @@ }, "type": { "type": "string", - "enum": [ - "llm-grader", - "llm_grader" - ] + "enum": ["llm-grader", "llm_grader"] }, "prompt": { "anyOf": [ @@ -16377,10 +14775,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -16431,17 +14826,12 @@ ] } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false } } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -16452,9 +14842,7 @@ "minLength": 1 } }, - "required": [ - "include" - ], + "required": ["include"], "additionalProperties": false }, { @@ -16517,9 +14905,7 @@ } } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -16535,10 +14921,7 @@ "maximum": 1 } }, - "required": [ - "type", - "threshold" - ], + "required": ["type", "threshold"], "additionalProperties": false }, { @@ -16555,10 +14938,7 @@ "type": "string" } }, - "required": [ - "type", - "path" - ], + "required": ["type", "path"], "additionalProperties": false }, { @@ -16575,18 +14955,13 @@ "type": "string" } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false } ] } }, - "required": [ - "type", - "aggregator" - ], + "required": ["type", "aggregator"], "additionalProperties": false }, { @@ -16623,20 +14998,11 @@ }, "type": { "type": "string", - "enum": [ - "tool-trajectory", - "tool_trajectory" - ] + "enum": ["tool-trajectory", "tool_trajectory"] }, "mode": { "type": "string", - "enum": [ - "any_order", - "in_order", - "exact", - "subset", - "superset" - ] + "enum": ["any_order", "in_order", "exact", "subset", "superset"] }, "minimums": { "type": "object", @@ -16677,12 +15043,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -16696,12 +15057,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -16712,9 +15068,7 @@ ] } }, - "required": [ - "tool" - ], + "required": ["tool"], "additionalProperties": false } }, @@ -16722,12 +15076,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -16741,12 +15090,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -16757,10 +15101,7 @@ ] } }, - "required": [ - "type", - "mode" - ], + "required": ["type", "mode"], "additionalProperties": false }, { @@ -16797,10 +15138,7 @@ }, "type": { "type": "string", - "enum": [ - "field-accuracy", - "field_accuracy" - ] + "enum": ["field-accuracy", "field_accuracy"] }, "fields": { "type": "array", @@ -16812,11 +15150,7 @@ }, "match": { "type": "string", - "enum": [ - "exact", - "numeric_tolerance", - "date" - ] + "enum": ["exact", "numeric_tolerance", "date"] }, "required": { "type": "boolean" @@ -16838,26 +15172,17 @@ } } }, - "required": [ - "path", - "match" - ], + "required": ["path", "match"], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": [ - "weighted_average", - "all_or_nothing" - ] + "enum": ["weighted_average", "all_or_nothing"] } }, - "required": [ - "type", - "fields" - ], + "required": ["type", "fields"], "additionalProperties": false }, { @@ -16901,10 +15226,7 @@ "minimum": 0 } }, - "required": [ - "type", - "threshold" - ], + "required": ["type", "threshold"], "additionalProperties": false }, { @@ -16948,10 +15270,7 @@ "minimum": 0 } }, - "required": [ - "type", - "budget" - ], + "required": ["type", "budget"], "additionalProperties": false }, { @@ -16988,10 +15307,7 @@ }, "type": { "type": "string", - "enum": [ - "token-usage", - "token_usage" - ] + "enum": ["token-usage", "token_usage"] }, "max_total": { "type": "number", @@ -17006,9 +15322,7 @@ "minimum": 0 } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -17045,10 +15359,7 @@ }, "type": { "type": "string", - "enum": [ - "execution-metrics", - "execution_metrics" - ] + "enum": ["execution-metrics", "execution_metrics"] }, "max_tool_calls": { "type": "number", @@ -17080,9 +15391,7 @@ "minimum": 0 } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -17125,10 +15434,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -17171,10 +15477,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -17211,15 +15514,10 @@ }, "type": { "type": "string", - "enum": [ - "is-json", - "is_json" - ] + "enum": ["is-json", "is_json"] } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -17262,10 +15560,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -17354,10 +15649,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -17367,10 +15659,7 @@ "minItems": 1 } }, - "required": [ - "type", - "criteria" - ], + "required": ["type", "criteria"], "additionalProperties": false } ] @@ -17391,11 +15680,7 @@ }, "strategy": { "type": "string", - "enum": [ - "pass_at_k", - "mean", - "confidence_interval" - ] + "enum": ["pass_at_k", "mean", "confidence_interval"] }, "cost_limit_usd": { "type": "number", @@ -17406,9 +15691,7 @@ "minimum": 0 } }, - "required": [ - "count" - ], + "required": ["count"], "additionalProperties": false }, "budget_usd": { @@ -17471,10 +15754,7 @@ }, "type": { "type": "string", - "enum": [ - "code-grader", - "code_grader" - ] + "enum": ["code-grader", "code_grader"] }, "command": { "anyOf": [ @@ -17548,18 +15828,12 @@ ] } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false } } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false }, { @@ -17596,10 +15870,7 @@ }, "type": { "type": "string", - "enum": [ - "llm-grader", - "llm_grader" - ] + "enum": ["llm-grader", "llm_grader"] }, "prompt": { "anyOf": [ @@ -17694,10 +15965,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -17748,17 +16016,12 @@ ] } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false } } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -17769,9 +16032,7 @@ "minLength": 1 } }, - "required": [ - "include" - ], + "required": ["include"], "additionalProperties": false }, { @@ -17834,9 +16095,7 @@ } } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -17852,10 +16111,7 @@ "maximum": 1 } }, - "required": [ - "type", - "threshold" - ], + "required": ["type", "threshold"], "additionalProperties": false }, { @@ -17872,10 +16128,7 @@ "type": "string" } }, - "required": [ - "type", - "path" - ], + "required": ["type", "path"], "additionalProperties": false }, { @@ -17892,18 +16145,13 @@ "type": "string" } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false } ] } }, - "required": [ - "type", - "aggregator" - ], + "required": ["type", "aggregator"], "additionalProperties": false }, { @@ -17940,20 +16188,11 @@ }, "type": { "type": "string", - "enum": [ - "tool-trajectory", - "tool_trajectory" - ] + "enum": ["tool-trajectory", "tool_trajectory"] }, "mode": { "type": "string", - "enum": [ - "any_order", - "in_order", - "exact", - "subset", - "superset" - ] + "enum": ["any_order", "in_order", "exact", "subset", "superset"] }, "minimums": { "type": "object", @@ -17994,12 +16233,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -18013,12 +16247,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -18029,9 +16258,7 @@ ] } }, - "required": [ - "tool" - ], + "required": ["tool"], "additionalProperties": false } }, @@ -18039,12 +16266,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -18058,12 +16280,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -18074,10 +16291,7 @@ ] } }, - "required": [ - "type", - "mode" - ], + "required": ["type", "mode"], "additionalProperties": false }, { @@ -18114,10 +16328,7 @@ }, "type": { "type": "string", - "enum": [ - "field-accuracy", - "field_accuracy" - ] + "enum": ["field-accuracy", "field_accuracy"] }, "fields": { "type": "array", @@ -18129,11 +16340,7 @@ }, "match": { "type": "string", - "enum": [ - "exact", - "numeric_tolerance", - "date" - ] + "enum": ["exact", "numeric_tolerance", "date"] }, "required": { "type": "boolean" @@ -18155,26 +16362,17 @@ } } }, - "required": [ - "path", - "match" - ], + "required": ["path", "match"], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": [ - "weighted_average", - "all_or_nothing" - ] + "enum": ["weighted_average", "all_or_nothing"] } }, - "required": [ - "type", - "fields" - ], + "required": ["type", "fields"], "additionalProperties": false }, { @@ -18218,10 +16416,7 @@ "minimum": 0 } }, - "required": [ - "type", - "threshold" - ], + "required": ["type", "threshold"], "additionalProperties": false }, { @@ -18265,10 +16460,7 @@ "minimum": 0 } }, - "required": [ - "type", - "budget" - ], + "required": ["type", "budget"], "additionalProperties": false }, { @@ -18305,10 +16497,7 @@ }, "type": { "type": "string", - "enum": [ - "token-usage", - "token_usage" - ] + "enum": ["token-usage", "token_usage"] }, "max_total": { "type": "number", @@ -18323,9 +16512,7 @@ "minimum": 0 } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -18362,10 +16549,7 @@ }, "type": { "type": "string", - "enum": [ - "execution-metrics", - "execution_metrics" - ] + "enum": ["execution-metrics", "execution_metrics"] }, "max_tool_calls": { "type": "number", @@ -18397,9 +16581,7 @@ "minimum": 0 } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -18442,10 +16624,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -18488,10 +16667,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -18528,15 +16704,10 @@ }, "type": { "type": "string", - "enum": [ - "is-json", - "is_json" - ] + "enum": ["is-json", "is_json"] } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -18579,10 +16750,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -18671,10 +16839,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -18684,10 +16849,7 @@ "minItems": 1 } }, - "required": [ - "type", - "criteria" - ], + "required": ["type", "criteria"], "additionalProperties": false } ] @@ -18716,10 +16878,7 @@ ] } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false } }, @@ -18733,10 +16892,7 @@ }, "isolation": { "type": "string", - "enum": [ - "shared", - "per_test" - ] + "enum": ["shared", "per_test"] }, "repos": { "type": "array", @@ -18760,10 +16916,7 @@ "format": "uri" } }, - "required": [ - "type", - "url" - ], + "required": ["type", "url"], "additionalProperties": false }, { @@ -18777,10 +16930,7 @@ "type": "string" } }, - "required": [ - "type", - "path" - ], + "required": ["type", "path"], "additionalProperties": false } ] @@ -18797,10 +16947,7 @@ }, "resolve": { "type": "string", - "enum": [ - "remote", - "local" - ] + "enum": ["remote", "local"] }, "ancestor": { "type": "integer", @@ -18878,11 +17025,7 @@ }, "reset": { "type": "string", - "enum": [ - "none", - "fast", - "strict" - ] + "enum": ["none", "fast", "strict"] } }, "additionalProperties": false @@ -18927,11 +17070,7 @@ }, "reset": { "type": "string", - "enum": [ - "none", - "fast", - "strict" - ] + "enum": ["none", "fast", "strict"] } }, "additionalProperties": false @@ -18976,11 +17115,7 @@ }, "reset": { "type": "string", - "enum": [ - "none", - "fast", - "strict" - ] + "enum": ["none", "fast", "strict"] } }, "additionalProperties": false @@ -19025,11 +17160,7 @@ }, "reset": { "type": "string", - "enum": [ - "none", - "fast", - "strict" - ] + "enum": ["none", "fast", "strict"] } }, "additionalProperties": false @@ -19039,11 +17170,7 @@ }, "mode": { "type": "string", - "enum": [ - "pooled", - "temp", - "static" - ] + "enum": ["pooled", "temp", "static"] }, "path": { "type": "string" @@ -19066,9 +17193,7 @@ "minimum": 0.1 } }, - "required": [ - "image" - ], + "required": ["image"], "additionalProperties": false } }, @@ -19080,9 +17205,7 @@ ] } }, - "required": [ - "tests" - ], + "required": ["tests"], "additionalProperties": false } }