From fe6ca01d9cf6af8c24e46b47c54fc9d8c72044db Mon Sep 17 00:00:00 2001 From: Christopher Tso Date: Mon, 4 May 2026 07:38:30 +0200 Subject: [PATCH 1/2] fix(core): drop api_format from Azure targets; surface pi-ai errors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two regressions from the Vercel AI SDK → pi-ai migration (#1206) broke Azure evals end-to-end whenever a target had api_format=chat (or unset on a config still defaulting to it): 1. The migration kept the api_format field but pi-ai's azure-openai- responses provider always hits /openai/v1/responses. With chat-style defaults still in place, AgentV sent ?api-version=2024-12-01-preview to the Responses path, which Azure rejects with 400 "API version not supported." Every eval call failed. 2. invokePiAi never inspected pi-ai's stopReason. When pi-ai surfaced the 400 as { stopReason: 'error', errorMessage: '...', content: [] }, the adapter happily returned an empty assistant message, which downstream graders then reported as "Unexpected EOF" JSON parse failures — completely hiding the underlying HTTP error. Fix: - Remove api_format from Azure targets entirely. Pi-ai's adapter only exposes the Responses path, so a chat/responses switch on this provider has no effect. Default Azure api version to v1 (matching the /openai/v1/responses path). Reject api_format on Azure targets at both validation and resolution time with a migration message pointing at `provider: openai` for chat-completions-only deployments. api_format remains supported on `provider: openai`. - Throw from invokePiAi when pi-ai returns stopReason 'error' so failures reach the surface and withRetry can apply its status-based retry policy. The thrown message includes the parsed HTTP status so isRetryableError can decide correctly. Also drops api_format from the repo's .agentv/targets.yaml and updates the Azure provider docs to document the migration path. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .agentv/targets.yaml | 10 +-- apps/cli/src/templates/.agentv/targets.yaml | 2 +- .../docs/docs/targets/llm-providers.mdx | 24 ++++--- .../src/evaluation/providers/llm-providers.ts | 50 +++++++++---- .../core/src/evaluation/providers/targets.ts | 40 ++++++----- .../validation/targets-validator.ts | 23 +++++- .../test/evaluation/providers/targets.test.ts | 70 +++++-------------- .../validation/targets-validator.test.ts | 7 +- 8 files changed, 120 insertions(+), 106 deletions(-) diff --git a/.agentv/targets.yaml b/.agentv/targets.yaml index a1b426dd7..0d77335b5 100644 --- a/.agentv/targets.yaml +++ b/.agentv/targets.yaml @@ -124,16 +124,16 @@ targets: api_key: ${{ GH_MODELS_TOKEN }} model: ${{ GH_MODELS_MODEL }} - # Single Azure target. Control the endpoint shape with AZURE_OPENAI_API_FORMAT: - # - chat (default): uses /chat/completions and AZURE_OPENAI_API_VERSION - # If AZURE_OPENAI_API_VERSION is omitted, AgentV defaults chat targets to 2024-12-01-preview. - # - responses: uses /responses and AgentV auto-defaults the version to v1 + # Single Azure target. Always uses Azure's Responses API + # (`/openai/v1/responses`); the api version defaults to `v1` and can be + # overridden via AZURE_OPENAI_API_VERSION. Chat-completions-only Azure + # deployments must use `provider: openai` with a deployment-scoped + # `base_url` instead. 
- name: azure provider: azure endpoint: ${{ AZURE_OPENAI_ENDPOINT }} api_key: ${{ AZURE_OPENAI_API_KEY }} model: ${{ AZURE_DEPLOYMENT_NAME }} - api_format: ${{ AZURE_OPENAI_API_FORMAT }} version: ${{ AZURE_OPENAI_API_VERSION }} - name: gemini diff --git a/apps/cli/src/templates/.agentv/targets.yaml b/apps/cli/src/templates/.agentv/targets.yaml index e90eae501..d0fbd7af9 100644 --- a/apps/cli/src/templates/.agentv/targets.yaml +++ b/apps/cli/src/templates/.agentv/targets.yaml @@ -8,7 +8,7 @@ targets: endpoint: ${{ AZURE_OPENAI_ENDPOINT }} api_key: ${{ AZURE_OPENAI_API_KEY }} model: ${{ AZURE_DEPLOYMENT_NAME }} - # version: ${{ AZURE_OPENAI_API_VERSION }} # Optional: uncomment to override default (2024-12-01-preview) + # version: ${{ AZURE_OPENAI_API_VERSION }} # Optional: uncomment to override default (v1) - name: codex provider: codex diff --git a/apps/web/src/content/docs/docs/targets/llm-providers.mdx b/apps/web/src/content/docs/docs/targets/llm-providers.mdx index c705fd33b..4e7a5ec71 100644 --- a/apps/web/src/content/docs/docs/targets/llm-providers.mdx +++ b/apps/web/src/content/docs/docs/targets/llm-providers.mdx @@ -66,26 +66,28 @@ targets: | Field | Required | Description | |-------|----------|-------------| -| `endpoint` | Yes | Azure OpenAI endpoint URL | +| `endpoint` | Yes | Azure OpenAI endpoint URL or resource name | | `api_key` | Yes | API key | | `model` | Yes | Deployment name | -| `api_format` | No | API format: `chat` (default) or `responses` | +| `version` | No | Azure API version (defaults to `v1`) | + +Azure targets always route through the Responses API (`/openai/v1/responses`). The api version defaults to `v1` and can be overridden via the `version` field. 
+ +### Chat-completions-only deployments -Azure OpenAI supports the same `api_format` switch: +If your Azure deployment only exposes `/chat/completions` (older deployments, certain regions), use `provider: openai` with a deployment-scoped `base_url` instead: ```yaml targets: - - name: azure-responses - provider: azure - endpoint: ${{ AZURE_OPENAI_ENDPOINT }} + - name: azure-chat + provider: openai + base_url: https://<resource-name>.openai.azure.com/openai/deployments/<deployment-name> + api_key: ${{ AZURE_OPENAI_API_KEY }} - model: ${{ AZURE_DEPLOYMENT_NAME }} - api_format: responses + model: <deployment-name> + api_format: chat ``` -When `api_format: responses` is used with Azure, AgentV defaults the API version to `v1` unless you explicitly override `version`. - -The repository's default [`.agentv/targets.yaml`](/home/christso/projects/agentv.worktrees/feat-920-azure-responses-api/.agentv/targets.yaml) uses a single `azure` target and drives `api_format` from `AZURE_OPENAI_API_FORMAT`. +The `api_format` field was previously available on `provider: azure` but has been removed — Azure targets always go through the Responses API. ## Anthropic diff --git a/packages/core/src/evaluation/providers/llm-providers.ts b/packages/core/src/evaluation/providers/llm-providers.ts index 3d6f01909..5fcda365a 100644 --- a/packages/core/src/evaluation/providers/llm-providers.ts +++ b/packages/core/src/evaluation/providers/llm-providers.ts @@ -253,11 +253,9 @@ export class AzureProvider implements Provider { // Pi-ai's azure-openai-responses provider handles the Azure-specific URL // shape and api-version query param. We pass either a full base URL or a // resource name + apiVersion via providerOptions; pi-ai does the rest. - // - // apiFormat is intentionally not branched here: pi-ai uses Azure's - // Responses API for both chat-style and responses-style calls. Users who - // hit an Azure deployment that only exposes /chat/completions can route - // through `provider: openai` with a deployment-scoped baseURL instead.
+ // The Responses API is the only path here — chat-completions-only Azure + // deployments must route through `provider: openai` with a + // deployment-scoped baseURL. const trimmed = config.resourceName.trim(); const isFullUrl = /^https?:\/\//i.test(trimmed); const baseUrl = isFullUrl ? buildAzureBaseUrl(trimmed) : undefined; @@ -368,11 +366,18 @@ export async function invokePiAi(options: InvokePiAiOptions): Promise piComplete(model, ctx, callOptions), - retryConfig, - request.signal, - ); + // pi-ai catches provider errors and surfaces them as `stopReason: 'error'` + // with `errorMessage` populated and `content: []`. Without this re-raise, + // the empty content propagates up as a "successful" empty response and + // downstream graders report misleading "JSON parse" errors instead of the + // underlying HTTP failure. Throw so withRetry can apply its status-based + // retry policy and the real cause reaches the surface. + const callPi = async (): Promise => { + const r = await piComplete(model, ctx, callOptions); + if (r.stopReason === 'error') throw piErrorFromResult(r); + return r; + }; + let result: PiAssistantMessage = await withRetry(callPi, retryConfig, request.signal); ctx.messages.push(result); stepCount = 1; accumulateUsage(aggregateUsage, result.usage); @@ -413,11 +418,7 @@ export async function invokePiAi(options: InvokePiAiOptions): Promise piComplete(model, ctx, callOptions), - retryConfig, - request.signal, - ); + result = await withRetry(callPi, retryConfig, request.signal); ctx.messages.push(result); stepCount += 1; accumulateUsage(aggregateUsage, result.usage); @@ -796,6 +797,25 @@ async function sleep(ms: number): Promise { return new Promise((resolve) => setTimeout(resolve, ms)); } +/** + * Build a thrown Error from a pi-ai result whose `stopReason === 'error'`. + * Pi-ai's `errorMessage` typically begins with the HTTP status (e.g. 
"400 API + * version not supported"); we prefix it with `HTTP ` so + * `extractStatus()` can parse it for retry-policy decisions. + */ +function piErrorFromResult(r: PiAssistantMessage): Error { + const raw = r.errorMessage ?? 'pi-ai call failed with no error message'; + const statusMatch = raw.match(/^(\d{3})\b\s*(.*)$/); + if (statusMatch) { + const status = statusMatch[1]; + const rest = statusMatch[2] || raw; + const err = new Error(`pi-ai call failed: HTTP ${status} ${rest}`); + (err as { status?: number }).status = Number.parseInt(status, 10); + return err; + } + return new Error(`pi-ai call failed: ${raw}`); +} + async function withRetry( fn: () => Promise, retryConfig?: RetryConfig, diff --git a/packages/core/src/evaluation/providers/targets.ts b/packages/core/src/evaluation/providers/targets.ts index fb20ae4ec..3e3f42252 100644 --- a/packages/core/src/evaluation/providers/targets.ts +++ b/packages/core/src/evaluation/providers/targets.ts @@ -359,14 +359,17 @@ export interface RetryConfig { export type ApiFormat = 'chat' | 'responses'; /** - * Azure OpenAI settings used by the Vercel AI SDK. + * Azure OpenAI settings. + * + * Note: `api_format` was removed — AgentV always routes Azure targets through + * pi-ai's Responses API path. Chat-completions-only Azure deployments must + * use `provider: openai` with a deployment-scoped `base_url`. */ export interface AzureResolvedConfig { readonly resourceName: string; readonly deploymentName: string; readonly apiKey: string; readonly version?: string; - readonly apiFormat?: ApiFormat; readonly temperature?: number; readonly maxOutputTokens?: number; readonly retry?: RetryConfig; @@ -757,28 +760,24 @@ const BASE_TARGET_SCHEMA = z }) .passthrough(); -const DEFAULT_AZURE_API_VERSION = '2024-12-01-preview'; -const DEFAULT_AZURE_RESPONSES_API_VERSION = 'v1'; +// Azure targets always go through pi-ai's `/openai/v1/responses` path, which +// requires `?api-version=v1`. 
The legacy chat-completions default +// (`2024-12-01-preview`) is no longer reachable from the Azure provider here. +const DEFAULT_AZURE_API_VERSION = 'v1'; const DEFAULT_OPENAI_BASE_URL = 'https://api.openai.com/v1'; -function normalizeAzureApiVersion( - value: string | undefined, - apiFormat: ApiFormat | undefined, -): string { - const defaultVersion = - apiFormat === 'responses' ? DEFAULT_AZURE_RESPONSES_API_VERSION : DEFAULT_AZURE_API_VERSION; - +function normalizeAzureApiVersion(value: string | undefined): string { if (!value) { - return defaultVersion; + return DEFAULT_AZURE_API_VERSION; } const trimmed = value.trim(); if (trimmed.length === 0) { - return defaultVersion; + return DEFAULT_AZURE_API_VERSION; } const withoutPrefix = trimmed.replace(/^api[-_]?version\s*=\s*/i, '').trim(); - return withoutPrefix.length > 0 ? withoutPrefix : defaultVersion; + return withoutPrefix.length > 0 ? withoutPrefix : DEFAULT_AZURE_API_VERSION; } function resolveRetryConfig(target: z.infer): RetryConfig | undefined { @@ -1087,6 +1086,16 @@ function resolveAzureConfig( target: z.infer, env: EnvLookup, ): AzureResolvedConfig { + // `api_format` was removed from Azure targets — pi-ai always routes Azure + // through `/openai/v1/responses`, so the chat-completions branch is gone. + // Reject the field loudly so users on chat-only deployments switch to the + // documented escape hatch instead of silently 400-ing on every call. + if (target.api_format !== undefined) { + throw new Error( + `The 'api_format' field is no longer supported on Azure targets ('${target.name}'). AgentV always uses Azure's Responses API. If your deployment only exposes /chat/completions, use 'provider: openai' with a deployment-scoped 'base_url' instead. See docs/targets/llm-providers for details.`, + ); + } + const endpointSource = target.endpoint ?? target.resource; const apiKeySource = target.api_key; const deploymentSource = target.deployment ?? 
target.model; @@ -1097,13 +1106,11 @@ function resolveAzureConfig( const resourceName = resolveString(endpointSource, env, `${target.name} endpoint`); const apiKey = resolveString(apiKeySource, env, `${target.name} api key`); const deploymentName = resolveString(deploymentSource, env, `${target.name} deployment`); - const apiFormat = resolveApiFormat(target, env, target.name); const version = normalizeAzureApiVersion( resolveOptionalString(versionSource, env, `${target.name} api version`, { allowLiteral: true, optionalEnv: true, }), - apiFormat, ); const temperature = resolveOptionalNumber(temperatureSource, `${target.name} temperature`); const maxOutputTokens = resolveOptionalNumber( @@ -1117,7 +1124,6 @@ function resolveAzureConfig( deploymentName, apiKey, version, - apiFormat, temperature, maxOutputTokens, retry, diff --git a/packages/core/src/evaluation/validation/targets-validator.ts b/packages/core/src/evaluation/validation/targets-validator.ts index 524b66548..1f9d04c09 100644 --- a/packages/core/src/evaluation/validation/targets-validator.ts +++ b/packages/core/src/evaluation/validation/targets-validator.ts @@ -41,7 +41,6 @@ const AZURE_SETTINGS = new Set([ 'model', 'version', 'api_version', - 'api_format', 'temperature', 'max_output_tokens', ]); @@ -248,6 +247,19 @@ function validateUnknownSettings( ]); const removedFields = new Set(['workspace_template', 'workspaceTemplate']); + // Provider-specific removed fields. Map value is the migration message. + const removedPerProvider: Record> = { + azure: new Map([ + [ + 'api_format', + "The 'api_format' field is no longer supported on Azure targets. " + + "AgentV always uses Azure's Responses API (`/openai/v1/responses`). 
" + + "If your deployment only exposes /chat/completions, use 'provider: openai' " + + "with a deployment-scoped 'base_url' instead.", + ], + ]), + }; + const removedForProvider = removedPerProvider[provider]; for (const key of Object.keys(target)) { if (removedFields.has(key)) { @@ -260,6 +272,15 @@ function validateUnknownSettings( }); continue; } + if (removedForProvider?.has(key)) { + errors.push({ + severity: 'error', + filePath: absolutePath, + location: `${location}.${key}`, + message: removedForProvider.get(key) as string, + }); + continue; + } if (!baseFields.has(key) && !knownSettings.has(key)) { errors.push({ severity: 'warning', diff --git a/packages/core/test/evaluation/providers/targets.test.ts b/packages/core/test/evaluation/providers/targets.test.ts index 5b6632341..47583949f 100644 --- a/packages/core/test/evaluation/providers/targets.test.ts +++ b/packages/core/test/evaluation/providers/targets.test.ts @@ -131,7 +131,7 @@ describe('resolveTargetDefinition', () => { resourceName: 'https://example.openai.azure.com', deploymentName: 'gpt-4o', apiKey: 'secret', - version: '2024-12-01-preview', + version: 'v1', }); }); @@ -231,64 +231,29 @@ describe('resolveTargetDefinition', () => { expect(target.config.version).toBe('2024-08-01-preview'); }); - it('resolves azure api_format when configured', () => { + it('rejects azure api_format with a migration error', () => { const env = { AZURE_OPENAI_ENDPOINT: 'https://example.openai.azure.com', AZURE_OPENAI_API_KEY: 'secret', AZURE_DEPLOYMENT_NAME: 'gpt-4o', } satisfies Record; - const target = resolveTargetDefinition( - { - name: 'azure-responses', - provider: 'azure', - endpoint: '${{ AZURE_OPENAI_ENDPOINT }}', - api_key: '${{ AZURE_OPENAI_API_KEY }}', - model: '${{ AZURE_DEPLOYMENT_NAME }}', - api_format: 'responses', - }, - env, - ); - - expect(target.kind).toBe('azure'); - if (target.kind !== 'azure') { - throw new Error('expected azure target'); - } - - expect(target.config.apiFormat).toBe('responses'); - 
expect(target.config.version).toBe('v1'); - }); - - it('resolves azure api_format from env interpolation', () => { - const env = { - AZURE_OPENAI_ENDPOINT: 'https://example.openai.azure.com', - AZURE_OPENAI_API_KEY: 'secret', - AZURE_DEPLOYMENT_NAME: 'gpt-4o', - AZURE_OPENAI_API_FORMAT: 'responses', - } satisfies Record; - - const target = resolveTargetDefinition( - { - name: 'azure-env-format', - provider: 'azure', - endpoint: '${{ AZURE_OPENAI_ENDPOINT }}', - api_key: '${{ AZURE_OPENAI_API_KEY }}', - model: '${{ AZURE_DEPLOYMENT_NAME }}', - api_format: '${{ AZURE_OPENAI_API_FORMAT }}', - }, - env, - ); - - expect(target.kind).toBe('azure'); - if (target.kind !== 'azure') { - throw new Error('expected azure target'); - } - - expect(target.config.apiFormat).toBe('responses'); - expect(target.config.version).toBe('v1'); + expect(() => + resolveTargetDefinition( + { + name: 'azure-with-api-format', + provider: 'azure', + endpoint: '${{ AZURE_OPENAI_ENDPOINT }}', + api_key: '${{ AZURE_OPENAI_API_KEY }}', + model: '${{ AZURE_DEPLOYMENT_NAME }}', + api_format: 'responses', + }, + env, + ), + ).toThrow(/'api_format' field is no longer supported/i); }); - it('defaults azure responses targets to api version v1', () => { + it('defaults azure to api version v1', () => { const env = { AZURE_OPENAI_ENDPOINT: 'https://example.openai.azure.com', AZURE_OPENAI_API_KEY: 'secret', @@ -297,12 +262,11 @@ describe('resolveTargetDefinition', () => { const target = resolveTargetDefinition( { - name: 'azure-responses-default-version', + name: 'azure-default-version', provider: 'azure', endpoint: '${{ AZURE_OPENAI_ENDPOINT }}', api_key: '${{ AZURE_OPENAI_API_KEY }}', model: '${{ AZURE_DEPLOYMENT_NAME }}', - api_format: 'responses', }, env, ); diff --git a/packages/core/test/evaluation/validation/targets-validator.test.ts b/packages/core/test/evaluation/validation/targets-validator.test.ts index cfaddd517..24dc65c61 100644 --- 
a/packages/core/test/evaluation/validation/targets-validator.test.ts +++ b/packages/core/test/evaluation/validation/targets-validator.test.ts @@ -96,7 +96,7 @@ describe('validateTargetsFile', () => { ).toBe(true); }); - it('accepts azure api_format as a known setting', async () => { + it('rejects azure api_format with a migration error', async () => { const filePath = path.join(tempDir, 'azure-api-format.yaml'); await writeFile( filePath, @@ -115,9 +115,10 @@ describe('validateTargetsFile', () => { expect( result.errors.some( (error) => + error.severity === 'error' && error.location === 'targets[0].api_format' && - error.message.includes("Unknown setting 'api_format'"), + /'api_format' field is no longer supported/i.test(error.message), ), - ).toBe(false); + ).toBe(true); }); }); From b69d9026ed72f4d79115894948fb5ac73894e107 Mon Sep 17 00:00:00 2001 From: Christopher Tso Date: Mon, 4 May 2026 07:38:39 +0200 Subject: [PATCH 2/2] chore: biome formatter pass on self-update files Pure whitespace fix from `biome format --write` on apps/cli/src/self-update.ts and apps/cli/test/self-update.test.ts. Pre-existing issue from #1213; the pre-push hook fails on these files independently of any other change, so fixing them here unblocks the bug-fix branch's push. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- apps/cli/src/self-update.ts | 13 ++++++++----- apps/cli/test/self-update.test.ts | 5 +---- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/apps/cli/src/self-update.ts b/apps/cli/src/self-update.ts index 2951fe0bb..34982b3fe 100644 --- a/apps/cli/src/self-update.ts +++ b/apps/cli/src/self-update.ts @@ -65,7 +65,10 @@ export function detectInstallScopeFromPath( return 'global'; } - if (normalizedScriptPath.includes('/.npm/_npx/') || normalizedScriptPath.includes('/npm-cache/_npx/')) { + if ( + normalizedScriptPath.includes('/.npm/_npx/') || + normalizedScriptPath.includes('/npm-cache/_npx/') + ) { return 'local'; } @@ -74,11 +77,11 @@ export function detectInstallScopeFromPath( return 'global'; } - const scriptPathComparable = process.platform === 'win32' - ? normalizedScriptPath.toLowerCase() - : normalizedScriptPath; + const scriptPathComparable = + process.platform === 'win32' ? normalizedScriptPath.toLowerCase() : normalizedScriptPath; const cwdComparable = process.platform === 'win32' ? normalizedCwd.toLowerCase() : normalizedCwd; - const packageRootComparable = process.platform === 'win32' ? packageRoot.toLowerCase() : packageRoot; + const packageRootComparable = + process.platform === 'win32' ? packageRoot.toLowerCase() : packageRoot; const projectOwnsPackage = cwdComparable === packageRootComparable || diff --git a/apps/cli/test/self-update.test.ts b/apps/cli/test/self-update.test.ts index 00a4e825b..6dc13e890 100644 --- a/apps/cli/test/self-update.test.ts +++ b/apps/cli/test/self-update.test.ts @@ -28,10 +28,7 @@ describe('detectPackageManagerFromPath', () => { describe('detectInstallScopeFromPath', () => { test('detects local for project node_modules path', () => { expect( - detectInstallScopeFromPath( - '/home/user/proj/node_modules/.bin/agentv', - '/home/user/proj', - ), + detectInstallScopeFromPath('/home/user/proj/node_modules/.bin/agentv', '/home/user/proj'), ).toBe('local'); });