From a6820ff9be34ba2bc5d43102ec7be8f831023814 Mon Sep 17 00:00:00 2001 From: Christopher Tso Date: Thu, 14 May 2026 00:04:09 +0200 Subject: [PATCH 1/2] fix(cli): add --dangerously-skip-permissions to ClaudeCliProvider Evals using `provider: claude` were hitting permission prompts on every tool call because ClaudeCliProvider never passed permission bypass flags to the Claude CLI. ClaudeSdkProvider already sets `permissionMode: 'bypassPermissions'` unconditionally, so this aligns the two providers. - Add `bypassPermissions` option to `ClaudeResolvedConfig` (optional, defaults to true in buildArgs) - Append `--dangerously-skip-permissions` to CLI args when `bypassPermissions !== false` - Read `bypass_permissions` from target YAML in `resolveClaudeConfig` - Add `executable` to test mock config; add three new tests covering default, explicit-true, and explicit-false behavior Closes #1238 Co-Authored-By: Claude Sonnet 4.6 --- .../src/evaluation/providers/claude-cli.ts | 4 ++++ .../core/src/evaluation/providers/targets.ts | 8 +++++++ .../providers/claude-provider-aliases.test.ts | 24 +++++++++++++++++++ 3 files changed, 36 insertions(+) diff --git a/packages/core/src/evaluation/providers/claude-cli.ts b/packages/core/src/evaluation/providers/claude-cli.ts index 06ed5c810..91099a44c 100644 --- a/packages/core/src/evaluation/providers/claude-cli.ts +++ b/packages/core/src/evaluation/providers/claude-cli.ts @@ -182,6 +182,10 @@ export class ClaudeCliProvider implements Provider { '--verbose', ]; + if (this.config.bypassPermissions !== false) { + args.push('--dangerously-skip-permissions'); + } + if (this.config.model) { args.push('--model', this.config.model); } diff --git a/packages/core/src/evaluation/providers/targets.ts b/packages/core/src/evaluation/providers/targets.ts index 3e3f42252..9ed108612 100644 --- a/packages/core/src/evaluation/providers/targets.ts +++ b/packages/core/src/evaluation/providers/targets.ts @@ -533,6 +533,8 @@ export interface ClaudeResolvedConfig { readonly logFormat?: 'summary' | 'json'; /** New stream_log field. false=no stream log (default), 'raw'=per-event, 'summary'=consolidated. */ readonly streamLog?: false | 'raw' | 'summary'; + /** When true (default), passes --dangerously-skip-permissions to the Claude CLI. Matches ClaudeSdkProvider behavior. */ + readonly bypassPermissions?: boolean; } export interface MockResolvedConfig { @@ -1853,6 +1855,11 @@ function resolveClaudeConfig( const maxBudgetUsd = typeof target.max_budget_usd === 'number' ? target.max_budget_usd : undefined; + const bypassPermissions = + target.bypass_permissions !== undefined + ? resolveOptionalBoolean(target.bypass_permissions) + : undefined; + return { executable, model, @@ -1864,6 +1871,7 @@ function resolveClaudeConfig( logDir, logFormat, streamLog: streamLogResult.streamLog, + bypassPermissions, }; } diff --git a/packages/core/test/evaluation/providers/claude-provider-aliases.test.ts b/packages/core/test/evaluation/providers/claude-provider-aliases.test.ts index 4834d02c5..9b6f4698d 100644 --- a/packages/core/test/evaluation/providers/claude-provider-aliases.test.ts +++ b/packages/core/test/evaluation/providers/claude-provider-aliases.test.ts @@ -6,6 +6,7 @@ import { ClaudeProvider } from '../../../src/evaluation/providers/claude.js'; import { createBuiltinProviderRegistry } from '../../../src/evaluation/providers/index.js'; const mockClaudeConfig = { + executable: 'claude', model: undefined, cwd: undefined, timeoutMs: undefined, @@ -61,3 +62,26 @@ describe('Claude provider alias resolution', () => { expect(sdkProvider.kind).toBe('claude'); }); }); + +describe('ClaudeCliProvider buildArgs', () => { + it('includes --dangerously-skip-permissions by default', () => { + const provider = new ClaudeCliProvider('target', mockClaudeConfig); + // biome-ignore lint/suspicious/noExplicitAny: testing private method + const args: string[] = (provider as any).buildArgs(); + expect(args).toContain('--dangerously-skip-permissions'); + }); + + it('includes --dangerously-skip-permissions when bypassPermissions is true', () => { + const provider = new ClaudeCliProvider('target', { ...mockClaudeConfig, bypassPermissions: true }); + // biome-ignore lint/suspicious/noExplicitAny: testing private method + const args: string[] = (provider as any).buildArgs(); + expect(args).toContain('--dangerously-skip-permissions'); + }); + + it('omits --dangerously-skip-permissions when bypassPermissions is false', () => { + const provider = new ClaudeCliProvider('target', { ...mockClaudeConfig, bypassPermissions: false }); + // biome-ignore lint/suspicious/noExplicitAny: testing private method + const args: string[] = (provider as any).buildArgs(); + expect(args).not.toContain('--dangerously-skip-permissions'); + }); +}); From 58491d759392710926c6535ac827af14564abdaa Mon Sep 17 00:00:00 2001 From: Christopher Tso Date: Thu, 14 May 2026 00:06:32 +0200 Subject: [PATCH 2/2] style: fix biome formatting in test file Co-Authored-By: Claude Sonnet 4.6 --- .../providers/claude-provider-aliases.test.ts | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/packages/core/test/evaluation/providers/claude-provider-aliases.test.ts b/packages/core/test/evaluation/providers/claude-provider-aliases.test.ts index 9b6f4698d..0ef9ee056 100644 --- a/packages/core/test/evaluation/providers/claude-provider-aliases.test.ts +++ b/packages/core/test/evaluation/providers/claude-provider-aliases.test.ts @@ -72,14 +72,20 @@ describe('ClaudeCliProvider buildArgs', () => { }); it('includes --dangerously-skip-permissions when bypassPermissions is true', () => { - const provider = new ClaudeCliProvider('target', { ...mockClaudeConfig, bypassPermissions: true }); + const provider = new ClaudeCliProvider('target', { + ...mockClaudeConfig, + bypassPermissions: true, + }); // biome-ignore lint/suspicious/noExplicitAny: testing private method const args: string[] = (provider as any).buildArgs(); expect(args).toContain('--dangerously-skip-permissions'); }); it('omits --dangerously-skip-permissions when bypassPermissions is false', () => { - const provider = new ClaudeCliProvider('target', { ...mockClaudeConfig, bypassPermissions: false }); + const provider = new ClaudeCliProvider('target', { + ...mockClaudeConfig, + bypassPermissions: false, + }); // biome-ignore lint/suspicious/noExplicitAny: testing private method const args: string[] = (provider as any).buildArgs(); expect(args).not.toContain('--dangerously-skip-permissions');