From 0f1c4b5b4252333a4b626ff576d622026a7aefb0 Mon Sep 17 00:00:00 2001 From: vraj Date: Fri, 15 May 2026 23:08:13 -0700 Subject: [PATCH 1/8] feat: extend providerOptions with baseUrl/apiKey for OpenAI-compatible endpoints Adds two optional fields to AgentDefinition.providerOptions (and the backend OpenRouterProviderRoutingOptions + Zod schema) so an agent can direct its LLM calls at a custom OpenAI-compatible base URL (Ollama, LM Studio, self-hosted). The dispatch logic that consumes these fields lands in a follow-up commit. Also adds CODEBUFF_BASE_URL / CODEBUFF_PROVIDER_API_KEY env var constants and SDK getters for them. Part of issue #678. Co-Authored-By: Claude Opus 4.7 --- .agents/types/agent-definition.ts | 19 ++++++++++++++++++ agents/types/agent-definition.ts | 19 ++++++++++++++++++ common/src/constants/custom-provider.ts | 7 +++++++ .../types/agent-definition.ts | 19 ++++++++++++++++++ common/src/types/agent-template.ts | 20 +++++++++++++++++++ common/src/types/dynamic-agent-template.ts | 2 ++ sdk/src/env.ts | 20 +++++++++++++++++++ 7 files changed, 106 insertions(+) create mode 100644 common/src/constants/custom-provider.ts diff --git a/.agents/types/agent-definition.ts b/.agents/types/agent-definition.ts index d89843404e..80cfacdea4 100644 --- a/.agents/types/agent-definition.ts +++ b/.agents/types/agent-definition.ts @@ -109,6 +109,25 @@ export interface AgentDefinition { audio?: number | string request?: number | string } + /** + * Override the upstream LLM endpoint with an OpenAI-compatible base URL. + * When set, this agent's LLM calls bypass the Codebuff backend / OpenRouter + * and go directly to `${baseUrl}/chat/completions`. + * + * Use for local models (Ollama, LM Studio) or self-hosted OpenAI-compatible + * providers. The other providerOptions keys (order, allow_fallbacks, etc.) + * are OpenRouter-specific and ignored when `baseUrl` is set. + * + * Falls back to env var CODEBUFF_BASE_URL when unset. 
+ * Example: "http://localhost:11434/v1" + */ + baseUrl?: string + /** + * API key for the endpoint set in `baseUrl`. Ignored if `baseUrl` is unset. + * Falls back to env var CODEBUFF_PROVIDER_API_KEY. Most local runtimes + * (Ollama, LM Studio) ignore the value entirely. + */ + apiKey?: string } // ============================================================================ diff --git a/agents/types/agent-definition.ts b/agents/types/agent-definition.ts index 030de3a14f..8847a98ecb 100644 --- a/agents/types/agent-definition.ts +++ b/agents/types/agent-definition.ts @@ -109,6 +109,25 @@ export interface AgentDefinition { audio?: number | string request?: number | string } + /** + * Override the upstream LLM endpoint with an OpenAI-compatible base URL. + * When set, this agent's LLM calls bypass the Codebuff backend / OpenRouter + * and go directly to `${baseUrl}/chat/completions`. + * + * Use for local models (Ollama, LM Studio) or self-hosted OpenAI-compatible + * providers. The other providerOptions keys (order, allow_fallbacks, etc.) + * are OpenRouter-specific and ignored when `baseUrl` is set. + * + * Falls back to env var CODEBUFF_BASE_URL when unset. + * Example: "http://localhost:11434/v1" + */ + baseUrl?: string + /** + * API key for the endpoint set in `baseUrl`. Ignored if `baseUrl` is unset. + * Falls back to env var CODEBUFF_PROVIDER_API_KEY. Most local runtimes + * (Ollama, LM Studio) ignore the value entirely. + */ + apiKey?: string } // ============================================================================ diff --git a/common/src/constants/custom-provider.ts b/common/src/constants/custom-provider.ts new file mode 100644 index 0000000000..0bedb0355b --- /dev/null +++ b/common/src/constants/custom-provider.ts @@ -0,0 +1,7 @@ +/** Env var that overrides the upstream LLM endpoint with an OpenAI-compatible base URL. + * Lower precedence than per-agent providerOptions.baseUrl and the CodebuffClient option. 
*/ +export const PROVIDER_BASE_URL_ENV_VAR = 'CODEBUFF_BASE_URL' + +/** Env var providing the API key for the endpoint set by PROVIDER_BASE_URL_ENV_VAR. + * Most local runtimes (Ollama, LM Studio) ignore the key entirely. */ +export const PROVIDER_API_KEY_ENV_VAR = 'CODEBUFF_PROVIDER_API_KEY' diff --git a/common/src/templates/initial-agents-dir/types/agent-definition.ts b/common/src/templates/initial-agents-dir/types/agent-definition.ts index 030de3a14f..8847a98ecb 100644 --- a/common/src/templates/initial-agents-dir/types/agent-definition.ts +++ b/common/src/templates/initial-agents-dir/types/agent-definition.ts @@ -109,6 +109,25 @@ export interface AgentDefinition { audio?: number | string request?: number | string } + /** + * Override the upstream LLM endpoint with an OpenAI-compatible base URL. + * When set, this agent's LLM calls bypass the Codebuff backend / OpenRouter + * and go directly to `${baseUrl}/chat/completions`. + * + * Use for local models (Ollama, LM Studio) or self-hosted OpenAI-compatible + * providers. The other providerOptions keys (order, allow_fallbacks, etc.) + * are OpenRouter-specific and ignored when `baseUrl` is set. + * + * Falls back to env var CODEBUFF_BASE_URL when unset. + * Example: "http://localhost:11434/v1" + */ + baseUrl?: string + /** + * API key for the endpoint set in `baseUrl`. Ignored if `baseUrl` is unset. + * Falls back to env var CODEBUFF_PROVIDER_API_KEY. Most local runtimes + * (Ollama, LM Studio) ignore the value entirely. 
+ */ + apiKey?: string } // ============================================================================ diff --git a/common/src/types/agent-template.ts b/common/src/types/agent-template.ts index 0d89ba7ede..6cb0a77842 100644 --- a/common/src/types/agent-template.ts +++ b/common/src/types/agent-template.ts @@ -37,6 +37,12 @@ export type OpenRouterReasoningOptions = { } ) +/** + * OpenRouter provider-routing options, plus optional fields to override the + * upstream endpoint with an OpenAI-compatible base URL (e.g. Ollama, LM Studio, + * self-hosted). The routing fields below are OpenRouter-specific and are + * ignored when `baseUrl` is set. + */ export type OpenRouterProviderRoutingOptions = { /** * List of provider slugs to try in order (e.g. ["anthropic", "openai"]) @@ -90,6 +96,20 @@ export type OpenRouterProviderRoutingOptions = { audio?: number | string request?: number | string } + /** + * Override the upstream LLM endpoint with an OpenAI-compatible base URL. + * When set, this agent's calls bypass the Codebuff backend / OpenRouter + * and go directly to `${baseUrl}/chat/completions`. + * + * Other keys above (order, allow_fallbacks, ...) are OpenRouter-specific and + * are ignored when baseUrl is set. Falls back to env var CODEBUFF_BASE_URL. + */ + baseUrl?: string + /** + * API key for the endpoint set in `baseUrl`. Ignored if baseUrl is unset. + * Falls back to env var CODEBUFF_PROVIDER_API_KEY. 
+ */ + apiKey?: string } export type OpenRouterProviderOptions = { diff --git a/common/src/types/dynamic-agent-template.ts b/common/src/types/dynamic-agent-template.ts index d0a4097305..b9762566d1 100644 --- a/common/src/types/dynamic-agent-template.ts +++ b/common/src/types/dynamic-agent-template.ts @@ -167,6 +167,8 @@ export const DynamicAgentDefinitionSchema = z.object({ request: z.union([z.number(), z.string()]).optional(), }) .optional(), + baseUrl: z.string().url().optional(), + apiKey: z.string().optional(), }) .optional(), diff --git a/sdk/src/env.ts b/sdk/src/env.ts index 033e3f245d..9814712dfa 100644 --- a/sdk/src/env.ts +++ b/sdk/src/env.ts @@ -7,6 +7,10 @@ import { BYOK_OPENROUTER_ENV_VAR } from '@codebuff/common/constants/byok' import { CHATGPT_OAUTH_TOKEN_ENV_VAR } from '@codebuff/common/constants/chatgpt-oauth' +import { + PROVIDER_API_KEY_ENV_VAR, + PROVIDER_BASE_URL_ENV_VAR, +} from '@codebuff/common/constants/custom-provider' import { API_KEY_ENV_VAR } from '@codebuff/common/constants/paths' import { getBaseEnv } from '@codebuff/common/env-process' @@ -48,3 +52,19 @@ export const getByokOpenrouterApiKeyFromEnv = (): string | undefined => { export const getChatGptOAuthTokenFromEnv = (): string | undefined => { return process.env[CHATGPT_OAUTH_TOKEN_ENV_VAR] } + +/** + * Get the custom upstream provider base URL from environment. + * Used when an agent's providerOptions.baseUrl is unset and no CodebuffClient option overrides it. + */ +export const getCustomProviderBaseUrlFromEnv = (): string | undefined => { + return process.env[PROVIDER_BASE_URL_ENV_VAR] +} + +/** + * Get the custom upstream provider API key from environment. + * Paired with getCustomProviderBaseUrlFromEnv. 
+ */ +export const getCustomProviderApiKeyFromEnv = (): string | undefined => { + return process.env[PROVIDER_API_KEY_ENV_VAR] +} From d3a6e084c3c2dc048d5eca66e811c4c5c23821b6 Mon Sep 17 00:00:00 2001 From: vraj Date: Fri, 15 May 2026 23:10:49 -0700 Subject: [PATCH 2/8] feat(sdk): add custom OpenAI-compatible provider branch to getModelForRequest MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When ModelRequestParams.customProvider.baseUrl is set, return an OpenAICompatibleChatLanguageModel pointed at that endpoint and flag the result with isCustomProvider: true. Bypasses both the Codebuff backend and the ChatGPT OAuth direct path. No metadataExtractor — direct calls don't flow through Codebuff cost accounting. Mirrors the existing ChatGPT-OAuth-direct branch pattern. Trailing slashes on baseUrl are trimmed. apiKey defaults to "codebuff" when absent (most local runtimes ignore it). Adds 5 unit tests covering the new branch, regression-tested against existing model-provider-free-mode tests. Part of issue #678. 
Co-Authored-By: Claude Opus 4.7 --- .../__tests__/model-provider-custom.test.ts | 70 +++++++++++++++++++ sdk/src/impl/model-provider.ts | 51 +++++++++++++- 2 files changed, 120 insertions(+), 1 deletion(-) create mode 100644 sdk/src/impl/__tests__/model-provider-custom.test.ts diff --git a/sdk/src/impl/__tests__/model-provider-custom.test.ts b/sdk/src/impl/__tests__/model-provider-custom.test.ts new file mode 100644 index 0000000000..62e5e19a86 --- /dev/null +++ b/sdk/src/impl/__tests__/model-provider-custom.test.ts @@ -0,0 +1,70 @@ +import { describe, expect, test, afterEach, mock } from 'bun:test' + +describe('getModelForRequest with customProvider', () => { + afterEach(() => { + mock.restore() + }) + + test('returns isCustomProvider: true when customProvider.baseUrl is set', async () => { + const { getModelForRequest } = await import('../model-provider') + + const result = await getModelForRequest({ + apiKey: 'cb-test-key', + model: 'gemma2:9b', + customProvider: { baseUrl: 'http://localhost:11434/v1', apiKey: 'ollama' }, + }) + + expect(result.isCustomProvider).toBe(true) + expect(result.isChatGptOAuth).toBe(false) + expect(result.model).toBeDefined() + expect((result.model as any).modelId).toBe('gemma2:9b') + }) + + test('does not return isCustomProvider when baseUrl is missing', async () => { + const { getModelForRequest } = await import('../model-provider') + + const result = await getModelForRequest({ + apiKey: 'cb-test-key', + model: 'anthropic/claude-sonnet-4', + }) + + expect(result.isCustomProvider).toBe(false) + }) + + test('customProvider takes precedence over ChatGPT OAuth eligibility', async () => { + const { getModelForRequest } = await import('../model-provider') + + const result = await getModelForRequest({ + apiKey: 'cb-test-key', + model: 'openai/gpt-5.3', + customProvider: { baseUrl: 'http://localhost:11434/v1' }, + }) + + expect(result.isCustomProvider).toBe(true) + expect(result.isChatGptOAuth).toBe(false) + }) + + test('trims trailing slash 
from baseUrl (constructs cleanly)', async () => { + const { getModelForRequest } = await import('../model-provider') + + const result = await getModelForRequest({ + apiKey: 'cb-test-key', + model: 'gemma2:9b', + customProvider: { baseUrl: 'http://localhost:11434/v1/' }, + }) + + expect(result.isCustomProvider).toBe(true) + }) + + test('omitting apiKey is allowed', async () => { + const { getModelForRequest } = await import('../model-provider') + + const result = await getModelForRequest({ + apiKey: 'cb-test-key', + model: 'gemma2:9b', + customProvider: { baseUrl: 'http://localhost:11434/v1' }, + }) + + expect(result.isCustomProvider).toBe(true) + }) +}) diff --git a/sdk/src/impl/model-provider.ts b/sdk/src/impl/model-provider.ts index 83e016c611..de7e3c14a7 100644 --- a/sdk/src/impl/model-provider.ts +++ b/sdk/src/impl/model-provider.ts @@ -86,6 +86,8 @@ export interface ModelRequestParams { skipChatGptOAuth?: boolean /** Cost mode (e.g. 'free') — affects fallback behavior for OAuth routes */ costMode?: string + /** When set, route this request directly to the OpenAI-compatible endpoint and bypass Codebuff/OAuth. */ + customProvider?: { baseUrl: string; apiKey?: string } } /** @@ -96,6 +98,8 @@ export interface ModelResult { model: LanguageModel /** Whether this model uses ChatGPT OAuth direct (affects cost tracking) */ isChatGptOAuth: boolean + /** Whether this model uses a custom OpenAI-compatible endpoint (affects cost tracking + metadata) */ + isCustomProvider: boolean } // Usage accounting type for OpenRouter/Codebuff backend responses @@ -115,7 +119,21 @@ type OpenRouterUsageAccounting = { * This function is async because it may need to refresh the OAuth token. 
*/ export async function getModelForRequest(params: ModelRequestParams): Promise { - const { apiKey, model, skipChatGptOAuth, costMode } = params + const { apiKey, model, skipChatGptOAuth, costMode, customProvider } = params + + // 1) Custom OpenAI-compatible endpoint wins — explicit per-agent / client / env override. + // Bypasses Codebuff backend AND ChatGPT OAuth. + if (customProvider?.baseUrl) { + return { + model: createCustomProviderModel({ + model, + baseUrl: customProvider.baseUrl, + apiKey: customProvider.apiKey, + }), + isChatGptOAuth: false, + isCustomProvider: true, + } + } // Check if we should use ChatGPT OAuth direct // Only attempt for allowlisted models; non-allowlisted models silently fall through to backend. @@ -140,6 +158,7 @@ export async function getModelForRequest(params: ModelRequestParams): Promise `${trimmedBase}${endpoint}`, + headers: () => ({ + Authorization: `Bearer ${apiKey ?? 'codebuff'}`, + 'Content-Type': 'application/json', + 'user-agent': `ai-sdk/openai-compatible/${VERSION}/codebuff-custom-provider`, + }), + fetch: undefined, + includeUsage: undefined, + supportsStructuredOutputs: true, + }) +} From 4c6aebfb3eb8e3ab71507df8dd32df39c27e1b09 Mon Sep 17 00:00:00 2001 From: vraj Date: Fri, 15 May 2026 23:16:56 -0700 Subject: [PATCH 3/8] feat(sdk): wire custom-provider precedence ladder and CodebuffClient option MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In promptAiSdkStream, resolve baseUrl/apiKey across three layers (agent providerOptions > clientCustomProvider > env vars) and forward to getModelForRequest. 
When the custom-provider path is active: • maxRetries: 1 (one retry handles brief model-load stalls; no further fallback — would violate user intent re: privacy / cost) • Skip codebuff_metadata and OpenRouter routing keys in the request body (same as the existing ChatGPT-OAuth-direct branch) • Wrap connection failures and 404s in friendly messages pointing at the configured URL and model Plumbs CodebuffClient.{providerBaseUrl, providerApiKey} through runOnce → getAgentRuntimeImpl, which wraps promptAiSdkStream with a closure that injects clientCustomProvider on every call. Adds an integration test documenting the precedence contract. Resolves issue #678 (implementation; smoke verification follows). Co-Authored-By: Claude Opus 4.7 --- common/src/types/contracts/llm.ts | 3 + .../__tests__/model-provider-custom.test.ts | 24 ++++ sdk/src/impl/agent-runtime.ts | 8 +- sdk/src/impl/llm.ts | 120 +++++++++++++++++- sdk/src/run.ts | 15 +++ 5 files changed, 164 insertions(+), 6 deletions(-) diff --git a/common/src/types/contracts/llm.ts b/common/src/types/contracts/llm.ts index 11c5a5ba0c..853ef54b42 100644 --- a/common/src/types/contracts/llm.ts +++ b/common/src/types/contracts/llm.ts @@ -56,6 +56,9 @@ export type PromptAiSdkStreamFn = ( includeCacheControl?: boolean cacheDebugCorrelation?: string agentProviderOptions?: OpenRouterProviderRoutingOptions + /** Fallback custom-provider config injected by the SDK Client. + * Lower precedence than an agent's own providerOptions.baseUrl. 
*/ + clientCustomProvider?: { baseUrl?: string; apiKey?: string } /** List of agents that can be spawned - used to transform agent tool calls */ spawnableAgents?: string[] /** Map of locally available agent templates - used to transform agent tool calls */ diff --git a/sdk/src/impl/__tests__/model-provider-custom.test.ts b/sdk/src/impl/__tests__/model-provider-custom.test.ts index 62e5e19a86..0ff4336355 100644 --- a/sdk/src/impl/__tests__/model-provider-custom.test.ts +++ b/sdk/src/impl/__tests__/model-provider-custom.test.ts @@ -67,4 +67,28 @@ describe('getModelForRequest with customProvider', () => { expect(result.isCustomProvider).toBe(true) }) + + test('customProvider arg drives selection regardless of env (precedence contract)', async () => { + // This documents the contract: getModelForRequest receives the *resolved* + // customProvider — the caller (promptAiSdkStream) is responsible for + // applying the agent > client > env precedence ladder before calling. + process.env.CODEBUFF_BASE_URL = 'http://from-env:11434/v1' + process.env.CODEBUFF_PROVIDER_API_KEY = 'env-key' + + const { getModelForRequest } = await import('../model-provider') + const result = await getModelForRequest({ + apiKey: 'cb-key', + model: 'gemma2:9b', + customProvider: { + baseUrl: 'http://from-agent:11434/v1', + apiKey: 'agent-key', + }, + }) + + expect(result.isCustomProvider).toBe(true) + expect(result.model).toBeDefined() + + delete process.env.CODEBUFF_BASE_URL + delete process.env.CODEBUFF_PROVIDER_API_KEY + }) }) diff --git a/sdk/src/impl/agent-runtime.ts b/sdk/src/impl/agent-runtime.ts index 17858d8196..d7d077bf76 100644 --- a/sdk/src/impl/agent-runtime.ts +++ b/sdk/src/impl/agent-runtime.ts @@ -29,6 +29,8 @@ export function getAgentRuntimeImpl( logger?: Logger apiKey: string clientEnv?: ClientEnv + /** Default custom provider used for runs that don't set one per-agent. 
*/ + clientCustomProvider?: { baseUrl?: string; apiKey?: string } } & Pick< AgentRuntimeScopedDeps, | 'handleStepsLogChunk' @@ -44,6 +46,7 @@ export function getAgentRuntimeImpl( logger, apiKey, clientEnv = clientEnvDefault, + clientCustomProvider, handleStepsLogChunk, requestToolCall, requestMcpToolData, @@ -87,7 +90,10 @@ export function getAgentRuntimeImpl( }), // LLM - promptAiSdkStream, + promptAiSdkStream: clientCustomProvider + ? (streamParams) => + promptAiSdkStream({ ...streamParams, clientCustomProvider }) + : promptAiSdkStream, promptAiSdk, promptAiSdkStructured, diff --git a/sdk/src/impl/llm.ts b/sdk/src/impl/llm.ts index 60bb678bb1..cf86e0e4de 100644 --- a/sdk/src/impl/llm.ts +++ b/sdk/src/impl/llm.ts @@ -23,6 +23,10 @@ import { markChatGptOAuthRateLimited, } from './model-provider' import { refreshChatGptOAuthToken } from '../credentials' +import { + getCustomProviderApiKeyFromEnv, + getCustomProviderBaseUrlFromEnv, +} from '../env' import { getErrorStatusCode } from '../error-utils' import type { ModelRequestParams } from './model-provider' @@ -130,6 +134,54 @@ type OpenRouterUsageAccounting = { } } +/** + * Wrap raw errors from a custom OpenAI-compatible endpoint in a friendly, + * actionable message. Distinguishes connection failures (provider down, + * wrong URL) from model-not-found errors. 
+ */ +function buildCustomProviderError(args: { + baseUrl: string + model: string + rawMessage: string +}): string { + const lower = args.rawMessage.toLowerCase() + const isConnectionError = + lower.includes('econnrefused') || + lower.includes('fetch failed') || + lower.includes('etimedout') || + lower.includes('enotfound') || + lower.includes('socket hang up') + const isModelNotFound = + lower.includes('model not found') || + lower.includes('does not exist') || + (lower.includes('404') && lower.includes(args.model.toLowerCase())) + + if (isConnectionError) { + return [ + `Cannot reach LLM provider at ${args.baseUrl}.`, + ``, + `Check:`, + ` • Is the provider running? (e.g. \`ollama serve\` or LM Studio's Local Server)`, + ` • Is the URL correct? Currently configured: ${args.baseUrl}`, + ` • Is the model '${args.model}' loaded? (e.g. \`ollama list\`)`, + ``, + `Original error: ${args.rawMessage}`, + ].join('\n') + } + if (isModelNotFound) { + return [ + `Model '${args.model}' not found at ${args.baseUrl}.`, + ``, + `Check:`, + ` • Pull the model first: \`ollama pull ${args.model}\``, + ` • Verify the exact tag with \`ollama list\``, + ``, + `Original error: ${args.rawMessage}`, + ].join('\n') + } + return args.rawMessage +} + /** * Check if an error is an OAuth rate limit error that should trigger fallback. */ @@ -303,13 +355,34 @@ export async function* promptAiSdkStream( return promptAborted('User cancelled input') } + // Resolve custom-provider precedence: agent > client option > env. + // apiKey is paired with whichever URL "wins" to avoid mixing sources. + const agentBaseUrl = params.agentProviderOptions?.baseUrl + const agentApiKey = params.agentProviderOptions?.apiKey + const clientBaseUrl = params.clientCustomProvider?.baseUrl + const clientApiKey = params.clientCustomProvider?.apiKey + const envBaseUrl = getCustomProviderBaseUrlFromEnv() + const envApiKey = getCustomProviderApiKeyFromEnv() + + const resolvedBaseUrl = agentBaseUrl ?? clientBaseUrl ?? 
envBaseUrl + const resolvedApiKey = agentBaseUrl + ? agentApiKey + : clientBaseUrl + ? clientApiKey + : envBaseUrl + ? envApiKey + : undefined + const modelParams: ModelRequestParams = { apiKey: params.apiKey, model: params.model, skipChatGptOAuth: params.skipChatGptOAuth, costMode: params.costMode, + ...(resolvedBaseUrl + ? { customProvider: { baseUrl: resolvedBaseUrl, apiKey: resolvedApiKey } } + : {}), } - const { model: aiSDKModel, isChatGptOAuth } = + const { model: aiSDKModel, isChatGptOAuth, isCustomProvider } = await getModelForRequest(modelParams) if (isChatGptOAuth) { @@ -329,9 +402,14 @@ export async function* promptAiSdkStream( prompt: undefined, model: aiSDKModel, messages: convertCbToModelMessages(params), - ...(isChatGptOAuth && { maxRetries: 0 }), - // For ChatGPT OAuth direct, don't send codebuff metadata/provider options to OpenAI - ...(isChatGptOAuth + // ChatGPT OAuth: no retries (we fall back to Codebuff on first failure). + // Custom provider: one retry to handle brief model-load stalls without + // dragging out errors when the provider is actually down. + ...(isChatGptOAuth ? { maxRetries: 0 } : {}), + ...(isCustomProvider ? { maxRetries: 1 } : {}), + // Direct routes (ChatGPT OAuth, custom provider): skip codebuff_metadata + // and OpenRouter routing keys — neither belongs in those request bodies. + ...(isChatGptOAuth || isCustomProvider ? {} : { providerOptions: getProviderOptions({ @@ -458,7 +536,27 @@ export async function* promptAiSdkStream( // Track if we've yielded any content - if so, we can't safely fall back let hasYieldedContent = false - for await (const chunkValue of response.fullStream) { + // For custom-provider streams, a connection refusal at request init throws + // from the iterator before any error chunk is emitted. Rewrap into a + // friendly message so users see "is Ollama running?" not raw "fetch failed". + const stream = isCustomProvider && resolvedBaseUrl + ? 
(async function* () { + try { + yield* response.fullStream + } catch (e) { + const rawMessage = e instanceof Error ? e.message : String(e) + throw new Error( + buildCustomProviderError({ + baseUrl: resolvedBaseUrl, + model: params.model, + rawMessage, + }), + ) + } + })() + : response.fullStream + + for await (const chunkValue of stream) { if (chunkValue.type !== 'text-delta') { const flushed = stopSequenceHandler.flush() if (flushed) { @@ -603,6 +701,18 @@ export async function* promptAiSdkStream( 'Error in AI SDK stream', ) + // For custom-provider failures, rewrap with a friendly, actionable message + // before throwing so users see "is Ollama running?" not raw "fetch failed". + if (isCustomProvider && resolvedBaseUrl) { + throw new Error( + buildCustomProviderError({ + baseUrl: resolvedBaseUrl, + model: params.model, + rawMessage: errorMessage, + }), + ) + } + // For all other errors, throw them -- they are fatal. throw chunkValue.error } diff --git a/sdk/src/run.ts b/sdk/src/run.ts index f5794a7def..36944ed0e9 100644 --- a/sdk/src/run.ts +++ b/sdk/src/run.ts @@ -85,6 +85,16 @@ export type CodebuffClientOptions = { maxAgentSteps?: number env?: Record + /** + * Default custom OpenAI-compatible provider base URL for runs that don't set + * one per-agent. Used for local models (Ollama, LM Studio) or self-hosted + * endpoints. Lower precedence than an agent's own providerOptions.baseUrl; + * higher precedence than the CODEBUFF_BASE_URL env var. + */ + providerBaseUrl?: string + /** Default API key paired with providerBaseUrl. Ignored if providerBaseUrl is unset. 
*/ + providerApiKey?: string + handleEvent?: (event: PrintModeEvent) => void | Promise handleStreamChunk?: ( chunk: @@ -198,6 +208,8 @@ async function runOnce({ agentDefinitions, maxAgentSteps = MAX_AGENT_STEPS_DEFAULT, env, + providerBaseUrl, + providerApiKey, handleEvent, handleStreamChunk, @@ -376,6 +388,9 @@ async function runOnce({ const agentRuntimeImpl = getAgentRuntimeImpl({ logger, apiKey, + clientCustomProvider: providerBaseUrl + ? { baseUrl: providerBaseUrl, apiKey: providerApiKey } + : undefined, handleStepsLogChunk: () => { // Does nothing for now }, From c9ca1e819b651cbe8d19b6ce9fc60130debd1895 Mon Sep 17 00:00:00 2001 From: vraj Date: Sat, 16 May 2026 04:25:25 -0700 Subject: [PATCH 4/8] fix(sdk): broaden connection-error patterns for custom provider wrapper Bun's fetch surfaces ECONNREFUSED as code='ConnectionRefused' with message "Unable to connect. Is the computer able to access the url?". Neither matched the substring checks in the original error wrapper, so the friendly message was never shown under Bun. Now check both the raw message and the error.code property across Bun/Node patterns. Co-Authored-By: Claude Opus 4.7 --- sdk/src/impl/llm.ts | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/sdk/src/impl/llm.ts b/sdk/src/impl/llm.ts index cf86e0e4de..1bb6c06884 100644 --- a/sdk/src/impl/llm.ts +++ b/sdk/src/impl/llm.ts @@ -143,14 +143,23 @@ function buildCustomProviderError(args: { baseUrl: string model: string rawMessage: string + rawCode?: string }): string { const lower = args.rawMessage.toLowerCase() + const codeLower = (args.rawCode ?? 
'').toLowerCase() const isConnectionError = lower.includes('econnrefused') || + lower.includes('connectionrefused') || + lower.includes('connection refused') || + lower.includes('unable to connect') || lower.includes('fetch failed') || lower.includes('etimedout') || lower.includes('enotfound') || - lower.includes('socket hang up') + lower.includes('socket hang up') || + codeLower === 'connectionrefused' || + codeLower === 'econnrefused' || + codeLower === 'enotfound' || + codeLower === 'etimedout' const isModelNotFound = lower.includes('model not found') || lower.includes('does not exist') || @@ -545,11 +554,16 @@ export async function* promptAiSdkStream( yield* response.fullStream } catch (e) { const rawMessage = e instanceof Error ? e.message : String(e) + const rawCode = + e && typeof e === 'object' && 'code' in e + ? String((e as { code?: unknown }).code ?? '') + : undefined throw new Error( buildCustomProviderError({ baseUrl: resolvedBaseUrl, model: params.model, rawMessage, + rawCode, }), ) } @@ -704,11 +718,18 @@ export async function* promptAiSdkStream( // For custom-provider failures, rewrap with a friendly, actionable message // before throwing so users see "is Ollama running?" not raw "fetch failed". if (isCustomProvider && resolvedBaseUrl) { + const rawCode = + chunkValue.error && + typeof chunkValue.error === 'object' && + 'code' in chunkValue.error + ? String((chunkValue.error as { code?: unknown }).code ?? 
'') + : undefined throw new Error( buildCustomProviderError({ baseUrl: resolvedBaseUrl, model: params.model, rawMessage: errorMessage, + rawCode, }), ) } From e40e8f67e9ea455fd592eeafc632f0d8674b39fc Mon Sep 17 00:00:00 2001 From: vraj Date: Sat, 16 May 2026 00:08:43 -0700 Subject: [PATCH 5/8] feat(cli): add /local slash command for runtime local-provider toggle MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit UX layer on top of the providerOptions.baseUrl plumbing — lets a user toggle between local and cloud inference without restarting codebuff or editing agent files. Subcommands: /local — show current status /local on — enable with default Ollama URL (localhost:11434/v1) /local on <url> — enable with a specific URL /local set <url> — alias for `/local on <url>` /local off — disable, return to Codebuff backend /local status — same as `/local` Implementation mutates process.env.CODEBUFF_BASE_URL at runtime. The SDK reads this env var lazily on every promptAiSdkStream call, so changes take effect immediately for the next request without needing to rebuild the CodebuffClient. Agent-level providerOptions.baseUrl still wins — /local only affects agents that don't set their own baseUrl. Communicated in the enable message so users aren't surprised. 29 unit tests covering: parse/apply separation, all subcommands and aliases, URL validation, idempotent disable, end-to-end toggle cycle, and verification that mutations are visible to the SDK env getter. All 2354 existing CLI tests still pass. 
Co-Authored-By: Claude Opus 4.7 --- .../commands/__tests__/local-provider.test.ts | 235 ++++++++++++++++++ cli/src/commands/command-registry.ts | 14 ++ cli/src/commands/local-provider.ts | 174 +++++++++++++ cli/src/data/slash-commands.ts | 5 + 4 files changed, 428 insertions(+) create mode 100644 cli/src/commands/__tests__/local-provider.test.ts create mode 100644 cli/src/commands/local-provider.ts diff --git a/cli/src/commands/__tests__/local-provider.test.ts b/cli/src/commands/__tests__/local-provider.test.ts new file mode 100644 index 0000000000..aca1e35c01 --- /dev/null +++ b/cli/src/commands/__tests__/local-provider.test.ts @@ -0,0 +1,235 @@ +import { describe, expect, test, beforeEach, afterEach } from 'bun:test' + +import { + applyLocalAction, + DEFAULT_LOCAL_BASE_URL, + getActiveLocalBaseUrl, + parseLocalArgs, +} from '../local-provider' + +describe('parseLocalArgs', () => { + test('empty args → status', () => { + expect(parseLocalArgs('').kind).toBe('status') + expect(parseLocalArgs(' ').kind).toBe('status') + expect(parseLocalArgs('\t\n').kind).toBe('status') + }) + + test('"status" → status', () => { + expect(parseLocalArgs('status').kind).toBe('status') + expect(parseLocalArgs(' status ').kind).toBe('status') + expect(parseLocalArgs('STATUS').kind).toBe('status') // case-insensitive + }) + + test('"on" with no URL → enable with default Ollama URL', () => { + const r = parseLocalArgs('on') + expect(r.kind).toBe('enable') + if (r.kind === 'enable') expect(r.baseUrl).toBe(DEFAULT_LOCAL_BASE_URL) + }) + + test('"on " → enable with that URL', () => { + const r = parseLocalArgs('on http://localhost:1234/v1') + expect(r.kind).toBe('enable') + if (r.kind === 'enable') expect(r.baseUrl).toBe('http://localhost:1234/v1') + }) + + test('"enable " alias works', () => { + const r = parseLocalArgs('enable http://localhost:1234/v1') + expect(r.kind).toBe('enable') + }) + + test('"set " alias works', () => { + const r = parseLocalArgs('set http://localhost:1234/v1') + 
expect(r.kind).toBe('enable') + if (r.kind === 'enable') expect(r.baseUrl).toBe('http://localhost:1234/v1') + }) + + test('"set" with no URL → invalid', () => { + const r = parseLocalArgs('set') + expect(r.kind).toBe('enable') + // "set" with no URL falls back to default — that's debatable but matches "on" + if (r.kind === 'enable') expect(r.baseUrl).toBe(DEFAULT_LOCAL_BASE_URL) + }) + + test('bare URL (no subcommand) → treated as enable', () => { + const r = parseLocalArgs('http://localhost:11434/v1') + expect(r.kind).toBe('enable') + if (r.kind === 'enable') expect(r.baseUrl).toBe('http://localhost:11434/v1') + }) + + test('"off" → disable', () => { + expect(parseLocalArgs('off').kind).toBe('disable') + expect(parseLocalArgs('disable').kind).toBe('disable') + }) + + test('"off" with stray args → invalid', () => { + const r = parseLocalArgs('off http://oops') + expect(r.kind).toBe('invalid') + }) + + test('non-http URL → invalid', () => { + const r = parseLocalArgs('on ftp://localhost') + expect(r.kind).toBe('invalid') + }) + + test('malformed URL → invalid', () => { + const r = parseLocalArgs('on http://') + expect(r.kind).toBe('invalid') + }) + + test('unknown subcommand → invalid with helpful message', () => { + const r = parseLocalArgs('foobar') + expect(r.kind).toBe('invalid') + if (r.kind === 'invalid') expect(r.reason).toContain('Unknown') + }) + + test('https URL is accepted (for remote endpoints)', () => { + const r = parseLocalArgs('on https://my-vm.example.com:8080/v1') + expect(r.kind).toBe('enable') + if (r.kind === 'enable') + expect(r.baseUrl).toBe('https://my-vm.example.com:8080/v1') + }) + + test('extra whitespace in URL is preserved as-is when valid', () => { + const r = parseLocalArgs(' on http://localhost:11434/v1 ') + expect(r.kind).toBe('enable') + if (r.kind === 'enable') expect(r.baseUrl).toBe('http://localhost:11434/v1') + }) +}) + +describe('applyLocalAction (side effects on process.env)', () => { + let originalBaseUrl: string | undefined 
+ let originalApiKey: string | undefined + + beforeEach(() => { + originalBaseUrl = process.env.CODEBUFF_BASE_URL + originalApiKey = process.env.CODEBUFF_PROVIDER_API_KEY + delete process.env.CODEBUFF_BASE_URL + delete process.env.CODEBUFF_PROVIDER_API_KEY + }) + + afterEach(() => { + if (originalBaseUrl === undefined) delete process.env.CODEBUFF_BASE_URL + else process.env.CODEBUFF_BASE_URL = originalBaseUrl + if (originalApiKey === undefined) + delete process.env.CODEBUFF_PROVIDER_API_KEY + else process.env.CODEBUFF_PROVIDER_API_KEY = originalApiKey + }) + + test('enable sets process.env.CODEBUFF_BASE_URL', () => { + const msg = applyLocalAction({ + kind: 'enable', + baseUrl: 'http://localhost:11434/v1', + }) + expect(process.env.CODEBUFF_BASE_URL).toBe('http://localhost:11434/v1') + expect(getActiveLocalBaseUrl()).toBe('http://localhost:11434/v1') + expect(msg).toContain('ON') + expect(msg).toContain('http://localhost:11434/v1') + }) + + test('disable deletes process.env.CODEBUFF_BASE_URL', () => { + process.env.CODEBUFF_BASE_URL = 'http://localhost:11434/v1' + const msg = applyLocalAction({ kind: 'disable' }) + expect(process.env.CODEBUFF_BASE_URL).toBeUndefined() + expect(msg).toContain('OFF') + expect(msg).toContain('Previously: http://localhost:11434/v1') + }) + + test('disable also clears the API key env var', () => { + process.env.CODEBUFF_BASE_URL = 'http://localhost:11434/v1' + process.env.CODEBUFF_PROVIDER_API_KEY = 'ollama' + applyLocalAction({ kind: 'disable' }) + expect(process.env.CODEBUFF_BASE_URL).toBeUndefined() + expect(process.env.CODEBUFF_PROVIDER_API_KEY).toBeUndefined() + }) + + test('disable when already off is idempotent and friendly', () => { + const msg = applyLocalAction({ kind: 'disable' }) + expect(msg).toContain('already OFF') + }) + + test('status when off shows OFF', () => { + const msg = applyLocalAction({ kind: 'status' }) + expect(msg).toContain('OFF') + }) + + test('status when on shows the URL', () => { + 
process.env.CODEBUFF_BASE_URL = 'http://localhost:1234/v1' + const msg = applyLocalAction({ kind: 'status' }) + expect(msg).toContain('ON') + expect(msg).toContain('http://localhost:1234/v1') + }) + + test('invalid action returns the reason prefixed', () => { + const msg = applyLocalAction({ + kind: 'invalid', + reason: 'something wrong', + }) + expect(msg).toContain('something wrong') + expect(process.env.CODEBUFF_BASE_URL).toBeUndefined() + }) + + test('enable overwrites a previously-set URL', () => { + applyLocalAction({ kind: 'enable', baseUrl: 'http://localhost:11434/v1' }) + applyLocalAction({ kind: 'enable', baseUrl: 'http://localhost:1234/v1' }) + expect(process.env.CODEBUFF_BASE_URL).toBe('http://localhost:1234/v1') + }) + + test('full toggle cycle: off → on → status → off', () => { + expect(applyLocalAction({ kind: 'status' })).toContain('OFF') + + applyLocalAction({ kind: 'enable', baseUrl: DEFAULT_LOCAL_BASE_URL }) + expect(getActiveLocalBaseUrl()).toBe(DEFAULT_LOCAL_BASE_URL) + + const statusOn = applyLocalAction({ kind: 'status' }) + expect(statusOn).toContain('ON') + + const off = applyLocalAction({ kind: 'disable' }) + expect(off).toContain('OFF') + expect(off).toContain(`Previously: ${DEFAULT_LOCAL_BASE_URL}`) + expect(getActiveLocalBaseUrl()).toBeUndefined() + }) + + test('mentions agent-level override in the enable message', () => { + const msg = applyLocalAction({ + kind: 'enable', + baseUrl: DEFAULT_LOCAL_BASE_URL, + }) + expect(msg.toLowerCase()).toContain('providerOptions.baseUrl'.toLowerCase()) + }) +}) + +describe('parseLocalArgs + applyLocalAction end-to-end', () => { + let originalBaseUrl: string | undefined + + beforeEach(() => { + originalBaseUrl = process.env.CODEBUFF_BASE_URL + delete process.env.CODEBUFF_BASE_URL + }) + + afterEach(() => { + if (originalBaseUrl === undefined) delete process.env.CODEBUFF_BASE_URL + else process.env.CODEBUFF_BASE_URL = originalBaseUrl + }) + + test('user types `/local on` → URL is set to default', () 
=> { + applyLocalAction(parseLocalArgs('on')) + expect(process.env.CODEBUFF_BASE_URL).toBe(DEFAULT_LOCAL_BASE_URL) + }) + + test('user types `/local on http://x` → URL is set', () => { + applyLocalAction(parseLocalArgs('on http://x.example.com:9999/v1')) + expect(process.env.CODEBUFF_BASE_URL).toBe('http://x.example.com:9999/v1') + }) + + test('user types `/local off` after `/local on` → URL is cleared', () => { + applyLocalAction(parseLocalArgs('on')) + applyLocalAction(parseLocalArgs('off')) + expect(process.env.CODEBUFF_BASE_URL).toBeUndefined() + }) + + test('user types `/local garbage` → no env change, error message returned', () => { + const msg = applyLocalAction(parseLocalArgs('garbage')) + expect(process.env.CODEBUFF_BASE_URL).toBeUndefined() + expect(msg).toContain('Unknown') + }) +}) diff --git a/cli/src/commands/command-registry.ts b/cli/src/commands/command-registry.ts index 0eda49607e..a1cd4ea308 100644 --- a/cli/src/commands/command-registry.ts +++ b/cli/src/commands/command-registry.ts @@ -5,6 +5,7 @@ import { handleAdsEnable, handleAdsDisable } from './ads' import { handleHelpCommand } from './help' import { handleImageCommand } from './image' import { handleInitializationFlowLocally } from './init' +import { applyLocalAction, parseLocalArgs } from './local-provider' import { buildInterviewPrompt, buildPlanPrompt, buildReviewPromptFromArgs } from './prompt-builders' import { runBashCommand } from './router' import { handleUsageCommand } from './usage' @@ -392,6 +393,19 @@ const ALL_COMMANDS: CommandDefinition[] = [ clearInput(params) }, }), + defineCommandWithArgs({ + name: 'local', + handler: (params, args) => { + const message = applyLocalAction(parseLocalArgs(args)) + params.setMessages((prev) => [ + ...prev, + getUserMessage(params.inputValue.trim()), + getSystemMessage(message), + ]) + params.saveToHistory(params.inputValue.trim()) + clearInput(params) + }, + }), // Mode commands generated from AGENT_MODES (excluded in Freebuff) 
...(IS_FREEBUFF ? [] : AGENT_MODES).map((mode) => defineCommandWithArgs({ diff --git a/cli/src/commands/local-provider.ts b/cli/src/commands/local-provider.ts new file mode 100644 index 0000000000..be26386ff8 --- /dev/null +++ b/cli/src/commands/local-provider.ts @@ -0,0 +1,174 @@ +/** + * /local slash command — runtime toggle for the custom OpenAI-compatible + * provider (Ollama, LM Studio, self-hosted). + * + * Mutates process.env.CODEBUFF_BASE_URL at runtime. The SDK reads this lazily + * on every promptAiSdkStream call, so changes take effect immediately for the + * next request without needing to rebuild the CodebuffClient. + * + * Subcommands: + * /local — show current status + * /local on — enable with default Ollama URL + * /local on — enable with a specific URL + * /local set — alias for `/local on ` + * /local off — disable, return to Codebuff backend + * /local status — same as `/local` + * + * Agent-level providerOptions.baseUrl always wins; /local only affects agents + * that don't set their own baseUrl. + */ + +import { + PROVIDER_API_KEY_ENV_VAR, + PROVIDER_BASE_URL_ENV_VAR, +} from '@codebuff/common/constants/custom-provider' + +/** Default URL used by `/local on` when the user doesn't specify one. */ +export const DEFAULT_LOCAL_BASE_URL = 'http://localhost:11434/v1' + +export type LocalCommandAction = + | { kind: 'status' } + | { kind: 'enable'; baseUrl: string } + | { kind: 'disable' } + | { kind: 'invalid'; reason: string } + +/** + * Parse the args passed to `/local`. Pure function — no side effects. + * Exported for unit testing. 
+ */ +export function parseLocalArgs(rawArgs: string): LocalCommandAction { + const trimmed = rawArgs.trim() + + // No args → show status + if (!trimmed) { + return { kind: 'status' } + } + + const [subcommand, ...rest] = trimmed.split(/\s+/) + const sub = subcommand.toLowerCase() + const value = rest.join(' ').trim() + + if (sub === 'status') { + return { kind: 'status' } + } + + if (sub === 'off' || sub === 'disable') { + if (value) { + return { + kind: 'invalid', + reason: `\`/local ${sub}\` does not take arguments. Got: "${value}"`, + } + } + return { kind: 'disable' } + } + + if (sub === 'on' || sub === 'enable' || sub === 'set') { + const url = value || DEFAULT_LOCAL_BASE_URL + const validated = validateBaseUrl(url) + if (!validated.ok) { + return { kind: 'invalid', reason: validated.reason } + } + return { kind: 'enable', baseUrl: validated.url } + } + + // Looks like a bare URL (e.g. `/local http://localhost:11434/v1`)? + // Be friendly — treat it as `/local on `. + if (sub.startsWith('http://') || sub.startsWith('https://')) { + const validated = validateBaseUrl(trimmed) + if (!validated.ok) { + return { kind: 'invalid', reason: validated.reason } + } + return { kind: 'enable', baseUrl: validated.url } + } + + return { + kind: 'invalid', + reason: `Unknown /local subcommand: "${subcommand}". Try: on, off, status, or set .`, + } +} + +function validateBaseUrl( + raw: string, +): + | { ok: true; url: string } + | { ok: false; reason: string } { + if (!raw || !raw.trim()) { + return { ok: false, reason: 'URL is required.' } + } + if (!raw.startsWith('http://') && !raw.startsWith('https://')) { + return { + ok: false, + reason: `URL must start with http:// or https://. Got: "${raw}"`, + } + } + try { + // eslint-disable-next-line no-new + new URL(raw) + } catch { + return { ok: false, reason: `Invalid URL: "${raw}"` } + } + return { ok: true, url: raw } +} + +/** + * Read the currently-active local provider URL (or undefined if disabled). 
+ * Reads from process.env so it reflects both shell-set values and /local-set values. + */ +export function getActiveLocalBaseUrl(): string | undefined { + return process.env[PROVIDER_BASE_URL_ENV_VAR] +} + +/** + * Apply an action to process.env. Returns a user-facing message describing what happened. + * Side effects are isolated to this function for testability. + */ +export function applyLocalAction(action: LocalCommandAction): string { + if (action.kind === 'invalid') { + return `❌ ${action.reason}` + } + + if (action.kind === 'status') { + const current = getActiveLocalBaseUrl() + if (!current) { + return [ + 'Local provider: OFF', + '', + 'All agents (without per-agent providerOptions.baseUrl) go through the Codebuff backend.', + '', + 'Enable with: /local on (defaults to ' + DEFAULT_LOCAL_BASE_URL + ')', + ].join('\n') + } + return [ + `Local provider: ON`, + ` URL: ${current}`, + '', + 'Agents without their own providerOptions.baseUrl will use this endpoint.', + 'Disable with: /local off', + ].join('\n') + } + + if (action.kind === 'enable') { + process.env[PROVIDER_BASE_URL_ENV_VAR] = action.baseUrl + return [ + `Local provider: ON`, + ` URL: ${action.baseUrl}`, + '', + 'Note: agents with their own `providerOptions.baseUrl` still win.', + 'Disable with: /local off', + ].join('\n') + } + + // action.kind === 'disable' + const wasSet = getActiveLocalBaseUrl() + delete process.env[PROVIDER_BASE_URL_ENV_VAR] + delete process.env[PROVIDER_API_KEY_ENV_VAR] + if (!wasSet) { + return 'Local provider was already OFF. No change.' 
+ } + return [ + 'Local provider: OFF', + ` Previously: ${wasSet}`, + '', + 'Routing returns to the Codebuff backend.', + ].join('\n') +} diff --git a/cli/src/data/slash-commands.ts b/cli/src/data/slash-commands.ts index 14d71abecd..9a5b82ea96 100644 --- a/cli/src/data/slash-commands.ts +++ b/cli/src/data/slash-commands.ts @@ -163,6 +163,11 @@ const ALL_SLASH_COMMANDS: SlashCommand[] = [ description: 'Attach an image file (or Ctrl+V to paste from clipboard)', aliases: ['img', 'attach'], }, + { + id: 'local', + label: 'local', + description: 'Toggle local LLM provider (Ollama/LM Studio). Usage: /local [on|off|status|set ]', + }, ...MODE_COMMANDS, // { // id: 'publish', From dff5f7edcba14cb98e6310a83b8c86046de1c6e1 Mon Sep 17 00:00:00 2001 From: vraj Date: Sat, 16 May 2026 03:44:24 -0700 Subject: [PATCH 6/8] fix(cli): /local can override the agent's model so requests don't fail with "model not found" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Without this, `/local on` redirected the URL to Ollama but the agent still sent its declared cloud model id (e.g. anthropic/claude-opus-4-7) which Ollama doesn't have — every prompt failed with "model not found". Adds a CODEBUFF_LOCAL_MODEL env var that overrides the agent's model when a custom provider is active. The SDK reads it lazily in promptAiSdkStream, mirroring how the URL is resolved. Override only applies to agents WITHOUT their own providerOptions.baseUrl — explicit per-agent config is left alone. /local subcommands grew: /local on <model> — enable with default URL + model /local on <url> <model> — enable with both /local model <name> — set model after enable /local model clear — drop the override /local list — probe Ollama's /api/tags for models /local <model> — shortcut: same as `on <model>` The status message also nudges users toward setting a model: without an override, it warns that cloud model ids will go to the local provider and fail.
40 unit tests cover: parser shapes (URL only, model only, both, aliases, bare URL, bare model with-colon shortcut), URL/model validation, set-model rejection when local is off, idempotent disable, end-to-end env mutation via the SDK getter. All 3245 existing tests across CLI + SDK + agent-runtime still pass. Co-Authored-By: Claude Opus 4.7 --- .../commands/__tests__/local-provider.test.ts | 299 ++++++++++----- cli/src/commands/command-registry.ts | 15 +- cli/src/commands/local-provider.ts | 341 +++++++++++++++--- cli/src/data/slash-commands.ts | 2 +- common/src/constants/custom-provider.ts | 6 + sdk/src/env.ts | 10 + sdk/src/impl/llm.ts | 19 +- 7 files changed, 530 insertions(+), 162 deletions(-) diff --git a/cli/src/commands/__tests__/local-provider.test.ts b/cli/src/commands/__tests__/local-provider.test.ts index aca1e35c01..b1cc154b66 100644 --- a/cli/src/commands/__tests__/local-provider.test.ts +++ b/cli/src/commands/__tests__/local-provider.test.ts @@ -4,10 +4,11 @@ import { applyLocalAction, DEFAULT_LOCAL_BASE_URL, getActiveLocalBaseUrl, + getActiveLocalModel, parseLocalArgs, } from '../local-provider' -describe('parseLocalArgs', () => { +describe('parseLocalArgs — basic shapes', () => { test('empty args → status', () => { expect(parseLocalArgs('').kind).toBe('status') expect(parseLocalArgs(' ').kind).toBe('status') @@ -17,53 +18,89 @@ describe('parseLocalArgs', () => { test('"status" → status', () => { expect(parseLocalArgs('status').kind).toBe('status') expect(parseLocalArgs(' status ').kind).toBe('status') - expect(parseLocalArgs('STATUS').kind).toBe('status') // case-insensitive + expect(parseLocalArgs('STATUS').kind).toBe('status') }) - test('"on" with no URL → enable with default Ollama URL', () => { + test('"list" / "models" → list', () => { + expect(parseLocalArgs('list').kind).toBe('list') + expect(parseLocalArgs('models').kind).toBe('list') + }) + + test('"off" → disable', () => { + expect(parseLocalArgs('off').kind).toBe('disable') + 
expect(parseLocalArgs('disable').kind).toBe('disable') + }) + + test('"off" with stray args → invalid', () => { + const r = parseLocalArgs('off http://oops') + expect(r.kind).toBe('invalid') + }) + + test('unknown subcommand → invalid', () => { + const r = parseLocalArgs('foobar') + expect(r.kind).toBe('invalid') + if (r.kind === 'invalid') expect(r.reason).toContain('Unknown') + }) +}) + +describe('parseLocalArgs — enable shapes', () => { + test('"on" → enable with default URL, no model', () => { const r = parseLocalArgs('on') expect(r.kind).toBe('enable') - if (r.kind === 'enable') expect(r.baseUrl).toBe(DEFAULT_LOCAL_BASE_URL) + if (r.kind === 'enable') { + expect(r.baseUrl).toBe(DEFAULT_LOCAL_BASE_URL) + expect(r.model).toBeUndefined() + } }) - test('"on " → enable with that URL', () => { + test('"on " → enable with URL only', () => { const r = parseLocalArgs('on http://localhost:1234/v1') expect(r.kind).toBe('enable') - if (r.kind === 'enable') expect(r.baseUrl).toBe('http://localhost:1234/v1') + if (r.kind === 'enable') { + expect(r.baseUrl).toBe('http://localhost:1234/v1') + expect(r.model).toBeUndefined() + } }) - test('"enable " alias works', () => { - const r = parseLocalArgs('enable http://localhost:1234/v1') + test('"on " (model only, no URL) → enable with default URL + model', () => { + const r = parseLocalArgs('on llama3.1:8b') expect(r.kind).toBe('enable') + if (r.kind === 'enable') { + expect(r.baseUrl).toBe(DEFAULT_LOCAL_BASE_URL) + expect(r.model).toBe('llama3.1:8b') + } }) - test('"set " alias works', () => { - const r = parseLocalArgs('set http://localhost:1234/v1') + test('"on " → both set', () => { + const r = parseLocalArgs('on http://localhost:1234/v1 llama3.1:8b') expect(r.kind).toBe('enable') - if (r.kind === 'enable') expect(r.baseUrl).toBe('http://localhost:1234/v1') + if (r.kind === 'enable') { + expect(r.baseUrl).toBe('http://localhost:1234/v1') + expect(r.model).toBe('llama3.1:8b') + } }) - test('"set" with no URL → invalid', () => { 
- const r = parseLocalArgs('set') - expect(r.kind).toBe('enable') - // "set" with no URL falls back to default — that's debatable but matches "on" - if (r.kind === 'enable') expect(r.baseUrl).toBe(DEFAULT_LOCAL_BASE_URL) + test('"enable " and "set " aliases work', () => { + const a = parseLocalArgs('enable http://x:1/v1') + expect(a.kind).toBe('enable') + const b = parseLocalArgs('set gemma4:e2b') + expect(b.kind).toBe('enable') + if (b.kind === 'enable') expect(b.model).toBe('gemma4:e2b') }) - test('bare URL (no subcommand) → treated as enable', () => { + test('bare URL → enable', () => { const r = parseLocalArgs('http://localhost:11434/v1') expect(r.kind).toBe('enable') if (r.kind === 'enable') expect(r.baseUrl).toBe('http://localhost:11434/v1') }) - test('"off" → disable', () => { - expect(parseLocalArgs('off').kind).toBe('disable') - expect(parseLocalArgs('disable').kind).toBe('disable') - }) - - test('"off" with stray args → invalid', () => { - const r = parseLocalArgs('off http://oops') - expect(r.kind).toBe('invalid') + test('bare model tag → enable with default URL + model', () => { + const r = parseLocalArgs('llama3.1:8b') + expect(r.kind).toBe('enable') + if (r.kind === 'enable') { + expect(r.baseUrl).toBe(DEFAULT_LOCAL_BASE_URL) + expect(r.model).toBe('llama3.1:8b') + } }) test('non-http URL → invalid', () => { @@ -76,35 +113,50 @@ describe('parseLocalArgs', () => { expect(r.kind).toBe('invalid') }) - test('unknown subcommand → invalid with helpful message', () => { - const r = parseLocalArgs('foobar') - expect(r.kind).toBe('invalid') - if (r.kind === 'invalid') expect(r.reason).toContain('Unknown') - }) - - test('https URL is accepted (for remote endpoints)', () => { - const r = parseLocalArgs('on https://my-vm.example.com:8080/v1') + test('https URL accepted', () => { + const r = parseLocalArgs('on https://my-vm.example.com:8080/v1 llama3.1:8b') expect(r.kind).toBe('enable') if (r.kind === 'enable') 
expect(r.baseUrl).toBe('https://my-vm.example.com:8080/v1') }) +}) + +describe('parseLocalArgs — model subcommand', () => { + test('"model " → set-model', () => { + const r = parseLocalArgs('model llama3.1:8b') + expect(r.kind).toBe('set-model') + if (r.kind === 'set-model') expect(r.model).toBe('llama3.1:8b') + }) - test('extra whitespace in URL is preserved as-is when valid', () => { - const r = parseLocalArgs(' on http://localhost:11434/v1 ') - expect(r.kind).toBe('enable') - if (r.kind === 'enable') expect(r.baseUrl).toBe('http://localhost:11434/v1') + test('"model clear" / "model off" / "model none" → clear-model', () => { + expect(parseLocalArgs('model clear').kind).toBe('clear-model') + expect(parseLocalArgs('model off').kind).toBe('clear-model') + expect(parseLocalArgs('model none').kind).toBe('clear-model') + }) + + test('"model" without name → invalid', () => { + const r = parseLocalArgs('model') + expect(r.kind).toBe('invalid') + }) + + test('"model " → invalid', () => { + const r = parseLocalArgs('model --x') + expect(r.kind).toBe('invalid') }) }) describe('applyLocalAction (side effects on process.env)', () => { let originalBaseUrl: string | undefined let originalApiKey: string | undefined + let originalModel: string | undefined beforeEach(() => { originalBaseUrl = process.env.CODEBUFF_BASE_URL originalApiKey = process.env.CODEBUFF_PROVIDER_API_KEY + originalModel = process.env.CODEBUFF_LOCAL_MODEL delete process.env.CODEBUFF_BASE_URL delete process.env.CODEBUFF_PROVIDER_API_KEY + delete process.env.CODEBUFF_LOCAL_MODEL }) afterEach(() => { @@ -113,123 +165,180 @@ describe('applyLocalAction (side effects on process.env)', () => { if (originalApiKey === undefined) delete process.env.CODEBUFF_PROVIDER_API_KEY else process.env.CODEBUFF_PROVIDER_API_KEY = originalApiKey + if (originalModel === undefined) delete process.env.CODEBUFF_LOCAL_MODEL + else process.env.CODEBUFF_LOCAL_MODEL = originalModel }) - test('enable sets process.env.CODEBUFF_BASE_URL', () 
=> { - const msg = applyLocalAction({ + test('enable without model sets baseUrl, clears any previous model override', async () => { + process.env.CODEBUFF_LOCAL_MODEL = 'stale-model' + const msg = await applyLocalAction({ kind: 'enable', baseUrl: 'http://localhost:11434/v1', }) expect(process.env.CODEBUFF_BASE_URL).toBe('http://localhost:11434/v1') - expect(getActiveLocalBaseUrl()).toBe('http://localhost:11434/v1') + expect(process.env.CODEBUFF_LOCAL_MODEL).toBeUndefined() expect(msg).toContain('ON') - expect(msg).toContain('http://localhost:11434/v1') + expect(msg).toContain('No model override') + expect(msg).toContain('llama3.1:8b') }) - test('disable deletes process.env.CODEBUFF_BASE_URL', () => { - process.env.CODEBUFF_BASE_URL = 'http://localhost:11434/v1' - const msg = applyLocalAction({ kind: 'disable' }) - expect(process.env.CODEBUFF_BASE_URL).toBeUndefined() + test('enable with model sets both env vars', async () => { + const msg = await applyLocalAction({ + kind: 'enable', + baseUrl: 'http://localhost:11434/v1', + model: 'llama3.1:8b', + }) + expect(process.env.CODEBUFF_BASE_URL).toBe('http://localhost:11434/v1') + expect(process.env.CODEBUFF_LOCAL_MODEL).toBe('llama3.1:8b') + expect(msg).toContain('Model override: llama3.1:8b') + }) + + test('set-model when local is OFF → error', async () => { + const msg = await applyLocalAction({ + kind: 'set-model', + model: 'llama3.1:8b', + }) + expect(process.env.CODEBUFF_LOCAL_MODEL).toBeUndefined() expect(msg).toContain('OFF') - expect(msg).toContain('Previously: http://localhost:11434/v1') }) - test('disable also clears the API key env var', () => { + test('set-model when local is ON → updates model', async () => { + process.env.CODEBUFF_BASE_URL = 'http://localhost:11434/v1' + const msg = await applyLocalAction({ + kind: 'set-model', + model: 'llama3.1:8b', + }) + expect(process.env.CODEBUFF_LOCAL_MODEL).toBe('llama3.1:8b') + expect(msg).toContain('Model override: llama3.1:8b') + }) + + test('clear-model removes 
only the model, keeps baseUrl', async () => { + process.env.CODEBUFF_BASE_URL = 'http://localhost:11434/v1' + process.env.CODEBUFF_LOCAL_MODEL = 'llama3.1:8b' + const msg = await applyLocalAction({ kind: 'clear-model' }) + expect(process.env.CODEBUFF_BASE_URL).toBe('http://localhost:11434/v1') + expect(process.env.CODEBUFF_LOCAL_MODEL).toBeUndefined() + expect(msg).toContain('cleared') + }) + + test('clear-model when none set is friendly', async () => { + const msg = await applyLocalAction({ kind: 'clear-model' }) + expect(msg).toContain('No model override') + }) + + test('disable clears baseUrl, apiKey, and model', async () => { process.env.CODEBUFF_BASE_URL = 'http://localhost:11434/v1' process.env.CODEBUFF_PROVIDER_API_KEY = 'ollama' - applyLocalAction({ kind: 'disable' }) + process.env.CODEBUFF_LOCAL_MODEL = 'llama3.1:8b' + const msg = await applyLocalAction({ kind: 'disable' }) expect(process.env.CODEBUFF_BASE_URL).toBeUndefined() expect(process.env.CODEBUFF_PROVIDER_API_KEY).toBeUndefined() + expect(process.env.CODEBUFF_LOCAL_MODEL).toBeUndefined() + expect(msg).toContain('OFF') + expect(msg).toContain('llama3.1:8b') }) - test('disable when already off is idempotent and friendly', () => { - const msg = applyLocalAction({ kind: 'disable' }) + test('disable when already off → idempotent', async () => { + const msg = await applyLocalAction({ kind: 'disable' }) expect(msg).toContain('already OFF') }) - test('status when off shows OFF', () => { - const msg = applyLocalAction({ kind: 'status' }) + test('status when off mentions /local list and shows usage', async () => { + const msg = await applyLocalAction({ kind: 'status' }) expect(msg).toContain('OFF') + expect(msg).toContain('/local list') }) - test('status when on shows the URL', () => { + test('status when on with model shows both URL and model', async () => { process.env.CODEBUFF_BASE_URL = 'http://localhost:1234/v1' - const msg = applyLocalAction({ kind: 'status' }) + process.env.CODEBUFF_LOCAL_MODEL = 
'llama3.1:8b' + const msg = await applyLocalAction({ kind: 'status' }) expect(msg).toContain('ON') expect(msg).toContain('http://localhost:1234/v1') + expect(msg).toContain('llama3.1:8b') + }) + + test('status when on without model warns about no model override', async () => { + process.env.CODEBUFF_BASE_URL = 'http://localhost:11434/v1' + const msg = await applyLocalAction({ kind: 'status' }) + expect(msg).toContain('ON') + expect(msg).toContain('(none') }) - test('invalid action returns the reason prefixed', () => { - const msg = applyLocalAction({ + test('invalid returns reason prefixed', async () => { + const msg = await applyLocalAction({ kind: 'invalid', reason: 'something wrong', }) expect(msg).toContain('something wrong') - expect(process.env.CODEBUFF_BASE_URL).toBeUndefined() - }) - - test('enable overwrites a previously-set URL', () => { - applyLocalAction({ kind: 'enable', baseUrl: 'http://localhost:11434/v1' }) - applyLocalAction({ kind: 'enable', baseUrl: 'http://localhost:1234/v1' }) - expect(process.env.CODEBUFF_BASE_URL).toBe('http://localhost:1234/v1') }) - test('full toggle cycle: off → on → status → off', () => { - expect(applyLocalAction({ kind: 'status' })).toContain('OFF') - - applyLocalAction({ kind: 'enable', baseUrl: DEFAULT_LOCAL_BASE_URL }) - expect(getActiveLocalBaseUrl()).toBe(DEFAULT_LOCAL_BASE_URL) - - const statusOn = applyLocalAction({ kind: 'status' }) - expect(statusOn).toContain('ON') - - const off = applyLocalAction({ kind: 'disable' }) - expect(off).toContain('OFF') - expect(off).toContain(`Previously: ${DEFAULT_LOCAL_BASE_URL}`) - expect(getActiveLocalBaseUrl()).toBeUndefined() - }) - - test('mentions agent-level override in the enable message', () => { - const msg = applyLocalAction({ - kind: 'enable', - baseUrl: DEFAULT_LOCAL_BASE_URL, - }) - expect(msg.toLowerCase()).toContain('providerOptions.baseUrl'.toLowerCase()) + test('list when off returns error', async () => { + const msg = await applyLocalAction({ kind: 'list' }) + 
expect(msg).toContain('OFF') }) }) describe('parseLocalArgs + applyLocalAction end-to-end', () => { let originalBaseUrl: string | undefined + let originalModel: string | undefined beforeEach(() => { originalBaseUrl = process.env.CODEBUFF_BASE_URL + originalModel = process.env.CODEBUFF_LOCAL_MODEL delete process.env.CODEBUFF_BASE_URL + delete process.env.CODEBUFF_LOCAL_MODEL }) afterEach(() => { if (originalBaseUrl === undefined) delete process.env.CODEBUFF_BASE_URL else process.env.CODEBUFF_BASE_URL = originalBaseUrl + if (originalModel === undefined) delete process.env.CODEBUFF_LOCAL_MODEL + else process.env.CODEBUFF_LOCAL_MODEL = originalModel + }) + + test('user types `/local on llama3.1:8b` → URL default + model set', async () => { + await applyLocalAction(parseLocalArgs('on llama3.1:8b')) + expect(process.env.CODEBUFF_BASE_URL).toBe(DEFAULT_LOCAL_BASE_URL) + expect(process.env.CODEBUFF_LOCAL_MODEL).toBe('llama3.1:8b') }) - test('user types `/local on` → URL is set to default', () => { - applyLocalAction(parseLocalArgs('on')) + test('user types `/local llama3.1:8b` (no `on`) → same effect', async () => { + await applyLocalAction(parseLocalArgs('llama3.1:8b')) expect(process.env.CODEBUFF_BASE_URL).toBe(DEFAULT_LOCAL_BASE_URL) + expect(process.env.CODEBUFF_LOCAL_MODEL).toBe('llama3.1:8b') }) - test('user types `/local on http://x` → URL is set', () => { - applyLocalAction(parseLocalArgs('on http://x.example.com:9999/v1')) + test('user types `/local on http://x/v1 llama3.1:8b` → both set', async () => { + await applyLocalAction(parseLocalArgs('on http://x.example.com:9999/v1 llama3.1:8b')) expect(process.env.CODEBUFF_BASE_URL).toBe('http://x.example.com:9999/v1') + expect(process.env.CODEBUFF_LOCAL_MODEL).toBe('llama3.1:8b') }) - test('user types `/local off` after `/local on` → URL is cleared', () => { - applyLocalAction(parseLocalArgs('on')) - applyLocalAction(parseLocalArgs('off')) - expect(process.env.CODEBUFF_BASE_URL).toBeUndefined() + test('user types 
`/local model llama3.1:8b` after `/local on` → model added', async () => { + await applyLocalAction(parseLocalArgs('on')) + expect(process.env.CODEBUFF_LOCAL_MODEL).toBeUndefined() + await applyLocalAction(parseLocalArgs('model llama3.1:8b')) + expect(process.env.CODEBUFF_LOCAL_MODEL).toBe('llama3.1:8b') }) - test('user types `/local garbage` → no env change, error message returned', () => { - const msg = applyLocalAction(parseLocalArgs('garbage')) + test('user types `/local off` → both cleared', async () => { + await applyLocalAction(parseLocalArgs('on llama3.1:8b')) + await applyLocalAction(parseLocalArgs('off')) expect(process.env.CODEBUFF_BASE_URL).toBeUndefined() - expect(msg).toContain('Unknown') + expect(process.env.CODEBUFF_LOCAL_MODEL).toBeUndefined() + }) + + test('mutations are visible via getter functions', async () => { + await applyLocalAction(parseLocalArgs('on llama3.1:8b')) + expect(getActiveLocalBaseUrl()).toBe(DEFAULT_LOCAL_BASE_URL) + expect(getActiveLocalModel()).toBe('llama3.1:8b') + }) + + test('re-enabling without model clears previous model override', async () => { + await applyLocalAction(parseLocalArgs('on llama3.1:8b')) + await applyLocalAction(parseLocalArgs('on')) + expect(getActiveLocalBaseUrl()).toBe(DEFAULT_LOCAL_BASE_URL) + expect(getActiveLocalModel()).toBeUndefined() }) }) diff --git a/cli/src/commands/command-registry.ts b/cli/src/commands/command-registry.ts index a1cd4ea308..9afa024028 100644 --- a/cli/src/commands/command-registry.ts +++ b/cli/src/commands/command-registry.ts @@ -395,15 +395,14 @@ const ALL_COMMANDS: CommandDefinition[] = [ }), defineCommandWithArgs({ name: 'local', - handler: (params, args) => { - const message = applyLocalAction(parseLocalArgs(args)) - params.setMessages((prev) => [ - ...prev, - getUserMessage(params.inputValue.trim()), - getSystemMessage(message), - ]) - params.saveToHistory(params.inputValue.trim()) + handler: async (params, args) => { + const userText = params.inputValue.trim() + 
params.setMessages((prev) => [...prev, getUserMessage(userText)]) + params.saveToHistory(userText) clearInput(params) + + const message = await applyLocalAction(parseLocalArgs(args)) + params.setMessages((prev) => [...prev, getSystemMessage(message)]) }, }), // Mode commands generated from AGENT_MODES (excluded in Freebuff) diff --git a/cli/src/commands/local-provider.ts b/cli/src/commands/local-provider.ts index be26386ff8..3fda138965 100644 --- a/cli/src/commands/local-provider.ts +++ b/cli/src/commands/local-provider.ts @@ -2,25 +2,32 @@ * /local slash command — runtime toggle for the custom OpenAI-compatible * provider (Ollama, LM Studio, self-hosted). * - * Mutates process.env.CODEBUFF_BASE_URL at runtime. The SDK reads this lazily - * on every promptAiSdkStream call, so changes take effect immediately for the + * Mutates process.env at runtime. The SDK reads these env vars lazily on + * every promptAiSdkStream call, so changes take effect immediately for the * next request without needing to rebuild the CodebuffClient. 
* * Subcommands: - * /local — show current status - * /local on — enable with default Ollama URL - * /local on — enable with a specific URL - * /local set — alias for `/local on ` - * /local off — disable, return to Codebuff backend - * /local status — same as `/local` + * /local — show current status + * /local on — enable with default Ollama URL (model unchanged) + * /local on — enable with default URL + model override + * /local on — enable with custom URL (model unchanged) + * /local on — enable with URL + model override + * /local set — alias for `/local on ` + * /local model — set model override only (URL must already be set) + * /local model clear — clear the model override + * /local off — disable, return to Codebuff backend + * /local status — same as `/local` + * /local list — query the local provider for available models * * Agent-level providerOptions.baseUrl always wins; /local only affects agents - * that don't set their own baseUrl. + * that don't set their own baseUrl. Same for the model override — agents with + * an explicit providerOptions.baseUrl use their own declared model. */ import { PROVIDER_API_KEY_ENV_VAR, PROVIDER_BASE_URL_ENV_VAR, + PROVIDER_MODEL_ENV_VAR, } from '@codebuff/common/constants/custom-provider' /** Default URL used by `/local on` when the user doesn't specify one. */ @@ -28,10 +35,28 @@ export const DEFAULT_LOCAL_BASE_URL = 'http://localhost:11434/v1' export type LocalCommandAction = | { kind: 'status' } - | { kind: 'enable'; baseUrl: string } + | { kind: 'enable'; baseUrl: string; model?: string } + | { kind: 'set-model'; model: string } + | { kind: 'clear-model' } + | { kind: 'list' } | { kind: 'disable' } | { kind: 'invalid'; reason: string } +function isUrl(token: string): boolean { + return token.startsWith('http://') || token.startsWith('https://') +} + +function looksLikeUrl(token: string): boolean { + // Anything with a scheme separator — caller validates the actual scheme. 
+ return token.includes('://') +} + +function isLikelyModelTag(token: string): boolean { + // Ollama-style tags: name[:tag], e.g. "llama3.1:8b", "gemma4:e2b", "qwen2.5". + // Reject URL-shaped tokens and flags. + return Boolean(token) && !looksLikeUrl(token) && !token.startsWith('-') +} + /** * Parse the args passed to `/local`. Pure function — no side effects. * Exported for unit testing. @@ -44,49 +69,116 @@ export function parseLocalArgs(rawArgs: string): LocalCommandAction { return { kind: 'status' } } - const [subcommand, ...rest] = trimmed.split(/\s+/) + const tokens = trimmed.split(/\s+/) + const subcommand = tokens[0] const sub = subcommand.toLowerCase() - const value = rest.join(' ').trim() + const rest = tokens.slice(1) if (sub === 'status') { return { kind: 'status' } } + if (sub === 'list' || sub === 'models') { + return { kind: 'list' } + } + if (sub === 'off' || sub === 'disable') { - if (value) { + if (rest.length > 0) { return { kind: 'invalid', - reason: `\`/local ${sub}\` does not take arguments. Got: "${value}"`, + reason: `\`/local ${sub}\` does not take arguments. Got: "${rest.join(' ')}"`, } } return { kind: 'disable' } } - if (sub === 'on' || sub === 'enable' || sub === 'set') { - const url = value || DEFAULT_LOCAL_BASE_URL - const validated = validateBaseUrl(url) - if (!validated.ok) { - return { kind: 'invalid', reason: validated.reason } + if (sub === 'model') { + if (rest.length === 0) { + return { + kind: 'invalid', + reason: 'Usage: `/local model ` or `/local model clear`', + } + } + const value = rest.join(' ') + if (value === 'clear' || value === 'off' || value === 'none') { + return { kind: 'clear-model' } + } + if (!isLikelyModelTag(rest[0])) { + return { + kind: 'invalid', + reason: `Invalid model name: "${value}". Expected something like "llama3.1:8b".`, + } } - return { kind: 'enable', baseUrl: validated.url } + return { kind: 'set-model', model: value } } - // Looks like a bare URL (e.g. `/local http://localhost:11434/v1`)? 
- // Be friendly — treat it as `/local on `. - if (sub.startsWith('http://') || sub.startsWith('https://')) { - const validated = validateBaseUrl(trimmed) - if (!validated.ok) { - return { kind: 'invalid', reason: validated.reason } - } - return { kind: 'enable', baseUrl: validated.url } + if (sub === 'on' || sub === 'enable' || sub === 'set') { + return parseEnable(rest) + } + + // Looks like a bare URL or bare model (e.g. `/local http://...` or `/local llama3.1:8b`)? + if (looksLikeUrl(subcommand)) { + return parseEnable([subcommand, ...rest]) + } + // Bare model shortcut: must contain `:` so we don't silently accept typos + // like `/local foobar`. Use `/local on ` for tagless models. + if (subcommand.includes(':') && isLikelyModelTag(subcommand) && rest.length === 0) { + return { kind: 'enable', baseUrl: DEFAULT_LOCAL_BASE_URL, model: subcommand } } return { kind: 'invalid', - reason: `Unknown /local subcommand: "${subcommand}". Try: on, off, status, or set .`, + reason: `Unknown /local subcommand: "${subcommand}". Try: on, off, model, status, list.`, } } +/** + * Parse the tokens after `/local on` / `/local enable` / `/local set`. + * Supports four shapes: + * (empty) → default URL, no model override + * → URL, no model override + * → default URL + model + * → URL + model + */ +function parseEnable(tokens: string[]): LocalCommandAction { + if (tokens.length === 0) { + return { kind: 'enable', baseUrl: DEFAULT_LOCAL_BASE_URL } + } + + if (tokens.length === 1) { + const t = tokens[0] + // URL-shaped tokens go through URL validation regardless of scheme. + if (looksLikeUrl(t)) { + const v = validateBaseUrl(t) + if (!v.ok) return { kind: 'invalid', reason: v.reason } + return { kind: 'enable', baseUrl: v.url } + } + if (isLikelyModelTag(t)) { + return { kind: 'enable', baseUrl: DEFAULT_LOCAL_BASE_URL, model: t } + } + return { kind: 'invalid', reason: `Could not interpret "${t}" as URL or model name.` } + } + + // Two or more tokens. 
Pattern: first is URL, rest joined is model. + const [first, ...rest] = tokens + if (!looksLikeUrl(first)) { + return { + kind: 'invalid', + reason: `Expected URL or model. Got: "${first}". Usage: /local on [url] [model]`, + } + } + const v = validateBaseUrl(first) + if (!v.ok) return { kind: 'invalid', reason: v.reason } + const modelToken = rest.join(' ') + if (!isLikelyModelTag(rest[0])) { + return { + kind: 'invalid', + reason: `Invalid model name: "${modelToken}".`, + } + } + return { kind: 'enable', baseUrl: v.url, model: modelToken } +} + function validateBaseUrl( raw: string, ): @@ -118,57 +210,196 @@ export function getActiveLocalBaseUrl(): string | undefined { return process.env[PROVIDER_BASE_URL_ENV_VAR] } +/** Read the currently-active local model override (or undefined). */ +export function getActiveLocalModel(): string | undefined { + return process.env[PROVIDER_MODEL_ENV_VAR] +} + /** * Apply an action to process.env. Returns a user-facing message describing what happened. * Side effects are isolated to this function for testability. + * + * Note: `list` is async because it hits the network. Other actions are sync. */ -export function applyLocalAction(action: LocalCommandAction): string { +export async function applyLocalAction( + action: LocalCommandAction, +): Promise { if (action.kind === 'invalid') { return `❌ ${action.reason}` } if (action.kind === 'status') { - const current = getActiveLocalBaseUrl() - if (!current) { + return formatStatus() + } + + if (action.kind === 'list') { + return listModels() + } + + if (action.kind === 'enable') { + process.env[PROVIDER_BASE_URL_ENV_VAR] = action.baseUrl + if (action.model) { + process.env[PROVIDER_MODEL_ENV_VAR] = action.model + } else { + // Important: an `enable` without an explicit model clears any previous + // model override, so an old setting doesn't silently apply to a new URL. 
+ delete process.env[PROVIDER_MODEL_ENV_VAR] + } + const lines = [ + 'Local provider: ON', + ` URL: ${action.baseUrl}`, + ] + if (action.model) { + lines.push(` Model override: ${action.model}`) + lines.push('') + lines.push( + `Agents that would otherwise use a cloud model will use \`${action.model}\` instead.`, + ) + } else { + lines.push('') + lines.push('⚠️ No model override set. Cloud models (e.g.') + lines.push(' `anthropic/claude-opus-4-7`) will not exist on the local provider.') + lines.push(' Run `/local model ` (e.g. `/local model llama3.1:8b`)') + lines.push(' or `/local list` to see available models.') + } + lines.push('') + lines.push('Note: agents with their own `providerOptions.baseUrl` still win.') + lines.push('Disable with: /local off') + return lines.join('\n') + } + + if (action.kind === 'set-model') { + if (!getActiveLocalBaseUrl()) { return [ - 'Local provider: OFF', - '', - 'All agents (without per-agent providerOptions.baseUrl) go through the Codebuff backend.', - '', - 'Enable with: /local on (defaults to ' + DEFAULT_LOCAL_BASE_URL + ')', + '❌ Local provider is OFF. Enable it first with `/local on` before setting a model.', ].join('\n') } + process.env[PROVIDER_MODEL_ENV_VAR] = action.model return [ - `Local provider: ON`, - ` URL: ${current}`, + `Model override: ${action.model}`, '', - 'Agents without their own providerOptions.baseUrl will use this endpoint.', - 'Disable with: /local off', + `Local provider remains ON at ${getActiveLocalBaseUrl()}.`, + `Agents will use \`${action.model}\` for inference.`, ].join('\n') } - if (action.kind === 'enable') { - process.env[PROVIDER_BASE_URL_ENV_VAR] = action.baseUrl + if (action.kind === 'clear-model') { + const wasSet = getActiveLocalModel() + delete process.env[PROVIDER_MODEL_ENV_VAR] + if (!wasSet) { + return 'No model override was set. No change.' 
+ } return [ - `Local provider: ON`, - ` URL: ${action.baseUrl}`, + `Model override cleared (was: ${wasSet}).`, '', - 'Note: agents with their own `providerOptions.baseUrl` still win.', - 'Disable with: /local off', + 'Warning: without an override, the agent\'s declared cloud model will be sent', + 'to the local provider — likely a "model not found" error. Either set a new', + 'model with `/local model ` or turn local mode off with `/local off`.', ].join('\n') } // action.kind === 'disable' - const wasSet = getActiveLocalBaseUrl() + const wasBaseUrl = getActiveLocalBaseUrl() + const wasModel = getActiveLocalModel() delete process.env[PROVIDER_BASE_URL_ENV_VAR] delete process.env[PROVIDER_API_KEY_ENV_VAR] - if (!wasSet) { + delete process.env[PROVIDER_MODEL_ENV_VAR] + if (!wasBaseUrl && !wasModel) { return 'Local provider was already OFF. No change.' } - return [ - 'Local provider: OFF', - ` Previously: ${wasSet}`, - '', - 'Routing returns to the Codebuff backend.', - ].join('\n') + const lines = ['Local provider: OFF'] + if (wasBaseUrl) lines.push(` Previously: ${wasBaseUrl}`) + if (wasModel) lines.push(` Cleared model override: ${wasModel}`) + lines.push('') + lines.push('Routing returns to the Codebuff backend.') + return lines.join('\n') +} + +function formatStatus(): string { + const url = getActiveLocalBaseUrl() + const model = getActiveLocalModel() + if (!url) { + return [ + 'Local provider: OFF', + '', + 'All agents (without per-agent providerOptions.baseUrl) go through the Codebuff backend.', + '', + `Enable with: /local on (e.g. 
/local on llama3.1:8b)`, + ` /local on (uses ${DEFAULT_LOCAL_BASE_URL}, no model override)`, + `Discover available local models: /local list`, + ].join('\n') + } + const lines = [ + 'Local provider: ON', + ` URL: ${url}`, + ] + if (model) lines.push(` Model override: ${model}`) + else + lines.push( + ' Model override: (none — agent\'s declared model will be sent as-is)', + ) + lines.push('') + lines.push('Agents without their own providerOptions.baseUrl will use this endpoint.') + lines.push('Commands: /local model , /local off, /local list') + return lines.join('\n') +} + +/** + * Query the local provider's `/api/tags` endpoint (Ollama-compatible) to list + * available models. Best-effort — short timeout, friendly fallback. + */ +async function listModels(): Promise { + const baseUrl = getActiveLocalBaseUrl() + if (!baseUrl) { + return [ + '❌ Local provider is OFF. Run `/local on ` first, then `/local list`.', + ].join('\n') + } + // /api/tags lives at the root of the Ollama server, not under /v1. + // Strip a trailing /v1 if present, then append /api/tags. + const root = baseUrl.replace(/\/+$/, '').replace(/\/v1$/, '') + const tagsUrl = `${root}/api/tags` + + const controller = new AbortController() + const timeout = setTimeout(() => controller.abort(), 3000) + try { + const res = await fetch(tagsUrl, { signal: controller.signal }) + if (!res.ok) { + return [ + `Could not list models at ${tagsUrl} (HTTP ${res.status}).`, + '', + 'Note: this only works for Ollama-compatible providers.', + 'For LM Studio or others, set the model manually with `/local model `.', + ].join('\n') + } + const body = (await res.json()) as { models?: Array<{ name?: string }> } + const names = (body.models ?? 
[]) + .map((m) => m.name) + .filter((n): n is string => typeof n === 'string') + if (names.length === 0) { + return [ + `Local provider has no models loaded.`, + '', + 'Try `ollama pull llama3.1:8b` (or any tag of your choice) and run `/local list` again.', + ].join('\n') + } + const active = getActiveLocalModel() + const lines = [`Available models at ${root}:`] + for (const name of names) { + const marker = name === active ? ' ▶ ' : ' ' + lines.push(`${marker}${name}`) + } + lines.push('') + lines.push(`Use \`/local model \` to pick one.`) + return lines.join('\n') + } catch (e) { + return [ + `Could not reach ${tagsUrl}.`, + '', + 'Check that the provider is running and the URL is correct.', + 'For non-Ollama providers, set the model manually with `/local model `.', + ].join('\n') + } finally { + clearTimeout(timeout) + } } diff --git a/cli/src/data/slash-commands.ts b/cli/src/data/slash-commands.ts index 9a5b82ea96..57e9b2668c 100644 --- a/cli/src/data/slash-commands.ts +++ b/cli/src/data/slash-commands.ts @@ -166,7 +166,7 @@ const ALL_SLASH_COMMANDS: SlashCommand[] = [ { id: 'local', label: 'local', - description: 'Toggle local LLM provider (Ollama/LM Studio). Usage: /local [on|off|status|set ]', + description: 'Toggle local LLM provider (Ollama/LM Studio). Usage: /local [on |off|model |list|status]', }, ...MODE_COMMANDS, // { diff --git a/common/src/constants/custom-provider.ts b/common/src/constants/custom-provider.ts index 0bedb0355b..131f59ed09 100644 --- a/common/src/constants/custom-provider.ts +++ b/common/src/constants/custom-provider.ts @@ -5,3 +5,9 @@ export const PROVIDER_BASE_URL_ENV_VAR = 'CODEBUFF_BASE_URL' /** Env var providing the API key for the endpoint set by PROVIDER_BASE_URL_ENV_VAR. * Most local runtimes (Ollama, LM Studio) ignore the key entirely. */ export const PROVIDER_API_KEY_ENV_VAR = 'CODEBUFF_PROVIDER_API_KEY' + +/** Env var overriding the agent's declared model when a custom provider is active. 
+ * Used by `/local on ` to substitute the cloud model (e.g. + * `anthropic/claude-opus-4-7`) with a model the local provider actually has + * (e.g. `llama3.1:8b`). Only takes effect when PROVIDER_BASE_URL_ENV_VAR is set. */ +export const PROVIDER_MODEL_ENV_VAR = 'CODEBUFF_LOCAL_MODEL' diff --git a/sdk/src/env.ts b/sdk/src/env.ts index 9814712dfa..b49f88b6fa 100644 --- a/sdk/src/env.ts +++ b/sdk/src/env.ts @@ -10,6 +10,7 @@ import { CHATGPT_OAUTH_TOKEN_ENV_VAR } from '@codebuff/common/constants/chatgpt- import { PROVIDER_API_KEY_ENV_VAR, PROVIDER_BASE_URL_ENV_VAR, + PROVIDER_MODEL_ENV_VAR, } from '@codebuff/common/constants/custom-provider' import { API_KEY_ENV_VAR } from '@codebuff/common/constants/paths' import { getBaseEnv } from '@codebuff/common/env-process' @@ -68,3 +69,12 @@ export const getCustomProviderBaseUrlFromEnv = (): string | undefined => { export const getCustomProviderApiKeyFromEnv = (): string | undefined => { return process.env[PROVIDER_API_KEY_ENV_VAR] } + +/** + * Get the override model name from environment. When the custom provider is + * active, this value replaces the agent's declared model. + * Returns undefined if unset. + */ +export const getCustomProviderModelFromEnv = (): string | undefined => { + return process.env[PROVIDER_MODEL_ENV_VAR] +} diff --git a/sdk/src/impl/llm.ts b/sdk/src/impl/llm.ts index 1bb6c06884..225e3282d7 100644 --- a/sdk/src/impl/llm.ts +++ b/sdk/src/impl/llm.ts @@ -26,6 +26,7 @@ import { refreshChatGptOAuthToken } from '../credentials' import { getCustomProviderApiKeyFromEnv, getCustomProviderBaseUrlFromEnv, + getCustomProviderModelFromEnv, } from '../env' import { getErrorStatusCode } from '../error-utils' @@ -382,9 +383,21 @@ export async function* promptAiSdkStream( ? 
envApiKey : undefined + // Model override: when a custom provider is active and CODEBUFF_LOCAL_MODEL + // is set, substitute the agent's declared model (which is typically a cloud + // model id like 'anthropic/claude-opus-4-7' that a local provider won't + // recognize) with the configured local model (e.g. 'llama3.1:8b'). + // Only applies to envBaseUrl/clientBaseUrl paths — an agent that explicitly + // sets providerOptions.baseUrl is assumed to also have set a matching model. + const envModelOverride = + resolvedBaseUrl && !agentBaseUrl + ? getCustomProviderModelFromEnv() + : undefined + const effectiveModel = envModelOverride ?? params.model + const modelParams: ModelRequestParams = { apiKey: params.apiKey, - model: params.model, + model: effectiveModel, skipChatGptOAuth: params.skipChatGptOAuth, costMode: params.costMode, ...(resolvedBaseUrl @@ -561,7 +574,7 @@ export async function* promptAiSdkStream( throw new Error( buildCustomProviderError({ baseUrl: resolvedBaseUrl, - model: params.model, + model: effectiveModel, rawMessage, rawCode, }), @@ -727,7 +740,7 @@ export async function* promptAiSdkStream( throw new Error( buildCustomProviderError({ baseUrl: resolvedBaseUrl, - model: params.model, + model: effectiveModel, rawMessage: errorMessage, rawCode, }), From a74e57be467a840de064440cde561931386e692f Mon Sep 17 00:00:00 2001 From: vraj Date: Sat, 16 May 2026 04:01:47 -0700 Subject: [PATCH 7/8] chore(sdk): log /local model substitution for visibility MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When CODEBUFF_LOCAL_MODEL is set and overrides params.model, log it at INFO so users can grep their cli.jsonl and confirm the substitution is happening on outbound requests. Verified end-to-end against live Ollama: params.model='anthropic/claude-opus-4.7' + env CODEBUFF_LOCAL_MODEL='llama3.1:8b' → request reached Ollama, llama3.1:8b responded. 
Co-Authored-By: Claude Opus 4.7 --- sdk/src/impl/llm.ts | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/sdk/src/impl/llm.ts b/sdk/src/impl/llm.ts index 225e3282d7..0086c52fab 100644 --- a/sdk/src/impl/llm.ts +++ b/sdk/src/impl/llm.ts @@ -395,6 +395,19 @@ export async function* promptAiSdkStream( : undefined const effectiveModel = envModelOverride ?? params.model + // Surface the substitution so users can confirm in logs that their /local + // model override is actually being applied to outbound requests. + if (envModelOverride && envModelOverride !== params.model) { + logger.info( + { + requestedModel: params.model, + effectiveModel, + baseUrl: resolvedBaseUrl, + }, + 'Custom provider active: substituting agent model with /local override', + ) + } + const modelParams: ModelRequestParams = { apiKey: params.apiKey, model: effectiveModel, From 44ac9f32a5216f18c5592fad06853ac29bda2c44 Mon Sep 17 00:00:00 2001 From: vraj Date: Sat, 16 May 2026 04:57:02 -0700 Subject: [PATCH 8/8] polish: address self-review feedback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit • Remove unused isUrl() helper in /local parser (replaced by looksLikeUrl during earlier fix; never deleted). • Rename env var CODEBUFF_LOCAL_MODEL → CODEBUFF_PROVIDER_MODEL so all three custom-provider env vars share the CODEBUFF_(PROVIDER_)? prefix consistently. Clarify in JSDoc that the override is skipped when an agent declares its own providerOptions.baseUrl. • Default apiKey placeholder "codebuff" → "unused" in createCustomProviderModel. The literal string "codebuff" invited the wrong mental model (could read as "send my Codebuff key"); "unused" plus a comment makes the intent obvious. Local runtimes ignore the Authorization header entirely; we never send the user's real key on the direct path. 
• Extract maxRetries: 1 into CUSTOM_PROVIDER_MAX_RETRIES with a JSDoc explaining the choice (one retry for cold-start; more wouldn't help with deterministic local failures). • Simplify the precedence ladder in promptAiSdkStream — replace the nested ternary that paired apiKey-with-winning-baseUrl with a small sources array + .find(). Same behavior, easier to read at a glance. Tests updated for the env var rename. All 3245 tests across CLI, SDK, and agent-runtime still pass. Co-Authored-By: Claude Opus 4.7 --- .../commands/__tests__/local-provider.test.ts | 48 ++++++++--------- cli/src/commands/local-provider.ts | 4 -- common/src/constants/custom-provider.ts | 8 ++- sdk/src/impl/llm.ts | 53 ++++++++++--------- sdk/src/impl/model-provider.ts | 5 +- 5 files changed, 62 insertions(+), 56 deletions(-) diff --git a/cli/src/commands/__tests__/local-provider.test.ts b/cli/src/commands/__tests__/local-provider.test.ts index b1cc154b66..2f9580c720 100644 --- a/cli/src/commands/__tests__/local-provider.test.ts +++ b/cli/src/commands/__tests__/local-provider.test.ts @@ -153,10 +153,10 @@ describe('applyLocalAction (side effects on process.env)', () => { beforeEach(() => { originalBaseUrl = process.env.CODEBUFF_BASE_URL originalApiKey = process.env.CODEBUFF_PROVIDER_API_KEY - originalModel = process.env.CODEBUFF_LOCAL_MODEL + originalModel = process.env.CODEBUFF_PROVIDER_MODEL delete process.env.CODEBUFF_BASE_URL delete process.env.CODEBUFF_PROVIDER_API_KEY - delete process.env.CODEBUFF_LOCAL_MODEL + delete process.env.CODEBUFF_PROVIDER_MODEL }) afterEach(() => { @@ -165,18 +165,18 @@ describe('applyLocalAction (side effects on process.env)', () => { if (originalApiKey === undefined) delete process.env.CODEBUFF_PROVIDER_API_KEY else process.env.CODEBUFF_PROVIDER_API_KEY = originalApiKey - if (originalModel === undefined) delete process.env.CODEBUFF_LOCAL_MODEL - else process.env.CODEBUFF_LOCAL_MODEL = originalModel + if (originalModel === undefined) delete 
process.env.CODEBUFF_PROVIDER_MODEL + else process.env.CODEBUFF_PROVIDER_MODEL = originalModel }) test('enable without model sets baseUrl, clears any previous model override', async () => { - process.env.CODEBUFF_LOCAL_MODEL = 'stale-model' + process.env.CODEBUFF_PROVIDER_MODEL = 'stale-model' const msg = await applyLocalAction({ kind: 'enable', baseUrl: 'http://localhost:11434/v1', }) expect(process.env.CODEBUFF_BASE_URL).toBe('http://localhost:11434/v1') - expect(process.env.CODEBUFF_LOCAL_MODEL).toBeUndefined() + expect(process.env.CODEBUFF_PROVIDER_MODEL).toBeUndefined() expect(msg).toContain('ON') expect(msg).toContain('No model override') expect(msg).toContain('llama3.1:8b') @@ -189,7 +189,7 @@ describe('applyLocalAction (side effects on process.env)', () => { model: 'llama3.1:8b', }) expect(process.env.CODEBUFF_BASE_URL).toBe('http://localhost:11434/v1') - expect(process.env.CODEBUFF_LOCAL_MODEL).toBe('llama3.1:8b') + expect(process.env.CODEBUFF_PROVIDER_MODEL).toBe('llama3.1:8b') expect(msg).toContain('Model override: llama3.1:8b') }) @@ -198,7 +198,7 @@ describe('applyLocalAction (side effects on process.env)', () => { kind: 'set-model', model: 'llama3.1:8b', }) - expect(process.env.CODEBUFF_LOCAL_MODEL).toBeUndefined() + expect(process.env.CODEBUFF_PROVIDER_MODEL).toBeUndefined() expect(msg).toContain('OFF') }) @@ -208,16 +208,16 @@ describe('applyLocalAction (side effects on process.env)', () => { kind: 'set-model', model: 'llama3.1:8b', }) - expect(process.env.CODEBUFF_LOCAL_MODEL).toBe('llama3.1:8b') + expect(process.env.CODEBUFF_PROVIDER_MODEL).toBe('llama3.1:8b') expect(msg).toContain('Model override: llama3.1:8b') }) test('clear-model removes only the model, keeps baseUrl', async () => { process.env.CODEBUFF_BASE_URL = 'http://localhost:11434/v1' - process.env.CODEBUFF_LOCAL_MODEL = 'llama3.1:8b' + process.env.CODEBUFF_PROVIDER_MODEL = 'llama3.1:8b' const msg = await applyLocalAction({ kind: 'clear-model' }) 
expect(process.env.CODEBUFF_BASE_URL).toBe('http://localhost:11434/v1') - expect(process.env.CODEBUFF_LOCAL_MODEL).toBeUndefined() + expect(process.env.CODEBUFF_PROVIDER_MODEL).toBeUndefined() expect(msg).toContain('cleared') }) @@ -229,11 +229,11 @@ describe('applyLocalAction (side effects on process.env)', () => { test('disable clears baseUrl, apiKey, and model', async () => { process.env.CODEBUFF_BASE_URL = 'http://localhost:11434/v1' process.env.CODEBUFF_PROVIDER_API_KEY = 'ollama' - process.env.CODEBUFF_LOCAL_MODEL = 'llama3.1:8b' + process.env.CODEBUFF_PROVIDER_MODEL = 'llama3.1:8b' const msg = await applyLocalAction({ kind: 'disable' }) expect(process.env.CODEBUFF_BASE_URL).toBeUndefined() expect(process.env.CODEBUFF_PROVIDER_API_KEY).toBeUndefined() - expect(process.env.CODEBUFF_LOCAL_MODEL).toBeUndefined() + expect(process.env.CODEBUFF_PROVIDER_MODEL).toBeUndefined() expect(msg).toContain('OFF') expect(msg).toContain('llama3.1:8b') }) @@ -251,7 +251,7 @@ describe('applyLocalAction (side effects on process.env)', () => { test('status when on with model shows both URL and model', async () => { process.env.CODEBUFF_BASE_URL = 'http://localhost:1234/v1' - process.env.CODEBUFF_LOCAL_MODEL = 'llama3.1:8b' + process.env.CODEBUFF_PROVIDER_MODEL = 'llama3.1:8b' const msg = await applyLocalAction({ kind: 'status' }) expect(msg).toContain('ON') expect(msg).toContain('http://localhost:1234/v1') @@ -285,48 +285,48 @@ describe('parseLocalArgs + applyLocalAction end-to-end', () => { beforeEach(() => { originalBaseUrl = process.env.CODEBUFF_BASE_URL - originalModel = process.env.CODEBUFF_LOCAL_MODEL + originalModel = process.env.CODEBUFF_PROVIDER_MODEL delete process.env.CODEBUFF_BASE_URL - delete process.env.CODEBUFF_LOCAL_MODEL + delete process.env.CODEBUFF_PROVIDER_MODEL }) afterEach(() => { if (originalBaseUrl === undefined) delete process.env.CODEBUFF_BASE_URL else process.env.CODEBUFF_BASE_URL = originalBaseUrl - if (originalModel === undefined) delete 
process.env.CODEBUFF_LOCAL_MODEL - else process.env.CODEBUFF_LOCAL_MODEL = originalModel + if (originalModel === undefined) delete process.env.CODEBUFF_PROVIDER_MODEL + else process.env.CODEBUFF_PROVIDER_MODEL = originalModel }) test('user types `/local on llama3.1:8b` → URL default + model set', async () => { await applyLocalAction(parseLocalArgs('on llama3.1:8b')) expect(process.env.CODEBUFF_BASE_URL).toBe(DEFAULT_LOCAL_BASE_URL) - expect(process.env.CODEBUFF_LOCAL_MODEL).toBe('llama3.1:8b') + expect(process.env.CODEBUFF_PROVIDER_MODEL).toBe('llama3.1:8b') }) test('user types `/local llama3.1:8b` (no `on`) → same effect', async () => { await applyLocalAction(parseLocalArgs('llama3.1:8b')) expect(process.env.CODEBUFF_BASE_URL).toBe(DEFAULT_LOCAL_BASE_URL) - expect(process.env.CODEBUFF_LOCAL_MODEL).toBe('llama3.1:8b') + expect(process.env.CODEBUFF_PROVIDER_MODEL).toBe('llama3.1:8b') }) test('user types `/local on http://x/v1 llama3.1:8b` → both set', async () => { await applyLocalAction(parseLocalArgs('on http://x.example.com:9999/v1 llama3.1:8b')) expect(process.env.CODEBUFF_BASE_URL).toBe('http://x.example.com:9999/v1') - expect(process.env.CODEBUFF_LOCAL_MODEL).toBe('llama3.1:8b') + expect(process.env.CODEBUFF_PROVIDER_MODEL).toBe('llama3.1:8b') }) test('user types `/local model llama3.1:8b` after `/local on` → model added', async () => { await applyLocalAction(parseLocalArgs('on')) - expect(process.env.CODEBUFF_LOCAL_MODEL).toBeUndefined() + expect(process.env.CODEBUFF_PROVIDER_MODEL).toBeUndefined() await applyLocalAction(parseLocalArgs('model llama3.1:8b')) - expect(process.env.CODEBUFF_LOCAL_MODEL).toBe('llama3.1:8b') + expect(process.env.CODEBUFF_PROVIDER_MODEL).toBe('llama3.1:8b') }) test('user types `/local off` → both cleared', async () => { await applyLocalAction(parseLocalArgs('on llama3.1:8b')) await applyLocalAction(parseLocalArgs('off')) expect(process.env.CODEBUFF_BASE_URL).toBeUndefined() - expect(process.env.CODEBUFF_LOCAL_MODEL).toBeUndefined() 
+ expect(process.env.CODEBUFF_PROVIDER_MODEL).toBeUndefined() }) test('mutations are visible via getter functions', async () => { diff --git a/cli/src/commands/local-provider.ts b/cli/src/commands/local-provider.ts index 3fda138965..85a3a65143 100644 --- a/cli/src/commands/local-provider.ts +++ b/cli/src/commands/local-provider.ts @@ -42,10 +42,6 @@ export type LocalCommandAction = | { kind: 'disable' } | { kind: 'invalid'; reason: string } -function isUrl(token: string): boolean { - return token.startsWith('http://') || token.startsWith('https://') -} - function looksLikeUrl(token: string): boolean { // Anything with a scheme separator — caller validates the actual scheme. return token.includes('://') diff --git a/common/src/constants/custom-provider.ts b/common/src/constants/custom-provider.ts index 131f59ed09..5b59394287 100644 --- a/common/src/constants/custom-provider.ts +++ b/common/src/constants/custom-provider.ts @@ -9,5 +9,9 @@ export const PROVIDER_API_KEY_ENV_VAR = 'CODEBUFF_PROVIDER_API_KEY' /** Env var overriding the agent's declared model when a custom provider is active. * Used by `/local on ` to substitute the cloud model (e.g. * `anthropic/claude-opus-4-7`) with a model the local provider actually has - * (e.g. `llama3.1:8b`). Only takes effect when PROVIDER_BASE_URL_ENV_VAR is set. */ -export const PROVIDER_MODEL_ENV_VAR = 'CODEBUFF_LOCAL_MODEL' + * (e.g. `llama3.1:8b`). + * + * Only takes effect when PROVIDER_BASE_URL_ENV_VAR is set AND the agent + * itself doesn't declare its own `providerOptions.baseUrl` — agents with an + * explicit baseUrl are assumed to declare a matching model. */ +export const PROVIDER_MODEL_ENV_VAR = 'CODEBUFF_PROVIDER_MODEL' diff --git a/sdk/src/impl/llm.ts b/sdk/src/impl/llm.ts index 0086c52fab..60283e1745 100644 --- a/sdk/src/impl/llm.ts +++ b/sdk/src/impl/llm.ts @@ -135,6 +135,14 @@ type OpenRouterUsageAccounting = { } } +/** + * Retry count for direct calls to a custom OpenAI-compatible provider. 
+ * One retry absorbs brief model-load stalls on first call. We deliberately + * don't retry more — local failures are usually deterministic (provider down, + * wrong URL, model not pulled) and extra retries only make errors slower. + */ +const CUSTOM_PROVIDER_MAX_RETRIES = 1 + /** * Wrap raw errors from a custom OpenAI-compatible endpoint in a friendly, * actionable message. Distinguishes connection failures (provider down, @@ -366,29 +374,25 @@ export async function* promptAiSdkStream( } // Resolve custom-provider precedence: agent > client option > env. - // apiKey is paired with whichever URL "wins" to avoid mixing sources. + // First non-empty baseUrl wins; its apiKey comes along to avoid mixing + // credentials with the wrong endpoint. + const customSources = [ + params.agentProviderOptions, + params.clientCustomProvider, + { + baseUrl: getCustomProviderBaseUrlFromEnv(), + apiKey: getCustomProviderApiKeyFromEnv(), + }, + ] + const winningSource = customSources.find((s) => s?.baseUrl) + const resolvedBaseUrl = winningSource?.baseUrl + const resolvedApiKey = winningSource?.apiKey + + // Model override: substitute the agent's declared model with the env-configured + // local model when the custom provider is active. Skipped when an agent + // explicitly sets its own providerOptions.baseUrl — that agent is assumed to + // have declared a matching model. See PROVIDER_MODEL_ENV_VAR JSDoc. const agentBaseUrl = params.agentProviderOptions?.baseUrl - const agentApiKey = params.agentProviderOptions?.apiKey - const clientBaseUrl = params.clientCustomProvider?.baseUrl - const clientApiKey = params.clientCustomProvider?.apiKey - const envBaseUrl = getCustomProviderBaseUrlFromEnv() - const envApiKey = getCustomProviderApiKeyFromEnv() - - const resolvedBaseUrl = agentBaseUrl ?? clientBaseUrl ?? envBaseUrl - const resolvedApiKey = agentBaseUrl - ? agentApiKey - : clientBaseUrl - ? clientApiKey - : envBaseUrl - ? 
envApiKey - : undefined - - // Model override: when a custom provider is active and CODEBUFF_LOCAL_MODEL - // is set, substitute the agent's declared model (which is typically a cloud - // model id like 'anthropic/claude-opus-4-7' that a local provider won't - // recognize) with the configured local model (e.g. 'llama3.1:8b'). - // Only applies to envBaseUrl/clientBaseUrl paths — an agent that explicitly - // sets providerOptions.baseUrl is assumed to also have set a matching model. const envModelOverride = resolvedBaseUrl && !agentBaseUrl ? getCustomProviderModelFromEnv() @@ -438,10 +442,9 @@ export async function* promptAiSdkStream( model: aiSDKModel, messages: convertCbToModelMessages(params), // ChatGPT OAuth: no retries (we fall back to Codebuff on first failure). - // Custom provider: one retry to handle brief model-load stalls without - // dragging out errors when the provider is actually down. + // Custom provider: see CUSTOM_PROVIDER_MAX_RETRIES. ...(isChatGptOAuth ? { maxRetries: 0 } : {}), - ...(isCustomProvider ? { maxRetries: 1 } : {}), + ...(isCustomProvider ? { maxRetries: CUSTOM_PROVIDER_MAX_RETRIES } : {}), // Direct routes (ChatGPT OAuth, custom provider): skip codebuff_metadata // and OpenRouter routing keys — neither belongs in those request bodies. ...(isChatGptOAuth || isCustomProvider diff --git a/sdk/src/impl/model-provider.ts b/sdk/src/impl/model-provider.ts index de7e3c14a7..03329aa8c1 100644 --- a/sdk/src/impl/model-provider.ts +++ b/sdk/src/impl/model-provider.ts @@ -296,7 +296,10 @@ function createCustomProviderModel(params: { provider: 'custom', url: ({ path: endpoint }) => `${trimmedBase}${endpoint}`, headers: () => ({ - Authorization: `Bearer ${apiKey ?? 'codebuff'}`, + // Most local runtimes (Ollama, LM Studio) ignore the Authorization header + // entirely. Send a non-empty placeholder since some servers reject empty + // Bearer values; never send the user's Codebuff key on this code path. + Authorization: `Bearer ${apiKey ?? 
'unused'}`, 'Content-Type': 'application/json', 'user-agent': `ai-sdk/openai-compatible/${VERSION}/codebuff-custom-provider`, }),