OpenCortexIDE · Pterjudin · May 25, 2026 · May 25, 2026 · May 25, 2026 · May 25, 2026
diff --git a/src/vs/workbench/contrib/cortexide/browser/react/src/settings/Settings.tsx b/src/vs/workbench/contrib/cortexide/browser/react/src/settings/Settings.tsx
@@ -1943,13 +1943,12 @@ export const Settings = () => {
 													<select
 														className='text-xs bg-void-bg-1 text-void-fg-1 border border-void-border-1 rounded px-1 py-0.5'
 														value={settingsState.globalSettings.routingPolicy ?? 'auto-cheapest'}
-														onChange={(e) => cortexideSettingsService.setGlobalSetting('routingPolicy', e.target.value as ('auto-cheapest' | 'free-tier' | 'local-only' | 'byok-paid'))}
+														onChange={(e) => cortexideSettingsService.setGlobalSetting('routingPolicy', e.target.value as ('auto-cheapest' | 'free-tier' | 'local-only'))}
 														title='Routing policy'
 													>
 														<option value='auto-cheapest'>Auto (cheapest viable)</option>
 														<option value='free-tier'>Free-tier ladder</option>
 														<option value='local-only'>Local only</option>
-														<option value='byok-paid'>BYOK paid models</option>
 													</select>
 												</div>
 											</div>

diff --git a/src/vs/workbench/contrib/cortexide/common/cortexideSettingsService.ts b/src/vs/workbench/contrib/cortexide/common/cortexideSettingsService.ts
@@ -373,6 +373,25 @@ class VoidSettingsService extends Disposable implements ICortexideSettingsServic
 			if (!validChatModes.includes(readS.globalSettings.chatMode as ChatMode)) {
 				readS.globalSettings.chatMode = defaultGlobalSettings.chatMode;
 			}
+
+			// Normalise the persisted routing policy. The removed 'byok-paid' value
+			// was never wired to a distinct code path (its use case is covered by
+			// 'auto-cheapest'), and any other unrecognised value - e.g. written by
+			// a different build - would surface as an invalid selection. Coerce
+			// both to 'auto-cheapest'. Keep this list in sync with `routingPolicies`.
+			const persistedRoutingPolicy = readS.globalSettings.routingPolicy as string | undefined;
+			const knownRoutingPolicies: readonly string[] = ['auto-cheapest', 'free-tier', 'local-only'];
+			if (persistedRoutingPolicy !== undefined && !knownRoutingPolicies.includes(persistedRoutingPolicy)) {
+				readS.globalSettings.routingPolicy = 'auto-cheapest';
+			}
+
+			// Migrate the deprecated `localFirstAI` flag into `routingPolicy`, but
+			// only when no policy is stored yet so an explicit selection always wins.
+			if (readS.globalSettings.routingPolicy === undefined) {
+				readS.globalSettings.routingPolicy = readS.globalSettings.localFirstAI === true
+					? 'local-only'
+					: 'auto-cheapest';
+			}
 		}
 		catch (e) {
 			readS = defaultState()

diff --git a/src/vs/workbench/contrib/cortexide/common/cortexideSettingsTypes.ts b/src/vs/workbench/contrib/cortexide/common/cortexideSettingsTypes.ts
@@ -113,6 +113,9 @@ export const displayInfoOfProviderName = (providerName: ProviderName): DisplayIn
 		// allow-any-unicode-next-line
 		return { title: 'Moonshot AI (Kimi)', desc: 'Kimi K2 — #1 SWE-bench agentic coding. Free tier available.' }
 	}
+	else if (providerName === 'cerebras') {
+		return { title: 'Cerebras', desc: 'Free tier: 1M tokens/day, ~2,600 tok/s, 8K context cap.' }
+	}
 
 	throw new Error(`descOfProviderName: Unknown provider name: "${providerName}"`)
 }
@@ -137,6 +140,7 @@ export const subTextMdOfProviderName = (providerName: ProviderName): string => {
 	if (providerName === 'liteLLM') return 'Read more about endpoints [here](https://docs.litellm.ai/docs/providers/openai_compatible).'
 	if (providerName === 'pollinations') return 'Get your [API Key here](https://enter.pollinations.ai/). [API Docs](https://enter.pollinations.ai/api/docs).'
 	if (providerName === 'moonshot') return 'Get your free [API Key here](https://platform.moonshot.ai/console/api-keys). Kimi K2 has a generous free tier. [Pricing](https://platform.moonshot.ai/docs/pricing).'
+	if (providerName === 'cerebras') return 'Get your free [API Key here](https://cloud.cerebras.ai/). Free tier includes 1M tokens/day with no card required. [Docs](https://inference-docs.cerebras.ai/).'
 
 	throw new Error(`subTextMdOfProviderName: Unknown provider name: "${providerName}"`)
 }
@@ -167,7 +171,8 @@ export const displayInfoOfSettingName = (providerName: ProviderName, settingName
 														providerName === 'awsBedrock' ? 'key-...' :
 															providerName === 'pollinations' ? 'sk-... or pk-...' :
 																	providerName === 'moonshot' ? 'sk-key...' :
-																		'',
+																		providerName === 'cerebras' ? 'csk-key...' :
+																			'',
 
 			isPasswordField: true,
 		}
@@ -375,6 +380,12 @@ export const defaultSettingsOfProvider: SettingsOfProvider = {
 		...modelInfoOfDefaultModelNames(defaultModelsOfProvider.moonshot),
 		_didFillInProviderSettings: undefined,
 	},
+	cerebras: {
+		...defaultCustomSettings,
+		...defaultProviderSettings.cerebras,
+		...modelInfoOfDefaultModelNames(defaultModelsOfProvider.cerebras),
+		_didFillInProviderSettings: undefined,
+	},
 }
 
 
@@ -530,19 +541,25 @@ export type GlobalSettings = {
 		indexerParallelism?: number; // Indexer parallelism limit (default: 2)
 		routerCacheTtlMs?: number; // Router cache TTL in ms (default: 2000)
 	};
-	// Local-First AI: When enabled, heavily bias router toward local models
-	localFirstAI?: boolean; // Prefer local models over cloud models (default: false)
+	/**
+	 * @deprecated Use `routingPolicy === 'local-only'` instead. Retained for
+	 * backward compatibility with stored settings and to keep the VS Code
+	 * configuration key `cortexide.global.localFirstAI` readable. The settings
+	 * service migrates `localFirstAI: true` -> `routingPolicy: 'local-only'`
+	 * on load when `routingPolicy` is unset. Will be removed after a few
+	 * releases.
+	 */
+	localFirstAI?: boolean;
 	// Routing policy: controls how the model router selects between configured providers.
 	// - 'auto-cheapest': existing behaviour - score-based mixture of rules + learned (default)
 	// - 'free-tier':     prefer free-tier providers in quality-ranked order with quota tracking
 	// - 'local-only':    never dispatch to a cloud provider, even if the model selection points there
-	// - 'byok-paid':     prefer paid BYOK models, skipping free-tier ladders entirely
 	routingPolicy?: RoutingPolicy;
 }
 
 /** User-selectable routing policy for the model router. */
-export type RoutingPolicy = 'auto-cheapest' | 'free-tier' | 'local-only' | 'byok-paid';
-export const routingPolicies: readonly RoutingPolicy[] = ['auto-cheapest', 'free-tier', 'local-only', 'byok-paid'];
+export type RoutingPolicy = 'auto-cheapest' | 'free-tier' | 'local-only';
+export const routingPolicies: readonly RoutingPolicy[] = ['auto-cheapest', 'free-tier', 'local-only'];
 
 export const defaultGlobalSettings: GlobalSettings = {
 	autoRefreshModels: true,

diff --git a/src/vs/workbench/contrib/cortexide/common/i18n/i18nService.ts b/src/vs/workbench/contrib/cortexide/common/i18n/i18nService.ts
@@ -284,7 +284,6 @@ export const EN_TRANSLATIONS = {
 	'routing.policy.autoCheapest': 'Auto (cheapest viable)',
 	'routing.policy.freeTier': 'Free-tier ladder',
 	'routing.policy.localOnly': 'Local only',
-	'routing.policy.byokPaid': 'BYOK paid models',
 	'routing.statusBar.label': 'Free-tier quota',
 	'routing.statusBar.none': 'No free-tier providers',
 	'routing.statusBar.entry': '{0}: {1}/{2} RPD',

diff --git a/src/vs/workbench/contrib/cortexide/common/modelCapabilities.ts b/src/vs/workbench/contrib/cortexide/common/modelCapabilities.ts
@@ -92,6 +92,9 @@ export const defaultProviderSettings = {
 	moonshot: { // Kimi K2 — free tier available at platform.moonshot.ai
 		apiKey: '',
 	},
+	cerebras: { // Cerebras Cloud - OpenAI-compatible, 1M tokens/day free tier, 8K context cap
+		apiKey: '',
+	},
 
 } as const
 
@@ -301,6 +304,14 @@ export const defaultModelsOfProvider = {
 		'moonshot-v1-32k',       // 32k context model
 		'moonshot-v1-128k',      // Long context (128k tokens)
 	],
+	cerebras: [ // https://inference-docs.cerebras.ai/introduction
+		// Cerebras Cloud free tier: 1M tokens/day, ~2,600 tok/s, 8K context cap.
+		// Reference: https://inference-docs.cerebras.ai/api-reference/models (checked 2026-05)
+		'llama-4-scout-17b-16e-instruct', // Llama 4 Scout 17B 16E
+		'qwen-3-32b',                     // Qwen 3 32B reasoning model
+		'deepseek-r1-distill-llama-70b',  // DeepSeek R1 distilled into Llama 70B
+		'llama-3.3-70b',                  // Llama 3.3 70B
+	],
 
 
 } as const satisfies Record<ProviderName, string[]>
@@ -1799,6 +1810,72 @@ const moonshotSettings: VoidStaticProviderInfo = {
 	},
 }
 
+// ---------------- CEREBRAS ----------------
+// Cerebras Cloud is OpenAI-compatible. Free tier: 1M tokens/day, ~2,600 tok/s,
+// 8K context cap. Reference: https://inference-docs.cerebras.ai
+// Per Cerebras docs, the documented models support tool calling via the
+// standard OpenAI-style `tools` parameter.
+const cerebrasModelOptions = {
+	'llama-4-scout-17b-16e-instruct': {
+		contextWindow: 8_192, // 8K free-tier context cap
+		reservedOutputTokenSpace: 2_048,
+		cost: { input: 0, output: 0 }, // free tier
+		downloadable: false,
+		supportsFIM: false,
+		supportsSystemMessage: 'system-role',
+		specialToolFormat: 'openai-style', // tool calls go through the OpenAI-style `tools` parameter
+		reasoningCapabilities: false,
+	},
+	'qwen-3-32b': { // declared as a reasoning model: <think>...</think> tags, reasoning cannot be turned off
+		contextWindow: 8_192, // 8K free-tier context cap
+		reservedOutputTokenSpace: 2_048,
+		cost: { input: 0, output: 0 }, // free tier
+		downloadable: false,
+		supportsFIM: false,
+		supportsSystemMessage: 'system-role',
+		specialToolFormat: 'openai-style', // tool calls go through the OpenAI-style `tools` parameter
+		reasoningCapabilities: { supportsReasoning: true, canIOReasoning: true, canTurnOffReasoning: false, openSourceThinkTags: ['<think>', '</think>'] },
+	},
+	'deepseek-r1-distill-llama-70b': { // declared as a reasoning model: <think>...</think> tags, reasoning cannot be turned off
+		contextWindow: 8_192, // 8K free-tier context cap
+		reservedOutputTokenSpace: 2_048,
+		cost: { input: 0, output: 0 }, // free tier
+		downloadable: false,
+		supportsFIM: false,
+		supportsSystemMessage: 'system-role',
+		specialToolFormat: 'openai-style', // tool calls go through the OpenAI-style `tools` parameter
+		reasoningCapabilities: { supportsReasoning: true, canIOReasoning: true, canTurnOffReasoning: false, openSourceThinkTags: ['<think>', '</think>'] },
+	},
+	'llama-3.3-70b': {
+		contextWindow: 8_192, // 8K free-tier context cap
+		reservedOutputTokenSpace: 2_048,
+		cost: { input: 0, output: 0 }, // free tier
+		downloadable: false,
+		supportsFIM: false,
+		supportsSystemMessage: 'system-role',
+		specialToolFormat: 'openai-style', // tool calls go through the OpenAI-style `tools` parameter
+		reasoningCapabilities: false,
+	},
+} as const satisfies { [s: string]: CortexideStaticModelInfo }
+
+const cerebrasSettings: VoidStaticProviderInfo = {
+	modelOptions: cerebrasModelOptions,
+	modelOptionsFallback: (modelName) => {
+		// Unknown models get the conservative 8K free-tier context cap (never 128K);
+		// documented models accept tool calls, so default the format to openai-style.
+		const resolved = extensiveModelOptionsFallback(modelName, { contextWindow: 8_192 });
+		if (!resolved || resolved.specialToolFormat) {
+			return resolved;
+		}
+		resolved.specialToolFormat = 'openai-style';
+		return resolved;
+	},
+	providerReasoningIOSettings: {
+		input: { includeInPayload: openAICompatIncludeInPayloadReasoning },
+		output: { nameOfFieldInDelta: 'reasoning_content' },
+	},
+}
+
 // ---------------- OPENROUTER ----------------
 const openRouterModelOptions_assumingOpenAICompat = {
 	'qwen/qwen3-235b-a22b': {
@@ -2028,6 +2105,7 @@ const modelSettingsOfProvider: { [providerName in ProviderName]: VoidStaticProvi
 
 	pollinations: pollinationsSettings,
 	moonshot: moonshotSettings,
+	cerebras: cerebrasSettings,
 
 	googleVertex: googleVertexSettings,
 	microsoftAzure: microsoftAzureSettings,

diff --git a/src/vs/workbench/contrib/cortexide/common/modelRouter.ts b/src/vs/workbench/contrib/cortexide/common/modelRouter.ts
@@ -156,7 +156,11 @@ export class TaskAwareModelRouter extends Disposable implements ITaskAwareModelR
 		// Pre-compute config to avoid repeated lookups
 		const settingsState = this.settingsService.state;
 		const perfSettings = settingsState.globalSettings.perf;
-		const localFirstAI = settingsState.globalSettings.localFirstAI ?? false;
+		// migrated from localFirstAI: 'local-only' policy is the canonical signal
+		// to bias toward local models. The deprecated `localFirstAI` flag is still
+		// OR-ed in, so a stale `true` keeps the bias on under any selected policy.
+		const localFirstAI = (settingsState.globalSettings.routingPolicy === 'local-only')
+			|| (settingsState.globalSettings.localFirstAI ?? false);
 
 		// Fast path: Check cache for identical contexts
 		const cacheKey = this.getCacheKey(context);
@@ -812,7 +816,11 @@ export class TaskAwareModelRouter extends Disposable implements ITaskAwareModelR
 
 		// Check Local-First AI setting
 		// PERFORMANCE: Use pre-computed value if provided, otherwise lookup (for backward compatibility)
-		const localFirstAICached = localFirstAI !== undefined ? localFirstAI : (settingsState.globalSettings.localFirstAI ?? false);
+		// migrated from localFirstAI: also honour `routingPolicy === 'local-only'`.
+		const localFirstAICached = localFirstAI !== undefined
+			? localFirstAI
+			: ((settingsState.globalSettings.routingPolicy === 'local-only')
+				|| (settingsState.globalSettings.localFirstAI ?? false));
 
 		let score = 0; // Start from 0, build up based on quality and fit
 
@@ -1543,7 +1551,9 @@ export class TaskAwareModelRouter extends Disposable implements ITaskAwareModelR
 		// Score local models using mixture policy
 		// Note: hasOnlineModels is false here since we're in privacy/offline mode
 		// PERFORMANCE: Pre-compute localFirstAI to pass to scoreModel
-		const localFirstAI = settingsState.globalSettings.localFirstAI ?? false;
+		// migrated from localFirstAI: also honour `routingPolicy === 'local-only'`.
+		const localFirstAI = (settingsState.globalSettings.routingPolicy === 'local-only')
+			|| (settingsState.globalSettings.localFirstAI ?? false);
 		const scored = localModels.map(model => {
 			const ruleScore = this.scoreModel(model, context, settingsState, false, localFirstAI);
 			const learnedScore = this.getLearnedScore(model, context);

diff --git a/src/vs/workbench/contrib/cortexide/common/routing/freeTierConstants.ts b/src/vs/workbench/contrib/cortexide/common/routing/freeTierConstants.ts
@@ -25,11 +25,6 @@ import { ProviderName } from '../cortexideSettingsTypes.js';
 
 /**
  * Free-tier-routable provider names.
- *
- * NOTE: Cerebras is intentionally absent because it is not yet wired into
- * `modelCapabilities.ts` / `cortexideSettingsTypes.ts` - see PR body.  It is
- * still represented in this constants file so the ladder is ready to pick it
- * up the moment a Cerebras provider exists.
  */
 export type FreeTierProviderId =
 	| 'cerebras'
@@ -64,7 +59,7 @@ export interface FreeTierQuota {
 export const FREE_TIER_QUOTAS: { readonly [K in FreeTierProviderId]: FreeTierQuota } = {
 	cerebras: {
 		providerId: 'cerebras',
-		cortexProviderName: null,
+		cortexProviderName: 'cerebras',
 		qualityRank: 100,
 		rpd: null,
 		rpm: 30,

diff --git a/src/vs/workbench/contrib/cortexide/electron-main/llmMessage/sendLLMMessage.impl.ts b/src/vs/workbench/contrib/cortexide/electron-main/llmMessage/sendLLMMessage.impl.ts
@@ -334,6 +334,11 @@ const newOpenAICompatibleSDK = async ({ settingsOfProvider, providerName, includ
 		const thisConfig = settingsOfProvider[providerName]
 		return new OpenAI({ baseURL: 'https://api.moonshot.ai/v1', apiKey: thisConfig.apiKey, ...commonPayloadOpts })
 	}
+	else if (providerName === 'cerebras') {
+		// Cerebras Cloud - OpenAI-compatible endpoint, 1M tokens/day free tier
+		const thisConfig = settingsOfProvider[providerName]
+		return new OpenAI({ baseURL: 'https://api.cerebras.ai/v1', apiKey: thisConfig.apiKey, ...commonPayloadOpts })
+	}
 
 	else throw new Error(`CortexIDE providerName was invalid: ${providerName}.`)
 }
@@ -1627,6 +1632,11 @@ export const sendLLMMessageToProviderImplementation = {
 		sendFIM: null,
 		list: null,
 	},
+	cerebras: {
+		sendChat: (params) => _sendOpenAICompatibleChat(params),
+		sendFIM: null,
+		list: null,
+	},
 
 } satisfies CallFnOfProvider
 

diff --git a/src/vs/workbench/contrib/cortexide/test/common/freeTierLadder.test.ts b/src/vs/workbench/contrib/cortexide/test/common/freeTierLadder.test.ts
@@ -125,4 +125,38 @@ suite('FreeTierLadder', () => {
 		assert.strictEqual(ladder.length, 0);
 		assert.strictEqual(pickTopFromLadder(ladder), null);
 	});
+
+	test('cerebras outranks groq when both have quota (quality rank 100 vs 80)', () => {
+		// Both providers are configured; neither quota snapshot is overridden.
+		const groqModel: ModelSelection = { providerName: 'groq', modelName: 'llama-3.3-70b-versatile' };
+		const cerebrasModel: ModelSelection = { providerName: 'cerebras', modelName: 'llama-4-scout-17b-16e-instruct' };
+		const rungs = buildFreeTierLadder({
+			configuredModels: [groqModel, cerebrasModel],
+			quotas: [snap('cerebras'), snap('groq')],
+			privacyMode: false,
+		});
+
+		assert.strictEqual(rungs.length, 2);
+		const [first, second] = rungs;
+		assert.strictEqual(first.providerId, 'cerebras', 'cerebras should win on quality rank');
+		assert.strictEqual(second.providerId, 'groq');
+		assert.deepStrictEqual(pickTopFromLadder(rungs), { providerName: 'cerebras', modelName: 'llama-4-scout-17b-16e-instruct' });
+	});
+
+	test('cerebras exhausted -> groq becomes top of ladder', () => {
+		// cerebras is flagged exhausted with a reset 30s from now; groq uses the default snapshot.
+		const cerebrasExhausted = snap('cerebras', { exhausted: true, resetAt: Date.now() + 30_000 });
+		const rungs = buildFreeTierLadder({
+			configuredModels: [
+				{ providerName: 'cerebras', modelName: 'qwen-3-32b' },
+				{ providerName: 'groq', modelName: 'llama-3.3-70b-versatile' },
+			],
+			quotas: [cerebrasExhausted, snap('groq')],
+			privacyMode: false,
+		});
+
+		// Only the groq rung is expected to remain.
+		assert.strictEqual(rungs.length, 1);
+		assert.strictEqual(rungs[0].providerId, 'groq');
+	});
 });