Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -1943,13 +1943,12 @@ export const Settings = () => {
<select
className='text-xs bg-void-bg-1 text-void-fg-1 border border-void-border-1 rounded px-1 py-0.5'
value={settingsState.globalSettings.routingPolicy ?? 'auto-cheapest'}
onChange={(e) => cortexideSettingsService.setGlobalSetting('routingPolicy', e.target.value as ('auto-cheapest' | 'free-tier' | 'local-only' | 'byok-paid'))}
onChange={(e) => cortexideSettingsService.setGlobalSetting('routingPolicy', e.target.value as ('auto-cheapest' | 'free-tier' | 'local-only'))}
title='Routing policy'
>
<option value='auto-cheapest'>Auto (cheapest viable)</option>
<option value='free-tier'>Free-tier ladder</option>
<option value='local-only'>Local only</option>
<option value='byok-paid'>BYOK paid models</option>
</select>
</div>
</div>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -373,6 +373,25 @@ class VoidSettingsService extends Disposable implements ICortexideSettingsServic
if (!validChatModes.includes(readS.globalSettings.chatMode as ChatMode)) {
readS.globalSettings.chatMode = defaultGlobalSettings.chatMode;
}

// Migrate the removed 'byok-paid' routing policy: it was never wired
// to a distinct code path and the use case is already covered by
// 'auto-cheapest' (score-based selection). Silently coerce so older
// installs don't surface an invalid value.
const persistedRoutingPolicy = readS.globalSettings.routingPolicy as string | undefined;
if (persistedRoutingPolicy === 'byok-paid') {
readS.globalSettings.routingPolicy = 'auto-cheapest';
}

// Migrate the deprecated `localFirstAI` flag into `routingPolicy`.
// `localFirstAI` is being retired in favour of the unified routing
// policy. We only translate when the user hasn't explicitly chosen
// a policy yet so an explicit selection always wins.
if (readS.globalSettings.routingPolicy === undefined) {
readS.globalSettings.routingPolicy = readS.globalSettings.localFirstAI === true
? 'local-only'
: 'auto-cheapest';
}
}
catch (e) {
readS = defaultState()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,9 @@ export const displayInfoOfProviderName = (providerName: ProviderName): DisplayIn
// allow-any-unicode-next-line
return { title: 'Moonshot AI (Kimi)', desc: 'Kimi K2 — #1 SWE-bench agentic coding. Free tier available.' }
}
else if (providerName === 'cerebras') {
return { title: 'Cerebras', desc: 'Free tier: 1M tokens/day, ~2,600 tok/s, 8K context cap.' }
}

throw new Error(`descOfProviderName: Unknown provider name: "${providerName}"`)
}
Expand All @@ -137,6 +140,7 @@ export const subTextMdOfProviderName = (providerName: ProviderName): string => {
if (providerName === 'liteLLM') return 'Read more about endpoints [here](https://docs.litellm.ai/docs/providers/openai_compatible).'
if (providerName === 'pollinations') return 'Get your [API Key here](https://enter.pollinations.ai/). [API Docs](https://enter.pollinations.ai/api/docs).'
if (providerName === 'moonshot') return 'Get your free [API Key here](https://platform.moonshot.ai/console/api-keys). Kimi K2 has a generous free tier. [Pricing](https://platform.moonshot.ai/docs/pricing).'
if (providerName === 'cerebras') return 'Get your free [API Key here](https://cloud.cerebras.ai/). Free tier includes 1M tokens/day with no card required. [Docs](https://inference-docs.cerebras.ai/).'

throw new Error(`subTextMdOfProviderName: Unknown provider name: "${providerName}"`)
}
Expand Down Expand Up @@ -167,7 +171,8 @@ export const displayInfoOfSettingName = (providerName: ProviderName, settingName
providerName === 'awsBedrock' ? 'key-...' :
providerName === 'pollinations' ? 'sk-... or pk-...' :
providerName === 'moonshot' ? 'sk-key...' :
'',
providerName === 'cerebras' ? 'csk-key...' :
'',

isPasswordField: true,
}
Expand Down Expand Up @@ -375,6 +380,12 @@ export const defaultSettingsOfProvider: SettingsOfProvider = {
...modelInfoOfDefaultModelNames(defaultModelsOfProvider.moonshot),
_didFillInProviderSettings: undefined,
},
cerebras: {
...defaultCustomSettings,
...defaultProviderSettings.cerebras,
...modelInfoOfDefaultModelNames(defaultModelsOfProvider.cerebras),
_didFillInProviderSettings: undefined,
},
}


Expand Down Expand Up @@ -530,19 +541,25 @@ export type GlobalSettings = {
indexerParallelism?: number; // Indexer parallelism limit (default: 2)
routerCacheTtlMs?: number; // Router cache TTL in ms (default: 2000)
};
// Local-First AI: When enabled, heavily bias router toward local models
localFirstAI?: boolean; // Prefer local models over cloud models (default: false)
/**
* @deprecated Use `routingPolicy === 'local-only'` instead. Retained for
* backward compatibility with stored settings and to keep the VS Code
* configuration key `cortexide.global.localFirstAI` readable. The settings
* service migrates `localFirstAI: true` -> `routingPolicy: 'local-only'`
* on load when `routingPolicy` is unset. Will be removed after a few
* releases.
*/
localFirstAI?: boolean;
// Routing policy: controls how the model router selects between configured providers.
// - 'auto-cheapest': existing behaviour - score-based mixture of rules + learned (default)
// - 'free-tier': prefer free-tier providers in quality-ranked order with quota tracking
// - 'local-only': never dispatch to a cloud provider, even if the model selection points there
// - 'byok-paid': prefer paid BYOK models, skipping free-tier ladders entirely
routingPolicy?: RoutingPolicy;
}

/** User-selectable routing policy for the model router. */
export type RoutingPolicy = 'auto-cheapest' | 'free-tier' | 'local-only' | 'byok-paid';
export const routingPolicies: readonly RoutingPolicy[] = ['auto-cheapest', 'free-tier', 'local-only', 'byok-paid'];
export type RoutingPolicy = 'auto-cheapest' | 'free-tier' | 'local-only';
export const routingPolicies: readonly RoutingPolicy[] = ['auto-cheapest', 'free-tier', 'local-only'];

export const defaultGlobalSettings: GlobalSettings = {
autoRefreshModels: true,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -284,7 +284,6 @@ export const EN_TRANSLATIONS = {
'routing.policy.autoCheapest': 'Auto (cheapest viable)',
'routing.policy.freeTier': 'Free-tier ladder',
'routing.policy.localOnly': 'Local only',
'routing.policy.byokPaid': 'BYOK paid models',
'routing.statusBar.label': 'Free-tier quota',
'routing.statusBar.none': 'No free-tier providers',
'routing.statusBar.entry': '{0}: {1}/{2} RPD',
Expand Down
78 changes: 78 additions & 0 deletions src/vs/workbench/contrib/cortexide/common/modelCapabilities.ts
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,9 @@ export const defaultProviderSettings = {
moonshot: { // Kimi K2 — free tier available at platform.moonshot.ai
apiKey: '',
},
cerebras: { // Cerebras Cloud - OpenAI-compatible, 1M tokens/day free tier, 8K context cap
apiKey: '',
},

} as const

Expand Down Expand Up @@ -301,6 +304,14 @@ export const defaultModelsOfProvider = {
'moonshot-v1-32k', // 32k context model
'moonshot-v1-128k', // Long context (128k tokens)
],
cerebras: [ // https://inference-docs.cerebras.ai/introduction
// Cerebras Cloud free tier: 1M tokens/day, ~2,600 tok/s, 8K context cap.
// Reference: https://inference-docs.cerebras.ai/api-reference/models (checked 2026-05)
'llama-4-scout-17b-16e-instruct', // Llama 4 Scout 17B 16E
'qwen-3-32b', // Qwen 3 32B reasoning model
'deepseek-r1-distill-llama-70b', // DeepSeek R1 distilled into Llama 70B
'llama-3.3-70b', // Llama 3.3 70B
],


} as const satisfies Record<ProviderName, string[]>
Expand Down Expand Up @@ -1799,6 +1810,72 @@ const moonshotSettings: VoidStaticProviderInfo = {
},
}

// ---------------- CEREBRAS ----------------
// Cerebras Cloud is OpenAI-compatible. Free tier: 1M tokens/day, ~2,600 tok/s,
// 8K context cap. Reference: https://inference-docs.cerebras.ai
// Per Cerebras docs, the documented models support tool calling via the
// standard OpenAI-style `tools` parameter.
/**
 * Static capability table for the Cerebras model names listed below.
 *
 * Every entry carries the same limits: `contextWindow` 8_192 (the free-tier
 * cap noted above), `reservedOutputTokenSpace` 2_048, zero input/output cost,
 * `downloadable: false`, `supportsFIM: false`,
 * `supportsSystemMessage: 'system-role'` and
 * `specialToolFormat: 'openai-style'`. The entries differ only in
 * `reasoningCapabilities`.
 *
 * NOTE(review): model names and limits mirror the provider documentation
 * referenced above - re-check them against the current Cerebras model list
 * before adding or removing entries.
 */
const cerebrasModelOptions = {
// Non-reasoning entry.
'llama-4-scout-17b-16e-instruct': {
contextWindow: 8_192,
reservedOutputTokenSpace: 2_048,
cost: { input: 0, output: 0 }, // free tier
downloadable: false,
supportsFIM: false,
supportsSystemMessage: 'system-role',
specialToolFormat: 'openai-style',
reasoningCapabilities: false,
},
// Reasoning entry: declares `<think>` / `</think>` as its think tags and
// sets `canTurnOffReasoning` to false.
'qwen-3-32b': {
contextWindow: 8_192,
reservedOutputTokenSpace: 2_048,
cost: { input: 0, output: 0 },
downloadable: false,
supportsFIM: false,
supportsSystemMessage: 'system-role',
specialToolFormat: 'openai-style',
reasoningCapabilities: { supportsReasoning: true, canIOReasoning: true, canTurnOffReasoning: false, openSourceThinkTags: ['<think>', '</think>'] },
},
// Reasoning entry with the same reasoning settings as 'qwen-3-32b'.
'deepseek-r1-distill-llama-70b': {
contextWindow: 8_192,
reservedOutputTokenSpace: 2_048,
cost: { input: 0, output: 0 },
downloadable: false,
supportsFIM: false,
supportsSystemMessage: 'system-role',
specialToolFormat: 'openai-style',
reasoningCapabilities: { supportsReasoning: true, canIOReasoning: true, canTurnOffReasoning: false, openSourceThinkTags: ['<think>', '</think>'] },
},
// Non-reasoning entry.
'llama-3.3-70b': {
contextWindow: 8_192,
reservedOutputTokenSpace: 2_048,
cost: { input: 0, output: 0 },
downloadable: false,
supportsFIM: false,
supportsSystemMessage: 'system-role',
specialToolFormat: 'openai-style',
reasoningCapabilities: false,
},
// `as const` keeps the literal key/value types; `satisfies` checks every
// entry against CortexideStaticModelInfo without widening them.
} as const satisfies { [s: string]: CortexideStaticModelInfo }

/**
 * Provider-level static settings for Cerebras: the known-model table, a
 * fallback for model names that are not in that table, and the reasoning
 * input/output wiring.
 */
const cerebrasSettings: VoidStaticProviderInfo = {
	modelOptions: cerebrasModelOptions,
	modelOptionsFallback: (modelName) => {
		// Ask the shared fallback for a best guess, but pin the context window
		// to 8_192 so an unrecognised model is not assumed to have a larger one.
		const inferred = extensiveModelOptionsFallback(modelName, { contextWindow: 8_192 });

		// Nothing could be inferred: hand the empty result straight back.
		if (!inferred) {
			return inferred;
		}

		// Only fill in the tool format when the shared fallback left it unset;
		// a format it already chose is kept as-is.
		if (!inferred.specialToolFormat) {
			inferred.specialToolFormat = 'openai-style';
		}

		return inferred;
	},
	providerReasoningIOSettings: {
		// Reasoning options are sent using the shared OpenAI-compatible payload builder.
		input: { includeInPayload: openAICompatIncludeInPayloadReasoning },
		// Reasoning output is read from the `reasoning_content` field of each delta.
		output: { nameOfFieldInDelta: 'reasoning_content' },
	},
}

// ---------------- OPENROUTER ----------------
const openRouterModelOptions_assumingOpenAICompat = {
'qwen/qwen3-235b-a22b': {
Expand Down Expand Up @@ -2028,6 +2105,7 @@ const modelSettingsOfProvider: { [providerName in ProviderName]: VoidStaticProvi

pollinations: pollinationsSettings,
moonshot: moonshotSettings,
cerebras: cerebrasSettings,

googleVertex: googleVertexSettings,
microsoftAzure: microsoftAzureSettings,
Expand Down
16 changes: 13 additions & 3 deletions src/vs/workbench/contrib/cortexide/common/modelRouter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,11 @@ export class TaskAwareModelRouter extends Disposable implements ITaskAwareModelR
// Pre-compute config to avoid repeated lookups
const settingsState = this.settingsService.state;
const perfSettings = settingsState.globalSettings.perf;
const localFirstAI = settingsState.globalSettings.localFirstAI ?? false;
// migrated from localFirstAI: 'local-only' policy is the canonical signal
// to bias toward local models. We continue to honour the deprecated
// `localFirstAI` flag for installs that haven't migrated yet.
const localFirstAI = (settingsState.globalSettings.routingPolicy === 'local-only')
|| (settingsState.globalSettings.localFirstAI ?? false);

// Fast path: Check cache for identical contexts
const cacheKey = this.getCacheKey(context);
Expand Down Expand Up @@ -812,7 +816,11 @@ export class TaskAwareModelRouter extends Disposable implements ITaskAwareModelR

// Check Local-First AI setting
// PERFORMANCE: Use pre-computed value if provided, otherwise lookup (for backward compatibility)
const localFirstAICached = localFirstAI !== undefined ? localFirstAI : (settingsState.globalSettings.localFirstAI ?? false);
// migrated from localFirstAI: also honour `routingPolicy === 'local-only'`.
const localFirstAICached = localFirstAI !== undefined
? localFirstAI
: ((settingsState.globalSettings.routingPolicy === 'local-only')
|| (settingsState.globalSettings.localFirstAI ?? false));

let score = 0; // Start from 0, build up based on quality and fit

Expand Down Expand Up @@ -1543,7 +1551,9 @@ export class TaskAwareModelRouter extends Disposable implements ITaskAwareModelR
// Score local models using mixture policy
// Note: hasOnlineModels is false here since we're in privacy/offline mode
// PERFORMANCE: Pre-compute localFirstAI to pass to scoreModel
const localFirstAI = settingsState.globalSettings.localFirstAI ?? false;
// migrated from localFirstAI: also honour `routingPolicy === 'local-only'`.
const localFirstAI = (settingsState.globalSettings.routingPolicy === 'local-only')
|| (settingsState.globalSettings.localFirstAI ?? false);
const scored = localModels.map(model => {
const ruleScore = this.scoreModel(model, context, settingsState, false, localFirstAI);
const learnedScore = this.getLearnedScore(model, context);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,6 @@ import { ProviderName } from '../cortexideSettingsTypes.js';

/**
* Free-tier-routable provider names.
*
* NOTE: Cerebras is intentionally absent because it is not yet wired into
* `modelCapabilities.ts` / `cortexideSettingsTypes.ts` - see PR body. It is
* still represented in this constants file so the ladder is ready to pick it
* up the moment a Cerebras provider exists.
*/
export type FreeTierProviderId =
| 'cerebras'
Expand Down Expand Up @@ -64,7 +59,7 @@ export interface FreeTierQuota {
export const FREE_TIER_QUOTAS: { readonly [K in FreeTierProviderId]: FreeTierQuota } = {
cerebras: {
providerId: 'cerebras',
cortexProviderName: null,
cortexProviderName: 'cerebras',
qualityRank: 100,
rpd: null,
rpm: 30,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -334,6 +334,11 @@ const newOpenAICompatibleSDK = async ({ settingsOfProvider, providerName, includ
const thisConfig = settingsOfProvider[providerName]
return new OpenAI({ baseURL: 'https://api.moonshot.ai/v1', apiKey: thisConfig.apiKey, ...commonPayloadOpts })
}
else if (providerName === 'cerebras') {
// Cerebras Cloud - OpenAI-compatible endpoint, 1M tokens/day free tier
const thisConfig = settingsOfProvider[providerName]
return new OpenAI({ baseURL: 'https://api.cerebras.ai/v1', apiKey: thisConfig.apiKey, ...commonPayloadOpts })
}

else throw new Error(`CortexIDE providerName was invalid: ${providerName}.`)
}
Expand Down Expand Up @@ -1627,6 +1632,11 @@ export const sendLLMMessageToProviderImplementation = {
sendFIM: null,
list: null,
},
cerebras: {
sendChat: (params) => _sendOpenAICompatibleChat(params),
sendFIM: null,
list: null,
},

} satisfies CallFnOfProvider

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -125,4 +125,38 @@ suite('FreeTierLadder', () => {
assert.strictEqual(ladder.length, 0);
assert.strictEqual(pickTopFromLadder(ladder), null);
});

test('cerebras outranks groq when both have quota (quality rank 100 vs 80)', () => {
	// Configured order is groq then cerebras; quota snapshots are supplied as
	// cerebras then groq. Neither snapshot carries overrides.
	const configuredModels: ModelSelection[] = [
		{ providerName: 'groq', modelName: 'llama-3.3-70b-versatile' },
		{ providerName: 'cerebras', modelName: 'llama-4-scout-17b-16e-instruct' },
	];
	const quotas = [snap('cerebras'), snap('groq')];

	const rungs = buildFreeTierLadder({ configuredModels, quotas, privacyMode: false });

	// Both providers must be present, cerebras ahead of groq.
	assert.strictEqual(rungs.length, 2);
	const [firstRung, secondRung] = rungs;
	assert.strictEqual(firstRung.providerId, 'cerebras', 'cerebras should win on quality rank');
	assert.strictEqual(secondRung.providerId, 'groq');

	// The top pick must be the configured cerebras selection.
	const expectedTop = { providerName: 'cerebras', modelName: 'llama-4-scout-17b-16e-instruct' };
	assert.deepStrictEqual(pickTopFromLadder(rungs), expectedTop);
});

test('cerebras exhausted -> groq becomes top of ladder', () => {
	const configuredModels: ModelSelection[] = [
		{ providerName: 'cerebras', modelName: 'qwen-3-32b' },
		{ providerName: 'groq', modelName: 'llama-3.3-70b-versatile' },
	];

	// Cerebras is reported as exhausted with a reset 30 seconds from now;
	// the groq snapshot carries no overrides.
	const cerebrasQuota = snap('cerebras', { exhausted: true, resetAt: Date.now() + 30_000 });
	const groqQuota = snap('groq');

	const rungs = buildFreeTierLadder({
		configuredModels,
		quotas: [cerebrasQuota, groqQuota],
		privacyMode: false,
	});

	// Only groq should remain on the ladder.
	assert.strictEqual(rungs.length, 1);
	const [onlyRung] = rungs;
	assert.strictEqual(onlyRung.providerId, 'groq');
});
});
Loading