diff --git a/src/vs/workbench/contrib/cortexide/browser/cortexideStatusBar.ts b/src/vs/workbench/contrib/cortexide/browser/cortexideStatusBar.ts
index cbe02bde8cf..ed75eddd343 100644
--- a/src/vs/workbench/contrib/cortexide/browser/cortexideStatusBar.ts
+++ b/src/vs/workbench/contrib/cortexide/browser/cortexideStatusBar.ts
@@ -13,6 +13,9 @@ import { registerWorkbenchContribution2, WorkbenchPhase } from '../../../common/
 import { IChatThreadService } from './chatThreadService.js';
 import { localProviderNames } from '../common/cortexideSettingsTypes.js';
 import { ProviderName } from '../common/cortexideSettingsTypes.js';
+import { IFreeTierQuotaService, FreeTierRemaining } from '../common/routing/freeTierQuotaService.js';
+import { freeTierIdOfProviderName, FREE_TIER_QUOTAS } from '../common/routing/freeTierConstants.js';
+import { ICortexideI18nService } from '../common/i18n/i18nService.js';
 
 export class CortexideStatusBarContribution extends Disposable implements IWorkbenchContribution {
 	static readonly ID = 'workbench.contrib.cortexideStatusBar';
@@ -20,12 +23,15 @@ export class CortexideStatusBarContribution extends Disposable implements IWorkb
 	private modelEntry: IStatusbarEntryAccessor | undefined;
 	private latencyEntry: IStatusbarEntryAccessor | undefined;
 	private privacyEntry: IStatusbarEntryAccessor | undefined;
+	private freeTierEntry: IStatusbarEntryAccessor | undefined;
 	private readonly updateDisposables = this._register(new MutableDisposable());
 
 	constructor(
 		@IStatusbarService private readonly statusbarService: IStatusbarService,
 		@ICortexideSettingsService private readonly cortexideSettingsService: ICortexideSettingsService,
 		@IChatThreadService private readonly chatThreadService: IChatThreadService,
+		@IFreeTierQuotaService private readonly freeTierQuotaService: IFreeTierQuotaService,
+		@ICortexideI18nService private readonly i18nService: ICortexideI18nService,
 	) {
 		super();
 		this.create();
@@ -56,6 +62,14 @@ export class CortexideStatusBarContribution extends Disposable implements IWorkb
 			StatusbarAlignment.RIGHT,
 			{ location: { id: 'status.editor.mode', priority: 100.4 }, alignment: StatusbarAlignment.RIGHT }
 		);
+
+		// Free-tier quota widget
+		this.freeTierEntry = this.statusbarService.addEntry(
+			this.getFreeTierEntryProps(),
+			'cortexide.freeTier',
+			StatusbarAlignment.RIGHT,
+			{ location: { id: 'status.editor.mode', priority: 100.5 }, alignment: StatusbarAlignment.RIGHT }
+		);
 	}
 
 	private registerListeners(): void {
@@ -76,6 +90,20 @@ export class CortexideStatusBarContribution extends Disposable implements IWorkb
 		}, 500);
 
 		this._register({ dispose: () => clearInterval(latencyUpdateInterval) });
+
+		// Refresh free-tier widget on every quota mutation (recordCall, markExhausted)
+		this._register(this.freeTierQuotaService.onQuotaChange(() => {
+			this.freeTierEntry?.update(this.getFreeTierEntryProps());
+		}));
+		// Also refresh on settings changes so newly-added providers appear immediately
+		this._register(this.cortexideSettingsService.onDidChangeState(() => {
+			this.freeTierEntry?.update(this.getFreeTierEntryProps());
+		}));
+		// Slow tick to keep window rollovers visible to the user
+		const quotaTick = setInterval(() => {
+			this.freeTierEntry?.update(this.getFreeTierEntryProps());
+		}, 15_000);
+		this._register({ dispose: () => clearInterval(quotaTick) });
 	}
 
 	private getModelEntryProps(): IStatusbarEntry {
@@ -229,14 +257,110 @@ export class CortexideStatusBarContribution extends Disposable implements IWorkb
 		};
 	}
 
+	/**
+	 * Free-tier quota widget. Returns an entry with empty text when no
+	 * free-tier providers are configured; otherwise shows the status of the
+	 * highest-ranked configured provider, with a multiline tooltip listing
+	 * every provider's status.
+	 */
+	private getFreeTierEntryProps(): IStatusbarEntry {
+		const t = (key: Parameters<ICortexideI18nService['t']>[0], fallback?: string) => this.i18nService.t(key, fallback);
+		const configuredFreeTierProviders = this.collectConfiguredFreeTierProviders();
+		if (configuredFreeTierProviders.length === 0) {
+			return {
+				name: t('routing.statusBar.label', 'Free-tier quota'),
+				text: '',
+				ariaLabel: '',
+				tooltip: t('routing.statusBar.tooltipNoProviders', 'No free-tier providers configured.'),
+			};
+		}
+
+		// Rank providers best-first. The badge always reflects the best-ranked
+		// provider - even while it is exhausted - so the warning icon stays
+		// visible; the tooltip lists every provider.
+		const enriched = configuredFreeTierProviders
+			.map(p => ({ ...p, remaining: this.freeTierQuotaService.getRemaining(p.providerId, p.modelName) }))
+			.sort((a, b) => b.qualityRank - a.qualityRank);
+
+		const top = enriched[0];
+		// Only the icon depends on the state; the label comes from formatProviderStatus.
+		const icon = top.remaining.exhausted ? '$(warning)' : '$(pulse)';
+		const text = `${icon} ${this.formatProviderStatus(top.remaining)}`;
+
+		// Multiline tooltip listing every provider's status.
+		const lines: string[] = [t('routing.statusBar.tooltipTitle', 'Free-tier provider quotas')];
+		for (const p of enriched) {
+			lines.push(this.formatProviderStatus(p.remaining));
+		}
+		const tooltip = lines.join('\n');
+
+		return {
+			name: t('routing.statusBar.label', 'Free-tier quota'),
+			text,
+			ariaLabel: text,
+			tooltip,
+		};
+	}
+
+	/**
+	 * Inspect settings to find configured free-tier providers with at least
+	 * one visible model. Returns provider id + first visible model name.
+	 */
+	private collectConfiguredFreeTierProviders(): Array<{ providerId: NonNullable<ReturnType<typeof freeTierIdOfProviderName>>; providerName: ProviderName; modelName: string; qualityRank: number }> {
+		const settings = this.cortexideSettingsService.state;
+		const out: Array<{ providerId: NonNullable<ReturnType<typeof freeTierIdOfProviderName>>; providerName: ProviderName; modelName: string; qualityRank: number }> = [];
+		for (const providerName of Object.keys(settings.settingsOfProvider) as ProviderName[]) {
+			const ps = settings.settingsOfProvider[providerName];
+			if (!ps._didFillInProviderSettings) continue;
+			const ftId = freeTierIdOfProviderName(providerName);
+			if (ftId === null) continue;
+			const firstModel = ps.models.find(m => !m.isHidden);
+			if (!firstModel) continue;
+			out.push({
+				providerId: ftId,
+				providerName,
+				modelName: firstModel.modelName,
+				qualityRank: FREE_TIER_QUOTAS[ftId].qualityRank,
+			});
+		}
+		return out;
+	}
+
+	/**
+	 * One-line status for a provider, in order of precedence: exhausted,
+	 * daily usage (used/limit RPD), per-minute usage (used/limit RPM), uncapped.
+	 */
+	private formatProviderStatus(remaining: FreeTierRemaining): string {
+		// Fill every `{i}` placeholder literally: split/join replaces all
+		// occurrences and, unlike String.replace with a string replacement,
+		// never interprets `$&`-style patterns inside an argument.
+		const t = (key: Parameters<ICortexideI18nService['t']>[0], ...args: string[]) =>
+			args.reduce((acc, arg, i) => acc.split(`{${i}}`).join(arg), this.i18nService.t(key));
+		const name = remaining.providerId;
+		if (remaining.exhausted) {
+			return t('routing.statusBar.exhausted', name);
+		}
+		if (remaining.rpd !== null && remaining.limits.rpd !== null) {
+			const used = remaining.limits.rpd - remaining.rpd;
+			return t('routing.statusBar.entry', name, String(used), String(remaining.limits.rpd));
+		}
+		if (remaining.rpm !== null && remaining.limits.rpm !== null) {
+			const used = remaining.limits.rpm - remaining.rpm;
+			return t('routing.statusBar.entryRpm', name, String(used), String(remaining.limits.rpm));
+		}
+		return t('routing.statusBar.uncapped', name);
+	}
+
 	override dispose(): void {
 		super.dispose();
 		this.modelEntry?.dispose();
 		this.latencyEntry?.dispose();
 		this.privacyEntry?.dispose();
+		this.freeTierEntry?.dispose();
 		this.modelEntry = undefined;
 		this.latencyEntry = undefined;
 		this.privacyEntry = undefined;
+		this.freeTierEntry = undefined;
 	}
 }
diff --git
a/src/vs/workbench/contrib/cortexide/browser/react/src/settings/Settings.tsx b/src/vs/workbench/contrib/cortexide/browser/react/src/settings/Settings.tsx index 436f7ea586d..e8a212a9ece 100644 --- a/src/vs/workbench/contrib/cortexide/browser/react/src/settings/Settings.tsx +++ b/src/vs/workbench/contrib/cortexide/browser/react/src/settings/Settings.tsx @@ -1932,6 +1932,29 @@ export const Settings = () => { + {/* Routing Policy Section */} + +
+

Routing policy

+
+ Controls how CortexIDE picks between configured model providers. Free-tier ladder tracks per-provider quotas and auto-fails-over on 429. +
+
+ +
+
+
+ {/* YOLO Mode Section */}
diff --git a/src/vs/workbench/contrib/cortexide/common/cortexideSettingsTypes.ts b/src/vs/workbench/contrib/cortexide/common/cortexideSettingsTypes.ts
index 1913aaa68ad..a08c5c46f4d 100644
--- a/src/vs/workbench/contrib/cortexide/common/cortexideSettingsTypes.ts
+++ b/src/vs/workbench/contrib/cortexide/common/cortexideSettingsTypes.ts
@@ -532,8 +532,21 @@ export type GlobalSettings = {
 	};
 	// Local-First AI: When enabled, heavily bias router toward local models
 	localFirstAI?: boolean; // Prefer local models over cloud models (default: false)
+	// Routing policy: controls how the model router selects between configured providers.
+	// - 'auto-cheapest': existing behaviour - score-based mixture of rules + learned (default)
+	// - 'free-tier': prefer free-tier providers in quality-ranked order with quota tracking
+	// - 'local-only': never dispatch to a cloud provider, even if the model selection points there
+	// - 'byok-paid': prefer paid BYOK models, skipping free-tier ladders entirely
+	// NOTE(review): the modelRouter.ts change in this patch only special-cases 'free-tier' and
+	// 'local-only'; 'byok-paid' falls through to the default scoring path - confirm this is intended.
+	routingPolicy?: RoutingPolicy;
 }
 
+/** Every selectable routing policy. `RoutingPolicy` is derived from this list so the two cannot drift apart. */
+export const routingPolicies = ['auto-cheapest', 'free-tier', 'local-only', 'byok-paid'] as const;
+/** User-selectable routing policy for the model router. */
+export type RoutingPolicy = typeof routingPolicies[number];
+
 export const defaultGlobalSettings: GlobalSettings = {
 	autoRefreshModels: true,
 	aiInstructions: '',
@@ -589,6 +602,7 @@ export const defaultGlobalSettings: GlobalSettings = {
 		routerCacheTtlMs: 2000, // 2 second cache TTL (caching enabled)
 	},
 	localFirstAI: false, // Local-First AI disabled by default (users can enable for privacy/performance)
+	routingPolicy: 'auto-cheapest', // Existing scoring behaviour remains the default
 }
 
 export type GlobalSettingName = keyof GlobalSettings
diff --git a/src/vs/workbench/contrib/cortexide/common/i18n/i18nService.ts b/src/vs/workbench/contrib/cortexide/common/i18n/i18nService.ts
index 38fbba9c5fb..e5a69351a11 100644
--- a/src/vs/workbench/contrib/cortexide/common/i18n/i18nService.ts
+++ b/src/vs/workbench/contrib/cortexide/common/i18n/i18nService.ts
@@ -276,6 +276,23 @@ export const EN_TRANSLATIONS = {
 	'common.copy': 'Copy',
 	'common.copied': 'Copied!',
 	'common.open': 'Open',
+
+	// allow-any-unicode-next-line
+	// ── Routing / free-tier router ───────────────────────────────────────────
+	'routing.policy.label': 'Routing policy',
+	'routing.policy.description': 'Controls how CortexIDE picks between configured model providers.',
+	'routing.policy.autoCheapest': 'Auto (cheapest viable)',
+	'routing.policy.freeTier': 'Free-tier ladder',
+	'routing.policy.localOnly': 'Local only',
+	'routing.policy.byokPaid': 'BYOK paid models',
+	'routing.statusBar.label': 'Free-tier quota',
+	'routing.statusBar.none': 'No free-tier providers',
+	'routing.statusBar.entry': '{0}: {1}/{2} RPD',
+	'routing.statusBar.entryRpm': '{0}: {1}/{2} RPM',
+	'routing.statusBar.exhausted': '{0}: exhausted',
+	'routing.statusBar.uncapped': '{0}: uncapped',
+	'routing.statusBar.tooltipTitle': 'Free-tier provider quotas',
+	'routing.statusBar.tooltipNoProviders': 'No free-tier providers are configured. Add a free-tier API key (Groq, Gemini, OpenRouter, Mistral) to see live quota tracking.',
 } as const;
 
 // allow-any-unicode-next-line
diff --git a/src/vs/workbench/contrib/cortexide/common/modelRouter.ts b/src/vs/workbench/contrib/cortexide/common/modelRouter.ts
index 837fb69a136..edc0a5edf03 100644
--- a/src/vs/workbench/contrib/cortexide/common/modelRouter.ts
+++ b/src/vs/workbench/contrib/cortexide/common/modelRouter.ts
@@ -5,7 +5,7 @@
 
 import { ProviderName, ModelSelection } from './cortexideSettingsTypes.js';
 import { getModelCapabilities, CortexideStaticModelInfo } from './modelCapabilities.js';
-import { ICortexideSettingsService } from './cortexideSettingsService.js';
+import { ICortexideSettingsService, CortexideSettingsState } from './cortexideSettingsService.js';
 import { localProviderNames } from './cortexideSettingsTypes.js';
 import { Disposable } from '../../../../base/common/lifecycle.js';
 import { createDecorator } from '../../../../platform/instantiation/common/instantiation.js';
@@ -14,6 +14,8 @@ import { RoutingEvaluationService } from './routingEvaluation.js';
 import { IStorageService } from '../../../../platform/storage/common/storage.js';
 import { shouldUseSpeculativeEscalation } from './routingEscalation.js';
 import { getPerformanceHarness } from './performanceHarness.js';
+import { IFreeTierQuotaService } from './routing/freeTierQuotaService.js';
+import { buildFreeTierLadder, pickTopFromLadder } from './routing/freeTierLadder.js';
 
 /**
  * Task types for automatic model selection
@@ -90,7 +92,8 @@ export class TaskAwareModelRouter extends Disposable implements ITaskAwareModelR
 
 	constructor(
 		@ICortexideSettingsService private readonly settingsService: ICortexideSettingsService,
-		@IStorageService private readonly storageService: IStorageService
+		@IStorageService private readonly storageService: IStorageService,
+		@IFreeTierQuotaService private readonly freeTierQuotaService: IFreeTierQuotaService,
 	) {
 		super();
 		this.evaluationService = new RoutingEvaluationService(this.storageService);
@@ -198,6 +201,34 @@ export class TaskAwareModelRouter extends Disposable implements ITaskAwareModelR
 			// This is handled in scoreModel by applying heavy bonuses to local models
 		}
 
+		// Routing policy: 'free-tier' -> consult the smart free-tier router first.
+		// If the ladder is empty (no configured free-tier providers, all exhausted,
+		// or privacy gate engaged), fall through to the standard scoring path so
+		// the user is never stranded.
+		const routingPolicy = settingsState.globalSettings.routingPolicy ?? 'auto-cheapest';
+		if (routingPolicy === 'free-tier') {
+			const ladderDecision = this.routeViaFreeTierLadder(context, settingsState);
+			if (ladderDecision) {
+				this.routingCache.set(cacheKey, { decision: ladderDecision, timestamp: Date.now() });
+				return ladderDecision;
+			}
+		} else if (routingPolicy === 'local-only') {
+			// Hard local-only: refuse to dispatch to any cloud provider.
+			const localDecision = this.routeToLocalModel(context);
+			if (localDecision) {
+				this.routingCache.set(cacheKey, { decision: localDecision, timestamp: Date.now() });
+				return localDecision;
+			}
+			return {
+				modelSelection: { providerName: 'auto', modelName: 'auto' },
+				confidence: 0.0,
+				reasoning: 'Routing policy is local-only but no local models are configured.',
+				qualityTier: 'abstain',
+				shouldAbstain: true,
+				abstainReason: 'No local models for local-only routing policy',
+			};
+		}
+
 		// Quality gate: pre-flight quality estimate
 		const qualityTier = this.estimateQualityTier(context);
 
@@ -1438,6 +1469,56 @@ export class TaskAwareModelRouter extends Disposable implements ITaskAwareModelR
-	 * Route to a local model (privacy/offline mode)
-	 * Returns null if no local models are available (caller must handle fallback)
-	 */
+	 * Route via the smart free-tier ladder. Returns `null` when no free-tier
+	 * provider is currently usable (caller should fall through to standard
+	 * scoring or local fallback).
+	 *
+	 * Cloud providers are only considered when the privacy gate is NOT engaged
+	 * - `requiresPrivacy` short-circuits to `null` here so callers can route
+	 * to local.
+	 *
+	 * NOTE(review): quotas are read via `getAllRemaining()` without a model
+	 * name, so Gemini's per-model limits are not applied when filtering.
+	 */
+	private routeViaFreeTierLadder(
+		context: TaskContext,
+		settingsState: CortexideSettingsState,
+	): RoutingDecision | null {
+		if (context.requiresPrivacy) {
+			return null;
+		}
+
+		const configured = this.getAvailableModels(settingsState);
+		const quotas = this.freeTierQuotaService.getAllRemaining();
+		const ladder = buildFreeTierLadder({
+			configuredModels: configured,
+			quotas,
+			privacyMode: !!context.requiresPrivacy,
+		});
+
+		const top = pickTopFromLadder(ladder);
+		if (!top) {
+			return null;
+		}
+
+		const fallbackChain: ModelSelection[] = ladder.slice(1, 4).map(c => ({
+			providerName: c.providerName,
+			modelName: c.modelName,
+		}));
+
+		const timeoutMs = this.getModelTimeout(top, context, settingsState);
+
+		return {
+			modelSelection: top,
+			confidence: 0.75,
+			reasoning: `Free-tier ladder selected ${top.providerName}/${top.modelName} (next: ${fallbackChain.map(m => m.providerName).join(', ') || 'none'})`,
+			fallbackChain,
+			qualityTier: 'cheap_fast',
+			timeoutMs,
+		};
+	}
+
+	/**
+	 * Route to a local model (privacy/offline mode)
+	 * Returns null if no local models are available (caller must handle fallback)
+	 */
 	private routeToLocalModel(context: TaskContext): RoutingDecision | null {
 		const settingsState = this.settingsService.state;
 		const localModels: ModelSelection[] = [];
diff --git a/src/vs/workbench/contrib/cortexide/common/routing/freeTierConstants.ts b/src/vs/workbench/contrib/cortexide/common/routing/freeTierConstants.ts
new file mode 100644
index 00000000000..c6c45acfc21
--- /dev/null
+++ b/src/vs/workbench/contrib/cortexide/common/routing/freeTierConstants.ts
@@ -0,0 +1,181 @@
+/*--------------------------------------------------------------------------------------
+ *  Copyright 2025 Glass Devtools, Inc. All rights reserved.
+ *  Licensed under the Apache License, Version 2.0.
+ *  See LICENSE.txt for more information.
+ *--------------------------------------------------------------------------------------*/
+
+/**
+ * Free-tier quota and quality configuration for the smart free-tier router.
+ *
+ * Source of truth for the rate limits we know about as of 2026-05. These are
+ * intentionally NOT hardcoded URLs - only request/token rate limits. When a
+ * provider changes its published limits, update this file in one place.
+ *
+ * RPD = requests per day.
+ * RPM = requests per minute.
+ * TPM = tokens per minute.
+ *
+ * `qualityRank` is a relative ordering used by `freeTierLadder` when more
+ * than one provider has remaining quota. Higher = preferred. See research
+ * doc section 4.1 for the full rationale.
+ *
+ *   Cerebras > Groq > Gemini Flash > OpenRouter free > Mistral > Cloudflare
+ */
+
+import { ProviderName } from '../cortexideSettingsTypes.js';
+
+/**
+ * Free-tier-routable provider names.
+ *
+ * NOTE: Cerebras has no CortexIDE provider yet (it is not wired into
+ * `modelCapabilities.ts` / `cortexideSettingsTypes.ts` - see PR body), so its
+ * `cortexProviderName` is `null`; it is listed so the ladder can pick it up later.
+ */
+export type FreeTierProviderId =
+	| 'cerebras'
+	| 'groq'
+	| 'gemini'
+	| 'mistral'
+	| 'openRouter'
+	| 'cloudflareWorkersAI';
+
+/**
+ * Per-provider free-tier policy.
+ */
+export interface FreeTierQuota {
+	readonly providerId: FreeTierProviderId;
+	/** Provider name in CortexIDE's `ProviderName` enum, or `null` if not yet wired. */
+	readonly cortexProviderName: ProviderName | null;
+	/** Higher value = preferred when multiple providers have quota. */
+	readonly qualityRank: number;
+	/** Requests per day, or `null` if not capped. */
+	readonly rpd: number | null;
+	/** Requests per minute, or `null` if not capped. */
+	readonly rpm: number | null;
+	/** Tokens per minute, or `null` if not capped. */
+	readonly tpm: number | null;
+	/** Free-text note for tooltips / docs. */
+	readonly notes: string;
+}
+
+/**
+ * Free-tier quota table. Update here when provider docs change.
+ */
+export const FREE_TIER_QUOTAS: { readonly [K in FreeTierProviderId]: FreeTierQuota } = {
+	cerebras: {
+		providerId: 'cerebras',
+		cortexProviderName: null,
+		qualityRank: 100,
+		rpd: null,
+		rpm: 30,
+		tpm: null,
+		notes: '1M tokens/day; 8K context cap',
+	},
+	groq: {
+		providerId: 'groq',
+		cortexProviderName: 'groq',
+		qualityRank: 80,
+		rpd: 1000,
+		rpm: 30,
+		tpm: 6000,
+		notes: '',
+	},
+	gemini: {
+		providerId: 'gemini',
+		cortexProviderName: 'gemini',
+		qualityRank: 60,
+		// We track the broadest tier (Flash-Lite) here; per-model tightening is
+		// done by `resolveEffectiveQuota` below, based on the actual model name.
+		rpd: 1000,
+		rpm: 15,
+		tpm: null,
+		notes: 'Flash-Lite limits; Flash/Pro tighter',
+	},
+	openRouter: {
+		providerId: 'openRouter',
+		cortexProviderName: 'openRouter',
+		qualityRank: 40,
+		rpd: 50,
+		rpm: 20,
+		tpm: null,
+		notes: '1000 RPD with $10 top-up',
+	},
+	mistral: {
+		providerId: 'mistral',
+		cortexProviderName: 'mistral',
+		qualityRank: 30,
+		rpd: null,
+		rpm: 2,
+		tpm: null,
+		notes: '1B tokens/month (Experiment tier)',
+	},
+	cloudflareWorkersAI: {
+		providerId: 'cloudflareWorkersAI',
+		cortexProviderName: null,
+		qualityRank: 20,
+		rpd: null,
+		rpm: null,
+		tpm: null,
+		notes: '10,000 Neurons/day',
+	},
+};
+
+/**
+ * Per-model overrides for Gemini, replacing the entry-level Flash-Lite limits
+ * in the master table. The first case-insensitive substring match wins.
+ */
+export interface GeminiModelQuotaOverride {
+	readonly modelNameSubstring: string;
+	readonly rpd: number | null;
+	readonly rpm: number | null;
+}
+
+export const GEMINI_MODEL_OVERRIDES: readonly GeminiModelQuotaOverride[] = [
+	{ modelNameSubstring: 'pro', rpd: 100, rpm: 5 },
+	{ modelNameSubstring: 'flash-lite', rpd: 1000, rpm: 15 },
+	{ modelNameSubstring: 'flash', rpd: 250, rpm: 10 },
+];
+
+/**
+ * Resolve the effective limits for a provider+model: the provider defaults, with
+ * RPD/RPM replaced by the first matching Gemini override (TPM is never overridden).
+ */
+export function resolveEffectiveQuota(
+	providerId: FreeTierProviderId,
+	modelName: string,
+): { rpd: number | null; rpm: number | null; tpm: number | null } {
+	const base = FREE_TIER_QUOTAS[providerId];
+	let rpd = base.rpd;
+	let rpm = base.rpm;
+	const tpm = base.tpm;
+
+	if (providerId === 'gemini') {
+		const lower = modelName.toLowerCase();
+		for (const override of GEMINI_MODEL_OVERRIDES) {
+			if (lower.includes(override.modelNameSubstring)) {
+				rpd = override.rpd;
+				rpm = override.rpm;
+				break;
+			}
+		}
+	}
+
+	return { rpd, rpm, tpm };
+}
+
+/**
+ * Reverse lookup: CortexIDE `ProviderName` -> free-tier id, or `null` if the
+ * provider isn't on the free-tier ladder. Accepts the union with `'auto'`
+ * so callers don't have to narrow first; `'auto'` always returns `null`.
+ */
+export function freeTierIdOfProviderName(providerName: ProviderName | 'auto'): FreeTierProviderId | null {
+	if (providerName === 'auto') {
+		return null;
+	}
+	for (const id of Object.keys(FREE_TIER_QUOTAS) as FreeTierProviderId[]) {
+		if (FREE_TIER_QUOTAS[id].cortexProviderName === providerName) {
+			return id;
+		}
+	}
+	return null;
+}
diff --git a/src/vs/workbench/contrib/cortexide/common/routing/freeTierLadder.ts b/src/vs/workbench/contrib/cortexide/common/routing/freeTierLadder.ts
new file mode 100644
index 00000000000..71bc46cf2b2
--- /dev/null
+++ b/src/vs/workbench/contrib/cortexide/common/routing/freeTierLadder.ts
@@ -0,0 +1,125 @@
+/*--------------------------------------------------------------------------------------
+ *  Copyright 2025 Glass Devtools, Inc. All rights reserved.
+ *  Licensed under the Apache License, Version 2.0. See LICENSE.txt for more information.
+ *--------------------------------------------------------------------------------------*/
+
+/**
+ * Pure function: given configured providers + privacy state + remaining
+ * quotas, return an ordered list of [provider, model] candidates for the
+ * free-tier router.
+ *
+ * Layer: `common/`. Pure. No I/O. Tested in isolation under
+ * `test/common/freeTierLadder.test.ts`.
+ */
+
+import { ModelSelection, ProviderName } from '../cortexideSettingsTypes.js';
+import {
+	FREE_TIER_QUOTAS,
+	FreeTierProviderId,
+	freeTierIdOfProviderName,
+} from './freeTierConstants.js';
+import { FreeTierRemaining } from './freeTierQuotaService.js';
+
+/** A configured free-tier provider candidate (always a real provider, never `'auto'`). */
+export interface FreeTierCandidate {
+	readonly providerName: ProviderName;
+	readonly providerId: FreeTierProviderId;
+	readonly modelName: string;
+	/** Higher = preferred. */
+	readonly qualityRank: number;
+}
+
+/** Inputs to the ladder computation - all caller-supplied, no service deps. */
+export interface FreeTierLadderInput {
+	/** Provider/model pairs the user has actually configured + enabled. */
+	readonly configuredModels: readonly ModelSelection[];
+	/**
+	 * Quota snapshots from `IFreeTierQuotaService.getAllRemaining()`, indexed
+	 * implicitly by `providerId`. Providers absent from this list are
+	 * treated as having unlimited quota.
+	 */
+	readonly quotas: readonly FreeTierRemaining[];
+	/**
+	 * If true, the privacy gate is engaged - the ladder MUST be empty so the
+	 * caller falls back to local models.
+	 */
+	readonly privacyMode: boolean;
+}
+
+/**
+ * Build the ordered candidate list. Filters out:
+ *   - providers not on the free-tier table
+ *   - providers without configured models
+ *   - providers marked exhausted (429)
+ *   - providers with zero remaining RPD, RPM or TPM
+ * then sorts the remainder by descending `qualityRank`.
+ *
+ * If `privacyMode` is true, returns `[]`.
+ */
+export function buildFreeTierLadder(input: FreeTierLadderInput): readonly FreeTierCandidate[] {
+	if (input.privacyMode) {
+		return [];
+	}
+
+	const quotaById = new Map<FreeTierProviderId, FreeTierRemaining>();
+	for (const q of input.quotas) {
+		quotaById.set(q.providerId, q);
+	}
+
+	const candidates: FreeTierCandidate[] = [];
+
+	for (const model of input.configuredModels) {
+		if (model.providerName === 'auto') {
+			continue;
+		}
+		// model.providerName is now narrowed to ProviderName
+		const providerName: ProviderName = model.providerName;
+		const providerId = freeTierIdOfProviderName(providerName);
+		if (providerId === null) {
+			continue;
+		}
+		const def = FREE_TIER_QUOTAS[providerId];
+
+		const remaining = quotaById.get(providerId);
+		if (remaining) {
+			if (remaining.exhausted) {
+				continue;
+			}
+			if (remaining.rpd !== null && remaining.rpd <= 0) {
+				continue;
+			}
+			if (remaining.rpm !== null && remaining.rpm <= 0) {
+				continue;
+			}
+			if (remaining.tpm !== null && remaining.tpm <= 0) {
+				continue;
+			}
+		}
+
+		candidates.push({
+			providerName,
+			providerId,
+			modelName: model.modelName,
+			qualityRank: def.qualityRank,
+		});
+	}
+
+	candidates.sort((a, b) => b.qualityRank - a.qualityRank);
+	return candidates;
+}
+
+/**
+ * Convenience: convert the first ladder candidate into a `ModelSelection`,
+ * or return `null` if the ladder is empty.
+ */
+export function pickTopFromLadder(
+	ladder: readonly FreeTierCandidate[],
+): ModelSelection | null {
+	if (ladder.length === 0) {
+		return null;
+	}
+	return {
+		providerName: ladder[0].providerName,
+		modelName: ladder[0].modelName,
+	};
+}
diff --git a/src/vs/workbench/contrib/cortexide/common/routing/freeTierQuotaService.ts b/src/vs/workbench/contrib/cortexide/common/routing/freeTierQuotaService.ts
new file mode 100644
index 00000000000..a1381a20cbb
--- /dev/null
+++ b/src/vs/workbench/contrib/cortexide/common/routing/freeTierQuotaService.ts
@@ -0,0 +1,252 @@
+/*--------------------------------------------------------------------------------------
+ *  Copyright 2025 Glass Devtools, Inc. All rights reserved.
+ *  Licensed under the Apache License, Version 2.0. See LICENSE.txt for more information.
+ *--------------------------------------------------------------------------------------*/
+
+/**
+ * Free-tier quota tracking service.
+ *
+ * Tracks per-provider request counts (RPD / RPM) and rough token counts (TPM)
+ * for free-tier providers, persisted across restarts via VS Code's storage.
+ *
+ * The service is the in-process source of truth. It has no network access of
+ * its own; updates come from `recordCall(...)` and `markExhausted(...)` which
+ * are invoked by the LLM message pipeline.
+ *
+ * Layer: `common/`. No DOM, Node, or Electron imports.
+ */ + +import { Emitter, Event } from '../../../../../base/common/event.js'; +import { Disposable } from '../../../../../base/common/lifecycle.js'; +import { createDecorator } from '../../../../../platform/instantiation/common/instantiation.js'; +import { registerSingleton, InstantiationType } from '../../../../../platform/instantiation/common/extensions.js'; +import { IStorageService, StorageScope, StorageTarget } from '../../../../../platform/storage/common/storage.js'; +import { + FREE_TIER_QUOTAS, + FreeTierProviderId, + resolveEffectiveQuota, +} from './freeTierConstants.js'; + +/** Persistent storage key. Single JSON blob keyed by provider id. */ +export const FREE_TIER_QUOTA_STORAGE_KEY = 'cortexide.freeTier.quotaState'; + +/** Public per-provider quota snapshot. */ +export interface FreeTierRemaining { + readonly providerId: FreeTierProviderId; + /** Effective limits at the moment of query, after per-model overrides. */ + readonly limits: { rpd: number | null; rpm: number | null; tpm: number | null }; + /** Remaining requests today (null if uncapped). */ + readonly rpd: number | null; + /** Remaining requests in the current minute window (null if uncapped). */ + readonly rpm: number | null; + /** Remaining tokens in the current minute window (null if uncapped). */ + readonly tpm: number | null; + /** True when provider returned 429 recently and resetAt is still in the future. */ + readonly exhausted: boolean; + /** When the rate-limit reset is expected (epoch ms), or `null`. */ + readonly resetAt: number | null; +} + +/** Internal persisted shape. */ +interface PersistedProviderState { + /** Window start (epoch ms, midnight UTC of the current RPD window). */ + rpdWindowStart: number; + /** Requests sent today. */ + rpdUsed: number; + /** Sliding 60s window start. */ + rpmWindowStart: number; + /** Requests in the current 60s window. */ + rpmUsed: number; + /** Tokens used in the current 60s window. 
*/ + tpmUsed: number; + /** Set by 429 handler; provider is considered exhausted until now() > resetAt. */ + exhaustedUntil: number | null; +} + +interface PersistedState { + /** Schema version - bump on breaking changes. */ + version: 1; + providers: { [k in FreeTierProviderId]?: PersistedProviderState }; +} + +const ONE_MINUTE_MS = 60_000; + +/** Returns the most recent UTC midnight, in epoch ms, at or before `now`. */ +function utcDayStart(now: number): number { + const d = new Date(now); + return Date.UTC(d.getUTCFullYear(), d.getUTCMonth(), d.getUTCDate()); +} + +function emptyProviderState(now: number): PersistedProviderState { + return { + rpdWindowStart: utcDayStart(now), + rpdUsed: 0, + rpmWindowStart: now, + rpmUsed: 0, + tpmUsed: 0, + exhaustedUntil: null, + }; +} + +/** Roll over windows in-place if they've elapsed. */ +function rollWindows(state: PersistedProviderState, now: number): void { + const currentDayStart = utcDayStart(now); + if (currentDayStart > state.rpdWindowStart) { + state.rpdWindowStart = currentDayStart; + state.rpdUsed = 0; + } + if (now - state.rpmWindowStart >= ONE_MINUTE_MS) { + state.rpmWindowStart = now; + state.rpmUsed = 0; + state.tpmUsed = 0; + } + if (state.exhaustedUntil !== null && now >= state.exhaustedUntil) { + state.exhaustedUntil = null; + } +} + +export interface IFreeTierQuotaService { + readonly _serviceBrand: undefined; + + /** Fires whenever quota state changes (recordCall, markExhausted, rollover). */ + readonly onQuotaChange: Event; + + /** Returns remaining quota for a provider+model. Snapshot, not live. */ + getRemaining(providerId: FreeTierProviderId, modelName: string): FreeTierRemaining; + + /** Returns remaining quota for every free-tier provider we know about. */ + getAllRemaining(modelName?: string): readonly FreeTierRemaining[]; + + /** + * Increment counters after a successful call. `tokensUsed` is the best + * estimate available (output text length / 4 is a common cheap proxy). 
/**
 * Tracks how much of each free-tier provider's request/token allowance has
 * been consumed, and whether a provider is currently rate-limited.
 */
export interface IFreeTierQuotaService {
	readonly _serviceBrand: undefined;

	/**
	 * Fires after `recordCall`, `markExhausted` and `resetAll`. Window
	 * rollovers happen lazily on read and do NOT fire this event, so consumers
	 * that display remaining quota should also refresh on a timer.
	 */
	readonly onQuotaChange: Event<void>;

	/** Returns remaining quota for a provider+model. Snapshot, not live. */
	getRemaining(providerId: FreeTierProviderId, modelName: string): FreeTierRemaining;

	/** Returns remaining quota for every free-tier provider we know about. */
	getAllRemaining(modelName?: string): readonly FreeTierRemaining[];

	/**
	 * Increment counters after a successful call. `tokensUsed` is the best
	 * estimate available (output text length / 4 is a common cheap proxy).
	 * Non-finite or non-positive estimates leave the token counter untouched.
	 */
	recordCall(providerId: FreeTierProviderId, modelName: string, tokensUsed: number): void;

	/**
	 * Mark a provider as exhausted (rate-limited). `resetAt` (epoch ms) may be
	 * null when the provider didn't tell us when to retry; in that case, or
	 * when `resetAt` is not in the future, a conservative 60s default is used.
	 */
	markExhausted(providerId: FreeTierProviderId, resetAt: number | null): void;

	/** Test helper: clear all state. */
	resetAll(): void;
}

export const IFreeTierQuotaService = createDecorator<IFreeTierQuotaService>('FreeTierQuotaService');

/** True when `value` has the counter fields of a persisted provider record. */
function isPersistedProviderState(value: unknown): value is PersistedProviderState {
	if (value === null || typeof value !== 'object') {
		return false;
	}
	const record = value as Record<string, unknown>;
	const counters = [record.rpdWindowStart, record.rpdUsed, record.rpmWindowStart, record.rpmUsed, record.tpmUsed];
	if (!counters.every(n => typeof n === 'number' && Number.isFinite(n))) {
		return false;
	}
	const until = record.exhaustedUntil;
	return until === null || (typeof until === 'number' && Number.isFinite(until));
}

/** True when `value` is a version-1 persisted state whose entries are all well-formed. */
function isPersistedState(value: unknown): value is PersistedState {
	if (value === null || typeof value !== 'object') {
		return false;
	}
	const candidate = value as { version?: unknown; providers?: unknown };
	const providers = candidate.providers;
	// `typeof null === 'object'` and arrays are objects too, so both need an
	// explicit rejection before `providers` can be indexed by provider id.
	if (candidate.version !== 1 || providers === null || typeof providers !== 'object' || Array.isArray(providers)) {
		return false;
	}
	return Object.values(providers).every(isPersistedProviderState);
}

/**
 * Storage-backed implementation of {@link IFreeTierQuotaService}. State is
 * kept in memory, serialised as JSON under `FREE_TIER_QUOTA_STORAGE_KEY` at
 * application scope, and rolled over lazily whenever a provider is accessed.
 */
export class FreeTierQuotaService extends Disposable implements IFreeTierQuotaService {
	readonly _serviceBrand: undefined;

	private readonly _onQuotaChange = this._register(new Emitter<void>());
	readonly onQuotaChange: Event<void> = this._onQuotaChange.event;

	private _state: PersistedState;

	constructor(
		@IStorageService private readonly _storageService: IStorageService,
	) {
		super();
		this._state = this._readState();
	}

	/**
	 * Loads persisted state. Missing, unparsable or structurally invalid data
	 * yields a fresh empty state rather than an error.
	 */
	private _readState(): PersistedState {
		const raw = this._storageService.get(
			FREE_TIER_QUOTA_STORAGE_KEY,
			StorageScope.APPLICATION,
		);
		if (raw) {
			try {
				const parsed: unknown = JSON.parse(raw);
				if (isPersistedState(parsed)) {
					return parsed;
				}
			} catch (_err) {
				// Corrupt JSON - silently reset rather than block startup.
			}
		}
		return { version: 1, providers: {} };
	}

	/** Serialises the whole in-memory state back to storage. */
	private _writeState(): void {
		this._storageService.store(
			FREE_TIER_QUOTA_STORAGE_KEY,
			JSON.stringify(this._state),
			StorageScope.APPLICATION,
			StorageTarget.MACHINE,
		);
	}

	/**
	 * Returns the provider's counter record, creating it on first use, after
	 * rolling any elapsed windows. Mutates in-memory state only; callers
	 * decide whether to persist.
	 */
	private _getOrCreate(providerId: FreeTierProviderId, now: number): PersistedProviderState {
		let entry = this._state.providers[providerId];
		if (!entry) {
			entry = emptyProviderState(now);
			this._state.providers[providerId] = entry;
		}
		rollWindows(entry, now);
		return entry;
	}

	getRemaining(providerId: FreeTierProviderId, modelName: string): FreeTierRemaining {
		const now = Date.now();
		const s = this._getOrCreate(providerId, now);
		const limits = resolveEffectiveQuota(providerId, modelName);

		// A `null` limit means "no limit of this kind" and is passed through as null.
		const left = (limit: number | null, used: number): number | null =>
			limit === null ? null : Math.max(0, limit - used);

		return {
			providerId,
			limits,
			rpd: left(limits.rpd, s.rpdUsed),
			rpm: left(limits.rpm, s.rpmUsed),
			tpm: left(limits.tpm, s.tpmUsed),
			exhausted: s.exhaustedUntil !== null && now < s.exhaustedUntil,
			resetAt: s.exhaustedUntil,
		};
	}

	getAllRemaining(modelName: string = ''): readonly FreeTierRemaining[] {
		const ids = Object.keys(FREE_TIER_QUOTAS) as FreeTierProviderId[];
		return ids.map(id => this.getRemaining(id, modelName));
	}

	recordCall(providerId: FreeTierProviderId, modelName: string, tokensUsed: number): void {
		const s = this._getOrCreate(providerId, Date.now());
		s.rpdUsed += 1;
		s.rpmUsed += 1;
		// Guard against NaN/Infinity: a single bad estimate must not poison the
		// counter (Infinity would also serialise to `null` in JSON).
		if (Number.isFinite(tokensUsed) && tokensUsed > 0) {
			s.tpmUsed += tokensUsed;
		}
		// modelName is currently unused for the increment but reserved for
		// future per-model accounting; reference it to satisfy linters.
		void modelName;
		this._writeState();
		this._onQuotaChange.fire();
	}

	markExhausted(providerId: FreeTierProviderId, resetAt: number | null): void {
		const now = Date.now();
		const s = this._getOrCreate(providerId, now);
		// If the provider didn't tell us (or told us a time that has already
		// passed), assume 60s.
		const hasFutureReset = resetAt !== null && resetAt > now;
		s.exhaustedUntil = hasFutureReset ? resetAt : now + ONE_MINUTE_MS;
		this._writeState();
		this._onQuotaChange.fire();
	}

	resetAll(): void {
		this._state = { version: 1, providers: {} };
		this._writeState();
		this._onQuotaChange.fire();
	}
}

registerSingleton(IFreeTierQuotaService, FreeTierQuotaService, InstantiationType.Delayed);
) { super() @@ -236,9 +239,42 @@ export class LLMMessageService extends Disposable implements ILLMMessageService // add state for request id const requestId = generateUuid(); + + // Free-tier quota tracking: wrap success/error callbacks so we update + // the in-process quota service whenever a call completes or hits 429. + // Wrapping happens here (common/ layer) rather than electron-main to + // keep the quota service strictly in common/ - the impl has no way to + // reach back to common/ services. + const freeTierId = freeTierIdOfProviderName(modelSelection.providerName); + const wrappedOnFinalMessage = freeTierId === null + ? onFinalMessage + : (params: EventLLMMessageOnFinalMessageParams) => { + try { + // Cheap proxy for tokens until SDK responses expose real usage. + // Output tokens ~ chars/4 is the standard approximation. + const estTokens = Math.ceil((params.fullText?.length ?? 0) / 4); + this.freeTierQuotaService.recordCall(freeTierId, modelSelection.modelName, estTokens); + } catch (err) { + this.logService.warn('[FreeTierQuota] recordCall failed', err); + } + onFinalMessage(params); + }; + const wrappedOnError = freeTierId === null + ? 
/**
 * Detect 429 / rate-limit errors from the provider error payload. The
 * underlying impl normalises a wide range of provider-specific shapes into a
 * single `message` string, plus an opaque `fullError`. We sniff both.
 *
 * @param params Error payload delivered to the `onError` hook.
 * @returns `true` when the message mentions rate limiting / quota, or when
 *   `fullError` carries a 429 in `status`, `statusCode` or `code`.
 */
function isRateLimitError(params: EventLLMMessageOnErrorParams): boolean {
	const msg = (params.message ?? '').toLowerCase();
	const phrases = ['rate limit', 'rate-limit', 'too many requests', 'resource_exhausted', 'quota'];
	if (phrases.some(phrase => msg.includes(phrase))) {
		return true;
	}
	// Match 429 only as a standalone number so that e.g. "14290 tokens" is not
	// mistaken for an HTTP status.
	if (/(?<!\d)429(?!\d)/.test(msg)) {
		return true;
	}

	const full = params.fullError as unknown;
	if (!full || typeof full !== 'object') {
		return false;
	}
	const candidate = full as { status?: unknown; statusCode?: unknown; code?: unknown };
	// Accept the status as a number or a numeric string, under any of the
	// property names checked here.
	return [candidate.status, candidate.statusCode, candidate.code]
		.some(value => value === 429 || value === '429');
}
/** Matches "retry in 57s", "try again in 500ms", "try again in 1m30s", etc. */
const RETRY_IN_MESSAGE_PATTERN = /(?:retry|try again) in\s+(?:(\d+)m)?(\d+(?:\.\d+)?)\s*(ms|s)/i;

/**
 * Reads the `Retry-After` header from a header container. Supports objects
 * exposing a `get(name)` method as well as plain objects, whose keys are
 * compared case-insensitively.
 */
function readRetryAfterHeader(headers: unknown): string | null {
	if (!headers || typeof headers !== 'object') {
		return null;
	}
	const asText = (value: unknown): string | null => {
		const first: unknown = Array.isArray(value) ? value[0] : value;
		if (typeof first !== 'string' && typeof first !== 'number') {
			return null;
		}
		const text = String(first).trim();
		return text.length > 0 ? text : null;
	};

	const getter = (headers as { get?: unknown }).get;
	if (typeof getter === 'function') {
		return asText(getter.call(headers, 'retry-after'));
	}
	for (const [name, value] of Object.entries(headers as Record<string, unknown>)) {
		if (name.toLowerCase() === 'retry-after') {
			const text = asText(value);
			if (text !== null) {
				return text;
			}
		}
	}
	return null;
}

/**
 * Converts a `Retry-After` value to an absolute epoch-ms timestamp. The value
 * is either a number of seconds or an HTTP-date; anything that does not point
 * to the future yields `null`.
 */
function retryAtFromHeaderValue(value: string | null, now: number): number | null {
	if (value === null) {
		return null;
	}
	const seconds = Number(value);
	if (Number.isFinite(seconds)) {
		return seconds > 0 ? now + seconds * 1000 : null;
	}
	const date = Date.parse(value);
	return Number.isFinite(date) && date > now ? date : null;
}

/**
 * Best-effort: extract a retry-at timestamp from a rate-limit error. If
 * nothing is parseable, returns `null` - the quota service applies a
 * conservative 60s default.
 *
 * Sources are tried in order: the `Retry-After` header on `fullError.headers`,
 * a numeric `fullError.retryAfter` (seconds), then a "retry in <duration>"
 * hint in the error message.
 *
 * @param params Error payload delivered to the `onError` hook.
 * @returns Epoch milliseconds at which a retry is expected to succeed, or `null`.
 */
function parseRetryAt(params: EventLLMMessageOnErrorParams): number | null {
	const now = Date.now();
	const full = params.fullError as unknown;
	if (full && typeof full === 'object') {
		const candidate = full as { headers?: unknown; retryAfter?: unknown };
		const fromHeader = retryAtFromHeaderValue(readRetryAfterHeader(candidate.headers), now);
		if (fromHeader !== null) {
			return fromHeader;
		}
		const { retryAfter } = candidate;
		if (typeof retryAfter === 'number' && Number.isFinite(retryAfter) && retryAfter > 0) {
			return now + retryAfter * 1000;
		}
	}

	// Try to parse "...retry in 57s..." patterns from the message
	const match = RETRY_IN_MESSAGE_PATTERN.exec(params.message ?? '');
	if (match) {
		const [, minutes, amount, unit] = match;
		const unitMs = unit.toLowerCase() === 'ms' ? 1 : 1000;
		const delayMs = Number(minutes ?? 0) * ONE_MINUTE_IN_MS + Number(amount) * unitMs;
		if (Number.isFinite(delayMs) && delayMs > 0) {
			return now + delayMs;
		}
	}
	return null;
}

/** Milliseconds per minute, for the optional "<n>m" part of a message duration. */
const ONE_MINUTE_IN_MS = 60_000;
/**
 * Build a FreeTierRemaining snapshot for a provider with sensible defaults:
 * every remaining counter equals the provider's configured limit, and the
 * provider is not exhausted. `overrides` is spread last, so it wins.
 */
function snap(
	providerId: FreeTierProviderId,
	// NOTE(review): the override type was reconstructed as
	// Partial<Omit<FreeTierRemaining, 'providerId'>>; confirm against upstream.
	overrides: Partial<Omit<FreeTierRemaining, 'providerId'>> = {},
): FreeTierRemaining {
	const { rpd, rpm, tpm } = FREE_TIER_QUOTAS[providerId];
	return {
		providerId,
		limits: { rpd, rpm, tpm },
		rpd,
		rpm,
		tpm,
		exhausted: false,
		resetAt: null,
		...overrides,
	};
}

suite('FreeTierLadder', () => {

	test('respects privacy gate: returns empty ladder regardless of configured providers', () => {
		const configured: ModelSelection[] = [
			{ providerName: 'groq', modelName: 'llama-3.3-70b-versatile' },
			{ providerName: 'gemini', modelName: 'gemini-2.5-flash' },
		];
		const ladder = buildFreeTierLadder({
			configuredModels: configured,
			quotas: [snap('groq'), snap('gemini')],
			privacyMode: true,
		});
		assert.strictEqual(ladder.length, 0, 'privacy mode must produce an empty ladder');
		assert.strictEqual(pickTopFromLadder(ladder), null);
	});

	test('skips exhausted providers (429 marked) and falls through to next quality tier', () => {
		const configured: ModelSelection[] = [
			{ providerName: 'groq', modelName: 'llama-3.3-70b-versatile' },
			{ providerName: 'gemini', modelName: 'gemini-2.5-flash' },
			{ providerName: 'openRouter', modelName: 'openrouter/auto' },
		];
		const ladder = buildFreeTierLadder({
			configuredModels: configured,
			quotas: [
				snap('groq', { exhausted: true, resetAt: Date.now() + 60_000 }),
				snap('gemini'),
				snap('openRouter'),
			],
			privacyMode: false,
		});
		assert.ok(ladder.length >= 2, 'gemini + openRouter should remain after groq is dropped');
		assert.notStrictEqual(ladder[0].providerId, 'groq', 'exhausted groq must not be top');
		assert.strictEqual(ladder[0].providerId, 'gemini', 'next-best quality should win');
	});

	test('picks highest-quality available provider when all have quota', () => {
		const configured: ModelSelection[] = [
			{ providerName: 'openRouter', modelName: 'openrouter/auto' },
			{ providerName: 'gemini', modelName: 'gemini-2.5-flash-lite' },
			{ providerName: 'groq', modelName: 'llama-3.3-70b-versatile' },
			{ providerName: 'mistral', modelName: 'mistral-large-latest' },
		];
		const ladder = buildFreeTierLadder({
			configuredModels: configured,
			quotas: [snap('groq'), snap('gemini'), snap('openRouter'), snap('mistral')],
			privacyMode: false,
		});
		// Expected order per FREE_TIER_QUOTAS qualityRank: groq(80) > gemini(60) > openRouter(40) > mistral(30)
		// Comparing the whole sequence checks length and order in one step and
		// shows the full actual order on failure.
		assert.deepStrictEqual(
			ladder.map(entry => entry.providerId),
			['groq', 'gemini', 'openRouter', 'mistral'],
		);
	});

	test('zero remaining RPD removes provider from ladder', () => {
		const configured: ModelSelection[] = [
			{ providerName: 'groq', modelName: 'llama-3.3-70b-versatile' },
			{ providerName: 'gemini', modelName: 'gemini-2.5-flash' },
		];
		const ladder = buildFreeTierLadder({
			configuredModels: configured,
			quotas: [
				snap('groq', { rpd: 0 }),
				snap('gemini'),
			],
			privacyMode: false,
		});
		assert.strictEqual(ladder.length, 1);
		assert.strictEqual(ladder[0].providerId, 'gemini');
	});

	test('non-free-tier providers (e.g. anthropic, openAI) are silently ignored', () => {
		const configured: ModelSelection[] = [
			{ providerName: 'anthropic', modelName: 'claude-3-5-sonnet-20241022' },
			{ providerName: 'openAI', modelName: 'gpt-4o' },
			{ providerName: 'groq', modelName: 'llama-3.3-70b-versatile' },
		];
		const ladder = buildFreeTierLadder({
			configuredModels: configured,
			quotas: [snap('groq')],
			privacyMode: false,
		});
		assert.strictEqual(ladder.length, 1);
		assert.strictEqual(ladder[0].providerId, 'groq');
	});

	test('empty configured list -> empty ladder', () => {
		const ladder = buildFreeTierLadder({
			configuredModels: [],
			quotas: [],
			privacyMode: false,
		});
		assert.strictEqual(ladder.length, 0);
		assert.strictEqual(pickTopFromLadder(ladder), null);
	});
});