diff --git a/src/vs/workbench/contrib/cortexide/browser/cortexideStatusBar.ts b/src/vs/workbench/contrib/cortexide/browser/cortexideStatusBar.ts
index cbe02bde8cf..ed75eddd343 100644
--- a/src/vs/workbench/contrib/cortexide/browser/cortexideStatusBar.ts
+++ b/src/vs/workbench/contrib/cortexide/browser/cortexideStatusBar.ts
@@ -13,6 +13,9 @@ import { registerWorkbenchContribution2, WorkbenchPhase } from '../../../common/
 import { IChatThreadService } from './chatThreadService.js';
 import { localProviderNames } from '../common/cortexideSettingsTypes.js';
 import { ProviderName } from '../common/cortexideSettingsTypes.js';
+import { IFreeTierQuotaService, FreeTierRemaining } from '../common/routing/freeTierQuotaService.js';
+import { freeTierIdOfProviderName, FREE_TIER_QUOTAS } from '../common/routing/freeTierConstants.js';
+import { ICortexideI18nService } from '../common/i18n/i18nService.js';
 
 export class CortexideStatusBarContribution extends Disposable implements IWorkbenchContribution {
 	static readonly ID = 'workbench.contrib.cortexideStatusBar';
@@ -20,12 +23,15 @@ export class CortexideStatusBarContribution extends Disposable implements IWorkb
 	private modelEntry: IStatusbarEntryAccessor | undefined;
 	private latencyEntry: IStatusbarEntryAccessor | undefined;
 	private privacyEntry: IStatusbarEntryAccessor | undefined;
+	private freeTierEntry: IStatusbarEntryAccessor | undefined;
 	private readonly updateDisposables = this._register(new MutableDisposable());
 
 	constructor(
 		@IStatusbarService private readonly statusbarService: IStatusbarService,
 		@ICortexideSettingsService private readonly cortexideSettingsService: ICortexideSettingsService,
 		@IChatThreadService private readonly chatThreadService: IChatThreadService,
+		@IFreeTierQuotaService private readonly freeTierQuotaService: IFreeTierQuotaService,
+		@ICortexideI18nService private readonly i18nService: ICortexideI18nService,
 	) {
 		super();
 		this.create();
@@ -56,6 +62,14 @@ export class CortexideStatusBarContribution extends Disposable implements IWorkb
 			StatusbarAlignment.RIGHT,
 			{ location: { id: 'status.editor.mode', priority: 100.4 }, alignment: StatusbarAlignment.RIGHT }
 		);
+
+		// Free-tier quota widget
+		this.freeTierEntry = this.statusbarService.addEntry(
+			this.getFreeTierEntryProps(),
+			'cortexide.freeTier',
+			StatusbarAlignment.RIGHT,
+			{ location: { id: 'status.editor.mode', priority: 100.5 }, alignment: StatusbarAlignment.RIGHT }
+		);
 	}
 
 	private registerListeners(): void {
@@ -76,6 +90,20 @@ export class CortexideStatusBarContribution extends Disposable implements IWorkb
 		}, 500);
 
 		this._register({ dispose: () => clearInterval(latencyUpdateInterval) });
+
+		// Refresh free-tier widget on every quota mutation (recordCall, markExhausted)
+		this._register(this.freeTierQuotaService.onQuotaChange(() => {
+			this.freeTierEntry?.update(this.getFreeTierEntryProps());
+		}));
+		// Also refresh on settings changes so newly-added providers appear immediately
+		this._register(this.cortexideSettingsService.onDidChangeState(() => {
+			this.freeTierEntry?.update(this.getFreeTierEntryProps());
+		}));
+		// Slow tick to keep window rollovers visible to the user
+		const quotaTick = setInterval(() => {
+			this.freeTierEntry?.update(this.getFreeTierEntryProps());
+		}, 15_000);
+		this._register({ dispose: () => clearInterval(quotaTick) });
 	}
 
 	private getModelEntryProps(): IStatusbarEntry {
@@ -229,14 +257,109 @@ export class CortexideStatusBarContribution extends Disposable implements IWorkb
 		};
 	}
 
+	/**
+	 * Free-tier quota widget.  Returns an entry with empty text when no
+	 * free-tier providers are configured; otherwise shows the status of the
+	 * highest-quality configured provider (warning icon when it is
+	 * exhausted, pulse icon otherwise), with a multiline tooltip listing
+	 * every provider's status.  The aria label carries the same status
+	 * without the icon markup.
+	 */
+	private getFreeTierEntryProps(): IStatusbarEntry {
+		const t = (key: Parameters<typeof this.i18nService.t>[0], fallback?: string) => this.i18nService.t(key, fallback);
+		const name = t('routing.statusBar.label', 'Free-tier quota');
+		const configuredFreeTierProviders = this.collectConfiguredFreeTierProviders();
+		if (configuredFreeTierProviders.length === 0) {
+			return {
+				name,
+				text: '',
+				ariaLabel: '',
+				tooltip: t('routing.statusBar.tooltipNoProviders', 'No free-tier providers configured.'),
+			};
+		}
+
+		// Rank the configured providers by quality, best first.  The headline
+		// always uses the top-ranked provider, even when it is exhausted.
+		const enriched = configuredFreeTierProviders
+			.map(p => ({ ...p, remaining: this.freeTierQuotaService.getRemaining(p.providerId, p.modelName) }))
+			.sort((a, b) => b.qualityRank - a.qualityRank);
+
+		// formatProviderStatus already chooses between the exhausted / RPD / RPM /
+		// uncapped wording, so the only state-dependent part here is the icon.
+		const top = enriched[0];
+		const topStatus = this.formatProviderStatus(top.remaining);
+		const icon = top.remaining.exhausted ? '$(warning)' : '$(pulse)';
+		const text = `${icon} ${topStatus}`;
+
+		// Multiline tooltip listing every provider's status.
+		const lines: string[] = [t('routing.statusBar.tooltipTitle', 'Free-tier provider quotas')];
+		for (const p of enriched) {
+			lines.push(this.formatProviderStatus(p.remaining));
+		}
+		const tooltip = lines.join('\n');
+
+		return {
+			name,
+			text,
+			// Codicon markup such as `$(pulse)` only renders inside `text`, so the
+			// screen-reader label carries the plain status without the icon.
+			ariaLabel: topStatus,
+			tooltip,
+		};
+	}
+
+	/**
+	 * Scan the provider settings for free-tier providers whose
+	 * `_didFillInProviderSettings` flag is set and that expose at least one
+	 * non-hidden model.  Each result carries the free-tier id, the provider
+	 * name, the first visible model name and the provider's quality rank.
+	 */
+	private collectConfiguredFreeTierProviders(): Array<{ providerId: NonNullable<ReturnType<typeof freeTierIdOfProviderName>>; providerName: ProviderName; modelName: string; qualityRank: number }> {
+		const { settingsOfProvider } = this.cortexideSettingsService.state;
+		const providerNames = Object.keys(settingsOfProvider) as ProviderName[];
+		return providerNames.flatMap(providerName => {
+			const providerSettings = settingsOfProvider[providerName];
+			if (!providerSettings._didFillInProviderSettings) {
+				return [];
+			}
+			const providerId = freeTierIdOfProviderName(providerName);
+			const visibleModel = providerId === null ? undefined : providerSettings.models.find(m => !m.isHidden);
+			if (providerId === null || !visibleModel) {
+				return [];
+			}
+			// Rank comes from the shared quota table, the same source the ladder uses.
+			const { qualityRank } = FREE_TIER_QUOTAS[providerId];
+			return [{ providerId, providerName, modelName: visibleModel.modelName, qualityRank }];
+		});
+
+	/** One-line localized status for a provider: exhausted, used/limit per day, used/limit per minute, or uncapped. */
+	private formatProviderStatus(remaining: FreeTierRemaining): string {
+		// Fill `{i}` placeholders literally: split/join replaces every occurrence and never treats `$` in an argument as a replacement pattern.
+		const t = (key: Parameters<typeof this.i18nService.t>[0], ...args: string[]) =>
+			args.reduce((acc, arg, i) => acc.split(`{${i}}`).join(arg), this.i18nService.t(key));
+		const { providerId: name, limits, rpd, rpm, exhausted } = remaining;
+		if (exhausted) {
+			return t('routing.statusBar.exhausted', name);
+		}
+		if (rpd !== null && limits.rpd !== null) {
+			return t('routing.statusBar.entry', name, String(limits.rpd - rpd), String(limits.rpd));
+		}
+		if (rpm !== null && limits.rpm !== null) {
+			return t('routing.statusBar.entryRpm', name, String(limits.rpm - rpm), String(limits.rpm));
+		}
+		return t('routing.statusBar.uncapped', name);
+	}
+
 	override dispose(): void {
 		super.dispose();
 		this.modelEntry?.dispose();
 		this.latencyEntry?.dispose();
 		this.privacyEntry?.dispose();
+		this.freeTierEntry?.dispose();
 		this.modelEntry = undefined;
 		this.latencyEntry = undefined;
 		this.privacyEntry = undefined;
+		this.freeTierEntry = undefined;
 	}
 }
 
diff --git a/src/vs/workbench/contrib/cortexide/browser/react/src/settings/Settings.tsx b/src/vs/workbench/contrib/cortexide/browser/react/src/settings/Settings.tsx
index 436f7ea586d..e8a212a9ece 100644
--- a/src/vs/workbench/contrib/cortexide/browser/react/src/settings/Settings.tsx
+++ b/src/vs/workbench/contrib/cortexide/browser/react/src/settings/Settings.tsx
@@ -1932,6 +1932,29 @@ export const Settings = () => {
 											</div>
 										</div>
 
+										{/* Routing Policy Section */}
+										<ErrorBoundary>
+											<div>
+												<h4 className={`text-base`}>Routing policy</h4>
+												<div className='text-sm text-void-fg-3 mt-1'>
+													Controls how CortexIDE picks between configured model providers. Free-tier ladder tracks per-provider quotas and auto-fails-over on 429.
+												</div>
+												<div className='my-2'>
+													<select
+														className='text-xs bg-void-bg-1 text-void-fg-1 border border-void-border-1 rounded px-1 py-0.5'
+														value={settingsState.globalSettings.routingPolicy ?? 'auto-cheapest'}
+														onChange={(e) => cortexideSettingsService.setGlobalSetting('routingPolicy', e.target.value as ('auto-cheapest' | 'free-tier' | 'local-only' | 'byok-paid'))}
+														title='Routing policy'
+													>
+														<option value='auto-cheapest'>Auto (cheapest viable)</option>
+														<option value='free-tier'>Free-tier ladder</option>
+														<option value='local-only'>Local only</option>
+														<option value='byok-paid'>BYOK paid models</option>
+													</select>
+												</div>
+											</div>
+										</ErrorBoundary>
+
 										{/* YOLO Mode Section */}
 										<ErrorBoundary>
 											<div>
diff --git a/src/vs/workbench/contrib/cortexide/common/cortexideSettingsTypes.ts b/src/vs/workbench/contrib/cortexide/common/cortexideSettingsTypes.ts
index 1913aaa68ad..a08c5c46f4d 100644
--- a/src/vs/workbench/contrib/cortexide/common/cortexideSettingsTypes.ts
+++ b/src/vs/workbench/contrib/cortexide/common/cortexideSettingsTypes.ts
@@ -532,8 +532,18 @@ export type GlobalSettings = {
 	};
 	// Local-First AI: When enabled, heavily bias router toward local models
 	localFirstAI?: boolean; // Prefer local models over cloud models (default: false)
+	// Routing policy: controls how the model router selects between configured providers.
+	// - 'auto-cheapest': existing behaviour - score-based mixture of rules + learned (default)
+	// - 'free-tier':     prefer free-tier providers in quality-ranked order with quota tracking
+	// - 'local-only':    never dispatch to a cloud provider, even if the model selection points there
+	// - 'byok-paid':     prefer paid BYOK models, skipping free-tier ladders entirely
+	routingPolicy?: RoutingPolicy;
 }
 
+/** User-selectable routing policy for the model router (derived from `routingPolicies` so the two cannot drift apart). */
+export type RoutingPolicy = typeof routingPolicies[number];
+export const routingPolicies = ['auto-cheapest', 'free-tier', 'local-only', 'byok-paid'] as const;
+
 export const defaultGlobalSettings: GlobalSettings = {
 	autoRefreshModels: true,
 	aiInstructions: '',
@@ -589,6 +599,7 @@ export const defaultGlobalSettings: GlobalSettings = {
 		routerCacheTtlMs: 2000, // 2 second cache TTL (caching enabled)
 	},
 	localFirstAI: false, // Local-First AI disabled by default (users can enable for privacy/performance)
+	routingPolicy: 'auto-cheapest', // Existing scoring behaviour remains the default
 }
 
 export type GlobalSettingName = keyof GlobalSettings
diff --git a/src/vs/workbench/contrib/cortexide/common/i18n/i18nService.ts b/src/vs/workbench/contrib/cortexide/common/i18n/i18nService.ts
index 38fbba9c5fb..e5a69351a11 100644
--- a/src/vs/workbench/contrib/cortexide/common/i18n/i18nService.ts
+++ b/src/vs/workbench/contrib/cortexide/common/i18n/i18nService.ts
@@ -276,6 +276,23 @@ export const EN_TRANSLATIONS = {
 	'common.copy': 'Copy',
 	'common.copied': 'Copied!',
 	'common.open': 'Open',
+
+	// allow-any-unicode-next-line
+	// ── Routing / free-tier router ───────────────────────────────────────────
+	'routing.policy.label': 'Routing policy',
+	'routing.policy.description': 'Controls how CortexIDE picks between configured model providers.',
+	'routing.policy.autoCheapest': 'Auto (cheapest viable)',
+	'routing.policy.freeTier': 'Free-tier ladder',
+	'routing.policy.localOnly': 'Local only',
+	'routing.policy.byokPaid': 'BYOK paid models',
+	'routing.statusBar.label': 'Free-tier quota',
+	'routing.statusBar.none': 'No free-tier providers',
+	'routing.statusBar.entry': '{0}: {1}/{2} RPD',
+	'routing.statusBar.entryRpm': '{0}: {1}/{2} RPM',
+	'routing.statusBar.exhausted': '{0}: exhausted',
+	'routing.statusBar.uncapped': '{0}: uncapped',
+	'routing.statusBar.tooltipTitle': 'Free-tier provider quotas',
+	'routing.statusBar.tooltipNoProviders': 'No free-tier providers are configured. Add a free-tier API key (Groq, Gemini, OpenRouter, Mistral) to see live quota tracking.',
 } as const;
 
 // allow-any-unicode-next-line
diff --git a/src/vs/workbench/contrib/cortexide/common/modelRouter.ts b/src/vs/workbench/contrib/cortexide/common/modelRouter.ts
index 837fb69a136..edc0a5edf03 100644
--- a/src/vs/workbench/contrib/cortexide/common/modelRouter.ts
+++ b/src/vs/workbench/contrib/cortexide/common/modelRouter.ts
@@ -5,7 +5,7 @@
 
 import { ProviderName, ModelSelection } from './cortexideSettingsTypes.js';
 import { getModelCapabilities, CortexideStaticModelInfo } from './modelCapabilities.js';
-import { ICortexideSettingsService } from './cortexideSettingsService.js';
+import { ICortexideSettingsService, CortexideSettingsState } from './cortexideSettingsService.js';
 import { localProviderNames } from './cortexideSettingsTypes.js';
 import { Disposable } from '../../../../base/common/lifecycle.js';
 import { createDecorator } from '../../../../platform/instantiation/common/instantiation.js';
@@ -14,6 +14,8 @@ import { RoutingEvaluationService } from './routingEvaluation.js';
 import { IStorageService } from '../../../../platform/storage/common/storage.js';
 import { shouldUseSpeculativeEscalation } from './routingEscalation.js';
 import { getPerformanceHarness } from './performanceHarness.js';
+import { IFreeTierQuotaService } from './routing/freeTierQuotaService.js';
+import { buildFreeTierLadder, pickTopFromLadder } from './routing/freeTierLadder.js';
 
 /**
  * Task types for automatic model selection
@@ -90,7 +92,8 @@ export class TaskAwareModelRouter extends Disposable implements ITaskAwareModelR
 
 	constructor(
 		@ICortexideSettingsService private readonly settingsService: ICortexideSettingsService,
-		@IStorageService private readonly storageService: IStorageService
+		@IStorageService private readonly storageService: IStorageService,
+		@IFreeTierQuotaService private readonly freeTierQuotaService: IFreeTierQuotaService,
 	) {
 		super();
 		this.evaluationService = new RoutingEvaluationService(this.storageService);
@@ -198,6 +201,34 @@ export class TaskAwareModelRouter extends Disposable implements ITaskAwareModelR
 			// This is handled in scoreModel by applying heavy bonuses to local models
 		}
 
+		// Routing policy: 'free-tier' -> consult the smart free-tier router first.
+		// If the ladder is empty (no configured free-tier providers, all exhausted,
+		// or privacy gate engaged), fall through to the standard scoring path so
+		// the user is never stranded.
+		const routingPolicy = settingsState.globalSettings.routingPolicy ?? 'auto-cheapest';
+		if (routingPolicy === 'free-tier') {
+			const ladderDecision = this.routeViaFreeTierLadder(context, settingsState);
+			if (ladderDecision) {
+				this.routingCache.set(cacheKey, { decision: ladderDecision, timestamp: Date.now() });
+				return ladderDecision;
+			}
+		} else if (routingPolicy === 'local-only') {
+			// Hard local-only: refuse to dispatch to any cloud provider.
+			const localDecision = this.routeToLocalModel(context);
+			if (localDecision) {
+				this.routingCache.set(cacheKey, { decision: localDecision, timestamp: Date.now() });
+				return localDecision;
+			}
+			return {
+				modelSelection: { providerName: 'auto', modelName: 'auto' },
+				confidence: 0.0,
+				reasoning: 'Routing policy is local-only but no local models are configured.',
+				qualityTier: 'abstain',
+				shouldAbstain: true,
+				abstainReason: 'No local models for local-only routing policy',
+			};
+		}
+
 		// Quality gate: pre-flight quality estimate
 		const qualityTier = this.estimateQualityTier(context);
 
@@ -1438,6 +1469,53 @@ export class TaskAwareModelRouter extends Disposable implements ITaskAwareModelR
 	 * Route to a local model (privacy/offline mode)
 	 * Returns null if no local models are available (caller must handle fallback)
 	 */
+	// NOTE(review): the JSDoc directly above was written for `routeToLocalModel`; it is repeated on that method below.
+	/**
+	 * Route via the smart free-tier ladder.
+	 *
+	 * Returns `null` when the privacy gate is engaged (`context.requiresPrivacy`)
+	 * or when the ladder yields no candidate, so the caller can fall through to
+	 * standard scoring or a local fallback.
+	 */
+	private routeViaFreeTierLadder(
+		context: TaskContext,
+		settingsState: CortexideSettingsState,
+	): RoutingDecision | null {
+		if (context.requiresPrivacy) {
+			return null;
+		}
+
+		const configured = this.getAvailableModels(settingsState);
+		const quotas = this.freeTierQuotaService.getAllRemaining();
+		const ladder = buildFreeTierLadder({
+			configuredModels: configured,
+			quotas,
+			privacyMode: !!context.requiresPrivacy,
+		});
+
+		const top = pickTopFromLadder(ladder);
+		if (!top) {
+			return null;
+		}
+
+		// Up to three runner-up candidates, in ladder order.
+		const fallbackChain: ModelSelection[] = ladder.slice(1, 4).map(c => ({
+			providerName: c.providerName,
+			modelName: c.modelName,
+		}));
+		const timeoutMs = this.getModelTimeout(top, context, settingsState);
+
+		return {
+			modelSelection: top,
+			confidence: 0.75,
+			reasoning: `Free-tier ladder selected ${top.providerName}/${top.modelName} (next: ${fallbackChain.map(m => m.providerName).join(', ') || 'none'})`,
+			fallbackChain,
+			qualityTier: 'cheap_fast',
+			timeoutMs,
+		};
+	}
+
+	/** Route to a local model (privacy/offline mode); returns null if no local models are available (caller must handle fallback). */
 	private routeToLocalModel(context: TaskContext): RoutingDecision | null {
 		const settingsState = this.settingsService.state;
 		const localModels: ModelSelection[] = [];
diff --git a/src/vs/workbench/contrib/cortexide/common/routing/freeTierConstants.ts b/src/vs/workbench/contrib/cortexide/common/routing/freeTierConstants.ts
new file mode 100644
index 00000000000..c6c45acfc21
--- /dev/null
+++ b/src/vs/workbench/contrib/cortexide/common/routing/freeTierConstants.ts
@@ -0,0 +1,181 @@
+/*--------------------------------------------------------------------------------------
+ *  Copyright 2025 Glass Devtools, Inc. All rights reserved.
+ *  Licensed under the Apache License, Version 2.0. See LICENSE.txt for more information.
+ *--------------------------------------------------------------------------------------*/
+
+/**
+ * Free-tier quota and quality configuration for the smart free-tier router.
+ *
+ * Source of truth for the rate limits we know about as of 2026-05.  These are
+ * intentionally NOT hardcoded URLs - only request/token rate limits.  When a
+ * provider changes its published limits, update this file in one place.
+ *
+ * RPD = requests per day.
+ * RPM = requests per minute.
+ * TPM = tokens per minute.
+ *
+ * `qualityRank` is a relative ordering used by `freeTierLadder` when more
+ * than one provider has remaining quota.  Higher = preferred.  See research
+ * doc section 4.1 for the full rationale.
+ *
+ *     Cerebras > Groq > Gemini Flash > OpenRouter free > Mistral > Cloudflare
+ */
+
+import { ProviderName } from '../cortexideSettingsTypes.js';
+
+/**
+ * Free-tier provider ids; these are the keys of `FREE_TIER_QUOTAS`.
+ *
+ * NOTE: `cerebras` and `cloudflareWorkersAI` are included although their
+ * rows have `cortexProviderName: null` (no CortexIDE provider is wired to
+ * them yet), so `freeTierIdOfProviderName` never returns them.  The rows
+ * exist so the ladder can pick them up once such a provider is added.
+ */
+export type FreeTierProviderId =
+	| 'cerebras'
+	| 'groq'
+	| 'gemini'
+	| 'mistral'
+	| 'openRouter'
+	| 'cloudflareWorkersAI';
+
+/**
+ * Per-provider free-tier policy: the shape of one row of `FREE_TIER_QUOTAS`.
+ */
+export interface FreeTierQuota {
+	readonly providerId: FreeTierProviderId;
+	/** Matching CortexIDE `ProviderName`, or `null` if not yet wired; `freeTierIdOfProviderName` matches on this field. */
+	readonly cortexProviderName: ProviderName | null;
+	/** Higher value = preferred when multiple providers have quota. */
+	readonly qualityRank: number;
+	/** Requests per day, or `null` if not capped. */
+	readonly rpd: number | null;
+	/** Requests per minute, or `null` if not capped. */
+	readonly rpm: number | null;
+	/** Tokens per minute, or `null` if not capped. */
+	readonly tpm: number | null;
+	/** Free-text note for tooltips / docs; may be empty. */
+	readonly notes: string;
+}
+
+/**
+ * Free-tier quota table, keyed by provider id.  Update here when provider docs change.
+ */
+export const FREE_TIER_QUOTAS: { readonly [K in FreeTierProviderId]: FreeTierQuota } = {
+	cerebras: {
+		providerId: 'cerebras',
+		cortexProviderName: null,
+		qualityRank: 100,
+		rpd: null,
+		rpm: 30,
+		tpm: null,
+		notes: '1M tokens/day; 8K context cap',
+	},
+	groq: {
+		providerId: 'groq',
+		cortexProviderName: 'groq',
+		qualityRank: 80,
+		rpd: 1000,
+		rpm: 30,
+		tpm: 6000,
+		notes: '',
+	},
+	gemini: {
+		providerId: 'gemini',
+		cortexProviderName: 'gemini',
+		qualityRank: 60,
+		// We track the broadest tier (Flash-Lite) here; per-model tightening is
+		// applied by `resolveEffectiveQuota` via `GEMINI_MODEL_OVERRIDES`.
+		rpd: 1000,
+		rpm: 15,
+		tpm: null,
+		notes: 'Flash-Lite limits; Flash/Pro tighter',
+	},
+	openRouter: {
+		providerId: 'openRouter',
+		cortexProviderName: 'openRouter',
+		qualityRank: 40,
+		rpd: 50,
+		rpm: 20,
+		tpm: null,
+		notes: '1000 RPD with $10 top-up',
+	},
+	mistral: {
+		providerId: 'mistral',
+		cortexProviderName: 'mistral',
+		qualityRank: 30,
+		rpd: null,
+		rpm: 2,
+		tpm: null,
+		notes: '1B tokens/month (Experiment tier)',
+	},
+	cloudflareWorkersAI: {
+		providerId: 'cloudflareWorkersAI',
+		cortexProviderName: null,
+		qualityRank: 20,
+		rpd: null,
+		rpm: null,
+		tpm: null,
+		notes: '10,000 Neurons/day',
+	},
+};
+
+/**
+ * Per-model RPD/RPM overrides for Gemini, applied on top of the master table
+ * by `resolveEffectiveQuota`.  Matched case-insensitively by substring.
+ */
+export interface GeminiModelQuotaOverride {
+	readonly modelNameSubstring: string;
+	readonly rpd: number | null;
+	readonly rpm: number | null;
+}
+
+export const GEMINI_MODEL_OVERRIDES: readonly GeminiModelQuotaOverride[] = [
+	{ modelNameSubstring: 'pro', rpd: 100, rpm: 5 },
+	{ modelNameSubstring: 'flash-lite', rpd: 1000, rpm: 15 }, // must stay before 'flash': the first match wins
+	{ modelNameSubstring: 'flash', rpd: 250, rpm: 10 },
+];
+
+/**
+ * Resolve the effective limits for a given provider+model.
+ *
+ * Starts from the provider's row in `FREE_TIER_QUOTAS`.  For Gemini, the first
+ * `GEMINI_MODEL_OVERRIDES` entry whose substring occurs in the lower-cased
+ * model name replaces RPD and RPM; TPM always comes from the provider row.
+ */
+export function resolveEffectiveQuota(
+	providerId: FreeTierProviderId,
+	modelName: string,
+): { rpd: number | null; rpm: number | null; tpm: number | null } {
+	const { rpd, rpm, tpm } = FREE_TIER_QUOTAS[providerId];
+	if (providerId !== 'gemini') {
+		return { rpd, rpm, tpm };
+	}
+
+	// First match wins, so the order of GEMINI_MODEL_OVERRIDES is significant
+	// ('flash-lite' is listed before the broader 'flash').
+	const needle = modelName.toLowerCase();
+	const override = GEMINI_MODEL_OVERRIDES.find(o => needle.includes(o.modelNameSubstring));
+	if (override === undefined) {
+		return { rpd, rpm, tpm };
+	}
+
+	return { rpd: override.rpd, rpm: override.rpm, tpm };
+}
+
+/**
+ * Reverse lookup: CortexIDE `ProviderName` -> free-tier id.  Returns `null`
+ * when no `FREE_TIER_QUOTAS` row names the provider as its
+ * `cortexProviderName`.  `'auto'` is accepted so callers don't have to narrow
+ * first; it always returns `null`.
+ */
+export function freeTierIdOfProviderName(providerName: ProviderName | 'auto'): FreeTierProviderId | null {
+	if (providerName === 'auto') {
+		return null;
+	}
+
+	// Rows are scanned in declaration order and the first matching key is returned.
+	const ids = Object.keys(FREE_TIER_QUOTAS) as FreeTierProviderId[];
+	const match = ids.find(id => FREE_TIER_QUOTAS[id].cortexProviderName === providerName);
+	return match ?? null;
+}
diff --git a/src/vs/workbench/contrib/cortexide/common/routing/freeTierLadder.ts b/src/vs/workbench/contrib/cortexide/common/routing/freeTierLadder.ts
new file mode 100644
index 00000000000..71bc46cf2b2
--- /dev/null
+++ b/src/vs/workbench/contrib/cortexide/common/routing/freeTierLadder.ts
@@ -0,0 +1,122 @@
+/*--------------------------------------------------------------------------------------
+ *  Copyright 2025 Glass Devtools, Inc. All rights reserved.
+ *  Licensed under the Apache License, Version 2.0. See LICENSE.txt for more information.
+ *--------------------------------------------------------------------------------------*/
+
+/**
+ * Pure function: given configured providers + privacy state + remaining
+ * quotas, return an ordered list of [provider, model] candidates for the
+ * free-tier router.
+ *
+ * Layer: `common/`. Pure. No I/O.  Tested in isolation under
+ * `test/common/freeTierLadder.test.ts`.
+ */
+
+import { ModelSelection, ProviderName } from '../cortexideSettingsTypes.js';
+import {
+	FREE_TIER_QUOTAS,
+	FreeTierProviderId,
+	freeTierIdOfProviderName,
+} from './freeTierConstants.js';
+import { FreeTierRemaining } from './freeTierQuotaService.js';
+
+/** A configured free-tier provider candidate as produced by `buildFreeTierLadder` (always a real provider, never `'auto'`). */
+export interface FreeTierCandidate {
+	readonly providerName: ProviderName;
+	readonly providerId: FreeTierProviderId;
+	readonly modelName: string;
+	/** Copied from the provider's row in `FREE_TIER_QUOTAS`.  Higher = preferred. */
+	readonly qualityRank: number;
+}
+
+/** Inputs to the ladder computation - all caller-supplied, no service deps. */
+export interface FreeTierLadderInput {
+	/** Provider/model pairs the user has configured; `'auto'` and non-free-tier providers are ignored. */
+	readonly configuredModels: readonly ModelSelection[];
+	/**
+	 * Quota snapshots (e.g. from `IFreeTierQuotaService.getAllRemaining()`),
+	 * looked up by `providerId`; if an id repeats, the last snapshot wins.
+	 * Providers absent from this list are treated as having unlimited quota.
+	 */
+	readonly quotas: readonly FreeTierRemaining[];
+	/**
+	 * If true, the privacy gate is engaged - the ladder MUST be empty so the
+	 * caller falls back to local models.
+	 */
+	readonly privacyMode: boolean;
+}
+
+/**
+ * Build the ordered candidate list.  A configured model is dropped when:
+ *   - its provider is `'auto'` or is not on the free-tier table
+ *   - its provider's quota snapshot is marked exhausted (429)
+ *   - its provider's quota snapshot has no remaining RPD or RPM (<= 0)
+ * Providers without a snapshot in `input.quotas` are never dropped for quota
+ * reasons.  Each configured model yields its own candidate, so a provider
+ * with several models appears several times.  The remainder is sorted by
+ * descending `qualityRank`.
+ *
+ * If `privacyMode` is true, returns `[]`.
+ */
+export function buildFreeTierLadder(input: FreeTierLadderInput): readonly FreeTierCandidate[] {
+	if (input.privacyMode) {
+		return [];
+	}
+
+	// One snapshot per provider; a later entry for the same id replaces an earlier one.
+	const snapshotByProvider = new Map<FreeTierProviderId, FreeTierRemaining>(
+		input.quotas.map(q => [q.providerId, q] as const),
+	);
+
+	// True when the provider may still be called according to its snapshot.
+	const hasQuotaLeft = (providerId: FreeTierProviderId): boolean => {
+		const snapshot = snapshotByProvider.get(providerId);
+		if (!snapshot) {
+			// No snapshot supplied: treated as unlimited quota.
+			return true;
+		}
+		const dailySpent = snapshot.rpd !== null && snapshot.rpd <= 0;
+		const minuteSpent = snapshot.rpm !== null && snapshot.rpm <= 0;
+		return !(snapshot.exhausted || dailySpent || minuteSpent);
+	};
+
+	const ladder: FreeTierCandidate[] = [];
+	for (const { providerName, modelName } of input.configuredModels) {
+		// `'auto'` never names a concrete provider, so it cannot be a candidate.
+		if (providerName === 'auto') {
+			continue;
+		}
+
+		const providerId = freeTierIdOfProviderName(providerName);
+		if (providerId === null || !hasQuotaLeft(providerId)) {
+			continue;
+		}
+		// Rank comes from the provider's row in the quota table.
+		ladder.push({
+			providerName,
+			providerId,
+			modelName,
+			qualityRank: FREE_TIER_QUOTAS[providerId].qualityRank,
+		});
+	}
+
+	// Highest quality first.
+	ladder.sort((a, b) => b.qualityRank - a.qualityRank);
+	return ladder;
+}
+
+/**
+ * Convenience: project the first (highest-ranked) ladder candidate down to a
+ * `ModelSelection`.  Returns `null` if the ladder is empty.
+ */
+export function pickTopFromLadder(
+	ladder: readonly FreeTierCandidate[],
+): ModelSelection | null {
+	const [first] = ladder;
+	if (first === undefined) {
+		return null;
+	}
+	// Only the provider/model pair is returned; providerId and qualityRank are dropped.
+	const { providerName, modelName } = first;
+	return { providerName, modelName };
+}
diff --git a/src/vs/workbench/contrib/cortexide/common/routing/freeTierQuotaService.ts b/src/vs/workbench/contrib/cortexide/common/routing/freeTierQuotaService.ts
new file mode 100644
index 00000000000..a1381a20cbb
--- /dev/null
+++ b/src/vs/workbench/contrib/cortexide/common/routing/freeTierQuotaService.ts
@@ -0,0 +1,252 @@
+/*--------------------------------------------------------------------------------------
+ *  Copyright 2025 Glass Devtools, Inc. All rights reserved.
+ *  Licensed under the Apache License, Version 2.0. See LICENSE.txt for more information.
+ *--------------------------------------------------------------------------------------*/
+
+/**
+ * Free-tier quota tracking service.
+ *
+ * Tracks per-provider request counts (RPD / RPM) and rough token counts (TPM)
+ * for free-tier providers, persisted across restarts via VS Code's storage.
+ *
+ * The service is the in-process source of truth.  It has no network access of
+ * its own; updates come from `recordCall(...)` and `markExhausted(...)` which
+ * are invoked by the LLM message pipeline.
+ *
+ * Layer: `common/`. No DOM, Node, or Electron imports.
+ */
+
+import { Emitter, Event } from '../../../../../base/common/event.js';
+import { Disposable } from '../../../../../base/common/lifecycle.js';
+import { createDecorator } from '../../../../../platform/instantiation/common/instantiation.js';
+import { registerSingleton, InstantiationType } from '../../../../../platform/instantiation/common/extensions.js';
+import { IStorageService, StorageScope, StorageTarget } from '../../../../../platform/storage/common/storage.js';
+import {
+	FREE_TIER_QUOTAS,
+	FreeTierProviderId,
+	resolveEffectiveQuota,
+} from './freeTierConstants.js';
+
+/** Persistent storage key.  Single JSON blob keyed by provider id. */
+export const FREE_TIER_QUOTA_STORAGE_KEY = 'cortexide.freeTier.quotaState';
+
+/** Public per-provider quota snapshot: a point-in-time copy, not a live view. */
+export interface FreeTierRemaining {
+	readonly providerId: FreeTierProviderId;
+	/** Effective limits at the moment of query, after per-model overrides (null = uncapped). */
+	readonly limits: { rpd: number | null; rpm: number | null; tpm: number | null };
+	/** Remaining requests today (null if uncapped). */
+	readonly rpd: number | null;
+	/** Remaining requests in the current minute window (null if uncapped). */
+	readonly rpm: number | null;
+	/** Remaining tokens in the current minute window (null if uncapped). */
+	readonly tpm: number | null;
+	/** True while a recorded rate-limit (429) is still in force, i.e. `resetAt` is in the future. */
+	readonly exhausted: boolean;
+	/** When the rate-limit reset is expected (epoch ms), or `null` when none is pending. */
+	readonly resetAt: number | null;
+}
+
+/** Internal persisted shape: per-provider usage counters plus the rate-limit cooldown. */
+interface PersistedProviderState {
+	/** Window start (epoch ms, midnight UTC of the current RPD window). */
+	rpdWindowStart: number;
+	/** Requests recorded in the current UTC-day window. */
+	rpdUsed: number;
+	/** Start (epoch ms) of the current 60s window; the window restarts once 60s have passed, it does not slide. */
+	rpmWindowStart: number;
+	/** Requests in the current 60s window. */
+	rpmUsed: number;
+	/** Tokens used in the current 60s window. */
+	tpmUsed: number;
+	/** Epoch ms set by markExhausted; rollWindows clears it once now >= exhaustedUntil. */
+	exhaustedUntil: number | null;
+}
+
+interface PersistedState { // root object of the JSON blob kept under FREE_TIER_QUOTA_STORAGE_KEY
+	/** Schema version - bump on breaking changes. */
+	version: 1;
+	providers: { [k in FreeTierProviderId]?: PersistedProviderState }; // an entry is created the first time a provider is touched
+}
+
+const ONE_MINUTE_MS = 60_000; // length of the RPM/TPM window, and the default cooldown used by markExhausted
+
+/** Epoch ms of 00:00:00.000 UTC on the calendar day that contains `now`. */
+function utcDayStart(now: number): number {
+	const moment = new Date(now);
+	return Date.UTC(moment.getUTCFullYear(), moment.getUTCMonth(), moment.getUTCDate());
+}
+
+function emptyProviderState(now: number): PersistedProviderState {
+	// Zeroed counters; the minute window opens at `now`, the daily one at UTC midnight.
+	const dayStart = utcDayStart(now);
+	const fresh: PersistedProviderState = {
+		rpdWindowStart: dayStart, rpdUsed: 0,
+		rpmWindowStart: now, rpmUsed: 0, tpmUsed: 0,
+		exhaustedUntil: null,
+	};
+	return fresh;
+}
+
+/** Expires stale bookkeeping on `state` in place: daily count, minute window, rate-limit cooldown. */
+function rollWindows(state: PersistedProviderState, now: number): void {
+	const today = utcDayStart(now);
+	if (state.rpdWindowStart < today) {
+		state.rpdWindowStart = today;
+		state.rpdUsed = 0;
+	}
+	const windowAge = now - state.rpmWindowStart;
+	if (windowAge >= ONE_MINUTE_MS) {
+		state.rpmWindowStart = now;
+		state.rpmUsed = state.tpmUsed = 0;
+	}
+	if (state.exhaustedUntil !== null && state.exhaustedUntil <= now) {
+		state.exhaustedUntil = null;
+	}
+}
+
+export interface IFreeTierQuotaService {
+	readonly _serviceBrand: undefined;
+
+	/** Fires after recordCall, markExhausted and resetAll; a lazy window rollover on its own does not fire. */
+	readonly onQuotaChange: Event<void>;
+
+	/** Returns remaining quota for a provider+model.  Snapshot, not live. */
+	getRemaining(providerId: FreeTierProviderId, modelName: string): FreeTierRemaining;
+
+	/** Returns remaining quota for every free-tier provider we know about. */
+	getAllRemaining(modelName?: string): readonly FreeTierRemaining[];
+
+	/**
+	 * Increment counters after a successful call.  `tokensUsed` is the best
+	 * estimate available (output text length / 4 is a common cheap proxy).
+	 */
+	recordCall(providerId: FreeTierProviderId, modelName: string, tokensUsed: number): void;
+
+	/**
+	 * Mark a provider as exhausted (rate-limited).  `resetAt` may be null when
+	 * the provider didn't tell us when to retry; in that case, or when `resetAt`
+	 * is not in the future, a conservative 60s default is used.
+	 */
+	markExhausted(providerId: FreeTierProviderId, resetAt: number | null): void;
+
+	/** Test helper: clear all state. */
+	resetAll(): void;
+}
+
+export const IFreeTierQuotaService = createDecorator<IFreeTierQuotaService>('FreeTierQuotaService');
+
+/** Storage-backed `IFreeTierQuotaService`: state is loaded once at construction and saved on every mutation. */
+export class FreeTierQuotaService extends Disposable implements IFreeTierQuotaService {
+	readonly _serviceBrand: undefined;
+
+	private readonly _onQuotaChange = this._register(new Emitter<void>());
+	readonly onQuotaChange: Event<void> = this._onQuotaChange.event;
+
+	private _state: PersistedState;
+
+	constructor(
+		@IStorageService private readonly _storageService: IStorageService,
+	) {
+		super();
+		this._state = this._readState();
+	}
+
+	/** Loads the persisted blob; missing, unparsable or wrongly shaped data yields a fresh empty state. */
+	private _readState(): PersistedState {
+		const raw = this._storageService.get(FREE_TIER_QUOTA_STORAGE_KEY, StorageScope.APPLICATION);
+		if (raw) {
+			try {
+				const parsed = JSON.parse(raw) as PersistedState | null;
+				const providers: unknown = parsed?.providers;
+				// `typeof` says 'object' for null and arrays too; a null `providers` would make every lookup throw.
+				const isPlainObject = typeof providers === 'object' && providers !== null && !Array.isArray(providers);
+				if (parsed?.version === 1 && isPlainObject) {
+					return parsed;
+				}
+			} catch (_err) {
+				// Corrupt JSON - silently reset rather than block startup.
+			}
+		}
+		return { version: 1, providers: {} };
+	}
+
+	/** Saves the whole state as one JSON blob (application scope, machine target). */
+	private _writeState(): void {
+		const blob = JSON.stringify(this._state);
+		this._storageService.store(FREE_TIER_QUOTA_STORAGE_KEY, blob, StorageScope.APPLICATION, StorageTarget.MACHINE);
+	}
+
+	/** Returns the mutable state for `providerId` (created on first use) after rolling elapsed windows. */
+	private _getOrCreate(providerId: FreeTierProviderId, now: number): PersistedProviderState {
+		let s = this._state.providers[providerId];
+		if (!s) {
+			s = emptyProviderState(now);
+			this._state.providers[providerId] = s;
+		}
+		rollWindows(s, now);
+		return s;
+	}
+
+	/**
+	 * Snapshot for `providerId` under the limits `resolveEffectiveQuota` gives for `modelName`;
+	 * remaining values are clamped at 0 and stay `null` where the limit is `null` (uncapped).
+	 */
+	getRemaining(providerId: FreeTierProviderId, modelName: string): FreeTierRemaining {
+		const now = Date.now();
+		const s = this._getOrCreate(providerId, now);
+		const limits = resolveEffectiveQuota(providerId, modelName);
+		const left = (limit: number | null, used: number): number | null =>
+			limit === null ? null : Math.max(0, limit - used);
+
+		return {
+			providerId,
+			limits,
+			rpd: left(limits.rpd, s.rpdUsed),
+			rpm: left(limits.rpm, s.rpmUsed),
+			tpm: left(limits.tpm, s.tpmUsed),
+			exhausted: s.exhaustedUntil !== null && now < s.exhaustedUntil,
+			resetAt: s.exhaustedUntil,
+		};
+	}
+
+	/** Snapshots for every provider id in `FREE_TIER_QUOTAS`; `modelName` defaults to ''. */
+	getAllRemaining(modelName: string = ''): readonly FreeTierRemaining[] {
+		const ids = Object.keys(FREE_TIER_QUOTAS) as FreeTierProviderId[];
+		return ids.map(id => this.getRemaining(id, modelName));
+	}
+
+	/** Counts one request in the daily and minute windows plus its token estimate, then saves and notifies. */
+	recordCall(providerId: FreeTierProviderId, modelName: string, tokensUsed: number): void {
+		const s = this._getOrCreate(providerId, Date.now());
+		s.rpdUsed += 1;
+		s.rpmUsed += 1;
+		// Skip NaN / Infinity / non-positive estimates so one bad value cannot poison the counter.
+		if (Number.isFinite(tokensUsed) && tokensUsed > 0) {
+			s.tpmUsed += tokensUsed;
+		}
+		// modelName is currently unused for the increment but reserved for
+		// future per-model accounting; reference it to satisfy linters.
+		void modelName;
+		this._writeState();
+		this._onQuotaChange.fire();
+	}
+
+	/** Starts a cooldown lasting until `resetAt`, or for 60s when `resetAt` is null or not in the future. */
+	markExhausted(providerId: FreeTierProviderId, resetAt: number | null): void {
+		const now = Date.now();
+		const s = this._getOrCreate(providerId, now);
+		s.exhaustedUntil = resetAt !== null && resetAt > now ? resetAt : now + ONE_MINUTE_MS;
+		this._writeState();
+		this._onQuotaChange.fire();
+	}
+
+	/** Test helper: drops every provider's state, saves the empty blob and notifies. */
+	resetAll(): void {
+		this._state = { version: 1, providers: {} };
+		this._writeState();
+		this._onQuotaChange.fire();
+	}
+}
+
+registerSingleton(IFreeTierQuotaService, FreeTierQuotaService, InstantiationType.Delayed);
diff --git a/src/vs/workbench/contrib/cortexide/common/sendLLMMessageService.ts b/src/vs/workbench/contrib/cortexide/common/sendLLMMessageService.ts
index 525da6b33d3..c2337454247 100644
--- a/src/vs/workbench/contrib/cortexide/common/sendLLMMessageService.ts
+++ b/src/vs/workbench/contrib/cortexide/common/sendLLMMessageService.ts
@@ -18,6 +18,8 @@ import { ISecretDetectionService } from './secretDetectionService.js';
 import { INotificationService, Severity } from '../../../../platform/notification/common/notification.js';
 import { ILogService } from '../../../../platform/log/common/log.js';
 import { isWeb } from '../../../../base/common/platform.js';
+import { IFreeTierQuotaService } from './routing/freeTierQuotaService.js';
+import { freeTierIdOfProviderName } from './routing/freeTierConstants.js';
 
 // calls channel to implement features
 export const ILLMMessageService = createDecorator<ILLMMessageService>('llmMessageService');
@@ -69,6 +71,7 @@ export class LLMMessageService extends Disposable implements ILLMMessageService
 		@IMCPService private readonly mcpService: IMCPService,
 		@ISecretDetectionService private readonly secretDetectionService: ISecretDetectionService,
 		@ILogService private readonly logService: ILogService,
+		@IFreeTierQuotaService private readonly freeTierQuotaService: IFreeTierQuotaService,
 	) {
 		super()
 
@@ -236,9 +239,42 @@ export class LLMMessageService extends Disposable implements ILLMMessageService
 
 		// add state for request id
 		const requestId = generateUuid();
+
+		// Free-tier quota tracking: wrap success/error callbacks so we update
+		// the in-process quota service whenever a call completes or hits 429.
+		// Wrapping happens here (common/ layer) rather than electron-main to
+		// keep the quota service strictly in common/ - the impl has no way to
+		// reach back to common/ services.
+		const freeTierId = freeTierIdOfProviderName(modelSelection.providerName);
+		const wrappedOnFinalMessage = freeTierId === null
+			? onFinalMessage
+			: (params: EventLLMMessageOnFinalMessageParams) => {
+				try {
+					// Cheap proxy for tokens until SDK responses expose real usage.
+					// Output tokens ~ chars/4 is the standard approximation.
+					const estTokens = Math.ceil((params.fullText?.length ?? 0) / 4);
+					this.freeTierQuotaService.recordCall(freeTierId, modelSelection.modelName, estTokens);
+				} catch (err) {
+					this.logService.warn('[FreeTierQuota] recordCall failed', err);
+				}
+				onFinalMessage(params);
+			};
+		const wrappedOnError = freeTierId === null
+			? onError
+			: (params: EventLLMMessageOnErrorParams) => {
+				try {
+					if (isRateLimitError(params)) {
+						this.freeTierQuotaService.markExhausted(freeTierId, parseRetryAt(params));
+					}
+				} catch (err) {
+					this.logService.warn('[FreeTierQuota] markExhausted failed', err);
+				}
+				onError(params);
+			};
+
 		this.llmMessageHooks.onText[requestId] = onText
-		this.llmMessageHooks.onFinalMessage[requestId] = onFinalMessage
-		this.llmMessageHooks.onError[requestId] = onError
+		this.llmMessageHooks.onFinalMessage[requestId] = wrappedOnFinalMessage
+		this.llmMessageHooks.onError[requestId] = wrappedOnError
 		this.llmMessageHooks.onAbort[requestId] = onAbort // used internally only
 
 		// params will be stripped of all its functions over the IPC channel
@@ -309,5 +345,59 @@ export class LLMMessageService extends Disposable implements ILLMMessageService
 	}
 }
 
+/**
+ * Detect 429 / rate-limit errors from the provider error payload, sniffing both
+ * the normalised `message` string and the opaque `fullError`.  In the message,
+ * "429" must stand alone so a longer number (14295, 1.429) does not match.
+ */
+function isRateLimitError(params: EventLLMMessageOnErrorParams): boolean {
+	const msg = (params.message || '').toLowerCase();
+	const phrases = ['rate limit', 'rate-limit', 'resource_exhausted', 'quota'];
+	if (phrases.some(p => msg.includes(p)) || /(?<![\d.])429(?!\d)/.test(msg)) {
+		return true;
+	}
+	const full = params.fullError as unknown;
+	if (!full || typeof full !== 'object') {
+		return false;
+	}
+	// SDKs expose the HTTP status under different keys, as a number or a string.
+	const { status, code, statusCode } = full as { status?: unknown; code?: unknown; statusCode?: unknown };
+	return [status, code, statusCode].some(v => v === 429 || v === '429');
+}
+
+/**
+ * Best-effort: extract a retry-at timestamp (epoch ms) from a rate-limit error.
+ * Tried in order: a Retry-After header on `fullError.headers` (delta-seconds or
+ * HTTP-date; plain record or `Headers`-like object exposing `get`), a numeric
+ * `fullError.retryAfter` in seconds, then a "retry in 57s" / "try again in 4.9s"
+ * phrase in `message`.  If nothing yields a future time, returns `null` - the
+ * quota service applies a conservative 60s default.
+ */
+function parseRetryAt(params: EventLLMMessageOnErrorParams): number | null {
+	const now = Date.now();
+	const full = params.fullError as unknown;
+	if (full && typeof full === 'object') {
+		const { headers, retryAfter } = full as { headers?: unknown; retryAfter?: unknown };
+		if (headers && typeof headers === 'object') {
+			const bag = headers as { get?: unknown } & Record<string, unknown>;
+			const raw = typeof bag.get === 'function' ? bag.get('retry-after') : (bag['retry-after'] || bag['Retry-After']);
+			const text = typeof raw === 'string' || typeof raw === 'number' ? String(raw).trim() : '';
+			const seconds = Number(text);
+			// A numeric value is delta-seconds; anything else is tried as an HTTP-date.
+			const at = Number.isNaN(seconds) ? Date.parse(text) : now + seconds * 1000;
+			if (Number.isFinite(at) && at > now) {
+				return at;
+			}
+		}
+		if (typeof retryAfter === 'number' && Number.isFinite(retryAfter) && retryAfter > 0) {
+			return now + retryAfter * 1000;
+		}
+	}
+	// Last resort: a "retry in Ns" or "try again in Ns" phrase inside the message text.
+	const m = (params.message || '').match(/(?:retry|try again) in\s+(\d+(?:\.\d+)?)\s*s/i);
+	const fromMessage = m ? Number(m[1]) : NaN;
+	return Number.isFinite(fromMessage) && fromMessage > 0 ? now + fromMessage * 1000 : null;
+}
+
 registerSingleton(ILLMMessageService, LLMMessageService, InstantiationType.Eager);
 
diff --git a/src/vs/workbench/contrib/cortexide/test/common/freeTierLadder.test.ts b/src/vs/workbench/contrib/cortexide/test/common/freeTierLadder.test.ts
new file mode 100644
index 00000000000..be5c06eeb80
--- /dev/null
+++ b/src/vs/workbench/contrib/cortexide/test/common/freeTierLadder.test.ts
@@ -0,0 +1,128 @@
+/*--------------------------------------------------------------------------------------
+ *  Copyright 2025 Glass Devtools, Inc. All rights reserved.
+ *  Licensed under the Apache License, Version 2.0. See LICENSE.txt for more information.
+ *--------------------------------------------------------------------------------------*/
+
+import * as assert from 'assert';
+import { suite, test } from 'mocha';
+import { buildFreeTierLadder, pickTopFromLadder } from '../../common/routing/freeTierLadder.js';
+import { FreeTierRemaining } from '../../common/routing/freeTierQuotaService.js';
+import { FREE_TIER_QUOTAS, FreeTierProviderId } from '../../common/routing/freeTierConstants.js';
+import { ModelSelection } from '../../common/cortexideSettingsTypes.js';
+
+/** Full-quota snapshot for `providerId` (not exhausted, no reset time); `overrides` win field by field. */
+function snap(
+	providerId: FreeTierProviderId,
+	overrides: Partial<Omit<FreeTierRemaining, 'providerId'>> = {},
+): FreeTierRemaining {
+	const { rpd, rpm, tpm } = FREE_TIER_QUOTAS[providerId];
+	const untouched: FreeTierRemaining = {
+		providerId,
+		limits: { rpd, rpm, tpm },
+		rpd,
+		rpm,
+		tpm,
+		exhausted: false,
+		resetAt: null,
+	};
+	return { ...untouched, ...overrides };
+}
+
+suite('FreeTierLadder', () => {
+	// Each test feeds buildFreeTierLadder hand-built inputs; snap() supplies full-quota snapshots.
+	test('respects privacy gate: returns empty ladder regardless of configured providers', () => {
+		const configured: ModelSelection[] = [
+			{ providerName: 'groq', modelName: 'llama-3.3-70b-versatile' },
+			{ providerName: 'gemini', modelName: 'gemini-2.5-flash' },
+		];
+		const ladder = buildFreeTierLadder({
+			configuredModels: configured,
+			quotas: [snap('groq'), snap('gemini')],
+			privacyMode: true,
+		});
+		assert.strictEqual(ladder.length, 0, 'privacy mode must produce an empty ladder');
+		assert.strictEqual(pickTopFromLadder(ladder), null);
+	});
+
+	test('skips exhausted providers (429 marked) and falls through to next quality tier', () => {
+		const configured: ModelSelection[] = [
+			{ providerName: 'groq', modelName: 'llama-3.3-70b-versatile' },
+			{ providerName: 'gemini', modelName: 'gemini-2.5-flash' },
+			{ providerName: 'openRouter', modelName: 'openrouter/auto' },
+		];
+		const ladder = buildFreeTierLadder({
+			configuredModels: configured,
+			quotas: [
+				snap('groq', { exhausted: true, resetAt: Date.now() + 60_000 }), // flagged as rate-limited for another minute
+				snap('gemini'),
+				snap('openRouter'),
+			],
+			privacyMode: false,
+		});
+		assert.ok(ladder.length >= 2, 'gemini + openRouter should remain after groq is dropped');
+		assert.notStrictEqual(ladder[0].providerId, 'groq', 'exhausted groq must not be top');
+		assert.strictEqual(ladder[0].providerId, 'gemini', 'next-best quality should win');
+	});
+
+	test('picks highest-quality available provider when all have quota', () => {
+		const configured: ModelSelection[] = [
+			{ providerName: 'openRouter', modelName: 'openrouter/auto' },
+			{ providerName: 'gemini', modelName: 'gemini-2.5-flash-lite' },
+			{ providerName: 'groq', modelName: 'llama-3.3-70b-versatile' },
+			{ providerName: 'mistral', modelName: 'mistral-large-latest' },
+		];
+		const ladder = buildFreeTierLadder({
+			configuredModels: configured,
+			quotas: [snap('groq'), snap('gemini'), snap('openRouter'), snap('mistral')],
+			privacyMode: false,
+		});
+		assert.strictEqual(ladder.length, 4);
+		// Expected order per FREE_TIER_QUOTAS qualityRank: groq(80) > gemini(60) > openRouter(40) > mistral(30)
+		assert.strictEqual(ladder[0].providerId, 'groq');
+		assert.strictEqual(ladder[1].providerId, 'gemini');
+		assert.strictEqual(ladder[2].providerId, 'openRouter');
+		assert.strictEqual(ladder[3].providerId, 'mistral');
+	});
+
+	test('zero remaining RPD removes provider from ladder', () => {
+		const configured: ModelSelection[] = [
+			{ providerName: 'groq', modelName: 'llama-3.3-70b-versatile' },
+			{ providerName: 'gemini', modelName: 'gemini-2.5-flash' },
+		];
+		const ladder = buildFreeTierLadder({
+			configuredModels: configured,
+			quotas: [
+				snap('groq', { rpd: 0 }), // daily requests used up, but not flagged exhausted
+				snap('gemini'),
+			],
+			privacyMode: false,
+		});
+		assert.strictEqual(ladder.length, 1);
+		assert.strictEqual(ladder[0].providerId, 'gemini');
+	});
+
+	test('non-free-tier providers (e.g. anthropic, openAI) are silently ignored', () => {
+		const configured: ModelSelection[] = [
+			{ providerName: 'anthropic', modelName: 'claude-3-5-sonnet-20241022' },
+			{ providerName: 'openAI', modelName: 'gpt-4o' },
+			{ providerName: 'groq', modelName: 'llama-3.3-70b-versatile' },
+		];
+		const ladder = buildFreeTierLadder({
+			configuredModels: configured,
+			quotas: [snap('groq')], // no snapshot is passed for anthropic / openAI
+			privacyMode: false,
+		});
+		assert.strictEqual(ladder.length, 1);
+		assert.strictEqual(ladder[0].providerId, 'groq');
+	});
+
+	test('empty configured list -> empty ladder', () => {
+		const ladder = buildFreeTierLadder({
+			configuredModels: [],
+			quotas: [],
+			privacyMode: false,
+		});
+		assert.strictEqual(ladder.length, 0);
+		assert.strictEqual(pickTopFromLadder(ladder), null);
+	});
+});