From 66da3145eb303beb85946d5466ad1694386eff98 Mon Sep 17 00:00:00 2001 From: Roo Code Date: Sun, 3 May 2026 09:43:37 +0000 Subject: [PATCH] feat: add FIM inline code completion support (Fill-in-the-Middle) Implements a lightweight FIM completion provider that registers as a VS Code InlineCompletionItemProvider to provide ghost-text suggestions as the user types, similar to GitHub Copilot. Key components: - FimTokenFormatter: Maps model families to FIM token formats (DeepSeek, CodeLlama, StarCoder, Mistral/Codestral, Qwen, generic) - FimApiClient: Lightweight API client supporting /v1/completions, Ollama /api/generate, and Mistral /v1/fim/completions endpoints - FimCompletionProvider: VS Code InlineCompletionItemProvider with debouncing, caching, and cancellation support - FimService: Orchestrator managing provider lifecycle based on settings Settings added to GlobalSettings: - fimEnabled, fimProvider, fimModelId, fimBaseUrl, fimDebounceMs, fimMaxTokens, fimApiKey (secret) Supported providers: openai-compatible, deepseek, mistral, ollama Closes #12261 --- packages/types/src/global-settings.ts | 13 + src/__mocks__/vscode.js | 16 ++ src/extension.ts | 23 ++ src/services/fim/FimApiClient.ts | 153 ++++++++++++ src/services/fim/FimCompletionProvider.ts | 228 ++++++++++++++++++ src/services/fim/FimService.ts | 102 ++++++++ src/services/fim/FimTokenFormatter.ts | 94 ++++++++ .../fim/__tests__/FimApiClient.spec.ts | 225 +++++++++++++++++ .../__tests__/FimCompletionProvider.spec.ts | 205 ++++++++++++++++ src/services/fim/__tests__/FimService.spec.ts | 157 ++++++++++++ .../fim/__tests__/FimTokenFormatter.spec.ts | 76 ++++++ src/services/fim/index.ts | 4 + 12 files changed, 1296 insertions(+) create mode 100644 src/services/fim/FimApiClient.ts create mode 100644 src/services/fim/FimCompletionProvider.ts create mode 100644 src/services/fim/FimService.ts create mode 100644 src/services/fim/FimTokenFormatter.ts create mode 100644 src/services/fim/__tests__/FimApiClient.spec.ts create mode 100644 src/services/fim/__tests__/FimCompletionProvider.spec.ts create mode 100644 src/services/fim/__tests__/FimService.spec.ts create mode 100644 src/services/fim/__tests__/FimTokenFormatter.spec.ts create mode 100644 src/services/fim/index.ts diff --git a/packages/types/src/global-settings.ts b/packages/types/src/global-settings.ts index 288f6c2118c..0f06f0f51a5 100644 --- a/packages/types/src/global-settings.ts +++ b/packages/types/src/global-settings.ts @@ -232,6 +232,18 @@ export const globalSettingsSchema = z.object({ * Tools in this list will be excluded from prompt generation and rejected at execution time. */ disabledTools: z.array(toolNamesSchema).optional(), + + /** + * FIM (Fill-in-the-Middle) inline code completion settings. + * These are configured separately from the chat model to allow using + * a cheap/fast FIM-specialized model (e.g., DeepSeek Coder, Codestral). 
+ */ + fimEnabled: z.boolean().optional(), + fimProvider: z.enum(["openai-compatible", "deepseek", "mistral", "ollama"]).optional(), + fimModelId: z.string().optional(), + fimBaseUrl: z.string().optional(), + fimDebounceMs: z.number().min(0).optional(), + fimMaxTokens: z.number().min(1).optional(), }) export type GlobalSettings = z.infer @@ -285,6 +297,7 @@ export const SECRET_STATE_KEYS = [ // Global secrets that are part of GlobalSettings (not ProviderSettings) export const GLOBAL_SECRET_KEYS = [ "openRouterImageApiKey", // For image generation + "fimApiKey", // For FIM inline code completion ] as const // Type for the actual secret storage keys diff --git a/src/__mocks__/vscode.js b/src/__mocks__/vscode.js index 505497fc9f1..0512189fd1d 100644 --- a/src/__mocks__/vscode.js +++ b/src/__mocks__/vscode.js @@ -96,6 +96,7 @@ export const languages = { clear: () => {}, dispose: () => {}, }), + registerInlineCompletionItemProvider: () => mockDisposable, } export const extensions = { @@ -152,6 +153,19 @@ export const CodeActionKind = { export const EventEmitter = mockEventEmitter +export const InlineCompletionTriggerKind = { + Invoke: 0, + Automatic: 1, +} + +export const InlineCompletionItem = class { + constructor(insertText, range, command) { + this.insertText = insertText + this.range = range + this.command = command + } +} + export default { workspace, window, @@ -171,4 +185,6 @@ export default { EventEmitter, CodeAction, CodeActionKind, + InlineCompletionTriggerKind, + InlineCompletionItem, } diff --git a/src/extension.ts b/src/extension.ts index 19c0d70585a..a808646e95a 100644 --- a/src/extension.ts +++ b/src/extension.ts @@ -36,6 +36,7 @@ import { openAiCodexOAuthManager } from "./integrations/openai-codex/oauth" import { McpServerManager } from "./services/mcp/McpServerManager" import { CodeIndexManager } from "./services/code-index/manager" import { MdmService } from "./services/mdm/MdmService" +import { FimService } from "./services/fim" import { migrateSettings } from "./utils/migrateSettings" import { autoImportSettings } from "./utils/autoImportSettings" import { API } from "./extension/api" @@ -168,6 +169,28 @@ export async function activate(context: vscode.ExtensionContext) { const contextProxy = await ContextProxy.getInstance(context) + // Initialize FIM (Fill-in-the-Middle) inline completion service. + const fimService = new FimService(outputChannel) + context.subscriptions.push(fimService) + + // Initialize FIM with current settings + const initFimSettings = async () => { + const globalSettings = contextProxy.getGlobalSettings() + const fimApiKey = await context.secrets.get("fimApiKey") + fimService.updateSettings(globalSettings, fimApiKey) + } + + void initFimSettings() + + // Listen for secret storage changes to update FIM API key + context.secrets.onDidChange(async (e: vscode.SecretStorageChangeEvent) => { + if (e.key === "fimApiKey") { + const globalSettings = contextProxy.getGlobalSettings() + const fimApiKey = await context.secrets.get("fimApiKey") + fimService.updateSettings(globalSettings, fimApiKey) + } + }) + // Initialize code index managers for all workspace folders. const codeIndexManagers: CodeIndexManager[] = [] diff --git a/src/services/fim/FimApiClient.ts b/src/services/fim/FimApiClient.ts new file mode 100644 index 00000000000..b1281a739db --- /dev/null +++ b/src/services/fim/FimApiClient.ts @@ -0,0 +1,153 @@ +/** + * Lightweight API client for FIM (Fill-in-the-Middle) completion requests. 
+ * + * Supports the `/v1/completions` endpoint used by OpenAI-compatible servers, + * DeepSeek, Ollama, and similar providers. This is the legacy completions + * endpoint (not chat completions), which is better suited for raw FIM prompts. + */ + +import { formatFimPrompt } from "./FimTokenFormatter" + +export interface FimRequestOptions { + /** The FIM provider type */ + provider: "openai-compatible" | "deepseek" | "mistral" | "ollama" + /** Base URL for the API endpoint */ + baseUrl: string + /** API key for authentication */ + apiKey?: string + /** Model ID to use */ + modelId: string + /** Text before the cursor */ + prefix: string + /** Text after the cursor */ + suffix: string + /** Maximum tokens to generate */ + maxTokens: number + /** Abort signal for cancellation */ + signal?: AbortSignal +} + +export interface FimResponse { + /** The generated completion text */ + completion: string +} + +/** + * Normalize a base URL by removing trailing slashes. + */ +function normalizeBaseUrl(url: string): string { + return url.replace(/\/+$/, "") +} + +/** + * Build the API endpoint URL based on the provider type. + */ +function buildEndpointUrl(provider: string, baseUrl: string): string { + const normalized = normalizeBaseUrl(baseUrl) + + switch (provider) { + case "ollama": + return `${normalized}/api/generate` + case "mistral": + return `${normalized}/v1/fim/completions` + default: + // openai-compatible and deepseek use /v1/completions + return `${normalized}/v1/completions` + } +} + +/** + * Build the request body based on the provider type. + */ +function buildRequestBody(options: FimRequestOptions): Record { + const { provider, modelId, prefix, suffix, maxTokens } = options + + switch (provider) { + case "ollama": + return { + model: modelId, + prompt: prefix, + suffix: suffix, + stream: false, + options: { + num_predict: maxTokens, + temperature: 0.2, + top_p: 0.9, + }, + } + case "mistral": + return { + model: modelId, + prompt: prefix, + suffix: suffix, + max_tokens: maxTokens, + temperature: 0.2, + top_p: 0.9, + stop: ["\n\n"], + } + default: { + // openai-compatible and deepseek: format the FIM prompt with special tokens + const prompt = formatFimPrompt(modelId, prefix, suffix) + return { + model: modelId, + prompt, + max_tokens: maxTokens, + temperature: 0.2, + top_p: 0.9, + stop: ["\n\n", "<|fim", "): string { + switch (provider) { + case "ollama": { + return (data.response as string) ?? "" + } + default: { + // OpenAI-compatible response format + const choices = data.choices as Array<{ text?: string; message?: { content?: string } }> | undefined + if (!choices || choices.length === 0) { + return "" + } + return choices[0].text ?? choices[0].message?.content ?? "" + } + } +} + +/** + * Send a FIM completion request to the configured provider. 
+ */ +export async function requestFimCompletion(options: FimRequestOptions): Promise { + const url = buildEndpointUrl(options.provider, options.baseUrl) + const body = buildRequestBody(options) + + const headers: Record = { + "Content-Type": "application/json", + } + + if (options.apiKey) { + headers["Authorization"] = `Bearer ${options.apiKey}` + } + + const response = await fetch(url, { + method: "POST", + headers, + body: JSON.stringify(body), + signal: options.signal, + }) + + if (!response.ok) { + const errorText = await response.text().catch(() => "Unknown error") + throw new Error(`FIM API request failed (${response.status}): ${errorText}`) + } + + const data = (await response.json()) as Record + const completion = extractCompletion(options.provider, data) + + return { completion } +} diff --git a/src/services/fim/FimCompletionProvider.ts b/src/services/fim/FimCompletionProvider.ts new file mode 100644 index 00000000000..d22ed01aff0 --- /dev/null +++ b/src/services/fim/FimCompletionProvider.ts @@ -0,0 +1,228 @@ +/** + * VS Code InlineCompletionItemProvider for FIM-based code completions. + * + * Provides ghost-text-style inline suggestions as the user types, + * similar to GitHub Copilot. Uses a FIM-compatible API to generate + * completions based on the prefix/suffix context around the cursor. + */ + +import * as vscode from "vscode" + +import { requestFimCompletion, type FimRequestOptions } from "./FimApiClient" + +/** Default debounce delay in milliseconds */ +const DEFAULT_DEBOUNCE_MS = 300 + +/** Default max tokens for completions */ +const DEFAULT_MAX_TOKENS = 128 + +/** Default base URLs per provider */ +const DEFAULT_BASE_URLS: Record = { + "openai-compatible": "http://localhost:1234", + deepseek: "https://api.deepseek.com", + mistral: "https://api.mistral.ai", + ollama: "http://localhost:11434", +} + +export interface FimProviderConfig { + enabled: boolean + provider: "openai-compatible" | "deepseek" | "mistral" | "ollama" + modelId: string + baseUrl?: string + apiKey?: string + debounceMs?: number + maxTokens?: number +} + +/** + * Simple completion cache to avoid redundant API calls for identical contexts. + */ +interface CacheEntry { + completion: string + timestamp: number +} + +const CACHE_TTL_MS = 10_000 // 10 seconds +const MAX_CACHE_SIZE = 50 + +export class FimCompletionProvider implements vscode.InlineCompletionItemProvider { + private config: FimProviderConfig + private cache = new Map() + private pendingRequest: AbortController | null = null + + constructor(config: FimProviderConfig) { + this.config = config + } + + /** + * Update the provider configuration. Called when settings change. + */ + updateConfig(config: FimProviderConfig): void { + this.config = config + this.cache.clear() + } + + /** + * Provide inline completion items for the given position. + */ + async provideInlineCompletionItems( + document: vscode.TextDocument, + position: vscode.Position, + context: vscode.InlineCompletionContext, + token: vscode.CancellationToken, + ): Promise { + if (!this.config.enabled) { + return undefined + } + + // Don't trigger on empty documents + if (document.getText().trim().length === 0) { + return undefined + } + + // Cancel any pending request + if (this.pendingRequest) { + this.pendingRequest.abort() + this.pendingRequest = null + } + + // Debounce: wait before making the request + const debounceMs = this.config.debounceMs ?? 
DEFAULT_DEBOUNCE_MS + if (debounceMs > 0) { + const cancelled = await this.debounce(debounceMs, token) + if (cancelled) { + return undefined + } + } + + // Extract prefix and suffix from the document + const prefix = document.getText(new vscode.Range(new vscode.Position(0, 0), position)) + const suffix = document.getText(new vscode.Range(position, document.lineAt(document.lineCount - 1).range.end)) + + // Check cache + const cacheKey = this.buildCacheKey(prefix, suffix) + const cached = this.getFromCache(cacheKey) + if (cached) { + return [new vscode.InlineCompletionItem(cached)] + } + + // Prepare the request + const abortController = new AbortController() + this.pendingRequest = abortController + + // Also abort when the token is cancelled + const disposable = token.onCancellationRequested(() => { + abortController.abort() + }) + + try { + const requestOptions: FimRequestOptions = { + provider: this.config.provider, + baseUrl: + this.config.baseUrl ?? + DEFAULT_BASE_URLS[this.config.provider] ?? + DEFAULT_BASE_URLS["openai-compatible"], + apiKey: this.config.apiKey, + modelId: this.config.modelId, + prefix, + suffix, + maxTokens: this.config.maxTokens ?? DEFAULT_MAX_TOKENS, + signal: abortController.signal, + } + + const response = await requestFimCompletion(requestOptions) + + // Filter empty or whitespace-only completions + const completion = response.completion.trimEnd() + if (!completion || completion.trim().length === 0) { + return undefined + } + + // Cache the result + this.addToCache(cacheKey, completion) + + return [new vscode.InlineCompletionItem(completion)] + } catch (error: unknown) { + // Don't log abort errors (expected during cancellation) + if (error instanceof Error && error.name === "AbortError") { + return undefined + } + // Log other errors but don't show to user (silent failure for inline completions) + console.warn("[FIM] Completion request failed:", error instanceof Error ? error.message : String(error)) + return undefined + } finally { + disposable.dispose() + if (this.pendingRequest === abortController) { + this.pendingRequest = null + } + } + } + + /** + * Debounce helper that resolves after a delay unless the token is cancelled. + * Returns true if cancelled, false if the delay completed. + */ + private debounce(ms: number, token: vscode.CancellationToken): Promise { + return new Promise((resolve) => { + const timeout = setTimeout(() => { + disposable.dispose() + resolve(false) + }, ms) + + const disposable = token.onCancellationRequested(() => { + clearTimeout(timeout) + resolve(true) + }) + }) + } + + /** + * Build a cache key from the prefix and suffix context. + * Uses the last N characters to keep keys manageable. + */ + private buildCacheKey(prefix: string, suffix: string): string { + const prefixTail = prefix.slice(-500) + const suffixHead = suffix.slice(0, 200) + return `${prefixTail}|||${suffixHead}` + } + + /** + * Get a completion from the cache if it's still valid. + */ + private getFromCache(key: string): string | undefined { + const entry = this.cache.get(key) + if (!entry) { + return undefined + } + if (Date.now() - entry.timestamp > CACHE_TTL_MS) { + this.cache.delete(key) + return undefined + } + return entry.completion + } + + /** + * Add a completion to the cache, evicting old entries if needed. 
+ */ + private addToCache(key: string, completion: string): void { + // Evict oldest entries if cache is full + if (this.cache.size >= MAX_CACHE_SIZE) { + const firstKey = this.cache.keys().next().value + if (firstKey !== undefined) { + this.cache.delete(firstKey) + } + } + this.cache.set(key, { completion, timestamp: Date.now() }) + } + + /** + * Dispose of any pending requests and clear the cache. + */ + dispose(): void { + if (this.pendingRequest) { + this.pendingRequest.abort() + this.pendingRequest = null + } + this.cache.clear() + } +} diff --git a/src/services/fim/FimService.ts b/src/services/fim/FimService.ts new file mode 100644 index 00000000000..ce4edda7971 --- /dev/null +++ b/src/services/fim/FimService.ts @@ -0,0 +1,102 @@ +/** + * FIM Service - Orchestrates the FIM inline completion lifecycle. + * + * Manages the registration and disposal of the FimCompletionProvider + * based on user settings. Listens for configuration changes to + * enable/disable the provider dynamically. + */ + +import * as vscode from "vscode" + +import type { GlobalSettings } from "@roo-code/types" + +import { FimCompletionProvider, type FimProviderConfig } from "./FimCompletionProvider" + +/** + * Extract FIM configuration from global settings. + */ +function extractFimConfig(settings: GlobalSettings, apiKey?: string): FimProviderConfig { + return { + enabled: settings.fimEnabled ?? false, + provider: settings.fimProvider ?? "openai-compatible", + modelId: settings.fimModelId ?? "", + baseUrl: settings.fimBaseUrl, + apiKey, + debounceMs: settings.fimDebounceMs, + maxTokens: settings.fimMaxTokens, + } +} + +export class FimService implements vscode.Disposable { + private provider: FimCompletionProvider | null = null + private registration: vscode.Disposable | null = null + private outputChannel: vscode.OutputChannel + + constructor(outputChannel: vscode.OutputChannel) { + this.outputChannel = outputChannel + } + + /** + * Initialize or update the FIM service based on current settings. + */ + updateSettings(settings: GlobalSettings, apiKey?: string): void { + const config = extractFimConfig(settings, apiKey) + + if (!config.enabled || !config.modelId) { + this.deactivate() + return + } + + if (this.provider) { + // Update existing provider config + this.provider.updateConfig(config) + this.outputChannel.appendLine( + `[FIM] Updated configuration: provider=${config.provider}, model=${config.modelId}`, + ) + } else { + // Create and register new provider + this.activate(config) + } + } + + /** + * Activate the FIM completion provider. + */ + private activate(config: FimProviderConfig): void { + this.provider = new FimCompletionProvider(config) + + this.registration = vscode.languages.registerInlineCompletionItemProvider({ pattern: "**" }, this.provider) + + this.outputChannel.appendLine(`[FIM] Activated: provider=${config.provider}, model=${config.modelId}`) + } + + /** + * Deactivate the FIM completion provider. + */ + private deactivate(): void { + if (this.registration) { + this.registration.dispose() + this.registration = null + } + + if (this.provider) { + this.provider.dispose() + this.provider = null + this.outputChannel.appendLine("[FIM] Deactivated") + } + } + + /** + * Check if the FIM service is currently active. + */ + isActive(): boolean { + return this.provider !== null + } + + /** + * Dispose of the FIM service and all resources. 
+	 */
+	dispose(): void {
+		this.deactivate()
+	}
+}
diff --git a/src/services/fim/FimTokenFormatter.ts b/src/services/fim/FimTokenFormatter.ts
new file mode 100644
index 00000000000..c7677f90613
--- /dev/null
+++ b/src/services/fim/FimTokenFormatter.ts
@@ -0,0 +1,94 @@
+/**
+ * FIM (Fill-in-the-Middle) token formatting for different model families.
+ *
+ * Each model family uses different special tokens to delimit the prefix,
+ * suffix, and middle sections of a FIM prompt. This module maps model
+ * families to their respective token formats.
+ */
+
+export interface FimTokens {
+	prefix: string
+	suffix: string
+	middle: string
+}
+
+/**
+ * Known FIM token formats by model family.
+ */
+const FIM_TOKEN_FORMATS: Record<string, FimTokens> = {
+	// DeepSeek Coder
+	deepseek: {
+		prefix: "<|fim▁begin|>",
+		suffix: "<|fim▁hole|>",
+		middle: "<|fim▁end|>",
+	},
+	// CodeLlama
+	codellama: {
+		prefix: "<PRE> ",
+		suffix: " <SUF>",
+		middle: " <MID>",
+	},
+	// StarCoder
+	starcoder: {
+		prefix: "<fim_prefix>",
+		suffix: "<fim_suffix>",
+		middle: "<fim_middle>",
+	},
+	// Mistral / Codestral
+	mistral: {
+		prefix: "[PREFIX]",
+		suffix: "[SUFFIX]",
+		middle: "[MIDDLE]",
+	},
+	// Qwen2.5-Coder
+	qwen: {
+		prefix: "<|fim_prefix|>",
+		suffix: "<|fim_suffix|>",
+		middle: "<|fim_middle|>",
+	},
+	// Generic fallback (OpenAI-compatible FIM)
+	generic: {
+		prefix: "<|fim_prefix|>",
+		suffix: "<|fim_suffix|>",
+		middle: "<|fim_middle|>",
+	},
+}
+
+/**
+ * Model ID patterns mapped to their token format keys.
+ */
+const MODEL_FAMILY_PATTERNS: Array<{ pattern: RegExp; family: string }> = [
+	{ pattern: /deepseek/i, family: "deepseek" },
+	{ pattern: /codellama/i, family: "codellama" },
+	{ pattern: /starcoder/i, family: "starcoder" },
+	{ pattern: /mistral|codestral/i, family: "mistral" },
+	{ pattern: /qwen/i, family: "qwen" },
+]
+
+/**
+ * Detect the FIM token format based on the model ID.
+ */
+export function detectFimTokens(modelId: string): FimTokens {
+	for (const { pattern, family } of MODEL_FAMILY_PATTERNS) {
+		if (pattern.test(modelId)) {
+			return FIM_TOKEN_FORMATS[family]
+		}
+	}
+	return FIM_TOKEN_FORMATS.generic
+}
+
+/**
+ * Format a FIM prompt using the appropriate tokens for the given model.
+ */
+export function formatFimPrompt(modelId: string, prefix: string, suffix: string): string {
+	const tokens = detectFimTokens(modelId)
+	return `${tokens.prefix}${prefix}${tokens.suffix}${suffix}${tokens.middle}`
+}
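+
+// Illustrative example (mirrors the DeepSeek case exercised in the tests):
+//   formatFimPrompt("deepseek-coder", "function hello() {", "}")
+//   returns "<|fim▁begin|>function hello() {<|fim▁hole|>}<|fim▁end|>"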
+
+/**
+ * Get the FIM tokens for a given model family name.
+ * Falls back to generic tokens if the family is not recognized.
+ */
+export function getFimTokensByFamily(family: string): FimTokens {
+	return FIM_TOKEN_FORMATS[family] ?? FIM_TOKEN_FORMATS.generic
+}
diff --git a/src/services/fim/__tests__/FimApiClient.spec.ts b/src/services/fim/__tests__/FimApiClient.spec.ts
new file mode 100644
index 00000000000..f94b9055029
--- /dev/null
+++ b/src/services/fim/__tests__/FimApiClient.spec.ts
@@ -0,0 +1,225 @@
+import { requestFimCompletion, type FimRequestOptions } from "../FimApiClient"
+
+// Mock global fetch
+const mockFetch = vi.fn()
+vi.stubGlobal("fetch", mockFetch)
+
+describe("FimApiClient", () => {
+	beforeEach(() => {
+		mockFetch.mockReset()
+	})
+
+	describe("requestFimCompletion", () => {
+		const baseOptions: FimRequestOptions = {
+			provider: "openai-compatible",
+			baseUrl: "http://localhost:1234",
+			apiKey: "test-key",
+			modelId: "deepseek-coder",
+			prefix: "function hello() {",
+			suffix: "}",
+			maxTokens: 128,
+		}
+
+		it("should make a request to the correct OpenAI-compatible endpoint", async () => {
+			mockFetch.mockResolvedValueOnce({
+				ok: true,
+				json: async () => ({
+					choices: [{ text: '\n  console.log("hello");\n' }],
+				}),
+			})
+
+			await requestFimCompletion(baseOptions)
+
+			expect(mockFetch).toHaveBeenCalledWith(
+				"http://localhost:1234/v1/completions",
+				expect.objectContaining({
+					method: "POST",
+					headers: expect.objectContaining({
+						"Content-Type": "application/json",
+						Authorization: "Bearer test-key",
+					}),
+				}),
+			)
+		})
+
+		it("should use the Ollama endpoint for Ollama provider", async () => {
+			mockFetch.mockResolvedValueOnce({
+				ok: true,
+				json: async () => ({
+					response: '\n  console.log("hello");\n',
+				}),
+			})
+
+			await requestFimCompletion({
+				...baseOptions,
+				provider: "ollama",
+				baseUrl: "http://localhost:11434",
+			})
+
+			expect(mockFetch).toHaveBeenCalledWith("http://localhost:11434/api/generate", expect.anything())
+		})
+
+		it("should use the Mistral FIM endpoint for Mistral provider", async () => {
+			mockFetch.mockResolvedValueOnce({
+				ok: true,
+				json: async () => ({
+					choices: [{ text: "completion" }],
+				}),
+			})
+
+			await requestFimCompletion({
+				...baseOptions,
+				provider: "mistral",
+				baseUrl: "https://api.mistral.ai",
+			})
+
+			expect(mockFetch).toHaveBeenCalledWith("https://api.mistral.ai/v1/fim/completions", expect.anything())
+		})
+
+		it("should extract completion text from OpenAI-compatible response", async () => {
+			mockFetch.mockResolvedValueOnce({
+				ok: true,
+				json: async () => ({
+					choices: [{ text: "  return 42;" }],
+				}),
+			})
+
+			const result = await requestFimCompletion(baseOptions)
+			expect(result.completion).toBe("  return 42;")
+		})
+
+		it("should extract completion text from Ollama response", async () => {
+			mockFetch.mockResolvedValueOnce({
+				ok: true,
+				json: async () => ({
+					response: "  return 42;",
+				}),
+			})
+
+			const result = await requestFimCompletion({
+				...baseOptions,
+				provider: "ollama",
+				baseUrl: "http://localhost:11434",
+			})
+			expect(result.completion).toBe("  return 42;")
+		})
+
+		it("should return empty string for empty choices", async () => {
+			mockFetch.mockResolvedValueOnce({
+				ok: true,
+				json: async () => ({
+					choices: [],
+				}),
+			})
+
+			const result = await requestFimCompletion(baseOptions)
+			expect(result.completion).toBe("")
+		})
+
+		it("should throw an error for non-OK responses", async () => {
+			mockFetch.mockResolvedValueOnce({
+				ok: false,
+				status: 401,
+				text: async () => "Unauthorized",
+			})
+
+			await expect(requestFimCompletion(baseOptions)).rejects.toThrow(
+				"FIM API request failed (401): Unauthorized",
+			)
+		})
+
+		it("should not include Authorization header when no API key is provided", async () => {
+			mockFetch.mockResolvedValueOnce({
+				ok: true,
+				json: async () => ({
+					choices: [{ text: "test" }],
+				}),
+			})
+
+			await requestFimCompletion({
+				...baseOptions,
+				apiKey: undefined,
+			})
+
+			const callArgs = mockFetch.mock.calls[0]
+			const headers = callArgs[1].headers
+			expect(headers).not.toHaveProperty("Authorization")
+		})
+
+		it("should normalize base URL by removing trailing slashes", async () => {
+			mockFetch.mockResolvedValueOnce({
+				ok: true,
+				json: async () => ({
+					choices: [{ text: "test" }],
+				}),
+			})
+
+			await requestFimCompletion({
+				...baseOptions,
+				baseUrl: "http://localhost:1234///",
+			})
+
+			expect(mockFetch).toHaveBeenCalledWith("http://localhost:1234/v1/completions", expect.anything())
+		})
+
+		it("should pass the abort signal to fetch", async () => {
+			const controller = new AbortController()
+			mockFetch.mockResolvedValueOnce({
+				ok: true,
+				json: async () => ({
+					choices: [{ text: "test" }],
+				}),
+			})
+
+			await requestFimCompletion({
+				...baseOptions,
+				signal: controller.signal,
+			})
+
+			expect(mockFetch).toHaveBeenCalledWith(
+				expect.any(String),
+				expect.objectContaining({
+					signal: controller.signal,
+				}),
+			)
+		})
+
+		it("should include FIM tokens in the prompt for openai-compatible provider", async () => {
+			mockFetch.mockResolvedValueOnce({
+				ok: true,
+				json: async () => ({
+					choices: [{ text: "test" }],
+				}),
+			})
+
+			await requestFimCompletion(baseOptions)
+
+			const callArgs = mockFetch.mock.calls[0]
+			const body = JSON.parse(callArgs[1].body)
+			// DeepSeek model should use DeepSeek FIM tokens
+			expect(body.prompt).toContain("<|fim▁begin|>")
+			expect(body.prompt).toContain("<|fim▁hole|>")
+			expect(body.prompt).toContain("<|fim▁end|>")
+		})
+
+		it("should use native prefix/suffix for Ollama provider", async () => {
+			mockFetch.mockResolvedValueOnce({
+				ok: true,
+				json: async () => ({
+					response: "test",
+				}),
+			})
+
+			await requestFimCompletion({
+				...baseOptions,
+				provider: "ollama",
+				baseUrl: "http://localhost:11434",
+			})
+
+			const callArgs = mockFetch.mock.calls[0]
+			const body = JSON.parse(callArgs[1].body)
+			expect(body.prompt).toBe("function hello() {")
+			expect(body.suffix).toBe("}")
+		})
+	})
+})
diff --git a/src/services/fim/__tests__/FimCompletionProvider.spec.ts b/src/services/fim/__tests__/FimCompletionProvider.spec.ts
new file mode 100644
index 00000000000..f6cf9e5d93a
--- /dev/null
+++ b/src/services/fim/__tests__/FimCompletionProvider.spec.ts
@@ -0,0 +1,205 @@
+import * as vscode from "vscode"
+
+import { FimCompletionProvider } from "../FimCompletionProvider"
+import * as FimApiClient from "../FimApiClient"
+
+// Mock the API client
+vi.mock("../FimApiClient", () => ({
+	requestFimCompletion: vi.fn(),
+}))
+
+describe("FimCompletionProvider", () => {
+	const mockRequestFimCompletion = vi.mocked(FimApiClient.requestFimCompletion)
+
+	const defaultConfig = {
+		enabled: true,
+		provider: "openai-compatible" as const,
+		modelId: "deepseek-coder",
+		baseUrl: "http://localhost:1234",
+		apiKey: "test-key",
+		debounceMs: 0, // No debounce for tests
+		maxTokens: 128,
+	}
+
+	// Create mock document
+	function createMockDocument(text: string): vscode.TextDocument {
+		const lines = text.split("\n")
+		return {
+			getText: vi.fn((range?: vscode.Range) => {
+				if (!range) return text
+				// Simplified: return text between positions
+				const startOffset = getOffset(text, range.start.line, range.start.character)
+				const endOffset = getOffset(text, range.end.line, range.end.character)
+				return text.substring(startOffset, endOffset)
+			}),
+			lineCount: lines.length,
+			lineAt: vi.fn((line: number) => ({
+				range: {
+					end: new vscode.Position(line, lines[line]?.length ?? 0),
+				},
+			})),
+		} as unknown as vscode.TextDocument
+	}
+
+	function getOffset(text: string, line: number, character: number): number {
+		const lines = text.split("\n")
+		let offset = 0
+		for (let i = 0; i < line && i < lines.length; i++) {
+			offset += lines[i].length + 1 // +1 for newline
+		}
+		return offset + character
+	}
+
+	// Create mock cancellation token
+	function createMockToken(cancelled = false): vscode.CancellationToken {
+		return {
+			isCancellationRequested: cancelled,
+			onCancellationRequested: vi.fn(() => ({ dispose: vi.fn() })),
+		}
+	}
+
+	beforeEach(() => {
+		vi.clearAllMocks()
+	})
+
+	it("should return undefined when disabled", async () => {
+		const provider = new FimCompletionProvider({ ...defaultConfig, enabled: false })
+		const doc = createMockDocument("const x = ")
+		const pos = new vscode.Position(0, 10)
+		const token = createMockToken()
+
+		const result = await provider.provideInlineCompletionItems(
+			doc,
+			pos,
+			{ triggerKind: vscode.InlineCompletionTriggerKind.Automatic } as vscode.InlineCompletionContext,
+			token,
+		)
+
+		expect(result).toBeUndefined()
+		expect(mockRequestFimCompletion).not.toHaveBeenCalled()
+	})
+
+	it("should return undefined for empty documents", async () => {
+		const provider = new FimCompletionProvider(defaultConfig)
+		const doc = createMockDocument("")
+		const pos = new vscode.Position(0, 0)
+		const token = createMockToken()
+
+		const result = await provider.provideInlineCompletionItems(
+			doc,
+			pos,
+			{ triggerKind: vscode.InlineCompletionTriggerKind.Automatic } as vscode.InlineCompletionContext,
+			token,
+		)
+
+		expect(result).toBeUndefined()
+	})
+
+	it("should return completion items on successful API response", async () => {
+		mockRequestFimCompletion.mockResolvedValueOnce({
+			completion: '  console.log("hello");',
+		})
+
+		const provider = new FimCompletionProvider(defaultConfig)
+		const doc = createMockDocument("function hello() {\n}")
+		const pos = new vscode.Position(0, 19)
+		const token = createMockToken()
+
+		const result = await provider.provideInlineCompletionItems(
+			doc,
+			pos,
+			{ triggerKind: vscode.InlineCompletionTriggerKind.Automatic } as vscode.InlineCompletionContext,
+			token,
+		)
+
+		expect(result).toBeDefined()
+		expect(result).toHaveLength(1)
+		expect(mockRequestFimCompletion).toHaveBeenCalledOnce()
+	})
+
+	it("should return undefined for whitespace-only completions", async () => {
+		mockRequestFimCompletion.mockResolvedValueOnce({
+			completion: "   \n  \t  ",
+		})
+
+		const provider = new FimCompletionProvider(defaultConfig)
+		const doc = createMockDocument("const x = ")
+		const pos = new vscode.Position(0, 10)
+		const token = createMockToken()
+
+		const result = await provider.provideInlineCompletionItems(
+			doc,
+			pos,
+			{ triggerKind: vscode.InlineCompletionTriggerKind.Automatic } as vscode.InlineCompletionContext,
+			token,
+		)
+
+		expect(result).toBeUndefined()
+	})
+
+	it("should return undefined when API request fails", async () => {
+		mockRequestFimCompletion.mockRejectedValueOnce(new Error("Network error"))
+
+		const provider = new FimCompletionProvider(defaultConfig)
+		const doc = createMockDocument("const x = ")
+		const pos = new vscode.Position(0, 10)
+		const token = createMockToken()
+
+		const result = await provider.provideInlineCompletionItems(
+			doc,
+			pos,
+			{ triggerKind: vscode.InlineCompletionTriggerKind.Automatic } as vscode.InlineCompletionContext,
+			token,
+		)
+
+		expect(result).toBeUndefined()
+	})
+
+	it("should cache results and return cached completions", async () => {
+		mockRequestFimCompletion.mockResolvedValue({
+			completion: "42",
+		})
+
+		const provider = new FimCompletionProvider(defaultConfig)
+		const doc = createMockDocument("const x = ")
+		const pos = new vscode.Position(0, 10)
+		const token = createMockToken()
+		const context = { triggerKind: vscode.InlineCompletionTriggerKind.Automatic } as vscode.InlineCompletionContext
+
+		// First call - should hit API
+		await provider.provideInlineCompletionItems(doc, pos, context, token)
+		expect(mockRequestFimCompletion).toHaveBeenCalledOnce()
+
+		// Second call with same context - should use cache
+		await provider.provideInlineCompletionItems(doc, pos, context, token)
+		expect(mockRequestFimCompletion).toHaveBeenCalledOnce() // Still only 1 call
+	})
+
+	it("should clear cache when config is updated", async () => {
+		mockRequestFimCompletion.mockResolvedValue({
+			completion: "42",
+		})
+
+		const provider = new FimCompletionProvider(defaultConfig)
+		const doc = createMockDocument("const x = ")
+		const pos = new vscode.Position(0, 10)
+		const token = createMockToken()
+		const context = { triggerKind: vscode.InlineCompletionTriggerKind.Automatic } as vscode.InlineCompletionContext
+
+		// First call
+		await provider.provideInlineCompletionItems(doc, pos, context, token)
+		expect(mockRequestFimCompletion).toHaveBeenCalledOnce()
+
+		// Update config - should clear cache
+		provider.updateConfig(defaultConfig)
+
+		// Second call - should hit API again since cache was cleared
+		await provider.provideInlineCompletionItems(doc, pos, context, token)
+		expect(mockRequestFimCompletion).toHaveBeenCalledTimes(2)
+	})
+
+	it("should dispose properly", () => {
+		const provider = new FimCompletionProvider(defaultConfig)
+		expect(() => provider.dispose()).not.toThrow()
+	})
+})
diff --git a/src/services/fim/__tests__/FimService.spec.ts b/src/services/fim/__tests__/FimService.spec.ts
new file mode 100644
index 00000000000..d396140b4a2
--- /dev/null
+++ b/src/services/fim/__tests__/FimService.spec.ts
@@ -0,0 +1,157 @@
+import * as vscode from "vscode"
+
+import { FimService } from "../FimService"
+import type { GlobalSettings } from "@roo-code/types"
+
+// Mock vscode.languages.registerInlineCompletionItemProvider
+const mockRegisterDisposable = { dispose: vi.fn() }
+vscode.languages.registerInlineCompletionItemProvider = vi.fn().mockReturnValue(mockRegisterDisposable)
+
+describe("FimService", () => {
+	let outputChannel: vscode.OutputChannel
+
+	beforeEach(() => {
+		vi.clearAllMocks()
+		outputChannel = {
+			appendLine: vi.fn(),
+		} as unknown as vscode.OutputChannel
+	})
+
+	it("should not activate when FIM is disabled", () => {
+		const service = new FimService(outputChannel)
+		const settings: GlobalSettings = {
+			fimEnabled: false,
+			fimProvider: "openai-compatible",
+			fimModelId: "deepseek-coder",
+		}
+
+		service.updateSettings(settings)
+
+		expect(service.isActive()).toBe(false)
+		expect(vscode.languages.registerInlineCompletionItemProvider).not.toHaveBeenCalled()
+	})
+
+	it("should not activate when model ID is empty", () => {
+		const service = new FimService(outputChannel)
+		const settings: GlobalSettings = {
+			fimEnabled: true,
+			fimProvider: "openai-compatible",
+			fimModelId: "",
+		}
+
+		service.updateSettings(settings)
+
+		expect(service.isActive()).toBe(false)
+	})
+
+	it("should activate when FIM is enabled with a model ID", () => {
+		const service = new FimService(outputChannel)
+		const settings: GlobalSettings = {
+			fimEnabled: true,
+			fimProvider: "openai-compatible",
+			fimModelId: "deepseek-coder",
+		}
+
+		service.updateSettings(settings)
+
+		expect(service.isActive()).toBe(true)
+		expect(vscode.languages.registerInlineCompletionItemProvider).toHaveBeenCalledOnce()
+	})
+
+	it("should deactivate when settings change to disabled", () => {
+		const service = new FimService(outputChannel)
+
+		// First activate
+		service.updateSettings({
+			fimEnabled: true,
+			fimProvider: "openai-compatible",
+			fimModelId: "deepseek-coder",
+		})
+		expect(service.isActive()).toBe(true)
+
+		// Then deactivate
+		service.updateSettings({
+			fimEnabled: false,
+		})
+		expect(service.isActive()).toBe(false)
+		expect(mockRegisterDisposable.dispose).toHaveBeenCalled()
+	})
+
+	it("should update config without re-registering when already active", () => {
+		const service = new FimService(outputChannel)
+
+		service.updateSettings({
+			fimEnabled: true,
+			fimProvider: "openai-compatible",
+			fimModelId: "deepseek-coder",
+		})
+
+		// Update with new model
+		service.updateSettings({
+			fimEnabled: true,
+			fimProvider: "openai-compatible",
+			fimModelId: "codestral-latest",
+		})
+
+		// Should only register once
+		expect(vscode.languages.registerInlineCompletionItemProvider).toHaveBeenCalledOnce()
+		expect(service.isActive()).toBe(true)
+	})
+
+	it("should pass API key to config", () => {
+		const service = new FimService(outputChannel)
+
+		service.updateSettings(
+			{
+				fimEnabled: true,
+				fimProvider: "deepseek",
+				fimModelId: "deepseek-coder",
+			},
+			"my-secret-key",
+		)
+
+		expect(service.isActive()).toBe(true)
+	})
+
+	it("should use default provider when not specified", () => {
+		const service = new FimService(outputChannel)
+
+		service.updateSettings({
+			fimEnabled: true,
+			fimModelId: "some-model",
+		})
+
+		expect(service.isActive()).toBe(true)
+	})
+
+	it("should dispose properly", () => {
+		const service = new FimService(outputChannel)
+
+		service.updateSettings({
+			fimEnabled: true,
+			fimProvider: "openai-compatible",
+			fimModelId: "deepseek-coder",
+		})
+
+		service.dispose()
+
+		expect(service.isActive()).toBe(false)
+		expect(mockRegisterDisposable.dispose).toHaveBeenCalled()
+	})
+
+	it("should log activation and deactivation messages", () => {
+		const service = new FimService(outputChannel)
+
+		service.updateSettings({
+			fimEnabled: true,
+			fimProvider: "openai-compatible",
+			fimModelId: "deepseek-coder",
+		})
+
+		expect(outputChannel.appendLine).toHaveBeenCalledWith(expect.stringContaining("[FIM] Activated"))
+
+		service.updateSettings({ fimEnabled: false })
+
+		expect(outputChannel.appendLine).toHaveBeenCalledWith("[FIM] Deactivated")
+	})
+})
diff --git a/src/services/fim/__tests__/FimTokenFormatter.spec.ts b/src/services/fim/__tests__/FimTokenFormatter.spec.ts
new file mode 100644
index 00000000000..5a1af6cc35b
--- /dev/null
+++ b/src/services/fim/__tests__/FimTokenFormatter.spec.ts
@@ -0,0 +1,76 @@
+import { detectFimTokens, formatFimPrompt, getFimTokensByFamily } from "../FimTokenFormatter"
+
+describe("FimTokenFormatter", () => {
+	describe("detectFimTokens", () => {
+		it("should detect DeepSeek model tokens", () => {
+			const tokens = detectFimTokens("deepseek-coder-v2")
+			expect(tokens.prefix).toBe("<|fim▁begin|>")
+			expect(tokens.suffix).toBe("<|fim▁hole|>")
+			expect(tokens.middle).toBe("<|fim▁end|>")
+		})
+
+		it("should detect CodeLlama model tokens", () => {
+			const tokens = detectFimTokens("codellama-13b")
+			expect(tokens.prefix).toBe("<PRE> ")
+			expect(tokens.suffix).toBe(" <SUF>")
+			expect(tokens.middle).toBe(" <MID>")
+		})
+
+		it("should detect StarCoder model tokens", () => {
+			const tokens = detectFimTokens("starcoder2-15b")
+			expect(tokens.prefix).toBe("<fim_prefix>")
+			expect(tokens.suffix).toBe("<fim_suffix>")
+			expect(tokens.middle).toBe("<fim_middle>")
+		})
+
+		it("should detect Mistral/Codestral model tokens", () => {
+			const tokens = detectFimTokens("codestral-latest")
+			expect(tokens.prefix).toBe("[PREFIX]")
+			expect(tokens.suffix).toBe("[SUFFIX]")
+			expect(tokens.middle).toBe("[MIDDLE]")
+		})
+
+		it("should detect Qwen model tokens", () => {
+			const tokens = detectFimTokens("qwen2.5-coder-7b")
+			expect(tokens.prefix).toBe("<|fim_prefix|>")
+			expect(tokens.suffix).toBe("<|fim_suffix|>")
+			expect(tokens.middle).toBe("<|fim_middle|>")
+		})
+
+		it("should return generic tokens for unknown models", () => {
+			const tokens = detectFimTokens("some-unknown-model")
+			expect(tokens.prefix).toBe("<|fim_prefix|>")
+			expect(tokens.suffix).toBe("<|fim_suffix|>")
+			expect(tokens.middle).toBe("<|fim_middle|>")
+		})
+
+		it("should be case-insensitive", () => {
+			const tokens = detectFimTokens("DeepSeek-Coder-V2")
+			expect(tokens.prefix).toBe("<|fim▁begin|>")
+		})
+	})
+
+	describe("formatFimPrompt", () => {
+		it("should format a FIM prompt with correct tokens for DeepSeek", () => {
+			const result = formatFimPrompt("deepseek-coder", "function hello() {", "}")
+			expect(result).toBe("<|fim▁begin|>function hello() {<|fim▁hole|>}<|fim▁end|>")
+		})
+
+		it("should format a FIM prompt with generic tokens for unknown models", () => {
+			const result = formatFimPrompt("unknown-model", "const x = ", ";")
+			expect(result).toBe("<|fim_prefix|>const x = <|fim_suffix|>;<|fim_middle|>")
+		})
+	})
+
+	describe("getFimTokensByFamily", () => {
+		it("should return tokens for known families", () => {
+			const tokens = getFimTokensByFamily("deepseek")
+			expect(tokens.prefix).toBe("<|fim▁begin|>")
+		})
+
+		it("should return generic tokens for unknown families", () => {
+			const tokens = getFimTokensByFamily("nonexistent")
+			expect(tokens.prefix).toBe("<|fim_prefix|>")
+		})
+	})
+})
diff --git a/src/services/fim/index.ts b/src/services/fim/index.ts
new file mode 100644
index 00000000000..1a60758a53f
--- /dev/null
+++ b/src/services/fim/index.ts
@@ -0,0 +1,4 @@
+export { FimService } from "./FimService"
+export { FimCompletionProvider, type FimProviderConfig } from "./FimCompletionProvider"
+export { requestFimCompletion, type FimRequestOptions, type FimResponse } from "./FimApiClient"
+export { detectFimTokens, formatFimPrompt, getFimTokensByFamily, type FimTokens } from "./FimTokenFormatter"
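+
+// Illustrative usage sketch (the model ID and code snippets below are hypothetical;
+// the request shape follows FimRequestOptions, and the base URL matches the Ollama
+// default declared in FimCompletionProvider):
+//
+//   const { completion } = await requestFimCompletion({
+//       provider: "ollama",
+//       baseUrl: "http://localhost:11434",
+//       modelId: "qwen2.5-coder",
+//       prefix: "function add(a, b) {\n",
+//       suffix: "\n}",
+//       maxTokens: 128,
+//   })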