From 66da3145eb303beb85946d5466ad1694386eff98 Mon Sep 17 00:00:00 2001 From: Roo Code Date: Sun, 3 May 2026 09:43:37 +0000 Subject: [PATCH] feat: add FIM inline code completion support (Fill-in-the-Middle) Implements a lightweight FIM completion provider that registers as a VS Code InlineCompletionItemProvider to provide ghost-text suggestions as the user types, similar to GitHub Copilot. Key components: - FimTokenFormatter: Maps model families to FIM token formats (DeepSeek, CodeLlama, StarCoder, Mistral/Codestral, Qwen, generic) - FimApiClient: Lightweight API client supporting /v1/completions, Ollama /api/generate, and Mistral /v1/fim/completions endpoints - FimCompletionProvider: VS Code InlineCompletionItemProvider with debouncing, caching, and cancellation support - FimService: Orchestrator managing provider lifecycle based on settings Settings added to GlobalSettings: - fimEnabled, fimProvider, fimModelId, fimBaseUrl, fimDebounceMs, fimMaxTokens, fimApiKey (secret) Supported providers: openai-compatible, deepseek, mistral, ollama Closes #12261 --- packages/types/src/global-settings.ts | 13 + src/__mocks__/vscode.js | 16 ++ src/extension.ts | 23 ++ src/services/fim/FimApiClient.ts | 153 ++++++++++++ src/services/fim/FimCompletionProvider.ts | 228 ++++++++++++++++++ src/services/fim/FimService.ts | 102 ++++++++ src/services/fim/FimTokenFormatter.ts | 94 ++++++++ .../fim/__tests__/FimApiClient.spec.ts | 225 +++++++++++++++++ .../__tests__/FimCompletionProvider.spec.ts | 205 ++++++++++++++++ src/services/fim/__tests__/FimService.spec.ts | 157 ++++++++++++ .../fim/__tests__/FimTokenFormatter.spec.ts | 76 ++++++ src/services/fim/index.ts | 4 + 12 files changed, 1296 insertions(+) create mode 100644 src/services/fim/FimApiClient.ts create mode 100644 src/services/fim/FimCompletionProvider.ts create mode 100644 src/services/fim/FimService.ts create mode 100644 src/services/fim/FimTokenFormatter.ts create mode 100644 src/services/fim/__tests__/FimApiClient.spec.ts create mode 100644 src/services/fim/__tests__/FimCompletionProvider.spec.ts create mode 100644 src/services/fim/__tests__/FimService.spec.ts create mode 100644 src/services/fim/__tests__/FimTokenFormatter.spec.ts create mode 100644 src/services/fim/index.ts diff --git a/packages/types/src/global-settings.ts b/packages/types/src/global-settings.ts index 288f6c2118c..0f06f0f51a5 100644 --- a/packages/types/src/global-settings.ts +++ b/packages/types/src/global-settings.ts @@ -232,6 +232,18 @@ export const globalSettingsSchema = z.object({ * Tools in this list will be excluded from prompt generation and rejected at execution time. */ disabledTools: z.array(toolNamesSchema).optional(), + + /** + * FIM (Fill-in-the-Middle) inline code completion settings. + * These are configured separately from the chat model to allow using + * a cheap/fast FIM-specialized model (e.g., DeepSeek Coder, Codestral). 
+ */ + fimEnabled: z.boolean().optional(), + fimProvider: z.enum(["openai-compatible", "deepseek", "mistral", "ollama"]).optional(), + fimModelId: z.string().optional(), + fimBaseUrl: z.string().optional(), + fimDebounceMs: z.number().min(0).optional(), + fimMaxTokens: z.number().min(1).optional(), }) export type GlobalSettings = z.infer @@ -285,6 +297,7 @@ export const SECRET_STATE_KEYS = [ // Global secrets that are part of GlobalSettings (not ProviderSettings) export const GLOBAL_SECRET_KEYS = [ "openRouterImageApiKey", // For image generation + "fimApiKey", // For FIM inline code completion ] as const // Type for the actual secret storage keys diff --git a/src/__mocks__/vscode.js b/src/__mocks__/vscode.js index 505497fc9f1..0512189fd1d 100644 --- a/src/__mocks__/vscode.js +++ b/src/__mocks__/vscode.js @@ -96,6 +96,7 @@ export const languages = { clear: () => {}, dispose: () => {}, }), + registerInlineCompletionItemProvider: () => mockDisposable, } export const extensions = { @@ -152,6 +153,19 @@ export const CodeActionKind = { export const EventEmitter = mockEventEmitter +export const InlineCompletionTriggerKind = { + Invoke: 0, + Automatic: 1, +} + +export const InlineCompletionItem = class { + constructor(insertText, range, command) { + this.insertText = insertText + this.range = range + this.command = command + } +} + export default { workspace, window, @@ -171,4 +185,6 @@ export default { EventEmitter, CodeAction, CodeActionKind, + InlineCompletionTriggerKind, + InlineCompletionItem, } diff --git a/src/extension.ts b/src/extension.ts index 19c0d70585a..a808646e95a 100644 --- a/src/extension.ts +++ b/src/extension.ts @@ -36,6 +36,7 @@ import { openAiCodexOAuthManager } from "./integrations/openai-codex/oauth" import { McpServerManager } from "./services/mcp/McpServerManager" import { CodeIndexManager } from "./services/code-index/manager" import { MdmService } from "./services/mdm/MdmService" +import { FimService } from "./services/fim" import { migrateSettings } from "./utils/migrateSettings" import { autoImportSettings } from "./utils/autoImportSettings" import { API } from "./extension/api" @@ -168,6 +169,28 @@ export async function activate(context: vscode.ExtensionContext) { const contextProxy = await ContextProxy.getInstance(context) + // Initialize FIM (Fill-in-the-Middle) inline completion service. + const fimService = new FimService(outputChannel) + context.subscriptions.push(fimService) + + // Initialize FIM with current settings + const initFimSettings = async () => { + const globalSettings = contextProxy.getGlobalSettings() + const fimApiKey = await context.secrets.get("fimApiKey") + fimService.updateSettings(globalSettings, fimApiKey) + } + + void initFimSettings() + + // Listen for secret storage changes to update FIM API key + context.secrets.onDidChange(async (e: vscode.SecretStorageChangeEvent) => { + if (e.key === "fimApiKey") { + const globalSettings = contextProxy.getGlobalSettings() + const fimApiKey = await context.secrets.get("fimApiKey") + fimService.updateSettings(globalSettings, fimApiKey) + } + }) + // Initialize code index managers for all workspace folders. const codeIndexManagers: CodeIndexManager[] = [] diff --git a/src/services/fim/FimApiClient.ts b/src/services/fim/FimApiClient.ts new file mode 100644 index 00000000000..b1281a739db --- /dev/null +++ b/src/services/fim/FimApiClient.ts @@ -0,0 +1,153 @@ +/** + * Lightweight API client for FIM (Fill-in-the-Middle) completion requests. 
+ * + * Supports the `/v1/completions` endpoint used by OpenAI-compatible servers, + * DeepSeek, Ollama, and similar providers. This is the legacy completions + * endpoint (not chat completions), which is better suited for raw FIM prompts. + */ + +import { formatFimPrompt } from "./FimTokenFormatter" + +export interface FimRequestOptions { + /** The FIM provider type */ + provider: "openai-compatible" | "deepseek" | "mistral" | "ollama" + /** Base URL for the API endpoint */ + baseUrl: string + /** API key for authentication */ + apiKey?: string + /** Model ID to use */ + modelId: string + /** Text before the cursor */ + prefix: string + /** Text after the cursor */ + suffix: string + /** Maximum tokens to generate */ + maxTokens: number + /** Abort signal for cancellation */ + signal?: AbortSignal +} + +export interface FimResponse { + /** The generated completion text */ + completion: string +} + +/** + * Normalize a base URL by removing trailing slashes. + */ +function normalizeBaseUrl(url: string): string { + return url.replace(/\/+$/, "") +} + +/** + * Build the API endpoint URL based on the provider type. + */ +function buildEndpointUrl(provider: string, baseUrl: string): string { + const normalized = normalizeBaseUrl(baseUrl) + + switch (provider) { + case "ollama": + return `${normalized}/api/generate` + case "mistral": + return `${normalized}/v1/fim/completions` + default: + // openai-compatible and deepseek use /v1/completions + return `${normalized}/v1/completions` + } +} + +/** + * Build the request body based on the provider type. + */ +function buildRequestBody(options: FimRequestOptions): Record { + const { provider, modelId, prefix, suffix, maxTokens } = options + + switch (provider) { + case "ollama": + return { + model: modelId, + prompt: prefix, + suffix: suffix, + stream: false, + options: { + num_predict: maxTokens, + temperature: 0.2, + top_p: 0.9, + }, + } + case "mistral": + return { + model: modelId, + prompt: prefix, + suffix: suffix, + max_tokens: maxTokens, + temperature: 0.2, + top_p: 0.9, + stop: ["\n\n"], + } + default: { + // openai-compatible and deepseek: format the FIM prompt with special tokens + const prompt = formatFimPrompt(modelId, prefix, suffix) + return { + model: modelId, + prompt, + max_tokens: maxTokens, + temperature: 0.2, + top_p: 0.9, + stop: ["\n\n", "<|fim", "): string { + switch (provider) { + case "ollama": { + return (data.response as string) ?? "" + } + default: { + // OpenAI-compatible response format + const choices = data.choices as Array<{ text?: string; message?: { content?: string } }> | undefined + if (!choices || choices.length === 0) { + return "" + } + return choices[0].text ?? choices[0].message?.content ?? "" + } + } +} + +/** + * Send a FIM completion request to the configured provider. 
+ */ +export async function requestFimCompletion(options: FimRequestOptions): Promise { + const url = buildEndpointUrl(options.provider, options.baseUrl) + const body = buildRequestBody(options) + + const headers: Record = { + "Content-Type": "application/json", + } + + if (options.apiKey) { + headers["Authorization"] = `Bearer ${options.apiKey}` + } + + const response = await fetch(url, { + method: "POST", + headers, + body: JSON.stringify(body), + signal: options.signal, + }) + + if (!response.ok) { + const errorText = await response.text().catch(() => "Unknown error") + throw new Error(`FIM API request failed (${response.status}): ${errorText}`) + } + + const data = (await response.json()) as Record + const completion = extractCompletion(options.provider, data) + + return { completion } +} diff --git a/src/services/fim/FimCompletionProvider.ts b/src/services/fim/FimCompletionProvider.ts new file mode 100644 index 00000000000..d22ed01aff0 --- /dev/null +++ b/src/services/fim/FimCompletionProvider.ts @@ -0,0 +1,228 @@ +/** + * VS Code InlineCompletionItemProvider for FIM-based code completions. + * + * Provides ghost-text-style inline suggestions as the user types, + * similar to GitHub Copilot. Uses a FIM-compatible API to generate + * completions based on the prefix/suffix context around the cursor. + */ + +import * as vscode from "vscode" + +import { requestFimCompletion, type FimRequestOptions } from "./FimApiClient" + +/** Default debounce delay in milliseconds */ +const DEFAULT_DEBOUNCE_MS = 300 + +/** Default max tokens for completions */ +const DEFAULT_MAX_TOKENS = 128 + +/** Default base URLs per provider */ +const DEFAULT_BASE_URLS: Record = { + "openai-compatible": "http://localhost:1234", + deepseek: "https://api.deepseek.com", + mistral: "https://api.mistral.ai", + ollama: "http://localhost:11434", +} + +export interface FimProviderConfig { + enabled: boolean + provider: "openai-compatible" | "deepseek" | "mistral" | "ollama" + modelId: string + baseUrl?: string + apiKey?: string + debounceMs?: number + maxTokens?: number +} + +/** + * Simple completion cache to avoid redundant API calls for identical contexts. + */ +interface CacheEntry { + completion: string + timestamp: number +} + +const CACHE_TTL_MS = 10_000 // 10 seconds +const MAX_CACHE_SIZE = 50 + +export class FimCompletionProvider implements vscode.InlineCompletionItemProvider { + private config: FimProviderConfig + private cache = new Map() + private pendingRequest: AbortController | null = null + + constructor(config: FimProviderConfig) { + this.config = config + } + + /** + * Update the provider configuration. Called when settings change. + */ + updateConfig(config: FimProviderConfig): void { + this.config = config + this.cache.clear() + } + + /** + * Provide inline completion items for the given position. + */ + async provideInlineCompletionItems( + document: vscode.TextDocument, + position: vscode.Position, + context: vscode.InlineCompletionContext, + token: vscode.CancellationToken, + ): Promise { + if (!this.config.enabled) { + return undefined + } + + // Don't trigger on empty documents + if (document.getText().trim().length === 0) { + return undefined + } + + // Cancel any pending request + if (this.pendingRequest) { + this.pendingRequest.abort() + this.pendingRequest = null + } + + // Debounce: wait before making the request + const debounceMs = this.config.debounceMs ?? 
DEFAULT_DEBOUNCE_MS + if (debounceMs > 0) { + const cancelled = await this.debounce(debounceMs, token) + if (cancelled) { + return undefined + } + } + + // Extract prefix and suffix from the document + const prefix = document.getText(new vscode.Range(new vscode.Position(0, 0), position)) + const suffix = document.getText(new vscode.Range(position, document.lineAt(document.lineCount - 1).range.end)) + + // Check cache + const cacheKey = this.buildCacheKey(prefix, suffix) + const cached = this.getFromCache(cacheKey) + if (cached) { + return [new vscode.InlineCompletionItem(cached)] + } + + // Prepare the request + const abortController = new AbortController() + this.pendingRequest = abortController + + // Also abort when the token is cancelled + const disposable = token.onCancellationRequested(() => { + abortController.abort() + }) + + try { + const requestOptions: FimRequestOptions = { + provider: this.config.provider, + baseUrl: + this.config.baseUrl ?? + DEFAULT_BASE_URLS[this.config.provider] ?? + DEFAULT_BASE_URLS["openai-compatible"], + apiKey: this.config.apiKey, + modelId: this.config.modelId, + prefix, + suffix, + maxTokens: this.config.maxTokens ?? DEFAULT_MAX_TOKENS, + signal: abortController.signal, + } + + const response = await requestFimCompletion(requestOptions) + + // Filter empty or whitespace-only completions + const completion = response.completion.trimEnd() + if (!completion || completion.trim().length === 0) { + return undefined + } + + // Cache the result + this.addToCache(cacheKey, completion) + + return [new vscode.InlineCompletionItem(completion)] + } catch (error: unknown) { + // Don't log abort errors (expected during cancellation) + if (error instanceof Error && error.name === "AbortError") { + return undefined + } + // Log other errors but don't show to user (silent failure for inline completions) + console.warn("[FIM] Completion request failed:", error instanceof Error ? error.message : String(error)) + return undefined + } finally { + disposable.dispose() + if (this.pendingRequest === abortController) { + this.pendingRequest = null + } + } + } + + /** + * Debounce helper that resolves after a delay unless the token is cancelled. + * Returns true if cancelled, false if the delay completed. + */ + private debounce(ms: number, token: vscode.CancellationToken): Promise { + return new Promise((resolve) => { + const timeout = setTimeout(() => { + disposable.dispose() + resolve(false) + }, ms) + + const disposable = token.onCancellationRequested(() => { + clearTimeout(timeout) + resolve(true) + }) + }) + } + + /** + * Build a cache key from the prefix and suffix context. + * Uses the last N characters to keep keys manageable. + */ + private buildCacheKey(prefix: string, suffix: string): string { + const prefixTail = prefix.slice(-500) + const suffixHead = suffix.slice(0, 200) + return `${prefixTail}|||${suffixHead}` + } + + /** + * Get a completion from the cache if it's still valid. + */ + private getFromCache(key: string): string | undefined { + const entry = this.cache.get(key) + if (!entry) { + return undefined + } + if (Date.now() - entry.timestamp > CACHE_TTL_MS) { + this.cache.delete(key) + return undefined + } + return entry.completion + } + + /** + * Add a completion to the cache, evicting old entries if needed. 
+ */ + private addToCache(key: string, completion: string): void { + // Evict oldest entries if cache is full + if (this.cache.size >= MAX_CACHE_SIZE) { + const firstKey = this.cache.keys().next().value + if (firstKey !== undefined) { + this.cache.delete(firstKey) + } + } + this.cache.set(key, { completion, timestamp: Date.now() }) + } + + /** + * Dispose of any pending requests and clear the cache. + */ + dispose(): void { + if (this.pendingRequest) { + this.pendingRequest.abort() + this.pendingRequest = null + } + this.cache.clear() + } +} diff --git a/src/services/fim/FimService.ts b/src/services/fim/FimService.ts new file mode 100644 index 00000000000..ce4edda7971 --- /dev/null +++ b/src/services/fim/FimService.ts @@ -0,0 +1,102 @@ +/** + * FIM Service - Orchestrates the FIM inline completion lifecycle. + * + * Manages the registration and disposal of the FimCompletionProvider + * based on user settings. Listens for configuration changes to + * enable/disable the provider dynamically. + */ + +import * as vscode from "vscode" + +import type { GlobalSettings } from "@roo-code/types" + +import { FimCompletionProvider, type FimProviderConfig } from "./FimCompletionProvider" + +/** + * Extract FIM configuration from global settings. + */ +function extractFimConfig(settings: GlobalSettings, apiKey?: string): FimProviderConfig { + return { + enabled: settings.fimEnabled ?? false, + provider: settings.fimProvider ?? "openai-compatible", + modelId: settings.fimModelId ?? "", + baseUrl: settings.fimBaseUrl, + apiKey, + debounceMs: settings.fimDebounceMs, + maxTokens: settings.fimMaxTokens, + } +} + +export class FimService implements vscode.Disposable { + private provider: FimCompletionProvider | null = null + private registration: vscode.Disposable | null = null + private outputChannel: vscode.OutputChannel + + constructor(outputChannel: vscode.OutputChannel) { + this.outputChannel = outputChannel + } + + /** + * Initialize or update the FIM service based on current settings. + */ + updateSettings(settings: GlobalSettings, apiKey?: string): void { + const config = extractFimConfig(settings, apiKey) + + if (!config.enabled || !config.modelId) { + this.deactivate() + return + } + + if (this.provider) { + // Update existing provider config + this.provider.updateConfig(config) + this.outputChannel.appendLine( + `[FIM] Updated configuration: provider=${config.provider}, model=${config.modelId}`, + ) + } else { + // Create and register new provider + this.activate(config) + } + } + + /** + * Activate the FIM completion provider. + */ + private activate(config: FimProviderConfig): void { + this.provider = new FimCompletionProvider(config) + + this.registration = vscode.languages.registerInlineCompletionItemProvider({ pattern: "**" }, this.provider) + + this.outputChannel.appendLine(`[FIM] Activated: provider=${config.provider}, model=${config.modelId}`) + } + + /** + * Deactivate the FIM completion provider. + */ + private deactivate(): void { + if (this.registration) { + this.registration.dispose() + this.registration = null + } + + if (this.provider) { + this.provider.dispose() + this.provider = null + this.outputChannel.appendLine("[FIM] Deactivated") + } + } + + /** + * Check if the FIM service is currently active. + */ + isActive(): boolean { + return this.provider !== null + } + + /** + * Dispose of the FIM service and all resources. 
+	 */
+	dispose(): void {
+		this.deactivate()
+	}
+}
diff --git a/src/services/fim/FimTokenFormatter.ts b/src/services/fim/FimTokenFormatter.ts
new file mode 100644
index 00000000000..c7677f90613
--- /dev/null
+++ b/src/services/fim/FimTokenFormatter.ts
@@ -0,0 +1,94 @@
+/**
+ * FIM (Fill-in-the-Middle) token formatting for different model families.
+ *
+ * Each model family uses different special tokens to delimit the prefix,
+ * suffix, and middle sections of a FIM prompt. This module maps model
+ * families to their respective token formats.
+ */
+
+export interface FimTokens {
+	prefix: string
+	suffix: string
+	middle: string
+}
+
+/**
+ * Known FIM token formats by model family.
+ */
+const FIM_TOKEN_FORMATS: Record<string, FimTokens> = {
+	// DeepSeek Coder
+	deepseek: {
+		prefix: "<|fim▁begin|>",
+		suffix: "<|fim▁hole|>",
+		middle: "<|fim▁end|>",
+	},
+	// CodeLlama
+	codellama: {
+		prefix: "<PRE> ",
+		suffix: " <SUF>",
+		middle: " <MID>",
+	},
+	// StarCoder
+	starcoder: {
+		prefix: "<fim_prefix>",
+		suffix: "<fim_suffix>",
+		middle: "<fim_middle>",
+	},
+	// Mistral / Codestral
+	mistral: {
+		prefix: "[PREFIX]",
+		suffix: "[SUFFIX]",
+		middle: "[MIDDLE]",
+	},
+	// Qwen2.5-Coder
+	qwen: {
+		prefix: "<|fim_prefix|>",
+		suffix: "<|fim_suffix|>",
+		middle: "<|fim_middle|>",
+	},
+	// Generic fallback (OpenAI-compatible FIM)
+	generic: {
+		prefix: "<|fim_prefix|>",
+		suffix: "<|fim_suffix|>",
+		middle: "<|fim_middle|>",
+	},
+}
+
+/**
+ * Model ID patterns mapped to their token format keys.
+ */
+const MODEL_FAMILY_PATTERNS: Array<{ pattern: RegExp; family: string }> = [
+	{ pattern: /deepseek/i, family: "deepseek" },
+	{ pattern: /codellama/i, family: "codellama" },
+	{ pattern: /starcoder/i, family: "starcoder" },
+	{ pattern: /mistral|codestral/i, family: "mistral" },
+	{ pattern: /qwen/i, family: "qwen" },
+]
+
+/**
+ * Detect the FIM token format based on the model ID.
+ */
+export function detectFimTokens(modelId: string): FimTokens {
+	for (const { pattern, family } of MODEL_FAMILY_PATTERNS) {
+		if (pattern.test(modelId)) {
+			return FIM_TOKEN_FORMATS[family]
+		}
+	}
+	return FIM_TOKEN_FORMATS.generic
+}
+
+/**
+ * Format a FIM prompt using the appropriate tokens for the given model.
+ */
+export function formatFimPrompt(modelId: string, prefix: string, suffix: string): string {
+	const tokens = detectFimTokens(modelId)
+	return `${tokens.prefix}${prefix}${tokens.suffix}${suffix}${tokens.middle}`
+}
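+
+// Illustrative example (mirrors the DeepSeek case exercised in the tests):
+//   formatFimPrompt("deepseek-coder", "function hello() {", "}")
+//   returns "<|fim▁begin|>function hello() {<|fim▁hole|>}<|fim▁end|>"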
+
+/**
+ * Get the FIM tokens for a given model family name.
+ * Falls back to generic tokens if the family is not recognized.
+ */
+export function getFimTokensByFamily(family: string): FimTokens {
+	return FIM_TOKEN_FORMATS[family] ?? FIM_TOKEN_FORMATS.generic
+}
diff --git a/src/services/fim/__tests__/FimApiClient.spec.ts b/src/services/fim/__tests__/FimApiClient.spec.ts
new file mode 100644
index 00000000000..f94b9055029
--- /dev/null
+++ b/src/services/fim/__tests__/FimApiClient.spec.ts
@@ -0,0 +1,225 @@
+import { requestFimCompletion, type FimRequestOptions } from "../FimApiClient"
+
+// Mock global fetch
+const mockFetch = vi.fn()
+vi.stubGlobal("fetch", mockFetch)
+
+describe("FimApiClient", () => {
+	beforeEach(() => {
+		mockFetch.mockReset()
+	})
+
+	describe("requestFimCompletion", () => {
+		const baseOptions: FimRequestOptions = {
+			provider: "openai-compatible",
+			baseUrl: "http://localhost:1234",
+			apiKey: "test-key",
+			modelId: "deepseek-coder",
+			prefix: "function hello() {",
+			suffix: "}",
+			maxTokens: 128,
+		}
+
+		it("should make a request to the correct OpenAI-compatible endpoint", async () => {
+			mockFetch.mockResolvedValueOnce({
+				ok: true,
+				json: async () => ({
+					choices: [{ text: '\n  console.log("hello");\n' }],
+				}),
+			})
+
+			await requestFimCompletion(baseOptions)
+
+			expect(mockFetch).toHaveBeenCalledWith(
+				"http://localhost:1234/v1/completions",
+				expect.objectContaining({
+					method: "POST",
+					headers: expect.objectContaining({
+						"Content-Type": "application/json",
+						Authorization: "Bearer test-key",
+					}),
+				}),
+			)
+		})
+
+		it("should use the Ollama endpoint for Ollama provider", async () => {
+			mockFetch.mockResolvedValueOnce({
+				ok: true,
+				json: async () => ({
+					response: '\n  console.log("hello");\n',
+				}),
+			})
+
+			await requestFimCompletion({
+				...baseOptions,
+				provider: "ollama",
+				baseUrl: "http://localhost:11434",
+			})
+
+			expect(mockFetch).toHaveBeenCalledWith("http://localhost:11434/api/generate", expect.anything())
+		})
+
+		it("should use the Mistral FIM endpoint for Mistral provider", async () => {
+			mockFetch.mockResolvedValueOnce({
+				ok: true,
+				json: async () => ({
+					choices: [{ text: "completion" }],
+				}),
+			})
+
+			await requestFimCompletion({
+				...baseOptions,
+				provider: "mistral",
+				baseUrl: "https://api.mistral.ai",
+			})
+
+			expect(mockFetch).toHaveBeenCalledWith("https://api.mistral.ai/v1/fim/completions", expect.anything())
+		})
+
+		it("should extract completion text from OpenAI-compatible response", async () => {
+			mockFetch.mockResolvedValueOnce({
+				ok: true,
+				json: async () => ({
+					choices: [{ text: "  return 42;" }],
+				}),
+			})
+
+			const result = await requestFimCompletion(baseOptions)
+			expect(result.completion).toBe("  return 42;")
+		})
+
+		it("should extract completion text from Ollama response", async () => {
+			mockFetch.mockResolvedValueOnce({
+				ok: true,
+				json: async () => ({
+					response: "  return 42;",
+				}),
+			})
+
+			const result = await requestFimCompletion({
+				...baseOptions,
+				provider: "ollama",
+				baseUrl: "http://localhost:11434",
+			})
+			expect(result.completion).toBe("  return 42;")
+		})
+
+		it("should return empty string for empty choices", async () => {
+			mockFetch.mockResolvedValueOnce({
+				ok: true,
+				json: async () => ({
+					choices: [],
+				}),
+			})
+
+			const result = await requestFimCompletion(baseOptions)
+			expect(result.completion).toBe("")
+		})
+
+		it("should throw an error for non-OK responses", async () => {
+			mockFetch.mockResolvedValueOnce({
+				ok: false,
+				status: 401,
+				text: async () => "Unauthorized",
+			})
+
+			await expect(requestFimCompletion(baseOptions)).rejects.toThrow(
+				"FIM API request failed (401): Unauthorized",
+			)
+		})
+
+		it("should not include Authorization header when no API key is provided", async () => {
+			mockFetch.mockResolvedValueOnce({
+				ok: true,
+				json: async () => ({
+					choices: [{ text: "test" }],
+				}),
+			})
+
+			await requestFimCompletion({
+				...baseOptions,
+				apiKey: undefined,
+			})
+
+			const callArgs = mockFetch.mock.calls[0]
+			const headers = callArgs[1].headers
+			expect(headers).not.toHaveProperty("Authorization")
+		})
+
+		it("should normalize base URL by removing trailing slashes", async () => {
+			mockFetch.mockResolvedValueOnce({
+				ok: true,
+				json: async () => ({
+					choices: [{ text: "test" }],
+				}),
+			})
+
+			await requestFimCompletion({
+				...baseOptions,
+				baseUrl: "http://localhost:1234///",
+			})
+
+			expect(mockFetch).toHaveBeenCalledWith("http://localhost:1234/v1/completions", expect.anything())
+		})
+
+		it("should pass the abort signal to fetch", async () => {
+			const controller = new AbortController()
+			mockFetch.mockResolvedValueOnce({
+				ok: true,
+				json: async () => ({
+					choices: [{ text: "test" }],
+				}),
+			})
+
+			await requestFimCompletion({
+				...baseOptions,
+				signal: controller.signal,
+			})
+
+			expect(mockFetch).toHaveBeenCalledWith(
+				expect.any(String),
+				expect.objectContaining({
+					signal: controller.signal,
+				}),
+			)
+		})
+
+		it("should include FIM tokens in the prompt for openai-compatible provider", async () => {
+			mockFetch.mockResolvedValueOnce({
+				ok: true,
+				json: async () => ({
+					choices: [{ text: "test" }],
+				}),
+			})
+
+			await requestFimCompletion(baseOptions)
+
+			const callArgs = mockFetch.mock.calls[0]
+			const body = JSON.parse(callArgs[1].body)
+			// DeepSeek model should use DeepSeek FIM tokens
+			expect(body.prompt).toContain("<|fim▁begin|>")
+			expect(body.prompt).toContain("<|fim▁hole|>")
+			expect(body.prompt).toContain("<|fim▁end|>")
+		})
+
+		it("should use native prefix/suffix for Ollama provider", async () => {
+			mockFetch.mockResolvedValueOnce({
+				ok: true,
+				json: async () => ({
+					response: "test",
+				}),
+			})
+
+			await requestFimCompletion({
+				...baseOptions,
+				provider: "ollama",
+				baseUrl: "http://localhost:11434",
+			})
+
+			const callArgs = mockFetch.mock.calls[0]
+			const body = JSON.parse(callArgs[1].body)
+			expect(body.prompt).toBe("function hello() {")
+			expect(body.suffix).toBe("}")
+		})
+	})
+})
diff --git a/src/services/fim/__tests__/FimCompletionProvider.spec.ts b/src/services/fim/__tests__/FimCompletionProvider.spec.ts
new file mode 100644
index 00000000000..f6cf9e5d93a
--- /dev/null
+++ b/src/services/fim/__tests__/FimCompletionProvider.spec.ts
@@ -0,0 +1,205 @@
+import * as vscode from "vscode"
+
+import { FimCompletionProvider } from "../FimCompletionProvider"
+import * as FimApiClient from "../FimApiClient"
+
+// Mock the API client
+vi.mock("../FimApiClient", () => ({
+	requestFimCompletion: vi.fn(),
+}))
+
+describe("FimCompletionProvider", () => {
+	const mockRequestFimCompletion = vi.mocked(FimApiClient.requestFimCompletion)
+
+	const defaultConfig = {
+		enabled: true,
+		provider: "openai-compatible" as const,
+		modelId: "deepseek-coder",
+		baseUrl: "http://localhost:1234",
+		apiKey: "test-key",
+		debounceMs: 0, // No debounce for tests
+		maxTokens: 128,
+	}
+
+	// Create mock document
+	function createMockDocument(text: string): vscode.TextDocument {
+		const lines = text.split("\n")
+		return {
+			getText: vi.fn((range?: vscode.Range) => {
+				if (!range) return text
+				// Simplified: return text between positions
+				const startOffset = getOffset(text, range.start.line, range.start.character)
+				const endOffset = getOffset(text, range.end.line, range.end.character)
+				return text.substring(startOffset, endOffset)
+			}),
+			lineCount: lines.length,
+			lineAt: vi.fn((line: number) => ({
+				range: {
+					end: new vscode.Position(line, lines[line]?.length ?? 0),
+				},
+			})),
+		} as unknown as vscode.TextDocument
+	}
+
+	function getOffset(text: string, line: number, character: number): number {
+		const lines = text.split("\n")
+		let offset = 0
+		for (let i = 0; i < line && i < lines.length; i++) {
+			offset += lines[i].length + 1 // +1 for newline
+		}
+		return offset + character
+	}
+
+	// Create mock cancellation token
+	function createMockToken(cancelled = false): vscode.CancellationToken {
+		return {
+			isCancellationRequested: cancelled,
+			onCancellationRequested: vi.fn(() => ({ dispose: vi.fn() })),
+		}
+	}
+
+	beforeEach(() => {
+		vi.clearAllMocks()
+	})
+
+	it("should return undefined when disabled", async () => {
+		const provider = new FimCompletionProvider({ ...defaultConfig, enabled: false })
+		const doc = createMockDocument("const x = ")
+		const pos = new vscode.Position(0, 10)
+		const token = createMockToken()
+
+		const result = await provider.provideInlineCompletionItems(
+			doc,
+			pos,
+			{ triggerKind: vscode.InlineCompletionTriggerKind.Automatic } as vscode.InlineCompletionContext,
+			token,
+		)
+
+		expect(result).toBeUndefined()
+		expect(mockRequestFimCompletion).not.toHaveBeenCalled()
+	})
+
+	it("should return undefined for empty documents", async () => {
+		const provider = new FimCompletionProvider(defaultConfig)
+		const doc = createMockDocument("")
+		const pos = new vscode.Position(0, 0)
+		const token = createMockToken()
+
+		const result = await provider.provideInlineCompletionItems(
+			doc,
+			pos,
+			{ triggerKind: vscode.InlineCompletionTriggerKind.Automatic } as vscode.InlineCompletionContext,
+			token,
+		)
+
+		expect(result).toBeUndefined()
+	})
+
+	it("should return completion items on successful API response", async () => {
+		mockRequestFimCompletion.mockResolvedValueOnce({
+			completion: '  console.log("hello");',
+		})
+
+		const provider = new FimCompletionProvider(defaultConfig)
+		const doc = createMockDocument("function hello() {\n}")
+		const pos = new vscode.Position(0, 19)
+		const token = createMockToken()
+
+		const result = await provider.provideInlineCompletionItems(
+			doc,
+			pos,
+			{ triggerKind: vscode.InlineCompletionTriggerKind.Automatic } as vscode.InlineCompletionContext,
+			token,
+		)
+
+		expect(result).toBeDefined()
+		expect(result).toHaveLength(1)
+		expect(mockRequestFimCompletion).toHaveBeenCalledOnce()
+	})
+
+	it("should return undefined for whitespace-only completions", async () => {
+		mockRequestFimCompletion.mockResolvedValueOnce({
+			completion: "   \n  \t  ",
+		})
+
+		const provider = new FimCompletionProvider(defaultConfig)
+		const doc = createMockDocument("const x = ")
+		const pos = new vscode.Position(0, 10)
+		const token = createMockToken()
+
+		const result = await provider.provideInlineCompletionItems(
+			doc,
+			pos,
+			{ triggerKind: vscode.InlineCompletionTriggerKind.Automatic } as vscode.InlineCompletionContext,
+			token,
+		)
+
+		expect(result).toBeUndefined()
+	})
+
+	it("should return undefined when API request fails", async () => {
+		mockRequestFimCompletion.mockRejectedValueOnce(new Error("Network error"))
+
+		const provider = new FimCompletionProvider(defaultConfig)
+		const doc = createMockDocument("const x = ")
+		const pos = new vscode.Position(0, 10)
+		const token = createMockToken()
+
+		const result = await provider.provideInlineCompletionItems(
+			doc,
+			pos,
+			{ triggerKind: vscode.InlineCompletionTriggerKind.Automatic } as vscode.InlineCompletionContext,
+			token,
+		)
+
+		expect(result).toBeUndefined()
+	})
+
+	it("should cache results and return cached completions", async () => {
+		mockRequestFimCompletion.mockResolvedValue({
+			completion: "42",
+		})
+
+		const provider = new FimCompletionProvider(defaultConfig)
+		const doc = createMockDocument("const x = ")
+		const pos = new vscode.Position(0, 10)
+		const token = createMockToken()
+		const context = { triggerKind: vscode.InlineCompletionTriggerKind.Automatic } as vscode.InlineCompletionContext
+
+		// First call - should hit API
+		await provider.provideInlineCompletionItems(doc, pos, context, token)
+		expect(mockRequestFimCompletion).toHaveBeenCalledOnce()
+
+		// Second call with same context - should use cache
+		await provider.provideInlineCompletionItems(doc, pos, context, token)
+		expect(mockRequestFimCompletion).toHaveBeenCalledOnce() // Still only 1 call
+	})
+
+	it("should clear cache when config is updated", async () => {
+		mockRequestFimCompletion.mockResolvedValue({
+			completion: "42",
+		})
+
+		const provider = new FimCompletionProvider(defaultConfig)
+		const doc = createMockDocument("const x = ")
+		const pos = new vscode.Position(0, 10)
+		const token = createMockToken()
+		const context = { triggerKind: vscode.InlineCompletionTriggerKind.Automatic } as vscode.InlineCompletionContext
+
+		// First call
+		await provider.provideInlineCompletionItems(doc, pos, context, token)
+		expect(mockRequestFimCompletion).toHaveBeenCalledOnce()
+
+		// Update config - should clear cache
+		provider.updateConfig(defaultConfig)
+
+		// Second call - should hit API again since cache was cleared
+		await provider.provideInlineCompletionItems(doc, pos, context, token)
+		expect(mockRequestFimCompletion).toHaveBeenCalledTimes(2)
+	})
+
+	it("should dispose properly", () => {
+		const provider = new FimCompletionProvider(defaultConfig)
+		expect(() => provider.dispose()).not.toThrow()
+	})
+})
diff --git a/src/services/fim/__tests__/FimService.spec.ts b/src/services/fim/__tests__/FimService.spec.ts
new file mode 100644
index 00000000000..d396140b4a2
--- /dev/null
+++ b/src/services/fim/__tests__/FimService.spec.ts
@@ -0,0 +1,157 @@
+import * as vscode from "vscode"
+
+import { FimService } from "../FimService"
+import type { GlobalSettings } from "@roo-code/types"
+
+// Mock vscode.languages.registerInlineCompletionItemProvider
+const mockRegisterDisposable = { dispose: vi.fn() }
+vscode.languages.registerInlineCompletionItemProvider = vi.fn().mockReturnValue(mockRegisterDisposable)
+
+describe("FimService", () => {
+	let outputChannel: vscode.OutputChannel
+
+	beforeEach(() => {
+		vi.clearAllMocks()
+		outputChannel = {
+			appendLine: vi.fn(),
+		} as unknown as vscode.OutputChannel
+	})
+
+	it("should not activate when FIM is disabled", () => {
+		const service = new FimService(outputChannel)
+		const settings: GlobalSettings = {
+			fimEnabled: false,
+			fimProvider: "openai-compatible",
+			fimModelId: "deepseek-coder",
+		}
+
+		service.updateSettings(settings)
+
+		expect(service.isActive()).toBe(false)
+		expect(vscode.languages.registerInlineCompletionItemProvider).not.toHaveBeenCalled()
+	})
+
+	it("should not activate when model ID is empty", () => {
+		const service = new FimService(outputChannel)
+		const settings: GlobalSettings = {
+			fimEnabled: true,
+			fimProvider: "openai-compatible",
+			fimModelId: "",
+		}
+
+		service.updateSettings(settings)
+
+		expect(service.isActive()).toBe(false)
+	})
+
+	it("should activate when FIM is enabled with a model ID", () => {
+		const service = new FimService(outputChannel)
+		const settings: GlobalSettings = {
+			fimEnabled: true,
+			fimProvider: "openai-compatible",
+			fimModelId: "deepseek-coder",
+		}
+
+		service.updateSettings(settings)
+
+		expect(service.isActive()).toBe(true)
+		expect(vscode.languages.registerInlineCompletionItemProvider).toHaveBeenCalledOnce()
+	})
+
+	it("should deactivate when settings change to disabled", () => {
+		const service = new FimService(outputChannel)
+
+		// First activate
+		service.updateSettings({
+			fimEnabled: true,
+			fimProvider: "openai-compatible",
+			fimModelId: "deepseek-coder",
+		})
+		expect(service.isActive()).toBe(true)
+
+		// Then deactivate
+		service.updateSettings({
+			fimEnabled: false,
+		})
+		expect(service.isActive()).toBe(false)
+		expect(mockRegisterDisposable.dispose).toHaveBeenCalled()
+	})
+
+	it("should update config without re-registering when already active", () => {
+		const service = new FimService(outputChannel)
+
+		service.updateSettings({
+			fimEnabled: true,
+			fimProvider: "openai-compatible",
+			fimModelId: "deepseek-coder",
+		})
+
+		// Update with new model
+		service.updateSettings({
+			fimEnabled: true,
+			fimProvider: "openai-compatible",
+			fimModelId: "codestral-latest",
+		})
+
+		// Should only register once
+		expect(vscode.languages.registerInlineCompletionItemProvider).toHaveBeenCalledOnce()
+		expect(service.isActive()).toBe(true)
+	})
+
+	it("should pass API key to config", () => {
+		const service = new FimService(outputChannel)
+
+		service.updateSettings(
+			{
+				fimEnabled: true,
+				fimProvider: "deepseek",
+				fimModelId: "deepseek-coder",
+			},
+			"my-secret-key",
+		)
+
+		expect(service.isActive()).toBe(true)
+	})
+
+	it("should use default provider when not specified", () => {
+		const service = new FimService(outputChannel)
+
+		service.updateSettings({
+			fimEnabled: true,
+			fimModelId: "some-model",
+		})
+
+		expect(service.isActive()).toBe(true)
+	})
+
+	it("should dispose properly", () => {
+		const service = new FimService(outputChannel)
+
+		service.updateSettings({
+			fimEnabled: true,
+			fimProvider: "openai-compatible",
+			fimModelId: "deepseek-coder",
+		})
+
+		service.dispose()
+
+		expect(service.isActive()).toBe(false)
+		expect(mockRegisterDisposable.dispose).toHaveBeenCalled()
+	})
+
+	it("should log activation and deactivation messages", () => {
+		const service = new FimService(outputChannel)
+
+		service.updateSettings({
+			fimEnabled: true,
+			fimProvider: "openai-compatible",
+			fimModelId: "deepseek-coder",
+		})
+
+		expect(outputChannel.appendLine).toHaveBeenCalledWith(expect.stringContaining("[FIM] Activated"))
+
+		service.updateSettings({ fimEnabled: false })
+
+		expect(outputChannel.appendLine).toHaveBeenCalledWith("[FIM] Deactivated")
+	})
+})
diff --git a/src/services/fim/__tests__/FimTokenFormatter.spec.ts b/src/services/fim/__tests__/FimTokenFormatter.spec.ts
new file mode 100644
index 00000000000..5a1af6cc35b
--- /dev/null
+++ b/src/services/fim/__tests__/FimTokenFormatter.spec.ts
@@ -0,0 +1,76 @@
+import { detectFimTokens, formatFimPrompt, getFimTokensByFamily } from "../FimTokenFormatter"
+
+describe("FimTokenFormatter", () => {
+	describe("detectFimTokens", () => {
+		it("should detect DeepSeek model tokens", () => {
+			const tokens = detectFimTokens("deepseek-coder-v2")
+			expect(tokens.prefix).toBe("<|fim▁begin|>")
+			expect(tokens.suffix).toBe("<|fim▁hole|>")
+			expect(tokens.middle).toBe("<|fim▁end|>")
+		})
+
+		it("should detect CodeLlama model tokens", () => {
+			const tokens = detectFimTokens("codellama-13b")
+			expect(tokens.prefix).toBe("<PRE> ")
+			expect(tokens.suffix).toBe(" <SUF>")
+			expect(tokens.middle).toBe(" <MID>")
+		})
+
+		it("should detect StarCoder model tokens", () => {
+			const tokens = detectFimTokens("starcoder2-15b")
+			expect(tokens.prefix).toBe("<fim_prefix>")
+			expect(tokens.suffix).toBe("<fim_suffix>")
+			expect(tokens.middle).toBe("<fim_middle>")
+		})
+
+		it("should detect Mistral/Codestral model tokens", () => {
+			const tokens = detectFimTokens("codestral-latest")
+			expect(tokens.prefix).toBe("[PREFIX]")
+			expect(tokens.suffix).toBe("[SUFFIX]")
+			expect(tokens.middle).toBe("[MIDDLE]")
+		})
+
+		it("should detect Qwen model tokens", () => {
+			const tokens = detectFimTokens("qwen2.5-coder-7b")
+			expect(tokens.prefix).toBe("<|fim_prefix|>")
+			expect(tokens.suffix).toBe("<|fim_suffix|>")
+			expect(tokens.middle).toBe("<|fim_middle|>")
+		})
+
+		it("should return generic tokens for unknown models", () => {
+			const tokens = detectFimTokens("some-unknown-model")
+			expect(tokens.prefix).toBe("<|fim_prefix|>")
+			expect(tokens.suffix).toBe("<|fim_suffix|>")
+			expect(tokens.middle).toBe("<|fim_middle|>")
+		})
+
+		it("should be case-insensitive", () => {
+			const tokens = detectFimTokens("DeepSeek-Coder-V2")
+			expect(tokens.prefix).toBe("<|fim▁begin|>")
+		})
+	})
+
+	describe("formatFimPrompt", () => {
+		it("should format a FIM prompt with correct tokens for DeepSeek", () => {
+			const result = formatFimPrompt("deepseek-coder", "function hello() {", "}")
+			expect(result).toBe("<|fim▁begin|>function hello() {<|fim▁hole|>}<|fim▁end|>")
+		})
+
+		it("should format a FIM prompt with generic tokens for unknown models", () => {
+			const result = formatFimPrompt("unknown-model", "const x = ", ";")
+			expect(result).toBe("<|fim_prefix|>const x = <|fim_suffix|>;<|fim_middle|>")
+		})
+	})
+
+	describe("getFimTokensByFamily", () => {
+		it("should return tokens for known families", () => {
+			const tokens = getFimTokensByFamily("deepseek")
+			expect(tokens.prefix).toBe("<|fim▁begin|>")
+		})
+
+		it("should return generic tokens for unknown families", () => {
+			const tokens = getFimTokensByFamily("nonexistent")
+			expect(tokens.prefix).toBe("<|fim_prefix|>")
+		})
+	})
+})
diff --git a/src/services/fim/index.ts b/src/services/fim/index.ts
new file mode 100644
index 00000000000..1a60758a53f
--- /dev/null
+++ b/src/services/fim/index.ts
@@ -0,0 +1,4 @@
+export { FimService } from "./FimService"
+export { FimCompletionProvider, type FimProviderConfig } from "./FimCompletionProvider"
+export { requestFimCompletion, type FimRequestOptions, type FimResponse } from "./FimApiClient"
+export { detectFimTokens, formatFimPrompt, getFimTokensByFamily, type FimTokens } from "./FimTokenFormatter"
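+
+// Illustrative usage sketch (the model ID and code snippets below are hypothetical;
+// the request shape follows FimRequestOptions, and the base URL matches the Ollama
+// default declared in FimCompletionProvider):
+//
+//   const { completion } = await requestFimCompletion({
+//       provider: "ollama",
+//       baseUrl: "http://localhost:11434",
+//       modelId: "qwen2.5-coder",
+//       prefix: "function add(a, b) {\n",
+//       suffix: "\n}",
+//       maxTokens: 128,
+//   })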