From 4c4cf8857b4292a272634dcf96bc5d413e29340f Mon Sep 17 00:00:00 2001
From: Jacob Fu <141651335+FuJacob@users.noreply.github.com>
Date: Thu, 14 May 2026 21:33:44 -0700
Subject: [PATCH 1/2] Add Compose interaction mode foundation

---
 .../SuggestionCoordinator+Prediction.swift    | 18 +++++
 tabby/Models/SuggestionEngineModels.swift     | 51 +++++++++++++-
 tabby/Models/SuggestionSettingsModel.swift    | 70 +++++++++++++++----
 tabby/UI/MenuBarView.swift                    | 22 +++++-
 tabby/UI/SettingsView.swift                   | 22 +++++-
 5 files changed, 168 insertions(+), 15 deletions(-)

diff --git a/tabby/App/Coordinators/SuggestionCoordinator+Prediction.swift b/tabby/App/Coordinators/SuggestionCoordinator+Prediction.swift
index f8d6048..d4ce05e 100644
--- a/tabby/App/Coordinators/SuggestionCoordinator+Prediction.swift
+++ b/tabby/App/Coordinators/SuggestionCoordinator+Prediction.swift
@@ -7,6 +7,11 @@ extension SuggestionCoordinator {
     // MARK: - Prediction Pipeline
 
     func schedulePrediction() {
+        guard settingsSnapshot.selectedInteractionMode == .autocomplete else {
+            disablePredictionsPreservingVisualContext(reason: composeModePendingReason)
+            return
+        }
+
         if let disabledReason = SuggestionAvailabilityEvaluator.disabledReason(
             globallyEnabled: settingsSnapshot.isGloballyEnabled,
             disabledAppBundleIdentifiers: settingsSnapshot.disabledAppBundleIdentifiers,
@@ -32,6 +37,11 @@ extension SuggestionCoordinator {
 
     /// Refreshes focus after debounce, materializes a stable context, and starts generation.
     func generateFromCurrentFocus(workID: UInt64) async {
+        guard settingsSnapshot.selectedInteractionMode == .autocomplete else {
+            disablePredictionsPreservingVisualContext(reason: composeModePendingReason)
+            return
+        }
+
         guard workController.isCurrent(workID) else {
             return
         }
@@ -411,6 +421,10 @@ extension SuggestionCoordinator {
     /// Once screenshot context becomes ready, regenerate only if the user is still in the same
     /// field and there is enough typed text for a real inline completion request.
     func schedulePredictionForCurrentFocusIfPossible(matching identity: FocusedInputIdentity) {
+        guard settingsSnapshot.selectedInteractionMode == .autocomplete else {
+            return
+        }
+
         focusModel.refreshNow()
         let snapshot = focusModel.snapshot
 
@@ -423,4 +437,8 @@ extension SuggestionCoordinator {
 
         schedulePrediction()
     }
+
+    private var composeModePendingReason: String {
+        "Compose Mode is selected. Draft generation will be enabled after the Compose request pipeline is installed."
+    }
 }
diff --git a/tabby/Models/SuggestionEngineModels.swift b/tabby/Models/SuggestionEngineModels.swift
index e046f34..0b3bb4a 100644
--- a/tabby/Models/SuggestionEngineModels.swift
+++ b/tabby/Models/SuggestionEngineModels.swift
@@ -1,12 +1,40 @@
 import Foundation
 
 /// File overview:
-/// Defines the product-facing engine choices for Tabby's autocomplete pipeline.
+/// Defines the product-facing writing mode and engine choices for Tabby's suggestion pipeline.
 /// This file exists because "which engine is active?" is a domain concept, not a UI-only detail.
+/// The same applies to the interaction mode: runtime code needs an immutable value that says
+/// whether Tabby is completing a short inline tail or preparing for a deliberate full draft.
 ///
 /// The important architectural distinction is:
+/// - autocomplete vs. compose is an interaction contract
 /// - a local GGUF file is a model option inside the llama runtime
 /// - Apple Intelligence vs. local llama is an engine choice above the runtime layer
+enum SuggestionInteractionMode: String, CaseIterable, Equatable, Hashable, Sendable, Identifiable {
+    case autocomplete
+    case compose
+
+    /// `Identifiable` identity; reuses the raw value, which is also the string persisted in settings.
+    var id: String { rawValue }
+
+    /// Short name for the mode, used as the picker row title and in the menu bar summary line.
+    var displayLabel: String {
+        switch self {
+        case .autocomplete: return "Autocomplete"
+        case .compose: return "Compose"
+        }
+    }
+
+    /// One-sentence description of what the user gets from the mode.
+    /// Kept next to `displayLabel` so the label and its explanation change together.
+    var explanatoryText: String {
+        switch self {
+        case .autocomplete: return "Predicts a short inline continuation near the caret."
+        case .compose: return "Prepares a full draft for deliberate review before typing."
+        }
+    }
+}
+
 enum SuggestionEngineKind: String, CaseIterable, Equatable, Hashable, Sendable, Identifiable {
     case appleIntelligence
     case llamaOpenSource
@@ -49,6 +77,7 @@ struct DisabledApplicationRule: Codable, Equatable, Identifiable, Sendable {
 struct SuggestionSettingsSnapshot: Equatable, Sendable {
     let isGloballyEnabled: Bool
     let disabledAppBundleIdentifiers: Set<String>
+    let selectedInteractionMode: SuggestionInteractionMode
     let selectedEngine: SuggestionEngineKind
     let selectedWordCountPreset: SuggestionWordCountPreset
     let isClipboardContextEnabled: Bool
@@ -56,4 +85,24 @@ struct SuggestionSettingsSnapshot: Equatable, Sendable {
     /// This travels in the snapshot so generation uses the same value the Settings UI shows.
     let userName: String
     let userTags: [String]
+
+    /// Memberwise initializer written out so `selectedInteractionMode` can default to
+    /// `.autocomplete`; callers that omit the argument get the autocomplete contract.
+    init(
+        isGloballyEnabled: Bool, disabledAppBundleIdentifiers: Set<String>,
+        selectedInteractionMode: SuggestionInteractionMode = .autocomplete,
+        selectedEngine: SuggestionEngineKind, selectedWordCountPreset: SuggestionWordCountPreset,
+        isClipboardContextEnabled: Bool, userName: String, userTags: [String]
+    ) {
+        // Gating values: global switch, per-app blocklist, interaction mode.
+        self.isGloballyEnabled = isGloballyEnabled
+        self.disabledAppBundleIdentifiers = disabledAppBundleIdentifiers
+        self.selectedInteractionMode = selectedInteractionMode
+        // Engine, length preset, clipboard context, and profile values.
+        self.selectedEngine = selectedEngine
+        self.selectedWordCountPreset = selectedWordCountPreset
+        self.isClipboardContextEnabled = isClipboardContextEnabled
+        self.userName = userName
+        self.userTags = userTags
+    }
 }
diff --git a/tabby/Models/SuggestionSettingsModel.swift b/tabby/Models/SuggestionSettingsModel.swift
index e7d616b..02469ee 100644
--- a/tabby/Models/SuggestionSettingsModel.swift
+++ b/tabby/Models/SuggestionSettingsModel.swift
@@ -2,9 +2,9 @@ import Combine
 import Foundation
 
 /// File overview:
-/// Owns the durable autocomplete preferences that are shared across the app:
-/// engine selection, completion length, indicator appearance, and profile
-/// personalization.
+/// Owns the durable suggestion preferences that are shared across the app:
+/// interaction mode, engine selection, completion length, indicator appearance,
+/// and profile personalization.
 ///
 /// This type is the right owner for these values because they are product settings, not
 /// `SuggestionCoordinator` session state. The coordinator should react to settings changes, not
@@ -15,6 +15,7 @@ final class SuggestionSettingsModel: ObservableObject {
     @Published private(set) var selectedIndicatorMode: ActivationIndicatorMode
     @Published private(set) var disabledAppRules: [DisabledApplicationRule]
     @Published private(set) var customSuggestionTextColorHex: String?
+    @Published private(set) var selectedInteractionMode: SuggestionInteractionMode
     @Published private(set) var selectedEngine: SuggestionEngineKind
     @Published private(set) var selectedWordCountPreset: SuggestionWordCountPreset
     @Published private(set) var isClipboardContextEnabled: Bool
@@ -28,6 +29,7 @@ final class SuggestionSettingsModel: ObservableObject {
     private static let showCaretIndicatorDefaultsKey = "tabbyShowCaretIndicator"
     private static let selectedIndicatorModeDefaultsKey = "tabbySelectedIndicatorMode"
     private static let customSuggestionTextColorHexDefaultsKey = "tabbyCustomSuggestionTextColorHex"
+    private static let selectedInteractionModeDefaultsKey = "tabbySelectedInteractionMode"
     private static let selectedEngineDefaultsKey = "selectedSuggestionEngine"
     private static let selectedWordCountPresetDefaultsKey = "selectedSuggestionWordCountPreset"
     private static let clipboardContextEnabledDefaultsKey = "tabbyClipboardContextEnabled"
@@ -50,6 +52,10 @@ final class SuggestionSettingsModel: ObservableObject {
         let resolvedCustomSuggestionTextColorHex = Self.normalizedHexString(
             userDefaults.string(forKey: Self.customSuggestionTextColorHexDefaultsKey)
         )
+        let resolvedInteractionMode = userDefaults
+            .string(forKey: Self.selectedInteractionModeDefaultsKey)
+            .flatMap(SuggestionInteractionMode.init(rawValue:))
+            ?? .autocomplete
         let resolvedEngine = userDefaults
             .string(forKey: Self.selectedEngineDefaultsKey)
             .flatMap(SuggestionEngineKind.init(rawValue:))
@@ -76,6 +82,7 @@ final class SuggestionSettingsModel: ObservableObject {
         disabledAppRules = resolvedDisabledAppRules
         selectedIndicatorMode = resolvedIndicatorMode
         customSuggestionTextColorHex = resolvedCustomSuggestionTextColorHex
+        selectedInteractionMode = resolvedInteractionMode
         selectedEngine = resolvedEngine
         selectedWordCountPreset = resolvedWordCountPreset
         isClipboardContextEnabled = resolvedClipboardContextEnabled
@@ -86,6 +93,7 @@ final class SuggestionSettingsModel: ObservableObject {
         persistDisabledAppRules(resolvedDisabledAppRules)
         persistSelectedIndicatorMode(resolvedIndicatorMode)
         persistCustomSuggestionTextColorHex(resolvedCustomSuggestionTextColorHex)
+        persistSelectedInteractionMode(resolvedInteractionMode)
         persistSelectedEngine(resolvedEngine)
         persistSelectedWordCountPreset(resolvedWordCountPreset)
         persistClipboardContextEnabled(resolvedClipboardContextEnabled)
@@ -100,9 +108,10 @@ final class SuggestionSettingsModel: ObservableObject {
     }
 
     var snapshot: SuggestionSettingsSnapshot {
-        SuggestionSettingsSnapshot(
+        Self.makeSnapshot(
             isGloballyEnabled: isGloballyEnabled,
-            disabledAppBundleIdentifiers: Set(disabledAppRules.map(\.bundleIdentifier)),
+            disabledAppRules: disabledAppRules,
+            selectedInteractionMode: selectedInteractionMode,
             selectedEngine: selectedEngine,
             selectedWordCountPreset: selectedWordCountPreset,
             isClipboardContextEnabled: isClipboardContextEnabled,
@@ -111,6 +120,15 @@ final class SuggestionSettingsModel: ObservableObject {
         )
     }
 
+    /// Switches the active interaction mode and writes it to user defaults.
+    /// Re-selecting the current mode returns early, so nothing is republished or rewritten.
+    func selectInteractionMode(_ mode: SuggestionInteractionMode) {
+        guard mode != selectedInteractionMode else { return }
+
+        selectedInteractionMode = mode
+        persistSelectedInteractionMode(mode)
+    }
+
     func selectEngine(_ engine: SuggestionEngineKind) {
         guard selectedEngine != engine else {
             return
@@ -268,6 +286,32 @@ final class SuggestionSettingsModel: ObservableObject {
         persistUserTags(tags)
     }
 
+    /// Single construction path for the settings snapshot, called by both `snapshot` and `snapshotPublisher`.
+    private static func makeSnapshot(
+        isGloballyEnabled: Bool,
+        disabledAppRules: [DisabledApplicationRule],
+        selectedInteractionMode: SuggestionInteractionMode,
+        selectedEngine: SuggestionEngineKind,
+        selectedWordCountPreset: SuggestionWordCountPreset,
+        isClipboardContextEnabled: Bool,
+        userName: String,
+        userTags: [String]
+    ) -> SuggestionSettingsSnapshot {
+        // The snapshot carries only bundle identifiers, not the full rule values.
+        let disabledBundleIdentifiers = Set(disabledAppRules.map(\.bundleIdentifier))
+        return SuggestionSettingsSnapshot(
+            isGloballyEnabled: isGloballyEnabled, disabledAppBundleIdentifiers: disabledBundleIdentifiers,
+            selectedInteractionMode: selectedInteractionMode, selectedEngine: selectedEngine,
+            selectedWordCountPreset: selectedWordCountPreset, isClipboardContextEnabled: isClipboardContextEnabled,
+            userName: userName, userTags: userTags
+        )
+    }
+
+    /// Stores the mode's raw value under `selectedInteractionModeDefaultsKey`.
+    private func persistSelectedInteractionMode(_ mode: SuggestionInteractionMode) {
+        userDefaults.set(mode.rawValue, forKey: Self.selectedInteractionModeDefaultsKey)
+    }
+
     private func persistSelectedEngine(_ engine: SuggestionEngineKind) {
         userDefaults.set(engine.rawValue, forKey: Self.selectedEngineDefaultsKey)
     }
@@ -402,22 +446,24 @@ final class SuggestionSettingsModel: ObservableObject {
 
 extension SuggestionSettingsModel: SuggestionSettingsProviding {
     var snapshotPublisher: AnyPublisher<SuggestionSettingsSnapshot, Never> {
-        Publishers.CombineLatest3(
+        Publishers.CombineLatest4(
             Publishers.CombineLatest4(
                 $isGloballyEnabled,
                 $disabledAppRules,
-                $selectedEngine,
-                $selectedWordCountPreset
+                $selectedInteractionMode,
+                $selectedEngine
             ),
+            $selectedWordCountPreset,
             $isClipboardContextEnabled,
             Publishers.CombineLatest($userName, $userTags)
         )
-        .map { combinedSettings, clipboardContextEnabled, profile in
-            let (globallyEnabled, disabledAppRules, engine, wordCountPreset) = combinedSettings
+        .map { coreSettings, wordCountPreset, clipboardContextEnabled, profile in
+            let (globallyEnabled, disabledAppRules, interactionMode, engine) = coreSettings
             let (userName, userTags) = profile
-            return SuggestionSettingsSnapshot(
+            return Self.makeSnapshot(
                 isGloballyEnabled: globallyEnabled,
-                disabledAppBundleIdentifiers: Set(disabledAppRules.map(\.bundleIdentifier)),
+                disabledAppRules: disabledAppRules,
+                selectedInteractionMode: interactionMode,
                 selectedEngine: engine,
                 selectedWordCountPreset: wordCountPreset,
                 isClipboardContextEnabled: clipboardContextEnabled,
diff --git a/tabby/UI/MenuBarView.swift b/tabby/UI/MenuBarView.swift
index b74d00f..9e9b09f 100644
--- a/tabby/UI/MenuBarView.swift
+++ b/tabby/UI/MenuBarView.swift
@@ -60,7 +60,7 @@ struct MenuBarView: View {
 
             Spacer(minLength: 0)
 
-            Text("\(suggestionCoordinator.totalTabAcceptedWordCount) words accepted")
+            Text("\(suggestionSettings.selectedInteractionMode.displayLabel) - \(suggestionCoordinator.totalTabAcceptedWordCount) words accepted")
                 .font(.subheadline)
                 .foregroundStyle(.secondary)
         }
@@ -88,6 +88,17 @@ struct MenuBarView: View {
                     .controlSize(.small)
             }
 
+            MenuBarPickerRow(title: "Mode") {
+                Picker("Mode", selection: selectedInteractionModeBinding) {
+                    ForEach(SuggestionInteractionMode.allCases) { mode in
+                        Text(mode.displayLabel)
+                            .tag(mode)
+                    }
+                }
+                .labelsHidden()
+                .pickerStyle(.menu)
+            }
+
             MenuBarPickerRow(title: "Indicator") {
                 Picker("Indicator", selection: selectedIndicatorModeBinding) {
                     ForEach(ActivationIndicatorMode.allCases) { mode in
@@ -255,6 +266,15 @@ struct MenuBarView: View {
         )
     }
 
+    /// Picker binding: reads the mode from the settings model and routes writes through
+    /// `selectInteractionMode(_:)`, since the property itself is `private(set)`.
+    private var selectedInteractionModeBinding: Binding<SuggestionInteractionMode> {
+        Binding(
+            get: { suggestionSettings.selectedInteractionMode },
+            set: { suggestionSettings.selectInteractionMode($0) }
+        )
+    }
+
     private var selectedEngineBinding: Binding<SuggestionEngineKind> {
         Binding(
             get: { suggestionSettings.selectedEngine },
diff --git a/tabby/UI/SettingsView.swift b/tabby/UI/SettingsView.swift
index 6cb96f6..cc30224 100644
--- a/tabby/UI/SettingsView.swift
+++ b/tabby/UI/SettingsView.swift
@@ -110,11 +110,22 @@ struct SettingsView: View {
 
     @ViewBuilder
     private var autocompleteSection: some View {
-        Section("Autocomplete") {
+        Section("Writing") {
             Toggle("Enable Globally", isOn: globallyEnabledBinding)
 
             Toggle("Clipboard Context", isOn: clipboardContextEnabledBinding)
 
+            Picker("Interaction Mode", selection: selectedInteractionModeBinding) {
+                ForEach(SuggestionInteractionMode.allCases) { mode in
+                    Text(mode.displayLabel)
+                        .tag(mode)
+                }
+            }
+
+            Text("Compose prepares a full draft. Autocomplete predicts a short continuation.")
+                .font(.caption)
+                .foregroundStyle(.secondary)
+
             Picker("Indicator", selection: selectedIndicatorModeBinding) {
                 ForEach(ActivationIndicatorMode.allCases) { mode in
                     Text(mode.displayLabel)
@@ -450,6 +461,15 @@ struct SettingsView: View {
         )
     }
 
+    /// Picker binding: reads the mode from the settings model and routes writes through
+    /// `selectInteractionMode(_:)`, since the property itself is `private(set)`.
+    private var selectedInteractionModeBinding: Binding<SuggestionInteractionMode> {
+        Binding(
+            get: { suggestionSettings.selectedInteractionMode },
+            set: { suggestionSettings.selectInteractionMode($0) }
+        )
+    }
+
     /// The color picker always needs a concrete color. When the user has not picked one yet we feed
     /// it the current automatic fallback so the control still previews something sensible. The first
     /// user interaction promotes that preview into a persisted custom color.

From 069bf5165f1763932a94a79ed9cd09842435453b Mon Sep 17 00:00:00 2001
From: Jacob Fu <141651335+FuJacob@users.noreply.github.com>
Date: Wed, 20 May 2026 20:54:26 -0700
Subject: [PATCH 2/2] WIP: Compose interaction mode - additional changes

Builds on the Compose interaction mode foundation with additional
service-layer plumbing, context normalization, prompt rendering,
and overlay/insertion support for compose-style suggestions.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 rfc.md                                        | 605 ++++++++++++++++++
 .../SuggestionCoordinator+Input.swift         |   5 +-
 .../SuggestionCoordinator+Lifecycle.swift     |   1 +
 .../SuggestionCoordinator+Prediction.swift    |   2 +-
 .../Coordinators/SuggestionCoordinator.swift  |   3 +
 tabby/App/Core/TabbyAppEnvironment.swift      |   2 +
 tabby/Models/LlamaRuntimeModels.swift         |   6 +
 tabby/Models/SuggestionModels.swift           |  73 ++-
 .../Models/SuggestionSubsystemContracts.swift |   6 +
 .../Context/ComposeContextCollector.swift     | 217 +++++++
 .../Input/InputSuppressionController.swift    |   9 +-
 .../FoundationModelSuggestionEngine.swift     |   6 +
 .../Runtime/LlamaRuntimeManager.swift         |  36 +-
 .../Runtime/LlamaSuggestionEngine.swift       |  50 +-
 .../Runtime/SuggestionEngineRouter.swift      |  11 +-
 .../Suggestion/SuggestionInserter.swift       |  79 ++-
 .../SuggestionInteractionState.swift          |  62 +-
 .../SuggestionOverlayPresenter.swift          |  18 +
 tabby/Services/UI/OverlayController.swift     | 104 ++-
 tabby/Support/AXHelper.swift                  |   6 +
 tabby/Support/ComposeContextNormalizer.swift  | 118 ++++
 tabby/Support/ComposePromptRenderer.swift     |  56 ++
 tabby/Support/ComposeRequestFactory.swift     | 102 +++
 tabby/Support/ComposeTextNormalizer.swift     | 125 ++++
 .../SuggestionAvailabilityEvaluator.swift     |  11 +-
 .../ComposeContextNormalizerTests.swift       |  43 ++
 26 files changed, 1704 insertions(+), 52 deletions(-)
 create mode 100644 rfc.md
 create mode 100644 tabby/Services/Context/ComposeContextCollector.swift
 create mode 100644 tabby/Support/ComposeContextNormalizer.swift
 create mode 100644 tabby/Support/ComposePromptRenderer.swift
 create mode 100644 tabby/Support/ComposeRequestFactory.swift
 create mode 100644 tabby/Support/ComposeTextNormalizer.swift
 create mode 100644 tabbyTests/ComposeContextNormalizerTests.swift

diff --git a/rfc.md b/rfc.md
new file mode 100644
index 0000000..4a35e5c
--- /dev/null
+++ b/rfc.md
@@ -0,0 +1,605 @@
+# RFC 0001: Compose Mode
+
+Status: Draft  
+Related issues: [#66](https://github.com/FuJacob/tabby/issues/66), [#67](https://github.com/FuJacob/tabby/issues/67), [#68](https://github.com/FuJacob/tabby/issues/68), [#69](https://github.com/FuJacob/tabby/issues/69), [#70](https://github.com/FuJacob/tabby/issues/70)
+
+## Summary
+
+Tabby should support two explicit writing modes:
+
+- Autocomplete Mode: the current behavior. Tabby predicts a short inline continuation and shows ghost text near the caret.
+- Compose Mode: a new deliberate mode where Tabby gathers broader screen and text context, generates a complete draft, and manually types the full response into the focused field when the user accepts it.
+
+The first motivating case is a pull request comment. Instead of only completing the next few words, the user focuses the GitHub comment box, presses `Tab`, and Tabby drafts the full comment based on the surrounding page context and the text already typed in the field.
+
+The core architectural proposal is to model Compose as first-class interaction state, not as a scattered set of booleans inside the existing autocomplete path. Mode selection should live in shared settings, request construction should branch through explicit request/prompt types, and acceptance should use Compose-specific typing rules.
+
+## Goals
+
+- Add a persisted interaction mode: `autocomplete` or `compose`.
+- Keep Autocomplete Mode as the default and preserve current behavior.
+- Give runtime code an immutable mode value through `SuggestionSettingsSnapshot`.
+- Add UI in the menu bar and Settings so the active mode is visible and changeable.
+- Add Compose-specific context gathering from Accessibility tree traversal.
+- Add Compose-specific prompt construction for full comments, replies, and text blocks.
+- Require the local `tabby-depth-1` model for Compose Mode.
+- Prevent users from editing the model choice while Compose Mode is active.
+- Manually type the generated Compose draft only when focus and context are still valid.
+
+## Non-Goals
+
+- Do not replace the existing autocomplete pipeline.
+- Do not add hosted API dependencies.
+- Do not accept, persist, log, or commit provider API keys or other secrets as part of Compose Mode.
+- Do not build a chat interface or multi-turn agent workflow.
+- Do not support rewrite/edit workflows in the first implementation.
+- Do not send raw screenshots or unbounded AX tree text directly to the model.
+- Do not make Compose work in every possible macOS text surface in the first slice.
+
+## Privacy And Secret Handling
+
+Compose Mode must preserve Tabby's local-first contract. The first implementation should use only on-device runtimes and local context sources.
+
+Operational guardrails:
+
+- Do not add OpenAI, Anthropic, or other hosted-provider clients for this RFC.
+- Do not store API keys in source, fixtures, markdown, `UserDefaults`, debug logs, prompt previews, or crash diagnostics.
+- If a future RFC adds hosted providers, secrets must be stored in Keychain, must be opt-in per provider, and must not be mixed into the Compose Mode implementation proposed here.
+- Compose prompt previews and diagnostics should be bounded summaries. Full surrounding AX context can include sensitive page text and should not be retained longer than the active generation flow needs it.
+
+## Current System
+
+Tabby's current pipeline is optimized around short inline continuations:
+
+1. `FocusTracker` polls the active AX focus and produces a `FocusSnapshot`.
+2. `SuggestionCoordinator` decides whether focus, permissions, settings, and input events allow prediction.
+3. `SuggestionRequestFactory` builds one `SuggestionRequest`.
+4. `LlamaPromptRenderer` or `FoundationModelPromptRenderer` formats an autocomplete prompt.
+5. `SuggestionEngineRouter` routes the request to Apple Intelligence or llama.
+6. `SuggestionTextNormalizer` reduces raw model output into a short continuation.
+7. `SuggestionInteractionState` stores the active suggestion tail.
+8. `SuggestionInserter` commits accepted chunks when the user presses `Tab`.
+
+That design is correct for autocomplete because it minimizes prompt size, latency, and insertion risk. Compose Mode needs a different contract because it intentionally produces larger output from broader context.
+
+Two current ownership boundaries matter for implementation:
+
+- `SuggestionSettingsModel` owns durable product preferences such as engine, word-count preset, clipboard context, profile, and the proposed interaction mode.
+- `RuntimeBootstrapModel` and `LlamaRuntimeManager` own local model selection and loading. Compose should coordinate with them; it should not move model persistence into `SuggestionSettingsModel`.
+
+## Proposed Product Behavior
+
+Compose Mode should be a global mode in the first implementation.
+
+When Compose Mode is active:
+
+1. The menu bar and Settings UI show "Compose" as the selected mode.
+2. The local model selection UI is locked to `tabby-depth-1`.
+3. Pressing `Tab` in a supported empty or partially typed text field starts Compose generation.
+4. Tabby gathers the focused field context plus nearby readable page context.
+5. Tabby generates one full draft.
+6. Tabby shows a preview state before typing starts.
+7. Pressing `Tab` again accepts the full draft if focus is still valid.
+8. Tabby types the accepted draft into the field with a visible manual typing effect.
+9. `Esc`, focus change, mode change, disabled app state, or global disable cancels the draft.
+
+This two-step Tab behavior is recommended for safety: first `Tab` generates, second `Tab` starts manual typing. A single `Tab` that immediately types a multi-sentence draft would be fast, but it makes accidental long output too easy and conflicts with the current product expectation that `Tab` accepts only visible ghost text.
+
+## Mode Model
+
+Add a new value type in `tabby/Models/`, likely next to `SuggestionEngineKind`:
+
+```swift
+enum SuggestionInteractionMode: String, CaseIterable, Equatable, Hashable, Sendable, Identifiable {
+    case autocomplete
+    case compose
+}
+```
+
+Add this to `SuggestionSettingsModel`:
+
+- `@Published private(set) var selectedInteractionMode: SuggestionInteractionMode`
+- `private static let selectedInteractionModeDefaultsKey = "tabbySelectedInteractionMode"`
+- `func selectInteractionMode(_ mode: SuggestionInteractionMode)`
+
+Add this to `SuggestionSettingsSnapshot`:
+
+- `let selectedInteractionMode: SuggestionInteractionMode`
+
+Implementation notes:
+
+- Put the enum near `SuggestionEngineKind` in `SuggestionEngineModels.swift` so engine, mode, and snapshot values stay together.
+- Default to `.autocomplete` when no persisted value exists. There is no legacy Compose state to migrate.
+- Update both `SuggestionSettingsModel.snapshot` and `snapshotPublisher`. Prefer a private `makeSnapshot(...)` helper so the direct snapshot and Combine publisher cannot drift when future settings are added.
+- `handleSuggestionSettingsChange(_:)` must treat mode changes like engine changes: cancel work, reset cached generation context, clear active sessions, and hide stale UI before scheduling new work.
+
+Why this belongs in settings:
+
+- The selected mode must persist across restart.
+- The coordinator should react to mode changes, not own preference storage.
+- Prompt construction and runtime routing need the same immutable setting value.
+- Tests can assert snapshot emission without driving UI.
+
+## Runtime Model Requirement
+
+Compose Mode should require `tabby-depth-1`, currently mapped to:
+
+```text
+gemma-3n-E4B-it-Q4_K_M.gguf
+```
+
+Recommended behavior:
+
+- Entering Compose Mode selects `tabby-depth-1` if installed.
+- If not installed, Compose Mode enters an unavailable state with a CTA to download it.
+- While Compose Mode is active, model selection controls are disabled and explain that Compose requires `tabby-depth-1`.
+- Switching back to Autocomplete restores the user's previous local model selection.
+- The required model should be identified by filename or capability, not by display label. Display labels are product copy and should not drive runtime behavior.
+
+Tradeoff:
+
+- Forcing one model reduces user control, but it protects Compose quality and simplifies evals.
+- Allowing every local model would be more flexible, but smaller models are more likely to produce incomplete or unsafe full drafts.
+
+Implementation notes:
+
+- Add a canonical runtime capability such as `RuntimeModelCapability.compose` or constants/helpers such as `RuntimeModelCatalog.composeRequiredFilename` and `RuntimeModelCatalog.supportsCompose(filename:)`.
+- Keep the user's previous autocomplete model selection in the runtime-selection layer, not in `SuggestionSettingsModel`. The mode setting says what interaction the user wants; the runtime model layer says which GGUF file is selected.
+- Do not silently switch to an arbitrary custom model if `tabby-depth-1` is missing. Surface Compose as unavailable and offer the download action.
+- Avoid a reload loop: mode changes can request a model switch, and model switches already call `prepareForRuntimeModelSwitch()`. The implementation should make one owner responsible for this transition so cancellation and UI messaging happen once.
+
+## Mode-Aware Availability
+
+`SuggestionAvailabilityEvaluator` currently gates autocomplete on Accessibility, Input Monitoring, Screen Recording, app blocklist, global enablement, and focus support. Compose needs mode-aware gating because the first AX-tree slice can work without screenshot/OCR context.
+
+Recommended behavior:
+
+- Both modes require Accessibility-derived focused text support and Input Monitoring.
+- Autocomplete keeps the current Screen Recording gate while visual context is part of the active autocomplete prompt path.
+- Compose should require Screen Recording only if Compose uses screenshot/OCR visual context in that generation. AX-only Compose context should not be blocked by missing Screen Recording permission.
+- The disabled reason should name the active mode so users understand whether Tabby is waiting on autocomplete visual context or Compose context.
+
+Implementation note: extend `SuggestionAvailabilityEvaluator` to accept `SuggestionSettingsSnapshot` or `SuggestionInteractionMode` instead of passing separate booleans indefinitely. This keeps the permission matrix in one pure place as modes diverge.
+
+## Compose Context Gathering
+
+Add a new service boundary:
+
+```text
+tabby/Services/Context/ComposeContextCollector.swift
+```
+
+Responsibility:
+
+- Reacquire or receive the current focused AX element on the main actor.
+- Validate that the AX element still matches the active `FocusedInputContext` before using collected context.
+- Walk up to the parent window or nearest stable container.
+- Walk down the subtree with bounded depth-first traversal.
+- Extract readable text only from allowlisted roles.
+- Normalize, deduplicate, and bound the text before prompt construction.
+
+This should not live inside `SuggestionCoordinator` because tree traversal is side-effectful and app-specific, while the normalization that follows it can be tested independently as pure helpers.
+
+### Tree Walk Algorithm
+
+Initial algorithm:
+
+1. Start with the current `FocusedInputContext` value created from `FocusSnapshot.context`.
+2. On the main actor, reacquire the focused AX element through the focus subsystem or `AXHelper.focusedElement()`.
+3. Validate process identity, role/subrole, and focus identity before collecting. Do not store raw `AXUIElement` references in `FocusedInputContext`; those values are not `Sendable` and can become stale across async boundaries.
+4. Resolve the parent chain until reaching `AXWindow` or climbing `maxAncestorDepth` levels.
+5. Run bounded DFS from that root.
+6. Visit at most `maxNodes` nodes and descend at most `maxDFSDepth` levels below that root.
+7. Read `AXRole`, `AXValue`, `AXTitle`, `AXDescription`, and `AXChildren` through `AXHelper` so Core Foundation ownership and casting stay centralized.
+8. Keep text only for allowed readable roles:
+
+```swift
+let allowedRoles: Set<String> = [
+    "AXStaticText",
+    "AXTextArea",
+    "AXTextField",
+    "AXDocument"
+]
+```
+
+9. Skip known noisy or unsafe roles:
+
+```swift
+let blockedRoles: Set<String> = [
+    "AXButton",
+    "AXCheckBox",
+    "AXRadioButton",
+    "AXScrollBar",
+    "AXMenuItem",
+    "AXImage"
+]
+```
+
+10. Join extracted strings using newlines, not only spaces, so document structure survives.
+11. Normalize whitespace, repeated symbols, navigation noise, and duplicate lines.
+12. Bound the final context by characters and approximate tokens.
+13. Check cancellation between traversal batches so a focus change or mode change can stop slow AX work quickly.
+
+Why bounded DFS matters:
+
+- AX trees can be huge in browsers and Electron apps.
+- Some nodes are slow or unreliable to query.
+- A hard node/depth budget prevents UI stalls and protects local generation latency.
+
+Recommended first limits:
+
+- `maxAncestorDepth = 8`
+- `maxDFSDepth = 12`
+- `maxNodes = 500`
+- `maxRawContextCharacters = 30_000`
+- `maxNormalizedContextCharacters = 8_000`
+
+These are starting values and should be tuned with real GitHub, Gmail, Slack, Discord, and Notes examples.
+
+The collector API should be `async` even if the first implementation performs AX calls on the main actor. That gives the coordinator a cancellation point, lets the implementation yield between traversal batches, and preserves the option to move pure normalization work off the main actor later.
+
+### Context Normalization
+
+Add pure helpers in `tabby/Support/ComposeContextNormalizer.swift`.
+
+The normalizer should:
+
+- Trim leading and trailing whitespace.
+- Collapse runs of spaces and tabs.
+- Preserve meaningful newlines between extracted text blocks.
+- Drop lines with only punctuation or repeated symbols.
+- Drop obvious repeated navigation/action labels.
+- Deduplicate exact repeated lines while preserving first occurrence.
+- Bound individual lines to avoid one massive AX value dominating the prompt.
+- Bound the final normalized context.
+
+This split follows the project change strategy: pure `Support/` rules first, service boundary second.
+
+## Request Model
+
+The current `SuggestionRequest` encodes an autocomplete request. Compose should not reuse it by overloading its fields with mode-specific meaning.
+
+Recommended approach:
+
+```swift
+enum GenerationRequest: Equatable, Sendable {
+    case autocomplete(SuggestionRequest)
+    case compose(ComposeRequest)
+}
+```
+
+Add:
+
+```swift
+struct ComposeRequest: Equatable, Sendable {
+    let context: FocusedInputContext
+    let typedPrefix: String
+    let trailingText: String
+    let surroundingContext: String
+    let visualContextSummary: String?
+    let clipboardContext: String?
+    let applicationName: String
+    let generation: UInt64
+    let maxPredictionTokens: Int
+    let temperature: Double
+    let topK: Int
+    let topP: Double
+    let minP: Double
+    let repetitionPenalty: Double
+    let randomSeed: UInt32?
+    let userName: String?
+    let userTags: [String]?
+}
+```
+
+Why a separate request type:
+
+- Autocomplete wants a short continuation; Compose wants a full draft.
+- Autocomplete normalizes aggressively to one fragment; Compose must preserve paragraphs.
+- Compose will need larger token budgets and possibly different sampling values.
+- Compose may combine AX, optional screenshot/OCR summary, and optional clipboard context, while autocomplete keeps those as small prompt augmentations.
+- Tests can validate each prompt contract independently.
+
+Alternative: add `mode` and optional fields to `SuggestionRequest`.
+
+- Pros: smaller diff and fewer protocol changes.
+- Cons: many fields become invalid depending on mode, which creates weaker invariants and more defensive code.
+
+Recommendation: use a sum type (`GenerationRequest`) or parallel protocol methods. The extra type cost is worth the stronger model.
+
+Protocol migration options:
+
+- Replace `SuggestionGenerating.generateSuggestion(for:)` with `generate(for request: GenerationRequest)` and make unsupported combinations explicit in the router.
+- Or add a parallel `generateCompose(for:)` path first and keep autocomplete signatures unchanged until Compose is proven.
+
+The lower-risk first slice is the parallel method because it avoids forcing Apple Intelligence, llama autocomplete, and tests to understand a request case they do not support yet. A later cleanup can collapse both into `GenerationRequest` once both modes are stable.
+
+## Prompt Design
+
+Add `ComposePromptRenderer` in `tabby/Support/`.
+
+The prompt should make the output contract explicit:
+
+- Draft the complete text that should be typed at the caret.
+- Use the user's typed prefix as the start or intent signal.
+- Use surrounding AX context to infer the situation.
+- For PR comments, write the actual comment, not analysis about the comment.
+- Return only the typeable text.
+- No labels, markdown fences, explanations, or quoted prompt text.
+- Match the tone and language of the context.
+- Keep output bounded.
+
+Example prompt shape:
+
+```text
+Task:
+- Write the complete text the user wants typed at the caret.
+- This is Compose Mode, not autocomplete and not chat.
+- Return only the final typeable draft.
+- Do not include labels, explanations, or quote the surrounding context.
+- If the context is insufficient, write a concise useful draft based on the typed prefix.
+
+User profile:
+...
+
+App:
+GitHub
+
+Text already typed in the focused field:
+...
+
+Relevant surrounding context:
+...
+
+Final instruction:
+Write the full comment now.
+```
+
+Compose should have its own output normalizer, likely `ComposeTextNormalizer`, because the existing `SuggestionTextNormalizer` intentionally truncates to a short inline continuation.
+
+`ComposeTextNormalizer` should preserve paragraph boundaries while still removing non-typeable wrapper text. It should strip labels such as `Final answer:`, markdown fences, surrounding quotes that wrap the entire response, and repeated prompt fragments, but it should not collapse multiline drafts into one sentence.
+
+## Engine Routing
+
+`SuggestionEngineRouter` currently routes by engine kind. Compose adds a second routing dimension:
+
+- Interaction mode: autocomplete vs compose.
+- Engine backend: Apple Intelligence vs llama.
+
+For the first implementation, Compose should route only to llama with `tabby-depth-1`.
+
+Recommended behavior:
+
+- `Autocomplete + Apple Intelligence`: supported.
+- `Autocomplete + llama`: supported.
+- `Compose + llama + tabby-depth-1`: supported.
+- `Compose + Apple Intelligence`: unavailable in first slice.
+- `Compose + llama + other model`: unavailable or auto-switch to `tabby-depth-1`.
+
+This keeps Compose local-first while avoiding a partial Apple prompt path before the product behavior is proven.
+
+Routing should reject unsupported combinations before calling a backend. For example, `Compose + Apple Intelligence` should produce a user-facing unavailable state rather than sending a Compose prompt through `FoundationModelSuggestionEngine` and hoping the backend behaves.
+
+## Acceptance Flow
+
+Autocomplete currently supports partial chunk acceptance. Compose should not.
+
+Compose acceptance should use a separate active session type:
+
+```swift
+enum ActiveGenerationSession: Equatable, Sendable {
+    case autocomplete(ActiveSuggestionSession)
+    case compose(ActiveComposeSession)
+}
+
+struct ActiveComposeSession: Equatable, Sendable {
+    let baseContext: FocusedInputContext
+    let fullText: String
+    let latency: TimeInterval
+}
+```
+
+Compose acceptance rules:
+
+- Type the full draft through a controlled manual typing effect instead of inserting it instantly.
+- Require current focus process, `FocusedInputIdentity`, and compatible content signature to still match the base context. Process-only validation is acceptable for autocomplete's short chunks, but Compose's larger writes need a stricter guard.
+- Require selection state to be compatible with the generated request.
+- Pass `Tab` through when no Compose draft is ready.
+- Cancel on mode changes, focus changes, app-disabled state, permission loss, or global disable.
+- Do not keep a remaining tail after typing.
+
+Manual typing behavior:
+
+- Use synthetic Unicode keyboard events in small chunks so the host app receives normal text input.
+- Type at a bounded cadence that feels deliberate but does not take too long for a full comment.
+- Register synthetic input with `InputSuppressionController` so Tabby does not treat its own typing as user edits.
+- Extend suppression for multi-event insertion. The current autocomplete inserter suppresses one synthetic keydown for one accepted chunk; Compose typing needs either per-chunk rearming or a counted suppression window sized to the number of posted keydown events.
+- Keep cancellation explicit: `Esc`, focus change, mode change, disabled app state, or permission loss should stop any remaining queued typing.
+- Preserve generation/focus validation before starting, and re-check focus between chunks for longer drafts.
+
+Typing strategy tradeoff:
+
+- Synthetic Unicode event input is already used and can be extended into a chunked typing effect.
+- Pasteboard-based insertion is more reliable for long multiline text, but temporarily touches user clipboard state.
+- AX value mutation can be precise, but app support is inconsistent and higher risk.
+
+Recommendation for first implementation: build a `ComposeTypingController` or insertion strategy that sends chunked synthetic Unicode input at a controlled cadence. This preserves the desired "Tabby is typing this out" product feel while reusing the same macOS input primitive as autocomplete acceptance. Pasteboard insertion should remain a fallback to evaluate only if multiline synthetic typing is unreliable in target apps.
+
+State ownership: replace `SuggestionInteractionState.activeSession: ActiveSuggestionSession?` with one active sum type rather than adding a parallel `activeComposeSession` property. A single active-session slot prevents autocomplete and Compose sessions from both appearing valid after a mode switch or focus race.
+
+## UI
+
+Menu bar:
+
+- Add a segmented picker or compact menu row for "Autocomplete" and "Compose".
+- Show current mode in the status area.
+- Disable model picker in Compose Mode and show `tabby-depth-1`.
+- If `tabby-depth-1` is missing, show a download action.
+
+Settings:
+
+- Add an "Interaction Mode" control near engine/model settings.
+- Explain in one concise sentence that Compose writes a full draft while Autocomplete predicts a short continuation.
+- Keep deeper implementation language out of user-facing UI.
+
+Overlay:
+
+- Autocomplete keeps current ghost text.
+- Compose should use a distinct preview surface, probably a compact multiline preview near the field.
+- The preview must make acceptance deliberate. A collapsed preview plus "Tab to type" state is safer than rendering a long ghost paragraph inline.
+
+## Implementation Plan
+
+### Phase 1: Mode Foundation
+
+- Add `SuggestionInteractionMode`.
+- Persist mode in `SuggestionSettingsModel`.
+- Include mode in `SuggestionSettingsSnapshot`.
+- Add snapshot publisher coverage.
+- Add UI controls in Menu Bar and Settings.
+- On mode changes, cancel active work, clear overlay, and reset active sessions.
+- Keep Autocomplete Mode behavior unchanged.
+
+Validation:
+
+- Unit tests for default mode.
+- Unit tests for persistence.
+- Unit tests for snapshot emission.
+- Build with `xcodebuild -project tabby.xcodeproj -scheme tabby -destination 'platform=macOS' build`.
+
+### Phase 2: Blinder-Equipped Tree Walker
+
+- Add `ComposeContextNormalizer`.
+- Add `ComposeContextCollector`.
+- Add role allowlist and blocked-role filters.
+- Add bounded ancestor walk and DFS.
+- Add debug diagnostics for visited nodes, retained text count, and dropped text count.
+- Do not feed raw context directly to the model.
+
+Validation:
+
+- Unit tests for context normalization.
+- Unit tests for context bounding and duplicate-line removal.
+- Manual AX tests in GitHub PR comments, Gmail reply, Slack/Discord message fields, and Notes.
+- Confirm traversal budget prevents stalls on large browser pages.
+
+### Phase 3: Compose Request And Prompt
+
+- Add `ComposeRequest`.
+- Add `ComposePromptRenderer`.
+- Add `ComposeTextNormalizer`.
+- Add Compose token/sampling defaults.
+- Route Compose requests to llama only.
+- Require `tabby-depth-1`.
+- Add mode-aware availability for required model, engine, and optional Screen Recording usage.
+
+Validation:
+
+- Unit tests for PR comment, email reply, empty prefix, and selected text cases.
+- Prompt snapshots for stable output shape.
+- Normalizer tests for multiline drafts, labels, markdown fences, and quoted whole-response wrappers.
+- Tests that autocomplete prompt output remains unchanged.
+
+### Phase 4: Compose Generation And Preview
+
+- Add active Compose session state.
+- Generate on first `Tab` when Compose Mode has no ready draft.
+- Show preview state instead of inline ghost tail.
+- Cancel stale work with the existing work ID and generation checks.
+- Preserve existing autocomplete debounce behavior separately.
+
+Validation:
+
+- Tests for stale generation drop.
+- Tests for mode-change cancellation.
+- Tests for disabled-app and permission cancellation.
+
+### Phase 5: Manual Draft Typing
+
+- Type the whole Compose draft on explicit accept.
+- Keep focus and context validation before typing starts.
+- Add a typing controller that emits bounded synthetic text chunks.
+- Stop queued typing on cancellation or stale focus.
+- Add insertion strategy abstraction if multiline synthetic typing proves unreliable.
+- Clear session after successful typing.
+- Pass `Tab` through when no valid Compose draft exists.
+
+Validation:
+
+- Unit tests for valid typing path.
+- Unit tests for stale focus prevention.
+- Unit tests for pass-through behavior.
+- Manual multiline typing tests in browser comments and native AppKit text fields.
+
+## Testing Strategy
+
+Prioritize pure logic first:
+
+- `SuggestionSettingsModel` mode persistence and snapshots.
+- `ComposeContextNormalizer`.
+- `ComposePromptRenderer`.
+- `ComposeTextNormalizer`.
+- Request factory branching.
+- Session acceptance rules.
+
+Then test orchestration:
+
+- Mode change cancels active autocomplete.
+- Mode change cancels active compose.
+- Compose unavailable state when required model is missing.
+- Compose result is dropped if generation changes.
+- Compose result is dropped if focus changes.
+- Compose does not require Screen Recording for AX-only context.
+- Compose requires Screen Recording only when optional screenshot/OCR context is enabled for that request.
+
+Manual QA scenarios:
+
+- GitHub PR comment with visible diff and previous comments.
+- GitHub issue reply.
+- Gmail reply.
+- Slack or Discord thread reply.
+- Notes app text field.
+- Empty focused field with insufficient context.
+- Secure/password fields remain blocked.
+- Disabled app remains disabled.
+
+## Risks
+
+- AX traversal can collect irrelevant page chrome.
+- AX traversal can be slow in large browser trees.
+- Large prompts can exceed the llama context window.
+- Full draft typing has higher user-visible risk than short autocomplete.
+- Synthetic keyboard insertion may be unreliable for long multiline text.
+- Forcing `tabby-depth-1` may surprise users who intentionally selected a faster model.
+
+Mitigations:
+
+- Use strict role filtering and context budgets.
+- Keep Compose behind explicit mode selection.
+- Require preview before typing starts.
+- Use stale-focus checks before accepting.
+- Add diagnostics for prompt size and context source.
+- Preserve the user's prior autocomplete model selection when leaving Compose Mode.
+
+## Open Questions
+
+- Should Compose Mode eventually support a temporary one-shot command in addition to global mode?
+- Should first `Tab` generate and second `Tab` type, or should generation happen proactively after typing?
+- Should Compose support selected text as an instruction target in v1, or block selection like autocomplete?
+- What is the maximum draft length before stronger confirmation, slower typing, or explicit cancellation controls are needed?
+- Should Compose be limited to known writing apps at first?
+- Should Apple Intelligence support Compose later, or should Compose remain local llama-only?
+- How much GitHub-specific structure should the context normalizer preserve?
+
+## Recommended First Slice
+
+Implement Phase 1 and Phase 2 first.
+
+Why:
+
+- Issue #67 needs shared mode state before any prompt or typing work can stay clean.
+- The AX tree walker is the highest-uncertainty technical risk.
+- Building the mode foundation and context collector first gives real data for prompt design without touching typing behavior yet.
+
+After that, Phase 3 can define the Compose prompt and evaluate `tabby-depth-1` output quality before enabling manual draft typing.
+
diff --git a/tabby/App/Coordinators/SuggestionCoordinator+Input.swift b/tabby/App/Coordinators/SuggestionCoordinator+Input.swift
index 3bbc1cf..5834774 100644
--- a/tabby/App/Coordinators/SuggestionCoordinator+Input.swift
+++ b/tabby/App/Coordinators/SuggestionCoordinator+Input.swift
@@ -12,6 +12,7 @@ extension SuggestionCoordinator {
         if SuggestionAvailabilityEvaluator.shouldSchedulePrediction(
             globallyEnabled: settingsSnapshot.isGloballyEnabled,
             disabledAppBundleIdentifiers: settingsSnapshot.disabledAppBundleIdentifiers,
+            interactionMode: settingsSnapshot.selectedInteractionMode,
             inputMonitoringGranted: permissionManager.inputMonitoringGranted,
             screenRecordingGranted: permissionManager.screenRecordingGranted,
             focusSnapshot: focusModel.snapshot
@@ -33,6 +34,7 @@ extension SuggestionCoordinator {
         if let disabledReason = SuggestionAvailabilityEvaluator.disabledReason(
             globallyEnabled: settingsSnapshot.isGloballyEnabled,
             disabledAppBundleIdentifiers: settingsSnapshot.disabledAppBundleIdentifiers,
+            interactionMode: settingsSnapshot.selectedInteractionMode,
             inputMonitoringGranted: permissionManager.inputMonitoringGranted,
             screenRecordingGranted: permissionManager.screenRecordingGranted,
             focusSnapshot: snapshot
@@ -78,6 +80,7 @@ extension SuggestionCoordinator {
         if let disabledReason = SuggestionAvailabilityEvaluator.disabledReason(
             globallyEnabled: settingsSnapshot.isGloballyEnabled,
             disabledAppBundleIdentifiers: settingsSnapshot.disabledAppBundleIdentifiers,
+            interactionMode: settingsSnapshot.selectedInteractionMode,
             inputMonitoringGranted: permissionManager.inputMonitoringGranted,
             screenRecordingGranted: permissionManager.screenRecordingGranted,
             focusSnapshot: focusModel.snapshot
@@ -90,7 +93,7 @@ extension SuggestionCoordinator {
             return acceptCurrentSuggestion()
         }
 
-        if let activeSession = interactionState.activeSession {
+        if let activeSession = interactionState.activeAutocompleteSession {
             return handleInputEvent(event, with: activeSession)
         }
 
diff --git a/tabby/App/Coordinators/SuggestionCoordinator+Lifecycle.swift b/tabby/App/Coordinators/SuggestionCoordinator+Lifecycle.swift
index 9aa475f..5e8739b 100644
--- a/tabby/App/Coordinators/SuggestionCoordinator+Lifecycle.swift
+++ b/tabby/App/Coordinators/SuggestionCoordinator+Lifecycle.swift
@@ -71,6 +71,7 @@ extension SuggestionCoordinator {
         if SuggestionAvailabilityEvaluator.shouldSchedulePrediction(
             globallyEnabled: settingsSnapshot.isGloballyEnabled,
             disabledAppBundleIdentifiers: settingsSnapshot.disabledAppBundleIdentifiers,
+            interactionMode: settingsSnapshot.selectedInteractionMode,
             inputMonitoringGranted: permissionManager.inputMonitoringGranted,
             screenRecordingGranted: permissionManager.screenRecordingGranted,
             focusSnapshot: focusModel.snapshot
diff --git a/tabby/App/Coordinators/SuggestionCoordinator+Prediction.swift b/tabby/App/Coordinators/SuggestionCoordinator+Prediction.swift
index d4ce05e..d18b59f 100644
--- a/tabby/App/Coordinators/SuggestionCoordinator+Prediction.swift
+++ b/tabby/App/Coordinators/SuggestionCoordinator+Prediction.swift
@@ -273,7 +273,7 @@ extension SuggestionCoordinator {
     /// This is the heart of partial acceptance: a text change is not automatically "stale" anymore.
     /// It may instead mean "the user consumed the next expected part of the suggestion."
     func reconcileActiveSession(with snapshot: FocusSnapshot) {
-        guard interactionState.activeSession != nil else {
+        guard interactionState.activeAutocompleteSession != nil else {
             if overlayState.isVisible {
                 hideOverlay(reason: "Overlay hidden because no ready suggestion remains.")
             }
diff --git a/tabby/App/Coordinators/SuggestionCoordinator.swift b/tabby/App/Coordinators/SuggestionCoordinator.swift
index b9c6f71..5e09cfe 100644
--- a/tabby/App/Coordinators/SuggestionCoordinator.swift
+++ b/tabby/App/Coordinators/SuggestionCoordinator.swift
@@ -43,6 +43,7 @@ final class SuggestionCoordinator: ObservableObject {
     let suggestionSettings: any SuggestionSettingsProviding
     let clipboardContextProvider: any ClipboardContextProviding
     let visualContextCoordinator: any VisualContextCoordinating
+    let composeContextCollector: ComposeContextCollector
     let interactionState: SuggestionInteractionState
     let workController: SuggestionWorkController
     let configuration: SuggestionConfiguration
@@ -71,6 +72,7 @@ final class SuggestionCoordinator: ObservableObject {
         suggestionSettings: any SuggestionSettingsProviding,
         clipboardContextProvider: any ClipboardContextProviding,
         visualContextCoordinator: any VisualContextCoordinating,
+        composeContextCollector: ComposeContextCollector,
         interactionState: SuggestionInteractionState,
         workController: SuggestionWorkController,
         configuration: SuggestionConfiguration,
@@ -88,6 +90,7 @@ final class SuggestionCoordinator: ObservableObject {
         self.suggestionSettings = suggestionSettings
         self.clipboardContextProvider = clipboardContextProvider
         self.visualContextCoordinator = visualContextCoordinator
+        self.composeContextCollector = composeContextCollector
         self.interactionState = interactionState
         self.workController = workController
         self.configuration = configuration
diff --git a/tabby/App/Core/TabbyAppEnvironment.swift b/tabby/App/Core/TabbyAppEnvironment.swift
index df5c58a..7f050b4 100644
--- a/tabby/App/Core/TabbyAppEnvironment.swift
+++ b/tabby/App/Core/TabbyAppEnvironment.swift
@@ -79,6 +79,7 @@ final class TabbyAppEnvironment {
             screenshotContextGenerator: screenshotContextGenerator,
             screenRecordingPermissionProvider: { permissionManager.screenRecordingGranted }
         )
+        let composeContextCollector = ComposeContextCollector()
         let suggestionEngine: any SuggestionGenerating = SuggestionEngineRouter(
             suggestionSettings: suggestionSettings,
             foundationModelEngine: FoundationModelSuggestionEngine(
@@ -99,6 +100,7 @@ final class TabbyAppEnvironment {
             suggestionSettings: suggestionSettings,
             clipboardContextProvider: clipboardContextProvider,
             visualContextCoordinator: visualContextCoordinator,
+            composeContextCollector: composeContextCollector,
             interactionState: interactionState,
             workController: workController,
             configuration: configuration
diff --git a/tabby/Models/LlamaRuntimeModels.swift b/tabby/Models/LlamaRuntimeModels.swift
index 96ba979..2c5639a 100644
--- a/tabby/Models/LlamaRuntimeModels.swift
+++ b/tabby/Models/LlamaRuntimeModels.swift
@@ -83,6 +83,12 @@ struct DownloadableRuntimeModel: Equatable, Hashable, Sendable, Identifiable {
 }
 
 enum RuntimeModelCatalog {
+    static let composeRequiredFilename = "gemma-3n-E4B-it-Q4_K_M.gguf"
+
+    static func supportsCompose(filename: String?) -> Bool {
+        filename == composeRequiredFilename
+    }
+
     static func displayName(for filename: String) -> String {
         switch filename {
         case "Qwen3-0.6B-Q4_K_M.gguf":
diff --git a/tabby/Models/SuggestionModels.swift b/tabby/Models/SuggestionModels.swift
index 843ffd6..92a42f1 100644
--- a/tabby/Models/SuggestionModels.swift
+++ b/tabby/Models/SuggestionModels.swift
@@ -2,8 +2,8 @@ import CoreGraphics
 import Foundation
 
 /// File overview:
-/// Defines the pure value types that describe Tabby's autocomplete domain:
-/// configuration, generation requests, normalized model output, active suggestion sessions,
+/// Defines the pure value types that describe Tabby's suggestion domain:
+/// configuration, generation requests, normalized model output, active generation sessions,
 /// and overlay visibility.
 ///
 /// This file is intentionally free of AppKit, AX, and runtime side effects so maintainers can
@@ -250,6 +250,36 @@ struct SuggestionResult: Equatable, Sendable {
     let latency: TimeInterval
 }
 
+/// Compose Mode request data. Unlike autocomplete, this asks the model for a complete draft from
+/// broad surrounding context and preserves multiline output.
+struct ComposeRequest: Equatable, Sendable {
+    let context: FocusedInputContext
+    let typedPrefix: String
+    let trailingText: String
+    let surroundingContext: String
+    let visualContextSummary: String?
+    let clipboardContext: String?
+    let applicationName: String
+    let generation: UInt64
+    let maxPredictionTokens: Int
+    let temperature: Double
+    let topK: Int
+    let topP: Double
+    let minP: Double
+    let repetitionPenalty: Double
+    let randomSeed: UInt32?
+    let userName: String?
+    let userTags: [String]?
+}
+
+/// The engine's normalized Compose draft, including raw model text for diagnostics.
+struct ComposeResult: Equatable, Sendable {
+    let generation: UInt64
+    let rawText: String
+    let text: String
+    let latency: TimeInterval
+}
+
 /// Represents one active inline-completion session after the model has produced a suggestion.
 /// The key architectural shift is that a suggestion is no longer "fire once and forget."
 /// Instead, it becomes durable interaction state that can be partially consumed over time.
@@ -319,12 +349,27 @@ struct ActiveSuggestionSession: Equatable, Sendable {
     }
 }
 
+/// A ready Compose draft is accepted as one deliberate write, not as an autocomplete tail.
+struct ActiveComposeSession: Equatable, Sendable {
+    let baseContext: FocusedInputContext
+    let fullText: String
+    let latency: TimeInterval
+}
+
+/// A single active-generation slot prevents autocomplete and Compose sessions from both appearing
+/// valid after mode switches, focus races, or delayed runtime results.
+enum ActiveGenerationSession: Equatable, Sendable {
+    case autocomplete(ActiveSuggestionSession)
+    case compose(ActiveComposeSession)
+}
+
 /// High-level suggestion states surfaced to the menu and overlay logic.
 enum SuggestionDebugState: Equatable {
     case idle
     case disabled(String)
     case debouncing
     case generating
+    case typing
     case ready(text: String, latency: TimeInterval)
     case failed(String)
 
@@ -338,6 +383,8 @@ enum SuggestionDebugState: Equatable {
             return "Debouncing"
         case .generating:
             return "Generating"
+        case .typing:
+            return "Typing"
         case .ready:
             return "Ready"
         case .failed:
@@ -355,6 +402,8 @@ enum SuggestionDebugState: Equatable {
             return "Waiting for typing to settle."
         case .generating:
             return "Requesting a completion from the active suggestion backend."
+        case .typing:
+            return "Typing the accepted Compose draft into the focused field."
         case .ready:
             return "Ready means Tabby has buffered a non-empty normalized completion for this field and can render it as ghost text."
         }
@@ -380,12 +429,13 @@ struct SuggestionOverlayGeometry: Equatable, Sendable {
 enum OverlayState: Equatable {
     case hidden(reason: String)
     case visible(text: String, geometry: SuggestionOverlayGeometry)
+    case composePreview(text: String, geometry: SuggestionOverlayGeometry)
 
     var shortLabel: String {
         switch self {
         case .hidden:
             return "Hidden"
-        case .visible:
+        case .visible, .composePreview:
             return "Visible"
         }
     }
@@ -398,23 +448,28 @@ enum OverlayState: Equatable {
             return "Showing \(text.count) characters near " +
                 "(\(Int(geometry.caretRect.minX)), \(Int(geometry.caretRect.minY))) " +
                 "using \(geometry.caretQuality.label) caret geometry."
+        case let .composePreview(text, geometry):
+            return "Showing Compose preview with \(text.count) characters near " +
+                "(\(Int(geometry.caretRect.minX)), \(Int(geometry.caretRect.minY)))."
         }
     }
 
     var isVisible: Bool {
-        if case .visible = self {
+        switch self {
+        case .visible, .composePreview:
             return true
+        case .hidden:
+            return false
         }
-
-        return false
     }
 
     var visibleText: String? {
-        guard case let .visible(text, _) = self else {
+        switch self {
+        case let .visible(text, _), let .composePreview(text, _):
+            return text
+        case .hidden:
             return nil
         }
-
-        return text
     }
 }
 
diff --git a/tabby/Models/SuggestionSubsystemContracts.swift b/tabby/Models/SuggestionSubsystemContracts.swift
index 773d78b..ec17f15 100644
--- a/tabby/Models/SuggestionSubsystemContracts.swift
+++ b/tabby/Models/SuggestionSubsystemContracts.swift
@@ -39,6 +39,7 @@ protocol SuggestionInputMonitoring: AnyObject {
 @MainActor
 protocol SuggestionGenerating: AnyObject {
     func generateSuggestion(for request: SuggestionRequest) async throws -> SuggestionResult
+    func generateCompose(for request: ComposeRequest) async throws -> ComposeResult
     /// Clears backend-local continuation state when the focused editing context is no longer
     /// continuous. Stateless engines may implement this as a no-op.
     func resetCachedGenerationContext() async
@@ -60,6 +61,10 @@ protocol SuggestionInserting: AnyObject {
     var lastErrorMessage: String? { get }
 
     func insert(_ suggestion: String) -> Bool
+    func typeDraft(
+        _ draft: String,
+        shouldContinue: @escaping @MainActor () -> Bool
+    ) async -> Bool
 }
 
 @MainActor
@@ -68,6 +73,7 @@ protocol SuggestionOverlayControlling: AnyObject {
     var onStateChange: ((OverlayState) -> Void)? { get set }
 
     func showSuggestion(_ text: String, geometry: SuggestionOverlayGeometry)
+    func showComposePreview(_ text: String, geometry: SuggestionOverlayGeometry)
     func hide(reason: String)
 }
 
diff --git a/tabby/Services/Context/ComposeContextCollector.swift b/tabby/Services/Context/ComposeContextCollector.swift
new file mode 100644
index 0000000..cdf2814
--- /dev/null
+++ b/tabby/Services/Context/ComposeContextCollector.swift
@@ -0,0 +1,217 @@
+import ApplicationServices
+import Foundation
+
+/// Bounded Accessibility tree collection for Compose Mode.
+///
+/// This service is intentionally separate from `SuggestionCoordinator`: AX tree walking is a
+/// side-effectful macOS boundary with app-specific failure modes. The coordinator should ask for a
+/// bounded, normalized context string; it should not own traversal budgets or Core Foundation reads.
+@MainActor
+final class ComposeContextCollector {
+    struct Limits: Equatable, Sendable {
+        let maxAncestorDepth: Int
+        let maxDFSDepth: Int
+        let maxNodes: Int
+        let maxRawContextCharacters: Int
+        let normalizerLimits: ComposeContextNormalizer.Limits
+
+        static let standard = Limits(
+            maxAncestorDepth: 8,
+            maxDFSDepth: 12,
+            maxNodes: 500,
+            maxRawContextCharacters: 30_000,
+            normalizerLimits: .standard
+        )
+    }
+
+    struct Result: Equatable, Sendable {
+        let text: String
+        let visitedNodeCount: Int
+        let retainedTextCount: Int
+        let droppedTextCount: Int
+    }
+
+    enum CollectionError: LocalizedError, Equatable {
+        case noFocusedElement
+        case staleFocus
+
+        var errorDescription: String? {
+            switch self {
+            case .noFocusedElement:
+                return "No focused Accessibility element was available for Compose context."
+            case .staleFocus:
+                return "Focused field changed before Compose context could be collected."
+            }
+        }
+    }
+
+    private struct TraversalRoot {
+        let element: AXUIElement
+        let matchedFocusedInput: Bool
+    }
+
+    private let limits: Limits
+
+    private static let allowedRoles: Set<String> = [
+        kAXStaticTextRole as String,
+        kAXTextAreaRole as String,
+        kAXTextFieldRole as String,
+        kAXDocumentRole as String
+    ]
+
+    private static let blockedRoles: Set<String> = [
+        kAXButtonRole as String,
+        kAXCheckBoxRole as String,
+        kAXRadioButtonRole as String,
+        kAXScrollBarRole as String,
+        kAXMenuItemRole as String,
+        kAXImageRole as String
+    ]
+
+    init(limits: Limits = .standard) {
+        self.limits = limits
+    }
+
+    /// Walks the Accessibility tree around the focused element and returns normalized text.
+    ///
+    /// Throws `CollectionError` when no focused element exists or it no longer matches `context`,
+    /// and `CancellationError` when the surrounding task is cancelled mid-walk.
+    func collect(for context: FocusedInputContext) async throws -> Result {
+        try Task.checkCancellation()
+
+        guard let focusedElement = AXHelper.focusedElement() else {
+            throw CollectionError.noFocusedElement
+        }
+
+        guard AXHelper.processIdentifier(for: focusedElement) == context.processIdentifier else {
+            throw CollectionError.staleFocus
+        }
+
+        let traversalRoot = try resolveTraversalRoot(startingAt: focusedElement, context: context)
+        guard traversalRoot.matchedFocusedInput else {
+            throw CollectionError.staleFocus
+        }
+
+        var stack: [(element: AXUIElement, depth: Int)] = [(traversalRoot.element, 0)]
+        var seenElementIdentities = Set<String>()
+        var rawTextBlocks: [String] = []
+        var visitedNodeCount = 0
+        var retainedTextCount = 0
+        var droppedTextCount = 0
+        var rawCharacterCount = 0
+
+        while let next = stack.popLast(), visitedNodeCount < limits.maxNodes {
+            try Task.checkCancellation()
+
+            let identity = AXHelper.elementIdentity(for: next.element)
+            guard seenElementIdentities.insert(identity).inserted else { continue }
+
+            visitedNodeCount += 1
+
+            if visitedNodeCount.isMultiple(of: 25) {
+                await Task.yield()
+            }
+
+            let role = AXHelper.stringValue(for: kAXRoleAttribute as CFString, on: next.element)
+
+            if let role, Self.blockedRoles.contains(role) {
+                droppedTextCount += 1
+                continue
+            }
+
+            if let role, Self.allowedRoles.contains(role) {
+                for text in readableTextBlocks(from: next.element) {
+                    guard rawCharacterCount < limits.maxRawContextCharacters else {
+                        droppedTextCount += 1
+                        continue
+                    }
+                    // Clip to the remaining budget so one huge value cannot exceed the raw cap.
+                    let clipped = String(text.prefix(limits.maxRawContextCharacters - rawCharacterCount))
+                    rawTextBlocks.append(clipped)
+                    rawCharacterCount += clipped.count + 1
+                    retainedTextCount += 1
+                }
+            }
+
+            guard next.depth < limits.maxDFSDepth else { continue }
+
+            let children = AXHelper.childElements(of: next.element)
+            for child in children.reversed() {
+                stack.append((child, next.depth + 1))
+            }
+        }
+
+        let normalizedText = ComposeContextNormalizer.normalize(
+            rawTextBlocks.joined(separator: "\n"),
+            limits: limits.normalizerLimits
+        )
+
+        return Result(
+            text: normalizedText,
+            visitedNodeCount: visitedNodeCount,
+            retainedTextCount: retainedTextCount,
+            droppedTextCount: droppedTextCount
+        )
+    }
+
+    /// Climbs from the focused element toward its window, recording whether any visited element
+    /// matches `context`. `maxAncestorDepth` is clamped at zero so the range below cannot trap.
+    private func resolveTraversalRoot(
+        startingAt focusedElement: AXUIElement,
+        context: FocusedInputContext
+    ) throws -> TraversalRoot {
+        var current: AXUIElement? = focusedElement
+        var fallbackRoot = focusedElement
+        var matchedFocusedInput = false
+
+        for _ in 0...max(limits.maxAncestorDepth, 0) {
+            guard let element = current else { break }
+
+            fallbackRoot = element
+            if elementMatchesContext(element, context: context) {
+                matchedFocusedInput = true
+            }
+
+            let role = AXHelper.stringValue(for: kAXRoleAttribute as CFString, on: element)
+            if role == kAXWindowRole as String {
+                return TraversalRoot(element: element, matchedFocusedInput: matchedFocusedInput)
+            }
+
+            current = AXHelper.parentElement(of: element)
+        }
+
+        return TraversalRoot(element: fallbackRoot, matchedFocusedInput: matchedFocusedInput)
+    }
+
+    /// True when `element` carries the identifier, role, and subrole captured in `context`.
+    private func elementMatchesContext(
+        _ element: AXUIElement,
+        context: FocusedInputContext
+    ) -> Bool {
+        guard AXHelper.elementIdentifier(for: element, bundleIdentifier: context.bundleIdentifier) == context.elementIdentifier else {
+            return false
+        }
+
+        let role = AXHelper.stringValue(for: kAXRoleAttribute as CFString, on: element)
+        let subrole = AXHelper.stringValue(for: kAXSubroleAttribute as CFString, on: element)
+        return role == context.role && subrole == context.subrole
+    }
+
+    /// Returns the element's trimmed, non-empty value/title/description strings, skipping
+    /// duplicates within the same element.
+    private func readableTextBlocks(from element: AXUIElement) -> [String] {
+        var blocks: [String] = []
+        var seenBlocks = Set<String>()
+
+        for attribute in [kAXValueAttribute, kAXTitleAttribute, kAXDescriptionAttribute] {
+            guard let text = AXHelper.stringValue(for: attribute as CFString, on: element) else { continue }
+
+            let trimmed = text.trimmingCharacters(in: .whitespacesAndNewlines)
+            guard !trimmed.isEmpty, seenBlocks.insert(trimmed).inserted else { continue }
+
+            blocks.append(trimmed)
+        }
+
+        return blocks
+    }
+}
diff --git a/tabby/Services/Input/InputSuppressionController.swift b/tabby/Services/Input/InputSuppressionController.swift
index 2532926..489ff34 100644
--- a/tabby/Services/Input/InputSuppressionController.swift
+++ b/tabby/Services/Input/InputSuppressionController.swift
@@ -13,9 +13,12 @@ final class InputSuppressionController {
     private var suppressionExpiry = Date.distantPast
 
     /// Arms a short-lived suppression window for the synthetic keydown events Tabby is about to post.
-    func registerSyntheticInsertion(expectedKeyDownCount: Int) {
-        remainingKeyDownSuppressions = max(expectedKeyDownCount, 0)
-        suppressionExpiry = Date().addingTimeInterval(1.0)
+    func registerSyntheticInsertion(expectedKeyDownCount: Int, duration: TimeInterval = 1.0) {
+        // Tokens left over from an already-expired window must not be revived by the new expiry,
+        // or a later real keystroke would be swallowed as if it were synthetic.
+        let carriedOver = Date() < suppressionExpiry ? remainingKeyDownSuppressions : 0
+        remainingKeyDownSuppressions = carriedOver + max(expectedKeyDownCount, 0)
+        suppressionExpiry = max(suppressionExpiry, Date().addingTimeInterval(duration))
     }
 
     /// Consumes one pending suppression token if the current event still falls inside the expiry window.
diff --git a/tabby/Services/Runtime/FoundationModelSuggestionEngine.swift b/tabby/Services/Runtime/FoundationModelSuggestionEngine.swift
index 680542a..ac912a8 100644
--- a/tabby/Services/Runtime/FoundationModelSuggestionEngine.swift
+++ b/tabby/Services/Runtime/FoundationModelSuggestionEngine.swift
@@ -63,6 +63,12 @@ final class FoundationModelSuggestionEngine {
         }
     }
 
+    func generateCompose(for request: ComposeRequest) async throws -> ComposeResult {
+        // This engine has no Compose path; report that as an unavailable-engine error.
+        let reason = "Compose Mode uses the local open-source runtime in this version."
+        throw SuggestionClientError.unavailable(reason)
+    }
+
     /// Foundation Models sessions are already one-shot, so there is no backend context to clear.
     func resetCachedGenerationContext() async {}
 
diff --git a/tabby/Services/Runtime/LlamaRuntimeManager.swift b/tabby/Services/Runtime/LlamaRuntimeManager.swift
index 4a66124..700caa3 100644
--- a/tabby/Services/Runtime/LlamaRuntimeManager.swift
+++ b/tabby/Services/Runtime/LlamaRuntimeManager.swift
@@ -22,6 +22,10 @@ final class LlamaRuntimeManager: ObservableObject {
     private var cachedRuntime: PreparedLlamaRuntime?
     private var selectedModelFilename: String?
 
+    /// Whether `RuntimeModelCatalog` reports the selected model file as Compose-capable.
+    /// Compose requests are rejected by `LlamaSuggestionEngine` when this is false.
+    var selectedModelSupportsCompose: Bool { RuntimeModelCatalog.supportsCompose(filename: selectedModelFilename) }
+
     convenience init() {
         self.init(
             configuration: .default,
@@ -109,10 +113,6 @@ final class LlamaRuntimeManager: ObservableObject {
         }
     }
 
-    /// Clears the native prompt KV cache without unloading the model.
-    /// The manager exposes this as a lifecycle command because focus/settings resets originate in
-    /// the app layer, while the actor still owns the raw llama pointers.
-
     /// Generates a short summary using an ephemeral context so the autocomplete cache is unaffected.
     func summarize(
         prompt: String,
@@ -141,6 +141,34 @@ final class LlamaRuntimeManager: ObservableObject {
         }
     }
 
+    /// Produces Compose text through `core.summarize` instead of the cached autocomplete path, so
+    /// autocomplete KV reuse cannot leak across the two interaction contracts.
+    ///
+    /// Errors thrown while generating surface as `LlamaRuntimeError`; all of them except
+    /// cancellation are also recorded in `diagnostics.lastError`.
+    func generateUncached(prompt: String, options: LlamaGenerationOptions) async throws -> String {
+        // Called for its effect only: the prepared runtime value itself is not needed here.
+        _ = try await preparedRuntime()
+
+        let failure: LlamaRuntimeError
+        do {
+            return try await core.summarize(prompt: prompt, options: options)
+        } catch is CancellationError {
+            throw LlamaRuntimeError.cancelled
+        } catch let knownFailure as LlamaRuntimeError {
+            failure = knownFailure
+        } catch {
+            failure = .generationFailed(error.localizedDescription)
+        }
+
+        // Cancellation already left above, so only genuine failures reach the diagnostics.
+        diagnostics.lastError = failure.localizedDescription
+        throw failure
+    }
+
+    /// Clears the native prompt KV cache without unloading the model.
+    /// The manager exposes this as a lifecycle command because focus/settings resets originate in
+    /// the app layer, while the actor still owns the raw llama pointers.
     func resetPromptCache() async {
         await core.resetPromptCache()
     }
diff --git a/tabby/Services/Runtime/LlamaSuggestionEngine.swift b/tabby/Services/Runtime/LlamaSuggestionEngine.swift
index eac94f4..fae9922 100644
--- a/tabby/Services/Runtime/LlamaSuggestionEngine.swift
+++ b/tabby/Services/Runtime/LlamaSuggestionEngine.swift
@@ -2,8 +2,8 @@ import CoreGraphics
 import Foundation
 
 /// File overview:
-/// Wraps the raw llama runtime with prompt/result normalization that is specific to inline
-/// completion. This is where raw generated text becomes a short suggestion Tabby can safely show.
+/// Wraps the raw llama runtime with prompt/result normalization for Tabby's local suggestion modes.
+/// This is where raw generated text becomes either a short inline tail or a full Compose draft.
 ///
 /// Keeps prompt normalization separate from the raw llama runtime.
 /// That separation matters because prompt strategy changes far more often than model lifecycle code.
@@ -58,6 +58,52 @@ final class LlamaSuggestionEngine {
         }
     }
 
+    /// Generates a Compose draft with the local llama runtime and returns it normalized.
+    /// Throws `SuggestionClientError`: `.unavailable` (unsupported model or runtime failure),
+    /// `.cancelled`, or `.generationFailed` for any other error.
+    func generateCompose(for request: ComposeRequest) async throws -> ComposeResult {
+        guard runtimeManager.selectedModelSupportsCompose else {
+            throw SuggestionClientError.unavailable(
+                "Compose Mode requires tabby-depth-1. Select or download \(RuntimeModelCatalog.composeRequiredFilename)."
+            )
+        }
+
+        do {
+            let startTime = Date()
+            let prompt = ComposePromptRenderer.prompt(for: request)
+            let rawDraft = try await runtimeManager.generateUncached(
+                prompt: prompt,
+                options: LlamaGenerationOptions(
+                    maxPredictionTokens: request.maxPredictionTokens,
+                    temperature: request.temperature,
+                    topK: request.topK,
+                    topP: request.topP,
+                    minP: request.minP,
+                    repetitionPenalty: request.repetitionPenalty,
+                    seed: request.randomSeed
+                )
+            )
+            try Task.checkCancellation()
+
+            return ComposeResult(
+                generation: request.generation,
+                rawText: rawDraft,
+                text: ComposeTextNormalizer.normalize(rawDraft, prompt: prompt, request: request),
+                latency: Date().timeIntervalSince(startTime)
+            )
+        } catch is CancellationError, LlamaRuntimeError.cancelled {
+            // The runtime manager rewraps cancellation as `LlamaRuntimeError.cancelled`; match it
+            // here so it is not reported as an unavailable runtime by the clause below.
+            throw SuggestionClientError.cancelled
+        } catch let error as LlamaRuntimeError {
+            throw SuggestionClientError.unavailable(error.localizedDescription)
+        } catch let error as SuggestionClientError {
+            throw error
+        } catch {
+            throw SuggestionClientError.generationFailed(error.localizedDescription)
+        }
+    }
+
     /// Clears both the Swift-side hint tracker and the native llama KV cache.
     /// The tracker reset is synchronous because it protects the next request from advertising
     /// stale reuse; awaiting the runtime reset keeps native KV invalidation ordered before the next
diff --git a/tabby/Services/Runtime/SuggestionEngineRouter.swift b/tabby/Services/Runtime/SuggestionEngineRouter.swift
index 02aa897..e9e6a4e 100644
--- a/tabby/Services/Runtime/SuggestionEngineRouter.swift
+++ b/tabby/Services/Runtime/SuggestionEngineRouter.swift
@@ -1,7 +1,7 @@
 import Foundation
 
 /// File overview:
-/// Routes generation requests to the currently selected autocomplete engine.
+/// Routes generation requests to the currently selected engine and interaction mode.
 /// This keeps engine selection in the composition/runtime layer instead of forcing
 /// `SuggestionCoordinator` to know about concrete backend types.
 @MainActor
@@ -29,6 +29,15 @@ final class SuggestionEngineRouter {
         }
     }
 
+    /// Forwards a Compose request to whichever engine is selected in settings right now.
+    func generateCompose(for request: ComposeRequest) async throws -> ComposeResult {
+        let engine = suggestionSettings.selectedEngine
+        switch engine {
+        case .appleIntelligence: return try await foundationModelEngine.generateCompose(for: request)
+        case .llamaOpenSource: return try await llamaEngine.generateCompose(for: request)
+        }
+    }
+
     /// Clears backend-local continuation state when the coordinator knows the editing context is
     /// no longer continuous. The router fans this out so switching engines cannot leave stale
     /// llama KV state behind.
diff --git a/tabby/Services/Suggestion/SuggestionInserter.swift b/tabby/Services/Suggestion/SuggestionInserter.swift
index fafef40..50ed76e 100644
--- a/tabby/Services/Suggestion/SuggestionInserter.swift
+++ b/tabby/Services/Suggestion/SuggestionInserter.swift
@@ -9,6 +9,12 @@ import Foundation
 /// This is simpler than AX field mutation for a first slice, but it is also more brittle.
 @MainActor
 final class SuggestionInserter {
+    private enum DraftTyping {
+        static let chunkCharacterCount = 8
+        static let delayNanoseconds: UInt64 = 12_000_000
+        static let suppressionDuration: TimeInterval = 3.0
+    }
+
     private let suppressionController: InputSuppressionController
 
     private(set) var lastErrorMessage: String?
@@ -25,14 +31,60 @@ final class SuggestionInserter {
             return false
         }
 
+        return postUnicodeKeyboardEvent(normalized, suppressionDuration: 1.0)
+    }
+
+    /// Posts a Compose draft as a run of short synthetic key events so `shouldContinue` and task
+    /// cancellation are re-checked before every chunk. Returns `true` only if all chunks posted.
+    func typeDraft(
+        _ draft: String,
+        shouldContinue: @escaping @MainActor () -> Bool
+    ) async -> Bool {
+        let text = draft.replacingOccurrences(of: "\r", with: "")
+        if text.isEmpty {
+            lastErrorMessage = "Compose draft was empty."
+            return false
+        }
+
+        let pieces = Self.chunks(from: text, maxCharacters: DraftTyping.chunkCharacterCount)
+        for piece in pieces {
+            if Task.isCancelled || !shouldContinue() {
+                lastErrorMessage = "Compose typing was cancelled because focus changed."
+                return false
+            }
+
+            let posted = postUnicodeKeyboardEvent(
+                piece,
+                suppressionDuration: DraftTyping.suppressionDuration
+            )
+            guard posted else {
+                return false
+            }
+
+            // Pace the events; a cancelled sleep is ignored because the next pass re-checks.
+            try? await Task.sleep(nanoseconds: DraftTyping.delayNanoseconds)
+        }
+
+        lastErrorMessage = nil
+        return true
+    }
+
+    private func postUnicodeKeyboardEvent(
+        _ text: String,
+        suppressionDuration: TimeInterval
+    ) -> Bool {
         guard let keyDownEvent = CGEvent(keyboardEventSource: nil, virtualKey: 0, keyDown: true),
-              let keyUpEvent = CGEvent(keyboardEventSource: nil, virtualKey: 0, keyDown: false) else {
+              let keyUpEvent = CGEvent(keyboardEventSource: nil, virtualKey: 0, keyDown: false)
+        else {
             lastErrorMessage = "Unable to create a synthetic keyboard event."
             return false
         }
 
-        let utf16CodeUnits = Array(normalized.utf16)
-        suppressionController.registerSyntheticInsertion(expectedKeyDownCount: 1)
+        let utf16CodeUnits = Array(text.utf16)
+        suppressionController.registerSyntheticInsertion(
+            expectedKeyDownCount: 1,
+            duration: suppressionDuration
+        )
         keyDownEvent.keyboardSetUnicodeString(stringLength: utf16CodeUnits.count, unicodeString: utf16CodeUnits)
         keyUpEvent.keyboardSetUnicodeString(stringLength: utf16CodeUnits.count, unicodeString: utf16CodeUnits)
         keyDownEvent.post(tap: .cghidEventTap)
@@ -40,6 +92,27 @@ final class SuggestionInserter {
         lastErrorMessage = nil
         return true
     }
+
+    /// Splits `text` into consecutive pieces of at most `maxCharacters` characters, preserving
+    /// order. A non-positive limit yields the whole text as a single piece, and an empty text
+    /// with a positive limit yields no pieces.
+    private static func chunks(from text: String, maxCharacters: Int) -> [String] {
+        if maxCharacters <= 0 {
+            return [text]
+        }
+
+        var pieces: [String] = []
+        var remainder = Substring(text)
+
+        // `prefix` and `dropFirst` count `Character`s, so a piece never splits a grapheme
+        // cluster; both clamp at the end of the string, which handles the short final piece.
+        while !remainder.isEmpty {
+            pieces.append(String(remainder.prefix(maxCharacters)))
+            remainder = remainder.dropFirst(maxCharacters)
+        }
+
+        return pieces
+    }
 }
 
 extension SuggestionInserter: SuggestionInserting {}
diff --git a/tabby/Services/Suggestion/SuggestionInteractionState.swift b/tabby/Services/Suggestion/SuggestionInteractionState.swift
index fd1f2fe..4d2b483 100644
--- a/tabby/Services/Suggestion/SuggestionInteractionState.swift
+++ b/tabby/Services/Suggestion/SuggestionInteractionState.swift
@@ -2,8 +2,8 @@ import Foundation
 
 /// File overview:
 /// Owns the mutable interaction state that sits between Accessibility snapshots and a live
-/// suggestion session. This includes the buffered focused-input context, the active suggestion
-/// session, and the AX-lag sentinel used after partial Tab acceptance.
+/// generation session. This includes the buffered focused-input context, the active autocomplete or
+/// Compose session, and the AX-lag sentinel used after partial Tab acceptance.
 ///
 /// The architectural lesson is that `SuggestionCoordinator` should orchestrate state transitions,
 /// not store every mutable implementation detail itself. This type becomes the home for that
@@ -12,7 +12,7 @@ import Foundation
 final class SuggestionInteractionState {
     private let contextBuffer: ContextBuffer
 
-    private(set) var activeSession: ActiveSuggestionSession?
+    private(set) var activeSession: ActiveGenerationSession?
     private(set) var pendingInsertionConsumedCount: Int?
 
     init(contextBuffer: ContextBuffer? = nil) {
@@ -25,6 +25,22 @@ final class SuggestionInteractionState {
         contextBuffer.currentContext
     }
 
+    /// The active session when it is an autocomplete session; `nil` for Compose or no session.
+    var activeAutocompleteSession: ActiveSuggestionSession? {
+        if case .autocomplete(let session) = activeSession {
+            return session
+        }
+        return nil
+    }
+
+    /// The active session when it is a Compose session; `nil` for autocomplete or no session.
+    var activeComposeSession: ActiveComposeSession? {
+        if case .compose(let session) = activeSession {
+            return session
+        }
+        return nil
+    }
+
     /// Exposes the higher-level meaning of `pendingInsertionConsumedCount` without leaking the
     /// sentinel's storage detail to the coordinator. When this is true, Tabby has already inserted
     /// suggestion text and is waiting for Accessibility to publish a matching live snapshot.
@@ -56,7 +72,22 @@ final class SuggestionInteractionState {
             fullText: fullText,
             latency: latency
         )
-        activeSession = session
+        activeSession = .autocomplete(session)
+        pendingInsertionConsumedCount = nil
+        return session
+    }
+
+    /// Replaces whatever session was active with a new Compose session built from `fullText`,
+    /// `liveContext`, and `latency`, clears the pending-insertion count, and returns the new
+    /// session.
+    func startComposeSession(
+        fullText: String,
+        liveContext: FocusedInputContext,
+        latency: TimeInterval
+    ) -> ActiveComposeSession {
+        let session = ActiveComposeSession(baseContext: liveContext, fullText: fullText, latency: latency)
+        // Store the session, then reset the insertion sentinel that belonged to the old one.
+        activeSession = .compose(session)
         pendingInsertionConsumedCount = nil
         return session
     }
@@ -77,7 +108,7 @@ final class SuggestionInteractionState {
     func reconcileActiveSession(
         with snapshot: FocusedInputSnapshot
     ) -> SuggestionStoredSessionReconciliation? {
-        guard let activeSession else {
+        guard let activeSession = activeAutocompleteSession else {
             return nil
         }
 
@@ -88,7 +119,7 @@ final class SuggestionInteractionState {
             pendingInsertionConsumedCount: pendingInsertionConsumedCount
         ) {
         case let .valid(reconciledSession, advancement, nextPendingInsertionConsumedCount):
-            self.activeSession = reconciledSession
+            self.activeSession = .autocomplete(reconciledSession)
             pendingInsertionConsumedCount = nextPendingInsertionConsumedCount
             return .valid(
                 liveContext: liveContext,
@@ -108,7 +139,7 @@ final class SuggestionInteractionState {
         from snapshot: FocusedInputSnapshot,
         overlayState: OverlayState
     ) -> SuggestionAcceptancePreparation {
-        guard let activeSession else {
+        guard let activeSession = activeAutocompleteSession else {
             return .invalid("Tab passed through because no valid suggestion was ready.")
         }
 
@@ -138,7 +169,7 @@ final class SuggestionInteractionState {
                 return .invalid(reason)
 
             case let .valid(reconciledSession, _, nextPendingInsertionConsumedCount):
-                self.activeSession = reconciledSession
+                self.activeSession = .autocomplete(reconciledSession)
                 pendingInsertionConsumedCount = nextPendingInsertionConsumedCount
                 sessionForAcceptance = reconciledSession
             }
@@ -184,7 +215,7 @@ final class SuggestionInteractionState {
             return .exhausted(generation: liveContext.generation)
         }
 
-        activeSession = advancedSession
+        activeSession = .autocomplete(advancedSession)
         return .advanced(session: advancedSession, generation: liveContext.generation)
     }
 
@@ -193,7 +224,7 @@ final class SuggestionInteractionState {
         _ typedCharacters: String,
         expectedSession: ActiveSuggestionSession
     ) -> ActiveSuggestionSession? {
-        guard let activeSession,
+        guard let activeSession = activeAutocompleteSession,
               activeSession == expectedSession,
               let advancedSession = SuggestionSessionReconciler.advanceIfTypedCharactersMatch(
                   typedCharacters,
@@ -203,9 +234,18 @@ final class SuggestionInteractionState {
             return nil
         }
 
-        self.activeSession = advancedSession
+        self.activeSession = .autocomplete(advancedSession)
         return advancedSession
     }
+
+    /// Drops the active session and the pending-insertion count, but only while the active
+    /// Compose session still equals `session`; otherwise nothing changes.
+    func clearComposeSession(_ session: ActiveComposeSession) {
+        if activeComposeSession == session {
+            activeSession = nil
+            pendingInsertionConsumedCount = nil
+        }
+    }
 }
 
 /// Wraps reconciliation results with the live buffered context the coordinator needs for UI updates.
diff --git a/tabby/Services/Suggestion/SuggestionOverlayPresenter.swift b/tabby/Services/Suggestion/SuggestionOverlayPresenter.swift
index 95fe37f..fe968dc 100644
--- a/tabby/Services/Suggestion/SuggestionOverlayPresenter.swift
+++ b/tabby/Services/Suggestion/SuggestionOverlayPresenter.swift
@@ -57,6 +57,24 @@ struct SuggestionOverlayPresenter {
         }
     }
 
+    /// Shows the Compose preview unless it is already displayed; returns a status line or `nil`.
+    func presentComposePreview(
+        text: String,
+        geometry: SuggestionOverlayGeometry,
+        previousState: OverlayState
+    ) -> String? {
+        let displayText = text.trimmingCharacters(in: .whitespacesAndNewlines)
+        if displayText.isEmpty {
+            return hide(reason: "Overlay hidden because the Compose draft was empty.")
+        }
+
+        let requestedState = OverlayState.composePreview(text: displayText, geometry: geometry)
+        if previousState == requestedState { return nil }
+
+        overlayController.showComposePreview(displayText, geometry: geometry)
+        return "Displayed Compose draft preview near the caret."
+    }
+
     func hide(reason: String) -> String {
         overlayController.hide(reason: reason)
         return reason
diff --git a/tabby/Services/UI/OverlayController.swift b/tabby/Services/UI/OverlayController.swift
index 30985ff..0be461f 100644
--- a/tabby/Services/UI/OverlayController.swift
+++ b/tabby/Services/UI/OverlayController.swift
@@ -30,7 +30,7 @@ final class OverlayController: SuggestionOverlayControlling {
     /// Reused across overlay updates to avoid allocating a new SwiftUI hosting view on every
     /// tab-per-word cycle. Only the rootView is swapped, which triggers a lightweight diff
     /// instead of a full view rebuild + layout pass.
-    private var hostingView: NSHostingView<GhostSuggestionView>?
+    private var hostingView: NSHostingView<AnyView>?
 
     init(suggestionSettings: SuggestionSettingsModel) {
         self.suggestionSettings = suggestionSettings
@@ -79,22 +79,17 @@ final class OverlayController: SuggestionOverlayControlling {
         let customGhostColor = SuggestionTextColorCodec.color(
             fromHex: suggestionSettings.customSuggestionTextColorHex
         )
-        let contentView: NSHostingView<GhostSuggestionView>
+        let rootView = AnyView(GhostSuggestionView(
+            layout: layout,
+            fontSize: fontSize,
+            customColor: customGhostColor
+        ))
+        let contentView: NSHostingView<AnyView>
         if let existing = hostingView {
-            existing.rootView = GhostSuggestionView(
-                layout: layout,
-                fontSize: fontSize,
-                customColor: customGhostColor
-            )
+            existing.rootView = rootView
             contentView = existing
         } else {
-            let fresh = NSHostingView(
-                rootView: GhostSuggestionView(
-                    layout: layout,
-                    fontSize: fontSize,
-                    customColor: customGhostColor
-                )
-            )
+            let fresh = NSHostingView(rootView: rootView)
             hostingView = fresh
             panel.contentView = fresh
             contentView = fresh
@@ -109,6 +104,49 @@ final class OverlayController: SuggestionOverlayControlling {
         state = .visible(text: text, geometry: geometry)
     }
 
+    /// Shows a compact multiline draft preview. Compose output is intentionally not drawn as inline
+    /// ghost text because accepting a full paragraph needs a more deliberate visual affordance.
+    /// An empty (whitespace-only) draft hides the overlay instead.
+    func showComposePreview(_ text: String, geometry: SuggestionOverlayGeometry) {
+        let previewText = text.trimmingCharacters(in: .whitespacesAndNewlines)
+        if previewText.isEmpty {
+            hide(reason: "Overlay not shown because the Compose draft was empty.")
+            return
+        }
+
+        let rootView = AnyView(ComposePreviewView(text: previewText))
+        let contentView: NSHostingView<AnyView>
+        if let reusable = hostingView {
+            reusable.rootView = rootView
+            contentView = reusable
+        } else {
+            contentView = NSHostingView(rootView: rootView)
+            hostingView = contentView
+            panel.contentView = contentView
+        }
+        contentView.layoutSubtreeIfNeeded()
+
+        // Size: fitted content raised to 260x96, then capped at 420x260 and the frame minus 32.
+        let visibleFrame = targetScreenVisibleFrame(for: geometry.caretRect)
+        let fittingSize = contentView.fittingSize
+        let widthCeiling = min(420, visibleFrame.width - 32)
+        let heightCeiling = min(260, visibleFrame.height - 32)
+        let width = min(max(fittingSize.width, 260), widthCeiling)
+        let height = min(max(fittingSize.height, 96), heightCeiling)
+
+        // Position: try the slot derived from the caret's minY; else fall back to its maxY.
+        let nearestX = max(geometry.caretRect.maxX + 8, visibleFrame.minX + 16)
+        let originX = min(nearestX, visibleFrame.maxX - width - 16)
+        let preferredOriginY = geometry.caretRect.minY - height - 10
+        let fallbackOriginY = min(geometry.caretRect.maxY + 10, visibleFrame.maxY - height - 16)
+        let originY = preferredOriginY >= visibleFrame.minY + 16 ? preferredOriginY : fallbackOriginY
+
+        let panelFrame = CGRect(x: originX, y: originY, width: width, height: height).integral
+        panel.setFrame(panelFrame, display: true)
+        panel.orderFrontRegardless()
+        state = .composePreview(text: previewText, geometry: geometry)
+    }
+
     /// Hides the floating panel and records why the overlay is no longer visible.
     func hide(reason: String) {
         panel.orderOut(nil)
@@ -188,6 +226,44 @@ private struct GhostSuggestionView: View {
     }
 }
 
+/// Card that previews a Compose draft: a header row above up to eight lines of draft text.
+private struct ComposePreviewView: View {
+    let text: String
+
+    var body: some View {
+        let cardShape = RoundedRectangle(cornerRadius: 14, style: .continuous)
+        VStack(alignment: .leading, spacing: 10) {
+            header
+            Text(text)
+                .font(.system(size: 13))
+                .foregroundStyle(.primary)
+                .lineLimit(8)
+                .multilineTextAlignment(.leading)
+                .frame(maxWidth: 380, alignment: .leading)
+        }
+        .padding(12)
+        .frame(maxWidth: 420, alignment: .leading)
+        .background(.regularMaterial, in: cardShape)
+        .overlay(cardShape.stroke(.quaternary, lineWidth: 1))
+        .fixedSize(horizontal: false, vertical: true)
+    }
+
+    /// Title row: the draft label on the leading edge and the key-hint capsule on the trailing edge.
+    private var header: some View {
+        HStack(spacing: 8) {
+            Text("Compose Draft")
+                .font(.system(size: 11, weight: .semibold, design: .rounded))
+                .foregroundStyle(.secondary)
+            Spacer(minLength: 0)
+            Text("tab to type")
+                .font(.system(size: 10, weight: .medium, design: .rounded))
+                .foregroundStyle(.secondary)
+                .padding(.horizontal, 7).padding(.vertical, 3)
+                .background(.quaternary, in: Capsule())
+        }
+    }
+}
+
 /// Visual hint that teaches the user which key accepts the suggestion.
 private struct GhostTabKeycap: View {
     @Environment(\.colorScheme) var colorScheme
diff --git a/tabby/Support/AXHelper.swift b/tabby/Support/AXHelper.swift
index 6d47895..6d1cb66 100644
--- a/tabby/Support/AXHelper.swift
+++ b/tabby/Support/AXHelper.swift
@@ -251,6 +251,12 @@ enum AXHelper {
         return "\(pid)-\(CFHash(element))"
     }
 
+    /// Process ID that owns `element`, or 0 when the Accessibility lookup does not succeed.
+    static func processIdentifier(for element: AXUIElement) -> pid_t {
+        var pid: pid_t = 0
+        return AXUIElementGetPid(element, &pid) == .success ? pid : 0
+    }
+
     /// Builds a stable identifier for an AX element by combining bundle identity and AX identity.
     static func elementIdentifier(for element: AXUIElement, bundleIdentifier: String) -> String {
         "\(bundleIdentifier)-\(elementIdentity(for: element))"
diff --git a/tabby/Support/ComposeContextNormalizer.swift b/tabby/Support/ComposeContextNormalizer.swift
new file mode 100644
index 0000000..a658937
--- /dev/null
+++ b/tabby/Support/ComposeContextNormalizer.swift
@@ -0,0 +1,118 @@
+import Foundation
+
+/// Pure cleanup for readable Accessibility text collected for Compose Mode.
+///
+/// The collector is allowed to be app-specific and side-effectful; this normalizer is not. Keeping
+/// whitespace, dedupe, and prompt-size rules here makes the sensitive AX text boundary testable
+/// before any model sees it.
+enum ComposeContextNormalizer {
+    /// Size bounds applied while normalizing.
+    struct Limits: Equatable, Sendable {
+        /// Longest line kept whole; longer lines are cut to this length and suffixed with "...".
+        let maxLineCharacters: Int
+        /// Character budget for the joined result, counting one character per line separator.
+        let maxContextCharacters: Int
+
+        static let standard = Limits(maxLineCharacters: 400, maxContextCharacters: 8_000)
+    }
+
+    /// Lowercased labels that are dropped when one of them makes up an entire line.
+    private static let obviousNavigationLines: Set<String> = [
+        "back", "cancel", "close", "copy", "delete", "edit",
+        "forward", "menu", "more", "next", "open", "previous",
+        "save", "search", "share", "skip to content", "submit"
+    ]
+
+    /// Cleans raw Accessibility text into deduplicated, size-bounded lines joined by "\n".
+    ///
+    /// Each line is whitespace-collapsed and trimmed. Empty lines, symbol-only noise, and obvious
+    /// navigation labels are dropped; over-long lines are cut and suffixed with "..."; repeated
+    /// lines are kept once. Collection stops at the context budget, where the last line is cut
+    /// to fit and trimmed so the result never ends in dangling whitespace.
+    ///
+    /// - Parameters:
+    ///   - rawContext: Text to clean. "\r" and every other newline character end a line.
+    ///   - limits: Per-line and total character bounds; negative bounds are treated as 0.
+    /// - Returns: The retained lines joined by "\n", or "" when nothing is retained.
+    static func normalize(
+        _ rawContext: String,
+        limits: Limits = .standard
+    ) -> String {
+        // `prefix(_:)` traps on a negative length, so clamp caller-supplied bounds first.
+        let maxLineCharacters = max(limits.maxLineCharacters, 0)
+        let maxContextCharacters = max(limits.maxContextCharacters, 0)
+
+        var seenLines = Set<String>()
+        var retainedLines: [String] = []
+        var retainedCharacterCount = 0
+
+        for rawLine in rawContext.replacingOccurrences(of: "\r", with: "\n").components(separatedBy: .newlines) {
+            var line = collapseHorizontalWhitespace(in: rawLine)
+                .trimmingCharacters(in: .whitespacesAndNewlines)
+
+            guard !line.isEmpty, !isSymbolNoise(line), !isObviousNavigationLine(line) else {
+                continue
+            }
+
+            if line.count > maxLineCharacters {
+                line = String(line.prefix(maxLineCharacters)).trimmingCharacters(in: .whitespaces) + "..."
+            }
+
+            guard seenLines.insert(line).inserted else {
+                continue
+            }
+
+            let separatorCost = retainedLines.isEmpty ? 0 : 1
+            let remaining = maxContextCharacters - retainedCharacterCount - separatorCost
+            guard line.count <= remaining else {
+                // Spend what is left of the budget on a cut-down final line. Trim it so the
+                // result never ends in a dangling space, and skip it when nothing is left.
+                let partial = String(line.prefix(max(remaining, 0))).trimmingCharacters(in: .whitespaces)
+                if !partial.isEmpty {
+                    retainedLines.append(partial)
+                }
+                break
+            }
+
+            retainedLines.append(line)
+            retainedCharacterCount += separatorCost + line.count
+        }
+
+        return retainedLines.joined(separator: "\n")
+    }
+
+    /// Replaces every run of horizontal whitespace in `text` with a single space.
+    private static func collapseHorizontalWhitespace(in text: String) -> String {
+        var result = ""
+        var previousWasWhitespace = false
+
+        for scalar in text.unicodeScalars {
+            if CharacterSet.whitespaces.contains(scalar) {
+                if !previousWasWhitespace {
+                    result.append(" ")
+                }
+                previousWasWhitespace = true
+            } else {
+                result.unicodeScalars.append(scalar)
+                previousWasWhitespace = false
+            }
+        }
+
+        return result
+    }
+
+    /// Whether `line` is a divider-like run: two or more non-whitespace scalars, all punctuation
+    /// or symbols, drawn from at most two distinct scalars (for example "---" or "**").
+    private static func isSymbolNoise(_ line: String) -> Bool {
+        let scalars = line.unicodeScalars.filter { !CharacterSet.whitespacesAndNewlines.contains($0) }
+        let noiseCharacters = CharacterSet.punctuationCharacters.union(.symbols)
+        return scalars.count >= 2
+            && scalars.allSatisfy { noiseCharacters.contains($0) }
+            && Set(scalars).count <= 2
+    }
+
+    /// Whether the whole line, ignoring case, is one of `obviousNavigationLines`.
+    private static func isObviousNavigationLine(_ line: String) -> Bool {
+        obviousNavigationLines.contains(line.lowercased())
+    }
+}
diff --git a/tabby/Support/ComposePromptRenderer.swift b/tabby/Support/ComposePromptRenderer.swift
new file mode 100644
index 0000000..2ab5242
--- /dev/null
+++ b/tabby/Support/ComposePromptRenderer.swift
@@ -0,0 +1,56 @@
+import Foundation
+
+/// Prompt renderer for Compose Mode.
+///
+/// Compose has a different contract from inline autocomplete: it should return the exact text to
+/// type at the caret, preserving paragraphs when useful, and it must not explain itself like chat.
+enum ComposePromptRenderer {
+    /// Fixed instruction block that opens every Compose prompt.
+    private static let taskSection = """
+        Task:
+        - Write the complete text the user wants typed at the caret.
+        - This is Compose Mode, not autocomplete and not chat.
+        - Return only the final typeable draft.
+        - Do not include labels, explanations, markdown fences, or quoted prompt text.
+        - Do not repeat text already typed in the focused field unless repetition is necessary.
+        - If the context is insufficient, write a concise useful draft based on the typed prefix.
+        """
+
+    /// Renders `request` as labeled sections separated by blank lines.
+    ///
+    /// The user name, profile tags, trailing text, clipboard, and visual summary sections appear only
+    /// when they have content; a blank typed prefix or surrounding context is shown as "(empty)".
+    static func prompt(for request: ComposeRequest) -> String {
+        var sections = [taskSection]
+
+        if let name = request.userName?.trimmingCharacters(in: .whitespacesAndNewlines), !name.isEmpty {
+            sections.append("User name:\n\(name)")
+        }
+        if let tags = request.userTags, !tags.isEmpty {
+            sections.append("User profile tags:\n\(tags.joined(separator: ", "))")
+        }
+
+        sections.append("App:\n\(request.applicationName)")
+        sections.append("Text already typed in the focused field:\n\(emptyPlaceholder(for: request.typedPrefix))")
+
+        let hasTrailingText = !request.trailingText.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty
+        if hasTrailingText {
+            sections.append("Text after the caret:\n\(request.trailingText)")
+        }
+        if let clipboard = request.clipboardContext, !clipboard.isEmpty {
+            sections.append("Clipboard context:\n\(clipboard)")
+        }
+        if let visualSummary = request.visualContextSummary, !visualSummary.isEmpty {
+            sections.append("Visual context summary:\n\(visualSummary)")
+        }
+
+        sections.append("Relevant surrounding context:\n\(emptyPlaceholder(for: request.surroundingContext))")
+        sections.append("Final instruction:\nWrite the full draft now.")
+        return sections.joined(separator: "\n\n")
+    }
+
+    /// Returns "(empty)" for blank or whitespace-only text; otherwise returns `text` unchanged.
+    private static func emptyPlaceholder(for text: String) -> String {
+        text.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty ? "(empty)" : text
+    }
+}
diff --git a/tabby/Support/ComposeRequestFactory.swift b/tabby/Support/ComposeRequestFactory.swift
new file mode 100644
index 0000000..2222069
--- /dev/null
+++ b/tabby/Support/ComposeRequestFactory.swift
@@ -0,0 +1,102 @@
+import Foundation
+
+struct ComposeRequestBuildResult: Equatable, Sendable {
+    let request: ComposeRequest  // Engine-facing Compose request.
+    let promptPreview: String  // Prompt text rendered from `request`.
+}
+
+/// Builds the engine-facing request for Compose Mode.
+///
+/// This stays separate from `SuggestionRequestFactory` because Compose keeps broader context,
+/// larger token budgets, and paragraph-preserving prompt rules.
+enum ComposeRequestFactory {
+    private static let maxTypedPrefixCharacters = 4_000
+    private static let maxTrailingTextCharacters = 1_000
+    private static let maxClipboardContextCharacters = 1_200
+    private static let maxSurroundingContextCharacters = 8_000
+
+    /// Builds the Compose request for `context` together with the prompt rendered from it.
+    ///
+    /// Text inputs are clipped to fixed character budgets: text before the caret keeps its end and
+    /// text after the caret keeps its start, so the characters next to the caret always survive.
+    /// Sampling values from `configuration` are floored (or, for `minP`, capped) at fixed bounds.
+    ///
+    /// - Parameters:
+    ///   - surroundingContext: Broader context text; clipped but always included.
+    ///   - clipboardContext: Clipboard text; dropped when disabled in `settings` or blank.
+    ///   - visualContextSummary: Optional summary text; dropped when blank.
+    static func buildRequest(
+        context: FocusedInputContext,
+        settings: SuggestionSettingsSnapshot,
+        configuration: SuggestionConfiguration,
+        surroundingContext: String,
+        clipboardContext: String?,
+        visualContextSummary: String? = nil
+    ) -> ComposeRequestBuildResult {
+        let request = ComposeRequest(
+            context: context,
+            typedPrefix: clippedTail(context.precedingText, maxCharacters: maxTypedPrefixCharacters),
+            trailingText: clippedText(context.trailingText, maxCharacters: maxTrailingTextCharacters),
+            surroundingContext: clippedText(surroundingContext, maxCharacters: maxSurroundingContextCharacters),
+            visualContextSummary: activeOptionalContext(visualContextSummary, maxCharacters: maxSurroundingContextCharacters),
+            clipboardContext: activeClipboardContext(rawContext: clipboardContext, settings: settings),
+            applicationName: context.applicationName,
+            generation: context.generation,
+            maxPredictionTokens: max(256, configuration.maxPredictionTokens),
+            temperature: max(0.35, configuration.temperature),
+            topK: max(40, configuration.topK),
+            topP: max(0.9, configuration.topP),
+            minP: min(configuration.minP, 0.05),
+            repetitionPenalty: max(1.08, configuration.repetitionPenalty),
+            randomSeed: configuration.randomSeed,
+            userName: activeUserName(settings: settings),
+            userTags: activeUserTags(settings: settings)
+        )
+        return ComposeRequestBuildResult(request: request, promptPreview: ComposePromptRenderer.prompt(for: request))
+    }
+
+    /// The trimmed user name from `settings`, or `nil` when it is blank.
+    private static func activeUserName(settings: SuggestionSettingsSnapshot) -> String? {
+        let trimmed = settings.userName.trimmingCharacters(in: .whitespacesAndNewlines)
+        return trimmed.isEmpty ? nil : trimmed
+    }
+
+    /// The trimmed, non-blank user tags from `settings`, or `nil` when none remain.
+    private static func activeUserTags(settings: SuggestionSettingsSnapshot) -> [String]? {
+        let tags = settings.userTags
+            .map { $0.trimmingCharacters(in: .whitespacesAndNewlines) }
+            .filter { !$0.isEmpty }
+        return tags.isEmpty ? nil : tags
+    }
+
+    /// Clipboard text clipped to its budget; `nil` when the clipboard setting is off or the text is blank.
+    private static func activeClipboardContext(rawContext: String?, settings: SuggestionSettingsSnapshot) -> String? {
+        guard settings.isClipboardContextEnabled else { return nil }
+        return activeOptionalContext(rawContext, maxCharacters: maxClipboardContextCharacters)
+    }
+
+    /// Trims `rawContext` and clips it to `maxCharacters`; `nil` when it is missing or blank.
+    private static func activeOptionalContext(_ rawContext: String?, maxCharacters: Int) -> String? {
+        guard let trimmed = rawContext?.trimmingCharacters(in: .whitespacesAndNewlines), !trimmed.isEmpty else {
+            return nil
+        }
+        return clippedText(trimmed, maxCharacters: maxCharacters)
+    }
+
+    /// Keeps the start of `text`, replacing anything past `maxCharacters` with a trailing "...".
+    private static func clippedText(_ text: String, maxCharacters: Int) -> String {
+        guard text.count > maxCharacters else { return text }
+        let marker = "..."
+        let kept = text.prefix(max(maxCharacters - marker.count, 0))
+        return String(kept).trimmingCharacters(in: .whitespacesAndNewlines) + marker
+    }
+
+    /// Keeps the end of `text`, replacing anything before the last `maxCharacters` with a leading
+    /// "...". Trailing whitespace is preserved because it sits directly before the caret.
+    private static func clippedTail(_ text: String, maxCharacters: Int) -> String {
+        guard text.count > maxCharacters else { return text }
+        let marker = "..."
+        let kept = text.suffix(max(maxCharacters - marker.count, 0))
+        return marker + String(kept.drop(while: \.isWhitespace))
+    }
+}
diff --git a/tabby/Support/ComposeTextNormalizer.swift b/tabby/Support/ComposeTextNormalizer.swift
new file mode 100644
index 0000000..d1ceeed
--- /dev/null
+++ b/tabby/Support/ComposeTextNormalizer.swift
@@ -0,0 +1,125 @@
+import Foundation
+
+/// Last-mile cleanup for full Compose drafts.
+///
+/// This deliberately does not reuse `SuggestionTextNormalizer`: autocomplete should collapse to one
+/// inline fragment, while Compose needs to preserve paragraph boundaries and only remove wrappers.
+enum ComposeTextNormalizer {
+    /// Lowercased labels removed when a draft starts with one of them.
+    private static let leadingLabels = [
+        "final answer:", "final draft:", "draft:",
+        "comment:", "response:", "reply:"
+    ]
+
+    /// Turns raw model output into the draft text to type at the caret.
+    ///
+    /// Cleanup runs in a fixed order: drop "\r" and the `<|im_end|>` / `<|im_start|>` markers,
+    /// drop a leading copy of `prompt`, trim surrounding newlines and control characters, then
+    /// remove a wrapping markdown fence, one leading label, one pair of wrapping quotes, and a
+    /// leading echo of the typed prefix. Finally each line is trimmed, runs of blank lines
+    /// collapse to one, and surrounding newlines and control characters are trimmed again.
+    /// Line breaks between paragraphs are preserved.
+    ///
+    /// - Parameters:
+    ///   - rawText: Text returned by the model.
+    ///   - prompt: Prompt sent to the model; removed when the output starts with it.
+    ///   - request: Request whose `typedPrefix` drives echo removal.
+    /// - Returns: The cleaned draft, which may be empty.
+    static func normalize(
+        _ rawText: String,
+        prompt: String,
+        request: ComposeRequest
+    ) -> String {
+        var normalized = rawText.replacingOccurrences(of: "\r", with: "")
+        normalized = normalized.replacingOccurrences(of: "<|im_end|>", with: "")
+        normalized = normalized.replacingOccurrences(of: "<|im_start|>", with: "")
+
+        if !prompt.isEmpty, normalized.hasPrefix(prompt) {
+            normalized.removeFirst(prompt.count)
+        }
+
+        normalized = normalized.trimmingCharacters(in: .controlCharacters.union(.newlines))
+        normalized = stripMarkdownFence(from: normalized)
+        normalized = stripLeadingLabel(from: normalized)
+        normalized = stripWholeResponseQuotes(from: normalized)
+        normalized = stripTypedPrefixEcho(from: normalized, typedPrefix: request.typedPrefix)
+        normalized = trimExcessBlankLines(in: normalized)
+
+        return normalized.trimmingCharacters(in: .controlCharacters.union(.newlines))
+    }
+
+    /// Removes a wrapping markdown fence: the opening "```" line and, when present, a closing "```" line.
+    private static func stripMarkdownFence(from text: String) -> String {
+        let trimmed = text.trimmingCharacters(in: .whitespacesAndNewlines)
+        guard trimmed.hasPrefix("```") else {
+            return text
+        }
+
+        // The first line is the fence itself (with any language tag), so it is always dropped.
+        var lines = trimmed.components(separatedBy: .newlines)
+        lines.removeFirst()
+        if lines.last?.trimmingCharacters(in: .whitespacesAndNewlines) == "```" {
+            lines.removeLast()
+        }
+        return lines.joined(separator: "\n")
+    }
+
+    /// Removes the first matching entry of `leadingLabels` from the start of `text`, ignoring case.
+    private static func stripLeadingLabel(from text: String) -> String {
+        let trimmed = text.trimmingCharacters(in: .whitespacesAndNewlines)
+        let lowercased = trimmed.lowercased()
+        guard let label = leadingLabels.first(where: { lowercased.hasPrefix($0) }) else {
+            return text
+        }
+        return String(trimmed.dropFirst(label.count)).trimmingCharacters(in: .whitespacesAndNewlines)
+    }
+
+    /// Removes one pair of straight quotes that wraps the whole response.
+    ///
+    /// A double-quoted response is left alone when it has another `"` inside: text such as
+    /// `"Yes," she said. "No."` starts and ends with quotes that belong to different spans, and
+    /// removing them would leave the remaining quotes unbalanced.
+    private static func stripWholeResponseQuotes(from text: String) -> String {
+        let trimmed = text.trimmingCharacters(in: .whitespacesAndNewlines)
+        guard trimmed.count >= 2, let quote = trimmed.first, quote == trimmed.last else {
+            return text
+        }
+
+        let inner = trimmed.dropFirst().dropLast()
+        // Apostrophes make `'` common inside single-quoted text, so only `"` gets the interior check.
+        let isWrapper = quote == "'" || (quote == "\"" && !inner.contains(quote))
+        return isWrapper ? String(inner) : text
+    }
+
+    /// Removes a leading, case-insensitive copy of the already-typed text from `text`.
+    private static func stripTypedPrefixEcho(from text: String, typedPrefix: String) -> String {
+        let trimmedPrefix = typedPrefix.trimmingCharacters(in: .whitespacesAndNewlines)
+        guard !trimmedPrefix.isEmpty else {
+            return text
+        }
+
+        // An anchored search yields the matched range in `candidate` itself, so the cut point
+        // does not depend on the two strings having the same character count.
+        let candidate = text.trimmingCharacters(in: .newlines)
+        guard let echo = candidate.range(of: trimmedPrefix, options: [.caseInsensitive, .anchored]) else {
+            return text
+        }
+        return String(candidate[echo.upperBound...])
+    }
+
+    /// Trims horizontal whitespace from every line and collapses each run of blank lines to one.
+    private static func trimExcessBlankLines(in text: String) -> String {
+        var result: [String] = []
+        var previousWasBlank = false
+
+        for line in text.components(separatedBy: .newlines) {
+            let trimmedLine = line.trimmingCharacters(in: .whitespaces)
+            if !(trimmedLine.isEmpty && previousWasBlank) {
+                result.append(trimmedLine)
+            }
+            previousWasBlank = trimmedLine.isEmpty
+        }
+
+        return result.joined(separator: "\n")
+    }
+}
diff --git a/tabby/Support/SuggestionAvailabilityEvaluator.swift b/tabby/Support/SuggestionAvailabilityEvaluator.swift
index 51c39e0..4db69b5 100644
--- a/tabby/Support/SuggestionAvailabilityEvaluator.swift
+++ b/tabby/Support/SuggestionAvailabilityEvaluator.swift
@@ -10,6 +10,7 @@ enum SuggestionAvailabilityEvaluator {
     static func disabledReason(
         globallyEnabled: Bool = true,
         disabledAppBundleIdentifiers: Set<String> = [],
+        interactionMode: SuggestionInteractionMode = .autocomplete,
         inputMonitoringGranted: Bool,
         screenRecordingGranted: Bool,
         focusSnapshot: FocusSnapshot
@@ -27,9 +28,11 @@ enum SuggestionAvailabilityEvaluator {
             return "Input Monitoring permission is required before Tabby can react to typing."
         }
 
-        guard screenRecordingGranted else {
-            return "Screen Recording permission is required before Tabby can build visual context "
-                + "for autocomplete."
+        if interactionMode == .autocomplete {
+            guard screenRecordingGranted else {
+                return "Screen Recording permission is required before Tabby can build visual context "
+                    + "for autocomplete."
+            }
         }
 
         switch focusSnapshot.capability {
@@ -43,6 +46,7 @@ enum SuggestionAvailabilityEvaluator {
     static func shouldSchedulePrediction(
         globallyEnabled: Bool = true,
         disabledAppBundleIdentifiers: Set<String> = [],
+        interactionMode: SuggestionInteractionMode = .autocomplete,
         inputMonitoringGranted: Bool,
         screenRecordingGranted: Bool,
         focusSnapshot: FocusSnapshot
@@ -50,6 +54,7 @@ enum SuggestionAvailabilityEvaluator {
         disabledReason(
             globallyEnabled: globallyEnabled,
             disabledAppBundleIdentifiers: disabledAppBundleIdentifiers,
+            interactionMode: interactionMode,
             inputMonitoringGranted: inputMonitoringGranted,
             screenRecordingGranted: screenRecordingGranted,
             focusSnapshot: focusSnapshot
diff --git a/tabbyTests/ComposeContextNormalizerTests.swift b/tabbyTests/ComposeContextNormalizerTests.swift
new file mode 100644
index 0000000..5ccd2b0
--- /dev/null
+++ b/tabbyTests/ComposeContextNormalizerTests.swift
@@ -0,0 +1,43 @@
+import XCTest
+@testable import tabby
+
+/// Tests the pure cleanup layer that makes broad Accessibility text safe to include in Compose prompts.
+final class ComposeContextNormalizerTests: XCTestCase {
+    /// Runs of tabs and spaces become one space; blank lines vanish but line breaks remain.
+    func test_normalize_collapsesWhitespaceAndPreservesLineStructure() {
+        let rawContext = " First\t\tline  with   spaces \n\nSecond line "
+
+        XCTAssertEqual(
+            ComposeContextNormalizer.normalize(rawContext),
+            "First line with spaces\nSecond line"
+        )
+    }
+
+    /// Divider-style symbol runs and bare navigation labels are removed, whatever their case.
+    func test_normalize_dropsSymbolNoiseAndObviousNavigationLines() {
+        let rawContext = "Share\n---\nThis is the useful page context.\nSkip to content\n**"
+
+        XCTAssertEqual(
+            ComposeContextNormalizer.normalize(rawContext),
+            "This is the useful page context."
+        )
+    }
+
+    /// A repeated line is kept only at its first position.
+    func test_normalize_deduplicatesRepeatedLines() {
+        let rawContext = "A review comment\nA review comment\nAnother detail\nA review comment"
+
+        XCTAssertEqual(
+            ComposeContextNormalizer.normalize(rawContext),
+            "A review comment\nAnother detail"
+        )
+    }
+
+    /// The first line is cut to 10 characters plus "..."; the second gets the 4 characters left of 18.
+    func test_normalize_boundsIndividualLinesAndFinalContext() {
+        let tightLimits = ComposeContextNormalizer.Limits(maxLineCharacters: 10, maxContextCharacters: 18)
+        let rawContext = "abcdefghijklmnopqrstuvwxyz\nsecond useful line"
+
+        XCTAssertEqual(ComposeContextNormalizer.normalize(rawContext, limits: tightLimits), "abcdefghij...\nseco")
+    }
+}