From f865fdcdc97e5bf4b05d757a23a60cd3f27df1ca Mon Sep 17 00:00:00 2001
From: Jacob Fu <141651335+FuJacob@users.noreply.github.com>
Date: Mon, 1 Jun 2026 20:46:18 -0700
Subject: [PATCH 1/2] Add token-aware prompt budgeting as an opt-in path
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The base-model prompt is budgeted in characters as a deliberate ~4-chars-per-token
approximation. That ratio is far off for code and non-Latin text, where it can
under- or over-fill the real context window. This adds a token-aware path that
swaps in an estimated token count, exactly as PromptSectionBudget's own comment
anticipated, without paying for the runtime tokenizer on the main-actor prompt
path.

- TokenCountEstimator is a pure, cheap, word-aware heuristic (roughly four
  characters per token within a word, every word at least one token) — closer to
  real subword tokenization than a single global ratio, deterministic for tests.
- PromptSectionBudget gains an additive allocate(_:totalTokens:estimate:) that
  fills by priority against an estimated-token budget, converting each section's
  token cap to a character cap via that content's own density so the existing
  character-based truncate is reused unchanged. The character allocate is untouched.
- BaseCompletionPromptRenderer takes an optional tokenBudget; nil keeps the
  character path, so shipped behavior is unchanged.

The estimator, the token allocator, and the renderer's token path are all
unit-tested (the caret prefix stays un-starved under a tight token budget). Wiring
a caller to pass a real token budget is the follow-up: the right budget value and
the quality delta need on-device validation, so it stays opt-in until then.
---
 Cotabby.xcodeproj/project.pbxproj             |  8 ++++
 .../BaseCompletionPromptRenderer.swift        | 17 +++++++-
 Cotabby/Support/PromptSectionBudget.swift     | 43 +++++++++++++++++++
 Cotabby/Support/TokenCountEstimator.swift     | 23 ++++++++++
 .../BaseCompletionPromptRendererTests.swift   | 16 +++++++
 CotabbyTests/PromptSectionBudgetTests.swift   | 35 +++++++++++++++
 CotabbyTests/TokenCountEstimatorTests.swift   | 36 ++++++++++++++++
 7 files changed, 176 insertions(+), 2 deletions(-)
 create mode 100644 Cotabby/Support/TokenCountEstimator.swift
 create mode 100644 CotabbyTests/TokenCountEstimatorTests.swift

diff --git a/Cotabby.xcodeproj/project.pbxproj b/Cotabby.xcodeproj/project.pbxproj
index ab32d71..e83bc29 100644
--- a/Cotabby.xcodeproj/project.pbxproj
+++ b/Cotabby.xcodeproj/project.pbxproj
@@ -184,6 +184,7 @@
 		A0657CE0488F69F0BD559CBC /* SuggestionCoordinator+Acceptance.swift in Sources */ = {isa = PBXBuildFile; fileRef = 72B13136DF7318F3E96DF0D3 /* SuggestionCoordinator+Acceptance.swift */; };
 		A0BB87E3665EF6C209034798 /* GhostSuggestionLayoutTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5AD3F4F9FBE82007E4E15F58 /* GhostSuggestionLayoutTests.swift */; };
 		A147C5EC3F2214A670F7556E /* FocusPollBackoffTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 273B4DC844F79B4BE2C8910F /* FocusPollBackoffTests.swift */; };
+		A26E14A6E73036222419C424 /* TokenCountEstimatorTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = B78AA11B52A6588119ABF76F /* TokenCountEstimatorTests.swift */; };
 		A2B3F4D38BCB0FED452B2A3F /* FocusTrackingModel.swift in Sources */ = {isa = PBXBuildFile; fileRef = B6D42CD456B4B3C988B148A6 /* FocusTrackingModel.swift */; };
 		A36481222BB5B2A67349D389 /* ApplicationBundleMetadataTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = A168A7B6A7AD11559B60C56B /* ApplicationBundleMetadataTests.swift */; };
 		A5A6CE0EF01CA6A9AFA7A400 /* RequestID.swift in Sources */ = {isa = PBXBuildFile; fileRef = 6DC693E00430F46E41CB56E6 /* RequestID.swift */; };
@@ -234,6 +235,7 @@
 		DA23422A2CF77CFD3B1283C8 /* OnboardingTemplateFeatureListTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = D814BBA41CF29E8DD9954651 /* OnboardingTemplateFeatureListTests.swift */; };
 		DA2A22F5386CC25420E98E6C /* FillInMiddlePolicy.swift in Sources */ = {isa = PBXBuildFile; fileRef = 276FA037D0F8DF51AABF4292 /* FillInMiddlePolicy.swift */; };
 		DB1310FF3576ACA6472C4DB1 /* TrailingDuplicationFilterTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = E19A5B462891263BDFB56607 /* TrailingDuplicationFilterTests.swift */; };
+		DC84D6A6A2F9A1060CD20ABB /* TokenCountEstimator.swift in Sources */ = {isa = PBXBuildFile; fileRef = 1BA30E71C21C77BB6EA4C166 /* TokenCountEstimator.swift */; };
 		DCABB8D2B391C7820D6CA5FF /* InsertionSafetyGate.swift in Sources */ = {isa = PBXBuildFile; fileRef = 7D472F9F396672E57873303B /* InsertionSafetyGate.swift */; };
 		DD7FA343F1C21C4569F6D181 /* ScreenshotContextGenerator.swift in Sources */ = {isa = PBXBuildFile; fileRef = 9B84BAE361626891F19DC9DB /* ScreenshotContextGenerator.swift */; };
 		DDEDCBAA2196303455F6926A /* AcceptanceModePickerView.swift in Sources */ = {isa = PBXBuildFile; fileRef = E5DAF68AEBFE334F68A65E82 /* AcceptanceModePickerView.swift */; };
@@ -314,6 +316,7 @@
 		19BE12C28A4AB8A4A58C2FF7 /* SettingsPaneScaffold.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SettingsPaneScaffold.swift; sourceTree = "<group>"; };
 		19DB9558F4D3AFB108D71649 /* SuggestionStateHelperTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SuggestionStateHelperTests.swift; sourceTree = "<group>"; };
 		1A8414BEB7E34F57607E37FE /* EmojiVariantResolver.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = EmojiVariantResolver.swift; sourceTree = "<group>"; };
+		1BA30E71C21C77BB6EA4C166 /* TokenCountEstimator.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = TokenCountEstimator.swift; sourceTree = "<group>"; };
 		1BD71ECC2AE4821B643E0935 /* ConfidenceSuppressionPolicy.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConfidenceSuppressionPolicy.swift; sourceTree = "<group>"; };
 		1CE61E74928C221B8BB261C6 /* SuggestionTextColorCodec.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SuggestionTextColorCodec.swift; sourceTree = "<group>"; };
 		1D00A031C0D9CF2A7A2330D9 /* PermissionDragSourceView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = PermissionDragSourceView.swift; sourceTree = "<group>"; };
@@ -465,6 +468,7 @@
 		B4B4A2E2DD6733658EC05BD8 /* DownloadFileRescuer.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = DownloadFileRescuer.swift; sourceTree = "<group>"; };
 		B6ACCB12E4DB32D2F2BEA567 /* PermissionHostApp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = PermissionHostApp.swift; sourceTree = "<group>"; };
 		B6D42CD456B4B3C988B148A6 /* FocusTrackingModel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = FocusTrackingModel.swift; sourceTree = "<group>"; };
+		B78AA11B52A6588119ABF76F /* TokenCountEstimatorTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = TokenCountEstimatorTests.swift; sourceTree = "<group>"; };
 		B7B185BA246A526CBA85E581 /* EmojiPickerPanelLayoutTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = EmojiPickerPanelLayoutTests.swift; sourceTree = "<group>"; };
 		B81DD30EB657368AACE9625A /* InputMonitor.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = InputMonitor.swift; sourceTree = "<group>"; };
 		B997EC69E1C65B1E18234221 /* BrowserAppDetector.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = BrowserAppDetector.swift; sourceTree = "<group>"; };
@@ -839,6 +843,7 @@
 				C71031E8DB171047318B92FC /* SyntheticReplacePlannerTests.swift */,
 				43E37A7E835D3BDE6265843C /* TerminalAppDetectorTests.swift */,
 				FC24FD54860CE6737E65EF65 /* TextDirectionDetectorTests.swift */,
+				B78AA11B52A6588119ABF76F /* TokenCountEstimatorTests.swift */,
 				F394B8A6E30CC47015772089 /* TokenProfileCacheTests.swift */,
 				E7D0BF193110927BEB865748 /* TokenProfileTests.swift */,
 				E19A5B462891263BDFB56607 /* TrailingDuplicationFilterTests.swift */,
@@ -998,6 +1003,7 @@
 				B424E2AC97C99D335B0D5751 /* SuggestionTextNormalizer.swift */,
 				7F4C4A7EAF886E0CC945BFEF /* TerminalAppDetector.swift */,
 				328847A0F494360033366791 /* TextDirectionDetector.swift */,
+				1BA30E71C21C77BB6EA4C166 /* TokenCountEstimator.swift */,
 				F3CEFE8C321E17BB3873C893 /* TokenProfile.swift */,
 				E73C04A71D85B25998144F11 /* TokenProfileCache.swift */,
 				D408D647412C59F3E692C42B /* TrailingDuplicationFilter.swift */,
@@ -1289,6 +1295,7 @@
 				AB9C9C001F97F9D14F8B192A /* TerminalAppDetector.swift in Sources */,
 				96782E57CA26A16409368B69 /* TextDirectionDetector.swift in Sources */,
 				6014B31E2570EFFE45557E33 /* TickMarkSlider.swift in Sources */,
+				DC84D6A6A2F9A1060CD20ABB /* TokenCountEstimator.swift in Sources */,
 				8EED2B55999A119AE3B67359 /* TokenProfile.swift in Sources */,
 				D747A2C2B49450D26C6179A7 /* TokenProfileCache.swift in Sources */,
 				D3B43622E5A41B11E7AF527E /* TrailingDuplicationFilter.swift in Sources */,
@@ -1384,6 +1391,7 @@
 				EF5BAB96DDADABB86F9E02D9 /* SyntheticReplacePlannerTests.swift in Sources */,
 				DE236C9285635C686D66A2F6 /* TerminalAppDetectorTests.swift in Sources */,
 				5A441797D71A880A7482077D /* TextDirectionDetectorTests.swift in Sources */,
+				A26E14A6E73036222419C424 /* TokenCountEstimatorTests.swift in Sources */,
 				D9B992A608F7FC9924D13271 /* TokenProfileCacheTests.swift in Sources */,
 				CA8F453AA4AD02FAA8C961F7 /* TokenProfileTests.swift in Sources */,
 				DB1310FF3576ACA6472C4DB1 /* TrailingDuplicationFilterTests.swift in Sources */,
diff --git a/Cotabby/Support/BaseCompletionPromptRenderer.swift b/Cotabby/Support/BaseCompletionPromptRenderer.swift
index d7c6cbf..f92ac62 100644
--- a/Cotabby/Support/BaseCompletionPromptRenderer.swift
+++ b/Cotabby/Support/BaseCompletionPromptRenderer.swift
@@ -27,7 +27,8 @@ enum BaseCompletionPromptRenderer {
         languageInstruction: String? = nil,
         clipboardContext: String? = nil,
         visualContextSummary: String? = nil,
-        contextBudget: Int = defaultContextBudget
+        contextBudget: Int = defaultContextBudget,
+        tokenBudget: Int? = nil
     ) -> String {
         let trimmedPrefix = Self.trimmingTrailingWhitespace(prefixText)
 
@@ -65,7 +66,19 @@ enum BaseCompletionPromptRenderer {
             )
         )
 
-        let kept = PromptSectionBudget.allocate(sections, totalChars: contextBudget)
+        // Token-aware budgeting (opt-in): when a token budget is supplied, fill sections against an
+        // estimated-token window instead of the character approximation. Defaults to the character
+        // path so shipped behavior is unchanged.
+        let kept: [PromptSection]
+        if let tokenBudget {
+            kept = PromptSectionBudget.allocate(
+                sections,
+                totalTokens: tokenBudget,
+                estimate: TokenCountEstimator.estimate
+            )
+        } else {
+            kept = PromptSectionBudget.allocate(sections, totalChars: contextBudget)
+        }
         let prefix = kept.first { $0.name == "prefix" }?.content ?? trimmedPrefix
         let preface = kept.filter { $0.name != "prefix" }.map(\.content)
 
diff --git a/Cotabby/Support/PromptSectionBudget.swift b/Cotabby/Support/PromptSectionBudget.swift
index 0bea6e0..a3df1a3 100644
--- a/Cotabby/Support/PromptSectionBudget.swift
+++ b/Cotabby/Support/PromptSectionBudget.swift
@@ -69,6 +69,49 @@ enum PromptSectionBudget {
         return sections.indices.compactMap { kept[$0] }
     }
 
+    /// Token-aware variant of `allocate`: the budget and remaining are counted in *estimated tokens*
+    /// (via `estimate`) instead of characters, so a base model's real context window is respected more
+    /// faithfully than a flat chars-per-token ratio — which matters most for code or non-Latin text,
+    /// where that ratio is far from four. Each section's intrinsic `minChars`/`maxChars` still bound
+    /// the content itself; the per-section token cap is converted to a character cap using that
+    /// content's own character-per-token density, so the character-based `truncate` is reused as is.
+    static func allocate(
+        _ sections: [PromptSection],
+        totalTokens: Int,
+        estimate: (String) -> Int
+    ) -> [PromptSection] {
+        var remainingTokens = max(0, totalTokens)
+
+        let fillOrder = sections.enumerated().sorted { lhs, rhs in
+            lhs.element.priority == rhs.element.priority
+                ? lhs.offset < rhs.offset
+                : lhs.element.priority > rhs.element.priority
+        }
+
+        var kept: [Int: PromptSection] = [:]
+        for (index, section) in fillOrder {
+            guard remainingTokens > 0 else { break }
+            let contentTokens = max(1, estimate(section.content))
+            let charsPerToken = Double(section.content.count) / Double(contentTokens)
+            let remainingChars = Int((Double(remainingTokens) * charsPerToken).rounded(.down))
+            let cap = min(section.maxChars, section.content.count, remainingChars)
+            if cap < section.minChars {
+                continue
+            }
+            let truncated = truncate(section.content, toChars: cap, mode: section.truncation)
+                .trimmingCharacters(in: .whitespacesAndNewlines)
+            guard !truncated.isEmpty else {
+                continue
+            }
+            var copy = section
+            copy.content = truncated
+            kept[index] = copy
+            remainingTokens -= estimate(truncated)
+        }
+
+        return sections.indices.compactMap { kept[$0] }
+    }
+
     /// Truncates `text` to at most `chars`, keeping the start or the end per `mode`. Returns the
     /// input unchanged when it already fits, and the empty string when `chars <= 0`.
     static func truncate(_ text: String, toChars chars: Int, mode: PromptSection.Truncation) -> String {
diff --git a/Cotabby/Support/TokenCountEstimator.swift b/Cotabby/Support/TokenCountEstimator.swift
new file mode 100644
index 0000000..559d549
--- /dev/null
+++ b/Cotabby/Support/TokenCountEstimator.swift
@@ -0,0 +1,23 @@
+import Foundation
+
+/// File overview:
+/// A pure, cheap estimate of how many model tokens a string occupies, used to budget the base-model
+/// prompt more faithfully than a flat character count without paying for a real tokenizer on the
+/// main-actor prompt path.
+///
+/// It is intentionally an approximation: a word-aware heuristic (roughly four characters per token
+/// within a word, every word at least one token) is closer to real subword tokenization than a single
+/// global chars-per-token ratio — especially for code or short function words — while staying
+/// allocation-light and deterministic for tests. It is not exact, so it is used only for relative
+/// budgeting decisions, never to assert a hard token limit.
+enum TokenCountEstimator {
+    static func estimate(_ text: String) -> Int {
+        let words = text.split(whereSeparator: { $0.isWhitespace })
+        guard !words.isEmpty else {
+            return 0
+        }
+        return words.reduce(0) { total, word in
+            total + max(1, Int((Double(word.count) / 4.0).rounded()))
+        }
+    }
+}
diff --git a/CotabbyTests/BaseCompletionPromptRendererTests.swift b/CotabbyTests/BaseCompletionPromptRendererTests.swift
index 89a6a44..891043c 100644
--- a/CotabbyTests/BaseCompletionPromptRendererTests.swift
+++ b/CotabbyTests/BaseCompletionPromptRendererTests.swift
@@ -42,6 +42,22 @@ final class BaseCompletionPromptRendererTests: XCTestCase {
         XCTAssertTrue(prompt.hasSuffix("the meeting is at"))
     }
 
+    func test_tokenBudget_keepsCaretPrefixUnderATightBudget() {
+        // The opt-in token-budgeted path must keep the caret prefix (top priority) at the very end,
+        // exactly like the character path, while a tight budget trims lower-priority context.
+        let prompt = BaseCompletionPromptRenderer.prompt(
+            prefixText: "the meeting is at",
+            applicationName: "Slack",
+            userName: "Jacob",
+            customRules: ["terse"],
+            extendedContext: "Project Matcha ships in June with a great many additional notes kept here.",
+            clipboardContext: "zoom link",
+            visualContextSummary: "Calendar: Q3 planning 3pm",
+            tokenBudget: 8
+        )
+        XCTAssertTrue(prompt.hasSuffix("the meeting is at"), "the caret prefix is never starved under a token budget")
+    }
+
     func test_personaFramingConditionsOnNameStyleAndLanguage() {
         let prompt = BaseCompletionPromptRenderer.prompt(
             prefixText: "Hi team,",
diff --git a/CotabbyTests/PromptSectionBudgetTests.swift b/CotabbyTests/PromptSectionBudgetTests.swift
index 899ac3d..916226e 100644
--- a/CotabbyTests/PromptSectionBudgetTests.swift
+++ b/CotabbyTests/PromptSectionBudgetTests.swift
@@ -78,4 +78,39 @@ final class PromptSectionBudgetTests: XCTestCase {
     func test_truncate_returnsInputWhenItFits() {
         XCTAssertEqual(PromptSectionBudget.truncate("abc", toChars: 10, mode: .preserveEnd), "abc")
     }
+
+    // MARK: - Token-aware allocate
+
+    func test_tokenAllocate_keepsAllWhenBudgetAmple() {
+        let kept = PromptSectionBudget.allocate(
+            [section("a", "alpha", priority: 10), section("b", "beta", priority: 5)],
+            totalTokens: 1000,
+            estimate: TokenCountEstimator.estimate
+        )
+        XCTAssertEqual(kept.map(\.name), ["a", "b"])
+    }
+
+    func test_tokenAllocate_dropsLowerPriorityWhenBudgetTight() {
+        let low = String(repeating: "word ", count: 5)
+        let high = String(repeating: "term ", count: 5)
+        let kept = PromptSectionBudget.allocate(
+            [section("low", low, priority: 1), section("high", high, priority: 9)],
+            totalTokens: 5,
+            estimate: TokenCountEstimator.estimate
+        )
+        XCTAssertEqual(kept.map(\.name), ["high"])
+    }
+
+    func test_tokenAllocate_respectsTokenBudget() {
+        let kept = PromptSectionBudget.allocate(
+            [
+                section("a", String(repeating: "alpha ", count: 20), priority: 9),
+                section("b", String(repeating: "bravo ", count: 20), priority: 8)
+            ],
+            totalTokens: 25,
+            estimate: TokenCountEstimator.estimate
+        )
+        let used = kept.reduce(0) { $0 + TokenCountEstimator.estimate($1.content) }
+        XCTAssertLessThanOrEqual(used, 25)
+    }
 }
diff --git a/CotabbyTests/TokenCountEstimatorTests.swift b/CotabbyTests/TokenCountEstimatorTests.swift
new file mode 100644
index 0000000..cd12672
--- /dev/null
+++ b/CotabbyTests/TokenCountEstimatorTests.swift
@@ -0,0 +1,36 @@
+import XCTest
+@testable import Cotabby
+
+/// Tests for the heuristic token-count estimator. It is deliberately approximate, so these lock down
+/// robust *relationships* (empty is zero, longer text estimates more, every word counts) rather than
+/// exact token counts a real tokenizer would produce.
+final class TokenCountEstimatorTests: XCTestCase {
+    func test_emptyOrWhitespaceIsZero() {
+        XCTAssertEqual(TokenCountEstimator.estimate(""), 0)
+        XCTAssertEqual(TokenCountEstimator.estimate("   \n\t "), 0)
+    }
+
+    func test_everyWordIsAtLeastOneToken() {
+        XCTAssertEqual(TokenCountEstimator.estimate("a"), 1)
+        XCTAssertGreaterThanOrEqual(TokenCountEstimator.estimate("hi there"), 2)
+    }
+
+    func test_longerTextEstimatesMoreTokens() {
+        let short = TokenCountEstimator.estimate("the cat sat")
+        let long = TokenCountEstimator.estimate("the cat sat on the warm windowsill all afternoon long")
+        XCTAssertGreaterThan(long, short)
+    }
+
+    func test_longWordCountsForMoreThanShortWord() {
+        XCTAssertGreaterThan(
+            TokenCountEstimator.estimate("internationalization"),
+            TokenCountEstimator.estimate("cat")
+        )
+    }
+
+    func test_scalesWithWordCount() {
+        let oneWord = TokenCountEstimator.estimate("word")
+        let fiveWords = TokenCountEstimator.estimate("word word word word word")
+        XCTAssertEqual(fiveWords, oneWord * 5)
+    }
+}

From a96eb89b87d5b9c08ddeac33f084f4c379b1fd85 Mon Sep 17 00:00:00 2001
From: Jacob Fu <141651335+FuJacob@users.noreply.github.com>
Date: Mon, 1 Jun 2026 21:42:18 -0700
Subject: [PATCH 2/2] Address review feedback on token budgeting

- PromptSectionBudget: clamp remainingTokens at zero. A truncated slice can be
  token-denser than the section average, so deducting its estimate could drive the
  remaining budget negative. Flooring keeps the counter non-negative; the loop's
  `remainingTokens > 0` guard still stops filling once the budget is spent.
- TokenCountEstimator: split on punctuation as well as whitespace, so contractions
  ("can't") and punctuation-joined identifiers ("foo.bar") aren't undercounted as a
  single word.
---
 Cotabby/Support/PromptSectionBudget.swift   | 5 ++++-
 Cotabby/Support/TokenCountEstimator.swift   | 5 ++++-
 CotabbyTests/TokenCountEstimatorTests.swift | 7 +++++++
 3 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/Cotabby/Support/PromptSectionBudget.swift b/Cotabby/Support/PromptSectionBudget.swift
index a3df1a3..be6e111 100644
--- a/Cotabby/Support/PromptSectionBudget.swift
+++ b/Cotabby/Support/PromptSectionBudget.swift
@@ -106,7 +106,10 @@ enum PromptSectionBudget {
             var copy = section
             copy.content = truncated
             kept[index] = copy
-            remainingTokens -= estimate(truncated)
+            // Clamp: a truncated slice can be token-denser than the section average, so its estimate
+            // can exceed what was left. Floor at zero to keep the counter non-negative; the loop's
+            // `remainingTokens > 0` guard still stops filling once the budget is spent.
+            remainingTokens = max(0, remainingTokens - estimate(truncated))
         }
 
         return sections.indices.compactMap { kept[$0] }
diff --git a/Cotabby/Support/TokenCountEstimator.swift b/Cotabby/Support/TokenCountEstimator.swift
index 559d549..03191b8 100644
--- a/Cotabby/Support/TokenCountEstimator.swift
+++ b/Cotabby/Support/TokenCountEstimator.swift
@@ -12,7 +12,10 @@ import Foundation
 /// budgeting decisions, never to assert a hard token limit.
 enum TokenCountEstimator {
     static func estimate(_ text: String) -> Int {
-        let words = text.split(whereSeparator: { $0.isWhitespace })
+        // Split on punctuation as well as whitespace: real subword tokenizers break "can't" and
+        // "foo.bar" at the punctuation, so gluing the pieces into one word would undercount them.
+        // The separators themselves are dropped, so punctuation-only runs add no tokens here.
+        let words = text.split(whereSeparator: { $0.isWhitespace || $0.isPunctuation })
         guard !words.isEmpty else {
             return 0
         }
diff --git a/CotabbyTests/TokenCountEstimatorTests.swift b/CotabbyTests/TokenCountEstimatorTests.swift
index cd12672..18d9fac 100644
--- a/CotabbyTests/TokenCountEstimatorTests.swift
+++ b/CotabbyTests/TokenCountEstimatorTests.swift
@@ -33,4 +33,11 @@ final class TokenCountEstimatorTests: XCTestCase {
         let fiveWords = TokenCountEstimator.estimate("word word word word word")
         XCTAssertEqual(fiveWords, oneWord * 5)
     }
+
+    func test_splitsOnPunctuationBoundaries() {
+        // Punctuation creates token boundaries (like real subword tokenizers), so a contraction or a
+        // punctuation-joined identifier estimates more tokens than the same letters with none.
+        XCTAssertGreaterThan(TokenCountEstimator.estimate("can't"), TokenCountEstimator.estimate("cant"))
+        XCTAssertGreaterThan(TokenCountEstimator.estimate("foo.bar.baz"), TokenCountEstimator.estimate("foobarbaz"))
+    }
 }