FuJacob · FuJacob · Jun 2, 2026 · Jun 2, 2026 · greptile-apps · Jun 2, 2026
diff --git a/Cotabby/Services/Runtime/LlamaRuntimeCore.swift b/Cotabby/Services/Runtime/LlamaRuntimeCore.swift
@@ -448,7 +448,8 @@ nonisolated final class LlamaRuntimeCore: @unchecked Sendable {
                 topK: topK,
                 noRepeatNgramSize: Self.noRepeatNgramSize
             ),
-            isSingleLine: options.singleLine
+            isSingleLine: options.singleLine,
+            isMidWord: options.forceWordContinuation
         )
         let best = candidates.first
         CotabbyLogger.runtime.debug(

diff --git a/Cotabby/Support/ConstrainedBeamSearch.swift b/Cotabby/Support/ConstrainedBeamSearch.swift
@@ -67,13 +67,15 @@ enum ConstrainedBeamSearch {
         nextLogits: @escaping BeamLogitsProvider,
         profile: TokenProfile,
         configuration: BeamSearchConfiguration,
-        isSingleLine: Bool
+        isSingleLine: Bool,
+        isMidWord: Bool = false
     ) -> [BeamCandidate] {
         Engine(
             nextLogits: nextLogits,
             profile: profile,
             configuration: configuration,
-            isSingleLine: isSingleLine
+            isSingleLine: isSingleLine,
+            isMidWord: isMidWord
         ).run()
     }
 }
@@ -85,6 +87,7 @@ private struct Engine {
     let profile: TokenProfile
     let configuration: BeamSearchConfiguration
     let isSingleLine: Bool
+    let isMidWord: Bool
 
     func run() -> [BeamCandidate] {
         var frontier: [BeamCandidate] = [BeamCandidate(tokenIDs: [], bytes: [], cumulativeLogprob: 0)]
@@ -121,13 +124,19 @@ private struct Engine {
             history: branch.tokenIDs,
             ngramSize: configuration.noRepeatNgramSize
         )
-        let candidates = ConstrainedSampler.rankedAdmissibleTokens(
+        var candidates = ConstrainedSampler.rankedAdmissibleTokens(
             logits: logits,
             profile: profile,
             admissibleTokenIDs: nil,
             topK: configuration.topK,
             blockedTokenIDs: blocked
         )
+        // Mid-word: the first generated token must continue the current word rather than begin with
+        // punctuation, whitespace, or a symbol. Applies only to the first step; later tokens generate
+        // freely once the word is being continued.
+        if isMidWord, branch.tokenIDs.isEmpty {
+            candidates = candidates.filter { profile.continuesWordMidStream($0) }
+        }
         for tokenID in candidates {
             if profile.isEndOfGeneration(tokenID) {
                 completed.append(branch)

diff --git a/Cotabby/Support/TokenProfile.swift b/Cotabby/Support/TokenProfile.swift
@@ -105,6 +105,27 @@ struct TokenProfile {
         entry(for: id)?.isWhitespaceOnly ?? false
     }
 
+    /// Whether `id` can continue the current word mid-stream: the first character of its decoded
+    /// bytes is a Unicode letter (including CJK and other scripts) or number, or one of the two common
+    /// within-word marks (apostrophe or hyphen). Tokens that begin with whitespace, punctuation, or a
+    /// symbol (ASCII or not) are rejected, so a mid-word completion continues the word instead of
+    /// breaking it. False for an out-of-range id or a token whose byte sequence is empty.
+    func continuesWordMidStream(_ id: Int) -> Bool {
+        guard let bytes = entry(for: id)?.bytes, !bytes.isEmpty else {
+            return false
+        }
+        // Inspect the first character with Unicode-aware classification: letters (including CJK and
+        // other scripts) and digits continue a word, as do the two common within-word marks; whitespace,
+        // punctuation, and symbols (ASCII or not, e.g. an em dash or arrow) do not. The lossy decode is
+        // fine because only the first scalar is examined and a malformed lead decodes to U+FFFD, which
+        // is not a letter, so it is rejected.
+        // swiftlint:disable:next optional_data_string_conversion
+        guard let first = String(decoding: bytes, as: UTF8.self).first else {
+            return false
+        }
+        return first.isLetter || first.isNumber || first == "'" || first == "-"
+    }
+
     private func entry(for id: Int) -> Entry? {
         guard id >= 0, id < entries.count else {
             return nil
@@ -133,4 +154,5 @@ struct TokenProfile {
             return false
         }
     }
+
 }
diff --git a/CotabbyTests/ConstrainedBeamSearchTests.swift b/CotabbyTests/ConstrainedBeamSearchTests.swift
@@ -150,6 +150,24 @@ final class ConstrainedBeamSearchTests: XCTestCase {
         XCTAssertFalse(recorder.paths.contains([0, 1]), "search must stop at the sentence and not step past it")
     }
 
+    func test_search_midWord_firstTokenMustContinueTheWord() {
+        // token 0 breaks the word (leading punctuation) but has the higher logit; token 1 continues it.
+        // Mid-word, only a word-continuing token may start the completion.
+        let profile = makeProfile(byteStrings: [", and", "ing"])
+        let rows: [[Int]: [Float]] = [[]: row([0: 9, 1: 1], vocabSize: 2)]
+        let normal = ConstrainedBeamSearch.search(
+            nextLogits: provider(vocabSize: 2, rows: rows), profile: profile,
+            configuration: BeamSearchConfiguration(beamWidth: 1, maxTokens: 1, topK: 5),
+            isSingleLine: false, isMidWord: false)
+        let midWord = ConstrainedBeamSearch.search(
+            nextLogits: provider(vocabSize: 2, rows: rows), profile: profile,
+            configuration: BeamSearchConfiguration(beamWidth: 1, maxTokens: 1, topK: 5),
+            isSingleLine: false, isMidWord: true)
+
+        XCTAssertEqual(normal.first?.tokenIDs, [0], "without mid-word, the highest-logit token wins")
+        XCTAssertEqual(midWord.first?.tokenIDs, [1], "mid-word, the word-breaking token is filtered out")
+    }
+
     func test_search_respectsMaxTokenBudget() {
         // No EOG / sentence end: every token keeps generating, so the budget bounds the length.
         let profile = makeProfile(byteStrings: ["a", "b"])

diff --git a/CotabbyTests/TokenProfileTests.swift b/CotabbyTests/TokenProfileTests.swift
@@ -106,4 +106,26 @@ final class TokenProfileTests: XCTestCase {
         XCTAssertFalse(profile.isNewline(5))
         XCTAssertFalse(profile.isWhitespaceOnly(5))
     }
+
+    func test_continuesWordMidStream_acceptsWordCharactersAndRejectsBreakers() {
+        let profile = makeProfile([
+            Stub(bytes: bytes("rrow"), control: false, eog: false),    // 0: letters
+            Stub(bytes: bytes("3rd"), control: false, eog: false),     // 1: leading digit
+            Stub(bytes: bytes("'t"), control: false, eog: false),      // 2: apostrophe (don't)
+            Stub(bytes: bytes("-op"), control: false, eog: false),     // 3: hyphen (co-op)
+            Stub(bytes: bytes("中文"), control: false, eog: false),      // 4: CJK letter
+            Stub(bytes: bytes(" word"), control: false, eog: false),   // 5: leading space
+            Stub(bytes: bytes(".rrow"), control: false, eog: false),   // 6: leading period
+            Stub(bytes: bytes("!stop"), control: false, eog: false),   // 7: leading punctuation
+            Stub(bytes: bytes("→x"), control: false, eog: false),      // 8: non-ASCII symbol
+            Stub(bytes: [], control: true, eog: false)                 // 9: empty / control
+        ])
+        for id in [0, 1, 2, 3, 4] {
+            XCTAssertTrue(profile.continuesWordMidStream(id), "id \(id) should continue a word")
+        }
+        for id in [5, 6, 7, 8, 9] {
+            XCTAssertFalse(profile.continuesWordMidStream(id), "id \(id) should not continue a word")
+        }
+        XCTAssertFalse(profile.continuesWordMidStream(-1))
+    }
 }