Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions Cotabby.xcodeproj/project.pbxproj
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
27D4F5CACADE171F142178B4 /* SettingsSidebarView.swift in Sources */ = {isa = PBXBuildFile; fileRef = BADB38D0160B47637572FC5E /* SettingsSidebarView.swift */; };
286B7022E2A2774275004447 /* WelcomeTemplateStepView.swift in Sources */ = {isa = PBXBuildFile; fileRef = A9199B9CEAB320982CA333B8 /* WelcomeTemplateStepView.swift */; };
29EC35D67D9B4C3C50222619 /* ConstrainedBeamSearchTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = D891397DAFCEF9DDD612A771 /* ConstrainedBeamSearchTests.swift */; };
2A1ED4231B6EAD2640AE1568 /* RequiredPrefixConstraint.swift in Sources */ = {isa = PBXBuildFile; fileRef = F8D1BE91149614EE5D888672 /* RequiredPrefixConstraint.swift */; };
2C6159231472A849F15BD0AE /* ScreenFrameReader.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5484C8A04B9C00CF79D589EB /* ScreenFrameReader.swift */; };
2DF5A3826AAB99C279EBB8DE /* InputMonitor.swift in Sources */ = {isa = PBXBuildFile; fileRef = B81DD30EB657368AACE9625A /* InputMonitor.swift */; };
2E3DEB7E89D0146274596F2E /* SettingsContainerView.swift in Sources */ = {isa = PBXBuildFile; fileRef = DB0CE9AB1286367BA2E82392 /* SettingsContainerView.swift */; };
Expand Down Expand Up @@ -161,6 +162,7 @@
909EBE545CE644C6C57F1B5D /* SuggestionCoordinator.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8F961F5DF2A392F6F5F94F8A /* SuggestionCoordinator.swift */; };
90CD3F7238E223DEBA2B4D92 /* TagChip.swift in Sources */ = {isa = PBXBuildFile; fileRef = FB317C82CE2CBC69056BA4B8 /* TagChip.swift */; };
90DC9508F27F712EB61EEB06 /* PermissionReminderView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 656F58E56FE9BC087B6F1D33 /* PermissionReminderView.swift */; };
912EE366AD8107CDAE0FBC6E /* RequiredPrefixConstraintTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = E498506C46AC3AB19D8AE6EC /* RequiredPrefixConstraintTests.swift */; };
91C27021750AC03AA4A0115A /* HuggingFaceAPIClient.swift in Sources */ = {isa = PBXBuildFile; fileRef = 110CB0B53016644EF7840301 /* HuggingFaceAPIClient.swift */; };
91D1F16B8C5DA281D4B7F699 /* CustomRulesTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = AD752451330486FE270018B0 /* CustomRulesTests.swift */; };
91D8189EFCD1BA992EA6F038 /* ConfidenceSuppressionPolicyTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 06FF2B0A3094A952A8EBA9B5 /* ConfidenceSuppressionPolicyTests.swift */; };
Expand Down Expand Up @@ -505,6 +507,7 @@
E27B962C66727776D00069DE /* EmojiPopularity.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = EmojiPopularity.swift; sourceTree = "<group>"; };
E3C84377F352140759B448C9 /* CaretGeometrySelector.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = CaretGeometrySelector.swift; sourceTree = "<group>"; };
E43E587E421AF544A8300CE4 /* CustomRulesCatalog.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = CustomRulesCatalog.swift; sourceTree = "<group>"; };
E498506C46AC3AB19D8AE6EC /* RequiredPrefixConstraintTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = RequiredPrefixConstraintTests.swift; sourceTree = "<group>"; };
E5DAF68AEBFE334F68A65E82 /* AcceptanceModePickerView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = AcceptanceModePickerView.swift; sourceTree = "<group>"; };
E6423D6CC8CC371D2DA899DE /* PermissionOverlayTracker.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = PermissionOverlayTracker.swift; sourceTree = "<group>"; };
E73C04A71D85B25998144F11 /* TokenProfileCache.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = TokenProfileCache.swift; sourceTree = "<group>"; };
Expand All @@ -519,6 +522,7 @@
F308F6E274CC645E27CB651F /* OverlayController.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = OverlayController.swift; sourceTree = "<group>"; };
F394B8A6E30CC47015772089 /* TokenProfileCacheTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = TokenProfileCacheTests.swift; sourceTree = "<group>"; };
F3CEFE8C321E17BB3873C893 /* TokenProfile.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = TokenProfile.swift; sourceTree = "<group>"; };
F8D1BE91149614EE5D888672 /* RequiredPrefixConstraint.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = RequiredPrefixConstraint.swift; sourceTree = "<group>"; };
FA4B45B91D4DEAC979C3113E /* PromptContextSanitizer.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = PromptContextSanitizer.swift; sourceTree = "<group>"; };
FA878B447441BB4F3E327CC8 /* OnboardingTemplateRecommender.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = OnboardingTemplateRecommender.swift; sourceTree = "<group>"; };
FB317C82CE2CBC69056BA4B8 /* TagChip.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = TagChip.swift; sourceTree = "<group>"; };
Expand Down Expand Up @@ -804,6 +808,7 @@
4696A84D17890B154533A08F /* PromptPolicyTests.swift */,
E260C4D08C786CDBD527B329 /* PromptSectionBudgetTests.swift */,
5D957E76B6EA508DE3510F98 /* RepetitionGuardTests.swift */,
E498506C46AC3AB19D8AE6EC /* RequiredPrefixConstraintTests.swift */,
B2BFD19A159680A495EE02FD /* ScreenshotContextGeneratorTests.swift */,
474560E524C1D74BAB1570DA /* SecureFieldDetectorTests.swift */,
2D7360A6D4261989A66658ED /* SentenceBoundaryClassifierTests.swift */,
Expand Down Expand Up @@ -963,6 +968,7 @@
AFCFCCCB69C29A86E726B10A /* PromptSectionBudget.swift */,
04FAB8DC9CC29F7A3EB8C91F /* RepetitionGuard.swift */,
6DC693E00430F46E41CB56E6 /* RequestID.swift */,
F8D1BE91149614EE5D888672 /* RequiredPrefixConstraint.swift */,
1827565F4FAD3E4E61CA65C3 /* SecureFieldDetector.swift */,
D4B56C250DDEF3E81F9DCBD7 /* SentenceBoundaryClassifier.swift */,
2A02336442BB735EE2E8D064 /* SettingsAttentionEvaluator.swift */,
Expand Down Expand Up @@ -1221,6 +1227,7 @@
3C561CD717064F9250200667 /* PromptSectionBudget.swift in Sources */,
097B59F01FEC03651D5732A3 /* RepetitionGuard.swift in Sources */,
A5A6CE0EF01CA6A9AFA7A400 /* RequestID.swift in Sources */,
2A1ED4231B6EAD2640AE1568 /* RequiredPrefixConstraint.swift in Sources */,
82D4ADEAF05337ABDE4C586C /* RuntimeBootstrapModel.swift in Sources */,
2C6159231472A849F15BD0AE /* ScreenFrameReader.swift in Sources */,
0C06CAD62975E87B2C852191 /* ScreenTextExtractor.swift in Sources */,
Expand Down Expand Up @@ -1337,6 +1344,7 @@
3CF1A4E39F24917DF0470A7D /* PromptPolicyTests.swift in Sources */,
7EB20783E0D36715D1230A5C /* PromptSectionBudgetTests.swift in Sources */,
ED642B8D6D0EAF52E3907DE5 /* RepetitionGuardTests.swift in Sources */,
912EE366AD8107CDAE0FBC6E /* RequiredPrefixConstraintTests.swift in Sources */,
1B3FFCB9A979F49BF86EAAD4 /* ScreenshotContextGeneratorTests.swift in Sources */,
4FC52FB28AFC013F000D8FF9 /* SecureFieldDetectorTests.swift in Sources */,
1D1C6FF0B8F50AC14A1000F4 /* SentenceBoundaryClassifierTests.swift in Sources */,
Expand Down
80 changes: 65 additions & 15 deletions Cotabby/Support/ConstrainedBeamSearch.swift
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,17 @@ struct BeamCandidate: Equatable {
let tokenIDs: [Int]
let bytes: [UInt8]
let cumulativeLogprob: Double
/// Required-prefix bytes this branch must still emit before it may complete. Empty in the common
/// unconstrained case (so behavior is unchanged); non-empty only while steering the branch toward
/// a required continuation. A branch may finish only once this is empty.
let remainingPrefix: [UInt8]

/// Creates a branch from its token IDs, the bytes emitted so far, and its cumulative log-probability.
///
/// `remainingPrefix` defaults to an empty array, so call sites that omit it build a branch with no
/// required bytes outstanding.
init(tokenIDs: [Int], bytes: [UInt8], cumulativeLogprob: Double, remainingPrefix: [UInt8] = []) {
self.tokenIDs = tokenIDs
self.bytes = bytes
self.cumulativeLogprob = cumulativeLogprob
self.remainingPrefix = remainingPrefix
}

/// Mean per-token log-probability; ranks completed branches so a short confident continuation is
/// not unfairly beaten by a longer, lower-average one. An empty branch ranks last.
Expand All @@ -68,47 +79,56 @@ enum ConstrainedBeamSearch {
profile: TokenProfile,
configuration: BeamSearchConfiguration,
isSingleLine: Bool,
isMidWord: Bool = false
isMidWord: Bool = false,
requiredPrefix: [UInt8] = []
) -> [BeamCandidate] {
Engine(
nextLogits: nextLogits,
profile: profile,
configuration: configuration,
isSingleLine: isSingleLine,
isMidWord: isMidWord
isMidWord: isMidWord,
requiredPrefix: requiredPrefix
).run()
}
}

/// The mutable-free search context, holding the inputs so the per-step helpers stay small. A struct
/// rather than passing the same four values through every call.
/// rather than passing the same values through every call.
private struct Engine {
let nextLogits: BeamLogitsProvider
let profile: TokenProfile
let configuration: BeamSearchConfiguration
let isSingleLine: Bool
let isMidWord: Bool
/// Bytes every branch must emit before it may complete. Empty for an unconstrained search.
let requiredPrefix: [UInt8]

func run() -> [BeamCandidate] {
var frontier: [BeamCandidate] = [BeamCandidate(tokenIDs: [], bytes: [], cumulativeLogprob: 0)]
var frontier: [BeamCandidate] = [
BeamCandidate(tokenIDs: [], bytes: [], cumulativeLogprob: 0, remainingPrefix: requiredPrefix)
]
var completed: [BeamCandidate] = []

for _ in 0 ..< configuration.maxTokens {
guard !frontier.isEmpty else { break }
var nextFrontier: [BeamCandidate] = []
for branch in frontier {
guard let logits = nextLogits(branch.tokenIDs) else {
completed.append(branch)
// A stalled branch is only a valid completion if it has satisfied its requirement.
if branch.remainingPrefix.isEmpty {
completed.append(branch)
}
continue
}
expand(branch: branch, logits: logits, live: &nextFrontier, completed: &completed)
}
frontier = Self.prune(nextFrontier, to: configuration.beamWidth)
}
// Budget exhausted: surviving branches are valid completions too.
completed.append(contentsOf: frontier)
// Budget exhausted: surviving branches complete too, but only if their required prefix is met.
completed.append(contentsOf: frontier.filter { $0.remainingPrefix.isEmpty })
return completed
.filter { !$0.tokenIDs.isEmpty }
.filter { !$0.tokenIDs.isEmpty && $0.remainingPrefix.isEmpty }
.sorted { $0.meanLogprob > $1.meanLogprob }
Comment on lines 130 to 132
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 The .remainingPrefix.isEmpty predicate in the return filter is redundant. Every code path that adds a candidate to completed already enforces an empty remaining prefix — via explicit if branch.remainingPrefix.isEmpty guards on EOG/newline/stalled branches, via the if remainingAfterToken.isEmpty guard on sentence boundaries, and via the .filter { $0.remainingPrefix.isEmpty } applied to the frontier at budget exhaustion. The second filter in the return statement can never catch anything the earlier guards missed, so removing it reduces noise without changing behaviour.

Suggested change
return completed
.filter { !$0.tokenIDs.isEmpty }
.filter { !$0.tokenIDs.isEmpty && $0.remainingPrefix.isEmpty }
.sorted { $0.meanLogprob > $1.meanLogprob }
return completed
.filter { !$0.tokenIDs.isEmpty }
.sorted { $0.meanLogprob > $1.meanLogprob }

Note: If this suggestion doesn't match your team's coding style, reply to this and let me know. I'll remember it for next time!

Fix in Codex Fix in Claude Code

}

Expand All @@ -124,11 +144,16 @@ private struct Engine {
history: branch.tokenIDs,
ngramSize: configuration.noRepeatNgramSize
)
// While a required prefix is unmet, the admissible token can rank far below the model's
// top-K (a forced continuation the model finds locally unlikely), so the pool must not be
// capped by raw logit — scan the full vocabulary and let the prefix rule below select. The
// unconstrained common case keeps the cheap top-K bound.
let effectiveTopK = branch.remainingPrefix.isEmpty ? configuration.topK : logits.count
var candidates = ConstrainedSampler.rankedAdmissibleTokens(
logits: logits,
profile: profile,
admissibleTokenIDs: nil,
topK: configuration.topK,
topK: effectiveTopK,
blockedTokenIDs: blocked
)
// Mid-word: the first generated token must finish the current word, not start a new token with
Expand All @@ -138,17 +163,40 @@ private struct Engine {
candidates = candidates.filter { profile.continuesWordMidStream($0) }
}
for tokenID in candidates {
// A branch may only stop (end-of-generation or single-line newline) once it has emitted
// its full required prefix; otherwise the would-be completion omits required bytes.
if profile.isEndOfGeneration(tokenID) {
completed.append(branch)
if branch.remainingPrefix.isEmpty {
completed.append(branch)
}
continue
}
if isSingleLine, profile.isNewline(tokenID) {
completed.append(branch)
if branch.remainingPrefix.isEmpty {
completed.append(branch)
}
continue
}
let tokenBytes = profile.bytes(for: tokenID)
let child = extend(branch, by: tokenID, tokenBytes: tokenBytes, logits: logits)
if Self.completesSentence(child.bytes, lastTokenBytes: tokenBytes) {
// Required-prefix admissibility: drop tokens that diverge, and carry the unconsumed tail.
let remainingAfterToken: [UInt8]
switch RequiredPrefixConstraint.step(remainingPrefix: branch.remainingPrefix, tokenBytes: tokenBytes) {
case .rejected:
continue
case .satisfied:
remainingAfterToken = []
case .advanced(let remaining):
remainingAfterToken = remaining
}
let child = extend(
branch,
by: tokenID,
tokenBytes: tokenBytes,
logits: logits,
remainingPrefix: remainingAfterToken
)
// A sentence boundary only finishes a branch that has also satisfied its required prefix.
if remainingAfterToken.isEmpty, Self.completesSentence(child.bytes, lastTokenBytes: tokenBytes) {
completed.append(child)
} else {
live.append(child)
Expand All @@ -160,15 +208,17 @@ private struct Engine {
_ branch: BeamCandidate,
by tokenID: Int,
tokenBytes: [UInt8],
logits: [Float]
logits: [Float],
remainingPrefix: [UInt8]
) -> BeamCandidate {
var bytes = branch.bytes
bytes.append(contentsOf: tokenBytes)
let logprob = ConstrainedSampler.logProb(ofTokenAt: tokenID, in: logits) ?? 0
return BeamCandidate(
tokenIDs: branch.tokenIDs + [tokenID],
bytes: bytes,
cumulativeLogprob: branch.cumulativeLogprob + logprob
cumulativeLogprob: branch.cumulativeLogprob + logprob,
remainingPrefix: remainingPrefix
)
}

Expand Down
55 changes: 55 additions & 0 deletions Cotabby/Support/RequiredPrefixConstraint.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
import Foundation

/// File overview:
/// The required-prefix admissibility rule for constrained decoding. Given a byte prefix the
/// completion must still produce, it decides whether a candidate token may be emitted and what
/// prefix remains afterward. This is the foundation for steering generation onto a known
/// continuation — finishing a specific partially-formed word, for example — without ever letting the
/// model emit bytes that diverge from the requirement.
///
/// The rule is byte-exact and deliberately trie-free: correctness needs only the two-way prefix
/// check below (the token completes the requirement, or is itself a step toward it). A trie over the
/// vocabulary would only speed the per-step lookup, never change the result, so it can be layered in
/// later as a pure optimization without touching this contract. Working in bytes (not Characters)
/// keeps the rule correct across token boundaries that split a multi-byte UTF-8 scalar.
enum RequiredPrefixConstraint {
    /// The outcome of testing one token against the remaining required prefix.
    enum Step: Equatable {
        /// The token satisfies the requirement: it either completes the remaining prefix, or the
        /// prefix was already empty. Nothing is required of later tokens in this branch.
        case satisfied
        /// The token is a non-empty strict prefix of the requirement; `remaining` is what later
        /// tokens must still produce. `remaining` is always shorter than the prefix that was tested,
        /// so every `advanced` step makes forward progress.
        case advanced(remaining: [UInt8])
        /// The token is inadmissible: it diverges from the requirement, or it carries no bytes while
        /// a requirement is still outstanding.
        case rejected
    }

    /// Tests `tokenBytes` against `remainingPrefix` and reports whether the token may be emitted and
    /// what prefix would remain afterward.
    ///
    /// - An empty `remainingPrefix` means the requirement is already met: every token is `satisfied`.
    /// - An empty `tokenBytes` against a non-empty `remainingPrefix` is `rejected`: it would consume a
    ///   generation step without producing any required byte.
    /// - A token at least as long as the remaining prefix is admissible only if it *starts with* the
    ///   whole remaining prefix (it completes, and may extend past, the requirement).
    /// - A shorter token is admissible only if the remaining prefix *starts with* the token's bytes
    ///   (it is a step toward the requirement); the unconsumed tail is what remains.
    ///
    /// - Parameters:
    ///   - remainingPrefix: Bytes the branch must still emit before it may complete.
    ///   - tokenBytes: Bytes the candidate token would append.
    /// - Returns: The admissibility outcome, carrying the unconsumed tail when the token only
    ///   partially covers the requirement.
    static func step(remainingPrefix: [UInt8], tokenBytes: [UInt8]) -> Step {
        guard !remainingPrefix.isEmpty else {
            return .satisfied
        }
        // `remainingPrefix.starts(with: [])` is vacuously true, so without this guard a zero-byte
        // token would be reported as `.advanced` with the prefix unchanged. A caller that relies on
        // `admits` alone could then emit such tokens indefinitely without ever meeting the
        // requirement.
        guard !tokenBytes.isEmpty else {
            return .rejected
        }
        if tokenBytes.count >= remainingPrefix.count {
            return tokenBytes.starts(with: remainingPrefix) ? .satisfied : .rejected
        }
        guard remainingPrefix.starts(with: tokenBytes) else {
            return .rejected
        }
        return .advanced(remaining: Array(remainingPrefix.dropFirst(tokenBytes.count)))
    }

    /// Whether `tokenBytes` may be emitted next given `remainingPrefix`, ignoring the leftover. A thin
    /// predicate over `step` for callers (e.g. a greedy admissibility mask) that do not track the
    /// remainder themselves.
    ///
    /// - Returns: `true` unless `step` reports `.rejected`.
    static func admits(remainingPrefix: [UInt8], tokenBytes: [UInt8]) -> Bool {
        step(remainingPrefix: remainingPrefix, tokenBytes: tokenBytes) != .rejected
    }
}
Loading