Add new changes#321
Conversation
d5e3724 to
b126729
Compare
| @@ -445,7 +276,7 @@ struct AXTextGeometryResolver { | |||
| !text.isEmpty, | |||
| let frame = AXHelper.rectValue(for: "AXFrame" as CFString, on: element), | |||
| !frame.isEmpty { | |||
| elements.append(element) | |||
| runs.append((text, frame)) | |||
| } | |||
|
|
|||
| guard depth < maxDepth else { | |||
| @@ -461,49 +292,7 @@ struct AXTextGeometryResolver { | |||
| walk(child, depth: 1) | |||
| } | |||
|
|
|||
| return elements | |||
| } | |||
|
|
|||
| /// Reads the current text and frame of each leaf and returns them in visual reading order. | |||
| /// Returns nil when a leaf no longer reports the static-text role — the signal that a cached | |||
| /// element list is stale and the caller must re-walk. Leaves that read empty are skipped rather | |||
| /// than treated as stale, since a line can be momentarily empty mid-edit. | |||
| private func textRuns(fromElements elements: [AXUIElement]) -> [(text: String, frame: CGRect)]? { | |||
| var runs: [(text: String, frame: CGRect)] = [] | |||
| for element in elements { | |||
| guard AXHelper.stringValue(for: kAXRoleAttribute as CFString, on: element) | |||
| == kAXStaticTextRole as String else { | |||
| return nil | |||
| } | |||
| guard let text = AXHelper.stringValue(for: kAXValueAttribute as CFString, on: element), | |||
| !text.isEmpty, | |||
| let frame = AXHelper.rectValue(for: "AXFrame" as CFString, on: element), | |||
| !frame.isEmpty else { | |||
| continue | |||
| } | |||
| runs.append((text, frame)) | |||
| } | |||
|
|
|||
| return runs.sorted { lhs, rhs in | |||
| let lhsFrame = AXHelper.cocoaRect(fromAccessibilityRect: lhs.frame) | |||
| let rhsFrame = AXHelper.cocoaRect(fromAccessibilityRect: rhs.frame) | |||
| // Bucket midY into fixed slots so every pair of runs on the same visual line maps to | |||
| // the same bucket. A direct `abs(Δ) > tolerance` comparison is non-transitive (A~B, B~C, | |||
| // but A≁C), which yields an undefined sort order and can map cumulative offsets to the | |||
| // wrong run — placing ghost text on the wrong line. | |||
| let lineTolerance: CGFloat = 4 | |||
| let lhsBucket = (lhsFrame.midY / lineTolerance).rounded(.toNearestOrAwayFromZero) | |||
| let rhsBucket = (rhsFrame.midY / lineTolerance).rounded(.toNearestOrAwayFromZero) | |||
| if lhsBucket != rhsBucket { | |||
| return lhsBucket > rhsBucket | |||
| } | |||
| return lhsFrame.minX < rhsFrame.minX | |||
| } | |||
| } | |||
|
|
|||
| /// One-shot run collection for callers that don't cache (e.g. line-height estimation). | |||
| private func collectStaticTextRuns(from root: AXUIElement) -> [(text: String, frame: CGRect)] { | |||
| textRuns(fromElements: collectStaticTextElements(from: root)) ?? [] | |||
| return runs | |||
There was a problem hiding this comment.
Text run order no longer guaranteed — can misplace caret in multi-run Chromium fields
collectStaticTextRuns appends runs in pre-order DFS tree walk order and returns them unsorted. The comment at line 253 concedes this works "in most editor trees," but the sort that was removed was explicitly there to handle editors where it doesn't: it bucketed midY into 4-pt slots and sorted by minX within a line so runs always arrived in visual reading order regardless of how Chrome exposes its child list. Without that sort, any field where the AX tree returns children out of left-to-right/top-to-bottom order will accumulate character offsets against the wrong run, parking the ghost overlay on a completely different line.
| } catch let error as ScreenshotContextGenerationError { | ||
| logGenerationError(error) | ||
| CotabbyLogger.app.warning("Visual context generation error: \(error.localizedDescription)") | ||
| setStatus(errorStatus(for: error), for: session.sessionID) |
There was a problem hiding this comment.
.unavailable is now logged at .warning — will spam logs for routine no-signal cases
ScreenshotContextGenerationError.unavailable is the expected outcome whenever a screenshot lacks enough visible text (sparse OCR, dark or blank windows, no matching window) — it is not a failure. The removed logGenerationError helper specifically routed .unavailable to .debug for this reason. Now every routine "no useful OCR text" event, including the common WindowScreenshotError.noVisibleWindowForProcess path (wrapped as .unavailable in ScreenshotContextGenerator), surfaces as a .warning in production diagnostics alongside real breakage.
| let matchingWindow = | ||
| bestWindow(for: context, in: substantialWindows) | ||
| ?? substantialWindows.first(where: { $0.isActive }) | ||
| ?? substantialWindows.first | ||
| ?? bestWindow(for: context, in: processWindows) | ||
| ?? processWindows.first(where: { $0.isActive }) | ||
| ?? processWindows.first | ||
| shareableContent.windows.first(where: { | ||
| $0.owningApplication?.processID == processIdentifier && $0.isActive && $0.isOnScreen | ||
| }) | ||
| ?? shareableContent.windows.first(where: { | ||
| $0.owningApplication?.processID == processIdentifier && $0.isOnScreen | ||
| }) |
There was a problem hiding this comment.
Simplified selection can capture a tiny Chrome toolbar or popover instead of the content window
The old logic filtered windows to those ≥240×240 px ("substantial") before falling back to unfiltered, and used caret/input-frame geometry as the primary signal for which window among Chrome's many surfaces actually owns the focused editor. The replacement takes the first isActive && isOnScreen window with no size gate and no anchor-geometry check. In Chrome and Electron apps that expose auxiliary surfaces (tooltips, DevTools overlays, the autofill popover) as separate SCKit windows, any one of those tiny active surfaces can now become the screenshot target, producing an unrelated or empty crop for OCR.
...
Greptile Summary
This PR is a significant simplification pass across the focus-tracking and visual-context subsystems: it removes the
CaretGeometrySourceCache, theAXChromeFocusProbedebug instrument, several Chromium-specific BoundsForRange guards, and theisSubstantialWindow/anchor-geometry window-selection logic, replacing them with a time-based deep-walk throttle (DeepGeometryWalkThrottle) and simpler fallback paths. UI views also revert from static-glyph workarounds back to standardProgressViewspinners.collectStaticTextRunsnow returns runs in DFS walk order without sorting by visual position, which risks incorrect cumulative-offset caret mapping when browser AX trees expose children out of reading order.ScreenshotContextGenerationError.unavailable(routine no-signal outcome) is now logged at.warningrather than.debug, conflating expected no-context events with real pipeline failures.Confidence Score: 3/5
Three independent regressions in the changed paths make this risky to merge without validation on Chromium editors and multi-window setups.
The caret placement path for Gmail/Outlook lost its visual-position sort, so any Chromium field where AX exposes child runs out of reading order will misplace the ghost overlay. The window screenshot selector no longer filters by size or anchor geometry, so an active Chrome tooltip can be captured instead of the content window. Routine no-OCR-signal events now emit as warnings, making it harder to distinguish real failures in diagnostics. All three affect the product's core use-case paths.
AXTextGeometryResolver.swift (run sort removal), WindowScreenshotService.swift (window selection), and VisualContextCoordinator.swift (log level) need the most attention before merging.
Important Files Changed
Flowchart
%%{init: {'theme': 'neutral'}}%% flowchart TD A[FocusTracker polls AX] --> B[FocusSnapshotResolver.resolveSnapshot] B --> C[candidateElements around focusedElement] C --> D[candidateSnapshot for each element] D --> E{primary caretQuality == .exact?} E -- Yes --> F[deepResult = nil] E -- No --> G[DeepGeometryWalkThrottle.result] G --> H{Same field + within interval?} H -- Yes --> I[Return cached deep result] H -- No --> J[resolveDeepGeometrySource BFS up to 200 nodes] J --> K[findDeepGeometrySource] K --> L[AXTextGeometryResolver.resolveCaretRect per node] L --> M[collectStaticTextRuns DFS walk - unsorted] M --> N[Cumulative offset mapping] F & I & N --> O[CaretGeometrySelector.select] O --> P[FocusSnapshot emitted] P --> Q[VisualContextCoordinator] Q --> R[WindowScreenshotService] R --> S{first active+onScreen window} S -- found --> T[ScreenshotContextGenerator OCR] T -- unavailable --> U[.warning log] T -- success --> V[VisualContextExcerpt]Reviews (2): Last reviewed commit: "Fix focus snapshot selection handling" | Re-trigger Greptile