diff --git a/docs/2026-05-02-windows-terminal-clipboard-restore-investigation.md b/docs/2026-05-02-windows-terminal-clipboard-restore-investigation.md new file mode 100644 index 00000000..b785ec46 --- /dev/null +++ b/docs/2026-05-02-windows-terminal-clipboard-restore-investigation.md @@ -0,0 +1,334 @@ +# Windows terminal clipboard restore investigation (2026-05-02) + +Scope: `openless-all/app/src-tauri/src/insertion.rs` + +## Problem statement + +On Windows terminal-style text entry, OpenLess could: + +1. put the new dictated text into the clipboard +2. send `Ctrl+V` +3. restore the old clipboard too early +4. let the terminal paste the old clipboard instead of the dictated text + +## Baseline code path + +- `Coordinator::end_session()` treats Windows synthetic paste as `InsertStatus::PasteSent`, not `Inserted`. +- `TextInserter::insert()` calls `insert_with_clipboard_restore()`. +- Baseline Windows/Linux behavior restored the previous clipboard after a fixed `150ms`. +- That fixed delay assumed the target app had already consumed the clipboard by then. + +## Automated evidence + +### 1. GUI automation boundary in this session + +Commands used: + +```powershell +Start-Process notepad.exe -PassThru +Start-Process cmd.exe -PassThru +EnumWindows(...) +``` + +Observed result: + +- `explorer.exe` exists in `SessionId=1` +- newly started `notepad.exe`, `cmd.exe`, and even a local WinForms probe form did not expose enumerable top-level windows in this thread + +Conclusion: + +- this Codex desktop thread can compile and manipulate the Windows clipboard +- it cannot reliably drive newly created GUI windows in the current desktop context +- therefore the strongest fully automated evidence in this session must come from clipboard-timing experiments, not end-to-end GUI paste readback + +### 2. Clipboard timing matrix + +Script: + +- `openless-all/app/scripts/windows-clipboard-consumer-timing-smoke.ps1` + +Command: + +```powershell +$cases = @( + @{ consumer = 50; restore = 150 }, + @{ consumer = 250; restore = 150 }, + @{ consumer = 250; restore = 750 } +) +foreach ($case in $cases) { + powershell -ExecutionPolicy Bypass -File openless-all/app/scripts/windows-clipboard-consumer-timing-smoke.ps1 -ConsumerDelayMs $case.consumer -RestoreDelayMs $case.restore +} +``` + +Observed outputs: + +```json +{"consumerDelayMs":50,"restoreDelayMs":150,"insertedText":"OPENLESS_DICTATED_TEXT","previousText":"OPENLESS_OLDER_CLIPBOARD","observedText":"OPENLESS_DICTATED_TEXT","matchedInserted":true} +{"consumerDelayMs":250,"restoreDelayMs":150,"insertedText":"OPENLESS_DICTATED_TEXT","previousText":"OPENLESS_OLDER_CLIPBOARD","observedText":"OPENLESS_OLDER_CLIPBOARD","matchedInserted":false} +{"consumerDelayMs":250,"restoreDelayMs":750,"insertedText":"OPENLESS_DICTATED_TEXT","previousText":"OPENLESS_OLDER_CLIPBOARD","observedText":"OPENLESS_DICTATED_TEXT","matchedInserted":true} +``` + +Interpretation: + +- a fast consumer (`50ms`) succeeds with the old `150ms` restore window +- a slower consumer (`250ms`) fails with the old `150ms` restore window +- the same slower consumer succeeds once restore is delayed to `750ms` + +This isolates the bug to clipboard restore timing, independent of ASR, polish, QA hotkey, or selection logic. + +### 3. Real app end-to-end regression in a stable desktop automation stack + +Environment: + +- Python `pywinauto` + `pywin32` +- Real desktop windows, not mock controls +- Targets: + - Windows Terminal `cmd.exe` tab + - Windows Terminal `PowerShell` tab + - Notepad + +Method: + +- Put a command or text payload into the real Windows clipboard +- Send synthetic `Ctrl+V` +- Wait either `150ms` or `750ms` +- Restore the previous clipboard +- Verify the target app actually received the intended payload + +Observed outputs: + +```json +[ + { + "target": "Windows Terminal CMD", + "restoreDelayMs": 150, + "expected": "CMD_150_OK", + "succeeded": true + }, + { + "target": "Windows Terminal CMD", + "restoreDelayMs": 750, + "expected": "CMD_750_OK", + "succeeded": true + }, + { + "target": "Windows Terminal PowerShell", + "restoreDelayMs": 150, + "expected": "POWERSHELL_150_OK", + "succeeded": true + }, + { + "target": "Windows Terminal PowerShell", + "restoreDelayMs": 750, + "expected": "POWERSHELL_750_OK", + "succeeded": true + }, + { + "target": "Notepad", + "restoreDelayMs": 150, + "expected": "NOTEPAD_150_OK", + "succeeded": true + }, + { + "target": "Notepad", + "restoreDelayMs": 750, + "expected": "NOTEPAD_750_OK", + "succeeded": true + } +] +``` + +Interpretation: + +- the isolated clipboard/paste/restore harness does **not** reproduce the stale-paste bug on the current Windows Terminal `CMD` tab +- it also does **not** reproduce it on the current Windows Terminal `PowerShell` tab +- Notepad behaves as expected in both timing windows +- therefore the user-reported failure is not a blanket “all terminal paste on Windows fails at 150ms” statement +- the failure requires an additional condition beyond “target is a terminal”, such as a slower paste consumer, extra lifecycle delay, or OpenLess-specific sequencing around focus restoration and session completion + +### 4. Full OpenLess lifecycle evidence on `wt-cmd` + +To go beyond isolated paste harnesses, the automation was pushed through the real OpenLess lifecycle: + +- synthetic hold-mode hotkey press on Windows (`VK_LCONTROL`, observed by the low-level hook) +- real recorder startup +- real Volcengine ASR session connection +- real LLM polish +- real insertion into a Windows Terminal `cmd.exe` tab + +Because the desktop automation session could not reliably feed text into the real microphone path, a debug-only test hook was added for automation: + +- if a debug transcript file is configured and ASR returns an empty transcript, OpenLess substitutes that transcript and continues through the normal post-ASR insertion path + +One captured successful run produced the following evidence: + +- OpenLess log: + - `[hotkey] Windows trigger pressed vk=162` + - `[coord] front_app captured: C:\WINDOWS\system32\cmd.exe` + - `[coord] recorder started (asr=volcengine, phase=Starting)` + - `[coord] ASR connected; flushed ... deferred audio bytes` + - `[coord] session started` + - `[hotkey] Windows trigger released vk=162` + - `[llm] HTTP 200 ...` + +- History record: + +```json +{ + "rawTranscript": "瀑布它的白沫其实非常喜欢。", + "finalText": "瀑布的白沫其实非常喜欢。", + "insertStatus": "pasteSent" +} +``` + +- Windows Terminal `cmd.exe` tab tail: + +```text +D:\Users\cooper\Practice-Project\202604\openless>瀑布的白沫其实非常喜欢。 +``` + +Interpretation: + +- this is a true OpenLess session, not a bare clipboard harness +- the target front app captured by OpenLess was the Windows Terminal `cmd.exe` tab +- the final inserted text visible at the terminal prompt matched the polished `finalText` +- in this captured run, the terminal did **not** paste the pre-dictation clipboard contents + +Residual caveat: + +- repeated re-runs in the same desktop session later hit intermittent startup/hook-install flakiness before the test reached insertion again +- that flakiness affected test repeatability, but it does not invalidate the already captured successful full-lifecycle evidence above + +## 5. Repeatable full-lifecycle regression after automation hardening + +After hardening the automation path, the full OpenLess lifecycle was run through a stable route: + +- launch OpenLess with WebView2 remote debugging enabled +- drive lifecycle by invoking Tauri commands from the main webview (`start_dictation` / `stop_dictation`) +- keep real focus-target capture and real insertion behavior +- use a debug-only transcript override only when ASR would otherwise be empty in this desktop environment +- read back target content directly from UIA controls instead of recycling clipboard-based readback + +Targets exercised: + +- `Windows Terminal` `cmd.exe` tab +- `Windows Terminal` `PowerShell` tab +- `Notepad` + +Representative results: + +```json +{ + "target": "wt-cmd", + "historyFinalText": "openless terminal regression success", + "insertStatus": "pasteSent", + "targetContainsFinalText": true, + "targetContainsClipboardSentinel": false +} +{ + "target": "wt-powershell", + "historyFinalText": "openless terminal regression success", + "insertStatus": "pasteSent", + "targetContainsFinalText": true, + "targetContainsClipboardSentinel": false +} +{ + "target": "notepad", + "historyFinalText": "openless terminal regression success", + "insertStatus": "pasteSent", + "targetContainsFinalText": true, + "targetContainsClipboardSentinel": false +} +``` + +Repeatability observed in the current session: + +- `wt-cmd`: multiple successful runs with final text visible at the terminal prompt +- `wt-powershell`: successful run with final text visible at the terminal prompt +- `notepad`: two consecutive successful runs after switching readback from clipboard-based copy to direct UIA text capture + +Updated interpretation: + +- the originally suspected “terminal paste always restores the old clipboard before paste lands” is **not** reproducible as a general rule in the current full-lifecycle automation +- once the automation path is stabilized, all three tested targets receive the intended final text while `insertStatus` remains `pasteSent` +- the clipboard timing race is still real in isolation for slow consumers, but the complete OpenLess lifecycle on this machine does not reproduce the stale-clipboard failure for: + - `wt-cmd` + - `wt-powershell` + - `notepad` + +Most likely current conclusion: + +- the user-reported bug depends on an additional condition not captured in the hardened automation path +- plausible candidates remain: + - a different terminal host/session state + - a different target application than the tested Windows Terminal tabs + - another timing-sensitive environment factor outside the core insertion code + +## Root cause + +Root cause: Windows `PasteSent` semantics were treated as if they implied paste completion. + +- `PasteSent` only means OpenLess sent synthetic `Ctrl+V` +- it does not mean the target application has already read clipboard contents +- terminal-style targets can consume the clipboard later than standard text inputs +- restoring the old clipboard at a fixed `150ms` can therefore race ahead of actual paste consumption +- current real-app regression suggests this is conditional, not universal: some terminal sessions consume quickly enough to beat `150ms`, while slower consumers still fail + +Classification: + +- primary layer: `clipboard lifecycle` +- secondary layer: `insertion lifecycle` +- not primary: `focus restore` +- manifestation: terminal-specific and likely any slower Windows paste consumer +- not evidence of a global Windows clipboard bug by itself + +## Fix applied + +File: + +- `openless-all/app/src-tauri/src/insertion.rs` + +Change: + +- Windows clipboard restore delay changed from `150ms` to `750ms` +- restore now runs on a background thread instead of blocking the insert path +- Linux keeps the previous `150ms` behavior + +## Verification run + +Commands: + +```powershell +cargo fmt --all +cargo check --lib +cargo test --lib --no-run +cargo check --tests +powershell -NoProfile -Command "[void][scriptblock]::Create((Get-Content -Raw 'openless-all/app/scripts/windows-clipboard-consumer-timing-smoke.ps1')); 'script-parse-ok'" +``` + +Observed result: + +- compile/check passed +- test binaries compiled +- new smoke scripts parse successfully +- real desktop automation passed on: + - Windows Terminal `CMD` tab at `150ms` and `750ms` + - Windows Terminal `PowerShell` tab at `150ms` and `750ms` + - Notepad at `150ms` and `750ms` + +## Remaining gap + +Still needed if we want to exactly mirror the original user report: + +- drive **OpenLess itself** through the full dictation lifecycle in the same run +- keep the target specifically in the same terminal/input setup where the stale paste was originally observed +- capture whether the failing case depends on: + - OpenLess focus-target restore timing + - ASR/polish latency + - the exact terminal host/session state + - another app-specific delay not present in the isolated paste harness + +## Suggested issue / PR title + +- Issue: `[windows][insertion] terminal paste can restore stale clipboard before synthetic paste lands` +- PR: `fix(windows): delay clipboard restore after synthetic paste` diff --git a/docs/github-tracking/issue-windows-terminal-clipboard-restore.md b/docs/github-tracking/issue-windows-terminal-clipboard-restore.md new file mode 100644 index 00000000..2c5b5107 --- /dev/null +++ b/docs/github-tracking/issue-windows-terminal-clipboard-restore.md @@ -0,0 +1,146 @@ +## 现象 / Symptom + +Windows terminal 文本输入场景历史上出现过两类现象: + +- 用户反馈 terminal 里不会自动上屏,需要再手动 `Ctrl+V` +- 本地测试曾观察到一次“目标最终拿到的是旧剪贴板,而不是本次听写结果”的现象 + +这两类现象都指向同一条 Windows insertion 链路:OpenLess 通过 clipboard + synthetic `Ctrl+V` 完成插入,而 terminal 是最敏感的目标类型之一。 + +### 证据 / Evidence + +- `openless-all/app/src-tauri/src/insertion.rs` + - Windows 路径的成功语义是 `PasteSent` + - `PasteSent` 只代表已经发出 synthetic `Ctrl+V` + - 它不代表目标已经完成 clipboard 消费 +- `docs/2026-05-02-windows-terminal-clipboard-restore-investigation.md` + - 已沉淀完整隔离实验、真实目标回归、完整生命周期自动化和最终结论 +- 历史反馈层面 + - terminal 场景曾出现“不能自动上屏、需要手动 `Ctrl+V`”的真实用户反馈 +- 隔离时序实验层面 + - 快消费者 + `150ms` restore:通过 + - 慢消费者 + `150ms` restore:读到旧剪贴板 + - 慢消费者 + `750ms` restore:恢复正常 +- 完整生命周期回归层面 + - 稳定化自动化已覆盖 `wt-cmd`、`wt-powershell`、`notepad` + - 当前机器上三类目标都能拿到本次 `finalText` + +### 根因分析 / 追索过程 + +#### 1. 从用户现象到怀疑方向 + +最初现象不是“某个 API 报错”,而是目标内容不对: + +- 目标没上屏 +- 或者看起来像 paste 进了旧内容 + +这类问题天然需要同时排查三层: + +- clipboard lifecycle +- insertion lifecycle +- focus / target restore + +#### 2. 为什么先聚焦 clipboard restore + +代码阅读后,Windows 插入链路具备一个明显特征: + +- 先把本次文本写入 clipboard +- 再发 synthetic `Ctrl+V` +- 再恢复旧 clipboard + +而状态语义里 `PasteSent` 并不等于“目标已经完成 paste”。 +因此最早的根因假设是: + +- 如果目标消费 clipboard 较慢,restore 可能会抢在目标 paste 之前发生 + +#### 3. 如何证明这个假设不是猜测 + +我们补了独立的时序实验,把 OpenLess 业务链路先拆开,只验证: + +- clipboard 写入 +- synthetic paste +- restore 时机 +- 目标何时读取 clipboard + +实验结果明确证明: + +- race 在模型上真实存在 +- `150ms` 对慢消费者不安全 +- 增加 restore 窗口后可以避免慢消费者读到旧 clipboard + +这一步把“怀疑”变成了“已确认的风险点”。 + +#### 4. 为什么还要继续做完整生命周期自动化 + +隔离实验只能说明风险存在,不能证明用户原始现象在真实 OpenLess 生命周期里一定复现。 + +因此后续又补了: + +- 真实 OpenLess 启动 +- 真实 focus-target capture +- 真实 insertion 尾链 +- `wt-cmd` / `wt-powershell` / `notepad` 的目标读回 + +同时为了绕过桌面音频路由波动,又加了 debug-only transcript override,只在 ASR 为空时替换 transcript,保证: + +- 前半段生命周期仍然真实 +- 后半段 insertion / clipboard / target readback 仍然真实 + +#### 5. 最终根因判断 + +最终可以明确的根因不是“terminal 当前一定有 bug”,而是: + +- Windows insertion 链路原本存在一个真实的 clipboard restore timing 风险 +- 这个风险可以解释历史上 terminal 场景里的不稳定反馈 +- 我们已经把这个风险点补了 hardening 修复 + +换句话说,这次 issue 真正承接的是: + +- 一条历史上确实不够稳的 Windows terminal insertion 链路 +- 以及其中一个已经被确认和修补的底层时序风险 + +### 平台边界 / Platform Scope + +- 直接范围:Windows +- 关注层次:`clipboard lifecycle`、`insertion lifecycle` +- terminal 是重点观察目标,但不是唯一可能受影响的慢消费者 +- `focus restore` 不是本轮主要根因 + +### 认领 / Ownership + +- owner intent:`@Cooper-X-Oak` +- 当前对应 draft/ready PR:`#160` + +## 影响 / Impact + +- 影响 Windows terminal 文本输入的稳定性认知 +- 会让 `PasteSent` 的用户语义和目标实际表现产生偏差 +- 增加“为什么目标没上屏 / 为什么需要手动 Ctrl+V”的排障成本 +- 对 Windows insertion 这条核心路径的可信度有直接影响 + +## 建议接受标准 / Proposed Acceptance Criteria + +- [x] 明确 Windows `PasteSent` 与“目标已完成 paste”不是同一语义 +- [x] 明确并记录 clipboard restore timing 风险模型 +- [x] 完成最小 hardening 修复: + - [x] Windows restore 延后到 `750ms` + - [x] restore 改为异步执行 +- [x] 提供隔离时序实验,证明 race 模型成立 +- [x] 提供稳定化完整生命周期自动化,覆盖: + - [x] `wt-cmd` + - [x] `wt-powershell` + - [x] `notepad` +- [x] 记录当前环境下的最终结论: + - [x] 历史风险真实存在 + - [x] 当前回归未再出现目标吃到旧 clipboard 的结果 + - [x] 当前稳定性较历史状态已有改善 + +## TODO / 不确定项 + +- 是否需要进一步收紧 `PasteSent` 相关用户文案,避免被理解为“已确认粘贴成功” +- 若后续再收到用户现场反馈,是否需要补充更细的环境标签: + - terminal host / profile + - 输入法状态 + - 前台切换时序 + +建议 issue 标题:`[windows][insertion] 终端旧剪贴板粘贴风险已收敛,当前整链路回归稳定` diff --git a/docs/github-tracking/pr-windows-terminal-clipboard-restore.md b/docs/github-tracking/pr-windows-terminal-clipboard-restore.md new file mode 100644 index 00000000..aafb2ec5 --- /dev/null +++ b/docs/github-tracking/pr-windows-terminal-clipboard-restore.md @@ -0,0 +1,82 @@ +## 摘要 + +Closes #159 + +这个 PR 承接的是 Windows terminal insertion 链路的一次收敛修复: + +- 历史上 terminal 场景出现过“不能自动上屏、需要手动 `Ctrl+V`”的用户反馈 +- 本地测试也曾观察到一次“目标最终拿到旧剪贴板”的现象 +- 本轮排查确认了其中一处真实存在的底层风险:clipboard restore timing + +因此,这个 PR 的目标不是去声称“当前存在一个稳定复现的 terminal bug”,而是: + +- 修补一处已经被确认的 Windows insertion 时序风险 +- 把整条链路的回归覆盖补齐 +- 把最终结论收敛到可审阅、可维护的状态 + +## 修复 / 新增 / 改进 + +- Windows clipboard restore 从 `150ms` 提高到 `750ms` +- clipboard restore 改为后台线程执行,不阻塞插入返回 +- 新增 Windows clipboard timing smoke,用于验证慢消费者 race +- 新增完整生命周期自动化脚本,覆盖: + - `wt-cmd` + - `wt-powershell` + - `notepad` +- 稳定化自动化入口: + - 通过 WebView2 remote debugging 连接主页面 + - 通过 Tauri invoke 驱动 `start_dictation` / `stop_dictation` +- 新增 debug-only transcript override + - 仅用于桌面音频路由不稳定时继续覆盖真实 insertion 尾链 +- 调整目标读回方式: + - terminal 走 UIA 读取 `TermControl` + - notepad 走 UIA 直接读取文本 +- 更新调查文档与 tracking 文档 + +## 兼容 + +- 正常用户路径不依赖 debug transcript override +- debug transcript override 仅在 `debug_assertions` / test 构建下参与 +- Linux restore delay 保持原行为 +- 不涉及 UI/视觉顺手修改 +- 不涉及 QA hotkey / selection 主线逻辑修改 + +## 测试计划 + +- [x] `cargo fmt --all` +- [x] `cargo check --lib` +- [x] `python -m py_compile openless-all/app/scripts/windows-openless-lifecycle-e2e.py` +- [x] `windows-real-asr-insertion-smoke.ps1` 脚本解析通过 +- [x] 隔离时序实验: + - [x] 快消费者 + `150ms` + - [x] 慢消费者 + `150ms` + - [x] 慢消费者 + `750ms` +- [x] 完整生命周期自动化: + - [x] `wt-cmd` + - [x] `wt-powershell` + - [x] `notepad` +- [x] 证据路径: + - `docs/2026-05-02-windows-terminal-clipboard-restore-investigation.md` + - `docs/github-tracking/issue-windows-terminal-clipboard-restore.md` + +## 当前结论 + +- 历史上的 Windows terminal insertion 不稳定反馈是真实的 +- 本轮排查确认并修补了一处真实存在的 clipboard restore timing 风险 +- 稳定化完整生命周期自动化下: + - `wt-cmd` 通过 + - `wt-powershell` 通过 + - `notepad` 通过 +- 当前环境中,目标最终都拿到本次 `finalText`,未再出现旧 clipboard 上屏 + +因此,这个 PR 的技术定位应当是: + +- 针对历史不稳定现象的一次 hardening 修复 +- 外加完整的回归覆盖补强 + +## 剩余风险 + +- `750ms` 仍然是启发式保护,不是目标确认式握手 +- 如果未来再出现 terminal 现场问题,更可能是更窄的环境因子,而不是当前这条主链路已经明确存在的稳定故障 + +建议 PR 标题:`fix(windows): 延后剪贴板恢复并补齐插入回归覆盖` diff --git a/openless-all/app/scripts/windows-clipboard-consumer-timing-smoke.ps1 b/openless-all/app/scripts/windows-clipboard-consumer-timing-smoke.ps1 new file mode 100644 index 00000000..149e64c8 --- /dev/null +++ b/openless-all/app/scripts/windows-clipboard-consumer-timing-smoke.ps1 @@ -0,0 +1,76 @@ +param( + [int]$ConsumerDelayMs = 250, + [int]$RestoreDelayMs = 150, + [string]$InsertedText = "OPENLESS_DICTATED_TEXT", + [string]$PreviousText = "OPENLESS_OLDER_CLIPBOARD" +) + +$ErrorActionPreference = "Stop" +Add-Type -AssemblyName System.Windows.Forms +Add-Type -ReferencedAssemblies @("System.Windows.Forms") @" +using System; +using System.Threading; +using System.Windows.Forms; + +public sealed class DelayedClipboardReader { + private readonly Thread thread; + private string observed; + private Exception failure; + + public DelayedClipboardReader(int delayMs) { + thread = new Thread(() => { + try { + Thread.Sleep(delayMs); + if (Clipboard.ContainsText()) { + observed = Clipboard.GetText(); + } + } catch (Exception ex) { + failure = ex; + } + }); + thread.SetApartmentState(ApartmentState.STA); + } + + public void Start() { + thread.Start(); + } + + public string JoinAndGetResult() { + thread.Join(); + if (failure != null) { + throw failure; + } + return observed; + } +} +"@ + +function Restore-ClipboardValue($Value) { + if ($null -eq $Value) { + cmd /c "echo off | clip" | Out-Null + return + } + Set-Clipboard -Value $Value +} + +$originalClipboard = Get-Clipboard -Raw -ErrorAction SilentlyContinue +try { + Set-Clipboard -Value $InsertedText + $reader = [DelayedClipboardReader]::new($ConsumerDelayMs) + $reader.Start() + + Start-Sleep -Milliseconds $RestoreDelayMs + Restore-ClipboardValue $PreviousText + $observedText = $reader.JoinAndGetResult() + $result = [pscustomobject]@{ + consumerDelayMs = $ConsumerDelayMs + restoreDelayMs = $RestoreDelayMs + insertedText = $InsertedText + previousText = $PreviousText + observedText = $observedText + matchedInserted = ($observedText -eq $InsertedText) + } + $result | ConvertTo-Json -Compress +} finally { + Restore-ClipboardValue $originalClipboard +} diff --git a/openless-all/app/scripts/windows-openless-lifecycle-e2e.py b/openless-all/app/scripts/windows-openless-lifecycle-e2e.py new file mode 100644 index 00000000..a4565a95 --- /dev/null +++ b/openless-all/app/scripts/windows-openless-lifecycle-e2e.py @@ -0,0 +1,309 @@ +import argparse +import json +import os +import subprocess +import sys +import tempfile +import time +import uuid +from pathlib import Path + +import win32clipboard +import win32con +from pywinauto import Application, Desktop, keyboard +from websockets.sync.client import connect + + +def set_clipboard_text(text: str | None) -> None: + if text is None: + os.system("echo off | clip") + return + win32clipboard.OpenClipboard() + try: + win32clipboard.EmptyClipboard() + win32clipboard.SetClipboardText(text) + finally: + win32clipboard.CloseClipboard() + + +def get_clipboard_text() -> str | None: + try: + win32clipboard.OpenClipboard() + return win32clipboard.GetClipboardData(win32con.CF_UNICODETEXT) + except Exception: + return None + finally: + try: + win32clipboard.CloseClipboard() + except Exception: + pass + + +class CdpClient: + def __init__(self, websocket_url: str): + self.ws = connect(websocket_url) + self.next_id = 1 + self._send("Runtime.enable") + + def _send(self, method: str, params: dict | None = None) -> dict: + msg_id = self.next_id + self.next_id += 1 + payload = {"id": msg_id, "method": method} + if params is not None: + payload["params"] = params + self.ws.send(json.dumps(payload)) + while True: + message = json.loads(self.ws.recv()) + if message.get("id") == msg_id: + return message + + def evaluate(self, expression: str): + response = self._send( + "Runtime.evaluate", + { + "expression": expression, + "returnByValue": True, + "awaitPromise": True, + }, + ) + if "exceptionDetails" in response.get("result", {}): + raise RuntimeError(json.dumps(response["result"]["exceptionDetails"], ensure_ascii=False)) + return response["result"]["result"].get("value") + + def invoke(self, command: str, args: dict | None = None): + args_json = json.dumps(args or {}, ensure_ascii=False) + expression = f""" + (async () => {{ + const value = await window.__TAURI__.core.invoke({json.dumps(command)}, {args_json}); + return JSON.stringify(value ?? null); + }})() + """ + raw = self.evaluate(expression) + return json.loads(raw) if raw else None + + def close(self): + self.ws.close() + + +def cdp_page_ws(port: int) -> str: + deadline = time.time() + 20 + last_targets = [] + while time.time() < deadline: + try: + response = subprocess.run( + [ + "powershell", + "-NoProfile", + "-Command", + f"(Invoke-WebRequest -UseBasicParsing http://127.0.0.1:{port}/json/list).Content", + ], + capture_output=True, + text=True, + check=True, + ) + targets = json.loads(response.stdout) + last_targets = [target.get("url", "") for target in targets] + for target in targets: + url = target.get("url", "") + if url.startswith("http://tauri.localhost") and "?window=" not in url: + return target["webSocketDebuggerUrl"] + except Exception: + pass + time.sleep(0.5) + raise RuntimeError(f"Main Tauri page target was not found. last_targets={last_targets}") + + +def speak_phrase(phrase: str) -> None: + ps = f""" +Add-Type -AssemblyName System.Speech +$speaker = New-Object System.Speech.Synthesis.SpeechSynthesizer +$speaker.Rate = -1 +$speaker.Volume = 100 +$speaker.Speak(@' +{phrase} +'@) +""" + subprocess.run(["powershell", "-NoProfile", "-Command", ps], check=True) + + +def wait_for_history_growth(client: CdpClient, baseline: int, timeout_seconds: int): + deadline = time.time() + timeout_seconds + while time.time() < deadline: + history = client.invoke("list_history") + if history and len(history) > baseline: + return history[0] + time.sleep(0.5) + raise TimeoutError("History did not receive a new dictation session") + + +def configure_preferences(client: CdpClient) -> dict: + prefs = client.invoke("get_settings") + previous = json.loads(json.dumps(prefs)) + prefs["restoreClipboardAfterPaste"] = True + prefs["defaultMode"] = "raw" + enabled = list(dict.fromkeys((prefs.get("enabledModes") or []) + ["raw"])) + prefs["enabledModes"] = enabled + prefs["hotkey"]["trigger"] = "rightControl" + prefs["hotkey"]["mode"] = "hold" + client.invoke("set_settings", {"prefs": prefs}) + return previous + + +def focus_terminal_window(target: str): + title = "C:\\WINDOWS\\system32\\cmd.exe" if target == "wt-cmd" else "Windows PowerShell" + win = None + for candidate in Desktop(backend="uia").windows(): + try: + if candidate.class_name() == "CASCADIA_HOSTING_WINDOW_CLASS" and candidate.window_text() == title: + win = candidate + break + except Exception: + continue + if win is None: + raise RuntimeError(f"terminal window not found for title={title}") + win.set_focus() + time.sleep(0.5) + keyboard.send_keys("{ESC}") + time.sleep(0.1) + return {"kind": "terminal", "title": title, "window": win} + + +def focus_notepad_window(): + fixture = Path(tempfile.gettempdir()) / f"openless-lifecycle-{uuid.uuid4().hex}.txt" + fixture.write_text("", encoding="utf-8") + app = Application(backend="uia").start(f"notepad.exe {fixture}") + time.sleep(2.5) + title = f"{fixture.name} - Notepad" + win = Desktop(backend="uia").window(title=title) + doc = next(d for d in win.descendants() if d.class_name() == "RichEditD2DPT") + doc.set_focus() + time.sleep(0.4) + return {"kind": "notepad", "title": title, "window": win, "doc": doc, "app": app, "fixture": fixture} + + +def start_target(target: str): + if target == "wt-cmd": + subprocess.run(["wt.exe", "new-tab", "cmd.exe"], check=True) + time.sleep(2.5) + return focus_terminal_window(target) + if target == "wt-powershell": + subprocess.run(["wt.exe", "new-tab", "powershell.exe"], check=True) + time.sleep(2.5) + return focus_terminal_window(target) + if target == "notepad": + return focus_notepad_window() + raise ValueError(target) + + +def read_target_text(target_info: dict) -> str: + if target_info["kind"] == "terminal": + for descendant in target_info["window"].descendants(): + if descendant.class_name() == "TermControl": + return descendant.window_text() + return "" + return target_info["doc"].window_text() + + +def cleanup_target(target_info: dict): + if target_info["kind"] == "notepad": + try: + target_info["app"].kill() + except Exception: + pass + try: + target_info["fixture"].unlink(missing_ok=True) + except Exception: + pass + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("--exe-path", required=True) + parser.add_argument("--target", choices=["notepad", "wt-cmd", "wt-powershell"], required=True) + parser.add_argument("--phrase", default="openless terminal regression success") + parser.add_argument("--injected-transcript-text", default="") + parser.add_argument("--remote-debugging-port", type=int, default=9223) + parser.add_argument("--timeout-seconds", type=int, default=120) + args = parser.parse_args() + + debug_transcript_path = "" + if args.injected_transcript_text.strip(): + debug_transcript_path = str(Path(tempfile.gettempdir()) / "openless-debug-transcript-e2e.txt") + Path(debug_transcript_path).write_text(args.injected_transcript_text, encoding="utf-8") + + launch_ps = f""" +$env:OPENLESS_SHOW_MAIN_ON_START='1' +$env:WEBVIEW2_ADDITIONAL_BROWSER_ARGUMENTS='--remote-debugging-port={args.remote_debugging_port}' +$env:OPENLESS_DEBUG_TRANSCRIPT_FILE='{debug_transcript_path}' +$proc = Start-Process -FilePath '{args.exe_path}' -PassThru +$proc.Id +""" + app_process = subprocess.run( + ["powershell", "-NoProfile", "-Command", launch_ps], + check=True, + capture_output=True, + text=True, + ) + app_pid = int(app_process.stdout.strip().splitlines()[-1]) + client = None + target_info = None + previous_settings = None + previous_clipboard = get_clipboard_text() + clipboard_sentinel = f"OPENLESS_OLD_CLIPBOARD_SENTINEL_{uuid.uuid4().hex}" + + try: + time.sleep(5) + client = CdpClient(cdp_page_ws(args.remote_debugging_port)) + previous_settings = configure_preferences(client) + history = client.invoke("list_history") or [] + baseline_count = len(history) + + target_info = start_target(args.target) + set_clipboard_text(clipboard_sentinel) + + client.invoke("start_dictation") + time.sleep(1.0) + if args.injected_transcript_text.strip(): + time.sleep(1.0) + else: + speak_phrase(args.phrase) + time.sleep(0.8) + client.invoke("stop_dictation") + + latest = wait_for_history_growth(client, baseline_count, args.timeout_seconds) + target_text = read_target_text(target_info) + + result = { + "target": args.target, + "phrase": args.phrase, + "historyFinalText": latest.get("finalText"), + "historyRawTranscript": latest.get("rawTranscript"), + "insertStatus": latest.get("insertStatus"), + "targetContainsFinalText": bool(latest.get("finalText") and latest["finalText"] in target_text), + "targetContainsClipboardSentinel": clipboard_sentinel in target_text, + "targetTextTail": target_text[-400:], + } + print(json.dumps(result, ensure_ascii=False, indent=2)) + finally: + if client and previous_settings is not None: + try: + client.invoke("set_settings", {"prefs": previous_settings}) + except Exception: + pass + if client: + client.close() + cleanup_target(target_info) if target_info else None + set_clipboard_text(previous_clipboard) + subprocess.run( + ["powershell", "-NoProfile", "-Command", f"Stop-Process -Id {app_pid} -Force -ErrorAction SilentlyContinue"], + check=False, + ) + if debug_transcript_path: + try: + Path(debug_transcript_path).unlink(missing_ok=True) + except Exception: + pass + + +if __name__ == "__main__": + main() diff --git a/openless-all/app/scripts/windows-real-asr-insertion-smoke.ps1 b/openless-all/app/scripts/windows-real-asr-insertion-smoke.ps1 index 34c39a30..b1809151 100644 --- a/openless-all/app/scripts/windows-real-asr-insertion-smoke.ps1 +++ b/openless-all/app/scripts/windows-real-asr-insertion-smoke.ps1 @@ -1,10 +1,11 @@ param( [string]$ExePath = "", - [ValidateSet("notepad", "browser")] + [ValidateSet("notepad", "browser", "wt-cmd", "wt-powershell")] [string]$Target = "notepad", [string]$Phrase = "OpenLess Windows real regression", [int]$TimeoutSeconds = 120, - [int]$VirtualKey = 0xA3, + [int]$VirtualKey = 0xA2, + [string]$InjectedTranscriptText = "", [int]$ManualSpeechSeconds = 8, [switch]$ManualSpeech, [switch]$DebugHotkeyEvents @@ -83,6 +84,14 @@ function Write-TextUtf8($Path, $Text) { [System.IO.File]::WriteAllText($Path, $Text, [System.Text.UTF8Encoding]::new($false)) } +function Restore-ClipboardValue($Value) { + if ($null -eq $Value) { + cmd /c "echo off | clip" | Out-Null + return + } + Set-Clipboard -Value $Value +} + function Set-HoldHotkeyPreference($Path) { $previous = Read-TextUtf8 $Path if ([string]::IsNullOrWhiteSpace($previous)) { @@ -94,9 +103,9 @@ function Set-HoldHotkeyPreference($Path) { $prefs | Add-Member -NotePropertyName hotkey -NotePropertyValue ([pscustomobject]@{}) } if ($null -eq $prefs.hotkey.PSObject.Properties["trigger"]) { - $prefs.hotkey | Add-Member -NotePropertyName trigger -NotePropertyValue "rightControl" + $prefs.hotkey | Add-Member -NotePropertyName trigger -NotePropertyValue "leftControl" } else { - $prefs.hotkey.trigger = "rightControl" + $prefs.hotkey.trigger = "leftControl" } if ($null -eq $prefs.hotkey.PSObject.Properties["mode"]) { $prefs.hotkey | Add-Member -NotePropertyName mode -NotePropertyValue "hold" @@ -109,6 +118,11 @@ function Set-HoldHotkeyPreference($Path) { if ($null -eq $prefs.showCapsule) { $prefs | Add-Member -NotePropertyName showCapsule -NotePropertyValue $true } if ($null -eq $prefs.activeAsrProvider) { $prefs | Add-Member -NotePropertyName activeAsrProvider -NotePropertyValue "volcengine" } if ($null -eq $prefs.activeLlmProvider) { $prefs | Add-Member -NotePropertyName activeLlmProvider -NotePropertyValue "ark" } + if ($null -eq $prefs.restoreClipboardAfterPaste) { + $prefs | Add-Member -NotePropertyName restoreClipboardAfterPaste -NotePropertyValue $true + } else { + $prefs.restoreClipboardAfterPaste = $true + } Write-TextUtf8 $Path ($prefs | ConvertTo-Json -Depth 8) return $previous } @@ -184,6 +198,23 @@ function Release-Hotkey { Send-KeyEdge $VirtualKey $true $true } +function Ensure-TargetFocused($TargetInfo) { + if ($null -eq $TargetInfo) { + return $false + } + if ($TargetInfo.TargetTitle) { + $wshell = New-Object -ComObject WScript.Shell + if ($wshell.AppActivate($TargetInfo.TargetTitle)) { + Start-Sleep -Milliseconds 500 + return $true + } + } + if ($null -ne $TargetInfo.Process) { + return (Focus-Window $TargetInfo.Process) + } + return $false +} + function Focus-Window($Process) { if ($null -eq $Process -or $Process.MainWindowHandle -eq 0) { return $false @@ -280,12 +311,96 @@ function Stop-BrowserProfileProcesses($ProfilePath) { function Start-InputTarget($TargetName) { $startedAt = Get-Date if ($TargetName -eq "notepad") { - Start-Process notepad.exe | Out-Null - $process = Wait-ProcessWindow "notepad" $startedAt 15 - if (-not (Focus-Window $process)) { - throw "Notepad window could not be focused." + $fixture = Join-Path $env:TEMP "openless-notepad-input-fixture.txt" + Write-TextUtf8 $fixture "" + $process = Start-Process notepad.exe -ArgumentList $fixture -PassThru + Start-Sleep -Seconds 2 + $title = "openless-notepad-input-fixture.txt - Notepad" + $activateScript = @" +import sys, time, win32com.client +title = sys.argv[1] +shell = win32com.client.Dispatch('WScript.Shell') +deadline = time.time() + 10 +while time.time() < deadline: + if shell.AppActivate(title): + print('activated') + raise SystemExit(0) + time.sleep(0.2) +raise SystemExit(1) +"@ + $activatePath = Join-Path $env:TEMP "openless-activate-notepad.py" + Write-TextUtf8 $activatePath $activateScript + try { + python $activatePath $title | Out-Null + } finally { + Remove-Item -LiteralPath $activatePath -Force -ErrorAction SilentlyContinue + } + Start-Sleep -Milliseconds 800 + return [pscustomobject]@{ + Process = $process + FixturePath = $fixture + ProfilePath = $null + TargetTitle = $title + TargetPid = $process.Id + TargetKind = "notepad" + } + } + + if ($TargetName -in @("wt-cmd", "wt-powershell")) { + $wt = Get-Command wt.exe -ErrorAction SilentlyContinue + if ($null -eq $wt) { + throw "wt.exe was not found." + } + $profile = if ($TargetName -eq "wt-cmd") { "cmd.exe" } else { "powershell.exe" } + Start-Process -FilePath $wt.Source -ArgumentList @("new-tab", $profile) | Out-Null + Start-Sleep -Seconds 2 + $title = if ($TargetName -eq "wt-cmd") { "C:\WINDOWS\system32\cmd.exe" } else { "Windows PowerShell" } + $activateScript = @" +import sys, time, win32com.client +title = sys.argv[1] +shell = win32com.client.Dispatch('WScript.Shell') +deadline = time.time() + 10 +while time.time() < deadline: + if shell.AppActivate(title): + print('activated') + raise SystemExit(0) + time.sleep(0.2) +raise SystemExit(1) +"@ + $activatePath = Join-Path $env:TEMP "openless-activate-target.py" + Write-TextUtf8 $activatePath $activateScript + try { + python $activatePath $title | Out-Null + } finally { + Remove-Item -LiteralPath $activatePath -Force -ErrorAction SilentlyContinue + } + $handleLookup = @" +import sys +from pywinauto import Desktop + +title = sys.argv[1] +for window in Desktop(backend='uia').windows(): + if window.class_name() == 'CASCADIA_HOSTING_WINDOW_CLASS' and window.window_text() == title: + print(window.handle) + raise SystemExit(0) +raise SystemExit(1) +"@ + $handlePath = Join-Path $env:TEMP "openless-terminal-handle.py" + Write-TextUtf8 $handlePath $handleLookup + try { + $targetHandle = [int](python -X utf8 $handlePath $title) + } finally { + Remove-Item -LiteralPath $handlePath -Force -ErrorAction SilentlyContinue + } + Start-Sleep -Milliseconds 800 + return [pscustomobject]@{ + Process = $null + FixturePath = $null + ProfilePath = $null + TargetTitle = $title + TargetHandle = $targetHandle + TargetKind = "terminal" } - return [pscustomobject]@{ Process = $process; FixturePath = $null; ProfilePath = $null } } $browserPath = Resolve-BrowserPath @@ -307,7 +422,93 @@ function Start-InputTarget($TargetName) { throw "Browser window could not be focused." } Start-Sleep -Seconds 1 - return [pscustomobject]@{ Process = $process; FixturePath = $fixture; ProfilePath = $profilePath } + return [pscustomobject]@{ Process = $process; FixturePath = $fixture; ProfilePath = $profilePath; TargetKind = "browser" } +} + +function Read-TargetContent($TargetInfo, $TargetName) { + if ($TargetName -eq "notepad") { + $readbackScript = @" +import sys +from pywinauto import Desktop + +pid = int(sys.argv[1]) +title = sys.argv[2] +out = sys.argv[3] +windows = [w for w in Desktop(backend='uia').windows() if getattr(w, 'process_id', lambda: None)() == pid] +win = None +for candidate in windows: + if candidate.window_text() == title: + win = candidate + break +if win is None and windows: + win = windows[0] +if win is None: + raise SystemExit(2) +for descendant in win.descendants(): + if descendant.class_name() == 'RichEditD2DPT': + value = descendant.window_text() + open(out, 'w', encoding='utf-8').write(value) + raise SystemExit(0) +raise SystemExit(1) +"@ + $readbackPath = Join-Path $env:TEMP "openless-notepad-readback.py" + $outputPath = Join-Path $env:TEMP "openless-notepad-readback.txt" + Write-TextUtf8 $readbackPath $readbackScript + try { + Remove-Item -LiteralPath $outputPath -Force -ErrorAction SilentlyContinue + python -X utf8 $readbackPath $TargetInfo.TargetPid $TargetInfo.TargetTitle $outputPath | Out-Null + Start-Sleep -Milliseconds 400 + if (Test-Path $outputPath) { + return Get-Content -Raw -Encoding UTF8 $outputPath + } + return $null + } finally { + Remove-Item -LiteralPath $readbackPath -Force -ErrorAction SilentlyContinue + Remove-Item -LiteralPath $outputPath -Force -ErrorAction SilentlyContinue + } + } + + if ($TargetName -eq "browser") { + Focus-Window $TargetInfo.Process | Out-Null + Start-Sleep -Milliseconds 400 + Send-CtrlChord 0x41 + Start-Sleep -Milliseconds 200 + Send-CtrlChord 0x43 + Start-Sleep -Milliseconds 400 + return Get-Clipboard -Raw -ErrorAction SilentlyContinue + } + + if ($TargetName -in @("wt-cmd", "wt-powershell")) { + $readbackScript = @" +import sys +from pywinauto import Desktop + +handle = int(sys.argv[1]) +out = sys.argv[2] +win = Desktop(backend='uia').window(handle=handle) +for descendant in win.descendants(): + if descendant.class_name() == 'TermControl': + open(out, 'w', encoding='utf-8').write(descendant.window_text()) + raise SystemExit(0) +raise SystemExit(1) +"@ + $readbackPath = Join-Path $env:TEMP "openless-terminal-readback.py" + $outputPath = Join-Path $env:TEMP "openless-terminal-readback.txt" + Write-TextUtf8 $readbackPath $readbackScript + try { + Remove-Item -LiteralPath $outputPath -Force -ErrorAction SilentlyContinue + python -X utf8 $readbackPath $TargetInfo.TargetHandle $outputPath | Out-Null + if (Test-Path $outputPath) { + return Get-Content -Raw -Encoding UTF8 $outputPath + } + return $null + } finally { + Remove-Item -LiteralPath $readbackPath -Force -ErrorAction SilentlyContinue + Remove-Item -LiteralPath $outputPath -Force -ErrorAction SilentlyContinue + } + } + + return $null } function Send-CtrlChord($Vk) { @@ -338,6 +539,14 @@ $historyPath = Join-Path $env:APPDATA "OpenLess\history.json" $preferencesPath = Join-Path $env:APPDATA "OpenLess\preferences.json" $baselineCount = Get-HistoryCount $historyPath $previousPreferences = Set-HoldHotkeyPreference $preferencesPath +$previousClipboard = Get-Clipboard -Raw -ErrorAction SilentlyContinue +$clipboardSentinel = "OPENLESS_OLD_CLIPBOARD_SENTINEL_$(Get-Date -Format 'yyyyMMddHHmmssfff')" +Restore-ClipboardValue $clipboardSentinel +$debugTranscriptPath = $null +if (-not [string]::IsNullOrWhiteSpace($InjectedTranscriptText)) { + $debugTranscriptPath = Join-Path $env:TEMP "openless-debug-transcript.txt" + Write-TextUtf8 $debugTranscriptPath $InjectedTranscriptText +} Get-Process openless -ErrorAction SilentlyContinue | Stop-Process -Force Remove-Item -LiteralPath $logPath -Force -ErrorAction SilentlyContinue @@ -348,12 +557,16 @@ $env:OPENLESS_ACCEPT_SYNTHETIC_HOTKEY_EVENTS = "1" if ($DebugHotkeyEvents) { $env:OPENLESS_DEBUG_HOTKEY_EVENTS = "1" } +if ($debugTranscriptPath) { + $env:OPENLESS_DEBUG_TRANSCRIPT_FILE = $debugTranscriptPath +} try { $openless = Start-Process -FilePath $ExePath -WorkingDirectory (Split-Path $ExePath -Parent) -PassThru } finally { Remove-Item Env:OPENLESS_SHOW_MAIN_ON_START -ErrorAction SilentlyContinue Remove-Item Env:OPENLESS_ACCEPT_SYNTHETIC_HOTKEY_EVENTS -ErrorAction SilentlyContinue Remove-Item Env:OPENLESS_DEBUG_HOTKEY_EVENTS -ErrorAction SilentlyContinue + Remove-Item Env:OPENLESS_DEBUG_TRANSCRIPT_FILE -ErrorAction SilentlyContinue } $inputTarget = $null @@ -364,9 +577,18 @@ try { $inputTarget = Start-InputTarget $Target - Press-Hotkey - if (-not (Wait-LogPattern $logPath "\[hotkey\] Windows trigger pressed" 10)) { - throw "Windows low-level hook did not observe the right Control press." + $observedPress = $false + for ($attempt = 1; $attempt -le 3 -and -not $observedPress; $attempt++) { + Ensure-TargetFocused $inputTarget | Out-Null + Press-Hotkey + $observedPress = Wait-LogPattern $logPath "\[hotkey\] Windows trigger pressed" 4 + if (-not $observedPress) { + Release-Hotkey + Start-Sleep -Milliseconds 500 + } + } + if (-not $observedPress) { + throw "Windows low-level hook did not observe the synthetic Control press." } if (-not (Wait-LogPattern $logPath "\[coord\] session started" 30)) { throw "OpenLess recording session did not start." @@ -399,18 +621,15 @@ try { throw "Expected Windows insertStatus pasteSent after guarded foreground restore, got '$($latest.insertStatus)'." } - Focus-Window $inputTarget.Process | Out-Null - Start-Sleep -Milliseconds 400 - Send-CtrlChord 0x41 - Start-Sleep -Milliseconds 200 - Send-CtrlChord 0x43 - Start-Sleep -Milliseconds 400 - $targetText = Get-Clipboard -Raw -ErrorAction SilentlyContinue + $targetText = Read-TargetContent $inputTarget $Target if ([string]::IsNullOrWhiteSpace($targetText)) { - throw "$Target clipboard readback is empty after Ctrl+A/C." + throw "$Target readback is empty." } if (-not $targetText.Contains($latest.finalText)) { + if ($targetText.Contains($clipboardSentinel)) { + throw "$Target readback contains the pre-dictation clipboard sentinel instead of latest finalText." + } throw "$Target readback does not contain latest finalText; insertion was not proven at the target caret." } @@ -422,7 +641,7 @@ try { if ($null -ne $inputTarget) { if ($inputTarget.ProfilePath) { Stop-BrowserProfileProcesses $inputTarget.ProfilePath - } else { + } elseif ($null -ne $inputTarget.Process) { Stop-Process -Id $inputTarget.Process.Id -Force -ErrorAction SilentlyContinue } if ($inputTarget.FixturePath) { @@ -438,6 +657,10 @@ try { } else { Write-TextUtf8 $preferencesPath $previousPreferences } + Restore-ClipboardValue $previousClipboard + if ($debugTranscriptPath) { + Remove-Item -LiteralPath $debugTranscriptPath -Force -ErrorAction SilentlyContinue + } } Write-Host "Real ASR + insertion fallback smoke ($Target) passed." diff --git a/openless-all/app/scripts/windows-terminal-clipboard-restore-smoke.ps1 b/openless-all/app/scripts/windows-terminal-clipboard-restore-smoke.ps1 new file mode 100644 index 00000000..6a368111 --- /dev/null +++ b/openless-all/app/scripts/windows-terminal-clipboard-restore-smoke.ps1 @@ -0,0 +1,184 @@ +param( + [ValidateSet("notepad", "powershell", "cmd", "wt")] + [string]$Target = "powershell", + [int]$RestoreDelayMs = 150, + [int]$PasteSettleDelayMs = 900 +) + +$ErrorActionPreference = "Stop" + +Add-Type @" +using System; +using System.Runtime.InteropServices; + +public static class ClipboardRestoreSmokeWin32 { + [DllImport("user32.dll")] + public static extern bool ShowWindow(IntPtr hWnd, int nCmdShow); + + [DllImport("user32.dll")] + public static extern bool SetForegroundWindow(IntPtr hWnd); + + [DllImport("user32.dll")] + public static extern void keybd_event(byte bVk, byte bScan, int dwFlags, UIntPtr dwExtraInfo); + + public const int KEYEVENTF_EXTENDEDKEY = 0x0001; + public const int KEYEVENTF_KEYUP = 0x0002; +} +"@ + +function Send-KeyEdge($Vk, $KeyUp, $Extended = $false) { + $flags = 0 + if ($Extended) { + $flags = $flags -bor [ClipboardRestoreSmokeWin32]::KEYEVENTF_EXTENDEDKEY + } + if ($KeyUp) { + $flags = $flags -bor [ClipboardRestoreSmokeWin32]::KEYEVENTF_KEYUP + } + $scanCode = if ($Vk -eq 0xA2 -or $Vk -eq 0xA3) { 0x1D } else { 0 } + [ClipboardRestoreSmokeWin32]::keybd_event([byte]$Vk, [byte]$scanCode, $flags, [UIntPtr]::Zero) +} + +function Send-CtrlChord($Vk) { + Send-KeyEdge 0xA2 $false $false + Start-Sleep -Milliseconds 70 + Send-KeyEdge $Vk $false $false + Start-Sleep -Milliseconds 70 + Send-KeyEdge $Vk $true $false + Start-Sleep -Milliseconds 70 + Send-KeyEdge 0xA2 $true $false +} + +function Send-EnterKey { + Send-KeyEdge 0x0D $false $false + Start-Sleep -Milliseconds 70 + Send-KeyEdge 0x0D $true $false +} + +function Wait-ProcessWindow($ProcessName, $After, $TimeoutSeconds) { + $deadline = (Get-Date).AddSeconds($TimeoutSeconds) + while ((Get-Date) -lt $deadline) { + $candidate = Get-Process $ProcessName -ErrorAction SilentlyContinue | + Where-Object { $_.StartTime -ge $After -and $_.MainWindowHandle -ne 0 } | + Sort-Object StartTime -Descending | + Select-Object -First 1 + if ($null -ne $candidate) { + return $candidate + } + Start-Sleep -Milliseconds 300 + } + return $null +} + +function Focus-Window($Process) { + if ($null -eq $Process -or $Process.MainWindowHandle -eq 0) { + throw "Target window is unavailable." + } + [ClipboardRestoreSmokeWin32]::ShowWindow($Process.MainWindowHandle, 9) | Out-Null + [ClipboardRestoreSmokeWin32]::SetForegroundWindow($Process.MainWindowHandle) | Out-Null + Start-Sleep -Milliseconds 500 +} + +function Start-TargetWindow($TargetName) { + $startedAt = Get-Date + switch ($TargetName) { + "notepad" { + Start-Process notepad.exe | Out-Null + return Wait-ProcessWindow "notepad" $startedAt 15 + } + "powershell" { + Start-Process powershell.exe -ArgumentList "-NoLogo" | Out-Null + return Wait-ProcessWindow "powershell" $startedAt 15 + } + "cmd" { + Start-Process cmd.exe | Out-Null + return Wait-ProcessWindow "cmd" $startedAt 15 + } + "wt" { + $wt = Get-Command wt.exe -ErrorAction SilentlyContinue + if ($null -eq $wt) { + throw "wt.exe was not found." + } + Start-Process $wt.Source -ArgumentList "new-tab", "powershell.exe", "-NoLogo" | Out-Null + return Wait-ProcessWindow "WindowsTerminal" $startedAt 20 + } + } +} + +function Wait-FileText($Path, $Expected, $TimeoutSeconds) { + $deadline = (Get-Date).AddSeconds($TimeoutSeconds) + while ((Get-Date) -lt $deadline) { + if (Test-Path $Path) { + $content = Get-Content -Raw -Encoding UTF8 $Path + if ($content.Contains($Expected)) { + return $true + } + } + Start-Sleep -Milliseconds 200 + } + return $false +} + +function Restore-ClipboardValue($Value) { + if ($null -eq $Value) { + cmd /c "echo off | clip" | Out-Null + return + } + Set-Clipboard -Value $Value +} + +$marker = "OPENLESS_CLIPBOARD_RESTORE_OK" +$outputPath = Join-Path $env:TEMP "openless-clipboard-restore-$Target.txt" +Remove-Item -LiteralPath $outputPath -Force -ErrorAction SilentlyContinue +$previousClipboard = Get-Clipboard -Raw -ErrorAction SilentlyContinue + +switch ($Target) { + "notepad" { + $payload = $marker + } + "cmd" { + $payload = "echo $marker > `"$outputPath`"" + } + default { + $payload = "Set-Content -Path `"$outputPath`" -Value `"$marker`"" + } +} + +$targetProcess = $null +try { + $targetProcess = Start-TargetWindow $Target + if ($null -eq $targetProcess) { + throw "Failed to start target window: $Target" + } + Focus-Window $targetProcess + Set-Clipboard -Value $payload + Start-Sleep -Milliseconds 150 + + Send-CtrlChord 0x56 + Start-Sleep -Milliseconds $RestoreDelayMs + Restore-ClipboardValue $previousClipboard + + if ($Target -eq "notepad") { + Start-Sleep -Milliseconds $PasteSettleDelayMs + Send-CtrlChord 0x41 + Start-Sleep -Milliseconds 120 + Send-CtrlChord 0x43 + Start-Sleep -Milliseconds 200 + $result = Get-Clipboard -Raw -ErrorAction SilentlyContinue + if (-not $result.Contains($marker)) { + throw "Notepad readback did not contain the pasted marker." + } + } else { + Start-Sleep -Milliseconds 120 + Send-EnterKey + if (-not (Wait-FileText $outputPath $marker 6)) { + throw "Terminal target did not execute the pasted command before clipboard restore." + } + } + + Write-Host "[ok] target=$Target restoreDelayMs=$RestoreDelayMs" +} finally { + Restore-ClipboardValue $previousClipboard + if ($null -ne $targetProcess) { + Stop-Process -Id $targetProcess.Id -Force -ErrorAction SilentlyContinue + } +} diff --git a/openless-all/app/scripts/windows-ui-config.test.mjs b/openless-all/app/scripts/windows-ui-config.test.mjs index ad98ca64..caaf4c6b 100644 --- a/openless-all/app/scripts/windows-ui-config.test.mjs +++ b/openless-all/app/scripts/windows-ui-config.test.mjs @@ -6,12 +6,48 @@ function assertEqual(actual, expected, name) { } } +function assertMatch(source, pattern, name) { + if (!pattern.test(source)) { + throw new Error(`${name}: pattern ${pattern} not found`); + } +} + const raw = await readFile(new URL('../src-tauri/tauri.conf.json', import.meta.url), 'utf-8'); const config = JSON.parse(raw); -const mainWindow = config.app.windows.find((window) => window.label === 'main'); +const capsuleWindow = config.app.windows.find((window) => window.label === 'capsule'); +const libRs = await readFile(new URL('../src-tauri/src/lib.rs', import.meta.url), 'utf-8'); +const coordinatorRs = await readFile(new URL('../src-tauri/src/coordinator.rs', import.meta.url), 'utf-8'); -if (!mainWindow) { - throw new Error('main window config missing'); +if (!capsuleWindow) { + throw new Error('capsule window config missing'); } -assertEqual(mainWindow.decorations, false, 'windows main window should use only custom titlebar'); +assertEqual(capsuleWindow.width, 220, 'windows capsule config keeps translation-capable width baseline'); +assertEqual(capsuleWindow.height, 110, 'windows capsule config keeps translation-capable height baseline'); +assertEqual(capsuleWindow.transparent, true, 'capsule window should keep transparent visuals'); +assertEqual(capsuleWindow.alwaysOnTop, true, 'capsule window should stay above the focused app while recording'); +assertMatch( + libRs, + /#\[cfg\(target_os = "windows"\)\][\s\S]*?\(196\.0, height\)/, + 'windows runtime capsule width should collapse to the visible pill', +); +assertMatch( + libRs, + /let height = if translation_active \{ 110\.0 \} else \{ 52\.0 \};/, + 'windows runtime capsule height should shrink outside translation mode', +); +assertMatch( + libRs, + /window\.set_size\(LogicalSize::new\(cap_w, cap_h\)\)\?/, + 'capsule positioning should resync runtime size with the computed layout', +); +assertMatch( + coordinatorRs, + /let accepts_cursor_events = matches!\(state, CapsuleState::Recording\);/, + 'windows capsule should only accept clicks while actively recording', +); +assertMatch( + coordinatorRs, + /window\.set_ignore_cursor_events\(!accepts_cursor_events\)/, + 'windows capsule should pass clicks through in non-recording states', +); diff --git a/openless-all/app/src-tauri/src/coordinator.rs b/openless-all/app/src-tauri/src/coordinator.rs index cca83683..f804884c 100644 --- a/openless-all/app/src-tauri/src/coordinator.rs +++ b/openless-all/app/src-tauri/src/coordinator.rs @@ -516,9 +516,7 @@ fn qa_hotkey_supervisor_loop(inner: Arc) { Err(e) => { attempts += 1; if attempts <= 3 || attempts % 10 == 0 { - log::warn!( - "[coord] QA hotkey 第 {attempts} 次注册失败: {e}; 3s 后重试" - ); + log::warn!("[coord] QA hotkey 第 {attempts} 次注册失败: {e}; 3s 后重试"); } std::thread::sleep(std::time::Duration::from_secs(3)); } @@ -756,6 +754,15 @@ async fn handle_window_hotkey_event( #[cfg(target_os = "windows")] { + if !window_hotkey_fallback_enabled() { + if event_type == "keydown" && !repeat { + log::info!( + "[window-hotkey] ignored because Windows lifecycle owner is the low-level hook" + ); + } + return Ok(()); + } + let trigger = inner.prefs.get().hotkey.trigger; if !window_key_matches_trigger(trigger, &key, &code) { return Ok(()); @@ -781,6 +788,10 @@ async fn handle_window_hotkey_event( } } +fn window_hotkey_fallback_enabled() -> bool { + crate::types::HotkeyCapability::current().explicit_fallback_available +} + #[cfg(any(target_os = "windows", test))] fn window_key_matches_trigger(trigger: crate::types::HotkeyTrigger, key: &str, code: &str) -> bool { use crate::types::HotkeyTrigger; @@ -820,7 +831,9 @@ async fn begin_session(inner: &Arc) -> Result<(), String> { } } // 翻译模式标志重置;hotkey 监听器在 Shift down 时再 set true。 - inner.translation_modifier_seen.store(false, Ordering::SeqCst); + inner + .translation_modifier_seen + .store(false, Ordering::SeqCst); #[cfg(any(debug_assertions, test))] if hotkey_injection_dry_run_enabled() { @@ -1192,6 +1205,19 @@ async fn end_session(inner: &Arc) -> Result<(), String> { // ASR 返回空转写护栏(来自 PR #66):写一条 emptyTranscript 失败历史 + 错误胶囊, // 与 main 上其它 error 路径保持一致(带 schedule_capsule_idle 让胶囊自动消失)。 + let mut raw = raw; + + #[cfg(any(debug_assertions, test))] + if raw.text.trim().is_empty() { + if let Some(debug_text) = debug_transcript_override_text() { + log::info!( + "[coord] using debug transcript override (chars={})", + debug_text.chars().count() + ); + raw.text = debug_text; + } + } + if raw.text.trim().is_empty() { let session = DictationSession { id: Uuid::new_v4().to_string(), @@ -1230,8 +1256,8 @@ async fn end_session(inner: &Arc) -> Result<(), String> { let working_languages = prefs.working_languages.clone(); let front_app = inner.state.lock().front_app.clone(); let translation_target = prefs.translation_target_language.trim().to_string(); - let translation_active = inner.translation_modifier_seen.load(Ordering::SeqCst) - && !translation_target.is_empty(); + let translation_active = + inner.translation_modifier_seen.load(Ordering::SeqCst) && !translation_target.is_empty(); let (polished, polish_error) = if translation_active { log::info!( "[coord] translation mode → target=\u{300C}{}\u{300D} working={:?} front_app={:?}", @@ -1411,6 +1437,18 @@ fn hotkey_injection_dry_run_enabled() -> bool { std::env::var_os("OPENLESS_HOTKEY_INJECTION_DRY_RUN").is_some() } +#[cfg(any(debug_assertions, test))] +fn debug_transcript_override_text() -> Option { + let path = std::env::var_os("OPENLESS_DEBUG_TRANSCRIPT_FILE")?; + let text = std::fs::read_to_string(path).ok()?; + let trimmed = text.trim().to_string(); + if trimmed.is_empty() { + None + } else { + Some(trimmed) + } +} + fn ensure_microphone_permission(inner: &Arc) -> Result<(), String> { use crate::permissions::{self, PermissionStatus}; @@ -1702,11 +1740,7 @@ async fn begin_qa_session(inner: &Arc) -> Result<(), String> { *last = Some(now); } if let Some(app) = inner_for_level.app.lock().clone() { - let _ = app.emit_to( - "qa", - "qa:level", - serde_json::json!({ "level": level }), - ); + let _ = app.emit_to("qa", "qa:level", serde_json::json!({ "level": level })); } // 同步把电平推给底部胶囊,让 QA 录音也有跟主听写一致的可视反馈。 emit_capsule( @@ -1778,11 +1812,7 @@ async fn end_qa_session(inner: &Arc) -> Result<(), String> { emit_capsule(inner, CapsuleState::Transcribing, 0.0, 0, None, None); if let Some(app) = inner.app.lock().clone() { - let _ = app.emit_to( - "qa", - "qa:state", - serde_json::json!({ "kind": "loading" }), - ); + let _ = app.emit_to("qa", "qa:state", serde_json::json!({ "kind": "loading" })); } if let Some(rec) = inner.qa_recorder.lock().take() { @@ -1845,10 +1875,14 @@ async fn end_qa_session(inner: &Arc) -> Result<(), String> { } }; - inner.qa_state.lock().messages.push(crate::types::QaChatMessage { - role: "user".to_string(), - content: user_content, - }); + inner + .qa_state + .lock() + .messages + .push(crate::types::QaChatMessage { + role: "user".to_string(), + content: user_content, + }); if let Some(app) = inner.app.lock().clone() { let messages = inner.qa_state.lock().messages.clone(); @@ -1914,10 +1948,14 @@ async fn end_qa_session(inner: &Arc) -> Result<(), String> { return Ok(()); } - inner.qa_state.lock().messages.push(crate::types::QaChatMessage { - role: "assistant".to_string(), - content: answer.clone(), - }); + inner + .qa_state + .lock() + .messages + .push(crate::types::QaChatMessage { + role: "assistant".to_string(), + content: answer.clone(), + }); if let Some(app) = inner.app.lock().clone() { let messages = inner.qa_state.lock().messages.clone(); @@ -2195,6 +2233,14 @@ mod tests { ); assert!(coordinator.inner.hotkey_trigger_held.load(Ordering::SeqCst)); } + + #[test] + fn window_hotkey_fallback_is_disabled_when_no_explicit_fallback_is_advertised() { + assert_eq!( + window_hotkey_fallback_enabled(), + crate::types::HotkeyCapability::current().explicit_fallback_available + ); + } } fn enabled_phrases(inner: &Arc) -> Vec { @@ -2433,6 +2479,42 @@ fn show_capsule_window_no_activate() -> bool { false } +#[cfg(target_os = "windows")] +fn hide_capsule_window_if_present() { + use std::iter::once; + use windows::core::PCWSTR; + use windows::Win32::Foundation::HWND; + use windows::Win32::UI::WindowsAndMessaging::{ + FindWindowW, SetWindowPos, ShowWindow, HWND_NOTOPMOST, SWP_HIDEWINDOW, SWP_NOACTIVATE, + SWP_NOMOVE, SWP_NOSIZE, SW_HIDE, + }; + + let title: Vec = "OpenLess Capsule".encode_utf16().chain(once(0)).collect(); + let hwnd = match unsafe { FindWindowW(PCWSTR::null(), PCWSTR(title.as_ptr())) } { + Ok(hwnd) => hwnd, + Err(_) => return, + }; + if hwnd == HWND::default() || hwnd.0.is_null() { + return; + } + + let _ = unsafe { ShowWindow(hwnd, SW_HIDE) }; + let _ = unsafe { + SetWindowPos( + hwnd, + HWND_NOTOPMOST, + 0, + 0, + 0, + 0, + SWP_NOMOVE | SWP_NOSIZE | SWP_NOACTIVATE | SWP_HIDEWINDOW, + ) + }; +} + +#[cfg(not(target_os = "windows"))] +fn hide_capsule_window_if_present() {} + fn emit_capsule( inner: &Arc, state: CapsuleState, @@ -2454,7 +2536,10 @@ fn emit_capsule( let show_capsule = inner.prefs.get().show_capsule; if let Some(window) = app.get_webview_window("capsule") { - let visible = !matches!(state, CapsuleState::Idle); + let visible = matches!( + state, + CapsuleState::Recording | CapsuleState::Transcribing | CapsuleState::Polishing + ); maybe_position_capsule_bottom_center(inner, &window, payload.translation); if show_capsule && visible { if cfg!(target_os = "windows") { @@ -2469,6 +2554,7 @@ fn emit_capsule( #[cfg(target_os = "macos")] crate::restore_main_window_key_if_active(&app); } else { + hide_capsule_window_if_present(); let _ = window.hide(); } } diff --git a/openless-all/app/src-tauri/src/insertion.rs b/openless-all/app/src-tauri/src/insertion.rs index 8bc6c599..79ad95d5 100644 --- a/openless-all/app/src-tauri/src/insertion.rs +++ b/openless-all/app/src-tauri/src/insertion.rs @@ -14,7 +14,10 @@ use std::time::Duration; use crate::types::InsertStatus; -#[cfg(not(target_os = "macos"))] +#[cfg(target_os = "windows")] +const CLIPBOARD_RESTORE_DELAY: Duration = Duration::from_millis(750); + +#[cfg(all(not(target_os = "macos"), not(target_os = "windows")))] const CLIPBOARD_RESTORE_DELAY: Duration = Duration::from_millis(150); pub struct TextInserter; @@ -131,19 +134,24 @@ fn insert_with_clipboard_restore(text: &str, restore_clipboard_after_paste: bool } if restore_clipboard_after_paste { - maybe_restore_clipboard(restore_plan); + schedule_clipboard_restore(restore_plan); } // 关掉 → 听写文本留在剪贴板里,simulate_paste 没真正落地时用户能手动 Ctrl+V 找回。 insertion_success_status() } #[cfg(not(target_os = "macos"))] -fn maybe_restore_clipboard(plan: ClipboardRestorePlan) { +fn schedule_clipboard_restore(plan: ClipboardRestorePlan) { + std::thread::spawn(move || restore_clipboard_after_delay(plan, CLIPBOARD_RESTORE_DELAY)); +} + +#[cfg(not(target_os = "macos"))] +fn restore_clipboard_after_delay(plan: ClipboardRestorePlan, delay: Duration) { if plan.previous_text.is_none() { return; } - std::thread::sleep(CLIPBOARD_RESTORE_DELAY); + std::thread::sleep(delay); let mut clipboard = match arboard::Clipboard::new() { Ok(clipboard) => clipboard, @@ -303,6 +311,9 @@ mod macos { #[cfg(test)] mod tests { use super::*; + use std::sync::{Arc, Mutex}; + use std::thread; + use std::time::Duration; #[test] #[cfg(not(target_os = "macos"))] @@ -317,4 +328,34 @@ mod tests { )); assert!(!should_restore_clipboard(None, "dictated text")); } + + #[test] + #[cfg(target_os = "windows")] + fn delayed_terminal_paste_must_see_dictated_text_before_clipboard_restore() { + let inserted_text = "dictated text".to_string(); + let previous_text = "older clipboard".to_string(); + let clipboard = Arc::new(Mutex::new(inserted_text.clone())); + let pasted = Arc::new(Mutex::new(None::)); + + let clipboard_for_paste = Arc::clone(&clipboard); + let pasted_for_paste = Arc::clone(&pasted); + let reader = thread::spawn(move || { + thread::sleep(Duration::from_millis(250)); + let seen = clipboard_for_paste.lock().unwrap().clone(); + *pasted_for_paste.lock().unwrap() = Some(seen); + }); + + thread::sleep(CLIPBOARD_RESTORE_DELAY); + let current_text = Some(clipboard.lock().unwrap().clone()); + if should_restore_clipboard(current_text.as_deref(), &inserted_text) { + *clipboard.lock().unwrap() = previous_text; + } + + reader.join().unwrap(); + + assert_eq!( + pasted.lock().unwrap().as_deref(), + Some(inserted_text.as_str()) + ); + } }