diff --git a/.idea/workspace.xml b/.idea/workspace.xml
index 0f6b65a8..50980db1 100644
--- a/.idea/workspace.xml
+++ b/.idea/workspace.xml
@@ -4,29 +4,7 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
diff --git a/README.md b/README.md
index d951d121..2aa35d23 100644
--- a/README.md
+++ b/README.md
@@ -24,6 +24,9 @@
- [Accessibility Element Finder](#accessibility-element-finder)
- [AI Element Locator (VLM)](#ai-element-locator-vlm)
- [OCR (Text on Screen)](#ocr-text-on-screen)
+ - [LLM Action Planner](#llm-action-planner)
+ - [Runtime Variables & Control Flow](#runtime-variables--control-flow)
+ - [Remote Desktop](#remote-desktop)
- [Clipboard](#clipboard)
- [Screenshot](#screenshot)
- [Action Recording & Playback](#action-recording--playback)
@@ -57,7 +60,10 @@
- **Image Recognition** — locate UI elements on screen using OpenCV template matching with configurable threshold
- **Accessibility Element Finder** — query the OS accessibility tree (Windows UIA / macOS AX) to locate buttons, menus, and controls by name/role
- **AI Element Locator (VLM)** — describe a UI element in plain language and let a vision-language model (Anthropic / OpenAI) find its screen coordinates
-- **OCR** — extract text from screen regions using Tesseract; wait for, click, or locate rendered text
+- **OCR** — extract text from screen regions using Tesseract; wait for, click, or locate rendered text; regex search and full-region dump
+- **LLM Action Planner** — translate a plain-language description into a validated `AC_*` action list using Claude
+- **Runtime Variables & Control Flow** — `${var}` substitution at execution time, plus `AC_set_var` / `AC_inc_var` / `AC_if_var` / `AC_for_each` / `AC_loop` / `AC_retry` for data-driven scripts
+- **Remote Desktop** — stream this machine's screen and accept remote input over a token-authenticated TCP protocol, *or* connect to another machine and view + control it (host + viewer GUIs included). Optional TLS (HTTPS-grade encryption), WebSocket transport (ws:// + wss:// for browser / firewall-friendly clients), persistent 9-digit Host ID, host→viewer audio streaming, bidirectional clipboard sync (text + image), and chunked file transfer (drag-drop + progress bar; arbitrary destination path; no size cap)
- **Clipboard** — read/write system clipboard text on Windows, macOS, and Linux
- **Screenshot & Screen Recording** — capture full screen or regions as images, record screen to video (AVI/MP4)
- **Action Recording & Playback** — record mouse/keyboard events and replay them
@@ -408,6 +414,201 @@ If Tesseract is not on `PATH`, point at it explicitly:
ac.set_tesseract_cmd(r"C:\Program Files\Tesseract-OCR\tesseract.exe")
```
+Dump every recognised text record in a region (or full screen), or
+search by regex when the text varies:
+
+```python
+import je_auto_control as ac
+
+# Every hit in a region as TextMatch records (text, bounding box, confidence)
+for match in ac.read_text_in_region(region=[0, 0, 800, 600]):
+ print(match.text, match.center, match.confidence)
+
+# Regex — accepts a pattern string or a compiled re.Pattern
+for match in ac.find_text_regex(r"Order#\d+"):
+ print(match.text, match.center)
+```
+
+GUI: **OCR Reader** tab.
+
+### LLM Action Planner
+
+Translate plain-language descriptions into validated `AC_*` action lists
+using an LLM (Anthropic Claude by default). Output is leniently parsed
+(strips code fences, extracts the first JSON array from prose) and then
+validated by the same schema the executor uses, so the result can be
+piped straight into `execute_action`:
+
+```python
+import je_auto_control as ac
+from je_auto_control.utils.executor.action_executor import executor
+
+actions = ac.plan_actions(
+ "click the Submit button, then type 'done' and save",
+ known_commands=executor.known_commands(),
+)
+executor.execute_action(actions)
+
+# Or in a single call:
+ac.run_from_description("open Notepad and type hello", executor=executor)
+```
+
+| Variable | Effect |
+|---|---|
+| `ANTHROPIC_API_KEY` | Enables the Anthropic backend |
+| `AUTOCONTROL_LLM_BACKEND` | `anthropic` to force a backend |
+| `AUTOCONTROL_LLM_MODEL` | Override the default model (e.g. `claude-opus-4-7`) |
+
+GUI: **LLM Planner** tab — description box, `QThread`-backed *Plan*
+button, action-list preview, and a *Run plan* button.
+
+### Runtime Variables & Control Flow
+
+The executor resolves `${var}` placeholders **per command call** rather
+than pre-flattening, so nested `body` / `then` / `else` lists keep their
+placeholders and re-bind on every iteration. Combined with new mutation
+commands, scripts can drive themselves from data without Python glue:
+
+```json
+[
+ ["AC_set_var", {"name": "items", "value": ["alpha", "beta"]}],
+ ["AC_set_var", {"name": "i", "value": 0}],
+ ["AC_for_each", {
+ "items": "${items}", "as": "name",
+ "body": [
+ ["AC_inc_var", {"name": "i"}],
+ ["AC_if_var", {
+ "name": "i", "op": "ge", "value": 2,
+ "then": [["AC_break"]], "else": []
+ }]
+ ]
+ }]
+]
+```
+
+`AC_if_var` operators: `eq`, `ne`, `lt`, `le`, `gt`, `ge`, `contains`,
+`startswith`, `endswith`. GUI: **Variables** tab — live view of
+`executor.variables` with single-set, JSON seed, and clear-all controls.
+
+### Remote Desktop
+
+Stream this machine's screen and accept remote input, **or** view and
+control another machine. The wire format is a length-prefixed framing
+on raw TCP (no extra deps), starting with an HMAC-SHA256
+challenge / response handshake; viewers that fail auth are dropped
+before they can see a frame. JPEG frames are produced at the configured
+FPS / quality and broadcast to authenticated viewers via a shared
+latest-frame slot, so a slow viewer drops frames instead of blocking
+the rest. Viewer input is JSON, validated against an allowlist, and
+applied through the existing wrappers.
+
+```python
+# Be remoted — start a host and hand the token + port to whoever views you
+from je_auto_control import RemoteDesktopHost
+host = RemoteDesktopHost(token="hunter2", bind="127.0.0.1",
+ port=0, fps=10, quality=70)
+host.start()
+print("listening on", host.port, "viewers:", host.connected_clients)
+```
+
+```python
+# Control another machine — connect a viewer and send input
+from je_auto_control import RemoteDesktopViewer
+viewer = RemoteDesktopViewer(host="10.0.0.5", port=51234, token="hunter2",
+ on_frame=lambda jpeg: ...)
+viewer.connect()
+viewer.send_input({"action": "mouse_move", "x": 100, "y": 200})
+viewer.send_input({"action": "type", "text": "hello"})
+viewer.disconnect()
+```
+
+GUI: **Remote Desktop** tab with two sub-tabs.
+
+- **Host** — token field with a *Generate* button, security warning
+ about the bind address, start / stop controls, refreshing port +
+ viewer-count status, and a 4 fps preview pane below the controls so
+ the user being remoted sees what viewers see.
+- **Viewer** — address / port / token form, *Connect* / *Disconnect*,
+ and a custom frame-display widget that paints incoming JPEG frames
+ scaled with `KeepAspectRatio`. Mouse / wheel / key events on the
+ display are remapped from widget coordinates back to the remote
+ screen's pixel space using the latest frame's dimensions, then
+ forwarded as `INPUT` messages.
+
+> ⚠️ Anyone with the host:port and token gets full mouse / keyboard
+> control of the host machine. Default bind is `127.0.0.1`; expose
+> externally only via SSH tunnel or TLS front-end. The token is the
+> only line of defence — treat it like a password.
+
+**Encrypted transports + alternate protocols.** Pass an `ssl_context`
+to either `RemoteDesktopHost` or `RemoteDesktopViewer` to wrap every
+connection in TLS. For firewall-friendly access, use the in-tree
+WebSocket variants (no extra deps) — same protocol, RFC 6455 framing,
+and `wss://` if you also pass `ssl_context`:
+
+```python
+from je_auto_control import (
+ WebSocketDesktopHost, WebSocketDesktopViewer,
+)
+host = WebSocketDesktopHost(token="hunter2", ssl_context=server_ctx)
+viewer = WebSocketDesktopViewer(
+ host="example.com", port=443, token="hunter2",
+ ssl_context=client_ctx, expected_host_id="123456789",
+)
+```
+
+**Persistent Host ID.** Every host owns a stable 9-digit numeric ID
+(persisted at `~/.je_auto_control/remote_host_id`), announced in
+`AUTH_OK` and verifiable via the viewer's `expected_host_id`:
+
+```python
+print(host.host_id) # e.g. "123456789"
+viewer = RemoteDesktopViewer(
+ host=..., port=..., token=...,
+ expected_host_id="123456789", # AuthenticationError on mismatch
+)
+```
+
+**Audio streaming (host → viewer).** Optional `sounddevice` dep; opt
+in with `enable_audio=True` on the host, attach an `AudioPlayer` (or
+your own callback) on the viewer:
+
+```python
+host = RemoteDesktopHost(token="tok", enable_audio=True)
+
+from je_auto_control.utils.remote_desktop import AudioPlayer
+player = AudioPlayer(); player.start()
+viewer = RemoteDesktopViewer(host=..., on_audio=player.play)
+```
+
+**Clipboard sync (text + image, bidirectional).** Explicit per-call —
+no auto-poll loops. Image clipboard works on Windows (CF_DIB via
+ctypes) and Linux (`xclip -t image/png`); macOS get is supported via
+Pillow ImageGrab, set requires PyObjC.
+
+```python
+viewer.send_clipboard_text("hello")
+viewer.send_clipboard_image(open("logo.png", "rb").read())
+host.broadcast_clipboard_text("greetings")
+```
+
+**File transfer with progress.** Bidirectional, chunked, arbitrary
+destination path, no size cap; the GUI viewer also accepts drag-drop:
+
+```python
+viewer.send_file(
+ "local.bin", "/tmp/uploaded.bin",
+ on_progress=lambda tid, done, total: print(done, total),
+)
+host.send_file_to_viewers("local.bin", "/tmp/from_host.bin")
+```
+
+> ⚠️ Path is unrestricted and there is no aggregate size limit.
+> Anyone with the token can write any file to any location and can
+> fill the disk — keep "trusted token holders == trusted users" in
+> mind, or wrap with your own `FileReceiver` subclass that vets
+> destination paths.
+
### Clipboard
```python
@@ -494,10 +695,13 @@ je_auto_control.execute_action([
| Screen | `AC_screen_size`, `AC_screenshot` |
| Accessibility | `AC_a11y_list`, `AC_a11y_find`, `AC_a11y_click` |
| VLM (AI Locator) | `AC_vlm_locate`, `AC_vlm_click` |
-| OCR | `AC_locate_text`, `AC_click_text`, `AC_wait_text` |
+| OCR | `AC_locate_text`, `AC_click_text`, `AC_wait_text`, `AC_read_text_in_region`, `AC_find_text_regex` |
+| LLM planner | `AC_llm_plan`, `AC_llm_run` |
| Clipboard | `AC_clipboard_get`, `AC_clipboard_set` |
| Window | `AC_list_windows`, `AC_focus_window`, `AC_wait_window`, `AC_close_window` |
-| Flow control | `AC_loop`, `AC_break`, `AC_continue`, `AC_if_image_found`, `AC_if_pixel`, `AC_while_image`, `AC_wait_image`, `AC_wait_pixel`, `AC_sleep`, `AC_retry` |
+| Flow control | `AC_loop`, `AC_break`, `AC_continue`, `AC_if_image_found`, `AC_if_pixel`, `AC_if_var`, `AC_while_image`, `AC_for_each`, `AC_wait_image`, `AC_wait_pixel`, `AC_sleep`, `AC_retry` |
+| Variables | `AC_set_var`, `AC_get_var`, `AC_inc_var` |
+| Remote desktop | `AC_start_remote_host`, `AC_stop_remote_host`, `AC_remote_host_status`, `AC_remote_connect`, `AC_remote_disconnect`, `AC_remote_viewer_status`, `AC_remote_send_input` |
| Record | `AC_record`, `AC_stop_record`, `AC_set_record_enable` |
| Report | `AC_generate_html`, `AC_generate_json`, `AC_generate_xml`, `AC_generate_html_report`, `AC_generate_json_report`, `AC_generate_xml_report` |
| Run history | `AC_history_list`, `AC_history_clear` |
diff --git a/README/README_zh-CN.md b/README/README_zh-CN.md
index 51eb59d5..2a550df7 100644
--- a/README/README_zh-CN.md
+++ b/README/README_zh-CN.md
@@ -23,6 +23,9 @@
- [Accessibility 元件搜索](#accessibility-元件搜索)
- [AI 元件定位(VLM)](#ai-元件定位vlm)
- [OCR 屏幕文字识别](#ocr-屏幕文字识别)
+ - [LLM 动作规划器](#llm-动作规划器)
+ - [运行期变量与流程控制](#运行期变量与流程控制)
+ - [远程桌面](#远程桌面)
- [剪贴板](#剪贴板)
- [截图](#截图)
- [动作录制与回放](#动作录制与回放)
@@ -56,7 +59,10 @@
- **图像识别** — 使用 OpenCV 模板匹配在屏幕上定位 UI 元素,支持可配置的检测阈值
- **Accessibility 元件搜索** — 通过操作系统无障碍树(Windows UIA / macOS AX)按名称/角色定位按钮、菜单、控件
- **AI 元件定位(VLM)** — 用自然语言描述 UI 元素,由视觉语言模型(Anthropic / OpenAI)返回屏幕坐标
-- **OCR** — 使用 Tesseract 从屏幕提取文字,可搜索、点击或等待文字出现
+- **OCR** — 使用 Tesseract 从屏幕提取文字,可搜索、点击或等待文字出现;支持 regex 搜索与整块区域 dump
+- **LLM 动作规划器** — 用 Claude 把自然语言描述翻译成验证过的 `AC_*` 动作清单
+- **运行期变量与流程控制** — 执行时 `${var}` 替换,加上 `AC_set_var` / `AC_inc_var` / `AC_if_var` / `AC_for_each` / `AC_loop` / `AC_retry` 让脚本数据驱动
+- **远程桌面** — 用 token 认证的 TCP 协议串流本机画面并接收输入,**或** 连接到他机观看与控制(host + viewer GUI 内置)。可选 TLS(HTTPS 级加密)、WebSocket 传输(``ws://`` + ``wss://``,穿墙/浏览器友好)、持久化 9 位数 Host ID、host→viewer 音频串流、双向剪贴板同步(文字 + 图片)、分块文件传输(拖放 + 进度条;任意目的路径;无大小上限)
- **剪贴板** — 于 Windows / macOS / Linux 读写系统剪贴板文本
- **截图与屏幕录制** — 捕获全屏或指定区域为图片,录制屏幕为视频(AVI/MP4)
- **动作录制与回放** — 录制鼠标/键盘事件并重新播放
@@ -402,6 +408,155 @@ ac.wait_for_text("加载完成", timeout=15.0)
ac.set_tesseract_cmd(r"C:\Program Files\Tesseract-OCR\tesseract.exe")
```
+把区域(或整屏)内所有识别到的文字 dump 出来,或用 regex 搜索变动内容:
+
+```python
+import je_auto_control as ac
+
+# TextMatch 列表,含文字、边界框、置信度
+for match in ac.read_text_in_region(region=[0, 0, 800, 600]):
+ print(match.text, match.center, match.confidence)
+
+# Regex(接受字符串或 compiled re.Pattern)
+for match in ac.find_text_regex(r"Order#\d+"):
+ print(match.text, match.center)
+```
+
+GUI:**OCR Reader** 分页。
+
+### LLM 动作规划器
+
+把自然语言描述交给 LLM(默认 Anthropic Claude),翻译成验证过的 `AC_*` 动作清单。输出采用宽松解析(剥 code fence、从散文中抽出第一个 JSON array),再用 executor 同样的 schema 验证,所以结果可以直接喂给 `execute_action`:
+
+```python
+import je_auto_control as ac
+from je_auto_control.utils.executor.action_executor import executor
+
+actions = ac.plan_actions(
+ "点击 Submit 按钮,然后输入 'done' 并保存",
+ known_commands=executor.known_commands(),
+)
+executor.execute_action(actions)
+
+# 或者一行做完:
+ac.run_from_description("打开记事本并输入 hello", executor=executor)
+```
+
+| 变量 | 效果 |
+|---|---|
+| `ANTHROPIC_API_KEY` | 启用 Anthropic 后端 |
+| `AUTOCONTROL_LLM_BACKEND` | 强制指定 `anthropic` |
+| `AUTOCONTROL_LLM_MODEL` | 覆盖默认模型(如 `claude-opus-4-7`) |
+
+GUI:**LLM Planner** 分页 — 描述输入框、`QThread` 后台执行的 *Plan* 按钮、预览指令清单,以及 *Run plan* 按钮。
+
+### 运行期变量与流程控制
+
+executor 改成「每次调用」才解析 `${var}` placeholder(不会事先展平),所以嵌套的 `body` / `then` / `else` 列表会保留 placeholder,每次重复执行时重新绑定。配合新的变量修改命令,脚本可以数据驱动而不需要 Python 黏合:
+
+```json
+[
+ ["AC_set_var", {"name": "items", "value": ["alpha", "beta"]}],
+ ["AC_set_var", {"name": "i", "value": 0}],
+ ["AC_for_each", {
+ "items": "${items}", "as": "name",
+ "body": [
+ ["AC_inc_var", {"name": "i"}],
+ ["AC_if_var", {
+ "name": "i", "op": "ge", "value": 2,
+ "then": [["AC_break"]], "else": []
+ }]
+ ]
+ }]
+]
+```
+
+`AC_if_var` 比较运算符:`eq`、`ne`、`lt`、`le`、`gt`、`ge`、`contains`、`startswith`、`endswith`。GUI:**Variables** 分页 — 实时查看 `executor.variables`,支持单条设置、JSON 批量 seed、清空。
+
+### 远程桌面
+
+把本机画面串流给别人看 / 控制,**或** 观看并控制别人的机器。协议是 raw TCP 上的长度前缀框架(不引入额外依赖),先做一轮 HMAC-SHA256 challenge / response 认证;认证失败的 viewer 在看到任何画面前就被踢掉。JPEG frame 按照配置的 FPS / 质量产生,通过共享 latest-frame slot 广播给通过认证的 viewers,慢的 viewer 只会丢 frame 而不会卡其他人。Viewer 输入消息是 JSON,host 端用允许列表验证后才通过既有 wrapper 派发。
+
+```python
+# 被远程 — 启动 host 把 token + port 给对方
+from je_auto_control import RemoteDesktopHost
+host = RemoteDesktopHost(token="hunter2", bind="127.0.0.1",
+ port=0, fps=10, quality=70)
+host.start()
+print("listening on", host.port, "viewers:", host.connected_clients)
+```
+
+```python
+# 控制他机 — 连接 viewer 并发送输入
+from je_auto_control import RemoteDesktopViewer
+viewer = RemoteDesktopViewer(host="10.0.0.5", port=51234, token="hunter2",
+ on_frame=lambda jpeg: ...)
+viewer.connect()
+viewer.send_input({"action": "mouse_move", "x": 100, "y": 200})
+viewer.send_input({"action": "type", "text": "hello"})
+viewer.disconnect()
+```
+
+GUI:**Remote Desktop** 分页,内含两个子分页。
+
+- **Host**(被远程的本机)— Token 字段附 *生成* 按钮、bind 地址安全提示、启动 / 停止控制、实时刷新的 port + viewer 数量状态栏,以及 4fps 预览面板让被远程的人看到 viewer 看到的画面。
+- **Viewer**(控制他机)— 地址 / port / token 表单、*连接* / *断开*、自绘 frame display widget,会把 JPEG 等比缩放绘入。display 上的鼠标 / 滚轮 / 键盘事件会用最新 frame 的尺寸映射回原始远程屏幕的像素坐标,再用 `INPUT` 消息发回。
+
+> ⚠️ 取得 host:port 与 token 的人,等同拥有本机完整鼠标 / 键盘控制权。默认仅绑 `127.0.0.1`;要对外暴露请务必搭配 SSH tunnel 或 TLS 前端。Token 是唯一防线 — 请当作密码保管。
+
+**加密传输与替代协议**:传 `ssl_context` 给 `RemoteDesktopHost` 或 `RemoteDesktopViewer` 即套上 TLS。要穿墙/给浏览器接,用内置的 WebSocket 版本(无额外依赖),加 `ssl_context` 即 `wss://`:
+
+```python
+from je_auto_control import (
+ WebSocketDesktopHost, WebSocketDesktopViewer,
+)
+host = WebSocketDesktopHost(token="hunter2", ssl_context=server_ctx)
+viewer = WebSocketDesktopViewer(
+ host="example.com", port=443, token="hunter2",
+ ssl_context=client_ctx, expected_host_id="123456789",
+)
+```
+
+**持久化 Host ID**:每台 host 有稳定的 9 位数字 ID(存在 `~/.je_auto_control/remote_host_id`),在 `AUTH_OK` 中声明,viewer 通过 `expected_host_id` 验证:
+
+```python
+print(host.host_id) # 例如 "123456789"
+viewer = RemoteDesktopViewer(
+ host=..., port=..., token=...,
+ expected_host_id="123456789", # 不一致就抛 AuthenticationError
+)
+```
+
+**音频串流(host → viewer)**:可选 `sounddevice` 依赖;host 端 `enable_audio=True` 开启,viewer 端接 `AudioPlayer`(或自己的 callback):
+
+```python
+host = RemoteDesktopHost(token="tok", enable_audio=True)
+
+from je_auto_control.utils.remote_desktop import AudioPlayer
+player = AudioPlayer(); player.start()
+viewer = RemoteDesktopViewer(host=..., on_audio=player.play)
+```
+
+**剪贴板同步(文字 + 图片,双向)**:明确调用,没有自动 polling 循环。图片剪贴板在 Windows(CF_DIB via ctypes)和 Linux(`xclip -t image/png`)支持;macOS get 走 Pillow ImageGrab、set 暂时需要 PyObjC。
+
+```python
+viewer.send_clipboard_text("hello")
+viewer.send_clipboard_image(open("logo.png", "rb").read())
+host.broadcast_clipboard_text("greetings")
+```
+
+**文件传输 + 进度**:双向、分块、目的路径任意、无大小上限;GUI viewer 还可以拖放:
+
+```python
+viewer.send_file(
+ "local.bin", "/tmp/uploaded.bin",
+ on_progress=lambda tid, done, total: print(done, total),
+)
+host.send_file_to_viewers("local.bin", "/tmp/from_host.bin")
+```
+
+> ⚠️ 路径无限制、大小无上限。任何拿到 token 的人都能把任意文件写到任意位置,也能塞满磁盘 — 必须等同信任 token 持有者,或自己继承 `FileReceiver` 在 `handle_begin` 内验证 dest_path。
+
### 剪贴板
```python
@@ -488,10 +643,13 @@ je_auto_control.execute_action([
| 屏幕 | `AC_screen_size`, `AC_screenshot` |
| Accessibility | `AC_a11y_list`, `AC_a11y_find`, `AC_a11y_click` |
| VLM(AI 定位) | `AC_vlm_locate`, `AC_vlm_click` |
-| OCR | `AC_locate_text`, `AC_click_text`, `AC_wait_text` |
+| OCR | `AC_locate_text`, `AC_click_text`, `AC_wait_text`, `AC_read_text_in_region`, `AC_find_text_regex` |
+| LLM 规划器 | `AC_llm_plan`, `AC_llm_run` |
| 剪贴板 | `AC_clipboard_get`, `AC_clipboard_set` |
| 窗口 | `AC_list_windows`, `AC_focus_window`, `AC_wait_window`, `AC_close_window` |
-| 流程控制 | `AC_loop`, `AC_break`, `AC_continue`, `AC_if_image_found`, `AC_if_pixel`, `AC_while_image`, `AC_wait_image`, `AC_wait_pixel`, `AC_sleep`, `AC_retry` |
+| 流程控制 | `AC_loop`, `AC_break`, `AC_continue`, `AC_if_image_found`, `AC_if_pixel`, `AC_if_var`, `AC_while_image`, `AC_for_each`, `AC_wait_image`, `AC_wait_pixel`, `AC_sleep`, `AC_retry` |
+| 变量 | `AC_set_var`, `AC_get_var`, `AC_inc_var` |
+| 远程桌面 | `AC_start_remote_host`, `AC_stop_remote_host`, `AC_remote_host_status`, `AC_remote_connect`, `AC_remote_disconnect`, `AC_remote_viewer_status`, `AC_remote_send_input` |
| 录制 | `AC_record`, `AC_stop_record`, `AC_set_record_enable` |
| 报告 | `AC_generate_html`, `AC_generate_json`, `AC_generate_xml`, `AC_generate_html_report`, `AC_generate_json_report`, `AC_generate_xml_report` |
| 执行记录 | `AC_history_list`, `AC_history_clear` |
diff --git a/README/README_zh-TW.md b/README/README_zh-TW.md
index 01b86587..486e726f 100644
--- a/README/README_zh-TW.md
+++ b/README/README_zh-TW.md
@@ -23,6 +23,9 @@
- [Accessibility 元件搜尋](#accessibility-元件搜尋)
- [AI 元件定位(VLM)](#ai-元件定位vlm)
- [OCR 螢幕文字辨識](#ocr-螢幕文字辨識)
+ - [LLM 動作規劃器](#llm-動作規劃器)
+ - [執行期變數與流程控制](#執行期變數與流程控制)
+ - [遠端桌面](#遠端桌面)
- [剪貼簿](#剪貼簿)
- [截圖](#截圖)
- [動作錄製與回放](#動作錄製與回放)
@@ -56,7 +59,10 @@
- **圖像辨識** — 使用 OpenCV 模板匹配在螢幕上定位 UI 元素,支援可設定的偵測閾值
- **Accessibility 元件搜尋** — 透過作業系統無障礙樹(Windows UIA / macOS AX)依名稱/角色定位按鈕、選單、控制項
- **AI 元件定位(VLM)** — 用自然語言描述 UI 元素,交由視覺語言模型(Anthropic / OpenAI)取得螢幕座標
-- **OCR** — 使用 Tesseract 從螢幕擷取文字,可搜尋、點擊或等待文字出現
+- **OCR** — 使用 Tesseract 從螢幕擷取文字,可搜尋、點擊或等待文字出現;支援 regex 搜尋與整塊區域 dump
+- **LLM 動作規劃器** — 用 Claude 把自然語言描述翻譯成驗證過的 `AC_*` 動作清單
+- **執行期變數與流程控制** — 執行時 `${var}` 取代,加上 `AC_set_var` / `AC_inc_var` / `AC_if_var` / `AC_for_each` / `AC_loop` / `AC_retry` 讓腳本資料驅動
+- **遠端桌面** — 用 token 認證的 TCP 協定串流本機畫面並接收輸入,**或** 連線到他機觀看與控制(host + viewer GUI 皆內建)。可選 TLS(HTTPS 級加密)、WebSocket 傳輸(``ws://`` + ``wss://``,穿牆/瀏覽器友善)、持久化 9 位數 Host ID、host→viewer 音訊串流、雙向剪貼簿同步(文字 + 圖片)、分塊檔案傳輸(拖放 + 進度條;任意目的路徑;無大小上限)
- **剪貼簿** — 於 Windows / macOS / Linux 讀寫系統剪貼簿文字
- **截圖與螢幕錄製** — 擷取全螢幕或指定區域為圖片,錄製螢幕為影片(AVI/MP4)
- **動作錄製與回放** — 錄製滑鼠/鍵盤事件並重新播放
@@ -402,6 +408,155 @@ ac.wait_for_text("載入完成", timeout=15.0)
ac.set_tesseract_cmd(r"C:\Program Files\Tesseract-OCR\tesseract.exe")
```
+把區域(或整螢幕)內所有辨識到的文字 dump 出來,或用 regex 搜尋變動內容:
+
+```python
+import je_auto_control as ac
+
+# TextMatch 列表,含文字、邊界框、信心度
+for match in ac.read_text_in_region(region=[0, 0, 800, 600]):
+ print(match.text, match.center, match.confidence)
+
+# Regex(接受字串或 compiled re.Pattern)
+for match in ac.find_text_regex(r"Order#\d+"):
+ print(match.text, match.center)
+```
+
+GUI:**OCR Reader** 分頁。
+
+### LLM 動作規劃器
+
+把自然語言描述交給 LLM(預設 Anthropic Claude),翻譯成驗證過的 `AC_*` 動作清單。輸出採寬鬆解析(會剝 code fence、從散文中抽出第一個 JSON array),再用 executor 同樣的 schema 驗證,所以結果可以直接餵給 `execute_action`:
+
+```python
+import je_auto_control as ac
+from je_auto_control.utils.executor.action_executor import executor
+
+actions = ac.plan_actions(
+ "點擊 Submit 按鈕,然後輸入 'done' 並儲存",
+ known_commands=executor.known_commands(),
+)
+executor.execute_action(actions)
+
+# 或者一行做完:
+ac.run_from_description("開記事本,輸入 hello", executor=executor)
+```
+
+| 變數 | 效果 |
+|---|---|
+| `ANTHROPIC_API_KEY` | 啟用 Anthropic 後端 |
+| `AUTOCONTROL_LLM_BACKEND` | 強制指定 `anthropic` |
+| `AUTOCONTROL_LLM_MODEL` | 覆寫預設模型(如 `claude-opus-4-7`) |
+
+GUI:**LLM Planner** 分頁 — 描述輸入框、`QThread` 背景執行的 *Plan* 按鈕、預覽指令清單,以及 *Run plan* 按鈕。
+
+### 執行期變數與流程控制
+
+executor 改成「每次呼叫」才解析 `${var}` placeholder(不會事先攤平),所以巢狀的 `body` / `then` / `else` 清單會保留 placeholder,每次重複執行時重新繫結。搭配新的變數修改指令,腳本可以資料驅動而不需要 Python 黏合:
+
+```json
+[
+ ["AC_set_var", {"name": "items", "value": ["alpha", "beta"]}],
+ ["AC_set_var", {"name": "i", "value": 0}],
+ ["AC_for_each", {
+ "items": "${items}", "as": "name",
+ "body": [
+ ["AC_inc_var", {"name": "i"}],
+ ["AC_if_var", {
+ "name": "i", "op": "ge", "value": 2,
+ "then": [["AC_break"]], "else": []
+ }]
+ ]
+ }]
+]
+```
+
+`AC_if_var` 比較運算子:`eq`、`ne`、`lt`、`le`、`gt`、`ge`、`contains`、`startswith`、`endswith`。GUI:**Variables** 分頁 — 即時檢視 `executor.variables`,可單筆設定、JSON 整批 seed、清空。
+
+### 遠端桌面
+
+把本機畫面串流給別人看/控制,**或** 觀看並控制別人的機器。協定是 raw TCP 上的長度前綴框架(沒有額外相依),先做一輪 HMAC-SHA256 challenge / response 認證;認證失敗的 viewer 在看到任何畫面前就被踢掉。JPEG frame 依設定的 FPS / 品質產生,透過共享 latest-frame slot 廣播給通過認證的 viewers,慢的 viewer 只會掉 frame 而不會卡其他人。Viewer 輸入訊息是 JSON,host 端用允許清單驗證後才透過既有 wrapper 派送。
+
+```python
+# 被遠端 — 啟動 host 把 token + port 給對方
+from je_auto_control import RemoteDesktopHost
+host = RemoteDesktopHost(token="hunter2", bind="127.0.0.1",
+ port=0, fps=10, quality=70)
+host.start()
+print("listening on", host.port, "viewers:", host.connected_clients)
+```
+
+```python
+# 控制他機 — 連線 viewer 並送出輸入
+from je_auto_control import RemoteDesktopViewer
+viewer = RemoteDesktopViewer(host="10.0.0.5", port=51234, token="hunter2",
+ on_frame=lambda jpeg: ...)
+viewer.connect()
+viewer.send_input({"action": "mouse_move", "x": 100, "y": 200})
+viewer.send_input({"action": "type", "text": "hello"})
+viewer.disconnect()
+```
+
+GUI:**Remote Desktop** 分頁,內含兩個子分頁。
+
+- **Host**(被遠端的本機)— Token 欄位附 *產生* 按鈕、bind 位址安全提示、啟動/停止控制、即時刷新的 port + viewer 數量狀態列,以及 4fps 預覽面板讓被遠端的人看到 viewer 看到的畫面。
+- **Viewer**(控制他機)— 位址 / port / token 表單、*連線* / *中斷連線*,自繪 frame display widget,會把 JPEG 等比縮放繪入。display 上的滑鼠 / 滾輪 / 鍵盤事件會用最新 frame 的尺寸映射回原始遠端螢幕的像素座標,再用 `INPUT` 訊息送回。
+
+> ⚠️ 取得 host:port 與 token 的人,等同擁有本機完整滑鼠 / 鍵盤控制權。預設只綁 `127.0.0.1`;要對外暴露請務必搭配 SSH tunnel 或 TLS 前端。Token 是唯一防線 — 請當作密碼來保管。
+
+**加密傳輸與替代協定**:傳 `ssl_context` 給 `RemoteDesktopHost` 或 `RemoteDesktopViewer` 即套上 TLS。要穿牆/給瀏覽器接,用內建的 WebSocket 版本(無額外相依),加 `ssl_context` 就變 `wss://`:
+
+```python
+from je_auto_control import (
+ WebSocketDesktopHost, WebSocketDesktopViewer,
+)
+host = WebSocketDesktopHost(token="hunter2", ssl_context=server_ctx)
+viewer = WebSocketDesktopViewer(
+ host="example.com", port=443, token="hunter2",
+ ssl_context=client_ctx, expected_host_id="123456789",
+)
+```
+
+**持久化 Host ID**:每台 host 有穩定的 9 位數字 ID(存在 `~/.je_auto_control/remote_host_id`),在 `AUTH_OK` 中宣告,viewer 透過 `expected_host_id` 驗證:
+
+```python
+print(host.host_id) # 例如 "123456789"
+viewer = RemoteDesktopViewer(
+ host=..., port=..., token=...,
+ expected_host_id="123456789", # 不一致就拋 AuthenticationError
+)
+```
+
+**音訊串流(host → viewer)**:選用 `sounddevice` 相依;host 端 `enable_audio=True` 開啟,viewer 端接 `AudioPlayer`(或自己的 callback):
+
+```python
+host = RemoteDesktopHost(token="tok", enable_audio=True)
+
+from je_auto_control.utils.remote_desktop import AudioPlayer
+player = AudioPlayer(); player.start()
+viewer = RemoteDesktopViewer(host=..., on_audio=player.play)
+```
+
+**剪貼簿同步(文字 + 圖片,雙向)**:明確呼叫,沒有自動 polling 迴圈。圖片剪貼簿在 Windows(CF_DIB via ctypes)跟 Linux(`xclip -t image/png`)支援;macOS get 走 Pillow ImageGrab、set 暫時需要 PyObjC。
+
+```python
+viewer.send_clipboard_text("hello")
+viewer.send_clipboard_image(open("logo.png", "rb").read())
+host.broadcast_clipboard_text("greetings")
+```
+
+**檔案傳輸 + 進度**:雙向、分塊、目的路徑任意、無大小上限;GUI viewer 還可以拖放:
+
+```python
+viewer.send_file(
+ "local.bin", "/tmp/uploaded.bin",
+ on_progress=lambda tid, done, total: print(done, total),
+)
+host.send_file_to_viewers("local.bin", "/tmp/from_host.bin")
+```
+
+> ⚠️ 路徑無限制、大小無上限。任何拿到 token 的人都能把任意檔案寫到任意位置,也能塞滿磁碟 — 必須等同信任 token 持有者,或自己繼承 `FileReceiver` 在 `handle_begin` 內驗證 dest_path。
+
### 剪貼簿
```python
@@ -488,10 +643,13 @@ je_auto_control.execute_action([
| 螢幕 | `AC_screen_size`, `AC_screenshot` |
| Accessibility | `AC_a11y_list`, `AC_a11y_find`, `AC_a11y_click` |
| VLM(AI 定位) | `AC_vlm_locate`, `AC_vlm_click` |
-| OCR | `AC_locate_text`, `AC_click_text`, `AC_wait_text` |
+| OCR | `AC_locate_text`, `AC_click_text`, `AC_wait_text`, `AC_read_text_in_region`, `AC_find_text_regex` |
+| LLM 規劃器 | `AC_llm_plan`, `AC_llm_run` |
| 剪貼簿 | `AC_clipboard_get`, `AC_clipboard_set` |
| 視窗 | `AC_list_windows`, `AC_focus_window`, `AC_wait_window`, `AC_close_window` |
-| 流程控制 | `AC_loop`, `AC_break`, `AC_continue`, `AC_if_image_found`, `AC_if_pixel`, `AC_while_image`, `AC_wait_image`, `AC_wait_pixel`, `AC_sleep`, `AC_retry` |
+| 流程控制 | `AC_loop`, `AC_break`, `AC_continue`, `AC_if_image_found`, `AC_if_pixel`, `AC_if_var`, `AC_while_image`, `AC_for_each`, `AC_wait_image`, `AC_wait_pixel`, `AC_sleep`, `AC_retry` |
+| 變數 | `AC_set_var`, `AC_get_var`, `AC_inc_var` |
+| 遠端桌面 | `AC_start_remote_host`, `AC_stop_remote_host`, `AC_remote_host_status`, `AC_remote_connect`, `AC_remote_disconnect`, `AC_remote_viewer_status`, `AC_remote_send_input` |
| 錄製 | `AC_record`, `AC_stop_record`, `AC_set_record_enable` |
| 報告 | `AC_generate_html`, `AC_generate_json`, `AC_generate_xml`, `AC_generate_html_report`, `AC_generate_json_report`, `AC_generate_xml_report` |
| 執行紀錄 | `AC_history_list`, `AC_history_clear` |
diff --git a/docs/source/Eng/doc/new_features/new_features_doc.rst b/docs/source/Eng/doc/new_features/new_features_doc.rst
index e58dd6ad..c277a25e 100644
--- a/docs/source/Eng/doc/new_features/new_features_doc.rst
+++ b/docs/source/Eng/doc/new_features/new_features_doc.rst
@@ -296,3 +296,376 @@ Artifacts are stored under ``~/.je_auto_control/artifacts/`` and are
removed when the matching run is pruned or the history is cleared. GUI:
**Run History** tab — double-click the artifact column to open the
screenshot in the OS image viewer.
+
+
+OCR — region dump and regex search
+==================================
+
+The OCR module already exposed substring / exact-match helpers. Two new
+APIs cover scenarios the existing ones could not::
+
+ import je_auto_control as ac
+
+ # Dump every recognised text record in a region (or full screen)
+ for match in ac.read_text_in_region(region=[0, 0, 800, 600]):
+ print(match.text, match.center, match.confidence)
+
+ # Regex search — useful when text varies (order numbers, error codes)
+ for match in ac.find_text_regex(r"Order#\d+"):
+ print(match.text, match.center)
+
+ # Compiled patterns and flags work too
+ import re
+ ac.find_text_regex(re.compile(r"foo", re.IGNORECASE))
+
+Action-JSON commands::
+
+ [["AC_read_text_in_region", {"region": [0, 0, 800, 600]}]]
+ [["AC_find_text_regex", {"pattern": "Order#\\d+"}]]
+
+GUI: **OCR Reader** tab. Pick a region with the existing overlay (or
+leave blank for full screen), set language / minimum confidence, then
+hit *Dump region text* or *Find by regex*. Results are returned as a
+JSON list with text, bounding box, and confidence per hit.
+
+
+Runtime variables and data-driven control flow
+==============================================
+
+Pre-execution interpolation in :mod:`script_vars.interpolate` only
+substituted ``${var}`` placeholders once against a static mapping;
+scripts had no way to mutate state during execution. ``VariableScope``
+is a runtime mapping the executor exposes to flow-control commands so
+they can read and write the same bag the runtime interpolator consults.
+
+The executor now resolves ``${var}`` per command call (not pre-flattened),
+so nested ``body`` / ``then`` / ``else`` lists keep their placeholders
+and re-bind each time they execute — letting ``AC_for_each`` iterate
+over a list while the body sees the current item.
+
+::
+
+ import je_auto_control as ac
+ from je_auto_control.utils.executor.action_executor import executor
+
+ executor.execute_action([
+ ["AC_set_var", {"name": "items", "value": ["alpha", "beta"]}],
+ ["AC_set_var", {"name": "i", "value": 0}],
+ ["AC_for_each", {
+ "items": "${items}", "as": "name",
+ "body": [
+ ["AC_inc_var", {"name": "i"}],
+ ["AC_if_var", {
+ "name": "i", "op": "ge", "value": 2,
+ "then": [["AC_break"]], "else": [],
+ }],
+ ],
+ }],
+ ])
+
+Comparison operators for ``AC_if_var``: ``eq``, ``ne``, ``lt``, ``le``,
+``gt``, ``ge``, ``contains``, ``startswith``, ``endswith``.
+
+Action-JSON commands: ``AC_set_var``, ``AC_get_var``, ``AC_inc_var``,
+``AC_if_var``, ``AC_for_each``.
+
+GUI: **Variables** tab — live view of ``executor.variables`` with
+single-set, JSON seed, and clear-all controls; reflects what
+``AC_set_var`` / ``AC_for_each`` mutate at runtime.
+
+
+LLM action planner
+==================
+
+Translate a plain-language description into a validated ``AC_*``
+action list by asking an LLM (Anthropic Claude by default). Output is
+parsed leniently (strips code fences, extracts the first JSON array
+from prose) and then validated by the same schema the executor uses,
+so the result can be piped straight into ``execute_action``::
+
+ import je_auto_control as ac
+ from je_auto_control.utils.executor.action_executor import executor
+
+ actions = ac.plan_actions(
+ "click the Submit button, then type 'done' and save",
+ known_commands=executor.known_commands(),
+ )
+ executor.execute_action(actions)
+
+ # Or in one call:
+ ac.run_from_description("open Notepad and type hello", executor=executor)
+
+Backend selection mirrors :mod:`vision.backends`:
+
+- Anthropic (``anthropic`` SDK, ``ANTHROPIC_API_KEY``) — default
+- ``AUTOCONTROL_LLM_BACKEND`` and ``AUTOCONTROL_LLM_MODEL`` for overrides
+
+Action-JSON commands: ``AC_llm_plan``, ``AC_llm_run``.
+
+GUI: **LLM Planner** tab. Description box, ``QThread``-backed *Plan*
+button, action-list preview, and a *Run plan* button — long calls run
+off the GUI thread so the UI stays responsive.
+
+
+Remote desktop (host + viewer)
+==============================
+
+Stream this machine's screen to another machine, **or** view and
+control a remote machine — both directions ship with a headless API
+and a GUI tab.
+
+The wire format is a length-prefixed framing on raw TCP (no extra
+deps), starting with an HMAC-SHA256 challenge/response handshake;
+viewers that fail auth are dropped before they can see a frame. JPEG
+frames are produced at the configured FPS / quality and broadcast to
+authenticated viewers via a shared latest-frame slot, so a slow viewer
+drops frames instead of blocking the rest. Viewer input messages are
+JSON, validated against an allowlist, and applied through the existing
+mouse / keyboard wrappers.
+
+Headless host (be remoted by someone else)::
+
+ from je_auto_control import RemoteDesktopHost
+
+ host = RemoteDesktopHost(
+ token="hunter2", # shared secret (HMAC key)
+ bind="127.0.0.1", # default; expose externally only via
+ # SSH tunnel or trusted VPN
+ port=0, # 0 = auto-assigned
+ fps=10, quality=70,
+ )
+ host.start()
+ print("listening on", host.port, "viewers:", host.connected_clients)
+ # ...
+ host.stop()
+
+Headless viewer (control someone else)::
+
+ from je_auto_control import RemoteDesktopViewer
+
+ viewer = RemoteDesktopViewer(
+ host="10.0.0.5", port=51234, token="hunter2",
+ on_frame=lambda jpeg_bytes: ..., # render or save
+ )
+ viewer.connect()
+ viewer.send_input({"action": "mouse_move", "x": 100, "y": 200})
+ viewer.send_input({"action": "type", "text": "hello"})
+ viewer.disconnect()
+
+Input message allowlist (validated on the host before dispatch):
+
+- ``mouse_move`` ``{x, y}``
+- ``mouse_click`` ``{x?, y?, button}``
+- ``mouse_press`` / ``mouse_release`` ``{button}``
+- ``mouse_scroll`` ``{x?, y?, amount}``
+- ``key_press`` / ``key_release`` ``{keycode}``
+- ``type`` ``{text}``
+- ``ping``
+
+Action-JSON commands (use the singleton in
+:mod:`utils.remote_desktop.registry`)::
+
+ AC_start_remote_host # token, bind, port, fps, quality, region
+ AC_stop_remote_host
+ AC_remote_host_status # → {running, port, connected_clients}
+
+ AC_remote_connect # host, port, token, timeout
+ AC_remote_disconnect
+ AC_remote_viewer_status # → {connected}
+ AC_remote_send_input # action: {...}
+
+GUI: **Remote Desktop** tab with two sub-tabs.
+
+- **Host** — token field with a *Generate* button that emits 24 random
+ URL-safe bytes, security warning about the bind address, start / stop
+ controls, refreshing port + viewer-count status, and a 4 fps preview
+ pane below the controls so the user being remoted sees what viewers
+ see.
+- **Viewer** — address / port / token form, *Connect* / *Disconnect*,
+ and a custom frame-display widget that paints incoming JPEG frames
+ scaled with ``KeepAspectRatio``. Mouse / wheel / key events on the
+ display are remapped from widget coordinates back to the remote
+ screen's pixel space using the latest frame's dimensions, then
+ forwarded as ``INPUT`` messages.
+
+.. warning::
+ Anyone with the host:port and token gets full mouse / keyboard
+ control of the host machine. Defaults bind to ``127.0.0.1``;
+ exposing this to untrusted networks should be paired with an SSH
+ tunnel or TLS front-end. The token is the *only* line of defence —
+ treat it like a password.
+
+
+Remote desktop — secure transports, audio, clipboard, file transfer
+===================================================================
+
+Host ID handshake
+-----------------
+
+Every host now exposes a stable 9-digit numeric ID, persisted at
+``~/.je_auto_control/remote_host_id`` so it stays the same across
+restarts. The ID is announced inside ``AUTH_OK`` (so only authenticated
+viewers see it), and viewers can verify ``expected_host_id`` to defend
+against a different process listening on the same address::
+
+ from je_auto_control import RemoteDesktopHost, RemoteDesktopViewer
+ host = RemoteDesktopHost(token="tok")
+ print(host.host_id) # e.g. "123456789"
+
+ viewer = RemoteDesktopViewer(
+ host="10.0.0.5", port=51234, token="tok",
+ expected_host_id="123456789",
+ )
+ viewer.connect() # raises AuthenticationError on mismatch
+
+Helpers ``format_host_id("123456789") == "123 456 789"`` and
+``parse_host_id("123 456 789") == "123456789"`` are also exported. The
+GUI displays the formatted ID with a *Copy* button, and the viewer
+panel accepts any common spacing / dashing.
+
+TLS
+---
+
+Both ``RemoteDesktopHost`` and ``RemoteDesktopViewer`` accept an
+``ssl.SSLContext``. When provided, the host wraps each accepted
+connection server-side; the viewer wraps the connect socket
+client-side. Failed handshakes are logged and silently dropped before
+they can register as connected clients::
+
+ import ssl
+ ctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
+ ctx.load_cert_chain("cert.pem", "key.pem")
+ host = RemoteDesktopHost(token="tok", ssl_context=ctx)
+
+ client_ctx = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
+ client_ctx.load_verify_locations("cert.pem")
+ viewer = RemoteDesktopViewer(host=..., ssl_context=client_ctx)
+
+For self-signed loopback testing, set
+``ctx.check_hostname = False`` and ``ctx.verify_mode = ssl.CERT_NONE``
+on the client context. The Remote Desktop GUI host panel has TLS cert
+/ key file pickers; the viewer panel has a *Skip cert verification*
+checkbox.
+
+WebSocket transport
+-------------------
+
+A new ``WebSocketDesktopHost`` / ``WebSocketDesktopViewer`` pair
+speaks the same typed-message protocol over RFC 6455 BINARY frames.
+The implementation is in-tree (no extra deps); each application
+message rides as one full WebSocket frame, so reassembly machinery is
+unnecessary. The same ``ssl_context`` parameter doubles as the
+``wss://`` switch::
+
+ from je_auto_control import (
+ WebSocketDesktopHost, WebSocketDesktopViewer,
+ )
+ host = WebSocketDesktopHost(token="tok", ssl_context=ctx) # wss://
+ viewer = WebSocketDesktopViewer(
+ host="example.com", port=443, token="tok",
+ ssl_context=client_ctx, path="/rd",
+ )
+
+Why WS: friendly to corporate firewalls and reverse proxies, and
+compatible with browser viewers. The GUI viewer's transport dropdown
+(*TCP* / *WebSocket* / *TLS* / *WSS*) chooses the right class
+automatically.
+
+Audio streaming
+---------------
+
+A new ``AUDIO`` message type carries 16-bit signed PCM blocks (default
+16 kHz mono, 50 ms / 1600 bytes per block). The optional
+``sounddevice`` dependency is loaded lazily — without it, audio is
+reported disabled and the host stays up::
+
+ host = RemoteDesktopHost(
+ token="tok", enable_audio=True, audio_device=None, # default mic
+ audio_sample_rate=16000, audio_channels=1,
+ )
+
+ from je_auto_control.utils.remote_desktop import AudioPlayer
+ player = AudioPlayer(); player.start()
+ viewer = RemoteDesktopViewer(host=..., on_audio=player.play)
+
+The host fans each captured block out to all authenticated viewers
+through a bounded per-client deque (~2.5 s of buffering), so a slow
+viewer drops old audio chunks instead of stalling capture for
+everyone else. To capture system audio (rather than the mic), pick a
+loopback / monitor device by index — Windows WASAPI loopback on
+Windows, the PulseAudio monitor source on Linux, BlackHole on macOS.
+GUI: *Stream system audio* on the Host panel, *Play received audio*
+on the Viewer panel.
+
+Clipboard sync (text + image)
+-----------------------------
+
+A new ``CLIPBOARD`` message type carries a JSON envelope so kinds can
+grow without a protocol bump:
+
+* ``{"kind": "text", "text": "..."}``
+* ``{"kind": "image", "format": "png", "data_b64": "..."}``
+
+``utils/clipboard/clipboard.py`` is extended with
+``get_clipboard_image`` / ``set_clipboard_image``; Windows uses
+CF_DIB via ctypes (Pillow rasterises PNG → BMP → DIB), Linux shells
+out to ``xclip -t image/png``, macOS get works via Pillow ImageGrab
+and set raises until a PyObjC backend lands. Sync is explicit per
+call — no auto-poll loops to avoid paste storms::
+
+ # Viewer pushes its local clipboard to the host
+ viewer.send_clipboard_text("hello")
+ viewer.send_clipboard_image(open("logo.png", "rb").read())
+
+ # Host pushes to all viewers
+ host.broadcast_clipboard_text("greetings")
+ host.broadcast_clipboard_image(png_bytes)
+
+ # Viewer wires a callback so it can choose when to paste
+ viewer = RemoteDesktopViewer(
+ host=..., on_clipboard=lambda kind, data: ...,
+ )
+
+GUI: *Push clipboard text to host* button on the Viewer panel; the
+host applies inbound clipboards via the helpers above.
+
+File transfer with progress
+---------------------------
+
+Three new message types form one transfer:
+
+* ``FILE_BEGIN`` — JSON ``{transfer_id, dest_path, size}``
+* ``FILE_CHUNK`` — 36-byte ASCII transfer id + raw payload
+* ``FILE_END`` — JSON ``{transfer_id, status, error?}``
+
+Transfers are bidirectional, chunked (256 KiB per chunk), and have
+*no aggregate size limit* and *no path restriction* on the
+destination — token holders are trusted users. Progress is reported
+locally on both sides without an extra wire message::
+
+ from je_auto_control.utils.remote_desktop import (
+ FileReceiver, RemoteDesktopHost, RemoteDesktopViewer, send_file,
+ )
+
+ # Viewer uploads to host
+ viewer.send_file("local.bin", "/tmp/uploaded.bin",
+ on_progress=lambda tid, done, total: print(done, total))
+
+ # Host pushes to all viewers (each viewer needs a FileReceiver)
+ viewer.set_file_receiver(FileReceiver(
+ on_progress=..., on_complete=...,
+ ))
+ host.send_file_to_viewers("local.bin", "/tmp/from_host.bin")
+
+GUI: *Send file...* opens a file picker + destination-path prompt and
+runs the upload on a ``QThread`` with a ``QProgressBar`` bound to the
+sender's progress events. The frame display widget also accepts
+dragEnter / drop of local files; each dropped file kicks off the same
+upload flow.
+
+.. warning::
+ Path is unrestricted and there is no size cap. Anyone with the
+ token can write any file to any location, and can fill the disk.
+ Keep ``trusted token holders == trusted users`` in mind, or wrap
+ the headless API in your own restricted ``FileReceiver`` subclass
+ that vets the destination path.
diff --git a/docs/source/Zh/doc/new_features/new_features_doc.rst b/docs/source/Zh/doc/new_features/new_features_doc.rst
index de5420f0..95a45ab6 100644
--- a/docs/source/Zh/doc/new_features/new_features_doc.rst
+++ b/docs/source/Zh/doc/new_features/new_features_doc.rst
@@ -282,3 +282,350 @@ Action-JSON 指令:``AC_vlm_locate``、``AC_vlm_click``。GUI:
截圖檔存於 ``~/.je_auto_control/artifacts/``,相關紀錄被 prune 或整個
歷史被清除時會一併刪除。GUI:**Run History** 分頁 — 雙擊截圖欄位可開
啟 OS 預覽。
+
+
+OCR — 區域 dump 與 regex 搜尋
+=============================
+
+原本 OCR 模組只支援字串/精確比對,新增兩個 API 補強其他常見場景::
+
+ import je_auto_control as ac
+
+ # 把區域(或整個螢幕)內辨識到的所有文字傾倒出來
+ for match in ac.read_text_in_region(region=[0, 0, 800, 600]):
+ print(match.text, match.center, match.confidence)
+
+ # Regex 搜尋 — 適合內容會變的文字(訂單編號、錯誤代碼)
+ for match in ac.find_text_regex(r"Order#\d+"):
+ print(match.text, match.center)
+
+ # 也接受 compiled pattern 與 flags
+ import re
+ ac.find_text_regex(re.compile(r"foo", re.IGNORECASE))
+
+Action-JSON 指令::
+
+ [["AC_read_text_in_region", {"region": [0, 0, 800, 600]}]]
+ [["AC_find_text_regex", {"pattern": "Order#\\d+"}]]
+
+GUI:**OCR Reader** 分頁。可用既有的選取 overlay 圈出區域(留空則整螢幕),
+設定語言/最低信心度後按 *抓取區域全部文字* 或 *用 regex 搜尋*。結果
+以 JSON 列出,含文字、邊界框、信心度。
+
+
+執行期變數與資料驅動流程控制
+============================
+
+過去 :mod:`script_vars.interpolate` 只能在執行前一次性把 ``${var}``
+取代成靜態 mapping 中的值,腳本沒辦法在執行時修改狀態。``VariableScope``
+是 executor 暴露給流程控制指令的執行期 mapping,讓它們能讀寫與
+runtime interpolator 相同的容器。
+
+executor 現在改成「每次呼叫」才解析 ``${var}`` placeholder(不會
+事先攤平),所以巢狀的 ``body`` / ``then`` / ``else`` 清單會保留
+placeholder,每次重複執行時重新繫結 — 因此 ``AC_for_each`` 走訪
+list 時,body 內看到的就是當前的元素。
+
+::
+
+ import je_auto_control as ac
+ from je_auto_control.utils.executor.action_executor import executor
+
+ executor.execute_action([
+ ["AC_set_var", {"name": "items", "value": ["alpha", "beta"]}],
+ ["AC_set_var", {"name": "i", "value": 0}],
+ ["AC_for_each", {
+ "items": "${items}", "as": "name",
+ "body": [
+ ["AC_inc_var", {"name": "i"}],
+ ["AC_if_var", {
+ "name": "i", "op": "ge", "value": 2,
+ "then": [["AC_break"]], "else": [],
+ }],
+ ],
+ }],
+ ])
+
+``AC_if_var`` 的比較運算子:``eq``、``ne``、``lt``、``le``、``gt``、
+``ge``、``contains``、``startswith``、``endswith``。
+
+Action-JSON 指令:``AC_set_var``、``AC_get_var``、``AC_inc_var``、
+``AC_if_var``、``AC_for_each``。
+
+GUI:**Variables** 分頁 — 即時檢視 ``executor.variables``,可單筆設
+定、JSON 整批 seed、清空,反映 ``AC_set_var`` / ``AC_for_each`` 在執
+行期的變動。
+
+
+LLM 動作規劃器
+==============
+
+把一段中/英文描述交給 LLM(預設 Anthropic Claude),生成驗證過的
+``AC_*`` 動作清單。輸出採寬鬆解析(會剝 code fence、從散文中抽出
+第一個 JSON array),再用 executor 同樣的 schema 驗證,所以結果可
+以直接餵給 ``execute_action``::
+
+ import je_auto_control as ac
+ from je_auto_control.utils.executor.action_executor import executor
+
+ actions = ac.plan_actions(
+ "點擊 Submit 按鈕,然後輸入 'done' 並儲存",
+ known_commands=executor.known_commands(),
+ )
+ executor.execute_action(actions)
+
+ # 或者一行做完:
+ ac.run_from_description("開記事本,輸入 hello", executor=executor)
+
+後端選擇對齊 :mod:`vision.backends`:
+
+- Anthropic(``anthropic`` SDK,``ANTHROPIC_API_KEY``)— 預設
+- 用 ``AUTOCONTROL_LLM_BACKEND``、``AUTOCONTROL_LLM_MODEL`` 覆寫
+
+Action-JSON 指令:``AC_llm_plan``、``AC_llm_run``。
+
+GUI:**LLM Planner** 分頁。描述輸入框、``QThread`` 背景執行的 *Plan*
+按鈕、預覽指令清單、以及 *Run plan* 按鈕 — 長時間呼叫不會卡 UI。
+
+
+遠端桌面(Host + Viewer)
+=========================
+
+把本機畫面串流給別人看/控制,**或** 觀看並控制別人的機器 — 雙向都有
+headless API 與 GUI 分頁。
+
+協定是 raw TCP 上的長度前綴框架(沒有額外相依),先做一輪 HMAC-SHA256
+challenge/response 認證;認證失敗的 viewer 在看到任何畫面前就被踢掉。
+JPEG frame 依設定的 FPS/品質產生,透過共享 latest-frame slot 廣播給
+通過認證的 viewers,慢的 viewer 只會掉 frame 而不會卡其他人。Viewer
+輸入訊息是 JSON,host 端用允許清單驗證後才透過既有 mouse/keyboard
+wrapper 派送。
+
+Headless host(被別人遠端)::
+
+ from je_auto_control import RemoteDesktopHost
+
+ host = RemoteDesktopHost(
+ token="hunter2", # 共用密鑰(HMAC key)
+ bind="127.0.0.1", # 預設值;要對外請走 SSH tunnel
+ # 或可信的 VPN
+ port=0, # 0 = 自動指派
+ fps=10, quality=70,
+ )
+ host.start()
+ print("listening on", host.port, "viewers:", host.connected_clients)
+ # ...
+ host.stop()
+
+Headless viewer(控制別人)::
+
+ from je_auto_control import RemoteDesktopViewer
+
+ viewer = RemoteDesktopViewer(
+ host="10.0.0.5", port=51234, token="hunter2",
+ on_frame=lambda jpeg_bytes: ..., # 顯示或存檔
+ )
+ viewer.connect()
+ viewer.send_input({"action": "mouse_move", "x": 100, "y": 200})
+ viewer.send_input({"action": "type", "text": "hello"})
+ viewer.disconnect()
+
+輸入訊息允許清單(host 派送前驗證):
+
+- ``mouse_move`` ``{x, y}``
+- ``mouse_click`` ``{x?, y?, button}``
+- ``mouse_press`` / ``mouse_release`` ``{button}``
+- ``mouse_scroll`` ``{x?, y?, amount}``
+- ``key_press`` / ``key_release`` ``{keycode}``
+- ``type`` ``{text}``
+- ``ping``
+
+Action-JSON 指令(使用 :mod:`utils.remote_desktop.registry` 的單例)::
+
+ AC_start_remote_host # token, bind, port, fps, quality, region
+ AC_stop_remote_host
+ AC_remote_host_status # → {running, port, connected_clients}
+
+ AC_remote_connect # host, port, token, timeout
+ AC_remote_disconnect
+ AC_remote_viewer_status # → {connected}
+ AC_remote_send_input # action: {...}
+
+GUI:**Remote Desktop** 分頁,內含兩個子分頁。
+
+- **Host**(被遠端的本機)— Token 欄位附 *產生* 按鈕(24 bytes
+ URL-safe 隨機字串)、bind 位址安全提示、啟動/停止控制、即時刷新
+ 的 port + viewer 數量狀態列,以及底部 4fps 的預覽面板,讓被遠端
+ 的人看到 viewer 看到的畫面。
+- **Viewer**(控制別人)— 位址/port/token 表單、*連線* / *中斷
+ 連線*,以及自繪的 frame display widget,會把 JPEG 等比縮放繪入。
+ display 上的滑鼠/滾輪/鍵盤事件,會用最新 frame 的尺寸把 widget
+ 座標映射回原始遠端螢幕的像素座標,再用 ``INPUT`` 訊息送回。
+
+.. warning::
+ 取得 host:port 與 token 的人,等同擁有本機完整滑鼠/鍵盤控制權。
+ 預設只綁 ``127.0.0.1``;要對外暴露請務必搭配 SSH tunnel 或 TLS
+ 前端。Token 是唯一防線 — 請當作密碼來保管。
+
+
+遠端桌面 — 加密傳輸、音訊、剪貼簿、檔案傳輸
+============================================
+
+Host ID 握手
+------------
+
+每台 host 現在都有一個穩定的 9 位數字 ID,存在
+``~/.je_auto_control/remote_host_id``,重啟後仍是同一個。ID 在
+``AUTH_OK`` 訊息內回傳(只有通過認證的 viewer 才看得到),viewer 可
+以指定 ``expected_host_id`` 驗證,避免「同樣位址但是別的程序」的
+冒充攻擊::
+
+ from je_auto_control import RemoteDesktopHost, RemoteDesktopViewer
+ host = RemoteDesktopHost(token="tok")
+ print(host.host_id) # 例如 "123456789"
+
+ viewer = RemoteDesktopViewer(
+ host="10.0.0.5", port=51234, token="tok",
+ expected_host_id="123456789",
+ )
+ viewer.connect() # 不一致就拋 AuthenticationError
+
+另外提供 ``format_host_id("123456789") == "123 456 789"`` 與
+``parse_host_id("123 456 789") == "123456789"`` 助手。GUI 會顯示分組
+過的 ID 並有 *複製* 按鈕;viewer 端的輸入欄接受常見的空白/破折號。
+
+TLS
+---
+
+``RemoteDesktopHost`` 與 ``RemoteDesktopViewer`` 都接受
+``ssl.SSLContext`` 參數。設定後,host 會把每條接受的連線在伺服器側
+套上 TLS;viewer 在客戶端側套上。失敗的握手會被記錄並關閉,不會
+進到 connected client 計數::
+
+ import ssl
+ ctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
+ ctx.load_cert_chain("cert.pem", "key.pem")
+ host = RemoteDesktopHost(token="tok", ssl_context=ctx)
+
+ client_ctx = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
+ client_ctx.load_verify_locations("cert.pem")
+ viewer = RemoteDesktopViewer(host=..., ssl_context=client_ctx)
+
+自簽憑證 loopback 測試時,把 ``ctx.check_hostname = False`` 與
+``ctx.verify_mode = ssl.CERT_NONE`` 設在 client context 上。GUI host
+分頁有 TLS 憑證/私鑰的檔案選擇器;viewer 分頁有 *忽略憑證驗證* 的
+checkbox 配自簽用。
+
+WebSocket 傳輸
+--------------
+
+新增 ``WebSocketDesktopHost`` / ``WebSocketDesktopViewer``,用 RFC
+6455 BINARY frame 傳同樣的 typed message。實作放在 in-tree(沒有
+額外相依);每個 application message 對應一個完整的 WS frame,所以
+不需要重組機制。同一個 ``ssl_context`` 也是 ``wss://`` 的開關::
+
+ from je_auto_control import (
+ WebSocketDesktopHost, WebSocketDesktopViewer,
+ )
+ host = WebSocketDesktopHost(token="tok", ssl_context=ctx) # wss://
+ viewer = WebSocketDesktopViewer(
+ host="example.com", port=443, token="tok",
+ ssl_context=client_ctx, path="/rd",
+ )
+
+為什麼用 WS:穿牆友善、容易接反向代理、跟瀏覽器 viewer 相容。GUI
+viewer 的傳輸下拉(*TCP* / *WebSocket* / *TLS* / *WSS*)會自動選對
+應的 class。
+
+音訊串流
+--------
+
+新增 ``AUDIO`` 訊息類型,攜帶 16-bit signed PCM 區塊(預設 16 kHz
+mono,每塊 50 ms / 1600 bytes)。``sounddevice`` 為 optional 相依,
+延遲載入;沒裝就 host 端音訊回報停用且整個 host 仍能運作::
+
+ host = RemoteDesktopHost(
+ token="tok", enable_audio=True, audio_device=None, # 預設 mic
+ audio_sample_rate=16000, audio_channels=1,
+ )
+
+ from je_auto_control.utils.remote_desktop import AudioPlayer
+ player = AudioPlayer(); player.start()
+ viewer = RemoteDesktopViewer(host=..., on_audio=player.play)
+
+Host 把每塊抓到的音訊透過每個 client 一個有上限的 deque(~2.5 秒緩
+衝)廣播出去;慢的 viewer 只會丟掉舊的音訊塊,不會卡到大家的擷取
+執行緒。如果要抓系統聲音(而非 mic),用 device index 指定 — Win
+是 WASAPI loopback、Linux 是 PulseAudio monitor source、macOS 要
+BlackHole 之類。GUI:Host 分頁的 *串流系統音訊*,Viewer 分頁的 *播
+放接收的音訊*。
+
+剪貼簿同步(文字 + 圖片)
+-------------------------
+
+新增 ``CLIPBOARD`` 訊息類型,payload 是 JSON envelope,方便日後加新
+類別不用動到 framing:
+
+* ``{"kind": "text", "text": "..."}``
+* ``{"kind": "image", "format": "png", "data_b64": "..."}``
+
+``utils/clipboard/clipboard.py`` 補上 ``get_clipboard_image`` /
+``set_clipboard_image``;Windows 用 ctypes 寫 CF_DIB(Pillow 把 PNG
+轉成 BMP 再去掉 14 byte file header 變成 DIB),Linux 走
+``xclip -t image/png``,macOS get 走 Pillow ImageGrab、set 暫時拋
+NotImplemented 等 PyObjC backend。同步是「明確呼叫」的(避免雙向
+auto-poll 造成 paste 迴圈)::
+
+ # Viewer 把本機剪貼簿送到 host
+ viewer.send_clipboard_text("hello")
+ viewer.send_clipboard_image(open("logo.png", "rb").read())
+
+ # Host 把本機剪貼簿送到所有 viewers
+ host.broadcast_clipboard_text("greetings")
+ host.broadcast_clipboard_image(png_bytes)
+
+ # Viewer 接收回 callback,自己決定要不要 paste
+ viewer = RemoteDesktopViewer(
+ host=..., on_clipboard=lambda kind, data: ...,
+ )
+
+GUI:Viewer 分頁有 *把本機剪貼簿文字送到 Host* 按鈕;host 收到後
+透過上述 helpers 套用到本機剪貼簿。
+
+檔案傳輸 + 進度
+---------------
+
+三個新訊息組成一次傳輸:
+
+* ``FILE_BEGIN`` — JSON ``{transfer_id, dest_path, size}``
+* ``FILE_CHUNK`` — 36-byte ASCII transfer id + 原始 payload
+* ``FILE_END`` — JSON ``{transfer_id, status, error?}``
+
+雙向、分塊(256 KiB / chunk)、**沒有總大小上限**、**沒有目的路徑
+限制**(拿到 token 就視為信任使用者)。進度由兩端各自本地計算,不
+需要額外的 wire 訊息::
+
+ from je_auto_control.utils.remote_desktop import (
+ FileReceiver, RemoteDesktopHost, RemoteDesktopViewer, send_file,
+ )
+
+ # Viewer 上傳到 host
+ viewer.send_file("local.bin", "/tmp/uploaded.bin",
+ on_progress=lambda tid, done, total: print(done, total))
+
+ # Host 下發到所有 viewer(viewer 需要設一個 FileReceiver 來收)
+ viewer.set_file_receiver(FileReceiver(
+ on_progress=..., on_complete=...,
+ ))
+ host.send_file_to_viewers("local.bin", "/tmp/from_host.bin")
+
+GUI:*傳送檔案...* 按鈕開啟檔案選擇器 + 目的路徑提示,上傳跑在
+``QThread`` 上,底下 ``QProgressBar`` 綁到 sender 的 progress 事
+件。Frame display widget 也接受 dragEnter/drop 拖放本機檔案,丟進
+去就走同一個流程上傳。
+
+.. warning::
+ 路徑無限制、大小無上限。任何拿到 token 的人都能把任意檔案寫到
+ 任意位置(覆蓋 ``C:\\Windows\\System32\\*.dll`` 都可能),也能
+ 塞滿磁碟。Token 持有者必須等同信任使用者;要更嚴格的話請自行
+ 繼承 ``FileReceiver`` 在 ``handle_begin`` 內驗證 dest_path。
diff --git a/je_auto_control/__init__.py b/je_auto_control/__init__.py
index 9f45a02a..7d0747e2 100644
--- a/je_auto_control/__init__.py
+++ b/je_auto_control/__init__.py
@@ -59,8 +59,24 @@
)
# OCR (headless)
from je_auto_control.utils.ocr.ocr_engine import (
- TextMatch, click_text, find_text_matches, locate_text_center,
- set_tesseract_cmd, wait_for_text,
+ TextMatch, click_text, find_text_matches, find_text_regex,
+ locate_text_center, read_text_in_region, set_tesseract_cmd,
+ wait_for_text,
+)
+# LLM action planner (headless)
+from je_auto_control.utils.llm import (
+ LLMBackend, LLMNotAvailableError, LLMPlanError,
+ plan_actions, run_from_description,
+)
+# Remote desktop (headless)
+from je_auto_control.utils.remote_desktop import (
+ AuthenticationError as RemoteDesktopAuthError,
+ InputDispatchError as RemoteDesktopInputError,
+ ProtocolError as RemoteDesktopProtocolError,
+ RemoteDesktopHost, RemoteDesktopViewer,
+)
+from je_auto_control.utils.remote_desktop.registry import (
+ registry as remote_desktop_registry,
)
# MCP server (headless stdio bridge for Claude / other MCP clients)
from je_auto_control.utils.mcp_server import (
@@ -103,6 +119,7 @@
from je_auto_control.utils.script_vars.interpolate import (
interpolate_actions, interpolate_value, load_vars_from_json,
)
+from je_auto_control.utils.script_vars.scope import VariableScope
# Watchers (headless)
from je_auto_control.utils.watcher.watcher import (
LogTail, MouseWatcher, PixelWatcher,
@@ -203,7 +220,8 @@ def start_autocontrol_gui(*args, **kwargs):
"add_command_to_executor", "test_record_instance", "pil_screenshot",
# OCR
"TextMatch", "find_text_matches", "locate_text_center", "wait_for_text",
- "click_text", "set_tesseract_cmd",
+ "click_text", "set_tesseract_cmd", "read_text_in_region",
+ "find_text_regex",
# Recording editor
"trim_actions", "insert_action", "remove_action", "filter_actions",
"adjust_delays", "scale_coordinates",
@@ -211,6 +229,7 @@ def start_autocontrol_gui(*args, **kwargs):
"Scheduler", "ScheduledJob", "default_scheduler",
# Script variables
"interpolate_actions", "interpolate_value", "load_vars_from_json",
+ "VariableScope",
# Watchers
"MouseWatcher", "PixelWatcher", "LogTail",
# Window manager
@@ -246,6 +265,13 @@ def start_autocontrol_gui(*args, **kwargs):
"click_accessibility_element",
# VLM locator
"VLMNotAvailableError", "locate_by_description", "click_by_description",
+ # LLM action planner
+ "LLMBackend", "LLMNotAvailableError", "LLMPlanError",
+ "plan_actions", "run_from_description",
+ # Remote desktop
+ "RemoteDesktopHost", "RemoteDesktopViewer",
+ "RemoteDesktopAuthError", "RemoteDesktopInputError",
+ "RemoteDesktopProtocolError", "remote_desktop_registry",
"generate_html", "generate_html_report", "generate_json", "generate_json_report", "generate_xml",
"generate_xml_report", "get_dir_files_as_list", "create_project_dir", "start_autocontrol_socket_server",
"callback_executor", "package_manager", "ShellManager", "default_shell_manager",
diff --git a/je_auto_control/gui/language_wrapper/english.py b/je_auto_control/gui/language_wrapper/english.py
index 40efca04..fa51d61e 100644
--- a/je_auto_control/gui/language_wrapper/english.py
+++ b/je_auto_control/gui/language_wrapper/english.py
@@ -27,6 +27,10 @@
"tab_run_history": "Run History",
"tab_accessibility": "Accessibility",
"tab_vlm": "AI Locator",
+ "tab_ocr_reader": "OCR Reader",
+ "tab_variables": "Variables",
+ "tab_llm_planner": "LLM Planner",
+ "tab_remote_desktop": "Remote Desktop",
# Auto Click Tab
"interval_time": "Interval (ms):",
@@ -317,6 +321,117 @@
"vlm_error": "Error",
"vlm_desc_required": "Describe the target first",
+ # OCR Reader Tab
+ "ocr_region_group": "Region",
+ "ocr_region_label": "x, y, w, h:",
+ "ocr_region_placeholder": "leave blank for full screen",
+ "ocr_pick_region": "Pick region...",
+ "ocr_lang_label": "Language:",
+ "ocr_min_conf_label": "Min confidence:",
+ "ocr_regex_label": "Regex pattern:",
+ "ocr_regex_placeholder": r"e.g. Order#\d+",
+ "ocr_dump_region": "Dump region text",
+ "ocr_find_regex": "Find by regex",
+ "ocr_results_label": "Matches:",
+ "ocr_match_count": "{n} matches",
+ "ocr_region_invalid": "Region must be 4 comma-separated integers",
+ "ocr_min_conf_invalid": "Min confidence must be a number",
+ "ocr_regex_required": "Enter a regex pattern first",
+ "ocr_regex_invalid": "Invalid regex",
+
+ # Variables Tab
+ "vars_current_group": "Current scope",
+ "vars_col_name": "Name",
+ "vars_col_value": "Value",
+ "vars_count": "{n} variables",
+ "vars_refresh": "Refresh",
+ "vars_clear": "Clear all",
+ "vars_clear_confirm": "Clear every runtime variable?",
+ "vars_set_group": "Set one",
+ "vars_name_label": "Name:",
+ "vars_name_placeholder": "variable name",
+ "vars_name_required": "Name is required",
+ "vars_value_label": "Value:",
+ "vars_value_placeholder": "JSON literal (42, \"text\", [1,2]) or raw string",
+ "vars_set_btn": "Set",
+ "vars_seed_group": "Seed from JSON",
+ "vars_seed_placeholder": "{\"counter\": 0, \"items\": [\"a\", \"b\"]}",
+ "vars_seed_btn": "Merge JSON into scope",
+ "vars_seed_required": "Paste a JSON object first",
+ "vars_seed_invalid": "Invalid JSON",
+ "vars_seed_not_object": "Seed JSON must be an object",
+
+ # LLM Planner Tab
+ "llm_desc_group": "Describe the task",
+ "llm_desc_placeholder": (
+ "e.g. open Notepad, type 'hello world', save as test.txt"
+ ),
+ "llm_model_label": "Model:",
+ "llm_model_placeholder": "optional override (e.g. claude-opus-4-7)",
+ "llm_plan_btn": "Plan",
+ "llm_run_btn": "Run plan",
+ "llm_plan_group": "Planned actions",
+ "llm_result_group": "Execution result",
+ "llm_desc_required": "Describe what you want to automate first",
+ "llm_planning": "Planning...",
+ "llm_plan_count": "Planned {n} actions",
+ "llm_no_plan": "Click Plan first",
+ "llm_running": "Running...",
+ "llm_run_done": "Done",
+
+ # Remote Desktop Tab
+ "rd_host_tab": "Host (this machine)",
+ "rd_viewer_tab": "Viewer (control another)",
+ "rd_host_security_warning": (
+ "WARNING: anyone with the host:port and token gets full mouse / "
+ "keyboard control of this machine. Default bind is 127.0.0.1; "
+ "expose to a network only via SSH tunnel or trusted VPN."
+ ),
+ "rd_host_config_group": "Host configuration",
+ "rd_viewer_config_group": "Connect to a remote host",
+ "rd_token_label": "Token:",
+ "rd_token_placeholder": "shared secret (HMAC key)",
+ "rd_token_generate": "Generate",
+ "rd_bind_label": "Address:",
+ "rd_port_label": "Port:",
+ "rd_fps_label": "FPS:",
+ "rd_quality_label": "JPEG quality:",
+ "rd_host_start": "Start host",
+ "rd_host_stop": "Stop host",
+ "rd_host_status_running": "Running on port {port} — {n} viewer(s)",
+ "rd_host_status_stopped": "Host is stopped",
+ "rd_host_preview_label": "Preview (what viewers see):",
+ "rd_host_id_group": "Host ID (share with viewers)",
+ "rd_host_id_label": "Host ID:",
+ "rd_host_id_copy": "Copy",
+ "rd_transport_label": "Transport:",
+ "rd_tls_cert_label": "TLS cert:",
+ "rd_tls_cert_placeholder": "PEM certificate path (optional)",
+ "rd_tls_key_label": "TLS key:",
+ "rd_tls_key_placeholder": "PEM private key path (optional)",
+ "rd_tls_both_required": "TLS cert and key must both be provided",
+ "rd_tls_insecure": "Skip cert verification (self-signed)",
+ "rd_browse": "Browse...",
+ "rd_enable_audio": "Stream system audio (sounddevice)",
+ "rd_viewer_audio_play": "Play received audio (sounddevice)",
+ "rd_viewer_push_clipboard": "Push my clipboard text to host",
+ "rd_viewer_send_file": "Send file...",
+ "rd_dest_path_prompt": "Destination path on remote for {name}:",
+ "rd_clipboard_empty": "Local clipboard is empty",
+ "rd_clipboard_sent": "Clipboard sent to host",
+ "rd_viewer_clipboard_received": "Clipboard updated from host",
+ "rd_progress_label": "Transfer: {done} / {total} bytes",
+ "rd_progress_done": "Transfer complete: {path}",
+ "rd_progress_failed": "Transfer failed: {error}",
+ "rd_viewer_connect": "Connect",
+ "rd_viewer_disconnect": "Disconnect",
+ "rd_viewer_required_fields": (
+ "Address, port, and token are all required."
+ ),
+ "rd_viewer_status_connected": "Connected — receiving frames",
+ "rd_viewer_status_idle": "Not connected",
+ "rd_viewer_error": "Remote desktop error",
+
# Menu bar
"menu_file": "File",
"menu_file_open_script": "Open Script...",
diff --git a/je_auto_control/gui/language_wrapper/japanese.py b/je_auto_control/gui/language_wrapper/japanese.py
index aff81d72..6f29f700 100644
--- a/je_auto_control/gui/language_wrapper/japanese.py
+++ b/je_auto_control/gui/language_wrapper/japanese.py
@@ -27,6 +27,10 @@
"tab_run_history": "実行履歴",
"tab_accessibility": "アクセシビリティ",
"tab_vlm": "AI ロケーター",
+ "tab_ocr_reader": "OCR リーダー",
+ "tab_variables": "実行時変数",
+ "tab_llm_planner": "LLM プランナー",
+ "tab_remote_desktop": "リモートデスクトップ",
# Auto Click Tab
"interval_time": "間隔 (ms):",
@@ -317,6 +321,115 @@
"vlm_error": "エラー",
"vlm_desc_required": "まず対象の説明を入力してください",
+ # OCR Reader Tab
+ "ocr_region_group": "領域",
+ "ocr_region_label": "x, y, w, h:",
+ "ocr_region_placeholder": "空欄なら画面全体",
+ "ocr_pick_region": "領域を選択...",
+ "ocr_lang_label": "言語:",
+ "ocr_min_conf_label": "最低信頼度:",
+ "ocr_regex_label": "正規表現:",
+ "ocr_regex_placeholder": r"例: Order#\d+",
+ "ocr_dump_region": "領域内の文字を取得",
+ "ocr_find_regex": "正規表現で検索",
+ "ocr_results_label": "結果:",
+ "ocr_match_count": "{n} 件",
+ "ocr_region_invalid": "領域はカンマ区切りの整数 4 つ",
+ "ocr_min_conf_invalid": "信頼度は数値で入力",
+ "ocr_regex_required": "先に正規表現を入力",
+ "ocr_regex_invalid": "正規表現が不正",
+
+ # Variables Tab
+ "vars_current_group": "現在のスコープ",
+ "vars_col_name": "名前",
+ "vars_col_value": "値",
+ "vars_count": "{n} 個の変数",
+ "vars_refresh": "再読み込み",
+ "vars_clear": "全消去",
+ "vars_clear_confirm": "実行時変数を全て消去しますか?",
+ "vars_set_group": "個別に設定",
+ "vars_name_label": "名前:",
+ "vars_name_placeholder": "変数名",
+ "vars_name_required": "名前を入力してください",
+ "vars_value_label": "値:",
+ "vars_value_placeholder": "JSON リテラル(42、\"text\"、[1,2])または文字列",
+ "vars_set_btn": "設定",
+ "vars_seed_group": "JSON から読み込み",
+ "vars_seed_placeholder": "{\"counter\": 0, \"items\": [\"a\", \"b\"]}",
+ "vars_seed_btn": "JSON をスコープに統合",
+ "vars_seed_required": "JSON オブジェクトを貼り付けてください",
+ "vars_seed_invalid": "JSON が不正",
+ "vars_seed_not_object": "JSON はオブジェクトでなければなりません",
+
+ # LLM Planner Tab
+ "llm_desc_group": "タスクの説明",
+ "llm_desc_placeholder": (
+ "例: メモ帳を開き、'hello world' と入力し、test.txt として保存"
+ ),
+ "llm_model_label": "モデル:",
+ "llm_model_placeholder": "任意(例: claude-opus-4-7)",
+ "llm_plan_btn": "プラン作成",
+ "llm_run_btn": "プラン実行",
+ "llm_plan_group": "計画されたアクション",
+ "llm_result_group": "実行結果",
+ "llm_desc_required": "まず自動化したいタスクを記述してください",
+ "llm_planning": "計画中...",
+ "llm_plan_count": "{n} 個のアクションを計画",
+ "llm_no_plan": "まず「プラン作成」を押してください",
+ "llm_running": "実行中...",
+ "llm_run_done": "完了",
+
+ # Remote Desktop Tab
+ "rd_host_tab": "ホスト(このマシン)",
+ "rd_viewer_tab": "ビューア(他マシンを操作)",
+ "rd_host_security_warning": (
+ "警告:host:port と token を知る相手は、このマシンのマウス/キーボードを"
+ "完全に操作できます。既定は 127.0.0.1。外部公開は SSH トンネルか"
+ "信頼できる VPN 経由で行ってください。"
+ ),
+ "rd_host_config_group": "ホスト設定",
+ "rd_viewer_config_group": "リモートホストへ接続",
+ "rd_token_label": "トークン:",
+ "rd_token_placeholder": "共有シークレット(HMAC キー)",
+ "rd_token_generate": "生成",
+ "rd_bind_label": "アドレス:",
+ "rd_port_label": "ポート:",
+ "rd_fps_label": "FPS:",
+ "rd_quality_label": "JPEG 品質:",
+ "rd_host_start": "ホスト開始",
+ "rd_host_stop": "ホスト停止",
+ "rd_host_status_running": "稼働中 ポート {port} — ビューア {n} 名",
+ "rd_host_status_stopped": "ホストは停止中",
+ "rd_host_preview_label": "プレビュー(ビューアの表示):",
+ "rd_host_id_group": "ホスト ID(ビューアに伝える)",
+ "rd_host_id_label": "ホスト ID:",
+ "rd_host_id_copy": "コピー",
+ "rd_transport_label": "トランスポート:",
+ "rd_tls_cert_label": "TLS 証明書:",
+ "rd_tls_cert_placeholder": "PEM 証明書パス(任意)",
+ "rd_tls_key_label": "TLS キー:",
+ "rd_tls_key_placeholder": "PEM 秘密鍵パス(任意)",
+ "rd_tls_both_required": "TLS 証明書とキーは両方必要",
+ "rd_tls_insecure": "証明書検証をスキップ(自己署名用)",
+ "rd_browse": "参照...",
+ "rd_enable_audio": "システム音声をストリーム(sounddevice)",
+ "rd_viewer_audio_play": "受信音声を再生(sounddevice)",
+ "rd_viewer_push_clipboard": "ローカルのクリップボード文字をホストへ送信",
+ "rd_viewer_send_file": "ファイル送信...",
+ "rd_dest_path_prompt": "{name} のリモート保存先:",
+ "rd_clipboard_empty": "ローカルのクリップボードが空です",
+ "rd_clipboard_sent": "クリップボードをホストへ送信しました",
+ "rd_viewer_clipboard_received": "ホストからクリップボードを同期しました",
+ "rd_progress_label": "転送中: {done} / {total} バイト",
+ "rd_progress_done": "転送完了: {path}",
+ "rd_progress_failed": "転送失敗: {error}",
+ "rd_viewer_connect": "接続",
+ "rd_viewer_disconnect": "切断",
+ "rd_viewer_required_fields": "アドレス・ポート・トークンはすべて必須です。",
+ "rd_viewer_status_connected": "接続中 — フレーム受信中",
+ "rd_viewer_status_idle": "未接続",
+ "rd_viewer_error": "リモートデスクトップエラー",
+
# Menu bar
"menu_file": "ファイル",
"menu_file_open_script": "スクリプトを開く...",
diff --git a/je_auto_control/gui/language_wrapper/simplified_chinese.py b/je_auto_control/gui/language_wrapper/simplified_chinese.py
index d6c5fa02..e90c65e7 100644
--- a/je_auto_control/gui/language_wrapper/simplified_chinese.py
+++ b/je_auto_control/gui/language_wrapper/simplified_chinese.py
@@ -22,6 +22,10 @@
"tab_run_history": "执行记录",
"tab_accessibility": "无障碍树",
"tab_vlm": "AI 定位",
+ "tab_ocr_reader": "OCR 读取",
+ "tab_variables": "运行期变量",
+ "tab_llm_planner": "LLM 脚本规划",
+ "tab_remote_desktop": "远程桌面",
# Auto Click Tab
"interval_time": "间隔时间 (ms):",
@@ -312,6 +316,114 @@
"vlm_error": "错误",
"vlm_desc_required": "请先输入目标描述",
+ # OCR Reader Tab
+ "ocr_region_group": "区域",
+ "ocr_region_label": "x, y, w, h:",
+ "ocr_region_placeholder": "留空代表整个屏幕",
+ "ocr_pick_region": "选取区域...",
+ "ocr_lang_label": "语言:",
+ "ocr_min_conf_label": "最低置信度:",
+ "ocr_regex_label": "Regex 表达式:",
+ "ocr_regex_placeholder": r"例如 Order#\d+",
+ "ocr_dump_region": "抓取区域全部文字",
+ "ocr_find_regex": "用 regex 搜索",
+ "ocr_results_label": "结果:",
+ "ocr_match_count": "{n} 个匹配",
+ "ocr_region_invalid": "区域必须为 4 个逗号分隔整数",
+ "ocr_min_conf_invalid": "置信度需为数字",
+ "ocr_regex_required": "请先输入 regex 表达式",
+ "ocr_regex_invalid": "Regex 不正确",
+
+ # Variables Tab
+ "vars_current_group": "当前作用域",
+ "vars_col_name": "名称",
+ "vars_col_value": "值",
+ "vars_count": "共 {n} 个变量",
+ "vars_refresh": "刷新",
+ "vars_clear": "全部清除",
+ "vars_clear_confirm": "确定清空所有运行期变量?",
+ "vars_set_group": "新增单个变量",
+ "vars_name_label": "名称:",
+ "vars_name_placeholder": "变量名",
+ "vars_name_required": "请输入名称",
+ "vars_value_label": "值:",
+ "vars_value_placeholder": "JSON 字面量(42、\"text\"、[1,2])或纯字符串",
+ "vars_set_btn": "设定",
+ "vars_seed_group": "从 JSON 载入",
+ "vars_seed_placeholder": "{\"counter\": 0, \"items\": [\"a\", \"b\"]}",
+ "vars_seed_btn": "合并 JSON 到作用域",
+ "vars_seed_required": "请先粘贴 JSON 对象",
+ "vars_seed_invalid": "JSON 格式错误",
+ "vars_seed_not_object": "JSON 必须是对象",
+
+ # LLM Planner Tab
+ "llm_desc_group": "描述要做的任务",
+ "llm_desc_placeholder": (
+ "例如:打开记事本、输入 'hello world'、另存为 test.txt"
+ ),
+ "llm_model_label": "模型:",
+ "llm_model_placeholder": "选填(例如 claude-opus-4-7)",
+ "llm_plan_btn": "规划",
+ "llm_run_btn": "执行此规划",
+ "llm_plan_group": "已规划的指令",
+ "llm_result_group": "执行结果",
+ "llm_desc_required": "请先描述要自动化的任务",
+ "llm_planning": "规划中...",
+ "llm_plan_count": "已规划 {n} 个指令",
+ "llm_no_plan": "请先按下「规划」",
+ "llm_running": "执行中...",
+ "llm_run_done": "完成",
+
+ # Remote Desktop Tab
+ "rd_host_tab": "被远程(本机)",
+ "rd_viewer_tab": "远程他人(控制他机)",
+ "rd_host_security_warning": (
+ "警告:取得本机 host:port 与 token 的人,可以完全控制本机的鼠标/键盘。"
+ "默认仅绑 127.0.0.1;要对外请透过 SSH tunnel 或可信的 VPN。"
+ ),
+ "rd_host_config_group": "Host 设置",
+ "rd_viewer_config_group": "连接到远程 Host",
+ "rd_token_label": "Token:",
+ "rd_token_placeholder": "共享密钥(HMAC key)",
+ "rd_token_generate": "生成",
+ "rd_bind_label": "地址:",
+ "rd_port_label": "端口:",
+ "rd_fps_label": "FPS:",
+ "rd_quality_label": "JPEG 质量:",
+ "rd_host_start": "启动 Host",
+ "rd_host_stop": "停止 Host",
+ "rd_host_status_running": "运行中 端口 {port} — {n} 个 viewer",
+ "rd_host_status_stopped": "Host 已停止",
+ "rd_host_preview_label": "预览(viewer 看到的画面):",
+ "rd_host_id_group": "Host ID(给远程的人)",
+ "rd_host_id_label": "Host ID:",
+ "rd_host_id_copy": "复制",
+ "rd_transport_label": "传输协议:",
+ "rd_tls_cert_label": "TLS 证书:",
+ "rd_tls_cert_placeholder": "PEM 证书路径(选填)",
+ "rd_tls_key_label": "TLS 密钥:",
+ "rd_tls_key_placeholder": "PEM 私钥路径(选填)",
+ "rd_tls_both_required": "TLS 证书与密钥必须一并提供",
+ "rd_tls_insecure": "忽略证书验证(自签用)",
+ "rd_browse": "浏览...",
+ "rd_enable_audio": "串流系统音频(sounddevice)",
+ "rd_viewer_audio_play": "播放接收的音频(sounddevice)",
+ "rd_viewer_push_clipboard": "把本机剪贴板文字发送到 Host",
+ "rd_viewer_send_file": "发送文件...",
+ "rd_dest_path_prompt": "{name} 在远程的目的路径:",
+ "rd_clipboard_empty": "本机剪贴板是空的",
+ "rd_clipboard_sent": "剪贴板已发送到 Host",
+ "rd_viewer_clipboard_received": "已从 Host 同步剪贴板",
+ "rd_progress_label": "传输进度: {done} / {total} bytes",
+ "rd_progress_done": "传输完成: {path}",
+ "rd_progress_failed": "传输失败: {error}",
+ "rd_viewer_connect": "连接",
+ "rd_viewer_disconnect": "断开",
+ "rd_viewer_required_fields": "地址、端口、token 都必须填写。",
+ "rd_viewer_status_connected": "已连接 — 正在接收画面",
+ "rd_viewer_status_idle": "未连接",
+ "rd_viewer_error": "远程桌面错误",
+
# Menu bar
"menu_file": "文件",
"menu_file_open_script": "打开脚本...",
diff --git a/je_auto_control/gui/language_wrapper/traditional_chinese.py b/je_auto_control/gui/language_wrapper/traditional_chinese.py
index 2fa9eb1c..234bf621 100644
--- a/je_auto_control/gui/language_wrapper/traditional_chinese.py
+++ b/je_auto_control/gui/language_wrapper/traditional_chinese.py
@@ -23,6 +23,10 @@
"tab_run_history": "執行紀錄",
"tab_accessibility": "無障礙樹",
"tab_vlm": "AI 定位",
+ "tab_ocr_reader": "OCR 讀取",
+ "tab_variables": "執行期變數",
+ "tab_llm_planner": "LLM 腳本規劃",
+ "tab_remote_desktop": "遠端桌面",
# Auto Click Tab
"interval_time": "間隔時間 (ms):",
@@ -313,6 +317,114 @@
"vlm_error": "錯誤",
"vlm_desc_required": "請先輸入目標描述",
+ # OCR Reader Tab
+ "ocr_region_group": "區域",
+ "ocr_region_label": "x, y, w, h:",
+ "ocr_region_placeholder": "留空代表整個螢幕",
+ "ocr_pick_region": "選取區域...",
+ "ocr_lang_label": "語言:",
+ "ocr_min_conf_label": "最低信心度:",
+ "ocr_regex_label": "Regex 樣式:",
+ "ocr_regex_placeholder": r"例如 Order#\d+",
+ "ocr_dump_region": "抓取區域全部文字",
+ "ocr_find_regex": "用 regex 搜尋",
+ "ocr_results_label": "結果:",
+ "ocr_match_count": "{n} 個結果",
+ "ocr_region_invalid": "區域必須為 4 個逗號分隔整數",
+ "ocr_min_conf_invalid": "信心度需為數字",
+ "ocr_regex_required": "請先輸入 regex 樣式",
+ "ocr_regex_invalid": "Regex 不正確",
+
+ # Variables Tab
+ "vars_current_group": "目前作用域",
+ "vars_col_name": "名稱",
+ "vars_col_value": "值",
+ "vars_count": "共 {n} 個變數",
+ "vars_refresh": "重新整理",
+ "vars_clear": "全部清除",
+ "vars_clear_confirm": "確定要清空所有執行期變數嗎?",
+ "vars_set_group": "新增單一變數",
+ "vars_name_label": "名稱:",
+ "vars_name_placeholder": "變數名稱",
+ "vars_name_required": "請輸入名稱",
+ "vars_value_label": "值:",
+ "vars_value_placeholder": "JSON 字面值(42、\"text\"、[1,2])或純字串",
+ "vars_set_btn": "設定",
+ "vars_seed_group": "從 JSON 載入",
+ "vars_seed_placeholder": "{\"counter\": 0, \"items\": [\"a\", \"b\"]}",
+ "vars_seed_btn": "合併 JSON 到作用域",
+ "vars_seed_required": "請先貼上 JSON 物件",
+ "vars_seed_invalid": "JSON 格式錯誤",
+ "vars_seed_not_object": "JSON 必須是物件",
+
+ # LLM Planner Tab
+ "llm_desc_group": "描述要做的任務",
+ "llm_desc_placeholder": (
+ "例如:開啟記事本、輸入 'hello world'、另存為 test.txt"
+ ),
+ "llm_model_label": "模型:",
+ "llm_model_placeholder": "選填(例如 claude-opus-4-7)",
+ "llm_plan_btn": "規劃",
+ "llm_run_btn": "執行此規劃",
+ "llm_plan_group": "已規劃的指令",
+ "llm_result_group": "執行結果",
+ "llm_desc_required": "請先描述要自動化的任務",
+ "llm_planning": "規劃中...",
+ "llm_plan_count": "已規劃 {n} 個指令",
+ "llm_no_plan": "請先按下「規劃」",
+ "llm_running": "執行中...",
+ "llm_run_done": "完成",
+
+ # Remote Desktop Tab
+ "rd_host_tab": "被遠端(本機)",
+ "rd_viewer_tab": "遠端他人(控制他機)",
+ "rd_host_security_warning": (
+ "警告:取得本機 host:port 與 token 的人,可以完全控制本機的滑鼠/鍵盤。"
+ "預設只綁 127.0.0.1;要對外請透過 SSH tunnel 或可信的 VPN。"
+ ),
+ "rd_host_config_group": "Host 設定",
+ "rd_viewer_config_group": "連線到遠端 Host",
+ "rd_token_label": "Token:",
+ "rd_token_placeholder": "共用密鑰(HMAC key)",
+ "rd_token_generate": "產生",
+ "rd_bind_label": "位址:",
+ "rd_port_label": "Port:",
+ "rd_fps_label": "FPS:",
+ "rd_quality_label": "JPEG 品質:",
+ "rd_host_start": "啟動 Host",
+ "rd_host_stop": "停止 Host",
+ "rd_host_status_running": "運行中 port {port} — {n} 個 viewer",
+ "rd_host_status_stopped": "Host 已停止",
+ "rd_host_preview_label": "預覽(viewer 看到的畫面):",
+ "rd_host_id_group": "Host ID(給遠端的人)",
+ "rd_host_id_label": "Host ID:",
+ "rd_host_id_copy": "複製",
+ "rd_transport_label": "傳輸協定:",
+ "rd_tls_cert_label": "TLS 憑證:",
+ "rd_tls_cert_placeholder": "PEM 憑證路徑(選填)",
+ "rd_tls_key_label": "TLS 金鑰:",
+ "rd_tls_key_placeholder": "PEM 私鑰路徑(選填)",
+ "rd_tls_both_required": "TLS 憑證與金鑰必須一併提供",
+ "rd_tls_insecure": "忽略憑證驗證(自簽用)",
+ "rd_browse": "瀏覽...",
+ "rd_enable_audio": "串流系統音訊(sounddevice)",
+ "rd_viewer_audio_play": "播放接收的音訊(sounddevice)",
+ "rd_viewer_push_clipboard": "把本機剪貼簿文字送到 Host",
+ "rd_viewer_send_file": "傳送檔案...",
+ "rd_dest_path_prompt": "{name} 在遠端的目的路徑:",
+ "rd_clipboard_empty": "本機剪貼簿是空的",
+ "rd_clipboard_sent": "剪貼簿已送到 Host",
+ "rd_viewer_clipboard_received": "已從 Host 同步剪貼簿",
+ "rd_progress_label": "傳輸進度:{done} / {total} bytes",
+ "rd_progress_done": "傳輸完成:{path}",
+ "rd_progress_failed": "傳輸失敗:{error}",
+ "rd_viewer_connect": "連線",
+ "rd_viewer_disconnect": "中斷連線",
+ "rd_viewer_required_fields": "位址、port、token 都必須填寫。",
+ "rd_viewer_status_connected": "已連線 — 正在接收畫面",
+ "rd_viewer_status_idle": "尚未連線",
+ "rd_viewer_error": "遠端桌面錯誤",
+
# Menu bar
"menu_file": "檔案",
"menu_file_open_script": "開啟腳本...",
diff --git a/je_auto_control/gui/llm_planner_tab.py b/je_auto_control/gui/llm_planner_tab.py
new file mode 100644
index 00000000..348ba218
--- /dev/null
+++ b/je_auto_control/gui/llm_planner_tab.py
@@ -0,0 +1,195 @@
+"""LLM Planner tab: describe a task in plain language → preview → run.
+
+The tab calls the headless ``plan_actions`` helper, shows the resulting
+JSON action list for review, and lets the user execute it through the
+shared global executor. Long calls run on a background ``QThread`` so the
+UI stays responsive.
+"""
+import json
+from typing import List, Optional
+
+from PySide6.QtCore import QObject, QThread, Signal
+from PySide6.QtWidgets import (
+ QGroupBox, QHBoxLayout, QLabel, QLineEdit, QMessageBox, QPushButton,
+ QTextEdit, QVBoxLayout, QWidget,
+)
+
+from je_auto_control.gui._i18n_helpers import TranslatableMixin
+from je_auto_control.gui.language_wrapper.multi_language_wrapper import (
+ language_wrapper,
+)
+from je_auto_control.utils.executor.action_executor import execute_action, executor
+from je_auto_control.utils.llm.backends.base import LLMNotAvailableError
+from je_auto_control.utils.llm.planner import LLMPlanError, plan_actions
+
+
+def _t(key: str) -> str:
+ return language_wrapper.translate(key, key)
+
+
+class _PlanWorker(QObject):
+ """Runs ``plan_actions`` off the GUI thread and emits the result."""
+
+ finished = Signal(list)
+ failed = Signal(str)
+
+ def __init__(self, description: str, model: Optional[str],
+ known_commands: List[str]) -> None:
+ super().__init__()
+ self._description = description
+ self._model = model
+ self._known = list(known_commands)
+
+ def run(self) -> None:
+ try:
+ actions = plan_actions(
+ self._description,
+ known_commands=self._known,
+ model=self._model,
+ )
+ except LLMNotAvailableError as error:
+ self.failed.emit(str(error))
+ except (LLMPlanError, ValueError, OSError, RuntimeError) as error:
+ self.failed.emit(f"{type(error).__name__}: {error}")
+ else:
+ self.finished.emit(actions)
+
+
+class LLMPlannerTab(TranslatableMixin, QWidget):
+ """Translate plain-language descriptions into runnable AC_* scripts."""
+
+ def __init__(self, parent: Optional[QWidget] = None) -> None:
+ super().__init__(parent)
+ self._tr_init()
+ self._description = QTextEdit()
+ self._model = QLineEdit()
+ self._actions_view = QTextEdit()
+ self._actions_view.setReadOnly(True)
+ self._result_view = QTextEdit()
+ self._result_view.setReadOnly(True)
+ self._status = QLabel()
+ self._planned_actions: Optional[list] = None
+ self._plan_btn: Optional[QPushButton] = None
+ self._run_btn: Optional[QPushButton] = None
+ self._plan_thread: Optional[QThread] = None
+ self._plan_worker: Optional[_PlanWorker] = None
+ self._build_layout()
+ self._apply_placeholders()
+ self._set_run_enabled(False)
+
+ def retranslate(self) -> None:
+ TranslatableMixin.retranslate(self)
+ self._apply_placeholders()
+
+ def _apply_placeholders(self) -> None:
+ self._description.setPlaceholderText(_t("llm_desc_placeholder"))
+ self._model.setPlaceholderText(_t("llm_model_placeholder"))
+
+ def _build_layout(self) -> None:
+ root = QVBoxLayout(self)
+
+ desc_group = self._tr(QGroupBox(), "llm_desc_group")
+ desc_layout = QVBoxLayout()
+ desc_layout.addWidget(self._description)
+ model_row = QHBoxLayout()
+ model_row.addWidget(self._tr(QLabel(), "llm_model_label"))
+ model_row.addWidget(self._model, stretch=1)
+ desc_layout.addLayout(model_row)
+ btn_row = QHBoxLayout()
+ self._plan_btn = self._tr(QPushButton(), "llm_plan_btn")
+ self._plan_btn.clicked.connect(self._on_plan)
+ self._run_btn = self._tr(QPushButton(), "llm_run_btn")
+ self._run_btn.clicked.connect(self._on_run)
+ btn_row.addWidget(self._plan_btn)
+ btn_row.addWidget(self._run_btn)
+ btn_row.addStretch()
+ desc_layout.addLayout(btn_row)
+ desc_group.setLayout(desc_layout)
+ root.addWidget(desc_group)
+
+ actions_group = self._tr(QGroupBox(), "llm_plan_group")
+ actions_layout = QVBoxLayout()
+ actions_layout.addWidget(self._actions_view)
+ actions_group.setLayout(actions_layout)
+ root.addWidget(actions_group, stretch=1)
+
+ result_group = self._tr(QGroupBox(), "llm_result_group")
+ result_layout = QVBoxLayout()
+ result_layout.addWidget(self._result_view)
+ result_group.setLayout(result_layout)
+ root.addWidget(result_group, stretch=1)
+
+ root.addWidget(self._status)
+
+ def _set_run_enabled(self, enabled: bool) -> None:
+ if self._run_btn is not None:
+ self._run_btn.setEnabled(enabled)
+
+ def _on_plan(self) -> None:
+ description = self._description.toPlainText().strip()
+ if not description:
+ self._status.setText(_t("llm_desc_required"))
+ return
+ if self._plan_thread is not None and self._plan_thread.isRunning():
+ return
+ model = self._model.text().strip() or None
+ if self._plan_btn is not None:
+ self._plan_btn.setEnabled(False)
+ self._status.setText(_t("llm_planning"))
+ self._actions_view.clear()
+ self._planned_actions = None
+ self._set_run_enabled(False)
+ worker = _PlanWorker(description, model, sorted(executor.known_commands()))
+ thread = QThread(self)
+ worker.moveToThread(thread)
+ thread.started.connect(worker.run)
+ worker.finished.connect(self._on_plan_finished)
+ worker.failed.connect(self._on_plan_failed)
+ worker.finished.connect(thread.quit)
+ worker.failed.connect(thread.quit)
+ thread.finished.connect(self._on_thread_done)
+ self._plan_worker = worker
+ self._plan_thread = thread
+ thread.start()
+
+ def _on_plan_finished(self, actions: list) -> None:
+ self._planned_actions = actions
+ self._actions_view.setText(
+ json.dumps(actions, indent=2, ensure_ascii=False)
+ )
+ self._status.setText(
+ _t("llm_plan_count").replace("{n}", str(len(actions)))
+ )
+ self._set_run_enabled(bool(actions))
+
+ def _on_plan_failed(self, message: str) -> None:
+ self._planned_actions = None
+ self._set_run_enabled(False)
+ QMessageBox.warning(self, _t("llm_plan_btn"), message)
+ self._status.setText(message)
+
+ def _on_thread_done(self) -> None:
+ if self._plan_thread is not None:
+ self._plan_thread.deleteLater()
+ if self._plan_worker is not None:
+ self._plan_worker.deleteLater()
+ self._plan_thread = None
+ self._plan_worker = None
+ if self._plan_btn is not None:
+ self._plan_btn.setEnabled(True)
+
+ def _on_run(self) -> None:
+ if not self._planned_actions:
+ self._status.setText(_t("llm_no_plan"))
+ return
+ self._status.setText(_t("llm_running"))
+ try:
+ record = execute_action(self._planned_actions)
+ except (OSError, ValueError, TypeError, RuntimeError) as error:
+ QMessageBox.warning(self, _t("llm_run_btn"), str(error))
+ self._status.setText(str(error))
+ return
+ self._result_view.setText(
+ json.dumps(record, indent=2, ensure_ascii=False, default=str)
+ )
+ self._status.setText(_t("llm_run_done"))
diff --git a/je_auto_control/gui/main_widget.py b/je_auto_control/gui/main_widget.py
index dca598cb..c2206f34 100644
--- a/je_auto_control/gui/main_widget.py
+++ b/je_auto_control/gui/main_widget.py
@@ -16,13 +16,17 @@
from je_auto_control.gui.hotkeys_tab import HotkeysTab
from je_auto_control.gui.language_wrapper.multi_language_wrapper import language_wrapper
from je_auto_control.gui.live_hud_tab import LiveHUDTab
+from je_auto_control.gui.llm_planner_tab import LLMPlannerTab
+from je_auto_control.gui.ocr_tab import OCRReaderTab
from je_auto_control.gui.plugins_tab import PluginsTab
from je_auto_control.gui.recording_editor_tab import RecordingEditorTab
+from je_auto_control.gui.remote_desktop_tab import RemoteDesktopTab
from je_auto_control.gui.run_history_tab import RunHistoryTab
from je_auto_control.gui.scheduler_tab import SchedulerTab
from je_auto_control.gui.script_builder import ScriptBuilderTab
from je_auto_control.gui.selector import crop_template_to_file, open_region_selector
from je_auto_control.gui.triggers_tab import TriggersTab
+from je_auto_control.gui.variables_tab import VariablesTab
from je_auto_control.gui.vlm_tab import VLMTab
from je_auto_control.gui.window_tab import WindowManagerTab
from je_auto_control.wrapper.auto_control_screen import screen_size, screenshot, get_pixel
@@ -90,6 +94,10 @@ def __init__(self, parent=None):
self._add_tab("run_history", "tab_run_history", RunHistoryTab())
self._add_tab("accessibility", "tab_accessibility", AccessibilityTab())
self._add_tab("vlm", "tab_vlm", VLMTab())
+ self._add_tab("ocr_reader", "tab_ocr_reader", OCRReaderTab())
+ self._add_tab("variables", "tab_variables", VariablesTab())
+ self._add_tab("llm_planner", "tab_llm_planner", LLMPlannerTab())
+ self._add_tab("remote_desktop", "tab_remote_desktop", RemoteDesktopTab())
self._add_tab("plugins", "tab_plugins", PluginsTab())
layout.addWidget(self.tabs)
diff --git a/je_auto_control/gui/ocr_tab.py b/je_auto_control/gui/ocr_tab.py
new file mode 100644
index 00000000..4a0a4322
--- /dev/null
+++ b/je_auto_control/gui/ocr_tab.py
@@ -0,0 +1,172 @@
+"""OCR Reader tab: dump text in a region, or regex-search for matches."""
+import json
+import re
+from typing import Optional
+
+from PySide6.QtWidgets import (
+ QGroupBox, QHBoxLayout, QLabel, QLineEdit, QMessageBox, QPushButton,
+ QTextEdit, QVBoxLayout, QWidget,
+)
+
+from je_auto_control.gui._i18n_helpers import TranslatableMixin
+from je_auto_control.gui.language_wrapper.multi_language_wrapper import (
+ language_wrapper,
+)
+from je_auto_control.gui.selector import open_region_selector
+from je_auto_control.utils.ocr.ocr_engine import (
+ find_text_regex, read_text_in_region,
+)
+
+
+def _t(key: str) -> str:
+ return language_wrapper.translate(key, key)
+
+
+def _matches_to_json(matches) -> str:
+ rows = [
+ {
+ "text": m.text, "x": m.x, "y": m.y,
+ "width": m.width, "height": m.height,
+ "confidence": m.confidence,
+ }
+ for m in matches
+ ]
+ return json.dumps(rows, indent=2, ensure_ascii=False)
+
+
+class OCRReaderTab(TranslatableMixin, QWidget):
+ """Drive headless OCR helpers (region dump + regex search) from the UI."""
+
+ def __init__(self, parent: Optional[QWidget] = None) -> None:
+ super().__init__(parent)
+ self._tr_init()
+ self._region = QLineEdit()
+ self._lang = QLineEdit("eng")
+ self._min_conf = QLineEdit("60")
+ self._regex = QLineEdit()
+ self._result = QTextEdit()
+ self._result.setReadOnly(True)
+ self._status = QLabel()
+ self._apply_placeholders()
+ self._build_layout()
+
+ def retranslate(self) -> None:
+ TranslatableMixin.retranslate(self)
+ self._apply_placeholders()
+
+ def _apply_placeholders(self) -> None:
+ self._region.setPlaceholderText(_t("ocr_region_placeholder"))
+ self._regex.setPlaceholderText(_t("ocr_regex_placeholder"))
+
+ def _build_layout(self) -> None:
+ root = QVBoxLayout(self)
+
+ region_group = self._tr(QGroupBox(), "ocr_region_group")
+ region_layout = QHBoxLayout()
+ region_layout.addWidget(self._tr(QLabel(), "ocr_region_label"))
+ region_layout.addWidget(self._region, stretch=1)
+ pick_btn = self._tr(QPushButton(), "ocr_pick_region")
+ pick_btn.clicked.connect(self._on_pick_region)
+ region_layout.addWidget(pick_btn)
+ region_group.setLayout(region_layout)
+ root.addWidget(region_group)
+
+ params_layout = QHBoxLayout()
+ params_layout.addWidget(self._tr(QLabel(), "ocr_lang_label"))
+ params_layout.addWidget(self._lang)
+ params_layout.addWidget(self._tr(QLabel(), "ocr_min_conf_label"))
+ params_layout.addWidget(self._min_conf)
+ params_layout.addStretch()
+ root.addLayout(params_layout)
+
+ regex_layout = QHBoxLayout()
+ regex_layout.addWidget(self._tr(QLabel(), "ocr_regex_label"))
+ regex_layout.addWidget(self._regex, stretch=1)
+ root.addLayout(regex_layout)
+
+ btn_row = QHBoxLayout()
+ dump_btn = self._tr(QPushButton(), "ocr_dump_region")
+ dump_btn.clicked.connect(self._on_dump)
+ find_btn = self._tr(QPushButton(), "ocr_find_regex")
+ find_btn.clicked.connect(self._on_find_regex)
+ btn_row.addWidget(dump_btn)
+ btn_row.addWidget(find_btn)
+ btn_row.addStretch()
+ root.addLayout(btn_row)
+
+ root.addWidget(self._tr(QLabel(), "ocr_results_label"))
+ root.addWidget(self._result, stretch=1)
+ root.addWidget(self._status)
+
+ def _on_pick_region(self) -> None:
+ region = open_region_selector(self)
+ if region is None:
+ return
+ x, y, width, height = region
+ self._region.setText(f"{x}, {y}, {width}, {height}")
+
+ def _parse_region(self) -> Optional[list]:
+ text = self._region.text().strip()
+ if not text:
+ return None
+ try:
+ parts = [int(token.strip()) for token in text.split(",")]
+ except ValueError as error:
+ raise ValueError(_t("ocr_region_invalid")) from error
+ if len(parts) != 4:
+ raise ValueError(_t("ocr_region_invalid"))
+ return parts
+
+ def _parse_min_conf(self) -> float:
+ text = self._min_conf.text().strip() or "0"
+ try:
+ return float(text)
+ except ValueError as error:
+ raise ValueError(_t("ocr_min_conf_invalid")) from error
+
+ def _on_dump(self) -> None:
+ try:
+ region = self._parse_region()
+ min_conf = self._parse_min_conf()
+ lang = self._lang.text().strip() or "eng"
+ matches = read_text_in_region(
+ region=region, lang=lang, min_confidence=min_conf,
+ )
+ except ValueError as error:
+ self._status.setText(str(error))
+ return
+ except (OSError, RuntimeError) as error:
+ QMessageBox.warning(self, _t("ocr_dump_region"), str(error))
+ return
+ self._result.setText(_matches_to_json(matches))
+ self._status.setText(
+ _t("ocr_match_count").replace("{n}", str(len(matches)))
+ )
+
+ def _on_find_regex(self) -> None:
+ pattern = self._regex.text().strip()
+ if not pattern:
+ self._status.setText(_t("ocr_regex_required"))
+ return
+ try:
+ compiled = re.compile(pattern)
+ except re.error as error:
+ self._status.setText(f"{_t('ocr_regex_invalid')}: {error}")
+ return
+ try:
+ region = self._parse_region()
+ min_conf = self._parse_min_conf()
+ lang = self._lang.text().strip() or "eng"
+ matches = find_text_regex(
+ compiled, lang=lang, region=region, min_confidence=min_conf,
+ )
+ except ValueError as error:
+ self._status.setText(str(error))
+ return
+ except (OSError, RuntimeError) as error:
+ QMessageBox.warning(self, _t("ocr_find_regex"), str(error))
+ return
+ self._result.setText(_matches_to_json(matches))
+ self._status.setText(
+ _t("ocr_match_count").replace("{n}", str(len(matches)))
+ )
diff --git a/je_auto_control/gui/remote_desktop_tab.py b/je_auto_control/gui/remote_desktop_tab.py
new file mode 100644
index 00000000..da5e3206
--- /dev/null
+++ b/je_auto_control/gui/remote_desktop_tab.py
@@ -0,0 +1,983 @@
+"""Remote-desktop tab: host this machine, or view+control another.
+
+Two sub-tabs share the same window:
+
+* **Host**: starts a :class:`RemoteDesktopHost` and shows the bound port,
+ token, host ID, and connected-viewer count. Token + host ID together
+ identify the session; users hand both to whoever is connecting.
+* **Viewer**: connects a :class:`RemoteDesktopViewer` (or its WebSocket
+ variant), decodes incoming JPEG frames into a custom
+ :class:`_FrameDisplay` widget that accepts drag-and-drop file uploads,
+ and forwards mouse / keyboard / wheel events back to the host as JSON
+ ``INPUT`` messages. Coordinates are mapped from widget space to the
+ original remote-screen pixel space using the latest received frame's
+ size.
+"""
+import secrets
+import ssl
+from pathlib import Path
+from typing import Optional
+
+from PySide6.QtCore import QPoint, QRect, Qt, QThread, QTimer, Signal
+from PySide6.QtGui import (
+ QDragEnterEvent, QDropEvent, QGuiApplication, QImage,
+ QKeyEvent, QMouseEvent, QPainter, QWheelEvent,
+)
+from PySide6.QtWidgets import (
+ QCheckBox, QComboBox, QFileDialog, QGroupBox, QHBoxLayout,
+ QInputDialog, QLabel, QLineEdit, QMessageBox, QProgressBar, QPushButton,
+ QSizePolicy, QSpinBox, QTabWidget, QVBoxLayout, QWidget,
+)
+
+from je_auto_control.gui._i18n_helpers import TranslatableMixin
+from je_auto_control.gui.language_wrapper.multi_language_wrapper import (
+ language_wrapper,
+)
+from je_auto_control.utils.remote_desktop import (
+ FileReceiver, RemoteDesktopHost, RemoteDesktopViewer,
+ WebSocketDesktopHost, WebSocketDesktopViewer,
+)
+from je_auto_control.utils.remote_desktop.audio import (
+ AudioCaptureConfig, AudioPlayer, is_audio_backend_available,
+)
+from je_auto_control.utils.remote_desktop.host_id import (
+ HostIdError, format_host_id, parse_host_id,
+)
+from je_auto_control.utils.remote_desktop.protocol import (
+ AuthenticationError,
+)
+from je_auto_control.utils.remote_desktop.registry import registry
+
+
+def _t(key: str) -> str:
+ return language_wrapper.translate(key, key)
+
+
+def _qt_button_name(button: Qt.MouseButton) -> Optional[str]:
+ """Map a Qt mouse button to the AC button name used by the wrappers."""
+ if button == Qt.MouseButton.LeftButton:
+ return "mouse_left"
+ if button == Qt.MouseButton.RightButton:
+ return "mouse_right"
+ if button == Qt.MouseButton.MiddleButton:
+ return "mouse_middle"
+ return None
+
+
+_QT_KEY_TO_AC = {
+ Qt.Key.Key_Up: "up",
+ Qt.Key.Key_Down: "down",
+ Qt.Key.Key_Left: "left",
+ Qt.Key.Key_Right: "right",
+ Qt.Key.Key_Return: "return",
+ Qt.Key.Key_Enter: "return",
+ Qt.Key.Key_Escape: "escape",
+ Qt.Key.Key_Tab: "tab",
+ Qt.Key.Key_Backspace: "back",
+ Qt.Key.Key_Space: "space",
+ Qt.Key.Key_Delete: "delete",
+ Qt.Key.Key_Home: "home",
+ Qt.Key.Key_End: "end",
+ Qt.Key.Key_Insert: "insert",
+ Qt.Key.Key_Shift: "shift",
+ Qt.Key.Key_Control: "control",
+ Qt.Key.Key_Alt: "menu",
+ Qt.Key.Key_PageUp: "prior",
+ Qt.Key.Key_PageDown: "next",
+}
+for _i in range(1, 13):
+ _QT_KEY_TO_AC[getattr(Qt.Key, f"Key_F{_i}")] = f"f{_i}"
+
+
+def _key_event_to_ac(event: QKeyEvent) -> Optional[str]:
+ """Return the AC keycode for ``event``, or ``None`` if unmappable."""
+ mapped = _QT_KEY_TO_AC.get(Qt.Key(event.key()))
+ if mapped is not None:
+ return mapped
+ text = event.text()
+ if len(text) == 1 and text.isprintable():
+ return text
+ return None
+
+
+def _scroll_amount(angle_delta: int) -> int:
+ """Return ``+1`` / ``-1`` / ``0`` for a Qt wheel ``angleDelta`` value."""
+ if angle_delta > 0:
+ return 1
+ if angle_delta < 0:
+ return -1
+ return 0
+
+
+def _build_verifying_client_context() -> ssl.SSLContext:
+ """TLS client context with full hostname + cert verification enabled."""
+ ctx = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
+ ctx.minimum_version = ssl.TLSVersion.TLSv1_2
+ ctx.load_default_certs()
+ ctx.check_hostname = True
+ ctx.verify_mode = ssl.CERT_REQUIRED
+ return ctx
+
+
+def _build_insecure_client_context() -> ssl.SSLContext:
+ """Opt-in self-signed loopback context — verification intentionally off.
+
+ Triggered only when the user ticks 'Skip cert verification' on the
+ Viewer panel; meant for self-signed dev / LAN hosts where the user
+ has already pinned the host out-of-band (token + 9-digit Host ID).
+ """
+ ctx = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT) # NOSONAR S5527 # opt-in self-signed
+ ctx.minimum_version = ssl.TLSVersion.TLSv1_2
+ ctx.check_hostname = False
+ ctx.verify_mode = ssl.CERT_NONE # NOSONAR S4830 # opt-in self-signed
+ return ctx
+
+
+class _FrameDisplay(QWidget):
+ """Paints the latest frame and emits remapped input events.
+
+ Also accepts drag-and-drop of local files; each dropped file path is
+ re-emitted via :pyattr:`files_dropped` so the parent panel can choose
+ a destination on the remote host and start a transfer.
+ """
+
+ mouse_moved = Signal(int, int)
+ mouse_pressed = Signal(int, int, str)
+ mouse_released = Signal(int, int, str)
+ mouse_scrolled = Signal(int, int, int)
+ key_pressed = Signal(str)
+ key_released = Signal(str)
+ type_text = Signal(str)
+ files_dropped = Signal(list)
+
+ def __init__(self, parent: Optional[QWidget] = None) -> None:
+ super().__init__(parent)
+ self._image: Optional[QImage] = None
+ self.setFocusPolicy(Qt.FocusPolicy.StrongFocus)
+ self.setMouseTracking(True)
+ self.setSizePolicy(
+ QSizePolicy.Policy.Expanding, QSizePolicy.Policy.Expanding,
+ )
+ self.setMinimumSize(320, 200)
+ self.setStyleSheet("background-color: #101010;")
+ self.setAcceptDrops(True)
+
+ def set_image(self, image: QImage) -> None:
+ self._image = image
+ self.update()
+
+ def clear(self) -> None:
+ self._image = None
+ self.update()
+
+ def has_image(self) -> bool:
+ return self._image is not None and not self._image.isNull()
+
+ # --- painting -------------------------------------------------------
+
+ def paintEvent(self, _event) -> None: # noqa: N802 Qt override
+ painter = QPainter(self)
+ painter.fillRect(self.rect(), Qt.GlobalColor.black)
+ if not self.has_image():
+ return
+ target = self._fit_rect()
+ if target.isValid():
+ painter.drawImage(target, self._image)
+
+ def _fit_rect(self) -> QRect:
+ if self._image is None or self._image.isNull():
+ return QRect()
+ img_w = self._image.width()
+ img_h = self._image.height()
+ widget_w = self.width()
+ widget_h = self.height()
+ if img_w <= 0 or img_h <= 0 or widget_w <= 0 or widget_h <= 0:
+ return QRect()
+ scale = min(widget_w / img_w, widget_h / img_h)
+ scaled_w = max(1, int(img_w * scale))
+ scaled_h = max(1, int(img_h * scale))
+ x = (widget_w - scaled_w) // 2
+ y = (widget_h - scaled_h) // 2
+ return QRect(x, y, scaled_w, scaled_h)
+
+ def _to_remote(self, pos: QPoint) -> Optional[tuple]:
+ rect = self._fit_rect()
+ if not rect.isValid() or not rect.contains(pos):
+ return None
+ if self._image is None:
+ return None
+ rel_x = pos.x() - rect.x()
+ rel_y = pos.y() - rect.y()
+ scale_x = self._image.width() / rect.width()
+ scale_y = self._image.height() / rect.height()
+ return int(rel_x * scale_x), int(rel_y * scale_y)
+
+ # --- input ---------------------------------------------------------
+
+ def mouseMoveEvent(self, event: QMouseEvent) -> None: # noqa: N802
+ coords = self._to_remote(event.position().toPoint())
+ if coords is not None:
+ self.mouse_moved.emit(*coords)
+
+ def mousePressEvent(self, event: QMouseEvent) -> None: # noqa: N802
+ self.setFocus()
+ coords = self._to_remote(event.position().toPoint())
+ if coords is None:
+ return
+ button = _qt_button_name(event.button())
+ if button is not None:
+ self.mouse_pressed.emit(*coords, button)
+
+ def mouseReleaseEvent(self, event: QMouseEvent) -> None: # noqa: N802
+ coords = self._to_remote(event.position().toPoint())
+ if coords is None:
+ return
+ button = _qt_button_name(event.button())
+ if button is not None:
+ self.mouse_released.emit(*coords, button)
+
+ def wheelEvent(self, event: QWheelEvent) -> None: # noqa: N802
+ coords = self._to_remote(event.position().toPoint())
+ if coords is None:
+ return
+ amount = _scroll_amount(event.angleDelta().y())
+ if amount:
+ self.mouse_scrolled.emit(coords[0], coords[1], amount)
+
+ def keyPressEvent(self, event: QKeyEvent) -> None: # noqa: N802
+ if event.isAutoRepeat():
+ return
+ keycode = _key_event_to_ac(event)
+ if keycode is not None:
+ self.key_pressed.emit(keycode)
+ return
+ text = event.text()
+ if text:
+ self.type_text.emit(text)
+
+ def keyReleaseEvent(self, event: QKeyEvent) -> None: # noqa: N802
+ if event.isAutoRepeat():
+ return
+ keycode = _key_event_to_ac(event)
+ if keycode is not None:
+ self.key_released.emit(keycode)
+
+ # --- drag-and-drop --------------------------------------------------
+
+ def dragEnterEvent(self, event: QDragEnterEvent) -> None: # noqa: N802
+ if event.mimeData().hasUrls():
+ event.acceptProposedAction()
+
+ def dropEvent(self, event: QDropEvent) -> None: # noqa: N802
+ urls = event.mimeData().urls()
+ local_paths = [
+ url.toLocalFile() for url in urls
+ if url.isLocalFile() and url.toLocalFile()
+ ]
+ files = [p for p in local_paths if Path(p).is_file()]
+ if files:
+ self.files_dropped.emit(files)
+ event.acceptProposedAction()
+
+
+class _HostPanel(TranslatableMixin, QWidget):
+ """Start / stop the singleton host and show what is being streamed."""
+
+ _PREVIEW_INTERVAL_MS = 250 # 4 fps preview is enough to confirm liveness
+
+ def __init__(self, parent: Optional[QWidget] = None) -> None:
+ super().__init__(parent)
+ self._tr_init()
+ self._host_id_label = QLabel("---")
+ self._host_id_label.setStyleSheet(
+ "font-size: 18pt; font-weight: bold; color: #2070d0;"
+ )
+ self._token = QLineEdit()
+ self._bind = QLineEdit("127.0.0.1")
+ self._port = QSpinBox()
+ self._port.setRange(0, 65535)
+ self._port.setValue(0)
+ self._transport = QComboBox()
+ self._transport.addItems(["TCP", "WebSocket"])
+ self._fps = QSpinBox()
+ self._fps.setRange(1, 60)
+ self._fps.setValue(10)
+ self._quality = QSpinBox()
+ self._quality.setRange(1, 95)
+ self._quality.setValue(70)
+ self._tls_cert = QLineEdit()
+ self._tls_key = QLineEdit()
+ self._enable_audio = QCheckBox()
+ self._enable_audio.setChecked(False)
+ if not is_audio_backend_available():
+ self._enable_audio.setEnabled(False)
+ self._status = QLabel()
+ self._preview = _FrameDisplay()
+ # Preview is read-only — a host watching their own stream shouldn't
+ # trigger fake input on themselves through the local widget.
+ self._preview.setEnabled(False)
+ self._start_btn: Optional[QPushButton] = None
+ self._stop_btn: Optional[QPushButton] = None
+ self._copy_id_btn: Optional[QPushButton] = None
+ self._refresh_timer = QTimer(self)
+ self._refresh_timer.setInterval(1000)
+ self._refresh_timer.timeout.connect(self._refresh_status)
+ self._preview_timer = QTimer(self)
+ self._preview_timer.setInterval(self._PREVIEW_INTERVAL_MS)
+ self._preview_timer.timeout.connect(self._refresh_preview)
+ self._build_layout()
+ self._apply_placeholders()
+ self._refresh_status()
+ self._refresh_timer.start()
+ self._preview_timer.start()
+
+ def retranslate(self) -> None:
+ TranslatableMixin.retranslate(self)
+ self._apply_placeholders()
+ self._refresh_status()
+
+ def _apply_placeholders(self) -> None:
+ self._token.setPlaceholderText(_t("rd_token_placeholder"))
+ self._tls_cert.setPlaceholderText(_t("rd_tls_cert_placeholder"))
+ self._tls_key.setPlaceholderText(_t("rd_tls_key_placeholder"))
+
+ def _build_layout(self) -> None:
+ root = QVBoxLayout(self)
+
+ warning = QLabel()
+ warning.setText(_t("rd_host_security_warning"))
+ warning.setWordWrap(True)
+ warning.setStyleSheet("color: #cc7000;")
+ self._tr(warning, "rd_host_security_warning")
+ root.addWidget(warning)
+
+ id_group = self._tr(QGroupBox(), "rd_host_id_group")
+ id_layout = QHBoxLayout()
+ id_layout.addWidget(self._tr(QLabel(), "rd_host_id_label"))
+ id_layout.addWidget(self._host_id_label, stretch=1)
+ self._copy_id_btn = self._tr(QPushButton(), "rd_host_id_copy")
+ self._copy_id_btn.clicked.connect(self._copy_host_id)
+ id_layout.addWidget(self._copy_id_btn)
+ id_group.setLayout(id_layout)
+ root.addWidget(id_group)
+
+ config = self._tr(QGroupBox(), "rd_host_config_group")
+ grid = QVBoxLayout()
+ token_row = QHBoxLayout()
+ token_row.addWidget(self._tr(QLabel(), "rd_token_label"))
+ token_row.addWidget(self._token, stretch=1)
+ gen_btn = self._tr(QPushButton(), "rd_token_generate")
+ gen_btn.clicked.connect(self._generate_token)
+ token_row.addWidget(gen_btn)
+ grid.addLayout(token_row)
+
+ bind_row = QHBoxLayout()
+ bind_row.addWidget(self._tr(QLabel(), "rd_bind_label"))
+ bind_row.addWidget(self._bind, stretch=1)
+ bind_row.addWidget(self._tr(QLabel(), "rd_port_label"))
+ bind_row.addWidget(self._port)
+ grid.addLayout(bind_row)
+
+ transport_row = QHBoxLayout()
+ transport_row.addWidget(self._tr(QLabel(), "rd_transport_label"))
+ transport_row.addWidget(self._transport)
+ transport_row.addStretch()
+ grid.addLayout(transport_row)
+
+ tls_row = QHBoxLayout()
+ tls_row.addWidget(self._tr(QLabel(), "rd_tls_cert_label"))
+ tls_row.addWidget(self._tls_cert, stretch=2)
+ cert_browse = self._tr(QPushButton(), "rd_browse")
+ cert_browse.clicked.connect(self._browse_cert)
+ tls_row.addWidget(cert_browse)
+ grid.addLayout(tls_row)
+
+ key_row = QHBoxLayout()
+ key_row.addWidget(self._tr(QLabel(), "rd_tls_key_label"))
+ key_row.addWidget(self._tls_key, stretch=2)
+ key_browse = self._tr(QPushButton(), "rd_browse")
+ key_browse.clicked.connect(self._browse_key)
+ key_row.addWidget(key_browse)
+ grid.addLayout(key_row)
+
+ media_row = QHBoxLayout()
+ media_row.addWidget(self._tr(QLabel(), "rd_fps_label"))
+ media_row.addWidget(self._fps)
+ media_row.addWidget(self._tr(QLabel(), "rd_quality_label"))
+ media_row.addWidget(self._quality)
+ media_row.addStretch()
+ grid.addLayout(media_row)
+
+ audio_row = QHBoxLayout()
+ audio_row.addWidget(self._tr(self._enable_audio, "rd_enable_audio"))
+ audio_row.addStretch()
+ grid.addLayout(audio_row)
+
+ config.setLayout(grid)
+ root.addWidget(config)
+
+ btn_row = QHBoxLayout()
+ self._start_btn = self._tr(QPushButton(), "rd_host_start")
+ self._start_btn.clicked.connect(self._start)
+ self._stop_btn = self._tr(QPushButton(), "rd_host_stop")
+ self._stop_btn.clicked.connect(self._stop)
+ btn_row.addWidget(self._start_btn)
+ btn_row.addWidget(self._stop_btn)
+ btn_row.addStretch()
+ root.addLayout(btn_row)
+
+ root.addWidget(self._tr(QLabel(), "rd_host_preview_label"))
+ root.addWidget(self._preview, stretch=1)
+ root.addWidget(self._status)
+
+ def _generate_token(self) -> None:
+ self._token.setText(secrets.token_urlsafe(24))
+
+ def _copy_host_id(self) -> None:
+ host = registry.host
+ if host is None:
+ return
+ QGuiApplication.clipboard().setText(format_host_id(host.host_id))
+
+ def _browse_cert(self) -> None:
+ path, _ = QFileDialog.getOpenFileName(
+ self, _t("rd_tls_cert_label"), "",
+ "PEM (*.pem *.crt *.cer);;All (*)",
+ )
+ if path:
+ self._tls_cert.setText(path)
+
+ def _browse_key(self) -> None:
+ path, _ = QFileDialog.getOpenFileName(
+ self, _t("rd_tls_key_label"), "",
+ "PEM (*.pem *.key);;All (*)",
+ )
+ if path:
+ self._tls_key.setText(path)
+
+ def _build_ssl_context(self) -> Optional[ssl.SSLContext]:
+ cert_path = self._tls_cert.text().strip()
+ key_path = self._tls_key.text().strip()
+ if not cert_path and not key_path:
+ return None
+ if not cert_path or not key_path:
+ raise ValueError(_t("rd_tls_both_required"))
+ ctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
+ ctx.minimum_version = ssl.TLSVersion.TLSv1_2
+ ctx.load_cert_chain(certfile=cert_path, keyfile=key_path)
+ return ctx
+
+ def _start(self) -> None:
+ token = self._token.text().strip()
+ if not token:
+ self._generate_token()
+ token = self._token.text().strip()
+ try:
+ ssl_context = self._build_ssl_context()
+ except (OSError, ValueError) as error:
+ QMessageBox.warning(self, _t("rd_host_start"), str(error))
+ return
+ host_cls = (WebSocketDesktopHost
+ if self._transport.currentText() == "WebSocket"
+ else RemoteDesktopHost)
+ registry.disconnect_viewer()
+ registry.stop_host()
+ try:
+ host = host_cls(
+ token=token,
+ bind=self._bind.text().strip() or "127.0.0.1",
+ port=self._port.value(),
+ fps=float(self._fps.value()),
+ quality=self._quality.value(),
+ ssl_context=ssl_context,
+ audio_config=AudioCaptureConfig(
+ enabled=self._enable_audio.isChecked()
+ and self._enable_audio.isEnabled(),
+ ),
+ )
+ host.start()
+ except (OSError, ValueError, RuntimeError) as error:
+ QMessageBox.warning(self, _t("rd_host_start"), str(error))
+ return
+ registry._host = host # noqa: SLF001 centralised lifecycle ownership
+ self._refresh_status()
+
+ def _stop(self) -> None:
+ try:
+ registry.stop_host()
+ except (OSError, RuntimeError) as error:
+ QMessageBox.warning(self, _t("rd_host_stop"), str(error))
+ return
+ self._refresh_status()
+
+ def _refresh_status(self) -> None:
+ status = registry.host_status()
+ if status["running"]:
+ text = (_t("rd_host_status_running")
+ .replace("{port}", str(status["port"]))
+ .replace("{n}", str(status["connected_clients"])))
+ host_id = status.get("host_id") or ""
+ self._host_id_label.setText(
+ format_host_id(host_id) if host_id else "---"
+ )
+ else:
+ text = _t("rd_host_status_stopped")
+ self._host_id_label.setText("---")
+ self._status.setText(text)
+
+ def _refresh_preview(self) -> None:
+ host = registry.host
+ if host is None or not host.is_running:
+ self._preview.clear()
+ return
+ frame = host.latest_frame()
+ if frame is None:
+ return
+ image = QImage.fromData(frame, "JPEG")
+ if not image.isNull():
+ self._preview.set_image(image)
+
+
+class _ViewerPanel(TranslatableMixin, QWidget):
+ """Connect to a host, render frames, and forward input events."""
+
+ _frame_signal = Signal(bytes)
+ _error_signal = Signal(str)
+ _audio_signal = Signal(bytes)
+ _clipboard_signal = Signal(str, object)
+ _file_progress_signal = Signal(str, int, int)
+ _file_complete_signal = Signal(str, bool, str, str)
+
+ def __init__(self, parent: Optional[QWidget] = None) -> None:
+ super().__init__(parent)
+ self._tr_init()
+ self._host_field = QLineEdit("127.0.0.1")
+ self._port = QSpinBox()
+ self._port.setRange(1, 65535)
+ self._port.setValue(0)
+ self._token = QLineEdit()
+ self._host_id = QLineEdit()
+ self._transport = QComboBox()
+ self._transport.addItems(["TCP", "WebSocket", "TLS", "WSS"])
+ self._tls_insecure = QCheckBox()
+ self._tls_insecure.setChecked(True)
+ self._enable_audio = QCheckBox()
+ self._enable_audio.setChecked(False)
+ if not is_audio_backend_available():
+ self._enable_audio.setEnabled(False)
+ self._status = QLabel()
+ self._display = _FrameDisplay()
+ self._connect_btn: Optional[QPushButton] = None
+ self._disconnect_btn: Optional[QPushButton] = None
+ self._connected = False
+ self._audio_player: Optional[AudioPlayer] = None
+ self._progress_bar = QProgressBar()
+ self._progress_bar.setVisible(False)
+ self._progress_label = QLabel()
+ self._active_progress_id: Optional[str] = None
+ self._build_layout()
+ self._apply_placeholders()
+ self._wire_signals()
+ self._refresh_status()
+
+ def retranslate(self) -> None:
+ TranslatableMixin.retranslate(self)
+ self._apply_placeholders()
+ self._refresh_status()
+
+ def _apply_placeholders(self) -> None:
+ self._token.setPlaceholderText(_t("rd_token_placeholder"))
+
+ def _build_layout(self) -> None:
+ root = QVBoxLayout(self)
+ connect_group = self._tr(QGroupBox(), "rd_viewer_config_group")
+ grid = QVBoxLayout()
+
+ host_id_row = QHBoxLayout()
+ host_id_row.addWidget(self._tr(QLabel(), "rd_host_id_label"))
+ host_id_row.addWidget(self._host_id, stretch=1)
+ grid.addLayout(host_id_row)
+
+ host_row = QHBoxLayout()
+ host_row.addWidget(self._tr(QLabel(), "rd_bind_label"))
+ host_row.addWidget(self._host_field, stretch=1)
+ host_row.addWidget(self._tr(QLabel(), "rd_port_label"))
+ host_row.addWidget(self._port)
+ grid.addLayout(host_row)
+
+ token_row = QHBoxLayout()
+ token_row.addWidget(self._tr(QLabel(), "rd_token_label"))
+ token_row.addWidget(self._token, stretch=1)
+ grid.addLayout(token_row)
+
+ transport_row = QHBoxLayout()
+ transport_row.addWidget(self._tr(QLabel(), "rd_transport_label"))
+ transport_row.addWidget(self._transport)
+ transport_row.addWidget(self._tr(self._tls_insecure,
+ "rd_tls_insecure"))
+ transport_row.addStretch()
+ grid.addLayout(transport_row)
+
+ feature_row = QHBoxLayout()
+ feature_row.addWidget(self._tr(self._enable_audio,
+ "rd_viewer_audio_play"))
+ feature_row.addStretch()
+ grid.addLayout(feature_row)
+
+ connect_group.setLayout(grid)
+ root.addWidget(connect_group)
+
+ btn_row = QHBoxLayout()
+ self._connect_btn = self._tr(QPushButton(), "rd_viewer_connect")
+ self._connect_btn.clicked.connect(self._connect)
+ self._disconnect_btn = self._tr(QPushButton(), "rd_viewer_disconnect")
+ self._disconnect_btn.clicked.connect(self._disconnect)
+ btn_row.addWidget(self._connect_btn)
+ btn_row.addWidget(self._disconnect_btn)
+ btn_row.addStretch()
+ root.addLayout(btn_row)
+
+ action_row = QHBoxLayout()
+ push_clip_btn = self._tr(QPushButton(), "rd_viewer_push_clipboard")
+ push_clip_btn.clicked.connect(self._push_clipboard_to_host)
+ send_file_btn = self._tr(QPushButton(), "rd_viewer_send_file")
+ send_file_btn.clicked.connect(self._on_send_file_clicked)
+ action_row.addWidget(push_clip_btn)
+ action_row.addWidget(send_file_btn)
+ action_row.addStretch()
+ root.addLayout(action_row)
+
+ root.addWidget(self._display, stretch=1)
+ root.addWidget(self._progress_label)
+ root.addWidget(self._progress_bar)
+ root.addWidget(self._status)
+
+ def _wire_signals(self) -> None:
+ self._frame_signal.connect(self._on_frame_main)
+ self._error_signal.connect(self._on_error_main)
+ self._audio_signal.connect(self._on_audio_main)
+ self._clipboard_signal.connect(self._on_clipboard_main)
+ self._file_progress_signal.connect(self._on_file_progress_main)
+ self._file_complete_signal.connect(self._on_file_complete_main)
+ self._display.mouse_moved.connect(self._send_mouse_move)
+ self._display.mouse_pressed.connect(self._send_mouse_press)
+ self._display.mouse_released.connect(self._send_mouse_release)
+ self._display.mouse_scrolled.connect(self._send_mouse_scroll)
+ self._display.key_pressed.connect(
+ lambda k: self._send({"action": "key_press", "keycode": k})
+ )
+ self._display.key_released.connect(
+ lambda k: self._send({"action": "key_release", "keycode": k})
+ )
+ self._display.type_text.connect(
+ lambda text: self._send({"action": "type", "text": text})
+ )
+ self._display.files_dropped.connect(self._on_files_dropped)
+
+ # --- connection lifecycle ------------------------------------------
+
+ def _connect(self) -> None:
+ host = self._host_field.text().strip()
+ token = self._token.text().strip()
+ port = self._port.value()
+ if not host or not token or port == 0:
+ QMessageBox.warning(
+ self, _t("rd_viewer_connect"), _t("rd_viewer_required_fields"),
+ )
+ return
+ try:
+ expected_id = self._parse_host_id_input()
+ except HostIdError as error:
+ QMessageBox.warning(self, _t("rd_viewer_connect"), str(error))
+ return
+ transport = self._transport.currentText()
+ ssl_context = self._build_client_ssl_context(transport)
+ viewer_cls = (WebSocketDesktopViewer
+ if transport in ("WebSocket", "WSS")
+ else RemoteDesktopViewer)
+ registry.disconnect_viewer()
+ try:
+ viewer = viewer_cls(
+ host=host, port=port, token=token,
+ on_frame=self._frame_signal.emit,
+ on_error=lambda exc: self._error_signal.emit(str(exc)),
+ on_audio=self._audio_signal.emit,
+ on_clipboard=lambda kind, data:
+ self._clipboard_signal.emit(kind, data),
+ expected_host_id=expected_id,
+ ssl_context=ssl_context,
+ )
+ viewer.set_file_receiver(FileReceiver(
+ on_progress=lambda tid, done, total:
+ self._file_progress_signal.emit(tid, done, total),
+ on_complete=lambda tid, ok, err, dst:
+ self._file_complete_signal.emit(
+ tid, bool(ok), err or "", dst,
+ ),
+ ))
+ viewer.connect(timeout=5.0)
+ except AuthenticationError as error:
+ QMessageBox.warning(self, _t("rd_viewer_connect"), str(error))
+ return
+ except (OSError, RuntimeError) as error:
+ QMessageBox.warning(self, _t("rd_viewer_connect"), str(error))
+ return
+ registry._viewer = viewer # noqa: SLF001 centralised lifecycle ownership
+ self._connected = True
+ self._start_audio_player_if_requested()
+ self._refresh_status()
+
+ def _parse_host_id_input(self) -> Optional[str]:
+ text = self._host_id.text().strip()
+ if not text:
+ return None
+ return parse_host_id(text)
+
+ def _build_client_ssl_context(
+ self, transport: str) -> Optional[ssl.SSLContext]:
+ if transport not in ("TLS", "WSS"):
+ return None
+ if self._tls_insecure.isChecked():
+ return _build_insecure_client_context()
+ return _build_verifying_client_context()
+
+ def _start_audio_player_if_requested(self) -> None:
+ if not (self._enable_audio.isChecked()
+ and self._enable_audio.isEnabled()):
+ return
+ try:
+ player = AudioPlayer()
+ player.start()
+ except (OSError, RuntimeError) as error:
+ self._status.setText(f"{_t('rd_viewer_audio_play')}: {error}")
+ return
+ self._audio_player = player
+
+ def _stop_audio_player(self) -> None:
+ player = self._audio_player
+ self._audio_player = None
+ if player is not None:
+ try:
+ player.stop()
+ except (OSError, RuntimeError):
+ pass
+
+ def _disconnect(self) -> None:
+ registry.disconnect_viewer()
+ self._stop_audio_player()
+ self._connected = False
+ self._display.clear()
+ self._progress_bar.setVisible(False)
+ self._progress_label.setText("")
+ self._active_progress_id = None
+ self._refresh_status()
+
+ def _refresh_status(self) -> None:
+ if self._connected and registry.viewer_status()["connected"]:
+ self._status.setText(_t("rd_viewer_status_connected"))
+ else:
+ self._status.setText(_t("rd_viewer_status_idle"))
+
+ # --- slot handlers (run on GUI thread) -----------------------------
+
+ def _on_frame_main(self, payload: bytes) -> None:
+ image = QImage.fromData(payload, "JPEG")
+ if image.isNull():
+ return
+ self._display.set_image(image)
+
+ def _on_error_main(self, message: str) -> None:
+ self._connected = False
+ self._refresh_status()
+ QMessageBox.warning(self, _t("rd_viewer_error"), message)
+
+ def _on_audio_main(self, payload: bytes) -> None:
+ player = self._audio_player
+ if player is None:
+ return
+ try:
+ player.play(payload)
+ except (OSError, RuntimeError):
+ pass
+
+ def _on_clipboard_main(self, kind: str, data) -> None:
+ from je_auto_control.utils.clipboard.clipboard import (
+ set_clipboard, set_clipboard_image,
+ )
+ try:
+ if kind == "text":
+ set_clipboard(data)
+ elif kind == "image":
+ set_clipboard_image(data)
+ except (OSError, RuntimeError) as error:
+ self._status.setText(f"{_t('rd_viewer_error')}: {error}")
+ return
+ self._status.setText(_t("rd_viewer_clipboard_received"))
+
+ def _on_file_progress_main(self, transfer_id: str,
+ bytes_done: int, total: int) -> None:
+ if (self._active_progress_id is not None
+ and self._active_progress_id != transfer_id):
+ return
+ self._active_progress_id = transfer_id
+ self._progress_bar.setVisible(True)
+ if total > 0:
+ self._progress_bar.setRange(0, total)
+ self._progress_bar.setValue(min(bytes_done, total))
+ else:
+ self._progress_bar.setRange(0, 0)
+ self._progress_label.setText(
+ _t("rd_progress_label")
+ .replace("{done}", str(bytes_done))
+ .replace("{total}", str(total))
+ )
+
+ def _on_file_complete_main(self, transfer_id: str, success: bool,
+ error: str, dest_path: str) -> None:
+ del transfer_id
+ self._active_progress_id = None
+ self._progress_bar.setVisible(False)
+ if success:
+ self._progress_label.setText(
+ _t("rd_progress_done").replace("{path}", dest_path)
+ )
+ else:
+ self._progress_label.setText(
+ _t("rd_progress_failed").replace("{error}", error)
+ )
+
+ # --- input forwarding ---------------------------------------------
+
+ def _send(self, action: dict) -> None:
+ viewer = registry.viewer
+ if viewer is None or not viewer.connected:
+ return
+ try:
+ viewer.send_input(action)
+ except OSError as error:
+ self._error_signal.emit(str(error))
+
+ def _send_mouse_move(self, x: int, y: int) -> None:
+ self._send({"action": "mouse_move", "x": x, "y": y})
+
+ def _send_mouse_press(self, x: int, y: int, button: str) -> None:
+ self._send({"action": "mouse_move", "x": x, "y": y})
+ self._send({"action": "mouse_press", "button": button})
+
+ def _send_mouse_release(self, x: int, y: int, button: str) -> None:
+ self._send({"action": "mouse_release", "button": button})
+
+ def _send_mouse_scroll(self, x: int, y: int, amount: int) -> None:
+ self._send({
+ "action": "mouse_scroll", "x": x, "y": y, "amount": amount,
+ })
+
+ # --- clipboard / file transfer (viewer -> host) -------------------
+
+ def _push_clipboard_to_host(self) -> None:
+ viewer = registry.viewer
+ if viewer is None or not viewer.connected:
+ QMessageBox.warning(self, _t("rd_viewer_push_clipboard"),
+ _t("rd_viewer_status_idle"))
+ return
+ text = QGuiApplication.clipboard().text()
+ if not text:
+ self._status.setText(_t("rd_clipboard_empty"))
+ return
+ try:
+ viewer.send_clipboard_text(text)
+ except OSError as error:
+ QMessageBox.warning(self, _t("rd_viewer_push_clipboard"),
+ str(error))
+ return
+ self._status.setText(_t("rd_clipboard_sent"))
+
+ def _on_send_file_clicked(self) -> None:
+ viewer = registry.viewer
+ if viewer is None or not viewer.connected:
+ QMessageBox.warning(self, _t("rd_viewer_send_file"),
+ _t("rd_viewer_status_idle"))
+ return
+ source, _ = QFileDialog.getOpenFileName(
+ self, _t("rd_viewer_send_file"), "", "All Files (*)",
+ )
+ if not source:
+ return
+ self._upload_file(source)
+
+ def _on_files_dropped(self, paths) -> None:
+ viewer = registry.viewer
+ if viewer is None or not viewer.connected:
+ return
+ for path in paths:
+ self._upload_file(path)
+
+ def _upload_file(self, source_path: str) -> None:
+ default_dest = "~/" + Path(source_path).name
+ dest, ok = QInputDialog.getText(
+ self, _t("rd_viewer_send_file"),
+ _t("rd_dest_path_prompt").replace("{name}",
+ Path(source_path).name),
+ text=default_dest,
+ )
+ if not ok or not dest:
+ return
+ viewer = registry.viewer
+ if viewer is None:
+ return
+ thread = _FileSendThread(viewer, source_path, dest, self)
+ thread.progress.connect(self._on_file_progress_main)
+ thread.completed.connect(self._on_file_complete_main)
+ thread.finished.connect(thread.deleteLater)
+ thread.start()
+
+
+class _FileSendThread(QThread):
+ """Run send_file off the GUI thread; bridge progress via signals."""
+
+ progress = Signal(str, int, int)
+ completed = Signal(str, bool, str, str)
+
+ def __init__(self, viewer: RemoteDesktopViewer, source: str, dest: str,
+ parent=None) -> None:
+ super().__init__(parent)
+ self._viewer = viewer
+ self._source = source
+ self._dest = dest
+
+ def run(self) -> None:
+ def relay(transfer_id, done, total):
+ self.progress.emit(transfer_id, done, total)
+ try:
+ result = self._viewer.send_file(
+ self._source, self._dest, on_progress=relay,
+ )
+ except (OSError, RuntimeError) as error:
+ self.completed.emit("", False, str(error), self._dest)
+ return
+ self.completed.emit(
+ result.transfer_id, bool(result.success),
+ result.error or "", self._dest,
+ )
+
+
+class RemoteDesktopTab(TranslatableMixin, QWidget):
+ """Outer container holding the host and viewer sub-tabs."""
+
+ def __init__(self, parent: Optional[QWidget] = None) -> None:
+ super().__init__(parent)
+ self._tr_init()
+ layout = QVBoxLayout(self)
+ self._tabs = QTabWidget()
+ self._host_panel = _HostPanel()
+ self._viewer_panel = _ViewerPanel()
+ host_index = self._tabs.addTab(self._host_panel, _t("rd_host_tab"))
+ viewer_index = self._tabs.addTab(self._viewer_panel, _t("rd_viewer_tab"))
+ self._tr_tab(self._tabs, host_index, "rd_host_tab")
+ self._tr_tab(self._tabs, viewer_index, "rd_viewer_tab")
+ layout.addWidget(self._tabs)
+
+ def retranslate(self) -> None:
+ TranslatableMixin.retranslate(self)
+ self._host_panel.retranslate()
+ self._viewer_panel.retranslate()
diff --git a/je_auto_control/gui/variables_tab.py b/je_auto_control/gui/variables_tab.py
new file mode 100644
index 00000000..dac0e592
--- /dev/null
+++ b/je_auto_control/gui/variables_tab.py
@@ -0,0 +1,169 @@
+"""Variables tab: inspect, seed, and clear the executor's runtime scope.
+
+Runtime variables drive ``AC_set_var`` / ``AC_if_var`` / ``AC_for_each``
+and live placeholder substitution. This tab is a thin Qt wrapper that
+shows the current scope and lets users seed a JSON bag before running a
+script that reads ``${var}`` placeholders.
+"""
+import json
+from typing import Optional
+
+from PySide6.QtWidgets import (
+ QGroupBox, QHBoxLayout, QHeaderView, QLabel, QLineEdit, QMessageBox,
+ QPushButton, QTableWidget, QTableWidgetItem, QTextEdit, QVBoxLayout,
+ QWidget,
+)
+
+from je_auto_control.gui._i18n_helpers import TranslatableMixin
+from je_auto_control.gui.language_wrapper.multi_language_wrapper import (
+ language_wrapper,
+)
+from je_auto_control.utils.executor.action_executor import executor
+
+
+def _t(key: str) -> str:
+ return language_wrapper.translate(key, key)
+
+
+class VariablesTab(TranslatableMixin, QWidget):
+ """View and seed the global executor variable scope."""
+
+ def __init__(self, parent: Optional[QWidget] = None) -> None:
+ super().__init__(parent)
+ self._tr_init()
+ self._table = QTableWidget(0, 2)
+ self._table.horizontalHeader().setSectionResizeMode(
+ QHeaderView.ResizeMode.Stretch,
+ )
+ self._table.verticalHeader().setVisible(False)
+ self._set_name = QLineEdit()
+ self._set_value = QLineEdit()
+ self._seed_text = QTextEdit()
+ self._status = QLabel()
+ self._build_layout()
+ self._apply_placeholders()
+ self._refresh()
+
+ def retranslate(self) -> None:
+ TranslatableMixin.retranslate(self)
+ self._apply_placeholders()
+ self._update_table_headers()
+
+ def _apply_placeholders(self) -> None:
+ self._set_name.setPlaceholderText(_t("vars_name_placeholder"))
+ self._set_value.setPlaceholderText(_t("vars_value_placeholder"))
+ self._seed_text.setPlaceholderText(_t("vars_seed_placeholder"))
+
+ def _update_table_headers(self) -> None:
+ self._table.setHorizontalHeaderLabels([
+ _t("vars_col_name"), _t("vars_col_value"),
+ ])
+
+ def _build_layout(self) -> None:
+ root = QVBoxLayout(self)
+
+ view_group = self._tr(QGroupBox(), "vars_current_group")
+ view_layout = QVBoxLayout()
+ self._update_table_headers()
+ view_layout.addWidget(self._table)
+ view_btns = QHBoxLayout()
+ refresh_btn = self._tr(QPushButton(), "vars_refresh")
+ refresh_btn.clicked.connect(self._refresh)
+ clear_btn = self._tr(QPushButton(), "vars_clear")
+ clear_btn.clicked.connect(self._on_clear)
+ view_btns.addWidget(refresh_btn)
+ view_btns.addWidget(clear_btn)
+ view_btns.addStretch()
+ view_layout.addLayout(view_btns)
+ view_group.setLayout(view_layout)
+ root.addWidget(view_group)
+
+ set_group = self._tr(QGroupBox(), "vars_set_group")
+ set_layout = QHBoxLayout()
+ set_layout.addWidget(self._tr(QLabel(), "vars_name_label"))
+ set_layout.addWidget(self._set_name, stretch=1)
+ set_layout.addWidget(self._tr(QLabel(), "vars_value_label"))
+ set_layout.addWidget(self._set_value, stretch=2)
+ set_btn = self._tr(QPushButton(), "vars_set_btn")
+ set_btn.clicked.connect(self._on_set_one)
+ set_layout.addWidget(set_btn)
+ set_group.setLayout(set_layout)
+ root.addWidget(set_group)
+
+ seed_group = self._tr(QGroupBox(), "vars_seed_group")
+ seed_layout = QVBoxLayout()
+ seed_layout.addWidget(self._seed_text)
+ seed_btn = self._tr(QPushButton(), "vars_seed_btn")
+ seed_btn.clicked.connect(self._on_seed_json)
+ seed_layout.addWidget(seed_btn)
+ seed_group.setLayout(seed_layout)
+ root.addWidget(seed_group)
+
+ root.addWidget(self._status)
+
+ def _refresh(self) -> None:
+ snapshot = executor.variables.as_dict()
+ self._table.setRowCount(len(snapshot))
+ for row, (name, value) in enumerate(sorted(snapshot.items())):
+ self._table.setItem(row, 0, QTableWidgetItem(str(name)))
+ display = self._format_value(value)
+ self._table.setItem(row, 1, QTableWidgetItem(display))
+ self._status.setText(
+ _t("vars_count").replace("{n}", str(len(snapshot)))
+ )
+
+ @staticmethod
+ def _format_value(value) -> str:
+ if isinstance(value, (dict, list, tuple)):
+ try:
+ return json.dumps(value, ensure_ascii=False)
+ except (TypeError, ValueError):
+ return repr(value)
+ return repr(value) if isinstance(value, str) else str(value)
+
+ @staticmethod
+ def _coerce_value(text: str):
+ """Try JSON-decoding so '42' becomes int, fall back to raw string."""
+ stripped = text.strip()
+ if not stripped:
+ return ""
+ try:
+ return json.loads(stripped)
+ except (ValueError, TypeError):
+ return text
+
+ def _on_set_one(self) -> None:
+ name = self._set_name.text().strip()
+ if not name:
+ self._status.setText(_t("vars_name_required"))
+ return
+ executor.variables.set(name, self._coerce_value(self._set_value.text()))
+ self._set_name.clear()
+ self._set_value.clear()
+ self._refresh()
+
+ def _on_clear(self) -> None:
+ confirm = QMessageBox.question(
+ self, _t("vars_clear"), _t("vars_clear_confirm"),
+ QMessageBox.StandardButton.Yes | QMessageBox.StandardButton.No,
+ )
+ if confirm != QMessageBox.StandardButton.Yes:
+ return
+ executor.variables.clear()
+ self._refresh()
+
+ def _on_seed_json(self) -> None:
+ text = self._seed_text.toPlainText().strip()
+ if not text:
+ self._status.setText(_t("vars_seed_required"))
+ return
+ try:
+ data = json.loads(text)
+ except (ValueError, TypeError) as error:
+ self._status.setText(f"{_t('vars_seed_invalid')}: {error}")
+ return
+ if not isinstance(data, dict):
+ self._status.setText(_t("vars_seed_not_object"))
+ return
+ executor.variables.update_many(data)
+ self._refresh()
diff --git a/je_auto_control/utils/clipboard/clipboard.py b/je_auto_control/utils/clipboard/clipboard.py
index 761f3cb6..1357b048 100644
--- a/je_auto_control/utils/clipboard/clipboard.py
+++ b/je_auto_control/utils/clipboard/clipboard.py
@@ -1,8 +1,11 @@
-"""Headless cross-platform text clipboard.
+"""Headless cross-platform text + image clipboard.
-Windows uses Win32 clipboard API via ctypes.
-macOS shells out to pbcopy / pbpaste.
-Linux shells out to xclip or xsel (whichever is available).
+Windows uses Win32 clipboard API via ctypes (CF_UNICODETEXT for text,
+CF_DIB for image).
+macOS shells out to pbcopy / pbpaste for text; image support requires
+PyObjC and is best effort.
+Linux shells out to xclip / xsel for text and ``xclip -t image/png`` for
+images.
All functions raise ``RuntimeError`` if the platform backend is missing so
callers can degrade gracefully.
@@ -10,8 +13,11 @@
import shutil
import subprocess # nosec B404 # reason: required for pbcopy/pbpaste/xclip/xsel
import sys
+from io import BytesIO
from typing import Optional
+_OPEN_CLIPBOARD_FAILED = "OpenClipboard failed"
+
def get_clipboard() -> str:
"""Return the current clipboard text (empty string if empty)."""
@@ -35,6 +41,30 @@ def set_clipboard(text: str) -> None:
_linux_set(text)
+def get_clipboard_image() -> Optional[bytes]:
+ """Return the clipboard's image as PNG bytes, or ``None`` if no image."""
+ if sys.platform.startswith("win"):
+ return _win_get_image()
+ if sys.platform == "darwin":
+ return _mac_get_image()
+ return _linux_get_image()
+
+
+def set_clipboard_image(png_bytes: bytes) -> None:
+ """Place a PNG image (as bytes) onto the clipboard."""
+ if not isinstance(png_bytes, (bytes, bytearray)):
+ raise TypeError("set_clipboard_image expects bytes")
+ if not png_bytes:
+ raise ValueError("png_bytes is empty")
+ if sys.platform.startswith("win"):
+ _win_set_image(bytes(png_bytes))
+ return
+ if sys.platform == "darwin":
+ _mac_set_image(bytes(png_bytes))
+ return
+ _linux_set_image(bytes(png_bytes))
+
+
# === Windows backend =========================================================
def _win_get() -> str:
@@ -55,7 +85,7 @@ def _win_get() -> str:
kernel32.GlobalUnlock.argtypes = [wintypes.HGLOBAL]
if not user32.OpenClipboard(None):
- raise RuntimeError("OpenClipboard failed")
+ raise RuntimeError(_OPEN_CLIPBOARD_FAILED)
try:
handle = user32.GetClipboardData(cf_unicodetext)
if not handle:
@@ -103,7 +133,7 @@ def _win_set(text: str) -> None:
ctypes.memmove(pointer, ctypes.addressof(data), size) # NOSONAR S5655 false positive — Array is accepted by addressof
kernel32.GlobalUnlock(handle)
if not user32.OpenClipboard(None):
- raise RuntimeError("OpenClipboard failed")
+ raise RuntimeError(_OPEN_CLIPBOARD_FAILED)
try:
user32.EmptyClipboard()
if not user32.SetClipboardData(cf_unicodetext, handle):
@@ -160,3 +190,125 @@ def _linux_set(text: str) -> None:
write_cmd, input=text.encode("utf-8"),
check=True, timeout=5,
)
+
+
+# === Image clipboard backends ===============================================
+
+
+def _win_get_image() -> Optional[bytes]:
+ """Return the Windows clipboard image as PNG bytes, or None."""
+ try:
+ from PIL import ImageGrab # noqa: PLC0415 lazy import
+ except ImportError as error:
+ raise RuntimeError(
+ "Pillow is required for clipboard image support"
+ ) from error
+ image = ImageGrab.grabclipboard()
+ if image is None or isinstance(image, list):
+ return None
+ buffer = BytesIO()
+ if image.mode != "RGB":
+ image = image.convert("RGB")
+ image.save(buffer, format="PNG")
+ return buffer.getvalue()
+
+
+def _win_set_image(png_bytes: bytes) -> None:
+ """Set the Windows clipboard image from PNG bytes (CF_DIB)."""
+ try:
+ from PIL import Image # noqa: PLC0415 lazy import
+ except ImportError as error:
+ raise RuntimeError(
+ "Pillow is required for clipboard image support"
+ ) from error
+ image = Image.open(BytesIO(png_bytes))
+ if image.mode != "RGB":
+ image = image.convert("RGB")
+ bmp_buf = BytesIO()
+ image.save(bmp_buf, format="BMP")
+ # CF_DIB excludes the 14-byte BITMAPFILEHEADER prefix that BMP files use.
+ dib = bmp_buf.getvalue()[14:]
+
+ import ctypes # noqa: PLC0415
+ from ctypes import wintypes # noqa: PLC0415
+
+ user32 = ctypes.WinDLL("user32", use_last_error=True)
+ kernel32 = ctypes.WinDLL("kernel32", use_last_error=True)
+ cf_dib = 8
+ gmem_moveable = 0x0002
+
+ user32.OpenClipboard.argtypes = [wintypes.HWND]
+ user32.OpenClipboard.restype = wintypes.BOOL
+ user32.EmptyClipboard.restype = wintypes.BOOL
+ user32.SetClipboardData.argtypes = [wintypes.UINT, wintypes.HANDLE]
+ user32.SetClipboardData.restype = wintypes.HANDLE
+ user32.CloseClipboard.restype = wintypes.BOOL
+ kernel32.GlobalAlloc.argtypes = [wintypes.UINT, ctypes.c_size_t]
+ kernel32.GlobalAlloc.restype = wintypes.HGLOBAL
+ kernel32.GlobalLock.argtypes = [wintypes.HGLOBAL]
+ kernel32.GlobalLock.restype = ctypes.c_void_p
+ kernel32.GlobalUnlock.argtypes = [wintypes.HGLOBAL]
+
+ handle = kernel32.GlobalAlloc(gmem_moveable, len(dib))
+ if not handle:
+ raise RuntimeError("GlobalAlloc failed")
+ pointer = kernel32.GlobalLock(handle)
+ if not pointer:
+ raise RuntimeError("GlobalLock failed")
+ ctypes.memmove(pointer, dib, len(dib))
+ kernel32.GlobalUnlock(handle)
+ if not user32.OpenClipboard(None):
+ raise RuntimeError(_OPEN_CLIPBOARD_FAILED)
+ try:
+ user32.EmptyClipboard()
+ if not user32.SetClipboardData(cf_dib, handle):
+ raise RuntimeError("SetClipboardData(CF_DIB) failed")
+ finally:
+ user32.CloseClipboard()
+
+
+def _mac_get_image() -> Optional[bytes]:
+ """Read clipboard image via Pillow's ImageGrab; raises if PIL missing."""
+ try:
+ from PIL import ImageGrab # noqa: PLC0415
+ except ImportError as error:
+ raise RuntimeError(
+ "Pillow is required for clipboard image support on macOS"
+ ) from error
+ image = ImageGrab.grabclipboard()
+ if image is None or isinstance(image, list):
+ return None
+ buffer = BytesIO()
+ if image.mode != "RGB":
+ image = image.convert("RGB")
+ image.save(buffer, format="PNG")
+ return buffer.getvalue()
+
+
+def _mac_set_image(_png_bytes: bytes) -> None:
+ raise RuntimeError(
+ "Setting clipboard images on macOS requires PyObjC; not yet supported"
+ )
+
+
+def _linux_get_image() -> Optional[bytes]:
+ if not shutil.which("xclip"):
+ raise RuntimeError("Install xclip for Linux clipboard image support")
+ # nosemgrep: python.lang.security.audit.dangerous-subprocess-use-audit.dangerous-subprocess-use-audit
+ result = subprocess.run( # nosec B603 B607 # reason: hard-coded argv to xclip
+ ["xclip", "-selection", "clipboard", "-t", "image/png", "-o"],
+ capture_output=True, check=False, timeout=5,
+ )
+ if result.returncode != 0 or not result.stdout:
+ return None
+ return result.stdout
+
+
+def _linux_set_image(png_bytes: bytes) -> None:
+ if not shutil.which("xclip"):
+ raise RuntimeError("Install xclip for Linux clipboard image support")
+ # nosemgrep: python.lang.security.audit.dangerous-subprocess-use-audit.dangerous-subprocess-use-audit
+ subprocess.run( # nosec B603 B607 # reason: hard-coded argv to xclip
+ ["xclip", "-selection", "clipboard", "-t", "image/png", "-i"],
+ input=png_bytes, check=True, timeout=5,
+ )
diff --git a/je_auto_control/utils/executor/action_executor.py b/je_auto_control/utils/executor/action_executor.py
index de0d93a1..136054d0 100644
--- a/je_auto_control/utils/executor/action_executor.py
+++ b/je_auto_control/utils/executor/action_executor.py
@@ -23,13 +23,25 @@
BLOCK_COMMANDS, LoopBreak, LoopContinue,
)
from je_auto_control.utils.executor.mouse_aliases import MOUSE_BUTTON_COMMANDS
+from je_auto_control.utils.llm.planner import (
+ plan_actions as llm_plan_actions,
+ run_from_description as llm_run_from_description,
+)
+from je_auto_control.utils.remote_desktop.registry import (
+ registry as remote_desktop_registry,
+)
from je_auto_control.utils.ocr.ocr_engine import (
click_text as ocr_click_text,
+ find_text_regex as ocr_find_text_regex,
locate_text_center as ocr_locate_text_center,
+ read_text_in_region as ocr_read_text_in_region,
wait_for_text as ocr_wait_for_text,
)
from je_auto_control.utils.run_history.history_store import default_history_store
-from je_auto_control.utils.script_vars.interpolate import interpolate_actions
+from je_auto_control.utils.script_vars.interpolate import (
+ interpolate_actions, interpolate_value,
+)
+from je_auto_control.utils.script_vars.scope import VariableScope
from je_auto_control.utils.generate_report.generate_html_report import generate_html, generate_html_report
from je_auto_control.utils.generate_report.generate_json_report import generate_json, generate_json_report
from je_auto_control.utils.generate_report.generate_xml_report import generate_xml, generate_xml_report
@@ -92,6 +104,112 @@ def _vlm_locate_as_list(description: str,
return None if coords is None else [coords[0], coords[1]]
+def _remote_start_host(token: str,
+ bind: str = "127.0.0.1",
+ port: int = 0,
+ fps: float = 10.0,
+ quality: int = 70,
+ region: Optional[List[int]] = None,
+ max_clients: int = 4) -> Dict[str, Any]:
+ """Executor adapter: start the singleton remote-desktop host."""
+ return remote_desktop_registry.start_host(
+ token=token, bind=bind, port=int(port),
+ fps=float(fps), quality=int(quality),
+ region=region, max_clients=int(max_clients),
+ )
+
+
+def _remote_stop_host() -> Dict[str, Any]:
+ return remote_desktop_registry.stop_host()
+
+
+def _remote_host_status() -> Dict[str, Any]:
+ return remote_desktop_registry.host_status()
+
+
+def _remote_connect(host: str, port: int, token: str,
+ timeout: float = 5.0) -> Dict[str, Any]:
+ """Executor adapter: connect the singleton viewer."""
+ return remote_desktop_registry.connect_viewer(
+ host=host, port=int(port), token=token, timeout=float(timeout),
+ )
+
+
+def _remote_disconnect() -> Dict[str, Any]:
+ return remote_desktop_registry.disconnect_viewer()
+
+
+def _remote_viewer_status() -> Dict[str, Any]:
+ return remote_desktop_registry.viewer_status()
+
+
+def _remote_send_input(action: Dict[str, Any]) -> Dict[str, Any]:
+ return remote_desktop_registry.send_input(action)
+
+
+def _llm_plan_for_executor(description: str,
+ examples: Optional[list] = None,
+ model: Optional[str] = None,
+ max_tokens: int = 2048) -> list:
+ """Executor adapter: plan without executing, using current command set."""
+ return llm_plan_actions(
+ description,
+ known_commands=executor.known_commands(),
+ examples=examples,
+ model=model,
+ max_tokens=int(max_tokens),
+ )
+
+
+def _llm_run_for_executor(description: str,
+ examples: Optional[list] = None,
+ model: Optional[str] = None,
+ max_tokens: int = 2048) -> Dict[str, Any]:
+ """Executor adapter: plan and execute against the global executor."""
+ return llm_run_from_description(
+ description,
+ executor=executor,
+ examples=examples,
+ model=model,
+ max_tokens=int(max_tokens),
+ )
+
+
+def _ocr_read_region_as_dicts(region: Optional[List[int]] = None,
+ lang: str = "eng",
+ min_confidence: float = 60.0) -> List[dict]:
+ """Executor adapter: dump OCR hits in a region as JSON-friendly dicts."""
+ return [
+ {
+ "text": match.text, "x": match.x, "y": match.y,
+ "width": match.width, "height": match.height,
+ "confidence": match.confidence,
+ }
+ for match in ocr_read_text_in_region(
+ region=region, lang=lang, min_confidence=float(min_confidence),
+ )
+ ]
+
+
+def _ocr_find_regex_as_dicts(pattern: str,
+ lang: str = "eng",
+ region: Optional[List[int]] = None,
+ min_confidence: float = 60.0,
+ flags: int = 0) -> List[dict]:
+ """Executor adapter: regex OCR search returning JSON-friendly dicts."""
+ return [
+ {
+ "text": match.text, "x": match.x, "y": match.y,
+ "width": match.width, "height": match.height,
+ "confidence": match.confidence,
+ }
+ for match in ocr_find_text_regex(
+ pattern, lang=lang, region=region,
+ min_confidence=float(min_confidence), flags=int(flags),
+ )
+ ]
+
+
def _history_list_as_dicts(limit: int = 100,
source_type: Optional[str] = None) -> List[dict]:
"""Executor adapter: list run history as plain dicts (JSON-friendly)."""
@@ -120,8 +238,13 @@ class Executor:
- 支援流程控制指令 (AC_loop, AC_if_image_found 等)
"""
+ # Args keys that hold nested action lists; runtime interpolation must
+ # leave them untouched so each iteration re-reads current variable state.
+ _DEFERRED_ARG_KEYS: frozenset = frozenset({"body", "then", "else"})
+
def __init__(self):
self._block_commands = BLOCK_COMMANDS
+ self.variables = VariableScope()
# 事件字典,對應字串名稱到函式
self.event_dict: dict = {
# Mouse 滑鼠相關
@@ -186,6 +309,8 @@ def __init__(self):
"AC_locate_text": ocr_locate_text_center,
"AC_wait_text": ocr_wait_for_text,
"AC_click_text": ocr_click_text,
+ "AC_read_text_in_region": _ocr_read_region_as_dicts,
+ "AC_find_text_regex": _ocr_find_regex_as_dicts,
# Window management
"AC_list_windows": list_windows,
@@ -213,12 +338,48 @@ def __init__(self):
# MCP server (Model Context Protocol stdio bridge)
"AC_start_mcp_server": start_mcp_stdio_server,
"AC_start_mcp_http_server": start_mcp_http_server,
+
+ # LLM action planner
+ "AC_llm_plan": _llm_plan_for_executor,
+ "AC_llm_run": _llm_run_for_executor,
+
+ # Remote desktop host (this machine streams to others)
+ "AC_start_remote_host": _remote_start_host,
+ "AC_stop_remote_host": _remote_stop_host,
+ "AC_remote_host_status": _remote_host_status,
+
+ # Remote desktop viewer (this machine controls others)
+ "AC_remote_connect": _remote_connect,
+ "AC_remote_disconnect": _remote_disconnect,
+ "AC_remote_viewer_status": _remote_viewer_status,
+ "AC_remote_send_input": _remote_send_input,
}
def known_commands(self) -> set:
"""Return the set of all command names the executor recognises."""
return set(self.event_dict.keys()) | set(self._block_commands.keys())
+ def _resolve_runtime_args(self, args: Any) -> Any:
+ """Interpolate ``${var}`` placeholders against the current scope.
+
+ Keys inside :attr:`_DEFERRED_ARG_KEYS` (``body``/``then``/``else``)
+ are left as-is so nested action lists keep their placeholders for
+ per-iteration evaluation.
+ """
+ if not self.variables:
+ return args
+ if isinstance(args, dict):
+ resolved: Dict[str, Any] = {}
+ for key, value in args.items():
+ if key in self._DEFERRED_ARG_KEYS:
+ resolved[key] = value
+ else:
+ resolved[key] = interpolate_value(value, self.variables)
+ return resolved
+ if isinstance(args, list):
+ return [interpolate_value(item, self.variables) for item in args]
+ return args
+
def _execute_event(self, action: list) -> Any:
"""
執行單一事件
@@ -232,16 +393,17 @@ def _execute_event(self, action: list) -> Any:
raise AutoControlActionException(
f"{name} requires a dict of arguments"
)
- return block_handler(self, args)
+ return block_handler(self, self._resolve_runtime_args(args))
event = self.event_dict.get(name)
if event is None:
raise AutoControlActionException(f"Unknown action: {name}")
if len(action) == 2:
- if isinstance(action[1], dict):
- return event(**action[1])
- return event(*action[1])
+ resolved = self._resolve_runtime_args(action[1])
+ if isinstance(resolved, dict):
+ return event(**resolved)
+ return event(*resolved)
if len(action) == 1:
return event()
raise AutoControlActionException(cant_execute_action_error_message + " " + str(action))
@@ -354,6 +516,12 @@ def execute_files(execute_files_list: list) -> List[Dict[str, str]]:
def execute_action_with_vars(action_list: list, variables: dict
) -> Dict[str, str]:
- """Interpolate ``${name}`` placeholders with ``variables`` and execute."""
+ """Interpolate ``${name}`` placeholders with ``variables`` and execute.
+
+ The same mapping seeds the runtime variable scope so flow-control
+ commands (``AC_set_var``/``AC_if_var``/...) can read and mutate the
+ same values during execution.
+ """
resolved = interpolate_actions(action_list, variables)
+ executor.variables.update_many(variables)
return executor.execute_action(resolved)
diff --git a/je_auto_control/utils/executor/action_schema.py b/je_auto_control/utils/executor/action_schema.py
index a5a33c04..57726962 100644
--- a/je_auto_control/utils/executor/action_schema.py
+++ b/je_auto_control/utils/executor/action_schema.py
@@ -12,9 +12,11 @@
FLOW_BODY_KEYS = {
"AC_if_image_found": ("then", "else"),
"AC_if_pixel": ("then", "else"),
+ "AC_if_var": ("then", "else"),
"AC_loop": ("body",),
"AC_while_image": ("body",),
"AC_retry": ("body",),
+ "AC_for_each": ("body",),
}
diff --git a/je_auto_control/utils/executor/flow_control.py b/je_auto_control/utils/executor/flow_control.py
index 2af34d29..f66b97d8 100644
--- a/je_auto_control/utils/executor/flow_control.py
+++ b/je_auto_control/utils/executor/flow_control.py
@@ -178,9 +178,95 @@ def exec_continue(executor: Any, args: Mapping[str, Any]) -> None:
raise LoopContinue()
+def exec_set_var(executor: Any, args: Mapping[str, Any]) -> Any:
+ """Store ``value`` under ``name`` in the executor's variable scope."""
+ name = args["name"]
+ value = args.get("value")
+ executor.variables.set(name, value)
+ return value
+
+
+def exec_get_var(executor: Any, args: Mapping[str, Any]) -> Any:
+ """Return the variable named ``name`` (or ``default`` if missing)."""
+ return executor.variables.get_value(args["name"], args.get("default"))
+
+
+def exec_inc_var(executor: Any, args: Mapping[str, Any]) -> Any:
+ """Increment a numeric variable by ``by`` (default 1) and return new value."""
+ name = args["name"]
+ delta = args.get("by", 1)
+ current = executor.variables.get_value(name, 0)
+ try:
+ new_value = current + delta
+ except TypeError as error:
+ raise AutoControlActionException(
+ f"AC_inc_var: variable {name!r} is not numeric: {current!r}"
+ ) from error
+ executor.variables.set(name, new_value)
+ return new_value
+
+
+_COMPARATORS: Dict[str, Callable[[Any, Any], bool]] = {
+ "eq": lambda a, b: a == b,
+ "ne": lambda a, b: a != b,
+ "lt": lambda a, b: a < b,
+ "le": lambda a, b: a <= b,
+ "gt": lambda a, b: a > b,
+ "ge": lambda a, b: a >= b,
+ "contains": lambda a, b: b in a,
+ "startswith": lambda a, b: isinstance(a, str) and a.startswith(b),
+ "endswith": lambda a, b: isinstance(a, str) and a.endswith(b),
+}
+
+
+def exec_if_var(executor: Any, args: Mapping[str, Any]) -> Any:
+ """Run ``then`` when ``variable op value`` holds, else run ``else``."""
+ name = args["name"]
+ op = args.get("op", "eq")
+ comparator = _COMPARATORS.get(op)
+ if comparator is None:
+ raise AutoControlActionException(
+ f"AC_if_var: unsupported op {op!r}; "
+ f"expected one of {sorted(_COMPARATORS)}"
+ )
+ current = executor.variables.get_value(name)
+ try:
+ matched = comparator(current, args.get("value"))
+ except TypeError as error:
+ raise AutoControlActionException(
+ f"AC_if_var: cannot compare {current!r} {op} {args.get('value')!r}"
+ ) from error
+ key = "then" if matched else "else"
+ return _run_branch(executor, args.get(key))
+
+
+def exec_for_each(executor: Any, args: Mapping[str, Any]) -> int:
+ """Bind each item in ``items`` to ``as`` and execute ``body``."""
+ items = args["items"]
+ if not isinstance(items, (list, tuple)):
+ raise AutoControlActionException(
+ f"AC_for_each: items must be a list, got {type(items).__name__}"
+ )
+ var_name = args.get("as", "item")
+ body = args.get("body") or []
+ iterations = 0
+ for item in items:
+ executor.variables.set(var_name, item)
+ try:
+ executor.execute_action(body, _validated=True)
+ except LoopContinue:
+ iterations += 1
+ continue
+ except LoopBreak:
+ break
+ iterations += 1
+ return iterations
+
+
BLOCK_COMMANDS: Dict[str, Callable[[Any, Mapping[str, Any]], Any]] = {
"AC_if_image_found": exec_if_image_found,
"AC_if_pixel": exec_if_pixel,
+ "AC_if_var": exec_if_var,
"AC_wait_image": exec_wait_image,
"AC_wait_pixel": exec_wait_pixel,
"AC_sleep": exec_sleep,
@@ -189,4 +275,8 @@ def exec_continue(executor: Any, args: Mapping[str, Any]) -> None:
"AC_retry": exec_retry,
"AC_break": exec_break,
"AC_continue": exec_continue,
+ "AC_set_var": exec_set_var,
+ "AC_get_var": exec_get_var,
+ "AC_inc_var": exec_inc_var,
+ "AC_for_each": exec_for_each,
}
diff --git a/je_auto_control/utils/llm/__init__.py b/je_auto_control/utils/llm/__init__.py
new file mode 100644
index 00000000..5f97f407
--- /dev/null
+++ b/je_auto_control/utils/llm/__init__.py
@@ -0,0 +1,20 @@
+"""LLM-driven natural-language → action-list planning.
+
+The planner asks an LLM (default: Anthropic Claude) to translate a
+description like ``"open Notepad, type hello, save as test.txt"`` into a
+validated JSON action list using the executor's known ``AC_*`` commands.
+The result is structurally validated before it is returned, so callers can
+feed it straight into the executor.
+"""
+from je_auto_control.utils.llm.backends import (
+ LLMBackend, LLMNotAvailableError, get_backend, reset_backend_cache,
+)
+from je_auto_control.utils.llm.planner import (
+ LLMPlanError, plan_actions, run_from_description,
+)
+
+__all__ = [
+ "LLMBackend", "LLMNotAvailableError", "LLMPlanError",
+ "get_backend", "reset_backend_cache",
+ "plan_actions", "run_from_description",
+]
diff --git a/je_auto_control/utils/llm/backends/__init__.py b/je_auto_control/utils/llm/backends/__init__.py
new file mode 100644
index 00000000..f0420433
--- /dev/null
+++ b/je_auto_control/utils/llm/backends/__init__.py
@@ -0,0 +1,67 @@
+"""LLM backend factory.
+
+Mirrors :mod:`je_auto_control.utils.vision.backends`: backends declare
+``available`` and ``complete()``; the factory picks the first ready
+candidate based on env vars and an optional preference. A null backend is
+returned when nothing is configured so callers can detect the situation
+through :class:`LLMNotAvailableError` rather than ``ImportError``.
+"""
+import os
+from typing import Optional
+
+from je_auto_control.utils.llm.backends.base import (
+ LLMBackend, LLMNotAvailableError,
+)
+from je_auto_control.utils.llm.backends.null_backend import NullLLMBackend
+
+_cached_backend: Optional[LLMBackend] = None
+
+
+def get_backend() -> LLMBackend:
+ """Return (and cache) an LLM backend chosen by env vars."""
+ global _cached_backend
+ if _cached_backend is not None:
+ return _cached_backend
+ _cached_backend = _build_backend()
+ return _cached_backend
+
+
+def reset_backend_cache() -> None:
+ """Force ``get_backend()`` to re-detect on its next call."""
+ global _cached_backend
+ _cached_backend = None
+
+
+def _build_backend() -> LLMBackend:
+ preferred = os.environ.get("AUTOCONTROL_LLM_BACKEND", "").lower()
+ for candidate in _preference_order(preferred):
+ backend = _try_build(candidate)
+ if backend is not None and backend.available:
+ return backend
+ return NullLLMBackend(
+ "no LLM backend ready; set ANTHROPIC_API_KEY and install the "
+ "matching SDK (anthropic)",
+ )
+
+
+def _preference_order(preferred: str):
+ if preferred == "anthropic":
+ return ("anthropic",)
+ if os.environ.get("ANTHROPIC_API_KEY"):
+ return ("anthropic",)
+ return ("anthropic",)
+
+
+def _try_build(name: str) -> Optional[LLMBackend]:
+ if name == "anthropic":
+ from je_auto_control.utils.llm.backends.anthropic_backend import (
+ AnthropicLLMBackend,
+ )
+ return AnthropicLLMBackend()
+ return None
+
+
+__all__ = [
+ "LLMBackend", "LLMNotAvailableError", "NullLLMBackend",
+ "get_backend", "reset_backend_cache",
+]
diff --git a/je_auto_control/utils/llm/backends/anthropic_backend.py b/je_auto_control/utils/llm/backends/anthropic_backend.py
new file mode 100644
index 00000000..ff349c81
--- /dev/null
+++ b/je_auto_control/utils/llm/backends/anthropic_backend.py
@@ -0,0 +1,72 @@
+"""Anthropic (Claude) text-completion backend for the action planner."""
+import os
+from typing import Optional
+
+from je_auto_control.utils.llm.backends.base import LLMBackend
+from je_auto_control.utils.logging.logging_instance import autocontrol_logger
+
+_DEFAULT_MODEL = "claude-opus-4-7"
+_REQUEST_TIMEOUT_S = 60.0
+
+
+class AnthropicLLMBackend(LLMBackend):
+ """Call ``claude-*`` chat models via the ``anthropic`` Python SDK."""
+
+ name = "anthropic"
+
+ def __init__(self) -> None:
+ self._client = None
+ try:
+ import anthropic # noqa: F401
+ except ImportError:
+ self.available = False
+ return
+ if not os.environ.get("ANTHROPIC_API_KEY"):
+ self.available = False
+ return
+ try:
+ from anthropic import Anthropic
+ self._client = Anthropic()
+ self.available = True
+ except (ImportError, ValueError, RuntimeError) as error:
+ autocontrol_logger.warning(
+ "Anthropic LLM client init failed: %r", error,
+ )
+ self.available = False
+
+ def complete(self, prompt: str,
+ system: Optional[str] = None,
+ model: Optional[str] = None,
+ max_tokens: int = 2048) -> str:
+ if not self.available or self._client is None:
+ return ""
+ chosen_model = (model
+ or os.environ.get("AUTOCONTROL_LLM_MODEL")
+ or _DEFAULT_MODEL)
+ kwargs = {
+ "model": chosen_model,
+ "max_tokens": int(max_tokens),
+ "timeout": _REQUEST_TIMEOUT_S,
+ "messages": [{"role": "user", "content": prompt}],
+ }
+ if system:
+ kwargs["system"] = system
+ try:
+ response = self._client.messages.create(**kwargs)
+ except (OSError, ValueError, RuntimeError) as error:
+ autocontrol_logger.warning(
+ "Anthropic LLM request failed: %r", error,
+ )
+ return ""
+ return _join_text_blocks(response)
+
+
+def _join_text_blocks(response) -> str:
+ """Concatenate every text block in an Anthropic response."""
+ parts = []
+ for block in getattr(response, "content", []) or []:
+ if getattr(block, "type", None) == "text":
+ text = getattr(block, "text", "") or ""
+ if text:
+ parts.append(text)
+ return "".join(parts)
diff --git a/je_auto_control/utils/llm/backends/base.py b/je_auto_control/utils/llm/backends/base.py
new file mode 100644
index 00000000..4f01f21d
--- /dev/null
+++ b/je_auto_control/utils/llm/backends/base.py
@@ -0,0 +1,24 @@
+"""Common protocol shared by every LLM backend."""
+from typing import Optional
+
+
+class LLMNotAvailableError(RuntimeError):
+ """Raised when no LLM backend is configured / reachable."""
+
+
+class LLMBackend:
+ """Minimal text-completion contract used by the action planner."""
+
+ name: str = "base"
+ available: bool = False
+
+ def complete(self, prompt: str,
+ system: Optional[str] = None,
+ model: Optional[str] = None,
+ max_tokens: int = 2048) -> str:
+ """Return the model's text response for ``prompt``.
+
+ Backends should return an empty string (not raise) on transient
+ failures so the planner can surface a deterministic error.
+ """
+ raise NotImplementedError
diff --git a/je_auto_control/utils/llm/backends/null_backend.py b/je_auto_control/utils/llm/backends/null_backend.py
new file mode 100644
index 00000000..34b8f730
--- /dev/null
+++ b/je_auto_control/utils/llm/backends/null_backend.py
@@ -0,0 +1,23 @@
+"""Fallback LLM backend used when nothing real is configured."""
+from typing import Optional
+
+from je_auto_control.utils.llm.backends.base import (
+ LLMBackend, LLMNotAvailableError,
+)
+
+
+class NullLLMBackend(LLMBackend):
+ """Always raises so callers fail fast with a clear message."""
+
+ name = "null"
+ available = False
+
+ def __init__(self, reason: str) -> None:
+ self._reason = reason
+
+ def complete(self, prompt: str,
+ system: Optional[str] = None,
+ model: Optional[str] = None,
+ max_tokens: int = 2048) -> str:
+ del prompt, system, model, max_tokens
+ raise LLMNotAvailableError(self._reason)
diff --git a/je_auto_control/utils/llm/planner.py b/je_auto_control/utils/llm/planner.py
new file mode 100644
index 00000000..9bba9553
--- /dev/null
+++ b/je_auto_control/utils/llm/planner.py
@@ -0,0 +1,145 @@
+"""Plan ``AC_*`` action lists from natural-language descriptions.
+
+The planner builds a system prompt describing the available command set,
+asks the configured LLM backend to emit a JSON action list, then validates
+the result with the same schema the executor uses. If the model wraps the
+list in prose or a code fence, we extract the first JSON array we find.
+"""
+import json
+import re
+from typing import Any, Dict, Iterable, List, Optional
+
+from je_auto_control.utils.exception.exceptions import AutoControlActionException
+from je_auto_control.utils.executor.action_schema import validate_actions
+from je_auto_control.utils.llm.backends import (
+ LLMBackend, LLMNotAvailableError, get_backend,
+)
+
+_SYSTEM_PROMPT = (
+ "You translate plain-language automation instructions into a strict "
+ "JSON action list for the AutoControl executor.\n\n"
+ "Rules:\n"
+ "1. Output ONLY a JSON array. No prose, no code fences, no comments.\n"
+ "2. Each element is [name] or [name, params]. ``params`` is an object.\n"
+ "3. Use ONLY commands from the provided allowlist; do not invent names.\n"
+ "4. Coordinates and counts are integers; thresholds are floats.\n"
+ "5. Flow-control commands carry their nested actions inside ``body`` / "
+ "``then`` / ``else``.\n"
+ "6. Reference runtime variables with ``${name}`` strings; declare them "
+ "with AC_set_var before use.\n"
+)
+
+_JSON_ARRAY = re.compile(r"\[.*\]", re.DOTALL)
+
+
+class LLMPlanError(AutoControlActionException):
+ """Raised when the LLM response is not a valid action list."""
+
+
+def plan_actions(description: str,
+ known_commands: Iterable[str],
+ examples: Optional[List[Dict[str, Any]]] = None,
+ backend: Optional[LLMBackend] = None,
+ model: Optional[str] = None,
+ max_tokens: int = 2048) -> List[list]:
+ """Translate ``description`` into a validated action list.
+
+ ``examples`` is an optional list of ``{"description": ..., "actions": [...]}``
+ pairs used as few-shot guidance. Raises :class:`LLMPlanError` when the
+ model output is unparseable, empty, or references unknown commands.
+ """
+ if not description or not description.strip():
+ raise ValueError("description must be a non-empty string")
+ bound = backend if backend is not None else get_backend()
+ if not bound.available:
+ raise LLMNotAvailableError(
+ "no LLM backend configured; set ANTHROPIC_API_KEY and install "
+ "the matching SDK",
+ )
+ allowed = sorted(set(known_commands))
+ if not allowed:
+ raise ValueError("known_commands must list at least one command")
+ prompt = _build_user_prompt(description, allowed, examples)
+ raw = bound.complete(prompt, system=_SYSTEM_PROMPT, model=model,
+ max_tokens=max_tokens)
+ actions = _parse_actions(raw)
+ validate_actions(actions, allowed)
+ return actions
+
+
+def run_from_description(description: str,
+ executor: Any,
+ examples: Optional[List[Dict[str, Any]]] = None,
+ backend: Optional[LLMBackend] = None,
+ model: Optional[str] = None,
+ max_tokens: int = 2048) -> Dict[str, Any]:
+ """Plan a description and execute it on ``executor`` in one call."""
+ actions = plan_actions(
+ description,
+ known_commands=executor.known_commands(),
+ examples=examples,
+ backend=backend,
+ model=model,
+ max_tokens=max_tokens,
+ )
+ record = executor.execute_action(actions, _validated=True)
+ return {"actions": actions, "record": record}
+
+
+def _build_user_prompt(description: str,
+ allowed: List[str],
+ examples: Optional[List[Dict[str, Any]]]) -> str:
+ """Compose the user-side prompt, including allowlist and few-shot."""
+ parts = ["Allowed commands:"]
+ parts.append(", ".join(allowed))
+ if examples:
+ parts.append("\nExamples:")
+ for example in examples:
+ desc = example.get("description", "").strip()
+ actions = example.get("actions")
+ if not desc or not isinstance(actions, list):
+ continue
+ parts.append(f"Description: {desc}")
+ parts.append("Actions: " + json.dumps(actions, ensure_ascii=False))
+ parts.append("\nDescription:")
+ parts.append(description.strip())
+ parts.append("\nReturn the JSON array now:")
+ return "\n".join(parts)
+
+
+def _parse_actions(raw: str) -> List[list]:
+ """Extract a JSON array from ``raw`` and ensure it's a list of lists."""
+ if not raw or not raw.strip():
+ raise LLMPlanError("LLM returned empty response")
+ candidate = _strip_code_fence(raw.strip())
+ try:
+ actions = json.loads(candidate)
+ except json.JSONDecodeError:
+ match = _JSON_ARRAY.search(candidate)
+ if match is None:
+ raise LLMPlanError(
+ f"LLM output is not valid JSON: {raw[:200]!r}"
+ ) from None
+ try:
+ actions = json.loads(match.group(0))
+ except json.JSONDecodeError as error:
+ raise LLMPlanError(
+ f"LLM output JSON parse failed: {error}"
+ ) from error
+ if not isinstance(actions, list):
+ raise LLMPlanError(
+ f"LLM output must be a JSON array, got {type(actions).__name__}"
+ )
+ return actions
+
+
+def _strip_code_fence(text: str) -> str:
+ """Drop a leading/trailing markdown code fence if present."""
+ if not text.startswith("```"):
+ return text
+ lines = text.splitlines()
+ if len(lines) >= 2 and lines[0].startswith("```"):
+ lines = lines[1:]
+ if lines and lines[-1].strip() == "```":
+ lines = lines[:-1]
+ return "\n".join(lines).strip()
diff --git a/je_auto_control/utils/ocr/ocr_engine.py b/je_auto_control/utils/ocr/ocr_engine.py
index e3f99926..70bd85ce 100644
--- a/je_auto_control/utils/ocr/ocr_engine.py
+++ b/je_auto_control/utils/ocr/ocr_engine.py
@@ -4,9 +4,10 @@
binary is loaded lazily; if it is missing, a clear ``RuntimeError`` is raised
rather than ``ImportError`` so callers can degrade gracefully.
"""
+import re
import time
from dataclasses import dataclass
-from typing import List, Optional, Sequence, Tuple, Union
+from typing import List, Optional, Pattern, Sequence, Tuple, Union
from je_auto_control.utils.exception.exceptions import AutoControlActionException
from je_auto_control.utils.logging.logging_instance import autocontrol_logger
@@ -112,6 +113,33 @@ def find_text_matches(target: str,
or needle in (m.text if case_sensitive else m.text.lower())]
+def read_text_in_region(region: Optional[Sequence[int]] = None,
+ lang: str = "eng",
+ min_confidence: float = 60.0) -> List[TextMatch]:
+ """Return every OCR hit in ``region`` (or whole screen) as TextMatch records."""
+ pt, _ = _load_backend()
+ frame, offset_x, offset_y = _grab(region)
+ try:
+ data = pt.image_to_data(frame, lang=lang, output_type=pt.Output.DICT)
+ except (OSError, RuntimeError) as error:
+ raise RuntimeError(
+ "Tesseract binary not found. Install it and/or call set_tesseract_cmd()."
+ ) from error
+ return _parse_matches(data, offset_x, offset_y, min_confidence)
+
+
+def find_text_regex(pattern: Union[str, Pattern[str]],
+ lang: str = "eng",
+ region: Optional[Sequence[int]] = None,
+ min_confidence: float = 60.0,
+ flags: int = 0) -> List[TextMatch]:
+ """Return every match whose text matches ``pattern`` (regex search)."""
+ compiled = pattern if isinstance(pattern, re.Pattern) else re.compile(pattern, flags)
+ matches = read_text_in_region(region=region, lang=lang,
+ min_confidence=min_confidence)
+ return [m for m in matches if compiled.search(m.text) is not None]
+
+
def locate_text_center(target: str,
lang: str = "eng",
region: Optional[Sequence[int]] = None,
diff --git a/je_auto_control/utils/remote_desktop/__init__.py b/je_auto_control/utils/remote_desktop/__init__.py
new file mode 100644
index 00000000..1f4ac41d
--- /dev/null
+++ b/je_auto_control/utils/remote_desktop/__init__.py
@@ -0,0 +1,53 @@
+"""Remote-desktop host/viewer for screen streaming and remote input.
+
+The protocol is a minimal length-prefixed framing on raw TCP (no extra
+deps). The host periodically encodes the screen as JPEG and pushes it to
+authenticated viewers; viewers send back JSON input messages that the
+host dispatches via the existing mouse/keyboard wrappers. Token-based
+HMAC-SHA256 authentication and a default loopback bind keep casual
+misuse difficult — this is *not* a hardened RDP replacement, and exposing
+it to untrusted networks should be paired with an SSH tunnel or TLS
+front-end.
+"""
+from je_auto_control.utils.remote_desktop.audio import (
+ AudioBackendError, AudioCapture, AudioPlayer,
+ is_audio_backend_available,
+)
+from je_auto_control.utils.remote_desktop.clipboard_sync import (
+ ClipboardSyncError,
+)
+from je_auto_control.utils.remote_desktop.file_transfer import (
+ FileReceiver, FileSendResult, FileTransferError, send_file,
+)
+from je_auto_control.utils.remote_desktop.host import RemoteDesktopHost
+from je_auto_control.utils.remote_desktop.host_id import (
+ HostIdError, format_host_id, generate_host_id, load_or_create_host_id,
+ parse_host_id, validate_host_id,
+)
+from je_auto_control.utils.remote_desktop.input_dispatch import (
+ InputDispatchError, dispatch_input,
+)
+from je_auto_control.utils.remote_desktop.protocol import (
+ AuthenticationError, MessageType, ProtocolError,
+ decode_frame_header, encode_frame,
+)
+from je_auto_control.utils.remote_desktop.registry import registry
+from je_auto_control.utils.remote_desktop.viewer import RemoteDesktopViewer
+from je_auto_control.utils.remote_desktop.ws_host import WebSocketDesktopHost
+from je_auto_control.utils.remote_desktop.ws_viewer import (
+ WebSocketDesktopViewer,
+)
+
+__all__ = [
+ "RemoteDesktopHost", "RemoteDesktopViewer",
+ "WebSocketDesktopHost", "WebSocketDesktopViewer",
+ "InputDispatchError", "AuthenticationError", "ProtocolError",
+ "MessageType", "encode_frame", "decode_frame_header",
+ "dispatch_input", "registry",
+ "HostIdError", "format_host_id", "generate_host_id",
+ "load_or_create_host_id", "parse_host_id", "validate_host_id",
+ "AudioBackendError", "AudioCapture", "AudioPlayer",
+ "is_audio_backend_available",
+ "ClipboardSyncError",
+ "FileReceiver", "FileSendResult", "FileTransferError", "send_file",
+]
diff --git a/je_auto_control/utils/remote_desktop/audio.py b/je_auto_control/utils/remote_desktop/audio.py
new file mode 100644
index 00000000..5feb1ccc
--- /dev/null
+++ b/je_auto_control/utils/remote_desktop/audio.py
@@ -0,0 +1,205 @@
+"""Audio capture + playback wrappers around the optional ``sounddevice`` lib.
+
+Both classes import ``sounddevice`` lazily so the package stays importable
+on systems without PortAudio. ``AudioCapture`` pulls signed-int16 PCM in
+fixed-size blocks via the library's callback API and forwards each block
+as raw bytes; ``AudioPlayer`` accepts the same byte format and writes it
+to the default (or user-chosen) output device.
+
+Defaults are 16 kHz, mono, 50 ms blocks (1600 bytes per block, ~32 KB/s)
+— small enough that audio chunks ride alongside JPEG frames over a LAN
+without noticeably starving the video pipe.
+"""
+import threading
+from dataclasses import dataclass
+from typing import Callable, Optional
+
+DEFAULT_SAMPLE_RATE = 16_000
+DEFAULT_CHANNELS = 1
+DEFAULT_BLOCK_FRAMES = 800 # 50 ms at 16 kHz
+SAMPLE_DTYPE = "int16"
+BYTES_PER_SAMPLE = 2
+
+AudioBlockCallback = Callable[[bytes], None]
+
+
+class AudioBackendError(RuntimeError):
+ """Raised when the optional ``sounddevice`` backend cannot be loaded."""
+
+
+def _load_sounddevice():
+ """Import ``sounddevice`` lazily; raise a helpful error if missing."""
+ try:
+ import sounddevice # noqa: PLC0415 intentional lazy import
+ except ImportError as error:
+ raise AudioBackendError(
+ "audio support requires 'sounddevice'. Install with: "
+ "pip install sounddevice"
+ ) from error
+ return sounddevice
+
+
+def is_audio_backend_available() -> bool:
+ """Return True if ``sounddevice`` can be imported."""
+ try:
+ _load_sounddevice()
+ except AudioBackendError:
+ return False
+ return True
+
+
+@dataclass(frozen=True)
+class AudioCaptureConfig:
+ """Bundled tuning knobs for :class:`RemoteDesktopHost` audio capture."""
+
+ enabled: bool = False
+ device: Optional[int] = None
+ sample_rate: int = DEFAULT_SAMPLE_RATE
+ channels: int = DEFAULT_CHANNELS
+ block_frames: int = DEFAULT_BLOCK_FRAMES
+
+
+class AudioCapture:
+ """Capture mono int16 PCM blocks and hand them to ``on_block`` as bytes.
+
+ ``on_block`` is invoked from the audio library's internal thread, so
+ callers must keep it cheap (queueing / signalling is fine; CPU-heavy
+ work blocks the audio pipeline).
+ """
+
+ def __init__(self, on_block: AudioBlockCallback,
+ device: Optional[int] = None,
+ sample_rate: int = DEFAULT_SAMPLE_RATE,
+ channels: int = DEFAULT_CHANNELS,
+ block_frames: int = DEFAULT_BLOCK_FRAMES) -> None:
+ if not callable(on_block):
+ raise TypeError("on_block must be callable")
+ if sample_rate <= 0 or channels <= 0 or block_frames <= 0:
+ raise ValueError(
+ "sample_rate, channels and block_frames must be positive"
+ )
+ self._on_block = on_block
+ self._device = device
+ self._sample_rate = int(sample_rate)
+ self._channels = int(channels)
+ self._block_frames = int(block_frames)
+ self._stream = None
+ self._lock = threading.Lock()
+
+ @property
+ def sample_rate(self) -> int:
+ return self._sample_rate
+
+ @property
+ def channels(self) -> int:
+ return self._channels
+
+ @property
+ def is_running(self) -> bool:
+ return self._stream is not None
+
+ def start(self) -> None:
+ """Open the input stream; subsequent blocks fire ``on_block`` callbacks."""
+ with self._lock:
+ if self._stream is not None:
+ return
+ sd = _load_sounddevice()
+ self._stream = sd.RawInputStream(
+ samplerate=self._sample_rate,
+ channels=self._channels,
+ dtype=SAMPLE_DTYPE,
+ blocksize=self._block_frames,
+ device=self._device,
+ callback=self._raw_callback,
+ )
+ self._stream.start()
+
+ def stop(self) -> None:
+ """Stop and release the input stream."""
+ with self._lock:
+ stream = self._stream
+ self._stream = None
+ if stream is None:
+ return
+ try:
+ stream.stop()
+ finally:
+ try:
+ stream.close()
+ except (OSError, RuntimeError):
+ pass
+
+ def _raw_callback(self, indata, frames, time_info, status) -> None:
+ del frames, time_info # unused — block size is fixed
+ if status:
+ # Drops / overflows are surfaced via ``status``; we let the
+ # audio thread continue rather than tearing down the stream.
+ return
+ try:
+ self._on_block(bytes(indata))
+ except Exception: # noqa: BLE001 callback isolation # nosec B110 # reason: PortAudio callback must never raise
+ # We must not propagate user callback errors back into PortAudio.
+ return
+
+
+class AudioPlayer:
+ """Play int16 PCM bytes through the default (or chosen) output device."""
+
+ def __init__(self, device: Optional[int] = None,
+ sample_rate: int = DEFAULT_SAMPLE_RATE,
+ channels: int = DEFAULT_CHANNELS) -> None:
+ if sample_rate <= 0 or channels <= 0:
+ raise ValueError("sample_rate and channels must be positive")
+ self._device = device
+ self._sample_rate = int(sample_rate)
+ self._channels = int(channels)
+ self._stream = None
+ self._lock = threading.Lock()
+
+ @property
+ def is_running(self) -> bool:
+ return self._stream is not None
+
+ def start(self) -> None:
+ """Open the output stream so :meth:`play` becomes valid."""
+ with self._lock:
+ if self._stream is not None:
+ return
+ sd = _load_sounddevice()
+ self._stream = sd.RawOutputStream(
+ samplerate=self._sample_rate,
+ channels=self._channels,
+ dtype=SAMPLE_DTYPE,
+ device=self._device,
+ )
+ self._stream.start()
+
+ def play(self, chunk: bytes) -> None:
+ """Write a chunk of int16 PCM bytes to the stream."""
+ if not isinstance(chunk, (bytes, bytearray, memoryview)):
+ raise TypeError("chunk must be bytes-like")
+ if not chunk:
+ return
+ stream = self._stream
+ if stream is None:
+ raise RuntimeError("AudioPlayer is not running; call start() first")
+ try:
+ stream.write(bytes(chunk))
+ except (OSError, RuntimeError):
+ # Late writes after stop / device removal — ignore so the
+ # network thread can keep flowing without crashing.
+ pass
+
+ def stop(self) -> None:
+ with self._lock:
+ stream = self._stream
+ self._stream = None
+ if stream is None:
+ return
+ try:
+ stream.stop()
+ finally:
+ try:
+ stream.close()
+ except (OSError, RuntimeError):
+ pass
diff --git a/je_auto_control/utils/remote_desktop/auth.py b/je_auto_control/utils/remote_desktop/auth.py
new file mode 100644
index 00000000..244bfc8c
--- /dev/null
+++ b/je_auto_control/utils/remote_desktop/auth.py
@@ -0,0 +1,28 @@
+"""HMAC-SHA256 challenge/response helpers shared by host and viewer."""
+import hmac
+import os
+from hashlib import sha256
+
+NONCE_BYTES = 32
+
+
+def make_nonce() -> bytes:
+ """Return a fresh random nonce for the auth handshake."""
+ return os.urandom(NONCE_BYTES)
+
+
+def compute_response(token: str, nonce: bytes) -> bytes:
+ """Return ``HMAC_SHA256(token, nonce)`` for the given token."""
+ if not isinstance(token, str) or not token:
+ raise ValueError("token must be a non-empty string")
+ if not isinstance(nonce, (bytes, bytearray)) or len(nonce) != NONCE_BYTES:
+ raise ValueError(f"nonce must be {NONCE_BYTES} bytes")
+ return hmac.new(token.encode("utf-8"), bytes(nonce), sha256).digest()
+
+
+def verify_response(token: str, nonce: bytes, response: bytes) -> bool:
+ """Constant-time check that ``response`` matches the expected HMAC."""
+ expected = compute_response(token, nonce)
+ if not isinstance(response, (bytes, bytearray)):
+ return False
+ return hmac.compare_digest(expected, bytes(response))
diff --git a/je_auto_control/utils/remote_desktop/clipboard_sync.py b/je_auto_control/utils/remote_desktop/clipboard_sync.py
new file mode 100644
index 00000000..3237cf41
--- /dev/null
+++ b/je_auto_control/utils/remote_desktop/clipboard_sync.py
@@ -0,0 +1,72 @@
+"""Serialization helpers for CLIPBOARD messages.
+
+The wire format is a JSON envelope so adding new payload kinds (rich
+text, file lists, ...) doesn't require touching the framing layer:
+
+* ``{"kind": "text", "text": "..."}``
+* ``{"kind": "image", "format": "png", "data_b64": "..."}``
+"""
+import base64
+import json
+from typing import Any, Dict, Tuple
+
+
+class ClipboardSyncError(ValueError):
+ """Raised when a CLIPBOARD payload is malformed or unsupported."""
+
+
+def encode_text(text: str) -> bytes:
+ """Encode a text-clipboard payload."""
+ if not isinstance(text, str):
+ raise TypeError("text must be a string")
+ return json.dumps(
+ {"kind": "text", "text": text}, ensure_ascii=False,
+ ).encode("utf-8")
+
+
+def encode_image(png_bytes: bytes) -> bytes:
+ """Encode a PNG image as a clipboard payload."""
+ if not isinstance(png_bytes, (bytes, bytearray)):
+ raise TypeError("png_bytes must be bytes")
+ if not png_bytes:
+ raise ValueError("png_bytes is empty")
+ return json.dumps({
+ "kind": "image",
+ "format": "png",
+ "data_b64": base64.b64encode(bytes(png_bytes)).decode("ascii"),
+ }, ensure_ascii=False).encode("utf-8")
+
+
+def decode(payload: bytes) -> Tuple[str, Any]:
+ """Parse a CLIPBOARD payload; return ``(kind, data)``.
+
+ For ``"text"`` ``data`` is a ``str``; for ``"image"`` it is the raw
+ PNG bytes (already base64-decoded).
+ """
+ try:
+ envelope: Dict[str, Any] = json.loads(payload.decode("utf-8"))
+ except (UnicodeDecodeError, json.JSONDecodeError) as error:
+ raise ClipboardSyncError(f"invalid CLIPBOARD JSON: {error}") from error
+ if not isinstance(envelope, dict):
+ raise ClipboardSyncError("CLIPBOARD payload must be a JSON object")
+ kind = envelope.get("kind")
+ if kind == "text":
+ text = envelope.get("text")
+ if not isinstance(text, str):
+ raise ClipboardSyncError("text payload missing 'text' string")
+ return ("text", text)
+ if kind == "image":
+ if envelope.get("format") != "png":
+ raise ClipboardSyncError(
+ f"image format {envelope.get('format')!r} not supported"
+ )
+ encoded = envelope.get("data_b64", "")
+ if not isinstance(encoded, str):
+ raise ClipboardSyncError("image payload missing 'data_b64'")
+ try:
+ return ("image", base64.b64decode(encoded))
+ except (ValueError, TypeError) as error:
+ raise ClipboardSyncError(
+ f"invalid base64 image payload: {error}"
+ ) from error
+ raise ClipboardSyncError(f"unknown clipboard kind: {kind!r}")
diff --git a/je_auto_control/utils/remote_desktop/file_transfer.py b/je_auto_control/utils/remote_desktop/file_transfer.py
new file mode 100644
index 00000000..faa2a33e
--- /dev/null
+++ b/je_auto_control/utils/remote_desktop/file_transfer.py
@@ -0,0 +1,273 @@
+"""Chunked file transfer over the typed-message channel.
+
+Three message types form a transfer:
+
+* ``FILE_BEGIN`` — JSON ``{transfer_id, dest_path, size}`` announces a new
+ stream. ``transfer_id`` is a 36-character UUID hex string so the
+ receiver can demultiplex multiple in-flight transfers on one channel.
+* ``FILE_CHUNK`` — first 36 bytes are the ASCII transfer id, the rest is
+ raw payload. Chunks arrive in order; the receiver writes them
+ sequentially and accumulates ``bytes_done``.
+* ``FILE_END`` — JSON ``{transfer_id, status, error?}`` finalises the
+ stream. The receiver closes the file and fires ``on_complete`` with
+ success / failure info.
+
+There is no central per-host file-size limit — operators relying on
+this should keep ``trusted token holders == trusted users`` in mind, and
+treat the dropbox / destination filesystem accordingly.
+"""
+import json
+import os
+import threading
+import uuid
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Any, Callable, Dict, Optional, Tuple
+
+from je_auto_control.utils.logging.logging_instance import autocontrol_logger
+from je_auto_control.utils.remote_desktop.protocol import MessageType
+
+DEFAULT_CHUNK_SIZE = 256 * 1024
+TRANSFER_ID_LEN = 36 # str(uuid.uuid4()) length
+
+_INVALID_TRANSFER_ID_MESSAGE = (
+ f"transfer_id must be a {TRANSFER_ID_LEN}-char UUID string"
+)
+
+ProgressCallback = Callable[[str, int, int], None]
+CompleteCallback = Callable[[str, bool, Optional[str], str], None]
+
+
+class FileTransferError(RuntimeError):
+ """Raised when a file-transfer payload is malformed."""
+
+
+def new_transfer_id() -> str:
+ """Return a fresh 36-character ASCII transfer ID."""
+ return str(uuid.uuid4())
+
+
+def encode_begin(transfer_id: str, dest_path: str, size: int) -> bytes:
+ if len(transfer_id) != TRANSFER_ID_LEN:
+ raise FileTransferError(_INVALID_TRANSFER_ID_MESSAGE)
+ return json.dumps({
+ "transfer_id": transfer_id,
+ "dest_path": str(dest_path),
+ "size": int(size),
+ }, ensure_ascii=False).encode("utf-8")
+
+
+def decode_begin(payload: bytes) -> Tuple[str, str, int]:
+ body = _decode_json(payload)
+ transfer_id = body.get("transfer_id")
+ dest_path = body.get("dest_path")
+ size = body.get("size")
+ if (not isinstance(transfer_id, str)
+ or len(transfer_id) != TRANSFER_ID_LEN):
+ raise FileTransferError("FILE_BEGIN missing valid transfer_id")
+ if not isinstance(dest_path, str) or not dest_path:
+ raise FileTransferError("FILE_BEGIN missing dest_path")
+ if not isinstance(size, int) or size < 0:
+ raise FileTransferError("FILE_BEGIN missing valid size")
+ return transfer_id, dest_path, size
+
+
+def encode_chunk(transfer_id: str, chunk: bytes) -> bytes:
+ if len(transfer_id) != TRANSFER_ID_LEN:
+ raise FileTransferError(_INVALID_TRANSFER_ID_MESSAGE)
+ return transfer_id.encode("ascii") + bytes(chunk)
+
+
+def decode_chunk(payload: bytes) -> Tuple[str, bytes]:
+ if len(payload) < TRANSFER_ID_LEN:
+ raise FileTransferError("FILE_CHUNK shorter than transfer id header")
+ transfer_id = payload[:TRANSFER_ID_LEN].decode("ascii", errors="replace")
+ return transfer_id, bytes(payload[TRANSFER_ID_LEN:])
+
+
+def encode_end(transfer_id: str, status: str = "ok",
+ error: Optional[str] = None) -> bytes:
+ if len(transfer_id) != TRANSFER_ID_LEN:
+ raise FileTransferError(_INVALID_TRANSFER_ID_MESSAGE)
+ body: Dict[str, Any] = {"transfer_id": transfer_id, "status": status}
+ if error is not None:
+ body["error"] = str(error)
+ return json.dumps(body, ensure_ascii=False).encode("utf-8")
+
+
+def decode_end(payload: bytes) -> Tuple[str, str, Optional[str]]:
+ body = _decode_json(payload)
+ transfer_id = body.get("transfer_id")
+ status = body.get("status", "ok")
+ if (not isinstance(transfer_id, str)
+ or len(transfer_id) != TRANSFER_ID_LEN):
+ raise FileTransferError("FILE_END missing valid transfer_id")
+ if not isinstance(status, str):
+ raise FileTransferError("FILE_END status must be a string")
+ error = body.get("error")
+ return transfer_id, status, error if isinstance(error, str) else None
+
+
+def _decode_json(payload: bytes) -> Dict[str, Any]:
+ try:
+ body = json.loads(payload.decode("utf-8"))
+ except (UnicodeDecodeError, json.JSONDecodeError) as error:
+ raise FileTransferError(f"invalid JSON: {error}") from error
+ if not isinstance(body, dict):
+ raise FileTransferError("payload must be a JSON object")
+ return body
+
+
+@dataclass
+class _Incoming:
+ """Per-transfer state owned by ``FileReceiver``."""
+
+ transfer_id: str
+ dest_path: Path
+ total_size: int
+ handle: Any # file object
+ bytes_done: int = 0
+ error: Optional[str] = None
+
+
+class FileReceiver:
+ """Demultiplex incoming FILE_* messages into one or more file writes."""
+
+ def __init__(self, on_progress: Optional[ProgressCallback] = None,
+ on_complete: Optional[CompleteCallback] = None) -> None:
+ self._on_progress = on_progress
+ self._on_complete = on_complete
+ self._active: Dict[str, _Incoming] = {}
+ self._lock = threading.Lock()
+
+ def handle_begin(self, payload: bytes) -> None:
+ transfer_id, dest_path, total_size = decode_begin(payload)
+ path = Path(os.path.expanduser(dest_path))
+ path.parent.mkdir(parents=True, exist_ok=True)
+ try:
+ handle = open(path, "wb") # noqa: SIM115 managed manually
+ except OSError as error:
+ self._fire_complete(transfer_id, False, str(error), str(path))
+ return
+ with self._lock:
+ self._active[transfer_id] = _Incoming(
+ transfer_id=transfer_id, dest_path=path,
+ total_size=total_size, handle=handle,
+ )
+ if self._on_progress is not None:
+ self._on_progress(transfer_id, 0, total_size)
+
+ def handle_chunk(self, payload: bytes) -> None:
+ transfer_id, chunk = decode_chunk(payload)
+ with self._lock:
+ incoming = self._active.get(transfer_id)
+ if incoming is None:
+ autocontrol_logger.info(
+ "remote_desktop FILE_CHUNK for unknown transfer %s",
+ transfer_id,
+ )
+ return
+ try:
+ incoming.handle.write(chunk)
+ except OSError as error:
+ incoming.error = str(error)
+ self._abort(incoming)
+ return
+ incoming.bytes_done += len(chunk)
+ if self._on_progress is not None:
+ self._on_progress(
+ transfer_id, incoming.bytes_done, incoming.total_size,
+ )
+
+ def handle_end(self, payload: bytes) -> None:
+ transfer_id, status, error = decode_end(payload)
+ with self._lock:
+ incoming = self._active.pop(transfer_id, None)
+ if incoming is None:
+ return
+ try:
+ incoming.handle.close()
+ except OSError:
+ pass
+ ok = (status == "ok") and incoming.error is None
+ message = error or incoming.error
+ self._fire_complete(
+ transfer_id, ok, message, str(incoming.dest_path),
+ )
+
+ def _abort(self, incoming: _Incoming) -> None:
+ try:
+ incoming.handle.close()
+ except OSError:
+ pass
+ with self._lock:
+ self._active.pop(incoming.transfer_id, None)
+ self._fire_complete(
+ incoming.transfer_id, False, incoming.error,
+ str(incoming.dest_path),
+ )
+
+ def _fire_complete(self, transfer_id: str, ok: bool,
+ error: Optional[str], dest_path: str) -> None:
+ if self._on_complete is None:
+ return
+ try:
+ self._on_complete(transfer_id, ok, error, dest_path)
+ except Exception: # noqa: BLE001
+ autocontrol_logger.exception(
+ "remote_desktop FileReceiver.on_complete callback raised"
+ )
+
+
+@dataclass
+class FileSendResult:
+ """Outcome of one outbound transfer."""
+
+ transfer_id: str
+ success: bool
+ error: Optional[str] = None
+ bytes_sent: int = 0
+
+
+def send_file(channel, source_path: str, dest_path: str,
+ on_progress: Optional[ProgressCallback] = None,
+ chunk_size: int = DEFAULT_CHUNK_SIZE,
+ transfer_id: Optional[str] = None) -> FileSendResult:
+ """Stream ``source_path`` to ``dest_path`` over ``channel``.
+
+ Synchronous: the caller's thread does the I/O. Wrap in a thread for
+ background uploads. ``on_progress(transfer_id, bytes_done, total)``
+ fires after every chunk (and once at the start with ``bytes_done=0``).
+ """
+ transfer_id = transfer_id or new_transfer_id()
+ source = Path(os.path.expanduser(source_path))
+ if not source.is_file():
+ raise FileTransferError(f"source not found: {source}")
+ total_size = source.stat().st_size
+ channel.send_typed(MessageType.FILE_BEGIN,
+ encode_begin(transfer_id, dest_path, total_size))
+ if on_progress is not None:
+ on_progress(transfer_id, 0, total_size)
+ bytes_sent = 0
+ try:
+ with open(source, "rb") as handle:
+ while True:
+ chunk = handle.read(int(chunk_size))
+ if not chunk:
+ break
+ channel.send_typed(
+ MessageType.FILE_CHUNK, encode_chunk(transfer_id, chunk),
+ )
+ bytes_sent += len(chunk)
+ if on_progress is not None:
+ on_progress(transfer_id, bytes_sent, total_size)
+ except OSError as error:
+ channel.send_typed(
+ MessageType.FILE_END,
+ encode_end(transfer_id, status="error", error=str(error)),
+ )
+ return FileSendResult(transfer_id=transfer_id, success=False,
+ error=str(error), bytes_sent=bytes_sent)
+ channel.send_typed(MessageType.FILE_END, encode_end(transfer_id))
+ return FileSendResult(transfer_id=transfer_id, success=True,
+ bytes_sent=bytes_sent)
diff --git a/je_auto_control/utils/remote_desktop/host.py b/je_auto_control/utils/remote_desktop/host.py
new file mode 100644
index 00000000..f46bf85c
--- /dev/null
+++ b/je_auto_control/utils/remote_desktop/host.py
@@ -0,0 +1,647 @@
+"""TCP host that streams JPEG frames and applies viewer input."""
+import collections
+import json
+import socket
+import ssl
+import threading
+import time
+from io import BytesIO
+from typing import Any, Callable, Deque, List, Mapping, Optional, Sequence
+
+from je_auto_control.utils.logging.logging_instance import autocontrol_logger
+from je_auto_control.utils.remote_desktop.audio import (
+ AudioCapture, AudioCaptureConfig,
+)
+from je_auto_control.utils.remote_desktop.auth import (
+ make_nonce, verify_response,
+)
+from je_auto_control.utils.remote_desktop.clipboard_sync import (
+ ClipboardSyncError, decode as decode_clipboard, encode_image, encode_text,
+)
+from je_auto_control.utils.remote_desktop.file_transfer import (
+ FileReceiver, FileTransferError, send_file,
+)
+from je_auto_control.utils.remote_desktop.host_id import (
+ load_or_create_host_id, validate_host_id,
+)
+from je_auto_control.utils.remote_desktop.input_dispatch import (
+ InputDispatchError, dispatch_input,
+)
+from je_auto_control.utils.remote_desktop.protocol import (
+ AuthenticationError, MessageType, ProtocolError,
+)
+from je_auto_control.utils.remote_desktop.transport import (
+ MessageChannel, TcpMessageChannel,
+)
+
+FrameProvider = Callable[[], bytes]
+InputDispatcher = Callable[[Mapping[str, Any]], Any]
+
+_AUTH_TIMEOUT_S = 5.0
+_DEFAULT_QUALITY = 70
+
+
+def _default_frame_provider(region: Optional[Sequence[int]] = None,
+ quality: int = _DEFAULT_QUALITY) -> FrameProvider:
+ """Build a JPEG frame producer using PIL.ImageGrab."""
+ def provide() -> bytes:
+ from PIL import ImageGrab # local import: not needed for unit tests
+ if region is not None:
+ x, y, width, height = (int(v) for v in region)
+ bbox = (x, y, x + width, y + height)
+ image = ImageGrab.grab(bbox=bbox, all_screens=True)
+ else:
+ image = ImageGrab.grab(all_screens=True)
+ if image.mode != "RGB":
+ image = image.convert("RGB")
+ buffer = BytesIO()
+ image.save(buffer, format="JPEG", quality=int(quality))
+ return buffer.getvalue()
+ return provide
+
+
+class _ClientHandler:
+ """Per-connection auth + input-receive + frame-send state."""
+
+ _AUDIO_QUEUE_MAXLEN = 50 # ~2.5 s of buffered chunks at 50 ms each
+
+ def __init__(self, host: "RemoteDesktopHost",
+ channel: MessageChannel, address) -> None:
+ self._host = host
+ self._channel = channel
+ self._address = address
+ self._shutdown = threading.Event()
+ self._sender_thread: Optional[threading.Thread] = None
+ self._receiver_thread: Optional[threading.Thread] = None
+ self._audio_queue: Deque[bytes] = collections.deque(
+ maxlen=self._AUDIO_QUEUE_MAXLEN,
+ )
+ self._audio_lock = threading.Lock()
+ self._audio_event = threading.Event()
+ self._audio_sender_thread: Optional[threading.Thread] = None
+ self.authenticated = False
+
+ @property
+ def address(self):
+ return self._address
+
+ def start(self) -> None:
+ """Run auth, then split into sender + receiver threads."""
+ try:
+ self._authenticate()
+ except (AuthenticationError, ProtocolError, OSError) as error:
+ autocontrol_logger.info(
+ "remote_desktop client %s rejected: %r", self._address, error,
+ )
+ self._close()
+ return
+ self.authenticated = True
+ self._sender_thread = threading.Thread(
+ target=self._send_loop, name="rd-sender", daemon=True,
+ )
+ self._receiver_thread = threading.Thread(
+ target=self._recv_loop, name="rd-recv", daemon=True,
+ )
+ self._sender_thread.start()
+ self._receiver_thread.start()
+ if self._host._audio_config.enabled:
+ self._audio_sender_thread = threading.Thread(
+ target=self._audio_send_loop, name="rd-audio", daemon=True,
+ )
+ self._audio_sender_thread.start()
+
+ def push_audio(self, chunk: bytes) -> None:
+ """Enqueue a PCM chunk for delivery; oldest dropped if queue is full."""
+ if self._shutdown.is_set() or not self.authenticated:
+ return
+ with self._audio_lock:
+ self._audio_queue.append(chunk)
+ self._audio_event.set()
+
+ def stop(self) -> None:
+ """Signal threads and close the socket."""
+ self._shutdown.set()
+ with self._host._frame_cond:
+ self._host._frame_cond.notify_all()
+ self._audio_event.set()
+ self._close()
+
+ def _authenticate(self) -> None:
+ nonce = make_nonce()
+ self._channel.settimeout(_AUTH_TIMEOUT_S)
+ self._channel.send_typed(MessageType.AUTH_CHALLENGE, nonce)
+ msg_type, payload = self._channel.read_typed()
+ if msg_type is not MessageType.AUTH_RESPONSE:
+ self._channel.send_typed(MessageType.AUTH_FAIL,
+ b"expected AUTH_RESPONSE")
+ raise AuthenticationError(
+ f"expected AUTH_RESPONSE, got {msg_type.name}"
+ )
+ if not verify_response(self._host._token, nonce, payload):
+ self._channel.send_typed(MessageType.AUTH_FAIL, b"bad token")
+ raise AuthenticationError("bad token")
+ ok_payload = json.dumps(
+ {"host_id": self._host.host_id}, ensure_ascii=False,
+ ).encode("utf-8")
+ self._channel.send_typed(MessageType.AUTH_OK, ok_payload)
+ self._channel.settimeout(None)
+
+ def _send_loop(self) -> None:
+ last_sent = 0
+ while not self._shutdown.is_set():
+ with self._host._frame_cond:
+ while (not self._shutdown.is_set()
+ and self._host._latest_seq <= last_sent):
+ self._host._frame_cond.wait(timeout=0.5)
+ if self._shutdown.is_set():
+ return
+ frame = self._host._latest_frame
+ seq = self._host._latest_seq
+ if frame is None:
+ continue
+ try:
+ self._channel.send_typed(MessageType.FRAME, frame)
+ except OSError as error:
+ autocontrol_logger.info(
+ "remote_desktop send to %s failed: %r",
+ self._address, error,
+ )
+ self.stop()
+ return
+ last_sent = seq
+
+ def _audio_send_loop(self) -> None:
+ while not self._shutdown.is_set():
+ self._audio_event.wait(timeout=0.5)
+ if self._shutdown.is_set():
+ return
+ while True:
+ with self._audio_lock:
+ if not self._audio_queue:
+ self._audio_event.clear()
+ break
+ chunk = self._audio_queue.popleft()
+ try:
+ self._channel.send_typed(MessageType.AUDIO, chunk)
+ except OSError as error:
+ autocontrol_logger.info(
+ "remote_desktop audio send to %s failed: %r",
+ self._address, error,
+ )
+ self.stop()
+ return
+
+ def _recv_loop(self) -> None:
+ while not self._shutdown.is_set():
+ try:
+ msg_type, payload = self._channel.read_typed()
+ except (OSError, ProtocolError) as error:
+ if not self._shutdown.is_set():
+ autocontrol_logger.info(
+ "remote_desktop recv from %s ended: %r",
+ self._address, error,
+ )
+ self.stop()
+ return
+ if msg_type is MessageType.PING:
+ continue
+ if msg_type is MessageType.INPUT:
+ self._handle_input_payload(payload)
+ continue
+ if msg_type is MessageType.CLIPBOARD:
+ self._handle_clipboard_payload(payload)
+ continue
+ if msg_type in (MessageType.FILE_BEGIN, MessageType.FILE_CHUNK,
+ MessageType.FILE_END):
+ self._handle_file_payload(msg_type, payload)
+ continue
+ autocontrol_logger.info(
+ "remote_desktop unexpected msg %s from %s",
+ msg_type.name, self._address,
+ )
+
+ def _handle_file_payload(self, msg_type: MessageType,
+ payload: bytes) -> None:
+ receiver = self._host._ensure_file_receiver()
+ try:
+ if msg_type is MessageType.FILE_BEGIN:
+ receiver.handle_begin(payload)
+ elif msg_type is MessageType.FILE_CHUNK:
+ receiver.handle_chunk(payload)
+ elif msg_type is MessageType.FILE_END:
+ receiver.handle_end(payload)
+ except FileTransferError as error:
+ autocontrol_logger.info(
+ "remote_desktop bad file message from %s: %r",
+ self._address, error,
+ )
+
+ def _handle_clipboard_payload(self, payload: bytes) -> None:
+ try:
+ kind, data = decode_clipboard(payload)
+ except ClipboardSyncError as error:
+ autocontrol_logger.info(
+ "remote_desktop bad CLIPBOARD from %s: %r",
+ self._address, error,
+ )
+ return
+ try:
+ self._host._apply_clipboard(kind, data)
+ except (OSError, RuntimeError, TypeError, ValueError) as error:
+ autocontrol_logger.warning(
+ "remote_desktop clipboard apply failed for %s: %r",
+ self._address, error,
+ )
+
+ def _handle_input_payload(self, payload: bytes) -> None:
+ try:
+ message = json.loads(payload.decode("utf-8"))
+ except (UnicodeDecodeError, json.JSONDecodeError) as error:
+ autocontrol_logger.info(
+ "remote_desktop bad INPUT from %s: %r",
+ self._address, error,
+ )
+ return
+ try:
+ self._host._dispatch(message)
+ except InputDispatchError as error:
+ autocontrol_logger.info(
+ "remote_desktop rejected INPUT from %s: %r",
+ self._address, error,
+ )
+ except (OSError, RuntimeError, ValueError, TypeError) as error:
+ autocontrol_logger.warning(
+ "remote_desktop input apply failed for %s: %r",
+ self._address, error,
+ )
+
+ def _close(self) -> None:
+ self._channel.close()
+
+
+class RemoteDesktopHost:
+ """Stream the screen to authenticated viewers and apply their input.
+
+ The instance owns three kinds of threads: one accept loop, one
+ capture loop, and a sender + receiver pair per connected viewer.
+ Public methods are thread-safe; ``start()`` is idempotent and
+ ``stop()`` can be called from any thread.
+ """
+
+ def __init__(self, token: str,
+ bind: str = "127.0.0.1",
+ port: int = 0,
+ fps: float = 10.0,
+ quality: int = _DEFAULT_QUALITY,
+ region: Optional[Sequence[int]] = None,
+ max_clients: int = 4,
+ frame_provider: Optional[FrameProvider] = None,
+ input_dispatcher: Optional[InputDispatcher] = None,
+ host_id: Optional[str] = None,
+ ssl_context: Optional[ssl.SSLContext] = None,
+ audio_config: Optional[AudioCaptureConfig] = None,
+ audio_capture: Optional[Any] = None,
+ ) -> None:
+ if not isinstance(token, str) or not token:
+ raise ValueError("token must be a non-empty string")
+ if fps <= 0:
+ raise ValueError("fps must be positive")
+ if not 1 <= int(quality) <= 95:
+ raise ValueError("quality must be in [1, 95]")
+ if audio_config is None:
+ audio_config = AudioCaptureConfig()
+ self._host_id = (validate_host_id(host_id) if host_id
+ else load_or_create_host_id())
+ self._token = token
+ self._ssl_context = ssl_context
+ self._bind = bind
+ self._requested_port = int(port)
+ self._period = 1.0 / float(fps)
+ self._max_clients = int(max_clients)
+ self._frame_provider: FrameProvider = (
+ frame_provider or _default_frame_provider(region, int(quality))
+ )
+ self._dispatch: InputDispatcher = input_dispatcher or dispatch_input
+ self._file_receiver: Optional[FileReceiver] = None
+ self._audio_config = audio_config
+ self._audio_capture_override = audio_capture
+ self._audio_capture: Optional[AudioCapture] = None
+ self._listen_sock: Optional[socket.socket] = None
+ self._accept_thread: Optional[threading.Thread] = None
+ self._capture_thread: Optional[threading.Thread] = None
+ self._shutdown = threading.Event()
+ self._clients: List[_ClientHandler] = []
+ self._clients_lock = threading.Lock()
+ self._frame_cond = threading.Condition()
+ self._latest_frame: Optional[bytes] = None
+ self._latest_seq = 0
+ self._port: int = 0
+
+ # public API ----------------------------------------------------------
+
+ @property
+ def host_id(self) -> str:
+ """The 9-digit numeric ID viewers use to verify this host."""
+ return self._host_id
+
+ @property
+ def audio_enabled(self) -> bool:
+ return self._audio_config.enabled and self._audio_capture is not None
+
+ @property
+ def port(self) -> int:
+ return self._port
+
+ @property
+ def is_running(self) -> bool:
+ return self._listen_sock is not None and not self._shutdown.is_set()
+
+ @property
+ def connected_clients(self) -> int:
+ with self._clients_lock:
+ return sum(
+ 1 for client in self._clients
+ if client.authenticated and not client._shutdown.is_set()
+ )
+
+ def latest_frame(self) -> Optional[bytes]:
+ """Return the most recent encoded frame (JPEG bytes) or ``None``.
+
+ Useful for a local preview pane: the GUI can poll this without
+ opening a TCP connection back to the host.
+ """
+ with self._frame_cond:
+ return self._latest_frame
+
+ def start(self) -> None:
+ """Bind, then launch accept + capture threads."""
+ if self.is_running:
+ return
+ self._shutdown.clear()
+ sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+ sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
+ sock.bind((self._bind, self._requested_port))
+ sock.listen(self._max_clients)
+ self._port = sock.getsockname()[1]
+ self._listen_sock = sock
+ self._accept_thread = threading.Thread(
+ target=self._accept_loop, name="rd-accept", daemon=True,
+ )
+ self._capture_thread = threading.Thread(
+ target=self._capture_loop, name="rd-capture", daemon=True,
+ )
+ self._accept_thread.start()
+ self._capture_thread.start()
+ self._start_audio_capture()
+
+ def stop(self, timeout: float = 2.0) -> None:
+ """Tear down accept loop, capture loop, and every connected client."""
+ if self._listen_sock is None:
+ return
+ self._shutdown.set()
+ self._stop_audio_capture()
+ try:
+ self._listen_sock.close()
+ except OSError:
+ pass
+ self._listen_sock = None
+ with self._frame_cond:
+ self._frame_cond.notify_all()
+ with self._clients_lock:
+ clients = list(self._clients)
+ self._clients.clear()
+ for client in clients:
+ client.stop()
+ for thread in (self._accept_thread, self._capture_thread):
+ if thread is not None:
+ thread.join(timeout=timeout)
+ self._accept_thread = None
+ self._capture_thread = None
+
+ def _start_audio_capture(self) -> None:
+ """Open the audio input stream when audio capture is enabled."""
+ config = self._audio_config
+ if not config.enabled:
+ return
+ if self._audio_capture_override is not None:
+ self._audio_capture = self._audio_capture_override
+ try:
+ self._audio_capture.start()
+ except (OSError, RuntimeError) as error:
+ autocontrol_logger.warning(
+ "remote_desktop audio capture failed to start: %r", error,
+ )
+ self._audio_capture = None
+ return
+ try:
+ capture = AudioCapture(
+ on_block=self._broadcast_audio,
+ device=config.device,
+ sample_rate=config.sample_rate,
+ channels=config.channels,
+ block_frames=config.block_frames,
+ )
+ capture.start()
+ except (OSError, RuntimeError) as error:
+ autocontrol_logger.warning(
+ "remote_desktop audio capture disabled: %r", error,
+ )
+ self._audio_capture = None
+ return
+ self._audio_capture = capture
+
+ def _stop_audio_capture(self) -> None:
+ capture = self._audio_capture
+ if capture is None:
+ return
+ try:
+ capture.stop()
+ except (OSError, RuntimeError):
+ pass
+ self._audio_capture = None
+
+ def _broadcast_audio(self, chunk: bytes) -> None:
+ """Push a captured PCM block to every authenticated client."""
+ with self._clients_lock:
+ clients = [c for c in self._clients
+ if c.authenticated and not c._shutdown.is_set()]
+ for client in clients:
+ client.push_audio(chunk)
+
+ def broadcast_clipboard_text(self, text: str) -> int:
+ """Send a text-clipboard message to every authenticated viewer."""
+ return self._broadcast_clipboard_payload(encode_text(text))
+
+ def broadcast_clipboard_image(self, png_bytes: bytes) -> int:
+ """Send a PNG image to every authenticated viewer's clipboard."""
+ return self._broadcast_clipboard_payload(encode_image(png_bytes))
+
+ def _broadcast_clipboard_payload(self, payload: bytes) -> int:
+ with self._clients_lock:
+ clients = [c for c in self._clients
+ if c.authenticated and not c._shutdown.is_set()]
+ sent = 0
+ for client in clients:
+ try:
+ client._channel.send_typed(MessageType.CLIPBOARD, payload)
+ sent += 1
+ except OSError as error:
+ autocontrol_logger.info(
+ "remote_desktop clipboard send to %s failed: %r",
+ client.address, error,
+ )
+ client.stop()
+ return sent
+
+ def set_file_receiver(self, receiver: FileReceiver) -> None:
+ """Replace the default ``FileReceiver`` (e.g. to wire progress callbacks)."""
+ self._file_receiver = receiver
+
+ def _ensure_file_receiver(self) -> FileReceiver:
+ if self._file_receiver is None:
+ self._file_receiver = FileReceiver()
+ return self._file_receiver
+
+ def send_file_to_viewers(self, source_path: str, dest_path: str,
+ on_progress=None) -> int:
+ """Stream ``source_path`` to every authenticated viewer.
+
+ Returns the number of viewers the transfer was attempted on.
+ Each viewer gets its own ``transfer_id`` so progress callbacks
+ can be demultiplexed in the GUI.
+ """
+ with self._clients_lock:
+ clients = [c for c in self._clients
+ if c.authenticated and not c._shutdown.is_set()]
+ for client in clients:
+ try:
+ send_file(client._channel, source_path, dest_path,
+ on_progress=on_progress)
+ except (OSError, FileTransferError) as error:
+ autocontrol_logger.info(
+ "remote_desktop file send to %s failed: %r",
+ client.address, error,
+ )
+ return len(clients)
+
+ def _apply_clipboard(self, kind: str, data: Any) -> None:
+ """Set this host's local clipboard from a decoded CLIPBOARD payload.
+
+ Subclasses or tests may override; the default routes to the
+ utils.clipboard helpers and accepts ``"text"`` / ``"image"`` kinds.
+ """
+ from je_auto_control.utils.clipboard.clipboard import (
+ set_clipboard, set_clipboard_image,
+ )
+ if kind == "text":
+ set_clipboard(data)
+ elif kind == "image":
+ set_clipboard_image(data)
+ else:
+ raise ValueError(f"unsupported clipboard kind: {kind!r}")
+
+ # internals -----------------------------------------------------------
+
+ def _accept_loop(self) -> None:
+ listen = self._listen_sock
+ if listen is None:
+ return
+ listen.settimeout(0.5)
+ while not self._shutdown.is_set():
+ try:
+ client_sock, address = listen.accept()
+ except socket.timeout:
+ continue
+ except OSError:
+ return
+ wrapped = self._maybe_wrap_tls(client_sock, address)
+ if wrapped is None:
+ continue
+ try:
+ channel = self._build_channel(wrapped, address)
+ except (OSError, RuntimeError) as error:
+ autocontrol_logger.info(
+ "remote_desktop channel handshake from %s failed: %r",
+ address, error,
+ )
+ try:
+ wrapped.close()
+ except OSError:
+ pass
+ continue
+ handler = _ClientHandler(self, channel, address)
+ with self._clients_lock:
+ if len(self._clients) >= self._max_clients:
+ autocontrol_logger.info(
+ "remote_desktop dropping %s: max_clients reached",
+ address,
+ )
+ handler._close()
+ continue
+ self._clients.append(handler)
+ handler.start()
+ self._reap_dead_clients()
+
+ def _build_channel(self, sock: socket.socket,
+ address) -> MessageChannel:
+ """Hook for transports: TCP wraps directly, WS overrides this."""
+ del address
+ return TcpMessageChannel(sock)
+
+ def _maybe_wrap_tls(self, client_sock: socket.socket,
+ address) -> Optional[socket.socket]:
+ """Return a TLS-wrapped socket when an ssl_context is configured."""
+ if self._ssl_context is None:
+ return client_sock
+ try:
+ client_sock.settimeout(_AUTH_TIMEOUT_S)
+ wrapped = self._ssl_context.wrap_socket(
+ client_sock, server_side=True,
+ )
+ wrapped.settimeout(None)
+ return wrapped
+ except OSError as error:
+ autocontrol_logger.info(
+ "remote_desktop TLS handshake from %s failed: %r",
+ address, error,
+ )
+ try:
+ client_sock.close()
+ except OSError:
+ pass
+ return None
+
+ def _capture_loop(self) -> None:
+ next_tick = time.monotonic()
+ while not self._shutdown.is_set():
+ try:
+ frame = self._frame_provider()
+ except (OSError, RuntimeError, ValueError) as error:
+ autocontrol_logger.warning(
+ "remote_desktop frame capture failed: %r", error,
+ )
+ self._shutdown.wait(self._period)
+ continue
+ with self._frame_cond:
+ self._latest_frame = frame
+ self._latest_seq += 1
+ self._frame_cond.notify_all()
+ next_tick += self._period
+ sleep_for = max(0.0, next_tick - time.monotonic())
+ if sleep_for <= 0.0:
+ next_tick = time.monotonic()
+ self._shutdown.wait(sleep_for)
+
+ def _reap_dead_clients(self) -> None:
+ with self._clients_lock:
+ self._clients = [c for c in self._clients
+ if not c._shutdown.is_set()]
+
+ # context manager ----------------------------------------------------
+
+ def __enter__(self) -> "RemoteDesktopHost":
+ self.start()
+ return self
+
+ def __exit__(self, exc_type, exc, tb) -> None:
+ self.stop()
diff --git a/je_auto_control/utils/remote_desktop/host_id.py b/je_auto_control/utils/remote_desktop/host_id.py
new file mode 100644
index 00000000..b87c1c91
--- /dev/null
+++ b/je_auto_control/utils/remote_desktop/host_id.py
@@ -0,0 +1,81 @@
+"""Stable, persistent host identifier exposed during the auth handshake.
+
+Each host has a 9-digit numeric ID — short enough to read aloud, long
+enough to be hard to guess by chance. The ID is generated on first use
+and cached at ``~/.je_auto_control/remote_host_id`` so it stays the same
+across restarts; users hand the ID + token + address to the people they
+want to connect, and viewers can verify ``expected_host_id`` after auth
+to defend against TCP-level impersonation.
+
+The ID is *not* a substitute for the auth token — it is broadcast in
+plain text inside ``AUTH_OK`` and is meant to be shared. Token-based
+HMAC auth gates the actual session.
+"""
+import os
+import re
+import secrets
+from pathlib import Path
+from typing import Optional
+
+_HOST_ID_DIGITS = 9
+_DEFAULT_PATH_RELATIVE = ".je_auto_control/remote_host_id"
+_HOST_ID_PATTERN = re.compile(r"^\d{9}$")
+
+
+class HostIdError(ValueError):
+ """Raised when a host ID is malformed."""
+
+
+def generate_host_id() -> str:
+ """Return a fresh random 9-digit host ID (zero-padded)."""
+ return f"{secrets.randbelow(10 ** _HOST_ID_DIGITS):0{_HOST_ID_DIGITS}d}"
+
+
+def validate_host_id(value: str) -> str:
+ """Return ``value`` unchanged if it is a valid 9-digit host ID."""
+ if not isinstance(value, str) or _HOST_ID_PATTERN.fullmatch(value) is None:
+ raise HostIdError(
+ f"host_id must be {_HOST_ID_DIGITS} numeric digits, got {value!r}"
+ )
+ return value
+
+
+def format_host_id(value: str) -> str:
+ """Render a host ID with grouping for display (e.g. ``123 456 789``)."""
+ digits = validate_host_id(value)
+ return f"{digits[:3]} {digits[3:6]} {digits[6:]}"
+
+
+def parse_host_id(value: str) -> str:
+ """Strip whitespace / separators from user input and validate."""
+ if not isinstance(value, str):
+ raise HostIdError(f"host_id must be a string, got {type(value).__name__}")
+ cleaned = re.sub(r"[\s\-_]", "", value)
+ return validate_host_id(cleaned)
+
+
+def default_host_id_path() -> Path:
+ """Return the on-disk path used to persist the host ID."""
+ home = Path(os.path.expanduser("~"))
+ return home / _DEFAULT_PATH_RELATIVE
+
+
+def load_or_create_host_id(path: Optional[Path] = None) -> str:
+ """Return the persisted host ID, creating one on first call."""
+ target = Path(path) if path is not None else default_host_id_path()
+ if target.exists():
+ try:
+ existing = target.read_text(encoding="utf-8").strip()
+ return validate_host_id(existing)
+ except (OSError, HostIdError):
+ # Corrupt / unreadable — regenerate rather than fail the host.
+ pass
+ new_id = generate_host_id()
+ try:
+ target.parent.mkdir(parents=True, exist_ok=True)
+ target.write_text(new_id, encoding="utf-8")
+ except OSError:
+ # Persisting is best-effort; an in-memory ID still works for the
+ # current process even if the home directory is read-only.
+ pass
+ return new_id
diff --git a/je_auto_control/utils/remote_desktop/input_dispatch.py b/je_auto_control/utils/remote_desktop/input_dispatch.py
new file mode 100644
index 00000000..057dad75
--- /dev/null
+++ b/je_auto_control/utils/remote_desktop/input_dispatch.py
@@ -0,0 +1,117 @@
+"""Apply incoming viewer input messages on the host machine.
+
+Each accepted ``action`` maps to one call against the existing
+:mod:`je_auto_control.wrapper` helpers, so the dispatcher is a thin and
+auditable boundary: any field not in the allowlist is rejected before it
+ever reaches platform code. Wrappers are imported lazily so the module
+can be imported on non-host systems (e.g. inside the viewer process)
+without pulling in OS-specific backends.
+"""
+from typing import Any, Callable, Dict, Mapping
+
+InputDispatcher = Callable[[Mapping[str, Any]], Any]
+
+
+class InputDispatchError(ValueError):
+ """Raised when an input message is malformed or references an unknown action."""
+
+
+_ALLOWED_ACTIONS = {
+ "mouse_move", "mouse_click", "mouse_press", "mouse_release",
+ "mouse_scroll", "key_press", "key_release", "type", "ping",
+}
+
+
+def _import_wrappers():
+ """Lazy import so headless/viewer-only consumers stay platform-agnostic."""
+ from je_auto_control.wrapper.auto_control_keyboard import (
+ press_keyboard_key, release_keyboard_key, write,
+ )
+ from je_auto_control.wrapper.auto_control_mouse import (
+ click_mouse, mouse_scroll, press_mouse, release_mouse,
+ set_mouse_position,
+ )
+ return {
+ "click_mouse": click_mouse,
+ "mouse_scroll": mouse_scroll,
+ "press_mouse": press_mouse,
+ "release_mouse": release_mouse,
+ "set_mouse_position": set_mouse_position,
+ "press_keyboard_key": press_keyboard_key,
+ "release_keyboard_key": release_keyboard_key,
+ "write": write,
+ }
+
+
+def dispatch_input(message: Mapping[str, Any]) -> Any:
+ """Validate ``message`` and call the matching wrapper function."""
+ if not isinstance(message, Mapping):
+ raise InputDispatchError(
+ f"input message must be a mapping, got {type(message).__name__}"
+ )
+ action = message.get("action")
+ if action not in _ALLOWED_ACTIONS:
+ raise InputDispatchError(f"unknown action: {action!r}")
+ if action == "ping":
+ return None
+ wrappers = _import_wrappers()
+ return _APPLIERS[action](message, wrappers)
+
+
+def _apply_mouse_move(message: Mapping[str, Any], wrappers: Dict[str, Any]) -> Any:
+ return wrappers["set_mouse_position"](
+ int(message["x"]), int(message["y"]),
+ )
+
+
+def _apply_mouse_click(message: Mapping[str, Any], wrappers: Dict[str, Any]) -> Any:
+ if "x" in message and "y" in message:
+ wrappers["set_mouse_position"](
+ int(message["x"]), int(message["y"]),
+ )
+ button = message.get("button", "mouse_left")
+ return wrappers["click_mouse"](button)
+
+
+def _apply_mouse_press(message: Mapping[str, Any], wrappers: Dict[str, Any]) -> Any:
+ return wrappers["press_mouse"](message.get("button", "mouse_left"))
+
+
+def _apply_mouse_release(message: Mapping[str, Any], wrappers: Dict[str, Any]) -> Any:
+ return wrappers["release_mouse"](message.get("button", "mouse_left"))
+
+
+def _apply_mouse_scroll(message: Mapping[str, Any], wrappers: Dict[str, Any]) -> Any:
+ amount = int(message["amount"])
+ if "x" in message and "y" in message:
+ return wrappers["mouse_scroll"](
+ amount, int(message["x"]), int(message["y"]),
+ )
+ return wrappers["mouse_scroll"](amount)
+
+
+def _apply_key_press(message: Mapping[str, Any], wrappers: Dict[str, Any]) -> Any:
+ return wrappers["press_keyboard_key"](message["keycode"])
+
+
+def _apply_key_release(message: Mapping[str, Any], wrappers: Dict[str, Any]) -> Any:
+ return wrappers["release_keyboard_key"](message["keycode"])
+
+
+def _apply_type(message: Mapping[str, Any], wrappers: Dict[str, Any]) -> Any:
+ text = message.get("text", "")
+ if not isinstance(text, str):
+ raise InputDispatchError("'type' message requires string 'text'")
+ return wrappers["write"](text)
+
+
+_APPLIERS: Dict[str, Callable[[Mapping[str, Any], Dict[str, Any]], Any]] = {
+ "mouse_move": _apply_mouse_move,
+ "mouse_click": _apply_mouse_click,
+ "mouse_press": _apply_mouse_press,
+ "mouse_release": _apply_mouse_release,
+ "mouse_scroll": _apply_mouse_scroll,
+ "key_press": _apply_key_press,
+ "key_release": _apply_key_release,
+ "type": _apply_type,
+}
diff --git a/je_auto_control/utils/remote_desktop/protocol.py b/je_auto_control/utils/remote_desktop/protocol.py
new file mode 100644
index 00000000..a796b42b
--- /dev/null
+++ b/je_auto_control/utils/remote_desktop/protocol.py
@@ -0,0 +1,92 @@
+"""Length-prefixed framing for the remote-desktop TCP protocol.
+
+Wire format per message: ``MAGIC(2) | TYPE(1) | LENGTH(4 BE) | PAYLOAD``.
+Payload size is capped to keep a misbehaving peer from forcing the host
+to allocate gigabyte buffers. Helpers here only deal with bytes — auth
+and high-level semantics live in :mod:`auth`, :mod:`host`, :mod:`viewer`.
+"""
+import enum
+import struct
+from typing import Tuple
+
+_MAGIC = b"AC"
+_HEADER_FMT = "!2sBI"
+HEADER_SIZE = struct.calcsize(_HEADER_FMT)
+MAX_PAYLOAD_BYTES = 16 * 1024 * 1024 # 16 MiB hard cap
+
+
+class ProtocolError(RuntimeError):
+ """Raised when an incoming frame violates the wire format."""
+
+
+class AuthenticationError(RuntimeError):
+ """Raised when the HMAC handshake fails."""
+
+
+class MessageType(enum.IntEnum):
+ """Single-byte type tags for every protocol message."""
+
+ AUTH_CHALLENGE = 0x01 # host -> viewer: random nonce
+ AUTH_RESPONSE = 0x02 # viewer -> host: HMAC of nonce
+ AUTH_OK = 0x03 # host -> viewer: handshake accepted
+ AUTH_FAIL = 0x04 # host -> viewer: handshake rejected
+ FRAME = 0x10 # host -> viewer: JPEG frame
+ AUDIO = 0x11 # host -> viewer: PCM audio chunk
+ INPUT = 0x20 # viewer -> host: JSON input message
+ CLIPBOARD = 0x21 # either way: clipboard payload (text or image)
+ FILE_BEGIN = 0x22 # either way: JSON metadata for an incoming transfer
+ FILE_CHUNK = 0x23 # either way: 36-byte transfer id + chunk bytes
+ FILE_END = 0x24 # either way: JSON status for a finished transfer
+ PING = 0x30 # either way: liveness
+
+
+def encode_frame(message_type: MessageType, payload: bytes = b"") -> bytes:
+ """Serialise ``payload`` with a typed header."""
+ if not isinstance(payload, (bytes, bytearray)):
+ raise TypeError("payload must be bytes")
+ if len(payload) > MAX_PAYLOAD_BYTES:
+ raise ProtocolError(
+ f"payload too large: {len(payload)} > {MAX_PAYLOAD_BYTES}"
+ )
+ header = struct.pack(_HEADER_FMT, _MAGIC, int(message_type), len(payload))
+ return header + bytes(payload)
+
+
+def decode_frame_header(header: bytes) -> Tuple[MessageType, int]:
+ """Validate the header bytes and return ``(type, length)``."""
+ if len(header) != HEADER_SIZE:
+ raise ProtocolError(
+ f"header must be {HEADER_SIZE} bytes, got {len(header)}"
+ )
+ magic, type_byte, length = struct.unpack(_HEADER_FMT, header)
+ if magic != _MAGIC:
+ raise ProtocolError(f"bad magic: {magic!r}")
+ if length > MAX_PAYLOAD_BYTES:
+ raise ProtocolError(
+ f"declared payload too large: {length} > {MAX_PAYLOAD_BYTES}"
+ )
+ try:
+ return MessageType(type_byte), int(length)
+ except ValueError as error:
+ raise ProtocolError(f"unknown message type 0x{type_byte:02x}") from error
+
+
+def read_exact(sock, length: int) -> bytes:
+ """Read exactly ``length`` bytes from ``sock`` or raise ConnectionError."""
+ chunks = []
+ remaining = length
+ while remaining > 0:
+ chunk = sock.recv(remaining)
+ if not chunk:
+ raise ConnectionError("peer closed connection")
+ chunks.append(chunk)
+ remaining -= len(chunk)
+ return b"".join(chunks)
+
+
+def read_message(sock) -> Tuple[MessageType, bytes]:
+ """Read one full ``(type, payload)`` message from ``sock``."""
+ header = read_exact(sock, HEADER_SIZE)
+ msg_type, length = decode_frame_header(header)
+ payload = read_exact(sock, length) if length else b""
+ return msg_type, payload
diff --git a/je_auto_control/utils/remote_desktop/registry.py b/je_auto_control/utils/remote_desktop/registry.py
new file mode 100644
index 00000000..e88e48e8
--- /dev/null
+++ b/je_auto_control/utils/remote_desktop/registry.py
@@ -0,0 +1,128 @@
+"""Process-global singletons used by AC_remote_* executor commands.
+
+JSON action scripts and the GUI both want to talk to one running host
+and at most one active viewer without juggling handles. Holding those
+references here keeps :mod:`action_executor` thin and avoids circular
+imports between the executor and the host/viewer classes.
+"""
+import ssl
+from typing import Any, Callable, Dict, Optional, Sequence
+
+from je_auto_control.utils.remote_desktop.host import RemoteDesktopHost
+from je_auto_control.utils.remote_desktop.viewer import RemoteDesktopViewer
+
+FrameCallback = Callable[[bytes], None]
+ErrorCallback = Callable[[Exception], None]
+
+
+class _RemoteDesktopRegistry:
+ """Hold one host + one viewer for the executor command surface."""
+
+ def __init__(self) -> None:
+ self._host: Optional[RemoteDesktopHost] = None
+ self._viewer: Optional[RemoteDesktopViewer] = None
+
+ @property
+ def host(self) -> Optional[RemoteDesktopHost]:
+ return self._host
+
+ @property
+ def viewer(self) -> Optional[RemoteDesktopViewer]:
+ return self._viewer
+
+ def start_host(self, token: str,
+ bind: str = "127.0.0.1",
+ port: int = 0,
+ fps: float = 10.0,
+ quality: int = 70,
+ region: Optional[Sequence[int]] = None,
+ max_clients: int = 4,
+ host_id: Optional[str] = None,
+ ssl_context: Optional[ssl.SSLContext] = None,
+ ) -> Dict[str, Any]:
+ """Stop any existing host, then start a fresh one with the given config."""
+ self.stop_host()
+ host = RemoteDesktopHost(
+ token=token, bind=bind, port=int(port),
+ fps=float(fps), quality=int(quality),
+ region=region, max_clients=int(max_clients),
+ host_id=host_id, ssl_context=ssl_context,
+ )
+ host.start()
+ self._host = host
+ return self.host_status()
+
+ def stop_host(self, timeout: float = 2.0) -> Dict[str, Any]:
+ """Stop the active host (if any) and clear the slot."""
+ if self._host is not None:
+ self._host.stop(timeout=timeout)
+ self._host = None
+ return self.host_status()
+
+ def host_status(self) -> Dict[str, Any]:
+ host = self._host
+ if host is None:
+ return {
+ "running": False, "port": 0, "connected_clients": 0,
+ "host_id": None,
+ }
+ return {
+ "running": host.is_running,
+ "port": host.port,
+ "connected_clients": host.connected_clients,
+ "host_id": host.host_id,
+ }
+
+ def connect_viewer(self, host: str, port: int, token: str,
+ timeout: float = 5.0,
+ on_frame: Optional[FrameCallback] = None,
+ on_error: Optional[ErrorCallback] = None,
+ expected_host_id: Optional[str] = None,
+ ssl_context: Optional[ssl.SSLContext] = None,
+ server_hostname: Optional[str] = None,
+ ) -> Dict[str, Any]:
+ """Disconnect any existing viewer, then connect a fresh one.
+
+ ``on_frame`` and ``on_error`` are wired before the receiver
+ thread starts, so no frame can arrive while the GUI is still
+ attaching its callbacks. When ``expected_host_id`` is provided
+ the handshake is rejected if the server reports a different ID.
+ Pass an ``ssl_context`` to upgrade the connection to TLS.
+ """
+ self.disconnect_viewer()
+ viewer = RemoteDesktopViewer(
+ host=host, port=int(port), token=token,
+ on_frame=on_frame, on_error=on_error,
+ expected_host_id=expected_host_id,
+ ssl_context=ssl_context,
+ server_hostname=server_hostname,
+ )
+ viewer.connect(timeout=float(timeout))
+ self._viewer = viewer
+ return self.viewer_status()
+
+ def disconnect_viewer(self, timeout: float = 2.0) -> Dict[str, Any]:
+ """Disconnect the active viewer (if any) and clear the slot."""
+ if self._viewer is not None:
+ self._viewer.disconnect(timeout=timeout)
+ self._viewer = None
+ return self.viewer_status()
+
+ def viewer_status(self) -> Dict[str, Any]:
+ viewer = self._viewer
+ if viewer is None:
+ return {"connected": False, "host_id": None}
+ return {
+ "connected": viewer.connected,
+ "host_id": viewer.remote_host_id,
+ }
+
+ def send_input(self, action: Dict[str, Any]) -> Dict[str, Any]:
+ """Forward ``action`` through the connected viewer, raise if offline."""
+ if self._viewer is None or not self._viewer.connected:
+ raise ConnectionError("no remote viewer is connected")
+ self._viewer.send_input(action)
+ return {"sent": True}
+
+
+registry = _RemoteDesktopRegistry()
diff --git a/je_auto_control/utils/remote_desktop/transport.py b/je_auto_control/utils/remote_desktop/transport.py
new file mode 100644
index 00000000..5be224ce
--- /dev/null
+++ b/je_auto_control/utils/remote_desktop/transport.py
@@ -0,0 +1,123 @@
+"""Pluggable typed-message transport for the remote-desktop protocol.
+
+The host and viewer always exchange the same typed messages
+(``MessageType`` from :mod:`protocol`), but the wire layer can be either
+the original raw-TCP framing or WebSocket binary frames. ``MessageChannel``
+hides that distinction so the rest of the codebase deals with
+``send_typed`` / ``read_typed`` only.
+"""
+import socket
+import threading
+from typing import Tuple
+
+from je_auto_control.utils.remote_desktop.protocol import (
+ HEADER_SIZE, MessageType, ProtocolError,
+ decode_frame_header, encode_frame, read_message,
+)
+from je_auto_control.utils.remote_desktop.ws_protocol import (
+ recv_message as ws_recv_message,
+ send_binary as ws_send_binary,
+ send_close as ws_send_close,
+)
+
+
+class MessageChannel:
+ """Abstract bidirectional typed-message endpoint."""
+
+ def send_typed(self, message_type: MessageType, payload: bytes) -> None:
+ raise NotImplementedError
+
+ def read_typed(self) -> Tuple[MessageType, bytes]:
+ raise NotImplementedError
+
+ def settimeout(self, timeout) -> None:
+ raise NotImplementedError
+
+ def close(self) -> None:
+ raise NotImplementedError
+
+
+class TcpMessageChannel(MessageChannel):
+ """Original transport: each typed message is one length-prefixed frame."""
+
+ def __init__(self, sock: socket.socket) -> None:
+ self._sock = sock
+ self._send_lock = threading.Lock()
+
+ def send_typed(self, message_type: MessageType, payload: bytes) -> None:
+ data = encode_frame(message_type, payload)
+ with self._send_lock:
+ self._sock.sendall(data)
+
+ def read_typed(self) -> Tuple[MessageType, bytes]:
+ return read_message(self._sock)
+
+ def settimeout(self, timeout) -> None:
+ self._sock.settimeout(timeout)
+
+ def close(self) -> None:
+ try:
+ self._sock.shutdown(socket.SHUT_RDWR)
+ except OSError:
+ pass
+ try:
+ self._sock.close()
+ except OSError:
+ pass
+
+ @property
+ def sock(self) -> socket.socket:
+ return self._sock
+
+
+class WsMessageChannel(MessageChannel):
+ """WebSocket transport: each WS BINARY frame carries one typed message.
+
+ The WS payload is the existing typed-frame encoding (magic + type +
+ length + body), so :func:`decode_frame_header` and :func:`encode_frame`
+ are reused unchanged. ``mask_outgoing`` follows RFC 6455: clients must
+ mask, servers must not.
+ """
+
+ def __init__(self, sock: socket.socket, mask_outgoing: bool) -> None:
+ self._sock = sock
+ self._mask = bool(mask_outgoing)
+ self._send_lock = threading.Lock()
+
+ def send_typed(self, message_type: MessageType, payload: bytes) -> None:
+ data = encode_frame(message_type, payload)
+ with self._send_lock:
+ ws_send_binary(self._sock, data, mask=self._mask)
+
+ def read_typed(self) -> Tuple[MessageType, bytes]:
+ ws_payload = ws_recv_message(self._sock)
+ if len(ws_payload) < HEADER_SIZE:
+ raise ProtocolError("WS payload too short to contain typed header")
+ msg_type, length = decode_frame_header(ws_payload[:HEADER_SIZE])
+ body = ws_payload[HEADER_SIZE:HEADER_SIZE + length]
+ if len(body) != length:
+ raise ProtocolError(
+ f"declared length {length} but ws payload had {len(body)}"
+ )
+ return msg_type, body
+
+ def settimeout(self, timeout) -> None:
+ self._sock.settimeout(timeout)
+
+ def close(self) -> None:
+ try:
+ ws_send_close(self._sock, mask=self._mask)
+ except OSError:
+ pass
+ try:
+ self._sock.shutdown(socket.SHUT_RDWR)
+ except OSError:
+ pass
+ try:
+ self._sock.close()
+ except OSError:
+ pass
+
+ @property
+ def sock(self) -> socket.socket:
+ return self._sock
diff --git a/je_auto_control/utils/remote_desktop/viewer.py b/je_auto_control/utils/remote_desktop/viewer.py
new file mode 100644
index 00000000..bfb56588
--- /dev/null
+++ b/je_auto_control/utils/remote_desktop/viewer.py
@@ -0,0 +1,365 @@
+"""TCP viewer that receives JPEG frames and forwards input messages."""
+import json
+import socket
+import ssl
+import threading
+from typing import Any, Callable, Mapping, Optional
+
+from je_auto_control.utils.logging.logging_instance import autocontrol_logger
+from je_auto_control.utils.remote_desktop.auth import compute_response
+from je_auto_control.utils.remote_desktop.clipboard_sync import (
+ ClipboardSyncError, decode as decode_clipboard, encode_image, encode_text,
+)
+from je_auto_control.utils.remote_desktop.file_transfer import (
+ FileReceiver, FileTransferError, send_file,
+)
+from je_auto_control.utils.remote_desktop.host_id import validate_host_id
+from je_auto_control.utils.remote_desktop.protocol import (
+ AuthenticationError, MessageType, ProtocolError,
+)
+from je_auto_control.utils.remote_desktop.transport import (
+ MessageChannel, TcpMessageChannel,
+)
+
+FrameCallback = Callable[[bytes], None]
+AudioCallback = Callable[[bytes], None]
+ClipboardCallback = Callable[[str, Any], None]
+ErrorCallback = Callable[[Exception], None]
+
+_DEFAULT_AUTH_TIMEOUT_S = 5.0
+_DEFAULT_CONNECT_TIMEOUT_S = 5.0
+_NOT_CONNECTED_MESSAGE = "viewer is not connected"
+
+
+def _extract_host_id(payload: bytes) -> Optional[str]:
+ """Pull ``host_id`` out of an AUTH_OK payload (JSON or empty)."""
+ if not payload:
+ return None
+ try:
+ body = json.loads(payload.decode("utf-8"))
+ except (UnicodeDecodeError, json.JSONDecodeError):
+ return None
+ value = body.get("host_id") if isinstance(body, dict) else None
+ return value if isinstance(value, str) else None
+
+
+class RemoteDesktopViewer:
+ """Connect to a :class:`RemoteDesktopHost` and stream frames + input.
+
+ Frames are delivered to ``on_frame`` from a background thread, so the
+ callback must be quick or hand work off (e.g. via ``QMetaObject`` for
+ Qt). ``send_input`` is safe to call from any thread.
+ """
+
+ def __init__(self, host: str, port: int, token: str,
+ on_frame: Optional[FrameCallback] = None,
+ on_error: Optional[ErrorCallback] = None,
+ on_audio: Optional[AudioCallback] = None,
+ on_clipboard: Optional[ClipboardCallback] = None,
+ expected_host_id: Optional[str] = None,
+ ssl_context: Optional[ssl.SSLContext] = None,
+ server_hostname: Optional[str] = None,
+ ) -> None:
+ if not isinstance(host, str) or not host:
+ raise ValueError("host must be a non-empty string")
+ if not isinstance(token, str) or not token:
+ raise ValueError("token must be a non-empty string")
+ self._host = host
+ self._port = int(port)
+ self._token = token
+ self._on_frame = on_frame
+ self._on_error = on_error
+ self._on_audio = on_audio
+ self._on_clipboard = on_clipboard
+ self._file_receiver: Optional[FileReceiver] = None
+ self._expected_host_id = (validate_host_id(expected_host_id)
+ if expected_host_id else None)
+ self._remote_host_id: Optional[str] = None
+ self._ssl_context = ssl_context
+ self._server_hostname = server_hostname
+ self._channel: Optional[MessageChannel] = None
+ self._sock: Optional[socket.socket] = None
+ self._shutdown = threading.Event()
+ self._receiver: Optional[threading.Thread] = None
+ self._connected = False
+
+ @property
+ def connected(self) -> bool:
+ return self._connected and not self._shutdown.is_set()
+
+ @property
+ def remote_host_id(self) -> Optional[str]:
+ """The host ID announced in AUTH_OK; ``None`` until handshake completes."""
+ return self._remote_host_id
+
+ def connect(self, timeout: float = _DEFAULT_CONNECT_TIMEOUT_S) -> None:
+ """Open the (optionally TLS) connection and complete the auth handshake.
+
+ Spawns a receiver thread on success. Raises
+ :class:`AuthenticationError` if the handshake fails.
+ """
+ if self._connected:
+ return
+ raw_sock = socket.create_connection(
+ (self._host, self._port), timeout=timeout,
+ )
+ raw_sock.settimeout(_DEFAULT_AUTH_TIMEOUT_S)
+ try:
+ sock = self._maybe_wrap_tls(raw_sock)
+ channel = self._build_channel(sock)
+ self._handshake(channel)
+ except (AuthenticationError, ProtocolError, OSError):
+ try:
+ raw_sock.close()
+ except OSError:
+ pass
+ raise
+ channel.settimeout(None)
+ self._sock = sock
+ self._channel = channel
+ self._shutdown.clear()
+ self._connected = True
+ self._receiver = threading.Thread(
+ target=self._recv_loop, name="rd-viewer", daemon=True,
+ )
+ self._receiver.start()
+
+ def _maybe_wrap_tls(self, raw_sock: socket.socket) -> socket.socket:
+ """Return a TLS-wrapped socket when an ssl_context was configured."""
+ if self._ssl_context is None:
+ return raw_sock
+ hostname = self._server_hostname or self._host
+ if (self._ssl_context.check_hostname is False
+ and self._ssl_context.verify_mode == ssl.CERT_NONE):
+ # ``wrap_socket`` rejects server_hostname when verification is off.
+ hostname = None
+ return self._ssl_context.wrap_socket(
+ raw_sock, server_hostname=hostname,
+ )
+
+ def _build_channel(self, sock: socket.socket) -> MessageChannel:
+ """Hook for transports: TCP wraps directly, WS overrides this."""
+ return TcpMessageChannel(sock)
+
+ def disconnect(self, timeout: float = 2.0) -> None:
+ """Close the connection and join the receiver thread."""
+ self._shutdown.set()
+ channel = self._channel
+ if channel is not None:
+ channel.close()
+ self._sock = None
+ self._channel = None
+ receiver = self._receiver
+ if receiver is not None:
+ receiver.join(timeout=timeout)
+ self._receiver = None
+ self._connected = False
+
+ def send_input(self, action: Mapping[str, Any]) -> None:
+ """JSON-encode ``action`` and forward it as an INPUT message."""
+ if not self._connected or self._channel is None:
+ raise ConnectionError(_NOT_CONNECTED_MESSAGE)
+ if not isinstance(action, Mapping):
+ raise TypeError("action must be a mapping")
+ payload = json.dumps(dict(action), ensure_ascii=False).encode("utf-8")
+ self._channel.send_typed(MessageType.INPUT, payload)
+
+ def send_ping(self) -> None:
+ """Send a no-op PING message; the host treats it as liveness."""
+ if not self._connected or self._channel is None:
+ raise ConnectionError(_NOT_CONNECTED_MESSAGE)
+ self._channel.send_typed(MessageType.PING, b"")
+
+ def send_clipboard_text(self, text: str) -> None:
+ """Push ``text`` onto the host's clipboard."""
+ if not self._connected or self._channel is None:
+ raise ConnectionError(_NOT_CONNECTED_MESSAGE)
+ self._channel.send_typed(MessageType.CLIPBOARD, encode_text(text))
+
+ def send_clipboard_image(self, png_bytes: bytes) -> None:
+ """Push a PNG image onto the host's clipboard."""
+ if not self._connected or self._channel is None:
+ raise ConnectionError(_NOT_CONNECTED_MESSAGE)
+ self._channel.send_typed(MessageType.CLIPBOARD, encode_image(png_bytes))
+
+ def set_file_receiver(self, receiver: FileReceiver) -> None:
+ """Replace the default ``FileReceiver`` used for incoming files."""
+ self._file_receiver = receiver
+
+ def _ensure_file_receiver(self) -> FileReceiver:
+ if self._file_receiver is None:
+ self._file_receiver = FileReceiver()
+ return self._file_receiver
+
+ def send_file(self, source_path: str, dest_path: str,
+ on_progress=None):
+ """Stream ``source_path`` to ``dest_path`` on the host.
+
+ Returns the :class:`FileSendResult`. Synchronous: callers wanting
+ a non-blocking upload should run this in a worker thread.
+ """
+ if not self._connected or self._channel is None:
+ raise ConnectionError(_NOT_CONNECTED_MESSAGE)
+ return send_file(self._channel, source_path, dest_path,
+ on_progress=on_progress)
+
+ def _handle_file_payload(self, msg_type: MessageType,
+ payload: bytes) -> None:
+ receiver = self._ensure_file_receiver()
+ try:
+ if msg_type is MessageType.FILE_BEGIN:
+ receiver.handle_begin(payload)
+ elif msg_type is MessageType.FILE_CHUNK:
+ receiver.handle_chunk(payload)
+ elif msg_type is MessageType.FILE_END:
+ receiver.handle_end(payload)
+ except FileTransferError as error:
+ autocontrol_logger.info(
+ "remote_desktop viewer bad file message: %r", error,
+ )
+
+ def _handle_clipboard_payload(self, payload: bytes) -> None:
+ try:
+ kind, data = decode_clipboard(payload)
+ except ClipboardSyncError as error:
+ autocontrol_logger.info(
+ "remote_desktop viewer bad CLIPBOARD: %r", error,
+ )
+ return
+ if self._on_clipboard is not None:
+ try:
+ self._on_clipboard(kind, data)
+ except Exception: # noqa: BLE001
+ autocontrol_logger.exception(
+ "remote_desktop viewer on_clipboard callback raised"
+ )
+
+ # context manager ----------------------------------------------------
+
+ def __enter__(self) -> "RemoteDesktopViewer":
+ self.connect()
+ return self
+
+ def __exit__(self, exc_type, exc, tb) -> None:
+ self.disconnect()
+
+ # internals ----------------------------------------------------------
+
+ def _handshake(self, channel: MessageChannel) -> None:
+ msg_type, payload = channel.read_typed()
+ if msg_type is not MessageType.AUTH_CHALLENGE:
+ raise AuthenticationError(
+ f"expected AUTH_CHALLENGE, got {msg_type.name}"
+ )
+ response = compute_response(self._token, payload)
+ channel.send_typed(MessageType.AUTH_RESPONSE, response)
+ msg_type, payload = channel.read_typed()
+ if msg_type is MessageType.AUTH_OK:
+ self._remote_host_id = _extract_host_id(payload)
+ self._verify_host_id(self._remote_host_id)
+ return
+ if msg_type is MessageType.AUTH_FAIL:
+ raise AuthenticationError(
+ payload.decode("utf-8", errors="replace") or "auth rejected"
+ )
+ raise AuthenticationError(
+ f"unexpected handshake reply {msg_type.name}"
+ )
+
+ def _verify_host_id(self, announced: Optional[str]) -> None:
+ """Reject the connection when the server's ID does not match expectation."""
+ if self._expected_host_id is None:
+ return
+ if announced != self._expected_host_id:
+ raise AuthenticationError(
+ f"host_id mismatch: expected {self._expected_host_id}, "
+ f"got {announced!r}"
+ )
+
+ def _recv_loop(self) -> None:
+ channel = self._channel
+ if channel is None:
+ return
+ try:
+ while not self._shutdown.is_set():
+ if not self._read_and_dispatch(channel):
+ return
+ finally:
+ self._connected = False
+
+ def _read_and_dispatch(self, channel: MessageChannel) -> bool:
+ """Read one typed message and dispatch it; return False on disconnect."""
+ try:
+ msg_type, payload = channel.read_typed()
+ except (OSError, ProtocolError) as error:
+ self._notify_error(error)
+ return False
+ handler = _RECV_HANDLERS.get(msg_type)
+ if handler is None:
+ autocontrol_logger.info(
+ "remote_desktop viewer ignoring %s message", msg_type.name,
+ )
+ return True
+ handler(self, payload, msg_type)
+ return True
+
+ # --- per-message dispatch helpers ---------------------------------
+
+ def _on_recv_frame(self, payload: bytes,
+ msg_type: MessageType) -> None:
+ del msg_type
+ if self._on_frame is None:
+ return
+ try:
+ self._on_frame(payload)
+ except Exception as error: # noqa: BLE001 callback isolation
+ autocontrol_logger.exception(
+ "remote_desktop viewer on_frame callback raised"
+ )
+ self._notify_error(error)
+
+ def _on_recv_audio(self, payload: bytes,
+ msg_type: MessageType) -> None:
+ del msg_type
+ if self._on_audio is None:
+ return
+ try:
+ self._on_audio(payload)
+ except Exception: # noqa: BLE001
+ autocontrol_logger.exception(
+ "remote_desktop viewer on_audio callback raised"
+ )
+
+ def _on_recv_clipboard(self, payload: bytes,
+ msg_type: MessageType) -> None:
+ del msg_type
+ self._handle_clipboard_payload(payload)
+
+ def _on_recv_file(self, payload: bytes,
+ msg_type: MessageType) -> None:
+ self._handle_file_payload(msg_type, payload)
+
+ def _on_recv_ping(self, payload: bytes,
+ msg_type: MessageType) -> None:
+ del payload, msg_type
+
+ def _notify_error(self, error: BaseException) -> None:
+ if self._shutdown.is_set() or self._on_error is None:
+ return
+ try:
+ self._on_error(error)
+ except Exception: # noqa: BLE001 callback isolation
+ autocontrol_logger.exception(
+ "remote_desktop viewer on_error callback raised"
+ )
+
+
+_RECV_HANDLERS = {
+ MessageType.FRAME: RemoteDesktopViewer._on_recv_frame,
+ MessageType.AUDIO: RemoteDesktopViewer._on_recv_audio,
+ MessageType.CLIPBOARD: RemoteDesktopViewer._on_recv_clipboard,
+ MessageType.FILE_BEGIN: RemoteDesktopViewer._on_recv_file,
+ MessageType.FILE_CHUNK: RemoteDesktopViewer._on_recv_file,
+ MessageType.FILE_END: RemoteDesktopViewer._on_recv_file,
+ MessageType.PING: RemoteDesktopViewer._on_recv_ping,
+}
diff --git a/je_auto_control/utils/remote_desktop/ws_host.py b/je_auto_control/utils/remote_desktop/ws_host.py
new file mode 100644
index 00000000..44f773fa
--- /dev/null
+++ b/je_auto_control/utils/remote_desktop/ws_host.py
@@ -0,0 +1,37 @@
+"""WebSocket-transport variant of :class:`RemoteDesktopHost`.
+
+The only thing this subclass overrides is the per-connection channel
+factory: each accepted (optionally TLS-wrapped) socket performs an HTTP
+upgrade handshake before being handed to the shared client-handler
+machinery. Auth, capture, frame broadcast, and input dispatch all
+remain identical, so `wss://` is just `ws://` over the same
+``ssl_context`` already supported by the parent.
+"""
+import socket
+
+from je_auto_control.utils.remote_desktop.host import RemoteDesktopHost
+from je_auto_control.utils.remote_desktop.transport import (
+ MessageChannel, WsMessageChannel,
+)
+from je_auto_control.utils.remote_desktop.ws_protocol import (
+ WsProtocolError, server_handshake,
+)
+
+_HANDSHAKE_TIMEOUT_S = 5.0
+
+
+class WebSocketDesktopHost(RemoteDesktopHost):
+ """Speak the same protocol as :class:`RemoteDesktopHost` over WebSockets."""
+
+ def _build_channel(self, sock: socket.socket,
+ address) -> MessageChannel:
+ del address
+ sock.settimeout(_HANDSHAKE_TIMEOUT_S)
+ try:
+ server_handshake(sock)
+ except (WsProtocolError, OSError) as error:
+ raise RuntimeError(
+ f"websocket handshake failed: {error}"
+ ) from error
+ sock.settimeout(None)
+ return WsMessageChannel(sock, mask_outgoing=False)
diff --git a/je_auto_control/utils/remote_desktop/ws_protocol.py b/je_auto_control/utils/remote_desktop/ws_protocol.py
new file mode 100644
index 00000000..9361d665
--- /dev/null
+++ b/je_auto_control/utils/remote_desktop/ws_protocol.py
@@ -0,0 +1,264 @@
+"""Minimal RFC 6455 WebSocket framing + handshake helpers.
+
+This is the smallest implementation that round-trips our typed-message
+payloads as WebSocket BINARY frames. It deliberately rejects fragmented
+data frames (FIN must be 1) — every typed message we send fits in a
+single ~16 MiB WS frame, so reassembly machinery would only add risk
+without buying anything. PING / PONG control frames are handled
+transparently in :func:`recv_message`.
+"""
+import base64
+import hashlib
+import os
+import socket
+import struct
+from typing import Tuple
+
+WS_GUID = b"258EAFA5-E914-47DA-95CA-C5AB0DC85B11"
+
+OPCODE_CONTINUATION = 0x0
+OPCODE_TEXT = 0x1
+OPCODE_BINARY = 0x2
+OPCODE_CLOSE = 0x8
+OPCODE_PING = 0x9
+OPCODE_PONG = 0xA
+
+MAX_FRAME_PAYLOAD_BYTES = 16 * 1024 * 1024
+MAX_HEADER_BYTES = 8192
+
+
+class WsProtocolError(RuntimeError):
+ """Raised when a peer breaks the WebSocket framing contract."""
+
+
+class WsClosedError(ConnectionError):
+ """Raised when the peer sends a CLOSE frame."""
+
+
+# --- handshake ------------------------------------------------------------
+
+
+def server_handshake(sock: socket.socket) -> str:
+ """Read the HTTP upgrade request and reply ``101 Switching Protocols``.
+
+ Returns the request path (``"/"`` for unspecified), so callers can
+ route on it later if they want to host multiple services on one port.
+ """
+ request = _read_http_message(sock)
+ request_line = request.split("\r\n", 1)[0]
+ parts = request_line.split(" ")
+ if len(parts) < 3 or parts[0].upper() != "GET":
+ _send_http_error(sock, 400, "Bad Request")
+ raise WsProtocolError(f"bad request line {request_line!r}")
+ path = parts[1] or "/"
+ headers = _parse_headers(request)
+ if "websocket" not in headers.get("upgrade", "").lower():
+ _send_http_error(sock, 400, "Bad Request: Upgrade")
+ raise WsProtocolError("missing websocket upgrade header")
+ if "upgrade" not in headers.get("connection", "").lower():
+ _send_http_error(sock, 400, "Bad Request: Connection")
+ raise WsProtocolError("missing connection upgrade header")
+ key = headers.get("sec-websocket-key")
+ if not key:
+ _send_http_error(sock, 400, "Bad Request: Sec-WebSocket-Key")
+ raise WsProtocolError("missing Sec-WebSocket-Key")
+ accept = _compute_accept(key)
+ response = (
+ "HTTP/1.1 101 Switching Protocols\r\n"
+ "Upgrade: websocket\r\n"
+ "Connection: Upgrade\r\n"
+ f"Sec-WebSocket-Accept: {accept}\r\n"
+ "\r\n"
+ ).encode("ascii")
+ sock.sendall(response)
+ return path
+
+
+def client_handshake(sock: socket.socket, host: str, port: int,
+ path: str = "/") -> None:
+ """Send the HTTP upgrade and validate the ``101`` reply."""
+ key_bytes = os.urandom(16)
+ key = base64.b64encode(key_bytes).decode("ascii")
+ request = (
+ f"GET {path} HTTP/1.1\r\n"
+ f"Host: {host}:{port}\r\n"
+ "Upgrade: websocket\r\n"
+ "Connection: Upgrade\r\n"
+ f"Sec-WebSocket-Key: {key}\r\n"
+ "Sec-WebSocket-Version: 13\r\n"
+ "\r\n"
+ ).encode("ascii")
+ sock.sendall(request)
+ response = _read_http_message(sock)
+ status = _parse_status(response)
+ if status != 101:
+ raise WsProtocolError(f"server returned status {status}")
+ headers = _parse_headers(response)
+ if "websocket" not in headers.get("upgrade", "").lower():
+ raise WsProtocolError("server missing Upgrade: websocket")
+ expected = _compute_accept(key)
+ if headers.get("sec-websocket-accept", "") != expected:
+ raise WsProtocolError("Sec-WebSocket-Accept mismatch")
+
+
+def _read_http_message(sock: socket.socket) -> str:
+ buf = bytearray()
+ while b"\r\n\r\n" not in buf:
+ chunk = sock.recv(1024)
+ if not chunk:
+ raise WsProtocolError("connection closed during handshake")
+ buf.extend(chunk)
+ if len(buf) > MAX_HEADER_BYTES:
+ raise WsProtocolError("HTTP header too large")
+ return bytes(buf).decode("iso-8859-1")
+
+
+def _parse_status(response: str) -> int:
+ line = response.split("\r\n", 1)[0]
+ parts = line.split(" ", 2)
+ if len(parts) < 2 or not parts[1].isdigit():
+ raise WsProtocolError(f"bad status line {line!r}")
+ return int(parts[1])
+
+
+def _parse_headers(text: str) -> dict:
+ headers = {}
+ for line in text.split("\r\n"):
+ if ":" in line:
+ name, _, value = line.partition(":")
+ headers[name.strip().lower()] = value.strip()
+ return headers
+
+
+def _compute_accept(key: str) -> str:
+ # RFC 6455 mandates SHA-1 for the Sec-WebSocket-Accept handshake;
+ # ``usedforsecurity=False`` tells linters this is a protocol-required
+ # checksum, not a cryptographic primitive.
+ digest = hashlib.sha1( # nosec B324 # nosemgrep: python.lang.security.insecure-hash-algorithms.insecure-hash-algorithm-sha1
+ key.encode("ascii") + WS_GUID,
+ usedforsecurity=False,
+ ).digest()
+ return base64.b64encode(digest).decode("ascii")
+
+
+def _send_http_error(sock: socket.socket, status: int, message: str) -> None:
+ response = (
+ f"HTTP/1.1 {status} {message}\r\n"
+ "Connection: close\r\n"
+ "Content-Length: 0\r\n"
+ "\r\n"
+ ).encode("ascii")
+ try:
+ sock.sendall(response)
+ except OSError:
+ pass
+
+
+# --- frame I/O ------------------------------------------------------------
+
+
+def send_binary(sock: socket.socket, payload: bytes,
+ mask: bool = False) -> None:
+ """Send a single BINARY frame with FIN=1."""
+ _send_frame(sock, OPCODE_BINARY, payload, mask=mask)
+
+
+def send_close(sock: socket.socket, code: int = 1000,
+ mask: bool = False) -> None:
+ """Send a CLOSE frame with the given status code."""
+ _send_frame(sock, OPCODE_CLOSE, struct.pack("!H", code), mask=mask)
+
+
+def _send_frame(sock: socket.socket, opcode: int, payload: bytes,
+ mask: bool) -> None:
+ if len(payload) > MAX_FRAME_PAYLOAD_BYTES:
+ raise WsProtocolError(
+ f"payload too large: {len(payload)} > {MAX_FRAME_PAYLOAD_BYTES}"
+ )
+ header = bytearray()
+ # First byte: FIN bit set, no reserved bits, low nibble is opcode.
+ header.append(0x80 | (opcode & 0x0F))
+ length = len(payload)
+ mask_bit = 0x80 if mask else 0
+ if length < 126:
+ header.append(mask_bit | length)
+ elif length < 0x10000:
+ header.append(mask_bit | 126)
+ header.extend(struct.pack("!H", length))
+ else:
+ header.append(mask_bit | 127)
+ header.extend(struct.pack("!Q", length))
+ if mask:
+ masking_key = os.urandom(4)
+ header.extend(masking_key)
+ masked = bytes(
+ payload[i] ^ masking_key[i % 4] for i in range(length)
+ )
+ sock.sendall(bytes(header) + masked)
+ else:
+ sock.sendall(bytes(header) + bytes(payload))
+
+
+def recv_message(sock: socket.socket) -> bytes:
+ """Read one application message (BINARY) and return its payload bytes.
+
+ Control frames (PING / PONG / CLOSE) are handled inline: PINGs get a
+ PONG reply, PONGs are dropped, CLOSE raises :class:`WsClosedError`.
+ """
+ while True:
+ opcode, payload = _read_frame(sock)
+ if opcode == OPCODE_BINARY:
+ return payload
+ if opcode == OPCODE_TEXT:
+ raise WsProtocolError("text frames are not supported")
+ if opcode == OPCODE_CLOSE:
+ raise WsClosedError("peer sent CLOSE")
+ if opcode == OPCODE_PING:
+ _send_frame(sock, OPCODE_PONG, payload, mask=False)
+ continue
+ if opcode == OPCODE_PONG:
+ continue
+ if opcode == OPCODE_CONTINUATION:
+ raise WsProtocolError("standalone CONTINUATION frame")
+ raise WsProtocolError(f"unknown opcode 0x{opcode:x}")
+
+
+def _read_frame(sock: socket.socket) -> Tuple[int, bytes]:
+ header = _read_exact(sock, 2)
+ fin = (header[0] & 0x80) != 0
+ rsv = (header[0] >> 4) & 0x07
+ opcode = header[0] & 0x0F
+ masked = (header[1] & 0x80) != 0
+ length = header[1] & 0x7F
+ if rsv != 0:
+ raise WsProtocolError("RSV bits set")
+ if not fin:
+ raise WsProtocolError("fragmented frames not supported")
+ if length == 126:
+ length = struct.unpack("!H", _read_exact(sock, 2))[0]
+ elif length == 127:
+ length = struct.unpack("!Q", _read_exact(sock, 8))[0]
+ if length > MAX_FRAME_PAYLOAD_BYTES:
+ raise WsProtocolError(
+ f"declared payload too large: {length} > {MAX_FRAME_PAYLOAD_BYTES}"
+ )
+ masking_key = _read_exact(sock, 4) if masked else None
+ payload = _read_exact(sock, length) if length > 0 else b""
+ if masking_key is not None and payload:
+ unmasked = bytes(
+ payload[i] ^ masking_key[i % 4] for i in range(len(payload))
+ )
+ return opcode, unmasked
+ return opcode, payload
+
+
+def _read_exact(sock: socket.socket, n: int) -> bytes:
+ buf = bytearray()
+ remaining = n
+ while remaining > 0:
+ chunk = sock.recv(remaining)
+ if not chunk:
+ raise ConnectionError("peer closed connection")
+ buf.extend(chunk)
+ remaining -= len(chunk)
+ return bytes(buf)
diff --git a/je_auto_control/utils/remote_desktop/ws_viewer.py b/je_auto_control/utils/remote_desktop/ws_viewer.py
new file mode 100644
index 00000000..9ccbd46f
--- /dev/null
+++ b/je_auto_control/utils/remote_desktop/ws_viewer.py
@@ -0,0 +1,29 @@
+"""WebSocket-transport variant of :class:`RemoteDesktopViewer`.
+
+Mirrors :class:`WebSocketDesktopHost`: the only difference from the TCP
+viewer is that the connect-time channel factory performs an HTTP upgrade
+handshake before falling back to the shared auth + receive loop.
+"""
+import socket
+
+from je_auto_control.utils.remote_desktop.transport import (
+ MessageChannel, WsMessageChannel,
+)
+from je_auto_control.utils.remote_desktop.viewer import RemoteDesktopViewer
+from je_auto_control.utils.remote_desktop.ws_protocol import client_handshake
+
+_DEFAULT_PATH = "/"
+
+
+class WebSocketDesktopViewer(RemoteDesktopViewer):
+ """Speak the same protocol as :class:`RemoteDesktopViewer` over WebSockets."""
+
+ def __init__(self, *args, path: str = _DEFAULT_PATH, **kwargs) -> None:
+ super().__init__(*args, **kwargs)
+ if not isinstance(path, str) or not path.startswith("/"):
+ raise ValueError("path must be an absolute URL path starting with '/'")
+ self._ws_path = path
+
+ def _build_channel(self, sock: socket.socket) -> MessageChannel:
+ client_handshake(sock, self._host, self._port, path=self._ws_path)
+ return WsMessageChannel(sock, mask_outgoing=True)
diff --git a/je_auto_control/utils/script_vars/__init__.py b/je_auto_control/utils/script_vars/__init__.py
index 7ca26c74..9ac72755 100644
--- a/je_auto_control/utils/script_vars/__init__.py
+++ b/je_auto_control/utils/script_vars/__init__.py
@@ -2,5 +2,9 @@
from je_auto_control.utils.script_vars.interpolate import (
interpolate_actions, interpolate_value, load_vars_from_json,
)
+from je_auto_control.utils.script_vars.scope import VariableScope
-__all__ = ["interpolate_actions", "interpolate_value", "load_vars_from_json"]
+__all__ = [
+ "VariableScope", "interpolate_actions", "interpolate_value",
+ "load_vars_from_json",
+]
diff --git a/je_auto_control/utils/script_vars/scope.py b/je_auto_control/utils/script_vars/scope.py
new file mode 100644
index 00000000..3d206206
--- /dev/null
+++ b/je_auto_control/utils/script_vars/scope.py
@@ -0,0 +1,60 @@
+"""Runtime variable scope for the action executor.
+
+Pre-execution interpolation in :mod:`interpolate` replaces ``${var}``
+placeholders once, against a static mapping. Some scripts need to mutate
+state during execution — counters in loops, captured OCR/locator results,
+``for_each`` items. ``VariableScope`` is a thin mutable container the
+executor exposes to flow-control commands so those commands can read and
+write the same bag the runtime interpolator consults.
+"""
+from typing import Any, Dict, Iterator, Mapping, MutableMapping, Optional
+
+
+class VariableScope(MutableMapping[str, Any]):
+ """Mutable mapping of script variables shared across action execution."""
+
+ __slots__ = ("_vars",)
+
+ def __init__(self, initial: Optional[Mapping[str, Any]] = None) -> None:
+ self._vars: Dict[str, Any] = dict(initial) if initial else {}
+
+ def __getitem__(self, key: str) -> Any:
+ return self._vars[key]
+
+ def __setitem__(self, key: str, value: Any) -> None:
+ if not isinstance(key, str) or not key:
+ raise ValueError("variable name must be a non-empty string")
+ self._vars[key] = value
+
+ def __delitem__(self, key: str) -> None:
+ del self._vars[key]
+
+ def __iter__(self) -> Iterator[str]:
+ return iter(self._vars)
+
+ def __len__(self) -> int:
+ return len(self._vars)
+
+ def __contains__(self, key: object) -> bool:
+ return key in self._vars
+
+ def set(self, name: str, value: Any) -> None:
+ """Assign ``name`` to ``value``."""
+ self[name] = value
+
+ def get_value(self, name: str, default: Any = None) -> Any:
+ """Return the variable, or ``default`` when missing."""
+ return self._vars.get(name, default)
+
+ def update_many(self, mapping: Mapping[str, Any]) -> None:
+ """Bulk-assign from a mapping."""
+ for key, value in mapping.items():
+ self[key] = value
+
+ def as_dict(self) -> Dict[str, Any]:
+ """Return a shallow copy as a plain dict (safe for interpolation)."""
+ return dict(self._vars)
+
+ def clear(self) -> None:
+ """Drop every stored variable."""
+ self._vars.clear()
diff --git a/test/unit_test/flow_control/test_flow_control.py b/test/unit_test/flow_control/test_flow_control.py
index c29cbaf3..31b7bbad 100644
--- a/test/unit_test/flow_control/test_flow_control.py
+++ b/test/unit_test/flow_control/test_flow_control.py
@@ -147,3 +147,120 @@ def test_if_image_found_selects_else_branch(monkeypatch, executor_with_hooks):
"image": "x.png", "then": [], "else": [["AC_noop"]]
}]])
assert state["count"] == 1
+
+
+def test_ac_set_var_stores_value(executor_with_hooks):
+ ex, _ = executor_with_hooks
+ ex.execute_action([["AC_set_var", {"name": "greeting", "value": "hi"}]])
+ assert ex.variables.get_value("greeting") == "hi"
+
+
+def test_ac_inc_var_increments_default_zero(executor_with_hooks):
+ ex, _ = executor_with_hooks
+ ex.execute_action([
+ ["AC_inc_var", {"name": "counter"}],
+ ["AC_inc_var", {"name": "counter", "by": 4}],
+ ])
+ assert ex.variables.get_value("counter") == 5
+
+
+def test_runtime_interpolation_uses_current_scope(executor_with_hooks):
+ ex, _ = executor_with_hooks
+ seen = []
+ ex.event_dict["AC_capture"] = lambda payload: seen.append(payload)
+ ex.execute_action([
+ ["AC_set_var", {"name": "msg", "value": "hello"}],
+ ["AC_capture", {"payload": "${msg}"}],
+ ])
+ assert seen == ["hello"]
+
+
+def test_runtime_interpolation_preserves_value_type(executor_with_hooks):
+ ex, _ = executor_with_hooks
+ seen = []
+ ex.event_dict["AC_capture"] = lambda payload: seen.append(payload)
+ ex.execute_action([
+ ["AC_set_var", {"name": "n", "value": 42}],
+ ["AC_capture", {"payload": "${n}"}],
+ ])
+ assert len(seen) == 1
+ assert seen[0] == 42
+ assert isinstance(seen[0], int)
+
+
+def test_ac_if_var_eq_runs_then(executor_with_hooks):
+ ex, state = executor_with_hooks
+ ex.execute_action([
+ ["AC_set_var", {"name": "x", "value": 5}],
+ ["AC_if_var", {
+ "name": "x", "op": "eq", "value": 5,
+ "then": [["AC_noop"]], "else": [],
+ }],
+ ])
+ assert state["count"] == 1
+
+
+def test_ac_if_var_lt_picks_else_when_false(executor_with_hooks):
+ ex, state = executor_with_hooks
+ ex.execute_action([
+ ["AC_set_var", {"name": "x", "value": 9}],
+ ["AC_if_var", {
+ "name": "x", "op": "lt", "value": 5,
+ "then": [["AC_noop"]], "else": [["AC_noop"], ["AC_noop"]],
+ }],
+ ])
+ assert state["count"] == 2
+
+
+def test_ac_if_var_unknown_op_raises(executor_with_hooks):
+ ex, _ = executor_with_hooks
+ with pytest.raises(AutoControlActionException):
+ ex.execute_action([
+ ["AC_if_var", {
+ "name": "x", "op": "wat", "value": 1,
+ "then": [], "else": [],
+ }],
+ ], raise_on_error=True)
+
+
+def test_ac_for_each_iterates_items(executor_with_hooks):
+ ex, _ = executor_with_hooks
+ seen = []
+ ex.event_dict["AC_capture"] = lambda payload: seen.append(payload)
+ ex.execute_action([
+ ["AC_for_each", {
+ "items": ["a", "b", "c"], "as": "letter",
+ "body": [["AC_capture", {"payload": "${letter}"}]],
+ }],
+ ])
+ assert seen == ["a", "b", "c"]
+
+
+def test_ac_for_each_break_stops_iteration(executor_with_hooks):
+ ex, state = executor_with_hooks
+ ex.execute_action([
+ ["AC_for_each", {
+ "items": [1, 2, 3, 4], "as": "n",
+ "body": [
+ ["AC_if_var", {
+ "name": "n", "op": "ge", "value": 3,
+ "then": [["AC_break"]], "else": [["AC_noop"]],
+ }],
+ ],
+ }],
+ ])
+ assert state["count"] == 2
+
+
+def test_deferred_args_keep_placeholders_for_each_iteration(executor_with_hooks):
+ """Body must re-resolve ${var} per iteration, not freeze first value."""
+ ex, _ = executor_with_hooks
+ seen = []
+ ex.event_dict["AC_capture"] = lambda payload: seen.append(payload)
+ ex.execute_action([
+ ["AC_for_each", {
+ "items": [10, 20, 30], "as": "v",
+ "body": [["AC_capture", {"payload": "${v}"}]],
+ }],
+ ])
+ assert seen == [10, 20, 30]
diff --git a/test/unit_test/headless/test_llm_planner.py b/test/unit_test/headless/test_llm_planner.py
new file mode 100644
index 00000000..321fccff
--- /dev/null
+++ b/test/unit_test/headless/test_llm_planner.py
@@ -0,0 +1,144 @@
+"""Tests for the LLM action planner (no real LLM calls)."""
+from typing import Optional
+
+import pytest
+
+from je_auto_control.utils.llm.backends.base import LLMBackend
+from je_auto_control.utils.llm.planner import (
+ LLMNotAvailableError, LLMPlanError, plan_actions, run_from_description,
+)
+
+
+class _StubBackend(LLMBackend):
+ """Returns canned text and records the prompt for assertions."""
+
+ name = "stub"
+
+ def __init__(self, response: str, *, available: bool = True) -> None:
+ self.available = available
+ self.response = response
+ self.last_prompt: Optional[str] = None
+ self.last_system: Optional[str] = None
+ self.last_model: Optional[str] = None
+
+ def complete(self, prompt: str, system=None, model=None,
+ max_tokens: int = 2048) -> str:
+ self.last_prompt = prompt
+ self.last_system = system
+ self.last_model = model
+ return self.response
+
+
+_KNOWN = {"AC_click_mouse", "AC_type_keyboard", "AC_set_var", "AC_loop"}
+
+
+def test_plan_actions_returns_validated_list():
+ backend = _StubBackend(
+ '[["AC_click_mouse", {"mouse_keycode": "mouse_left"}]]'
+ )
+ actions = plan_actions("click", known_commands=_KNOWN, backend=backend)
+ assert actions == [["AC_click_mouse", {"mouse_keycode": "mouse_left"}]]
+
+
+def test_plan_actions_strips_code_fence():
+ backend = _StubBackend(
+ '```json\n[["AC_click_mouse"]]\n```'
+ )
+ actions = plan_actions("click", known_commands=_KNOWN, backend=backend)
+ assert actions == [["AC_click_mouse"]]
+
+
+def test_plan_actions_extracts_json_when_wrapped_in_prose():
+ backend = _StubBackend(
+ 'Sure! Here is the plan: [["AC_click_mouse"]] hope it helps.'
+ )
+ actions = plan_actions("click", known_commands=_KNOWN, backend=backend)
+ assert actions == [["AC_click_mouse"]]
+
+
+def test_plan_actions_rejects_unknown_command():
+ backend = _StubBackend('[["AC_does_not_exist"]]')
+ with pytest.raises(Exception):
+ plan_actions("x", known_commands=_KNOWN, backend=backend)
+
+
+def test_plan_actions_rejects_non_array_response():
+ backend = _StubBackend('{"not": "an array"}')
+ with pytest.raises(LLMPlanError):
+ plan_actions("x", known_commands=_KNOWN, backend=backend)
+
+
+def test_plan_actions_rejects_empty_response():
+ backend = _StubBackend("")
+ with pytest.raises(LLMPlanError):
+ plan_actions("x", known_commands=_KNOWN, backend=backend)
+
+
+def test_plan_actions_unavailable_backend_raises():
+ backend = _StubBackend("[]", available=False)
+ with pytest.raises(LLMNotAvailableError):
+ plan_actions("x", known_commands=_KNOWN, backend=backend)
+
+
+def test_plan_actions_rejects_blank_description():
+ backend = _StubBackend("[]")
+ with pytest.raises(ValueError):
+ plan_actions(" ", known_commands=_KNOWN, backend=backend)
+
+
+def test_prompt_lists_allowed_commands_and_description():
+ backend = _StubBackend("[]")
+ try:
+ plan_actions("type hello", known_commands=_KNOWN, backend=backend)
+ except LLMPlanError:
+ pass # we only care about the prompt
+ assert backend.last_prompt is not None
+ assert "type hello" in backend.last_prompt
+ for command in _KNOWN:
+ assert command in backend.last_prompt
+
+
+def test_prompt_includes_examples_when_provided():
+ backend = _StubBackend("[]")
+ examples = [{
+ "description": "click left",
+ "actions": [["AC_click_mouse"]],
+ }]
+ try:
+ plan_actions("x", known_commands=_KNOWN, backend=backend,
+ examples=examples)
+ except LLMPlanError:
+ pass
+ assert "click left" in backend.last_prompt
+ assert "AC_click_mouse" in backend.last_prompt
+
+
+def test_run_from_description_executes_plan():
+ backend = _StubBackend('[["AC_noop"]]')
+
+ class FakeExecutor:
+ def __init__(self):
+ self.executed = None
+
+ def known_commands(self):
+ return {"AC_noop"}
+
+ def execute_action(self, actions, _validated=False):
+ self.executed = actions
+ return {"ok": True}
+
+ fake = FakeExecutor()
+ result = run_from_description("noop please", executor=fake, backend=backend)
+ assert fake.executed == [["AC_noop"]]
+ assert result["actions"] == [["AC_noop"]]
+ assert result["record"] == {"ok": True}
+
+
+def test_planner_passes_model_through_to_backend():
+ backend = _StubBackend("[]")
+ try:
+ plan_actions("x", known_commands=_KNOWN, backend=backend,
+ model="claude-sonnet-4-6")
+ except LLMPlanError:
+ pass
+ assert backend.last_model == "claude-sonnet-4-6"
diff --git a/test/unit_test/headless/test_ocr_engine.py b/test/unit_test/headless/test_ocr_engine.py
index e52b292e..05807864 100644
--- a/test/unit_test/headless/test_ocr_engine.py
+++ b/test/unit_test/headless/test_ocr_engine.py
@@ -1,5 +1,10 @@
"""Tests for the OCR parser logic (no real Tesseract binary required)."""
-from je_auto_control.utils.ocr.ocr_engine import TextMatch, _parse_matches
+import re
+
+from je_auto_control.utils.ocr import ocr_engine
+from je_auto_control.utils.ocr.ocr_engine import (
+ TextMatch, _parse_matches, find_text_regex, read_text_in_region,
+)
def _sample_data():
@@ -28,3 +33,71 @@ def test_parse_matches_applies_offsets():
def test_text_match_center_is_midpoint():
match = TextMatch(text="x", x=10, y=20, width=30, height=40, confidence=90.0)
assert match.center == (25, 40)
+
+
+class _FakePytesseract:
+ """Stand-in for pytesseract that returns a canned image_to_data dict."""
+
+ class Output:
+ DICT = "dict"
+
+ def __init__(self, data):
+ self._data = data
+
+ def image_to_data(self, _frame, lang="eng", output_type=None):
+ del lang, output_type
+ return self._data
+
+
+def _install_fake_backend(monkeypatch, data):
+ fake = _FakePytesseract(data)
+ monkeypatch.setattr(ocr_engine, "_pytesseract", fake)
+ monkeypatch.setattr(ocr_engine, "_image_grab", object())
+ monkeypatch.setattr(ocr_engine, "_load_backend",
+ lambda: (fake, ocr_engine._image_grab))
+ monkeypatch.setattr(ocr_engine, "_grab",
+ lambda region: (object(), 0, 0))
+
+
+def test_read_text_in_region_returns_all_hits(monkeypatch):
+ _install_fake_backend(monkeypatch, {
+ "text": ["alpha", "beta", "gamma"],
+ "conf": ["91.0", "82.0", "75.0"],
+ "left": [0, 30, 60], "top": [0, 0, 0],
+ "width": [20, 20, 20], "height": [10, 10, 10],
+ })
+ matches = read_text_in_region(region=[0, 0, 200, 100], min_confidence=60.0)
+ assert [m.text for m in matches] == ["alpha", "beta", "gamma"]
+
+
+def test_read_text_in_region_filters_by_confidence(monkeypatch):
+ _install_fake_backend(monkeypatch, {
+ "text": ["high", "low"],
+ "conf": ["95.0", "20.0"],
+ "left": [0, 30], "top": [0, 0],
+ "width": [20, 20], "height": [10, 10],
+ })
+ matches = read_text_in_region(min_confidence=60.0)
+ assert [m.text for m in matches] == ["high"]
+
+
+def test_find_text_regex_matches_pattern(monkeypatch):
+ _install_fake_backend(monkeypatch, {
+ "text": ["Order#42", "ignore", "Order#99"],
+ "conf": ["95.0", "95.0", "95.0"],
+ "left": [0, 30, 60], "top": [0, 0, 0],
+ "width": [20, 20, 20], "height": [10, 10, 10],
+ })
+ matches = find_text_regex(r"Order#\d+")
+ assert [m.text for m in matches] == ["Order#42", "Order#99"]
+
+
+def test_find_text_regex_accepts_compiled_pattern(monkeypatch):
+ _install_fake_backend(monkeypatch, {
+ "text": ["FOO", "foo", "bar"],
+ "conf": ["90.0", "90.0", "90.0"],
+ "left": [0, 10, 20], "top": [0, 0, 0],
+ "width": [10, 10, 10], "height": [10, 10, 10],
+ })
+ matches = find_text_regex(re.compile(r"foo", re.IGNORECASE))
+ assert {m.text for m in matches} == {"FOO", "foo"}
diff --git a/test/unit_test/headless/test_remote_desktop_audio.py b/test/unit_test/headless/test_remote_desktop_audio.py
new file mode 100644
index 00000000..33f74040
--- /dev/null
+++ b/test/unit_test/headless/test_remote_desktop_audio.py
@@ -0,0 +1,257 @@
+"""Audio capture / playback contract + host->viewer streaming tests.
+
+The real ``sounddevice`` backend is replaced by a fake throughout, so the
+tests run on machines without PortAudio. They cover: lazy backend
+loading, the AUDIO message type round-trip, viewer ``on_audio`` dispatch,
+host queue back-pressure (oldest dropped), and the audio sender thread
+shutting down with the client.
+"""
+import threading
+import time
+from typing import Optional
+
+import pytest
+
+from je_auto_control.utils.remote_desktop import (
+ RemoteDesktopHost, RemoteDesktopViewer,
+)
+from je_auto_control.utils.remote_desktop.audio import (
+ AudioBackendError, AudioCapture, AudioCaptureConfig, AudioPlayer,
+)
+
+
+class _FakeStream:
+ """Imitates the bits of sounddevice.RawInputStream we use."""
+
+ def __init__(self, *, callback=None, **_kwargs) -> None:
+ self.callback = callback
+ self.started = False
+ self.closed = False
+
+ def start(self) -> None:
+ self.started = True
+
+ def stop(self) -> None:
+ self.started = False
+
+ def close(self) -> None:
+ self.closed = True
+
+
+class _FakeSounddevice:
+ def __init__(self) -> None:
+ self.last_input: Optional[_FakeStream] = None
+ self.last_output: Optional[_FakeStream] = None
+
+ def RawInputStream(self, **kwargs) -> _FakeStream: # noqa: N802 # NOSONAR S100 # mirrors sounddevice API
+ self.last_input = _FakeStream(**kwargs)
+ return self.last_input
+
+ def RawOutputStream(self, **kwargs) -> _FakeStream: # noqa: N802 # NOSONAR S100 # mirrors sounddevice API
+ self.last_output = _FakeStream(**kwargs)
+ return self.last_output
+
+
+@pytest.fixture()
+def fake_sd(monkeypatch):
+ fake = _FakeSounddevice()
+
+ from je_auto_control.utils.remote_desktop import audio as audio_mod
+ monkeypatch.setattr(audio_mod, "_load_sounddevice", lambda: fake)
+ return fake
+
+
+def _wait_until(predicate, timeout: float = 2.0,
+ interval: float = 0.02) -> bool:
+ deadline = time.monotonic() + timeout
+ while time.monotonic() < deadline:
+ if predicate():
+ return True
+ time.sleep(interval)
+ return predicate()
+
+
+# --- AudioCapture / AudioPlayer unit tests --------------------------------
+
+
+def test_audio_capture_invokes_callback_with_block_bytes(fake_sd):
+ received = []
+ capture = AudioCapture(on_block=received.append)
+ capture.start()
+ assert fake_sd.last_input.started
+ # Simulate the sounddevice thread firing a block:
+ fake_sd.last_input.callback(b"abc", 800, None, None)
+ assert received == [b"abc"]
+ capture.stop()
+ assert fake_sd.last_input.closed
+
+
+def test_audio_capture_swallows_callback_exceptions(fake_sd):
+ capture = AudioCapture(on_block=lambda chunk: 1 / 0)
+ capture.start()
+ # Must not raise even though the user callback exploded:
+ fake_sd.last_input.callback(b"xx", 800, None, None)
+ capture.stop()
+
+
+def test_audio_player_writes_chunks(fake_sd):
+ player = AudioPlayer()
+ player.start()
+ assert fake_sd.last_output.started
+ written = []
+ fake_sd.last_output.write = written.append # type: ignore[attr-defined]
+ player.play(b"\x01\x02")
+ assert written == [b"\x01\x02"]
+ player.stop()
+
+
+def test_audio_player_play_before_start_raises(fake_sd):
+ del fake_sd
+ player = AudioPlayer()
+ with pytest.raises(RuntimeError):
+ player.play(b"x")
+
+
+def test_audio_capture_validates_args():
+ with pytest.raises(TypeError):
+ AudioCapture(on_block="not callable") # type: ignore[arg-type]
+ with pytest.raises(ValueError):
+ AudioCapture(on_block=lambda c: None, sample_rate=0)
+
+
+# --- end-to-end host -> viewer streaming ---------------------------------
+
+
+class _ManualCapture:
+ """Stub object with the same start/stop API used by the host."""
+
+ def __init__(self) -> None:
+ self.started = False
+ self.stopped = False
+ self.on_block = None
+
+ def start(self) -> None:
+ self.started = True
+
+ def stop(self) -> None:
+ self.stopped = True
+
+
+def _start_audio_host():
+ capture = _ManualCapture()
+ host = RemoteDesktopHost(
+ token="tok", bind="127.0.0.1", port=0, fps=50.0,
+ frame_provider=lambda: b"frame",
+ input_dispatcher=lambda *_a, **_k: None,
+ host_id="555444333",
+ audio_config=AudioCaptureConfig(enabled=True), audio_capture=capture,
+ )
+ host.start()
+ capture.on_block = host._broadcast_audio # noqa: SLF001
+ return host, capture
+
+
+def test_audio_chunks_reach_viewer():
+ host, capture = _start_audio_host()
+ try:
+ received = []
+ viewer = RemoteDesktopViewer(
+ host="127.0.0.1", port=host.port, token="tok",
+ on_audio=received.append,
+ )
+ viewer.connect(timeout=2.0)
+ # Wait for the auth handshake to count as authenticated.
+ assert _wait_until(lambda: host.connected_clients == 1)
+ capture.on_block(b"\xaa" * 100)
+ capture.on_block(b"\xbb" * 100)
+ assert _wait_until(lambda: len(received) >= 2)
+ assert received[0] == b"\xaa" * 100
+ assert received[1] == b"\xbb" * 100
+ viewer.disconnect()
+ finally:
+ host.stop(timeout=1.0)
+
+
+def test_audio_disabled_means_no_sender_thread():
+ host = RemoteDesktopHost(
+ token="tok", bind="127.0.0.1", port=0, fps=50.0,
+ frame_provider=lambda: b"frame",
+ input_dispatcher=lambda *_a, **_k: None,
+ host_id="200200200",
+ )
+ host.start()
+ try:
+ viewer = RemoteDesktopViewer(
+ host="127.0.0.1", port=host.port, token="tok",
+ )
+ viewer.connect(timeout=2.0)
+ assert _wait_until(lambda: host.connected_clients == 1)
+ with host._clients_lock: # noqa: SLF001
+ client = host._clients[0] # noqa: SLF001
+ assert client._audio_sender_thread is None # noqa: SLF001
+ viewer.disconnect()
+ finally:
+ host.stop(timeout=1.0)
+
+
+def test_audio_queue_drops_oldest_when_full():
+ """Slow viewer (one that never reads) should not back up host capture."""
+ host, capture = _start_audio_host()
+ try:
+ # No viewer attached — but emulate one being authenticated by
+ # building a client handler manually would be invasive. Instead
+ # use a real viewer that we never let read fast.
+ viewer = RemoteDesktopViewer(
+ host="127.0.0.1", port=host.port, token="tok",
+ on_audio=lambda chunk: time.sleep(0.5), # very slow consumer
+ )
+ viewer.connect(timeout=2.0)
+ assert _wait_until(lambda: host.connected_clients == 1)
+ # Push more chunks than the queue capacity.
+ for i in range(200):
+ capture.on_block(bytes([i % 256]) * 16)
+ # Queue must remain bounded.
+ with host._clients_lock: # noqa: SLF001
+ client = host._clients[0] # noqa: SLF001
+ with client._audio_lock: # noqa: SLF001
+ assert len(client._audio_queue) <= client._AUDIO_QUEUE_MAXLEN # noqa: SLF001
+ viewer.disconnect()
+ finally:
+ host.stop(timeout=1.0)
+
+
+def test_host_audio_capture_lifecycle():
+ host, capture = _start_audio_host()
+ try:
+ assert capture.started is True
+ assert host.audio_enabled
+ finally:
+ host.stop(timeout=1.0)
+ assert capture.stopped is True
+
+
+def test_audio_capture_failure_leaves_host_running():
+ """A backend failure during start must not abort the host."""
+ class _Failing:
+ def start(self):
+ raise AudioBackendError("no portaudio")
+
+ def stop(self):
+ # No teardown needed — start() never opened a real stream.
+ return None
+
+ host = RemoteDesktopHost(
+ token="tok", bind="127.0.0.1", port=0, fps=50.0,
+ frame_provider=lambda: b"frame",
+ input_dispatcher=lambda *_a, **_k: None,
+ host_id="600600600",
+ audio_config=AudioCaptureConfig(enabled=True), audio_capture=_Failing(),
+ )
+ host.start()
+ try:
+ # Host is running but audio is reported as not enabled because the
+ # capture object failed to come up.
+ assert host.is_running
+ assert host.audio_enabled is False
+ finally:
+ host.stop(timeout=1.0)
diff --git a/test/unit_test/headless/test_remote_desktop_clipboard.py b/test/unit_test/headless/test_remote_desktop_clipboard.py
new file mode 100644
index 00000000..bfc3a575
--- /dev/null
+++ b/test/unit_test/headless/test_remote_desktop_clipboard.py
@@ -0,0 +1,162 @@
+"""Clipboard sync tests: serialization round-trip and host<->viewer flow."""
+import time
+
+import pytest
+
+from je_auto_control.utils.remote_desktop import (
+ RemoteDesktopHost, RemoteDesktopViewer,
+)
+from je_auto_control.utils.remote_desktop.clipboard_sync import (
+ ClipboardSyncError, decode, encode_image, encode_text,
+)
+
+
+def _wait_until(predicate, timeout: float = 2.0,
+ interval: float = 0.02) -> bool:
+ deadline = time.monotonic() + timeout
+ while time.monotonic() < deadline:
+ if predicate():
+ return True
+ time.sleep(interval)
+ return predicate()
+
+
+# --- serialization unit tests --------------------------------------------
+
+
+def test_encode_decode_text_round_trip():
+ payload = encode_text("hello 世界")
+ kind, data = decode(payload)
+ assert kind == "text"
+ assert data == "hello 世界"
+
+
+def test_encode_decode_image_round_trip():
+ raw = b"\x89PNG\r\n\x1a\n_synthetic_"
+ payload = encode_image(raw)
+ kind, data = decode(payload)
+ assert kind == "image"
+ assert data == raw
+
+
+def test_encode_text_rejects_non_string():
+ with pytest.raises(TypeError):
+ encode_text(123) # type: ignore[arg-type] # NOSONAR S5655 # intentional bad-type test
+
+
+def test_encode_image_rejects_empty():
+ with pytest.raises(ValueError):
+ encode_image(b"")
+
+
+def test_decode_rejects_invalid_json():
+ with pytest.raises(ClipboardSyncError):
+ decode(b"not json")
+
+
+def test_decode_rejects_unknown_kind():
+ with pytest.raises(ClipboardSyncError):
+ decode(b'{"kind": "video", "data": "x"}')
+
+
+def test_decode_rejects_unsupported_image_format():
+ with pytest.raises(ClipboardSyncError):
+ decode(b'{"kind": "image", "format": "gif", "data_b64": ""}')
+
+
+def test_decode_text_missing_field():
+ with pytest.raises(ClipboardSyncError):
+ decode(b'{"kind": "text"}')
+
+
+# --- end-to-end host<->viewer ---------------------------------------------
+
+
+class _RecordingHost(RemoteDesktopHost):
+ """Host that captures clipboard apply calls instead of touching the OS."""
+
+ def __init__(self, *args, **kwargs) -> None:
+ super().__init__(*args, **kwargs)
+ self.applied = []
+
+ def _apply_clipboard(self, kind, data) -> None:
+ self.applied.append((kind, data))
+
+
+def _start_host() -> _RecordingHost:
+ host = _RecordingHost(
+ token="tok", bind="127.0.0.1", port=0, fps=50.0,
+ frame_provider=lambda: b"frame",
+ input_dispatcher=lambda *_a, **_k: None,
+ host_id="900800700",
+ )
+ host.start()
+ return host
+
+
+def test_viewer_send_clipboard_text_reaches_host():
+ host = _start_host()
+ try:
+ viewer = RemoteDesktopViewer(
+ host="127.0.0.1", port=host.port, token="tok",
+ )
+ viewer.connect(timeout=2.0)
+ viewer.send_clipboard_text("ping from viewer")
+ assert _wait_until(lambda: host.applied == [("text", "ping from viewer")])
+ viewer.disconnect()
+ finally:
+ host.stop(timeout=1.0)
+
+
+def test_viewer_send_clipboard_image_reaches_host():
+ host = _start_host()
+ try:
+ viewer = RemoteDesktopViewer(
+ host="127.0.0.1", port=host.port, token="tok",
+ )
+ viewer.connect(timeout=2.0)
+ viewer.send_clipboard_image(b"\x89PNGfake")
+ assert _wait_until(lambda: host.applied == [("image", b"\x89PNGfake")])
+ viewer.disconnect()
+ finally:
+ host.stop(timeout=1.0)
+
+
+def test_host_broadcast_clipboard_reaches_viewer():
+ host = _start_host()
+ try:
+ received = []
+ viewer = RemoteDesktopViewer(
+ host="127.0.0.1", port=host.port, token="tok",
+ on_clipboard=lambda kind, data: received.append((kind, data)),
+ )
+ viewer.connect(timeout=2.0)
+ # Wait for the receiver thread to come up and the auth handshake
+ # to count the viewer as a connected client.
+ assert _wait_until(lambda: host.connected_clients == 1)
+ sent = host.broadcast_clipboard_text("greetings from host")
+ assert sent == 1
+ assert _wait_until(
+ lambda: ("text", "greetings from host") in received,
+ )
+ viewer.disconnect()
+ finally:
+ host.stop(timeout=1.0)
+
+
+def test_viewer_clipboard_methods_require_connection():
+ viewer = RemoteDesktopViewer(host="127.0.0.1", port=1, token="t")
+ with pytest.raises(ConnectionError):
+ viewer.send_clipboard_text("x")
+ with pytest.raises(ConnectionError):
+ viewer.send_clipboard_image(b"\x89PNGfake")
+
+
+def test_host_apply_clipboard_unknown_kind_raises():
+ host = _start_host()
+ try:
+ with pytest.raises(ValueError):
+ # Bypass the recorded subclass and exercise the parent logic.
+ RemoteDesktopHost._apply_clipboard(host, "video", b"")
+ finally:
+ host.stop(timeout=1.0)
diff --git a/test/unit_test/headless/test_remote_desktop_executor.py b/test/unit_test/headless/test_remote_desktop_executor.py
new file mode 100644
index 00000000..1178a747
--- /dev/null
+++ b/test/unit_test/headless/test_remote_desktop_executor.py
@@ -0,0 +1,117 @@
+"""Tests for the AC_remote_* executor commands and registry singleton."""
+import time
+
+import pytest
+
+from je_auto_control.utils.executor.action_executor import executor
+from je_auto_control.utils.remote_desktop.registry import registry
+
+
+@pytest.fixture(autouse=True)
+def reset_registry():
+ """Tear down any leftover host/viewer before and after each test."""
+ registry.disconnect_viewer()
+ registry.stop_host()
+ yield
+ registry.disconnect_viewer()
+ registry.stop_host()
+
+
+def _wait_until(predicate, timeout: float = 2.0,
+ interval: float = 0.02) -> bool:
+ deadline = time.monotonic() + timeout
+ while time.monotonic() < deadline:
+ if predicate():
+ return True
+ time.sleep(interval)
+ return predicate()
+
+
+def test_known_commands_include_remote_desktop():
+ assert "AC_start_remote_host" in executor.known_commands()
+ assert "AC_stop_remote_host" in executor.known_commands()
+ assert "AC_remote_host_status" in executor.known_commands()
+ assert "AC_remote_connect" in executor.known_commands()
+ assert "AC_remote_disconnect" in executor.known_commands()
+ assert "AC_remote_viewer_status" in executor.known_commands()
+ assert "AC_remote_send_input" in executor.known_commands()
+
+
+def test_start_host_then_status_via_executor():
+ captured = []
+
+ def stub_provider() -> bytes:
+ return b"test-frame"
+
+ # Reach into the registry to install a stub provider so this test
+ # never touches PIL.ImageGrab; mirrors what GUI would do for a fake.
+ from je_auto_control.utils.remote_desktop.host import RemoteDesktopHost
+
+ host = RemoteDesktopHost(
+ token="t", bind="127.0.0.1", port=0, fps=50.0, quality=70,
+ frame_provider=stub_provider, input_dispatcher=captured.append,
+ )
+ host.start()
+ registry._host = host # noqa: SLF001 # test-only injection
+
+ record = executor.execute_action([["AC_remote_host_status"]])
+ status_value = next(iter(record.values()))
+ assert status_value["running"] is True
+ assert status_value["port"] > 0
+
+
+def test_start_host_with_blank_token_records_error():
+ record = executor.execute_action([
+ ["AC_start_remote_host", {"token": ""}],
+ ])
+ assert any("ValueError" in repr(v) for v in record.values())
+
+
+def test_send_input_without_viewer_records_connection_error():
+ record = executor.execute_action([
+ ["AC_remote_send_input", {"action": {"action": "ping"}}],
+ ])
+ assert any("ConnectionError" in repr(v) for v in record.values())
+
+
+def test_remote_round_trip_through_executor():
+ """Start host + connect viewer + send input via executor commands."""
+ record = executor.execute_action([
+ ["AC_start_remote_host", {
+ "token": "tok", "bind": "127.0.0.1", "port": 0,
+ "fps": 50, "quality": 70,
+ }],
+ ])
+ start_status = next(iter(record.values()))
+ assert start_status["running"] is True
+ port = start_status["port"]
+ assert port > 0
+
+ # Replace the default frame provider (PIL.ImageGrab) with a stub so
+ # the test does not depend on a real screen being available.
+ registry._host._frame_provider = lambda: b"executor-frame" # noqa: SLF001
+ captured = []
+ registry._host._dispatch = captured.append # noqa: SLF001
+
+ executor.execute_action([
+ ["AC_remote_connect", {
+ "host": "127.0.0.1", "port": port, "token": "tok",
+ }],
+ ])
+ viewer_status = registry.viewer_status()
+ assert viewer_status["connected"] is True
+ assert _wait_until(lambda: registry.host.connected_clients == 1)
+
+ executor.execute_action([
+ ["AC_remote_send_input", {
+ "action": {"action": "mouse_move", "x": 5, "y": 7},
+ }],
+ ])
+ assert _wait_until(lambda: captured == [
+ {"action": "mouse_move", "x": 5, "y": 7}
+ ])
+
+ executor.execute_action([["AC_remote_disconnect"]])
+ assert registry.viewer_status()["connected"] is False
+ executor.execute_action([["AC_stop_remote_host"]])
+ assert registry.host_status()["running"] is False
diff --git a/test/unit_test/headless/test_remote_desktop_file_transfer.py b/test/unit_test/headless/test_remote_desktop_file_transfer.py
new file mode 100644
index 00000000..6fa07662
--- /dev/null
+++ b/test/unit_test/headless/test_remote_desktop_file_transfer.py
@@ -0,0 +1,196 @@
+"""File-transfer protocol + host<->viewer integration tests."""
+import time
+from pathlib import Path
+
+import pytest
+
+from je_auto_control.utils.remote_desktop import (
+ FileReceiver, FileTransferError, RemoteDesktopHost, RemoteDesktopViewer,
+ send_file,
+)
+from je_auto_control.utils.remote_desktop.file_transfer import (
+ decode_begin, decode_chunk, decode_end, encode_begin, encode_chunk,
+ encode_end, new_transfer_id,
+)
+
+
+def _wait_until(predicate, timeout: float = 4.0,
+ interval: float = 0.02) -> bool:
+ deadline = time.monotonic() + timeout
+ while time.monotonic() < deadline:
+ if predicate():
+ return True
+ time.sleep(interval)
+ return predicate()
+
+
+# --- serialization unit tests --------------------------------------------
+
+
+def test_begin_round_trip():
+ tid = new_transfer_id()
+ payload = encode_begin(tid, "drop/a.bin", 4242)
+ out_id, dest, size = decode_begin(payload)
+ assert out_id == tid
+ assert dest == "drop/a.bin"
+ assert size == 4242
+
+
+def test_chunk_round_trip():
+ tid = new_transfer_id()
+ payload = encode_chunk(tid, b"\x01\x02\x03\x04")
+ out_id, body = decode_chunk(payload)
+ assert out_id == tid
+ assert body == b"\x01\x02\x03\x04"
+
+
+def test_end_round_trip_includes_error():
+ tid = new_transfer_id()
+ out_id, status, error = decode_end(
+ encode_end(tid, status="error", error="disk full")
+ )
+ assert (out_id, status, error) == (tid, "error", "disk full")
+
+
+def test_decode_chunk_short_payload_raises():
+ with pytest.raises(FileTransferError):
+ decode_chunk(b"too-short")
+
+
+def test_encode_begin_rejects_invalid_id():
+ with pytest.raises(FileTransferError):
+ encode_begin("short", "drop/x", 1)
+
+
+# --- send_file <-> FileReceiver in-process round-trip --------------------
+
+
+class _BufferChannel:
+ """Deliver typed messages directly into a receiver for unit testing."""
+
+ def __init__(self, receiver: FileReceiver) -> None:
+ self._receiver = receiver
+
+ def send_typed(self, message_type, payload) -> None:
+ from je_auto_control.utils.remote_desktop.protocol import MessageType
+ if message_type is MessageType.FILE_BEGIN:
+ self._receiver.handle_begin(payload)
+ elif message_type is MessageType.FILE_CHUNK:
+ self._receiver.handle_chunk(payload)
+ elif message_type is MessageType.FILE_END:
+ self._receiver.handle_end(payload)
+ else:
+ raise AssertionError(f"unexpected message {message_type!r}")
+
+
+def test_send_file_to_receiver_writes_dest(tmp_path: Path):
+ src = tmp_path / "src.bin"
+ src.write_bytes(b"hello world" * 1000)
+ dest = tmp_path / "dst" / "out.bin"
+
+ completes = []
+ progress = []
+ receiver = FileReceiver(
+ on_progress=lambda tid, done, total: progress.append((tid, done, total)),
+ on_complete=lambda tid, ok, err, dst: completes.append((tid, ok, err, dst)),
+ )
+ channel = _BufferChannel(receiver)
+
+ result = send_file(channel, str(src), str(dest))
+ assert result.success is True
+ assert result.bytes_sent == src.stat().st_size
+ assert dest.read_bytes() == src.read_bytes()
+ assert completes and completes[-1][1] is True
+ # Progress is reported at start (0) and after each chunk; final value
+ # equals the file size.
+ assert progress[-1][1] == src.stat().st_size
+
+
+def test_send_file_missing_source_raises(tmp_path: Path):
+ receiver = FileReceiver()
+ channel = _BufferChannel(receiver)
+ with pytest.raises(FileTransferError):
+ send_file(channel, str(tmp_path / "missing.bin"),
+ str(tmp_path / "out.bin"))
+
+
+# --- end-to-end host<->viewer over a real TCP socket --------------------
+
+
+def _start_host() -> RemoteDesktopHost:
+ host = RemoteDesktopHost(
+ token="tok", bind="127.0.0.1", port=0, fps=50.0,
+ frame_provider=lambda: b"frame",
+ input_dispatcher=lambda *_a, **_k: None,
+ host_id="333222111",
+ )
+ host.start()
+ return host
+
+
+def test_viewer_uploads_file_to_host_dropbox(tmp_path: Path):
+ payload = b"upload from viewer\n" * 5000
+ src = tmp_path / "viewer_src.bin"
+ src.write_bytes(payload)
+ dest = tmp_path / "host_drop" / "result.bin"
+
+ host_completes = []
+ host_progress = []
+ host = _start_host()
+ host.set_file_receiver(FileReceiver(
+ on_progress=lambda *args: host_progress.append(args),
+ on_complete=lambda *args: host_completes.append(args),
+ ))
+ try:
+ viewer = RemoteDesktopViewer(
+ host="127.0.0.1", port=host.port, token="tok",
+ )
+ viewer.connect(timeout=2.0)
+ result = viewer.send_file(str(src), str(dest))
+ assert result.success is True
+ assert _wait_until(lambda: bool(host_completes))
+ _tid, ok, err, written_path = host_completes[-1]
+ assert ok is True
+ assert err is None
+ assert Path(written_path) == dest
+ assert dest.read_bytes() == payload
+ # Progress fired with at least the final byte count
+ assert host_progress[-1][1] == len(payload)
+ viewer.disconnect()
+ finally:
+ host.stop(timeout=1.0)
+
+
+def test_host_pushes_file_to_viewer(tmp_path: Path):
+ payload = b"download from host\n" * 5000
+ src = tmp_path / "host_src.bin"
+ src.write_bytes(payload)
+ dest = tmp_path / "viewer_drop" / "from_host.bin"
+
+ viewer_completes = []
+ host = _start_host()
+ try:
+ viewer = RemoteDesktopViewer(
+ host="127.0.0.1", port=host.port, token="tok",
+ )
+ viewer.set_file_receiver(FileReceiver(
+ on_complete=lambda *args: viewer_completes.append(args),
+ ))
+ viewer.connect(timeout=2.0)
+ assert _wait_until(lambda: host.connected_clients == 1)
+ host.send_file_to_viewers(str(src), str(dest))
+ assert _wait_until(lambda: bool(viewer_completes), timeout=5.0)
+ _tid, ok, err, written_path = viewer_completes[-1]
+ assert ok is True
+ assert err is None
+ assert Path(written_path) == dest
+ assert dest.read_bytes() == payload
+ viewer.disconnect()
+ finally:
+ host.stop(timeout=1.0)
+
+
+def test_send_file_when_viewer_not_connected():
+ viewer = RemoteDesktopViewer(host="127.0.0.1", port=1, token="t")
+ with pytest.raises(ConnectionError):
+ viewer.send_file("anything", "anywhere")
diff --git a/test/unit_test/headless/test_remote_desktop_gui.py b/test/unit_test/headless/test_remote_desktop_gui.py
new file mode 100644
index 00000000..c170285a
--- /dev/null
+++ b/test/unit_test/headless/test_remote_desktop_gui.py
@@ -0,0 +1,149 @@
+"""Qt integration tests for the Remote Desktop GUI tab.
+
+Runs against an offscreen QApplication so it stays headless. Verifies
+the viewer's FrameDisplay actually receives and decodes JPEG frames
+end-to-end, and that the host preview pane mirrors what is being sent.
+"""
+import os
+import time
+from io import BytesIO
+
+import pytest
+
+# Force Qt to use the offscreen platform plugin so the test runs without a
+# display server (and without flashing windows on a real desktop).
+os.environ.setdefault("QT_QPA_PLATFORM", "offscreen")
+
+PIL = pytest.importorskip("PIL.Image")
+pyside = pytest.importorskip("PySide6.QtWidgets")
+
+from PySide6.QtCore import Qt # noqa: E402
+from PySide6.QtWidgets import QApplication # noqa: E402
+
+from je_auto_control.utils.remote_desktop.registry import registry # noqa: E402
+
+
+@pytest.fixture(scope="module")
+def qapp():
+ app = QApplication.instance() or QApplication([])
+ yield app
+
+
+@pytest.fixture(autouse=True)
+def reset_registry():
+ registry.disconnect_viewer()
+ registry.stop_host()
+ yield
+ registry.disconnect_viewer()
+ registry.stop_host()
+
+
+def _make_jpeg(width: int = 64, height: int = 48) -> bytes:
+ """Encode a small solid-color image to JPEG."""
+ from PIL import Image
+ img = Image.new("RGB", (width, height), color=(255, 0, 0))
+ buf = BytesIO()
+ img.save(buf, format="JPEG", quality=70)
+ return buf.getvalue()
+
+
+def _process_until(app: QApplication, predicate, timeout: float = 3.0,
+ interval_ms: int = 20) -> bool:
+ deadline = time.monotonic() + timeout
+ while time.monotonic() < deadline:
+ app.processEvents()
+ if predicate():
+ return True
+ time.sleep(interval_ms / 1000.0)
+ app.processEvents()
+ return predicate()
+
+
+def test_viewer_panel_renders_frame_from_host(qapp):
+ from je_auto_control.gui.remote_desktop_tab import _ViewerPanel
+ from je_auto_control.utils.remote_desktop.host import RemoteDesktopHost
+
+ jpeg = _make_jpeg()
+ captured_input = []
+
+ host = RemoteDesktopHost(
+ token="t", bind="127.0.0.1", port=0, fps=30.0,
+ frame_provider=lambda: jpeg,
+ input_dispatcher=captured_input.append,
+ )
+ host.start()
+ registry._host = host # noqa: SLF001 # test-only injection
+ try:
+ panel = _ViewerPanel()
+ panel._host_field.setText("127.0.0.1") # noqa: SLF001
+ panel._port.setValue(host.port) # noqa: SLF001
+ panel._token.setText("t") # noqa: SLF001
+ panel._connect() # noqa: SLF001
+ assert _process_until(qapp, panel._display.has_image) # noqa: SLF001
+ # Display image must match the encoded frame size.
+ assert panel._display._image.width() == 64 # noqa: SLF001
+ assert panel._display._image.height() == 48 # noqa: SLF001
+ finally:
+ registry.disconnect_viewer()
+ host.stop(timeout=1.0)
+ registry._host = None # noqa: SLF001
+
+
+def test_host_preview_shows_streamed_frame(qapp):
+ from je_auto_control.gui.remote_desktop_tab import _HostPanel
+ from je_auto_control.utils.remote_desktop.host import RemoteDesktopHost
+
+ jpeg = _make_jpeg(80, 60)
+ host = RemoteDesktopHost(
+ token="t", bind="127.0.0.1", port=0, fps=30.0,
+ frame_provider=lambda: jpeg,
+ )
+ host.start()
+ registry._host = host # noqa: SLF001
+ try:
+ panel = _HostPanel()
+ # Speed the preview poll up so the test does not need to wait 250ms+.
+ panel._preview_timer.setInterval(20) # noqa: SLF001
+ assert _process_until(qapp, panel._preview.has_image) # noqa: SLF001
+ assert panel._preview._image.width() == 80 # noqa: SLF001
+ assert panel._preview._image.height() == 60 # noqa: SLF001
+ finally:
+ host.stop(timeout=1.0)
+ registry._host = None # noqa: SLF001
+
+
+def test_viewer_input_round_trips_to_dispatcher(qapp):
+ from je_auto_control.gui.remote_desktop_tab import _ViewerPanel
+ from je_auto_control.utils.remote_desktop.host import RemoteDesktopHost
+
+ jpeg = _make_jpeg()
+ captured = []
+ host = RemoteDesktopHost(
+ token="t", bind="127.0.0.1", port=0, fps=30.0,
+ frame_provider=lambda: jpeg,
+ input_dispatcher=captured.append,
+ )
+ host.start()
+ registry._host = host # noqa: SLF001
+ try:
+ panel = _ViewerPanel()
+ panel._host_field.setText("127.0.0.1") # noqa: SLF001
+ panel._port.setValue(host.port) # noqa: SLF001
+ panel._token.setText("t") # noqa: SLF001
+ panel._connect() # noqa: SLF001
+ assert _process_until(qapp, panel._display.has_image) # noqa: SLF001
+
+ panel._send_mouse_move(11, 13) # noqa: SLF001
+ panel._send_mouse_press(11, 13, "mouse_left") # noqa: SLF001
+
+ assert _process_until(
+ qapp,
+ lambda: any(c.get("action") == "mouse_press" for c in captured),
+ )
+ moves = [c for c in captured if c.get("action") == "mouse_move"]
+ assert any(c == {"action": "mouse_move", "x": 11, "y": 13}
+ for c in moves)
+ finally:
+ registry.disconnect_viewer()
+ host.stop(timeout=1.0)
+ registry._host = None # noqa: SLF001
diff --git a/test/unit_test/headless/test_remote_desktop_host_id.py b/test/unit_test/headless/test_remote_desktop_host_id.py
new file mode 100644
index 00000000..71cee040
--- /dev/null
+++ b/test/unit_test/headless/test_remote_desktop_host_id.py
@@ -0,0 +1,153 @@
+"""Tests for the persistent host-ID and the AUTH_OK handshake extension."""
+import time
+from pathlib import Path
+
+import pytest
+
+from je_auto_control.utils.remote_desktop import (
+ RemoteDesktopHost, RemoteDesktopViewer,
+)
+from je_auto_control.utils.remote_desktop.host_id import (
+ HostIdError, format_host_id, generate_host_id, load_or_create_host_id,
+ parse_host_id, validate_host_id,
+)
+from je_auto_control.utils.remote_desktop.protocol import AuthenticationError
+
+
+def _wait_until(predicate, timeout: float = 2.0,
+ interval: float = 0.02) -> bool:
+ deadline = time.monotonic() + timeout
+ while time.monotonic() < deadline:
+ if predicate():
+ return True
+ time.sleep(interval)
+ return predicate()
+
+
+def test_generate_host_id_is_nine_digits():
+ value = generate_host_id()
+ assert isinstance(value, str)
+ assert len(value) == 9
+ assert value.isdigit()
+
+
+def test_validate_host_id_accepts_valid_id():
+ assert validate_host_id("123456789") == "123456789"
+
+
+def test_validate_host_id_rejects_short_id():
+ with pytest.raises(HostIdError):
+ validate_host_id("12345")
+
+
+def test_validate_host_id_rejects_alpha():
+ with pytest.raises(HostIdError):
+ validate_host_id("12345abcd")
+
+
+def test_format_host_id_groups_in_threes():
+ assert format_host_id("123456789") == "123 456 789"
+
+
+def test_parse_host_id_strips_whitespace_and_separators():
+ assert parse_host_id("123 456 789") == "123456789"
+ assert parse_host_id("123-456-789") == "123456789"
+ assert parse_host_id("123_456_789") == "123456789"
+
+
+def test_parse_host_id_rejects_garbage():
+ with pytest.raises(HostIdError):
+ parse_host_id("hello-world")
+
+
+def test_load_or_create_persists_across_calls(tmp_path: Path):
+ target = tmp_path / "host_id"
+ first = load_or_create_host_id(target)
+ second = load_or_create_host_id(target)
+ assert first == second
+ assert target.read_text(encoding="utf-8").strip() == first
+
+
+def test_load_or_create_regenerates_corrupt_file(tmp_path: Path):
+ target = tmp_path / "host_id"
+ target.write_text("not-a-valid-id", encoding="utf-8")
+ new_id = load_or_create_host_id(target)
+ assert new_id.isdigit() and len(new_id) == 9
+ # Corrupt content was rewritten with the new valid ID.
+ assert target.read_text(encoding="utf-8").strip() == new_id
+
+
+def test_host_exposes_host_id():
+ host = RemoteDesktopHost(token="t", host_id="111222333")
+ assert host.host_id == "111222333"
+
+
+def test_host_rejects_invalid_host_id():
+ with pytest.raises(HostIdError):
+ RemoteDesktopHost(token="t", host_id="abc")
+
+
+def _start_loopback_host(host_id: str = "987654321") -> RemoteDesktopHost:
+ host = RemoteDesktopHost(
+ token="tok", bind="127.0.0.1", port=0, fps=50.0,
+ frame_provider=lambda: b"frame",
+ input_dispatcher=lambda *_args, **_kwargs: None,
+ host_id=host_id,
+ )
+ host.start()
+ return host
+
+
+def test_viewer_receives_host_id_in_auth_ok():
+ host = _start_loopback_host("555666777")
+ try:
+ viewer = RemoteDesktopViewer(
+ host="127.0.0.1", port=host.port, token="tok",
+ )
+ viewer.connect(timeout=2.0)
+ assert _wait_until(lambda: viewer.remote_host_id == "555666777")
+ viewer.disconnect()
+ finally:
+ host.stop(timeout=1.0)
+
+
+def test_expected_host_id_matches_connects_normally():
+ host = _start_loopback_host("123123123")
+ try:
+ viewer = RemoteDesktopViewer(
+ host="127.0.0.1", port=host.port, token="tok",
+ expected_host_id="123123123",
+ )
+ viewer.connect(timeout=2.0)
+ assert viewer.connected
+ viewer.disconnect()
+ finally:
+ host.stop(timeout=1.0)
+
+
+def test_expected_host_id_mismatch_raises():
+ host = _start_loopback_host("100000001")
+ try:
+ viewer = RemoteDesktopViewer(
+ host="127.0.0.1", port=host.port, token="tok",
+ expected_host_id="999999999",
+ )
+ with pytest.raises(AuthenticationError):
+ viewer.connect(timeout=2.0)
+ finally:
+ host.stop(timeout=1.0)
+
+
+def test_registry_host_status_reports_host_id():
+ from je_auto_control.utils.remote_desktop.registry import registry
+
+ registry.disconnect_viewer()
+ registry.stop_host()
+ try:
+ registry.start_host(token="tok", port=0, fps=30.0,
+ host_id="222333444")
+ status = registry.host_status()
+ assert status["host_id"] == "222333444"
+ assert status["running"] is True
+ finally:
+ registry.stop_host()
diff --git a/test/unit_test/headless/test_remote_desktop_input_dispatch.py b/test/unit_test/headless/test_remote_desktop_input_dispatch.py
new file mode 100644
index 00000000..6e974478
--- /dev/null
+++ b/test/unit_test/headless/test_remote_desktop_input_dispatch.py
@@ -0,0 +1,88 @@
+"""Tests for the input-message dispatcher (no real OS calls)."""
+import pytest
+
+from je_auto_control.utils.remote_desktop import input_dispatch
+from je_auto_control.utils.remote_desktop.input_dispatch import (
+ InputDispatchError, dispatch_input,
+)
+
+
+@pytest.fixture()
+def fake_wrappers(monkeypatch):
+ calls = []
+
+ def make(name):
+ def stub(*args, **kwargs):
+ calls.append((name, args, kwargs))
+ return ("ok", name)
+ return stub
+
+ fake = {
+ name: make(name)
+ for name in (
+ "click_mouse", "mouse_scroll", "press_mouse", "release_mouse",
+ "set_mouse_position", "press_keyboard_key", "release_keyboard_key",
+ "write",
+ )
+ }
+ monkeypatch.setattr(input_dispatch, "_import_wrappers", lambda: fake)
+ return calls
+
+
+def test_unknown_action_is_rejected(fake_wrappers):
+ with pytest.raises(InputDispatchError):
+ dispatch_input({"action": "drop_table"})
+ assert fake_wrappers == []
+
+
+def test_non_mapping_message_is_rejected():
+ with pytest.raises(InputDispatchError):
+ dispatch_input(["not", "a", "mapping"]) # type: ignore[arg-type] # NOSONAR S5655 # intentional bad-type test
+
+
+def test_ping_returns_none_without_calling_wrappers(fake_wrappers):
+ assert dispatch_input({"action": "ping"}) is None
+ assert fake_wrappers == []
+
+
+def test_mouse_move_calls_set_mouse_position(fake_wrappers):
+ dispatch_input({"action": "mouse_move", "x": 12, "y": 34})
+ assert fake_wrappers == [("set_mouse_position", (12, 34), {})]
+
+
+def test_mouse_click_with_coords_moves_then_clicks(fake_wrappers):
+ dispatch_input({"action": "mouse_click", "x": 5, "y": 6,
+ "button": "mouse_right"})
+ assert [name for name, *_ in fake_wrappers] == [
+ "set_mouse_position", "click_mouse",
+ ]
+ assert fake_wrappers[1][1] == ("mouse_right",)
+
+
+def test_mouse_click_without_coords_skips_move(fake_wrappers):
+ dispatch_input({"action": "mouse_click"})
+ assert [name for name, *_ in fake_wrappers] == ["click_mouse"]
+
+
+def test_mouse_scroll_passes_through_amount_and_position(fake_wrappers):
+ dispatch_input({"action": "mouse_scroll", "amount": -3, "x": 10, "y": 20})
+ assert fake_wrappers == [("mouse_scroll", (-3, 10, 20), {})]
+
+
+def test_key_press_and_release(fake_wrappers):
+ dispatch_input({"action": "key_press", "keycode": "a"})
+ dispatch_input({"action": "key_release", "keycode": "a"})
+ assert [name for name, *_ in fake_wrappers] == [
+ "press_keyboard_key", "release_keyboard_key",
+ ]
+
+
+def test_type_writes_text(fake_wrappers):
+ dispatch_input({"action": "type", "text": "hello"})
+ assert fake_wrappers == [("write", ("hello",), {})]
+
+
+def test_type_rejects_non_string(fake_wrappers):
+ with pytest.raises(InputDispatchError):
+ dispatch_input({"action": "type", "text": 123})
+ assert fake_wrappers == []
diff --git a/test/unit_test/headless/test_remote_desktop_io.py b/test/unit_test/headless/test_remote_desktop_io.py
new file mode 100644
index 00000000..a5d5080a
--- /dev/null
+++ b/test/unit_test/headless/test_remote_desktop_io.py
@@ -0,0 +1,163 @@
+"""End-to-end tests for RemoteDesktopHost <-> RemoteDesktopViewer.
+
+These exercise real localhost sockets but stub the screen-capture and
+input-dispatch sides so no OS-level mouse/keyboard interaction happens.
+"""
+import time
+from typing import List
+
+import pytest
+
+from je_auto_control.utils.remote_desktop import (
+ RemoteDesktopHost, RemoteDesktopViewer,
+)
+from je_auto_control.utils.remote_desktop.protocol import AuthenticationError
+
+
+def _wait_until(predicate, timeout: float = 2.0,
+ interval: float = 0.02) -> bool:
+ """Poll until ``predicate`` returns True or ``timeout`` elapses."""
+ deadline = time.monotonic() + timeout
+ while time.monotonic() < deadline:
+ if predicate():
+ return True
+ time.sleep(interval)
+ return predicate()
+
+
+@pytest.fixture()
+def fake_frame_provider():
+ """Frame provider returning sequential payloads so tests can spot updates."""
+ state = {"i": 0}
+
+ def provide() -> bytes:
+ state["i"] += 1
+ return f"frame-{state['i']}".encode("ascii")
+
+ return provide
+
+
+@pytest.fixture()
+def host_factory(fake_frame_provider):
+ """Build hosts with a stub frame provider; clean up on teardown."""
+ started: List[RemoteDesktopHost] = []
+ captured_input: List[dict] = []
+
+ def make(token: str = "secret",
+ dispatcher=None,
+ fps: float = 50.0) -> RemoteDesktopHost:
+ host = RemoteDesktopHost(
+ token=token,
+ bind="127.0.0.1",
+ port=0,
+ fps=fps,
+ quality=70,
+ frame_provider=fake_frame_provider,
+ input_dispatcher=dispatcher or captured_input.append,
+ )
+ host.start()
+ started.append(host)
+ return host
+
+ yield make, captured_input
+ for host in started:
+ host.stop(timeout=1.0)
+
+
+def _connect_viewer(host: RemoteDesktopHost, *, token: str = "secret"
+ ) -> "ViewerHarness":
+ received: List[bytes] = []
+ errors: List[Exception] = []
+ viewer = RemoteDesktopViewer(
+ host="127.0.0.1", port=host.port, token=token,
+ on_frame=received.append, on_error=errors.append,
+ )
+ viewer.connect(timeout=2.0)
+ return ViewerHarness(viewer=viewer, frames=received, errors=errors)
+
+
+class ViewerHarness:
+ """Wrapper that pairs a viewer with the lists its callbacks fill."""
+
+ def __init__(self, *, viewer: RemoteDesktopViewer,
+ frames: List[bytes], errors: List[Exception]) -> None:
+ self.viewer = viewer
+ self.frames = frames
+ self.errors = errors
+
+ def close(self) -> None:
+ self.viewer.disconnect(timeout=1.0)
+
+
+def test_viewer_authenticates_and_receives_frames(host_factory):
+ make_host, _ = host_factory
+ host = make_host()
+ harness = _connect_viewer(host)
+ try:
+ assert _wait_until(lambda: len(harness.frames) >= 2, timeout=2.0)
+ assert all(frame.startswith(b"frame-") for frame in harness.frames)
+ finally:
+ harness.close()
+
+
+def test_viewer_with_wrong_token_is_rejected(host_factory):
+ make_host, _ = host_factory
+ host = make_host(token="right")
+ viewer = RemoteDesktopViewer(
+ host="127.0.0.1", port=host.port, token="wrong",
+ )
+ with pytest.raises(AuthenticationError):
+ viewer.connect(timeout=2.0)
+ assert host.connected_clients == 0
+
+
+def test_viewer_input_reaches_host_dispatcher(host_factory):
+ make_host, captured_input = host_factory
+ host = make_host()
+ harness = _connect_viewer(host)
+ try:
+ harness.viewer.send_input({"action": "mouse_move", "x": 7, "y": 9})
+ harness.viewer.send_input({"action": "type", "text": "hi"})
+ assert _wait_until(lambda: len(captured_input) >= 2, timeout=2.0)
+ assert captured_input[0] == {"action": "mouse_move", "x": 7, "y": 9}
+ assert captured_input[1] == {"action": "type", "text": "hi"}
+ finally:
+ harness.close()
+
+
+def test_host_reports_connected_clients(host_factory):
+ make_host, _ = host_factory
+ host = make_host()
+ harness = _connect_viewer(host)
+ try:
+ assert _wait_until(lambda: host.connected_clients == 1, timeout=2.0)
+ finally:
+ harness.close()
+ assert _wait_until(lambda: host.connected_clients == 0, timeout=2.0)
+
+
+def test_host_stop_disconnects_viewer(host_factory):
+ make_host, _ = host_factory
+ host = make_host()
+ harness = _connect_viewer(host)
+ try:
+ assert _wait_until(lambda: len(harness.frames) >= 1, timeout=2.0)
+ host.stop(timeout=1.0)
+ assert _wait_until(lambda: not harness.viewer.connected, timeout=2.0)
+ finally:
+ harness.close()
+
+
+def test_host_rejects_invalid_construction():
+ with pytest.raises(ValueError):
+ RemoteDesktopHost(token="")
+ with pytest.raises(ValueError):
+ RemoteDesktopHost(token="t", fps=0)
+ with pytest.raises(ValueError):
+ RemoteDesktopHost(token="t", quality=99)
+
+
+def test_viewer_send_before_connect_raises():
+ viewer = RemoteDesktopViewer(host="127.0.0.1", port=1, token="t")
+ with pytest.raises(ConnectionError):
+ viewer.send_input({"action": "ping"})
diff --git a/test/unit_test/headless/test_remote_desktop_protocol.py b/test/unit_test/headless/test_remote_desktop_protocol.py
new file mode 100644
index 00000000..ba3f8a26
--- /dev/null
+++ b/test/unit_test/headless/test_remote_desktop_protocol.py
@@ -0,0 +1,79 @@
+"""Tests for the remote_desktop wire protocol and auth helpers."""
+import pytest
+
+from je_auto_control.utils.remote_desktop import auth
+from je_auto_control.utils.remote_desktop.protocol import (
+ HEADER_SIZE, MAX_PAYLOAD_BYTES, MessageType, ProtocolError,
+ decode_frame_header, encode_frame,
+)
+
+
+def test_encode_round_trips_through_decode_header():
+ payload = b"hello"
+ frame = encode_frame(MessageType.AUTH_OK, payload)
+ msg_type, length = decode_frame_header(frame[:HEADER_SIZE])
+ assert msg_type is MessageType.AUTH_OK
+ assert length == len(payload)
+ assert frame[HEADER_SIZE:] == payload
+
+
+def test_decode_rejects_bad_magic():
+ bad = b"XX" + bytes([MessageType.FRAME]) + (0).to_bytes(4, "big")
+ with pytest.raises(ProtocolError):
+ decode_frame_header(bad)
+
+
+def test_decode_rejects_unknown_type():
+ bad = b"AC" + bytes([0xEE]) + (0).to_bytes(4, "big")
+ with pytest.raises(ProtocolError):
+ decode_frame_header(bad)
+
+
+def test_decode_rejects_oversized_payload():
+ bad = b"AC" + bytes([MessageType.FRAME]) + (MAX_PAYLOAD_BYTES + 1).to_bytes(4, "big")
+ with pytest.raises(ProtocolError):
+ decode_frame_header(bad)
+
+
+def test_encode_rejects_oversized_payload():
+ with pytest.raises(ProtocolError):
+ encode_frame(MessageType.FRAME, b"x" * (MAX_PAYLOAD_BYTES + 1))
+
+
+def test_encode_requires_bytes_payload():
+ with pytest.raises(TypeError):
+ encode_frame(MessageType.FRAME, "not bytes") # type: ignore[arg-type]
+
+
+def test_compute_response_is_deterministic():
+ nonce = bytes(range(auth.NONCE_BYTES))
+ a = auth.compute_response("hunter2", nonce)
+ b = auth.compute_response("hunter2", nonce)
+ assert a == b
+ assert len(a) == 32 # SHA-256 digest
+
+
+def test_compute_response_different_token_diverges():
+ nonce = bytes(range(auth.NONCE_BYTES))
+ assert auth.compute_response("a", nonce) != auth.compute_response("b", nonce)
+
+
+def test_verify_response_accepts_correct_hmac():
+ nonce = auth.make_nonce()
+ response = auth.compute_response("token", nonce)
+ assert auth.verify_response("token", nonce, response) is True
+
+
+def test_verify_response_rejects_wrong_token():
+ nonce = auth.make_nonce()
+ response = auth.compute_response("token", nonce)
+ assert auth.verify_response("other", nonce, response) is False
+
+
+def test_verify_response_rejects_non_bytes():
+ nonce = auth.make_nonce()
+ assert auth.verify_response("token", nonce, "not bytes") is False # type: ignore[arg-type]
+
+
+def test_make_nonce_has_expected_length():
+ assert len(auth.make_nonce()) == auth.NONCE_BYTES
diff --git a/test/unit_test/headless/test_remote_desktop_tls.py b/test/unit_test/headless/test_remote_desktop_tls.py
new file mode 100644
index 00000000..b86262bc
--- /dev/null
+++ b/test/unit_test/headless/test_remote_desktop_tls.py
@@ -0,0 +1,192 @@
+"""End-to-end TLS tests using a self-signed loopback certificate."""
+import datetime
+import ipaddress
+import socket
+import ssl
+import time
+from pathlib import Path
+from typing import Tuple
+
+import pytest
+
+cryptography = pytest.importorskip("cryptography")
+
+from cryptography import x509 # noqa: E402
+from cryptography.hazmat.primitives import hashes, serialization # noqa: E402
+from cryptography.hazmat.primitives.asymmetric import rsa # noqa: E402
+from cryptography.x509.oid import NameOID # noqa: E402
+
+from je_auto_control.utils.remote_desktop import (
+ RemoteDesktopHost, RemoteDesktopViewer,
+)
+from je_auto_control.utils.remote_desktop.protocol import AuthenticationError
+
+
+def _wait_until(predicate, timeout: float = 2.0,
+ interval: float = 0.02) -> bool:
+ deadline = time.monotonic() + timeout
+ while time.monotonic() < deadline:
+ if predicate():
+ return True
+ time.sleep(interval)
+ return predicate()
+
+
+def _generate_self_signed(tmp_path: Path) -> Tuple[Path, Path]:
+ """Write a self-signed cert + key for ``127.0.0.1`` to ``tmp_path``."""
+ key = rsa.generate_private_key(public_exponent=65537, key_size=2048)
+ name = x509.Name([
+ x509.NameAttribute(NameOID.COMMON_NAME, "remote-desktop-test"),
+ ])
+ now = datetime.datetime.now(datetime.timezone.utc)
+ cert = (
+ x509.CertificateBuilder()
+ .subject_name(name)
+ .issuer_name(name)
+ .public_key(key.public_key())
+ .serial_number(x509.random_serial_number())
+ .not_valid_before(now - datetime.timedelta(minutes=1))
+ .not_valid_after(now + datetime.timedelta(days=1))
+ .add_extension(
+ x509.SubjectAlternativeName([
+ x509.IPAddress(ipaddress.IPv4Address("127.0.0.1")),
+ x509.DNSName("localhost"),
+ ]),
+ critical=False,
+ )
+ .sign(private_key=key, algorithm=hashes.SHA256())
+ )
+ cert_path = tmp_path / "cert.pem"
+ key_path = tmp_path / "key.pem"
+ cert_path.write_bytes(
+ cert.public_bytes(serialization.Encoding.PEM)
+ )
+ key_path.write_bytes(
+ key.private_bytes(
+ encoding=serialization.Encoding.PEM,
+ format=serialization.PrivateFormat.TraditionalOpenSSL,
+ encryption_algorithm=serialization.NoEncryption(),
+ )
+ )
+ return cert_path, key_path
+
+
+def _server_context(cert_path: Path, key_path: Path) -> ssl.SSLContext:
+ ctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
+ ctx.minimum_version = ssl.TLSVersion.TLSv1_2
+ ctx.load_cert_chain(certfile=str(cert_path), keyfile=str(key_path))
+ return ctx
+
+
+def _trusting_client_context(ca_path: Path) -> ssl.SSLContext:
+ """Verifying client context that trusts only the supplied test CA cert."""
+ ctx = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
+ ctx.minimum_version = ssl.TLSVersion.TLSv1_2
+ ctx.load_verify_locations(cafile=str(ca_path))
+ ctx.check_hostname = True
+ ctx.verify_mode = ssl.CERT_REQUIRED
+ return ctx
+
+
+def _insecure_client_context() -> ssl.SSLContext:
+ """Self-signed loopback test context — verification deliberately off."""
+ ctx = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT) # NOSONAR S5527 # loopback self-signed test
+ ctx.minimum_version = ssl.TLSVersion.TLSv1_2
+ ctx.check_hostname = False
+ ctx.verify_mode = ssl.CERT_NONE # NOSONAR S4830 # loopback self-signed test
+ return ctx
+
+
+def _start_tls_host(tmp_path: Path) -> Tuple[RemoteDesktopHost, Path, Path]:
+ cert_path, key_path = _generate_self_signed(tmp_path)
+ server_ctx = _server_context(cert_path, key_path)
+ host = RemoteDesktopHost(
+ token="tok", bind="127.0.0.1", port=0, fps=50.0,
+ frame_provider=lambda: b"tls-frame",
+ input_dispatcher=lambda *_a, **_k: None,
+ host_id="111111111", ssl_context=server_ctx,
+ )
+ host.start()
+ return host, cert_path, key_path
+
+
+def test_tls_round_trip_with_trusting_client(tmp_path):
+ host, cert_path, _ = _start_tls_host(tmp_path)
+ try:
+ client_ctx = _trusting_client_context(cert_path)
+ received = []
+ viewer = RemoteDesktopViewer(
+ host="127.0.0.1", port=host.port, token="tok",
+ on_frame=received.append,
+ ssl_context=client_ctx,
+ )
+ viewer.connect(timeout=2.0)
+ assert _wait_until(lambda: len(received) >= 1, timeout=2.0)
+ assert all(frame == b"tls-frame" for frame in received)
+ viewer.disconnect()
+ finally:
+ host.stop(timeout=1.0)
+
+
+def test_tls_round_trip_with_insecure_client(tmp_path):
+ host, _, _ = _start_tls_host(tmp_path)
+ try:
+ viewer = RemoteDesktopViewer(
+ host="127.0.0.1", port=host.port, token="tok",
+ ssl_context=_insecure_client_context(),
+ )
+ viewer.connect(timeout=2.0)
+ assert viewer.connected
+ viewer.disconnect()
+ finally:
+ host.stop(timeout=1.0)
+
+
+def test_plain_viewer_against_tls_host_fails(tmp_path):
+ """A non-TLS viewer cannot finish the handshake against a TLS host."""
+ host, _, _ = _start_tls_host(tmp_path)
+ try:
+ viewer = RemoteDesktopViewer(
+ host="127.0.0.1", port=host.port, token="tok",
+ )
+ with pytest.raises((OSError, AuthenticationError)):
+ viewer.connect(timeout=2.0)
+ # Host should refuse to count an incomplete handshake as connected.
+ assert _wait_until(lambda: host.connected_clients == 0, timeout=2.0)
+ finally:
+ host.stop(timeout=1.0)
+
+
+def test_tls_client_against_plain_host_fails():
+ """A TLS-only viewer cannot speak to a plain TCP host."""
+ host = RemoteDesktopHost(
+ token="tok", bind="127.0.0.1", port=0, fps=50.0,
+ frame_provider=lambda: b"plain",
+ input_dispatcher=lambda *_a, **_k: None,
+ host_id="222222222",
+ )
+ host.start()
+ try:
+ viewer = RemoteDesktopViewer(
+ host="127.0.0.1", port=host.port, token="tok",
+ ssl_context=_insecure_client_context(),
+ )
+ with pytest.raises((OSError, ssl.SSLError, AuthenticationError)):
+ viewer.connect(timeout=2.0)
+ finally:
+ host.stop(timeout=1.0)
+
+
+def test_tls_uses_socket_class_after_wrap(tmp_path):
+ """After connect, the viewer's socket should be an SSLSocket."""
+ host, cert_path, _ = _start_tls_host(tmp_path)
+ try:
+ viewer = RemoteDesktopViewer(
+ host="127.0.0.1", port=host.port, token="tok",
+ ssl_context=_trusting_client_context(cert_path),
+ )
+ viewer.connect(timeout=2.0)
+ assert isinstance(viewer._sock, ssl.SSLSocket) # noqa: SLF001
+ viewer.disconnect()
+ finally:
+ host.stop(timeout=1.0)
diff --git a/test/unit_test/headless/test_remote_desktop_websocket.py b/test/unit_test/headless/test_remote_desktop_websocket.py
new file mode 100644
index 00000000..9097778a
--- /dev/null
+++ b/test/unit_test/headless/test_remote_desktop_websocket.py
@@ -0,0 +1,214 @@
+"""End-to-end tests for the WebSocket-transport remote-desktop variant."""
+import socket
+import time
+
+import pytest
+
+from je_auto_control.utils.remote_desktop import (
+ RemoteDesktopHost, RemoteDesktopViewer,
+ WebSocketDesktopHost, WebSocketDesktopViewer,
+)
+from je_auto_control.utils.remote_desktop.protocol import (
+ AuthenticationError, MessageType, encode_frame,
+)
+from je_auto_control.utils.remote_desktop.ws_protocol import (
+ WsProtocolError, client_handshake, recv_message, send_binary,
+ server_handshake,
+)
+
+
+def _wait_until(predicate, timeout: float = 2.0,
+ interval: float = 0.02) -> bool:
+ deadline = time.monotonic() + timeout
+ while time.monotonic() < deadline:
+ if predicate():
+ return True
+ time.sleep(interval)
+ return predicate()
+
+
+# --- ws_protocol smoke tests ---------------------------------------------
+
+
+def _make_socketpair():
+ """Return a pair of connected TCP sockets via loopback (cross-platform)."""
+ listen = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+ listen.bind(("127.0.0.1", 0))
+ listen.listen(1)
+ port = listen.getsockname()[1]
+ client = socket.create_connection(("127.0.0.1", port))
+ server, _ = listen.accept()
+ listen.close()
+ return server, client
+
+
+def test_handshake_round_trip():
+ server, client = _make_socketpair()
+ try:
+ import threading
+ path = {}
+
+ def server_side():
+ path["value"] = server_handshake(server)
+
+ thread = threading.Thread(target=server_side)
+ thread.start()
+ client_handshake(client, "127.0.0.1", 1234, path="/rd")
+ thread.join(timeout=1.0)
+ assert path["value"] == "/rd"
+ finally:
+ server.close()
+ client.close()
+
+
+def test_binary_frame_round_trip_unmasked_then_masked():
+ server, client = _make_socketpair()
+ try:
+ # Unmasked server -> client
+ send_binary(server, b"hello world", mask=False)
+ assert recv_message(client) == b"hello world"
+ # Masked client -> server (RFC 6455 mandates client masking)
+ send_binary(client, b"\x01\x02\x03", mask=True)
+ assert recv_message(server) == b"\x01\x02\x03"
+ finally:
+ server.close()
+ client.close()
+
+
+def test_recv_handles_extended_payload_length():
+ server, client = _make_socketpair()
+ try:
+ big = b"A" * 70_000 # forces 64-bit length encoding
+ send_binary(server, big, mask=False)
+ assert recv_message(client) == big
+ finally:
+ server.close()
+ client.close()
+
+
+def test_handshake_rejects_non_websocket_request():
+ server, client = _make_socketpair()
+ try:
+ client.sendall(b"GET / HTTP/1.1\r\nHost: x\r\n\r\n")
+ with pytest.raises(WsProtocolError):
+ server_handshake(server)
+ finally:
+ server.close()
+ client.close()
+
+
+# --- end-to-end host <-> viewer over WS ----------------------------------
+
+
+def _start_ws_host(token: str = "tok",
+ host_id: str = "100200300") -> WebSocketDesktopHost:
+ captured = []
+ host = WebSocketDesktopHost(
+ token=token, bind="127.0.0.1", port=0, fps=50.0,
+ frame_provider=lambda: b"ws-frame",
+ input_dispatcher=captured.append,
+ host_id=host_id,
+ )
+ host.start()
+ host._test_captured_input = captured # noqa: SLF001 # test helper
+ return host
+
+
+def test_ws_viewer_authenticates_and_receives_frames():
+ host = _start_ws_host()
+ try:
+ received = []
+ viewer = WebSocketDesktopViewer(
+ host="127.0.0.1", port=host.port, token="tok",
+ on_frame=received.append,
+ )
+ viewer.connect(timeout=2.0)
+ assert _wait_until(lambda: len(received) >= 2)
+ assert all(frame == b"ws-frame" for frame in received)
+ viewer.disconnect()
+ finally:
+ host.stop(timeout=1.0)
+
+
+def test_ws_viewer_with_wrong_token_is_rejected():
+ host = _start_ws_host(token="right")
+ try:
+ viewer = WebSocketDesktopViewer(
+ host="127.0.0.1", port=host.port, token="wrong",
+ )
+ with pytest.raises(AuthenticationError):
+ viewer.connect(timeout=2.0)
+ assert host.connected_clients == 0
+ finally:
+ host.stop(timeout=1.0)
+
+
+def test_ws_viewer_input_reaches_host_dispatcher():
+ host = _start_ws_host()
+ try:
+ viewer = WebSocketDesktopViewer(
+ host="127.0.0.1", port=host.port, token="tok",
+ )
+ viewer.connect(timeout=2.0)
+ viewer.send_input({"action": "mouse_move", "x": 42, "y": 24})
+ viewer.send_input({"action": "type", "text": "hi"})
+ captured = host._test_captured_input # noqa: SLF001
+ assert _wait_until(lambda: len(captured) >= 2)
+ assert {"action": "mouse_move", "x": 42, "y": 24} in captured
+ assert {"action": "type", "text": "hi"} in captured
+ viewer.disconnect()
+ finally:
+ host.stop(timeout=1.0)
+
+
+def test_ws_host_announces_host_id():
+ host = _start_ws_host(host_id="700800900")
+ try:
+ viewer = WebSocketDesktopViewer(
+ host="127.0.0.1", port=host.port, token="tok",
+ expected_host_id="700800900",
+ )
+ viewer.connect(timeout=2.0)
+ assert viewer.remote_host_id == "700800900"
+ viewer.disconnect()
+ finally:
+ host.stop(timeout=1.0)
+
+
+def test_plain_tcp_viewer_against_ws_host_is_rejected():
+ host = _start_ws_host()
+ try:
+ viewer = RemoteDesktopViewer(
+ host="127.0.0.1", port=host.port, token="tok",
+ )
+ with pytest.raises((OSError, AuthenticationError)):
+ viewer.connect(timeout=2.0)
+ assert _wait_until(lambda: host.connected_clients == 0)
+ finally:
+ host.stop(timeout=1.0)
+
+
+def test_ws_viewer_against_plain_host_fails():
+ host = RemoteDesktopHost(
+ token="tok", bind="127.0.0.1", port=0, fps=50.0,
+ frame_provider=lambda: b"plain",
+ input_dispatcher=lambda *_a, **_k: None,
+ host_id="111222333",
+ )
+ host.start()
+ try:
+ viewer = WebSocketDesktopViewer(
+ host="127.0.0.1", port=host.port, token="tok",
+ )
+ with pytest.raises((OSError, ConnectionError, WsProtocolError,
+ AuthenticationError)):
+ viewer.connect(timeout=2.0)
+ finally:
+ host.stop(timeout=1.0)
+
+
+def test_ws_viewer_path_validation():
+ with pytest.raises(ValueError):
+ WebSocketDesktopViewer(
+ host="127.0.0.1", port=1, token="t", path="no-leading-slash",
+ )