diff --git a/Cargo.toml b/Cargo.toml index 80fac6831b..6d101defef 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -65,6 +65,7 @@ cidre = { git = "https://github.com/CapSoftware/cidre", rev = "bf84b67079a8", fe "io_surface", "mtl", "vt", + "vn", ], default-features = false } windows = "0.60.0" diff --git a/apps/desktop/src-tauri/Cargo.toml b/apps/desktop/src-tauri/Cargo.toml index d41d28fecb..1d8271a549 100644 --- a/apps/desktop/src-tauri/Cargo.toml +++ b/apps/desktop/src-tauri/Cargo.toml @@ -150,9 +150,16 @@ parakeet-rs = "0.3.4" [target.'cfg(target_os= "windows")'.dependencies] windows = { workspace = true, features = [ + "Foundation", + "Foundation_Collections", + "Globalization", + "Graphics_Imaging", + "Media_Ocr", + "Storage_Streams", "Win32_Foundation", "Win32_System", "Win32_System_Power", + "Win32_System_WinRT", "Win32_UI_WindowsAndMessaging", "Win32_Graphics_Gdi", ] } diff --git a/apps/desktop/src-tauri/src/lib.rs b/apps/desktop/src-tauri/src/lib.rs index 4c8a8f278d..0ef45d6b9d 100644 --- a/apps/desktop/src-tauri/src/lib.rs +++ b/apps/desktop/src-tauri/src/lib.rs @@ -71,8 +71,8 @@ use recording::{InProgressRecording, RecordingEvent, RecordingInputKind}; use scap_targets::{Display, DisplayId, WindowId, bounds::LogicalBounds}; use screenshot_editor::{ PendingScreenshotEditorInstances, ScreenshotEditorInstances, WindowScreenshotEditorInstance, - create_screenshot_editor_instance, render_screenshot_for_export, render_screenshot_png, - update_screenshot_config, + create_screenshot_editor_instance, recognize_screenshot_text, render_screenshot_for_export, + render_screenshot_png, update_screenshot_config, }; mod gpu_context; @@ -3951,6 +3951,7 @@ pub async fn run(recording_logging_handle: LoggingHandle, logs_dir: PathBuf) { upload_screenshot, create_screenshot_editor_instance, update_screenshot_config, + recognize_screenshot_text, get_recording_meta, save_file_dialog, list_recordings, diff --git a/apps/desktop/src-tauri/src/screenshot_editor.rs b/apps/desktop/src-tauri/src/screenshot_editor.rs index 02bdb2cdbf..4118f50a34 100644 --- a/apps/desktop/src-tauri/src/screenshot_editor.rs +++ b/apps/desktop/src-tauri/src/screenshot_editor.rs @@ -14,7 +14,7 @@ use image::{ GenericImageView, ImageEncoder, RgbImage, buffer::ConvertBuffer, codecs::png::PngEncoder, }; use relative_path::RelativePathBuf; -use serde::Serialize; +use serde::{Deserialize, Serialize}; use specta::Type; use std::io::Cursor; use std::str::FromStr; @@ -645,6 +645,37 @@ pub struct SerializedScreenshotEditorInstance { pub image_height: u32, } +#[derive(Clone, Copy, Deserialize, Serialize, Type, Debug)] +#[serde(rename_all = "camelCase")] +pub struct ScreenshotOcrRegion { + pub x: u32, + pub y: u32, + pub width: u32, + pub height: u32, +} + +#[derive(Clone, Serialize, Type, Debug)] +#[serde(rename_all = "camelCase")] +pub struct ScreenshotOcrLine { + pub text: String, + pub confidence: Option, + pub bounds: ScreenshotOcrRegion, +} + +#[derive(Clone, Serialize, Type, Debug)] +#[serde(rename_all = "camelCase")] +pub struct ScreenshotOcrResult { + pub text: String, + pub lines: Vec, + pub engine: String, +} + +struct ScreenshotOcrImage { + bgra: Vec, + width: u32, + height: u32, +} + #[tauri::command] #[specta::specta] pub async fn create_screenshot_editor_instance( @@ -714,6 +745,466 @@ pub async fn update_screenshot_config( Ok(()) } +#[tauri::command] +#[specta::specta] +pub async fn recognize_screenshot_text( + instance: WindowScreenshotEditorInstance, + region: ScreenshotOcrRegion, +) -> Result { + let region = clamp_screenshot_ocr_region(region, instance.image_width, instance.image_height)?; + let image = create_screenshot_ocr_image( + instance.source_rgba.as_ref(), + instance.image_width, + instance.image_height, + region, + )?; + let mut result = recognize_screenshot_ocr_image(image).await?; + + for line in &mut result.lines { + line.bounds.x = line.bounds.x.saturating_add(region.x); + line.bounds.y = line.bounds.y.saturating_add(region.y); + } + + Ok(result) +} + +fn clamp_screenshot_ocr_region( + region: ScreenshotOcrRegion, + image_width: u32, + image_height: u32, +) -> Result { + if image_width == 0 || image_height == 0 { + return Err("Screenshot image is empty".to_string()); + } + + let x = region.x.min(image_width.saturating_sub(1)); + let y = region.y.min(image_height.saturating_sub(1)); + let width = region.width.min(image_width.saturating_sub(x)); + let height = region.height.min(image_height.saturating_sub(y)); + + if width < 4 || height < 4 { + return Err("Select a larger text area".to_string()); + } + + Ok(ScreenshotOcrRegion { + x, + y, + width, + height, + }) +} + +fn create_screenshot_ocr_image( + source_rgba: &[u8], + image_width: u32, + image_height: u32, + region: ScreenshotOcrRegion, +) -> Result { + let image_width = usize::try_from(image_width) + .map_err(|_| "Screenshot width is too large for OCR".to_string())?; + let image_height = usize::try_from(image_height) + .map_err(|_| "Screenshot height is too large for OCR".to_string())?; + let region_x = + usize::try_from(region.x).map_err(|_| "OCR region x is too large".to_string())?; + let region_y = + usize::try_from(region.y).map_err(|_| "OCR region y is too large".to_string())?; + let region_width = + usize::try_from(region.width).map_err(|_| "OCR region width is too large".to_string())?; + let region_height = + usize::try_from(region.height).map_err(|_| "OCR region height is too large".to_string())?; + + let expected_len = image_width + .checked_mul(image_height) + .and_then(|pixels| pixels.checked_mul(4)) + .ok_or_else(|| "Screenshot image is too large for OCR".to_string())?; + + if source_rgba.len() != expected_len { + return Err("Screenshot image data is invalid for OCR".to_string()); + } + + let output_len = region_width + .checked_mul(region_height) + .and_then(|pixels| pixels.checked_mul(4)) + .ok_or_else(|| "OCR region is too large".to_string())?; + let mut bgra = vec![0; output_len]; + let source_row_bytes = image_width + .checked_mul(4) + .ok_or_else(|| "Screenshot row is too large for OCR".to_string())?; + let region_row_bytes = region_width + .checked_mul(4) + .ok_or_else(|| "OCR row is too large".to_string())?; + let region_x_bytes = region_x + .checked_mul(4) + .ok_or_else(|| "OCR region x is too large".to_string())?; + + for row in 0..region_height { + let source_start = region_y + .checked_add(row) + .and_then(|source_row| source_row.checked_mul(source_row_bytes)) + .and_then(|source_offset| source_offset.checked_add(region_x_bytes)) + .ok_or_else(|| "OCR source region is invalid".to_string())?; + let source_end = source_start + .checked_add(region_row_bytes) + .ok_or_else(|| "OCR source region is invalid".to_string())?; + let output_start = row + .checked_mul(region_row_bytes) + .ok_or_else(|| "OCR output region is invalid".to_string())?; + let output_end = output_start + .checked_add(region_row_bytes) + .ok_or_else(|| "OCR output region is invalid".to_string())?; + let source_row = source_rgba + .get(source_start..source_end) + .ok_or_else(|| "OCR source region is outside the screenshot".to_string())?; + let output_row = bgra + .get_mut(output_start..output_end) + .ok_or_else(|| "OCR output region is invalid".to_string())?; + + for (source_pixel, output_pixel) in source_row + .chunks_exact(4) + .zip(output_row.chunks_exact_mut(4)) + { + output_pixel[0] = source_pixel[2]; + output_pixel[1] = source_pixel[1]; + output_pixel[2] = source_pixel[0]; + output_pixel[3] = source_pixel[3]; + } + } + + Ok(ScreenshotOcrImage { + bgra, + width: region.width, + height: region.height, + }) +} + +#[cfg(target_os = "macos")] +async fn recognize_screenshot_ocr_image( + image: ScreenshotOcrImage, +) -> Result { + tokio::task::spawn_blocking(move || recognize_screenshot_ocr_image_macos(image)) + .await + .map_err(|e| format!("OCR task failed: {e}"))? +} + +#[cfg(target_os = "windows")] +async fn recognize_screenshot_ocr_image( + image: ScreenshotOcrImage, +) -> Result { + tokio::task::spawn_blocking(move || recognize_screenshot_ocr_image_windows(image)) + .await + .map_err(|e| format!("OCR task failed: {e}"))? +} + +#[cfg(not(any(target_os = "macos", target_os = "windows")))] +async fn recognize_screenshot_ocr_image( + _image: ScreenshotOcrImage, +) -> Result { + Err("OCR is only available on macOS and Windows".to_string()) +} + +#[cfg(target_os = "macos")] +fn recognize_screenshot_ocr_image_macos( + image: ScreenshotOcrImage, +) -> Result { + cidre::objc::ar_pool(|| { + use cidre::{cv, ns, vn}; + use std::ffi::c_void; + + extern "C" fn release_pixel_buffer_data( + release_ref_con: *mut c_void, + _base_address: *const *const c_void, + ) { + if !release_ref_con.is_null() { + unsafe { + drop(Box::from_raw(release_ref_con.cast::>())); + } + } + } + + let width = + usize::try_from(image.width).map_err(|_| "OCR image width is too large".to_string())?; + let height = usize::try_from(image.height) + .map_err(|_| "OCR image height is too large".to_string())?; + let bytes_per_row = width + .checked_mul(4) + .ok_or_else(|| "OCR image row is too large".to_string())?; + let mut data = Box::new(image.bgra); + let base_address = data.as_mut_ptr().cast::(); + let release_ref_con = Box::into_raw(data).cast::(); + + let pixel_buffer = match cv::PixelBuf::with_bytes( + width, + height, + base_address, + bytes_per_row, + release_pixel_buffer_data, + release_ref_con, + cv::PixelFormat::_32_BGRA, + None, + ) { + Ok(pixel_buffer) => pixel_buffer, + Err(e) => { + unsafe { + drop(Box::from_raw(release_ref_con.cast::>())); + } + return Err(format!("Failed to create OCR image: {e}")); + } + }; + + let mut request = vn::RecognizeTextRequest::new(); + request.set_recognition_level(vn::RequestTextRecognitionLevel::Accurate); + request.set_uses_lang_correction(true); + + if cidre::version!(macos = 13.0) { + request.set_revision(vn::RecognizeTextRequest::REVISION_3); + unsafe { + request.set_automatically_detects_lang(true); + } + } else { + request.set_revision(vn::RecognizeTextRequest::REVISION_2); + } + + let handler = vn::ImageRequestHandler::with_cv_pixel_buf(&pixel_buffer, None) + .ok_or_else(|| "Failed to initialize OCR image handler".to_string())?; + let requests = ns::Array::::from_slice(&[&request]); + handler + .perform(&requests) + .map_err(|e| format!("macOS OCR failed: {e}"))?; + + let observations = request.results().unwrap_or_else(ns::Array::new); + let mut lines = Vec::new(); + + for observation in observations.iter() { + let candidates = observation.top_candidates(1); + let Some(candidate) = candidates.first() else { + continue; + }; + let text = candidate.string().to_string(); + if text.trim().is_empty() { + continue; + } + lines.push(ScreenshotOcrLine { + text, + confidence: Some(candidate.confidence()), + bounds: normalized_macos_ocr_rect_to_region( + observation.bounding_box(), + image.width, + image.height, + ), + }); + } + + let text = lines + .iter() + .map(|line| line.text.as_str()) + .collect::>() + .join("\n"); + + Ok(ScreenshotOcrResult { + text, + lines, + engine: "macos-vision".to_string(), + }) + }) +} + +#[cfg(target_os = "macos")] +fn normalized_macos_ocr_rect_to_region( + rect: cidre::cg::Rect, + width: u32, + height: u32, +) -> ScreenshotOcrRegion { + let width_f = f64::from(width); + let height_f = f64::from(height); + let left = clamp_f64(rect.origin.x * width_f, 0.0, width_f); + let right = clamp_f64((rect.origin.x + rect.size.width) * width_f, 0.0, width_f); + let top = clamp_f64( + (1.0 - rect.origin.y - rect.size.height) * height_f, + 0.0, + height_f, + ); + let bottom = clamp_f64((1.0 - rect.origin.y) * height_f, 0.0, height_f); + let x = left.round() as u32; + let y = top.round() as u32; + let right = right.round() as u32; + let bottom = bottom.round() as u32; + + ScreenshotOcrRegion { + x, + y, + width: right.saturating_sub(x), + height: bottom.saturating_sub(y), + } +} + +#[cfg(target_os = "macos")] +fn clamp_f64(value: f64, min: f64, max: f64) -> f64 { + if value.is_finite() { + value.clamp(min, max) + } else { + min + } +} + +#[cfg(target_os = "windows")] +struct WindowsRuntimeGuard; + +#[cfg(target_os = "windows")] +impl Drop for WindowsRuntimeGuard { + fn drop(&mut self) { + unsafe { + windows::Win32::System::WinRT::RoUninitialize(); + } + } +} + +#[cfg(target_os = "windows")] +fn initialize_windows_runtime() -> Result { + use windows::Win32::System::WinRT::{RO_INIT_MULTITHREADED, RoInitialize}; + + unsafe { RoInitialize(RO_INIT_MULTITHREADED) } + .map_err(|e| format!("Windows OCR runtime failed: {e}"))?; + + Ok(WindowsRuntimeGuard) +} + +#[cfg(target_os = "windows")] +fn recognize_screenshot_ocr_image_windows( + image: ScreenshotOcrImage, +) -> Result { + use windows::Graphics::Imaging::{BitmapAlphaMode, BitmapPixelFormat, SoftwareBitmap}; + use windows::Media::Ocr::OcrEngine; + use windows::Storage::Streams::DataWriter; + + let _runtime = initialize_windows_runtime()?; + + let max_dimension = + OcrEngine::MaxImageDimension().map_err(|e| format!("Windows OCR failed: {e}"))?; + + if image.width > max_dimension || image.height > max_dimension { + return Err(format!( + "Select a smaller text area. Windows OCR supports up to {max_dimension}px per side" + )); + } + + let width = i32::try_from(image.width).map_err(|_| "OCR image width is too large")?; + let height = i32::try_from(image.height).map_err(|_| "OCR image height is too large")?; + let writer = DataWriter::new().map_err(|e| format!("Windows OCR failed: {e}"))?; + writer + .WriteBytes(&image.bgra) + .map_err(|e| format!("Windows OCR failed: {e}"))?; + let buffer = writer + .DetachBuffer() + .map_err(|e| format!("Windows OCR failed: {e}"))?; + let bitmap = SoftwareBitmap::CreateCopyWithAlphaFromBuffer( + &buffer, + BitmapPixelFormat::Bgra8, + width, + height, + BitmapAlphaMode::Premultiplied, + ) + .map_err(|e| format!("Windows OCR failed: {e}"))?; + let engine = OcrEngine::TryCreateFromUserProfileLanguages() + .map_err(|e| format!("Windows OCR is not available: {e}"))?; + let result = engine + .RecognizeAsync(&bitmap) + .map_err(|e| format!("Windows OCR failed: {e}"))? + .get() + .map_err(|e| format!("Windows OCR failed: {e}"))?; + let text = result + .Text() + .map_err(|e| format!("Windows OCR failed: {e}"))? + .to_string_lossy(); + let ocr_lines = result + .Lines() + .map_err(|e| format!("Windows OCR failed: {e}"))?; + let mut lines = Vec::new(); + + for index in 0..ocr_lines + .Size() + .map_err(|e| format!("Windows OCR failed: {e}"))? + { + let line = ocr_lines + .GetAt(index) + .map_err(|e| format!("Windows OCR failed: {e}"))?; + let line_text = line + .Text() + .map_err(|e| format!("Windows OCR failed: {e}"))? + .to_string_lossy(); + if line_text.trim().is_empty() { + continue; + } + let words = line + .Words() + .map_err(|e| format!("Windows OCR failed: {e}"))?; + let mut bounds: Option<(f32, f32, f32, f32)> = None; + + for word_index in 0..words + .Size() + .map_err(|e| format!("Windows OCR failed: {e}"))? + { + let rect = words + .GetAt(word_index) + .and_then(|word| word.BoundingRect()) + .map_err(|e| format!("Windows OCR failed: {e}"))?; + bounds = Some(match bounds { + Some((left, top, right, bottom)) => ( + left.min(rect.X), + top.min(rect.Y), + right.max(rect.X + rect.Width), + bottom.max(rect.Y + rect.Height), + ), + None => (rect.X, rect.Y, rect.X + rect.Width, rect.Y + rect.Height), + }); + } + + lines.push(ScreenshotOcrLine { + text: line_text, + confidence: None, + bounds: bounds + .map(windows_ocr_bounds_to_region) + .unwrap_or(ScreenshotOcrRegion { + x: 0, + y: 0, + width: 0, + height: 0, + }), + }); + } + + Ok(ScreenshotOcrResult { + text, + lines, + engine: "windows-media-ocr".to_string(), + }) +} + +#[cfg(target_os = "windows")] +fn windows_ocr_bounds_to_region( + (left, top, right, bottom): (f32, f32, f32, f32), +) -> ScreenshotOcrRegion { + let x = clamp_f32_to_u32(left); + let y = clamp_f32_to_u32(top); + let right = clamp_f32_to_u32(right); + let bottom = clamp_f32_to_u32(bottom); + + ScreenshotOcrRegion { + x, + y, + width: right.saturating_sub(x), + height: bottom.saturating_sub(y), + } +} + +#[cfg(target_os = "windows")] +fn clamp_f32_to_u32(value: f32) -> u32 { + if value.is_finite() && value > 0.0 { + value.round().min(u32::MAX as f32) as u32 + } else { + 0 + } +} + #[tauri::command] #[specta::specta] pub async fn render_screenshot_for_export( diff --git a/apps/desktop/src-tauri/src/windows.rs b/apps/desktop/src-tauri/src/windows.rs index e35f4f0dd0..66ab21277f 100644 --- a/apps/desktop/src-tauri/src/windows.rs +++ b/apps/desktop/src-tauri/src/windows.rs @@ -1074,14 +1074,13 @@ impl ShowCapWindow { ensure_camera_input_active(&mut app_state).await; - if enable_native_camera_preview { - if let Err(err) = + if enable_native_camera_preview + && let Err(err) = init_native_camera_preview(&mut app_state, window.clone()).await - { - error!( - "Error reinitializing camera preview for existing window: {err}" - ); - } + { + error!( + "Error reinitializing camera preview for existing window: {err}" + ); } drop(app_state); @@ -1156,14 +1155,11 @@ impl ShowCapWindow { ensure_camera_input_active(&mut app_state).await; - if enable_native_camera_preview { - if let Err(err) = + if enable_native_camera_preview + && let Err(err) = init_native_camera_preview(&mut app_state, window.clone()).await - { - error!( - "Error reinitializing camera preview for existing window: {err}" - ); - } + { + error!("Error reinitializing camera preview for existing window: {err}"); } drop(app_state); @@ -2122,14 +2118,13 @@ impl ShowCapWindow { } } - if enable_native_camera_preview { - if let Err(err) = + if enable_native_camera_preview + && let Err(err) = init_native_camera_preview(&mut state, window.clone()).await - { - error!( - "Error initializing camera preview, falling back to WebSocket preview: {err}" - ); - } + { + error!( + "Error initializing camera preview, falling back to WebSocket preview: {err}" + ); } #[cfg(not(target_os = "macos"))] diff --git a/apps/desktop/src/routes/camera.tsx b/apps/desktop/src/routes/camera.tsx index 8d049c08a9..e00313436e 100644 --- a/apps/desktop/src/routes/camera.tsx +++ b/apps/desktop/src/routes/camera.tsx @@ -48,6 +48,7 @@ const CAMERA_MAX_SIZE = 600; const CAMERA_DEFAULT_SIZE = 230; const CAMERA_PRESET_SMALL = 230; const CAMERA_PRESET_LARGE = 400; +const CAMERA_TOOLBAR_HEIGHT = 56; const CAMERA_PREVIEW_ERROR_EVENT = "camera-preview-error"; const CAMERA_PREVIEW_CLEAR_EVENT = "camera-preview-clear"; const CAMERA_DISCONNECTED_ISSUE: CameraPreviewIssue = { @@ -371,9 +372,17 @@ function NativeCameraPreviewPage(props: { onPointerCancel={chrome.hide} > - {(issue) => } + {(issue) => ( + + )} -
+
@@ -1009,14 +1018,13 @@ function LegacyCameraPreviewPage(props: { frameDimensions()?.height, ] as const, async ([size, shape, frameWidth, frameHeight]) => { - const BAR_HEIGHT = 56; const base = Math.max(CAMERA_MIN_SIZE, Math.min(CAMERA_MAX_SIZE, size)); const aspect = frameWidth && frameHeight ? frameWidth / frameHeight : 1; const windowWidth = shape === "full" ? (aspect >= 1 ? base * aspect : base) : base; const windowHeight = shape === "full" ? (aspect >= 1 ? base : base / aspect) : base; - const totalHeight = windowHeight + BAR_HEIGHT; + const totalHeight = windowHeight + CAMERA_TOOLBAR_HEIGHT; const currentWindow = getCurrentWindow(); await currentWindow.setSize(new LogicalSize(windowWidth, totalHeight)); @@ -1128,9 +1136,6 @@ function LegacyCameraPreviewPage(props: { onPointerLeave={chrome.hide} onPointerCancel={chrome.hide} > - - {(issue) => } -
+ + {(issue) => } +
); @@ -1319,15 +1327,67 @@ function cameraBorderRadius(state: CameraWindowState) { return `${radius}rem`; } -function CameraIssueOverlay(props: { issue: CameraPreviewIssue }) { +function cameraOverlayTextMetrics(size: number) { + const normalized = + (Math.max(CAMERA_MIN_SIZE, Math.min(CAMERA_MAX_SIZE, size)) - + CAMERA_MIN_SIZE) / + (CAMERA_MAX_SIZE - CAMERA_MIN_SIZE); + const titleSize = 0.75 + normalized * 0.375; + const messageSize = 0.625 + normalized * 0.25; + const lineHeight = 1.2 + normalized * 0.2; + const gap = 0.375 + normalized * 0.25; + const maxWidth = Math.max(7.5, Math.min(18, size / 16)); + + return { + gap: `${gap}rem`, + maxWidth: `${maxWidth}rem`, + messageLineHeight: `${lineHeight}rem`, + messageSize: `${messageSize}rem`, + titleSize: `${titleSize}rem`, + }; +} + +function CameraIssueOverlay(props: { + issue: CameraPreviewIssue; + size: number; + class?: string; + top?: number; + borderRadius?: string; +}) { + const textMetrics = () => cameraOverlayTextMetrics(props.size); + const style = () => { + const base = { "border-radius": props.borderRadius ?? "inherit" }; + if (props.top === undefined) return base; + return { ...base, top: `${props.top}px` }; + }; + return (
-
-

{props.issue.title}

-

{props.issue.message}

+
+

+ {props.issue.title} +

+

+ {props.issue.message} +

); diff --git a/apps/desktop/src/routes/screenshot-editor/AnnotationLayer.tsx b/apps/desktop/src/routes/screenshot-editor/AnnotationLayer.tsx index 1a13181a9d..2bf44cbec6 100644 --- a/apps/desktop/src/routes/screenshot-editor/AnnotationLayer.tsx +++ b/apps/desktop/src/routes/screenshot-editor/AnnotationLayer.tsx @@ -488,8 +488,10 @@ export function AnnotationLayer(props: { }; const startDrag = (e: MouseEvent, id: string, handle?: string) => { + e.preventDefault(); e.stopPropagation(); if (activeTool() !== "select") return; + window.getSelection()?.removeAllRanges(); const svg = (e.currentTarget as Element).closest("svg"); if (!svg) return; @@ -542,8 +544,9 @@ export function AnnotationLayer(props: { position: "absolute", top: 0, left: 0, - "pointer-events": "all", - "z-index": 10, + "pointer-events": + activeTool() === "select" && !dragState() ? "none" : "all", + "z-index": 20, cursor: activeTool() === "select" ? props.isPanning diff --git a/apps/desktop/src/routes/screenshot-editor/AnnotationTools.tsx b/apps/desktop/src/routes/screenshot-editor/AnnotationTools.tsx index 37c0ed861b..22acae883a 100644 --- a/apps/desktop/src/routes/screenshot-editor/AnnotationTools.tsx +++ b/apps/desktop/src/routes/screenshot-editor/AnnotationTools.tsx @@ -8,7 +8,10 @@ import IconLucideLayers from "~icons/lucide/layers"; import IconLucideMousePointer2 from "~icons/lucide/mouse-pointer-2"; import IconLucideSquare from "~icons/lucide/square"; import IconLucideType from "~icons/lucide/type"; -import { type AnnotationType, useScreenshotEditorContext } from "./context"; +import { + type ScreenshotEditorTool, + useScreenshotEditorContext, +} from "./context"; export function AnnotationTools() { const { layersPanelOpen, setLayersPanelOpen } = useScreenshotEditorContext(); @@ -66,7 +69,7 @@ export function AnnotationTools() { } function ToolButton(props: { - tool: AnnotationType | "select"; + tool: ScreenshotEditorTool; icon: Component<{ class?: string }>; label: string; shortcut?: string; diff --git a/apps/desktop/src/routes/screenshot-editor/Editor.tsx b/apps/desktop/src/routes/screenshot-editor/Editor.tsx index 9786113087..543d6e453e 100644 --- a/apps/desktop/src/routes/screenshot-editor/Editor.tsx +++ b/apps/desktop/src/routes/screenshot-editor/Editor.tsx @@ -13,6 +13,7 @@ import { Show, Switch, } from "solid-js"; +import { unwrap } from "solid-js/store"; import { Transition } from "solid-transition-group"; import { CROP_ZERO, @@ -27,7 +28,7 @@ import IconCapCircleX from "~icons/cap/circle-x"; import IconLucideMaximize from "~icons/lucide/maximize"; import IconLucideRatio from "~icons/lucide/ratio"; import { AnnotationConfigBar } from "./AnnotationConfig"; -import { useScreenshotEditorContext } from "./context"; +import { type Annotation, useScreenshotEditorContext } from "./context"; import { Header } from "./Header"; import { LayersPanel } from "./LayersPanel"; import { Preview } from "./Preview"; @@ -40,12 +41,17 @@ export function Editor() { projectHistory, setActiveTool, setSelectedAnnotationId, + annotations, + setAnnotations, + selectedAnnotationId, layersPanelOpen, setLayersPanelOpen, activePopover, setActivePopover, isRenderReady, } = useScreenshotEditorContext(); + const [copiedAnnotation, setCopiedAnnotation] = + createSignal(null); createEffect(() => { const handleKeyDown = (e: KeyboardEvent) => { @@ -61,9 +67,41 @@ export function Editor() { const isMod = e.metaKey || e.ctrlKey; const isShift = e.shiftKey; + const key = e.key.toLowerCase(); + + if (isMod && key === "c") { + const id = selectedAnnotationId(); + const annotation = annotations.find((a) => a.id === id); + if (annotation) { + e.preventDefault(); + e.stopImmediatePropagation(); + setCopiedAnnotation(structuredClone(unwrap(annotation))); + return; + } + } + + if (isMod && key === "v") { + const annotation = copiedAnnotation(); + if (annotation) { + e.preventDefault(); + e.stopImmediatePropagation(); + projectHistory.push(); + const duplicate = { + ...structuredClone(annotation), + id: crypto.randomUUID(), + x: annotation.x + 16, + y: annotation.y + 16, + }; + setAnnotations((prev) => [...prev, duplicate]); + setSelectedAnnotationId(duplicate.id); + setActiveTool("select"); + setCopiedAnnotation(duplicate); + return; + } + } // Undo / Redo - if (isMod && e.key.toLowerCase() === "z") { + if (isMod && key === "z") { e.preventDefault(); if (isShift) { projectHistory.redo(); @@ -72,7 +110,7 @@ export function Editor() { } return; } - if (isMod && e.key.toLowerCase() === "y") { + if (isMod && key === "y") { e.preventDefault(); projectHistory.redo(); return; @@ -80,7 +118,7 @@ export function Editor() { // Tools (No modifiers) if (!isMod && !isShift) { - switch (e.key.toLowerCase()) { + switch (key) { case "a": setActiveTool("arrow"); setSelectedAnnotationId(null); diff --git a/apps/desktop/src/routes/screenshot-editor/Header.tsx b/apps/desktop/src/routes/screenshot-editor/Header.tsx index 3515fbcaa1..ea589cb936 100644 --- a/apps/desktop/src/routes/screenshot-editor/Header.tsx +++ b/apps/desktop/src/routes/screenshot-editor/Header.tsx @@ -33,13 +33,20 @@ import { useScreenshotExport } from "./useScreenshotExport"; export function Header() { const ctx = useScreenshotEditorContext(); - const { setDialog, project, originalImageSize, isImageFileReady } = ctx; + const { + setDialog, + project, + originalImageSize, + isImageFileReady, + selectedAnnotationId, + } = ctx; const path = () => ctx.editorInstance()?.path ?? ""; const { exportImage, isExporting } = useScreenshotExport(); createEffect(() => { const handleKeyDown = (e: KeyboardEvent) => { + if (e.defaultPrevented) return; const target = e.target as HTMLElement | null; if ( target && @@ -53,6 +60,13 @@ export function Header() { if (!e.metaKey && !e.ctrlKey) return; const key = e.key.toLowerCase(); if (key === "c") { + if (selectedAnnotationId()) { + return; + } + const selection = window.getSelection(); + if (selection && !selection.isCollapsed && selection.toString()) { + return; + } e.preventDefault(); if (!isExporting()) exportImage("clipboard"); } else if (key === "s") { diff --git a/apps/desktop/src/routes/screenshot-editor/OcrSelectionOverlay.tsx b/apps/desktop/src/routes/screenshot-editor/OcrSelectionOverlay.tsx new file mode 100644 index 0000000000..3e2a7844c5 --- /dev/null +++ b/apps/desktop/src/routes/screenshot-editor/OcrSelectionOverlay.tsx @@ -0,0 +1,227 @@ +import { invoke } from "@tauri-apps/api/core"; +import { createEffect, createMemo, createSignal, For } from "solid-js"; +import { type ScreenshotProject, useScreenshotEditorContext } from "./context"; + +type Rect = { + x: number; + y: number; + width: number; + height: number; +}; + +type ScreenshotOcrRegion = { + x: number; + y: number; + width: number; + height: number; +}; + +type ScreenshotOcrResult = { + text: string; + lines: { + text: string; + confidence: number | null; + bounds: ScreenshotOcrRegion; + }[]; + engine: string; +}; + +type TextLayout = { + text: string; + rect: Rect; + fontSize: number; + lineHeight: number; + textWidth: number; + scaleX: number; +}; + +const fontFamily = + '-apple-system, BlinkMacSystemFont, "Segoe UI", system-ui, sans-serif'; + +export function OcrSelectionOverlay(props: { + bounds: Rect; + cssWidth: number; + cssHeight: number; + imageRect: Rect; + originalImageSize: { width: number; height: number } | null; + crop: ScreenshotProject["background"]["crop"]; +}) { + const { activeTool, setSelectedAnnotationId } = useScreenshotEditorContext(); + const [ocrResult, setOcrResult] = createSignal( + null, + ); + let requestId = 0; + let measureCanvas: HTMLCanvasElement | null = null; + + const clamp = (value: number, min: number, max: number) => + Math.min(Math.max(value, min), max); + + const sourceRegion = createMemo(() => { + const original = props.originalImageSize; + if (!original || original.width <= 0 || original.height <= 0) return null; + const crop = props.crop ?? { + position: { x: 0, y: 0 }, + size: { x: original.width, y: original.height }, + }; + const left = clamp(crop.position.x, 0, original.width); + const top = clamp(crop.position.y, 0, original.height); + const right = clamp(crop.position.x + crop.size.x, left, original.width); + const bottom = clamp(crop.position.y + crop.size.y, top, original.height); + const x = Math.floor(left); + const y = Math.floor(top); + const sourceRight = Math.ceil(right); + const sourceBottom = Math.ceil(bottom); + const width = sourceRight - x; + const height = sourceBottom - y; + if (width < 4 || height < 4) return null; + return { x, y, width, height }; + }); + + const sourceRegionKey = createMemo(() => { + const region = sourceRegion(); + if (!region) return null; + return `${region.x}:${region.y}:${region.width}:${region.height}`; + }); + + createEffect(() => { + const key = sourceRegionKey(); + const region = sourceRegion(); + requestId += 1; + const currentRequestId = requestId; + + if (!key || !region) { + setOcrResult(null); + return; + } + + setOcrResult(null); + + void (async () => { + try { + const result = await invoke( + "recognize_screenshot_text", + { region }, + ); + if (currentRequestId !== requestId) return; + setOcrResult(result); + } catch { + if (currentRequestId !== requestId) return; + setOcrResult(null); + } + })(); + }); + + const sourceToCssRect = (rect: ScreenshotOcrRegion): Rect | null => { + const region = sourceRegion(); + if (!region) return null; + if (props.bounds.width <= 0 || props.bounds.height <= 0) return null; + if (props.imageRect.width <= 0 || props.imageRect.height <= 0) return null; + const regionRight = region.x + region.width; + const regionBottom = region.y + region.height; + const left = clamp(rect.x, region.x, regionRight); + const top = clamp(rect.y, region.y, regionBottom); + const right = clamp(rect.x + rect.width, left, regionRight); + const bottom = clamp(rect.y + rect.height, top, regionBottom); + const frameRect = { + x: + props.imageRect.x + + ((left - region.x) / region.width) * props.imageRect.width, + y: + props.imageRect.y + + ((top - region.y) / region.height) * props.imageRect.height, + width: ((right - left) / region.width) * props.imageRect.width, + height: ((bottom - top) / region.height) * props.imageRect.height, + }; + if (frameRect.width <= 0 || frameRect.height <= 0) return null; + return { + x: ((frameRect.x - props.bounds.x) / props.bounds.width) * props.cssWidth, + y: + ((frameRect.y - props.bounds.y) / props.bounds.height) * + props.cssHeight, + width: (frameRect.width / props.bounds.width) * props.cssWidth, + height: (frameRect.height / props.bounds.height) * props.cssHeight, + }; + }; + + const measureText = (text: string, fontSize: number) => { + if (typeof document === "undefined") { + return Math.max(text.length * fontSize * 0.55, 1); + } + measureCanvas ??= document.createElement("canvas"); + const ctx = measureCanvas.getContext("2d"); + if (!ctx) return Math.max(text.length * fontSize * 0.55, 1); + ctx.font = `${fontSize}px ${fontFamily}`; + return Math.max(ctx.measureText(text).width, 1); + }; + + const textLayouts = createMemo(() => { + const result = ocrResult(); + if (!result) return []; + return result.lines.flatMap((line) => { + const text = line.text; + const rect = sourceToCssRect(line.bounds); + if (!text.trim() || !rect) return []; + const lineHeight = Math.max(rect.height, 1); + const fontSize = Math.max(lineHeight * 0.78, 1); + const textWidth = measureText(text, fontSize); + const scaleX = rect.width / textWidth; + return [ + { + text, + rect, + fontSize, + lineHeight, + textWidth, + scaleX, + }, + ]; + }); + }); + + return ( +
+ + {(layout) => ( + setSelectedAnnotationId(null)} + > + {layout.text} + + )} + +
+ ); +} diff --git a/apps/desktop/src/routes/screenshot-editor/Preview.tsx b/apps/desktop/src/routes/screenshot-editor/Preview.tsx index a2164a3545..2b353d3c8f 100644 --- a/apps/desktop/src/routes/screenshot-editor/Preview.tsx +++ b/apps/desktop/src/routes/screenshot-editor/Preview.tsx @@ -13,6 +13,13 @@ import { EditorButton, Slider } from "../editor/ui"; import { AnnotationLayer } from "./AnnotationLayer"; import { useScreenshotEditorContext } from "./context"; import { getImageRect } from "./layout"; +import { OcrSelectionOverlay } from "./OcrSelectionOverlay"; + +type WebKitGestureEvent = Event & { + scale?: number; + clientX?: number; + clientY?: number; +}; // CSS for checkerboard grid const gridStyle = { @@ -65,6 +72,7 @@ export function Preview(props: { zoom: number; setZoom: (z: number) => void }) { originalImageSize(), project.background.padding, project.background.crop, + project.aspectRatio, ); }); @@ -175,6 +183,8 @@ export function Preview(props: { zoom: number; setZoom: (z: number) => void }) { setPan({ x: 0, y: 0 }); }; + const clampZoom = (zoom: number) => Math.max(0.1, Math.min(3, zoom)); + createEffect(() => { const handleKeyDown = (e: KeyboardEvent) => { const target = e.target as HTMLElement; @@ -201,54 +211,74 @@ export function Preview(props: { zoom: number; setZoom: (z: number) => void }) { onCleanup(() => window.removeEventListener("keydown", handleKeyDown)); }); + const zoomAtPoint = (clientX: number, clientY: number, newZoom: number) => { + const rect = viewportRef?.getBoundingClientRect(); + const currentScale = fitScale() * props.zoom; + const nextScale = fitScale() * newZoom; + const sizeData = size(); + const boundsData = bounds(); + + if ( + rect && + currentScale > 0 && + nextScale > 0 && + sizeData.width > 0 && + sizeData.height > 0 + ) { + const pointerX = clientX - rect.left; + const pointerY = clientY - rect.top; + const currentPan = pan(); + const contentX = + boundsData.x + + (pointerX - + (sizeData.width - sizeData.width * props.zoom) / 2 - + currentPan.x) / + currentScale; + const contentY = + boundsData.y + + (pointerY - + (sizeData.height - sizeData.height * props.zoom) / 2 - + currentPan.y) / + currentScale; + + setPan({ + x: + pointerX - + (sizeData.width - sizeData.width * newZoom) / 2 - + (contentX - boundsData.x) * nextScale, + y: + pointerY - + (sizeData.height - sizeData.height * newZoom) / 2 - + (contentY - boundsData.y) * nextScale, + }); + } + + props.setZoom(newZoom); + }; + + const normalizeWheelDeltaY = (e: WheelEvent) => { + if (e.deltaMode === 1) return e.deltaY * 16; + if (e.deltaMode === 2) return e.deltaY * window.innerHeight; + return e.deltaY; + }; + + let lastGestureScale = 1; + let lastGestureAt = 0; + const handleWheel = (e: WheelEvent) => { e.preventDefault(); if (e.ctrlKey) { - const delta = -e.deltaY; + if (performance.now() - lastGestureAt < 80) return; + const normalizedDelta = normalizeWheelDeltaY(e); + if (normalizedDelta === 0) return; + const delta = + -Math.sign(normalizedDelta) * Math.max(Math.abs(normalizedDelta), 8); const zoomStep = 0.005; - const newZoom = Math.max(0.1, Math.min(3, props.zoom + delta * zoomStep)); - const rect = viewportRef?.getBoundingClientRect(); - const currentScale = fitScale() * props.zoom; - const nextScale = fitScale() * newZoom; - const sizeData = size(); - const boundsData = bounds(); - - if ( - rect && - currentScale > 0 && - nextScale > 0 && - sizeData.width > 0 && - sizeData.height > 0 - ) { - const pointerX = e.clientX - rect.left; - const pointerY = e.clientY - rect.top; - const currentPan = pan(); - const contentX = - boundsData.x + - (pointerX - - (sizeData.width - sizeData.width * props.zoom) / 2 - - currentPan.x) / - currentScale; - const contentY = - boundsData.y + - (pointerY - - (sizeData.height - sizeData.height * props.zoom) / 2 - - currentPan.y) / - currentScale; - - setPan({ - x: - pointerX - - (sizeData.width - sizeData.width * newZoom) / 2 - - (contentX - boundsData.x) * nextScale, - y: - pointerY - - (sizeData.height - sizeData.height * newZoom) / 2 - - (contentY - boundsData.y) * nextScale, - }); - } - - props.setZoom(newZoom); + zoomAtPoint( + e.clientX, + e.clientY, + clampZoom(props.zoom + delta * zoomStep), + ); } else { setPan((p) => ({ x: p.x - e.deltaX, @@ -257,6 +287,81 @@ export function Preview(props: { zoom: number; setZoom: (z: number) => void }) { } }; + const getGesturePoint = (e: WebKitGestureEvent) => { + const rect = viewportRef?.getBoundingClientRect(); + return { + clientX: e.clientX ?? (rect ? rect.left + rect.width / 2 : 0), + clientY: e.clientY ?? (rect ? rect.top + rect.height / 2 : 0), + }; + }; + + const handleGestureStart = (event: Event) => { + const e = event as WebKitGestureEvent; + e.preventDefault(); + lastGestureScale = e.scale ?? 1; + lastGestureAt = performance.now(); + }; + + const handleGestureChange = (event: Event) => { + const e = event as WebKitGestureEvent; + e.preventDefault(); + const scale = e.scale ?? 1; + const scaleDelta = scale / Math.max(lastGestureScale, 0.001); + lastGestureScale = scale; + lastGestureAt = performance.now(); + const point = getGesturePoint(e); + zoomAtPoint( + point.clientX, + point.clientY, + clampZoom(props.zoom * scaleDelta), + ); + }; + + const handleGestureEnd = (event: Event) => { + event.preventDefault(); + lastGestureScale = 1; + lastGestureAt = performance.now(); + }; + + createEffect(() => { + const element = canvasContainerRef(); + if (!element) return; + const listenerOptions = { capture: true, passive: false }; + const cleanupOptions = { capture: true }; + + element.addEventListener("wheel", handleWheel, listenerOptions); + element.addEventListener( + "gesturestart", + handleGestureStart, + listenerOptions, + ); + element.addEventListener( + "gesturechange", + handleGestureChange, + listenerOptions, + ); + element.addEventListener("gestureend", handleGestureEnd, listenerOptions); + + onCleanup(() => { + element.removeEventListener("wheel", handleWheel, cleanupOptions); + element.removeEventListener( + "gesturestart", + handleGestureStart, + cleanupOptions, + ); + element.removeEventListener( + "gesturechange", + handleGestureChange, + cleanupOptions, + ); + element.removeEventListener( + "gestureend", + handleGestureEnd, + cleanupOptions, + ); + }); + }); + const startPanDrag = (clientX: number, clientY: number) => { setIsDragging(true); setDragStart({ @@ -327,7 +432,6 @@ export function Preview(props: { zoom: number; setZoom: (z: number) => void }) { ref={setCanvasContainerRef} class="flex-1 relative flex items-center justify-center overflow-hidden outline-none" style={gridStyle} - onWheel={handleWheel} onMouseDown={handleMiddleMouseDown} >
@@ -612,6 +716,26 @@ export function Preview(props: { zoom: number; setZoom: (z: number) => void }) { "pointer-events": "none", }} /> +
+ (null); - const [activeTool, setActiveTool] = createSignal( - "select", - ); + const [activeTool, setActiveTool] = + createSignal("select"); const [layersPanelOpen, setLayersPanelOpen] = makePersisted( createSignal(false), @@ -443,8 +443,9 @@ function createScreenshotEditorContext() { imageSize: originalImageSize(), padding: project.background.padding, crop: project.background.crop, + aspectRatio: project.aspectRatio, }), - ({ frame, imageSize, padding, crop }) => { + ({ frame, imageSize, padding, crop, aspectRatio }) => { if (!frame || !imageSize) return; const frameSize = { width: frame.width, height: frame.height }; @@ -463,6 +464,7 @@ function createScreenshotEditorContext() { imageSize, padding, crop, + aspectRatio, ); const rawAnnotations = unwrap(annotations); diff --git a/apps/desktop/src/routes/screenshot-editor/layout.ts b/apps/desktop/src/routes/screenshot-editor/layout.ts index 1a2784516d..38bd306072 100644 --- a/apps/desktop/src/routes/screenshot-editor/layout.ts +++ b/apps/desktop/src/routes/screenshot-editor/layout.ts @@ -1,25 +1,124 @@ -import type { XY } from "~/utils/tauri"; +import type { AspectRatio, XY } from "~/utils/tauri"; export const SCREEN_MAX_PADDING = 0.4; +const roundBaseDimension = (value: number) => + Math.max((Math.ceil(value) + 1) & ~1, 2); + +const roundAutoBaseDimension = (value: number) => (Math.floor(value) + 1) & ~1; + +function getAspectRatioValue(aspectRatio: AspectRatio) { + switch (aspectRatio) { + case "wide": + return 16 / 9; + case "vertical": + return 9 / 16; + case "square": + return 1; + case "classic": + return 4 / 3; + case "tall": + return 3 / 4; + } +} + +function getBaseSize( + cropWidth: number, + cropHeight: number, + paddingFactor: number, + aspectRatio: AspectRatio | null, +) { + if (aspectRatio === null) { + const scale = 1 + paddingFactor * 2; + return { + width: roundAutoBaseDimension(cropWidth * scale), + height: roundAutoBaseDimension(cropHeight * scale), + }; + } + + const cropAspect = cropWidth / cropHeight; + const targetAspect = getAspectRatioValue(aspectRatio); + const padding = Math.max(cropWidth, cropHeight) * paddingFactor * 2; + + if (cropAspect > targetAspect) { + const width = cropWidth + padding; + const height = width / targetAspect; + return { + width: roundBaseDimension(width), + height: roundBaseDimension(height), + }; + } + + const height = cropHeight + padding; + const width = height * targetAspect; + return { + width: roundBaseDimension(width), + height: roundBaseDimension(height), + }; +} + export function calculateImageTransform( frameSize: { width: number; height: number }, imageSize: { width: number; height: number }, padding: number, crop: { position: XY; size: XY } | null, + aspectRatio: AspectRatio | null, ) { const cropWidth = crop?.size.x ?? imageSize.width; const cropHeight = crop?.size.y ?? imageSize.height; + + if ( + frameSize.width <= 0 || + frameSize.height <= 0 || + cropWidth <= 0 || + cropHeight <= 0 + ) { + return { + offset: { x: 0, y: 0 }, + size: { + width: Math.max(frameSize.width, 0), + height: Math.max(frameSize.height, 0), + }, + }; + } + const croppedAspect = cropWidth / cropHeight; const outputAspect = frameSize.width / frameSize.height; const paddingFactor = (padding / 100.0) * SCREEN_MAX_PADDING; + const baseSize = getBaseSize( + cropWidth, + cropHeight, + paddingFactor, + aspectRatio, + ); + const outputScale = Math.min( + frameSize.width / Math.max(baseSize.width, 1), + frameSize.height / Math.max(baseSize.height, 1), + ); + + if (aspectRatio === null) { + const offsetX = cropWidth * paddingFactor * outputScale; + const offsetY = cropHeight * paddingFactor * outputScale; + + return { + offset: { x: offsetX, y: offsetY }, + size: { + width: Math.max(frameSize.width - offsetX * 2, 1), + height: Math.max(frameSize.height - offsetY * 2, 1), + }, + }; + } + const cropBasis = Math.max(cropWidth, cropHeight); const maxPadding = Math.max( Math.min((frameSize.width - 1) / 2, (frameSize.height - 1) / 2), 0, ); - const paddingPixels = Math.min(cropBasis * paddingFactor, maxPadding); + const paddingPixels = Math.min( + cropBasis * paddingFactor * outputScale, + maxPadding, + ); const availableWidth = Math.max(frameSize.width - 2 * paddingPixels, 1); const availableHeight = Math.max(frameSize.height - 2 * paddingPixels, 1); @@ -53,6 +152,7 @@ export function getImageRect( imageSize: { width: number; height: number } | null, padding: number, crop: { position: XY; size: XY } | null, + aspectRatio: AspectRatio | null, ) { if (!imageSize) { return { @@ -68,6 +168,7 @@ export function getImageRect( imageSize, padding, crop, + aspectRatio, ); return { diff --git a/crates/recording/src/feeds/camera.rs b/crates/recording/src/feeds/camera.rs index 879713e4fc..ee6bc98deb 100644 --- a/crates/recording/src/feeds/camera.rs +++ b/crates/recording/src/feeds/camera.rs @@ -327,7 +327,7 @@ struct FinalizePendingRelease { id: DeviceOrModelID, } -fn spawn_camera_setup( +struct CameraSetupArgs { id: DeviceOrModelID, generation: u64, settings: Option, @@ -336,7 +336,22 @@ fn spawn_camera_setup( native_frame_recipient: Recipient, native_sender_count: Arc, flow: CameraSetupFlow, +} + +fn spawn_camera_setup( + args: CameraSetupArgs, ) -> (ReadyFuture, SyncSender<()>, std::thread::JoinHandle<()>) { + let CameraSetupArgs { + id, + generation, + settings, + actor_ref, + new_frame_recipient, + native_frame_recipient, + native_sender_count, + flow, + } = args; + let (ready_tx, ready_rx) = oneshot::channel::>(); let (done_tx, done_rx) = std::sync::mpsc::sync_channel(1); @@ -358,100 +373,13 @@ fn spawn_camera_setup( .build() .expect("Failed to build camera tokio runtime"); - LocalSet::new().block_on(&runtime, async move { + { #[cfg(target_os = "macos")] let _capture_lifecycle_guard = camera_capture_lifecycle_guard(); - if done_rx_thread.try_recv().is_ok() { - let _ = ready_tx_thread.send(Err(SetInputError::BuildStreamCrashed)); - - if matches!(flow, CameraSetupFlow::Open) { - let _ = actor_ref - .tell(InputConnectFailed { - id: id.clone(), - generation, - }) - .await; - } - - return; - } - - let setup_result = setup_camera( - &id, - settings, - new_frame_recipient, - native_frame_recipient, - native_sender_count, - ) - .await; - - let handle = match setup_result { - Ok(result) => { - let SetupCameraResult { - handle, - camera_info, - video_info, - } = result; - - let ready_payload = InputConnected { - generation, - id: id.clone(), - camera_info: camera_info.clone(), - video_info, - done_tx: done_tx_thread.clone(), - }; - - match flow { - CameraSetupFlow::Open => { - let _ = ready_tx_thread.send(Ok(ready_payload.clone())); - let _ = actor_ref.ask(ready_payload).await; - } - CameraSetupFlow::Locked => { - let reconnect_result = actor_ref - .ask(LockedCameraInputReconnected { - id: id.clone(), - camera_info, - video_info, - done_tx: done_tx_thread.clone(), - }) - .await; - - match reconnect_result { - Ok(true) => { - let _ = ready_tx_thread.send(Ok(ready_payload)); - let _ = actor_ref - .tell(FinalizePendingRelease { id: id.clone() }) - .await; - } - Ok(false) => { - warn!( - "Locked camera state changed before reconnecting {:?}", - id - ); - let _ = ready_tx_thread - .send(Err(SetInputError::BuildStreamCrashed)); - let _ = handle.stop_capturing(); - return; - } - Err(err) => { - error!( - ?err, - "Failed to update locked camera state for {:?}", id - ); - let _ = ready_tx_thread - .send(Err(SetInputError::BuildStreamCrashed)); - let _ = handle.stop_capturing(); - return; - } - } - } - } - - handle - } - Err(e) => { - let _ = ready_tx_thread.send(Err(e.clone())); + LocalSet::new().block_on(&runtime, async move { + if done_rx_thread.try_recv().is_ok() { + let _ = ready_tx_thread.send(Err(SetInputError::BuildStreamCrashed)); if matches!(flow, CameraSetupFlow::Open) { let _ = actor_ref @@ -464,27 +392,116 @@ fn spawn_camera_setup( return; } - }; - info!( - "Camera capture thread: waiting for done signal for {:?}", - &id - ); + let setup_result = setup_camera( + &id, + settings, + new_frame_recipient, + native_frame_recipient, + native_sender_count, + ) + .await; + + let handle = match setup_result { + Ok(result) => { + let SetupCameraResult { + handle, + camera_info, + video_info, + } = result; + + let ready_payload = InputConnected { + generation, + id: id.clone(), + camera_info: camera_info.clone(), + video_info, + done_tx: done_tx_thread.clone(), + }; + + match flow { + CameraSetupFlow::Open => { + let _ = ready_tx_thread.send(Ok(ready_payload.clone())); + let _ = actor_ref.ask(ready_payload).await; + } + CameraSetupFlow::Locked => { + let reconnect_result = actor_ref + .ask(LockedCameraInputReconnected { + id: id.clone(), + camera_info, + video_info, + done_tx: done_tx_thread.clone(), + }) + .await; + + match reconnect_result { + Ok(true) => { + let _ = ready_tx_thread.send(Ok(ready_payload)); + let _ = actor_ref + .tell(FinalizePendingRelease { id: id.clone() }) + .await; + } + Ok(false) => { + warn!( + "Locked camera state changed before reconnecting {:?}", + id + ); + let _ = ready_tx_thread + .send(Err(SetInputError::BuildStreamCrashed)); + let _ = handle.stop_capturing(); + return; + } + Err(err) => { + error!( + ?err, + "Failed to update locked camera state for {:?}", id + ); + let _ = ready_tx_thread + .send(Err(SetInputError::BuildStreamCrashed)); + let _ = handle.stop_capturing(); + return; + } + } + } + } - drop(done_tx_thread); - let recv_result = done_rx_thread.recv(); + handle + } + Err(e) => { + let _ = ready_tx_thread.send(Err(e.clone())); - warn!( - "Camera capture thread: done signal received for {:?}, result={:?}", - &id, recv_result - ); + if matches!(flow, CameraSetupFlow::Open) { + let _ = actor_ref + .tell(InputConnectFailed { + id: id.clone(), + generation, + }) + .await; + } + + return; + } + }; - let _ = handle.stop_capturing(); + info!( + "Camera capture thread: waiting for done signal for {:?}", + &id + ); - std::thread::sleep(Duration::from_millis(50)); + drop(done_tx_thread); + let recv_result = done_rx_thread.recv(); - warn!("Camera capture thread: stopped capture of {:?}", &id); - }); + warn!( + "Camera capture thread: done signal received for {:?}, result={:?}", + &id, recv_result + ); + + let _ = handle.stop_capturing(); + + std::thread::sleep(Duration::from_millis(50)); + + warn!("Camera capture thread: stopped capture of {:?}", &id); + }); + } drop(runtime); }); @@ -946,16 +963,16 @@ impl Message for CameraFeed { let native_frame_recipient = actor_ref.clone().recipient(); let id = msg.id.clone(); - let (ready, done_tx, join_handle) = spawn_camera_setup( - id.clone(), + let (ready, done_tx, join_handle) = spawn_camera_setup(CameraSetupArgs { + id: id.clone(), generation, - msg.settings, + settings: msg.settings, actor_ref, new_frame_recipient, native_frame_recipient, - self.native_sender_count.clone(), - CameraSetupFlow::Open, - ); + native_sender_count: self.native_sender_count.clone(), + flow: CameraSetupFlow::Open, + }); self.previous_thread = Some(join_handle); @@ -983,16 +1000,16 @@ impl Message for CameraFeed { let new_frame_recipient = actor_ref.clone().recipient(); let native_frame_recipient = actor_ref.clone().recipient(); - let (ready, _done_tx, join_handle) = spawn_camera_setup( - msg.id.clone(), + let (ready, _done_tx, join_handle) = spawn_camera_setup(CameraSetupArgs { + id: msg.id.clone(), generation, - msg.settings, + settings: msg.settings, actor_ref, new_frame_recipient, native_frame_recipient, - self.native_sender_count.clone(), - CameraSetupFlow::Locked, - ); + native_sender_count: self.native_sender_count.clone(), + flow: CameraSetupFlow::Locked, + }); self.previous_thread = Some(join_handle);