diff --git a/Cargo.toml b/Cargo.toml
index 80fac6831b..6d101defef 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -65,6 +65,7 @@ cidre = { git = "https://github.com/CapSoftware/cidre", rev = "bf84b67079a8", fe
     "io_surface",
     "mtl",
     "vt",
+    "vn",
 ], default-features = false }
 
 windows = "0.60.0"
diff --git a/apps/desktop/src-tauri/Cargo.toml b/apps/desktop/src-tauri/Cargo.toml
index d41d28fecb..1d8271a549 100644
--- a/apps/desktop/src-tauri/Cargo.toml
+++ b/apps/desktop/src-tauri/Cargo.toml
@@ -150,9 +150,16 @@ parakeet-rs = "0.3.4"
 
 [target.'cfg(target_os= "windows")'.dependencies]
 windows = { workspace = true, features = [
+    "Foundation",
+    "Foundation_Collections",
+    "Globalization",
+    "Graphics_Imaging",
+    "Media_Ocr",
+    "Storage_Streams",
     "Win32_Foundation",
     "Win32_System",
     "Win32_System_Power",
+    "Win32_System_WinRT",
     "Win32_UI_WindowsAndMessaging",
     "Win32_Graphics_Gdi",
 ] }
diff --git a/apps/desktop/src-tauri/src/lib.rs b/apps/desktop/src-tauri/src/lib.rs
index 4c8a8f278d..0ef45d6b9d 100644
--- a/apps/desktop/src-tauri/src/lib.rs
+++ b/apps/desktop/src-tauri/src/lib.rs
@@ -71,8 +71,8 @@ use recording::{InProgressRecording, RecordingEvent, RecordingInputKind};
 use scap_targets::{Display, DisplayId, WindowId, bounds::LogicalBounds};
 use screenshot_editor::{
     PendingScreenshotEditorInstances, ScreenshotEditorInstances, WindowScreenshotEditorInstance,
-    create_screenshot_editor_instance, render_screenshot_for_export, render_screenshot_png,
-    update_screenshot_config,
+    create_screenshot_editor_instance, recognize_screenshot_text, render_screenshot_for_export,
+    render_screenshot_png, update_screenshot_config,
 };
 
 mod gpu_context;
@@ -3951,6 +3951,7 @@ pub async fn run(recording_logging_handle: LoggingHandle, logs_dir: PathBuf) {
             upload_screenshot,
             create_screenshot_editor_instance,
             update_screenshot_config,
+            recognize_screenshot_text,
             get_recording_meta,
             save_file_dialog,
             list_recordings,
diff --git a/apps/desktop/src-tauri/src/screenshot_editor.rs b/apps/desktop/src-tauri/src/screenshot_editor.rs
index 02bdb2cdbf..4118f50a34 100644
--- a/apps/desktop/src-tauri/src/screenshot_editor.rs
+++ b/apps/desktop/src-tauri/src/screenshot_editor.rs
@@ -14,7 +14,7 @@ use image::{
     GenericImageView, ImageEncoder, RgbImage, buffer::ConvertBuffer, codecs::png::PngEncoder,
 };
 use relative_path::RelativePathBuf;
-use serde::Serialize;
+use serde::{Deserialize, Serialize};
 use specta::Type;
 use std::io::Cursor;
 use std::str::FromStr;
@@ -645,6 +645,37 @@ pub struct SerializedScreenshotEditorInstance {
     pub image_height: u32,
 }
 
+#[derive(Clone, Copy, Deserialize, Serialize, Type, Debug)]
+#[serde(rename_all = "camelCase")] // pixel rectangle: OCR request area and per-line bounds
+pub struct ScreenshotOcrRegion {
+    pub x: u32, // left edge, in pixels
+    pub y: u32, // top edge, in pixels
+    pub width: u32, // horizontal extent, in pixels
+    pub height: u32, // vertical extent, in pixels
+}
+
+#[derive(Clone, Serialize, Type, Debug)]
+#[serde(rename_all = "camelCase")] // one recognized line of text
+pub struct ScreenshotOcrLine {
+    pub text: String, // recognized text of this line
+    pub confidence: Option<f32>, // Vision candidate confidence on macOS; `None` on Windows
+    pub bounds: ScreenshotOcrRegion, // rectangle of the line, in pixels
+}
+
+#[derive(Clone, Serialize, Type, Debug)]
+#[serde(rename_all = "camelCase")] // payload returned by `recognize_screenshot_text`
+pub struct ScreenshotOcrResult {
+    pub text: String, // full recognized text
+    pub lines: Vec<ScreenshotOcrLine>, // recognized lines whose text is not blank
+    pub engine: String, // backend id: "macos-vision" or "windows-media-ocr"
+}
+
+struct ScreenshotOcrImage { // cropped pixels handed to the platform OCR backend
+    bgra: Vec<u8>, // tightly packed 4-byte pixels in B, G, R, A order
+    width: u32, // crop width in pixels
+    height: u32, // crop height in pixels
+}
+
 #[tauri::command]
 #[specta::specta]
 pub async fn create_screenshot_editor_instance(
@@ -714,6 +745,466 @@ pub async fn update_screenshot_config(
     Ok(())
 }
 
+// Tauri command: runs platform OCR over `region` of the instance's source image.
+// Line bounds in the result are shifted into full-image pixel coordinates.
+#[tauri::command]
+#[specta::specta]
+pub async fn recognize_screenshot_text(
+    instance: WindowScreenshotEditorInstance,
+    region: ScreenshotOcrRegion,
+) -> Result<ScreenshotOcrResult, String> {
+    let (full_width, full_height) = (instance.image_width, instance.image_height);
+    let region = clamp_screenshot_ocr_region(region, full_width, full_height)?;
+    let pixels: &[u8] = instance.source_rgba.as_ref();
+    let cropped = create_screenshot_ocr_image(pixels, full_width, full_height, region)?;
+    let mut recognized = recognize_screenshot_ocr_image(cropped).await?;
+
+    // Backends report bounds relative to the crop, so add the crop origin back.
+    recognized.lines.iter_mut().for_each(|line| {
+        line.bounds.x = line.bounds.x.saturating_add(region.x);
+        line.bounds.y = line.bounds.y.saturating_add(region.y);
+    });
+
+    Ok(recognized)
+}
+
+/// Clamps `region` to an `image_width` x `image_height` image (origin snaps to the last
+/// column/row, extent is cut at the edge); errors if empty or under 4 px on a side.
+fn clamp_screenshot_ocr_region(
+    region: ScreenshotOcrRegion,
+    image_width: u32,
+    image_height: u32,
+) -> Result<ScreenshotOcrRegion, String> {
+    if image_width == 0 || image_height == 0 {
+        return Err("Screenshot image is empty".to_string());
+    }
+    // Both dimensions are non-zero and `x`/`y` stay below them, so `-` cannot underflow.
+    let x = region.x.min(image_width - 1);
+    let y = region.y.min(image_height - 1);
+    let width = region.width.min(image_width - x);
+    let height = region.height.min(image_height - y);
+    if width.min(height) < 4 {
+        return Err("Select a larger text area".to_string());
+    }
+    Ok(ScreenshotOcrRegion {
+        x,
+        y,
+        width,
+        height,
+    })
+}
+
+/// Extracts `region` from a tightly packed RGBA image as a new BGRA buffer.
+///
+/// `source_rgba` must contain exactly `image_width * image_height * 4` bytes. All size and
+/// offset arithmetic is overflow-checked, and a region reaching outside the image yields
+/// an error rather than a panic. Red and blue are swapped; green and alpha are copied.
+fn create_screenshot_ocr_image(
+    source_rgba: &[u8],
+    image_width: u32,
+    image_height: u32,
+    region: ScreenshotOcrRegion,
+) -> Result<ScreenshotOcrImage, String> {
+    const BYTES_PER_PIXEL: usize = 4;
+
+    let to_usize = |value: u32, message: &str| {
+        usize::try_from(value).map_err(|_| message.to_string())
+    };
+    let full_width = to_usize(image_width, "Screenshot width is too large for OCR")?;
+    let full_height = to_usize(image_height, "Screenshot height is too large for OCR")?;
+    let crop_x = to_usize(region.x, "OCR region x is too large")?;
+    let crop_y = to_usize(region.y, "OCR region y is too large")?;
+    let crop_width = to_usize(region.width, "OCR region width is too large")?;
+    let crop_height = to_usize(region.height, "OCR region height is too large")?;
+
+    // The source buffer has to match the declared dimensions exactly.
+    let source_len = full_width
+        .checked_mul(full_height)
+        .and_then(|pixels| pixels.checked_mul(BYTES_PER_PIXEL))
+        .ok_or_else(|| "Screenshot image is too large for OCR".to_string())?;
+    if source_rgba.len() != source_len {
+        return Err("Screenshot image data is invalid for OCR".to_string());
+    }
+
+    let crop_len = crop_width
+        .checked_mul(crop_height)
+        .and_then(|pixels| pixels.checked_mul(BYTES_PER_PIXEL))
+        .ok_or_else(|| "OCR region is too large".to_string())?;
+    let mut bgra = vec![0u8; crop_len];
+    let to_bytes = |pixels: usize, message: &str| {
+        pixels
+            .checked_mul(BYTES_PER_PIXEL)
+            .ok_or_else(|| message.to_string())
+    };
+    let source_stride = to_bytes(full_width, "Screenshot row is too large for OCR")?;
+    let crop_stride = to_bytes(crop_width, "OCR row is too large")?;
+    let crop_x_offset = to_bytes(crop_x, "OCR region x is too large")?;
+    // Builds the byte range of one row, failing with `message` on arithmetic overflow.
+    let row_span = |start: Option<usize>, message: &str| {
+        let start = start.ok_or_else(|| message.to_string())?;
+        let end = start
+            .checked_add(crop_stride)
+            .ok_or_else(|| message.to_string())?;
+        Ok::<_, String>(start..end)
+    };
+
+    for row in 0..crop_height {
+        let source_start = crop_y
+            .checked_add(row)
+            .and_then(|line| line.checked_mul(source_stride))
+            .and_then(|offset| offset.checked_add(crop_x_offset));
+        let source_span = row_span(source_start, "OCR source region is invalid")?;
+        let output_span = row_span(row.checked_mul(crop_stride), "OCR output region is invalid")?;
+        let source_row = source_rgba
+            .get(source_span)
+            .ok_or_else(|| "OCR source region is outside the screenshot".to_string())?;
+        let output_row = bgra
+            .get_mut(output_span)
+            .ok_or_else(|| "OCR output region is invalid".to_string())?;
+        // Swap the red and blue channels; green and alpha keep their positions.
+        let pixel_pairs = source_row
+            .chunks_exact(BYTES_PER_PIXEL)
+            .zip(output_row.chunks_exact_mut(BYTES_PER_PIXEL));
+        for (rgba, out) in pixel_pairs {
+            out.copy_from_slice(&[rgba[2], rgba[1], rgba[0], rgba[3]]);
+        }
+    }
+
+    Ok(ScreenshotOcrImage {
+        bgra,
+        width: region.width,
+        height: region.height,
+    })
+}
+
+/// Runs the Vision-based OCR on Tokio's blocking pool; a failed task becomes an `Err`.
+#[cfg(target_os = "macos")]
+async fn recognize_screenshot_ocr_image(
+    image: ScreenshotOcrImage,
+) -> Result<ScreenshotOcrResult, String> {
+    let task = tokio::task::spawn_blocking(move || recognize_screenshot_ocr_image_macos(image));
+    task.await.map_err(|e| format!("OCR task failed: {e}"))?
+}
+
+/// Runs the Windows OCR backend on Tokio's blocking pool; a failed task becomes an `Err`.
+#[cfg(target_os = "windows")]
+async fn recognize_screenshot_ocr_image(
+    image: ScreenshotOcrImage,
+) -> Result<ScreenshotOcrResult, String> {
+    let task = tokio::task::spawn_blocking(move || recognize_screenshot_ocr_image_windows(image));
+    task.await.map_err(|e| format!("OCR task failed: {e}"))?
+}
+
+#[cfg(not(any(target_os = "macos", target_os = "windows")))]
+async fn recognize_screenshot_ocr_image(
+    _image: ScreenshotOcrImage, // no OCR backend on this platform, so the pixels go unused
+) -> Result<ScreenshotOcrResult, String> {
+    Err(String::from("OCR is only available on macOS and Windows"))
+}
+
+#[cfg(target_os = "macos")]
+fn recognize_screenshot_ocr_image_macos(
+    image: ScreenshotOcrImage,
+) -> Result<ScreenshotOcrResult, String> {
+    cidre::objc::ar_pool(|| { // the whole Vision OCR pass runs inside `ar_pool`
+        use cidre::{cv, ns, vn};
+        use std::ffi::c_void;
+        // Release callback given to `cv::PixelBuf::with_bytes`: frees the boxed `Vec<u8>`.
+        extern "C" fn release_pixel_buffer_data(
+            release_ref_con: *mut c_void,
+            _base_address: *const *const c_void,
+        ) {
+            if !release_ref_con.is_null() {
+                unsafe {
+                    drop(Box::from_raw(release_ref_con.cast::<Vec<u8>>()));
+                }
+            }
+        }
+
+        let width =
+            usize::try_from(image.width).map_err(|_| "OCR image width is too large".to_string())?;
+        let height = usize::try_from(image.height)
+            .map_err(|_| "OCR image height is too large".to_string())?;
+        let bytes_per_row = width
+            .checked_mul(4)
+            .ok_or_else(|| "OCR image row is too large".to_string())?;
+        let mut data = Box::new(image.bgra);
+        let base_address = data.as_mut_ptr().cast::<c_void>();
+        let release_ref_con = Box::into_raw(data).cast::<c_void>();
+        // `release_ref_con` now owns the pixels; the error arm below frees them by hand.
+        let pixel_buffer = match cv::PixelBuf::with_bytes(
+            width,
+            height,
+            base_address,
+            bytes_per_row,
+            release_pixel_buffer_data,
+            release_ref_con,
+            cv::PixelFormat::_32_BGRA,
+            None,
+        ) {
+            Ok(pixel_buffer) => pixel_buffer,
+            Err(e) => {
+                unsafe {
+                    drop(Box::from_raw(release_ref_con.cast::<Vec<u8>>()));
+                }
+                return Err(format!("Failed to create OCR image: {e}"));
+            }
+        };
+        // Accurate level with language correction; the revision depends on the macOS version.
+        let mut request = vn::RecognizeTextRequest::new();
+        request.set_recognition_level(vn::RequestTextRecognitionLevel::Accurate);
+        request.set_uses_lang_correction(true);
+
+        if cidre::version!(macos = 13.0) {
+            request.set_revision(vn::RecognizeTextRequest::REVISION_3);
+            unsafe {
+                request.set_automatically_detects_lang(true);
+            }
+        } else {
+            request.set_revision(vn::RecognizeTextRequest::REVISION_2);
+        }
+
+        let handler = vn::ImageRequestHandler::with_cv_pixel_buf(&pixel_buffer, None)
+            .ok_or_else(|| "Failed to initialize OCR image handler".to_string())?;
+        let requests = ns::Array::<vn::Request>::from_slice(&[&request]);
+        handler
+            .perform(&requests)
+            .map_err(|e| format!("macOS OCR failed: {e}"))?;
+
+        let observations = request.results().unwrap_or_else(ns::Array::new);
+        let mut lines = Vec::new();
+        // Keep the top candidate of each observation, skipping blank text.
+        for observation in observations.iter() {
+            let candidates = observation.top_candidates(1);
+            let Some(candidate) = candidates.first() else {
+                continue;
+            };
+            let text = candidate.string().to_string();
+            if text.trim().is_empty() {
+                continue;
+            }
+            lines.push(ScreenshotOcrLine {
+                text,
+                confidence: Some(candidate.confidence()),
+                bounds: normalized_macos_ocr_rect_to_region(
+                    observation.bounding_box(),
+                    image.width,
+                    image.height,
+                ),
+            });
+        }
+        // The combined text is the kept lines joined with newlines.
+        let text = lines
+            .iter()
+            .map(|line| line.text.as_str())
+            .collect::<Vec<_>>()
+            .join("\n");
+
+        Ok(ScreenshotOcrResult {
+            text,
+            lines,
+            engine: "macos-vision".to_string(),
+        })
+    })
+}
+
+/// Converts a normalized Vision bounding box into pixel bounds for a `width` x `height`
+/// image.
+///
+/// The vertical axis is flipped (`1.0 - y`); each edge is clamped to the image and rounded.
+#[cfg(target_os = "macos")]
+fn normalized_macos_ocr_rect_to_region(
+    rect: cidre::cg::Rect,
+    width: u32,
+    height: u32,
+) -> ScreenshotOcrRegion {
+    // Scales a normalized coordinate to pixels, clamped to `0..=extent` and rounded.
+    let to_pixels = |normalized: f64, extent: u32| {
+        let extent = f64::from(extent);
+        clamp_f64(normalized * extent, 0.0, extent).round() as u32
+    };
+
+    let x = to_pixels(rect.origin.x, width);
+    let right_edge = to_pixels(rect.origin.x + rect.size.width, width);
+    let y = to_pixels(1.0 - rect.origin.y - rect.size.height, height);
+    let bottom_edge = to_pixels(1.0 - rect.origin.y, height);
+
+    ScreenshotOcrRegion {
+        x,
+        y,
+        width: right_edge.saturating_sub(x),
+        height: bottom_edge.saturating_sub(y),
+    }
+}
+
+/// Clamps `value` into `[min, max]`, mapping NaN and infinities to `min`.
+#[cfg(target_os = "macos")]
+fn clamp_f64(value: f64, min: f64, max: f64) -> f64 {
+    if !value.is_finite() {
+        return min;
+    }
+    value.clamp(min, max)
+}
+
+/// Scope guard that balances a successful `RoInitialize` with `RoUninitialize` on drop.
+#[cfg(target_os = "windows")]
+struct WindowsRuntimeGuard;
+
+#[cfg(target_os = "windows")]
+impl Drop for WindowsRuntimeGuard {
+    fn drop(&mut self) {
+        // SAFETY: `initialize_windows_runtime` only builds a guard after `RoInitialize` succeeds.
+        unsafe { windows::Win32::System::WinRT::RoUninitialize() }
+    }
+}
+
+/// Calls `RoInitialize` in multithreaded mode; the returned guard undoes it when dropped.
+#[cfg(target_os = "windows")]
+fn initialize_windows_runtime() -> Result<WindowsRuntimeGuard, String> {
+    use windows::Win32::System::WinRT::{RO_INIT_MULTITHREADED, RoInitialize};
+    match unsafe { RoInitialize(RO_INIT_MULTITHREADED) } {
+        Ok(_) => Ok(WindowsRuntimeGuard),
+        Err(e) => Err(format!("Windows OCR runtime failed: {e}")),
+    }
+}
+
+#[cfg(target_os = "windows")]
+fn recognize_screenshot_ocr_image_windows(
+    image: ScreenshotOcrImage,
+) -> Result<ScreenshotOcrResult, String> {
+    use windows::Graphics::Imaging::{BitmapAlphaMode, BitmapPixelFormat, SoftwareBitmap};
+    use windows::Media::Ocr::OcrEngine;
+    use windows::Storage::Streams::DataWriter;
+    // Runs Windows.Media.Ocr on `image`; the guard's drop calls `RoUninitialize` on return.
+    let _runtime = initialize_windows_runtime()?;
+
+    let max_dimension =
+        OcrEngine::MaxImageDimension().map_err(|e| format!("Windows OCR failed: {e}"))?;
+    // Reject crops larger than the engine's per-side limit before building a bitmap.
+    if image.width > max_dimension || image.height > max_dimension {
+        return Err(format!(
+            "Select a smaller text area. Windows OCR supports up to {max_dimension}px per side"
+        ));
+    }
+    // Copy the BGRA bytes into a WinRT buffer and wrap them in a `SoftwareBitmap`.
+    let width = i32::try_from(image.width).map_err(|_| "OCR image width is too large")?;
+    let height = i32::try_from(image.height).map_err(|_| "OCR image height is too large")?;
+    let writer = DataWriter::new().map_err(|e| format!("Windows OCR failed: {e}"))?;
+    writer
+        .WriteBytes(&image.bgra)
+        .map_err(|e| format!("Windows OCR failed: {e}"))?;
+    let buffer = writer
+        .DetachBuffer()
+        .map_err(|e| format!("Windows OCR failed: {e}"))?;
+    let bitmap = SoftwareBitmap::CreateCopyWithAlphaFromBuffer(
+        &buffer,
+        BitmapPixelFormat::Bgra8,
+        width,
+        height,
+        BitmapAlphaMode::Premultiplied,
+    )
+    .map_err(|e| format!("Windows OCR failed: {e}"))?;
+    let engine = OcrEngine::TryCreateFromUserProfileLanguages()
+        .map_err(|e| format!("Windows OCR is not available: {e}"))?;
+    let result = engine
+        .RecognizeAsync(&bitmap)
+        .map_err(|e| format!("Windows OCR failed: {e}"))?
+        .get()
+        .map_err(|e| format!("Windows OCR failed: {e}"))?;
+    let text = result
+        .Text()
+        .map_err(|e| format!("Windows OCR failed: {e}"))?
+        .to_string_lossy();
+    let ocr_lines = result
+        .Lines()
+        .map_err(|e| format!("Windows OCR failed: {e}"))?;
+    let mut lines = Vec::new();
+    // Collect each non-blank line with the union of its word rectangles as bounds.
+    for index in 0..ocr_lines
+        .Size()
+        .map_err(|e| format!("Windows OCR failed: {e}"))?
+    {
+        let line = ocr_lines
+            .GetAt(index)
+            .map_err(|e| format!("Windows OCR failed: {e}"))?;
+        let line_text = line
+            .Text()
+            .map_err(|e| format!("Windows OCR failed: {e}"))?
+            .to_string_lossy();
+        if line_text.trim().is_empty() {
+            continue;
+        }
+        let words = line
+            .Words()
+            .map_err(|e| format!("Windows OCR failed: {e}"))?;
+        let mut bounds: Option<(f32, f32, f32, f32)> = None;
+
+        for word_index in 0..words
+            .Size()
+            .map_err(|e| format!("Windows OCR failed: {e}"))?
+        {
+            let rect = words
+                .GetAt(word_index)
+                .and_then(|word| word.BoundingRect())
+                .map_err(|e| format!("Windows OCR failed: {e}"))?;
+            bounds = Some(match bounds {
+                Some((left, top, right, bottom)) => (
+                    left.min(rect.X),
+                    top.min(rect.Y),
+                    right.max(rect.X + rect.Width),
+                    bottom.max(rect.Y + rect.Height),
+                ),
+                None => (rect.X, rect.Y, rect.X + rect.Width, rect.Y + rect.Height),
+            });
+        }
+        // A line without words falls back to an all-zero rectangle.
+        lines.push(ScreenshotOcrLine {
+            text: line_text,
+            confidence: None,
+            bounds: bounds
+                .map(windows_ocr_bounds_to_region)
+                .unwrap_or(ScreenshotOcrRegion {
+                    x: 0,
+                    y: 0,
+                    width: 0,
+                    height: 0,
+                }),
+        });
+    }
+
+    Ok(ScreenshotOcrResult {
+        text,
+        lines,
+        engine: "windows-media-ocr".to_string(),
+    })
+}
+
+/// Turns `(left, top, right, bottom)` edges given as `f32` pixels into an integer region.
+/// Each edge is rounded via `clamp_f32_to_u32`; the extent saturates at zero when a far
+/// edge lands before the origin.
+#[cfg(target_os = "windows")]
+fn windows_ocr_bounds_to_region(
+    (left, top, right, bottom): (f32, f32, f32, f32),
+) -> ScreenshotOcrRegion {
+    let [x, y, far_x, far_y] = [left, top, right, bottom].map(clamp_f32_to_u32);
+
+    ScreenshotOcrRegion {
+        x,
+        y,
+        width: far_x.saturating_sub(x),
+        height: far_y.saturating_sub(y),
+    }
+}
+
+/// Rounds `value` to the nearest `u32`; non-finite or non-positive input maps to 0.
+#[cfg(target_os = "windows")]
+fn clamp_f32_to_u32(value: f32) -> u32 {
+    if !value.is_finite() || value <= 0.0 {
+        return 0;
+    }
+    value.round().min(u32::MAX as f32) as u32
+}
+
 #[tauri::command]
 #[specta::specta]
 pub async fn render_screenshot_for_export(
diff --git a/apps/desktop/src-tauri/src/windows.rs b/apps/desktop/src-tauri/src/windows.rs
index e35f4f0dd0..66ab21277f 100644
--- a/apps/desktop/src-tauri/src/windows.rs
+++ b/apps/desktop/src-tauri/src/windows.rs
@@ -1074,14 +1074,13 @@ impl ShowCapWindow {
 
                         ensure_camera_input_active(&mut app_state).await;
 
-                        if enable_native_camera_preview {
-                            if let Err(err) =
+                        if enable_native_camera_preview
+                            && let Err(err) =
                                 init_native_camera_preview(&mut app_state, window.clone()).await
-                            {
-                                error!(
-                                    "Error reinitializing camera preview for existing window: {err}"
-                                );
-                            }
+                        {
+                            error!(
+                                "Error reinitializing camera preview for existing window: {err}"
+                            );
                         }
 
                         drop(app_state);
@@ -1156,14 +1155,11 @@ impl ShowCapWindow {
 
                     ensure_camera_input_active(&mut app_state).await;
 
-                    if enable_native_camera_preview {
-                        if let Err(err) =
+                    if enable_native_camera_preview
+                        && let Err(err) =
                             init_native_camera_preview(&mut app_state, window.clone()).await
-                        {
-                            error!(
-                                "Error reinitializing camera preview for existing window: {err}"
-                            );
-                        }
+                    {
+                        error!("Error reinitializing camera preview for existing window: {err}");
                     }
 
                     drop(app_state);
@@ -2122,14 +2118,13 @@ impl ShowCapWindow {
                         }
                     }
 
-                    if enable_native_camera_preview {
-                        if let Err(err) =
+                    if enable_native_camera_preview
+                        && let Err(err) =
                             init_native_camera_preview(&mut state, window.clone()).await
-                        {
-                            error!(
-                                "Error initializing camera preview, falling back to WebSocket preview: {err}"
-                            );
-                        }
+                    {
+                        error!(
+                            "Error initializing camera preview, falling back to WebSocket preview: {err}"
+                        );
                     }
 
                     #[cfg(not(target_os = "macos"))]
diff --git a/apps/desktop/src/routes/camera.tsx b/apps/desktop/src/routes/camera.tsx
index 8d049c08a9..e00313436e 100644
--- a/apps/desktop/src/routes/camera.tsx
+++ b/apps/desktop/src/routes/camera.tsx
@@ -48,6 +48,7 @@ const CAMERA_MAX_SIZE = 600;
 const CAMERA_DEFAULT_SIZE = 230;
 const CAMERA_PRESET_SMALL = 230;
 const CAMERA_PRESET_LARGE = 400;
+const CAMERA_TOOLBAR_HEIGHT = 56;
 const CAMERA_PREVIEW_ERROR_EVENT = "camera-preview-error";
 const CAMERA_PREVIEW_CLEAR_EVENT = "camera-preview-clear";
 const CAMERA_DISCONNECTED_ISSUE: CameraPreviewIssue = {
@@ -371,9 +372,17 @@ function NativeCameraPreviewPage(props: {
 			onPointerCancel={chrome.hide}
 		>
 			<Show when={props.issue()}>
-				{(issue) => <CameraIssueOverlay issue={issue()} />}
+				{(issue) => (
+					<CameraIssueOverlay
+						issue={issue()}
+						size={state.size}
+						class="inset-x-0 bottom-0"
+						top={CAMERA_TOOLBAR_HEIGHT}
+						borderRadius={cameraBorderRadius(state)}
+					/>
+				)}
 			</Show>
-			<div class="h-13">
+			<div class="h-14">
 				<div class="flex flex-row justify-center items-center">
 					<div
 						class={toolbarClass()}
@@ -444,7 +453,7 @@ function NativeCameraPreviewPage(props: {
 			<CameraResizeHandles
 				state={state}
 				setState={setState}
-				toolbarHeight={52}
+				toolbarHeight={CAMERA_TOOLBAR_HEIGHT}
 				visible={chrome.visible()}
 			/>
 
@@ -1009,14 +1018,13 @@ function LegacyCameraPreviewPage(props: {
 				frameDimensions()?.height,
 			] as const,
 		async ([size, shape, frameWidth, frameHeight]) => {
-			const BAR_HEIGHT = 56;
 			const base = Math.max(CAMERA_MIN_SIZE, Math.min(CAMERA_MAX_SIZE, size));
 			const aspect = frameWidth && frameHeight ? frameWidth / frameHeight : 1;
 			const windowWidth =
 				shape === "full" ? (aspect >= 1 ? base * aspect : base) : base;
 			const windowHeight =
 				shape === "full" ? (aspect >= 1 ? base : base / aspect) : base;
-			const totalHeight = windowHeight + BAR_HEIGHT;
+			const totalHeight = windowHeight + CAMERA_TOOLBAR_HEIGHT;
 
 			const currentWindow = getCurrentWindow();
 			await currentWindow.setSize(new LogicalSize(windowWidth, totalHeight));
@@ -1128,9 +1136,6 @@ function LegacyCameraPreviewPage(props: {
 			onPointerLeave={chrome.hide}
 			onPointerCancel={chrome.hide}
 		>
-			<Show when={props.issue()}>
-				{(issue) => <CameraIssueOverlay issue={issue()} />}
-			</Show>
 			<div class="h-14">
 				<div class="flex flex-row justify-center items-center">
 					<div
@@ -1201,7 +1206,7 @@ function LegacyCameraPreviewPage(props: {
 			<CameraResizeHandles
 				state={state}
 				setState={setState}
-				toolbarHeight={56}
+				toolbarHeight={CAMERA_TOOLBAR_HEIGHT}
 				visible={chrome.visible()}
 			/>
 			<div
@@ -1222,6 +1227,9 @@ function LegacyCameraPreviewPage(props: {
 						/>
 					</Show>
 				</Suspense>
+				<Show when={props.issue()}>
+					{(issue) => <CameraIssueOverlay issue={issue()} size={state.size} />}
+				</Show>
 			</div>
 		</div>
 	);
@@ -1319,15 +1327,67 @@ function cameraBorderRadius(state: CameraWindowState) {
 	return `${radius}rem`;
 }
 
-function CameraIssueOverlay(props: { issue: CameraPreviewIssue }) {
+/**
+ * Text sizing (rem strings) for the camera issue overlay, scaled by `size`.
+ */
+function cameraOverlayTextMetrics(size: number) {
+	const clamped = Math.max(CAMERA_MIN_SIZE, Math.min(CAMERA_MAX_SIZE, size));
+	// 0 at CAMERA_MIN_SIZE, 1 at CAMERA_MAX_SIZE.
+	const normalized =
+		(clamped - CAMERA_MIN_SIZE) / (CAMERA_MAX_SIZE - CAMERA_MIN_SIZE);
+	const rem = (min: number, span: number) => `${min + normalized * span}rem`;
+
+	return {
+		gap: rem(0.375, 0.25),
+		// Unlike the other metrics, the width cap uses the unclamped size.
+		maxWidth: `${Math.max(7.5, Math.min(18, size / 16))}rem`,
+		messageLineHeight: rem(1.2, 0.2),
+		messageSize: rem(0.625, 0.25),
+		titleSize: rem(0.75, 0.375),
+	};
+}
+
+function CameraIssueOverlay(props: {
+	issue: CameraPreviewIssue;
+	size: number;
+	class?: string;
+	top?: number;
+	borderRadius?: string;
+}) {
+	const textMetrics = () => cameraOverlayTextMetrics(props.size);
+	const style = () => {
+		const base = { "border-radius": props.borderRadius ?? "inherit" };
+		if (props.top === undefined) return base;
+		return { ...base, top: `${props.top}px` };
+	};
+
 	return (
 		<div
-			class="absolute inset-0 z-50 flex items-center justify-center bg-black/75 backdrop-blur-sm px-4 pointer-events-none"
-			style={{ "border-radius": "inherit" }}
+			class={cx(
+				"absolute z-10 flex items-center justify-center overflow-hidden bg-black/75 backdrop-blur-sm px-4 pointer-events-none",
+				props.class ?? "inset-0",
+			)}
+			style={style()}
 		>
-			<div class="flex max-w-[18rem] flex-col items-center gap-2 text-center text-white">
-				<p class="text-sm font-semibold text-white">{props.issue.title}</p>
-				<p class="text-xs leading-5 text-white/75">{props.issue.message}</p>
+			<div
+				class="flex flex-col items-center text-center text-white"
+				style={{ gap: textMetrics().gap, "max-width": textMetrics().maxWidth }}
+			>
+				<p
+					class="font-semibold text-white"
+					style={{ "font-size": textMetrics().titleSize }}
+				>
+					{props.issue.title}
+				</p>
+				<p
+					class="text-white/75"
+					style={{
+						"font-size": textMetrics().messageSize,
+						"line-height": textMetrics().messageLineHeight,
+					}}
+				>
+					{props.issue.message}
+				</p>
 			</div>
 		</div>
 	);
diff --git a/apps/desktop/src/routes/screenshot-editor/AnnotationLayer.tsx b/apps/desktop/src/routes/screenshot-editor/AnnotationLayer.tsx
index 1a13181a9d..2bf44cbec6 100644
--- a/apps/desktop/src/routes/screenshot-editor/AnnotationLayer.tsx
+++ b/apps/desktop/src/routes/screenshot-editor/AnnotationLayer.tsx
@@ -488,8 +488,10 @@ export function AnnotationLayer(props: {
 	};
 
 	const startDrag = (e: MouseEvent, id: string, handle?: string) => {
+		e.preventDefault();
 		e.stopPropagation();
 		if (activeTool() !== "select") return;
+		window.getSelection()?.removeAllRanges();
 
 		const svg = (e.currentTarget as Element).closest("svg");
 		if (!svg) return;
@@ -542,8 +544,9 @@ export function AnnotationLayer(props: {
 				position: "absolute",
 				top: 0,
 				left: 0,
-				"pointer-events": "all",
-				"z-index": 10,
+				"pointer-events":
+					activeTool() === "select" && !dragState() ? "none" : "all",
+				"z-index": 20,
 				cursor:
 					activeTool() === "select"
 						? props.isPanning
diff --git a/apps/desktop/src/routes/screenshot-editor/AnnotationTools.tsx b/apps/desktop/src/routes/screenshot-editor/AnnotationTools.tsx
index 37c0ed861b..22acae883a 100644
--- a/apps/desktop/src/routes/screenshot-editor/AnnotationTools.tsx
+++ b/apps/desktop/src/routes/screenshot-editor/AnnotationTools.tsx
@@ -8,7 +8,10 @@ import IconLucideLayers from "~icons/lucide/layers";
 import IconLucideMousePointer2 from "~icons/lucide/mouse-pointer-2";
 import IconLucideSquare from "~icons/lucide/square";
 import IconLucideType from "~icons/lucide/type";
-import { type AnnotationType, useScreenshotEditorContext } from "./context";
+import {
+	type ScreenshotEditorTool,
+	useScreenshotEditorContext,
+} from "./context";
 
 export function AnnotationTools() {
 	const { layersPanelOpen, setLayersPanelOpen } = useScreenshotEditorContext();
@@ -66,7 +69,7 @@ export function AnnotationTools() {
 }
 
 function ToolButton(props: {
-	tool: AnnotationType | "select";
+	tool: ScreenshotEditorTool;
 	icon: Component<{ class?: string }>;
 	label: string;
 	shortcut?: string;
diff --git a/apps/desktop/src/routes/screenshot-editor/Editor.tsx b/apps/desktop/src/routes/screenshot-editor/Editor.tsx
index 9786113087..543d6e453e 100644
--- a/apps/desktop/src/routes/screenshot-editor/Editor.tsx
+++ b/apps/desktop/src/routes/screenshot-editor/Editor.tsx
@@ -13,6 +13,7 @@ import {
 	Show,
 	Switch,
 } from "solid-js";
+import { unwrap } from "solid-js/store";
 import { Transition } from "solid-transition-group";
 import {
 	CROP_ZERO,
@@ -27,7 +28,7 @@ import IconCapCircleX from "~icons/cap/circle-x";
 import IconLucideMaximize from "~icons/lucide/maximize";
 import IconLucideRatio from "~icons/lucide/ratio";
 import { AnnotationConfigBar } from "./AnnotationConfig";
-import { useScreenshotEditorContext } from "./context";
+import { type Annotation, useScreenshotEditorContext } from "./context";
 import { Header } from "./Header";
 import { LayersPanel } from "./LayersPanel";
 import { Preview } from "./Preview";
@@ -40,12 +41,17 @@ export function Editor() {
 		projectHistory,
 		setActiveTool,
 		setSelectedAnnotationId,
+		annotations,
+		setAnnotations,
+		selectedAnnotationId,
 		layersPanelOpen,
 		setLayersPanelOpen,
 		activePopover,
 		setActivePopover,
 		isRenderReady,
 	} = useScreenshotEditorContext();
+	const [copiedAnnotation, setCopiedAnnotation] =
+		createSignal<Annotation | null>(null);
 
 	createEffect(() => {
 		const handleKeyDown = (e: KeyboardEvent) => {
@@ -61,9 +67,41 @@ export function Editor() {
 
 			const isMod = e.metaKey || e.ctrlKey;
 			const isShift = e.shiftKey;
+			const key = e.key.toLowerCase();
+
+			if (isMod && key === "c") {
+				const id = selectedAnnotationId();
+				const annotation = annotations.find((a) => a.id === id);
+				if (annotation) {
+					e.preventDefault();
+					e.stopImmediatePropagation();
+					setCopiedAnnotation(structuredClone(unwrap(annotation)));
+					return;
+				}
+			}
+
+			if (isMod && key === "v") {
+				const annotation = copiedAnnotation();
+				if (annotation) {
+					e.preventDefault();
+					e.stopImmediatePropagation();
+					projectHistory.push();
+					const duplicate = {
+						...structuredClone(annotation),
+						id: crypto.randomUUID(),
+						x: annotation.x + 16,
+						y: annotation.y + 16,
+					};
+					setAnnotations((prev) => [...prev, duplicate]);
+					setSelectedAnnotationId(duplicate.id);
+					setActiveTool("select");
+					setCopiedAnnotation(duplicate);
+					return;
+				}
+			}
 
 			// Undo / Redo
-			if (isMod && e.key.toLowerCase() === "z") {
+			if (isMod && key === "z") {
 				e.preventDefault();
 				if (isShift) {
 					projectHistory.redo();
@@ -72,7 +110,7 @@ export function Editor() {
 				}
 				return;
 			}
-			if (isMod && e.key.toLowerCase() === "y") {
+			if (isMod && key === "y") {
 				e.preventDefault();
 				projectHistory.redo();
 				return;
@@ -80,7 +118,7 @@ export function Editor() {
 
 			// Tools (No modifiers)
 			if (!isMod && !isShift) {
-				switch (e.key.toLowerCase()) {
+				switch (key) {
 					case "a":
 						setActiveTool("arrow");
 						setSelectedAnnotationId(null);
diff --git a/apps/desktop/src/routes/screenshot-editor/Header.tsx b/apps/desktop/src/routes/screenshot-editor/Header.tsx
index 3515fbcaa1..ea589cb936 100644
--- a/apps/desktop/src/routes/screenshot-editor/Header.tsx
+++ b/apps/desktop/src/routes/screenshot-editor/Header.tsx
@@ -33,13 +33,20 @@ import { useScreenshotExport } from "./useScreenshotExport";
 
 export function Header() {
 	const ctx = useScreenshotEditorContext();
-	const { setDialog, project, originalImageSize, isImageFileReady } = ctx;
+	const {
+		setDialog,
+		project,
+		originalImageSize,
+		isImageFileReady,
+		selectedAnnotationId,
+	} = ctx;
 	const path = () => ctx.editorInstance()?.path ?? "";
 
 	const { exportImage, isExporting } = useScreenshotExport();
 
 	createEffect(() => {
 		const handleKeyDown = (e: KeyboardEvent) => {
+			if (e.defaultPrevented) return;
 			const target = e.target as HTMLElement | null;
 			if (
 				target &&
@@ -53,6 +60,13 @@ export function Header() {
 			if (!e.metaKey && !e.ctrlKey) return;
 			const key = e.key.toLowerCase();
 			if (key === "c") {
+				if (selectedAnnotationId()) {
+					return;
+				}
+				const selection = window.getSelection();
+				if (selection && !selection.isCollapsed && selection.toString()) {
+					return;
+				}
 				e.preventDefault();
 				if (!isExporting()) exportImage("clipboard");
 			} else if (key === "s") {
diff --git a/apps/desktop/src/routes/screenshot-editor/OcrSelectionOverlay.tsx b/apps/desktop/src/routes/screenshot-editor/OcrSelectionOverlay.tsx
new file mode 100644
index 0000000000..3e2a7844c5
--- /dev/null
+++ b/apps/desktop/src/routes/screenshot-editor/OcrSelectionOverlay.tsx
@@ -0,0 +1,227 @@
+import { invoke } from "@tauri-apps/api/core";
+import { createEffect, createMemo, createSignal, For, untrack } from "solid-js";
+import { type ScreenshotProject, useScreenshotEditorContext } from "./context";
+
+type Rect = {
+	x: number;
+	y: number;
+	width: number;
+	height: number;
+};
+
+type ScreenshotOcrRegion = {
+	x: number;
+	y: number;
+	width: number;
+	height: number;
+};
+
+type ScreenshotOcrResult = {
+	text: string;
+	lines: {
+		text: string;
+		confidence: number | null;
+		bounds: ScreenshotOcrRegion;
+	}[];
+	engine: string;
+};
+
+type TextLayout = {
+	text: string;
+	rect: Rect;
+	fontSize: number;
+	lineHeight: number;
+	textWidth: number;
+	scaleX: number;
+};
+
+const fontFamily =
+	'-apple-system, BlinkMacSystemFont, "Segoe UI", system-ui, sans-serif';
+
+export function OcrSelectionOverlay(props: {
+	bounds: Rect;
+	cssWidth: number;
+	cssHeight: number;
+	imageRect: Rect;
+	originalImageSize: { width: number; height: number } | null;
+	crop: ScreenshotProject["background"]["crop"];
+}) {
+	const { activeTool, setSelectedAnnotationId } = useScreenshotEditorContext();
+	const [ocrResult, setOcrResult] = createSignal<ScreenshotOcrResult | null>(
+		null,
+	);
+	let requestId = 0;
+	let measureCanvas: HTMLCanvasElement | null = null;
+
+	const clamp = (value: number, min: number, max: number) =>
+		Math.min(Math.max(value, min), max);
+
+	// Crop rectangle in source-image pixels, clamped to the image and snapped
+	// outward to whole pixels; null without a valid size or under 4px a side.
+	const sourceRegion = createMemo<ScreenshotOcrRegion | null>(() => {
+		const image = props.originalImageSize;
+		if (!image || image.width <= 0 || image.height <= 0) return null;
+
+		const { position, size } = props.crop ?? {
+			position: { x: 0, y: 0 },
+			size: { x: image.width, y: image.height },
+		};
+		const left = clamp(position.x, 0, image.width);
+		const top = clamp(position.y, 0, image.height);
+		const right = clamp(position.x + size.x, left, image.width);
+		const bottom = clamp(position.y + size.y, top, image.height);
+		const x = Math.floor(left);
+		const y = Math.floor(top);
+		const width = Math.ceil(right) - x;
+		const height = Math.ceil(bottom) - y;
+		return width < 4 || height < 4 ? null : { x, y, width, height };
+	});
+
+	// String form of the region, letting dependents compare it by value.
+	const sourceRegionKey = createMemo(() => {
+		const r = sourceRegion();
+		return r ? [r.x, r.y, r.width, r.height].join(":") : null;
+	});
+
+	// Re-run OCR whenever the source region changes. Only the string key is
+	// tracked: `sourceRegion` yields a fresh object on every recompute, so
+	// tracking it directly would re-trigger OCR for an unchanged region.
+	createEffect(() => {
+		const key = sourceRegionKey();
+		const region = untrack(sourceRegion);
+		// Bumping the id invalidates any request that is still in flight.
+		requestId += 1;
+		const currentRequestId = requestId;
+
+		// Drop the previous text first; it belongs to the previous region.
+		setOcrResult(null);
+		if (!key || !region) return;
+
+		void (async () => {
+			try {
+				const result = await invoke<ScreenshotOcrResult>(
+					"recognize_screenshot_text",
+					{ region },
+				);
+				if (currentRequestId !== requestId) return;
+				setOcrResult(result);
+			} catch {
+				if (currentRequestId !== requestId) return;
+				setOcrResult(null);
+			}
+		})();
+	});
+
+	const sourceToCssRect = (rect: ScreenshotOcrRegion): Rect | null => {
+		const region = sourceRegion();
+		if (!region) return null;
+		if (props.bounds.width <= 0 || props.bounds.height <= 0) return null;
+		if (props.imageRect.width <= 0 || props.imageRect.height <= 0) return null;
+		const regionRight = region.x + region.width;
+		const regionBottom = region.y + region.height;
+		const left = clamp(rect.x, region.x, regionRight);
+		const top = clamp(rect.y, region.y, regionBottom);
+		const right = clamp(rect.x + rect.width, left, regionRight);
+		const bottom = clamp(rect.y + rect.height, top, regionBottom);
+		const frameRect = {
+			x:
+				props.imageRect.x +
+				((left - region.x) / region.width) * props.imageRect.width,
+			y:
+				props.imageRect.y +
+				((top - region.y) / region.height) * props.imageRect.height,
+			width: ((right - left) / region.width) * props.imageRect.width,
+			height: ((bottom - top) / region.height) * props.imageRect.height,
+		};
+		if (frameRect.width <= 0 || frameRect.height <= 0) return null;
+		return {
+			x: ((frameRect.x - props.bounds.x) / props.bounds.width) * props.cssWidth,
+			y:
+				((frameRect.y - props.bounds.y) / props.bounds.height) *
+				props.cssHeight,
+			width: (frameRect.width / props.bounds.width) * props.cssWidth,
+			height: (frameRect.height / props.bounds.height) * props.cssHeight,
+		};
+	};
+
+	// Width of `text` in px at `fontSize`; estimated when no canvas is usable.
+	const measureText = (text: string, fontSize: number) => {
+		const estimate = Math.max(text.length * fontSize * 0.55, 1);
+		if (typeof document === "undefined") return estimate;
+		measureCanvas ??= document.createElement("canvas");
+		const context = measureCanvas.getContext("2d");
+		if (!context) return estimate;
+		context.font = `${fontSize}px ${fontFamily}`;
+		return Math.max(context.measureText(text).width, 1);
+	};
+
+	// Builds one invisible, selectable text run per recognised OCR line.
+	const textLayouts = createMemo<TextLayout[]>(() => {
+		const recognized = ocrResult();
+		if (!recognized) return [];
+
+		const layouts: TextLayout[] = [];
+		for (const { text, bounds } of recognized.lines) {
+			const rect = sourceToCssRect(bounds);
+			// Skip whitespace-only lines and lines with no usable CSS rect.
+			if (!text.trim() || !rect) continue;
+
+			// Font size follows the box height; the run is then stretched
+			// horizontally (scaleX) so its measured width matches the box.
+			const lineHeight = Math.max(rect.height, 1);
+			const fontSize = Math.max(lineHeight * 0.78, 1);
+			const textWidth = measureText(text, fontSize);
+			const scaleX = rect.width / textWidth;
+
+			layouts.push({ text, rect, fontSize, lineHeight, textWidth, scaleX });
+		}
+
+		return layouts;
+	});
+
+	return (
+		<div
+			style={{
+				width: `${props.cssWidth}px`,
+				height: `${props.cssHeight}px`,
+				position: "absolute",
+				top: 0,
+				left: 0,
+				"pointer-events": "none",
+				"z-index": 15,
+				overflow: "visible",
+			}}
+		>
+			<For each={textLayouts()}>
+				{(layout) => (
+					<span
+						style={{
+							position: "absolute",
+							display: "block",
+							left: `${layout.rect.x}px`,
+							top: `${layout.rect.y}px`,
+							width: `${layout.textWidth}px`,
+							height: `${layout.lineHeight}px`,
+							"font-family": fontFamily,
+							"font-size": `${layout.fontSize}px`,
+							"line-height": `${layout.lineHeight}px`,
+							"letter-spacing": "0",
+							"white-space": "pre",
+							color: "transparent",
+							"caret-color": "transparent",
+							overflow: "visible",
+							"pointer-events": activeTool() === "select" ? "auto" : "none",
+							"user-select": "text",
+							"-webkit-user-select": "text",
+							cursor: "text",
+							transform: `scaleX(${layout.scaleX})`,
+							"transform-origin": "left top",
+						}}
+						onMouseDown={() => setSelectedAnnotationId(null)}
+					>
+						{layout.text}
+					</span>
+				)}
+			</For>
+		</div>
+	);
+}
diff --git a/apps/desktop/src/routes/screenshot-editor/Preview.tsx b/apps/desktop/src/routes/screenshot-editor/Preview.tsx
index a2164a3545..2b353d3c8f 100644
--- a/apps/desktop/src/routes/screenshot-editor/Preview.tsx
+++ b/apps/desktop/src/routes/screenshot-editor/Preview.tsx
@@ -13,6 +13,13 @@ import { EditorButton, Slider } from "../editor/ui";
 import { AnnotationLayer } from "./AnnotationLayer";
 import { useScreenshotEditorContext } from "./context";
 import { getImageRect } from "./layout";
+import { OcrSelectionOverlay } from "./OcrSelectionOverlay";
+
+type WebKitGestureEvent = Event & {
+	scale?: number;
+	clientX?: number;
+	clientY?: number;
+};
 
 // CSS for checkerboard grid
 const gridStyle = {
@@ -65,6 +72,7 @@ export function Preview(props: { zoom: number; setZoom: (z: number) => void }) {
 			originalImageSize(),
 			project.background.padding,
 			project.background.crop,
+			project.aspectRatio,
 		);
 	});
 
@@ -175,6 +183,8 @@ export function Preview(props: { zoom: number; setZoom: (z: number) => void }) {
 		setPan({ x: 0, y: 0 });
 	};
 
+	const clampZoom = (zoom: number) => Math.max(0.1, Math.min(3, zoom));
+
 	createEffect(() => {
 		const handleKeyDown = (e: KeyboardEvent) => {
 			const target = e.target as HTMLElement;
@@ -201,54 +211,74 @@ export function Preview(props: { zoom: number; setZoom: (z: number) => void }) {
 		onCleanup(() => window.removeEventListener("keydown", handleKeyDown));
 	});
 
+	const zoomAtPoint = (clientX: number, clientY: number, newZoom: number) => {
+		const rect = viewportRef?.getBoundingClientRect();
+		const currentScale = fitScale() * props.zoom;
+		const nextScale = fitScale() * newZoom;
+		const sizeData = size();
+		const boundsData = bounds();
+
+		if (
+			rect &&
+			currentScale > 0 &&
+			nextScale > 0 &&
+			sizeData.width > 0 &&
+			sizeData.height > 0
+		) {
+			const pointerX = clientX - rect.left;
+			const pointerY = clientY - rect.top;
+			const currentPan = pan();
+			const contentX =
+				boundsData.x +
+				(pointerX -
+					(sizeData.width - sizeData.width * props.zoom) / 2 -
+					currentPan.x) /
+					currentScale;
+			const contentY =
+				boundsData.y +
+				(pointerY -
+					(sizeData.height - sizeData.height * props.zoom) / 2 -
+					currentPan.y) /
+					currentScale;
+
+			setPan({
+				x:
+					pointerX -
+					(sizeData.width - sizeData.width * newZoom) / 2 -
+					(contentX - boundsData.x) * nextScale,
+				y:
+					pointerY -
+					(sizeData.height - sizeData.height * newZoom) / 2 -
+					(contentY - boundsData.y) * nextScale,
+			});
+		}
+
+		props.setZoom(newZoom);
+	};
+
+	// deltaY in pixels: a line (mode 1) is 16px, a page (mode 2) one window.
+	const normalizeWheelDeltaY = ({ deltaMode, deltaY }: WheelEvent) => {
+		const pixelsPerUnit = [1, 16, window.innerHeight][deltaMode] ?? 1;
+		return deltaY * pixelsPerUnit;
+	};
+
+	let lastGestureScale = 1;
+	let lastGestureAt = 0;
+
 	const handleWheel = (e: WheelEvent) => {
 		e.preventDefault();
 		if (e.ctrlKey) {
-			const delta = -e.deltaY;
+			if (performance.now() - lastGestureAt < 80) return;
+			const normalizedDelta = normalizeWheelDeltaY(e);
+			if (normalizedDelta === 0) return;
+			const delta =
+				-Math.sign(normalizedDelta) * Math.max(Math.abs(normalizedDelta), 8);
 			const zoomStep = 0.005;
-			const newZoom = Math.max(0.1, Math.min(3, props.zoom + delta * zoomStep));
-			const rect = viewportRef?.getBoundingClientRect();
-			const currentScale = fitScale() * props.zoom;
-			const nextScale = fitScale() * newZoom;
-			const sizeData = size();
-			const boundsData = bounds();
-
-			if (
-				rect &&
-				currentScale > 0 &&
-				nextScale > 0 &&
-				sizeData.width > 0 &&
-				sizeData.height > 0
-			) {
-				const pointerX = e.clientX - rect.left;
-				const pointerY = e.clientY - rect.top;
-				const currentPan = pan();
-				const contentX =
-					boundsData.x +
-					(pointerX -
-						(sizeData.width - sizeData.width * props.zoom) / 2 -
-						currentPan.x) /
-						currentScale;
-				const contentY =
-					boundsData.y +
-					(pointerY -
-						(sizeData.height - sizeData.height * props.zoom) / 2 -
-						currentPan.y) /
-						currentScale;
-
-				setPan({
-					x:
-						pointerX -
-						(sizeData.width - sizeData.width * newZoom) / 2 -
-						(contentX - boundsData.x) * nextScale,
-					y:
-						pointerY -
-						(sizeData.height - sizeData.height * newZoom) / 2 -
-						(contentY - boundsData.y) * nextScale,
-				});
-			}
-
-			props.setZoom(newZoom);
+			zoomAtPoint(
+				e.clientX,
+				e.clientY,
+				clampZoom(props.zoom + delta * zoomStep),
+			);
 		} else {
 			setPan((p) => ({
 				x: p.x - e.deltaX,
@@ -257,6 +287,81 @@ export function Preview(props: { zoom: number; setZoom: (z: number) => void }) {
 		}
 	};
 
+	// Gesture focal point; defaults to the viewport centre, or 0 without one.
+	const getGesturePoint = (e: WebKitGestureEvent) => {
+		const box = viewportRef?.getBoundingClientRect();
+		const centerX = box ? box.left + box.width / 2 : 0;
+		const centerY = box ? box.top + box.height / 2 : 0;
+		return { clientX: e.clientX ?? centerX, clientY: e.clientY ?? centerY };
+	};
+
+	// Starts a pinch: stores the initial scale and the time it was seen.
+	const handleGestureStart = (event: Event) => {
+		event.preventDefault();
+		lastGestureScale = (event as WebKitGestureEvent).scale ?? 1;
+		lastGestureAt = performance.now();
+	};
+
+	// Applies the scale change since the previous gesture event as a zoom
+	// factor, anchored at the gesture point.
+	const handleGestureChange = (event: Event) => {
+		event.preventDefault();
+		const gesture = event as WebKitGestureEvent;
+		const scale = gesture.scale ?? 1;
+		// Floor the divisor so a zero previous scale cannot yield Infinity.
+		const factor = scale / Math.max(lastGestureScale, 0.001);
+		lastGestureScale = scale;
+		lastGestureAt = performance.now();
+
+		const { clientX, clientY } = getGesturePoint(gesture);
+		zoomAtPoint(clientX, clientY, clampZoom(props.zoom * factor));
+	};
+
+	const handleGestureEnd = (event: Event) => {
+		event.preventDefault();
+		lastGestureScale = 1;
+		lastGestureAt = performance.now();
+	};
+
+	createEffect(() => {
+		const element = canvasContainerRef();
+		if (!element) return;
+		const listenerOptions = { capture: true, passive: false };
+		const cleanupOptions = { capture: true };
+
+		element.addEventListener("wheel", handleWheel, listenerOptions);
+		element.addEventListener(
+			"gesturestart",
+			handleGestureStart,
+			listenerOptions,
+		);
+		element.addEventListener(
+			"gesturechange",
+			handleGestureChange,
+			listenerOptions,
+		);
+		element.addEventListener("gestureend", handleGestureEnd, listenerOptions);
+
+		onCleanup(() => {
+			element.removeEventListener("wheel", handleWheel, cleanupOptions);
+			element.removeEventListener(
+				"gesturestart",
+				handleGestureStart,
+				cleanupOptions,
+			);
+			element.removeEventListener(
+				"gesturechange",
+				handleGestureChange,
+				cleanupOptions,
+			);
+			element.removeEventListener(
+				"gestureend",
+				handleGestureEnd,
+				cleanupOptions,
+			);
+		});
+	});
+
 	const startPanDrag = (clientX: number, clientY: number) => {
 		setIsDragging(true);
 		setDragStart({
@@ -327,7 +432,6 @@ export function Preview(props: { zoom: number; setZoom: (z: number) => void }) {
 				ref={setCanvasContainerRef}
 				class="flex-1 relative flex items-center justify-center overflow-hidden outline-none"
 				style={gridStyle}
-				onWheel={handleWheel}
 				onMouseDown={handleMiddleMouseDown}
 			>
 				<div class="absolute left-4 bottom-4 z-10 flex items-center gap-2 bg-gray-1 dark:bg-gray-3 rounded-lg shadow-sm p-1 border border-gray-4">
@@ -612,6 +716,26 @@ export function Preview(props: { zoom: number; setZoom: (z: number) => void }) {
 												"pointer-events": "none",
 											}}
 										/>
+										<div
+											style={{
+												position: "absolute",
+												left: "0px",
+												top: "0px",
+												width: `${scaledWidth()}px`,
+												height: `${scaledHeight()}px`,
+												"z-index": 12,
+												cursor: isDragging() ? "grabbing" : "grab",
+											}}
+											onMouseDown={handleMouseDown}
+										/>
+										<OcrSelectionOverlay
+											bounds={bounds()}
+											cssWidth={scaledWidth()}
+											cssHeight={scaledHeight()}
+											imageRect={imageRect()}
+											originalImageSize={originalImageSize()}
+											crop={project.background.crop}
+										/>
 										<AnnotationLayer
 											bounds={bounds()}
 											cssWidth={scaledWidth()}
diff --git a/apps/desktop/src/routes/screenshot-editor/context.tsx b/apps/desktop/src/routes/screenshot-editor/context.tsx
index f3dc0e13b1..356f2ba206 100644
--- a/apps/desktop/src/routes/screenshot-editor/context.tsx
+++ b/apps/desktop/src/routes/screenshot-editor/context.tsx
@@ -82,6 +82,7 @@ function convertNv12ToRgba(
 
 export type ScreenshotProject = ProjectConfiguration;
 export type { Annotation, AnnotationType };
+export type ScreenshotEditorTool = AnnotationType | "select";
 
 export type CurrentDialog =
 	| { type: "createPreset" }
@@ -169,9 +170,8 @@ function createScreenshotEditorContext() {
 	const [selectedAnnotationId, setSelectedAnnotationId] = createSignal<
 		string | null
 	>(null);
-	const [activeTool, setActiveTool] = createSignal<AnnotationType | "select">(
-		"select",
-	);
+	const [activeTool, setActiveTool] =
+		createSignal<ScreenshotEditorTool>("select");
 
 	const [layersPanelOpen, setLayersPanelOpen] = makePersisted(
 		createSignal(false),
@@ -443,8 +443,9 @@ function createScreenshotEditorContext() {
 				imageSize: originalImageSize(),
 				padding: project.background.padding,
 				crop: project.background.crop,
+				aspectRatio: project.aspectRatio,
 			}),
-			({ frame, imageSize, padding, crop }) => {
+			({ frame, imageSize, padding, crop, aspectRatio }) => {
 				if (!frame || !imageSize) return;
 
 				const frameSize = { width: frame.width, height: frame.height };
@@ -463,6 +464,7 @@ function createScreenshotEditorContext() {
 					imageSize,
 					padding,
 					crop,
+					aspectRatio,
 				);
 
 				const rawAnnotations = unwrap(annotations);
diff --git a/apps/desktop/src/routes/screenshot-editor/layout.ts b/apps/desktop/src/routes/screenshot-editor/layout.ts
index 1a2784516d..38bd306072 100644
--- a/apps/desktop/src/routes/screenshot-editor/layout.ts
+++ b/apps/desktop/src/routes/screenshot-editor/layout.ts
@@ -1,25 +1,124 @@
-import type { XY } from "~/utils/tauri";
+import type { AspectRatio, XY } from "~/utils/tauri";
 
 export const SCREEN_MAX_PADDING = 0.4;
 
+const roundBaseDimension = (value: number) =>
+	Math.max((Math.ceil(value) + 1) & ~1, 2);
+
+const roundAutoBaseDimension = (value: number) => (Math.floor(value) + 1) & ~1;
+
+/** Maps an aspect-ratio preset to its numeric width / height ratio. */
+function getAspectRatioValue(aspectRatio: AspectRatio) {
+	// `Record` keeps the table exhaustive: a new preset fails to type-check
+	// until it is given a ratio here.
+	const widthOverHeight: Record<AspectRatio, number> = {
+		wide: 16 / 9,
+		vertical: 9 / 16,
+		square: 1,
+		classic: 4 / 3,
+		tall: 3 / 4,
+	};
+
+	return widthOverHeight[aspectRatio];
+}
+
+/**
+ * Unscaled output size for a crop once padding is applied. Without an aspect
+ * ratio both sides grow proportionally; with one, the padded side is kept and
+ * the other side is derived from the target ratio.
+ */
+function getBaseSize(
+	cropWidth: number,
+	cropHeight: number,
+	paddingFactor: number,
+	aspectRatio: AspectRatio | null,
+) {
+	if (aspectRatio === null) {
+		const growth = 1 + paddingFactor * 2;
+		return {
+			width: roundAutoBaseDimension(cropWidth * growth),
+			height: roundAutoBaseDimension(cropHeight * growth),
+		};
+	}
+
+	const targetAspect = getAspectRatioValue(aspectRatio);
+	const padding = Math.max(cropWidth, cropHeight) * paddingFactor * 2;
+	// A crop wider than the target keeps its padded width and gains height;
+	// any other crop keeps its padded height and gains width.
+	const isWiderThanTarget = cropWidth / cropHeight > targetAspect;
+	const paddedWidth = cropWidth + padding;
+	const paddedHeight = cropHeight + padding;
+	const width = isWiderThanTarget ? paddedWidth : paddedHeight * targetAspect;
+	const height = isWiderThanTarget ? paddedWidth / targetAspect : paddedHeight;
+
+	return {
+		width: roundBaseDimension(width),
+		height: roundBaseDimension(height),
+	};
+}
+
 export function calculateImageTransform(
 	frameSize: { width: number; height: number },
 	imageSize: { width: number; height: number },
 	padding: number,
 	crop: { position: XY<number>; size: XY<number> } | null,
+	aspectRatio: AspectRatio | null,
 ) {
 	const cropWidth = crop?.size.x ?? imageSize.width;
 	const cropHeight = crop?.size.y ?? imageSize.height;
+
+	if (
+		frameSize.width <= 0 ||
+		frameSize.height <= 0 ||
+		cropWidth <= 0 ||
+		cropHeight <= 0
+	) {
+		return {
+			offset: { x: 0, y: 0 },
+			size: {
+				width: Math.max(frameSize.width, 0),
+				height: Math.max(frameSize.height, 0),
+			},
+		};
+	}
+
 	const croppedAspect = cropWidth / cropHeight;
 	const outputAspect = frameSize.width / frameSize.height;
 
 	const paddingFactor = (padding / 100.0) * SCREEN_MAX_PADDING;
+	const baseSize = getBaseSize(
+		cropWidth,
+		cropHeight,
+		paddingFactor,
+		aspectRatio,
+	);
+	const outputScale = Math.min(
+		frameSize.width / Math.max(baseSize.width, 1),
+		frameSize.height / Math.max(baseSize.height, 1),
+	);
+
+	if (aspectRatio === null) {
+		const offsetX = cropWidth * paddingFactor * outputScale;
+		const offsetY = cropHeight * paddingFactor * outputScale;
+
+		return {
+			offset: { x: offsetX, y: offsetY },
+			size: {
+				width: Math.max(frameSize.width - offsetX * 2, 1),
+				height: Math.max(frameSize.height - offsetY * 2, 1),
+			},
+		};
+	}
+
 	const cropBasis = Math.max(cropWidth, cropHeight);
 	const maxPadding = Math.max(
 		Math.min((frameSize.width - 1) / 2, (frameSize.height - 1) / 2),
 		0,
 	);
-	const paddingPixels = Math.min(cropBasis * paddingFactor, maxPadding);
+	const paddingPixels = Math.min(
+		cropBasis * paddingFactor * outputScale,
+		maxPadding,
+	);
 
 	const availableWidth = Math.max(frameSize.width - 2 * paddingPixels, 1);
 	const availableHeight = Math.max(frameSize.height - 2 * paddingPixels, 1);
@@ -53,6 +152,7 @@ export function getImageRect(
 	imageSize: { width: number; height: number } | null,
 	padding: number,
 	crop: { position: XY<number>; size: XY<number> } | null,
+	aspectRatio: AspectRatio | null,
 ) {
 	if (!imageSize) {
 		return {
@@ -68,6 +168,7 @@ export function getImageRect(
 		imageSize,
 		padding,
 		crop,
+		aspectRatio,
 	);
 
 	return {
diff --git a/crates/recording/src/feeds/camera.rs b/crates/recording/src/feeds/camera.rs
index 879713e4fc..ee6bc98deb 100644
--- a/crates/recording/src/feeds/camera.rs
+++ b/crates/recording/src/feeds/camera.rs
@@ -327,7 +327,7 @@ struct FinalizePendingRelease {
     id: DeviceOrModelID,
 }
 
-fn spawn_camera_setup(
+struct CameraSetupArgs {
     id: DeviceOrModelID,
     generation: u64,
     settings: Option<CameraDeviceSettings>,
@@ -336,7 +336,22 @@ fn spawn_camera_setup(
     native_frame_recipient: Recipient<NewNativeFrame>,
     native_sender_count: Arc<std::sync::atomic::AtomicUsize>,
     flow: CameraSetupFlow,
+}
+
+fn spawn_camera_setup(
+    args: CameraSetupArgs,
 ) -> (ReadyFuture, SyncSender<()>, std::thread::JoinHandle<()>) {
+    let CameraSetupArgs {
+        id,
+        generation,
+        settings,
+        actor_ref,
+        new_frame_recipient,
+        native_frame_recipient,
+        native_sender_count,
+        flow,
+    } = args;
+
     let (ready_tx, ready_rx) = oneshot::channel::<Result<InputConnected, SetInputError>>();
     let (done_tx, done_rx) = std::sync::mpsc::sync_channel(1);
 
@@ -358,100 +373,13 @@ fn spawn_camera_setup(
             .build()
             .expect("Failed to build camera tokio runtime");
 
-        LocalSet::new().block_on(&runtime, async move {
+        {
             #[cfg(target_os = "macos")]
             let _capture_lifecycle_guard = camera_capture_lifecycle_guard();
 
-            if done_rx_thread.try_recv().is_ok() {
-                let _ = ready_tx_thread.send(Err(SetInputError::BuildStreamCrashed));
-
-                if matches!(flow, CameraSetupFlow::Open) {
-                    let _ = actor_ref
-                        .tell(InputConnectFailed {
-                            id: id.clone(),
-                            generation,
-                        })
-                        .await;
-                }
-
-                return;
-            }
-
-            let setup_result = setup_camera(
-                &id,
-                settings,
-                new_frame_recipient,
-                native_frame_recipient,
-                native_sender_count,
-            )
-            .await;
-
-            let handle = match setup_result {
-                Ok(result) => {
-                    let SetupCameraResult {
-                        handle,
-                        camera_info,
-                        video_info,
-                    } = result;
-
-                    let ready_payload = InputConnected {
-                        generation,
-                        id: id.clone(),
-                        camera_info: camera_info.clone(),
-                        video_info,
-                        done_tx: done_tx_thread.clone(),
-                    };
-
-                    match flow {
-                        CameraSetupFlow::Open => {
-                            let _ = ready_tx_thread.send(Ok(ready_payload.clone()));
-                            let _ = actor_ref.ask(ready_payload).await;
-                        }
-                        CameraSetupFlow::Locked => {
-                            let reconnect_result = actor_ref
-                                .ask(LockedCameraInputReconnected {
-                                    id: id.clone(),
-                                    camera_info,
-                                    video_info,
-                                    done_tx: done_tx_thread.clone(),
-                                })
-                                .await;
-
-                            match reconnect_result {
-                                Ok(true) => {
-                                    let _ = ready_tx_thread.send(Ok(ready_payload));
-                                    let _ = actor_ref
-                                        .tell(FinalizePendingRelease { id: id.clone() })
-                                        .await;
-                                }
-                                Ok(false) => {
-                                    warn!(
-                                        "Locked camera state changed before reconnecting {:?}",
-                                        id
-                                    );
-                                    let _ = ready_tx_thread
-                                        .send(Err(SetInputError::BuildStreamCrashed));
-                                    let _ = handle.stop_capturing();
-                                    return;
-                                }
-                                Err(err) => {
-                                    error!(
-                                        ?err,
-                                        "Failed to update locked camera state for {:?}", id
-                                    );
-                                    let _ = ready_tx_thread
-                                        .send(Err(SetInputError::BuildStreamCrashed));
-                                    let _ = handle.stop_capturing();
-                                    return;
-                                }
-                            }
-                        }
-                    }
-
-                    handle
-                }
-                Err(e) => {
-                    let _ = ready_tx_thread.send(Err(e.clone()));
+            LocalSet::new().block_on(&runtime, async move {
+                if done_rx_thread.try_recv().is_ok() {
+                    let _ = ready_tx_thread.send(Err(SetInputError::BuildStreamCrashed));
 
                     if matches!(flow, CameraSetupFlow::Open) {
                         let _ = actor_ref
@@ -464,27 +392,116 @@ fn spawn_camera_setup(
 
                     return;
                 }
-            };
 
-            info!(
-                "Camera capture thread: waiting for done signal for {:?}",
-                &id
-            );
+                let setup_result = setup_camera(
+                    &id,
+                    settings,
+                    new_frame_recipient,
+                    native_frame_recipient,
+                    native_sender_count,
+                )
+                .await;
+
+                let handle = match setup_result {
+                    Ok(result) => {
+                        let SetupCameraResult {
+                            handle,
+                            camera_info,
+                            video_info,
+                        } = result;
+
+                        let ready_payload = InputConnected {
+                            generation,
+                            id: id.clone(),
+                            camera_info: camera_info.clone(),
+                            video_info,
+                            done_tx: done_tx_thread.clone(),
+                        };
+
+                        match flow {
+                            CameraSetupFlow::Open => {
+                                let _ = ready_tx_thread.send(Ok(ready_payload.clone()));
+                                let _ = actor_ref.ask(ready_payload).await;
+                            }
+                            CameraSetupFlow::Locked => {
+                                let reconnect_result = actor_ref
+                                    .ask(LockedCameraInputReconnected {
+                                        id: id.clone(),
+                                        camera_info,
+                                        video_info,
+                                        done_tx: done_tx_thread.clone(),
+                                    })
+                                    .await;
+
+                                match reconnect_result {
+                                    Ok(true) => {
+                                        let _ = ready_tx_thread.send(Ok(ready_payload));
+                                        let _ = actor_ref
+                                            .tell(FinalizePendingRelease { id: id.clone() })
+                                            .await;
+                                    }
+                                    Ok(false) => {
+                                        warn!(
+                                            "Locked camera state changed before reconnecting {:?}",
+                                            id
+                                        );
+                                        let _ = ready_tx_thread
+                                            .send(Err(SetInputError::BuildStreamCrashed));
+                                        let _ = handle.stop_capturing();
+                                        return;
+                                    }
+                                    Err(err) => {
+                                        error!(
+                                            ?err,
+                                            "Failed to update locked camera state for {:?}", id
+                                        );
+                                        let _ = ready_tx_thread
+                                            .send(Err(SetInputError::BuildStreamCrashed));
+                                        let _ = handle.stop_capturing();
+                                        return;
+                                    }
+                                }
+                            }
+                        }
 
-            drop(done_tx_thread);
-            let recv_result = done_rx_thread.recv();
+                        handle
+                    }
+                    Err(e) => {
+                        let _ = ready_tx_thread.send(Err(e.clone()));
 
-            warn!(
-                "Camera capture thread: done signal received for {:?}, result={:?}",
-                &id, recv_result
-            );
+                        if matches!(flow, CameraSetupFlow::Open) {
+                            let _ = actor_ref
+                                .tell(InputConnectFailed {
+                                    id: id.clone(),
+                                    generation,
+                                })
+                                .await;
+                        }
+
+                        return;
+                    }
+                };
 
-            let _ = handle.stop_capturing();
+                info!(
+                    "Camera capture thread: waiting for done signal for {:?}",
+                    &id
+                );
 
-            std::thread::sleep(Duration::from_millis(50));
+                drop(done_tx_thread);
+                let recv_result = done_rx_thread.recv();
 
-            warn!("Camera capture thread: stopped capture of {:?}", &id);
-        });
+                warn!(
+                    "Camera capture thread: done signal received for {:?}, result={:?}",
+                    &id, recv_result
+                );
+
+                let _ = handle.stop_capturing();
+
+                std::thread::sleep(Duration::from_millis(50));
+
+                warn!("Camera capture thread: stopped capture of {:?}", &id);
+            });
+        }
 
         drop(runtime);
     });
@@ -946,16 +963,16 @@ impl Message<SetInput> for CameraFeed {
                 let native_frame_recipient = actor_ref.clone().recipient();
                 let id = msg.id.clone();
 
-                let (ready, done_tx, join_handle) = spawn_camera_setup(
-                    id.clone(),
+                let (ready, done_tx, join_handle) = spawn_camera_setup(CameraSetupArgs {
+                    id: id.clone(),
                     generation,
-                    msg.settings,
+                    settings: msg.settings,
                     actor_ref,
                     new_frame_recipient,
                     native_frame_recipient,
-                    self.native_sender_count.clone(),
-                    CameraSetupFlow::Open,
-                );
+                    native_sender_count: self.native_sender_count.clone(),
+                    flow: CameraSetupFlow::Open,
+                });
 
                 self.previous_thread = Some(join_handle);
 
@@ -983,16 +1000,16 @@ impl Message<SetInput> for CameraFeed {
                 let new_frame_recipient = actor_ref.clone().recipient();
                 let native_frame_recipient = actor_ref.clone().recipient();
 
-                let (ready, _done_tx, join_handle) = spawn_camera_setup(
-                    msg.id.clone(),
+                let (ready, _done_tx, join_handle) = spawn_camera_setup(CameraSetupArgs {
+                    id: msg.id.clone(),
                     generation,
-                    msg.settings,
+                    settings: msg.settings,
                     actor_ref,
                     new_frame_recipient,
                     native_frame_recipient,
-                    self.native_sender_count.clone(),
-                    CameraSetupFlow::Locked,
-                );
+                    native_sender_count: self.native_sender_count.clone(),
+                    flow: CameraSetupFlow::Locked,
+                });
 
                 self.previous_thread = Some(join_handle);