diff --git a/.gitignore b/.gitignore index c51a621..3450d9d 100644 --- a/.gitignore +++ b/.gitignore @@ -14,3 +14,4 @@ src/scansplitter/model_cache/ # Built frontend (generated by npm run build) src/scansplitter/static/ +.DS_Store diff --git a/README.md b/README.md index 0ff9943..8aab4d9 100644 --- a/README.md +++ b/README.md @@ -40,13 +40,29 @@ uvx scansplitter api --port 8001 ## Features -- **Auto-detection** - Finds multiple photos in a single scan using contour detection +- **Multiple detection modes** - Choose between ScanSplitterv1, ScanSplitterv2 (default), and AI (U2-Net) - **Interactive editing** - Adjust, rotate, and resize bounding boxes before cropping - **Auto-rotation** - Detects and corrects 90°/180°/270° rotations - **PDF support** - Extract and process pages from PDF files - **Web UI** - Modern React interface with Fabric.js canvas editor - **CLI** - Batch process files from the command line +## Detection Modes & Models + +### Photo detection (splitter) + +- **ScanSplitterv2 (default)**: An improved contour-based detector. It applies contrast enhancement (CLAHE), adaptive thresholding, adaptive morphology (kernel scales with resolution), and contour quality filtering (solidity/aspect/extent). It can also use convex-hull borders for irregular edges. +- **ScanSplitterv1**: The first contour-based detector used with adaptive threshold + fixed morphology + `minAreaRect` filtering. It’s simpler and can be useful as a fallback if v2 behaves unexpectedly on a specific scan. +- **AI (U2-Net)**: A deep-learning salient-object model (ONNX) that produces a mask; ScanSplitter then extracts regions from that mask. It’s best for difficult scans (busy backgrounds, low contrast), but requires downloading a model on first use. Might be less accurate for multiple photos at once. + +### Auto-rotation model + +- **Orientation model**: An EfficientNetV2-based ONNX classifier that predicts the correct 0°/90°/180°/270° rotation for each cropped photo. 
ScanSplitter may fall back to classic heuristics if the model can’t be loaded. + +### Model downloads + +Some modes require downloading models on first use (U2-Net (5MB / 176MB) and the orientation model (80MB)). The web UI shows download progress while this is happening. + ## Installation Options ### Option 1: Run directly with uvx (recommended) @@ -105,6 +121,7 @@ uv run scansplitter process scan.jpg \ --no-rotate \ --min-area 5 \ --max-area 70 \ + --detection-mode scansplitterv2 \ --format jpg \ -o ./output/ ``` @@ -117,17 +134,24 @@ uv run scansplitter process scan.jpg \ | `--no-rotate` | Disable auto-rotation | | `--min-area` | Minimum photo size as % of scan (default: 2) | | `--max-area` | Maximum photo size as % of scan (default: 80) | +| `--detection-mode` | `scansplitterv2` (default), `scansplitterv1` (legacy), or `u2net` (deep learning); `classic` is an alias for `scansplitterv2` | +| `--u2net-full` | Use full U2-Net model instead of lite (slower, more accurate) | | `--format` | Output format: `png` or `jpg` (default: png) | ## How It Works -1. **Preprocessing** - Convert to grayscale, apply Gaussian blur -2. **Thresholding** - Adaptive binary threshold to separate photos from background -3. **Contour Detection** - Find distinct regions using OpenCV -4. **Filtering** - Keep regions between min/max area thresholds -5. **Interactive Adjustment** - User can modify detected boxes in the web UI -6. **Rotation Detection** - Score each 90° rotation using Hough line detection -7. **Cropping** - Extract photos using adjusted bounding boxes +1. **Photo detection** - Runs the selected detection mode (ScanSplitterv1 / ScanSplitterv2 / AI (U2-Net)) to produce rotatable bounding boxes. +2. **Interactive adjustment** - You can refine boxes in the web UI before cropping. +3. **Cropping** - Extracts rotated regions using the adjusted boxes. +4. **Auto-rotation (optional)** - Uses the orientation model (with fallbacks) to fix 90°/180°/270° rotations. 
+ +## Credits + +ScanSplitter depends on excellent open models and upstream work: + +- **U²-Net (salient object detection)** by Xuebin Qin et al. — paper: https://arxiv.org/abs/2005.09007, code: https://github.com/xuebinqin/U-2-Net +- **U2-Net ONNX weights** are downloaded from `rembg` releases by Daniel Gatis (with a ScanSplitter backup mirror) — https://github.com/danielgatis/rembg +- **Orientation model (EfficientNetV2)** is downloaded from Duarte Barbosa’s deep image orientation detection project (with a ScanSplitter backup mirror) — https://github.com/duartebarbosadev/deep-image-orientation-detection ## Development diff --git a/frontend/src/App.tsx b/frontend/src/App.tsx index 3f2bf0d..26b9d0c 100644 --- a/frontend/src/App.tsx +++ b/frontend/src/App.tsx @@ -12,9 +12,9 @@ import { Toast, type ToastType } from "@/components/Toast"; import { ConfirmDialog } from "@/components/ConfirmDialog"; import { KeyboardShortcutsDialog } from "@/components/KeyboardShortcutsDialog"; import { Button } from "@/components/ui/button"; -import { uploadFile, detectBoxes, cropImages, exportZip, exportLocal, getImageUrl, FileConflictError } from "@/lib/api"; +import { uploadFile, detectBoxes, cropImages, exportZip, exportLocal, getImageUrl, FileConflictError, getModelStatuses, startModelDownload } from "@/lib/api"; import { generateName } from "@/lib/naming"; -import type { UploadedFile, BoundingBox, CroppedImage, DetectionSettings, NamingPattern } from "@/types"; +import type { UploadedFile, BoundingBox, CroppedImage, DetectionSettings, NamingPattern, ModelKey, ModelStatus } from "@/types"; function App() { // File state @@ -40,8 +40,13 @@ function App() { maxArea: 80, autoRotate: true, autoDetect: true, + detectionMode: "scansplitterv2", + u2netLite: true, }); + // Model download status (orientation + U2-Net) + const [modelStatuses, setModelStatuses] = useState | null>(null); + // Loading states const [isUploading, setIsUploading] = useState(false); const [isDetecting, 
setIsDetecting] = useState(false); @@ -68,6 +73,61 @@ function App() { setToast({ message, type }); }, []); + const refreshModelStatuses = useCallback(async () => { + try { + const statuses = await getModelStatuses(); + setModelStatuses(statuses); + return statuses; + } catch (error) { + console.error("Failed to refresh model statuses:", error); + return null; + } + }, []); + + const ensureModelReady = useCallback(async (modelKey: ModelKey) => { + const sleep = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms)); + + let statuses = await refreshModelStatuses(); + if (!statuses) { + throw new Error("Failed to load model status"); + } + + const current = statuses[modelKey]; + if (current?.status === "ready") return; + + await startModelDownload(modelKey); + + // Poll until ready (or error) + for (;;) { + await sleep(500); + statuses = await refreshModelStatuses(); + if (!statuses) continue; + const next = statuses[modelKey]; + if (!next) throw new Error("Unknown model"); + if (next.status === "ready") return; + if (next.status === "error") { + throw new Error(next.error || "Model download failed"); + } + } + }, [refreshModelStatuses]); + + useEffect(() => { + refreshModelStatuses(); + }, [refreshModelStatuses]); + + useEffect(() => { + if (settings.detectionMode !== "u2net") return; + + const modelKey: ModelKey = settings.u2netLite ? 
"u2net_lite" : "u2net_full"; + (async () => { + const statuses = await refreshModelStatuses(); + const current = statuses?.[modelKey]; + if (!current || current.status === "ready" || current.status === "downloading") return; + await startModelDownload(modelKey); + await refreshModelStatuses(); + })(); + }, [settings.detectionMode, settings.u2netLite, refreshModelStatuses]); + // Persist output directory to localStorage useEffect(() => { localStorage.setItem("scansplitter_output_dir", outputDirectory); @@ -117,11 +177,18 @@ function App() { ); try { + if (settings.detectionMode === "u2net") { + const modelKey: ModelKey = settings.u2netLite ? "u2net_lite" : "u2net_full"; + await ensureModelReady(modelKey); + } + const result = await detectBoxes( sessionId, page, settings.minArea, - settings.maxArea + settings.maxArea, + settings.detectionMode, + settings.u2netLite ); // Update with detected boxes setFiles((prev) => @@ -139,7 +206,7 @@ function App() { ) ); } - }, [settings.minArea, settings.maxArea]); + }, [settings.minArea, settings.maxArea, settings.detectionMode, settings.u2netLite, ensureModelReady]); // Handle file upload (multiple files) const handleUpload = useCallback(async (filesToUpload: File[]) => { @@ -317,11 +384,17 @@ function App() { if (!activeFile) return; setIsDetecting(true); try { + if (settings.detectionMode === "u2net") { + const modelKey: ModelKey = settings.u2netLite ? 
"u2net_lite" : "u2net_full"; + await ensureModelReady(modelKey); + } const result = await detectBoxes( activeFile.sessionId, activeFile.currentPage, settings.minArea, - settings.maxArea + settings.maxArea, + settings.detectionMode, + settings.u2netLite ); handleBoxesChange(result.boxes); } catch (error) { @@ -330,13 +403,16 @@ function App() { } finally { setIsDetecting(false); } - }, [activeFile, settings, handleBoxesChange]); + }, [activeFile, settings, handleBoxesChange, ensureModelReady]); // Handle crop const handleCrop = useCallback(async () => { if (!activeFile || activeFile.boxes.length === 0) return; setIsCropping(true); try { + if (settings.autoRotate) { + await ensureModelReady("orientation"); + } const result = await cropImages( activeFile.sessionId, activeFile.currentPage, @@ -376,7 +452,7 @@ function App() { } finally { setIsCropping(false); } - }, [activeFile, activeFileIndex, settings.autoRotate]); + }, [activeFile, activeFileIndex, settings.autoRotate, ensureModelReady]); // Handle export const handleExport = useCallback(async () => { @@ -518,6 +594,7 @@ function App() { isDetecting={isDetecting} isCropping={isCropping} hasBoxes={(activeFile?.boxes.length ?? 0) > 0} + modelStatuses={modelStatuses} /> | null; } export function SettingsPanel({ @@ -21,7 +23,12 @@ export function SettingsPanel({ isDetecting, isCropping, hasBoxes, + modelStatuses = null, }: SettingsPanelProps) { + const u2netKey: ModelKey = settings.u2netLite ? "u2net_lite" : "u2net_full"; + const u2netStatus = modelStatuses?.[u2netKey] ?? null; + const orientationStatus = modelStatuses?.["orientation"] ?? null; + return ( @@ -74,6 +81,21 @@ export function SettingsPanel({ Auto-rotate photos + {settings.autoRotate && orientationStatus && (orientationStatus.status === "downloading" || orientationStatus.status === "error") && ( +
+ {orientationStatus.status === "downloading" ? ( + <> + + + Downloading {orientationStatus.label} ({orientationStatus.size_desc}){" "} + {orientationStatus.progress}% + + + ) : orientationStatus.status === "error" ? ( + {orientationStatus.error || "Rotation model download failed"} + ) : null} +
+ )}
+
+ + +

+ {settings.detectionMode === "u2net" + ? "Deep learning model - best for difficult scans" + : settings.detectionMode === "scansplitterv1" + ? "Legacy contour detector from main" + : "Default contour detector - fast and improved"} +

+
+ + {settings.detectionMode === "u2net" && ( +
+ + onSettingsChange({ ...settings, u2netLite: e.target.checked }) + } + className="rounded" + /> + +

+ {settings.u2netLite ? "5MB" : "176MB"} +

+
+ )} + {settings.detectionMode === "u2net" && u2netStatus && u2netStatus.status !== "ready" && ( +
+ {u2netStatus.status === "downloading" ? ( + <> + + + Downloading {u2netStatus.label} ({u2netStatus.size_desc}) {u2netStatus.progress}% + + + ) : u2netStatus.status === "error" ? ( + {u2netStatus.error || "Model download failed"} + ) : ( + + {u2netStatus.label} not downloaded yet ({u2netStatus.size_desc}) + + )} +
+ )} +