Merged
1 change: 1 addition & 0 deletions .gitignore
@@ -14,3 +14,4 @@ src/scansplitter/model_cache/

# Built frontend (generated by npm run build)
src/scansplitter/static/
.DS_Store
40 changes: 32 additions & 8 deletions README.md
@@ -40,13 +40,29 @@ uvx scansplitter api --port 8001

## Features

- **Auto-detection** - Finds multiple photos in a single scan using contour detection
- **Multiple detection modes** - Choose between ScanSplitterv1, ScanSplitterv2 (default), and AI (U2-Net)
- **Interactive editing** - Adjust, rotate, and resize bounding boxes before cropping
- **Auto-rotation** - Detects and corrects 90°/180°/270° rotations
- **PDF support** - Extract and process pages from PDF files
- **Web UI** - Modern React interface with Fabric.js canvas editor
- **CLI** - Batch process files from the command line

## Detection Modes & Models

### Photo detection (splitter)

- **ScanSplitterv2 (default)**: An improved contour-based detector. It applies contrast enhancement (CLAHE), adaptive thresholding, adaptive morphology (kernel scales with resolution), and contour quality filtering (solidity/aspect/extent). It can also use convex-hull borders for irregular edges.
- **ScanSplitterv1**: The original contour-based detector, using adaptive thresholding, fixed morphology, and `minAreaRect` filtering. It’s simpler and can serve as a fallback if v2 behaves unexpectedly on a specific scan.
- **AI (U2-Net)**: A deep-learning salient-object model (ONNX) that produces a mask; ScanSplitter then extracts regions from that mask. It’s best for difficult scans (busy backgrounds, low contrast), but requires downloading a model on first use and may be less accurate when a scan contains several photos at once.

### Auto-rotation model

- **Orientation model**: An EfficientNetV2-based ONNX classifier that predicts the correct 0°/90°/180°/270° rotation for each cropped photo. ScanSplitter may fall back to classic heuristics if the model can’t be loaded.
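
The classifier's job reduces to mapping a 4-way prediction back to a correcting rotation. A minimal sketch of that mapping (the class order is an assumption, not the model's documented contract):

```python
# Hypothetical sketch: map orientation-classifier logits to the rotation
# that undoes the detected orientation. Class order is assumed.
import numpy as np

def best_rotation(logits):
    """Return the correcting rotation in degrees for 4-way logits."""
    classes = [0, 90, 180, 270]          # assumed class order
    predicted = classes[int(np.argmax(logits))]
    # Rotate the opposite way to undo the detected rotation.
    return (360 - predicted) % 360

# Logits favoring the "rotated 90 degrees" class:
print(best_rotation(np.array([0.1, 2.3, 0.2, 0.4])))  # prints 270
```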

### Model downloads

Some modes require downloading models on first use: U2-Net (5 MB lite / 176 MB full) and the orientation model (80 MB). The web UI shows download progress while this is happening.

## Installation Options

### Option 1: Run directly with uvx (recommended)
@@ -105,6 +121,7 @@ uv run scansplitter process scan.jpg \
--no-rotate \
--min-area 5 \
--max-area 70 \
--detection-mode scansplitterv2 \
--format jpg \
-o ./output/
```
@@ -117,17 +134,24 @@ uv run scansplitter process scan.jpg \
| `--no-rotate` | Disable auto-rotation |
| `--min-area` | Minimum photo size as % of scan (default: 2) |
| `--max-area` | Maximum photo size as % of scan (default: 80) |
| `--detection-mode` | `scansplitterv2` (default), `scansplitterv1` (legacy), or `u2net` (deep learning); `classic` is an alias for `scansplitterv2` |
| `--u2net-full` | Use full U2-Net model instead of lite (slower, more accurate) |
| `--format` | Output format: `png` or `jpg` (default: png) |

## How It Works

1. **Preprocessing** - Convert to grayscale, apply Gaussian blur
2. **Thresholding** - Adaptive binary threshold to separate photos from background
3. **Contour Detection** - Find distinct regions using OpenCV
4. **Filtering** - Keep regions between min/max area thresholds
5. **Interactive Adjustment** - User can modify detected boxes in the web UI
6. **Rotation Detection** - Score each 90° rotation using Hough line detection
7. **Cropping** - Extract photos using adjusted bounding boxes
1. **Photo detection** - Runs the selected detection mode (ScanSplitterv1 / ScanSplitterv2 / AI (U2-Net)) to produce rotatable bounding boxes.
2. **Interactive adjustment** - You can refine boxes in the web UI before cropping.
3. **Cropping** - Extracts rotated regions using the adjusted boxes.
4. **Auto-rotation (optional)** - Uses the orientation model (with fallbacks) to fix 90°/180°/270° rotations.

## Credits

ScanSplitter depends on excellent open models and upstream work:

- **U²-Net (salient object detection)** by Xuebin Qin et al. — paper: https://arxiv.org/abs/2005.09007, code: https://github.com/xuebinqin/U-2-Net
- **U2-Net ONNX weights** are downloaded from `rembg` releases by Daniel Gatis (with a ScanSplitter backup mirror) — https://github.com/danielgatis/rembg
- **Orientation model (EfficientNetV2)** is downloaded from Duarte Barbosa’s deep image orientation detection project (with a ScanSplitter backup mirror) — https://github.com/duartebarbosadev/deep-image-orientation-detection

## Development

91 changes: 84 additions & 7 deletions frontend/src/App.tsx
@@ -12,9 +12,9 @@ import { Toast, type ToastType } from "@/components/Toast";
import { ConfirmDialog } from "@/components/ConfirmDialog";
import { KeyboardShortcutsDialog } from "@/components/KeyboardShortcutsDialog";
import { Button } from "@/components/ui/button";
import { uploadFile, detectBoxes, cropImages, exportZip, exportLocal, getImageUrl, FileConflictError } from "@/lib/api";
import { uploadFile, detectBoxes, cropImages, exportZip, exportLocal, getImageUrl, FileConflictError, getModelStatuses, startModelDownload } from "@/lib/api";
import { generateName } from "@/lib/naming";
import type { UploadedFile, BoundingBox, CroppedImage, DetectionSettings, NamingPattern } from "@/types";
import type { UploadedFile, BoundingBox, CroppedImage, DetectionSettings, NamingPattern, ModelKey, ModelStatus } from "@/types";

function App() {
// File state
@@ -40,8 +40,13 @@ function App() {
maxArea: 80,
autoRotate: true,
autoDetect: true,
detectionMode: "scansplitterv2",
u2netLite: true,
});

// Model download status (orientation + U2-Net)
const [modelStatuses, setModelStatuses] = useState<Record<ModelKey, ModelStatus> | null>(null);

// Loading states
const [isUploading, setIsUploading] = useState(false);
const [isDetecting, setIsDetecting] = useState(false);
@@ -68,6 +73,61 @@ function App() {
setToast({ message, type });
}, []);

const refreshModelStatuses = useCallback(async () => {
try {
const statuses = await getModelStatuses();
setModelStatuses(statuses);
return statuses;
} catch (error) {
console.error("Failed to refresh model statuses:", error);
return null;
}
}, []);

const ensureModelReady = useCallback(async (modelKey: ModelKey) => {
const sleep = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms));

let statuses = await refreshModelStatuses();
if (!statuses) {
throw new Error("Failed to load model status");
}

const current = statuses[modelKey];
if (current?.status === "ready") return;

await startModelDownload(modelKey);

// Poll until ready (or error)
for (;;) {
await sleep(500);
statuses = await refreshModelStatuses();
if (!statuses) continue;
const next = statuses[modelKey];
if (!next) throw new Error("Unknown model");
if (next.status === "ready") return;
if (next.status === "error") {
throw new Error(next.error || "Model download failed");
}
}
}, [refreshModelStatuses]);

useEffect(() => {
refreshModelStatuses();
}, [refreshModelStatuses]);

useEffect(() => {
if (settings.detectionMode !== "u2net") return;

const modelKey: ModelKey = settings.u2netLite ? "u2net_lite" : "u2net_full";
(async () => {
const statuses = await refreshModelStatuses();
const current = statuses?.[modelKey];
if (!current || current.status === "ready" || current.status === "downloading") return;
await startModelDownload(modelKey);
await refreshModelStatuses();
})();
}, [settings.detectionMode, settings.u2netLite, refreshModelStatuses]);

// Persist output directory to localStorage
useEffect(() => {
localStorage.setItem("scansplitter_output_dir", outputDirectory);
@@ -117,11 +177,18 @@ function App() {
);

try {
if (settings.detectionMode === "u2net") {
const modelKey: ModelKey = settings.u2netLite ? "u2net_lite" : "u2net_full";
await ensureModelReady(modelKey);
}

const result = await detectBoxes(
sessionId,
page,
settings.minArea,
settings.maxArea
settings.maxArea,
settings.detectionMode,
settings.u2netLite
);
// Update with detected boxes
setFiles((prev) =>
@@ -139,7 +206,7 @@ function App() {
)
);
}
}, [settings.minArea, settings.maxArea]);
}, [settings.minArea, settings.maxArea, settings.detectionMode, settings.u2netLite, ensureModelReady]);

// Handle file upload (multiple files)
const handleUpload = useCallback(async (filesToUpload: File[]) => {
@@ -317,11 +384,17 @@ function App() {
if (!activeFile) return;
setIsDetecting(true);
try {
if (settings.detectionMode === "u2net") {
const modelKey: ModelKey = settings.u2netLite ? "u2net_lite" : "u2net_full";
await ensureModelReady(modelKey);
}
const result = await detectBoxes(
activeFile.sessionId,
activeFile.currentPage,
settings.minArea,
settings.maxArea
settings.maxArea,
settings.detectionMode,
settings.u2netLite
);
handleBoxesChange(result.boxes);
} catch (error) {
@@ -330,13 +403,16 @@
} finally {
setIsDetecting(false);
}
}, [activeFile, settings, handleBoxesChange]);
}, [activeFile, settings, handleBoxesChange, ensureModelReady]);

// Handle crop
const handleCrop = useCallback(async () => {
if (!activeFile || activeFile.boxes.length === 0) return;
setIsCropping(true);
try {
if (settings.autoRotate) {
await ensureModelReady("orientation");
}
const result = await cropImages(
activeFile.sessionId,
activeFile.currentPage,
@@ -376,7 +452,7 @@
} finally {
setIsCropping(false);
}
}, [activeFile, activeFileIndex, settings.autoRotate]);
}, [activeFile, activeFileIndex, settings.autoRotate, ensureModelReady]);

// Handle export
const handleExport = useCallback(async () => {
Expand Down Expand Up @@ -518,6 +594,7 @@ function App() {
isDetecting={isDetecting}
isCropping={isCropping}
hasBoxes={(activeFile?.boxes.length ?? 0) > 0}
modelStatuses={modelStatuses}
/>
<ExifEditor
sessionId={activeFile?.sessionId ?? null}
93 changes: 92 additions & 1 deletion frontend/src/components/SettingsPanel.tsx
@@ -1,7 +1,8 @@
import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card";
import { Slider } from "@/components/ui/slider";
import { Button } from "@/components/ui/button";
import type { DetectionSettings } from "@/types";
import { Loader2 } from "lucide-react";
import type { DetectionSettings, ModelKey, ModelStatus } from "@/types";

interface SettingsPanelProps {
settings: DetectionSettings;
@@ -11,6 +12,7 @@ interface SettingsPanelProps {
isDetecting: boolean;
isCropping: boolean;
hasBoxes: boolean;
modelStatuses?: Record<ModelKey, ModelStatus> | null;
}

export function SettingsPanel({
@@ -21,7 +23,12 @@ export function SettingsPanel({
isDetecting,
isCropping,
hasBoxes,
modelStatuses = null,
}: SettingsPanelProps) {
const u2netKey: ModelKey = settings.u2netLite ? "u2net_lite" : "u2net_full";
const u2netStatus = modelStatuses?.[u2netKey] ?? null;
const orientationStatus = modelStatuses?.["orientation"] ?? null;

return (
<Card>
<CardHeader className="pb-3">
@@ -74,6 +81,21 @@
Auto-rotate photos
</label>
</div>
{settings.autoRotate && orientationStatus && (orientationStatus.status === "downloading" || orientationStatus.status === "error") && (
<div className="text-xs text-muted-foreground flex items-center gap-2">
{orientationStatus.status === "downloading" ? (
<>
<Loader2 className="w-3 h-3 animate-spin" />
<span>
Downloading {orientationStatus.label} ({orientationStatus.size_desc}){" "}
{orientationStatus.progress}%
</span>
</>
) : orientationStatus.status === "error" ? (
<span>{orientationStatus.error || "Rotation model download failed"}</span>
) : null}
</div>
)}

<div className="flex items-center gap-2">
<input
@@ -90,6 +112,75 @@
</label>
</div>

<div className="space-y-2">
<label htmlFor="detection-mode" className="text-sm">
Detection Mode
</label>
<select
id="detection-mode"
value={settings.detectionMode}
onChange={(e) =>
onSettingsChange({
...settings,
detectionMode: e.target.value as
| "scansplitterv1"
| "scansplitterv2"
| "u2net",
})
}
className="w-full rounded-md border border-input bg-background px-3 py-2 text-sm ring-offset-background focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring"
>
<option value="scansplitterv2">ScanSplitterv2</option>
<option value="scansplitterv1">ScanSplitterv1</option>
<option value="u2net">AI (U2-Net)</option>
</select>
<p className="text-xs text-muted-foreground">
{settings.detectionMode === "u2net"
? "Deep learning model - best for difficult scans"
: settings.detectionMode === "scansplitterv1"
? "Legacy contour detector from main"
: "Default contour detector - fast and improved"}
</p>
</div>

{settings.detectionMode === "u2net" && (
<div className="flex items-center gap-2">
<input
type="checkbox"
id="u2net-lite"
checked={settings.u2netLite}
onChange={(e) =>
onSettingsChange({ ...settings, u2netLite: e.target.checked })
}
className="rounded"
/>
<label htmlFor="u2net-lite" className="text-sm">
Use lite model (faster)
</label>
<p className="text-xs text-muted-foreground ml-auto">
{settings.u2netLite ? "5MB" : "176MB"}
</p>
</div>
)}
{settings.detectionMode === "u2net" && u2netStatus && u2netStatus.status !== "ready" && (
<div className="text-xs text-muted-foreground flex items-center gap-2">
{u2netStatus.status === "downloading" ? (
<>
<Loader2 className="w-3 h-3 animate-spin" />
<span>
Downloading {u2netStatus.label} ({u2netStatus.size_desc}) {u2netStatus.progress}%
</span>
</>
) : u2netStatus.status === "error" ? (
<span>{u2netStatus.error || "Model download failed"}</span>
) : (
<span>
{u2netStatus.label} not downloaded yet ({u2netStatus.size_desc})
</span>
)}
</div>
)}

<div className="space-y-2 pt-2">
<Button
onClick={onDetect}