diff --git a/.gitignore b/.gitignore index c51a621..3450d9d 100644 --- a/.gitignore +++ b/.gitignore @@ -14,3 +14,4 @@ src/scansplitter/model_cache/ # Built frontend (generated by npm run build) src/scansplitter/static/ +.DS_Store diff --git a/README.md b/README.md index 0ff9943..8aab4d9 100644 --- a/README.md +++ b/README.md @@ -40,13 +40,29 @@ uvx scansplitter api --port 8001 ## Features -- **Auto-detection** - Finds multiple photos in a single scan using contour detection +- **Multiple detection modes** - Choose between ScanSplitterv1, ScanSplitterv2 (default), and AI (U2-Net) - **Interactive editing** - Adjust, rotate, and resize bounding boxes before cropping - **Auto-rotation** - Detects and corrects 90°/180°/270° rotations - **PDF support** - Extract and process pages from PDF files - **Web UI** - Modern React interface with Fabric.js canvas editor - **CLI** - Batch process files from the command line +## Detection Modes & Models + +### Photo detection (splitter) + +- **ScanSplitterv2 (default)**: An improved contour-based detector. It applies contrast enhancement (CLAHE), adaptive thresholding, adaptive morphology (kernel scales with resolution), and contour quality filtering (solidity/aspect/extent). It can also use convex-hull borders for irregular edges. +- **ScanSplitterv1**: The first contour-based detector used with adaptive threshold + fixed morphology + `minAreaRect` filtering. It’s simpler and can be useful as a fallback if v2 behaves unexpectedly on a specific scan. +- **AI (U2-Net)**: A deep-learning salient-object model (ONNX) that produces a mask; ScanSplitter then extracts regions from that mask. It’s best for difficult scans (busy backgrounds, low contrast), but requires downloading a model on first use. Might be less accurate for multiple photos at once. + +### Auto-rotation model + +- **Orientation model**: An EfficientNetV2-based ONNX classifier that predicts the correct 0°/90°/180°/270° rotation for each cropped photo. 
ScanSplitter may fall back to classic heuristics if the model can’t be loaded. + +### Model downloads + +Some modes require downloading models on first use (U2-Net (5MB / 176MB) and the orientation model (80MB)). The web UI shows download progress while this is happening. + ## Installation Options ### Option 1: Run directly with uvx (recommended) @@ -105,6 +121,7 @@ uv run scansplitter process scan.jpg \ --no-rotate \ --min-area 5 \ --max-area 70 \ + --detection-mode scansplitterv2 \ --format jpg \ -o ./output/ ``` @@ -117,17 +134,24 @@ uv run scansplitter process scan.jpg \ | `--no-rotate` | Disable auto-rotation | | `--min-area` | Minimum photo size as % of scan (default: 2) | | `--max-area` | Maximum photo size as % of scan (default: 80) | +| `--detection-mode` | `scansplitterv2` (default), `scansplitterv1` (legacy), or `u2net` (deep learning); `classic` is an alias for `scansplitterv2` | +| `--u2net-full` | Use full U2-Net model instead of lite (slower, more accurate) | | `--format` | Output format: `png` or `jpg` (default: png) | ## How It Works -1. **Preprocessing** - Convert to grayscale, apply Gaussian blur -2. **Thresholding** - Adaptive binary threshold to separate photos from background -3. **Contour Detection** - Find distinct regions using OpenCV -4. **Filtering** - Keep regions between min/max area thresholds -5. **Interactive Adjustment** - User can modify detected boxes in the web UI -6. **Rotation Detection** - Score each 90° rotation using Hough line detection -7. **Cropping** - Extract photos using adjusted bounding boxes +1. **Photo detection** - Runs the selected detection mode (ScanSplitterv1 / ScanSplitterv2 / AI (U2-Net)) to produce rotatable bounding boxes. +2. **Interactive adjustment** - You can refine boxes in the web UI before cropping. +3. **Cropping** - Extracts rotated regions using the adjusted boxes. +4. **Auto-rotation (optional)** - Uses the orientation model (with fallbacks) to fix 90°/180°/270° rotations. 
+ +## Credits + +ScanSplitter depends on excellent open models and upstream work: + +- **U²-Net (salient object detection)** by Xuebin Qin et al. — paper: https://arxiv.org/abs/2005.09007, code: https://github.com/xuebinqin/U-2-Net +- **U2-Net ONNX weights** are downloaded from `rembg` releases by Daniel Gatis (with a ScanSplitter backup mirror) — https://github.com/danielgatis/rembg +- **Orientation model (EfficientNetV2)** is downloaded from Duarte Barbosa’s deep image orientation detection project (with a ScanSplitter backup mirror) — https://github.com/duartebarbosadev/deep-image-orientation-detection ## Development diff --git a/frontend/src/App.tsx b/frontend/src/App.tsx index 3f2bf0d..26b9d0c 100644 --- a/frontend/src/App.tsx +++ b/frontend/src/App.tsx @@ -12,9 +12,9 @@ import { Toast, type ToastType } from "@/components/Toast"; import { ConfirmDialog } from "@/components/ConfirmDialog"; import { KeyboardShortcutsDialog } from "@/components/KeyboardShortcutsDialog"; import { Button } from "@/components/ui/button"; -import { uploadFile, detectBoxes, cropImages, exportZip, exportLocal, getImageUrl, FileConflictError } from "@/lib/api"; +import { uploadFile, detectBoxes, cropImages, exportZip, exportLocal, getImageUrl, FileConflictError, getModelStatuses, startModelDownload } from "@/lib/api"; import { generateName } from "@/lib/naming"; -import type { UploadedFile, BoundingBox, CroppedImage, DetectionSettings, NamingPattern } from "@/types"; +import type { UploadedFile, BoundingBox, CroppedImage, DetectionSettings, NamingPattern, ModelKey, ModelStatus } from "@/types"; function App() { // File state @@ -40,8 +40,13 @@ function App() { maxArea: 80, autoRotate: true, autoDetect: true, + detectionMode: "scansplitterv2", + u2netLite: true, }); + // Model download status (orientation + U2-Net) + const [modelStatuses, setModelStatuses] = useState | null>(null); + // Loading states const [isUploading, setIsUploading] = useState(false); const [isDetecting, 
setIsDetecting] = useState(false); @@ -68,6 +73,61 @@ function App() { setToast({ message, type }); }, []); + const refreshModelStatuses = useCallback(async () => { + try { + const statuses = await getModelStatuses(); + setModelStatuses(statuses); + return statuses; + } catch (error) { + console.error("Failed to refresh model statuses:", error); + return null; + } + }, []); + + const ensureModelReady = useCallback(async (modelKey: ModelKey) => { + const sleep = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms)); + + let statuses = await refreshModelStatuses(); + if (!statuses) { + throw new Error("Failed to load model status"); + } + + const current = statuses[modelKey]; + if (current?.status === "ready") return; + + await startModelDownload(modelKey); + + // Poll until ready (or error) + for (;;) { + await sleep(500); + statuses = await refreshModelStatuses(); + if (!statuses) continue; + const next = statuses[modelKey]; + if (!next) throw new Error("Unknown model"); + if (next.status === "ready") return; + if (next.status === "error") { + throw new Error(next.error || "Model download failed"); + } + } + }, [refreshModelStatuses]); + + useEffect(() => { + refreshModelStatuses(); + }, [refreshModelStatuses]); + + useEffect(() => { + if (settings.detectionMode !== "u2net") return; + + const modelKey: ModelKey = settings.u2netLite ? 
"u2net_lite" : "u2net_full"; + (async () => { + const statuses = await refreshModelStatuses(); + const current = statuses?.[modelKey]; + if (!current || current.status === "ready" || current.status === "downloading") return; + await startModelDownload(modelKey); + await refreshModelStatuses(); + })(); + }, [settings.detectionMode, settings.u2netLite, refreshModelStatuses]); + // Persist output directory to localStorage useEffect(() => { localStorage.setItem("scansplitter_output_dir", outputDirectory); @@ -117,11 +177,18 @@ function App() { ); try { + if (settings.detectionMode === "u2net") { + const modelKey: ModelKey = settings.u2netLite ? "u2net_lite" : "u2net_full"; + await ensureModelReady(modelKey); + } + const result = await detectBoxes( sessionId, page, settings.minArea, - settings.maxArea + settings.maxArea, + settings.detectionMode, + settings.u2netLite ); // Update with detected boxes setFiles((prev) => @@ -139,7 +206,7 @@ function App() { ) ); } - }, [settings.minArea, settings.maxArea]); + }, [settings.minArea, settings.maxArea, settings.detectionMode, settings.u2netLite, ensureModelReady]); // Handle file upload (multiple files) const handleUpload = useCallback(async (filesToUpload: File[]) => { @@ -317,11 +384,17 @@ function App() { if (!activeFile) return; setIsDetecting(true); try { + if (settings.detectionMode === "u2net") { + const modelKey: ModelKey = settings.u2netLite ? 
"u2net_lite" : "u2net_full"; + await ensureModelReady(modelKey); + } const result = await detectBoxes( activeFile.sessionId, activeFile.currentPage, settings.minArea, - settings.maxArea + settings.maxArea, + settings.detectionMode, + settings.u2netLite ); handleBoxesChange(result.boxes); } catch (error) { @@ -330,13 +403,16 @@ function App() { } finally { setIsDetecting(false); } - }, [activeFile, settings, handleBoxesChange]); + }, [activeFile, settings, handleBoxesChange, ensureModelReady]); // Handle crop const handleCrop = useCallback(async () => { if (!activeFile || activeFile.boxes.length === 0) return; setIsCropping(true); try { + if (settings.autoRotate) { + await ensureModelReady("orientation"); + } const result = await cropImages( activeFile.sessionId, activeFile.currentPage, @@ -376,7 +452,7 @@ function App() { } finally { setIsCropping(false); } - }, [activeFile, activeFileIndex, settings.autoRotate]); + }, [activeFile, activeFileIndex, settings.autoRotate, ensureModelReady]); // Handle export const handleExport = useCallback(async () => { @@ -518,6 +594,7 @@ function App() { isDetecting={isDetecting} isCropping={isCropping} hasBoxes={(activeFile?.boxes.length ?? 0) > 0} + modelStatuses={modelStatuses} /> | null; } export function SettingsPanel({ @@ -21,7 +23,12 @@ export function SettingsPanel({ isDetecting, isCropping, hasBoxes, + modelStatuses = null, }: SettingsPanelProps) { + const u2netKey: ModelKey = settings.u2netLite ? "u2net_lite" : "u2net_full"; + const u2netStatus = modelStatuses?.[u2netKey] ?? null; + const orientationStatus = modelStatuses?.["orientation"] ?? null; + return ( @@ -74,6 +81,21 @@ export function SettingsPanel({ Auto-rotate photos + {settings.autoRotate && orientationStatus && (orientationStatus.status === "downloading" || orientationStatus.status === "error") && ( +
+ {orientationStatus.status === "downloading" ? ( + <> + + + Downloading {orientationStatus.label} ({orientationStatus.size_desc}){" "} + {orientationStatus.progress}% + + + ) : orientationStatus.status === "error" ? ( + {orientationStatus.error || "Rotation model download failed"} + ) : null} +
+ )}
+
+ + +

+ {settings.detectionMode === "u2net" + ? "Deep learning model - best for difficult scans" + : settings.detectionMode === "scansplitterv1" + ? "Legacy contour detector from main" + : "Default contour detector - fast and improved"} +

+
+ + {settings.detectionMode === "u2net" && ( +
+ + onSettingsChange({ ...settings, u2netLite: e.target.checked }) + } + className="rounded" + /> + +

+ {settings.u2netLite ? "5MB" : "176MB"} +

+
+ )} + {settings.detectionMode === "u2net" && u2netStatus && u2netStatus.status !== "ready" && ( +
+ {u2netStatus.status === "downloading" ? ( + <> + + + Downloading {u2netStatus.label} ({u2netStatus.size_desc}) {u2netStatus.progress}% + + + ) : u2netStatus.status === "error" ? ( + {u2netStatus.error || "Model download failed"} + ) : ( + + {u2netStatus.label} not downloaded yet ({u2netStatus.size_desc}) + + )} +
+ )} +