diff --git a/CHANGELOG.md b/CHANGELOG.md index 177d224..be31818 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,13 @@ All notable changes to this project will be documented in this file. The format ### Added +- **First-run UX overhaul.** Three new pieces close the path from `git clone` to a working `dpub convert --transcribe ...` without manual treasure hunts: + - **`dpub doctor`** — read-only diagnostic showing build state (version, GPU acceleration: Metal / CUDA / CPU), every runtime prerequisite (`epubcheck`, `ace`, `ffmpeg`), and Whisper model cache contents. `--json` for CI use; same stable schema pattern as `dpub validate --json`. + - **`dpub setup --whisper-model `** — downloads any of `tiny`, `base`, `small`, `medium`, `large-v3` from huggingface.co/ggerganov/whisper.cpp into `~/.cache/dpub/models/` (or `%LOCALAPPDATA%\dpub\models\` on Windows) with SHA256 verification and an atomic `.partial` rename. Re-running on an already-cached model skips the download after a hash check. + - **`scripts/build.sh`** — host-aware release build that auto-picks `--features metal` on Apple Silicon and `--features cuda` on Linux+nvcc, falling back to CPU-only otherwise. Pre-flights `cmake`. Documented as the recommended build command. +- **Auto-discovery for `--transcribe`.** Calling `dpub convert --transcribe nl` without `--whisper-model` now picks the most-recently-modified `ggml-*.bin` from the cache. The `--whisper-model ` override stays for explicit control. +- **Interactive first-run prompt.** When `--transcribe` is used on a TTY with no cached model, dpub offers to download `ggml-medium.bin` instead of failing. Set `DPUB_NONINTERACTIVE=1` (or run in a non-TTY pipe) to suppress the prompt; the failure message points at `dpub setup`. +- **`dpub doctor --install`** — opt-in best-effort installer for missing runtime tools. Uses `brew` on macOS, `apt-get` / `dnf` on Linux (with `sudo`), and prints commands on Windows. Per-tool consent unless `--yes` is passed. 
Never tries to install Java directly. Whisper models are handled by `dpub setup` rather than the OS package manager. - **Word-level Media Overlay sync** (M6.5). When `--transcribe` runs and the cleanup path is active, dpub now extracts per-token timestamps from whisper.cpp, coalesces BPE pieces back into whole words via a leading-space rule (with punctuation attachment and degenerate-timing clamping), wraps each word in a `<span>` inside the cleaned `<p>

`, and emits one SMIL `` per word — wrapped in nested `` per paragraph. The result is karaoke-style highlight-along-with-audio in compatible reading systems (Thorium, Readium). Default-on; pass `--no-word-sync` to fall back to per-paragraph sync. Workspace EPUBCheck assertions extended to gate the new overlay shape; reference book stays 0/0/0. - `dpub-whisper` exposes a public `Word { start_seconds, end_seconds, text }` struct and `Segment.words: Vec` populated by the new BPE coalescer (`crates/dpub-whisper/src/words.rs`). Eight unit tests cover the BPE coalescing rules. diff --git a/Cargo.lock b/Cargo.lock index e5e6e5f..e9f9f16 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -141,6 +141,15 @@ version = "2.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c4512299f36f043ab09a583e57bceb5a5aab7a73db1805848e8fef3c9e8c78b3" +[[package]] +name = "block-buffer" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" +dependencies = [ + "generic-array", +] + [[package]] name = "bumpalo" version = "3.20.2" @@ -261,6 +270,15 @@ version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" +[[package]] +name = "cpufeatures" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280" +dependencies = [ + "libc", +] + [[package]] name = "crc32fast" version = "1.5.0" @@ -295,6 +313,16 @@ version = "0.8.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" +[[package]] +name = "crypto-common" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"78c8292055d1c1df0cce5d180393dc8cce0abec0a7102adb6c7b1eef6016d60a" +dependencies = [ + "generic-array", + "typenum", +] + [[package]] name = "derive_arbitrary" version = "1.4.2" @@ -306,6 +334,16 @@ dependencies = [ "syn", ] +[[package]] +name = "digest" +version = "0.10.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" +dependencies = [ + "block-buffer", + "crypto-common", +] + [[package]] name = "displaydoc" version = "0.2.5" @@ -335,14 +373,17 @@ dependencies = [ "dpub-audio", "dpub-convert", "dpub-core", + "dpub-meta", "dpub-validate", "rayon", "serde", "serde_json", + "sha2", "tempfile", "tracing", "tracing-subscriber", "walkdir", + "which", ] [[package]] @@ -526,6 +567,16 @@ dependencies = [ "slab", ] +[[package]] +name = "generic-array" +version = "0.14.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" +dependencies = [ + "typenum", + "version_check", +] + [[package]] name = "getrandom" version = "0.2.17" @@ -1180,6 +1231,17 @@ dependencies = [ "zmij", ] +[[package]] +name = "sha2" +version = "0.10.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + [[package]] name = "sharded-slab" version = "0.1.7" @@ -1500,6 +1562,12 @@ dependencies = [ "strength_reduce", ] +[[package]] +name = "typenum" +version = "1.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40ce102ab67701b8526c123c1bab5cbe42d7040ccfd0f64af1a385808d2f43de" + [[package]] name = "unicode-ident" version = "1.0.24" @@ -1576,6 +1644,12 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65" +[[package]] +name = 
"version_check" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" + [[package]] name = "walkdir" version = "2.5.0" diff --git a/Cargo.toml b/Cargo.toml index e45754a..e5ebbde 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -35,6 +35,7 @@ uuid = { version = "1", features = ["v4"] } serde_json = "1" which = "7" rayon = "1" +sha2 = "0.10" walkdir = "2" whisper-rs = "0.16" symphonia = { version = "0.5", default-features = false, features = ["mp3", "ogg", "isomp4", "vorbis"] } diff --git a/README.md b/README.md index 706c35e..085aaa6 100644 --- a/README.md +++ b/README.md @@ -18,7 +18,31 @@ The reference toolchain for this conversion is the [DAISY Pipeline 2](https://da ## Quickstart -Build from source (requires `cmake` for the bundled `dpub-whisper` crate): +### First-time setup + +Five commands from a fresh clone to a fully-working dpub: + +```sh +# macOS — install build + runtime prerequisites +brew install cmake epubcheck ffmpeg +npm install -g @daisy/ace # optional: enables `dpub a11y` + +# Build with the right GPU acceleration for the host +git clone https://github.com/11ways/dpub && cd dpub +./scripts/build.sh + +# Download a Whisper model (only needed if you'll use --transcribe) +./target/release/dpub setup --whisper-model medium + +# Confirm everything's green +./target/release/dpub doctor +``` + +`./scripts/build.sh` auto-detects Apple Silicon (Metal) / Linux+nvcc (CUDA) / falls back to CPU-only. Power users who want different feature flags call `cargo build --release -p dpub-cli` directly. + +`dpub doctor` shows the status of every prerequisite with platform-specific install hints. `dpub setup --whisper-model ` downloads a Whisper model into `~/.cache/dpub/models/` with SHA256 verification — `--transcribe` then auto-discovers the most recent cached model so you don't have to thread `--whisper-model ` through every invocation. 
Sizes: `tiny`, `base`, `small`, `medium` (recommended for Dutch), `large-v3`. + +### Manual build (if you prefer) ```sh git clone https://github.com/11ways/dpub diff --git a/crates/dpub-cli/Cargo.toml b/crates/dpub-cli/Cargo.toml index 503b2ca..b78f4a4 100644 --- a/crates/dpub-cli/Cargo.toml +++ b/crates/dpub-cli/Cargo.toml @@ -25,12 +25,15 @@ cuda = ["dpub-convert/cuda"] dpub-audio = { path = "../dpub-audio", version = "0.5.0" } dpub-core = { path = "../dpub-core", version = "0.5.0" } dpub-convert = { path = "../dpub-convert", version = "0.5.0" } +dpub-meta = { path = "../dpub-meta", version = "0.5.0" } dpub-validate = { path = "../dpub-validate", version = "0.5.0" } +sha2 = { workspace = true } clap = { workspace = true } anyhow = { workspace = true } rayon = { workspace = true } serde = { workspace = true } serde_json = { workspace = true } +which = { workspace = true } tracing = { workspace = true } tracing-subscriber = { workspace = true } walkdir = { workspace = true } diff --git a/crates/dpub-cli/src/doctor.rs b/crates/dpub-cli/src/doctor.rs new file mode 100644 index 0000000..c69303e --- /dev/null +++ b/crates/dpub-cli/src/doctor.rs @@ -0,0 +1,323 @@ +//! `dpub doctor` — diagnostic for build state, runtime tools, and +//! cached Whisper models. +//! +//! Read-only by default; with `--install` (handled in `main.rs`) it +//! offers to invoke the platform's package manager to fill missing +//! tools. + +use std::path::Path; +use std::process::Command; + +use serde::Serialize; + +use crate::setup; + +/// One row in the diagnostic report. 
+#[derive(Debug, Clone, Serialize)] +pub struct Tool { + pub key: &'static str, + pub label: &'static str, + pub status: Status, + #[serde(skip_serializing_if = "Option::is_none")] + pub version: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub detail: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub install_hint: Option, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)] +#[serde(rename_all = "lowercase")] +pub enum Status { + Ok, + Warning, + Missing, +} + +impl Status { + pub fn glyph(self) -> &'static str { + match self { + Status::Ok => "✓", + Status::Warning => "⚠", + Status::Missing => "✗", + } + } +} + +/// Aggregated report. Tools are emitted in display order. +#[derive(Debug, Clone, Serialize)] +pub struct Report { + pub dpub_version: &'static str, + pub gpu_acceleration: &'static str, + pub tools: Vec, +} + +/// Run every detector and return the aggregated report. +pub fn diagnose() -> Report { + Report { + dpub_version: env!("CARGO_PKG_VERSION"), + gpu_acceleration: gpu_label(), + tools: vec![ + check_convert(), + check_epubcheck(), + check_ace(), + check_ffmpeg(), + check_whisper_model(), + ], + } +} + +fn gpu_label() -> &'static str { + if cfg!(feature = "metal") { + "Metal" + } else if cfg!(feature = "cuda") { + "CUDA" + } else { + "CPU only" + } +} + +fn check_convert() -> Tool { + // Conversion has no required external tool — the binary itself is + // sufficient. We surface the row anyway so the report has a + // visible "core conversion is fine" line. 
+ Tool { + key: "convert", + label: "DAISY → EPUB conversion", + status: Status::Ok, + version: None, + detail: Some("ready (built-in, no external dependency)".into()), + install_hint: None, + } +} + +fn check_epubcheck() -> Tool { + let label = "EPUB validation (epubcheck)"; + let Ok(path) = which::which("epubcheck") else { + return Tool { + key: "epubcheck", + label, + status: Status::Missing, + version: None, + detail: Some("not on PATH".into()), + install_hint: Some(install_hint_for(&EpubcheckHint).into()), + }; + }; + let version = run_version(&path, &["--version"]).map(|s| { + // `EPUBCheck v5.3.0` → `5.3.0` + s.trim_start_matches("EPUBCheck v") + .trim_start_matches("EPUBCheck ") + .to_owned() + }); + Tool { + key: "epubcheck", + label, + status: Status::Ok, + version, + detail: None, + install_hint: None, + } +} + +fn check_ace() -> Tool { + let label = "Accessibility (ace)"; + let Ok(path) = which::which("ace") else { + return Tool { + key: "ace", + label, + status: Status::Missing, + version: None, + detail: Some("not on PATH".into()), + install_hint: Some(install_hint_for(&AceHint).into()), + }; + }; + let version = run_version(&path, &["--version"]); + Tool { + key: "ace", + label, + status: Status::Ok, + version, + detail: None, + install_hint: None, + } +} + +fn check_ffmpeg() -> Tool { + let label = "Audio recompression (ffmpeg)"; + let Ok(path) = which::which("ffmpeg") else { + return Tool { + key: "ffmpeg", + label, + status: Status::Missing, + version: None, + detail: Some("not on PATH".into()), + install_hint: Some(install_hint_for(&FfmpegHint).into()), + }; + }; + let version = run_version(&path, &["-version"]).and_then(|s| { + // `ffmpeg version 8.1.1 Copyright (c) ...` → `8.1.1` + let rest = s.trim_start_matches("ffmpeg version ").trim(); + rest.split_whitespace().next().map(str::to_owned) + }); + Tool { + key: "ffmpeg", + label, + status: Status::Ok, + version, + detail: None, + install_hint: None, + } +} + +fn check_whisper_model() -> Tool { 
+ let label = "Whisper transcription"; + match setup::list_cached_models() { + Ok(models) if !models.is_empty() => { + let names: Vec = models + .iter() + .filter_map(|p| p.file_name().map(|n| n.to_string_lossy().into_owned())) + .collect(); + Tool { + key: "whisper-model", + label, + status: Status::Ok, + version: None, + detail: Some(format!("{} cached: {}", models.len(), names.join(", "))), + install_hint: None, + } + } + Ok(_) => Tool { + key: "whisper-model", + label, + status: Status::Warning, + version: None, + detail: Some(format!("no model in {}", setup::cache_dir().display())), + install_hint: Some("dpub setup --whisper-model medium".into()), + }, + Err(e) => Tool { + key: "whisper-model", + label, + status: Status::Warning, + version: None, + detail: Some(format!("cache check failed: {e}")), + install_hint: Some("dpub setup --whisper-model medium".into()), + }, + } +} + +/// Run ` ` and return its first line of stdout, trimmed. +/// Used for `--version` parsing. Returns `None` on any failure. +fn run_version(bin: &Path, args: &[&str]) -> Option { + let output = Command::new(bin).args(args).output().ok()?; + let s = String::from_utf8_lossy(&output.stdout); + let first_non_empty = s.lines().find(|l| !l.trim().is_empty())?; + Some(first_non_empty.trim().to_owned()) +} + +/// Marker trait for per-tool install hints. Each variant produces a +/// platform-appropriate one-line install command for the host. 
+trait InstallHint {
+    fn for_host(&self) -> &'static str;
+}
+
+struct EpubcheckHint;
+struct AceHint;
+struct FfmpegHint;
+
+impl InstallHint for EpubcheckHint {
+    fn for_host(&self) -> &'static str {
+        if cfg!(target_os = "macos") {
+            "brew install epubcheck"
+        } else if cfg!(target_os = "linux") {
+            "see https://github.com/w3c/epubcheck/releases (also needs Java 11)"
+        } else {
+            "https://github.com/w3c/epubcheck/releases (also needs Java 11)"
+        }
+    }
+}
+
+impl InstallHint for AceHint {
+    fn for_host(&self) -> &'static str {
+        // Same command on all platforms; npm hides the difference.
+        "npm install -g @daisy/ace"
+    }
+}
+
+impl InstallHint for FfmpegHint {
+    fn for_host(&self) -> &'static str {
+        if cfg!(target_os = "macos") {
+            "brew install ffmpeg"
+        } else if cfg!(target_os = "linux") {
+            "sudo apt-get install -y ffmpeg # or: sudo dnf install -y ffmpeg"
+        } else {
+            "https://ffmpeg.org/download.html"
+        }
+    }
+}
+
+fn install_hint_for(h: &impl InstallHint) -> &'static str {
+    h.for_host()
+}
+
+/// Width, in characters, of the label column (label plus trailing `:` and
+/// padding) that precedes the status glyph.
+const LABEL_COLUMN_WIDTH: usize = 29;
+
+/// Format `label` followed by `:` and left-pad it with spaces to
+/// [`LABEL_COLUMN_WIDTH`] characters so the glyphs line up vertically.
+///
+/// `{:<width$}` pads by `char` count rather than byte length, so a
+/// multi-byte character such as the `→` in "DAISY → EPUB conversion"
+/// occupies one column. Labels longer than the column are not truncated.
+fn padded_label(label: &str) -> String {
+    format!("{:<width$}", format!("{label}:"), width = LABEL_COLUMN_WIDTH)
+}
+
+/// Render the human-readable doctor output to stdout. Mirrors
+/// `print_report` in `dpub_validate` for visual consistency.
+///
+/// Each row is `<label>: <glyph> <version> — <detail>`; a row with neither
+/// version nor detail prints `ready`. Rows that carry an install hint get
+/// a second line with that hint.
+pub fn print_report(report: &Report) {
+    // The header goes through the same padding as the tool rows so its
+    // check mark sits in the glyph column.
+    println!(
+        "{}✓ dpub {} (GPU: {})",
+        padded_label("Build"),
+        report.dpub_version,
+        report.gpu_acceleration,
+    );
+    for tool in &report.tools {
+        let glyph = tool.status.glyph();
+        let line = padded_label(tool.label);
+        let detail = match (&tool.version, &tool.detail) {
+            (None, None) => "ready".to_owned(),
+            (Some(version), None) => version.clone(),
+            (None, Some(detail)) => detail.clone(),
+            (Some(version), Some(detail)) => format!("{version} — {detail}"),
+        };
+        println!("{line}{glyph} {detail}");
+        if let Some(hint) = &tool.install_hint {
+            println!(" install: {hint}");
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn gpu_label_resolves_to_a_known_string() {
+        let label = gpu_label();
+        assert!(matches!(label, "Metal" | "CUDA" | "CPU only"));
+    }
+
+    #[test]
+    fn report_serializes_to_json() {
+        let report = diagnose();
+        let json = serde_json::to_string(&report).expect("serialize");
+        assert!(json.contains("\"dpub_version\""));
+        assert!(json.contains("\"gpu_acceleration\""));
+        assert!(json.contains("\"tools\""));
+    }
+
+    #[test]
+    fn convert_row_is_always_ok() {
+        let row = check_convert();
+        assert_eq!(row.status, Status::Ok);
+        assert_eq!(row.key, "convert");
+    }
+
+    #[test]
+    fn padded_label_counts_chars_not_bytes() {
+        let padded = padded_label("DAISY → EPUB conversion");
+        assert_eq!(padded.chars().count(), LABEL_COLUMN_WIDTH);
+        assert!(padded.starts_with("DAISY → EPUB conversion:"));
+    }
+}
diff --git a/crates/dpub-cli/src/install.rs b/crates/dpub-cli/src/install.rs
new file mode 100644
index 0000000..bd5fe38
--- /dev/null
+++ b/crates/dpub-cli/src/install.rs
@@ -0,0 +1,237 @@
+//! Platform-aware best-effort installer for dpub's runtime tools.
+//!
+//! Invoked from `dpub doctor --install`. For each missing tool the
+//! report flags, look up the right command for the host, prompt the
+//! user (skip if `--yes`), then run it. Skips tools we can't safely
+//! handle on a given platform (e.g. epubcheck on Linux) and prints
+//! a URL the user can follow instead.
+//!
+//! Never auto-installs Java — too much variability in users' JVM
+//! setups. Java is a hint, not an action.
+ +use std::io::Write; +use std::process::Command; + +use anyhow::{Context, Result}; + +use crate::doctor::{Report, Status, Tool}; + +#[derive(Debug, Clone)] +struct InstallStep { + /// Human-readable label of the tool we're installing for. + tool_label: String, + /// Argv to run. First element is the binary name. + argv: Vec, + /// Whether the command needs to be run via `sudo` (Linux package + /// managers typically do). + needs_sudo: bool, +} + +/// Run the installer for every missing tool the report flags. Returns +/// `Ok(())` even when individual installs fail — the user sees the +/// errors inline and can rerun `doctor` to see the new state. +pub fn run_install(report: &Report, yes: bool) -> Result<()> { + let plan = plan_install(report); + if plan.is_empty() { + println!("Nothing to install — `dpub doctor` is already green."); + return Ok(()); + } + println!("Will run the following commands to install missing tools:"); + println!(); + for step in &plan { + let prefix = if step.needs_sudo { "sudo " } else { "" }; + println!(" {} → {}{}", step.tool_label, prefix, step.argv.join(" ")); + } + println!(); + + if !yes && !confirm("Proceed?")? { + println!("Aborted."); + return Ok(()); + } + + for step in plan { + println!(); + let prefix = if step.needs_sudo { "sudo " } else { "" }; + println!("==> {} {}{}", step.tool_label, prefix, step.argv.join(" ")); + let status = if step.needs_sudo { + Command::new("sudo") + .args(&step.argv) + .status() + .with_context(|| format!("spawning sudo {}", step.argv.join(" ")))? + } else { + Command::new(&step.argv[0]) + .args(&step.argv[1..]) + .status() + .with_context(|| format!("spawning {}", step.argv.join(" ")))? + }; + if !status.success() { + eprintln!( + " ✗ {} failed (exit code {:?}). 
Continuing with remaining steps.", + step.tool_label, + status.code(), + ); + } + } + Ok(()) +} + +fn plan_install(report: &Report) -> Vec { + let mut out = Vec::new(); + for tool in &report.tools { + if tool.status == Status::Ok { + continue; + } + if let Some(step) = step_for(tool) { + out.push(step); + } + } + out +} + +fn step_for(tool: &Tool) -> Option { + let label = tool.label.to_owned(); + match tool.key { + "epubcheck" => epubcheck_step(label), + "ace" => ace_step(label), + "ffmpeg" => ffmpeg_step(label), + // Anything else (notably "whisper-model", which is `dpub + // setup` territory rather than an OS package) the doctor + // report already points the user at the right command. + _ => None, + } +} + +fn epubcheck_step(label: String) -> Option { + if cfg!(target_os = "macos") && which::which("brew").is_ok() { + return Some(InstallStep { + tool_label: label, + argv: vec!["brew".into(), "install".into(), "epubcheck".into()], + needs_sudo: false, + }); + } + eprintln!( + " • epubcheck has no straightforward package on this platform. \ + Download from https://github.com/w3c/epubcheck/releases (also needs Java 11)." + ); + None +} + +fn ace_step(label: String) -> Option { + if which::which("npm").is_ok() { + return Some(InstallStep { + tool_label: label, + argv: vec![ + "npm".into(), + "install".into(), + "-g".into(), + "@daisy/ace".into(), + ], + needs_sudo: !cfg!(target_os = "macos") && cfg!(target_os = "linux"), + }); + } + eprintln!( + " • npm not found. 
Install Node.js first, then run: npm install -g @daisy/ace" + ); + None +} + +fn ffmpeg_step(label: String) -> Option { + if cfg!(target_os = "macos") && which::which("brew").is_ok() { + return Some(InstallStep { + tool_label: label, + argv: vec!["brew".into(), "install".into(), "ffmpeg".into()], + needs_sudo: false, + }); + } + if cfg!(target_os = "linux") { + if which::which("apt-get").is_ok() { + return Some(InstallStep { + tool_label: label, + argv: vec![ + "apt-get".into(), + "install".into(), + "-y".into(), + "ffmpeg".into(), + ], + needs_sudo: true, + }); + } + if which::which("dnf").is_ok() { + return Some(InstallStep { + tool_label: label, + argv: vec![ + "dnf".into(), + "install".into(), + "-y".into(), + "ffmpeg".into(), + ], + needs_sudo: true, + }); + } + } + eprintln!( + " • No supported package manager detected for ffmpeg. \ + See https://ffmpeg.org/download.html" + ); + None +} + +/// Yes/no prompt on stderr; default = yes (empty answer accepted). +fn confirm(question: &str) -> Result { + eprint!("{question} [Y/n] "); + std::io::stderr().flush().ok(); + let mut answer = String::new(); + std::io::stdin().read_line(&mut answer).context("read stdin")?; + let answer = answer.trim().to_ascii_lowercase(); + Ok(answer.is_empty() || answer == "y" || answer == "yes") +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::doctor::{Status, Tool}; + + fn missing(key: &'static str, label: &'static str) -> Tool { + Tool { + key, + label, + status: Status::Missing, + version: None, + detail: None, + install_hint: None, + } + } + + #[test] + fn plan_skips_ok_tools() { + let mut tool = missing("ffmpeg", "ffmpeg"); + tool.status = Status::Ok; + let report = Report { + dpub_version: "test", + gpu_acceleration: "CPU only", + tools: vec![tool], + }; + assert!(plan_install(&report).is_empty()); + } + + #[test] + fn plan_skips_unknown_tool_keys() { + let report = Report { + dpub_version: "test", + gpu_acceleration: "CPU only", + tools: vec![missing("nonsense", 
"Nonsense")], + }; + assert!(plan_install(&report).is_empty()); + } + + #[test] + fn plan_skips_whisper_model() { + // Whisper model is `dpub setup` territory, not the OS package manager. + let report = Report { + dpub_version: "test", + gpu_acceleration: "CPU only", + tools: vec![missing("whisper-model", "Whisper transcription")], + }; + assert!(plan_install(&report).is_empty()); + } +} diff --git a/crates/dpub-cli/src/main.rs b/crates/dpub-cli/src/main.rs index 8195717..55aba4e 100644 --- a/crates/dpub-cli/src/main.rs +++ b/crates/dpub-cli/src/main.rs @@ -4,6 +4,10 @@ use anyhow::{Context, Result}; use clap::{Parser, Subcommand, ValueEnum}; use dpub_core::{Book, NavItem}; +mod doctor; +mod install; +mod setup; + #[derive(Parser)] #[command(name = "dpub", version, about = "DAISY 2.02 → EPUB 3 toolkit")] struct Cli { @@ -94,6 +98,30 @@ enum Command { #[arg(long)] json: bool, }, + /// Diagnose build state, runtime tools, and cached Whisper models. + /// Read-only; pass `--install` to invoke the platform's package + /// manager for missing tools after explicit consent. + Doctor { + /// Emit the structured report as JSON on stdout instead of + /// the human-readable summary. + #[arg(long)] + json: bool, + /// Offer to install missing tools using the platform's + /// package manager (`brew` / `apt-get` / `dnf`). Requires + /// per-tool confirmation unless `--yes` is also passed. + #[arg(long)] + install: bool, + /// Skip per-tool confirmation when `--install` is set. + #[arg(long)] + yes: bool, + }, + /// Set up dpub's per-user data: Whisper model cache, etc. + Setup { + /// Download a GGML Whisper model into the cache. One of: + /// `tiny`, `base`, `small`, `medium`, `large-v3`. + #[arg(long, value_name = "SIZE")] + whisper_model: Option, + }, /// Convert every DAISY 2.02 book under `` to EPUB 3, in /// parallel. A "book" is any directory containing an `ncc.html`. /// Writes a JSON summary to stdout when finished. 
Per-book errors @@ -179,6 +207,8 @@ fn main() -> Result<()> { ), Command::Validate { epub, json } => cmd_validate(&epub, json), Command::A11y { epub, json } => cmd_a11y(&epub, json), + Command::Doctor { json, install, yes } => cmd_doctor(json, install, yes), + Command::Setup { whisper_model } => cmd_setup(whisper_model.as_deref()), Command::Batch { input, output, @@ -228,7 +258,7 @@ fn cmd_convert( (Some(language), Some(model_path)) => { if !model_path.is_file() { anyhow::bail!( - "Whisper model not found at {} (download from https://huggingface.co/ggerganov/whisper.cpp)", + "Whisper model not found at {} (run `dpub setup --whisper-model medium` to download one)", model_path.display() ); } @@ -241,8 +271,26 @@ fn cmd_convert( language, }) } - (Some(_), None) => { - anyhow::bail!("--transcribe requires --whisper-model"); + (Some(language), None) => { + // Auto-discover: pick the most-recently-modified ggml-*.bin + // in dpub's per-user cache. If none, prompt on TTY (B.1) + // or fail with a hint. + let Some(model_path) = resolve_or_prompt_for_model()? else { + anyhow::bail!( + "no Whisper model found in {}. \ + Run `dpub setup --whisper-model medium` to download one, \ + or pass `--whisper-model ` directly.", + setup::cache_dir().display(), + ); + }; + println!( + " Transcribe: lang={language} model={} (auto-discovered)", + model_path.display() + ); + Some(dpub_convert::TranscribeOptions { + model_path, + language, + }) } (None, Some(_)) => { anyhow::bail!("--whisper-model requires --transcribe"); @@ -335,6 +383,94 @@ fn cmd_a11y(epub: &std::path::Path, json: bool) -> Result<()> { Ok(()) } +/// Look for a Whisper model the user already downloaded via +/// `dpub setup`. Returns `Some(path)` if a cached model exists, +/// `None` otherwise. On a TTY with no cached model, prompts the +/// user to download `medium` (Tier B.1). 
+/// +/// The non-interactive guard (`DPUB_NONINTERACTIVE=1` or non-TTY +/// stdin/stderr) skips the prompt and returns `None` so the caller +/// can produce a static failure message. +fn resolve_or_prompt_for_model() -> Result> { + if let Some(path) = setup::most_recent_model() { + return Ok(Some(path)); + } + if should_prompt_for_install() { + return prompt_and_install_default_model(); + } + Ok(None) +} + +/// `true` when stdin and stderr are both TTYs and the +/// `DPUB_NONINTERACTIVE` env var is unset. +fn should_prompt_for_install() -> bool { + use std::io::IsTerminal; + if std::env::var_os("DPUB_NONINTERACTIVE").is_some() { + return false; + } + std::io::stdin().is_terminal() && std::io::stderr().is_terminal() +} + +/// Prompt the user to download the default `medium` Whisper model. +/// Returns the cached path on consent; `None` on decline. +fn prompt_and_install_default_model() -> Result> { + eprintln!(); + eprintln!( + "No Whisper model found in {}.", + setup::cache_dir().display(), + ); + eprint!("Download ggml-medium.bin (≈ 1.5 GB)? 
[Y/n] "); + std::io::Write::flush(&mut std::io::stderr()).ok(); + + let mut answer = String::new(); + std::io::stdin().read_line(&mut answer).context("read stdin")?; + let answer = answer.trim().to_ascii_lowercase(); + if !(answer.is_empty() || answer == "y" || answer == "yes") { + return Ok(None); + } + let spec = setup::lookup("medium").expect("medium is a known size"); + let path = setup::install_model(spec)?; + Ok(Some(path)) +} + +fn cmd_doctor(json: bool, install: bool, yes: bool) -> Result<()> { + let report = doctor::diagnose(); + if json { + let s = serde_json::to_string_pretty(&report).context("serialise doctor report")?; + println!("{s}"); + return Ok(()); + } + doctor::print_report(&report); + if install { + println!(); + crate::install::run_install(&report, yes)?; + println!(); + println!("Re-running doctor to confirm:"); + println!(); + let after = doctor::diagnose(); + doctor::print_report(&after); + } + Ok(()) +} + +fn cmd_setup(whisper_model: Option<&str>) -> Result<()> { + let Some(size) = whisper_model else { + anyhow::bail!( + "nothing to set up. Pass --whisper-model (one of: {})", + setup::known_size_names().join(", "), + ); + }; + let Some(spec) = setup::lookup(size) else { + anyhow::bail!( + "unknown whisper-model size {size:?}. Known sizes: {}", + setup::known_size_names().join(", "), + ); + }; + let path = setup::install_model(spec)?; + println!("Default model for `dpub convert --transcribe`: {}", path.display()); + Ok(()) +} + /// Either print the human-readable summary, or serialise the report to /// stdout as pretty JSON. The JSON shape is the `Report` struct from /// `dpub-validate`; field names are stable as part of the 1.0 contract. diff --git a/crates/dpub-cli/src/setup.rs b/crates/dpub-cli/src/setup.rs new file mode 100644 index 0000000..3d2549f --- /dev/null +++ b/crates/dpub-cli/src/setup.rs @@ -0,0 +1,324 @@ +//! `dpub setup --whisper-model ` — download a GGML Whisper +//! 
model into a per-user cache directory, with SHA256 verification.
+//!
+//! Cache layout:
+//! - macOS / Linux: `$HOME/.cache/dpub/models/ggml-<size>.bin`
+//! - Windows: `%LOCALAPPDATA%\dpub\models\ggml-<size>.bin`
+//!
+//! `dpub convert --transcribe <lang>` (without `--whisper-model`)
+//! auto-discovers the most recently modified model in the cache dir.
+
+use std::fs;
+use std::io::Write;
+use std::path::{Path, PathBuf};
+use std::time::{Duration, Instant};
+
+use anyhow::{Context, Result};
+use sha2::{Digest, Sha256};
+
+/// Known whisper.cpp GGML model sizes that this command can download.
+/// SHA256s come from the upstream Hugging Face mirror; bumped when
+/// upstream rotates a model.
+pub const KNOWN_MODELS: &[ModelSpec] = &[
+    ModelSpec {
+        size: "tiny",
+        sha256: "be07e048e1e599ad46341c8d2a135645097a538221678b7acdd1b1919c6e1b21",
+        bytes: 77_691_713,
+    },
+    ModelSpec {
+        size: "base",
+        sha256: "60ed5bc3dd14eea856493d334349b405782ddcaf0028d4b5df4088345fba2efe",
+        bytes: 147_951_465,
+    },
+    ModelSpec {
+        size: "small",
+        sha256: "1be3a9b2063867b937e64e2ec7483364a79917e157fa98c5d94b5c1fffea987b",
+        bytes: 487_601_967,
+    },
+    ModelSpec {
+        size: "medium",
+        sha256: "6c14d5adee5f86394037b4e4e8b59f1673b6cee10e3cf0b11bbdbee79c156208",
+        bytes: 1_533_763_059,
+    },
+    ModelSpec {
+        size: "large-v3",
+        sha256: "64d182b440b98d5203c4f9bd541544d84c605196c4f7b845dfa11fb23594d1e2",
+        bytes: 3_094_623_691,
+    },
+];
+
+/// One downloadable model: its size name, expected digest and byte count.
+#[derive(Debug, Clone, Copy)]
+pub struct ModelSpec {
+    /// Size name as it appears in the file name and URL (`ggml-<size>.bin`).
+    pub size: &'static str,
+    /// Expected SHA256 of the model file, as lowercase hex.
+    pub sha256: &'static str,
+    /// Expected file size in bytes; used for the download size and progress display.
+    pub bytes: u64,
+}
+
+impl ModelSpec {
+    /// Download URL of this model in the whisper.cpp repository on huggingface.co.
+    pub fn url(&self) -> String {
+        format!(
+            "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-{}.bin",
+            self.size
+        )
+    }
+
+    /// File name of this model inside the cache directory.
+    pub fn filename(&self) -> String {
+        format!("ggml-{}.bin", self.size)
+    }
+}
+
+/// Find the [`ModelSpec`] whose size name equals `size` exactly, if any.
+pub fn lookup(size: &str) -> Option<&'static ModelSpec> {
+    KNOWN_MODELS.iter().find(|m| m.size == size)
+}
+
+/// Size names of all entries in [`KNOWN_MODELS`], in table order.
+pub fn known_size_names() -> Vec<&'static str> {
+    KNOWN_MODELS.iter().map(|m| m.size).collect()
+}
+
+/// Resolve dpub's per-user model cache directory.
+///
+/// This only computes the path; nothing is created on disk here
+/// (`install_model` creates the directory before downloading). When
+/// `LOCALAPPDATA` (Windows) or `HOME` (elsewhere) is unset, the result
+/// is relative to the current directory.
+pub fn cache_dir() -> PathBuf {
+    if cfg!(target_os = "windows") {
+        let base = std::env::var_os("LOCALAPPDATA").map_or_else(
+            || PathBuf::from("."),
+            PathBuf::from,
+        );
+        base.join("dpub").join("models")
+    } else {
+        let home = std::env::var_os("HOME").map_or_else(
+            || PathBuf::from("."),
+            PathBuf::from,
+        );
+        home.join(".cache").join("dpub").join("models")
+    }
+}
+
+/// Return paths of every `ggml-*.bin` file in the cache dir, sorted
+/// by most-recently-modified first. Returns an empty `Vec` if the
+/// cache dir doesn't exist.
+///
+/// The `ggml-` prefix is matched case-sensitively, the `.bin` suffix
+/// case-insensitively; entries whose name is not valid UTF-8 are skipped.
+pub fn list_cached_models() -> std::io::Result<Vec<PathBuf>> {
+    let dir = cache_dir();
+    if !dir.is_dir() {
+        return Ok(Vec::new());
+    }
+    let mut out: Vec<(std::time::SystemTime, PathBuf)> = Vec::new();
+    for entry in fs::read_dir(&dir)? {
+        let entry = entry?;
+        let path = entry.path();
+        let Some(name) = path.file_name().and_then(|n| n.to_str()) else {
+            continue;
+        };
+        if !name.starts_with("ggml-") || !name.to_ascii_lowercase().ends_with(".bin") {
+            continue;
+        }
+        // An unreadable mtime falls back to the epoch, so such files sort last.
+        let mtime = entry
+            .metadata()
+            .and_then(|m| m.modified())
+            .unwrap_or(std::time::UNIX_EPOCH);
+        out.push((mtime, path));
+    }
+    out.sort_by_key(|(t, _)| std::cmp::Reverse(*t)); // newest first
+    Ok(out.into_iter().map(|(_, p)| p).collect())
+}
+
+/// Most-recently-modified model in the cache, if any.
+///
+/// I/O errors while listing the cache are treated the same as "no model".
+pub fn most_recent_model() -> Option<PathBuf> {
+    list_cached_models().ok().and_then(|v| v.into_iter().next())
+}
+
+/// Download `spec` into the cache dir, verifying SHA256. Atomic via
+/// a `.partial` rename; on hash mismatch the partial file is
+/// deleted and an error is returned. Skips the download if the dest
+/// already exists with a matching hash.
+pub fn install_model(spec: &ModelSpec) -> Result<PathBuf> {
+    let dir = cache_dir();
+    fs::create_dir_all(&dir)
+        .with_context(|| format!("creating cache dir {}", dir.display()))?;
+    let final_path = dir.join(spec.filename());
+    let partial_path = dir.join(format!("{}.partial", spec.filename()));
+
+    if final_path.is_file() {
+        eprintln!(
+            "Verifying existing {} ...",
+            final_path.file_name().unwrap_or_default().to_string_lossy()
+        );
+        match verify_sha256(&final_path, spec.sha256) {
+            Ok(true) => {
+                eprintln!("Already cached and verified: {}", final_path.display());
+                return Ok(final_path);
+            }
+            Ok(false) => {
+                eprintln!("Existing file failed SHA check; re-downloading.");
+                fs::remove_file(&final_path).ok();
+            }
+            Err(e) => {
+                eprintln!("Verifying existing file failed ({e}); re-downloading.");
+                fs::remove_file(&final_path).ok();
+            }
+        }
+    }
+
+    eprintln!(
+        "Downloading {} ({}) ...",
+        spec.filename(),
+        format_bytes(spec.bytes),
+    );
+    let agent = dpub_meta::agent();
+    let mut hasher = Sha256::new();
+    let mut file = fs::File::create(&partial_path)
+        .with_context(|| format!("creating {}", partial_path.display()))?;
+    let mut last_tick = Instant::now();
+    let started = Instant::now();
+    let downloaded;
+    {
+        // Hash the bytes while they are written so the file is not re-read afterwards.
+        let mut tee = HashingWriter {
+            inner: &mut file,
+            hasher: &mut hasher,
+        };
+        downloaded =
+            dpub_meta::download_to_writer(&agent, &spec.url(), &mut tee, |bytes, total| {
+                // Redraw at most every 250 ms, but always draw the final update.
+                let now = Instant::now();
+                if now.duration_since(last_tick) < Duration::from_millis(250)
+                    && bytes < total.max(spec.bytes)
+                {
+                    return;
+                }
+                last_tick = now;
+                render_progress(bytes, total.max(spec.bytes), started);
+            })
+            .with_context(|| format!("downloading {}", spec.url()));
+    }
+    eprintln!(); // newline after the in-place progress bar
+    file.flush().ok();
+    drop(file);
+
+    if let Err(err) = downloaded {
+        // A truncated `.partial` cannot be resumed, so don't leave it
+        // (potentially gigabytes) behind when the transfer itself fails.
+        fs::remove_file(&partial_path).ok();
+        return Err(err);
+    }
+
+    let actual = hex(hasher.finalize().as_slice());
+    if actual != spec.sha256 {
+        fs::remove_file(&partial_path).ok();
+        anyhow::bail!(
+            "SHA256 mismatch for {}: expected {}, got {}",
+            spec.filename(),
+            spec.sha256,
+            actual,
+        );
+    }
+    fs::rename(&partial_path, &final_path)
+        .with_context(|| format!("renaming {} → {}", partial_path.display(), final_path.display()))?;
+    eprintln!("Verified SHA256.");
+    eprintln!("Cached: {}", final_path.display());
+    Ok(final_path)
+}
+
+/// Verify an existing file's SHA256 against `expected_hex` without
+/// re-downloading.
+///
+/// The file is read in 64 KiB chunks. The digest is compared as a
+/// lowercase hex string, so `expected_hex` must be lowercase too.
+/// Returns an error if the file cannot be opened or read.
+fn verify_sha256(path: &Path, expected_hex: &str) -> Result<bool> {
+    use std::io::Read;
+    let mut file = fs::File::open(path)
+        .with_context(|| format!("opening {}", path.display()))?;
+    let mut hasher = Sha256::new();
+    let mut buf = vec![0u8; 64 * 1024];
+    loop {
+        let n = file.read(&mut buf)?;
+        if n == 0 {
+            break;
+        }
+        hasher.update(&buf[..n]);
+    }
+    Ok(hex(hasher.finalize().as_slice()) == expected_hex)
+}
+
+/// Writer that forwards to a file and feeds the same bytes to a hasher.
+struct HashingWriter<'a> {
+    inner: &'a mut fs::File,
+    hasher: &'a mut Sha256,
+}
+
+impl Write for HashingWriter<'_> {
+    fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
+        let n = self.inner.write(buf)?;
+        // Hash only what the file actually accepted, in case of a short write.
+        self.hasher.update(&buf[..n]);
+        Ok(n)
+    }
+    fn flush(&mut self) -> std::io::Result<()> {
+        self.inner.flush()
+    }
+}
+
+/// Redraw the in-place progress line on stderr: a 40-character bar,
+/// the percentage, bytes done of `total`, and throughput since `started`.
+fn render_progress(bytes: u64, total: u64, started: Instant) {
+    let total = total.max(1); // avoid dividing by zero when the size is unknown
+    #[allow(clippy::cast_precision_loss)]
+    let pct = ((bytes as f64 / total as f64) * 100.0).clamp(0.0, 100.0);
+    let bar_width: usize = 40;
+    #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss, clippy::cast_precision_loss)]
+    let filled = ((pct / 100.0) * bar_width as f64).round() as usize;
+    let filled = filled.min(bar_width);
+    let bar: String = std::iter::repeat_n('#', filled)
+        .chain(std::iter::repeat_n('-', bar_width - filled))
+        .collect();
+    let elapsed = started.elapsed().as_secs_f64().max(0.001);
+    #[allow(clippy::cast_precision_loss)]
+    let mbps = (bytes as f64 / 1_048_576.0) / elapsed;
+    // Leading `\r` rewinds to the start of the line so the bar overwrites itself.
+    eprint!(
+        "\r [{bar}] {pct:>5.1}% ({} of {}, {mbps:.1} MiB/s) ",
+        format_bytes(bytes),
+        format_bytes(total),
+    );
+}
+
+/// Format a byte count with decimal units (powers of 1000): B, KB, MB, GB.
+fn format_bytes(n: u64) -> String {
+    #[allow(clippy::cast_precision_loss)]
+    let f = n as f64;
+    if n >= 1_000_000_000 {
+        format!("{:.2} GB", f / 1_000_000_000.0)
+    } else if n >= 1_000_000 {
+        format!("{:.1} MB", f / 1_000_000.0)
+    } else if n >= 1_000 {
+        format!("{:.0} KB", f / 1_000.0)
+    } else {
+        format!("{n} B")
+    }
+}
+
+/// Encode `bytes` as a lowercase hex string, two digits per byte.
+fn hex(bytes: &[u8]) -> String {
+    use std::fmt::Write;
+    let mut s = String::with_capacity(bytes.len() * 2);
+    for b in bytes {
+        let _ = write!(&mut s, "{b:02x}");
+    }
+    s
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn lookup_finds_known_sizes() {
+        assert!(lookup("medium").is_some());
+        assert!(lookup("tiny").is_some());
+        assert!(lookup("nonsense").is_none());
+    }
+
+    #[test]
+    fn cache_dir_is_under_home_or_localappdata() {
+        let dir = cache_dir();
+        assert!(dir.ends_with("dpub/models") || dir.ends_with("dpub\\models"));
+    }
+
+    #[test]
+    fn known_size_names_includes_medium() {
+        let names = known_size_names();
+        assert!(names.contains(&"medium"));
+    }
+
+    #[test]
+    fn format_bytes_human_readable() {
+        assert_eq!(format_bytes(0), "0 B");
+        assert_eq!(format_bytes(1_500), "2 KB");
+        assert!(format_bytes(1_500_000).starts_with("1.5"));
+        assert!(format_bytes(1_500_000_000).contains("GB"));
+    }
+
+    #[test]
+    fn hex_round_trip() {
+        assert_eq!(hex(&[0xde, 0xad, 0xbe, 0xef]), "deadbeef");
+        assert_eq!(hex(&[]), "");
+    }
+}
diff --git a/crates/dpub-meta/src/lib.rs b/crates/dpub-meta/src/lib.rs
index d099a79..f74a607 100644
--- a/crates/dpub-meta/src/lib.rs
+++ b/crates/dpub-meta/src/lib.rs
@@ -20,6 +20,7 @@
 //! present) ISBN to a third party. Callers should keep the feature
 //! opt-in for that reason.
 
+use std::io::{Read, Write};
 use std::time::Duration;
 
 use serde::Deserialize;
@@ -34,6 +35,54 @@ const USER_AGENT_BASE: &str = "dpub";
 const TIMEOUT: Duration = Duration::from_secs(8);
 const MAX_COVER_BYTES: usize = 4 * 1024 * 1024;
 
+/// Build a fresh `ureq::Agent` with dpub's standard configuration:
+/// 8-second timeout, identifying User-Agent.
Callers that need a
+/// generic HTTP-download path (Whisper model fetch, etc.) can use
+/// this directly via [`download_to_writer`].
+///
+/// Timeouts: connecting is bounded by `TIMEOUT` and every individual
+/// body read by 60 seconds. There is deliberately no overall request
+/// deadline, because that would also cap the time allowed for reading
+/// the body and abort any multi-gigabyte model download that takes
+/// longer than the deadline.
+pub fn agent() -> ureq::Agent {
+    ureq::AgentBuilder::new()
+        .timeout_connect(TIMEOUT)
+        .timeout_read(Duration::from_secs(60))
+        .user_agent(&format!(
+            "{USER_AGENT_BASE}/{} (+https://github.com/11ways/dpub)",
+            env!("CARGO_PKG_VERSION")
+        ))
+        .build()
+}
+
+/// Stream a URL into `dest`, calling `on_progress(bytes_so_far,
+/// content_length_or_zero)` once before the first read and again
+/// after every chunk written. Used by the Whisper model
+/// downloader. No `Range` resumption in v1 — a partial file is
+/// truncated on retry.
+///
+/// `content_length_or_zero` is the parsed `content-length` response
+/// header, or 0 when the header is missing or unparsable. The body is
+/// copied in 64 KiB chunks and `dest` is flushed before returning.
+///
+/// Returns the total number of bytes written.
+// NOTE(review): the generic parameters on this signature were lost in this
+// copy of the patch. `W: Write` and `u64` follow from the body; confirm
+// whether the return type also names an explicit error type.
+pub fn download_to_writer<W: Write>(
+    agent: &ureq::Agent,
+    url: &str,
+    dest: &mut W,
+    mut on_progress: impl FnMut(u64, u64),
+) -> Result<u64> {
+    let resp = agent.get(url).call()?;
+    let content_length: u64 = resp
+        .header("content-length")
+        .and_then(|s| s.parse().ok())
+        .unwrap_or(0);
+
+    let mut reader = resp.into_reader();
+    let mut buf = vec![0u8; 64 * 1024];
+    let mut total: u64 = 0;
+    on_progress(0, content_length);
+    loop {
+        let n = reader.read(&mut buf)?;
+        if n == 0 {
+            break;
+        }
+        dest.write_all(&buf[..n])?;
+        total += n as u64;
+        on_progress(total, content_length);
+    }
+    // Surface write-back errors here instead of losing them when a buffered `dest` is dropped.
+    dest.flush()?;
+    Ok(total)
+}
+
 /// Identifying bits we have for a book — typically derived from
 /// DAISY 2.02 NCC metadata (`dc:title`, `dc:creator`, `dc:language`,
 /// `dc:identifier`).
@@ -188,8 +237,6 @@ fn fetch_cover(agent: &ureq::Agent, candidate: &Candidate) -> Result
+# NOTE(review): this copy of the patch lost everything between the hunk header
+# above and the cmake check below (the rest of the `fetch_cover` hunk, the file
+# header of the build script, and the script's opening lines). The `if` line is
+# reconstructed by analogy with the nvcc check further down; confirm it.
+if ! command -v cmake >/dev/null 2>&1; then
+    echo >&2 "error: cmake is required to build dpub (whisper-rs-sys compiles whisper.cpp)"
+    case "$(uname -s)" in
+        Darwin) echo >&2 " install: brew install cmake" ;;
+        Linux) echo >&2 " install: sudo apt-get install -y cmake (or: sudo dnf install -y cmake)" ;;
+    esac
+    exit 1
+fi
+
+# Choose cargo feature flags from the host OS and CPU architecture.
+features=()
+case "$(uname -s)/$(uname -m)" in
+    Darwin/arm64)
+        features+=("metal")
+        echo "Detected Apple Silicon — building with Metal Whisper acceleration."
+        ;;
+    Darwin/x86_64)
+        echo "Detected Intel macOS — building CPU-only (Metal needs Apple Silicon)."
+        ;;
+    Linux/x86_64)
+        # CUDA is enabled only when the toolkit's compiler is on PATH.
+        if command -v nvcc >/dev/null 2>&1; then
+            features+=("cuda")
+            echo "Detected NVIDIA toolkit — building with CUDA Whisper acceleration."
+        else
+            echo "Detected Linux x86_64 — building CPU-only (install CUDA toolkit for nvcc-detected GPU build)."
+        fi
+        ;;
+    *)
+        echo "Unknown host $(uname -s)/$(uname -m) — building CPU-only."
+        ;;
+esac
+
+# Echo the final cargo command, then replace this shell with it.
+set -x
+if [ ${#features[@]} -gt 0 ]; then
+    exec cargo build --release -p dpub-cli --features "${features[*]}"
+else
+    exec cargo build --release -p dpub-cli
+fi