diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 545e1d8..218c3e9 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -5,20 +5,16 @@ on:
branches:
- main
paths-ignore:
- - 'README'
- - 'COPYRIGHT'
- 'LICENSE-*'
- '**.md'
- '**.txt'
pull_request:
paths-ignore:
- - 'README'
- - 'COPYRIGHT'
- 'LICENSE-*'
- '**.md'
- '**.txt'
workflow_dispatch:
- schedule: [cron: "0 1 */7 * *"]
+ schedule: [cron: "0 2 * * 1"] # Mondays at 02:00 UTC
env:
CARGO_TERM_COLOR: always
@@ -26,81 +22,38 @@ env:
RUST_BACKTRACE: 1
jobs:
- # Check formatting (platform-independent, one OS is enough)
rustfmt:
name: rustfmt
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v6
- - name: Install Rust
- run: rustup update stable && rustup default stable && rustup component add rustfmt
- - name: Check formatting
- run: cargo fmt --all -- --check
+ - uses: actions/checkout@v4
+ - uses: dtolnay/rust-toolchain@stable
+ with:
+ components: rustfmt
+ - name: Check formatting
+ run: cargo fmt --all -- --check
- # Apply clippy lints
clippy:
name: clippy
- strategy:
- matrix:
- os:
- - ubuntu-latest
- - macos-latest
- - windows-latest
- runs-on: ${{ matrix.os }}
- steps:
- - uses: actions/checkout@v6
- - name: Install Rust
- # --no-self-update is necessary because the windows environment cannot self-update rustup.exe.
- run: rustup update stable --no-self-update && rustup default stable && rustup component add clippy
- - name: Install cargo-hack
- run: cargo install cargo-hack
- - name: Apply clippy lints
- run: cargo hack clippy --each-feature --exclude-no-default-features
-
- # Run tests on some extra platforms
- cross:
- name: cross
- strategy:
- matrix:
- target:
- - aarch64-unknown-linux-gnu
- - aarch64-linux-android
- - aarch64-unknown-linux-musl
- - i686-linux-android
- - x86_64-linux-android
- - i686-pc-windows-gnu
- - x86_64-pc-windows-gnu
- - i686-unknown-linux-gnu
- - powerpc64-unknown-linux-gnu
- - riscv64gc-unknown-linux-gnu
- - wasm32-unknown-unknown
- - wasm32-unknown-emscripten
- - wasm32-wasip1
- - wasm32-wasip1-threads
- - wasm32-wasip2
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v6
- - name: Cache cargo build and registry
- uses: actions/cache@v5
+ - uses: actions/checkout@v4
+ - uses: dtolnay/rust-toolchain@stable
with:
- path: |
- ~/.cargo/registry
- ~/.cargo/git
- target
- key: ${{ runner.os }}-cross-${{ hashFiles('**/Cargo.lock') }}
- restore-keys: |
- ${{ runner.os }}-cross-
- - name: Install Rust
- run: rustup update stable && rustup default stable
- - name: cargo build --target ${{ matrix.target }}
- run: |
- rustup target add ${{ matrix.target }}
- cargo build --target ${{ matrix.target }}
+ toolchain: "1.95"
+ components: clippy
+ - uses: Swatinem/rust-cache@v2
+ - name: clippy (default features)
+ run: cargo clippy --all-targets -- -D warnings
+ - name: clippy (no-default-features)
+ run: cargo clippy --all-targets --no-default-features -- -D warnings
+ - name: clippy (decoders only)
+ run: cargo clippy --all-targets --no-default-features --features decoders -- -D warnings
build:
- name: build
+ name: build (${{ matrix.os }})
strategy:
+ fail-fast: false
matrix:
os:
- ubuntu-latest
@@ -108,28 +61,24 @@ jobs:
- windows-latest
runs-on: ${{ matrix.os }}
steps:
- - uses: actions/checkout@v6
- - name: Cache cargo build and registry
- uses: actions/cache@v5
- with:
- path: |
- ~/.cargo/registry
- ~/.cargo/git
- target
- key: ${{ runner.os }}-build-${{ hashFiles('**/Cargo.lock') }}
- restore-keys: |
- ${{ runner.os }}-build-
- - name: Install Rust
- # --no-self-update is necessary because the windows environment cannot self-update rustup.exe.
- run: rustup update stable --no-self-update && rustup default stable
- - name: Install cargo-hack
- run: cargo install cargo-hack
- - name: Run build
- run: cargo hack build --feature-powerset --exclude-no-default-features
+ - uses: actions/checkout@v4
+ - uses: dtolnay/rust-toolchain@stable
+ with:
+ toolchain: "1.95"
+ - uses: Swatinem/rust-cache@v2
+ - name: build (default features)
+ run: cargo build --verbose
+ - name: build (--no-default-features)
+ run: cargo build --no-default-features --verbose
+ - name: build (--no-default-features --features decoders)
+ run: cargo build --no-default-features --features decoders --verbose
+ - name: build (--features serde)
+ run: cargo build --features serde --verbose
test:
- name: test
+ name: test (${{ matrix.os }})
strategy:
+ fail-fast: false
matrix:
os:
- ubuntu-latest
@@ -137,208 +86,41 @@ jobs:
- windows-latest
runs-on: ${{ matrix.os }}
steps:
- - uses: actions/checkout@v6
- - name: Cache cargo build and registry
- uses: actions/cache@v5
- with:
- path: |
- ~/.cargo/registry
- ~/.cargo/git
- target
- key: ${{ runner.os }}-test-${{ hashFiles('**/Cargo.lock') }}
- restore-keys: |
- ${{ runner.os }}-test-
- - name: Install Rust
- # --no-self-update is necessary because the windows environment cannot self-update rustup.exe.
- run: rustup update stable --no-self-update && rustup default stable
- - name: Install cargo-hack
- run: cargo install cargo-hack
- - name: Run test
- run: cargo hack test --feature-powerset --exclude-no-default-features --exclude-features loom
-
- sanitizer:
- name: sanitizer
- runs-on: ubuntu-latest
- steps:
- - uses: actions/checkout@v6
- - name: Cache cargo build and registry
- uses: actions/cache@v5
+ - uses: actions/checkout@v4
+ - uses: dtolnay/rust-toolchain@stable
with:
- path: |
- ~/.cargo/registry
- ~/.cargo/git
- target
- key: ${{ runner.os }}-sanitizer-${{ hashFiles('**/Cargo.lock') }}
- restore-keys: |
- ${{ runner.os }}-sanitizer-
- - name: Install Rust
- run: rustup update nightly && rustup default nightly
- - name: Install rust-src
- run: rustup component add rust-src
- - name: ASAN / LSAN / MSAN / TSAN
- run: bash ci/sanitizer.sh
+ toolchain: "1.95"
+ - uses: Swatinem/rust-cache@v2
+ - name: test (lib only, default features)
+ run: cargo test --lib --verbose
+ - name: test (lib only, --no-default-features)
+ run: cargo test --lib --no-default-features --verbose
+ - name: test (lib only, decoders)
+ run: cargo test --lib --no-default-features --features decoders --verbose
- miri-tb:
- name: miri-tb-${{ matrix.target }}
- strategy:
- matrix:
- include:
- - os: ubuntu-latest
- target: x86_64-unknown-linux-gnu
- - os: ubuntu-latest
- target: aarch64-unknown-linux-gnu
- - os: ubuntu-latest
- target: i686-unknown-linux-gnu
- - os: ubuntu-latest
- target: powerpc64-unknown-linux-gnu
- - os: ubuntu-latest
- target: s390x-unknown-linux-gnu
- - os: ubuntu-latest
- target: riscv64gc-unknown-linux-gnu
- - os: macos-latest
- target: aarch64-apple-darwin
- runs-on: ${{ matrix.os }}
- steps:
- - uses: actions/checkout@v6
- - name: Cache cargo build and registry
- uses: actions/cache@v5
- with:
- path: |
- ~/.cargo/registry
- ~/.cargo/git
- target
- key: ${{ runner.os }}-miri-${{ hashFiles('**/Cargo.lock') }}
- restore-keys: |
- ${{ runner.os }}-miri-
- - name: Miri
- run: |
- bash ci/miri_tb.sh "${{ matrix.target }}"
-
- miri-sb:
- name: miri-sb-${{ matrix.target }}
- strategy:
- matrix:
- include:
- - os: ubuntu-latest
- target: x86_64-unknown-linux-gnu
- - os: ubuntu-latest
- target: aarch64-unknown-linux-gnu
- - os: ubuntu-latest
- target: i686-unknown-linux-gnu
- - os: ubuntu-latest
- target: powerpc64-unknown-linux-gnu
- - os: ubuntu-latest
- target: s390x-unknown-linux-gnu
- - os: ubuntu-latest
- target: riscv64gc-unknown-linux-gnu
- - os: macos-latest
- target: aarch64-apple-darwin
- runs-on: ${{ matrix.os }}
- steps:
- - uses: actions/checkout@v6
- - name: Cache cargo build and registry
- uses: actions/cache@v5
- with:
- path: |
- ~/.cargo/registry
- ~/.cargo/git
- target
- key: ${{ runner.os }}-miri-${{ hashFiles('**/Cargo.lock') }}
- restore-keys: |
- ${{ runner.os }}-miri-
- - name: Miri
- run: |
- bash ci/miri_sb.sh "${{ matrix.target }}"
-
- loom:
- name: loom
- strategy:
- matrix:
- os:
- - ubuntu-latest
- - macos-latest
- - windows-latest
- runs-on: ${{ matrix.os }}
+ wasm:
+ name: wasm check
+ runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v6
- - name: Cache cargo build and registry
- uses: actions/cache@v5
+ - uses: actions/checkout@v4
+ - uses: dtolnay/rust-toolchain@stable
with:
- path: |
- ~/.cargo/registry
- ~/.cargo/git
- target
- key: ${{ runner.os }}-loom-${{ hashFiles('**/Cargo.lock') }}
- restore-keys: |
- ${{ runner.os }}-loom-
- - name: Install Rust
- run: rustup update nightly --no-self-update && rustup default nightly
- - name: Loom tests
- run: cargo test --tests --features loom
-
- # valgrind:
- # name: valgrind
- # runs-on: ubuntu-latest
- # steps:
- # - uses: actions/checkout@v6
- # - name: Cache cargo build and registry
- # uses: actions/cache@v5
- # with:
- # path: |
- # ~/.cargo/registry
- # ~/.cargo/git
- # target
- # key: ubuntu-latest-valgrind-${{ hashFiles('**/Cargo.lock') }}
- # restore-keys: |
- # ubuntu-latest-valgrind-
- # - name: Install Rust
- # run: rustup update stable && rustup default stable
- # - name: Install Valgrind
- # run: |
- # sudo apt-get update -y
- # sudo apt-get install -y valgrind
- # # Uncomment and customize when you have binaries to test:
- # # - name: cargo build foo
- # # run: cargo build --bin foo
- # # working-directory: integration
- # # - name: Run valgrind foo
- # # run: valgrind --error-exitcode=1 --leak-check=full --show-leak-kinds=all ./target/debug/foo
- # # working-directory: integration
+ toolchain: "1.95"
+ targets: wasm32-unknown-unknown
+ - uses: Swatinem/rust-cache@v2
+ - name: check (no-default-features, wasm32)
+ run: cargo check --target wasm32-unknown-unknown --no-default-features --verbose
+ - name: check (decoders, wasm32)
+ run: cargo check --target wasm32-unknown-unknown --no-default-features --features decoders --verbose
- coverage:
- name: coverage
+ doc:
+ name: doc build
runs-on: ubuntu-latest
- needs:
- - rustfmt
- - clippy
- - build
- - cross
- - test
- - sanitizer
- - loom
+ env:
+ RUSTDOCFLAGS: "--cfg docsrs -D warnings"
steps:
- - uses: actions/checkout@v6
- - name: Install Rust
- run: rustup update nightly && rustup default nightly
- - name: Install cargo-tarpaulin
- run: cargo install cargo-tarpaulin
- - name: Cache cargo build and registry
- uses: actions/cache@v5
- with:
- path: |
- ~/.cargo/registry
- ~/.cargo/git
- target
- key: ${{ runner.os }}-coverage-${{ hashFiles('**/Cargo.lock') }}
- restore-keys: |
- ${{ runner.os }}-coverage-
- - name: Run tarpaulin
- env:
- RUSTFLAGS: "--cfg tarpaulin"
- run: cargo tarpaulin --all-features --run-types tests --run-types doctests --workspace --out xml
- - name: Upload to codecov.io
- uses: codecov/codecov-action@v6
- with:
- token: ${{ secrets.CODECOV_TOKEN }}
- slug: ${{ github.repository }}
- fail_ci_if_error: true
+ - uses: actions/checkout@v4
+ - uses: dtolnay/rust-toolchain@nightly
+ - uses: Swatinem/rust-cache@v2
+ - name: cargo doc
+ run: cargo doc --no-deps --features inference,bundled,decoders,serde
diff --git a/.gitignore b/.gitignore
index 01e0c11..8e0685d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -16,3 +16,6 @@
/target
Cargo.lock
+
+# Project-local Claude Code state
+.claude/
diff --git a/CHANGELOG.md b/CHANGELOG.md
index bd7a668..198efcc 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,7 +1,65 @@
-# UNRELEASED
+# Changelog
-# 0.1.2 (January 6th, 2022)
+All notable changes follow the format from [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
+and this crate adheres to [Semantic Versioning](https://semver.org/).
-FEATURES
+## [0.1.0] — 2026-05-03
+### Added
+- Public `Engine` API for LiquidAI LFM2.5-VL-450M ONNX inference:
+ - `Engine::from_dir(model_dir, opts)` — load from a directory containing
+ the three ONNX graphs + `tokenizer.json`.
+ - `Engine::from_paths(EnginePaths, opts)` — explicit per-graph path
+ override.
+ - `Engine::from_onnx_dir(onnx_dir, opts)` (`bundled` feature) — load from
+ a directory containing **only the ONNX files**; the bundled tokenizer +
+ JSON configs (~4.5 MB embedded via `include_bytes!`) are written to a
+ per-process temp file and used in place of the missing on-disk files.
+ ONNX model files are NOT bundled (vision_encoder ~86 MB, decoder ~350 MB).
+ - `engine.generate(messages, images, req)` — free-form generation;
+ returns the model's raw text output.
+ - `engine.run(&task, messages, images, req)` — schema-constrained
+ generation via any `vlm_tasks::Task` instance; returns `Task::Output`.
+- Bundled `SceneTask` (wrapping `vlm_tasks::SceneAnalysis`) for structured
+ scene analysis without any extra configuration.
+- Public chat types: `ChatMessage`, `ChatContent`, `ContentPart`,
+ `ImageInput`.
+- Public configuration: `Options`, `RequestOptions`, `ImageBudget`,
+ `ThreadOptions`, `GraphOptimizationLevel`.
+- Wasm-friendly preprocessing subset under
+ `--no-default-features --features decoders` (no `ort`, no `tokenizers`):
+ `Preprocessor`, `TileGrid`, `PreprocessedImage`,
+ `decode_bytes_with_orientation`.
+- EXIF-aware image decoding: `decode_with_orientation` (native) and
+ `decode_bytes_with_orientation` (all targets including wasm).
+- Schema-constrained sampling via `llguidance` 1.7 token-mask filtering
+ applied at each decode step.
+- Hybrid KV+conv-state cache management for the LFM2 hybrid LM
+ (10 conv-state layers + 6 KV-attn layers, sparse layer indices).
+- Per-image vision-encoder dispatch (Phase 0 G6 contract: one image per
+ encoder call; batched multi-image calls produce silently-wrong embeddings).
+- Chat template rendering with `minijinja` 2: `apply_chat_template`,
+ `expand_image_placeholders`, bundled Jinja2 source via `include_str!`.
+- Examples:
+ - `smoke` — free-form generation over one image.
+ - `scene_analysis` — structured `SceneAnalysis` output.
+ - `preprocess_only` — preprocessing-only (no inference, no-default-features).
+ - `qwen_compare` — side-by-side LFM vs Qwen3-VL comparison
+ (requires `--features comparison`).
+- Benches: `bench_preproc`, `bench_tile_grid`, `bench_chat_template`.
+- Integration test suite gated on `feature = "integration"` and the
+ `LFM_MODEL_PATH` env var.
+- Execution-provider gates: `cuda`, `tensorrt`, `directml`, `rocm`,
+ `coreml` (all off by default; each implies `inference`).
+- `serde` feature: `Serialize`/`Deserialize` on `Options`,
+ `RequestOptions`, `ThreadOptions`, `ImageBudget`.
+
+### Model weights
+
+The crate wraps [LFM2.5-VL-450M-ONNX](https://huggingface.co/LiquidAI/LFM2.5-VL-450M-ONNX).
+The weights ship under the [LFM Open License v1.0](https://www.liquid.ai/lfm-license)
+— verify your use case complies with Liquid AI's terms separately from
+this crate's MIT OR Apache-2.0 license.
+
+[0.1.0]: https://github.com/findit-ai/lfm/releases/tag/v0.1.0
diff --git a/Cargo.toml b/Cargo.toml
index ff7fe91..7831e38 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,48 +1,121 @@
[package]
-name = "template-rs"
-version = "0.0.0"
-edition = "2021"
-repository = "https://github.com/al8n/template-rs"
-homepage = "https://github.com/al8n/template-rs"
-documentation = "https://docs.rs/template-rs"
-description = "A template for creating Rust open-source repo on GitHub"
-license = "MIT OR Apache-2.0"
-rust-version = "1.73"
-
-[[bench]]
-path = "benches/foo.rs"
-name = "foo"
-harness = false
-
-[features]
-default = ["std"]
-alloc = []
-std = []
+name = "lfm"
+version = "0.1.0"
+edition = "2024"
+rust-version = "1.95"
+description = "Rust ONNX inference for LiquidAI LFM2.5-VL (vision-language) models"
+license = "MIT OR Apache-2.0"
+include = [
+ "src/**/*.rs",
+ "examples/**/*.rs",
+ "benches/**/*.rs",
+ "models/*.json",
+ "models/*.jinja",
+ "build.rs",
+ "Cargo.toml",
+ "README.md",
+ "CHANGELOG.md",
+ "LICENSE-*",
+]
[dependencies]
+llmtask = { version = "0.1", features = ["json", "regex"] }
+ort = { version = "2.0.0-rc.12", optional = true }
+# `default-features = false` drops `progressbar` (training UI),
+# `onig` (C++ regex engine) and `esaxx_fast` (C++ suffix-array
+# Unigram trainer). lfm only uses `tokenizer.from_file` +
+# encode/decode at inference time; the trainer paths are dead
+# code for us. `fancy-regex` is the pure-Rust regex backend
+# replacement for `onig` and is what JSON-defined BPE tokenizers
+# need at runtime. Keeping `esaxx_fast` ON pulled the C++
+# `esaxx-rs` into the link, which on Windows MSVC was built with
+# `/MT` (static CRT) and clashed with `ort_sys` built with
+# `/MD` (dynamic CRT) — `LNK1319: 1 mismatches detected`. This
+# also matches the feature set `toktrie_hf_tokenizers` already
+# uses on `tokenizers` 0.21 transitively.
+tokenizers = { version = "0.23", optional = true, default-features = false, features = ["fancy-regex"] }
+llguidance = { version = "1.7", optional = true }
+toktrie = { version = "1.7", optional = true }
+toktrie_hf_tokenizers = { version = "1.7", optional = true }
+minijinja = { version = "2", optional = true, default-features = false, features = ["builtins", "json", "macros", "serde"] }
+image = { version = "0.25", default-features = false }
+smol_str = "0.3"
+thiserror = "2"
+tracing = "0.1"
+serde = { version = "1", features = ["derive"] }
+serde_json = { version = "1" }
+fast_image_resize = "6.0.0"
[dev-dependencies]
+serde_json = "1"
+
+[target.'cfg(not(target_arch = "wasm32"))'.dev-dependencies]
criterion = "0.8"
-tempfile = "3"
+
+[features]
+default = ["inference", "bundled", "decoders"]
+# `inference` provides ORT, tokenizers, llguidance, minijinja. The Engine
+# additionally requires `decoders` for image preprocessing — see
+# src/lib.rs Engine gate. Use `inference` alone for the runtime types
+# (Sampler, Decoder, KvCache, etc.) without the public Engine.
+inference = ["dep:ort", "dep:tokenizers", "dep:llguidance", "dep:toktrie", "dep:toktrie_hf_tokenizers", "dep:minijinja"]
+# `bundled` ships the tokenizer + small JSON configs as include_bytes!.
+# It implies both `inference` (for Engine machinery) AND `decoders`
+# (for the `Engine::from_onnx_dir` constructor); without this, a
+# `--features bundled` build would fail to expose Engine.
+bundled = ["inference", "decoders"]
+decoders = ["image/jpeg", "image/png"]
+serde = ["smol_str/serde", "llmtask/serde"]
+cuda = ["inference", "ort/cuda"]
+tensorrt = ["inference", "ort/tensorrt"]
+directml = ["inference", "ort/directml"]
+rocm = ["inference", "ort/rocm"]
+coreml = ["inference", "ort/coreml"]
+integration = ["inference"]
+
+[[test]]
+name = "integration"
+path = "tests/integration.rs"
+required-features = ["integration"]
+
+[[example]]
+name = "smoke"
+# Engine + from_dir require all three: bundled (for tokenizer assets +
+# from_dir gate), inference (for the runtime), decoders (for image
+# decode). Round-27 fix: was incorrectly listed as `inference`-only.
+required-features = ["bundled", "inference", "decoders"]
+[[example]]
+name = "scene_analysis"
+required-features = ["bundled", "inference", "decoders"]
+[[example]]
+name = "preprocess_only"
+
+[[bench]]
+name = "bench_preproc"
+harness = false
+[[bench]]
+name = "bench_tile_grid"
+harness = false
+[[bench]]
+name = "bench_chat_template"
+harness = false
+required-features = ["inference"]
[profile.bench]
-opt-level = 3
-debug = false
-codegen-units = 1
-lto = 'thin'
-incremental = false
-debug-assertions = false
-overflow-checks = false
-rpath = false
+opt-level = 3
+debug = false
+codegen-units = 1
+lto = 'thin'
+incremental = false
+debug-assertions = false
+overflow-checks = false
+rpath = false
[package.metadata.docs.rs]
-all-features = true
+features = ["inference", "bundled", "decoders", "serde"]
rustdoc-args = ["--cfg", "docsrs"]
[lints.rust]
-rust_2018_idioms = "warn"
+rust_2018_idioms = "warn"
single_use_lifetimes = "warn"
-unexpected_cfgs = { level = "warn", check-cfg = [
- 'cfg(all_tests)',
- 'cfg(tarpaulin)',
-] }
+unexpected_cfgs = { level = "warn", check-cfg = ['cfg(docsrs)', 'cfg(tarpaulin)'] }
diff --git a/README.md b/README.md
index 1af27e2..6fbbe6d 100644
--- a/README.md
+++ b/README.md
@@ -1,46 +1,214 @@
-# template-rs
+# lfm
-A template for creating Rust open-source GitHub repo.
+Rust ONNX inference for [LiquidAI LFM2.5-VL][lfm-card] — a 450M-parameter vision-language model with schema-constrained sampling via [llguidance]. Implements the engine-agnostic [`llmtask::Task`] contract, so any `Task` written against `llmtask` runs through `lfm` unchanged.
-[<!-- badge image -->][Github-url]
-[<!-- badge image -->][CI-url]
-[<!-- badge image -->][codecov-url]
+[<!-- badge image -->][Github-url]
+[<!-- badge image -->][CI-url]
+[<!-- badge image -->][codecov-url]
-[<!-- badge image -->][doc-url]
-[<!-- badge image -->][crates-url]
-[<!-- badge image -->][crates-url]
-
-English | [简体中文][zh-cn-url]
+[<!-- badge image -->][doc-url]
+[<!-- badge image -->][crates-url]
+[<!-- badge image -->][crates-url]
+## Overview
+
+`lfm` is an inference engine for [LiquidAI LFM2.5-VL][lfm-card], built on Rust + ONNX Runtime + llguidance:
+
+- **[`Engine`]** — sync, single-threaded; built on `ort` 2.0. `Engine::run` accepts any [`llmtask::Task`] whose grammar is JSON Schema, Lark, or Regex. Schema-constrained sampling is enforced by [llguidance] token-mask filtering. `Engine::generate` is the unconstrained path for free-form text.
+- **[`ImageAnalysisTask`]** — built-in image-analysis preset that produces the canonical [`llmtask::ImageAnalysis`] output type, sharing the schema and parser with [`qwen`].
+- **Bundled assets** — the `bundled` feature ships LFM2.5-VL's tokenizer, chat template, and preprocessor configs as `include_bytes!`. `Engine::from_onnx_dir` then accepts a directory containing only the three ONNX graphs; no separate tokenizer download required.
+- **Wasm-friendly preprocessing** — `preproc::Preprocessor`, `TileGrid`, and EXIF-aware decode helpers compile under `--no-default-features --features decoders` (no `ort`, no `tokenizers`).
+
+[`Engine`]: https://docs.rs/lfm/latest/lfm/engine/struct.Engine.html
+[`ImageAnalysisTask`]: https://docs.rs/lfm/latest/lfm/image_analysis/struct.ImageAnalysisTask.html
+[`llmtask::Task`]: https://docs.rs/llmtask/latest/llmtask/task/trait.Task.html
+[`llmtask::ImageAnalysis`]: https://docs.rs/llmtask/latest/llmtask/image_analysis/struct.ImageAnalysis.html
+[`qwen`]: https://docs.rs/qwen
+[llguidance]: https://github.com/microsoft/llguidance
+
+## Why an `llmtask`-driven engine?
+
+A bespoke `lfm::Task` would force every prompt + schema + parser to be rewritten against the next inference engine. Implementing [`llmtask::Task`] instead means the same `Task` code targets `lfm` (llguidance), [`qwen`] (mistralrs), or any future `llmtask`-compatible backend without modification — only the hardware backend selection differs.
+
+```text
+ ┌──────────────────────────┐
+ YourTask: impl Task ──▶ │ llmtask::Task contract │ ──▶ lfm / qwen / …
+ │ prompt + Grammar │
+ │ parse → Output │
+ └──────────────────────────┘
+```
+
+Because lfm's backend is llguidance, all three [`llmtask::Grammar`] variants (JSON Schema, Lark, Regex) are accepted — engines that only speak JSON Schema (e.g. `qwen`) reject the others via `UnsupportedGrammar`, and the caller can route to lfm.
+
+[`llmtask::Grammar`]: https://docs.rs/llmtask/latest/llmtask/grammar/enum.Grammar.html
+
+## Features
+
+- **All three `Grammar` variants** — JSON Schema, Lark, and Regex are all native to llguidance, so any `llmtask::Task` runs through `Engine::run`. The HIR-anchored regex validator on the `Grammar` side matches engine semantics exactly (no substring vs. full-match drift).
+- **Bundled tokenizer + configs (`bundled` feature, default)** — `Engine::from_onnx_dir` accepts an ONNX-only directory; tokenizer / chat template / preprocessor configs are embedded in the binary at compile time. `Engine::from_dir` is the strict constructor that byte-validates a supplied tokenizer + chat template against the bundled blobs to catch silent prompt-envelope drift.
+- **Hybrid KV/conv-state cache decoder** — LFM2 architecture has 10 conv-state layers and 6 attention layers at sparse indices. `decoder.rs` manages the non-contiguous cache layout transparently.
+- **Wasm-friendly preprocessing** — drop the `inference` and `bundled` defaults to get a pure-CPU image-preprocessing surface (`Preprocessor`, `TileGrid`, EXIF-aware decode) usable from `wasm32-unknown-unknown`.
+- **GPU acceleration** — `cuda`, `tensorrt`, `directml`, `rocm`, `coreml` ORT execution providers gated behind feature flags. None are required for CPU inference.
+- **Admission-control DoS guards** — bounded request shape (max messages, max content parts), text-size cap, image-count lower bound from `min_image_tokens`, header-time decoded-buffer cap, and a special-token denylist seeded from the live tokenizer's `added_vocabulary`. All run BEFORE any image decode or template render.
+
+## Example
+
+### From a HuggingFace download (tokenizer.json + configs in dir)
+
+```rust,no_run
+use lfm::{
+ ChatContent, ChatMessage, ContentPart, Engine, ImageInput, Options,
+ RequestOptions,
+};
+use smol_str::SmolStr;
+
+fn main() -> lfm::Result<()> {
+ let model_dir = std::env::var("LFM_MODEL_PATH")
+ .expect("set LFM_MODEL_PATH=/path/to/LFM2.5-VL-450M-ONNX");
+
+ let mut engine = Engine::from_dir(&model_dir, Options::default())?;
+
+ let messages = vec![ChatMessage {
+ role: SmolStr::new_static("user"),
+ content: ChatContent::Parts(vec![
+ ContentPart::Image,
+ ContentPart::Text("Describe this image.".into()),
+ ]),
+ }];
+ let images = vec![ImageInput::Path(std::path::Path::new("photo.jpg"))];
+
+ let text = engine.generate(&messages, &images, &RequestOptions::default())?;
+ println!("{text}");
+ Ok(())
+}
+```
+
+### ONNX-only dir + bundled tokenizer
+
+If you've downloaded just the ONNX files (not `tokenizer.json` and the JSON configs), use `Engine::from_onnx_dir`. The tokenizer + configs are embedded in the binary and written to a temp file on first use.
+
+```rust,no_run
+use lfm::{Engine, Options, RequestOptions};
+
+fn main() -> lfm::Result<()> {
+ let onnx_dir = std::env::var("LFM_ONNX_PATH")
+ .expect("set LFM_ONNX_PATH=/path/with/onnx-files-only");
+ let mut engine = Engine::from_onnx_dir(onnx_dir, Options::default())?;
+ // … same usage as Engine::from_dir
+ # let _ = engine; let _ = RequestOptions::default();
+ Ok(())
+}
+```
+
+### Structured output via the `ImageAnalysisTask` preset
+
+```rust,no_run
+use lfm::{
+ ChatContent, ChatMessage, ContentPart, Engine, ImageAnalysisTask, ImageInput,
+ Options, RequestOptions, Task,
+};
+use smol_str::SmolStr;
+
+fn main() -> lfm::Result<()> {
+ let model_dir = std::env::var("LFM_MODEL_PATH").unwrap();
+ let mut engine = Engine::from_dir(&model_dir, Options::default())?;
+ let task = ImageAnalysisTask::default();
+
+ let messages = vec![ChatMessage {
+ role: SmolStr::new_static("user"),
+ content: ChatContent::Parts(vec![
+ ContentPart::Image,
+ ContentPart::Text(task.prompt().to_owned()),
+ ]),
+ }];
+ let images = vec![ImageInput::Path(std::path::Path::new("frame.jpg"))];
+
+ let analysis = engine.run(&task, &messages, &images, &RequestOptions::default())?;
+ println!("{analysis:#?}");
+ Ok(())
+}
+```
+
## Installation
```toml
[dependencies]
-template_rs = "0.1"
+lfm = "0.1"
```
-## Features
-- [x] Create a Rust open-source repo fast
+Download the ONNX artifacts from [`LiquidAI/LFM2.5-VL-450M-ONNX`][lfm-card] and set `LFM_MODEL_PATH` to the directory containing them:
+
+```text
+vision_encoder.onnx
+embed_tokens.onnx
+decoder_model_merged.onnx
+tokenizer.json (optional — bundled if absent and `bundled` feature is on)
+```
+
+### Cargo features
+
+Defaults: `["inference", "bundled", "decoders"]`.
+
+| Feature | Default | What it adds |
+| ------------- | :-----: | ------------------------------------------------------------------------------------------------------------------ |
+| `inference` | yes | Pulls `ort`, `tokenizers`, `llguidance`, `minijinja`. Activates `Engine`. Native targets only. |
+| `bundled`     | yes     | Embeds `tokenizer.json` + JSON configs (~4.5 MB) at compile time; adds `Engine::from_onnx_dir`. Implies `inference` and `decoders`. |
+| `decoders` | yes | Activates JPEG/PNG decoding via the `image` crate. |
+| `serde` | no | `Serialize`/`Deserialize` on `Options`, `RequestOptions`, `ThreadOptions`, `ImageBudget`. |
+| `cuda` | no | NVIDIA GPUs (Linux / Windows). Requires CUDA toolkit + cuDNN. Implies `inference`. |
+| `tensorrt` | no | NVIDIA, optimized inference. Falls back to CUDA, then CPU. Implies `inference`. |
+| `directml` | no | Windows GPUs (any vendor) via DirectX 12. Implies `inference`. |
+| `rocm` | no | AMD GPUs (Linux). Requires ROCm SDK. Implies `inference`. |
+| `coreml` | no | macOS / iOS via Core ML (Neural Engine + GPU + Metal). Implies `inference`. |
+| `integration` | no | Enables the integration test (`tests/integration.rs`). Requires `LFM_MODEL_PATH`. |
+
+GPU execution-provider features are off by default — none are required for CPU inference, and each requires its vendor SDK at build time.
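+
+As a quick illustration of the `serde` feature from the table above, the sketch below round-trips `Options` through JSON. It assumes only what the table states — that `Options` gains `Serialize`/`Deserialize` under `--features serde` — and uses `serde_json` purely for illustration:
+
+```rust,no_run
+use lfm::Options;
+
+fn main() -> serde_json::Result<()> {
+    // Requires building with `--features serde`.
+    let opts = Options::default();
+    let json = serde_json::to_string_pretty(&opts)?;
+    let restored: Options = serde_json::from_str(&json)?;
+    let _ = restored; // field-for-field copy of `opts`
+    Ok(())
+}
+```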
+
+### Wasm / preprocessing-only build
+
+```bash
+cargo build --target wasm32-unknown-unknown --no-default-features --features decoders
+```
+
+The public surface under `--no-default-features --features decoders` is `preproc::Preprocessor`, `preproc::TileGrid`, `preproc::PreprocessedImage`, `preproc::decode_bytes_with_orientation`, `options::*`, and `error::*`.
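+
+A minimal sketch of that surface, mirroring the setup used in `benches/bench_preproc.rs` (the exact return type of `preprocess` is documented on docs.rs; this is illustrative only):
+
+```rust,no_run
+use image::{DynamicImage, RgbImage};
+use lfm::{ImageBudget, Preprocessor};
+
+fn main() {
+    // Synthetic 1024x1024 RGB image; a real caller would decode bytes
+    // first (e.g. via `decode_bytes_with_orientation`).
+    let img = DynamicImage::ImageRgb8(RgbImage::new(1024, 1024));
+    let preproc = Preprocessor::new(ImageBudget::default());
+    let tiles = preproc.preprocess(&img);
+    let _ = tiles; // patch data destined for the vision encoder
+}
+```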
+
+## Architecture
+
+Per-image vision encoding → text+image embedding splice → hybrid KV/conv cache decoder loop → optional schema-constrained sampling.
+
+| Graph                       | Role                                                             | Params     |
+| --------------------------- | --------------------------------------------------------------- | ---------- |
+| `vision_encoder.onnx` | SigLIP2 image encoder — single image per call | ~86M params |
+| `embed_tokens.onnx` | Token embedding lookup table | — |
+| `decoder_model_merged.onnx` | LFM2 hybrid LM: 10 conv-state + 6 KV-attn layers (sparse cache) | ~350M params |
+
+The decoder manages a sparse hybrid cache: conv-state layers store recurrent state (not KV pairs), so cache indices are non-contiguous. Schema-constrained sampling is handled by `llguidance` masking the logits at each step to enforce the `Grammar` from the `Task`.
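+
+As a concrete illustration of the sparse layout, the conv-layer index set below is taken from the Phase 0 ONNX I/O fixture referenced in the implementation plan; the helper itself is hypothetical, not part of the crate's API:
+
+```rust
+// Conv-state layers sit at sparse indices; the remaining 6 indices
+// hold the KV-attention layers.
+const CONV_LAYERS: [usize; 10] = [0, 1, 3, 4, 6, 7, 9, 11, 13, 15];
+
+fn is_conv_layer(layer: usize) -> bool {
+    CONV_LAYERS.contains(&layer)
+}
+```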
+
+**Multi-image note:** the vision encoder accepts one image per call. Batched multi-image calls produce silently-wrong embeddings — `Engine::generate`/`run` iterate per-image and concatenate the flat `image_features` outputs in source order.
+
+## MSRV
+
+Rust 1.95.
+
+## License
-#### License
+`lfm` is dual-licensed under the [MIT license](LICENSE-MIT) and the [Apache License, Version 2.0](LICENSE-APACHE).
-`template-rs` is under the terms of both the MIT license and the
-Apache License (Version 2.0).
+The LFM2.5-VL model weights this crate runs are governed by the [LFM Open License v1.0](https://www.liquid.ai/lfm-license). Verify your use case complies with Liquid AI's terms separately from this crate's license.
-See [LICENSE-APACHE](LICENSE-APACHE), [LICENSE-MIT](LICENSE-MIT) for details.
+Copyright (c) 2026 FinDIT Studio authors.
-Copyright (c) 2021 Al Liu.
+[lfm-card]: https://huggingface.co/LiquidAI/LFM2.5-VL-450M-ONNX
-[Github-url]: https://github.com/al8n/template-rs/
-[CI-url]: https://github.com/al8n/template-rs/actions/workflows/ci.yml
-[doc-url]: https://docs.rs/template-rs
-[crates-url]: https://crates.io/crates/template-rs
-[codecov-url]: https://app.codecov.io/gh/al8n/template-rs/
-[zh-cn-url]: https://github.com/al8n/template-rs/tree/main/README-zh_CN.md
+[Github-url]: https://github.com/findit-ai/lfm/
+[CI-url]: https://github.com/findit-ai/lfm/actions/workflows/ci.yml
+[doc-url]: https://docs.rs/lfm
+[crates-url]: https://crates.io/crates/lfm
+[codecov-url]: https://app.codecov.io/gh/findit-ai/lfm/
diff --git a/benches/bench_chat_template.rs b/benches/bench_chat_template.rs
new file mode 100644
index 0000000..751568a
--- /dev/null
+++ b/benches/bench_chat_template.rs
@@ -0,0 +1,75 @@
+//! Bench chat-template rendering — pure CPU, no model files needed.
+//!
+//! Run: `cargo bench --bench bench_chat_template --features inference`
+//!
+//! When `inference` is disabled the bench function is a no-op so that
+//! `cargo check --benches` still compiles cleanly without that feature.
+
+use std::hint::black_box;
+
+use criterion::{Criterion, criterion_group, criterion_main};
+
+#[cfg(feature = "inference")]
+use lfm::chat_template::{ContentItem, Message, UserContent, apply_chat_template};
+
+#[cfg(feature = "inference")]
+fn bench_chat_template(c: &mut Criterion) {
+ // Short user message with one image placeholder — the common
+ // production path through the minijinja renderer.
+ let messages_image = vec![Message::User {
+ content: UserContent::Multimodal(vec![
+ ContentItem::Image,
+ ContentItem::Text {
+ text: "Describe this image.",
+ },
+ ]),
+ }];
+
+ // Text-only user message — exercises the simpler template branch.
+ let messages_text = vec![Message::User {
+ content: UserContent::Text("What is 2+2?"),
+ }];
+
+ // System + user — exercises the system-prompt branch.
+ let messages_sys_user = vec![
+ Message::System {
+ content: "You are a helpful assistant.",
+ },
+ Message::User {
+ content: UserContent::Text("Hello."),
+ },
+ ];
+
+ c.bench_function("chat_template_image_user", |b| {
+ b.iter(|| {
+ let _ = black_box(apply_chat_template(black_box(&messages_image), None, true));
+ });
+ });
+
+ c.bench_function("chat_template_text_only", |b| {
+ b.iter(|| {
+ let _ = black_box(apply_chat_template(black_box(&messages_text), None, true));
+ });
+ });
+
+ c.bench_function("chat_template_system_user", |b| {
+ b.iter(|| {
+ let _ = black_box(apply_chat_template(
+ black_box(&messages_sys_user),
+ None,
+ true,
+ ));
+ });
+ });
+}
+
+#[cfg(not(feature = "inference"))]
+fn bench_chat_template(_c: &mut Criterion) {
+ // No-op when the inference feature is disabled.
+ // TODO Task 16: expose apply_chat_template without the inference gate so
+ // this bench can run with just --features decoders.
+ eprintln!("bench_chat_template: inference feature not enabled; no benchmarks to run.");
+}
+
+criterion_group!(benches, bench_chat_template);
+criterion_main!(benches);
diff --git a/benches/bench_preproc.rs b/benches/bench_preproc.rs
new file mode 100644
index 0000000..94b940e
--- /dev/null
+++ b/benches/bench_preproc.rs
@@ -0,0 +1,35 @@
+//! Bench preprocessing throughput with synthetic images at fixed sizes.
+//!
+//! Run: `cargo bench --bench bench_preproc`
+
+use std::hint::black_box;
+
+use criterion::{Criterion, criterion_group, criterion_main};
+use image::{DynamicImage, RgbImage};
+
+fn bench_preproc(c: &mut Criterion) {
+ let budget = lfm::ImageBudget::default();
+ let preproc = lfm::Preprocessor::new(budget);
+
+ // 1024×1024 routes through the multi-tile path (4 tiles + thumbnail
+ // at default budget), giving a realistic workload.
+ let img_1024 = DynamicImage::ImageRgb8(RgbImage::new(1024, 1024));
+
+ // 256×256 routes through the single-tile path.
+ let img_256 = DynamicImage::ImageRgb8(RgbImage::new(256, 256));
+
+ c.bench_function("preprocess_1024x1024_multi_tile", |b| {
+ b.iter(|| {
+ let _ = black_box(preproc.preprocess(black_box(&img_1024)));
+ });
+ });
+
+ c.bench_function("preprocess_256x256_single_tile", |b| {
+ b.iter(|| {
+ let _ = black_box(preproc.preprocess(black_box(&img_256)));
+ });
+ });
+}
+
+criterion_group!(benches, bench_preproc);
+criterion_main!(benches);
diff --git a/benches/bench_tile_grid.rs b/benches/bench_tile_grid.rs
new file mode 100644
index 0000000..fab5e62
--- /dev/null
+++ b/benches/bench_tile_grid.rs
@@ -0,0 +1,44 @@
+//! Bench the tile-grid selection algorithm — pure CPU, no model files needed.
+//!
+//! Run: `cargo bench --bench bench_tile_grid`
+
+use std::hint::black_box;
+
+use criterion::{Criterion, criterion_group, criterion_main};
+
+fn bench_tile_grid(c: &mut Criterion) {
+ let budget = lfm::ImageBudget::default();
+
+ c.bench_function("pick_tile_grid_1920x1080_landscape", |b| {
+ b.iter(|| {
+ let _ = black_box(lfm::preproc::tile_grid::pick_tile_grid(
+ black_box(1920),
+ black_box(1080),
+ &budget,
+ ));
+ });
+ });
+
+ c.bench_function("pick_tile_grid_1024x1024_square", |b| {
+ b.iter(|| {
+ let _ = black_box(lfm::preproc::tile_grid::pick_tile_grid(
+ black_box(1024),
+ black_box(1024),
+ &budget,
+ ));
+ });
+ });
+
+ c.bench_function("pick_tile_grid_256x256_single_tile", |b| {
+ b.iter(|| {
+ let _ = black_box(lfm::preproc::tile_grid::pick_tile_grid(
+ black_box(256),
+ black_box(256),
+ &budget,
+ ));
+ });
+ });
+}
+
+criterion_group!(benches, bench_tile_grid);
+criterion_main!(benches);
diff --git a/benches/foo.rs b/benches/foo.rs
deleted file mode 100644
index f328e4d..0000000
--- a/benches/foo.rs
+++ /dev/null
@@ -1 +0,0 @@
-fn main() {}
diff --git a/docs/codex-review-rejections.md b/docs/codex-review-rejections.md
new file mode 100644
index 0000000..71e3cc3
--- /dev/null
+++ b/docs/codex-review-rejections.md
@@ -0,0 +1,151 @@
+# Codex Adversarial Review — Rejected Findings
+
+This file records findings that **previous adversarial reviews raised and that we have verified to be false positives**, together with the evidence that proves it. Codex re-flags these items in subsequent rounds; instead of re-verifying each time, refer to the listed evidence.
+
+Future reviewers (codex or human): if you're about to flag one of these, FIRST consult the cited evidence and confirm the bug truly exists. If you can construct a reproducer that contradicts the evidence below, file it as a NEW finding with the reproducer attached.
+
+---
+
+## Trust model and review scope
+
+**Trust model.** This crate operates inside a trusted boundary on BOTH sides:
+
+1. **The model files are trusted.** `Engine::from_dir` / `Engine::from_onnx_dir` / `Engine::from_paths` callers obtain model assets from a known source (HuggingFace, vendored bundle, internal registry) and vouch for them the same way they vouch for any other dependency.
+2. **The caller-supplied inputs are trusted.** Chat messages, image bytes, image paths, `RequestOptions`, `ImageBudget`, `Task` schemas — all of these come from the same trusted application that loaded the model. The crate is an in-process library, not a public network endpoint.
+
+In other words: **there is no attacker in this threat model**. We are not hardening against malicious model providers, malicious image uploaders, malicious chat content, or malicious schema definitions. Defense-in-depth against any of those is out of scope.
+
+### What's IN SCOPE
+
+These are the things to flag — strictly **CODE** issues:
+
+- **Algorithmic correctness against upstream Python.** smart_resize math, tile_grid selection, marker order vs `crop_image_to_patches`, image-block layout, normalization constants, BPE tokenization parity, chat template rendering parity.
+- **Model input/output contracts.** ONNX session input/output shapes, dtype, axis order. Embedding splice positions. Sampler math (softmax stability, repetition penalty signs, min_p threshold). KV cache shape and lifecycle. Position IDs and attention masks if applicable.
+- **Bugs reachable with valid inputs.** Panics, integer overflow, wrong indexing, wrong loop bounds, off-by-one in sequence handling — when triggered by code paths a normal correct caller can hit.
+- **Concurrency hazards** in code reachable through the public API: data races, deadlocks, ordering bugs.
+- **Cargo manifest correctness** — feature graph, dep version constraints, lints. These compile-time guarantees ARE code.
+
+### Also out of scope: CI / workflow / build-infrastructure findings
+
+Findings about `.github/workflows/*`, GitHub Actions configuration, build-system scripts, repo layout (sibling-checkout requirements, missing `.gitmodules`, sibling repo path-dep resolution at CI time, etc.), or anything that's "the CI workflow doesn't do X" are **out of scope for code review**. Those are infrastructure decisions handled separately. Do NOT flag them.
+
+### What's OUT OF SCOPE — do NOT flag
+
+Past rounds repeatedly flagged variations of these themes. They are settled:
+
+1. **"What if a custom tokenizer/template/config/ONNX is tampered or drifted?"** `from_paths` is documented as the unchecked escape hatch. `from_dir` already enforces byte-equality against bundled tokenizer.json, chat_template.jinja, preprocessor_config.json, and config.json's `max_position_embeddings`. Adding more strict checks for additional fields (tokenizer_config.json chat_template field, processor_config.json fields, generation_config.json fields, model config field-by-field) is not warranted.
+2. **"What if a custom ONNX file's metadata is loose enough to admit a drifted runtime shape?"** ORT session validation already pins input/output names and rank-and-relevant-dim. Exhaustive exact-shape checks at every call site for every dynamic axis is defense-in-depth against a model provider who already controls the model file.
+3. **"What if the on-disk tokenizer file is replaced between Engine construction and a later call?"** Round 35 already addressed this for the `from_onnx_dir` temp tokenizer. Treating local disk as a hostile attacker is outside the trust model.
+4. **"What if a from_paths caller passes a tokenizer with `special: false` on every control token?"** `from_paths` is opt-out by documentation; the denylist already seeds structural + named control tokens unconditionally.
+5. **"What if the caller passes a huge text payload / millions of empty parts / millions of image inputs?"** The caller is trusted. They're allocating their own process's memory. There is no remote attacker. DoS hardening of the public API is out of scope.
+6. **"What if the caller passes image bytes that decode to a huge buffer (e.g., 16-bit PNG bypassing the 4-BPP header estimate)?"** Same as above. The caller chose to feed those bytes into their own process.
+7. **"What if the caller sets `repetition_penalty = f32::MAX` and exploits the sampler?"** Same trust model. The caller is configuring their own sampler. We already cap at `MAX_REPETITION_PENALTY=100.0` for ergonomic correctness, not for adversarial robustness.
+
+If a finding is on a theme above, **do not file it**. If you are unsure whether a finding is in scope, prefer the algorithm/model-I/O lens: "would this still be a bug if my caller and my model were both perfectly cooperative?" If yes, flag it. If the finding only manifests when someone misuses the API on purpose, skip it.
+
+---
+
+## REJECTED-1: "Tile markers and vision features use different traversal orders" (rounds 13, 14, 22)
+
+### Claim
+`build_image_block` emits markers with `cols` outer and `rows` inner, while `flatten_to_patches` builds tiles row-major (`rows` outer, `cols` inner). For non-square multi-tile grids, marker order and feature order diverge — feature k binds to marker for a different spatial position. Token counts still match so `ImageTokenCountMismatch` doesn't fire.
+
+### Why this is NOT a bug
+The "weird" pairing IS upstream Python's actual behavior, replicated bit-for-bit. The semantic confusion comes from upstream's variable-naming inversion at `transformers/models/lfm2_vl/processing_lfm2_vl.py:161-162`:
+
+```python
+images, num_rows, num_cols = self.crop_image_to_patches(...)
+# but crop_image_to_patches actually returns (..., grid_width, grid_height)
+# so num_rows = grid_width and num_cols = grid_height
+```
+
+In `expand_text_with_placeholders` the iteration is then:
+```python
+for row in range(rows): # rows = num_rows = grid_width
+ for col in range(cols): # cols = num_cols = grid_height
+ emit f"<|img_row_{row+1}_col_{col+1}|>"
+```
+
+So upstream's outer loop is over `grid_width` (cols-outer in our terms). The model was trained on this convention. Our marker emission `for outer in 0..img.cols() { for inner in 0..img.rows() }` matches upstream exactly.
+
+The model uses `masked_scatter` (modeling_lfm2_vl.py:281) for positional splicing of the k-th feature into the k-th image-placeholder token, identical to our splice loop. Whatever pairing convention upstream produces, the model was trained on it; our replication is correct.
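+
+A sketch of that emission order (hypothetical helper; the real loop lives in `src/chat_template.rs::build_image_block`):
+
+```rust
+// Outer loop over grid columns, inner loop over grid rows — replicating
+// upstream's inverted variable naming: `outer` fills the `row` slot.
+fn emit_markers(cols: u32, rows: u32, out: &mut String) {
+    for outer in 0..cols {
+        for inner in 0..rows {
+            out.push_str(&format!("<|img_row_{}_col_{}|>", outer + 1, inner + 1));
+        }
+    }
+}
+```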
+
+### Evidence
+- `tests/fixtures/image_expansion_cases.json::multi_tile_4x2_widescreen` — captures upstream `expand_text_with_placeholders` output for a 2×4 grid byte-for-byte. We pass this fixture.
+- `tests/fixtures/multi_image_ordering_proof.json` — Phase 0 fixture from real upstream output.
+- Code comment in `src/chat_template.rs::build_image_block` cites upstream lines.
+
+---
+
+## REJECTED-2: "Patch vectors are HWC but config declares channels_first" (round 22)
+
+### Claim
+`flatten_to_patches`'s loop emits 16×16 patches as `(dy, dx, ch)` (HWC interleaved), but `preprocessor_config.json` declares `data_format: channels_first`. Therefore patches are fed to the encoder in the wrong layout.
+
+### Why this is NOT a bug
+The `data_format: channels_first` config refers to the **resized image format going into upstream's `convert_image_to_patches`** — i.e., torch tensor shape `(B, C, H, W)`. Inside that function, upstream PERMUTES to HWC before reshape:
+
+```python
+# upstream image_processing_lfm2_vl_fast.py:143-156 (convert_image_to_patches)
+patched = images.reshape(B, C, n_h, ps, n_w, ps)
+patched = patched.permute(0, 2, 4, 3, 5, 1) # → (B, n_h, n_w, ps, ps, C)
+patched = patched.reshape(B, n_h * n_w, -1) # → (B, n_patches, ps*ps*C in HWC)
+```
+
+The final `.reshape(..., -1)` collapses `(ps, ps, C)` into 768 bytes in HWC order (last dim is C). So the actual ENCODER input IS HWC per-patch despite the upstream pipeline starting from a CHW image.
+
+Our `(dy, dx, ch)` byte order in `flatten_to_patches` matches what upstream produces.
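+
+A sketch of that per-patch byte order (the `pixel` accessor is hypothetical; the real implementation is `src/preproc/mod.rs::flatten_to_patches`):
+
+```rust
+// HWC flattening: (dy, dx, ch) with channel fastest.
+// 16 * 16 * 3 = 768 values per patch — the encoder's per-patch dim.
+const PATCH: usize = 16;
+
+fn flatten_patch(pixel: impl Fn(usize, usize, usize) -> f32) -> Vec<f32> {
+    let mut out = Vec::with_capacity(PATCH * PATCH * 3);
+    for dy in 0..PATCH {
+        for dx in 0..PATCH {
+            for ch in 0..3 {
+                out.push(pixel(dy, dx, ch));
+            }
+        }
+    }
+    out
+}
+```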
+
+### Evidence
+- `tests/fixtures/multi_image_ordering_proof.json` — captured from upstream and we match bit-for-bit.
+- Code comment in `src/preproc/mod.rs::flatten_to_patches` (added round 22) cites upstream lines.
+- Phase 0 G5 (resolved 2026-05-03) explicitly verified pixel layout.
+
+---
+
+## RESOLVED-1: "Resize uses image crate's Triangle filter, not torchvision bilinear+antialias" (round 42, fixed in same branch)
+
+### Original claim
+`flatten_to_patches` resized via `image::imageops::resize(..., FilterType::Triangle)`. Upstream `Lfm2VlImageProcessorFast` resizes via torchvision `F.resize(..., interpolation=BILINEAR, antialias=True)`. These are not the same algorithm — torchvision's antialiased bilinear runs a low-pass prefilter before sampling, while `image` crate's `Triangle` is a plain tent filter with no antialias prefilter. Cooperative callers feeding non-tile-aligned images would have gotten different `pixel_values` than upstream.
+
+### Resolution
+Replaced both `imageops::resize(..., FilterType::Triangle)` calls in `flatten_to_patches` (main resize and thumbnail resize) with a new `pil_bilinear_resize` helper backed by `fast_image_resize`'s `Convolution(FilterType::Bilinear)`. That's the PIL-compatible bilinear (Pillow's `Image.resize` with `Image.BILINEAR`), which is the exact target torchvision's `antialias=True` path was designed to match.
+
+The `fast_image_resize` crate is widely used as the "PIL-parity" Rust resize and is what production VLM/image pipelines use for parity with HuggingFace processors.
+
+### Why no `LFM_MODEL_PATH` parity script was required
+The previous algorithm (`Triangle`) was definitively NOT what upstream uses. The new algorithm (PIL bilinear via `fast_image_resize`) IS what upstream uses by documented design (torchvision antialias=True ≡ PIL BILINEAR ≡ fast_image_resize Convolution(Bilinear)). Swapping one for the other replaces a known-divergent algorithm with the known-correct one — no real-model A/B is needed to know it's an improvement.
+
+A real-model A/B is still warranted in v0.2 to confirm bit-exactness across all resize ratios (PIL and fast_image_resize have rare 1-LSB differences for specific kernel-edge alignments), but the qualitative correctness fix is in place.
+
+---
+
+## REJECTED-3: "Padding to per-image max instead of upstream's fixed `max_num_patches`" (round 17)
+
+### Claim
+Upstream pads each tile to a fixed `max_num_patches = max(max_image_tokens × downsample_factor², (tile_size/patch_size)²) = 1024` for default budget. Our code pads only to per-image max (e.g., 256 for a 256-token single-tile image). Could silently change vision encoder outputs.
+
+### Why this is INCONCLUSIVE — deferred, not rejected
+Codex itself notes "the ONNX axis is dynamic so it may run". The vision encoder uses `pixel_attention_mask` to know which positions are real vs padded, so padding to a smaller size is more efficient and theoretically equivalent.
+
+Confirming parity requires `LFM_MODEL_PATH` for a real-model side-by-side comparison. This is filed as v0.2 work. If a future review re-flags it with a concrete reproducer (real-model output diff), then it becomes a real bug; until then, it's an unproven theoretical concern.
+
+### What would change my mind
+A bug-finder script that:
+1. Loads the real LFM2.5-VL ONNX vision encoder with `LFM_MODEL_PATH`
+2. Runs a fixed test image through (a) our crate's preprocessing → vision encoder, and (b) upstream Python's preprocessing → same vision encoder
+3. Compares the resulting `image_features` tensors element-wise
+4. Shows non-zero diff > FP rounding tolerance
+
+If such a script reproduces, this becomes a real bug.
+
+---
+
+## How to use this file
+
+When you're about to file an adversarial-review finding, search this file first. If your finding matches a REJECTED entry:
+- Read the cited evidence and verify it still holds against the current code
+- If the evidence is no longer accurate (code changed, fixture changed, etc.), update the entry or file a new finding
+- If the evidence still holds, do NOT re-flag — note in your review that you considered it and found it already debunked
+
+If your finding is genuinely new: include enough detail (file path + line numbers, concrete reproducer or upstream-citation) that the next reviewer can verify in one pass.
diff --git a/docs/superpowers/plans/2026-05-03-lfm-vlm-wrapper.md b/docs/superpowers/plans/2026-05-03-lfm-vlm-wrapper.md
new file mode 100644
index 0000000..4c46b39
--- /dev/null
+++ b/docs/superpowers/plans/2026-05-03-lfm-vlm-wrapper.md
@@ -0,0 +1,4151 @@
+# LFM2.5-VL ONNX Wrapper Implementation Plan
+
+> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
+
+**Goal:** Implement the `lfm` Rust crate that wraps LFM2.5-VL-450M-ONNX with a sync `Engine` + `Task` trait + free-form `generate`, sharing the `SceneAnalysis` data type with `qwen` via a new `vlm-tasks` crate.
+
+**Architecture:** 3-graph ONNX inference (vision_encoder, embed_tokens, decoder_model_merged) via raw `ort 2.0.0-rc.12`. **Per-image vision encoder calls** to work around the Phase 0 Gate B silent-corruption finding (multi-image batched calls produce wrong embeddings). `llguidance` for schema-constrained sampling on the structured-output path. `minijinja` for chat-template rendering with a custom no-op `generation` tag.
+
+**Tech Stack:** Rust 2024 edition (rust-version 1.95), ort 2.0.0-rc.12, tokenizers 0.23, llguidance 1.7, minijinja 2, image 0.25, smol_str 0.3, thiserror 2, tracing 0.1.
+
+**Spec:** `lfm/docs/superpowers/specs/2026-05-03-lfm-vlm-wrapper-design.md`
+
+**Workspace structure (pinned — option A: independent repos with path deps):** The three crates (`vlm-tasks/`, `qwen/`, `lfm/`) are independent git repositories under `findit-studio/`. They use Cargo path deps (`vlm-tasks = { path = "../vlm-tasks" }`) — NOT a Cargo workspace, NOT git submodules. **None are published to crates.io** (qwen + lfm READMEs explicitly say "Internal findit-studio crate. Not published"). This matches the existing siglip2/egemma/qwen layout, avoids workspace-wide rebuilds, and eliminates the publishability dependency chain (vlm-tasks would otherwise have to publish first). If that ever changes, the migration to a workspace is a one-line change at `findit-studio/Cargo.toml`; for v0 we stay independent.
+
+**Phase 0 fixtures (already in repo, verified):**
+- `lfm/tests/fixtures/onnx_io_contract.json` — G1–G5 resolved (decoder has no `position_ids`; conv-cache uses sparse layer indices `[0,1,3,4,6,7,9,11,13,15]`; vision output is `image_features`; vision input is pre-patchified `[batch, num_patches, 768]`)
+- `lfm/tests/fixtures/multi_image_ordering_proof.json` — G6 RESOLVED (FAILED → per-image vision calls required)
+
+---
+
+## File Structure
+
+Workspace layout (siblings under `findit-studio/`):
+
+```
+vlm-tasks/ # NEW CRATE (Phase 1)
+├── Cargo.toml
+├── README.md
+├── LICENSE-MIT, LICENSE-APACHE
+└── src/
+ ├── lib.rs # pub use {task::*, scene::*}
+ ├── task.rs # Task trait + ParseError
+ └── scene.rs # SceneAnalysis (data type only)
+
+qwen/ # EXISTING — minor migration (Phase 2)
+├── Cargo.toml # add vlm-tasks dep
+└── src/
+ ├── lib.rs # add re-exports
+ ├── task.rs # remove (moved to vlm-tasks)
+ └── scene.rs # SceneAnalysis becomes re-export
+
+lfm/ # NEW IMPLEMENTATION (Phase 3)
+├── Cargo.toml # rewrite from template-rs
+├── README.md, CHANGELOG.md
+├── build.rs # template default; no-op
+├── models/ # whitelisted by Cargo.toml include
+│ ├── tokenizer.json # bundle-feature-gated, 4.5 MB
+│ ├── chat_template.jinja # always shipped, 3.8 KB
+│ └── preprocessor_config.json # build-fixture only, 0.7 KB
+├── src/
+│ ├── lib.rs # re-exports + features + BUNDLED_* consts
+│ ├── error.rs # Error enum + named constructors
+│ ├── options.rs # RequestOptions, ImageBudget, ThreadOptions, Options
+│ ├── embedding.rs # Embedding (post-projector vision-tile vector)
+│ ├── chat_template.rs # apply_chat_template + expand_image_placeholders + tokens
+│ ├── preproc/
+│ │ ├── mod.rs # Preprocessor + PreprocessedImage + EXIF helpers
+│ │ └── tile_grid.rs # find_closest_aspect_ratio + smart_resize
+│ ├── runtime/ # gated on `inference` feature
+│ │ ├── mod.rs # re-exports
+│ │ ├── session.rs # build_session + check_outlet + validate_*_session
+│ │ ├── vision.rs # VisionEncoder (single-image)
+│ │ ├── embed_tokens.rs # EmbedTokens
+│ │ ├── decoder.rs # Decoder + KvCache (sparse indices)
+│ │ └── sampler.rs # Sampler trait + FreeSampler + ConstrainedSampler
+│ ├── generate.rs # end-to-end pipeline (per-image vision calls)
+│ ├── engine.rs # public Engine
+│ ├── task.rs # re-exports of vlm_tasks::*
+│ └── scene.rs # lfm-specific SceneTask impl
+├── examples/
+│ ├── smoke.rs # phase-zero "does it work"
+│ ├── scene_analysis.rs
+│ ├── preprocess_only.rs # wasm-compat showcase
+│ └── qwen_compare.rs
+├── benches/
+│ ├── bench_preproc.rs
+│ ├── bench_tile_grid.rs
+│ └── bench_chat_template.rs
+├── scripts/ # ALREADY IN REPO from Phase 0
+│ ├── capture_onnx_io.py
+│ ├── verify_multi_image_ordering.py
+│ └── README.md
+└── tests/
+ ├── fixtures/ # Phase-0 JSONs already present; add airport images + parity fixtures
+ │ ├── onnx_io_contract.json # ALREADY HERE
+ │ ├── multi_image_ordering_proof.json # ALREADY HERE
+ │ ├── airport_01.jpg, airport_02.jpg, airport_03.jpg # port from qwen
+ │ ├── chat_template_cases.json
+ │ ├── tile_grid_cases.json
+ │ ├── image_expansion_cases.json
+ │ └── scene_payloads/ # canonical / drift / null cases
+ └── integration.rs # gated on `integration` feature
+```
+
+---
+
+## Phase 1: vlm-tasks crate
+
+### Task 1: Create vlm-tasks crate (one task — small surface)
+
+**Files:**
+- Create: `vlm-tasks/Cargo.toml`
+- Create: `vlm-tasks/src/lib.rs`
+- Create: `vlm-tasks/src/task.rs`
+- Create: `vlm-tasks/src/scene.rs`
+- Create: `vlm-tasks/README.md`
+- Create: `vlm-tasks/LICENSE-MIT`, `vlm-tasks/LICENSE-APACHE` (copy from qwen)
+
+- [ ] **Step 1: Scaffold the directory and copy licenses**
+
+```bash
+mkdir -p /Users/user/Develop/findit-studio/vlm-tasks/src
+cp /Users/user/Develop/findit-studio/qwen/LICENSE-MIT /Users/user/Develop/findit-studio/vlm-tasks/
+cp /Users/user/Develop/findit-studio/qwen/LICENSE-APACHE /Users/user/Develop/findit-studio/vlm-tasks/
+```
+
+- [ ] **Step 2: Write Cargo.toml**
+
+Create `vlm-tasks/Cargo.toml` with:
+
+```toml
+[package]
+name = "vlm-tasks"
+version = "0.1.0"
+edition = "2024"
+rust-version = "1.95"
+description = "Shared types for findit-studio VLM engines: Task trait, ParseError, SceneAnalysis"
+license = "MIT OR Apache-2.0"
+
+[dependencies]
+serde = { version = "1", features = ["derive"], optional = true }
+serde_json = "1"
+smol_str = "0.3"
+thiserror = "2"
+
+[features]
+default = []
+serde = ["dep:serde", "smol_str/serde"]
+
+[lints.rust]
+rust_2018_idioms = "warn"
+single_use_lifetimes = "warn"
+unexpected_cfgs = { level = "warn", check-cfg = ['cfg(docsrs)'] }
+```
+
+- [ ] **Step 3: Write the failing tests for `Task` and `ParseError`**
+
+Create `vlm-tasks/src/task.rs`:
+
+```rust
+//! `Task` trait and `ParseError` — the cross-engine abstraction.
+
+use serde_json::Value;
+
+/// A structured-output task description.
+///
+/// Implementations supply the prompt, the JSON schema for constrained
+/// decoding, and a parser that turns the model's raw text into a typed
+/// `Output`. The trait is `Send + Sync` and `Output: Send` so trait
+/// objects (`dyn Task