From 741e10c3234615d14ccbda46dee57817b61ae17f Mon Sep 17 00:00:00 2001 From: Radonirinaunimi Date: Tue, 10 Feb 2026 19:05:10 +0100 Subject: [PATCH 1/7] Various improvements --- neopdf/src/converter.rs | 10 +--- neopdf/src/gridpdf.rs | 122 +++++++++++++++++++++++++++++++++------- neopdf/src/pdf.rs | 2 +- neopdf/src/subgrid.rs | 13 ++++- neopdf/src/writer.rs | 10 +--- neopdf_tmdlib/build.rs | 32 ++++++++++- 6 files changed, 148 insertions(+), 41 deletions(-) diff --git a/neopdf/src/converter.rs b/neopdf/src/converter.rs index 16b67348..519d7a61 100644 --- a/neopdf/src/converter.rs +++ b/neopdf/src/converter.rs @@ -174,10 +174,7 @@ pub fn combine_lhapdf_npdfs>( }; combined_subgrids.push(new_subgrid); } - let combined_grid = GridArray { - pids: pids.clone(), - subgrids: combined_subgrids, - }; + let combined_grid = GridArray::from_parts(pids.clone(), combined_subgrids); combined_grids.push(combined_grid); } @@ -311,10 +308,7 @@ pub fn combine_lhapdf_alphas>( }; combined_subgrids.push(new_subgrid); } - let combined_grid = GridArray { - pids: pids.clone(), - subgrids: combined_subgrids, - }; + let combined_grid = GridArray::from_parts(pids.clone(), combined_subgrids); combined_grids.push(combined_grid); } diff --git a/neopdf/src/gridpdf.rs b/neopdf/src/gridpdf.rs index 65dc917d..65c098ac 100644 --- a/neopdf/src/gridpdf.rs +++ b/neopdf/src/gridpdf.rs @@ -40,9 +40,32 @@ pub struct GridArray { pub pids: Array1, /// A collection of `SubGrid` instances that make up the full grid. pub subgrids: Vec, + /// Precomputed lookup from normalized PID to index (avoids per-call linear scan). + #[serde(skip)] + pid_lookup: HashMap, } impl GridArray { + /// Builds the PID lookup table from a pids array. + fn build_pid_lookup(pids: &Array1) -> HashMap { + let mut pid_lookup = HashMap::with_capacity(pids.len()); + for (idx, &pid) in pids.iter().enumerate() { + let normalized = if pid == 0 { 21 } else { pid }; + pid_lookup.entry(normalized).or_insert(idx); + } + pid_lookup + } + + /// Creates a `GridArray` from prebuilt pids and subgrids. + pub fn from_parts(pids: Array1, subgrids: Vec) -> Self { + let pid_lookup = Self::build_pid_lookup(&pids); + Self { + pids, + subgrids, + pid_lookup, + } + } + /// Creates a new `GridArray` from a vector of `SubgridData`. /// /// # Arguments @@ -80,9 +103,13 @@ impl GridArray { }) .collect(); + let pids = Array1::from_vec(pids); + let pid_lookup = Self::build_pid_lookup(&pids); + Self { - pids: Array1::from_vec(pids), + pids, subgrids, + pid_lookup, } } @@ -131,6 +158,10 @@ impl GridArray { /// /// An `Option` containing the index of the subgrid if found, otherwise `None`. pub fn find_subgrid(&self, points: &[f64]) -> Option { + // Fast path: single subgrid (common case), clamping handles boundaries + if self.subgrids.len() == 1 { + return Some(0); + } self.subgrids .iter() .position(|sg| sg.contains_point(points)) @@ -149,11 +180,15 @@ impl GridArray { /// Gets the index corresponding to a given flavor ID. fn pid_index(&self, flavor_id: i32) -> Option { - let normalize_pid = |pid| if pid == 0 { 21 } else { pid }; - let normalized_pids = normalize_pid(flavor_id); + let normalized = if flavor_id == 0 { 21 } else { flavor_id }; + // Fast path: use precomputed lookup (populated by GridArray::new) + if !self.pid_lookup.is_empty() { + return self.pid_lookup.get(&normalized).copied(); + } + // Fallback: linear scan (for deserialized GridArrays where pid_lookup is empty) self.pids .iter() - .position(|&pid| normalize_pid(pid) == normalized_pids) + .position(|&pid| (if pid == 0 { 21 } else { pid }) == normalized) } /// Gets the overall parameter ranges across all subgrids. @@ -205,6 +240,17 @@ pub enum ForcePositive { NoClipping, } +/// Helper functions for force-positive clipping via function pointer. +fn fp_identity(v: f64) -> f64 { + v +} +fn fp_clip_negative(v: f64) -> f64 { + v.max(0.0) +} +fn fp_clip_small(v: f64) -> f64 { + v.max(1e-10) +} + /// The main PDF grid interface, providing high-level methods for interpolation. pub struct GridPDF { /// The metadata associated with the PDF set. @@ -217,6 +263,10 @@ pub struct GridPDF { alphas: AlphaS, /// Clip the values to positive definite numbers if negatives. pub force_positive: Option, + /// Cached: whether the interpolator uses log-space coordinates. + use_log: bool, + /// Cached: function pointer for force-positive clipping (avoids per-call match). + force_positive_fn: fn(f64) -> f64, } impl GridPDF { @@ -229,6 +279,15 @@ impl GridPDF { pub fn new(info: MetaData, knot_array: GridArray) -> Self { let interpolators = Self::build_interpolators(&info, &knot_array); let alphas = AlphaS::from_metadata(&info).expect("Failed to create AlphaS calculator"); + let use_log = matches!( + info.interpolator_type, + InterpolatorType::LogBilinear + | InterpolatorType::LogBicubic + | InterpolatorType::LogTricubic + | InterpolatorType::LogFourCubic + | InterpolatorType::LogFiveCubic + | InterpolatorType::LogChebyshev + ); Self { info, @@ -236,6 +295,8 @@ impl GridPDF { interpolators, alphas, force_positive: None, + use_log, + force_positive_fn: fp_identity, } } @@ -245,6 +306,11 @@ impl GridPDF { /// /// * `flag` - The `ForcePositive` enum variant specifying the clipping method. pub fn set_force_positive(&mut self, flag: ForcePositive) { + self.force_positive_fn = match &flag { + ForcePositive::ClipNegative => fp_clip_negative, + ForcePositive::ClipSmall => fp_clip_small, + ForcePositive::NoClipping => fp_identity, + }; self.force_positive = Some(flag); } @@ -309,25 +375,39 @@ impl GridPDF { None => return Ok(0.0), }; - let use_log = matches!( - self.info.interpolator_type, - InterpolatorType::LogBilinear - | InterpolatorType::LogBicubic - | InterpolatorType::LogTricubic - | InterpolatorType::LogFourCubic - | InterpolatorType::LogFiveCubic - | InterpolatorType::LogChebyshev - ); + let mut buf = [0.0f64; 8]; + for (i, &p) in points.iter().enumerate() { + buf[i] = if self.use_log { p.ln() } else { p }; + } self.interpolators[subgrid_idx][pid_idx] - .interpolate_point( - &points - .iter() - .map(|&p| if use_log { p.ln() } else { p }) - .collect::>(), - ) + .interpolate_point(&buf[..points.len()]) .map_err(|e| Error::InterpolationError(e.to_string())) - .map(|result| self.apply_force_positive(result)) + .map(|result| (self.force_positive_fn)(result)) + } + + /// Internal fast path for interpolation — returns `f64` directly, no `Result` wrapping. + /// Avoids `map_err` string allocation. Used by `PDF::xfxq2`. + pub(crate) fn xfxq2_fast(&self, flavor_id: i32, points: &[f64]) -> f64 { + let subgrid_idx = match self.knot_array.find_subgrid(points) { + Some(idx) => idx, + None => return 0.0, + }; + + let pid_idx = match self.knot_array.pid_index(flavor_id) { + Some(idx) => idx, + None => return 0.0, + }; + + let mut buf = [0.0f64; 8]; + for (i, &p) in points.iter().enumerate() { + buf[i] = if self.use_log { p.ln() } else { p }; + } + + match self.interpolators[subgrid_idx][pid_idx].interpolate_point(&buf[..points.len()]) { + Ok(result) => (self.force_positive_fn)(result), + Err(e) => panic!("InterpolationError: {e}"), + } } /// Interpolates PDF values for multiple points in parallel. @@ -349,7 +429,7 @@ impl GridPDF { .map(|idx| { let num_cols = slice_points.len(); let (fl_idx, s_idx) = (idx / num_cols, idx % num_cols); - self.xfxq2(flavors[fl_idx], slice_points[s_idx]).unwrap() + self.xfxq2_fast(flavors[fl_idx], slice_points[s_idx]) }) .collect(); diff --git a/neopdf/src/pdf.rs b/neopdf/src/pdf.rs index c7f9a33f..1011e51a 100644 --- a/neopdf/src/pdf.rs +++ b/neopdf/src/pdf.rs @@ -267,7 +267,7 @@ impl PDF { /// /// The interpolated PDF value `xf(nuclone, alphas, flavor, x, Q^2)`. pub fn xfxq2(&self, pid: i32, points: &[f64]) -> f64 { - self.grid_pdf.xfxq2(pid, points).unwrap() + self.grid_pdf.xfxq2_fast(pid, points) } /// Interpolates the PDF value (xf) for multiple nucleons, alphas, flavors, xs, and Q2s. diff --git a/neopdf/src/subgrid.rs b/neopdf/src/subgrid.rs index fd83d09b..520d665d 100644 --- a/neopdf/src/subgrid.rs +++ b/neopdf/src/subgrid.rs @@ -331,8 +331,17 @@ impl SubGrid { /// /// `true` if the point is within the subgrid, `false` otherwise. pub fn contains_point(&self, points: &[f64]) -> bool { - let (expected_len, ranges) = match self.interpolation_config() { - InterpolationConfig::TwoD => (2, vec![]), + let config = self.interpolation_config(); + + // Fast path for 2D (most common case): avoids Vec allocation and match overhead + if let InterpolationConfig::TwoD = config { + return points.len() == 2 + && self.x_range.contains(points[0]) + && self.q2_range.contains(points[1]); + } + + let (expected_len, ranges) = match config { + InterpolationConfig::TwoD => unreachable!(), InterpolationConfig::ThreeDNucleons => (3, vec![&self.nucleons_range]), InterpolationConfig::ThreeDAlphas => (3, vec![&self.alphas_range]), InterpolationConfig::ThreeDXi => (3, vec![&self.xi_range]), diff --git a/neopdf/src/writer.rs b/neopdf/src/writer.rs index 0544184a..918caadb 100644 --- a/neopdf/src/writer.rs +++ b/neopdf/src/writer.rs @@ -382,10 +382,7 @@ impl GridArrayReader { }) .collect(); - GridArray { - pids: legacy_grid.pids, - subgrids, - } + GridArray::from_parts(legacy_grid.pids, subgrids) } /// Returns the number of grid arrays in the collection. @@ -669,9 +666,6 @@ mod tests { } fn test_grid() -> GridArray { - GridArray { - pids: Array1::from(vec![1, 2, 3]), - subgrids: vec![], - } + GridArray::from_parts(Array1::from(vec![1, 2, 3]), vec![]) } } diff --git a/neopdf_tmdlib/build.rs b/neopdf_tmdlib/build.rs index 2ba78ae6..a7f087f1 100644 --- a/neopdf_tmdlib/build.rs +++ b/neopdf_tmdlib/build.rs @@ -3,6 +3,7 @@ use std::process::Command; fn main() { + let mut all_lib_paths = Vec::new(); let cxx_flags: Vec = String::from_utf8( Command::new("TMDlib-config") .arg("--cppflags") @@ -34,6 +35,7 @@ fn main() { .filter_map(|token| token.strip_prefix("-L")) { println!("cargo:rustc-link-search={lib_path}"); + all_lib_paths.push(lib_path.to_owned()); } for lib in libs @@ -59,5 +61,33 @@ fn main() { // Manually link to different libraries as TMDlib need them. println!("cargo:rustc-link-lib=gsl"); println!("cargo:rustc-link-lib=gslcblas"); - println!("cargo:rustc-link-lib=LHAPDF"); + println!("cargo:rustc-link-lib=gfortran"); + + // Discover LHAPDF via lhapdf-config so we pick up the correct library path. + let lhapdf_libs = String::from_utf8( + Command::new("lhapdf-config") + .arg("--libs") + .output() + .expect("Could not find `lhapdf-config`, please install LHAPDF and make sure `lhapdf-config` is in your PATH") + .stdout, + ) + .unwrap(); + + for lib_path in lhapdf_libs + .split_whitespace() + .filter_map(|token| token.strip_prefix("-L")) + { + println!("cargo:rustc-link-search={lib_path}"); + all_lib_paths.push(lib_path.to_owned()); + } + + for lib in lhapdf_libs + .split_whitespace() + .filter_map(|token| token.strip_prefix("-l")) + { + println!("cargo:rustc-link-lib={lib}"); + } + + // Propagate library paths to dependent crates via DEP_NEOPDF_TMDLIB_LIB_PATHS. + println!("cargo:lib_paths={}", all_lib_paths.join(":")); } From 733bf6864c8e330272605fe7c6cfe52b090320a0 Mon Sep 17 00:00:00 2001 From: Radonirinaunimi Date: Tue, 10 Feb 2026 20:25:54 +0100 Subject: [PATCH 2/7] Interleave data --- neopdf/src/gridpdf.rs | 226 +++++++++++++++++++++++++++++++++++++ neopdf/src/interpolator.rs | 17 +++ neopdf/src/metadata.rs | 2 +- neopdf/src/pdf.rs | 7 ++ neopdf/src/strategy.rs | 4 +- neopdf_capi/src/lib.rs | 20 +--- 6 files changed, 255 insertions(+), 21 deletions(-) diff --git a/neopdf/src/gridpdf.rs b/neopdf/src/gridpdf.rs index 65c098ac..ed57ab45 100644 --- a/neopdf/src/gridpdf.rs +++ b/neopdf/src/gridpdf.rs @@ -15,7 +15,9 @@ use super::alphas::AlphaS; use super::interpolator::{DynInterpolator, InterpolatorFactory}; use super::metadata::{InterpolatorType, MetaData}; use super::parser::SubgridData; +use super::strategy::LogBicubicInterpolation; use super::subgrid::{ParamRange, RangeParameters, SubGrid}; +use super::utils; /// Errors that can occur during PDF grid operations. #[derive(Debug, Error)] @@ -251,6 +253,133 @@ fn fp_clip_small(v: f64) -> f64 { v.max(1e-10) } +/// Interleaved bicubic coefficients for fast all-flavor evaluation. +/// +/// Layout: coefficients are stored as `[cell][flavor][4]` where +/// `cell = ix * nq2knots + iq2`, giving optimal cache locality when +/// evaluating all flavors at the same `(x, Q2)` point. +struct InterleavedBicubic { + /// Flat coefficient array: `[(ix * nq2 + iq2) * n_flavors * 4 + flavor * 4 + c]` + coeffs: Vec, + /// Log-transformed x grid (shared across all flavors). + log_xs: Vec, + /// Log-transformed Q2 grid (shared across all flavors). + log_q2s: Vec, + /// Number of Q2 knots. + nq2: usize, + /// Number of flavors stored. + n_flavors: usize, +} + +impl InterleavedBicubic { + /// Build interleaved coefficients from a subgrid's per-flavor data. + fn build(subgrid: &SubGrid, n_pids: usize) -> Self { + let log_xs: Vec = subgrid.xs.iter().map(|&x| x.ln()).collect(); + let log_q2s: Vec = subgrid.q2s.iter().map(|&q2| q2.ln()).collect(); + let nxknots = log_xs.len(); + let nq2knots = log_q2s.len(); + + // Compute per-flavor coefficients using the existing algorithm, + // then interleave into [cell][flavor][4]. + let n_cells = (nxknots - 1) * nq2knots; + let mut interleaved = vec![0.0f64; n_cells * n_pids * 4]; + + for pid_idx in 0..n_pids { + let grid_slice = subgrid.grid_slice(pid_idx).to_owned(); + let data = ninterp::data::InterpData2D { + grid: [ + ndarray::Array1::from_vec(log_xs.clone()), + ndarray::Array1::from_vec(log_q2s.clone()), + ], + values: grid_slice, + }; + let flavor_coeffs = LogBicubicInterpolation::compute_polynomial_coefficients(&data); + + // Copy into interleaved layout + for cell in 0..n_cells { + let src = cell * 4; + let dst = (cell * n_pids + pid_idx) * 4; + interleaved[dst..dst + 4].copy_from_slice(&flavor_coeffs[src..src + 4]); + } + } + + Self { + coeffs: interleaved, + log_xs, + log_q2s, + nq2: nq2knots, + n_flavors: n_pids, + } + } + + /// Evaluate the Hermite x-polynomial for a given cell and flavor. + #[inline(always)] + fn hermite_x(&self, cell: usize, flavor: usize, u: f64) -> f64 { + let base = (cell * self.n_flavors + flavor) * 4; + let c = &self.coeffs[base..base + 4]; + let u2 = u * u; + let u3 = u2 * u; + c[0] * u3 + c[1] * u2 + c[2] * u + c[3] + } + + /// Evaluate all flavors at `(ix, iq2, u, v)`. + /// + /// `pid_slots` maps each output position to `Some(flavor_index)` or `None`. + fn eval_allpids( + &self, + ix: usize, + iq2: usize, + u: f64, + v: f64, + pid_slots: &[Option], + force_positive_fn: fn(f64) -> f64, + out: &mut [f64], + ) { + let nq2 = self.nq2; + let dq_1 = self.log_q2s[iq2 + 1] - self.log_q2s[iq2]; + + for (o, slot) in out.iter_mut().zip(pid_slots.iter()) { + let fi = match *slot { + Some(idx) => idx, + None => { + *o = 0.0; + continue; + } + }; + + let cell_lo = ix * nq2 + iq2; + let cell_hi = ix * nq2 + iq2 + 1; + + let vl = self.hermite_x(cell_lo, fi, u); + let vh = self.hermite_x(cell_hi, fi, u); + + let (vdl, vdh) = if iq2 == 0 { + let vdl_val = vh - vl; + let vhh = self.hermite_x(ix * nq2 + iq2 + 2, fi, u); + let dq_2_inv = 1.0 / (self.log_q2s[iq2 + 2] - self.log_q2s[iq2 + 1]); + let vdh_val = (vdl_val + (vhh - vh) * dq_1 * dq_2_inv) * 0.5; + (vdl_val, vdh_val) + } else if iq2 == nq2 - 2 { + let vdh_val = vh - vl; + let vll = self.hermite_x(ix * nq2 + iq2 - 1, fi, u); + let dq_0_inv = 1.0 / (self.log_q2s[iq2] - self.log_q2s[iq2 - 1]); + let vdl_val = (vdh_val + (vl - vll) * dq_1 * dq_0_inv) * 0.5; + (vdl_val, vdh_val) + } else { + let vll = self.hermite_x(ix * nq2 + iq2 - 1, fi, u); + let dq_0_inv = 1.0 / (self.log_q2s[iq2] - self.log_q2s[iq2 - 1]); + let vhh = self.hermite_x(ix * nq2 + iq2 + 2, fi, u); + let dq_2_inv = 1.0 / (self.log_q2s[iq2 + 2] - self.log_q2s[iq2 + 1]); + let vdl_val = ((vh - vl) + (vl - vll) * dq_1 * dq_0_inv) * 0.5; + let vdh_val = ((vh - vl) + (vhh - vh) * dq_1 * dq_2_inv) * 0.5; + (vdl_val, vdh_val) + }; + + *o = force_positive_fn(utils::hermite_cubic_interpolate(v, vl, vdl, vh, vdh)); + } + } +} + /// The main PDF grid interface, providing high-level methods for interpolation. pub struct GridPDF { /// The metadata associated with the PDF set. @@ -267,6 +396,9 @@ pub struct GridPDF { use_log: bool, /// Cached: function pointer for force-positive clipping (avoids per-call match). force_positive_fn: fn(f64) -> f64, + /// Optional fast path for all-flavor evaluation (2D LogBicubic only). + /// One entry per subgrid. + interleaved: Option>, } impl GridPDF { @@ -289,6 +421,29 @@ impl GridPDF { | InterpolatorType::LogChebyshev ); + // Build interleaved coefficients for 2D LogBicubic grids + let interleaved = if info.interpolator_type == InterpolatorType::LogBicubic { + let all_2d = knot_array.subgrids.iter().all(|sg| { + matches!( + sg.interpolation_config(), + super::interpolator::InterpolationConfig::TwoD + ) + }); + if all_2d { + Some( + knot_array + .subgrids + .iter() + .map(|sg| InterleavedBicubic::build(sg, knot_array.pids.len())) + .collect(), + ) + } else { + None + } + } else { + None + }; + Self { info, knot_array, @@ -297,6 +452,7 @@ impl GridPDF { force_positive: None, use_log, force_positive_fn: fp_identity, + interleaved, } } @@ -410,6 +566,76 @@ impl GridPDF { } } + /// Fast path for evaluating all requested flavors at a single kinematic point. + /// + /// For 2D LogBicubic grids with interleaved coefficients, the binary search + /// is performed once and all flavors are evaluated with optimal cache locality. + /// Falls back to per-flavor interpolation for other grid types. + pub(crate) fn xfxq2_allpids(&self, pids: &[i32], points: &[f64], out: &mut [f64]) { + let subgrid_idx = match self.knot_array.find_subgrid(points) { + Some(idx) => idx, + None => { + out.iter_mut().for_each(|v| *v = 0.0); + return; + } + }; + + // Fast path: interleaved 2D LogBicubic coefficients + if let Some(ref il) = self.interleaved { + let il = &il[subgrid_idx]; + let lx = points[0].ln(); + let lq2 = points[1].ln(); + + let ix = match utils::find_interval_index(&il.log_xs, lx) { + Ok(i) => i, + Err(_) => { + out.iter_mut().for_each(|v| *v = 0.0); + return; + } + }; + let iq2 = match utils::find_interval_index(&il.log_q2s, lq2) { + Ok(j) => j, + Err(_) => { + out.iter_mut().for_each(|v| *v = 0.0); + return; + } + }; + + let dx = il.log_xs[ix + 1] - il.log_xs[ix]; + let dy = il.log_q2s[iq2 + 1] - il.log_q2s[iq2]; + let u = (lx - il.log_xs[ix]) / dx; + let v = (lq2 - il.log_q2s[iq2]) / dy; + + // Precompute PID → flavor slot mapping + let mut pid_slots: [Option; 32] = [None; 32]; + for (i, &pid) in pids.iter().enumerate().take(32) { + pid_slots[i] = self.knot_array.pid_index(pid); + } + + il.eval_allpids(ix, iq2, u, v, &pid_slots[..pids.len()], self.force_positive_fn, out); + return; + } + + // Generic fallback + let mut buf = [0.0f64; 8]; + for (i, &p) in points.iter().enumerate() { + buf[i] = if self.use_log { p.ln() } else { p }; + } + let log_points = &buf[..points.len()]; + + for (o, &pid) in out.iter_mut().zip(pids.iter()) { + *o = match self.knot_array.pid_index(pid) { + Some(pid_idx) => { + match self.interpolators[subgrid_idx][pid_idx].interpolate_point(log_points) { + Ok(result) => (self.force_positive_fn)(result), + Err(e) => panic!("InterpolationError: {e}"), + } + } + None => 0.0, + }; + } + } + /// Interpolates PDF values for multiple points in parallel. /// /// # Arguments diff --git a/neopdf/src/interpolator.rs b/neopdf/src/interpolator.rs index 3338c6f4..5b4dd88f 100644 --- a/neopdf/src/interpolator.rs +++ b/neopdf/src/interpolator.rs @@ -11,6 +11,8 @@ //! Interpolation strategies are defined in `strategy.rs`. //! The [`SubGrid`] struct is defined in `subgrid.rs`. +use std::any::Any; + use ndarray::{s, IxDyn, OwnedRepr}; use ninterp::data::{InterpData2D, InterpData3D}; use ninterp::error::InterpolateError; @@ -118,6 +120,9 @@ impl InterpolationConfig { /// A trait for dynamic interpolation across different dimensions. pub trait DynInterpolator: Send + Sync { fn interpolate_point(&self, point: &[f64]) -> Result; + + /// Downcast to concrete type for optimized multi-flavor evaluation. + fn as_any(&self) -> &dyn Any; } // Implement `DynInterpolator` for 2D interpolators. @@ -131,6 +136,10 @@ where .map_err(|_| InterpolateError::Other("Expected 2D point".to_string()))?; self.interpolate(&[x, y]) } + + fn as_any(&self) -> &dyn Any { + self + } } // Implement `DynInterpolator` for 3D interpolators. @@ -144,6 +153,10 @@ where .map_err(|_| InterpolateError::Other("Expected 3D point".to_string()))?; self.interpolate(&[x, y, z]) } + + fn as_any(&self) -> &dyn Any { + self + } } // Implement `DynInterpolator` for N-dimensional interpolators. @@ -154,6 +167,10 @@ where fn interpolate_point(&self, point: &[f64]) -> Result { self.interpolate(point) } + + fn as_any(&self) -> &dyn Any { + self + } } /// An enum to dispatch batch interpolation to the correct Chebyshev interpolator. diff --git a/neopdf/src/metadata.rs b/neopdf/src/metadata.rs index 190f6983..a8eaed7b 100644 --- a/neopdf/src/metadata.rs +++ b/neopdf/src/metadata.rs @@ -17,7 +17,7 @@ pub enum SetType { /// Represents the type of interpolator used for the PDF. /// WARNING: When adding elements, always append at the end!!! #[repr(C)] -#[derive(Clone, Debug, Default, Deserialize, Serialize)] +#[derive(Clone, Debug, Default, PartialEq, Eq, Deserialize, Serialize)] pub enum InterpolatorType { Bilinear, LogBilinear, diff --git a/neopdf/src/pdf.rs b/neopdf/src/pdf.rs index 1011e51a..f26884c5 100644 --- a/neopdf/src/pdf.rs +++ b/neopdf/src/pdf.rs @@ -270,6 +270,13 @@ impl PDF { self.grid_pdf.xfxq2_fast(pid, points) } + /// Evaluates all requested flavors at a single kinematic point. + /// + /// Performs the subgrid lookup and log transform once, then loops over PIDs. + pub fn xfxq2_allpids(&self, pids: &[i32], points: &[f64], out: &mut [f64]) { + self.grid_pdf.xfxq2_allpids(pids, points, out); + } + /// Interpolates the PDF value (xf) for multiple nucleons, alphas, flavors, xs, and Q2s. /// /// Abstraction to the `GridPDF::xfxq2s` method. diff --git a/neopdf/src/strategy.rs b/neopdf/src/strategy.rs index 1d1cb0f8..5ef413e7 100644 --- a/neopdf/src/strategy.rs +++ b/neopdf/src/strategy.rs @@ -263,7 +263,7 @@ impl LogBicubicInterpolation { } /// Computes the polynomial coefficients for bicubic interpolation, mirroring LHAPDF's C++ implementation. - fn compute_polynomial_coefficients(data: &InterpData2D) -> Vec + pub(crate) fn compute_polynomial_coefficients(data: &InterpData2D) -> Vec where D: Data + RawDataClone + Clone, { @@ -301,7 +301,7 @@ impl LogBicubicInterpolation { } /// Performs bicubic interpolation using pre-computed coefficients. - fn interpolate_with_coeffs( + pub(crate) fn interpolate_with_coeffs( &self, data: &InterpData2D, ix: usize, diff --git a/neopdf_capi/src/lib.rs b/neopdf_capi/src/lib.rs index f0a296b0..ae00c676 100644 --- a/neopdf_capi/src/lib.rs +++ b/neopdf_capi/src/lib.rs @@ -1507,16 +1507,8 @@ pub unsafe extern "C" fn evolvepdf(x: c_double, q: c_double, f: *mut c_double) { let pdf = &pdfs[member]; let q2 = q * q; - let (available_pids, _) = pdf.pids().clone().into_raw_vec_and_offset(); let out_slice = slice::from_raw_parts_mut(f, 14); - - for (out, &pid) in out_slice.iter_mut().zip(DEFAULT_PIDS.iter()) { - *out = if available_pids.contains(&pid) { - pdf.xfxq2(pid, &[x, q2]) - } else { - 0.0 - }; - } + pdf.xfxq2_allpids(&DEFAULT_PIDS, &[x, q2], out_slice); } } } @@ -1542,16 +1534,8 @@ pub unsafe extern "C" fn evolvepdf_(x: *const c_double, q: *const c_double, f: * let pdf = &pdfs[member]; let q2 = (*q) * (*q); - let (available_pids, _) = pdf.pids().clone().into_raw_vec_and_offset(); let out_slice = slice::from_raw_parts_mut(f, 14); - - for (out, &pid) in out_slice.iter_mut().zip(DEFAULT_PIDS.iter()) { - *out = if available_pids.contains(&pid) { - pdf.xfxq2(pid, &[*x, q2]) - } else { - 0.0 - }; - } + pdf.xfxq2_allpids(&DEFAULT_PIDS, &[*x, q2], out_slice); } } } From f36b68d20a59e890d888ce0daca75bff1b8f426f Mon Sep 17 00:00:00 2001 From: Radonirinaunimi Date: Wed, 11 Feb 2026 10:38:04 +0100 Subject: [PATCH 3/7] Implement single path fast call for a point-like interpolation --- neopdf/src/gridpdf.rs | 172 +++++++++++++++++++++++++++++++----- neopdf_capi/src/lib.rs | 3 +- neopdf_pyapi/src/gridpdf.rs | 5 +- 3 files changed, 152 insertions(+), 28 deletions(-) diff --git a/neopdf/src/gridpdf.rs b/neopdf/src/gridpdf.rs index ed57ab45..d9c999bb 100644 --- a/neopdf/src/gridpdf.rs +++ b/neopdf/src/gridpdf.rs @@ -35,32 +35,92 @@ pub enum Error { InterpolationError(String), } +/// Direct-indexed PID lookup table. PIDs in range `[PID_MIN, PID_MAX]` are +/// resolved in O(1) via a flat array; out-of-range PIDs fall back to `None`. +const PID_MIN: i32 = -6; +const PID_MAX: i32 = 22; +const PID_RANGE: usize = (PID_MAX - PID_MIN + 1) as usize; // 29 +const PID_NONE: u8 = u8::MAX; // sentinel for "not present" + +#[derive(Debug, Clone)] +struct PidLookup { + table: [u8; PID_RANGE], +} + +impl Default for PidLookup { + fn default() -> Self { + Self { + table: [PID_NONE; PID_RANGE], + } + } +} + +impl PidLookup { + fn build(pids: &Array1) -> Self { + let mut lut = Self::default(); + for (idx, &pid) in pids.iter().enumerate() { + let normalized = if pid == 0 { 21 } else { pid }; + if (PID_MIN..=PID_MAX).contains(&normalized) { + let slot = (normalized - PID_MIN) as usize; + if lut.table[slot] == PID_NONE { + lut.table[slot] = idx as u8; + } + } + } + lut + } + + #[inline(always)] + fn get(&self, pid: i32) -> Option { + let normalized = if pid == 0 { 21 } else { pid }; + if !(PID_MIN..=PID_MAX).contains(&normalized) { + return None; + } + let v = self.table[(normalized - PID_MIN) as usize]; + if v == PID_NONE { + None + } else { + Some(v as usize) + } + } +} + /// Stores the complete PDF grid data, including all subgrids and flavor information. -#[derive(Debug, Serialize, Deserialize)] +#[derive(Debug, Serialize)] pub struct GridArray { /// An array of particle flavor IDs (PIDs). pub pids: Array1, /// A collection of `SubGrid` instances that make up the full grid. pub subgrids: Vec, - /// Precomputed lookup from normalized PID to index (avoids per-call linear scan). + /// Direct-indexed PID lookup (O(1), no hashing). #[serde(skip)] - pid_lookup: HashMap, + pid_lookup: PidLookup, } -impl GridArray { - /// Builds the PID lookup table from a pids array. - fn build_pid_lookup(pids: &Array1) -> HashMap { - let mut pid_lookup = HashMap::with_capacity(pids.len()); - for (idx, &pid) in pids.iter().enumerate() { - let normalized = if pid == 0 { 21 } else { pid }; - pid_lookup.entry(normalized).or_insert(idx); +impl<'de> Deserialize<'de> for GridArray { + fn deserialize(deserializer: D) -> Result + where + D: serde::Deserializer<'de>, + { + #[derive(Deserialize)] + struct Helper { + pids: Array1, + subgrids: Vec, } - pid_lookup + let h = Helper::deserialize(deserializer)?; + let pid_lookup = PidLookup::build(&h.pids); + Ok(Self { + pids: h.pids, + subgrids: h.subgrids, + pid_lookup, + }) } +} +impl GridArray { /// Creates a `GridArray` from prebuilt pids and subgrids. pub fn from_parts(pids: Array1, subgrids: Vec) -> Self { - let pid_lookup = Self::build_pid_lookup(&pids); + let pid_lookup = PidLookup::build(&pids); Self { pids, subgrids, @@ -106,7 +166,7 @@ impl GridArray { .collect(); let pids = Array1::from_vec(pids); - let pid_lookup = Self::build_pid_lookup(&pids); + let pid_lookup = PidLookup::build(&pids); Self { pids, @@ -181,16 +241,9 @@ impl GridArray { } /// Gets the index corresponding to a given flavor ID. + #[inline(always)] fn pid_index(&self, flavor_id: i32) -> Option { - let normalized = if flavor_id == 0 { 21 } else { flavor_id }; - // Fast path: use precomputed lookup (populated by GridArray::new) - if !self.pid_lookup.is_empty() { - return self.pid_lookup.get(&normalized).copied(); - } - // Fallback: linear scan (for deserialized GridArrays where pid_lookup is empty) - self.pids - .iter() - .position(|&pid| (if pid == 0 { 21 } else { pid }) == normalized) + self.pid_lookup.get(flavor_id) } /// Gets the overall parameter ranges across all subgrids. @@ -322,9 +375,66 @@ impl InterleavedBicubic { c[0] * u3 + c[1] * u2 + c[2] * u + c[3] } + /// Evaluate a single flavor at a given `(x, q2)` point. + /// + /// Performs log transform, binary search, and coefficient evaluation + /// in a single direct path — no vtable dispatch, no Result wrapping. + #[inline] + fn eval_single(&self, flavor: usize, x: f64, q2: f64) -> f64 { + let lx = x.ln(); + let lq2 = q2.ln(); + + let ix = match utils::find_interval_index(&self.log_xs, lx) { + Ok(i) => i, + Err(_) => return 0.0, + }; + let iq2 = match utils::find_interval_index(&self.log_q2s, lq2) { + Ok(j) => j, + Err(_) => return 0.0, + }; + + let dx = self.log_xs[ix + 1] - self.log_xs[ix]; + let dy = self.log_q2s[iq2 + 1] - self.log_q2s[iq2]; + let u = (lx - self.log_xs[ix]) / dx; + let v = (lq2 - self.log_q2s[iq2]) / dy; + + let nq2 = self.nq2; + let cell_lo = ix * nq2 + iq2; + + let vl = self.hermite_x(cell_lo, flavor, u); + let vh = self.hermite_x(cell_lo + 1, flavor, u); + + let dq_1 = dy; // = log_q2s[iq2+1] - log_q2s[iq2], already computed + + let (vdl, vdh) = if iq2 == 0 { + let vdl_val = vh - vl; + let vhh = self.hermite_x(cell_lo + 2, flavor, u); + let dq_2_inv = 1.0 / (self.log_q2s[iq2 + 2] - self.log_q2s[iq2 + 1]); + let vdh_val = (vdl_val + (vhh - vh) * dq_1 * dq_2_inv) * 0.5; + (vdl_val, vdh_val) + } else if iq2 == nq2 - 2 { + let vdh_val = vh - vl; + let vll = self.hermite_x(cell_lo - 1, flavor, u); + let dq_0_inv = 1.0 / (self.log_q2s[iq2] - self.log_q2s[iq2 - 1]); + let vdl_val = (vdh_val + (vl - vll) * dq_1 * dq_0_inv) * 0.5; + (vdl_val, vdh_val) + } else { + let vll = self.hermite_x(cell_lo - 1, flavor, u); + let dq_0_inv = 1.0 / (self.log_q2s[iq2] - self.log_q2s[iq2 - 1]); + let vhh = self.hermite_x(cell_lo + 2, flavor, u); + let dq_2_inv = 1.0 / (self.log_q2s[iq2 + 2] - self.log_q2s[iq2 + 1]); + let vdl_val = ((vh - vl) + (vl - vll) * dq_1 * dq_0_inv) * 0.5; + let vdh_val = ((vh - vl) + (vhh - vh) * dq_1 * dq_2_inv) * 0.5; + (vdl_val, vdh_val) + }; + + utils::hermite_cubic_interpolate(v, vl, vdl, vh, vdh) + } + /// Evaluate all flavors at `(ix, iq2, u, v)`. /// /// `pid_slots` maps each output position to `Some(flavor_index)` or `None`. + #[allow(clippy::too_many_arguments)] fn eval_allpids( &self, ix: usize, @@ -555,6 +665,14 @@ impl GridPDF { None => return 0.0, }; + // Fast path: 2D LogBicubic via interleaved coefficients — bypasses + // vtable dispatch, ninterp validation, and Result wrapping. + if let Some(ref il) = self.interleaved { + return (self.force_positive_fn)( + il[subgrid_idx].eval_single(pid_idx, points[0], points[1]), + ); + } + let mut buf = [0.0f64; 8]; for (i, &p) in points.iter().enumerate() { buf[i] = if self.use_log { p.ln() } else { p }; @@ -612,7 +730,15 @@ impl GridPDF { pid_slots[i] = self.knot_array.pid_index(pid); } - il.eval_allpids(ix, iq2, u, v, &pid_slots[..pids.len()], self.force_positive_fn, out); + il.eval_allpids( + ix, + iq2, + u, + v, + &pid_slots[..pids.len()], + self.force_positive_fn, + out, + ); return; } diff --git a/neopdf_capi/src/lib.rs b/neopdf_capi/src/lib.rs index ae00c676..0746d87f 100644 --- a/neopdf_capi/src/lib.rs +++ b/neopdf_capi/src/lib.rs @@ -699,7 +699,7 @@ impl NeoPDFGrid { } /// Adds a subgrid to the grid (v2 for 8D) - #[allow(clippy::too_many_arguments)] + #[allow(clippy::too_many_arguments, clippy::similar_names)] unsafe fn add_subgrid_v2( &mut self, nucleons: *const c_double, @@ -809,6 +809,7 @@ pub unsafe extern "C" fn neopdf_grid_add_subgrid( /// # Safety /// - `grid` must be a valid pointer to a `NeoPDFGrid` created by `neopdf_grid_new`. /// - The data pointers must be valid for the specified lengths. +#[allow(clippy::similar_names)] #[no_mangle] pub unsafe extern "C" fn neopdf_grid_add_subgridv2( grid: *mut NeoPDFGrid, diff --git a/neopdf_pyapi/src/gridpdf.rs b/neopdf_pyapi/src/gridpdf.rs index 681d6d06..710f860f 100644 --- a/neopdf_pyapi/src/gridpdf.rs +++ b/neopdf_pyapi/src/gridpdf.rs @@ -168,10 +168,7 @@ impl PyGridArray { .map(|py_ref| py_ref.subgrid.clone()) .collect(); - let gridarray = GridArray { - pids: Array1::from(pids), - subgrids, - }; + let gridarray = GridArray::from_parts(Array1::from(pids), subgrids); Self { gridarray } } From 692dc78ffe82c62fed275007f200982a640d1afd Mon Sep 17 00:00:00 2001 From: Radonirinaunimi Date: Wed, 11 Feb 2026 12:02:07 +0100 Subject: [PATCH 4/7] Implement data interleave across all dimensions --- neopdf/src/gridpdf.rs | 451 ++++++++++++++--------------- neopdf/src/interleaved.rs | 518 ++++++++++++++++++++++++++++++++++ neopdf/src/lib.rs | 1 + neopdf_cli/tests/converter.rs | 4 +- neopdf_cli/tests/pdf.rs | 4 +- 5 files changed, 742 insertions(+), 236 deletions(-) create mode 100644 neopdf/src/interleaved.rs diff --git a/neopdf/src/gridpdf.rs b/neopdf/src/gridpdf.rs index d9c999bb..10c1bda2 100644 --- a/neopdf/src/gridpdf.rs +++ b/neopdf/src/gridpdf.rs @@ -12,12 +12,11 @@ use std::collections::HashMap; use thiserror::Error; use super::alphas::AlphaS; -use super::interpolator::{DynInterpolator, InterpolatorFactory}; +use super::interleaved::InterleavedHermite; +use super::interpolator::{DynInterpolator, InterpolationConfig, InterpolatorFactory}; use super::metadata::{InterpolatorType, MetaData}; use super::parser::SubgridData; -use super::strategy::LogBicubicInterpolation; use super::subgrid::{ParamRange, RangeParameters, SubGrid}; -use super::utils; /// Errors that can occur during PDF grid operations. #[derive(Debug, Error)] @@ -306,187 +305,200 @@ fn fp_clip_small(v: f64) -> f64 { v.max(1e-10) } -/// Interleaved bicubic coefficients for fast all-flavor evaluation. -/// -/// Layout: coefficients are stored as `[cell][flavor][4]` where -/// `cell = ix * nq2knots + iq2`, giving optimal cache locality when -/// evaluating all flavors at the same `(x, Q2)` point. -struct InterleavedBicubic { - /// Flat coefficient array: `[(ix * nq2 + iq2) * n_flavors * 4 + flavor * 4 + c]` - coeffs: Vec, - /// Log-transformed x grid (shared across all flavors). - log_xs: Vec, - /// Log-transformed Q2 grid (shared across all flavors). - log_q2s: Vec, - /// Number of Q2 knots. - nq2: usize, - /// Number of flavors stored. - n_flavors: usize, -} - -impl InterleavedBicubic { - /// Build interleaved coefficients from a subgrid's per-flavor data. - fn build(subgrid: &SubGrid, n_pids: usize) -> Self { - let log_xs: Vec = subgrid.xs.iter().map(|&x| x.ln()).collect(); - let log_q2s: Vec = subgrid.q2s.iter().map(|&q2| q2.ln()).collect(); - let nxknots = log_xs.len(); - let nq2knots = log_q2s.len(); - - // Compute per-flavor coefficients using the existing algorithm, - // then interleave into [cell][flavor][4]. - let n_cells = (nxknots - 1) * nq2knots; - let mut interleaved = vec![0.0f64; n_cells * n_pids * 4]; - - for pid_idx in 0..n_pids { - let grid_slice = subgrid.grid_slice(pid_idx).to_owned(); - let data = ninterp::data::InterpData2D { - grid: [ - ndarray::Array1::from_vec(log_xs.clone()), - ndarray::Array1::from_vec(log_q2s.clone()), - ], - values: grid_slice, - }; - let flavor_coeffs = LogBicubicInterpolation::compute_polynomial_coefficients(&data); - - // Copy into interleaved layout - for cell in 0..n_cells { - let src = cell * 4; - let dst = (cell * n_pids + pid_idx) * 4; - interleaved[dst..dst + 4].copy_from_slice(&flavor_coeffs[src..src + 4]); - } +/// Build an `InterleavedHermite` for a subgrid, if its config is supported. +fn build_interleaved( + subgrid: &SubGrid, + config: InterpolationConfig, + n_pids: usize, +) -> Option { + let log_xs: Vec = subgrid.xs.iter().map(|&x| x.ln()).collect(); + let log_q2s: Vec = subgrid.q2s.iter().map(|&q| q.ln()).collect(); + + match config { + InterpolationConfig::TwoD => { + let extra_grids = vec![log_q2s]; + let grid = subgrid.grid.view(); + let is_8d = subgrid.is_8d(); + Some(InterleavedHermite::build( + log_xs, + extra_grids, + n_pids, + |pid, x_idx, extra| { + let q2_idx = extra[0]; + if is_8d { + grid[[0, 0, 0, 0, 0, pid, x_idx, q2_idx]] + } else { + grid[[0, 0, pid, 0, x_idx, q2_idx]] + } + }, + )) } - - Self { - coeffs: interleaved, - log_xs, - log_q2s, - nq2: nq2knots, - n_flavors: n_pids, + InterpolationConfig::ThreeDNucleons => { + let log_nucs: Vec = subgrid.nucleons.iter().map(|&v| v.ln()).collect(); + let extra_grids = vec![log_q2s, log_nucs]; + let grid = subgrid.grid.view(); + Some(InterleavedHermite::build( + log_xs, + extra_grids, + n_pids, + |pid, x_idx, extra| { + let q2_idx = extra[0]; + let nuc_idx = extra[1]; + grid[[nuc_idx, 0, pid, 0, x_idx, q2_idx]] + }, + )) } - } - - /// Evaluate the Hermite x-polynomial for a given cell and flavor. - #[inline(always)] - fn hermite_x(&self, cell: usize, flavor: usize, u: f64) -> f64 { - let base = (cell * self.n_flavors + flavor) * 4; - let c = &self.coeffs[base..base + 4]; - let u2 = u * u; - let u3 = u2 * u; - c[0] * u3 + c[1] * u2 + c[2] * u + c[3] - } - - /// Evaluate a single flavor at a given `(x, q2)` point. - /// - /// Performs log transform, binary search, and coefficient evaluation - /// in a single direct path — no vtable dispatch, no Result wrapping. - #[inline] - fn eval_single(&self, flavor: usize, x: f64, q2: f64) -> f64 { - let lx = x.ln(); - let lq2 = q2.ln(); - - let ix = match utils::find_interval_index(&self.log_xs, lx) { - Ok(i) => i, - Err(_) => return 0.0, - }; - let iq2 = match utils::find_interval_index(&self.log_q2s, lq2) { - Ok(j) => j, - Err(_) => return 0.0, - }; - - let dx = self.log_xs[ix + 1] - self.log_xs[ix]; - let dy = self.log_q2s[iq2 + 1] - self.log_q2s[iq2]; - let u = (lx - self.log_xs[ix]) / dx; - let v = (lq2 - self.log_q2s[iq2]) / dy; - - let nq2 = self.nq2; - let cell_lo = ix * nq2 + iq2; - - let vl = self.hermite_x(cell_lo, flavor, u); - let vh = self.hermite_x(cell_lo + 1, flavor, u); - - let dq_1 = dy; // = log_q2s[iq2+1] - log_q2s[iq2], already computed - - let (vdl, vdh) = if iq2 == 0 { - let vdl_val = vh - vl; - let vhh = self.hermite_x(cell_lo + 2, flavor, u); - let dq_2_inv = 1.0 / (self.log_q2s[iq2 + 2] - self.log_q2s[iq2 + 1]); - let vdh_val = (vdl_val + (vhh - vh) * dq_1 * dq_2_inv) * 0.5; - (vdl_val, vdh_val) - } else if iq2 == nq2 - 2 { - let vdh_val = vh - vl; - let vll = self.hermite_x(cell_lo - 1, flavor, u); - let dq_0_inv = 1.0 / (self.log_q2s[iq2] - self.log_q2s[iq2 - 1]); - let vdl_val = (vdh_val + (vl - vll) * dq_1 * dq_0_inv) * 0.5; - (vdl_val, vdh_val) - } else { - let vll = self.hermite_x(cell_lo - 1, flavor, u); - let dq_0_inv = 1.0 / (self.log_q2s[iq2] - self.log_q2s[iq2 - 1]); - let vhh = self.hermite_x(cell_lo + 2, flavor, u); - let dq_2_inv = 1.0 / (self.log_q2s[iq2 + 2] - self.log_q2s[iq2 + 1]); - let vdl_val = ((vh - vl) + (vl - vll) * dq_1 * dq_0_inv) * 0.5; - let vdh_val = ((vh - vl) + (vhh - vh) * dq_1 * dq_2_inv) * 0.5; - (vdl_val, vdh_val) - }; - - utils::hermite_cubic_interpolate(v, vl, vdl, vh, vdh) - } - - /// Evaluate all flavors at `(ix, iq2, u, v)`. - /// - /// `pid_slots` maps each output position to `Some(flavor_index)` or `None`. - #[allow(clippy::too_many_arguments)] - fn eval_allpids( - &self, - ix: usize, - iq2: usize, - u: f64, - v: f64, - pid_slots: &[Option], - force_positive_fn: fn(f64) -> f64, - out: &mut [f64], - ) { - let nq2 = self.nq2; - let dq_1 = self.log_q2s[iq2 + 1] - self.log_q2s[iq2]; - - for (o, slot) in out.iter_mut().zip(pid_slots.iter()) { - let fi = match *slot { - Some(idx) => idx, - None => { - *o = 0.0; - continue; - } - }; - - let cell_lo = ix * nq2 + iq2; - let cell_hi = ix * nq2 + iq2 + 1; - - let vl = self.hermite_x(cell_lo, fi, u); - let vh = self.hermite_x(cell_hi, fi, u); - - let (vdl, vdh) = if iq2 == 0 { - let vdl_val = vh - vl; - let vhh = self.hermite_x(ix * nq2 + iq2 + 2, fi, u); - let dq_2_inv = 1.0 / (self.log_q2s[iq2 + 2] - self.log_q2s[iq2 + 1]); - let vdh_val = (vdl_val + (vhh - vh) * dq_1 * dq_2_inv) * 0.5; - (vdl_val, vdh_val) - } else if iq2 == nq2 - 2 { - let vdh_val = vh - vl; - let vll = self.hermite_x(ix * nq2 + iq2 - 1, fi, u); - let dq_0_inv = 1.0 / (self.log_q2s[iq2] - self.log_q2s[iq2 - 1]); - let vdl_val = (vdh_val + (vl - vll) * dq_1 * dq_0_inv) * 0.5; - (vdl_val, vdh_val) - } else { - let vll = self.hermite_x(ix * nq2 + iq2 - 1, fi, u); - let dq_0_inv = 1.0 / (self.log_q2s[iq2] - self.log_q2s[iq2 - 1]); - let vhh = self.hermite_x(ix * nq2 + iq2 + 2, fi, u); - let dq_2_inv = 1.0 / (self.log_q2s[iq2 + 2] - self.log_q2s[iq2 + 1]); - let vdl_val = ((vh - vl) + (vl - vll) * dq_1 * dq_0_inv) * 0.5; - let vdh_val = ((vh - vl) + (vhh - vh) * dq_1 * dq_2_inv) * 0.5; - (vdl_val, vdh_val) - }; - - *o = force_positive_fn(utils::hermite_cubic_interpolate(v, vl, vdl, vh, vdh)); + InterpolationConfig::ThreeDAlphas => { + let log_alp: Vec = subgrid.alphas.iter().map(|&v| v.ln()).collect(); + let extra_grids = vec![log_q2s, log_alp]; + let grid = subgrid.grid.view(); + Some(InterleavedHermite::build( + log_xs, + extra_grids, + n_pids, + |pid, x_idx, extra| { + let q2_idx = extra[0]; + let alp_idx = extra[1]; + grid[[0, alp_idx, pid, 0, x_idx, q2_idx]] + }, + )) + } + InterpolationConfig::ThreeDKt => { + let log_kts: Vec = subgrid.kts.iter().map(|&v| v.ln()).collect(); + let extra_grids = vec![log_q2s, log_kts]; + let grid = subgrid.grid.view(); + Some(InterleavedHermite::build( + log_xs, + extra_grids, + n_pids, + |pid, x_idx, extra| { + let q2_idx = extra[0]; + let kt_idx = extra[1]; + grid[[0, 0, pid, kt_idx, x_idx, q2_idx]] + }, + )) + } + InterpolationConfig::ThreeDXi => { + let log_xis: Vec = subgrid.xis.iter().map(|&v| v.ln()).collect(); + let extra_grids = vec![log_q2s, log_xis]; + let grid = subgrid.grid.view(); + Some(InterleavedHermite::build( + log_xs, + extra_grids, + n_pids, + |pid, x_idx, extra| { + let q2_idx = extra[0]; + let xi_idx = extra[1]; + grid[[0, 0, xi_idx, 0, 0, pid, x_idx, q2_idx]] + }, + )) } + InterpolationConfig::ThreeDDelta => { + let log_del: Vec = subgrid.deltas.iter().map(|&v| v.ln()).collect(); + let extra_grids = vec![log_q2s, log_del]; + let grid = subgrid.grid.view(); + Some(InterleavedHermite::build( + log_xs, + extra_grids, + n_pids, + |pid, x_idx, extra| { + let q2_idx = extra[0]; + let del_idx = extra[1]; + grid[[0, 0, 0, del_idx, 0, pid, x_idx, q2_idx]] + }, + )) + } + InterpolationConfig::FourDNucleonsAlphas => { + let log_nucs: Vec = subgrid.nucleons.iter().map(|&v| v.ln()).collect(); + let log_alp: Vec = subgrid.alphas.iter().map(|&v| v.ln()).collect(); + let extra_grids = vec![log_q2s, log_alp, log_nucs]; + let grid = subgrid.grid.view(); + Some(InterleavedHermite::build( + log_xs, + extra_grids, + n_pids, + |pid, x_idx, extra| { + let q2_idx = extra[0]; + let alp_idx = extra[1]; + let nuc_idx = extra[2]; + grid[[nuc_idx, alp_idx, pid, 0, x_idx, q2_idx]] + }, + )) + } + InterpolationConfig::FourDNucleonsKt => { + let log_nucs: Vec = subgrid.nucleons.iter().map(|&v| v.ln()).collect(); + let log_kts: Vec = subgrid.kts.iter().map(|&v| v.ln()).collect(); + let extra_grids = vec![log_q2s, log_kts, log_nucs]; + let grid = subgrid.grid.view(); + Some(InterleavedHermite::build( + log_xs, + extra_grids, + n_pids, + |pid, x_idx, extra| { + let q2_idx = extra[0]; + let kt_idx = extra[1]; + let nuc_idx = extra[2]; + grid[[nuc_idx, 0, pid, kt_idx, x_idx, q2_idx]] + }, + )) + } + InterpolationConfig::FourDAlphasKt => { + let log_alp: Vec = subgrid.alphas.iter().map(|&v| v.ln()).collect(); + let log_kts: Vec = subgrid.kts.iter().map(|&v| v.ln()).collect(); + let extra_grids = vec![log_q2s, log_kts, log_alp]; + let grid = subgrid.grid.view(); + Some(InterleavedHermite::build( + log_xs, + extra_grids, + n_pids, + |pid, x_idx, extra| { + let q2_idx = extra[0]; + let kt_idx = extra[1]; + let alp_idx = extra[2]; + grid[[0, alp_idx, pid, kt_idx, x_idx, q2_idx]] + }, + )) + } + InterpolationConfig::FourDXiDelta => { + let log_xis: Vec = subgrid.xis.iter().map(|&v| v.ln()).collect(); + let log_del: Vec = subgrid.deltas.iter().map(|&v| v.ln()).collect(); + let extra_grids = vec![log_q2s, log_del, log_xis]; + let grid = subgrid.grid.view(); + Some(InterleavedHermite::build( + log_xs, + extra_grids, + n_pids, + |pid, x_idx, extra| { + let q2_idx = extra[0]; + let del_idx = extra[1]; + let xi_idx = extra[2]; + grid[[0, 0, xi_idx, del_idx, 0, pid, x_idx, q2_idx]] + }, + )) + } + InterpolationConfig::FiveD => { + let log_xis: Vec = subgrid.xis.iter().map(|&v| v.ln()).collect(); + let log_del: Vec = subgrid.deltas.iter().map(|&v| v.ln()).collect(); + let log_kts: Vec = subgrid.kts.iter().map(|&v| v.ln()).collect(); + // Points arrive as [kT, xi, delta, x, Q2] + // extra_grids order: [Q2, delta, xi, kT] (innermost first) + let extra_grids = vec![log_q2s, log_del, log_xis, log_kts]; + let grid = subgrid.grid.view(); + Some(InterleavedHermite::build( + log_xs, + extra_grids, + n_pids, + |pid, x_idx, extra| { + let q2_idx = extra[0]; + let del_idx = extra[1]; + let xi_idx = extra[2]; + let kt_idx = extra[3]; + grid[[0, 0, xi_idx, del_idx, kt_idx, pid, x_idx, q2_idx]] + }, + )) + } + // SixD, SevenD, and non-cubic configs are not supported + _ => None, } } @@ -506,9 +518,9 @@ pub struct GridPDF { use_log: bool, /// Cached: function pointer for force-positive clipping (avoids per-call match). force_positive_fn: fn(f64) -> f64, - /// Optional fast path for all-flavor evaluation (2D LogBicubic only). + /// Optional fast path for all-flavor evaluation (cubic Hermite, 2D–5D). /// One entry per subgrid. - interleaved: Option>, + interleaved: Option>, } impl GridPDF { @@ -531,22 +543,21 @@ impl GridPDF { | InterpolatorType::LogChebyshev ); - // Build interleaved coefficients for 2D LogBicubic grids - let interleaved = if info.interpolator_type == InterpolatorType::LogBicubic { - let all_2d = knot_array.subgrids.iter().all(|sg| { - matches!( - sg.interpolation_config(), - super::interpolator::InterpolationConfig::TwoD - ) - }); - if all_2d { - Some( - knot_array - .subgrids - .iter() - .map(|sg| InterleavedBicubic::build(sg, knot_array.pids.len())) - .collect(), - ) + // Build interleaved coefficients for cubic Hermite grids (2D–5D) + let interleaved = if matches!( + info.interpolator_type, + InterpolatorType::LogBicubic + | InterpolatorType::LogTricubic + | InterpolatorType::LogFourCubic + | InterpolatorType::LogFiveCubic + ) { + let built: Vec> = knot_array + .subgrids + .iter() + .map(|sg| build_interleaved(sg, sg.interpolation_config(), knot_array.pids.len())) + .collect(); + if built.iter().all(|o| o.is_some()) { + Some(built.into_iter().map(|o| o.unwrap()).collect()) } else { None } @@ -665,12 +676,12 @@ impl GridPDF { None => return 0.0, }; - // Fast path: 2D LogBicubic via interleaved coefficients — bypasses + // Fast path: interleaved Hermite coefficients — bypasses // vtable dispatch, ninterp validation, and Result wrapping. if let Some(ref il) = self.interleaved { - return (self.force_positive_fn)( - il[subgrid_idx].eval_single(pid_idx, points[0], points[1]), - ); + if let Some(val) = il[subgrid_idx].eval_single_fast(pid_idx, points) { + return (self.force_positive_fn)(val); + } } let mut buf = [0.0f64; 8]; @@ -698,47 +709,23 @@ impl GridPDF { } }; - // Fast path: interleaved 2D LogBicubic coefficients + // Fast path: interleaved Hermite coefficients (2D–5D) if let Some(ref il) = self.interleaved { let il = &il[subgrid_idx]; - let lx = points[0].ln(); - let lq2 = points[1].ln(); - - let ix = match utils::find_interval_index(&il.log_xs, lx) { - Ok(i) => i, - Err(_) => { - out.iter_mut().for_each(|v| *v = 0.0); - return; - } - }; - let iq2 = match utils::find_interval_index(&il.log_q2s, lq2) { - Ok(j) => j, - Err(_) => { + let loc = match il.locate(points) { + Some(l) => l, + None => { out.iter_mut().for_each(|v| *v = 0.0); return; } }; - let dx = il.log_xs[ix + 1] - il.log_xs[ix]; - let dy = il.log_q2s[iq2 + 1] - il.log_q2s[iq2]; - let u = (lx - il.log_xs[ix]) / dx; - let v = (lq2 - il.log_q2s[iq2]) / dy; - - // Precompute PID → flavor slot mapping let mut pid_slots: [Option; 32] = [None; 32]; for (i, &pid) in pids.iter().enumerate().take(32) { pid_slots[i] = self.knot_array.pid_index(pid); } - il.eval_allpids( - ix, - iq2, - u, - v, - &pid_slots[..pids.len()], - self.force_positive_fn, - out, - ); + il.eval_allpids(&loc, &pid_slots[..pids.len()], self.force_positive_fn, out); return; } diff --git a/neopdf/src/interleaved.rs b/neopdf/src/interleaved.rs new file mode 100644 index 00000000..03c9411d --- /dev/null +++ b/neopdf/src/interleaved.rs @@ -0,0 +1,518 @@ +//! Generalized interleaved Hermite interpolation for 2D–5D PDF grids. +//! +//! This module is self-contained: it depends only on `super::utils` for binary search +//! and the Hermite cubic basis function. All grid layout and config awareness lives in +//! `gridpdf.rs`, which supplies a value-extraction closure at build time. + +use super::utils; + +/// Maximum number of "extra" dimensions beyond x (Q2 + up to 3 outer dims covers 5D). +const MAX_EXTRA_DIMS: usize = 4; + +/// Result of binary-searching all dimensions. +pub(crate) struct Location { + pub ix: usize, + pub u_x: f64, + pub extra_indices: [usize; MAX_EXTRA_DIMS], + pub extra_ts: [f64; MAX_EXTRA_DIMS], +} + +/// Generalized interleaved Hermite interpolation structure. +/// +/// The x-polynomial coefficients are precomputed and stored in interleaved +/// `[cell][flavor][4]` layout, where `cell` is the linearized index over +/// all extra dimensions and x-intervals. Evaluation in dimensions beyond x +/// is done recursively via `eval_level`. +pub(crate) struct InterleavedHermite { + /// Flat coefficient array: `[(cell * n_flavors + flavor) * 4 + c]` + /// where `cell = x_cell * cells_per_x + extra_linear_index`. + coeffs: Vec, + /// Log-transformed x grid. + log_xs: Vec, + /// Log-transformed grids for extra dimensions, ordered [Q2, outer1, outer2, ...]. + extra_grids: Vec>, + /// Stride per extra dimension in the cell index. + extra_strides: Vec, + /// Product of all extra dimension sizes (number of cells per x-interval). + cells_per_x: usize, + /// Number of extra dimensions. + n_extra: usize, + /// Number of flavors stored. + n_flavors: usize, +} + +/// Compute the x-derivative at knot `ix` using central differences. +/// +/// Same algorithm as `LogBicubicInterpolation::calculate_ddx`: +/// - Interior knots: average of left and right finite differences +/// - Boundary knots: one-sided finite difference +fn compute_x_derivative(log_xs: &[f64], values: &[f64], ix: usize) -> f64 { + let n = log_xs.len(); + let del1 = if ix > 0 { + log_xs[ix] - log_xs[ix - 1] + } else { + 0.0 + }; + let del2 = if ix < n - 1 { + log_xs[ix + 1] - log_xs[ix] + } else { + 0.0 + }; + + if ix > 0 && ix < n - 1 { + let lddx = (values[ix] - values[ix - 1]) / del1; + let rddx = (values[ix + 1] - values[ix]) / del2; + (lddx + rddx) / 2.0 + } else if ix == 0 { + (values[ix + 1] - values[ix]) / del2 + } else { + // ix == n - 1 + (values[ix] - values[ix - 1]) / del1 + } +} + +impl InterleavedHermite { + /// Build interleaved x-polynomial coefficients. + /// + /// # Arguments + /// + /// * `log_xs` — log-transformed x grid knots + /// * `extra_grids` — log-transformed grids for extra dims, ordered `[Q2, outer1, ...]` + /// * `n_flavors` — number of flavors (PIDs) + /// * `value_at` — closure `(flavor, x_idx, extra_indices) -> f64` that extracts + /// the grid value in a config-agnostic way. `extra_indices` is ordered to match + /// `extra_grids`: `[q2_idx, outer1_idx, ...]`. + pub fn build( + log_xs: Vec, + extra_grids: Vec>, + n_flavors: usize, + value_at: F, + ) -> Self + where + F: Fn(usize, usize, &[usize]) -> f64, + { + let n_extra = extra_grids.len(); + let nx = log_xs.len(); + + // Compute strides and total cells per x-interval + let extra_sizes: Vec = extra_grids.iter().map(|g| g.len()).collect(); + let mut extra_strides = vec![0usize; n_extra]; + let mut cells_per_x = 1usize; + for i in 0..n_extra { + extra_strides[i] = cells_per_x; + cells_per_x *= extra_sizes[i]; + } + + let n_x_cells = nx - 1; + let total_cells = n_x_cells * cells_per_x; + let mut coeffs = vec![0.0f64; total_cells * n_flavors * 4]; + + // Temporary buffer for x-values at a fixed (flavor, extra_indices) combination + let mut x_vals = vec![0.0f64; nx]; + let mut extra_idx_buf = vec![0usize; n_extra]; + + for flavor in 0..n_flavors { + // Iterate over all combinations of extra dimension indices + for extra_linear in 0..cells_per_x { + // Decode linear index to per-dimension indices + let mut remaining = extra_linear; + for dim in 0..n_extra { + extra_idx_buf[dim] = remaining % extra_sizes[dim]; + remaining /= extra_sizes[dim]; + } + + // Gather x-values for this (flavor, extra) combination + for (ix, val) in x_vals.iter_mut().enumerate() { + *val = value_at(flavor, ix, &extra_idx_buf); + } + + // Compute polynomial coefficients for each x-interval + for ix in 0..n_x_cells { + let dx = log_xs[ix + 1] - log_xs[ix]; + let vl = x_vals[ix]; + let vh = x_vals[ix + 1]; + let vdl = compute_x_derivative(&log_xs, &x_vals, ix) * dx; + let vdh = compute_x_derivative(&log_xs, &x_vals, ix + 1) * dx; + + let a = vdh + vdl - 2.0 * vh + 2.0 * vl; + let b = 3.0 * vh - 3.0 * vl - 2.0 * vdl - vdh; + let c = vdl; + let d = vl; + + let cell = ix * cells_per_x + extra_linear; + let base = (cell * n_flavors + flavor) * 4; + coeffs[base] = a; + coeffs[base + 1] = b; + coeffs[base + 2] = c; + coeffs[base + 3] = d; + } + } + } + + Self { + coeffs, + log_xs, + extra_grids, + extra_strides, + cells_per_x, + n_extra, + n_flavors, + } + } + + /// Binary-search all dimensions and return a `Location`. + /// + /// `points` is ordered `[outer_K, ..., outer_1, x, Q2]` (same as the public API). + /// Maps: x = points[n-2], Q2 = points[n-1], outer dims = points[0..n-2] reversed + /// into extra_grids order `[Q2, outer1, outer2, ...]`. + pub fn locate(&self, points: &[f64]) -> Option { + let n = points.len(); + // Validate dimensionality: expect 1 (x) + n_extra dimensions + if n != self.n_extra + 1 { + return None; + } + let lx = points[n - 2].ln(); + let lq2 = points[n - 1].ln(); + + let ix = utils::find_interval_index(&self.log_xs, lx).ok()?; + let dx = self.log_xs[ix + 1] - self.log_xs[ix]; + let u_x = (lx - self.log_xs[ix]) / dx; + + let mut extra_indices = [0usize; MAX_EXTRA_DIMS]; + let mut extra_ts = [0.0f64; MAX_EXTRA_DIMS]; + + // extra_grids[0] = Q2 + { + let grid = &self.extra_grids[0]; + let idx = utils::find_interval_index(grid, lq2).ok()?; + let d = grid[idx + 1] - grid[idx]; + extra_indices[0] = idx; + extra_ts[0] = (lq2 - grid[idx]) / d; + } + + // extra_grids[1..] = outer dims, mapped from points[0..n-2] in reverse + for k in 1..self.n_extra { + let point_idx = n - 3 - (k - 1); // reversed: outermost point first + let log_val = points[point_idx].ln(); + let grid = &self.extra_grids[k]; + let idx = utils::find_interval_index(grid, log_val).ok()?; + let d = grid[idx + 1] - grid[idx]; + extra_indices[k] = idx; + extra_ts[k] = (log_val - grid[idx]) / d; + } + + Some(Location { + ix, + u_x, + extra_indices, + extra_ts, + }) + } + + /// Evaluate a single flavor at the given points. + #[inline] + #[cfg(test)] + pub fn eval_single(&self, flavor: usize, points: &[f64]) -> f64 { + match self.locate(points) { + Some(loc) => self.eval_at(&loc, flavor), + None => 0.0, + } + } + + /// Fast single-flavor evaluation for 2D grids. + /// Returns `None` if out of bounds or wrong dimensionality. + #[inline] + pub fn eval_single_fast(&self, flavor: usize, points: &[f64]) -> Option { + if points.len() != self.n_extra + 1 { + return None; + } + let lx = points[points.len() - 2].ln(); + let lq2 = points[points.len() - 1].ln(); + let ix = utils::find_interval_index(&self.log_xs, lx).ok()?; + let dx = self.log_xs[ix + 1] - self.log_xs[ix]; + let u_x = (lx - self.log_xs[ix]) / dx; + let cell_base = ix * self.cells_per_x; + + if self.n_extra == 1 { + let grid = &self.extra_grids[0]; + let iq2 = utils::find_interval_index(grid, lq2).ok()?; + let d = grid[iq2 + 1] - grid[iq2]; + let v = (lq2 - grid[iq2]) / d; + Some(self.eval_q2_inline(cell_base, flavor, u_x, iq2, v)) + } else { + let loc = self.locate(points)?; + Some(self.eval_level(self.n_extra, cell_base, flavor, u_x, &loc)) + } + } + + /// Evaluate a single flavor at a pre-computed `Location`. + #[inline] + #[cfg(test)] + pub fn eval_at(&self, loc: &Location, flavor: usize) -> f64 { + let cell_base = loc.ix * self.cells_per_x; + if self.n_extra == 1 { + self.eval_q2_inline( + cell_base, + flavor, + loc.u_x, + loc.extra_indices[0], + loc.extra_ts[0], + ) + } else { + self.eval_level(self.n_extra, cell_base, flavor, loc.u_x, loc) + } + } + + /// Evaluate all requested flavors at a pre-computed `Location`. + pub fn eval_allpids( + &self, + loc: &Location, + pid_slots: &[Option], + force_positive_fn: fn(f64) -> f64, + out: &mut [f64], + ) { + let cell_base = loc.ix * self.cells_per_x; + if self.n_extra == 1 { + // Specialized 2D path: avoid recursion overhead + let iq2 = loc.extra_indices[0]; + let v = loc.extra_ts[0]; + let u = loc.u_x; + let nq2 = self.cells_per_x; // cells_per_x == nq2 for 2D + let log_q2s = &self.extra_grids[0]; + let dq_1 = log_q2s[iq2 + 1] - log_q2s[iq2]; + + for (o, slot) in out.iter_mut().zip(pid_slots.iter()) { + let fi = match *slot { + Some(idx) => idx, + None => { + *o = 0.0; + continue; + } + }; + + let cell_lo = cell_base + iq2; + let vl = self.hermite_x(cell_lo, fi, u); + let vh = self.hermite_x(cell_lo + 1, fi, u); + + let (vdl, vdh) = if iq2 == 0 { + let vdl_val = vh - vl; + if nq2 > 2 { + let vhh = self.hermite_x(cell_lo + 2, fi, u); + let dq_2_inv = 1.0 / (log_q2s[iq2 + 2] - log_q2s[iq2 + 1]); + let vdh_val = (vdl_val + (vhh - vh) * dq_1 * dq_2_inv) * 0.5; + (vdl_val, vdh_val) + } else { + (vdl_val, vh - vl) + } + } else if iq2 == nq2 - 2 { + let vdh_val = vh - vl; + if nq2 > 2 { + let vll = self.hermite_x(cell_lo - 1, fi, u); + let dq_0_inv = 1.0 / (log_q2s[iq2] - log_q2s[iq2 - 1]); + let vdl_val = (vdh_val + (vl - vll) * dq_1 * dq_0_inv) * 0.5; + (vdl_val, vdh_val) + } else { + (vh - vl, vdh_val) + } + } else { + let vll = self.hermite_x(cell_lo - 1, fi, u); + let dq_0_inv = 1.0 / (log_q2s[iq2] - log_q2s[iq2 - 1]); + let vhh = self.hermite_x(cell_lo + 2, fi, u); + let dq_2_inv = 1.0 / (log_q2s[iq2 + 2] - log_q2s[iq2 + 1]); + let vdl_val = ((vh - vl) + (vl - vll) * dq_1 * dq_0_inv) * 0.5; + let vdh_val = ((vh - vl) + (vhh - vh) * dq_1 * dq_2_inv) * 0.5; + (vdl_val, vdh_val) + }; + + *o = force_positive_fn(utils::hermite_cubic_interpolate(v, vl, vdl, vh, vdh)); + } + } else { + for (o, slot) in out.iter_mut().zip(pid_slots.iter()) { + match *slot { + Some(fi) => { + *o = force_positive_fn(self.eval_level( + self.n_extra, + cell_base, + fi, + loc.u_x, + loc, + )); + } + None => *o = 0.0, + } + } + } + } + + /// Specialized 2D evaluation: Q2 Hermite interpolation without recursion. + #[inline(always)] + fn eval_q2_inline(&self, cell_base: usize, flavor: usize, u: f64, iq2: usize, v: f64) -> f64 { + let nq2 = self.cells_per_x; + let log_q2s = &self.extra_grids[0]; + let dq_1 = log_q2s[iq2 + 1] - log_q2s[iq2]; + + let cell_lo = cell_base + iq2; + let vl = self.hermite_x(cell_lo, flavor, u); + let vh = self.hermite_x(cell_lo + 1, flavor, u); + + let (vdl, vdh) = if iq2 == 0 { + let vdl_val = vh - vl; + if nq2 > 2 { + let vhh = self.hermite_x(cell_lo + 2, flavor, u); + let dq_2_inv = 1.0 / (log_q2s[iq2 + 2] - log_q2s[iq2 + 1]); + let vdh_val = (vdl_val + (vhh - vh) * dq_1 * dq_2_inv) * 0.5; + (vdl_val, vdh_val) + } else { + (vdl_val, vh - vl) + } + } else if iq2 == nq2 - 2 { + let vdh_val = vh - vl; + if nq2 > 2 { + let vll = self.hermite_x(cell_lo - 1, flavor, u); + let dq_0_inv = 1.0 / (log_q2s[iq2] - log_q2s[iq2 - 1]); + let vdl_val = (vdh_val + (vl - vll) * dq_1 * dq_0_inv) * 0.5; + (vdl_val, vdh_val) + } else { + (vh - vl, vdh_val) + } + } else { + let vll = self.hermite_x(cell_lo - 1, flavor, u); + let dq_0_inv = 1.0 / (log_q2s[iq2] - log_q2s[iq2 - 1]); + let vhh = self.hermite_x(cell_lo + 2, flavor, u); + let dq_2_inv = 1.0 / (log_q2s[iq2 + 2] - log_q2s[iq2 + 1]); + let vdl_val = ((vh - vl) + (vl - vll) * dq_1 * dq_0_inv) * 0.5; + let vdh_val = ((vh - vl) + (vhh - vh) * dq_1 * dq_2_inv) * 0.5; + (vdl_val, vdh_val) + }; + + utils::hermite_cubic_interpolate(v, vl, vdl, vh, vdh) + } + + /// Evaluate the precomputed x-polynomial at a given cell and flavor. + #[inline(always)] + fn hermite_x(&self, cell: usize, flavor: usize, u: f64) -> f64 { + let base = (cell * self.n_flavors + flavor) * 4; + let c = &self.coeffs[base..base + 4]; + let u2 = u * u; + let u3 = u2 * u; + c[0] * u3 + c[1] * u2 + c[2] * u + c[3] + } + + /// Recursive evaluation through extra dimensions. + /// + /// `level = 0` evaluates the x-polynomial (base case). + /// `level = k` performs Hermite interpolation in `extra_grids[k-1]` using + /// ~4 evaluations of `eval_level(k-1, ...)`. + #[inline(always)] + fn eval_level( + &self, + level: usize, + cell_base: usize, + flavor: usize, + u_x: f64, + loc: &Location, + ) -> f64 { + if level == 0 { + return self.hermite_x(cell_base, flavor, u_x); + } + + let dim = level - 1; // index into extra_grids + let grid = &self.extra_grids[dim]; + let n_knots = grid.len(); + let idx = loc.extra_indices[dim]; + let t = loc.extra_ts[dim]; + let stride = self.extra_strides[dim]; + + // Evaluate the inner dimension at idx and idx+1 + let cell_lo = cell_base + idx * stride; + let cell_hi = cell_base + (idx + 1) * stride; + let vl = self.eval_level(level - 1, cell_lo, flavor, u_x, loc); + let vh = self.eval_level(level - 1, cell_hi, flavor, u_x, loc); + + // Derivative estimation using the same central-difference scheme + let dq = grid[idx + 1] - grid[idx]; + + let (vdl, vdh) = if idx == 0 { + // Forward difference for low edge + let vdl_val = vh - vl; + if n_knots > 2 { + let cell_hh = cell_base + (idx + 2) * stride; + let vhh = self.eval_level(level - 1, cell_hh, flavor, u_x, loc); + let dq2_inv = 1.0 / (grid[idx + 2] - grid[idx + 1]); + let vdh_val = (vdl_val + (vhh - vh) * dq * dq2_inv) * 0.5; + (vdl_val, vdh_val) + } else { + (vdl_val, vh - vl) + } + } else if idx == n_knots - 2 { + // Backward difference for high edge + let vdh_val = vh - vl; + if n_knots > 2 { + let cell_ll = cell_base + (idx - 1) * stride; + let vll = self.eval_level(level - 1, cell_ll, flavor, u_x, loc); + let dq0_inv = 1.0 / (grid[idx] - grid[idx - 1]); + let vdl_val = (vdh_val + (vl - vll) * dq * dq0_inv) * 0.5; + (vdl_val, vdh_val) + } else { + (vh - vl, vdh_val) + } + } else { + // Central difference + let cell_ll = cell_base + (idx - 1) * stride; + let cell_hh = cell_base + (idx + 2) * stride; + let vll = self.eval_level(level - 1, cell_ll, flavor, u_x, loc); + let vhh = self.eval_level(level - 1, cell_hh, flavor, u_x, loc); + let dq0_inv = 1.0 / (grid[idx] - grid[idx - 1]); + let dq2_inv = 1.0 / (grid[idx + 2] - grid[idx + 1]); + let vdl_val = ((vh - vl) + (vl - vll) * dq * dq0_inv) * 0.5; + let vdh_val = ((vh - vl) + (vhh - vh) * dq * dq2_inv) * 0.5; + (vdl_val, vdh_val) + }; + + utils::hermite_cubic_interpolate(t, vl, vdl, vh, vdh) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_compute_x_derivative_interior() { + let log_xs = vec![0.0, 1.0, 2.0, 3.0]; + let values = vec![0.0, 1.0, 4.0, 9.0]; + // Interior: average of (1-0)/1 and (4-1)/1 = (1+3)/2 = 2 + let d = compute_x_derivative(&log_xs, &values, 1); + assert!((d - 2.0).abs() < 1e-12); + } + + #[test] + fn test_compute_x_derivative_boundary() { + let log_xs = vec![0.0, 1.0, 2.0]; + let values = vec![0.0, 2.0, 6.0]; + // Left boundary: (2-0)/1 = 2 + assert!((compute_x_derivative(&log_xs, &values, 0) - 2.0).abs() < 1e-12); + // Right boundary: (6-2)/1 = 4 + assert!((compute_x_derivative(&log_xs, &values, 2) - 4.0).abs() < 1e-12); + } + + #[test] + fn test_2d_matches_knot_values() { + // Build a simple 2x2 grid and verify knot values are reproduced exactly + let log_xs = vec![0.0, 1.0]; + let log_q2s = vec![0.0, 1.0]; + let vals = [[1.0, 2.0], [3.0, 4.0]]; // vals[x][q2] + + let ih = InterleavedHermite::build(log_xs, vec![log_q2s], 1, |_flav, x_idx, extra| { + vals[x_idx][extra[0]] + }); + + // Evaluate at exact lower-left knot: x=exp(0)=1.0, q2=exp(0)=1.0 + let v00 = ih.eval_single(0, &[1.0, 1.0]); + assert!((v00 - 1.0).abs() < 1e-12, "Got {v00} expected 1.0"); + + // Evaluate at exact upper-right knot: x=exp(1)=e, q2=exp(1)=e + let v11 = ih.eval_single(0, &[1.0_f64.exp(), 1.0_f64.exp()]); + assert!((v11 - 4.0).abs() < 1e-12, "Got {v11} expected 4.0"); + } +} diff --git a/neopdf/src/lib.rs b/neopdf/src/lib.rs index 5d1f7574..1c22b63d 100644 --- a/neopdf/src/lib.rs +++ b/neopdf/src/lib.rs @@ -48,6 +48,7 @@ pub mod alphas; pub mod converter; pub mod gridpdf; +pub mod interleaved; pub mod interpolator; pub mod manage; pub mod metadata; diff --git a/neopdf_cli/tests/converter.rs b/neopdf_cli/tests/converter.rs index c06fac9b..7196d2a2 100644 --- a/neopdf_cli/tests/converter.rs +++ b/neopdf_cli/tests/converter.rs @@ -164,7 +164,7 @@ fn combine_nuclear_pdfs() { ]) .assert() .success() - .stdout("63.389564472386645\n"); + .stdout("63.389699819551744\n"); } #[test] @@ -210,7 +210,7 @@ fn combine_alphas_pdfs() { ]) .assert() .success() - .stdout("34.352827400641736\n"); + .stdout("34.35295086733387\n"); } #[test] diff --git a/neopdf_cli/tests/pdf.rs b/neopdf_cli/tests/pdf.rs index 5d3ed9e1..c0330bbc 100644 --- a/neopdf_cli/tests/pdf.rs +++ b/neopdf_cli/tests/pdf.rs @@ -85,7 +85,7 @@ fn xfxq2_neopdf_combined_npdfs() { ]) .assert() .success() - .stdout("8.204642526146479\n"); + .stdout("8.204713375903903\n"); } #[test] @@ -106,7 +106,7 @@ fn xfxq2_neopdf_combined_npdfs_interpolation() { ]) .assert() .success() - .stdout("7.994425939656785\n"); + .stdout("7.994525227715066\n"); } #[test] From f55b3347cd971e86ca82e259d00e901ebf467682 Mon Sep 17 00:00:00 2001 From: Radonirinaunimi Date: Wed, 11 Feb 2026 14:26:36 +0100 Subject: [PATCH 5/7] Some clean ups --- neopdf/src/gridpdf.rs | 35 ++++++++++++++++++----------------- neopdf/src/interleaved.rs | 34 ++++++++++++---------------------- 2 files changed, 30 insertions(+), 39 deletions(-) diff --git a/neopdf/src/gridpdf.rs b/neopdf/src/gridpdf.rs index 10c1bda2..be804d8c 100644 --- a/neopdf/src/gridpdf.rs +++ b/neopdf/src/gridpdf.rs @@ -38,8 +38,8 @@ pub enum Error { /// resolved in O(1) via a flat array; out-of-range PIDs fall back to `None`. const PID_MIN: i32 = -6; const PID_MAX: i32 = 22; -const PID_RANGE: usize = (PID_MAX - PID_MIN + 1) as usize; // 29 -const PID_NONE: u8 = u8::MAX; // sentinel for "not present" +const PID_RANGE: usize = (PID_MAX - PID_MIN + 1) as usize; +const PID_NONE: u8 = u8::MAX; #[derive(Debug, Clone)] struct PidLookup { @@ -91,7 +91,7 @@ pub struct GridArray { pub pids: Array1, /// A collection of `SubGrid` instances that make up the full grid. pub subgrids: Vec, - /// Direct-indexed PID lookup (O(1), no hashing). + /// Direct-indexed PID lookup with complexity O(1). #[serde(skip)] pid_lookup: PidLookup, } @@ -219,10 +219,10 @@ impl GridArray { /// /// An `Option` containing the index of the subgrid if found, otherwise `None`. pub fn find_subgrid(&self, points: &[f64]) -> Option { - // Fast path: single subgrid (common case), clamping handles boundaries if self.subgrids.len() == 1 { return Some(0); } + self.subgrids .iter() .position(|sg| sg.contains_point(points)) @@ -295,14 +295,16 @@ pub enum ForcePositive { } /// Helper functions for force-positive clipping via function pointer. -fn fp_identity(v: f64) -> f64 { - v +fn fp_identity(value: f64) -> f64 { + value } -fn fp_clip_negative(v: f64) -> f64 { - v.max(0.0) + +fn fp_clip_negative(value: f64) -> f64 { + value.max(0.0) } -fn fp_clip_small(v: f64) -> f64 { - v.max(1e-10) + +fn fp_clip_small(value: f64) -> f64 { + value.max(1e-10) } /// Build an `InterleavedHermite` for a subgrid, if its config is supported. @@ -480,8 +482,8 @@ fn build_interleaved( let log_xis: Vec = subgrid.xis.iter().map(|&v| v.ln()).collect(); let log_del: Vec = subgrid.deltas.iter().map(|&v| v.ln()).collect(); let log_kts: Vec = subgrid.kts.iter().map(|&v| v.ln()).collect(); - // Points arrive as [kT, xi, delta, x, Q2] - // extra_grids order: [Q2, delta, xi, kT] (innermost first) + + // Re-order [kT, xi, delta, x, Q2] into [Q2, delta, xi, kT] (innermost first) let extra_grids = vec![log_q2s, log_del, log_xis, log_kts]; let grid = subgrid.grid.view(); Some(InterleavedHermite::build( @@ -497,7 +499,7 @@ fn build_interleaved( }, )) } - // SixD, SevenD, and non-cubic configs are not supported + // `SixD`, `SevenD`, and non-cubic configs are not supported. _ => None, } } @@ -519,7 +521,6 @@ pub struct GridPDF { /// Cached: function pointer for force-positive clipping (avoids per-call match). force_positive_fn: fn(f64) -> f64, /// Optional fast path for all-flavor evaluation (cubic Hermite, 2D–5D). - /// One entry per subgrid. interleaved: Option>, } @@ -664,7 +665,7 @@ impl GridPDF { } /// Internal fast path for interpolation — returns `f64` directly, no `Result` wrapping. - /// Avoids `map_err` string allocation. Used by `PDF::xfxq2`. + /// Avoids `map_err` string allocation. pub(crate) fn xfxq2_fast(&self, flavor_id: i32, points: &[f64]) -> f64 { let subgrid_idx = match self.knot_array.find_subgrid(points) { Some(idx) => idx, @@ -676,8 +677,8 @@ impl GridPDF { None => return 0.0, }; - // Fast path: interleaved Hermite coefficients — bypasses - // vtable dispatch, ninterp validation, and Result wrapping. + // Fast path: interleaved Hermite coefficients. This bypasses vtable dispatch, ninterp + // validation, and Result wrapping. if let Some(ref il) = self.interleaved { if let Some(val) = il[subgrid_idx].eval_single_fast(pid_idx, points) { return (self.force_positive_fn)(val); diff --git a/neopdf/src/interleaved.rs b/neopdf/src/interleaved.rs index 03c9411d..8c3c5c32 100644 --- a/neopdf/src/interleaved.rs +++ b/neopdf/src/interleaved.rs @@ -94,7 +94,7 @@ impl InterleavedHermite { let n_extra = extra_grids.len(); let nx = log_xs.len(); - // Compute strides and total cells per x-interval + // Compute strides and total cells per x-interval. let extra_sizes: Vec = extra_grids.iter().map(|g| g.len()).collect(); let mut extra_strides = vec![0usize; n_extra]; let mut cells_per_x = 1usize; @@ -107,26 +107,22 @@ impl InterleavedHermite { let total_cells = n_x_cells * cells_per_x; let mut coeffs = vec![0.0f64; total_cells * n_flavors * 4]; - // Temporary buffer for x-values at a fixed (flavor, extra_indices) combination let mut x_vals = vec![0.0f64; nx]; let mut extra_idx_buf = vec![0usize; n_extra]; for flavor in 0..n_flavors { - // Iterate over all combinations of extra dimension indices for extra_linear in 0..cells_per_x { - // Decode linear index to per-dimension indices let mut remaining = extra_linear; for dim in 0..n_extra { extra_idx_buf[dim] = remaining % extra_sizes[dim]; remaining /= extra_sizes[dim]; } - // Gather x-values for this (flavor, extra) combination for (ix, val) in x_vals.iter_mut().enumerate() { *val = value_at(flavor, ix, &extra_idx_buf); } - // Compute polynomial coefficients for each x-interval + // Compute polynomial coefficients for each x-interval. for ix in 0..n_x_cells { let dx = log_xs[ix + 1] - log_xs[ix]; let vl = x_vals[ix]; @@ -167,7 +163,6 @@ impl InterleavedHermite { /// into extra_grids order `[Q2, outer1, outer2, ...]`. pub fn locate(&self, points: &[f64]) -> Option { let n = points.len(); - // Validate dimensionality: expect 1 (x) + n_extra dimensions if n != self.n_extra + 1 { return None; } @@ -181,7 +176,6 @@ impl InterleavedHermite { let mut extra_indices = [0usize; MAX_EXTRA_DIMS]; let mut extra_ts = [0.0f64; MAX_EXTRA_DIMS]; - // extra_grids[0] = Q2 { let grid = &self.extra_grids[0]; let idx = utils::find_interval_index(grid, lq2).ok()?; @@ -190,9 +184,8 @@ impl InterleavedHermite { extra_ts[0] = (lq2 - grid[idx]) / d; } - // extra_grids[1..] = outer dims, mapped from points[0..n-2] in reverse for k in 1..self.n_extra { - let point_idx = n - 3 - (k - 1); // reversed: outermost point first + let point_idx = n - 3 - (k - 1); let log_val = points[point_idx].ln(); let grid = &self.extra_grids[k]; let idx = utils::find_interval_index(grid, log_val).ok()?; @@ -219,8 +212,8 @@ impl InterleavedHermite { } } - /// Fast single-flavor evaluation for 2D grids. - /// Returns `None` if out of bounds or wrong dimensionality. + /// Fast single-flavor evaluation for 2D grids. Returns `None` if out of bounds + /// or wrong dimensionality. #[inline] pub fn eval_single_fast(&self, flavor: usize, points: &[f64]) -> Option { if points.len() != self.n_extra + 1 { @@ -273,11 +266,10 @@ impl InterleavedHermite { ) { let cell_base = loc.ix * self.cells_per_x; if self.n_extra == 1 { - // Specialized 2D path: avoid recursion overhead let iq2 = loc.extra_indices[0]; let v = loc.extra_ts[0]; let u = loc.u_x; - let nq2 = self.cells_per_x; // cells_per_x == nq2 for 2D + let nq2 = self.cells_per_x; let log_q2s = &self.extra_grids[0]; let dq_1 = log_q2s[iq2 + 1] - log_q2s[iq2]; @@ -416,20 +408,18 @@ impl InterleavedHermite { return self.hermite_x(cell_base, flavor, u_x); } - let dim = level - 1; // index into extra_grids + let dim = level - 1; let grid = &self.extra_grids[dim]; let n_knots = grid.len(); let idx = loc.extra_indices[dim]; let t = loc.extra_ts[dim]; let stride = self.extra_strides[dim]; - // Evaluate the inner dimension at idx and idx+1 let cell_lo = cell_base + idx * stride; let cell_hi = cell_base + (idx + 1) * stride; let vl = self.eval_level(level - 1, cell_lo, flavor, u_x, loc); let vh = self.eval_level(level - 1, cell_hi, flavor, u_x, loc); - // Derivative estimation using the same central-difference scheme let dq = grid[idx + 1] - grid[idx]; let (vdl, vdh) = if idx == 0 { @@ -481,7 +471,7 @@ mod tests { fn test_compute_x_derivative_interior() { let log_xs = vec![0.0, 1.0, 2.0, 3.0]; let values = vec![0.0, 1.0, 4.0, 9.0]; - // Interior: average of (1-0)/1 and (4-1)/1 = (1+3)/2 = 2 + // Interior: average of (1-0)/1 and (4-1)/1 = (1+3)/2 = 2. let d = compute_x_derivative(&log_xs, &values, 1); assert!((d - 2.0).abs() < 1e-12); } @@ -498,20 +488,20 @@ mod tests { #[test] fn test_2d_matches_knot_values() { - // Build a simple 2x2 grid and verify knot values are reproduced exactly + // Build a simple 2x2 grid and verify knot values are reproduced exactly. let log_xs = vec![0.0, 1.0]; let log_q2s = vec![0.0, 1.0]; - let vals = [[1.0, 2.0], [3.0, 4.0]]; // vals[x][q2] + let vals = [[1.0, 2.0], [3.0, 4.0]]; let ih = InterleavedHermite::build(log_xs, vec![log_q2s], 1, |_flav, x_idx, extra| { vals[x_idx][extra[0]] }); - // Evaluate at exact lower-left knot: x=exp(0)=1.0, q2=exp(0)=1.0 + // Evaluate at exact lower-left knot: x=exp(0)=1.0, q2=exp(0)=1.0. let v00 = ih.eval_single(0, &[1.0, 1.0]); assert!((v00 - 1.0).abs() < 1e-12, "Got {v00} expected 1.0"); - // Evaluate at exact upper-right knot: x=exp(1)=e, q2=exp(1)=e + // Evaluate at exact upper-right knot: x=exp(1)=e, q2=exp(1)=e. let v11 = ih.eval_single(0, &[1.0_f64.exp(), 1.0_f64.exp()]); assert!((v11 - 4.0).abs() < 1e-12, "Got {v11} expected 4.0"); } From ee8dca5c96e7455732cd94e41638200b5243b990 Mon Sep 17 00:00:00 2001 From: Radonirinaunimi Date: Wed, 11 Feb 2026 15:46:42 +0100 Subject: [PATCH 6/7] Delay addressing the TMDlib fix --- neopdf_tmdlib/build.rs | 30 +----------------------------- 1 file changed, 1 insertion(+), 29 deletions(-) diff --git a/neopdf_tmdlib/build.rs b/neopdf_tmdlib/build.rs index a7f087f1..3fa700ac 100644 --- a/neopdf_tmdlib/build.rs +++ b/neopdf_tmdlib/build.rs @@ -61,33 +61,5 @@ fn main() { // Manually link to different libraries as TMDlib need them. println!("cargo:rustc-link-lib=gsl"); println!("cargo:rustc-link-lib=gslcblas"); - println!("cargo:rustc-link-lib=gfortran"); - - // Discover LHAPDF via lhapdf-config so we pick up the correct library path. - let lhapdf_libs = String::from_utf8( - Command::new("lhapdf-config") - .arg("--libs") - .output() - .expect("Could not find `lhapdf-config`, please install LHAPDF and make sure `lhapdf-config` is in your PATH") - .stdout, - ) - .unwrap(); - - for lib_path in lhapdf_libs - .split_whitespace() - .filter_map(|token| token.strip_prefix("-L")) - { - println!("cargo:rustc-link-search={lib_path}"); - all_lib_paths.push(lib_path.to_owned()); - } - - for lib in lhapdf_libs - .split_whitespace() - .filter_map(|token| token.strip_prefix("-l")) - { - println!("cargo:rustc-link-lib={lib}"); - } - - // Propagate library paths to dependent crates via DEP_NEOPDF_TMDLIB_LIB_PATHS. - println!("cargo:lib_paths={}", all_lib_paths.join(":")); + println!("cargo:rustc-link-lib=LHAPDF"); } From d1ed6d6145c1645ec3987651201ca01de4c09166 Mon Sep 17 00:00:00 2001 From: Radonirinaunimi Date: Wed, 11 Feb 2026 15:48:04 +0100 Subject: [PATCH 7/7] Minor fixes --- neopdf_tmdlib/build.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/neopdf_tmdlib/build.rs b/neopdf_tmdlib/build.rs index 3fa700ac..2ba78ae6 100644 --- a/neopdf_tmdlib/build.rs +++ b/neopdf_tmdlib/build.rs @@ -3,7 +3,6 @@ use std::process::Command; fn main() { - let mut all_lib_paths = Vec::new(); let cxx_flags: Vec = String::from_utf8( Command::new("TMDlib-config") .arg("--cppflags") @@ -35,7 +34,6 @@ fn main() { .filter_map(|token| token.strip_prefix("-L")) { println!("cargo:rustc-link-search={lib_path}"); - all_lib_paths.push(lib_path.to_owned()); } for lib in libs