From 6f4c5a2cef641d07c5b8b905d7cbca9e4ca24a3b Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 22 Mar 2026 13:10:07 +0000 Subject: [PATCH] =?UTF-8?q?feat(hpc):=20Session=20C=20=E2=80=94=20bgz17=20?= =?UTF-8?q?dual-path=20integration=20bridge?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add four new modules implementing the bgz17 Base17 encoding pipeline directly in ndarray for interoperability without external crate deps: - bgz17_bridge: Base17/SpoBase17/PaletteEdge types with golden-step octave encoding, L1/weighted-L1 distance, XOR bind, bundle, permute. Matches lance-graph bgz17 crate encoding exactly. - palette_distance: k-means palette builder (farthest-first init), nearest-codebook lookup, precomputed k×k distance matrices for O(1) SPO distance. 128KB matrix fits L1 cache. - layered_distance: Container W125 palette-edge read/write, W4-W5 truth-value read/write, TruthGate evidence filter with NARS-style expectation (confidence * (frequency - 0.5) + 0.5). - parallel_search: Dual-path HHTL + CLAM search with archetype-based pruning, merge-and-rerank deduplication, TruthGate filtering, and local fractal dimension estimation from palette distances. All modules have comprehensive tests (880 total tests pass). https://claude.ai/code/session_01CdqyUTUfjKZuk8YGJzv6LB --- src/hpc/bgz17_bridge.rs | 399 ++++++++++++++++++++++++ src/hpc/layered_distance.rs | 264 ++++++++++++++++ src/hpc/mod.rs | 10 + src/hpc/palette_distance.rs | 423 ++++++++++++++++++++++++++ src/hpc/parallel_search.rs | 590 ++++++++++++++++++++++++++++++++++++ 5 files changed, 1686 insertions(+) create mode 100644 src/hpc/bgz17_bridge.rs create mode 100644 src/hpc/layered_distance.rs create mode 100644 src/hpc/palette_distance.rs create mode 100644 src/hpc/parallel_search.rs diff --git a/src/hpc/bgz17_bridge.rs b/src/hpc/bgz17_bridge.rs new file mode 100644 index 00000000..88978e22 --- /dev/null +++ b/src/hpc/bgz17_bridge.rs @@ -0,0 +1,399 @@ +//! 
Bridge between ndarray Fingerprint<256> (2KB) and Base17 (34 bytes). +//! +//! Converts flat 16384-bit fingerprint planes to i16[17] base patterns +//! using golden-step octave averaging. +//! +//! This is a self-contained port of the bgz17 crate's `base17` module, +//! ensuring data interoperability without adding an external dependency. + +const BASE_DIM: usize = 17; +const FULL_DIM: usize = 16384; +const GOLDEN_STEP: usize = 11; +const FP_SCALE: f64 = 256.0; + +/// Golden-step position table. +const GOLDEN_POS: [u8; BASE_DIM] = { + let mut t = [0u8; BASE_DIM]; + let mut i = 0; + while i < BASE_DIM { + t[i] = ((i * GOLDEN_STEP) % BASE_DIM) as u8; + i += 1; + } + t +}; + +/// Number of octaves. +const N_OCTAVES: usize = (FULL_DIM + BASE_DIM - 1) / BASE_DIM; + +/// 17-dimensional base pattern. 34 bytes. +/// +/// Each dimension is an i16 fixed-point value (scaled by 256) representing +/// the average of golden-step-selected positions from a 16384-element accumulator. +#[derive(Clone, Debug, PartialEq, Eq, Hash)] +pub struct Base17 { + pub dims: [i16; BASE_DIM], +} + +/// SPO triple of Base17 patterns. 102 bytes. +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct SpoBase17 { + pub subject: Base17, + pub predicate: Base17, + pub object: Base17, +} + +/// Palette edge: 3-byte compressed SPO triple (one u8 index per plane). +#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)] +pub struct PaletteEdge { + pub s_idx: u8, + pub p_idx: u8, + pub o_idx: u8, +} + +impl Base17 { + /// Byte size of serialized form. + pub const BYTE_SIZE: usize = BASE_DIM * 2; // 34 + + /// Encode i8[16384] accumulator into a Base17 pattern. + /// + /// For each of 17 base dimensions, averages the accumulator values at + /// golden-step-selected positions across all octaves, then scales by + /// FP_SCALE (256) into fixed-point i16. 
+ pub fn encode(acc: &[i8]) -> Self { + assert!(acc.len() >= FULL_DIM); + let mut sum = [0i64; BASE_DIM]; + let mut count = [0u32; BASE_DIM]; + + for octave in 0..N_OCTAVES { + for bi in 0..BASE_DIM { + let dim = octave * BASE_DIM + GOLDEN_POS[bi] as usize; + if dim < FULL_DIM { + sum[bi] += acc[dim] as i64; + count[bi] += 1; + } + } + } + + let mut dims = [0i16; BASE_DIM]; + for d in 0..BASE_DIM { + if count[d] > 0 { + let mean = sum[d] as f64 / count[d] as f64; + dims[d] = (mean * FP_SCALE).round().clamp(-32768.0, 32767.0) as i16; + } + } + Base17 { dims } + } + + /// All-zero pattern (identity for xor_bind). + pub fn zero() -> Self { + Base17 { dims: [0i16; BASE_DIM] } + } + + /// L1 (Manhattan) distance. + #[inline] + pub fn l1(&self, other: &Base17) -> u32 { + let mut d = 0u32; + for i in 0..BASE_DIM { + d += (self.dims[i] as i32 - other.dims[i] as i32).unsigned_abs(); + } + d + } + + /// PCDVQ-informed L1: weight sign dimension 20x over mantissa. + /// + /// From arxiv 2506.05432: direction (sign) is 20x more sensitive to + /// quantization than magnitude. BF16 decomposition maps to polar: + /// dim 0 = sign (direction), dims 1-6 = exponent (magnitude scale), + /// dims 7-16 = mantissa (fine detail). + #[inline] + pub fn l1_weighted(&self, other: &Base17) -> u32 { + let mut d = 0u32; + for i in 0..BASE_DIM { + let diff = (self.dims[i] as i32 - other.dims[i] as i32).unsigned_abs(); + let weight = if i == 0 { 20 } else if i < 7 { 3 } else { 1 }; + d += diff * weight; + } + d + } + + /// Sign-bit agreement (out of 17). + #[inline] + pub fn sign_agreement(&self, other: &Base17) -> u32 { + let mut a = 0u32; + for i in 0..BASE_DIM { + if (self.dims[i] >= 0) == (other.dims[i] >= 0) { + a += 1; + } + } + a + } + + /// XOR bind: path composition in hyperdimensional space. + /// + /// Bitwise XOR on each i16 dimension (reinterpreted as u16). + /// Self-inverse: `a.xor_bind(&b).xor_bind(&b) == a`. + /// Identity: `a.xor_bind(&Base17::zero()) == a`. 
+ #[inline] + pub fn xor_bind(&self, other: &Base17) -> Base17 { + let mut dims = [0i16; BASE_DIM]; + for i in 0..BASE_DIM { + dims[i] = (self.dims[i] as u16 ^ other.dims[i] as u16) as i16; + } + Base17 { dims } + } + + /// Bundle: element-wise mean of the patterns (superposition / set union in VSA). + /// + /// For each dimension, sums all patterns and takes the integer average, + /// truncated toward zero. An empty slice yields the zero pattern. + pub fn bundle(patterns: &[&Base17]) -> Base17 { + if patterns.is_empty() { + return Base17::zero(); + } + let mut dims = [0i16; BASE_DIM]; + let mut sums = [0i64; BASE_DIM]; + for p in patterns { + for d in 0..BASE_DIM { + sums[d] += p.dims[d] as i64; + } + } + let n = patterns.len() as i64; + for d in 0..BASE_DIM { + dims[d] = (sums[d] / n).clamp(-32768, 32767) as i16; + } + Base17 { dims } + } + + /// Permute: cyclic dimension shift (sequence encoding in VSA). + /// + /// `result[i] = self[(i + shift) % 17]`. + #[inline] + pub fn permute(&self, shift: usize) -> Base17 { + let mut dims = [0i16; BASE_DIM]; + for i in 0..BASE_DIM { + dims[i] = self.dims[(i + shift) % BASE_DIM]; + } + Base17 { dims } + } + + /// Serialize to 34 bytes (little-endian). + pub fn to_bytes(&self) -> [u8; Self::BYTE_SIZE] { + let mut buf = [0u8; Self::BYTE_SIZE]; + for i in 0..BASE_DIM { + let b = self.dims[i].to_le_bytes(); + buf[i * 2] = b[0]; + buf[i * 2 + 1] = b[1]; + } + buf + } + + /// Deserialize from 34 bytes (little-endian). + pub fn from_bytes(buf: &[u8; Self::BYTE_SIZE]) -> Self { + let mut dims = [0i16; BASE_DIM]; + for i in 0..BASE_DIM { + dims[i] = i16::from_le_bytes([buf[i * 2], buf[i * 2 + 1]]); + } + Base17 { dims } + } +} + +impl SpoBase17 { + /// Byte size of serialized form. + pub const BYTE_SIZE: usize = Base17::BYTE_SIZE * 3; // 102 + + /// Encode three i8[16384] accumulator planes. 
+ pub fn encode(s: &[i8], p: &[i8], o: &[i8]) -> Self { + SpoBase17 { + subject: Base17::encode(s), + predicate: Base17::encode(p), + object: Base17::encode(o), + } + } + + /// Combined L1 distance (sum of three planes). + #[inline] + pub fn l1(&self, other: &SpoBase17) -> u32 { + self.subject.l1(&other.subject) + + self.predicate.l1(&other.predicate) + + self.object.l1(&other.object) + } + + /// Per-plane L1 distances. + #[inline] + pub fn l1_per_plane(&self, other: &SpoBase17) -> (u32, u32, u32) { + ( + self.subject.l1(&other.subject), + self.predicate.l1(&other.predicate), + self.object.l1(&other.object), + ) + } +} + +impl PaletteEdge { + /// Serialize to 3 bytes. + pub fn to_bytes(self) -> [u8; 3] { + [self.s_idx, self.p_idx, self.o_idx] + } + + /// Deserialize from 3 bytes. + pub fn from_bytes(b: &[u8; 3]) -> Self { + PaletteEdge { s_idx: b[0], p_idx: b[1], o_idx: b[2] } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_golden_coverage() { + let mut seen = [false; BASE_DIM]; + for &p in &GOLDEN_POS { seen[p as usize] = true; } + assert!(seen.iter().all(|&s| s)); + } + + #[test] + fn test_l1_self_zero() { + let a = Base17 { dims: [100, -50, 0, 127, -128, 1, -1, 50, 25, -25, 0, 0, 0, 0, 0, 0, 0] }; + assert_eq!(a.l1(&a), 0); + } + + #[test] + fn test_l1_symmetric() { + let a = Base17 { dims: [100; BASE_DIM] }; + let b = Base17 { dims: [-100; BASE_DIM] }; + assert_eq!(a.l1(&b), b.l1(&a)); + } + + #[test] + fn test_xor_bind_self_inverse() { + let a = Base17 { dims: [100, -200, 300, -400, 500, -600, 700, -800, 900, -1000, 1100, -1200, 1300, -1400, 1500, -1600, 1700] }; + let b = Base17 { dims: [-50, 150, -250, 350, -450, 550, -650, 750, -850, 950, -1050, 1150, -1250, 1350, -1450, 1550, -1650] }; + let bound = a.xor_bind(&b); + let recovered = bound.xor_bind(&b); + assert_eq!(a, recovered, "xor_bind must be its own inverse"); + } + + #[test] + fn test_xor_bind_identity() { + let a = Base17 { dims: [100, -200, 300, -400, 500, -600, 700, -800, 
900, -1000, 1100, -1200, 1300, -1400, 1500, -1600, 1700] }; + let zero = Base17::zero(); + assert_eq!(a.xor_bind(&zero), a, "xor_bind with zero must be identity"); + } + + #[test] + fn test_bundle_single() { + let a = Base17 { dims: [100; BASE_DIM] }; + let result = Base17::bundle(&[&a]); + assert_eq!(result, a); + } + + #[test] + fn test_bundle_majority() { + let pos = Base17 { dims: [100; BASE_DIM] }; + let neg = Base17 { dims: [-100; BASE_DIM] }; + let result = Base17::bundle(&[&pos, &pos, &neg]); + for d in 0..BASE_DIM { + assert!(result.dims[d] > 0, "dim {} should be positive from majority vote", d); + } + } + + #[test] + fn test_permute_identity() { + let a = Base17 { dims: [1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12, 13, -14, 15, -16, 17] }; + assert_eq!(a.permute(0), a, "permute(0) must be identity"); + assert_eq!(a.permute(BASE_DIM), a, "permute(17) must wrap to identity"); + } + + #[test] + fn test_permute_cyclic() { + let a = Base17 { dims: [1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12, 13, -14, 15, -16, 17] }; + let shifted = a.permute(1); + for i in 0..BASE_DIM { + assert_eq!(shifted.dims[i], a.dims[(i + 1) % BASE_DIM]); + } + } + + #[test] + fn test_byte_roundtrip() { + let a = Base17 { dims: [1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12, 13, -14, 15, -16, 17] }; + let bytes = a.to_bytes(); + let b = Base17::from_bytes(&bytes); + assert_eq!(a, b); + } + + #[test] + fn test_encode_all_zeros() { + let acc = vec![0i8; FULL_DIM]; + let b = Base17::encode(&acc); + assert_eq!(b, Base17::zero()); + } + + #[test] + fn test_encode_all_positive() { + let acc = vec![1i8; FULL_DIM]; + let b = Base17::encode(&acc); + // Each dim should average to 1.0, scaled by 256 = 256 + for d in 0..BASE_DIM { + assert_eq!(b.dims[d], 256, "dim {} should be 256", d); + } + } + + #[test] + fn test_spo_l1_self_zero() { + let edge = SpoBase17 { + subject: Base17 { dims: [100; BASE_DIM] }, + predicate: Base17 { dims: [-50; BASE_DIM] }, + object: Base17 { dims: [25; BASE_DIM] }, + }; + 
assert_eq!(edge.l1(&edge), 0); + } + + #[test] + fn test_spo_encode() { + let s = vec![1i8; FULL_DIM]; + let p = vec![-1i8; FULL_DIM]; + let o = vec![0i8; FULL_DIM]; + let spo = SpoBase17::encode(&s, &p, &o); + assert!(spo.subject.dims[0] > 0); + assert!(spo.predicate.dims[0] < 0); + assert_eq!(spo.object.dims[0], 0); + } + + #[test] + fn test_palette_edge_roundtrip() { + let pe = PaletteEdge { s_idx: 42, p_idx: 128, o_idx: 255 }; + let bytes = pe.to_bytes(); + let pe2 = PaletteEdge::from_bytes(&bytes); + assert_eq!(pe, pe2); + } + + #[test] + fn test_l1_weighted_sign_dim_dominates() { + let a = Base17 { dims: [0; 17] }; + let mut b_sign = Base17 { dims: [0; 17] }; + b_sign.dims[0] = 100; + let mut b_mant = Base17 { dims: [0; 17] }; + b_mant.dims[10] = 100; + + let d_sign = a.l1_weighted(&b_sign); + let d_mant = a.l1_weighted(&b_mant); + + assert_eq!(d_sign, 100 * 20); + assert_eq!(d_mant, 100 * 1); + assert!(d_sign > d_mant * 10); + } + + #[test] + fn test_sign_agreement_self() { + let a = Base17 { dims: [100, -50, 30, 0, 10, -20, 40, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] }; + assert_eq!(a.sign_agreement(&a), BASE_DIM as u32); + } + + #[test] + fn test_sign_agreement_opposite() { + let a = Base17 { dims: [1; BASE_DIM] }; + let b = Base17 { dims: [-1; BASE_DIM] }; + assert_eq!(a.sign_agreement(&b), 0); + } +} diff --git a/src/hpc/layered_distance.rs b/src/hpc/layered_distance.rs new file mode 100644 index 00000000..96b6d192 --- /dev/null +++ b/src/hpc/layered_distance.rs @@ -0,0 +1,264 @@ +//! Layered distance function for CLAM tree building and search. +//! +//! Provides O(1) distance lookups by reading palette indices from containers +//! (the lance-graph 256-word container format) and using precomputed +//! distance matrices. +//! +//! Also provides TruthGate for filtering results by minimum expectation +//! (expectation = `confidence * (frequency - 0.5) + 0.5`). 
+ +use super::bgz17_bridge::PaletteEdge; +use super::palette_distance::SpoDistanceMatrices; + +/// Container word layout constants (matching lance-graph container.rs). +/// W112..W128 hold Base17 data; W125 holds the palette edge. +pub const W_BASE17_START: usize = 112; +pub const W_PALETTE_WORD: usize = 125; + +/// Word indices for truth values in container format. +/// W4 = frequency (f32 in lower 32 bits), W5 = confidence (f32 in lower 32 bits). +const W_FREQUENCY: usize = 4; +const W_CONFIDENCE: usize = 5; + +/// Read palette edge from container W125. +/// +/// The palette edge is packed into the low 24 bits of word 125: +/// bits [0..8) = s_idx, bits [8..16) = p_idx, bits [16..24) = o_idx. +pub fn read_palette_edge(container: &[u64; 256]) -> PaletteEdge { + let w = container[W_PALETTE_WORD]; + PaletteEdge { + s_idx: (w & 0xFF) as u8, + p_idx: ((w >> 8) & 0xFF) as u8, + o_idx: ((w >> 16) & 0xFF) as u8, + } +} + +/// Write palette edge into container W125. +pub fn write_palette_edge(container: &mut [u64; 256], pe: PaletteEdge) { + let packed = pe.s_idx as u64 + | ((pe.p_idx as u64) << 8) + | ((pe.o_idx as u64) << 16); + // Preserve upper bits + container[W_PALETTE_WORD] = (container[W_PALETTE_WORD] & !0xFF_FFFF) | packed; +} + +/// Read truth value (frequency, confidence) from container W4-W5. +/// +/// Both are stored as f32 reinterpreted into the lower 32 bits of the u64 word. +pub fn read_truth(container: &[u64; 256]) -> (f32, f32) { + let freq = f32::from_bits(container[W_FREQUENCY] as u32); + let conf = f32::from_bits(container[W_CONFIDENCE] as u32); + (freq, conf) +} + +/// Write truth value (frequency, confidence) into container W4-W5. 
+pub fn write_truth(container: &mut [u64; 256], frequency: f32, confidence: f32) { + container[W_FREQUENCY] = (container[W_FREQUENCY] & !0xFFFF_FFFF) + | frequency.to_bits() as u64; + container[W_CONFIDENCE] = (container[W_CONFIDENCE] & !0xFFFF_FFFF) + | confidence.to_bits() as u64; +} + +/// Layered distance: O(1) palette lookup between two containers. +/// +/// Reads palette edges from W125 of each container, then looks up the +/// precomputed SPO distance in the distance matrices. +pub fn palette_distance( + dm: &SpoDistanceMatrices, + a: &[u64; 256], + b: &[u64; 256], +) -> u32 { + let pe_a = read_palette_edge(a); + let pe_b = read_palette_edge(b); + dm.spo_distance( + pe_a.s_idx, pe_a.p_idx, pe_a.o_idx, + pe_b.s_idx, pe_b.p_idx, pe_b.o_idx, + ) +} + +/// TruthGate: filter by minimum expectation. +/// +/// Expectation is computed as: `confidence * (frequency - 0.5) + 0.5` +/// which maps (freq, conf) to [0, 1] where: +/// - expectation = 0.5 means no evidence either way +/// - expectation = 1.0 means certain positive +/// - expectation = 0.0 means certain negative +#[derive(Clone, Copy, Debug)] +pub struct TruthGate { + pub min_expectation: f32, +} + +impl TruthGate { + /// No filtering: all results pass. + pub const OPEN: Self = Self { min_expectation: 0.0 }; + /// Weak evidence threshold. + pub const WEAK: Self = Self { min_expectation: 0.4 }; + /// Normal evidence threshold. + pub const NORMAL: Self = Self { min_expectation: 0.6 }; + /// Strong evidence threshold. + pub const STRONG: Self = Self { min_expectation: 0.75 }; + /// Near-certain evidence threshold. + pub const CERTAIN: Self = Self { min_expectation: 0.9 }; + + /// Check if a (frequency, confidence) pair passes the gate. + #[inline] + pub fn passes(&self, frequency: f32, confidence: f32) -> bool { + let expectation = confidence * (frequency - 0.5) + 0.5; + expectation >= self.min_expectation + } + + /// Compute expectation from frequency and confidence. 
+ #[inline] + pub fn expectation(frequency: f32, confidence: f32) -> f32 { + confidence * (frequency - 0.5) + 0.5 + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn make_container(s_idx: u8, p_idx: u8, o_idx: u8, freq: f32, conf: f32) -> [u64; 256] { + let mut c = [0u64; 256]; + write_palette_edge(&mut c, PaletteEdge { s_idx, p_idx, o_idx }); + write_truth(&mut c, freq, conf); + c + } + + #[test] + fn test_read_write_palette_edge_roundtrip() { + let mut container = [0u64; 256]; + let pe = PaletteEdge { s_idx: 42, p_idx: 128, o_idx: 255 }; + write_palette_edge(&mut container, pe); + let read = read_palette_edge(&container); + assert_eq!(pe, read); + } + + #[test] + fn test_read_write_palette_edge_zero() { + let mut container = [0u64; 256]; + let pe = PaletteEdge { s_idx: 0, p_idx: 0, o_idx: 0 }; + write_palette_edge(&mut container, pe); + let read = read_palette_edge(&container); + assert_eq!(pe, read); + } + + #[test] + fn test_read_write_truth_roundtrip() { + let mut container = [0u64; 256]; + write_truth(&mut container, 0.75, 0.9); + let (f, c) = read_truth(&container); + assert!((f - 0.75).abs() < 1e-6); + assert!((c - 0.9).abs() < 1e-6); + } + + #[test] + fn test_read_write_truth_zero() { + let mut container = [0u64; 256]; + write_truth(&mut container, 0.0, 0.0); + let (f, c) = read_truth(&container); + assert_eq!(f, 0.0); + assert_eq!(c, 0.0); + } + + #[test] + fn test_palette_distance_self_zero() { + use super::super::palette_distance::{Palette, SpoDistanceMatrices}; + use super::super::bgz17_bridge::Base17; + + let entries: Vec<Base17> = (0..16) + .map(|i| { + let mut dims = [0i16; 17]; + for d in 0..17 { + dims[d] = ((i * 97 + d * 31) % 512) as i16 - 256; + } + Base17 { dims } + }) + .collect(); + let pal = Palette { entries }; + let dm = SpoDistanceMatrices::build(&pal, &pal, &pal); + + let c = make_container(5, 5, 5, 0.8, 0.9); + assert_eq!(palette_distance(&dm, &c, &c), 0); + } + + #[test] + fn test_palette_distance_symmetric() { + use 
super::super::palette_distance::{Palette, SpoDistanceMatrices}; + use super::super::bgz17_bridge::Base17; + + let entries: Vec<Base17> = (0..16) + .map(|i| { + let mut dims = [0i16; 17]; + for d in 0..17 { + dims[d] = ((i * 97 + d * 31) % 512) as i16 - 256; + } + Base17 { dims } + }) + .collect(); + let pal = Palette { entries }; + let dm = SpoDistanceMatrices::build(&pal, &pal, &pal); + + let a = make_container(3, 7, 11, 0.8, 0.9); + let b = make_container(5, 2, 14, 0.6, 0.7); + assert_eq!(palette_distance(&dm, &a, &b), palette_distance(&dm, &b, &a)); + } + + #[test] + fn test_truth_gate_open() { + assert!(TruthGate::OPEN.passes(0.0, 0.0)); + assert!(TruthGate::OPEN.passes(1.0, 1.0)); + assert!(TruthGate::OPEN.passes(0.5, 0.5)); + } + + #[test] + fn test_truth_gate_certain() { + // freq=1.0, conf=1.0 => expectation = 1.0 * (1.0 - 0.5) + 0.5 = 1.0 + assert!(TruthGate::CERTAIN.passes(1.0, 1.0)); + // freq=0.5, conf=1.0 => expectation = 1.0 * (0.5 - 0.5) + 0.5 = 0.5 + assert!(!TruthGate::CERTAIN.passes(0.5, 1.0)); + // freq=0.9, conf=0.95 => expectation = 0.95 * 0.4 + 0.5 = 0.88 + assert!(!TruthGate::CERTAIN.passes(0.9, 0.95)); + } + + #[test] + fn test_truth_gate_normal() { + // freq=0.8, conf=0.8 => expectation = 0.8 * 0.3 + 0.5 = 0.74 + assert!(TruthGate::NORMAL.passes(0.8, 0.8)); + // freq=0.5, conf=0.5 => expectation = 0.5 * 0.0 + 0.5 = 0.5 + assert!(!TruthGate::NORMAL.passes(0.5, 0.5)); + } + + #[test] + fn test_truth_gate_weak() { + // freq=0.6, conf=0.5 => expectation = 0.5 * 0.1 + 0.5 = 0.55 + assert!(TruthGate::WEAK.passes(0.6, 0.5)); + // freq=0.3, conf=0.5 => expectation = 0.5 * -0.2 + 0.5 = 0.4 + assert!(TruthGate::WEAK.passes(0.3, 0.5)); + } + + #[test] + fn test_expectation_formula() { + // Perfect positive evidence + assert!((TruthGate::expectation(1.0, 1.0) - 1.0).abs() < 1e-6); + // No evidence + assert!((TruthGate::expectation(0.5, 0.0) - 0.5).abs() < 1e-6); + // Perfect negative evidence + assert!((TruthGate::expectation(0.0, 1.0) - 0.0).abs() < 1e-6); + 
// Zero confidence, any frequency => 0.5 + assert!((TruthGate::expectation(0.0, 0.0) - 0.5).abs() < 1e-6); + assert!((TruthGate::expectation(1.0, 0.0) - 0.5).abs() < 1e-6); + } + + #[test] + fn test_write_palette_edge_preserves_upper_bits() { + let mut container = [0u64; 256]; + container[W_PALETTE_WORD] = 0xFFFF_FFFF_FF00_0000; + let pe = PaletteEdge { s_idx: 1, p_idx: 2, o_idx: 3 }; + write_palette_edge(&mut container, pe); + // Upper bits should be preserved + assert_eq!(container[W_PALETTE_WORD] & 0xFFFF_FFFF_FF00_0000, 0xFFFF_FFFF_FF00_0000); + let read = read_palette_edge(&container); + assert_eq!(read, pe); + } +} diff --git a/src/hpc/mod.rs b/src/hpc/mod.rs index d7cd51d2..a2ac89e1 100644 --- a/src/hpc/mod.rs +++ b/src/hpc/mod.rs @@ -116,6 +116,16 @@ pub mod crystal_encoder; #[allow(missing_docs)] pub mod udf_kernels; +// Session C: bgz17 dual-path integration +#[allow(missing_docs)] +pub mod bgz17_bridge; +#[allow(missing_docs)] +pub mod palette_distance; +#[allow(missing_docs)] +pub mod layered_distance; +#[allow(missing_docs)] +pub mod parallel_search; + #[cfg(test)] mod e2e_tests { //! End-to-end pipeline test: Fingerprint → Node → Seal → Cascade → CLAM → Causality → BNN diff --git a/src/hpc/palette_distance.rs b/src/hpc/palette_distance.rs new file mode 100644 index 00000000..08f842b8 --- /dev/null +++ b/src/hpc/palette_distance.rs @@ -0,0 +1,423 @@ +//! Palette distance matrix and search. +//! +//! Precomputed k x k distance matrix for O(1) palette distance lookups. +//! After building a palette, compute ALL pairwise L1 distances once. +//! Every subsequent distance lookup becomes a single u16 array load. +//! +//! Self-contained re-implementation of lance-graph's bgz17 palette and +//! distance_matrix modules for interoperability. + +use super::bgz17_bridge::{Base17, PaletteEdge, SpoBase17}; + +const MAX_PALETTE_SIZE: usize = 256; +const BASE_DIM: usize = 17; + +/// A palette codebook: up to 256 archetypal Base17 patterns. 
+#[derive(Clone, Debug)] +pub struct Palette { + /// The archetype entries. + pub entries: Vec<Base17>, +} + +/// Precomputed pairwise distance matrix for one plane's palette. +/// +/// `data[i * k + j]` = scaled L1 distance between palette entries i and j. +/// Symmetric: `data[i * k + j] == data[j * k + i]`. +/// Diagonal: `data[i * k + i] == 0`. +#[derive(Clone, Debug)] +pub struct DistanceMatrix { + /// Flat storage: k x k u16 values. + pub data: Vec<u16>, + /// Palette size (k). `data.len() == k * k`. + pub k: usize, +} + +/// Three distance matrices: one per S/P/O plane. +#[derive(Clone, Debug)] +pub struct SpoDistanceMatrices { + pub subject: DistanceMatrix, + pub predicate: DistanceMatrix, + pub object: DistanceMatrix, +} + +impl Palette { + /// Number of entries. + pub fn len(&self) -> usize { + self.entries.len() + } + + /// Whether the palette is empty. + pub fn is_empty(&self) -> bool { + self.entries.is_empty() + } + + /// Byte size of the codebook. + pub fn codebook_bytes(&self) -> usize { + self.entries.len() * Base17::BYTE_SIZE + } + + /// Find the nearest palette entry to a given base pattern. Returns index. + pub fn nearest(&self, query: &Base17) -> u8 { + let mut best_idx = 0u8; + let mut best_dist = u32::MAX; + for (i, entry) in self.entries.iter().enumerate() { + let d = query.l1(entry); + if d < best_dist { + best_dist = d; + best_idx = i as u8; + } + } + best_idx + } + + /// Encode an SpoBase17 edge to palette indices. + pub fn encode_edge(&self, edge: &SpoBase17) -> PaletteEdge { + PaletteEdge { + s_idx: self.nearest(&edge.subject), + p_idx: self.nearest(&edge.predicate), + o_idx: self.nearest(&edge.object), + } + } + + /// Decode palette indices back to Base17 patterns (lossy). 
+ pub fn decode_edge(&self, pe: PaletteEdge) -> SpoBase17 { + SpoBase17 { + subject: self.entries[pe.s_idx as usize].clone(), + predicate: self.entries[pe.p_idx as usize].clone(), + object: self.entries[pe.o_idx as usize].clone(), + } + } + + /// Build a palette from a collection of Base17 patterns using k-means. + /// + /// `k`: target palette size (max 256). + /// `max_iter`: k-means iterations (typically converges in 1-3). + /// + /// Initialization: deterministic farthest-first traversal, seeded with the first pattern. + pub fn build(patterns: &[Base17], k: usize, max_iter: usize) -> Self { + let k = k.min(MAX_PALETTE_SIZE).min(patterns.len()); + if k == 0 { + return Palette { entries: Vec::new() }; + } + + // Initialize centroids: farthest-first (first = first pattern, rest = farthest) + let mut centroids: Vec<Base17> = Vec::with_capacity(k); + centroids.push(patterns[0].clone()); + + for _ in 1..k { + let mut best_idx = 0; + let mut best_dist = 0u64; + for (i, p) in patterns.iter().enumerate() { + let min_d: u32 = centroids.iter().map(|c| p.l1(c)).min().unwrap_or(u32::MAX); + if min_d as u64 > best_dist { + best_dist = min_d as u64; + best_idx = i; + } + } + centroids.push(patterns[best_idx].clone()); + } + + // K-means iterations + for _iter in 0..max_iter { + // Assign each pattern to nearest centroid + let mut assignments = vec![0usize; patterns.len()]; + for (i, p) in patterns.iter().enumerate() { + let mut best = 0; + let mut best_d = u32::MAX; + for (c, centroid) in centroids.iter().enumerate() { + let d = p.l1(centroid); + if d < best_d { + best_d = d; + best = c; + } + } + assignments[i] = best; + } + + // Recompute centroids + let mut new_centroids: Vec<[i64; BASE_DIM]> = vec![[0i64; BASE_DIM]; k]; + let mut counts = vec![0u32; k]; + + for (i, p) in patterns.iter().enumerate() { + let c = assignments[i]; + counts[c] += 1; + for d in 0..BASE_DIM { + new_centroids[c][d] += p.dims[d] as i64; + } + } + + let mut changed = false; + for c in 0..k { + if counts[c] == 0 { + 
continue; + } + let mut new_dims = [0i16; BASE_DIM]; + for d in 0..BASE_DIM { + new_dims[d] = (new_centroids[c][d] / counts[c] as i64) as i16; + } + let new_base = Base17 { dims: new_dims }; + if new_base != centroids[c] { + changed = true; + centroids[c] = new_base; + } + } + + if !changed { + break; + } + } + + Palette { entries: centroids } + } + + /// Build three palettes (one per S/P/O plane) from a set of SpoBase17 edges. + pub fn build_spo(edges: &[SpoBase17], k: usize, max_iter: usize) -> (Self, Self, Self) { + let s_patterns: Vec<Base17> = edges.iter().map(|e| e.subject.clone()).collect(); + let p_patterns: Vec<Base17> = edges.iter().map(|e| e.predicate.clone()).collect(); + let o_patterns: Vec<Base17> = edges.iter().map(|e| e.object.clone()).collect(); + + ( + Palette::build(&s_patterns, k, max_iter), + Palette::build(&p_patterns, k, max_iter), + Palette::build(&o_patterns, k, max_iter), + ) + } +} + +impl DistanceMatrix { + /// Build from a palette. O(k^2) pairwise comparisons. + /// + /// Distances are scaled to u16 range: `d / max_l1 * 65535` where + /// max_l1 = 17 * 65535 = 1,114,095. + pub fn build(palette: &Palette) -> Self { + let k = palette.len(); + let mut data = vec![0u16; k * k]; + + for i in 0..k { + for j in (i + 1)..k { + let d = palette.entries[i].l1(&palette.entries[j]); + let max_l1 = 17u64 * 65535; + let scaled = ((d as u64 * 65535) / max_l1).min(65535) as u16; + data[i * k + j] = scaled; + data[j * k + i] = scaled; + } + } + + DistanceMatrix { data, k } + } + + /// Look up distance between two palette indices. O(1). + #[inline] + pub fn distance(&self, a: u8, b: u8) -> u16 { + self.data[a as usize * self.k + b as usize] + } + + /// Byte size of the matrix. + pub fn byte_size(&self) -> usize { + self.k * self.k * 2 + } +} + +impl SpoDistanceMatrices { + /// Build from three palettes. 
+ pub fn build(s_pal: &Palette, p_pal: &Palette, o_pal: &Palette) -> Self { + SpoDistanceMatrices { + subject: DistanceMatrix::build(s_pal), + predicate: DistanceMatrix::build(p_pal), + object: DistanceMatrix::build(o_pal), + } + } + + /// Combined S+P+O distance from palette indices. O(1): 3 array loads. + #[inline] + pub fn spo_distance(&self, a_s: u8, a_p: u8, a_o: u8, b_s: u8, b_p: u8, b_o: u8) -> u32 { + self.subject.distance(a_s, b_s) as u32 + + self.predicate.distance(a_p, b_p) as u32 + + self.object.distance(a_o, b_o) as u32 + } + + /// Total byte size of all three matrices. + pub fn byte_size(&self) -> usize { + self.subject.byte_size() + self.predicate.byte_size() + self.object.byte_size() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn make_patterns(n: usize) -> Vec<Base17> { + (0..n) + .map(|i| { + let mut dims = [0i16; BASE_DIM]; + for d in 0..BASE_DIM { + dims[d] = ((i * 7 + d * 13) % 256) as i16 - 128; + } + Base17 { dims } + }) + .collect() + } + + fn make_palette(k: usize) -> Palette { + let entries = (0..k) + .map(|i| { + let mut dims = [0i16; BASE_DIM]; + for d in 0..BASE_DIM { + dims[d] = ((i * 97 + d * 31) % 512) as i16 - 256; + } + Base17 { dims } + }) + .collect(); + Palette { entries } + } + + #[test] + fn test_build_palette() { + let patterns = make_patterns(100); + let palette = Palette::build(&patterns, 16, 10); + assert_eq!(palette.len(), 16); + } + + #[test] + fn test_build_palette_empty() { + let patterns: Vec<Base17> = Vec::new(); + let palette = Palette::build(&patterns, 16, 10); + assert_eq!(palette.len(), 0); + assert!(palette.is_empty()); + } + + #[test] + fn test_build_palette_k_larger_than_n() { + let patterns = make_patterns(5); + let palette = Palette::build(&patterns, 16, 10); + assert_eq!(palette.len(), 5); + } + + #[test] + fn test_nearest_self() { + let patterns = make_patterns(50); + let palette = Palette::build(&patterns, 50, 1); + for p in &patterns { + let idx = palette.nearest(p); + let dist = p.l1(&palette.entries[idx as 
usize]); + assert!(dist < 1000, "nearest distance {} too large", dist); + } + } + + #[test] + fn test_encode_decode() { + let patterns = make_patterns(100); + let palette = Palette::build(&patterns, 32, 5); + let edge = SpoBase17 { + subject: patterns[10].clone(), + predicate: patterns[20].clone(), + object: patterns[30].clone(), + }; + let encoded = palette.encode_edge(&edge); + let decoded = palette.decode_edge(encoded); + assert!(edge.subject.l1(&decoded.subject) < 2000); + } + + #[test] + fn test_convergence() { + let patterns = make_patterns(200); + let p1 = Palette::build(&patterns, 32, 1); + let p5 = Palette::build(&patterns, 32, 5); + let p20 = Palette::build(&patterns, 32, 20); + + let total_dist = |pal: &Palette| -> u64 { + patterns + .iter() + .map(|p| { + let idx = pal.nearest(p); + p.l1(&pal.entries[idx as usize]) as u64 + }) + .sum::<u64>() + }; + + let d1 = total_dist(&p1); + let d5 = total_dist(&p5); + let d20 = total_dist(&p20); + assert!(d5 <= d1, "5 iters should be <= 1 iter: {} vs {}", d5, d1); + assert!(d20 <= d5, "20 iters should be <= 5 iters: {} vs {}", d20, d5); + } + + #[test] + fn test_distance_self_zero() { + let pal = make_palette(32); + let dm = DistanceMatrix::build(&pal); + for i in 0..32 { + assert_eq!(dm.distance(i, i), 0, "self-distance must be 0 for entry {}", i); + } + } + + #[test] + fn test_distance_symmetric() { + let pal = make_palette(32); + let dm = DistanceMatrix::build(&pal); + for i in 0..32u8 { + for j in 0..32u8 { + assert_eq!(dm.distance(i, j), dm.distance(j, i)); + } + } + } + + #[test] + fn test_spo_distance_self_zero() { + let pal = make_palette(16); + let spo = SpoDistanceMatrices::build(&pal, &pal, &pal); + assert_eq!(spo.spo_distance(5, 5, 5, 5, 5, 5), 0); + } + + #[test] + fn test_cache_friendliness() { + let pal = make_palette(256); + let dm = DistanceMatrix::build(&pal); + assert_eq!(dm.byte_size(), 256 * 256 * 2); // 128 KB + assert!(dm.byte_size() <= 131072); + } + + #[test] + fn test_codebook_bytes() { + let 
pal = make_palette(64); + assert_eq!(pal.codebook_bytes(), 64 * 34); + } + + #[test] + fn test_spo_distance_triangle_inequality() { + let pal = make_palette(16); + let spo = SpoDistanceMatrices::build(&pal, &pal, &pal); + // d(a, c) <= d(a, b) + d(b, c) for the scaled metric + let d_ac = spo.spo_distance(0, 0, 0, 2, 2, 2); + let d_ab = spo.spo_distance(0, 0, 0, 1, 1, 1); + let d_bc = spo.spo_distance(1, 1, 1, 2, 2, 2); + assert!( + d_ac <= d_ab + d_bc, + "triangle inequality violated: d(a,c)={} > d(a,b)={} + d(b,c)={}", + d_ac, d_ab, d_bc + ); + } + + #[test] + fn test_build_spo() { + let patterns = make_patterns(100); + let edges: Vec = (0..30) + .map(|i| SpoBase17 { + subject: patterns[i].clone(), + predicate: patterns[i + 30].clone(), + object: patterns[i + 60].clone(), + }) + .collect(); + let (s, p, o) = Palette::build_spo(&edges, 16, 5); + assert_eq!(s.len(), 16); + assert_eq!(p.len(), 16); + assert_eq!(o.len(), 16); + } + + #[test] + fn test_spo_byte_size() { + let pal = make_palette(32); + let spo = SpoDistanceMatrices::build(&pal, &pal, &pal); + assert_eq!(spo.byte_size(), 3 * 32 * 32 * 2); + } +} diff --git a/src/hpc/parallel_search.rs b/src/hpc/parallel_search.rs new file mode 100644 index 00000000..ad2554bd --- /dev/null +++ b/src/hpc/parallel_search.rs @@ -0,0 +1,590 @@ +//! Parallel search combining HHTL progressive cascade with CLAM tree pruning. +//! +//! Dual-path search strategy: +//! - HHTL (Hierarchical Hash Table Lookup): palette-level progressive refinement +//! - CLAM: archetype tree pruning using precomputed distance matrices +//! +//! Results are merged and filtered through TruthGate for evidence quality. + +use super::bgz17_bridge::PaletteEdge; +use super::palette_distance::SpoDistanceMatrices; +use super::layered_distance::{TruthGate, read_palette_edge, read_truth}; + +/// Search result with distance and truth metadata. +#[derive(Debug, Clone)] +pub struct SearchResult { + /// Index of the matching node in the scope. 
+ pub node_idx: usize, + /// SPO distance (sum of 3 plane distances from precomputed matrix). + pub distance: u32, + /// Truth frequency of the matching node. + pub frequency: f32, + /// Truth confidence of the matching node. + pub confidence: f32, +} + +impl SearchResult { + /// Compute expectation from truth values. + pub fn expectation(&self) -> f32 { + TruthGate::expectation(self.frequency, self.confidence) + } +} + +/// Scope: search-ready palette data for a set of nodes. +/// +/// Contains the containers (256 u64 words each), extracted palette edges, +/// and precomputed SPO distance matrices for O(1) distance lookups. +pub struct PaletteScope { + /// Palette indices extracted from each container's W125. + pub palette_indices: Vec, + /// Precomputed SPO distance matrices. + pub distances: SpoDistanceMatrices, + /// Raw containers (for reading truth values and other metadata). + pub containers: Vec<[u64; 256]>, +} + +impl PaletteScope { + /// Build from containers: extract palette edges from W125 of each. + pub fn from_containers( + containers: Vec<[u64; 256]>, + distances: SpoDistanceMatrices, + ) -> Self { + let palette_indices: Vec = containers + .iter() + .map(|c| read_palette_edge(c)) + .collect(); + PaletteScope { + palette_indices, + distances, + containers, + } + } + + /// Number of nodes in this scope. + pub fn len(&self) -> usize { + self.containers.len() + } + + /// Whether the scope is empty. + pub fn is_empty(&self) -> bool { + self.containers.is_empty() + } + + /// Compute palette distance between query and node at index. + #[inline] + fn distance_to(&self, query: &PaletteEdge, idx: usize) -> u32 { + let c = &self.palette_indices[idx]; + self.distances.spo_distance( + query.s_idx, query.p_idx, query.o_idx, + c.s_idx, c.p_idx, c.o_idx, + ) + } + + /// HHTL search: progressive refinement using palette distances. + /// + /// Scans all nodes, computing palette distance and keeping the top-k + /// nearest results. 
This is the "brute force" path that benefits from + /// the O(1) palette distance lookups (3 array loads per comparison). + pub fn hhtl_search(&self, query: &PaletteEdge, k: usize) -> Vec<(usize, u32)> { + if self.palette_indices.is_empty() || k == 0 { + return Vec::new(); + } + + // Use a max-heap approach: track the k nearest + let mut results: Vec<(usize, u32)> = Vec::with_capacity(k + 1); + let mut threshold = u32::MAX; + + for idx in 0..self.palette_indices.len() { + let d = self.distance_to(query, idx); + if d < threshold || results.len() < k { + results.push((idx, d)); + results.sort_unstable_by_key(|&(_, dist)| dist); + if results.len() > k { + results.truncate(k); + } + if results.len() == k { + threshold = results[k - 1].1; + } + } + } + + results + } + + /// CLAM-style search: archetype-based pruning. + /// + /// Partitions nodes into archetypes (clusters) and prunes distant + /// clusters using triangle inequality on palette distances. + /// Falls back to exhaustive scan for small scopes. 
+ pub fn clam_search(&self, query: &PaletteEdge, k: usize) -> Vec<(usize, u32)> { + let n = self.palette_indices.len(); + if n == 0 || k == 0 { + return Vec::new(); + } + + // For small datasets, exhaustive scan is faster than tree overhead + if n <= 64 { + return self.hhtl_search(query, k); + } + + // Build archetype clusters: pick sqrt(n) archetypes via farthest-first + let n_archetypes = (n as f64).sqrt().ceil() as usize; + let n_archetypes = n_archetypes.max(2).min(n); + + // Pick first archetype as node 0 + let mut archetype_indices: Vec = Vec::with_capacity(n_archetypes); + archetype_indices.push(0); + + // Farthest-first selection + let mut min_dists = vec![u32::MAX; n]; + for _ in 1..n_archetypes { + let last = *archetype_indices.last().unwrap(); + let last_pe = &self.palette_indices[last]; + // Update min distances + for i in 0..n { + let d = self.distances.spo_distance( + last_pe.s_idx, last_pe.p_idx, last_pe.o_idx, + self.palette_indices[i].s_idx, + self.palette_indices[i].p_idx, + self.palette_indices[i].o_idx, + ); + if d < min_dists[i] { + min_dists[i] = d; + } + } + // Pick the farthest node + let mut best_idx = 0; + let mut best_d = 0u32; + for i in 0..n { + if min_dists[i] > best_d { + best_d = min_dists[i]; + best_idx = i; + } + } + archetype_indices.push(best_idx); + } + + // Assign each node to nearest archetype + let mut assignments = vec![0usize; n]; + let mut archetype_radii = vec![0u32; n_archetypes]; + + for i in 0..n { + let pe_i = &self.palette_indices[i]; + let mut best_arch = 0; + let mut best_d = u32::MAX; + for (a, &arch_idx) in archetype_indices.iter().enumerate() { + let arch_pe = &self.palette_indices[arch_idx]; + let d = self.distances.spo_distance( + pe_i.s_idx, pe_i.p_idx, pe_i.o_idx, + arch_pe.s_idx, arch_pe.p_idx, arch_pe.o_idx, + ); + if d < best_d { + best_d = d; + best_arch = a; + } + } + assignments[i] = best_arch; + if best_d > archetype_radii[best_arch] { + archetype_radii[best_arch] = best_d; + } + } + + // Compute 
query distance to each archetype + let mut archetype_dists: Vec<(usize, u32)> = archetype_indices + .iter() + .enumerate() + .map(|(a, &arch_idx)| { + let d = self.distance_to(query, arch_idx); + (a, d) + }) + .collect(); + archetype_dists.sort_unstable_by_key(|&(_, d)| d); + + // Collect candidates from non-pruned clusters + let mut results: Vec<(usize, u32)> = Vec::new(); + let mut current_threshold = u32::MAX; + + for &(arch_a, arch_d) in &archetype_dists { + // Prune: if archetype distance - radius > current threshold, skip + if results.len() >= k && arch_d > current_threshold + archetype_radii[arch_a] { + continue; + } + + // Scan all nodes in this cluster + for i in 0..n { + if assignments[i] != arch_a { + continue; + } + let d = self.distance_to(query, i); + if d < current_threshold || results.len() < k { + results.push((i, d)); + results.sort_unstable_by_key(|&(_, dist)| dist); + if results.len() > k { + results.truncate(k); + } + if results.len() == k { + current_threshold = results[k - 1].1; + } + } + } + } + + results + } +} + +/// Parallel search: run HHTL + CLAM, merge, apply TruthGate. +/// +/// Both search paths run independently and their results are merged +/// to produce the best top-k, filtered by truth-value evidence quality. 
+pub fn parallel_search( + scope: &PaletteScope, + query: &PaletteEdge, + k: usize, + gate: &TruthGate, +) -> Vec { + if scope.is_empty() || k == 0 { + return Vec::new(); + } + + // Run both search paths + let hhtl = scope.hhtl_search(query, k); + let clam = scope.clam_search(query, k); + + // Merge results + let merged = merge_and_rerank(hhtl, clam, k); + + // Apply TruthGate filter and build SearchResults + let mut results = Vec::with_capacity(merged.len()); + for (idx, distance) in merged { + let (frequency, confidence) = read_truth(&scope.containers[idx]); + if gate.passes(frequency, confidence) { + results.push(SearchResult { + node_idx: idx, + distance, + frequency, + confidence, + }); + } + } + + // Re-truncate after filtering (gate may have removed some) + if results.len() > k { + results.truncate(k); + } + + results +} + +/// Merge and re-rank two result sets, taking union of top-k. +/// +/// Deduplicates by node index, keeping the minimum distance for each node. +fn merge_and_rerank( + hhtl: Vec<(usize, u32)>, + clam: Vec<(usize, u32)>, + k: usize, +) -> Vec<(usize, u32)> { + // Collect all results into a map (node_idx -> min_distance) + let mut map = std::collections::HashMap::new(); + for (idx, d) in hhtl.into_iter().chain(clam.into_iter()) { + let entry = map.entry(idx).or_insert(u32::MAX); + if d < *entry { + *entry = d; + } + } + + // Sort by distance + let mut results: Vec<(usize, u32)> = map.into_iter().collect(); + results.sort_unstable_by_key(|&(_, d)| d); + results.truncate(k); + results +} + +/// Compute local fractal dimension from palette distances. +/// +/// LFD = log2(|B(center, radius)| / |B(center, radius/2)|) +/// where B(c, r) is the set of nodes within distance r of center. 
+pub fn lfd_from_palette( + scope: &PaletteScope, + center_idx: usize, + radius: u32, +) -> f64 { + if radius == 0 || scope.is_empty() { + return 0.0; + } + + let center = &scope.palette_indices[center_idx]; + let half_radius = radius / 2; + + let mut count_r = 0usize; + let mut count_half_r = 0usize; + + for (i, pe) in scope.palette_indices.iter().enumerate() { + if i == center_idx { + count_r += 1; + count_half_r += 1; + continue; + } + let d = scope.distances.spo_distance( + center.s_idx, center.p_idx, center.o_idx, + pe.s_idx, pe.p_idx, pe.o_idx, + ); + if d <= radius { + count_r += 1; + } + if d <= half_radius { + count_half_r += 1; + } + } + + if count_half_r == 0 || count_r <= count_half_r { + 0.0 + } else { + (count_r as f64 / count_half_r as f64).log2() + } +} + +#[cfg(test)] +mod tests { + use super::*; + use super::super::bgz17_bridge::{Base17, PaletteEdge}; + use super::super::palette_distance::{Palette, SpoDistanceMatrices}; + use super::super::layered_distance::write_palette_edge; + + fn make_test_scope(n: usize) -> PaletteScope { + let entries: Vec = (0..32) + .map(|i| { + let mut dims = [0i16; 17]; + for d in 0..17 { + dims[d] = ((i * 97 + d * 31) % 512) as i16 - 256; + } + Base17 { dims } + }) + .collect(); + let pal = Palette { entries }; + let dm = SpoDistanceMatrices::build(&pal, &pal, &pal); + + let mut containers: Vec<[u64; 256]> = Vec::with_capacity(n); + for i in 0..n { + let mut c = [0u64; 256]; + let pe = PaletteEdge { + s_idx: (i % 32) as u8, + p_idx: ((i * 3) % 32) as u8, + o_idx: ((i * 7) % 32) as u8, + }; + write_palette_edge(&mut c, pe); + // Write truth: freq=0.8, conf=0.9 + super::super::layered_distance::write_truth(&mut c, 0.8, 0.9); + containers.push(c); + } + + PaletteScope::from_containers(containers, dm) + } + + #[test] + fn test_hhtl_search_basic() { + let scope = make_test_scope(100); + let query = PaletteEdge { s_idx: 0, p_idx: 0, o_idx: 0 }; + let results = scope.hhtl_search(&query, 5); + assert_eq!(results.len(), 5); + // 
Results should be sorted by distance + for w in results.windows(2) { + assert!(w[0].1 <= w[1].1); + } + } + + #[test] + fn test_hhtl_search_empty() { + let scope = make_test_scope(0); + let query = PaletteEdge { s_idx: 0, p_idx: 0, o_idx: 0 }; + let results = scope.hhtl_search(&query, 5); + assert!(results.is_empty()); + } + + #[test] + fn test_hhtl_search_k_larger_than_n() { + let scope = make_test_scope(3); + let query = PaletteEdge { s_idx: 0, p_idx: 0, o_idx: 0 }; + let results = scope.hhtl_search(&query, 10); + assert_eq!(results.len(), 3); + } + + #[test] + fn test_clam_search_basic() { + let scope = make_test_scope(100); + let query = PaletteEdge { s_idx: 0, p_idx: 0, o_idx: 0 }; + let results = scope.clam_search(&query, 5); + assert_eq!(results.len(), 5); + for w in results.windows(2) { + assert!(w[0].1 <= w[1].1); + } + } + + #[test] + fn test_clam_search_small_fallback() { + // Small scope should fallback to HHTL + let scope = make_test_scope(10); + let query = PaletteEdge { s_idx: 0, p_idx: 0, o_idx: 0 }; + let hhtl = scope.hhtl_search(&query, 5); + let clam = scope.clam_search(&query, 5); + // Should return the same results for small scope + assert_eq!(hhtl.len(), clam.len()); + for (h, c) in hhtl.iter().zip(clam.iter()) { + assert_eq!(h.0, c.0); + assert_eq!(h.1, c.1); + } + } + + #[test] + fn test_parallel_search_basic() { + let scope = make_test_scope(100); + let query = PaletteEdge { s_idx: 0, p_idx: 0, o_idx: 0 }; + let results = parallel_search(&scope, &query, 5, &TruthGate::OPEN); + assert!(results.len() <= 5); + assert!(!results.is_empty()); + // Results should be sorted by distance + for w in results.windows(2) { + assert!(w[0].distance <= w[1].distance); + } + } + + #[test] + fn test_parallel_search_with_truth_gate() { + // Build scope with mixed truth values + let entries: Vec = (0..32) + .map(|i| { + let mut dims = [0i16; 17]; + for d in 0..17 { + dims[d] = ((i * 97 + d * 31) % 512) as i16 - 256; + } + Base17 { dims } + }) + .collect(); + 
let pal = Palette { entries }; + let dm = SpoDistanceMatrices::build(&pal, &pal, &pal); + + let mut containers: Vec<[u64; 256]> = Vec::new(); + for i in 0..20 { + let mut c = [0u64; 256]; + let pe = PaletteEdge { + s_idx: (i % 32) as u8, + p_idx: ((i * 3) % 32) as u8, + o_idx: ((i * 7) % 32) as u8, + }; + write_palette_edge(&mut c, pe); + // Alternate high and low truth + if i % 2 == 0 { + super::super::layered_distance::write_truth(&mut c, 0.9, 0.9); + } else { + super::super::layered_distance::write_truth(&mut c, 0.3, 0.2); + } + containers.push(c); + } + let scope = PaletteScope::from_containers(containers, dm); + + // CERTAIN gate should filter out low-truth nodes + let query = PaletteEdge { s_idx: 0, p_idx: 0, o_idx: 0 }; + let all = parallel_search(&scope, &query, 20, &TruthGate::OPEN); + let certain = parallel_search(&scope, &query, 20, &TruthGate::CERTAIN); + + // Certain should have fewer results + assert!(certain.len() <= all.len()); + + // All results in certain should have high expectation + for r in &certain { + let exp = TruthGate::expectation(r.frequency, r.confidence); + assert!( + exp >= 0.9, + "result expectation {} should be >= 0.9", + exp + ); + } + } + + #[test] + fn test_parallel_search_empty() { + let scope = make_test_scope(0); + let query = PaletteEdge { s_idx: 0, p_idx: 0, o_idx: 0 }; + let results = parallel_search(&scope, &query, 5, &TruthGate::OPEN); + assert!(results.is_empty()); + } + + #[test] + fn test_merge_and_rerank_dedup() { + let a = vec![(0, 10), (1, 20), (2, 30)]; + let b = vec![(0, 5), (3, 25), (2, 15)]; + let merged = merge_and_rerank(a, b, 4); + // node 0 should have min distance 5 + assert_eq!(merged[0], (0, 5)); + // node 2 should have min distance 15 + let node2 = merged.iter().find(|&&(idx, _)| idx == 2).unwrap(); + assert_eq!(node2.1, 15); + assert_eq!(merged.len(), 4); + } + + #[test] + fn test_merge_and_rerank_truncate() { + let a = vec![(0, 10), (1, 20), (2, 30)]; + let b = vec![(3, 5), (4, 25)]; + let merged = 
merge_and_rerank(a, b, 3); + assert_eq!(merged.len(), 3); + assert_eq!(merged[0].0, 3); // distance 5 + assert_eq!(merged[1].0, 0); // distance 10 + assert_eq!(merged[2].0, 1); // distance 20 + } + + #[test] + fn test_lfd_from_palette() { + let scope = make_test_scope(100); + // Use a large radius so most nodes are within + let lfd = lfd_from_palette(&scope, 0, u32::MAX); + // With all nodes within radius and most within half, LFD should be small + assert!(lfd >= 0.0, "LFD should be non-negative, got {}", lfd); + } + + #[test] + fn test_lfd_zero_radius() { + let scope = make_test_scope(100); + let lfd = lfd_from_palette(&scope, 0, 0); + assert_eq!(lfd, 0.0); + } + + #[test] + fn test_lfd_empty_scope() { + let scope = make_test_scope(0); + let lfd = lfd_from_palette(&scope, 0, 100); + assert_eq!(lfd, 0.0); + } + + #[test] + fn test_search_result_expectation() { + let sr = SearchResult { + node_idx: 0, + distance: 100, + frequency: 0.8, + confidence: 0.9, + }; + let exp = sr.expectation(); + let expected = 0.9 * (0.8 - 0.5) + 0.5; // 0.77 + assert!((exp - expected).abs() < 1e-6); + } + + #[test] + fn test_scope_from_containers() { + let scope = make_test_scope(50); + assert_eq!(scope.len(), 50); + assert!(!scope.is_empty()); + assert_eq!(scope.palette_indices.len(), 50); + assert_eq!(scope.containers.len(), 50); + } + + #[test] + fn test_hhtl_finds_exact_match() { + // Node 0 has palette edge (0, 0, 0), query is (0, 0, 0) => distance 0 + let scope = make_test_scope(100); + let query = PaletteEdge { s_idx: 0, p_idx: 0, o_idx: 0 }; + let results = scope.hhtl_search(&query, 1); + assert_eq!(results.len(), 1); + assert_eq!(results[0].1, 0, "exact match should have distance 0"); + } +}