diff --git a/crates/thinking-engine/src/builder.rs b/crates/thinking-engine/src/builder.rs new file mode 100644 index 00000000..da655468 --- /dev/null +++ b/crates/thinking-engine/src/builder.rs @@ -0,0 +1,304 @@ +//! ThinkingEngineBuilder: fluent API for engine construction. +//! +//! ```rust,no_run +//! let engine = ThinkingEngineBuilder::new() +//! .lens(Lens::Jina) +//! .table_type(TableType::SignedI8) +//! .pooling(Pooling::TopK(5)) +//! .on_commit(|bus| l4.learn_from(bus)) +//! .build(); +//! ``` + +use crate::engine::ThinkingEngine; +use crate::signed_engine::SignedThinkingEngine; +use crate::pooling::Pooling; + +/// Which baked lens to use. +#[derive(Clone, Debug)] +pub enum Lens { + Jina, + BgeM3, + Reranker, + Custom(Vec), +} + +/// Distance table encoding. +#[derive(Clone, Copy, Debug, PartialEq)] +pub enum TableType { + UnsignedU8, + SignedI8, +} + +/// Built engine: either unsigned or signed. +pub enum BuiltEngine { + Unsigned(ThinkingEngine), + Signed(SignedThinkingEngine), +} + +impl BuiltEngine { + pub fn perturb(&mut self, indices: &[u16]) { + match self { + BuiltEngine::Unsigned(e) => e.perturb(indices), + BuiltEngine::Signed(e) => e.perturb(indices), + } + } + + pub fn reset(&mut self) { + match self { + BuiltEngine::Unsigned(e) => e.reset(), + BuiltEngine::Signed(e) => e.reset(), + } + } + + pub fn energy(&self) -> &[f32] { + match self { + BuiltEngine::Unsigned(e) => &e.energy, + BuiltEngine::Signed(e) => &e.energy, + } + } + + pub fn cycles(&self) -> u16 { + match self { + BuiltEngine::Unsigned(e) => e.cycles, + BuiltEngine::Signed(e) => e.cycles, + } + } + + pub fn size(&self) -> usize { + match self { + BuiltEngine::Unsigned(e) => e.size, + BuiltEngine::Signed(e) => e.size, + } + } + + pub fn think(&mut self, max_cycles: usize) { + match self { + BuiltEngine::Unsigned(e) => { e.think(max_cycles); } + BuiltEngine::Signed(e) => { e.think(max_cycles); } + } + } +} + +/// Commit sink: where committed thoughts go. +pub type CommitSink = Box; + +/// Builder for ThinkingEngine with fluent API. +pub struct ThinkingEngineBuilder { + lens: Option, + table_type: TableType, + pooling: Pooling, + max_cycles: usize, + sinks: Vec, +} + +impl ThinkingEngineBuilder { + pub fn new() -> Self { + Self { + lens: None, + table_type: TableType::UnsignedU8, + pooling: Pooling::ArgMax, + max_cycles: 10, + sinks: Vec::new(), + } + } + + /// Select a baked lens. + pub fn lens(mut self, lens: Lens) -> Self { + self.lens = Some(lens); + self + } + + /// Set table encoding type: u8 (default) or i8 signed. + pub fn table_type(mut self, tt: TableType) -> Self { + self.table_type = tt; + self + } + + /// Set pooling strategy. + pub fn pooling(mut self, p: Pooling) -> Self { + self.pooling = p; + self + } + + /// Set max think cycles (default: 10). + pub fn max_cycles(mut self, n: usize) -> Self { + self.max_cycles = n; + self + } + + /// Add a commit sink (adapter pattern). + /// Sinks receive the BusDto after every commit. + pub fn on_commit(mut self, sink: impl Fn(&crate::dto::BusDto) + Send + Sync + 'static) -> Self { + self.sinks.push(Box::new(sink)); + self + } + + /// Build the engine. + pub fn build(self) -> Result { + let table = match self.lens { + Some(Lens::Jina) => crate::jina_lens::JINA_HDR_TABLE.to_vec(), + Some(Lens::BgeM3) => crate::bge_m3_lens::BGE_M3_HDR_TABLE.to_vec(), + Some(Lens::Reranker) => crate::reranker_lens::RERANKER_HDR_TABLE.to_vec(), + Some(Lens::Custom(t)) => t, + None => return Err("no lens specified".into()), + }; + + let engine = match self.table_type { + TableType::UnsignedU8 => BuiltEngine::Unsigned(ThinkingEngine::new(table)), + TableType::SignedI8 => BuiltEngine::Signed( + crate::signed_engine::SignedThinkingEngine::from_unsigned(&table) + ), + }; + + Ok(ConfiguredEngine { + engine, + pooling: self.pooling, + max_cycles: self.max_cycles, + sinks: self.sinks, + }) + } +} + +impl Default for ThinkingEngineBuilder { + fn default() -> Self { + Self::new() + } +} + +/// A fully configured engine with pooling and commit sinks. +pub struct ConfiguredEngine { + pub engine: BuiltEngine, + pub pooling: Pooling, + pub max_cycles: usize, + sinks: Vec, +} + +impl ConfiguredEngine { + /// Full pipeline: perturb → think → pool → commit → notify sinks. + pub fn process(&mut self, codebook_indices: &[u16]) -> crate::dto::BusDto { + self.engine.reset(); + self.engine.perturb(codebook_indices); + self.engine.think(self.max_cycles); + + let bus = self.pooling.to_bus(self.engine.energy(), self.engine.cycles()); + + // Notify all sinks + for sink in &self.sinks { + sink(&bus); + } + + bus + } + + /// Access the underlying engine. + pub fn inner(&self) -> &BuiltEngine { + &self.engine + } + + /// Access the pooling strategy. + pub fn pooling(&self) -> &Pooling { + &self.pooling + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::sync::{Arc, atomic::{AtomicU32, Ordering}}; + + #[test] + fn builder_jina_unsigned() { + let engine = ThinkingEngineBuilder::new() + .lens(Lens::Jina) + .build() + .unwrap(); + assert_eq!(engine.engine.size(), 256); + } + + #[test] + fn builder_reranker_signed() { + let engine = ThinkingEngineBuilder::new() + .lens(Lens::Reranker) + .table_type(TableType::SignedI8) + .build() + .unwrap(); + assert_eq!(engine.engine.size(), 256); + } + + #[test] + fn builder_with_pooling() { + let mut engine = ThinkingEngineBuilder::new() + .lens(Lens::Jina) + .pooling(Pooling::TopK(3)) + .build() + .unwrap(); + + let bus = engine.process(&[50, 52, 54]); + assert!(bus.energy > 0.0); + } + + #[test] + fn builder_with_sink() { + let counter = Arc::new(AtomicU32::new(0)); + let counter_clone = counter.clone(); + + let mut engine = ThinkingEngineBuilder::new() + .lens(Lens::BgeM3) + .on_commit(move |_bus| { + counter_clone.fetch_add(1, Ordering::Relaxed); + }) + .build() + .unwrap(); + + engine.process(&[10, 20, 30]); + engine.process(&[40, 50, 60]); + + assert_eq!(counter.load(Ordering::Relaxed), 2); + } + + #[test] + fn builder_no_lens_errors() { + let result = ThinkingEngineBuilder::new().build(); + assert!(result.is_err()); + } + + #[test] + fn builder_custom_table() { + let mut table = vec![128u8; 64 * 64]; + for i in 0..64 { table[i * 64 + i] = 255; } + + let engine = ThinkingEngineBuilder::new() + .lens(Lens::Custom(table)) + .table_type(TableType::SignedI8) + .pooling(Pooling::Mean { threshold: 0.001 }) + .max_cycles(5) + .build() + .unwrap(); + + assert_eq!(engine.engine.size(), 64); + } + + #[test] + fn builder_multiple_sinks() { + let log = Arc::new(std::sync::Mutex::new(Vec::new())); + let log1 = log.clone(); + let log2 = log.clone(); + + let mut engine = ThinkingEngineBuilder::new() + .lens(Lens::Jina) + .on_commit(move |bus| { + log1.lock().unwrap().push(format!("sink1:{}", bus.codebook_index)); + }) + .on_commit(move |bus| { + log2.lock().unwrap().push(format!("sink2:{}", bus.codebook_index)); + }) + .build() + .unwrap(); + + engine.process(&[100]); + let entries = log.lock().unwrap(); + assert_eq!(entries.len(), 2); + assert!(entries[0].starts_with("sink1:")); + assert!(entries[1].starts_with("sink2:")); + } +} diff --git a/crates/thinking-engine/src/lib.rs b/crates/thinking-engine/src/lib.rs index 207b3ed8..b101c043 100644 --- a/crates/thinking-engine/src/lib.rs +++ b/crates/thinking-engine/src/lib.rs @@ -44,3 +44,5 @@ pub mod dual_engine; pub mod l4_bridge; pub mod composite_engine; pub mod signed_domino; +pub mod pooling; +pub mod builder; diff --git a/crates/thinking-engine/src/pooling.rs b/crates/thinking-engine/src/pooling.rs new file mode 100644 index 00000000..4b3149a3 --- /dev/null +++ b/crates/thinking-engine/src/pooling.rs @@ -0,0 +1,267 @@ +//! Energy pooling strategies for ThinkingEngine output. +//! +//! Different pooling = different qualia: +//! ArgMax = steelwind (sharp, decisive, single peak) +//! Mean = woodwarm (broad, grounded, gestalt) +//! TopK = oceandrift (multiple currents, multi-thought) +//! Weighted = nightshade (experience-modulated, ghost-biased) +//! +//! EmbedAnything has Mean/Cls/LastToken for transformer output. +//! We have the same concept on the ENERGY VECTOR after convergence. + +use crate::dto::BusDto; + +/// Pooling strategy for energy vector after convergence. +#[derive(Clone, Debug)] +pub enum Pooling { + /// Strongest peak wins. Sharp, decisive. Current default. + ArgMax, + /// Average of all active atoms (energy > threshold). Broad gestalt. + Mean { threshold: f32 }, + /// Top K peaks. Multiple simultaneous thoughts. + TopK(usize), + /// Ghost-weighted: multiply energy by experience weights before pooling. + Weighted { weights: Vec, inner: Box }, +} + +/// Result of pooling the energy vector. +#[derive(Clone, Debug)] +pub struct PooledResult { + /// Primary peak (always present). + pub primary: (u16, f32), + /// All selected atoms with their pooled energies. + pub atoms: Vec<(u16, f32)>, + /// Pooling strategy used. + pub strategy: String, + /// Entropy of the pooled selection. + pub entropy: f32, + /// Concentration: what fraction of total energy is in the selection. + pub concentration: f32, +} + +impl Pooling { + /// Pool the energy vector. Returns the selected atoms. + pub fn pool(&self, energy: &[f32]) -> PooledResult { + match self { + Pooling::ArgMax => pool_argmax(energy), + Pooling::Mean { threshold } => pool_mean(energy, *threshold), + Pooling::TopK(k) => pool_topk(energy, *k), + Pooling::Weighted { weights, inner } => { + let mut weighted = energy.to_vec(); + for (i, e) in weighted.iter_mut().enumerate() { + if i < weights.len() { + *e *= weights[i]; + } + } + // Re-normalize + let total: f32 = weighted.iter().sum(); + if total > 1e-10 { + for e in &mut weighted { *e /= total; } + } + let mut result = inner.pool(&weighted); + result.strategy = format!("Weighted({})", result.strategy); + result + } + } + } + + /// Convert a PooledResult into a BusDto (backward compat). + pub fn to_bus(&self, energy: &[f32], cycles: u16) -> BusDto { + let pooled = self.pool(energy); + let mut top_k = [(0u16, 0.0f32); 8]; + for (i, &(idx, e)) in pooled.atoms.iter().take(8).enumerate() { + top_k[i] = (idx, e); + } + BusDto { + codebook_index: pooled.primary.0, + energy: pooled.primary.1, + top_k, + cycle_count: cycles, + converged: cycles < 10, + } + } +} + +fn pool_argmax(energy: &[f32]) -> PooledResult { + let mut best_idx = 0u16; + let mut best_val = 0.0f32; + for (i, &e) in energy.iter().enumerate() { + if e > best_val { + best_val = e; + best_idx = i as u16; + } + } + let total: f32 = energy.iter().sum(); + PooledResult { + primary: (best_idx, best_val), + atoms: vec![(best_idx, best_val)], + strategy: "ArgMax".into(), + entropy: 0.0, // single atom = zero entropy + concentration: if total > 1e-10 { best_val / total } else { 0.0 }, + } +} + +fn pool_mean(energy: &[f32], threshold: f32) -> PooledResult { + let mut active: Vec<(u16, f32)> = energy.iter().enumerate() + .filter(|(_, &e)| e > threshold) + .map(|(i, &e)| (i as u16, e)) + .collect(); + active.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal)); + + if active.is_empty() { + return PooledResult { + primary: (0, 0.0), + atoms: vec![], + strategy: "Mean".into(), + entropy: 0.0, + concentration: 0.0, + }; + } + + let total: f32 = energy.iter().sum(); + let active_sum: f32 = active.iter().map(|(_, e)| e).sum(); + let mean_energy = active_sum / active.len() as f32; + + // Compute entropy of active atoms + let mut entropy = 0.0f32; + for &(_, e) in &active { + if e > 1e-10 { + let p = e / active_sum; + entropy -= p * p.ln(); + } + } + + PooledResult { + primary: active[0], + atoms: active, + strategy: "Mean".into(), + entropy, + concentration: if total > 1e-10 { active_sum / total } else { 0.0 }, + } +} + +fn pool_topk(energy: &[f32], k: usize) -> PooledResult { + let mut indexed: Vec<(u16, f32)> = energy.iter().enumerate() + .map(|(i, &e)| (i as u16, e)) + .collect(); + indexed.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal)); + let selected: Vec<(u16, f32)> = indexed.into_iter() + .take(k) + .filter(|&(_, e)| e > 1e-10) + .collect(); + + let total: f32 = energy.iter().sum(); + let selected_sum: f32 = selected.iter().map(|(_, e)| e).sum(); + + let mut entropy = 0.0f32; + for &(_, e) in &selected { + if e > 1e-10 && selected_sum > 1e-10 { + let p = e / selected_sum; + entropy -= p * p.ln(); + } + } + + let primary = selected.first().cloned().unwrap_or((0, 0.0)); + + PooledResult { + primary, + atoms: selected, + strategy: format!("TopK({})", k), + entropy, + concentration: if total > 1e-10 { selected_sum / total } else { 0.0 }, + } +} + +impl Default for Pooling { + fn default() -> Self { + Pooling::ArgMax + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn make_energy() -> Vec { + let mut e = vec![0.0f32; 256]; + e[50] = 0.30; + e[52] = 0.25; + e[54] = 0.20; + e[100] = 0.10; + e[130] = 0.08; + e[200] = 0.05; + e[10] = 0.01; + e[1] = 0.01; + e + } + + #[test] + fn argmax_finds_peak() { + let energy = make_energy(); + let result = Pooling::ArgMax.pool(&energy); + assert_eq!(result.primary.0, 50); + assert_eq!(result.atoms.len(), 1); + assert!(result.concentration > 0.0); + } + + #[test] + fn mean_collects_active() { + let energy = make_energy(); + let result = Pooling::Mean { threshold: 0.05 }.pool(&energy); + assert_eq!(result.primary.0, 50); + // Should include atoms > 0.05: 50, 52, 54, 100, 130 + assert!(result.atoms.len() >= 4); + assert!(result.entropy > 0.0); + assert!(result.concentration > 0.5); + } + + #[test] + fn topk_selects_k() { + let energy = make_energy(); + let result = Pooling::TopK(3).pool(&energy); + assert_eq!(result.atoms.len(), 3); + assert_eq!(result.primary.0, 50); + assert_eq!(result.atoms[1].0, 52); + assert_eq!(result.atoms[2].0, 54); + } + + #[test] + fn weighted_modulates() { + let energy = make_energy(); + // Suppress atom 50, boost atom 100 + let mut weights = vec![1.0f32; 256]; + weights[50] = 0.01; // suppress + weights[100] = 10.0; // boost + + let result = Pooling::Weighted { + weights, + inner: Box::new(Pooling::ArgMax), + }.pool(&energy); + + // Atom 100 should now dominate (0.10 * 10 = 1.0 vs 0.30 * 0.01 = 0.003) + assert_eq!(result.primary.0, 100); + assert!(result.strategy.contains("Weighted")); + } + + #[test] + fn to_bus_compat() { + let energy = make_energy(); + let bus = Pooling::TopK(5).to_bus(&energy, 7); + assert_eq!(bus.codebook_index, 50); + assert!(bus.energy > 0.0); + assert_eq!(bus.cycle_count, 7); + } + + #[test] + fn empty_energy_safe() { + let energy = vec![0.0f32; 256]; + let r1 = Pooling::ArgMax.pool(&energy); + assert_eq!(r1.primary.1, 0.0); + + let r2 = Pooling::Mean { threshold: 0.01 }.pool(&energy); + assert!(r2.atoms.is_empty()); + + let r3 = Pooling::TopK(3).pool(&energy); + assert!(r3.atoms.is_empty()); + } +}