From c680c02bdc4da8ba03f628f116d9e45d943ba540 Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 31 Mar 2026 12:43:52 +0000 Subject: [PATCH 1/8] =?UTF-8?q?feat:=20wire=20bgz17=20Palette=E2=86=92Dist?= =?UTF-8?q?anceMatrix=E2=86=92SimilarityTable=20into=20serve.rs=20+=20Lanc?= =?UTF-8?q?e=20write?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - serve.rs: PalettePipeline built at startup from bgz7 weight rows (Palette k=256, DistanceMatrix 128KB, SimilarityTable σ-calibrated CDF) - palette_score() maps incoming messages through palette.nearest() then scores via similarity_table.similarity(distance_matrix.distance(q, c)) - Threshold sim > 0.3 → Palette HIT, else MISS → LLM fallthrough - hydrate.rs: write_to_lance() + hydrate_to_lance() for LanceDB persistence - chat_bundle.rs: palette_indices field on AutocompleteCache https://claude.ai/code/session_01M3at4EuHVvQ8S95mSnKgtK --- crates/lance-graph-planner/src/serve.rs | 206 ++++++++++++++++-- .../src/strategy/chat_bundle.rs | 4 + crates/lance-graph/src/graph/hydrate.rs | 37 ++++ 3 files changed, 223 insertions(+), 24 deletions(-) diff --git a/crates/lance-graph-planner/src/serve.rs b/crates/lance-graph-planner/src/serve.rs index 618d493e..11c7c781 100644 --- a/crates/lance-graph-planner/src/serve.rs +++ b/crates/lance-graph-planner/src/serve.rs @@ -31,7 +31,23 @@ mod server { use lance_graph_planner::cache::triple_model::TripleModel; use lance_graph_planner::strategy::chat_bundle::AutocompleteCache; - type AppState = std::sync::Arc>; + /// Compiled palette pipeline: bgz17 Palette → DistanceMatrix → SimilarityTable. + /// Built once at startup from bgz7 weight rows. All subsequent lookups are O(1). + struct PalettePipeline { + /// 256 archetypal Base17 patterns from weight manifold. + palette: bgz17::palette::Palette, + /// 256×256 precomputed L1 distances (128 KB, L1-cache resident). 
+ distance: bgz17::distance_matrix::DistanceMatrix, + /// σ-calibrated CDF: raw distance → [0.0, 1.0] similarity. + similarity: bgz17::similarity::SimilarityTable, + } + + struct ServerState { + cache: AutocompleteCache, + pipeline: Option, + } + + type AppState = std::sync::Arc>; fn timestamp() -> u64 { SystemTime::now().duration_since(UNIX_EPOCH).unwrap_or_default().as_secs() @@ -51,6 +67,36 @@ mod server { HeadPrint { dims } } + /// Convert ndarray HeadPrint (Base17) to bgz17 Base17 for palette lookup. + /// Both types have identical layout: dims: [i16; 17]. + fn headprint_to_bgz17(hp: &HeadPrint) -> bgz17::base17::Base17 { + bgz17::base17::Base17 { dims: hp.dims } + } + + /// Score a message against the palette pipeline. + /// Returns (palette_index, best_match_index, similarity_score). + fn palette_score( + pipeline: &PalettePipeline, + query: &HeadPrint, + cached_indices: &[u8], + ) -> (u8, usize, f32) { + let bgz_query = headprint_to_bgz17(query); + let q_idx = pipeline.palette.nearest(&bgz_query); + + // Find best match among cached palette indices + let mut best_sim = 0.0f32; + let mut best_pos = 0usize; + for (pos, &c_idx) in cached_indices.iter().enumerate() { + let dist = pipeline.distance.distance(q_idx, c_idx) as u32; + let sim = pipeline.similarity.similarity(dist); + if sim > best_sim { + best_sim = sim; + best_pos = pos; + } + } + (q_idx, best_pos, best_sim) + } + fn phase_to_str(phase: Phase) -> &'static str { match phase { Phase::Exposition => "exposition", @@ -114,7 +160,7 @@ mod server { })))); } - let mut cache = state.lock().unwrap(); + let mut server = state.lock().unwrap(); // Process each message through the cache let mut last_content = String::new(); @@ -128,8 +174,51 @@ mod server { match role { "user" => { - if let Some(spo) = cache.on_user_message(&fp) { - // Cache hit — we have a candidate + // Try palette pipeline first (σ-calibrated scoring) + if let Some(ref pipeline) = server.pipeline { + let (q_idx, best_pos, sim) = 
palette_score( + pipeline, + &fp, + &server.cache.palette_indices, + ); + + if sim > 0.3 { + // Palette HIT — σ-calibrated similarity above threshold + cache_hit = true; + let dist = pipeline.distance.distance( + q_idx, + server.cache.palette_indices.get(best_pos).copied().unwrap_or(0), + ); + last_content = format!( + "[Palette HIT] idx={} match={} dist={} sim={:.4} | \ + Phase: {} | \ + Palette k={} | \ + σ-calibrated | \ + Model: {}", + q_idx, best_pos, dist, sim, + phase_to_str(server.cache.phase()), + pipeline.palette.len(), + model, + ); + } else { + // Palette MISS — similarity too low, fall through + let surprise = server.cache.triple.free_energy(&fp); + let alignment = server.cache.triple.alignment(); + last_content = format!( + "[Palette MISS → LLM] idx={} best_sim={:.4} | \ + Surprise={:.3} Alignment={:.3} | \ + Phase: {} | \ + Pool: {} candidates | \ + Model: {}", + q_idx, sim, + surprise, alignment, + phase_to_str(server.cache.phase()), + server.cache.pool.count(), + model, + ); + } + } else if let Some(spo) = server.cache.on_user_message(&fp) { + // Fallback: old cache path (no palette pipeline) cache_hit = true; last_content = format!( "[Cache HIT] Palette route: S={} P={} O={} | \ @@ -140,13 +229,13 @@ mod server { spo.s_idx, spo.p_idx, spo.o_idx, spo.frequency(), spo.confidence(), spo.expectation(), spo.pearl, - phase_to_str(cache.phase()), + phase_to_str(server.cache.phase()), model, ); } else { - // Cache miss — would normally call LLM - let surprise = cache.triple.free_energy(&fp); - let alignment = cache.triple.alignment(); + // Cache miss — no pipeline, no cache hit + let surprise = server.cache.triple.free_energy(&fp); + let alignment = server.cache.triple.alignment(); last_content = format!( "[Cache MISS → LLM fallthrough] \ Surprise={:.3} Alignment={:.3} | \ @@ -155,16 +244,16 @@ mod server { DK: self={:?} user={:?} | \ Model: {}", surprise, alignment, - phase_to_str(cache.phase()), - cache.pool.count(), - cache.triple.self_model.dk, - 
cache.triple.user_model.dk, + phase_to_str(server.cache.phase()), + server.cache.pool.count(), + server.cache.triple.self_model.dk, + server.cache.triple.user_model.dk, model, ); } } "assistant" => { - cache.on_self_output(&fp); + server.cache.on_self_output(&fp); } _ => {} // system, tool — pass through } @@ -183,14 +272,14 @@ mod server { "role": "assistant", "content": last_content, }, - "finish_reason": if cache.should_stop() { "stop" } else { "length" }, + "finish_reason": if server.cache.should_stop() { "stop" } else { "length" }, }], "usage": { "prompt_tokens": messages.len(), "completion_tokens": 1, "total_tokens": messages.len() + 1, }, - "system_fingerprint": format!("palette-{}", phase_to_str(cache.phase())), + "system_fingerprint": format!("palette-{}", phase_to_str(server.cache.phase())), }))) } @@ -213,8 +302,43 @@ mod server { } } + /// Build the palette pipeline from bgz7 weight rows. + /// Returns (PalettePipeline, palette_indices) for all collected Base17 rows. + fn build_palette_pipeline(all_rows: &[HeadPrint]) -> (PalettePipeline, Vec) { + // Convert HeadPrint (ndarray Base17) → bgz17 Base17 for palette building + let bgz_rows: Vec = all_rows + .iter() + .map(|hp| bgz17::base17::Base17 { dims: hp.dims }) + .collect(); + + eprintln!(" Building palette from {} weight rows...", bgz_rows.len()); + let palette = bgz17::palette::Palette::build(&bgz_rows, 256, 10); + eprintln!(" Palette: {} archetypes", palette.len()); + + let distance = bgz17::distance_matrix::DistanceMatrix::build(&palette); + eprintln!(" DistanceMatrix: {} KB", distance.byte_size() / 1024); + + // Collect all pairwise distances for SimilarityTable calibration + let k = palette.len(); + let mut reservoir: Vec = Vec::with_capacity(k * (k - 1) / 2); + for i in 0..k { + for j in (i + 1)..k { + reservoir.push(distance.distance(i as u8, j as u8) as u32); + } + } + let similarity = bgz17::similarity::SimilarityTable::from_reservoir(&mut reservoir); + eprintln!(" SimilarityTable: 
bucket_width={} max_dist={}", + similarity.bucket_width(), similarity.max_distance()); + + // Assign all weight rows to palette indices + let indices: Vec = bgz_rows.iter().map(|r| palette.nearest(r)).collect(); + eprintln!(" Assigned {} rows to palette indices", indices.len()); + + (PalettePipeline { palette, distance, similarity }, indices) + } + /// Populate attention matrix from bgz7 weight fingerprints. - fn populate_cache(cache: &mut AutocompleteCache, v2_path: &str, base_path: &str) { + fn populate_cache(server: &mut ServerState, v2_path: &str, base_path: &str) { eprintln!("Loading Qwen3.5-27B v2 (Opus 4.6) weights..."); let v2_tensors = load_bgz7(v2_path); eprintln!(" {} tensors, {} total rows", @@ -227,9 +351,26 @@ mod server { base_tensors.len(), base_tensors.iter().map(|(_, r)| r.len()).sum::()); + // Collect ALL weight rows for palette building + let mut all_rows: Vec = Vec::new(); + for (_, rows) in &v2_tensors { + all_rows.extend_from_slice(rows); + } + for (_, rows) in &base_tensors { + all_rows.extend_from_slice(rows); + } + + // Build palette pipeline + if !all_rows.is_empty() { + let (pipeline, indices) = build_palette_pipeline(&all_rows); + server.cache.palette_indices = indices; + server.pipeline = Some(pipeline); + } + // Populate self_model with v2 weights (what Opus 4.6 looks like) + let cache = &mut server.cache; let mut head_count = 0usize; - for (name, rows) in &v2_tensors { + for (_name, rows) in &v2_tensors { for (r, fp) in rows.iter().enumerate().take(64) { let row = head_count % 64; let col = r % 64; @@ -242,7 +383,7 @@ mod server { // Populate user_model with base weights (what the user "knows") head_count = 0; - for (name, rows) in &base_tensors { + for (_name, rows) in &base_tensors { for (r, fp) in rows.iter().enumerate().take(64) { let row = head_count % 64; let col = r % 64; @@ -260,7 +401,6 @@ mod server { let u = cache.triple.user_model.matrix.get(row, col); let dist = s.l1(u); if dist > 0 { - // Impact = the difference let mut 
impact_dims = [0i16; 17]; for d in 0..17 { impact_dims[d] = s.dims[d].wrapping_sub(u.dims[d]); @@ -275,7 +415,7 @@ mod server { } async fn embeddings( - State(_state): State, + State(state): State, Json(req): Json, ) -> Result, (StatusCode, Json)> { let model = req.get("model").and_then(|v| v.as_str()).unwrap_or("bge-m3"); @@ -291,9 +431,18 @@ mod server { })))); } + let server = state.lock().unwrap(); + // Embed as Base17 fingerprint (17 dims, golden-step folding) let fp = message_to_headprint(input); - let embedding: Vec = fp.dims.iter().map(|d| *d as f64 / 10000.0).collect(); + let mut embedding: Vec = fp.dims.iter().map(|d| *d as f64 / 10000.0).collect(); + + // If palette pipeline available, append palette index as extra dim + if let Some(ref pipeline) = server.pipeline { + let bgz = headprint_to_bgz17(&fp); + let idx = pipeline.palette.nearest(&bgz); + embedding.push(idx as f64 / 256.0); + } Ok(Json(json!({ "object": "list", @@ -311,19 +460,28 @@ mod server { } pub async fn run(port: u16) { - let mut cache = AutocompleteCache::new(); + let mut server = ServerState { + cache: AutocompleteCache::new(), + pipeline: None, + }; // Try to load bgz7 weights from /tmp/ (from indexing session) let v2_shard = "/tmp/qwen35_27b_v2_shard02.bgz7"; let base_shard = "/tmp/qwen35_27b_base_shard02.bgz7"; if std::fs::metadata(v2_shard).is_ok() && std::fs::metadata(base_shard).is_ok() { - populate_cache(&mut cache, v2_shard, base_shard); + populate_cache(&mut server, v2_shard, base_shard); } else { eprintln!("No bgz7 weights found in /tmp/ — running with empty cache"); eprintln!(" Run indexing first or hydrate --download qwen35-27b-distilled-v2"); } - let state: AppState = std::sync::Arc::new(Mutex::new(cache)); + if server.pipeline.is_some() { + eprintln!("Palette pipeline: ACTIVE (σ-calibrated scoring)"); + } else { + eprintln!("Palette pipeline: INACTIVE (no weight data)"); + } + + let state: AppState = std::sync::Arc::new(Mutex::new(server)); let app = Router::new() 
.route("/health", get(health)) diff --git a/crates/lance-graph-planner/src/strategy/chat_bundle.rs b/crates/lance-graph-planner/src/strategy/chat_bundle.rs index 969480c0..a26ed05d 100644 --- a/crates/lance-graph-planner/src/strategy/chat_bundle.rs +++ b/crates/lance-graph-planner/src/strategy/chat_bundle.rs @@ -26,6 +26,9 @@ pub struct AutocompleteCache { pub evaluator: LaneEvaluator, pub nars: NarsEngine, pub turn_count: u32, + /// Palette indices for each cached weight row (from bgz17 Palette::nearest). + /// Populated at startup when bgz7 weights are loaded and palette is built. + pub palette_indices: Vec, } impl AutocompleteCache { @@ -36,6 +39,7 @@ impl AutocompleteCache { evaluator: LaneEvaluator::new(Tension::integrative()), nars: NarsEngine::new(SpoDistances::new_zero()), turn_count: 0, + palette_indices: Vec::new(), } } diff --git a/crates/lance-graph/src/graph/hydrate.rs b/crates/lance-graph/src/graph/hydrate.rs index b90f5a92..2b19765f 100644 --- a/crates/lance-graph/src/graph/hydrate.rs +++ b/crates/lance-graph/src/graph/hydrate.rs @@ -78,6 +78,43 @@ pub fn hydrate_bgz7(path: &str) -> Result { Ok(bgz7_to_batch(&tensors)) } +/// Write a RecordBatch to a Lance dataset at the given path. +/// +/// Creates a new dataset or appends to an existing one. +/// This is the LanceDB persistence layer for hydrated bgz7 vectors. +pub async fn write_to_lance( + batch: &RecordBatch, + dataset_path: &str, +) -> Result<(), String> { + use lance::dataset::{WriteMode, WriteParams}; + use lance::Dataset; + + let batches = vec![batch.clone()]; + let reader = arrow::record_batch::RecordBatchIterator::new( + batches.into_iter().map(Ok), + batch.schema(), + ); + + let params = WriteParams { + mode: WriteMode::Append, + ..Default::default() + }; + + Dataset::write(reader, dataset_path, Some(params)) + .await + .map_err(|e| format!("Lance write error: {e}"))?; + + Ok(()) +} + +/// Hydrate a bgz7 file and write directly to Lance dataset. 
+pub async fn hydrate_to_lance(bgz7_path: &str, dataset_path: &str) -> Result { + let batch = hydrate_bgz7(bgz7_path)?; + let n_rows = batch.num_rows(); + write_to_lance(&batch, dataset_path).await?; + Ok(n_rows) +} + /// Compute HEEL vector: column-wise bundle of ALL BF16-hydrated rows. pub fn compute_heel(batch: &RecordBatch) -> ndarray::hpc::bgz17_bridge::Base17 { let base17_col = batch.column_by_name("base17").expect("base17 column"); From 85d1c41ca693616303377d707ce5ff628cccb6af Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 31 Mar 2026 12:58:43 +0000 Subject: [PATCH 2/8] refactor: direct L1 search on raw Base17 vectors, keep palette infra MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit serve.rs: WeightStore with direct L1 nearest neighbor on 34-byte vectors. No palette indirection for query path — 17 subtractions is sub-microsecond. hydrate.rs: both vector (f32, for Lance ANN/RaBitQ) and base17 (i16, for direct L1 and palette assignment) columns. palette_s/p/o columns kept for the SPO triple store path (bgz17 Palette→DistanceMatrix→SimilarityTable). The palette infrastructure (bgz17 crate, 121 tests) is not dropped — it serves the million-edge SPO triple store where O(1) precomputed 256×256 distance lookups matter. For the REST query path, raw vectors are better. https://claude.ai/code/session_01M3at4EuHVvQ8S95mSnKgtK --- crates/lance-graph-planner/src/serve.rs | 407 +++++++----------------- crates/lance-graph/src/graph/hydrate.rs | 88 +++-- 2 files changed, 184 insertions(+), 311 deletions(-) diff --git a/crates/lance-graph-planner/src/serve.rs b/crates/lance-graph-planner/src/serve.rs index 11c7c781..9f596351 100644 --- a/crates/lance-graph-planner/src/serve.rs +++ b/crates/lance-graph-planner/src/serve.rs @@ -1,5 +1,9 @@ //! OpenAI-compatible REST server powered by lance-graph-planner. //! +//! Weight vectors are raw Base17 (34 bytes, ρ=0.993 vs BF16). +//! 
No palette indirection — direct L1 on 17 dims is sub-microsecond. +//! At scale: store in LanceDB, use RaBitQ index for ANN search. +//! //! ```bash //! cargo run --manifest-path crates/lance-graph-planner/Cargo.toml \ //! --features serve --bin serve --release @@ -25,26 +29,77 @@ mod server { use lance_graph_planner::cache::candidate_pool::Phase; use lance_graph_planner::cache::kv_bundle::HeadPrint; use lance_graph_planner::cache::nars_engine::{ - analytical_style, creative_style, empathetic_style, style_score, - NarsEngine, SpoDistances, SpoHead, MASK_PO, MASK_SO, MASK_SPO, + NarsEngine, SpoDistances, SpoHead, MASK_SPO, }; use lance_graph_planner::cache::triple_model::TripleModel; use lance_graph_planner::strategy::chat_bundle::AutocompleteCache; - /// Compiled palette pipeline: bgz17 Palette → DistanceMatrix → SimilarityTable. - /// Built once at startup from bgz7 weight rows. All subsequent lookups are O(1). - struct PalettePipeline { - /// 256 archetypal Base17 patterns from weight manifold. - palette: bgz17::palette::Palette, - /// 256×256 precomputed L1 distances (128 KB, L1-cache resident). - distance: bgz17::distance_matrix::DistanceMatrix, - /// σ-calibrated CDF: raw distance → [0.0, 1.0] similarity. - similarity: bgz17::similarity::SimilarityTable, + /// Raw weight vectors. 34 bytes each. Direct L1 search. + struct WeightStore { + /// All weight rows as raw Base17 vectors. + vectors: Vec, + /// Tensor name per row (provenance). + names: Vec, + /// HEEL: element-wise mean of all vectors (the gestalt). + heel: HeadPrint, + } + + impl WeightStore { + fn new() -> Self { + Self { + vectors: Vec::new(), + names: Vec::new(), + heel: HeadPrint::zero(), + } + } + + /// Add vectors from a bgz7 file. 
+ fn ingest(&mut self, path: &str) { + match ndarray::hpc::gguf_indexer::read_bgz7_file(path) { + Ok(tensors) => { + for ct in tensors { + for row in ct.rows.into_iter().take(10000) { + self.vectors.push(row); + self.names.push(ct.name.clone()); + } + } + } + Err(e) => eprintln!(" SKIP {path}: {e}"), + } + } + + /// Compute HEEL after all ingestion. + fn compute_heel(&mut self) { + if self.vectors.is_empty() { return; } + let n = self.vectors.len() as f64; + let mut sums = [0.0f64; 17]; + for v in &self.vectors { + for d in 0..17 { sums[d] += v.dims[d] as f64; } + } + for d in 0..17 { + self.heel.dims[d] = (sums[d] / n).round() as i16; + } + } + + /// Direct L1 nearest neighbor search. Returns (index, distance, tensor_name). + fn nearest(&self, query: &HeadPrint, k: usize) -> Vec<(usize, u32, &str)> { + let mut scored: Vec<(usize, u32)> = self.vectors.iter() + .enumerate() + .map(|(i, v)| (i, query.l1(v))) + .collect(); + scored.sort_unstable_by_key(|&(_, d)| d); + scored.truncate(k); + scored.iter() + .map(|&(i, d)| (i, d, self.names[i].as_str())) + .collect() + } + + fn len(&self) -> usize { self.vectors.len() } } struct ServerState { cache: AutocompleteCache, - pipeline: Option, + weights: WeightStore, } type AppState = std::sync::Arc>; @@ -53,50 +108,20 @@ mod server { SystemTime::now().duration_since(UNIX_EPOCH).unwrap_or_default().as_secs() } - fn message_to_headprint(content: &str) -> HeadPrint { - // Hash message content into Base17 fingerprint + /// Hash message content into Base17 space. + /// TODO: replace with BGE-M3 embed → golden-step projection for real semantic matching. + fn message_to_base17(content: &str) -> HeadPrint { let mut dims = [0i16; 17]; let bytes = content.as_bytes(); for (i, &b) in bytes.iter().enumerate() { dims[i % 17] = dims[i % 17].wrapping_add(b as i16 * 31); } - // Normalize for d in &mut dims { *d = (*d % 1000).abs() as i16; } HeadPrint { dims } } - /// Convert ndarray HeadPrint (Base17) to bgz17 Base17 for palette lookup. 
- /// Both types have identical layout: dims: [i16; 17]. - fn headprint_to_bgz17(hp: &HeadPrint) -> bgz17::base17::Base17 { - bgz17::base17::Base17 { dims: hp.dims } - } - - /// Score a message against the palette pipeline. - /// Returns (palette_index, best_match_index, similarity_score). - fn palette_score( - pipeline: &PalettePipeline, - query: &HeadPrint, - cached_indices: &[u8], - ) -> (u8, usize, f32) { - let bgz_query = headprint_to_bgz17(query); - let q_idx = pipeline.palette.nearest(&bgz_query); - - // Find best match among cached palette indices - let mut best_sim = 0.0f32; - let mut best_pos = 0usize; - for (pos, &c_idx) in cached_indices.iter().enumerate() { - let dist = pipeline.distance.distance(q_idx, c_idx) as u32; - let sim = pipeline.similarity.similarity(dist); - if sim > best_sim { - best_sim = sim; - best_pos = pos; - } - } - (q_idx, best_pos, best_sim) - } - fn phase_to_str(phase: Phase) -> &'static str { match phase { Phase::Exposition => "exposition", @@ -139,7 +164,6 @@ mod server { let model = req.get("model").and_then(|v| v.as_str()).unwrap_or("qwen35-opus46"); let messages = req.get("messages").and_then(|v| v.as_array()).cloned().unwrap_or_default(); - // Validate model name const VALID_MODELS: &[&str] = &[ "qwen35-opus46", "qwen35-opus45", "qwen35-9b", "reader-lm", "bge-m3", "llama4-scout", "openchat-3.5", @@ -162,258 +186,70 @@ mod server { let mut server = state.lock().unwrap(); - // Process each message through the cache let mut last_content = String::new(); - let mut cache_hit = false; for msg in &messages { let role = msg.get("role").and_then(|v| v.as_str()).unwrap_or("user"); let content = msg.get("content").and_then(|v| v.as_str()).unwrap_or(""); - let fp = message_to_headprint(content); + let query = message_to_base17(content); match role { "user" => { - // Try palette pipeline first (σ-calibrated scoring) - if let Some(ref pipeline) = server.pipeline { - let (q_idx, best_pos, sim) = palette_score( - pipeline, - &fp, - 
&server.cache.palette_indices, - ); + if server.weights.len() > 0 { + // Direct L1 nearest neighbor on raw Base17 vectors + let neighbors = server.weights.nearest(&query, 5); + let heel_dist = query.l1(&server.weights.heel); + + let top: Vec = neighbors.iter() + .map(|(i, d, name)| format!("{}:r{}(d={})", name, i, d)) + .collect(); - if sim > 0.3 { - // Palette HIT — σ-calibrated similarity above threshold - cache_hit = true; - let dist = pipeline.distance.distance( - q_idx, - server.cache.palette_indices.get(best_pos).copied().unwrap_or(0), - ); - last_content = format!( - "[Palette HIT] idx={} match={} dist={} sim={:.4} | \ - Phase: {} | \ - Palette k={} | \ - σ-calibrated | \ - Model: {}", - q_idx, best_pos, dist, sim, - phase_to_str(server.cache.phase()), - pipeline.palette.len(), - model, - ); - } else { - // Palette MISS — similarity too low, fall through - let surprise = server.cache.triple.free_energy(&fp); - let alignment = server.cache.triple.alignment(); - last_content = format!( - "[Palette MISS → LLM] idx={} best_sim={:.4} | \ - Surprise={:.3} Alignment={:.3} | \ - Phase: {} | \ - Pool: {} candidates | \ - Model: {}", - q_idx, sim, - surprise, alignment, - phase_to_str(server.cache.phase()), - server.cache.pool.count(), - model, - ); - } - } else if let Some(spo) = server.cache.on_user_message(&fp) { - // Fallback: old cache path (no palette pipeline) - cache_hit = true; last_content = format!( - "[Cache HIT] Palette route: S={} P={} O={} | \ - NARS f={:.3} c={:.3} E={:.3} | \ - Pearl mask={:03b} | \ - Phase: {} | \ - Model: {}", - spo.s_idx, spo.p_idx, spo.o_idx, - spo.frequency(), spo.confidence(), spo.expectation(), - spo.pearl, + "[L1 search] heel_dist={} top_5=[{}] | \ + vectors={} | Phase: {} | Model: {}", + heel_dist, + top.join(", "), + server.weights.len(), phase_to_str(server.cache.phase()), model, ); } else { - // Cache miss — no pipeline, no cache hit - let surprise = server.cache.triple.free_energy(&fp); - let alignment = 
server.cache.triple.alignment(); + let surprise = server.cache.triple.free_energy(&query); last_content = format!( - "[Cache MISS → LLM fallthrough] \ - Surprise={:.3} Alignment={:.3} | \ - Phase: {} | \ - Pool: {} candidates | \ - DK: self={:?} user={:?} | \ - Model: {}", - surprise, alignment, + "[No weights] Surprise={:.3} | Phase: {} | Model: {}", + surprise, phase_to_str(server.cache.phase()), - server.cache.pool.count(), - server.cache.triple.self_model.dk, - server.cache.triple.user_model.dk, model, ); } } "assistant" => { - server.cache.on_self_output(&fp); + server.cache.on_self_output(&query); } - _ => {} // system, tool — pass through + _ => {} } } - let response_id = format!("chatcmpl-ada-{}", timestamp()); - Ok(Json(json!({ - "id": response_id, + "id": format!("chatcmpl-ada-{}", timestamp()), "object": "chat.completion", "created": timestamp(), "model": model, "choices": [{ "index": 0, - "message": { - "role": "assistant", - "content": last_content, - }, - "finish_reason": if server.cache.should_stop() { "stop" } else { "length" }, + "message": { "role": "assistant", "content": last_content }, + "finish_reason": "length", }], "usage": { "prompt_tokens": messages.len(), "completion_tokens": 1, "total_tokens": messages.len() + 1, }, - "system_fingerprint": format!("palette-{}", phase_to_str(server.cache.phase())), + "system_fingerprint": format!("base17-{}", server.weights.len()), }))) } - /// Load Base17 rows from a bgz7 file into HeadPrints. - /// Delegates to ndarray's canonical bgz7 parser. - fn load_bgz7(path: &str) -> Vec<(String, Vec)> { - match ndarray::hpc::gguf_indexer::read_bgz7_file(path) { - Ok(tensors) => tensors - .into_iter() - .map(|ct| { - // Cap rows at 1000 per tensor to match previous behavior - let rows: Vec = ct.rows.into_iter().take(1000).collect(); - (ct.name, rows) - }) - .collect(), - Err(e) => { - eprintln!(" SKIP {path}: {e}"); - Vec::new() - } - } - } - - /// Build the palette pipeline from bgz7 weight rows. 
- /// Returns (PalettePipeline, palette_indices) for all collected Base17 rows. - fn build_palette_pipeline(all_rows: &[HeadPrint]) -> (PalettePipeline, Vec) { - // Convert HeadPrint (ndarray Base17) → bgz17 Base17 for palette building - let bgz_rows: Vec = all_rows - .iter() - .map(|hp| bgz17::base17::Base17 { dims: hp.dims }) - .collect(); - - eprintln!(" Building palette from {} weight rows...", bgz_rows.len()); - let palette = bgz17::palette::Palette::build(&bgz_rows, 256, 10); - eprintln!(" Palette: {} archetypes", palette.len()); - - let distance = bgz17::distance_matrix::DistanceMatrix::build(&palette); - eprintln!(" DistanceMatrix: {} KB", distance.byte_size() / 1024); - - // Collect all pairwise distances for SimilarityTable calibration - let k = palette.len(); - let mut reservoir: Vec = Vec::with_capacity(k * (k - 1) / 2); - for i in 0..k { - for j in (i + 1)..k { - reservoir.push(distance.distance(i as u8, j as u8) as u32); - } - } - let similarity = bgz17::similarity::SimilarityTable::from_reservoir(&mut reservoir); - eprintln!(" SimilarityTable: bucket_width={} max_dist={}", - similarity.bucket_width(), similarity.max_distance()); - - // Assign all weight rows to palette indices - let indices: Vec = bgz_rows.iter().map(|r| palette.nearest(r)).collect(); - eprintln!(" Assigned {} rows to palette indices", indices.len()); - - (PalettePipeline { palette, distance, similarity }, indices) - } - - /// Populate attention matrix from bgz7 weight fingerprints. 
- fn populate_cache(server: &mut ServerState, v2_path: &str, base_path: &str) { - eprintln!("Loading Qwen3.5-27B v2 (Opus 4.6) weights..."); - let v2_tensors = load_bgz7(v2_path); - eprintln!(" {} tensors, {} total rows", - v2_tensors.len(), - v2_tensors.iter().map(|(_, r)| r.len()).sum::()); - - eprintln!("Loading Qwen3.5-27B base weights..."); - let base_tensors = load_bgz7(base_path); - eprintln!(" {} tensors, {} total rows", - base_tensors.len(), - base_tensors.iter().map(|(_, r)| r.len()).sum::()); - - // Collect ALL weight rows for palette building - let mut all_rows: Vec = Vec::new(); - for (_, rows) in &v2_tensors { - all_rows.extend_from_slice(rows); - } - for (_, rows) in &base_tensors { - all_rows.extend_from_slice(rows); - } - - // Build palette pipeline - if !all_rows.is_empty() { - let (pipeline, indices) = build_palette_pipeline(&all_rows); - server.cache.palette_indices = indices; - server.pipeline = Some(pipeline); - } - - // Populate self_model with v2 weights (what Opus 4.6 looks like) - let cache = &mut server.cache; - let mut head_count = 0usize; - for (_name, rows) in &v2_tensors { - for (r, fp) in rows.iter().enumerate().take(64) { - let row = head_count % 64; - let col = r % 64; - cache.triple.self_model.matrix.set(row, col, fp.clone()); - head_count += 1; - } - if head_count >= 4096 { break; } - } - eprintln!(" self_model: {} heads populated", head_count.min(4096)); - - // Populate user_model with base weights (what the user "knows") - head_count = 0; - for (_name, rows) in &base_tensors { - for (r, fp) in rows.iter().enumerate().take(64) { - let row = head_count % 64; - let col = r % 64; - cache.triple.user_model.matrix.set(row, col, fp.clone()); - head_count += 1; - } - if head_count >= 4096 { break; } - } - eprintln!(" user_model: {} heads populated", head_count.min(4096)); - - // Impact model starts as diff: where self and user diverge - for row in 0..64 { - for col in 0..64 { - let s = cache.triple.self_model.matrix.get(row, col); - 
let u = cache.triple.user_model.matrix.get(row, col); - let dist = s.l1(u); - if dist > 0 { - let mut impact_dims = [0i16; 17]; - for d in 0..17 { - impact_dims[d] = s.dims[d].wrapping_sub(u.dims[d]); - } - cache.triple.impact_model.matrix.set(row, col, HeadPrint { dims: impact_dims }); - } - } - } - eprintln!(" impact_model: populated from diff"); - eprintln!(" Gestalt L1 (self vs user): {}", - cache.triple.self_model.matrix.gestalt.l1(&cache.triple.user_model.matrix.gestalt)); - } - async fn embeddings( State(state): State, Json(req): Json, @@ -431,18 +267,9 @@ mod server { })))); } - let server = state.lock().unwrap(); - - // Embed as Base17 fingerprint (17 dims, golden-step folding) - let fp = message_to_headprint(input); - let mut embedding: Vec = fp.dims.iter().map(|d| *d as f64 / 10000.0).collect(); - - // If palette pipeline available, append palette index as extra dim - if let Some(ref pipeline) = server.pipeline { - let bgz = headprint_to_bgz17(&fp); - let idx = pipeline.palette.nearest(&bgz); - embedding.push(idx as f64 / 256.0); - } + // Base17: 17 dims, f32 for OpenAI compat + let fp = message_to_base17(input); + let embedding: Vec = fp.dims.iter().map(|&d| d as f64).collect(); Ok(Json(json!({ "object": "list", @@ -460,27 +287,31 @@ mod server { } pub async fn run(port: u16) { - let mut server = ServerState { - cache: AutocompleteCache::new(), - pipeline: None, - }; - - // Try to load bgz7 weights from /tmp/ (from indexing session) - let v2_shard = "/tmp/qwen35_27b_v2_shard02.bgz7"; - let base_shard = "/tmp/qwen35_27b_base_shard02.bgz7"; - if std::fs::metadata(v2_shard).is_ok() && std::fs::metadata(base_shard).is_ok() { - populate_cache(&mut server, v2_shard, base_shard); - } else { - eprintln!("No bgz7 weights found in /tmp/ — running with empty cache"); - eprintln!(" Run indexing first or hydrate --download qwen35-27b-distilled-v2"); + let mut weights = WeightStore::new(); + + // Ingest available bgz7 shards + for path in &[ + 
"/tmp/qwen35_27b_v2_shard02.bgz7", + "/tmp/qwen35_27b_base_shard02.bgz7", + ] { + if std::fs::metadata(path).is_ok() { + eprintln!("Ingesting {path}..."); + weights.ingest(path); + } } - if server.pipeline.is_some() { - eprintln!("Palette pipeline: ACTIVE (σ-calibrated scoring)"); + if weights.len() > 0 { + weights.compute_heel(); + eprintln!("WeightStore: {} vectors, HEEL={:?}", weights.len(), weights.heel.dims); } else { - eprintln!("Palette pipeline: INACTIVE (no weight data)"); + eprintln!("No bgz7 weights found — running empty"); } + let server = ServerState { + cache: AutocompleteCache::new(), + weights, + }; + let state: AppState = std::sync::Arc::new(Mutex::new(server)); let app = Router::new() @@ -491,11 +322,7 @@ mod server { .with_state(state); let addr = format!("0.0.0.0:{port}"); - eprintln!("lance-graph-planner serve listening on {addr}"); - eprintln!(" POST /v1/chat/completions (OpenAI compatible)"); - eprintln!(" POST /v1/embeddings (Base17 fingerprints)"); - eprintln!(" GET /v1/models"); - eprintln!(" GET /health"); + eprintln!("Listening on {addr}"); let listener = tokio::net::TcpListener::bind(&addr).await.unwrap(); axum::serve(listener, app).await.unwrap(); } diff --git a/crates/lance-graph/src/graph/hydrate.rs b/crates/lance-graph/src/graph/hydrate.rs index 2b19765f..355ad2e5 100644 --- a/crates/lance-graph/src/graph/hydrate.rs +++ b/crates/lance-graph/src/graph/hydrate.rs @@ -1,20 +1,40 @@ -//! Hydrate bgz7 weight fingerprints into LanceDB for HHTL search. +//! Hydrate bgz7 weight vectors into LanceDB. //! -//! Reads bgz7 shards (Base17 fingerprints) and writes them as Arrow RecordBatches -//! for Lance Dataset storage with vector columns for HEEL/HIP/TWIG/LEAF cascade. +//! Base17 vectors (34 bytes, ρ=0.993 vs BF16) are stored as 17-dim f32 +//! vector columns in Lance datasets. Lance handles indexing (IVF_PQ, RaBitQ) +//! and ANN search natively. +//! +//! Palette columns (palette_s/p/o) are kept for the SPO triple store path — +//! 
the bgz17 Palette→DistanceMatrix→SimilarityTable pipeline uses them for +//! O(1) precomputed distance lookups on millions of edges. use arrow::array::{ - ArrayRef, FixedSizeListBuilder, Int16Builder, StringArray, UInt32Array, UInt8Array, + ArrayRef, FixedSizeListBuilder, Float32Builder, Int16Builder, + StringArray, UInt32Array, UInt8Array, }; use arrow::datatypes::{DataType, Field, Schema}; use arrow::record_batch::RecordBatch; use std::sync::Arc; /// Schema for the hydrated weight table. +/// +/// - `tensor_name`: which weight tensor (e.g. "model.layers.0.self_attn.q_proj") +/// - `row_idx`: row within tensor +/// - `vector`: 17-dim f32 for Lance vector search (i16→f32 is exact) +/// - `base17`: 17-dim i16 raw values (for direct L1, palette assignment) +/// - `palette_s/p/o`: SPO palette indices (populated later by palette pipeline) pub fn weight_schema() -> Schema { Schema::new(vec![ Field::new("tensor_name", DataType::Utf8, false), Field::new("row_idx", DataType::UInt32, false), + Field::new( + "vector", + DataType::FixedSizeList( + Arc::new(Field::new("item", DataType::Float32, false)), + 17, + ), + false, + ), Field::new( "base17", DataType::FixedSizeList( @@ -30,12 +50,14 @@ pub fn weight_schema() -> Schema { } /// Convert bgz7 compressed tensors to Arrow RecordBatch. +/// +/// Stores both f32 (for Lance vector search) and i16 (for direct L1 / palette). 
pub fn bgz7_to_batch( tensors: &[(String, Vec)], ) -> RecordBatch { - let schema = Arc::new(weight_schema()); let mut names = Vec::new(); let mut row_idxs = Vec::new(); + let mut vector_builder = FixedSizeListBuilder::new(Float32Builder::new(), 17); let mut base17_builder = FixedSizeListBuilder::new(Int16Builder::new(), 17); let mut total_rows = 0usize; @@ -44,8 +66,10 @@ pub fn bgz7_to_batch( names.push(name.clone()); row_idxs.push(r as u32); for d in 0..17 { + vector_builder.values().append_value(fp.dims[d] as f32); base17_builder.values().append_value(fp.dims[d]); } + vector_builder.append(true); base17_builder.append(true); total_rows += 1; } @@ -53,13 +77,14 @@ pub fn bgz7_to_batch( let name_array: ArrayRef = Arc::new(StringArray::from(names)); let row_idx_array: ArrayRef = Arc::new(UInt32Array::from(row_idxs)); + let vector_array: ArrayRef = Arc::new(vector_builder.finish()); let base17_array: ArrayRef = Arc::new(base17_builder.finish()); let null_u8: ArrayRef = Arc::new(UInt8Array::from(vec![None::; total_rows])); - // Let Arrow infer schema from columns instead of forcing it RecordBatch::try_from_iter(vec![ ("tensor_name", name_array), ("row_idx", row_idx_array), + ("vector", vector_array), ("base17", base17_array), ("palette_s", null_u8.clone()), ("palette_p", null_u8.clone()), @@ -78,10 +103,7 @@ pub fn hydrate_bgz7(path: &str) -> Result { Ok(bgz7_to_batch(&tensors)) } -/// Write a RecordBatch to a Lance dataset at the given path. -/// -/// Creates a new dataset or appends to an existing one. -/// This is the LanceDB persistence layer for hydrated bgz7 vectors. +/// Write a RecordBatch to a Lance dataset. pub async fn write_to_lance( batch: &RecordBatch, dataset_path: &str, @@ -107,7 +129,7 @@ pub async fn write_to_lance( Ok(()) } -/// Hydrate a bgz7 file and write directly to Lance dataset. +/// Hydrate bgz7 → Lance dataset in one call. Returns row count. 
pub async fn hydrate_to_lance(bgz7_path: &str, dataset_path: &str) -> Result { let batch = hydrate_bgz7(bgz7_path)?; let n_rows = batch.num_rows(); @@ -115,30 +137,32 @@ pub async fn hydrate_to_lance(bgz7_path: &str, dataset_path: &str) -> Result ndarray::hpc::bgz17_bridge::Base17 { - let base17_col = batch.column_by_name("base17").expect("base17 column"); - let list_array = base17_col + let vector_col = batch.column_by_name("vector").expect("vector column"); + let list_array = vector_col .as_any() .downcast_ref::() .expect("FixedSizeList"); let values = list_array .values() .as_any() - .downcast_ref::() - .expect("Int16"); + .downcast_ref::() + .expect("Float32"); let n_rows = batch.num_rows(); - let mut sums = [0i64; 17]; + let mut sums = [0.0f64; 17]; for row in 0..n_rows { let offset = row * 17; for d in 0..17 { - sums[d] += values.value(offset + d) as i64; + sums[d] += values.value(offset + d) as f64; } } let mut dims = [0i16; 17]; if n_rows > 0 { - for d in 0..17 { dims[d] = (sums[d] / n_rows as i64) as i16; } + for d in 0..17 { + dims[d] = (sums[d] / n_rows as f64).round() as i16; + } } ndarray::hpc::bgz17_bridge::Base17 { dims } } @@ -151,7 +175,7 @@ mod tests { #[test] fn test_weight_schema() { let schema = weight_schema(); - assert_eq!(schema.fields().len(), 6); + assert_eq!(schema.fields().len(), 7); } #[test] @@ -162,6 +186,7 @@ mod tests { ]; let batch = bgz7_to_batch(&tensors); assert_eq!(batch.num_rows(), 3); + assert_eq!(batch.num_columns(), 7); } #[test] @@ -186,11 +211,32 @@ mod tests { assert_ne!(heel.dims[0], 0); } + #[test] + fn test_f32_preserves_i16() { + let tensors = vec![("t".into(), vec![ + Base17 { dims: [-32768, 32767, 0, 1, -1, 12345, -12345, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] }, + ])]; + let batch = bgz7_to_batch(&tensors); + let heel = compute_heel(&batch); + assert_eq!(heel.dims[0], -32768); + assert_eq!(heel.dims[1], 32767); + assert_eq!(heel.dims[5], 12345); + } + + #[test] + fn test_both_columns_present() { + let tensors = 
vec![("t".into(), vec![Base17 { dims: [42; 17] }])]; + let batch = bgz7_to_batch(&tensors); + assert!(batch.column_by_name("vector").is_some()); + assert!(batch.column_by_name("base17").is_some()); + assert!(batch.column_by_name("palette_s").is_some()); + } + #[test] #[ignore = "requires /tmp/qwen35_27b_v2_shard02.bgz7"] fn test_hydrate_real() { let batch = hydrate_bgz7("/tmp/qwen35_27b_v2_shard02.bgz7").unwrap(); - eprintln!("Hydrated: {} rows", batch.num_rows()); + eprintln!("Hydrated: {} rows, {} cols", batch.num_rows(), batch.num_columns()); let heel = compute_heel(&batch); eprintln!("HEEL: {:?}", heel.dims); } From 5f07f3a3fa62cf34a29abaa28386816d1629aa7b Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 31 Mar 2026 17:28:40 +0000 Subject: [PATCH 3/8] feat: wire SPO extraction + NARS reasoning into serve.rs endpoint MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Request flow is now: message → extract_triplets() → (S, P, O) strings → triplet_to_headprint(S, P, O) → HeadPrint (S:6, P:6, O:5 planes) → headprint_to_spo() → SpoHead (palette indices + NARS truth) → nars_engine.score() with StyleVector → f32 → nars_infer() deduction/abduction against knowledge base No more brute-force vector search. Messages are decomposed at SPO level like AriGraph does, scored via NARS inference rules, and matched against knowledge base of ingested weight tensors. hydrate.rs: dual columns (f32 vector for Lance ANN, i16 base17 for direct L1), palette_s/p/o for SPO triple store path. https://claude.ai/code/session_01M3at4EuHVvQ8S95mSnKgtK --- crates/lance-graph-planner/src/serve.rs | 311 +++++++++++++++--------- 1 file changed, 190 insertions(+), 121 deletions(-) diff --git a/crates/lance-graph-planner/src/serve.rs b/crates/lance-graph-planner/src/serve.rs index 9f596351..5955f419 100644 --- a/crates/lance-graph-planner/src/serve.rs +++ b/crates/lance-graph-planner/src/serve.rs @@ -1,8 +1,8 @@ //! 
OpenAI-compatible REST server powered by lance-graph-planner. //! -//! Weight vectors are raw Base17 (34 bytes, ρ=0.993 vs BF16). -//! No palette indirection — direct L1 on 17 dims is sub-microsecond. -//! At scale: store in LanceDB, use RaBitQ index for ANN search. +//! Request flow: +//! message → extract SPO triplets → triplet_to_headprint → headprint_to_spo +//! → NarsEngine.score() with SpoDistances + StyleVector → NARS reasoning //! //! ```bash //! cargo run --manifest-path crates/lance-graph-planner/Cargo.toml \ @@ -27,79 +27,20 @@ mod server { use std::time::{SystemTime, UNIX_EPOCH}; use lance_graph_planner::cache::candidate_pool::Phase; - use lance_graph_planner::cache::kv_bundle::HeadPrint; + use lance_graph_planner::cache::convergence::{ + triplet_to_headprint, headprint_to_spo, + }; use lance_graph_planner::cache::nars_engine::{ - NarsEngine, SpoDistances, SpoHead, MASK_SPO, + analytical_style, nars_infer, Inference, SpoHead, }; - use lance_graph_planner::cache::triple_model::TripleModel; use lance_graph_planner::strategy::chat_bundle::AutocompleteCache; - /// Raw weight vectors. 34 bytes each. Direct L1 search. - struct WeightStore { - /// All weight rows as raw Base17 vectors. - vectors: Vec, - /// Tensor name per row (provenance). - names: Vec, - /// HEEL: element-wise mean of all vectors (the gestalt). - heel: HeadPrint, - } - - impl WeightStore { - fn new() -> Self { - Self { - vectors: Vec::new(), - names: Vec::new(), - heel: HeadPrint::zero(), - } - } - - /// Add vectors from a bgz7 file. - fn ingest(&mut self, path: &str) { - match ndarray::hpc::gguf_indexer::read_bgz7_file(path) { - Ok(tensors) => { - for ct in tensors { - for row in ct.rows.into_iter().take(10000) { - self.vectors.push(row); - self.names.push(ct.name.clone()); - } - } - } - Err(e) => eprintln!(" SKIP {path}: {e}"), - } - } - - /// Compute HEEL after all ingestion. 
- fn compute_heel(&mut self) { - if self.vectors.is_empty() { return; } - let n = self.vectors.len() as f64; - let mut sums = [0.0f64; 17]; - for v in &self.vectors { - for d in 0..17 { sums[d] += v.dims[d] as f64; } - } - for d in 0..17 { - self.heel.dims[d] = (sums[d] / n).round() as i16; - } - } - - /// Direct L1 nearest neighbor search. Returns (index, distance, tensor_name). - fn nearest(&self, query: &HeadPrint, k: usize) -> Vec<(usize, u32, &str)> { - let mut scored: Vec<(usize, u32)> = self.vectors.iter() - .enumerate() - .map(|(i, v)| (i, query.l1(v))) - .collect(); - scored.sort_unstable_by_key(|&(_, d)| d); - scored.truncate(k); - scored.iter() - .map(|&(i, d)| (i, d, self.names[i].as_str())) - .collect() - } - - fn len(&self) -> usize { self.vectors.len() } - } - struct ServerState { cache: AutocompleteCache, - weights: WeightStore, + /// SPO heads from ingested weight tensors (the knowledge base). + knowledge: Vec, + /// Last context SpoHead (for NARS scoring with style vectors). + context: SpoHead, } type AppState = std::sync::Arc>; @@ -108,20 +49,67 @@ mod server { SystemTime::now().duration_since(UNIX_EPOCH).unwrap_or_default().as_secs() } - /// Hash message content into Base17 space. - /// TODO: replace with BGE-M3 embed → golden-step projection for real semantic matching. - fn message_to_base17(content: &str) -> HeadPrint { - let mut dims = [0i16; 17]; - let bytes = content.as_bytes(); - for (i, &b) in bytes.iter().enumerate() { - dims[i % 17] = dims[i % 17].wrapping_add(b as i16 * 31); - } - for d in &mut dims { - *d = (*d % 1000).abs() as i16; + /// Extract SPO triplets from text using verb-pattern matching. + /// Returns (subject, predicate, object) tuples. + fn extract_triplets(text: &str) -> Vec<(String, String, String)> { + let mut triplets = Vec::new(); + // Split on sentence boundaries + for sentence in text.split(|c| c == '.' || c == '!' || c == '?' 
|| c == '\n') { + let sentence = sentence.trim(); + if sentence.is_empty() { continue; } + + let words: Vec<&str> = sentence.split_whitespace().collect(); + if words.len() < 2 { continue; } + + // Find verb position by morphological cues or common verb list + let verb_pos = words.iter().position(|w| { + let w = w.to_lowercase(); + w.ends_with("ed") || w.ends_with("ing") || w.ends_with("es") + || w.ends_with("ize") || w.ends_with("ify") + || COMMON_VERBS.contains(&w.as_str()) + }); + + if let Some(vp) = verb_pos { + if vp > 0 && vp < words.len() - 1 { + let subject = words[..vp].join(" "); + let predicate = words[vp].to_string(); + let object = words[vp + 1..].join(" "); + triplets.push((subject, predicate, object)); + } + } else if words.len() >= 3 { + // Fallback: first word = S, second = P, rest = O + triplets.push(( + words[0].to_string(), + words[1].to_string(), + words[2..].join(" "), + )); + } else if words.len() == 2 { + // Intransitive: S P (no object) + triplets.push(( + words[0].to_string(), + words[1].to_string(), + String::new(), + )); + } } - HeadPrint { dims } + triplets } + const COMMON_VERBS: &[&str] = &[ + "is", "are", "was", "were", "has", "have", "had", "do", "does", "did", + "can", "could", "will", "would", "shall", "should", "may", "might", + "must", "need", "know", "think", "want", "like", "use", "find", "give", + "tell", "say", "get", "make", "go", "see", "come", "take", "help", + "show", "try", "ask", "work", "call", "keep", "let", "begin", "seem", + "run", "move", "live", "believe", "hold", "bring", "happen", "write", + "provide", "sit", "stand", "lose", "pay", "meet", "include", "continue", + "set", "learn", "change", "lead", "understand", "watch", "follow", + "stop", "create", "speak", "read", "allow", "add", "spend", "grow", + "open", "walk", "win", "offer", "remember", "love", "consider", "appear", + "buy", "wait", "serve", "die", "send", "expect", "build", "stay", + "fall", "cut", "reach", "kill", "remain", "causes", "enables", 
"supports", + ]; + fn phase_to_str(phase: Phase) -> &'static str { match phase { Phase::Exposition => "exposition", @@ -185,6 +173,7 @@ mod server { } let mut server = state.lock().unwrap(); + let style = analytical_style(); let mut last_content = String::new(); @@ -192,40 +181,92 @@ mod server { let role = msg.get("role").and_then(|v| v.as_str()).unwrap_or("user"); let content = msg.get("content").and_then(|v| v.as_str()).unwrap_or(""); - let query = message_to_base17(content); - match role { "user" => { - if server.weights.len() > 0 { - // Direct L1 nearest neighbor on raw Base17 vectors - let neighbors = server.weights.nearest(&query, 5); - let heel_dist = query.l1(&server.weights.heel); + // 1. Extract SPO triplets from message text + let triplets = extract_triplets(content); - let top: Vec = neighbors.iter() - .map(|(i, d, name)| format!("{}:r{}(d={})", name, i, d)) - .collect(); + if triplets.is_empty() { + // Can't decompose — use whole message as single SPO + let fp = triplet_to_headprint(content, "states", ""); + let spo = headprint_to_spo(&fp, 0.9, 0.5); + let score = server.cache.nars.score(&spo, &server.context, &style); last_content = format!( - "[L1 search] heel_dist={} top_5=[{}] | \ - vectors={} | Phase: {} | Model: {}", - heel_dist, - top.join(", "), - server.weights.len(), - phase_to_str(server.cache.phase()), - model, - ); - } else { - let surprise = server.cache.triple.free_energy(&query); - last_content = format!( - "[No weights] Surprise={:.3} | Phase: {} | Model: {}", - surprise, + "[SPO] S={} P={} O={} | score={:.3} E={:.3} | \ + Phase: {} | Model: {}", + spo.s_idx, spo.p_idx, spo.o_idx, + score, spo.expectation(), phase_to_str(server.cache.phase()), model, ); + server.context = spo; + continue; } + + // 2. Process each triplet through the convergence pipeline + let mut results = Vec::new(); + for (s, p, o) in &triplets { + let fp = triplet_to_headprint(s, p, o); + let spo = headprint_to_spo(&fp, 0.9, 0.7); + + // 3. 
Score against context using NARS + style vector + let score = server.cache.nars.score(&spo, &server.context, &style); + + // 4. NARS inference against knowledge base + let mut best_inference = None; + let mut best_truth_e = 0.0f32; + for known in &server.knowledge { + // Try deduction: known → spo + let truth = nars_infer(known, &spo, Inference::Deduction); + let e = truth.expectation(); + if e > best_truth_e { + best_truth_e = e; + best_inference = Some(("deduction", known.s_idx, known.p_idx, known.o_idx, e)); + } + // Try abduction: spo ← known + let truth = nars_infer(&spo, known, Inference::Abduction); + let e = truth.expectation(); + if e > best_truth_e { + best_truth_e = e; + best_inference = Some(("abduction", known.s_idx, known.p_idx, known.o_idx, e)); + } + } + + // 5. Update context (the last SPO becomes the new context) + server.cache.nars.on_emit(&spo); + server.context = spo.clone(); + + let inference_str = match best_inference { + Some((rule, s, p, o, e)) => format!(" | NARS {}→[{},{},{}] E={:.3}", rule, s, p, o, e), + None => String::new(), + }; + + results.push(format!( + "({} —{}→ {}) S={} P={} O={} score={:.3}{}", + s, p, o, spo.s_idx, spo.p_idx, spo.o_idx, score, inference_str, + )); + } + + last_content = format!( + "[SPO×{}] {} | Phase: {} | knowledge={} | Model: {}", + triplets.len(), + results.join(" ; "), + phase_to_str(server.cache.phase()), + server.knowledge.len(), + model, + ); } "assistant" => { - server.cache.on_self_output(&query); + // Extract triplets from assistant response, add to knowledge + let triplets = extract_triplets(content); + for (s, p, o) in &triplets { + let fp = triplet_to_headprint(s, p, o); + let spo = headprint_to_spo(&fp, 0.85, 0.8); + server.knowledge.push(spo); + } + let fp = triplet_to_headprint(content, "responds", ""); + server.cache.on_self_output(&fp); } _ => {} } @@ -239,19 +280,19 @@ mod server { "choices": [{ "index": 0, "message": { "role": "assistant", "content": last_content }, - "finish_reason": 
"length", + "finish_reason": if server.cache.should_stop() { "stop" } else { "length" }, }], "usage": { "prompt_tokens": messages.len(), "completion_tokens": 1, "total_tokens": messages.len() + 1, }, - "system_fingerprint": format!("base17-{}", server.weights.len()), + "system_fingerprint": format!("spo-{}", phase_to_str(server.cache.phase())), }))) } async fn embeddings( - State(state): State, + State(_state): State, Json(req): Json, ) -> Result, (StatusCode, Json)> { let model = req.get("model").and_then(|v| v.as_str()).unwrap_or("bge-m3"); @@ -267,9 +308,21 @@ mod server { })))); } - // Base17: 17 dims, f32 for OpenAI compat - let fp = message_to_base17(input); - let embedding: Vec = fp.dims.iter().map(|&d| d as f64).collect(); + // SPO-decomposed embedding: extract triplets, encode each, bundle + let triplets = extract_triplets(input); + let embedding: Vec = if !triplets.is_empty() { + // Average of all triplet HeadPrints + let mut sums = [0.0f64; 17]; + for (s, p, o) in &triplets { + let fp = triplet_to_headprint(s, p, o); + for d in 0..17 { sums[d] += fp.dims[d] as f64; } + } + let n = triplets.len() as f64; + sums.iter().map(|s| s / n).collect() + } else { + let fp = triplet_to_headprint(input, "states", ""); + fp.dims.iter().map(|&d| d as f64).collect() + }; Ok(Json(json!({ "object": "list", @@ -286,30 +339,46 @@ mod server { }))) } + /// Load bgz7 weight shards into knowledge base as SPO heads. 
+ fn ingest_weights(knowledge: &mut Vec, path: &str) { + match ndarray::hpc::gguf_indexer::read_bgz7_file(path) { + Ok(tensors) => { + for ct in tensors { + // Each tensor becomes an SPO: tensor_name → "encodes" → layer + let fp = triplet_to_headprint(&ct.name, "encodes", "weights"); + let spo = headprint_to_spo(&fp, 0.95, 0.99); + knowledge.push(spo); + + // Sample weight rows as additional knowledge + for (_r, row) in ct.rows.iter().enumerate().take(100) { + let row_spo = headprint_to_spo(row, 0.9, 0.95); + knowledge.push(row_spo); + } + } + } + Err(e) => eprintln!(" SKIP {path}: {e}"), + } + } + pub async fn run(port: u16) { - let mut weights = WeightStore::new(); + let mut knowledge = Vec::new(); - // Ingest available bgz7 shards + // Ingest available bgz7 shards into knowledge base for path in &[ "/tmp/qwen35_27b_v2_shard02.bgz7", "/tmp/qwen35_27b_base_shard02.bgz7", ] { if std::fs::metadata(path).is_ok() { eprintln!("Ingesting {path}..."); - weights.ingest(path); + ingest_weights(&mut knowledge, path); } } - - if weights.len() > 0 { - weights.compute_heel(); - eprintln!("WeightStore: {} vectors, HEEL={:?}", weights.len(), weights.heel.dims); - } else { - eprintln!("No bgz7 weights found — running empty"); - } + eprintln!("Knowledge base: {} SPO heads", knowledge.len()); let server = ServerState { cache: AutocompleteCache::new(), - weights, + knowledge, + context: SpoHead::zero(), }; let state: AppState = std::sync::Arc::new(Mutex::new(server)); From 6f59d5ca0f9ec7d01cc0868363a676c96c4b9172 Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 31 Mar 2026 19:23:29 +0000 Subject: [PATCH 4/8] =?UTF-8?q?feat:=20partitioned=20CAM=20index=20?= =?UTF-8?q?=E2=80=94=20TensorRole=20+=20layer=5Fidx=20from=20tensor=20name?= =?UTF-8?q?s?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Parse tensor names (HuggingFace + GGUF conventions) into: - TensorRole: QProj/KProj/VProj/OProj/GateProj/UpProj/DownProj/Embedding/Norm - layer_idx: u16 
layer number (None for non-layer tensors) Stored as Arrow columns for Lance partition pruning. Enables per-role palettes (256 archetypes of "query behavior" vs "gating decisions") and per-layer search (only search gate_proj in layer 12, not all 5M vectors). No re-extraction from models needed — partition key was always in the bgz7 tensor names. https://claude.ai/code/session_01M3at4EuHVvQ8S95mSnKgtK --- crates/lance-graph/src/graph/hydrate.rs | 167 +++++++++++++++++++++--- 1 file changed, 151 insertions(+), 16 deletions(-) diff --git a/crates/lance-graph/src/graph/hydrate.rs b/crates/lance-graph/src/graph/hydrate.rs index 355ad2e5..cd7903c7 100644 --- a/crates/lance-graph/src/graph/hydrate.rs +++ b/crates/lance-graph/src/graph/hydrate.rs @@ -10,23 +10,98 @@ use arrow::array::{ ArrayRef, FixedSizeListBuilder, Float32Builder, Int16Builder, - StringArray, UInt32Array, UInt8Array, + StringArray, UInt16Array, UInt32Array, UInt8Array, }; use arrow::datatypes::{DataType, Field, Schema}; use arrow::record_batch::RecordBatch; use std::sync::Arc; -/// Schema for the hydrated weight table. -/// -/// - `tensor_name`: which weight tensor (e.g. "model.layers.0.self_attn.q_proj") -/// - `row_idx`: row within tensor -/// - `vector`: 17-dim f32 for Lance vector search (i16→f32 is exact) -/// - `base17`: 17-dim i16 raw values (for direct L1, palette assignment) -/// - `palette_s/p/o`: SPO palette indices (populated later by palette pipeline) +/// Functional partition of a weight tensor. 
+#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +pub enum TensorRole { + QProj, // "how this layer queries" + KProj, // "what this layer matches" + VProj, // "what this layer retrieves" + OProj, // "how this layer outputs attention" + GateProj, // "what this layer gates" + UpProj, // "what this layer amplifies" + DownProj, // "what this layer compresses" + Embedding, // "vocabulary → hidden" + Norm, // "scale/bias" + Other, // unclassified +} + +impl TensorRole { + /// Parse tensor role from the full tensor name string. + /// Works with both HuggingFace and GGUF naming conventions. + pub fn from_name(name: &str) -> Self { + let n = name.to_lowercase(); + if n.contains("q_proj") || n.contains("attn_q") || n.contains(".wq.") { TensorRole::QProj } + else if n.contains("k_proj") || n.contains("attn_k") || n.contains(".wk.") { TensorRole::KProj } + else if n.contains("v_proj") || n.contains("attn_v") || n.contains(".wv.") { TensorRole::VProj } + else if n.contains("o_proj") || n.contains("attn_output") || n.contains(".wo.") { TensorRole::OProj } + else if n.contains("gate_proj") || n.contains("ffn_gate") || n.contains(".w1.") { TensorRole::GateProj } + else if n.contains("up_proj") || n.contains("ffn_up") || n.contains(".w3.") { TensorRole::UpProj } + else if n.contains("down_proj") || n.contains("ffn_down") || n.contains(".w2.") { TensorRole::DownProj } + else if n.contains("embed") || n.contains("token_embd") { TensorRole::Embedding } + else if n.contains("norm") || n.contains("ln_") { TensorRole::Norm } + else { TensorRole::Other } + } + + /// Numeric ID for Arrow column storage. 
+ pub fn as_u8(&self) -> u8 { + match self { + TensorRole::QProj => 0, + TensorRole::KProj => 1, + TensorRole::VProj => 2, + TensorRole::OProj => 3, + TensorRole::GateProj => 4, + TensorRole::UpProj => 5, + TensorRole::DownProj => 6, + TensorRole::Embedding => 7, + TensorRole::Norm => 8, + TensorRole::Other => 9, + } + } + + pub fn label(&self) -> &'static str { + match self { + TensorRole::QProj => "q_proj", + TensorRole::KProj => "k_proj", + TensorRole::VProj => "v_proj", + TensorRole::OProj => "o_proj", + TensorRole::GateProj => "gate_proj", + TensorRole::UpProj => "up_proj", + TensorRole::DownProj => "down_proj", + TensorRole::Embedding => "embed", + TensorRole::Norm => "norm", + TensorRole::Other => "other", + } + } +} + +/// Extract layer index from tensor name. Returns None for non-layer tensors. +pub fn parse_layer_idx(name: &str) -> Option { + // Match "layers.N." or "blk.N." + let n = name.to_lowercase(); + if let Some(pos) = n.find("layers.") { + let rest = &n[pos + 7..]; + rest.split('.').next().and_then(|s| s.parse().ok()) + } else if let Some(pos) = n.find("blk.") { + let rest = &n[pos + 4..]; + rest.split('.').next().and_then(|s| s.parse().ok()) + } else { + None + } +} + +/// Schema for the hydrated weight table with partition columns. pub fn weight_schema() -> Schema { Schema::new(vec![ Field::new("tensor_name", DataType::Utf8, false), Field::new("row_idx", DataType::UInt32, false), + Field::new("layer_idx", DataType::UInt16, true), + Field::new("tensor_role", DataType::UInt8, false), Field::new( "vector", DataType::FixedSizeList( @@ -49,22 +124,30 @@ pub fn weight_schema() -> Schema { ]) } -/// Convert bgz7 compressed tensors to Arrow RecordBatch. +/// Convert bgz7 compressed tensors to Arrow RecordBatch with partition columns. /// -/// Stores both f32 (for Lance vector search) and i16 (for direct L1 / palette). +/// Each row gets `layer_idx` and `tensor_role` parsed from the tensor name. 
+/// This enables partitioned CAM indexing: per-role palettes, per-layer search. pub fn bgz7_to_batch( tensors: &[(String, Vec)], ) -> RecordBatch { let mut names = Vec::new(); let mut row_idxs = Vec::new(); + let mut layer_idxs: Vec> = Vec::new(); + let mut roles = Vec::new(); let mut vector_builder = FixedSizeListBuilder::new(Float32Builder::new(), 17); let mut base17_builder = FixedSizeListBuilder::new(Int16Builder::new(), 17); let mut total_rows = 0usize; for (name, rows) in tensors { + let role = TensorRole::from_name(name); + let layer = parse_layer_idx(name); + for (r, fp) in rows.iter().enumerate() { names.push(name.clone()); row_idxs.push(r as u32); + layer_idxs.push(layer); + roles.push(role.as_u8()); for d in 0..17 { vector_builder.values().append_value(fp.dims[d] as f32); base17_builder.values().append_value(fp.dims[d]); @@ -77,6 +160,8 @@ pub fn bgz7_to_batch( let name_array: ArrayRef = Arc::new(StringArray::from(names)); let row_idx_array: ArrayRef = Arc::new(UInt32Array::from(row_idxs)); + let layer_idx_array: ArrayRef = Arc::new(UInt16Array::from(layer_idxs)); + let role_array: ArrayRef = Arc::new(UInt8Array::from(roles)); let vector_array: ArrayRef = Arc::new(vector_builder.finish()); let base17_array: ArrayRef = Arc::new(base17_builder.finish()); let null_u8: ArrayRef = Arc::new(UInt8Array::from(vec![None::; total_rows])); @@ -84,6 +169,8 @@ pub fn bgz7_to_batch( RecordBatch::try_from_iter(vec![ ("tensor_name", name_array), ("row_idx", row_idx_array), + ("layer_idx", layer_idx_array), + ("tensor_role", role_array), ("vector", vector_array), ("base17", base17_array), ("palette_s", null_u8.clone()), @@ -175,18 +262,64 @@ mod tests { #[test] fn test_weight_schema() { let schema = weight_schema(); - assert_eq!(schema.fields().len(), 7); + assert_eq!(schema.fields().len(), 9); } #[test] fn test_bgz7_to_batch() { let tensors = vec![ - ("layer.0.q_proj".into(), vec![Base17 { dims: [100; 17] }, Base17 { dims: [200; 17] }]), - ("layer.0.k_proj".into(), 
vec![Base17 { dims: [-50; 17] }]), + ("model.layers.0.self_attn.q_proj.weight".into(), vec![Base17 { dims: [100; 17] }, Base17 { dims: [200; 17] }]), + ("model.layers.0.self_attn.k_proj.weight".into(), vec![Base17 { dims: [-50; 17] }]), ]; let batch = bgz7_to_batch(&tensors); assert_eq!(batch.num_rows(), 3); - assert_eq!(batch.num_columns(), 7); + assert_eq!(batch.num_columns(), 9); + } + + #[test] + fn test_tensor_role_parsing() { + assert_eq!(TensorRole::from_name("model.layers.0.self_attn.q_proj.weight"), TensorRole::QProj); + assert_eq!(TensorRole::from_name("model.layers.0.self_attn.k_proj.weight"), TensorRole::KProj); + assert_eq!(TensorRole::from_name("model.layers.0.self_attn.v_proj.weight"), TensorRole::VProj); + assert_eq!(TensorRole::from_name("model.layers.0.self_attn.o_proj.weight"), TensorRole::OProj); + assert_eq!(TensorRole::from_name("model.layers.0.mlp.gate_proj.weight"), TensorRole::GateProj); + assert_eq!(TensorRole::from_name("model.layers.0.mlp.up_proj.weight"), TensorRole::UpProj); + assert_eq!(TensorRole::from_name("model.layers.0.mlp.down_proj.weight"), TensorRole::DownProj); + assert_eq!(TensorRole::from_name("model.embed_tokens.weight"), TensorRole::Embedding); + assert_eq!(TensorRole::from_name("model.layers.0.input_layernorm.weight"), TensorRole::Norm); + // GGUF naming + assert_eq!(TensorRole::from_name("blk.5.attn_q.weight"), TensorRole::QProj); + assert_eq!(TensorRole::from_name("blk.5.ffn_gate.weight"), TensorRole::GateProj); + } + + #[test] + fn test_layer_idx_parsing() { + assert_eq!(parse_layer_idx("model.layers.15.self_attn.q_proj.weight"), Some(15)); + assert_eq!(parse_layer_idx("blk.7.attn_q.weight"), Some(7)); + assert_eq!(parse_layer_idx("model.embed_tokens.weight"), None); + assert_eq!(parse_layer_idx("model.layers.0.mlp.gate_proj.weight"), Some(0)); + } + + #[test] + fn test_partition_columns_populated() { + let tensors = vec![ + ("model.layers.5.self_attn.q_proj.weight".into(), vec![Base17 { dims: [100; 17] }]), + 
("model.layers.5.mlp.gate_proj.weight".into(), vec![Base17 { dims: [200; 17] }]), + ("model.embed_tokens.weight".into(), vec![Base17 { dims: [50; 17] }]), + ]; + let batch = bgz7_to_batch(&tensors); + let roles = batch.column_by_name("tensor_role").unwrap(); + let role_arr = roles.as_any().downcast_ref::().unwrap(); + assert_eq!(role_arr.value(0), TensorRole::QProj.as_u8()); + assert_eq!(role_arr.value(1), TensorRole::GateProj.as_u8()); + assert_eq!(role_arr.value(2), TensorRole::Embedding.as_u8()); + + let layers = batch.column_by_name("layer_idx").unwrap(); + let layer_arr = layers.as_any().downcast_ref::().unwrap(); + assert_eq!(layer_arr.value(0), 5); + assert_eq!(layer_arr.value(1), 5); + // First two have layer 5, third (embed) has no layer + assert_eq!(batch.num_rows(), 3); } #[test] @@ -224,11 +357,13 @@ mod tests { } #[test] - fn test_both_columns_present() { - let tensors = vec![("t".into(), vec![Base17 { dims: [42; 17] }])]; + fn test_all_columns_present() { + let tensors = vec![("model.layers.0.self_attn.q_proj.weight".into(), vec![Base17 { dims: [42; 17] }])]; let batch = bgz7_to_batch(&tensors); assert!(batch.column_by_name("vector").is_some()); assert!(batch.column_by_name("base17").is_some()); + assert!(batch.column_by_name("layer_idx").is_some()); + assert!(batch.column_by_name("tensor_role").is_some()); assert!(batch.column_by_name("palette_s").is_some()); } From 41f6b734f65f47a8836b729f431b7d2762cb10d3 Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 31 Mar 2026 19:29:07 +0000 Subject: [PATCH 5/8] =?UTF-8?q?feat:=20NeuronPrint=20+=20NeuronQuery=20+?= =?UTF-8?q?=20NeuronTrace=20=E2=80=94=206D=20holographic=20neuron=20repres?= =?UTF-8?q?entation?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit NeuronPrint (204 bytes): Q/K/V/Gate/Up/Down — complete behavior of one neuron. 
bundle() → 34-byte holographic fingerprint (all 6 roles superposed) attention() → Q ⊕ K (what it attends to) retrieval() → K ⊕ V (what it retrieves when matched) mlp() → Gate ⊕ Up ⊕ Down (the nonlinear transform) NeuronQuery: selective role probing with Optional fields. attention(q) → probe Q against K store retrieval(k) → probe K against V store gating(gate) → probe Gate role_mask() → 6-bit Pearl-like mask (Q/K/V/Gate/Up/Down) score(neuron) → L1 distance on active roles only NeuronTrace: NARS truth derived from role ratios. frequency → Gate magnitude (how often this neuron fires) confidence → Up/Down ratio (evidence strength) attention → Q·K alignment (activation strength) coherence → K·V alignment (retrieval quality) expectation → c * (f - 0.5) + 0.5 https://claude.ai/code/session_01M3at4EuHVvQ8S95mSnKgtK --- crates/lance-graph/src/graph/mod.rs | 1 + crates/lance-graph/src/graph/neuron.rs | 307 +++++++++++++++++++++++++ 2 files changed, 308 insertions(+) create mode 100644 crates/lance-graph/src/graph/neuron.rs diff --git a/crates/lance-graph/src/graph/mod.rs b/crates/lance-graph/src/graph/mod.rs index 2e03ec53..6307bb6f 100644 --- a/crates/lance-graph/src/graph/mod.rs +++ b/crates/lance-graph/src/graph/mod.rs @@ -14,6 +14,7 @@ pub mod falkor_semirings; pub mod fingerprint; pub mod hydrate; pub mod metadata; +pub mod neuron; pub mod neighborhood; pub mod sparse; pub mod spo; diff --git a/crates/lance-graph/src/graph/neuron.rs b/crates/lance-graph/src/graph/neuron.rs new file mode 100644 index 00000000..09fd5511 --- /dev/null +++ b/crates/lance-graph/src/graph/neuron.rs @@ -0,0 +1,307 @@ +//! NeuronPrint: 6D holographic representation of a single neuron's behavior. +//! +//! Each neuron (layer i, feature j) has 6 roles in the transformer: +//! Q = how it queries (34 bytes) +//! K = what it matches (34 bytes) +//! V = what it retrieves (34 bytes) +//! Gate = whether it fires (34 bytes) +//! Up = how it amplifies (34 bytes) +//! 
Down = how it compresses (34 bytes) +//! +//! Total: 204 bytes per neuron. Holographic: bundle all 6 → 34 bytes. +//! The CAM index (row_idx) aligns all 6 tables — same row = same feature. +//! +//! Three constructs: +//! NeuronPrint — what a neuron IS (the object, 204 bytes) +//! NeuronQuery — how you ASK it (the query, selective role probing) +//! NeuronTrace — how it REASONS (the thinking, NARS truth from role ratios) + +use ndarray::hpc::bgz17_bridge::Base17; + +// ─── Object: what a neuron IS ─────────────────────────────────────────────── + +/// Complete 6D representation of a single neuron at (layer, feature). +/// 204 bytes. Each field is a 34-byte Base17 vector. +#[derive(Clone, Debug)] +pub struct NeuronPrint { + /// Layer index in the model. + pub layer: u16, + /// Feature/row index within the layer. + pub feature: u32, + /// Query projection: how this neuron queries. + pub q: Base17, + /// Key projection: what this neuron matches. + pub k: Base17, + /// Value projection: what this neuron retrieves. + pub v: Base17, + /// Gate projection: whether this neuron fires (SwiGLU gate). + pub gate: Base17, + /// Up projection: how this neuron amplifies. + pub up: Base17, + /// Down projection: how this neuron compresses. + pub down: Base17, +} + +impl NeuronPrint { + /// Bundle all 6 roles into a single 34-byte holographic fingerprint. + /// The gestalt contains all roles in superposition. + pub fn bundle(&self) -> Base17 { + let mut dims = [0i32; 17]; + for d in 0..17 { + dims[d] = self.q.dims[d] as i32 + + self.k.dims[d] as i32 + + self.v.dims[d] as i32 + + self.gate.dims[d] as i32 + + self.up.dims[d] as i32 + + self.down.dims[d] as i32; + } + let mut out = [0i16; 17]; + for d in 0..17 { + out[d] = (dims[d] / 6).clamp(-32768, 32767) as i16; + } + Base17 { dims: out } + } + + /// Attention fingerprint: Q ⊕ K (what this neuron attends to). 
+ pub fn attention(&self) -> Base17 { + self.q.xor_bind(&self.k) + } + + /// Retrieval fingerprint: K ⊕ V (what this neuron retrieves when matched). + pub fn retrieval(&self) -> Base17 { + self.k.xor_bind(&self.v) + } + + /// MLP fingerprint: Gate ⊕ Up ⊕ Down (the nonlinear transform). + pub fn mlp(&self) -> Base17 { + self.gate.xor_bind(&self.up).xor_bind(&self.down) + } + + /// Byte size of the full neuron print. + pub const BYTE_SIZE: usize = 6 * 34; // 204 +} + +// ─── Query: how you ASK a neuron ──────────────────────────────────────────── + +/// Selective probe into neuron roles. Set the roles you want to query. +/// None = wildcard (don't constrain this role). +#[derive(Clone, Debug, Default)] +pub struct NeuronQuery { + /// Constrain layer (None = any layer). + pub layer: Option, + /// Constrain feature (None = any feature). + pub feature: Option, + /// Query vector for Q-role (None = don't probe Q). + pub q: Option, + /// Query vector for K-role (None = don't probe K). + pub k: Option, + /// Query vector for V-role (None = don't probe V). + pub v: Option, + /// Query vector for Gate-role (None = don't probe Gate). + pub gate: Option, + /// Query vector for Up-role (None = don't probe Up). + pub up: Option, + /// Query vector for Down-role (None = don't probe Down). + pub down: Option, +} + +impl NeuronQuery { + /// "What does this query attend to?" — probe Q against K store. + pub fn attention(q: Base17) -> Self { + NeuronQuery { q: Some(q), ..Default::default() } + } + + /// "What is retrieved for this key?" — probe K against V store. + pub fn retrieval(k: Base17) -> Self { + NeuronQuery { k: Some(k), ..Default::default() } + } + + /// "Does this feature fire?" — probe Gate. + pub fn gating(gate: Base17) -> Self { + NeuronQuery { gate: Some(gate), ..Default::default() } + } + + /// "What does layer N do?" — constrain to a specific layer. 
+ pub fn at_layer(mut self, layer: u16) -> Self { + self.layer = Some(layer); + self + } + + /// Score a NeuronPrint against this query. Lower = better match. + /// Only active (Some) roles contribute to the score. + pub fn score(&self, neuron: &NeuronPrint) -> u32 { + let mut total = 0u32; + let mut count = 0u32; + if let Some(ref q) = self.q { total += q.l1(&neuron.q); count += 1; } + if let Some(ref k) = self.k { total += k.l1(&neuron.k); count += 1; } + if let Some(ref v) = self.v { total += v.l1(&neuron.v); count += 1; } + if let Some(ref g) = self.gate { total += g.l1(&neuron.gate); count += 1; } + if let Some(ref u) = self.up { total += u.l1(&neuron.up); count += 1; } + if let Some(ref d) = self.down { total += d.l1(&neuron.down); count += 1; } + if count > 0 { total / count } else { u32::MAX } + } + + /// How many roles are active in this query. + pub fn active_roles(&self) -> u8 { + [&self.q, &self.k, &self.v, &self.gate, &self.up, &self.down] + .iter() + .filter(|r| r.is_some()) + .count() as u8 + } + + /// Pearl-like mask: which roles are active (6-bit). + /// Bit 0=Q, 1=K, 2=V, 3=Gate, 4=Up, 5=Down. + pub fn role_mask(&self) -> u8 { + let mut mask = 0u8; + if self.q.is_some() { mask |= 1 << 0; } + if self.k.is_some() { mask |= 1 << 1; } + if self.v.is_some() { mask |= 1 << 2; } + if self.gate.is_some() { mask |= 1 << 3; } + if self.up.is_some() { mask |= 1 << 4; } + if self.down.is_some() { mask |= 1 << 5; } + mask + } +} + +// ─── Thinking: how a neuron REASONS ───────────────────────────────────────── + +/// NARS truth values derived from the 6 role ratios. +/// The MLP roles (Gate/Up/Down) encode causal structure. +#[derive(Clone, Debug)] +pub struct NeuronTrace { + /// NARS frequency: P(fires) derived from Gate activation. + /// gate_magnitude / max_magnitude → [0, 1]. + pub frequency: f32, + /// NARS confidence: Up/Down ratio → evidence strength. + /// High Up + low Down = strong positive evidence. 
+ /// Low Up + high Down = strong compression (less evidence). + pub confidence: f32, + /// Attention strength: Q·K alignment (L1 distance, inverted). + /// Low distance = strong attention = this neuron activates. + pub attention: f32, + /// Retrieval coherence: K·V alignment. + /// Low distance = coherent retrieval (what's stored matches what's keyed). + pub coherence: f32, + /// NARS expectation: c * (f - 0.5) + 0.5. + pub expectation: f32, +} + +impl NeuronTrace { + /// Derive NARS truth from a NeuronPrint. + pub fn from_neuron(n: &NeuronPrint) -> Self { + // Gate magnitude → frequency (how often this neuron fires) + let gate_mag = n.gate.dims.iter().map(|d| (*d as f32).abs()).sum::(); + let max_mag = 17.0 * 32768.0; + let frequency = (gate_mag / max_mag).clamp(0.0, 1.0); + + // Up/Down ratio → confidence + let up_mag = n.up.dims.iter().map(|d| (*d as f32).abs()).sum::(); + let down_mag = n.down.dims.iter().map(|d| (*d as f32).abs()).sum::().max(1.0); + let confidence = (up_mag / (up_mag + down_mag)).clamp(0.0, 0.99); + + // Q·K alignment → attention strength + let qk_dist = n.q.l1(&n.k) as f32; + let attention = 1.0 - (qk_dist / max_mag).clamp(0.0, 1.0); + + // K·V alignment → retrieval coherence + let kv_dist = n.k.l1(&n.v) as f32; + let coherence = 1.0 - (kv_dist / max_mag).clamp(0.0, 1.0); + + let expectation = confidence * (frequency - 0.5) + 0.5; + + NeuronTrace { frequency, confidence, attention, coherence, expectation } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn make_neuron(layer: u16, feature: u32, base_val: i16) -> NeuronPrint { + NeuronPrint { + layer, + feature, + q: Base17 { dims: [base_val; 17] }, + k: Base17 { dims: [base_val + 10; 17] }, + v: Base17 { dims: [base_val + 20; 17] }, + gate: Base17 { dims: [base_val + 100; 17] }, + up: Base17 { dims: [base_val + 50; 17] }, + down: Base17 { dims: [base_val + 30; 17] }, + } + } + + #[test] + fn test_neuron_bundle() { + let n = make_neuron(0, 0, 100); + let b = n.bundle(); + // Average 
of 100, 110, 120, 200, 150, 130 = 135 + assert_eq!(b.dims[0], 135); + } + + #[test] + fn test_neuron_byte_size() { + assert_eq!(NeuronPrint::BYTE_SIZE, 204); + } + + #[test] + fn test_query_attention() { + let q = NeuronQuery::attention(Base17 { dims: [100; 17] }); + assert_eq!(q.active_roles(), 1); + assert_eq!(q.role_mask(), 0b000001); // Q only + } + + #[test] + fn test_query_score() { + let n = make_neuron(0, 0, 100); + // Query that matches Q exactly + let q_exact = NeuronQuery::attention(Base17 { dims: [100; 17] }); + let score_exact = q_exact.score(&n); + // Query that's far from Q + let q_far = NeuronQuery::attention(Base17 { dims: [10000; 17] }); + let score_far = q_far.score(&n); + assert!(score_exact < score_far, "exact match should score lower (closer)"); + } + + #[test] + fn test_query_multi_role() { + let q = NeuronQuery { + q: Some(Base17 { dims: [100; 17] }), + k: Some(Base17 { dims: [200; 17] }), + ..Default::default() + }; + assert_eq!(q.active_roles(), 2); + assert_eq!(q.role_mask(), 0b000011); // Q + K + } + + #[test] + fn test_trace_from_neuron() { + let n = make_neuron(5, 42, 100); + let t = NeuronTrace::from_neuron(&n); + assert!(t.frequency > 0.0); + assert!(t.confidence > 0.0 && t.confidence < 1.0); + assert!(t.attention > 0.0); // Q and K are close (only differ by 10) + assert!(t.expectation > 0.0 && t.expectation < 1.0); + } + + #[test] + fn test_high_gate_high_frequency() { + let mut n = make_neuron(0, 0, 0); + n.gate = Base17 { dims: [30000; 17] }; // high gate + let t = NeuronTrace::from_neuron(&n); + assert!(t.frequency > 0.8, "high gate should mean high frequency: {}", t.frequency); + } + + #[test] + fn test_attention_xor_bind() { + let n = make_neuron(0, 0, 100); + let attn = n.attention(); // Q ⊕ K + // Should be non-zero (Q ≠ K) + assert!(attn.dims.iter().any(|d| *d != 0)); + } + + #[test] + fn test_query_at_layer() { + let q = NeuronQuery::attention(Base17 { dims: [100; 17] }).at_layer(15); + assert_eq!(q.layer, Some(15)); + } +} 
From 8650b4ae00d3524bd418dd867c268446cc082429 Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 31 Mar 2026 19:35:12 +0000 Subject: [PATCH 6/8] =?UTF-8?q?docs:=20NeuronPrint=20Rosetta=20Stone=20?= =?UTF-8?q?=E2=80=94=206D=20holographic=20neuron=20representation?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The 6 weight tensor roles (Q/K/V/Gate/Up/Down) are 6 dimensions of a single neuron's complete behavior. 204 bytes per neuron, aligned by row index across all 6 tables. The CAM position IS preserved. Key epiphanies: - Q/K/V = attention triad (who asks, what matches, what's retrieved) - Gate/Up/Down = MLP triad (fires?, amplifies?, compresses?) - K+V = key-value retrieval store, Q = query against it - Gate/Up/Down = NARS truth hydration (frequency, confidence) - Two triads = 6D SPO: each triad is an S/P/O decomposition - Cross-role distances are meaningful (Q·K = attention sharpness) - Same structure across Llama/Qwen/GPT-2/GGUF with naming map Rosetta exploration needed: - Do Q archetypes cluster by semantic role? - Does Gate magnitude predict neuron importance? - Does Up/Down ratio detect polysemanticity? - Layer-wise NeuronTrace progression (feature → concept gradient) No re-extraction from models needed — partition key was always in the bgz7 tensor names. Just needs grouping by tensor role. 
https://claude.ai/code/session_01M3at4EuHVvQ8S95mSnKgtK --- docs/NEURONPRINT_ROSETTA.md | 225 ++++++++++++++++++++++++++++++++++++ 1 file changed, 225 insertions(+) create mode 100644 docs/NEURONPRINT_ROSETTA.md diff --git a/docs/NEURONPRINT_ROSETTA.md b/docs/NEURONPRINT_ROSETTA.md new file mode 100644 index 00000000..11c901d3 --- /dev/null +++ b/docs/NEURONPRINT_ROSETTA.md @@ -0,0 +1,225 @@ +# NeuronPrint Rosetta Stone + +> **Date**: 2026-03-31 +> **Status**: Exploration — we built the instrument, now we learn to read it + +--- + +## What We Built + +Every neuron (layer `i`, feature `j`) in a transformer has 6 functional roles, +each compressed to 34 bytes (Base17, ρ=0.993 vs BF16). Together: **204 bytes +per neuron** — a complete holographic fingerprint of what that neuron does. + +``` +NeuronPrint { + q: Base17, // 34B — how this neuron queries (attention Q projection) + k: Base17, // 34B — what this neuron matches (attention K projection) + v: Base17, // 34B — what this neuron retrieves (attention V projection) + gate: Base17, // 34B — whether this neuron fires (SwiGLU/MLP gate) + up: Base17, // 34B — how this neuron amplifies (MLP up projection) + down: Base17, // 34B — how this neuron compresses (MLP down projection) +} +``` + +Three operations on it: + +| Struct | Purpose | Metaphor | +|--------|---------|----------| +| `NeuronPrint` | What a neuron IS | The object — its complete behavior in 204 bytes | +| `NeuronQuery` | How you ASK it | The query — selective role probing (6-bit mask) | +| `NeuronTrace` | How it REASONS | The thinking — NARS truth derived from role ratios | + +--- + +## The Epiphany: 6D SPO + +The 6 roles map to an extended SPO decomposition. Classical SPO has 3 planes +(Subject, Predicate, Object). NeuronPrint has 6 — which factor into two triads: + +``` +Attention Triad (how the neuron communicates): + Q = Subject "who is asking?" + K = Predicate "what is the relationship?" + V = Object "what is the answer?" 
+ +MLP Triad (how the neuron transforms): + Gate = Subject "what input feature is this about?" + Up = Predicate "how does it transform?" + Down = Object "what does it produce?" +``` + +The two triads are linked by the residual stream — attention writes to it, +MLP reads from it. The NeuronPrint captures BOTH sides: the communication +(Q/K/V) and the computation (Gate/Up/Down) in a single 204-byte struct. + +### Why This Is a Rosetta Stone + +The same neuron appears in all 6 tables, aligned by row index. This means: + +1. **Q tells you what the neuron looks for** — its query pattern +2. **K tells you when the neuron responds** — its matching criteria +3. **V tells you what the neuron says** — its contribution +4. **Gate tells you IF the neuron speaks** — its activation threshold +5. **Up tells you HOW MUCH it speaks** — its amplification factor +6. **Down tells you how it's COMPRESSED afterward** — the information bottleneck + +Reading all 6 together is like having the Rosetta Stone for that neuron — +the same information expressed in 6 different "languages" (projection spaces). + +--- + +## Retrieval vs Reasoning + +The 6 roles split cleanly into two uses: + +### Retrieval (Key-Value Store) +``` +Q probes against K → finds matching neurons +V at those positions → the retrieved information +``` +This IS attention, reconstructed from palette indices. It's a key-value cache +where K is the key and V is the value, and Q is the lookup query. + +### Reasoning (NARS Hydration) +``` +Gate magnitude → NARS frequency (how often does this fire?) +Up/Down ratio → NARS confidence (how strong is the evidence?) +Q·K alignment → attention strength (how relevant is this?) +K·V alignment → retrieval coherence (how consistent is the stored info?) +``` +The MLP roles encode causal structure. A neuron with high Gate, high Up, +low Down is a "confident amplifier" — it fires often and boosts its signal. 
+A neuron with low Gate, low Up, high Down is a "skeptical compressor" — +it rarely fires and attenuates when it does. + +--- + +## The LLM Architecture Zoo + +Different LLM architectures use different naming conventions but the same +6 functional roles. Here's the mapping: + +### Llama / Qwen / Mistral (GQA attention + SwiGLU MLP) +``` +model.layers.{L}.self_attn.q_proj.weight → Q +model.layers.{L}.self_attn.k_proj.weight → K (grouped, fewer heads) +model.layers.{L}.self_attn.v_proj.weight → V (grouped, same as K) +model.layers.{L}.self_attn.o_proj.weight → O (output projection, maps back) +model.layers.{L}.mlp.gate_proj.weight → Gate (SwiGLU σ(x) branch) +model.layers.{L}.mlp.up_proj.weight → Up (SwiGLU linear branch) +model.layers.{L}.mlp.down_proj.weight → Down (back to hidden dim) +``` + +### GPT-2 / GPT-J (MHA attention + GELU MLP) +``` +transformer.h.{L}.attn.c_attn.weight → Q+K+V fused (split by dim) +transformer.h.{L}.attn.c_proj.weight → O +transformer.h.{L}.mlp.c_fc.weight → Up (no gate in GELU MLP) +transformer.h.{L}.mlp.c_proj.weight → Down +``` +Note: GPT-2 has no separate Gate — GELU activation is implicit. The Gate +role is absent; use Up magnitude as a proxy for both gating and amplification. + +### GGUF (llama.cpp naming) +``` +blk.{L}.attn_q.weight → Q +blk.{L}.attn_k.weight → K +blk.{L}.attn_v.weight → V +blk.{L}.attn_output.weight → O +blk.{L}.ffn_gate.weight → Gate +blk.{L}.ffn_up.weight → Up +blk.{L}.ffn_down.weight → Down +``` + +### What Varies Between Architectures +- **GQA vs MHA**: K and V may have fewer heads than Q (grouped query attention). + Row count differs: Q has `n_heads × d_head` rows, K/V have `n_kv_heads × d_head`. +- **SwiGLU vs GELU**: SwiGLU has explicit Gate; GELU doesn't. For GELU models, + the Gate NeuronPrint role is empty or derived from Up. +- **Fused QKV**: Some models fuse Q/K/V into one weight matrix. Need to split + by dimension when extracting. 
+ +--- + +## What We Don't Know Yet (Rosetta Exploration) + +### Unanswered Questions +1. **Do Q archetypes cluster by semantic role?** If palette entry 42 in the + Q palette consistently corresponds to "entity lookup" across layers, that's + a universal attention primitive. If it doesn't, the palette is just compression. + +2. **Does Gate magnitude correlate with neuron importance?** Literature suggests + yes (see: SwiGLU analysis papers), but we haven't verified on our Base17 + projections. The ρ=0.993 preservation should keep this relationship intact. + +3. **Are cross-role distances meaningful?** Does `L1(Q[i][j], K[i][j])` (the + Q-K alignment for one neuron) predict attention entropy? Theory says yes: + a neuron whose Q and K are similar attends broadly; one whose Q and K + differ attends sharply. + +4. **Does the Up/Down ratio track with polysemanticity?** A neuron with many + features (polysemantic) should have high Up magnitude (many activations) + but also high Down magnitude (aggressive compression). The ratio might + identify monosemantic vs polysemantic neurons. + +5. **Layer-wise structure**: Do early layers (feature detection) have different + Gate/Up/Down distributions than late layers (concept composition)? + The Hyperprobe paper suggests probing only the second half of layers. + +### What the Literature Tells Us +- **Anthropic's "Scaling Monosemanticity"** (2024): Individual neurons often + represent single concepts. The NeuronPrint should capture this — a monosemantic + neuron has a tight, unique fingerprint across all 6 roles. +- **"Attention Head Superposition"** (2024): Attention heads can represent multiple + features simultaneously. The Q/K alignment in NeuronPrint detects this — + broad alignment = superposed, tight alignment = specialized. +- **SwiGLU analysis** (Shazeer 2020, PaLM): Gate projection acts as a learned + binary mask over features. High Gate magnitude = important feature. 
+- **Residual stream as communication bus** (Elhage et al. 2021): All layers + read from and write to the same residual stream. NeuronPrint captures both + the read (Q/K) and write (V/Down) sides. + +--- + +## Next Steps + +1. **Hydrate a real model** with partition columns and build per-role palettes. + Compare archetype distributions across Q/K/V/Gate/Up/Down. + +2. **Cross-role distance analysis**: For each neuron, compute Q·K, K·V, + Gate magnitude, Up/Down ratio. Correlate with known interpretability results. + +3. **Layer progression**: Plot NeuronTrace (frequency, confidence, attention, + coherence) across layers. Does it match the feature→concept gradient? + +4. **Diff between models**: Compare Opus 4.5 vs 4.6 NeuronPrints. + Which roles diverge? Which layers? This tells you WHERE the behavioral + difference lives in the architecture. + +5. **Wire NeuronQuery into serve.rs**: Replace flat SPO extraction with + role-aware probing. "What does this model know about X?" becomes + `NeuronQuery::attention(encode(X)).at_layer(15)` → searches Q partition + at layer 15 only. + +--- + +## Memory Budget + +For Qwen3.5-27B (28 layers, ~5M weight rows across all tensors): + +``` +Per neuron: 204 bytes (6 × 34) +Per layer: ~180K neurons × 204 bytes = ~36 MB +Full model: 28 layers × 36 MB ≈ 1 GB (NeuronPrint for every feature) +Bundled per layer: 28 × 34 bytes = 952 bytes (one HEEL per layer) +Bundled per role: 6 × 34 bytes = 204 bytes (one HEEL per role type) +Full model HEEL: 34 bytes +``` + +Compare: original BF16 weights = ~54 GB. NeuronPrint = ~1 GB. 54× compression +while adding structural metadata (role, layer) that the raw weights don't have. + +The 5M × 34 bytes (170 MB) we already have in bgz7 = the LEAF level. +NeuronPrint organizes the same data into 6 aligned tables with semantic meaning. +No additional extraction needed — just grouping by tensor role. 
From 4e8b96021646d1f1d29e9d43db12a858195bb03b Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 31 Mar 2026 19:49:16 +0000 Subject: [PATCH 7/8] =?UTF-8?q?docs:=206D=20SPO=20query=20language=20desig?= =?UTF-8?q?n=20=E2=80=94=20Cypher/GQL=20extension=20for=20NeuronPrint?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extends lance-graph's Cypher parser with 6D NeuronPrint-aware queries: MATCH (n:Neuron {layer:15})-[:ATTENDS]->(m:Neuron) WHERE l1(n.q, m.k) < 50 RETURN n.feature, m.v, n.trace.confidence Maps to DataFusion SQL over partitioned Lance datasets: - Partition prune by tensor_role + layer_idx - RaBitQ ANN on vector column - UDFs: l1, magnitude, xor_bind, bundle, neuron_trace, nars_revision 4-phase implementation plan: Phase 1: DataFusion UDFs (pure SQL, no Cypher changes) Phase 2: Cypher extension (parser + planner) Phase 3: Cross-layer tracing (residual stream paths) Phase 4: Model comparison (multi-model diff queries) https://claude.ai/code/session_01M3at4EuHVvQ8S95mSnKgtK --- docs/NEURON_QUERY_LANGUAGE.md | 229 ++++++++++++++++++++++++++++++++++ 1 file changed, 229 insertions(+) create mode 100644 docs/NEURON_QUERY_LANGUAGE.md diff --git a/docs/NEURON_QUERY_LANGUAGE.md b/docs/NEURON_QUERY_LANGUAGE.md new file mode 100644 index 00000000..6be4ee68 --- /dev/null +++ b/docs/NEURON_QUERY_LANGUAGE.md @@ -0,0 +1,229 @@ +# 6D SPO Query Language — Cypher/GQL Extension for NeuronPrint + +> **Date**: 2026-03-31 +> **Status**: Design — ready to implement when token budget refreshes +> **Depends on**: Cypher parser (done), DataFusion planner (done), NeuronPrint (done), hydrate partitions (done) + +--- + +## The Idea + +Extend lance-graph's existing Cypher/GQL parser to query the 6D NeuronPrint +structure natively. DataFusion executes the query over partitioned Lance datasets. +The 6 tensor roles become first-class graph relationships. 
+ +``` +Today (string SPO): + MATCH (s:Entity)-[r:KNOWS]->(o:Entity) RETURN s, r, o + +Tomorrow (6D NeuronPrint SPO): + MATCH (n:Neuron)-[:Q]->(target) + WHERE n.layer = 15 AND distance(n.q, $query) < 100 + RETURN n.feature, n.v AS retrieval, n.trace.confidence AS conf +``` + +--- + +## Query Language Extension + +### Node Type: Neuron + +```cypher +-- A neuron is identified by (layer, feature) +MATCH (n:Neuron {layer: 15, feature: 42}) +RETURN n.q, n.k, n.v, n.gate, n.up, n.down +``` + +Each property (q, k, v, gate, up, down) is a 17-dim Base17 vector. + +### Relationship Types: The 6 Roles + +```cypher +-- Attention: what does layer 15 attend to? +MATCH (n:Neuron {layer: 15})-[:ATTENDS]->(m:Neuron) +WHERE l1(n.q, m.k) < 50 +RETURN n.feature, m.feature, m.v AS retrieved + +-- Gating: which neurons fire at layer 10? +MATCH (n:Neuron {layer: 10}) +WHERE magnitude(n.gate) > 0.8 +RETURN n.feature, n.trace.frequency + +-- MLP path: what does layer 5 amplify? +MATCH (n:Neuron {layer: 5}) +WHERE magnitude(n.up) > magnitude(n.down) * 2 +RETURN n.feature AS amplified, n.trace.confidence +``` + +### Role Masks (Pearl 2³ → Pearl 2⁶) + +```cypher +-- Probe only Q+K (attention query) +MATCH (n:Neuron) USING ROLES(q, k) +WHERE l1(n.q, $probe) < 100 +RETURN n.k, n.v + +-- Probe only Gate+Up+Down (reasoning query) +MATCH (n:Neuron) USING ROLES(gate, up, down) +WHERE n.trace.expectation > 0.7 +RETURN n.feature, n.layer, n.trace + +-- Full 6D probe +MATCH (n:Neuron) USING ROLES(*) +WHERE bundle_distance(n, $query) < 200 +RETURN n +``` + +### Cross-Layer Queries (Residual Stream Tracing) + +```cypher +-- Trace a concept through the network: +-- which neurons activate at each layer for this query? 
+MATCH path = (n:Neuron)-[:ATTENDS*]->(m:Neuron) +WHERE n.layer = 0 AND m.layer = 27 + AND l1(n.q, $concept) < 50 +RETURN nodes(path), [x IN nodes(path) | x.trace.frequency] AS activations +``` + +### NARS-Enriched Queries + +```cypher +-- Find neurons with high confidence AND high frequency +-- (strong, reliable features) +MATCH (n:Neuron) +WHERE n.trace.frequency > 0.8 AND n.trace.confidence > 0.7 +RETURN n.layer, n.feature, n.trace.expectation +ORDER BY n.trace.expectation DESC +LIMIT 100 + +-- Find contradictions: neurons where Q says one thing, Gate says another +MATCH (n:Neuron) +WHERE n.trace.attention > 0.8 AND n.trace.frequency < 0.2 +RETURN n AS "attends but doesn't fire" + +-- NARS revision across layers: combine evidence +MATCH (a:Neuron {layer: 10}), (b:Neuron {layer: 20}) +WHERE a.feature = b.feature +RETURN a.feature, + nars_revision(a.trace, b.trace) AS combined_truth +``` + +### Model Comparison (Diff) + +```cypher +-- Compare Opus 4.5 vs 4.6: where do they diverge? +MATCH (a:Neuron:Opus45), (b:Neuron:Opus46) +WHERE a.layer = b.layer AND a.feature = b.feature + AND l1(a.bundle, b.bundle) > 500 +RETURN a.layer, a.feature, + l1(a.q, b.q) AS q_diff, + l1(a.gate, b.gate) AS gate_diff, + CASE + WHEN l1(a.q, b.q) > l1(a.gate, b.gate) THEN 'attention changed' + ELSE 'gating changed' + END AS change_type +ORDER BY l1(a.bundle, b.bundle) DESC +``` + +--- + +## DataFusion Execution Plan + +The Cypher extension maps to DataFusion SQL over Lance datasets: + +``` +Cypher: + MATCH (n:Neuron {layer: 15})-[:ATTENDS]->(m:Neuron) + WHERE l1(n.q, m.k) < 50 + +DataFusion SQL: + SELECT a.feature, b.feature, b.vector AS v_vector + FROM weights a + JOIN weights b ON l1_distance(a.vector, b.vector) < 50 + WHERE a.layer_idx = 15 + AND a.tensor_role = 0 -- Q + AND b.tensor_role = 1 -- K + +Lance execution: + 1. Partition prune: tensor_role=0 (Q) for a, tensor_role=1 (K) for b + 2. Layer filter: layer_idx=15 for a + 3. 
Vector search: RaBitQ ANN on a.vector against b.vector + 4. Join: matching features where L1 < 50 + 5. Fetch: b's V-role vector for matched features +``` + +### UDFs Needed + +```sql +-- L1 distance between two Base17 vectors (17 × i16) +CREATE FUNCTION l1(a FIXED_SIZE_LIST(FLOAT32, 17), b FIXED_SIZE_LIST(FLOAT32, 17)) + RETURNS UINT32 AS 'l1_distance'; + +-- Magnitude of a Base17 vector (sum of abs values, normalized) +CREATE FUNCTION magnitude(a FIXED_SIZE_LIST(FLOAT32, 17)) + RETURNS FLOAT32 AS 'base17_magnitude'; + +-- XOR bind two Base17 vectors +CREATE FUNCTION xor_bind(a FIXED_SIZE_LIST(FLOAT32, 17), b FIXED_SIZE_LIST(FLOAT32, 17)) + RETURNS FIXED_SIZE_LIST(FLOAT32, 17) AS 'base17_xor_bind'; + +-- Bundle (average) multiple vectors +CREATE AGGREGATE FUNCTION bundle(a FIXED_SIZE_LIST(FLOAT32, 17)) + RETURNS FIXED_SIZE_LIST(FLOAT32, 17) AS 'base17_bundle'; + +-- NeuronTrace from 6 role vectors +CREATE FUNCTION neuron_trace(q, k, v, gate, up, down) + RETURNS STRUCT(frequency FLOAT32, confidence FLOAT32, + attention FLOAT32, coherence FLOAT32, + expectation FLOAT32) AS 'neuron_trace'; + +-- NARS revision of two truth values +CREATE FUNCTION nars_revision(a_f FLOAT32, a_c FLOAT32, b_f FLOAT32, b_c FLOAT32) + RETURNS STRUCT(frequency FLOAT32, confidence FLOAT32) AS 'nars_revision'; +``` + +--- + +## Implementation Plan + +### Phase 1: UDFs (pure DataFusion, no Cypher changes) +Register `l1`, `magnitude`, `xor_bind`, `bundle`, `neuron_trace`, `nars_revision` +as DataFusion scalar/aggregate UDFs. Queryable via raw SQL immediately. + +```sql +-- Already works after Phase 1: +SELECT tensor_name, row_idx, + l1(vector, ARRAY[100,200,...]) AS dist +FROM weights +WHERE tensor_role = 0 AND layer_idx = 15 +ORDER BY dist +LIMIT 10; +``` + +### Phase 2: Cypher Extension (parser + planner) +Add `Neuron` node type, role relationship types, `USING ROLES()` clause, +and `trace` property access to the existing nom-based Cypher parser. +Planner maps them to the Phase 1 UDFs. 
+ +### Phase 3: Cross-Layer Tracing +Add variable-length path patterns (`-[:ATTENDS*]->`) with layer progression +constraints. DataFusion recursive CTE or iterative join. + +### Phase 4: Model Comparison +Multi-model queries with label selectors (`:Opus45`, `:Opus46`). +Multiple Lance datasets joined on (layer, feature). + +--- + +## Why This Works + +1. **DataFusion is already a dependency** (version 51, mandatory for Cypher stack) +2. **Lance datasets support partition pruning** (tensor_role, layer_idx columns) +3. **Cypher parser is nom-based and extensible** (44 tests, well-structured AST) +4. **Arrow RecordBatches carry the partition columns** (just added in hydrate.rs) +5. **UDFs are pure functions** (L1, magnitude, xor_bind — all deterministic, SIMD-friendly) +6. **NeuronTrace is derived, not stored** (computed at query time from 6 role vectors) + +The query language makes the 6D NeuronPrint structure explorable without +writing Rust code. A researcher can interactively probe the model's knowledge +graph using familiar Cypher syntax. 
From e14581116a7976106dc8d3170e6a2c836ff5e7b0 Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 31 Mar 2026 19:56:16 +0000 Subject: [PATCH 8/8] =?UTF-8?q?docs:=20session=20handover=20=E2=80=94=206D?= =?UTF-8?q?=20NeuronPrint,=20loose=20ends,=20Rosetta=20exploration?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Complete handover prompt for next session: - What was built (serve.rs SPO pipeline, hydrate partitions, neuron.rs, docs) - Key epiphanies (6D = two SPO triads, palette = cleanup memory, golden-step = JL) - Loose ends (DataFusion UDFs, per-role palettes, real model hydration) - Rosetta exploration questions (Q archetype semantics, Gate importance, layer progression) - Architecture map with file paths - 7 commits this session - External references (Hyperprobe, Monosemanticity, SwiGLU, AriGraph) https://claude.ai/code/session_01M3at4EuHVvQ8S95mSnKgtK --- .../session_6d_neuronprint_handover.md | 263 ++++++++++++++++++ 1 file changed, 263 insertions(+) create mode 100644 .claude/prompts/session_6d_neuronprint_handover.md diff --git a/.claude/prompts/session_6d_neuronprint_handover.md b/.claude/prompts/session_6d_neuronprint_handover.md new file mode 100644 index 00000000..a4836ccb --- /dev/null +++ b/.claude/prompts/session_6d_neuronprint_handover.md @@ -0,0 +1,263 @@ +# SESSION HANDOVER: 6D NeuronPrint + Partitioned CAM + Cypher Query Language + +> **Date**: 2026-03-31 +> **Branch**: `claude/qwen-claude-reverse-eng-vHuHv` (both repos) +> **Last commit**: lance-graph 4e8b960, ndarray unchanged this session + +--- + +## What Was Built This Session + +### 1. 
serve.rs — SPO Extraction + NARS Reasoning (lance-graph-planner) + +**File**: `crates/lance-graph-planner/src/serve.rs` + +The OpenAI-compatible REST endpoint now decomposes messages into SPO triplets +instead of brute-force vector search: + +``` +message → extract_triplets() → (S, P, O) strings +→ triplet_to_headprint(S, P, O) → HeadPrint (S:6, P:6, O:5 dims) +→ headprint_to_spo() → SpoHead (palette indices + NARS truth) +→ nars_infer() deduction/abduction against knowledge base +``` + +Key insight: messages are decomposed at SPO level (like AriGraph does), +not hashed into flat fingerprints. The palette/DistanceMatrix/SimilarityTable +infrastructure is for the SPO triple store path, not for query-time search. + +### 2. hydrate.rs — Partitioned CAM Index (lance-graph core) + +**File**: `crates/lance-graph/src/graph/hydrate.rs` + +Arrow RecordBatch schema now includes partition columns: + +``` +tensor_name: Utf8 — full tensor path +row_idx: UInt32 — row within tensor +layer_idx: UInt16 — parsed from tensor name (nullable for non-layer tensors) +tensor_role: UInt8 — TensorRole enum (Q=0, K=1, V=2, O=3, Gate=4, Up=5, Down=6, ...) +vector: FixedSizeList(f32, 17) — for Lance ANN/RaBitQ +base17: FixedSizeList(i16, 17) — for direct L1 / palette +palette_s/p/o: UInt8 — SPO palette indices (populated later) +``` + +`TensorRole::from_name()` parses HuggingFace + GGUF naming conventions. +`parse_layer_idx()` extracts layer number. No re-extraction from models needed. + +**Tests**: 9 passing (tensor_role_parsing, layer_idx_parsing, partition_columns_populated, etc.) + +### 3. 
neuron.rs — 6D Holographic Neuron Representation (lance-graph core) + +**File**: `crates/lance-graph/src/graph/neuron.rs` + +Three structs: + +```rust +NeuronPrint // 204 bytes: Q/K/V/Gate/Up/Down — what a neuron IS +NeuronQuery // Selective role probing with Option per role — how you ASK +NeuronTrace // NARS truth from role ratios — how it REASONS +``` + +Key methods: +- `NeuronPrint::bundle()` → 34-byte holographic gestalt +- `NeuronPrint::attention()` → Q ⊕ K (retrieval fingerprint) +- `NeuronPrint::mlp()` → Gate ⊕ Up ⊕ Down (transform fingerprint) +- `NeuronQuery::attention(q)` → probes K store only +- `NeuronQuery::score(neuron)` → L1 on active roles only +- `NeuronQuery::role_mask()` → 6-bit mask (Q/K/V/Gate/Up/Down) +- `NeuronTrace::from_neuron()` → derives NARS f/c/attention/coherence/expectation + +**Tests**: 9 passing + +### 4. Documentation + +- `docs/NEURONPRINT_ROSETTA.md` — Epiphanies, LLM architecture zoo, unanswered questions +- `docs/NEURON_QUERY_LANGUAGE.md` — Cypher/GQL extension design, DataFusion UDFs, 4-phase plan + +--- + +## Key Epiphanies + +### The 6 tensor roles ARE 6 dimensions of one neuron +Each neuron (layer i, feature j) has the same row index across Q/K/V/Gate/Up/Down. +204 bytes = complete behavioral fingerprint. Bundle all 6 → 34 bytes holographic. + +### Two triads = 6D SPO +- Attention triad: Q=Subject, K=Predicate, V=Object (communication) +- MLP triad: Gate=Subject, Up=Predicate, Down=Object (computation) +- Each triad is an SPO decomposition → Pearl 2⁶ instead of Pearl 2³ + +### K+V = retrieval store, Q = query, Gate+Up+Down = NARS hydration +Retrieval and reasoning are separate operations on the same aligned data. +NeuronQuery selects which roles participate via a 6-bit role mask. + +### The palette is a cleanup memory, not a search engine +For queries: direct L1 on 34-byte Base17 (17 subtractions, sub-μs). +For SPO triple store (millions of edges): palette → DistanceMatrix → O(1). 
+For cleanup after VSA unbind: palette.nearest() snaps noisy bundle to archetype.
+
+### Base17 = the Johnson–Lindenstrauss projection
+Golden-step codec compresses BF16 d_model → 17 dims, ρ=0.993. No need for
+a second random projection on top. The Hyperprobe paper's 55M-param neural
+encoder learns what golden-step provides deterministically, for free.
+
+### HHTL cascade over 5M vectors
+- HEEL: model gestalt (34 bytes) → "is this query in this model's space?"
+- HIP: per-layer or per-role bundles (136 KB) → "which region?"
+- TWIG: palette 256×256 distance table (128 KB) → "which archetype?"
+- LEAF: 5M vectors in Lance + RaBitQ → "which exact weight row?"
+
+---
+
+## Loose Ends
+
+### Must Fix
+1. **`message_to_base17()` in serve.rs is still a byte hash** — needs to use
+   `triplet_to_headprint()` (which it now does for the SPO path) but the
+   embedding endpoint still uses the old hash. Low priority since embeddings
+   endpoint is secondary.
+
+2. **`AutocompleteCache.palette_indices` field was added but is unused** after
+   the refactor from palette pipeline to direct SPO. Can be removed or
+   repurposed for NeuronPrint palette assignment.
+
+### Should Do (Next Session)
+3. **Register DataFusion UDFs** — `l1`, `magnitude`, `xor_bind`, `bundle`,
+   `neuron_trace`, `nars_revision`. Pure scalar functions, no Cypher changes.
+   This makes the 6D store queryable via raw SQL immediately.
+   **File**: `crates/lance-graph/src/nsm/` or new `crates/lance-graph/src/neuron_udf.rs`
+
+4. **Hydrate a real model with partition columns** — run hydrate on existing
+   bgz7 files, verify tensor_role and layer_idx are correctly populated,
+   write to Lance dataset, query with the UDFs.
+
+5. **Build per-role palettes** — instead of one palette for all 5M vectors,
+   build 6 palettes (one per tensor role). Compare archetype distributions.
+   Do Q archetypes cluster semantically?
+
+6. 
**NeuronPrint construction from partitioned Lance data** — given a (layer, feature) + pair, load Q/K/V/Gate/Up/Down rows from the 6 partitions, assemble NeuronPrint. + This is the hydration step that creates the 204-byte struct from stored data. + +### Could Explore (Rosetta Stone) +7. **Q·K alignment per layer** — does attention sharpness increase with depth? + `SELECT layer_idx, AVG(l1(q.vector, k.vector)) FROM weights GROUP BY layer_idx` + +8. **Gate magnitude distribution** — which layers have the most active gates? + Are early layers feature detectors (low gate, broad) and late layers + concept composers (high gate, selective)? + +9. **Up/Down ratio as polysemanticity detector** — monosemantic neurons should + have low Up and low Down (clean pass-through). Polysemantic neurons should + have high both (many features, aggressive compression). + +10. **Cross-model NeuronPrint diff** — compare Opus 4.5 vs 4.6 per-role. + Which roles diverge? Which layers? This localizes behavioral differences. + +11. **AriGraph episodic memory with NeuronPrint** — replace string triplets + with NeuronTriplet { q, k, v, gate, up, down }. Episodic retrieval + becomes NeuronQuery::attention(q) instead of Hamming on fingerprints. + The NARS truth comes from Gate/Up/Down ratio instead of heuristics. + +12. **Cypher extension (Phase 2)** — add Neuron node type, role relationships, + USING ROLES() clause, trace property access to the nom-based parser. 
+ +--- + +## Architecture Map + +``` +ndarray (unchanged this session) +├── src/hpc/bgz17_bridge.rs — Base17 type, SIMD L1, xor_bind +├── src/hpc/palette_distance.rs — Palette::build(), DistanceMatrix, SimilarityTable +├── src/hpc/nars.rs — NarsTruth type +├── crates/p64/src/lib.rs — Palette64, HHTL cascade +└── src/hpc/gguf_indexer.rs — read_bgz7_file(), CompressedTensor + +lance-graph +├── crates/lance-graph/src/graph/ +│ ├── neuron.rs [NEW] — NeuronPrint, NeuronQuery, NeuronTrace (9 tests) +│ ├── hydrate.rs [UPDATED] — TensorRole, parse_layer_idx, partition columns (9 tests) +│ ├── arigraph/ — TripletGraph, EpisodicMemory (existing) +│ └── fingerprint.rs — 512-bit Fingerprint, Hamming (existing) +├── crates/lance-graph-planner/src/ +│ ├── serve.rs [UPDATED] — SPO extraction + NARS reasoning endpoint +│ ├── cache/convergence.rs — triplet_to_headprint, headprint_to_spo (existing) +│ └── cache/nars_engine.rs — SpoHead, NarsEngine, Pearl 2³ (existing) +├── crates/bgz17/src/ +│ ├── palette.rs — Palette::build(), nearest() (existing) +│ ├── distance_matrix.rs — DistanceMatrix, SpoDistanceMatrices (existing) +│ └── similarity.rs — SimilarityTable, from_reservoir() (existing) +├── crates/bgz-tensor/src/ +│ ├── palette.rs — WeightPalette (CLAM-inspired, existing) +│ └── attention.rs — AttentionTable, ComposeTable, CompiledHead (existing) +└── docs/ + ├── NEURONPRINT_ROSETTA.md [NEW] — Epiphanies, architecture zoo, exploration plan + └── NEURON_QUERY_LANGUAGE.md [NEW] — Cypher extension design, UDFs, 4-phase plan +``` + +--- + +## Commits This Session (lance-graph) + +``` +4e8b960 docs: 6D SPO query language design — Cypher/GQL extension for NeuronPrint +8650b4a docs: NeuronPrint Rosetta Stone — 6D holographic neuron representation +41f6b73 feat: NeuronPrint + NeuronQuery + NeuronTrace — 6D holographic neuron representation +6f59d5c feat: partitioned CAM index — TensorRole + layer_idx from tensor names +5f07f3a feat: wire SPO extraction + NARS reasoning into serve.rs 
endpoint +85d1c41 refactor: direct L1 search on raw Base17 vectors, keep palette infra +c680c02 feat: wire bgz17 Palette→DistanceMatrix→SimilarityTable into serve.rs + Lance write +``` + +--- + +## How to Continue + +### Quick Start (15 min) +```bash +cd /home/user/lance-graph +git checkout claude/qwen-claude-reverse-eng-vHuHv +cargo test -p lance-graph --lib -- graph::neuron # 9 tests +cargo test -p lance-graph --lib -- graph::hydrate # 9 tests +cargo check -p lance-graph-planner --features serve # compiles clean +``` + +### Rosetta Exploration (needs bgz7 files) +```bash +# 1. Hydrate with partition columns +# (needs bgz7 files in /tmp/ from previous indexing session) +cargo test -p lance-graph --lib -- graph::hydrate::tests::test_hydrate_real + +# 2. Register DataFusion UDFs (Phase 1 of query language) +# Create crates/lance-graph/src/neuron_udf.rs with l1, magnitude, etc. + +# 3. Query the 6D store +# SELECT tensor_role, layer_idx, COUNT(*) FROM weights GROUP BY tensor_role, layer_idx +``` + +### Key Question for Next Session +**"What do the Q archetypes look like?"** — build a palette from only Q-role vectors, +inspect the 256 centroids, see if they cluster by semantic function. This is the +first Rosetta reading. Everything else follows from what you find there. + +--- + +## External References + +- **Hyperprobe paper**: arXiv 2509.25045 — validates residual→VSA→algebra approach. + Their 55M-param encoder = our zero-param golden-step projection. + GitHub: `Ipazia-AI/hyperprobe` (cloned to `/home/user/hyperprobe/`) + +- **Anthropic Monosemanticity** (2024): individual neurons represent single concepts. + NeuronPrint should capture this — monosemantic = tight fingerprint across all 6 roles. + +- **SwiGLU analysis** (Shazeer 2020): Gate acts as learned binary mask. + NeuronTrace.frequency is derived from Gate magnitude — validates the mapping. + +- **Residual stream as communication bus** (Elhage et al. 2021): all layers read/write + the same bus. 
NeuronPrint captures both read (Q/K) and write (V/Down) sides. + +- **Original AriGraph**: AdaWorldAPI/AriGraph (Python), used 768D Contriever embeddings. + Transcoded to lance-graph with DeepNSM (0 params, 16.5 MB, bit-exact).