From c680c02bdc4da8ba03f628f116d9e45d943ba540 Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 31 Mar 2026 12:43:52 +0000 Subject: [PATCH 1/8] =?UTF-8?q?feat:=20wire=20bgz17=20Palette=E2=86=92Dist?= =?UTF-8?q?anceMatrix=E2=86=92SimilarityTable=20into=20serve.rs=20+=20Lanc?= =?UTF-8?q?e=20write?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - serve.rs: PalettePipeline built at startup from bgz7 weight rows (Palette k=256, DistanceMatrix 128KB, SimilarityTable σ-calibrated CDF) - palette_score() maps incoming messages through palette.nearest() then scores via similarity_table.similarity(distance_matrix.distance(q, c)) - Threshold sim > 0.3 → Palette HIT, else MISS → LLM fallthrough - hydrate.rs: write_to_lance() + hydrate_to_lance() for LanceDB persistence - chat_bundle.rs: palette_indices field on AutocompleteCache https://claude.ai/code/session_01M3at4EuHVvQ8S95mSnKgtK --- crates/lance-graph-planner/src/serve.rs | 206 ++++++++++++++++-- .../src/strategy/chat_bundle.rs | 4 + crates/lance-graph/src/graph/hydrate.rs | 37 ++++ 3 files changed, 223 insertions(+), 24 deletions(-) diff --git a/crates/lance-graph-planner/src/serve.rs b/crates/lance-graph-planner/src/serve.rs index 618d493e..11c7c781 100644 --- a/crates/lance-graph-planner/src/serve.rs +++ b/crates/lance-graph-planner/src/serve.rs @@ -31,7 +31,23 @@ mod server { use lance_graph_planner::cache::triple_model::TripleModel; use lance_graph_planner::strategy::chat_bundle::AutocompleteCache; - type AppState = std::sync::Arc>; + /// Compiled palette pipeline: bgz17 Palette → DistanceMatrix → SimilarityTable. + /// Built once at startup from bgz7 weight rows. All subsequent lookups are O(1). + struct PalettePipeline { + /// 256 archetypal Base17 patterns from weight manifold. + palette: bgz17::palette::Palette, + /// 256×256 precomputed L1 distances (128 KB, L1-cache resident). 
+ distance: bgz17::distance_matrix::DistanceMatrix, + /// σ-calibrated CDF: raw distance → [0.0, 1.0] similarity. + similarity: bgz17::similarity::SimilarityTable, + } + + struct ServerState { + cache: AutocompleteCache, + pipeline: Option, + } + + type AppState = std::sync::Arc>; fn timestamp() -> u64 { SystemTime::now().duration_since(UNIX_EPOCH).unwrap_or_default().as_secs() @@ -51,6 +67,36 @@ mod server { HeadPrint { dims } } + /// Convert ndarray HeadPrint (Base17) to bgz17 Base17 for palette lookup. + /// Both types have identical layout: dims: [i16; 17]. + fn headprint_to_bgz17(hp: &HeadPrint) -> bgz17::base17::Base17 { + bgz17::base17::Base17 { dims: hp.dims } + } + + /// Score a message against the palette pipeline. + /// Returns (palette_index, best_match_index, similarity_score). + fn palette_score( + pipeline: &PalettePipeline, + query: &HeadPrint, + cached_indices: &[u8], + ) -> (u8, usize, f32) { + let bgz_query = headprint_to_bgz17(query); + let q_idx = pipeline.palette.nearest(&bgz_query); + + // Find best match among cached palette indices + let mut best_sim = 0.0f32; + let mut best_pos = 0usize; + for (pos, &c_idx) in cached_indices.iter().enumerate() { + let dist = pipeline.distance.distance(q_idx, c_idx) as u32; + let sim = pipeline.similarity.similarity(dist); + if sim > best_sim { + best_sim = sim; + best_pos = pos; + } + } + (q_idx, best_pos, best_sim) + } + fn phase_to_str(phase: Phase) -> &'static str { match phase { Phase::Exposition => "exposition", @@ -114,7 +160,7 @@ mod server { })))); } - let mut cache = state.lock().unwrap(); + let mut server = state.lock().unwrap(); // Process each message through the cache let mut last_content = String::new(); @@ -128,8 +174,51 @@ mod server { match role { "user" => { - if let Some(spo) = cache.on_user_message(&fp) { - // Cache hit — we have a candidate + // Try palette pipeline first (σ-calibrated scoring) + if let Some(ref pipeline) = server.pipeline { + let (q_idx, best_pos, sim) = 
palette_score( + pipeline, + &fp, + &server.cache.palette_indices, + ); + + if sim > 0.3 { + // Palette HIT — σ-calibrated similarity above threshold + cache_hit = true; + let dist = pipeline.distance.distance( + q_idx, + server.cache.palette_indices.get(best_pos).copied().unwrap_or(0), + ); + last_content = format!( + "[Palette HIT] idx={} match={} dist={} sim={:.4} | \ + Phase: {} | \ + Palette k={} | \ + σ-calibrated | \ + Model: {}", + q_idx, best_pos, dist, sim, + phase_to_str(server.cache.phase()), + pipeline.palette.len(), + model, + ); + } else { + // Palette MISS — similarity too low, fall through + let surprise = server.cache.triple.free_energy(&fp); + let alignment = server.cache.triple.alignment(); + last_content = format!( + "[Palette MISS → LLM] idx={} best_sim={:.4} | \ + Surprise={:.3} Alignment={:.3} | \ + Phase: {} | \ + Pool: {} candidates | \ + Model: {}", + q_idx, sim, + surprise, alignment, + phase_to_str(server.cache.phase()), + server.cache.pool.count(), + model, + ); + } + } else if let Some(spo) = server.cache.on_user_message(&fp) { + // Fallback: old cache path (no palette pipeline) cache_hit = true; last_content = format!( "[Cache HIT] Palette route: S={} P={} O={} | \ @@ -140,13 +229,13 @@ mod server { spo.s_idx, spo.p_idx, spo.o_idx, spo.frequency(), spo.confidence(), spo.expectation(), spo.pearl, - phase_to_str(cache.phase()), + phase_to_str(server.cache.phase()), model, ); } else { - // Cache miss — would normally call LLM - let surprise = cache.triple.free_energy(&fp); - let alignment = cache.triple.alignment(); + // Cache miss — no pipeline, no cache hit + let surprise = server.cache.triple.free_energy(&fp); + let alignment = server.cache.triple.alignment(); last_content = format!( "[Cache MISS → LLM fallthrough] \ Surprise={:.3} Alignment={:.3} | \ @@ -155,16 +244,16 @@ mod server { DK: self={:?} user={:?} | \ Model: {}", surprise, alignment, - phase_to_str(cache.phase()), - cache.pool.count(), - cache.triple.self_model.dk, - 
cache.triple.user_model.dk, + phase_to_str(server.cache.phase()), + server.cache.pool.count(), + server.cache.triple.self_model.dk, + server.cache.triple.user_model.dk, model, ); } } "assistant" => { - cache.on_self_output(&fp); + server.cache.on_self_output(&fp); } _ => {} // system, tool — pass through } @@ -183,14 +272,14 @@ mod server { "role": "assistant", "content": last_content, }, - "finish_reason": if cache.should_stop() { "stop" } else { "length" }, + "finish_reason": if server.cache.should_stop() { "stop" } else { "length" }, }], "usage": { "prompt_tokens": messages.len(), "completion_tokens": 1, "total_tokens": messages.len() + 1, }, - "system_fingerprint": format!("palette-{}", phase_to_str(cache.phase())), + "system_fingerprint": format!("palette-{}", phase_to_str(server.cache.phase())), }))) } @@ -213,8 +302,43 @@ mod server { } } + /// Build the palette pipeline from bgz7 weight rows. + /// Returns (PalettePipeline, palette_indices) for all collected Base17 rows. + fn build_palette_pipeline(all_rows: &[HeadPrint]) -> (PalettePipeline, Vec) { + // Convert HeadPrint (ndarray Base17) → bgz17 Base17 for palette building + let bgz_rows: Vec = all_rows + .iter() + .map(|hp| bgz17::base17::Base17 { dims: hp.dims }) + .collect(); + + eprintln!(" Building palette from {} weight rows...", bgz_rows.len()); + let palette = bgz17::palette::Palette::build(&bgz_rows, 256, 10); + eprintln!(" Palette: {} archetypes", palette.len()); + + let distance = bgz17::distance_matrix::DistanceMatrix::build(&palette); + eprintln!(" DistanceMatrix: {} KB", distance.byte_size() / 1024); + + // Collect all pairwise distances for SimilarityTable calibration + let k = palette.len(); + let mut reservoir: Vec = Vec::with_capacity(k * (k - 1) / 2); + for i in 0..k { + for j in (i + 1)..k { + reservoir.push(distance.distance(i as u8, j as u8) as u32); + } + } + let similarity = bgz17::similarity::SimilarityTable::from_reservoir(&mut reservoir); + eprintln!(" SimilarityTable: 
bucket_width={} max_dist={}", + similarity.bucket_width(), similarity.max_distance()); + + // Assign all weight rows to palette indices + let indices: Vec = bgz_rows.iter().map(|r| palette.nearest(r)).collect(); + eprintln!(" Assigned {} rows to palette indices", indices.len()); + + (PalettePipeline { palette, distance, similarity }, indices) + } + /// Populate attention matrix from bgz7 weight fingerprints. - fn populate_cache(cache: &mut AutocompleteCache, v2_path: &str, base_path: &str) { + fn populate_cache(server: &mut ServerState, v2_path: &str, base_path: &str) { eprintln!("Loading Qwen3.5-27B v2 (Opus 4.6) weights..."); let v2_tensors = load_bgz7(v2_path); eprintln!(" {} tensors, {} total rows", @@ -227,9 +351,26 @@ mod server { base_tensors.len(), base_tensors.iter().map(|(_, r)| r.len()).sum::()); + // Collect ALL weight rows for palette building + let mut all_rows: Vec = Vec::new(); + for (_, rows) in &v2_tensors { + all_rows.extend_from_slice(rows); + } + for (_, rows) in &base_tensors { + all_rows.extend_from_slice(rows); + } + + // Build palette pipeline + if !all_rows.is_empty() { + let (pipeline, indices) = build_palette_pipeline(&all_rows); + server.cache.palette_indices = indices; + server.pipeline = Some(pipeline); + } + // Populate self_model with v2 weights (what Opus 4.6 looks like) + let cache = &mut server.cache; let mut head_count = 0usize; - for (name, rows) in &v2_tensors { + for (_name, rows) in &v2_tensors { for (r, fp) in rows.iter().enumerate().take(64) { let row = head_count % 64; let col = r % 64; @@ -242,7 +383,7 @@ mod server { // Populate user_model with base weights (what the user "knows") head_count = 0; - for (name, rows) in &base_tensors { + for (_name, rows) in &base_tensors { for (r, fp) in rows.iter().enumerate().take(64) { let row = head_count % 64; let col = r % 64; @@ -260,7 +401,6 @@ mod server { let u = cache.triple.user_model.matrix.get(row, col); let dist = s.l1(u); if dist > 0 { - // Impact = the difference let mut 
impact_dims = [0i16; 17]; for d in 0..17 { impact_dims[d] = s.dims[d].wrapping_sub(u.dims[d]); @@ -275,7 +415,7 @@ mod server { } async fn embeddings( - State(_state): State, + State(state): State, Json(req): Json, ) -> Result, (StatusCode, Json)> { let model = req.get("model").and_then(|v| v.as_str()).unwrap_or("bge-m3"); @@ -291,9 +431,18 @@ mod server { })))); } + let server = state.lock().unwrap(); + // Embed as Base17 fingerprint (17 dims, golden-step folding) let fp = message_to_headprint(input); - let embedding: Vec = fp.dims.iter().map(|d| *d as f64 / 10000.0).collect(); + let mut embedding: Vec = fp.dims.iter().map(|d| *d as f64 / 10000.0).collect(); + + // If palette pipeline available, append palette index as extra dim + if let Some(ref pipeline) = server.pipeline { + let bgz = headprint_to_bgz17(&fp); + let idx = pipeline.palette.nearest(&bgz); + embedding.push(idx as f64 / 256.0); + } Ok(Json(json!({ "object": "list", @@ -311,19 +460,28 @@ mod server { } pub async fn run(port: u16) { - let mut cache = AutocompleteCache::new(); + let mut server = ServerState { + cache: AutocompleteCache::new(), + pipeline: None, + }; // Try to load bgz7 weights from /tmp/ (from indexing session) let v2_shard = "/tmp/qwen35_27b_v2_shard02.bgz7"; let base_shard = "/tmp/qwen35_27b_base_shard02.bgz7"; if std::fs::metadata(v2_shard).is_ok() && std::fs::metadata(base_shard).is_ok() { - populate_cache(&mut cache, v2_shard, base_shard); + populate_cache(&mut server, v2_shard, base_shard); } else { eprintln!("No bgz7 weights found in /tmp/ — running with empty cache"); eprintln!(" Run indexing first or hydrate --download qwen35-27b-distilled-v2"); } - let state: AppState = std::sync::Arc::new(Mutex::new(cache)); + if server.pipeline.is_some() { + eprintln!("Palette pipeline: ACTIVE (σ-calibrated scoring)"); + } else { + eprintln!("Palette pipeline: INACTIVE (no weight data)"); + } + + let state: AppState = std::sync::Arc::new(Mutex::new(server)); let app = Router::new() 
.route("/health", get(health)) diff --git a/crates/lance-graph-planner/src/strategy/chat_bundle.rs b/crates/lance-graph-planner/src/strategy/chat_bundle.rs index 969480c0..a26ed05d 100644 --- a/crates/lance-graph-planner/src/strategy/chat_bundle.rs +++ b/crates/lance-graph-planner/src/strategy/chat_bundle.rs @@ -26,6 +26,9 @@ pub struct AutocompleteCache { pub evaluator: LaneEvaluator, pub nars: NarsEngine, pub turn_count: u32, + /// Palette indices for each cached weight row (from bgz17 Palette::nearest). + /// Populated at startup when bgz7 weights are loaded and palette is built. + pub palette_indices: Vec, } impl AutocompleteCache { @@ -36,6 +39,7 @@ impl AutocompleteCache { evaluator: LaneEvaluator::new(Tension::integrative()), nars: NarsEngine::new(SpoDistances::new_zero()), turn_count: 0, + palette_indices: Vec::new(), } } diff --git a/crates/lance-graph/src/graph/hydrate.rs b/crates/lance-graph/src/graph/hydrate.rs index b90f5a92..2b19765f 100644 --- a/crates/lance-graph/src/graph/hydrate.rs +++ b/crates/lance-graph/src/graph/hydrate.rs @@ -78,6 +78,43 @@ pub fn hydrate_bgz7(path: &str) -> Result { Ok(bgz7_to_batch(&tensors)) } +/// Write a RecordBatch to a Lance dataset at the given path. +/// +/// Creates a new dataset or appends to an existing one. +/// This is the LanceDB persistence layer for hydrated bgz7 vectors. +pub async fn write_to_lance( + batch: &RecordBatch, + dataset_path: &str, +) -> Result<(), String> { + use lance::dataset::{WriteMode, WriteParams}; + use lance::Dataset; + + let batches = vec![batch.clone()]; + let reader = arrow::record_batch::RecordBatchIterator::new( + batches.into_iter().map(Ok), + batch.schema(), + ); + + let params = WriteParams { + mode: WriteMode::Append, + ..Default::default() + }; + + Dataset::write(reader, dataset_path, Some(params)) + .await + .map_err(|e| format!("Lance write error: {e}"))?; + + Ok(()) +} + +/// Hydrate a bgz7 file and write directly to Lance dataset. 
+pub async fn hydrate_to_lance(bgz7_path: &str, dataset_path: &str) -> Result { + let batch = hydrate_bgz7(bgz7_path)?; + let n_rows = batch.num_rows(); + write_to_lance(&batch, dataset_path).await?; + Ok(n_rows) +} + /// Compute HEEL vector: column-wise bundle of ALL BF16-hydrated rows. pub fn compute_heel(batch: &RecordBatch) -> ndarray::hpc::bgz17_bridge::Base17 { let base17_col = batch.column_by_name("base17").expect("base17 column"); From 85d1c41ca693616303377d707ce5ff628cccb6af Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 31 Mar 2026 12:58:43 +0000 Subject: [PATCH 2/8] refactor: direct L1 search on raw Base17 vectors, keep palette infra MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit serve.rs: WeightStore with direct L1 nearest neighbor on 34-byte vectors. No palette indirection for query path — 17 subtractions is sub-microsecond. hydrate.rs: both vector (f32, for Lance ANN/RaBitQ) and base17 (i16, for direct L1 and palette assignment) columns. palette_s/p/o columns kept for the SPO triple store path (bgz17 Palette→DistanceMatrix→SimilarityTable). The palette infrastructure (bgz17 crate, 121 tests) is not dropped — it serves the million-edge SPO triple store where O(1) precomputed 256×256 distance lookups matter. For the REST query path, raw vectors are better. https://claude.ai/code/session_01M3at4EuHVvQ8S95mSnKgtK --- crates/lance-graph-planner/src/serve.rs | 407 +++++++----------------- crates/lance-graph/src/graph/hydrate.rs | 88 +++-- 2 files changed, 184 insertions(+), 311 deletions(-) diff --git a/crates/lance-graph-planner/src/serve.rs b/crates/lance-graph-planner/src/serve.rs index 11c7c781..9f596351 100644 --- a/crates/lance-graph-planner/src/serve.rs +++ b/crates/lance-graph-planner/src/serve.rs @@ -1,5 +1,9 @@ //! OpenAI-compatible REST server powered by lance-graph-planner. //! +//! Weight vectors are raw Base17 (34 bytes, ρ=0.993 vs BF16). +//! 
No palette indirection — direct L1 on 17 dims is sub-microsecond. +//! At scale: store in LanceDB, use RaBitQ index for ANN search. +//! //! ```bash //! cargo run --manifest-path crates/lance-graph-planner/Cargo.toml \ //! --features serve --bin serve --release @@ -25,26 +29,77 @@ mod server { use lance_graph_planner::cache::candidate_pool::Phase; use lance_graph_planner::cache::kv_bundle::HeadPrint; use lance_graph_planner::cache::nars_engine::{ - analytical_style, creative_style, empathetic_style, style_score, - NarsEngine, SpoDistances, SpoHead, MASK_PO, MASK_SO, MASK_SPO, + NarsEngine, SpoDistances, SpoHead, MASK_SPO, }; use lance_graph_planner::cache::triple_model::TripleModel; use lance_graph_planner::strategy::chat_bundle::AutocompleteCache; - /// Compiled palette pipeline: bgz17 Palette → DistanceMatrix → SimilarityTable. - /// Built once at startup from bgz7 weight rows. All subsequent lookups are O(1). - struct PalettePipeline { - /// 256 archetypal Base17 patterns from weight manifold. - palette: bgz17::palette::Palette, - /// 256×256 precomputed L1 distances (128 KB, L1-cache resident). - distance: bgz17::distance_matrix::DistanceMatrix, - /// σ-calibrated CDF: raw distance → [0.0, 1.0] similarity. - similarity: bgz17::similarity::SimilarityTable, + /// Raw weight vectors. 34 bytes each. Direct L1 search. + struct WeightStore { + /// All weight rows as raw Base17 vectors. + vectors: Vec, + /// Tensor name per row (provenance). + names: Vec, + /// HEEL: element-wise mean of all vectors (the gestalt). + heel: HeadPrint, + } + + impl WeightStore { + fn new() -> Self { + Self { + vectors: Vec::new(), + names: Vec::new(), + heel: HeadPrint::zero(), + } + } + + /// Add vectors from a bgz7 file. 
+ fn ingest(&mut self, path: &str) { + match ndarray::hpc::gguf_indexer::read_bgz7_file(path) { + Ok(tensors) => { + for ct in tensors { + for row in ct.rows.into_iter().take(10000) { + self.vectors.push(row); + self.names.push(ct.name.clone()); + } + } + } + Err(e) => eprintln!(" SKIP {path}: {e}"), + } + } + + /// Compute HEEL after all ingestion. + fn compute_heel(&mut self) { + if self.vectors.is_empty() { return; } + let n = self.vectors.len() as f64; + let mut sums = [0.0f64; 17]; + for v in &self.vectors { + for d in 0..17 { sums[d] += v.dims[d] as f64; } + } + for d in 0..17 { + self.heel.dims[d] = (sums[d] / n).round() as i16; + } + } + + /// Direct L1 nearest neighbor search. Returns (index, distance, tensor_name). + fn nearest(&self, query: &HeadPrint, k: usize) -> Vec<(usize, u32, &str)> { + let mut scored: Vec<(usize, u32)> = self.vectors.iter() + .enumerate() + .map(|(i, v)| (i, query.l1(v))) + .collect(); + scored.sort_unstable_by_key(|&(_, d)| d); + scored.truncate(k); + scored.iter() + .map(|&(i, d)| (i, d, self.names[i].as_str())) + .collect() + } + + fn len(&self) -> usize { self.vectors.len() } } struct ServerState { cache: AutocompleteCache, - pipeline: Option, + weights: WeightStore, } type AppState = std::sync::Arc>; @@ -53,50 +108,20 @@ mod server { SystemTime::now().duration_since(UNIX_EPOCH).unwrap_or_default().as_secs() } - fn message_to_headprint(content: &str) -> HeadPrint { - // Hash message content into Base17 fingerprint + /// Hash message content into Base17 space. + /// TODO: replace with BGE-M3 embed → golden-step projection for real semantic matching. + fn message_to_base17(content: &str) -> HeadPrint { let mut dims = [0i16; 17]; let bytes = content.as_bytes(); for (i, &b) in bytes.iter().enumerate() { dims[i % 17] = dims[i % 17].wrapping_add(b as i16 * 31); } - // Normalize for d in &mut dims { *d = (*d % 1000).abs() as i16; } HeadPrint { dims } } - /// Convert ndarray HeadPrint (Base17) to bgz17 Base17 for palette lookup. 
- /// Both types have identical layout: dims: [i16; 17]. - fn headprint_to_bgz17(hp: &HeadPrint) -> bgz17::base17::Base17 { - bgz17::base17::Base17 { dims: hp.dims } - } - - /// Score a message against the palette pipeline. - /// Returns (palette_index, best_match_index, similarity_score). - fn palette_score( - pipeline: &PalettePipeline, - query: &HeadPrint, - cached_indices: &[u8], - ) -> (u8, usize, f32) { - let bgz_query = headprint_to_bgz17(query); - let q_idx = pipeline.palette.nearest(&bgz_query); - - // Find best match among cached palette indices - let mut best_sim = 0.0f32; - let mut best_pos = 0usize; - for (pos, &c_idx) in cached_indices.iter().enumerate() { - let dist = pipeline.distance.distance(q_idx, c_idx) as u32; - let sim = pipeline.similarity.similarity(dist); - if sim > best_sim { - best_sim = sim; - best_pos = pos; - } - } - (q_idx, best_pos, best_sim) - } - fn phase_to_str(phase: Phase) -> &'static str { match phase { Phase::Exposition => "exposition", @@ -139,7 +164,6 @@ mod server { let model = req.get("model").and_then(|v| v.as_str()).unwrap_or("qwen35-opus46"); let messages = req.get("messages").and_then(|v| v.as_array()).cloned().unwrap_or_default(); - // Validate model name const VALID_MODELS: &[&str] = &[ "qwen35-opus46", "qwen35-opus45", "qwen35-9b", "reader-lm", "bge-m3", "llama4-scout", "openchat-3.5", @@ -162,258 +186,70 @@ mod server { let mut server = state.lock().unwrap(); - // Process each message through the cache let mut last_content = String::new(); - let mut cache_hit = false; for msg in &messages { let role = msg.get("role").and_then(|v| v.as_str()).unwrap_or("user"); let content = msg.get("content").and_then(|v| v.as_str()).unwrap_or(""); - let fp = message_to_headprint(content); + let query = message_to_base17(content); match role { "user" => { - // Try palette pipeline first (σ-calibrated scoring) - if let Some(ref pipeline) = server.pipeline { - let (q_idx, best_pos, sim) = palette_score( - pipeline, - &fp, - 
&server.cache.palette_indices, - ); + if server.weights.len() > 0 { + // Direct L1 nearest neighbor on raw Base17 vectors + let neighbors = server.weights.nearest(&query, 5); + let heel_dist = query.l1(&server.weights.heel); + + let top: Vec = neighbors.iter() + .map(|(i, d, name)| format!("{}:r{}(d={})", name, i, d)) + .collect(); - if sim > 0.3 { - // Palette HIT — σ-calibrated similarity above threshold - cache_hit = true; - let dist = pipeline.distance.distance( - q_idx, - server.cache.palette_indices.get(best_pos).copied().unwrap_or(0), - ); - last_content = format!( - "[Palette HIT] idx={} match={} dist={} sim={:.4} | \ - Phase: {} | \ - Palette k={} | \ - σ-calibrated | \ - Model: {}", - q_idx, best_pos, dist, sim, - phase_to_str(server.cache.phase()), - pipeline.palette.len(), - model, - ); - } else { - // Palette MISS — similarity too low, fall through - let surprise = server.cache.triple.free_energy(&fp); - let alignment = server.cache.triple.alignment(); - last_content = format!( - "[Palette MISS → LLM] idx={} best_sim={:.4} | \ - Surprise={:.3} Alignment={:.3} | \ - Phase: {} | \ - Pool: {} candidates | \ - Model: {}", - q_idx, sim, - surprise, alignment, - phase_to_str(server.cache.phase()), - server.cache.pool.count(), - model, - ); - } - } else if let Some(spo) = server.cache.on_user_message(&fp) { - // Fallback: old cache path (no palette pipeline) - cache_hit = true; last_content = format!( - "[Cache HIT] Palette route: S={} P={} O={} | \ - NARS f={:.3} c={:.3} E={:.3} | \ - Pearl mask={:03b} | \ - Phase: {} | \ - Model: {}", - spo.s_idx, spo.p_idx, spo.o_idx, - spo.frequency(), spo.confidence(), spo.expectation(), - spo.pearl, + "[L1 search] heel_dist={} top_5=[{}] | \ + vectors={} | Phase: {} | Model: {}", + heel_dist, + top.join(", "), + server.weights.len(), phase_to_str(server.cache.phase()), model, ); } else { - // Cache miss — no pipeline, no cache hit - let surprise = server.cache.triple.free_energy(&fp); - let alignment = 
server.cache.triple.alignment(); + let surprise = server.cache.triple.free_energy(&query); last_content = format!( - "[Cache MISS → LLM fallthrough] \ - Surprise={:.3} Alignment={:.3} | \ - Phase: {} | \ - Pool: {} candidates | \ - DK: self={:?} user={:?} | \ - Model: {}", - surprise, alignment, + "[No weights] Surprise={:.3} | Phase: {} | Model: {}", + surprise, phase_to_str(server.cache.phase()), - server.cache.pool.count(), - server.cache.triple.self_model.dk, - server.cache.triple.user_model.dk, model, ); } } "assistant" => { - server.cache.on_self_output(&fp); + server.cache.on_self_output(&query); } - _ => {} // system, tool — pass through + _ => {} } } - let response_id = format!("chatcmpl-ada-{}", timestamp()); - Ok(Json(json!({ - "id": response_id, + "id": format!("chatcmpl-ada-{}", timestamp()), "object": "chat.completion", "created": timestamp(), "model": model, "choices": [{ "index": 0, - "message": { - "role": "assistant", - "content": last_content, - }, - "finish_reason": if server.cache.should_stop() { "stop" } else { "length" }, + "message": { "role": "assistant", "content": last_content }, + "finish_reason": "length", }], "usage": { "prompt_tokens": messages.len(), "completion_tokens": 1, "total_tokens": messages.len() + 1, }, - "system_fingerprint": format!("palette-{}", phase_to_str(server.cache.phase())), + "system_fingerprint": format!("base17-{}", server.weights.len()), }))) } - /// Load Base17 rows from a bgz7 file into HeadPrints. - /// Delegates to ndarray's canonical bgz7 parser. - fn load_bgz7(path: &str) -> Vec<(String, Vec)> { - match ndarray::hpc::gguf_indexer::read_bgz7_file(path) { - Ok(tensors) => tensors - .into_iter() - .map(|ct| { - // Cap rows at 1000 per tensor to match previous behavior - let rows: Vec = ct.rows.into_iter().take(1000).collect(); - (ct.name, rows) - }) - .collect(), - Err(e) => { - eprintln!(" SKIP {path}: {e}"); - Vec::new() - } - } - } - - /// Build the palette pipeline from bgz7 weight rows. 
- /// Returns (PalettePipeline, palette_indices) for all collected Base17 rows. - fn build_palette_pipeline(all_rows: &[HeadPrint]) -> (PalettePipeline, Vec) { - // Convert HeadPrint (ndarray Base17) → bgz17 Base17 for palette building - let bgz_rows: Vec = all_rows - .iter() - .map(|hp| bgz17::base17::Base17 { dims: hp.dims }) - .collect(); - - eprintln!(" Building palette from {} weight rows...", bgz_rows.len()); - let palette = bgz17::palette::Palette::build(&bgz_rows, 256, 10); - eprintln!(" Palette: {} archetypes", palette.len()); - - let distance = bgz17::distance_matrix::DistanceMatrix::build(&palette); - eprintln!(" DistanceMatrix: {} KB", distance.byte_size() / 1024); - - // Collect all pairwise distances for SimilarityTable calibration - let k = palette.len(); - let mut reservoir: Vec = Vec::with_capacity(k * (k - 1) / 2); - for i in 0..k { - for j in (i + 1)..k { - reservoir.push(distance.distance(i as u8, j as u8) as u32); - } - } - let similarity = bgz17::similarity::SimilarityTable::from_reservoir(&mut reservoir); - eprintln!(" SimilarityTable: bucket_width={} max_dist={}", - similarity.bucket_width(), similarity.max_distance()); - - // Assign all weight rows to palette indices - let indices: Vec = bgz_rows.iter().map(|r| palette.nearest(r)).collect(); - eprintln!(" Assigned {} rows to palette indices", indices.len()); - - (PalettePipeline { palette, distance, similarity }, indices) - } - - /// Populate attention matrix from bgz7 weight fingerprints. 
- fn populate_cache(server: &mut ServerState, v2_path: &str, base_path: &str) { - eprintln!("Loading Qwen3.5-27B v2 (Opus 4.6) weights..."); - let v2_tensors = load_bgz7(v2_path); - eprintln!(" {} tensors, {} total rows", - v2_tensors.len(), - v2_tensors.iter().map(|(_, r)| r.len()).sum::()); - - eprintln!("Loading Qwen3.5-27B base weights..."); - let base_tensors = load_bgz7(base_path); - eprintln!(" {} tensors, {} total rows", - base_tensors.len(), - base_tensors.iter().map(|(_, r)| r.len()).sum::()); - - // Collect ALL weight rows for palette building - let mut all_rows: Vec = Vec::new(); - for (_, rows) in &v2_tensors { - all_rows.extend_from_slice(rows); - } - for (_, rows) in &base_tensors { - all_rows.extend_from_slice(rows); - } - - // Build palette pipeline - if !all_rows.is_empty() { - let (pipeline, indices) = build_palette_pipeline(&all_rows); - server.cache.palette_indices = indices; - server.pipeline = Some(pipeline); - } - - // Populate self_model with v2 weights (what Opus 4.6 looks like) - let cache = &mut server.cache; - let mut head_count = 0usize; - for (_name, rows) in &v2_tensors { - for (r, fp) in rows.iter().enumerate().take(64) { - let row = head_count % 64; - let col = r % 64; - cache.triple.self_model.matrix.set(row, col, fp.clone()); - head_count += 1; - } - if head_count >= 4096 { break; } - } - eprintln!(" self_model: {} heads populated", head_count.min(4096)); - - // Populate user_model with base weights (what the user "knows") - head_count = 0; - for (_name, rows) in &base_tensors { - for (r, fp) in rows.iter().enumerate().take(64) { - let row = head_count % 64; - let col = r % 64; - cache.triple.user_model.matrix.set(row, col, fp.clone()); - head_count += 1; - } - if head_count >= 4096 { break; } - } - eprintln!(" user_model: {} heads populated", head_count.min(4096)); - - // Impact model starts as diff: where self and user diverge - for row in 0..64 { - for col in 0..64 { - let s = cache.triple.self_model.matrix.get(row, col); - 
let u = cache.triple.user_model.matrix.get(row, col); - let dist = s.l1(u); - if dist > 0 { - let mut impact_dims = [0i16; 17]; - for d in 0..17 { - impact_dims[d] = s.dims[d].wrapping_sub(u.dims[d]); - } - cache.triple.impact_model.matrix.set(row, col, HeadPrint { dims: impact_dims }); - } - } - } - eprintln!(" impact_model: populated from diff"); - eprintln!(" Gestalt L1 (self vs user): {}", - cache.triple.self_model.matrix.gestalt.l1(&cache.triple.user_model.matrix.gestalt)); - } - async fn embeddings( State(state): State, Json(req): Json, @@ -431,18 +267,9 @@ mod server { })))); } - let server = state.lock().unwrap(); - - // Embed as Base17 fingerprint (17 dims, golden-step folding) - let fp = message_to_headprint(input); - let mut embedding: Vec = fp.dims.iter().map(|d| *d as f64 / 10000.0).collect(); - - // If palette pipeline available, append palette index as extra dim - if let Some(ref pipeline) = server.pipeline { - let bgz = headprint_to_bgz17(&fp); - let idx = pipeline.palette.nearest(&bgz); - embedding.push(idx as f64 / 256.0); - } + // Base17: 17 dims, f32 for OpenAI compat + let fp = message_to_base17(input); + let embedding: Vec = fp.dims.iter().map(|&d| d as f64).collect(); Ok(Json(json!({ "object": "list", @@ -460,27 +287,31 @@ mod server { } pub async fn run(port: u16) { - let mut server = ServerState { - cache: AutocompleteCache::new(), - pipeline: None, - }; - - // Try to load bgz7 weights from /tmp/ (from indexing session) - let v2_shard = "/tmp/qwen35_27b_v2_shard02.bgz7"; - let base_shard = "/tmp/qwen35_27b_base_shard02.bgz7"; - if std::fs::metadata(v2_shard).is_ok() && std::fs::metadata(base_shard).is_ok() { - populate_cache(&mut server, v2_shard, base_shard); - } else { - eprintln!("No bgz7 weights found in /tmp/ — running with empty cache"); - eprintln!(" Run indexing first or hydrate --download qwen35-27b-distilled-v2"); + let mut weights = WeightStore::new(); + + // Ingest available bgz7 shards + for path in &[ + 
"/tmp/qwen35_27b_v2_shard02.bgz7", + "/tmp/qwen35_27b_base_shard02.bgz7", + ] { + if std::fs::metadata(path).is_ok() { + eprintln!("Ingesting {path}..."); + weights.ingest(path); + } } - if server.pipeline.is_some() { - eprintln!("Palette pipeline: ACTIVE (σ-calibrated scoring)"); + if weights.len() > 0 { + weights.compute_heel(); + eprintln!("WeightStore: {} vectors, HEEL={:?}", weights.len(), weights.heel.dims); } else { - eprintln!("Palette pipeline: INACTIVE (no weight data)"); + eprintln!("No bgz7 weights found — running empty"); } + let server = ServerState { + cache: AutocompleteCache::new(), + weights, + }; + let state: AppState = std::sync::Arc::new(Mutex::new(server)); let app = Router::new() @@ -491,11 +322,7 @@ mod server { .with_state(state); let addr = format!("0.0.0.0:{port}"); - eprintln!("lance-graph-planner serve listening on {addr}"); - eprintln!(" POST /v1/chat/completions (OpenAI compatible)"); - eprintln!(" POST /v1/embeddings (Base17 fingerprints)"); - eprintln!(" GET /v1/models"); - eprintln!(" GET /health"); + eprintln!("Listening on {addr}"); let listener = tokio::net::TcpListener::bind(&addr).await.unwrap(); axum::serve(listener, app).await.unwrap(); } diff --git a/crates/lance-graph/src/graph/hydrate.rs b/crates/lance-graph/src/graph/hydrate.rs index 2b19765f..355ad2e5 100644 --- a/crates/lance-graph/src/graph/hydrate.rs +++ b/crates/lance-graph/src/graph/hydrate.rs @@ -1,20 +1,40 @@ -//! Hydrate bgz7 weight fingerprints into LanceDB for HHTL search. +//! Hydrate bgz7 weight vectors into LanceDB. //! -//! Reads bgz7 shards (Base17 fingerprints) and writes them as Arrow RecordBatches -//! for Lance Dataset storage with vector columns for HEEL/HIP/TWIG/LEAF cascade. +//! Base17 vectors (34 bytes, ρ=0.993 vs BF16) are stored as 17-dim f32 +//! vector columns in Lance datasets. Lance handles indexing (IVF_PQ, RaBitQ) +//! and ANN search natively. +//! +//! Palette columns (palette_s/p/o) are kept for the SPO triple store path — +//! 
the bgz17 Palette→DistanceMatrix→SimilarityTable pipeline uses them for +//! O(1) precomputed distance lookups on millions of edges. use arrow::array::{ - ArrayRef, FixedSizeListBuilder, Int16Builder, StringArray, UInt32Array, UInt8Array, + ArrayRef, FixedSizeListBuilder, Float32Builder, Int16Builder, + StringArray, UInt32Array, UInt8Array, }; use arrow::datatypes::{DataType, Field, Schema}; use arrow::record_batch::RecordBatch; use std::sync::Arc; /// Schema for the hydrated weight table. +/// +/// - `tensor_name`: which weight tensor (e.g. "model.layers.0.self_attn.q_proj") +/// - `row_idx`: row within tensor +/// - `vector`: 17-dim f32 for Lance vector search (i16→f32 is exact) +/// - `base17`: 17-dim i16 raw values (for direct L1, palette assignment) +/// - `palette_s/p/o`: SPO palette indices (populated later by palette pipeline) pub fn weight_schema() -> Schema { Schema::new(vec![ Field::new("tensor_name", DataType::Utf8, false), Field::new("row_idx", DataType::UInt32, false), + Field::new( + "vector", + DataType::FixedSizeList( + Arc::new(Field::new("item", DataType::Float32, false)), + 17, + ), + false, + ), Field::new( "base17", DataType::FixedSizeList( @@ -30,12 +50,14 @@ pub fn weight_schema() -> Schema { } /// Convert bgz7 compressed tensors to Arrow RecordBatch. +/// +/// Stores both f32 (for Lance vector search) and i16 (for direct L1 / palette). 
pub fn bgz7_to_batch( tensors: &[(String, Vec)], ) -> RecordBatch { - let schema = Arc::new(weight_schema()); let mut names = Vec::new(); let mut row_idxs = Vec::new(); + let mut vector_builder = FixedSizeListBuilder::new(Float32Builder::new(), 17); let mut base17_builder = FixedSizeListBuilder::new(Int16Builder::new(), 17); let mut total_rows = 0usize; @@ -44,8 +66,10 @@ pub fn bgz7_to_batch( names.push(name.clone()); row_idxs.push(r as u32); for d in 0..17 { + vector_builder.values().append_value(fp.dims[d] as f32); base17_builder.values().append_value(fp.dims[d]); } + vector_builder.append(true); base17_builder.append(true); total_rows += 1; } @@ -53,13 +77,14 @@ pub fn bgz7_to_batch( let name_array: ArrayRef = Arc::new(StringArray::from(names)); let row_idx_array: ArrayRef = Arc::new(UInt32Array::from(row_idxs)); + let vector_array: ArrayRef = Arc::new(vector_builder.finish()); let base17_array: ArrayRef = Arc::new(base17_builder.finish()); let null_u8: ArrayRef = Arc::new(UInt8Array::from(vec![None::; total_rows])); - // Let Arrow infer schema from columns instead of forcing it RecordBatch::try_from_iter(vec![ ("tensor_name", name_array), ("row_idx", row_idx_array), + ("vector", vector_array), ("base17", base17_array), ("palette_s", null_u8.clone()), ("palette_p", null_u8.clone()), @@ -78,10 +103,7 @@ pub fn hydrate_bgz7(path: &str) -> Result { Ok(bgz7_to_batch(&tensors)) } -/// Write a RecordBatch to a Lance dataset at the given path. -/// -/// Creates a new dataset or appends to an existing one. -/// This is the LanceDB persistence layer for hydrated bgz7 vectors. +/// Write a RecordBatch to a Lance dataset. pub async fn write_to_lance( batch: &RecordBatch, dataset_path: &str, @@ -107,7 +129,7 @@ pub async fn write_to_lance( Ok(()) } -/// Hydrate a bgz7 file and write directly to Lance dataset. +/// Hydrate bgz7 → Lance dataset in one call. Returns row count. 
pub async fn hydrate_to_lance(bgz7_path: &str, dataset_path: &str) -> Result { let batch = hydrate_bgz7(bgz7_path)?; let n_rows = batch.num_rows(); @@ -115,30 +137,32 @@ pub async fn hydrate_to_lance(bgz7_path: &str, dataset_path: &str) -> Result ndarray::hpc::bgz17_bridge::Base17 { - let base17_col = batch.column_by_name("base17").expect("base17 column"); - let list_array = base17_col + let vector_col = batch.column_by_name("vector").expect("vector column"); + let list_array = vector_col .as_any() .downcast_ref::() .expect("FixedSizeList"); let values = list_array .values() .as_any() - .downcast_ref::() - .expect("Int16"); + .downcast_ref::() + .expect("Float32"); let n_rows = batch.num_rows(); - let mut sums = [0i64; 17]; + let mut sums = [0.0f64; 17]; for row in 0..n_rows { let offset = row * 17; for d in 0..17 { - sums[d] += values.value(offset + d) as i64; + sums[d] += values.value(offset + d) as f64; } } let mut dims = [0i16; 17]; if n_rows > 0 { - for d in 0..17 { dims[d] = (sums[d] / n_rows as i64) as i16; } + for d in 0..17 { + dims[d] = (sums[d] / n_rows as f64).round() as i16; + } } ndarray::hpc::bgz17_bridge::Base17 { dims } } @@ -151,7 +175,7 @@ mod tests { #[test] fn test_weight_schema() { let schema = weight_schema(); - assert_eq!(schema.fields().len(), 6); + assert_eq!(schema.fields().len(), 7); } #[test] @@ -162,6 +186,7 @@ mod tests { ]; let batch = bgz7_to_batch(&tensors); assert_eq!(batch.num_rows(), 3); + assert_eq!(batch.num_columns(), 7); } #[test] @@ -186,11 +211,32 @@ mod tests { assert_ne!(heel.dims[0], 0); } + #[test] + fn test_f32_preserves_i16() { + let tensors = vec![("t".into(), vec![ + Base17 { dims: [-32768, 32767, 0, 1, -1, 12345, -12345, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] }, + ])]; + let batch = bgz7_to_batch(&tensors); + let heel = compute_heel(&batch); + assert_eq!(heel.dims[0], -32768); + assert_eq!(heel.dims[1], 32767); + assert_eq!(heel.dims[5], 12345); + } + + #[test] + fn test_both_columns_present() { + let tensors = 
vec![("t".into(), vec![Base17 { dims: [42; 17] }])]; + let batch = bgz7_to_batch(&tensors); + assert!(batch.column_by_name("vector").is_some()); + assert!(batch.column_by_name("base17").is_some()); + assert!(batch.column_by_name("palette_s").is_some()); + } + #[test] #[ignore = "requires /tmp/qwen35_27b_v2_shard02.bgz7"] fn test_hydrate_real() { let batch = hydrate_bgz7("/tmp/qwen35_27b_v2_shard02.bgz7").unwrap(); - eprintln!("Hydrated: {} rows", batch.num_rows()); + eprintln!("Hydrated: {} rows, {} cols", batch.num_rows(), batch.num_columns()); let heel = compute_heel(&batch); eprintln!("HEEL: {:?}", heel.dims); } From 5f07f3a3fa62cf34a29abaa28386816d1629aa7b Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 31 Mar 2026 17:28:40 +0000 Subject: [PATCH 3/8] feat: wire SPO extraction + NARS reasoning into serve.rs endpoint MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Request flow is now: message → extract_triplets() → (S, P, O) strings → triplet_to_headprint(S, P, O) → HeadPrint (S:6, P:6, O:5 planes) → headprint_to_spo() → SpoHead (palette indices + NARS truth) → nars_engine.score() with StyleVector → f32 → nars_infer() deduction/abduction against knowledge base No more brute-force vector search. Messages are decomposed at SPO level like AriGraph does, scored via NARS inference rules, and matched against knowledge base of ingested weight tensors. hydrate.rs: dual columns (f32 vector for Lance ANN, i16 base17 for direct L1), palette_s/p/o for SPO triple store path. https://claude.ai/code/session_01M3at4EuHVvQ8S95mSnKgtK --- crates/lance-graph-planner/src/serve.rs | 311 +++++++++++++++--------- 1 file changed, 190 insertions(+), 121 deletions(-) diff --git a/crates/lance-graph-planner/src/serve.rs b/crates/lance-graph-planner/src/serve.rs index 9f596351..5955f419 100644 --- a/crates/lance-graph-planner/src/serve.rs +++ b/crates/lance-graph-planner/src/serve.rs @@ -1,8 +1,8 @@ //! 
OpenAI-compatible REST server powered by lance-graph-planner. //! -//! Weight vectors are raw Base17 (34 bytes, ρ=0.993 vs BF16). -//! No palette indirection — direct L1 on 17 dims is sub-microsecond. -//! At scale: store in LanceDB, use RaBitQ index for ANN search. +//! Request flow: +//! message → extract SPO triplets → triplet_to_headprint → headprint_to_spo +//! → NarsEngine.score() with SpoDistances + StyleVector → NARS reasoning //! //! ```bash //! cargo run --manifest-path crates/lance-graph-planner/Cargo.toml \ @@ -27,79 +27,20 @@ mod server { use std::time::{SystemTime, UNIX_EPOCH}; use lance_graph_planner::cache::candidate_pool::Phase; - use lance_graph_planner::cache::kv_bundle::HeadPrint; + use lance_graph_planner::cache::convergence::{ + triplet_to_headprint, headprint_to_spo, + }; use lance_graph_planner::cache::nars_engine::{ - NarsEngine, SpoDistances, SpoHead, MASK_SPO, + analytical_style, nars_infer, Inference, SpoHead, }; - use lance_graph_planner::cache::triple_model::TripleModel; use lance_graph_planner::strategy::chat_bundle::AutocompleteCache; - /// Raw weight vectors. 34 bytes each. Direct L1 search. - struct WeightStore { - /// All weight rows as raw Base17 vectors. - vectors: Vec, - /// Tensor name per row (provenance). - names: Vec, - /// HEEL: element-wise mean of all vectors (the gestalt). - heel: HeadPrint, - } - - impl WeightStore { - fn new() -> Self { - Self { - vectors: Vec::new(), - names: Vec::new(), - heel: HeadPrint::zero(), - } - } - - /// Add vectors from a bgz7 file. - fn ingest(&mut self, path: &str) { - match ndarray::hpc::gguf_indexer::read_bgz7_file(path) { - Ok(tensors) => { - for ct in tensors { - for row in ct.rows.into_iter().take(10000) { - self.vectors.push(row); - self.names.push(ct.name.clone()); - } - } - } - Err(e) => eprintln!(" SKIP {path}: {e}"), - } - } - - /// Compute HEEL after all ingestion. 
- fn compute_heel(&mut self) { - if self.vectors.is_empty() { return; } - let n = self.vectors.len() as f64; - let mut sums = [0.0f64; 17]; - for v in &self.vectors { - for d in 0..17 { sums[d] += v.dims[d] as f64; } - } - for d in 0..17 { - self.heel.dims[d] = (sums[d] / n).round() as i16; - } - } - - /// Direct L1 nearest neighbor search. Returns (index, distance, tensor_name). - fn nearest(&self, query: &HeadPrint, k: usize) -> Vec<(usize, u32, &str)> { - let mut scored: Vec<(usize, u32)> = self.vectors.iter() - .enumerate() - .map(|(i, v)| (i, query.l1(v))) - .collect(); - scored.sort_unstable_by_key(|&(_, d)| d); - scored.truncate(k); - scored.iter() - .map(|&(i, d)| (i, d, self.names[i].as_str())) - .collect() - } - - fn len(&self) -> usize { self.vectors.len() } - } - struct ServerState { cache: AutocompleteCache, - weights: WeightStore, + /// SPO heads from ingested weight tensors (the knowledge base). + knowledge: Vec, + /// Last context SpoHead (for NARS scoring with style vectors). + context: SpoHead, } type AppState = std::sync::Arc>; @@ -108,20 +49,67 @@ mod server { SystemTime::now().duration_since(UNIX_EPOCH).unwrap_or_default().as_secs() } - /// Hash message content into Base17 space. - /// TODO: replace with BGE-M3 embed → golden-step projection for real semantic matching. - fn message_to_base17(content: &str) -> HeadPrint { - let mut dims = [0i16; 17]; - let bytes = content.as_bytes(); - for (i, &b) in bytes.iter().enumerate() { - dims[i % 17] = dims[i % 17].wrapping_add(b as i16 * 31); - } - for d in &mut dims { - *d = (*d % 1000).abs() as i16; + /// Extract SPO triplets from text using verb-pattern matching. + /// Returns (subject, predicate, object) tuples. + fn extract_triplets(text: &str) -> Vec<(String, String, String)> { + let mut triplets = Vec::new(); + // Split on sentence boundaries + for sentence in text.split(|c| c == '.' || c == '!' || c == '?' 
|| c == '\n') { + let sentence = sentence.trim(); + if sentence.is_empty() { continue; } + + let words: Vec<&str> = sentence.split_whitespace().collect(); + if words.len() < 2 { continue; } + + // Find verb position by morphological cues or common verb list + let verb_pos = words.iter().position(|w| { + let w = w.to_lowercase(); + w.ends_with("ed") || w.ends_with("ing") || w.ends_with("es") + || w.ends_with("ize") || w.ends_with("ify") + || COMMON_VERBS.contains(&w.as_str()) + }); + + if let Some(vp) = verb_pos { + if vp > 0 && vp < words.len() - 1 { + let subject = words[..vp].join(" "); + let predicate = words[vp].to_string(); + let object = words[vp + 1..].join(" "); + triplets.push((subject, predicate, object)); + } + } else if words.len() >= 3 { + // Fallback: first word = S, second = P, rest = O + triplets.push(( + words[0].to_string(), + words[1].to_string(), + words[2..].join(" "), + )); + } else if words.len() == 2 { + // Intransitive: S P (no object) + triplets.push(( + words[0].to_string(), + words[1].to_string(), + String::new(), + )); + } } - HeadPrint { dims } + triplets } + const COMMON_VERBS: &[&str] = &[ + "is", "are", "was", "were", "has", "have", "had", "do", "does", "did", + "can", "could", "will", "would", "shall", "should", "may", "might", + "must", "need", "know", "think", "want", "like", "use", "find", "give", + "tell", "say", "get", "make", "go", "see", "come", "take", "help", + "show", "try", "ask", "work", "call", "keep", "let", "begin", "seem", + "run", "move", "live", "believe", "hold", "bring", "happen", "write", + "provide", "sit", "stand", "lose", "pay", "meet", "include", "continue", + "set", "learn", "change", "lead", "understand", "watch", "follow", + "stop", "create", "speak", "read", "allow", "add", "spend", "grow", + "open", "walk", "win", "offer", "remember", "love", "consider", "appear", + "buy", "wait", "serve", "die", "send", "expect", "build", "stay", + "fall", "cut", "reach", "kill", "remain", "causes", "enables", 
"supports", + ]; + fn phase_to_str(phase: Phase) -> &'static str { match phase { Phase::Exposition => "exposition", @@ -185,6 +173,7 @@ mod server { } let mut server = state.lock().unwrap(); + let style = analytical_style(); let mut last_content = String::new(); @@ -192,40 +181,92 @@ mod server { let role = msg.get("role").and_then(|v| v.as_str()).unwrap_or("user"); let content = msg.get("content").and_then(|v| v.as_str()).unwrap_or(""); - let query = message_to_base17(content); - match role { "user" => { - if server.weights.len() > 0 { - // Direct L1 nearest neighbor on raw Base17 vectors - let neighbors = server.weights.nearest(&query, 5); - let heel_dist = query.l1(&server.weights.heel); + // 1. Extract SPO triplets from message text + let triplets = extract_triplets(content); - let top: Vec = neighbors.iter() - .map(|(i, d, name)| format!("{}:r{}(d={})", name, i, d)) - .collect(); + if triplets.is_empty() { + // Can't decompose — use whole message as single SPO + let fp = triplet_to_headprint(content, "states", ""); + let spo = headprint_to_spo(&fp, 0.9, 0.5); + let score = server.cache.nars.score(&spo, &server.context, &style); last_content = format!( - "[L1 search] heel_dist={} top_5=[{}] | \ - vectors={} | Phase: {} | Model: {}", - heel_dist, - top.join(", "), - server.weights.len(), - phase_to_str(server.cache.phase()), - model, - ); - } else { - let surprise = server.cache.triple.free_energy(&query); - last_content = format!( - "[No weights] Surprise={:.3} | Phase: {} | Model: {}", - surprise, + "[SPO] S={} P={} O={} | score={:.3} E={:.3} | \ + Phase: {} | Model: {}", + spo.s_idx, spo.p_idx, spo.o_idx, + score, spo.expectation(), phase_to_str(server.cache.phase()), model, ); + server.context = spo; + continue; } + + // 2. Process each triplet through the convergence pipeline + let mut results = Vec::new(); + for (s, p, o) in &triplets { + let fp = triplet_to_headprint(s, p, o); + let spo = headprint_to_spo(&fp, 0.9, 0.7); + + // 3. 
Score against context using NARS + style vector + let score = server.cache.nars.score(&spo, &server.context, &style); + + // 4. NARS inference against knowledge base + let mut best_inference = None; + let mut best_truth_e = 0.0f32; + for known in &server.knowledge { + // Try deduction: known → spo + let truth = nars_infer(known, &spo, Inference::Deduction); + let e = truth.expectation(); + if e > best_truth_e { + best_truth_e = e; + best_inference = Some(("deduction", known.s_idx, known.p_idx, known.o_idx, e)); + } + // Try abduction: spo ← known + let truth = nars_infer(&spo, known, Inference::Abduction); + let e = truth.expectation(); + if e > best_truth_e { + best_truth_e = e; + best_inference = Some(("abduction", known.s_idx, known.p_idx, known.o_idx, e)); + } + } + + // 5. Update context (the last SPO becomes the new context) + server.cache.nars.on_emit(&spo); + server.context = spo.clone(); + + let inference_str = match best_inference { + Some((rule, s, p, o, e)) => format!(" | NARS {}→[{},{},{}] E={:.3}", rule, s, p, o, e), + None => String::new(), + }; + + results.push(format!( + "({} —{}→ {}) S={} P={} O={} score={:.3}{}", + s, p, o, spo.s_idx, spo.p_idx, spo.o_idx, score, inference_str, + )); + } + + last_content = format!( + "[SPO×{}] {} | Phase: {} | knowledge={} | Model: {}", + triplets.len(), + results.join(" ; "), + phase_to_str(server.cache.phase()), + server.knowledge.len(), + model, + ); } "assistant" => { - server.cache.on_self_output(&query); + // Extract triplets from assistant response, add to knowledge + let triplets = extract_triplets(content); + for (s, p, o) in &triplets { + let fp = triplet_to_headprint(s, p, o); + let spo = headprint_to_spo(&fp, 0.85, 0.8); + server.knowledge.push(spo); + } + let fp = triplet_to_headprint(content, "responds", ""); + server.cache.on_self_output(&fp); } _ => {} } @@ -239,19 +280,19 @@ mod server { "choices": [{ "index": 0, "message": { "role": "assistant", "content": last_content }, - "finish_reason": 
"length", + "finish_reason": if server.cache.should_stop() { "stop" } else { "length" }, }], "usage": { "prompt_tokens": messages.len(), "completion_tokens": 1, "total_tokens": messages.len() + 1, }, - "system_fingerprint": format!("base17-{}", server.weights.len()), + "system_fingerprint": format!("spo-{}", phase_to_str(server.cache.phase())), }))) } async fn embeddings( - State(state): State, + State(_state): State, Json(req): Json, ) -> Result, (StatusCode, Json)> { let model = req.get("model").and_then(|v| v.as_str()).unwrap_or("bge-m3"); @@ -267,9 +308,21 @@ mod server { })))); } - // Base17: 17 dims, f32 for OpenAI compat - let fp = message_to_base17(input); - let embedding: Vec = fp.dims.iter().map(|&d| d as f64).collect(); + // SPO-decomposed embedding: extract triplets, encode each, bundle + let triplets = extract_triplets(input); + let embedding: Vec = if !triplets.is_empty() { + // Average of all triplet HeadPrints + let mut sums = [0.0f64; 17]; + for (s, p, o) in &triplets { + let fp = triplet_to_headprint(s, p, o); + for d in 0..17 { sums[d] += fp.dims[d] as f64; } + } + let n = triplets.len() as f64; + sums.iter().map(|s| s / n).collect() + } else { + let fp = triplet_to_headprint(input, "states", ""); + fp.dims.iter().map(|&d| d as f64).collect() + }; Ok(Json(json!({ "object": "list", @@ -286,30 +339,46 @@ mod server { }))) } + /// Load bgz7 weight shards into knowledge base as SPO heads. 
+ fn ingest_weights(knowledge: &mut Vec, path: &str) { + match ndarray::hpc::gguf_indexer::read_bgz7_file(path) { + Ok(tensors) => { + for ct in tensors { + // Each tensor becomes an SPO: tensor_name → "encodes" → layer + let fp = triplet_to_headprint(&ct.name, "encodes", "weights"); + let spo = headprint_to_spo(&fp, 0.95, 0.99); + knowledge.push(spo); + + // Sample weight rows as additional knowledge + for (_r, row) in ct.rows.iter().enumerate().take(100) { + let row_spo = headprint_to_spo(row, 0.9, 0.95); + knowledge.push(row_spo); + } + } + } + Err(e) => eprintln!(" SKIP {path}: {e}"), + } + } + pub async fn run(port: u16) { - let mut weights = WeightStore::new(); + let mut knowledge = Vec::new(); - // Ingest available bgz7 shards + // Ingest available bgz7 shards into knowledge base for path in &[ "/tmp/qwen35_27b_v2_shard02.bgz7", "/tmp/qwen35_27b_base_shard02.bgz7", ] { if std::fs::metadata(path).is_ok() { eprintln!("Ingesting {path}..."); - weights.ingest(path); + ingest_weights(&mut knowledge, path); } } - - if weights.len() > 0 { - weights.compute_heel(); - eprintln!("WeightStore: {} vectors, HEEL={:?}", weights.len(), weights.heel.dims); - } else { - eprintln!("No bgz7 weights found — running empty"); - } + eprintln!("Knowledge base: {} SPO heads", knowledge.len()); let server = ServerState { cache: AutocompleteCache::new(), - weights, + knowledge, + context: SpoHead::zero(), }; let state: AppState = std::sync::Arc::new(Mutex::new(server)); From 6f59d5ca0f9ec7d01cc0868363a676c96c4b9172 Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 31 Mar 2026 19:23:29 +0000 Subject: [PATCH 4/8] =?UTF-8?q?feat:=20partitioned=20CAM=20index=20?= =?UTF-8?q?=E2=80=94=20TensorRole=20+=20layer=5Fidx=20from=20tensor=20name?= =?UTF-8?q?s?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Parse tensor names (HuggingFace + GGUF conventions) into: - TensorRole: QProj/KProj/VProj/OProj/GateProj/UpProj/DownProj/Embedding/Norm - layer_idx: u16 
layer number (None for non-layer tensors) Stored as Arrow columns for Lance partition pruning. Enables per-role palettes (256 archetypes of "query behavior" vs "gating decisions") and per-layer search (only search gate_proj in layer 12, not all 5M vectors). No re-extraction from models needed — partition key was always in the bgz7 tensor names. https://claude.ai/code/session_01M3at4EuHVvQ8S95mSnKgtK --- crates/lance-graph/src/graph/hydrate.rs | 167 +++++++++++++++++++++--- 1 file changed, 151 insertions(+), 16 deletions(-) diff --git a/crates/lance-graph/src/graph/hydrate.rs b/crates/lance-graph/src/graph/hydrate.rs index 355ad2e5..cd7903c7 100644 --- a/crates/lance-graph/src/graph/hydrate.rs +++ b/crates/lance-graph/src/graph/hydrate.rs @@ -10,23 +10,98 @@ use arrow::array::{ ArrayRef, FixedSizeListBuilder, Float32Builder, Int16Builder, - StringArray, UInt32Array, UInt8Array, + StringArray, UInt16Array, UInt32Array, UInt8Array, }; use arrow::datatypes::{DataType, Field, Schema}; use arrow::record_batch::RecordBatch; use std::sync::Arc; -/// Schema for the hydrated weight table. -/// -/// - `tensor_name`: which weight tensor (e.g. "model.layers.0.self_attn.q_proj") -/// - `row_idx`: row within tensor -/// - `vector`: 17-dim f32 for Lance vector search (i16→f32 is exact) -/// - `base17`: 17-dim i16 raw values (for direct L1, palette assignment) -/// - `palette_s/p/o`: SPO palette indices (populated later by palette pipeline) +/// Functional partition of a weight tensor. 
+#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +pub enum TensorRole { + QProj, // "how this layer queries" + KProj, // "what this layer matches" + VProj, // "what this layer retrieves" + OProj, // "how this layer outputs attention" + GateProj, // "what this layer gates" + UpProj, // "what this layer amplifies" + DownProj, // "what this layer compresses" + Embedding, // "vocabulary → hidden" + Norm, // "scale/bias" + Other, // unclassified +} + +impl TensorRole { + /// Parse tensor role from the full tensor name string. + /// Works with both HuggingFace and GGUF naming conventions. + pub fn from_name(name: &str) -> Self { + let n = name.to_lowercase(); + if n.contains("q_proj") || n.contains("attn_q") || n.contains(".wq.") { TensorRole::QProj } + else if n.contains("k_proj") || n.contains("attn_k") || n.contains(".wk.") { TensorRole::KProj } + else if n.contains("v_proj") || n.contains("attn_v") || n.contains(".wv.") { TensorRole::VProj } + else if n.contains("o_proj") || n.contains("attn_output") || n.contains(".wo.") { TensorRole::OProj } + else if n.contains("gate_proj") || n.contains("ffn_gate") || n.contains(".w1.") { TensorRole::GateProj } + else if n.contains("up_proj") || n.contains("ffn_up") || n.contains(".w3.") { TensorRole::UpProj } + else if n.contains("down_proj") || n.contains("ffn_down") || n.contains(".w2.") { TensorRole::DownProj } + else if n.contains("embed") || n.contains("token_embd") { TensorRole::Embedding } + else if n.contains("norm") || n.contains("ln_") { TensorRole::Norm } + else { TensorRole::Other } + } + + /// Numeric ID for Arrow column storage. 
+ pub fn as_u8(&self) -> u8 { + match self { + TensorRole::QProj => 0, + TensorRole::KProj => 1, + TensorRole::VProj => 2, + TensorRole::OProj => 3, + TensorRole::GateProj => 4, + TensorRole::UpProj => 5, + TensorRole::DownProj => 6, + TensorRole::Embedding => 7, + TensorRole::Norm => 8, + TensorRole::Other => 9, + } + } + + pub fn label(&self) -> &'static str { + match self { + TensorRole::QProj => "q_proj", + TensorRole::KProj => "k_proj", + TensorRole::VProj => "v_proj", + TensorRole::OProj => "o_proj", + TensorRole::GateProj => "gate_proj", + TensorRole::UpProj => "up_proj", + TensorRole::DownProj => "down_proj", + TensorRole::Embedding => "embed", + TensorRole::Norm => "norm", + TensorRole::Other => "other", + } + } +} + +/// Extract layer index from tensor name. Returns None for non-layer tensors. +pub fn parse_layer_idx(name: &str) -> Option { + // Match "layers.N." or "blk.N." + let n = name.to_lowercase(); + if let Some(pos) = n.find("layers.") { + let rest = &n[pos + 7..]; + rest.split('.').next().and_then(|s| s.parse().ok()) + } else if let Some(pos) = n.find("blk.") { + let rest = &n[pos + 4..]; + rest.split('.').next().and_then(|s| s.parse().ok()) + } else { + None + } +} + +/// Schema for the hydrated weight table with partition columns. pub fn weight_schema() -> Schema { Schema::new(vec![ Field::new("tensor_name", DataType::Utf8, false), Field::new("row_idx", DataType::UInt32, false), + Field::new("layer_idx", DataType::UInt16, true), + Field::new("tensor_role", DataType::UInt8, false), Field::new( "vector", DataType::FixedSizeList( @@ -49,22 +124,30 @@ pub fn weight_schema() -> Schema { ]) } -/// Convert bgz7 compressed tensors to Arrow RecordBatch. +/// Convert bgz7 compressed tensors to Arrow RecordBatch with partition columns. /// -/// Stores both f32 (for Lance vector search) and i16 (for direct L1 / palette). +/// Each row gets `layer_idx` and `tensor_role` parsed from the tensor name. 
+/// This enables partitioned CAM indexing: per-role palettes, per-layer search. pub fn bgz7_to_batch( tensors: &[(String, Vec)], ) -> RecordBatch { let mut names = Vec::new(); let mut row_idxs = Vec::new(); + let mut layer_idxs: Vec> = Vec::new(); + let mut roles = Vec::new(); let mut vector_builder = FixedSizeListBuilder::new(Float32Builder::new(), 17); let mut base17_builder = FixedSizeListBuilder::new(Int16Builder::new(), 17); let mut total_rows = 0usize; for (name, rows) in tensors { + let role = TensorRole::from_name(name); + let layer = parse_layer_idx(name); + for (r, fp) in rows.iter().enumerate() { names.push(name.clone()); row_idxs.push(r as u32); + layer_idxs.push(layer); + roles.push(role.as_u8()); for d in 0..17 { vector_builder.values().append_value(fp.dims[d] as f32); base17_builder.values().append_value(fp.dims[d]); @@ -77,6 +160,8 @@ pub fn bgz7_to_batch( let name_array: ArrayRef = Arc::new(StringArray::from(names)); let row_idx_array: ArrayRef = Arc::new(UInt32Array::from(row_idxs)); + let layer_idx_array: ArrayRef = Arc::new(UInt16Array::from(layer_idxs)); + let role_array: ArrayRef = Arc::new(UInt8Array::from(roles)); let vector_array: ArrayRef = Arc::new(vector_builder.finish()); let base17_array: ArrayRef = Arc::new(base17_builder.finish()); let null_u8: ArrayRef = Arc::new(UInt8Array::from(vec![None::; total_rows])); @@ -84,6 +169,8 @@ pub fn bgz7_to_batch( RecordBatch::try_from_iter(vec![ ("tensor_name", name_array), ("row_idx", row_idx_array), + ("layer_idx", layer_idx_array), + ("tensor_role", role_array), ("vector", vector_array), ("base17", base17_array), ("palette_s", null_u8.clone()), @@ -175,18 +262,64 @@ mod tests { #[test] fn test_weight_schema() { let schema = weight_schema(); - assert_eq!(schema.fields().len(), 7); + assert_eq!(schema.fields().len(), 9); } #[test] fn test_bgz7_to_batch() { let tensors = vec![ - ("layer.0.q_proj".into(), vec![Base17 { dims: [100; 17] }, Base17 { dims: [200; 17] }]), - ("layer.0.k_proj".into(), 
vec![Base17 { dims: [-50; 17] }]), + ("model.layers.0.self_attn.q_proj.weight".into(), vec![Base17 { dims: [100; 17] }, Base17 { dims: [200; 17] }]), + ("model.layers.0.self_attn.k_proj.weight".into(), vec![Base17 { dims: [-50; 17] }]), ]; let batch = bgz7_to_batch(&tensors); assert_eq!(batch.num_rows(), 3); - assert_eq!(batch.num_columns(), 7); + assert_eq!(batch.num_columns(), 9); + } + + #[test] + fn test_tensor_role_parsing() { + assert_eq!(TensorRole::from_name("model.layers.0.self_attn.q_proj.weight"), TensorRole::QProj); + assert_eq!(TensorRole::from_name("model.layers.0.self_attn.k_proj.weight"), TensorRole::KProj); + assert_eq!(TensorRole::from_name("model.layers.0.self_attn.v_proj.weight"), TensorRole::VProj); + assert_eq!(TensorRole::from_name("model.layers.0.self_attn.o_proj.weight"), TensorRole::OProj); + assert_eq!(TensorRole::from_name("model.layers.0.mlp.gate_proj.weight"), TensorRole::GateProj); + assert_eq!(TensorRole::from_name("model.layers.0.mlp.up_proj.weight"), TensorRole::UpProj); + assert_eq!(TensorRole::from_name("model.layers.0.mlp.down_proj.weight"), TensorRole::DownProj); + assert_eq!(TensorRole::from_name("model.embed_tokens.weight"), TensorRole::Embedding); + assert_eq!(TensorRole::from_name("model.layers.0.input_layernorm.weight"), TensorRole::Norm); + // GGUF naming + assert_eq!(TensorRole::from_name("blk.5.attn_q.weight"), TensorRole::QProj); + assert_eq!(TensorRole::from_name("blk.5.ffn_gate.weight"), TensorRole::GateProj); + } + + #[test] + fn test_layer_idx_parsing() { + assert_eq!(parse_layer_idx("model.layers.15.self_attn.q_proj.weight"), Some(15)); + assert_eq!(parse_layer_idx("blk.7.attn_q.weight"), Some(7)); + assert_eq!(parse_layer_idx("model.embed_tokens.weight"), None); + assert_eq!(parse_layer_idx("model.layers.0.mlp.gate_proj.weight"), Some(0)); + } + + #[test] + fn test_partition_columns_populated() { + let tensors = vec![ + ("model.layers.5.self_attn.q_proj.weight".into(), vec![Base17 { dims: [100; 17] }]), + 
("model.layers.5.mlp.gate_proj.weight".into(), vec![Base17 { dims: [200; 17] }]), + ("model.embed_tokens.weight".into(), vec![Base17 { dims: [50; 17] }]), + ]; + let batch = bgz7_to_batch(&tensors); + let roles = batch.column_by_name("tensor_role").unwrap(); + let role_arr = roles.as_any().downcast_ref::().unwrap(); + assert_eq!(role_arr.value(0), TensorRole::QProj.as_u8()); + assert_eq!(role_arr.value(1), TensorRole::GateProj.as_u8()); + assert_eq!(role_arr.value(2), TensorRole::Embedding.as_u8()); + + let layers = batch.column_by_name("layer_idx").unwrap(); + let layer_arr = layers.as_any().downcast_ref::().unwrap(); + assert_eq!(layer_arr.value(0), 5); + assert_eq!(layer_arr.value(1), 5); + // First two have layer 5, third (embed) has no layer + assert_eq!(batch.num_rows(), 3); } #[test] @@ -224,11 +357,13 @@ mod tests { } #[test] - fn test_both_columns_present() { - let tensors = vec![("t".into(), vec![Base17 { dims: [42; 17] }])]; + fn test_all_columns_present() { + let tensors = vec![("model.layers.0.self_attn.q_proj.weight".into(), vec![Base17 { dims: [42; 17] }])]; let batch = bgz7_to_batch(&tensors); assert!(batch.column_by_name("vector").is_some()); assert!(batch.column_by_name("base17").is_some()); + assert!(batch.column_by_name("layer_idx").is_some()); + assert!(batch.column_by_name("tensor_role").is_some()); assert!(batch.column_by_name("palette_s").is_some()); } From 41f6b734f65f47a8836b729f431b7d2762cb10d3 Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 31 Mar 2026 19:29:07 +0000 Subject: [PATCH 5/8] =?UTF-8?q?feat:=20NeuronPrint=20+=20NeuronQuery=20+?= =?UTF-8?q?=20NeuronTrace=20=E2=80=94=206D=20holographic=20neuron=20repres?= =?UTF-8?q?entation?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit NeuronPrint (204 bytes): Q/K/V/Gate/Up/Down — complete behavior of one neuron. 
bundle() → 34-byte holographic fingerprint (all 6 roles superposed) attention() → Q ⊕ K (what it attends to) retrieval() → K ⊕ V (what it retrieves when matched) mlp() → Gate ⊕ Up ⊕ Down (the nonlinear transform) NeuronQuery: selective role probing with Optional fields. attention(q) → probe Q against K store retrieval(k) → probe K against V store gating(gate) → probe Gate role_mask() → 6-bit Pearl-like mask (Q/K/V/Gate/Up/Down) score(neuron) → L1 distance on active roles only NeuronTrace: NARS truth derived from role ratios. frequency → Gate magnitude (how often this neuron fires) confidence → Up/Down ratio (evidence strength) attention → Q·K alignment (activation strength) coherence → K·V alignment (retrieval quality) expectation → c * (f - 0.5) + 0.5 https://claude.ai/code/session_01M3at4EuHVvQ8S95mSnKgtK --- crates/lance-graph/src/graph/mod.rs | 1 + crates/lance-graph/src/graph/neuron.rs | 307 +++++++++++++++++++++++++ 2 files changed, 308 insertions(+) create mode 100644 crates/lance-graph/src/graph/neuron.rs diff --git a/crates/lance-graph/src/graph/mod.rs b/crates/lance-graph/src/graph/mod.rs index 2e03ec53..6307bb6f 100644 --- a/crates/lance-graph/src/graph/mod.rs +++ b/crates/lance-graph/src/graph/mod.rs @@ -14,6 +14,7 @@ pub mod falkor_semirings; pub mod fingerprint; pub mod hydrate; pub mod metadata; +pub mod neuron; pub mod neighborhood; pub mod sparse; pub mod spo; diff --git a/crates/lance-graph/src/graph/neuron.rs b/crates/lance-graph/src/graph/neuron.rs new file mode 100644 index 00000000..09fd5511 --- /dev/null +++ b/crates/lance-graph/src/graph/neuron.rs @@ -0,0 +1,307 @@ +//! NeuronPrint: 6D holographic representation of a single neuron's behavior. +//! +//! Each neuron (layer i, feature j) has 6 roles in the transformer: +//! Q = how it queries (34 bytes) +//! K = what it matches (34 bytes) +//! V = what it retrieves (34 bytes) +//! Gate = whether it fires (34 bytes) +//! Up = how it amplifies (34 bytes) +//! 
Down = how it compresses (34 bytes) +//! +//! Total: 204 bytes per neuron. Holographic: bundle all 6 → 34 bytes. +//! The CAM index (row_idx) aligns all 6 tables — same row = same feature. +//! +//! Three constructs: +//! NeuronPrint — what a neuron IS (the object, 204 bytes) +//! NeuronQuery — how you ASK it (the query, selective role probing) +//! NeuronTrace — how it REASONS (the thinking, NARS truth from role ratios) + +use ndarray::hpc::bgz17_bridge::Base17; + +// ─── Object: what a neuron IS ─────────────────────────────────────────────── + +/// Complete 6D representation of a single neuron at (layer, feature). +/// 204 bytes. Each field is a 34-byte Base17 vector. +#[derive(Clone, Debug)] +pub struct NeuronPrint { + /// Layer index in the model. + pub layer: u16, + /// Feature/row index within the layer. + pub feature: u32, + /// Query projection: how this neuron queries. + pub q: Base17, + /// Key projection: what this neuron matches. + pub k: Base17, + /// Value projection: what this neuron retrieves. + pub v: Base17, + /// Gate projection: whether this neuron fires (SwiGLU gate). + pub gate: Base17, + /// Up projection: how this neuron amplifies. + pub up: Base17, + /// Down projection: how this neuron compresses. + pub down: Base17, +} + +impl NeuronPrint { + /// Bundle all 6 roles into a single 34-byte holographic fingerprint. + /// The gestalt contains all roles in superposition. + pub fn bundle(&self) -> Base17 { + let mut dims = [0i32; 17]; + for d in 0..17 { + dims[d] = self.q.dims[d] as i32 + + self.k.dims[d] as i32 + + self.v.dims[d] as i32 + + self.gate.dims[d] as i32 + + self.up.dims[d] as i32 + + self.down.dims[d] as i32; + } + let mut out = [0i16; 17]; + for d in 0..17 { + out[d] = (dims[d] / 6).clamp(-32768, 32767) as i16; + } + Base17 { dims: out } + } + + /// Attention fingerprint: Q ⊕ K (what this neuron attends to). 
+ pub fn attention(&self) -> Base17 { + self.q.xor_bind(&self.k) + } + + /// Retrieval fingerprint: K ⊕ V (what this neuron retrieves when matched). + pub fn retrieval(&self) -> Base17 { + self.k.xor_bind(&self.v) + } + + /// MLP fingerprint: Gate ⊕ Up ⊕ Down (the nonlinear transform). + pub fn mlp(&self) -> Base17 { + self.gate.xor_bind(&self.up).xor_bind(&self.down) + } + + /// Byte size of the full neuron print. + pub const BYTE_SIZE: usize = 6 * 34; // 204 +} + +// ─── Query: how you ASK a neuron ──────────────────────────────────────────── + +/// Selective probe into neuron roles. Set the roles you want to query. +/// None = wildcard (don't constrain this role). +#[derive(Clone, Debug, Default)] +pub struct NeuronQuery { + /// Constrain layer (None = any layer). + pub layer: Option, + /// Constrain feature (None = any feature). + pub feature: Option, + /// Query vector for Q-role (None = don't probe Q). + pub q: Option, + /// Query vector for K-role (None = don't probe K). + pub k: Option, + /// Query vector for V-role (None = don't probe V). + pub v: Option, + /// Query vector for Gate-role (None = don't probe Gate). + pub gate: Option, + /// Query vector for Up-role (None = don't probe Up). + pub up: Option, + /// Query vector for Down-role (None = don't probe Down). + pub down: Option, +} + +impl NeuronQuery { + /// "What does this query attend to?" — probe Q against K store. + pub fn attention(q: Base17) -> Self { + NeuronQuery { q: Some(q), ..Default::default() } + } + + /// "What is retrieved for this key?" — probe K against V store. + pub fn retrieval(k: Base17) -> Self { + NeuronQuery { k: Some(k), ..Default::default() } + } + + /// "Does this feature fire?" — probe Gate. + pub fn gating(gate: Base17) -> Self { + NeuronQuery { gate: Some(gate), ..Default::default() } + } + + /// "What does layer N do?" — constrain to a specific layer. 
+ pub fn at_layer(mut self, layer: u16) -> Self { + self.layer = Some(layer); + self + } + + /// Score a NeuronPrint against this query. Lower = better match. + /// Only active (Some) roles contribute to the score. + pub fn score(&self, neuron: &NeuronPrint) -> u32 { + let mut total = 0u32; + let mut count = 0u32; + if let Some(ref q) = self.q { total += q.l1(&neuron.q); count += 1; } + if let Some(ref k) = self.k { total += k.l1(&neuron.k); count += 1; } + if let Some(ref v) = self.v { total += v.l1(&neuron.v); count += 1; } + if let Some(ref g) = self.gate { total += g.l1(&neuron.gate); count += 1; } + if let Some(ref u) = self.up { total += u.l1(&neuron.up); count += 1; } + if let Some(ref d) = self.down { total += d.l1(&neuron.down); count += 1; } + if count > 0 { total / count } else { u32::MAX } + } + + /// How many roles are active in this query. + pub fn active_roles(&self) -> u8 { + [&self.q, &self.k, &self.v, &self.gate, &self.up, &self.down] + .iter() + .filter(|r| r.is_some()) + .count() as u8 + } + + /// Pearl-like mask: which roles are active (6-bit). + /// Bit 0=Q, 1=K, 2=V, 3=Gate, 4=Up, 5=Down. + pub fn role_mask(&self) -> u8 { + let mut mask = 0u8; + if self.q.is_some() { mask |= 1 << 0; } + if self.k.is_some() { mask |= 1 << 1; } + if self.v.is_some() { mask |= 1 << 2; } + if self.gate.is_some() { mask |= 1 << 3; } + if self.up.is_some() { mask |= 1 << 4; } + if self.down.is_some() { mask |= 1 << 5; } + mask + } +} + +// ─── Thinking: how a neuron REASONS ───────────────────────────────────────── + +/// NARS truth values derived from the 6 role ratios. +/// The MLP roles (Gate/Up/Down) encode causal structure. +#[derive(Clone, Debug)] +pub struct NeuronTrace { + /// NARS frequency: P(fires) derived from Gate activation. + /// gate_magnitude / max_magnitude → [0, 1]. + pub frequency: f32, + /// NARS confidence: Up/Down ratio → evidence strength. + /// High Up + low Down = strong positive evidence. 
+ /// Low Up + high Down = strong compression (less evidence). + pub confidence: f32, + /// Attention strength: Q·K alignment (L1 distance, inverted). + /// Low distance = strong attention = this neuron activates. + pub attention: f32, + /// Retrieval coherence: K·V alignment. + /// Low distance = coherent retrieval (what's stored matches what's keyed). + pub coherence: f32, + /// NARS expectation: c * (f - 0.5) + 0.5. + pub expectation: f32, +} + +impl NeuronTrace { + /// Derive NARS truth from a NeuronPrint. + pub fn from_neuron(n: &NeuronPrint) -> Self { + // Gate magnitude → frequency (how often this neuron fires) + let gate_mag = n.gate.dims.iter().map(|d| (*d as f32).abs()).sum::(); + let max_mag = 17.0 * 32768.0; + let frequency = (gate_mag / max_mag).clamp(0.0, 1.0); + + // Up/Down ratio → confidence + let up_mag = n.up.dims.iter().map(|d| (*d as f32).abs()).sum::(); + let down_mag = n.down.dims.iter().map(|d| (*d as f32).abs()).sum::().max(1.0); + let confidence = (up_mag / (up_mag + down_mag)).clamp(0.0, 0.99); + + // Q·K alignment → attention strength + let qk_dist = n.q.l1(&n.k) as f32; + let attention = 1.0 - (qk_dist / max_mag).clamp(0.0, 1.0); + + // K·V alignment → retrieval coherence + let kv_dist = n.k.l1(&n.v) as f32; + let coherence = 1.0 - (kv_dist / max_mag).clamp(0.0, 1.0); + + let expectation = confidence * (frequency - 0.5) + 0.5; + + NeuronTrace { frequency, confidence, attention, coherence, expectation } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn make_neuron(layer: u16, feature: u32, base_val: i16) -> NeuronPrint { + NeuronPrint { + layer, + feature, + q: Base17 { dims: [base_val; 17] }, + k: Base17 { dims: [base_val + 10; 17] }, + v: Base17 { dims: [base_val + 20; 17] }, + gate: Base17 { dims: [base_val + 100; 17] }, + up: Base17 { dims: [base_val + 50; 17] }, + down: Base17 { dims: [base_val + 30; 17] }, + } + } + + #[test] + fn test_neuron_bundle() { + let n = make_neuron(0, 0, 100); + let b = n.bundle(); + // Average 
of 100, 110, 120, 200, 150, 130 = 135 + assert_eq!(b.dims[0], 135); + } + + #[test] + fn test_neuron_byte_size() { + assert_eq!(NeuronPrint::BYTE_SIZE, 204); + } + + #[test] + fn test_query_attention() { + let q = NeuronQuery::attention(Base17 { dims: [100; 17] }); + assert_eq!(q.active_roles(), 1); + assert_eq!(q.role_mask(), 0b000001); // Q only + } + + #[test] + fn test_query_score() { + let n = make_neuron(0, 0, 100); + // Query that matches Q exactly + let q_exact = NeuronQuery::attention(Base17 { dims: [100; 17] }); + let score_exact = q_exact.score(&n); + // Query that's far from Q + let q_far = NeuronQuery::attention(Base17 { dims: [10000; 17] }); + let score_far = q_far.score(&n); + assert!(score_exact < score_far, "exact match should score lower (closer)"); + } + + #[test] + fn test_query_multi_role() { + let q = NeuronQuery { + q: Some(Base17 { dims: [100; 17] }), + k: Some(Base17 { dims: [200; 17] }), + ..Default::default() + }; + assert_eq!(q.active_roles(), 2); + assert_eq!(q.role_mask(), 0b000011); // Q + K + } + + #[test] + fn test_trace_from_neuron() { + let n = make_neuron(5, 42, 100); + let t = NeuronTrace::from_neuron(&n); + assert!(t.frequency > 0.0); + assert!(t.confidence > 0.0 && t.confidence < 1.0); + assert!(t.attention > 0.0); // Q and K are close (only differ by 10) + assert!(t.expectation > 0.0 && t.expectation < 1.0); + } + + #[test] + fn test_high_gate_high_frequency() { + let mut n = make_neuron(0, 0, 0); + n.gate = Base17 { dims: [30000; 17] }; // high gate + let t = NeuronTrace::from_neuron(&n); + assert!(t.frequency > 0.8, "high gate should mean high frequency: {}", t.frequency); + } + + #[test] + fn test_attention_xor_bind() { + let n = make_neuron(0, 0, 100); + let attn = n.attention(); // Q ⊕ K + // Should be non-zero (Q ≠ K) + assert!(attn.dims.iter().any(|d| *d != 0)); + } + + #[test] + fn test_query_at_layer() { + let q = NeuronQuery::attention(Base17 { dims: [100; 17] }).at_layer(15); + assert_eq!(q.layer, Some(15)); + } +} 
From 8650b4ae00d3524bd418dd867c268446cc082429 Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 31 Mar 2026 19:35:12 +0000 Subject: [PATCH 6/8] =?UTF-8?q?docs:=20NeuronPrint=20Rosetta=20Stone=20?= =?UTF-8?q?=E2=80=94=206D=20holographic=20neuron=20representation?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The 6 weight tensor roles (Q/K/V/Gate/Up/Down) are 6 dimensions of a single neuron's complete behavior. 204 bytes per neuron, aligned by row index across all 6 tables. The CAM position IS preserved. Key epiphanies: - Q/K/V = attention triad (who asks, what matches, what's retrieved) - Gate/Up/Down = MLP triad (fires?, amplifies?, compresses?) - K+V = key-value retrieval store, Q = query against it - Gate/Up/Down = NARS truth hydration (frequency, confidence) - Two triads = 6D SPO: each triad is an S/P/O decomposition - Cross-role distances are meaningful (Q·K = attention sharpness) - Same structure across Llama/Qwen/GPT-2/GGUF with naming map Rosetta exploration needed: - Do Q archetypes cluster by semantic role? - Does Gate magnitude predict neuron importance? - Does Up/Down ratio detect polysemanticity? - Layer-wise NeuronTrace progression (feature → concept gradient) No re-extraction from models needed — partition key was always in the bgz7 tensor names. Just needs grouping by tensor role. 
https://claude.ai/code/session_01M3at4EuHVvQ8S95mSnKgtK --- docs/NEURONPRINT_ROSETTA.md | 225 ++++++++++++++++++++++++++++++++++++ 1 file changed, 225 insertions(+) create mode 100644 docs/NEURONPRINT_ROSETTA.md diff --git a/docs/NEURONPRINT_ROSETTA.md b/docs/NEURONPRINT_ROSETTA.md new file mode 100644 index 00000000..11c901d3 --- /dev/null +++ b/docs/NEURONPRINT_ROSETTA.md @@ -0,0 +1,225 @@ +# NeuronPrint Rosetta Stone + +> **Date**: 2026-03-31 +> **Status**: Exploration — we built the instrument, now we learn to read it + +--- + +## What We Built + +Every neuron (layer `i`, feature `j`) in a transformer has 6 functional roles, +each compressed to 34 bytes (Base17, ρ=0.993 vs BF16). Together: **204 bytes +per neuron** — a complete holographic fingerprint of what that neuron does. + +``` +NeuronPrint { + q: Base17, // 34B — how this neuron queries (attention Q projection) + k: Base17, // 34B — what this neuron matches (attention K projection) + v: Base17, // 34B — what this neuron retrieves (attention V projection) + gate: Base17, // 34B — whether this neuron fires (SwiGLU/MLP gate) + up: Base17, // 34B — how this neuron amplifies (MLP up projection) + down: Base17, // 34B — how this neuron compresses (MLP down projection) +} +``` + +Three operations on it: + +| Struct | Purpose | Metaphor | +|--------|---------|----------| +| `NeuronPrint` | What a neuron IS | The object — its complete behavior in 204 bytes | +| `NeuronQuery` | How you ASK it | The query — selective role probing (6-bit mask) | +| `NeuronTrace` | How it REASONS | The thinking — NARS truth derived from role ratios | + +--- + +## The Epiphany: 6D SPO + +The 6 roles map to an extended SPO decomposition. Classical SPO has 3 planes +(Subject, Predicate, Object). NeuronPrint has 6 — which factor into two triads: + +``` +Attention Triad (how the neuron communicates): + Q = Subject "who is asking?" + K = Predicate "what is the relationship?" + V = Object "what is the answer?" 
+ +MLP Triad (how the neuron transforms): + Gate = Subject "what input feature is this about?" + Up = Predicate "how does it transform?" + Down = Object "what does it produce?" +``` + +The two triads are linked by the residual stream — attention writes to it, +MLP reads from it. The NeuronPrint captures BOTH sides: the communication +(Q/K/V) and the computation (Gate/Up/Down) in a single 204-byte struct. + +### Why This Is a Rosetta Stone + +The same neuron appears in all 6 tables, aligned by row index. This means: + +1. **Q tells you what the neuron looks for** — its query pattern +2. **K tells you when the neuron responds** — its matching criteria +3. **V tells you what the neuron says** — its contribution +4. **Gate tells you IF the neuron speaks** — its activation threshold +5. **Up tells you HOW MUCH it speaks** — its amplification factor +6. **Down tells you how it's COMPRESSED afterward** — the information bottleneck + +Reading all 6 together is like having the Rosetta Stone for that neuron — +the same information expressed in 6 different "languages" (projection spaces). + +--- + +## Retrieval vs Reasoning + +The 6 roles split cleanly into two uses: + +### Retrieval (Key-Value Store) +``` +Q probes against K → finds matching neurons +V at those positions → the retrieved information +``` +This IS attention, reconstructed from palette indices. It's a key-value cache +where K is the key and V is the value, and Q is the lookup query. + +### Reasoning (NARS Hydration) +``` +Gate magnitude → NARS frequency (how often does this fire?) +Up/Down ratio → NARS confidence (how strong is the evidence?) +Q·K alignment → attention strength (how relevant is this?) +K·V alignment → retrieval coherence (how consistent is the stored info?) +``` +The MLP roles encode causal structure. A neuron with high Gate, high Up, +low Down is a "confident amplifier" — it fires often and boosts its signal. 
+A neuron with low Gate, low Up, high Down is a "skeptical compressor" — +it rarely fires and attenuates when it does. + +--- + +## The LLM Architecture Zoo + +Different LLM architectures use different naming conventions but the same +6 functional roles. Here's the mapping: + +### Llama / Qwen / Mistral (GQA attention + SwiGLU MLP) +``` +model.layers.{L}.self_attn.q_proj.weight → Q +model.layers.{L}.self_attn.k_proj.weight → K (grouped, fewer heads) +model.layers.{L}.self_attn.v_proj.weight → V (grouped, same as K) +model.layers.{L}.self_attn.o_proj.weight → O (output projection, maps back) +model.layers.{L}.mlp.gate_proj.weight → Gate (SwiGLU σ(x) branch) +model.layers.{L}.mlp.up_proj.weight → Up (SwiGLU linear branch) +model.layers.{L}.mlp.down_proj.weight → Down (back to hidden dim) +``` + +### GPT-2 / GPT-J (MHA attention + GELU MLP) +``` +transformer.h.{L}.attn.c_attn.weight → Q+K+V fused (split by dim) +transformer.h.{L}.attn.c_proj.weight → O +transformer.h.{L}.mlp.c_fc.weight → Up (no gate in GELU MLP) +transformer.h.{L}.mlp.c_proj.weight → Down +``` +Note: GPT-2 has no separate Gate — GELU activation is implicit. The Gate +role is absent; use Up magnitude as a proxy for both gating and amplification. + +### GGUF (llama.cpp naming) +``` +blk.{L}.attn_q.weight → Q +blk.{L}.attn_k.weight → K +blk.{L}.attn_v.weight → V +blk.{L}.attn_output.weight → O +blk.{L}.ffn_gate.weight → Gate +blk.{L}.ffn_up.weight → Up +blk.{L}.ffn_down.weight → Down +``` + +### What Varies Between Architectures +- **GQA vs MHA**: K and V may have fewer heads than Q (grouped query attention). + Row count differs: Q has `n_heads × d_head` rows, K/V have `n_kv_heads × d_head`. +- **SwiGLU vs GELU**: SwiGLU has explicit Gate; GELU doesn't. For GELU models, + the Gate NeuronPrint role is empty or derived from Up. +- **Fused QKV**: Some models fuse Q/K/V into one weight matrix. Need to split + by dimension when extracting. 
+ +--- + +## What We Don't Know Yet (Rosetta Exploration) + +### Unanswered Questions +1. **Do Q archetypes cluster by semantic role?** If palette entry 42 in the + Q palette consistently corresponds to "entity lookup" across layers, that's + a universal attention primitive. If it doesn't, the palette is just compression. + +2. **Does Gate magnitude correlate with neuron importance?** Literature suggests + yes (see: SwiGLU analysis papers), but we haven't verified on our Base17 + projections. The ρ=0.993 preservation should keep this relationship intact. + +3. **Are cross-role distances meaningful?** Does `L1(Q[i][j], K[i][j])` (the + Q-K alignment for one neuron) predict attention entropy? Theory says yes: + a neuron whose Q and K are similar attends broadly; one whose Q and K + differ attends sharply. + +4. **Does the Up/Down ratio track with polysemanticity?** A neuron with many + features (polysemantic) should have high Up magnitude (many activations) + but also high Down magnitude (aggressive compression). The ratio might + identify monosemantic vs polysemantic neurons. + +5. **Layer-wise structure**: Do early layers (feature detection) have different + Gate/Up/Down distributions than late layers (concept composition)? + The Hyperprobe paper suggests probing only the second half of layers. + +### What the Literature Tells Us +- **Anthropic's "Scaling Monosemanticity"** (2024): Individual neurons often + represent single concepts. The NeuronPrint should capture this — a monosemantic + neuron has a tight, unique fingerprint across all 6 roles. +- **"Attention Head Superposition"** (2024): Attention heads can represent multiple + features simultaneously. The Q/K alignment in NeuronPrint detects this — + broad alignment = superposed, tight alignment = specialized. +- **SwiGLU analysis** (Shazeer 2020, PaLM): Gate projection acts as a learned + binary mask over features. High Gate magnitude = important feature. 
+- **Residual stream as communication bus** (Elhage et al. 2021): All layers + read from and write to the same residual stream. NeuronPrint captures both + the read (Q/K) and write (V/Down) sides. + +--- + +## Next Steps + +1. **Hydrate a real model** with partition columns and build per-role palettes. + Compare archetype distributions across Q/K/V/Gate/Up/Down. + +2. **Cross-role distance analysis**: For each neuron, compute Q·K, K·V, + Gate magnitude, Up/Down ratio. Correlate with known interpretability results. + +3. **Layer progression**: Plot NeuronTrace (frequency, confidence, attention, + coherence) across layers. Does it match the feature→concept gradient? + +4. **Diff between models**: Compare Opus 4.5 vs 4.6 NeuronPrints. + Which roles diverge? Which layers? This tells you WHERE the behavioral + difference lives in the architecture. + +5. **Wire NeuronQuery into serve.rs**: Replace flat SPO extraction with + role-aware probing. "What does this model know about X?" becomes + `NeuronQuery::attention(encode(X)).at_layer(15)` → searches Q partition + at layer 15 only. + +--- + +## Memory Budget + +For Qwen3.5-27B (28 layers, ~5M weight rows across all tensors): + +``` +Per neuron: 204 bytes (6 × 34) +Per layer: ~180K neurons × 204 bytes = ~36 MB +Full model: 28 layers × 36 MB ≈ 1 GB (NeuronPrint for every feature) +Bundled per layer: 28 × 34 bytes = 952 bytes (one HEEL per layer) +Bundled per role: 6 × 34 bytes = 204 bytes (one HEEL per role type) +Full model HEEL: 34 bytes +``` + +Compare: original BF16 weights = ~54 GB. NeuronPrint = ~1 GB. 54× compression +while adding structural metadata (role, layer) that the raw weights don't have. + +The 5M × 34 bytes (170 MB) we already have in bgz7 = the LEAF level. +NeuronPrint organizes the same data into 6 aligned tables with semantic meaning. +No additional extraction needed — just grouping by tensor role. 
From 4e8b96021646d1f1d29e9d43db12a858195bb03b Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 31 Mar 2026 19:49:16 +0000 Subject: [PATCH 7/8] =?UTF-8?q?docs:=206D=20SPO=20query=20language=20desig?= =?UTF-8?q?n=20=E2=80=94=20Cypher/GQL=20extension=20for=20NeuronPrint?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extends lance-graph's Cypher parser with 6D NeuronPrint-aware queries: MATCH (n:Neuron {layer:15})-[:ATTENDS]->(m:Neuron) WHERE l1(n.q, m.k) < 50 RETURN n.feature, m.v, n.trace.confidence Maps to DataFusion SQL over partitioned Lance datasets: - Partition prune by tensor_role + layer_idx - RaBitQ ANN on vector column - UDFs: l1, magnitude, xor_bind, bundle, neuron_trace, nars_revision 4-phase implementation plan: Phase 1: DataFusion UDFs (pure SQL, no Cypher changes) Phase 2: Cypher extension (parser + planner) Phase 3: Cross-layer tracing (residual stream paths) Phase 4: Model comparison (multi-model diff queries) https://claude.ai/code/session_01M3at4EuHVvQ8S95mSnKgtK --- docs/NEURON_QUERY_LANGUAGE.md | 229 ++++++++++++++++++++++++++++++++++ 1 file changed, 229 insertions(+) create mode 100644 docs/NEURON_QUERY_LANGUAGE.md diff --git a/docs/NEURON_QUERY_LANGUAGE.md b/docs/NEURON_QUERY_LANGUAGE.md new file mode 100644 index 00000000..6be4ee68 --- /dev/null +++ b/docs/NEURON_QUERY_LANGUAGE.md @@ -0,0 +1,229 @@ +# 6D SPO Query Language — Cypher/GQL Extension for NeuronPrint + +> **Date**: 2026-03-31 +> **Status**: Design — ready to implement when token budget refreshes +> **Depends on**: Cypher parser (done), DataFusion planner (done), NeuronPrint (done), hydrate partitions (done) + +--- + +## The Idea + +Extend lance-graph's existing Cypher/GQL parser to query the 6D NeuronPrint +structure natively. DataFusion executes the query over partitioned Lance datasets. +The 6 tensor roles become first-class graph relationships. 
+ +``` +Today (string SPO): + MATCH (s:Entity)-[r:KNOWS]->(o:Entity) RETURN s, r, o + +Tomorrow (6D NeuronPrint SPO): + MATCH (n:Neuron)-[:Q]->(target) + WHERE n.layer = 15 AND distance(n.q, $query) < 100 + RETURN n.feature, n.v AS retrieval, n.trace.confidence AS conf +``` + +--- + +## Query Language Extension + +### Node Type: Neuron + +```cypher +-- A neuron is identified by (layer, feature) +MATCH (n:Neuron {layer: 15, feature: 42}) +RETURN n.q, n.k, n.v, n.gate, n.up, n.down +``` + +Each property (q, k, v, gate, up, down) is a 17-dim Base17 vector. + +### Relationship Types: The 6 Roles + +```cypher +-- Attention: what does layer 15 attend to? +MATCH (n:Neuron {layer: 15})-[:ATTENDS]->(m:Neuron) +WHERE l1(n.q, m.k) < 50 +RETURN n.feature, m.feature, m.v AS retrieved + +-- Gating: which neurons fire at layer 10? +MATCH (n:Neuron {layer: 10}) +WHERE magnitude(n.gate) > 0.8 +RETURN n.feature, n.trace.frequency + +-- MLP path: what does layer 5 amplify? +MATCH (n:Neuron {layer: 5}) +WHERE magnitude(n.up) > magnitude(n.down) * 2 +RETURN n.feature AS amplified, n.trace.confidence +``` + +### Role Masks (Pearl 2³ → Pearl 2⁶) + +```cypher +-- Probe only Q+K (attention query) +MATCH (n:Neuron) USING ROLES(q, k) +WHERE l1(n.q, $probe) < 100 +RETURN n.k, n.v + +-- Probe only Gate+Up+Down (reasoning query) +MATCH (n:Neuron) USING ROLES(gate, up, down) +WHERE n.trace.expectation > 0.7 +RETURN n.feature, n.layer, n.trace + +-- Full 6D probe +MATCH (n:Neuron) USING ROLES(*) +WHERE bundle_distance(n, $query) < 200 +RETURN n +``` + +### Cross-Layer Queries (Residual Stream Tracing) + +```cypher +-- Trace a concept through the network: +-- which neurons activate at each layer for this query? 
+MATCH path = (n:Neuron)-[:ATTENDS*]->(m:Neuron) +WHERE n.layer = 0 AND m.layer = 27 + AND l1(n.q, $concept) < 50 +RETURN nodes(path), [x IN nodes(path) | x.trace.frequency] AS activations +``` + +### NARS-Enriched Queries + +```cypher +-- Find neurons with high confidence AND high frequency +-- (strong, reliable features) +MATCH (n:Neuron) +WHERE n.trace.frequency > 0.8 AND n.trace.confidence > 0.7 +RETURN n.layer, n.feature, n.trace.expectation +ORDER BY n.trace.expectation DESC +LIMIT 100 + +-- Find contradictions: neurons where Q says one thing, Gate says another +MATCH (n:Neuron) +WHERE n.trace.attention > 0.8 AND n.trace.frequency < 0.2 +RETURN n AS "attends but doesn't fire" + +-- NARS revision across layers: combine evidence +MATCH (a:Neuron {layer: 10}), (b:Neuron {layer: 20}) +WHERE a.feature = b.feature +RETURN a.feature, + nars_revision(a.trace, b.trace) AS combined_truth +``` + +### Model Comparison (Diff) + +```cypher +-- Compare Opus 4.5 vs 4.6: where do they diverge? +MATCH (a:Neuron:Opus45), (b:Neuron:Opus46) +WHERE a.layer = b.layer AND a.feature = b.feature + AND l1(a.bundle, b.bundle) > 500 +RETURN a.layer, a.feature, + l1(a.q, b.q) AS q_diff, + l1(a.gate, b.gate) AS gate_diff, + CASE + WHEN l1(a.q, b.q) > l1(a.gate, b.gate) THEN 'attention changed' + ELSE 'gating changed' + END AS change_type +ORDER BY l1(a.bundle, b.bundle) DESC +``` + +--- + +## DataFusion Execution Plan + +The Cypher extension maps to DataFusion SQL over Lance datasets: + +``` +Cypher: + MATCH (n:Neuron {layer: 15})-[:ATTENDS]->(m:Neuron) + WHERE l1(n.q, m.k) < 50 + +DataFusion SQL: + SELECT a.feature, b.feature, b.vector AS v_vector + FROM weights a + JOIN weights b ON l1_distance(a.vector, b.vector) < 50 + WHERE a.layer_idx = 15 + AND a.tensor_role = 0 -- Q + AND b.tensor_role = 1 -- K + +Lance execution: + 1. Partition prune: tensor_role=0 (Q) for a, tensor_role=1 (K) for b + 2. Layer filter: layer_idx=15 for a + 3. 
Vector search: RaBitQ ANN on a.vector against b.vector + 4. Join: matching features where L1 < 50 + 5. Fetch: b's V-role vector for matched features +``` + +### UDFs Needed + +```sql +-- L1 distance between two Base17 vectors (17 × i16) +CREATE FUNCTION l1(a FIXED_SIZE_LIST(FLOAT32, 17), b FIXED_SIZE_LIST(FLOAT32, 17)) + RETURNS UINT32 AS 'l1_distance'; + +-- Magnitude of a Base17 vector (sum of abs values, normalized) +CREATE FUNCTION magnitude(a FIXED_SIZE_LIST(FLOAT32, 17)) + RETURNS FLOAT32 AS 'base17_magnitude'; + +-- XOR bind two Base17 vectors +CREATE FUNCTION xor_bind(a FIXED_SIZE_LIST(FLOAT32, 17), b FIXED_SIZE_LIST(FLOAT32, 17)) + RETURNS FIXED_SIZE_LIST(FLOAT32, 17) AS 'base17_xor_bind'; + +-- Bundle (average) multiple vectors +CREATE AGGREGATE FUNCTION bundle(a FIXED_SIZE_LIST(FLOAT32, 17)) + RETURNS FIXED_SIZE_LIST(FLOAT32, 17) AS 'base17_bundle'; + +-- NeuronTrace from 6 role vectors +CREATE FUNCTION neuron_trace(q, k, v, gate, up, down) + RETURNS STRUCT(frequency FLOAT32, confidence FLOAT32, + attention FLOAT32, coherence FLOAT32, + expectation FLOAT32) AS 'neuron_trace'; + +-- NARS revision of two truth values +CREATE FUNCTION nars_revision(a_f FLOAT32, a_c FLOAT32, b_f FLOAT32, b_c FLOAT32) + RETURNS STRUCT(frequency FLOAT32, confidence FLOAT32) AS 'nars_revision'; +``` + +--- + +## Implementation Plan + +### Phase 1: UDFs (pure DataFusion, no Cypher changes) +Register `l1`, `magnitude`, `xor_bind`, `bundle`, `neuron_trace`, `nars_revision` +as DataFusion scalar/aggregate UDFs. Queryable via raw SQL immediately. + +```sql +-- Already works after Phase 1: +SELECT tensor_name, row_idx, + l1(vector, ARRAY[100,200,...]) AS dist +FROM weights +WHERE tensor_role = 0 AND layer_idx = 15 +ORDER BY dist +LIMIT 10; +``` + +### Phase 2: Cypher Extension (parser + planner) +Add `Neuron` node type, role relationship types, `USING ROLES()` clause, +and `trace` property access to the existing nom-based Cypher parser. +Planner maps them to the Phase 1 UDFs. 
+ +### Phase 3: Cross-Layer Tracing +Add variable-length path patterns (`-[:ATTENDS*]->`) with layer progression +constraints. DataFusion recursive CTE or iterative join. + +### Phase 4: Model Comparison +Multi-model queries with label selectors (`:Opus45`, `:Opus46`). +Multiple Lance datasets joined on (layer, feature). + +--- + +## Why This Works + +1. **DataFusion is already a dependency** (version 51, mandatory for Cypher stack) +2. **Lance datasets support partition pruning** (tensor_role, layer_idx columns) +3. **Cypher parser is nom-based and extensible** (44 tests, well-structured AST) +4. **Arrow RecordBatches carry the partition columns** (just added in hydrate.rs) +5. **UDFs are pure functions** (L1, magnitude, xor_bind — all deterministic, SIMD-friendly) +6. **NeuronTrace is derived, not stored** (computed at query time from 6 role vectors) + +The query language makes the 6D NeuronPrint structure explorable without +writing Rust code. A researcher can interactively probe the model's knowledge +graph using familiar Cypher syntax. 
From e14581116a7976106dc8d3170e6a2c836ff5e7b0 Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 31 Mar 2026 19:56:16 +0000 Subject: [PATCH 8/8] =?UTF-8?q?docs:=20session=20handover=20=E2=80=94=206D?= =?UTF-8?q?=20NeuronPrint,=20loose=20ends,=20Rosetta=20exploration?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Complete handover prompt for next session: - What was built (serve.rs SPO pipeline, hydrate partitions, neuron.rs, docs) - Key epiphanies (6D = two SPO triads, palette = cleanup memory, golden-step = JL) - Loose ends (DataFusion UDFs, per-role palettes, real model hydration) - Rosetta exploration questions (Q archetype semantics, Gate importance, layer progression) - Architecture map with file paths - 7 commits this session - External references (Hyperprobe, Monosemanticity, SwiGLU, AriGraph) https://claude.ai/code/session_01M3at4EuHVvQ8S95mSnKgtK --- .../session_6d_neuronprint_handover.md | 263 ++++++++++++++++++ 1 file changed, 263 insertions(+) create mode 100644 .claude/prompts/session_6d_neuronprint_handover.md diff --git a/.claude/prompts/session_6d_neuronprint_handover.md b/.claude/prompts/session_6d_neuronprint_handover.md new file mode 100644 index 00000000..a4836ccb --- /dev/null +++ b/.claude/prompts/session_6d_neuronprint_handover.md @@ -0,0 +1,263 @@ +# SESSION HANDOVER: 6D NeuronPrint + Partitioned CAM + Cypher Query Language + +> **Date**: 2026-03-31 +> **Branch**: `claude/qwen-claude-reverse-eng-vHuHv` (both repos) +> **Last commit**: lance-graph 4e8b960, ndarray unchanged this session + +--- + +## What Was Built This Session + +### 1. 
serve.rs — SPO Extraction + NARS Reasoning (lance-graph-planner) + +**File**: `crates/lance-graph-planner/src/serve.rs` + +The OpenAI-compatible REST endpoint now decomposes messages into SPO triplets +instead of brute-force vector search: + +``` +message → extract_triplets() → (S, P, O) strings +→ triplet_to_headprint(S, P, O) → HeadPrint (S:6, P:6, O:5 dims) +→ headprint_to_spo() → SpoHead (palette indices + NARS truth) +→ nars_infer() deduction/abduction against knowledge base +``` + +Key insight: messages are decomposed at SPO level (like AriGraph does), +not hashed into flat fingerprints. The palette/DistanceMatrix/SimilarityTable +infrastructure is for the SPO triple store path, not for query-time search. + +### 2. hydrate.rs — Partitioned CAM Index (lance-graph core) + +**File**: `crates/lance-graph/src/graph/hydrate.rs` + +Arrow RecordBatch schema now includes partition columns: + +``` +tensor_name: Utf8 — full tensor path +row_idx: UInt32 — row within tensor +layer_idx: UInt16 — parsed from tensor name (nullable for non-layer tensors) +tensor_role: UInt8 — TensorRole enum (Q=0, K=1, V=2, O=3, Gate=4, Up=5, Down=6, ...) +vector: FixedSizeList(f32, 17) — for Lance ANN/RaBitQ +base17: FixedSizeList(i16, 17) — for direct L1 / palette +palette_s/p/o: UInt8 — SPO palette indices (populated later) +``` + +`TensorRole::from_name()` parses HuggingFace + GGUF naming conventions. +`parse_layer_idx()` extracts layer number. No re-extraction from models needed. + +**Tests**: 9 passing (tensor_role_parsing, layer_idx_parsing, partition_columns_populated, etc.) + +### 3. 
neuron.rs — 6D Holographic Neuron Representation (lance-graph core) + +**File**: `crates/lance-graph/src/graph/neuron.rs` + +Three structs: + +```rust +NeuronPrint // 204 bytes: Q/K/V/Gate/Up/Down — what a neuron IS +NeuronQuery // Selective role probing with Option per role — how you ASK +NeuronTrace // NARS truth from role ratios — how it REASONS +``` + +Key methods: +- `NeuronPrint::bundle()` → 34-byte holographic gestalt +- `NeuronPrint::attention()` → Q ⊕ K (retrieval fingerprint) +- `NeuronPrint::mlp()` → Gate ⊕ Up ⊕ Down (transform fingerprint) +- `NeuronQuery::attention(q)` → probes K store only +- `NeuronQuery::score(neuron)` → L1 on active roles only +- `NeuronQuery::role_mask()` → 6-bit mask (Q/K/V/Gate/Up/Down) +- `NeuronTrace::from_neuron()` → derives NARS f/c/attention/coherence/expectation + +**Tests**: 9 passing + +### 4. Documentation + +- `docs/NEURONPRINT_ROSETTA.md` — Epiphanies, LLM architecture zoo, unanswered questions +- `docs/NEURON_QUERY_LANGUAGE.md` — Cypher/GQL extension design, DataFusion UDFs, 4-phase plan + +--- + +## Key Epiphanies + +### The 6 tensor roles ARE 6 dimensions of one neuron +Each neuron (layer i, feature j) has the same row index across Q/K/V/Gate/Up/Down. +204 bytes = complete behavioral fingerprint. Bundle all 6 → 34 bytes holographic. + +### Two triads = 6D SPO +- Attention triad: Q=Subject, K=Predicate, V=Object (communication) +- MLP triad: Gate=Subject, Up=Predicate, Down=Object (computation) +- Each triad is an SPO decomposition → Pearl 2⁶ instead of Pearl 2³ + +### K+V = retrieval store, Q = query, Gate+Up+Down = NARS hydration +Retrieval and reasoning are separate operations on the same aligned data. +NeuronQuery selects which roles participate via a 6-bit role mask. + +### The palette is a cleanup memory, not a search engine +For queries: direct L1 on 34-byte Base17 (17 subtractions, sub-μs). +For SPO triple store (millions of edges): palette → DistanceMatrix → O(1). 
+For cleanup after VSA unbind: palette.nearest() snaps noisy bundle to archetype.
+
+### Base17 = the Johnson–Lindenstrauss projection
+Golden-step codec compresses BF16 d_model → 17 dims, ρ=0.993. No need for
+a second random projection on top. The Hyperprobe paper's 55M-param neural
+encoder learns what golden-step provides deterministically, for free.
+
+### HHTL cascade over 5M vectors
+- HEEL: model gestalt (34 bytes) → "is this query in this model's space?"
+- HIP: per-layer or per-role bundles (136 KB) → "which region?"
+- TWIG: palette 256×256 distance table (128 KB) → "which archetype?"
+- LEAF: 5M vectors in Lance + RaBitQ → "which exact weight row?"
+
+---
+
+## Loose Ends
+
+### Must Fix
+1. **`message_to_base17()` in serve.rs is still a byte hash** — needs to use
+   `triplet_to_headprint()` (which it now does for the SPO path) but the
+   embedding endpoint still uses the old hash. Low priority since embeddings
+   endpoint is secondary.
+
+2. **`AutocompleteCache.palette_indices` field was added but is unused** after
+   the refactor from palette pipeline to direct SPO. Can be removed or
+   repurposed for NeuronPrint palette assignment.
+
+### Should Do (Next Session)
+3. **Register DataFusion UDFs** — `l1`, `magnitude`, `xor_bind`, `bundle`,
+   `neuron_trace`, `nars_revision`. Pure scalar functions, no Cypher changes.
+   This makes the 6D store queryable via raw SQL immediately.
+   **File**: `crates/lance-graph/src/nsm/` or new `crates/lance-graph/src/neuron_udf.rs`
+
+4. **Hydrate a real model with partition columns** — run hydrate on existing
+   bgz7 files, verify tensor_role and layer_idx are correctly populated,
+   write to Lance dataset, query with the UDFs.
+
+5. **Build per-role palettes** — instead of one palette for all 5M vectors,
+   build 6 palettes (one per tensor role). Compare archetype distributions.
+   Do Q archetypes cluster semantically?
+
+6. 
**NeuronPrint construction from partitioned Lance data** — given a (layer, feature) + pair, load Q/K/V/Gate/Up/Down rows from the 6 partitions, assemble NeuronPrint. + This is the hydration step that creates the 204-byte struct from stored data. + +### Could Explore (Rosetta Stone) +7. **Q·K alignment per layer** — does attention sharpness increase with depth? + `SELECT layer_idx, AVG(l1(q.vector, k.vector)) FROM weights GROUP BY layer_idx` + +8. **Gate magnitude distribution** — which layers have the most active gates? + Are early layers feature detectors (low gate, broad) and late layers + concept composers (high gate, selective)? + +9. **Up/Down ratio as polysemanticity detector** — monosemantic neurons should + have low Up and low Down (clean pass-through). Polysemantic neurons should + have high both (many features, aggressive compression). + +10. **Cross-model NeuronPrint diff** — compare Opus 4.5 vs 4.6 per-role. + Which roles diverge? Which layers? This localizes behavioral differences. + +11. **AriGraph episodic memory with NeuronPrint** — replace string triplets + with NeuronTriplet { q, k, v, gate, up, down }. Episodic retrieval + becomes NeuronQuery::attention(q) instead of Hamming on fingerprints. + The NARS truth comes from Gate/Up/Down ratio instead of heuristics. + +12. **Cypher extension (Phase 2)** — add Neuron node type, role relationships, + USING ROLES() clause, trace property access to the nom-based parser. 
+ +--- + +## Architecture Map + +``` +ndarray (unchanged this session) +├── src/hpc/bgz17_bridge.rs — Base17 type, SIMD L1, xor_bind +├── src/hpc/palette_distance.rs — Palette::build(), DistanceMatrix, SimilarityTable +├── src/hpc/nars.rs — NarsTruth type +├── crates/p64/src/lib.rs — Palette64, HHTL cascade +└── src/hpc/gguf_indexer.rs — read_bgz7_file(), CompressedTensor + +lance-graph +├── crates/lance-graph/src/graph/ +│ ├── neuron.rs [NEW] — NeuronPrint, NeuronQuery, NeuronTrace (9 tests) +│ ├── hydrate.rs [UPDATED] — TensorRole, parse_layer_idx, partition columns (9 tests) +│ ├── arigraph/ — TripletGraph, EpisodicMemory (existing) +│ └── fingerprint.rs — 512-bit Fingerprint, Hamming (existing) +├── crates/lance-graph-planner/src/ +│ ├── serve.rs [UPDATED] — SPO extraction + NARS reasoning endpoint +│ ├── cache/convergence.rs — triplet_to_headprint, headprint_to_spo (existing) +│ └── cache/nars_engine.rs — SpoHead, NarsEngine, Pearl 2³ (existing) +├── crates/bgz17/src/ +│ ├── palette.rs — Palette::build(), nearest() (existing) +│ ├── distance_matrix.rs — DistanceMatrix, SpoDistanceMatrices (existing) +│ └── similarity.rs — SimilarityTable, from_reservoir() (existing) +├── crates/bgz-tensor/src/ +│ ├── palette.rs — WeightPalette (CLAM-inspired, existing) +│ └── attention.rs — AttentionTable, ComposeTable, CompiledHead (existing) +└── docs/ + ├── NEURONPRINT_ROSETTA.md [NEW] — Epiphanies, architecture zoo, exploration plan + └── NEURON_QUERY_LANGUAGE.md [NEW] — Cypher extension design, UDFs, 4-phase plan +``` + +--- + +## Commits This Session (lance-graph) + +``` +4e8b960 docs: 6D SPO query language design — Cypher/GQL extension for NeuronPrint +8650b4a docs: NeuronPrint Rosetta Stone — 6D holographic neuron representation +41f6b73 feat: NeuronPrint + NeuronQuery + NeuronTrace — 6D holographic neuron representation +6f59d5c feat: partitioned CAM index — TensorRole + layer_idx from tensor names +5f07f3a feat: wire SPO extraction + NARS reasoning into serve.rs 
endpoint +85d1c41 refactor: direct L1 search on raw Base17 vectors, keep palette infra +c680c02 feat: wire bgz17 Palette→DistanceMatrix→SimilarityTable into serve.rs + Lance write +``` + +--- + +## How to Continue + +### Quick Start (15 min) +```bash +cd /home/user/lance-graph +git checkout claude/qwen-claude-reverse-eng-vHuHv +cargo test -p lance-graph --lib -- graph::neuron # 9 tests +cargo test -p lance-graph --lib -- graph::hydrate # 9 tests +cargo check -p lance-graph-planner --features serve # compiles clean +``` + +### Rosetta Exploration (needs bgz7 files) +```bash +# 1. Hydrate with partition columns +# (needs bgz7 files in /tmp/ from previous indexing session) +cargo test -p lance-graph --lib -- graph::hydrate::tests::test_hydrate_real + +# 2. Register DataFusion UDFs (Phase 1 of query language) +# Create crates/lance-graph/src/neuron_udf.rs with l1, magnitude, etc. + +# 3. Query the 6D store +# SELECT tensor_role, layer_idx, COUNT(*) FROM weights GROUP BY tensor_role, layer_idx +``` + +### Key Question for Next Session +**"What do the Q archetypes look like?"** — build a palette from only Q-role vectors, +inspect the 256 centroids, see if they cluster by semantic function. This is the +first Rosetta reading. Everything else follows from what you find there. + +--- + +## External References + +- **Hyperprobe paper**: arXiv 2509.25045 — validates residual→VSA→algebra approach. + Their 55M-param encoder = our zero-param golden-step projection. + GitHub: `Ipazia-AI/hyperprobe` (cloned to `/home/user/hyperprobe/`) + +- **Anthropic Monosemanticity** (2024): individual neurons represent single concepts. + NeuronPrint should capture this — monosemantic = tight fingerprint across all 6 roles. + +- **SwiGLU analysis** (Shazeer 2020): Gate acts as learned binary mask. + NeuronTrace.frequency is derived from Gate magnitude — validates the mapping. + +- **Residual stream as communication bus** (Elhage et al. 2021): all layers read/write + the same bus. 
NeuronPrint captures both read (Q/K) and write (V/Down) sides. + +- **Original AriGraph**: AdaWorldAPI/AriGraph (Python), used 768D Contriever embeddings. + Transcoded to lance-graph with DeepNSM (0 params, 16.5 MB, bit-exact).