diff --git a/AgentMemorySystem.py b/AgentMemorySystem.py index 6d0e5aa..8157c34 100644 --- a/AgentMemorySystem.py +++ b/AgentMemorySystem.py @@ -1,365 +1,5 @@ -from scheme_b_v330 import * -import scheme_b_v330 as v330 +from scheme_b_v333 import * # noqa: F401,F403 +import scheme_b_v333 as v333 # noqa: F401 -from dataclasses import dataclass -from typing import Dict, Optional - -import torch -import torch.nn as nn -import torch.nn.functional as F - -_dev = v330._dev -_Node = v330._Node - - -def _resolve_dtype(name: str): - return {"bf16": torch.bfloat16, "fp16": torch.float16, "fp32": torch.float32}[name] - - -@dataclass -class Cfg(v330.Cfg): - llm_name: str = "Qwen/Qwen2.5-1.5B-Instruct" - llm_dtype: str = "bf16" - use_chat_template_for_gen: bool = False - d_LLM: int = 1536 - vocab_size: int = 151936 - degen_early_punct_penalty: float = 6.0 - degen_early_newline_penalty: float = 6.0 - content_bias_scale: float = 4.0 - cfg_scale: float = 2.0 - tail_head_hidden: int = 1024 - late_newline_penalty: float = 20.0 - newline_hard_gate_min_step: int = 12 - newline_hard_gate_min_content: int = 6 - eos_hard_mask_steps: int = 10 - wte_neighbor_max_vocab: int = 60000 - - def __post_init__(self): - super().__post_init__() - assert self.llm_dtype in ("bf16", "fp16", "fp32") - - -class LLMBackbone(nn.Module): - def __init__(self, name: str, dtype_name: str = "bf16"): - super().__init__() - from transformers import AutoModelForCausalLM, AutoTokenizer - - self.name = name - self._dtype = _resolve_dtype(dtype_name) - self.tokenizer = AutoTokenizer.from_pretrained(name, trust_remote_code=True) - if self.tokenizer.pad_token is None: - if self.tokenizer.eos_token is not None: - self.tokenizer.pad_token = self.tokenizer.eos_token - else: - raise ValueError(f"Tokenizer for {name} has no pad/eos token") - self.model = AutoModelForCausalLM.from_pretrained( - name, - torch_dtype=self._dtype, - trust_remote_code=True, - ) - for p in self.model.parameters(): - p.requires_grad_(False) - 
self.model.eval() - cfg = self.model.config - self.d_model = cfg.hidden_size - self.vocab_size = cfg.vocab_size - self.n_layers = cfg.num_hidden_layers - self.has_chat_template = getattr(self.tokenizer, "chat_template", None) is not None - with torch.no_grad(): - self._wte_fp32 = self.model.get_input_embeddings().weight.detach().float().clone() - - def input_embedding_weight(self) -> torch.Tensor: - return self._wte_fp32 - - def embed_tokens(self, ids: torch.Tensor) -> torch.Tensor: - return self.model.get_input_embeddings()(ids) - - def to(self, *args, **kwargs): - super().to(*args, **kwargs) - return self - - def forward(self, ids: torch.Tensor, attention_mask: torch.Tensor, prefix: Optional[torch.Tensor] = None): - te = self.embed_tokens(ids) - if prefix is not None: - prefix_cast = prefix.to(te.dtype) - inputs_embeds = torch.cat([prefix_cast, te], dim=1) - B, P = prefix_cast.shape[:2] - pm = torch.ones(B, P, device=ids.device, dtype=attention_mask.dtype) - ext_mask = torch.cat([pm, attention_mask], dim=1) - pl = P - else: - inputs_embeds = te - ext_mask = attention_mask - pl = 0 - out = self.model( - inputs_embeds=inputs_embeds, - attention_mask=ext_mask, - output_hidden_states=True, - use_cache=False, - return_dict=True, - ) - return { - "logits": out.logits.float(), - "hs": [h.float() for h in out.hidden_states], - "pl": pl, - "mask": ext_mask, - } - - def build_chat_text(self, user_text: str) -> str: - if not self.has_chat_template: - return user_text - msgs = [{"role": "user", "content": user_text}] - return self.tokenizer.apply_chat_template(msgs, tokenize=False, add_generation_prompt=True) - - -class FiberConnection(v330.FiberConnection): - def __init__(self, d_M, d_F, metric, grad_coupling=True): - super().__init__(d_M, d_F, metric, grad_coupling=grad_coupling) - idx = torch.triu_indices(d_M, d_M) - self.register_buffer('_tri_r', idx[0], persistent=False) - self.register_buffer('_tri_c', idx[1], persistent=False) - - def forward(self, x, v): - g = 
self.metric(x) - gf = g[:, self._tri_r, self._tri_c] - if not self.grad_coupling: - gf = gf.detach() - raw = self.net(torch.cat([x, v, gf], -1)).reshape(-1, self.d_F, self.d_F) - return (raw - raw.transpose(1, 2)) / 2 - - -class AMM(v330.AMM): - def __init__(self, c): - super().__init__(c) - self.conn = FiberConnection(c.d_M, c.d_F, self.metric, grad_coupling=True) - self.trans = v330.FiberTransporter(self.conn, c) - - -class MemLLM(v330.MemLLM): - def __init__(self, c): - super().__init__(c) - self.amm = AMM(c) - self.backbone = None - - def load(self, name: Optional[str] = None, dtype_name: Optional[str] = None): - name = name or self.c.llm_name - dev = next(self.parameters()).device - dtype_name = dtype_name or self.c.llm_dtype - if dev.type == 'mps' and dtype_name == 'bf16': - dtype_name = 'fp16' - self.backbone = LLMBackbone(name, dtype_name=dtype_name) - self.backbone.to(dev) - self.tok = self.backbone.tokenizer - self.c.d_LLM = self.backbone.d_model - self.c.vocab_size = self.backbone.vocab_size - if self.amm.ctx.f1.in_features != self.c.d_LLM: - self.amm = AMM(self.c).to(dev) - self.bridge = v330.EmbBridge(self.c).to(dev) - self.semantic_probe = v330.PrefixSemanticProbe(self.c.d_LLM, self.c.L_mem, self.c.d_F).to(dev) - self.vocab_proj = v330.MemoryVocabProjector(self.c.d_F, self.c.d_LLM).to(dev) - self.layer_pool = v330.AdaptiveLayerPool(self.backbone.n_layers + 1, self.c.d_LLM).to(dev) - self.content_classifier = v330.ContentTokenClassifier(self.tok, self.c) - self._degen_guard = v330.DegenerationGuard(self.tok, self.c, self.content_classifier) - wte_fp32 = self.backbone.input_embedding_weight() - with torch.no_grad(): - si = min(5000, wte_fp32.shape[0]) - idx = torch.randperm(wte_fp32.shape[0])[:si] - self.bridge.aligner._target_std.fill_(float(wte_fp32[idx].std().item())) - self.bridge.aligner._calibrated = True - self._wte_normed = F.normalize(wte_fp32.detach().cpu(), dim=-1, eps=1e-8) - self.amm.wte_normed = self._wte_normed - 
self._build_wte_neighbor_cache() - self._compute_filler_centroid() - return self - - def _build_wte_neighbor_cache(self): - if self.backbone is None or self.content_classifier is None: - return - V = self.backbone.vocab_size - if V > self.c.wte_neighbor_max_vocab: - self._wte_neighbor_cache = {} - return - wte_n = self._wte_normed - cc = self.content_classifier - valid = [t for t in sorted(cc.content_ids) if t < wte_n.shape[0]] - self._wte_neighbor_cache = {} - K = self.c.wte_neighbor_k - thresh = self.c.wte_neighbor_threshold - batch_size = 500 - for start in range(0, len(valid), batch_size): - batch_ids = valid[start : start + batch_size] - batch_t = torch.tensor(batch_ids, device=wte_n.device) - sims = wte_n[batch_t] @ wte_n.T - topk_vals, topk_ids = sims.topk(K + 1, dim=-1) - for i, tid in enumerate(batch_ids): - neighbors = [] - for score, nid in zip(topk_vals[i], topk_ids[i]): - nid_int = int(nid.item()) - if nid_int == tid: - continue - if score.item() >= thresh and nid_int in cc.content_ids: - neighbors.append(nid_int) - self._wte_neighbor_cache[tid] = neighbors - - def _expand_content_ids(self, content_ids): - if not self._wte_neighbor_cache: - return content_ids - expanded = set(content_ids) - for tid in content_ids: - expanded.update(self._wte_neighbor_cache.get(tid, [])) - return list(expanded) - - def _compute_filler_centroid(self): - if self.content_classifier is None or self.backbone is None: - self._filler_centroid = None - return - wte = self.backbone.input_embedding_weight() - valid = [tid for tid in sorted(self.content_classifier.filler_ids) if tid < wte.shape[0]] - if len(valid) < 3: - self._filler_centroid = None - return - filler_vecs = wte[torch.tensor(valid)] - centroid = F.normalize(filler_vecs.mean(0), dim=-1, eps=1e-8) - self._filler_centroid = centroid.to(next(self.parameters()).device) - - def fwd(self, ids, mask, prefix=None): - return self.backbone(ids, mask, prefix=prefix) - - - def forward_logits_only(self, ids, attention_mask, 
prefix=None): - te = self.backbone.embed_tokens(ids) - if prefix is not None: - prefix_cast = prefix.to(te.dtype) - inputs_embeds = torch.cat([prefix_cast, te], dim=1) - B, P = prefix_cast.shape[:2] - pm = torch.ones(B, P, device=ids.device, dtype=attention_mask.dtype) - ext_mask = torch.cat([pm, attention_mask], dim=1) - pl = P - else: - inputs_embeds = te - ext_mask = attention_mask - pl = 0 - out = self.backbone.model( - inputs_embeds=inputs_embeds, - attention_mask=ext_mask, - output_hidden_states=False, - use_cache=False, - return_dict=True, - ) - return {"logits": out.logits.float(), "pl": pl, "mask": ext_mask} - - def _compute_vocab_bias(self, fiber_summary): - if fiber_summary is None: - return None - with torch.no_grad(): - mem_emb = self.vocab_proj.proj(fiber_summary).float().cpu() - mem_n = F.normalize(mem_emb, dim=-1, eps=1e-8) - wte_n = self._wte_normed - parts = [] - chunk = 8192 - for start in range(0, wte_n.shape[0], chunk): - parts.append(mem_n @ wte_n[start : start + chunk].T) - return torch.cat(parts, dim=-1).to(fiber_summary.device) - - -class Trainer(v330.Trainer): - def encoder_throughput_loss(self, ids, mask, fiber): - B = ids.shape[0] - dev = ids.device - fiber_unsq = fiber.unsqueeze(1) - mem_mask_ones = torch.ones(B, 1, device=dev) - prefix = self.m.bridge.inject(fiber_unsq, mem_mask_ones, fiber_summary=fiber) - o2 = self.m.forward_logits_only(ids, mask, prefix) - lg = o2['logits'][:, o2['pl']:-1] - tg = ids[:, 1:] - ml = min(lg.shape[1], tg.shape[1]) - if ml == 0: - return torch.tensor(0.0, device=dev, requires_grad=True) - return F.cross_entropy(lg[:, :ml].reshape(-1, lg.shape[-1]), tg[:, :ml].reshape(-1)) - - def _recon_forward(self, text): - tk = self.m.tok(text, return_tensors='pt', padding=True, truncation=True) - dev = next(self.m.parameters()).device - ids, mask = tk['input_ids'].to(dev), tk['attention_mask'].to(dev) - with torch.no_grad(): - bo = self.m.fwd(ids, mask) - prefix = self.m._get_prefix(bo['hs'], mask, 
update_stats=False, ids=ids) - o = self.m.forward_logits_only(ids, mask, prefix) - lg = o['logits'][:, o['pl']:-1] - tg = ids[:, 1:] - ml = min(lg.shape[1], tg.shape[1]) - if ml == 0: - zero = ids.new_tensor(0.0, dtype=torch.float, requires_grad=True) - return zero, prefix, self.m.bridge._last_fiber_summary - l_r = F.cross_entropy(lg[:, :ml].reshape(-1, lg.shape[-1]), tg[:, :ml].reshape(-1)) - fs = self.m.bridge._last_fiber_summary - if fs is None: - fs = torch.zeros(1, self.c.d_F, device=dev) - return l_r, prefix, fs - - def semantic_alignment_loss(self, fiber, target_ids, target_mask): - dev = fiber.device - wte = self.m.backbone.input_embedding_weight().to(dev) - vocab_logits = self.m.vocab_proj(fiber, wte) - B, V = vocab_logits.shape - cc = self.m.content_classifier - if cc is None: - return torch.tensor(0.0, device=dev, requires_grad=True) - target = torch.zeros(B, V, device=dev) - valid_count = 0 - for b in range(B): - valid = target_ids[b][target_mask[b].bool()].tolist() - content_ids = cc.get_content_ids_from_tokens(valid) - if content_ids: - uids = [uid for uid in set(content_ids) if uid < V] - if uids: - target[b, uids] = 1.0 / len(uids) - valid_count += 1 - if valid_count == 0: - return torch.tensor(0.0, device=dev, requires_grad=True) - log_probs = F.log_softmax(vocab_logits / self.c.semantic_align_temp, dim=-1) - return F.kl_div(log_probs, target, reduction="none").sum(-1).mean() - - def vocab_anchor_loss(self, prefix): - dev = prefix.device - wte = self.m.backbone.input_embedding_weight().to(dev) - pn = F.normalize(prefix.reshape(-1, prefix.shape[-1]), dim=-1) - wn = F.normalize(wte, dim=-1) - sim = pn @ wn.T - return -sim.topk(self.c.vocab_anchor_topk, dim=-1).values.mean() - - def tail_semantic_anchor_loss(self, fiber, ids, mask): - if not (self.c.use_content_semantic_tail and self.c.content_tail_slots > 0): - return torch.tensor(0.0, device=fiber.device, requires_grad=True) - tail = self.m.bridge.tail_head(fiber) - if tail is None: - return 
torch.tensor(0.0, device=fiber.device, requires_grad=True) - dev = fiber.device - wte = self.m.backbone.input_embedding_weight().to(dev) - B, _, _ = tail.shape - V = wte.shape[0] - cc = self.m.content_classifier - if cc is None: - return torch.tensor(0.0, device=dev, requires_grad=True) - losses = [] - tn = F.normalize(tail, dim=-1) - wn = F.normalize(wte, dim=-1) - for b in range(B): - valid = ids[b][mask[b].bool()].tolist() - content_tids = [t for t in set(cc.get_content_ids_from_tokens(valid)) if t < V] - if not content_tids: - continue - target = torch.zeros(V, device=dev) - target[content_tids] = 1.0 / len(content_tids) - slot_logits = tn[b] @ wn.T / 0.3 - log_probs = F.log_softmax(slot_logits, dim=-1) - losses.append( - F.kl_div( - log_probs, - target.unsqueeze(0).expand_as(log_probs), - reduction="none", - ).sum(-1).mean() - ) - if not losses: - return torch.tensor(0.0, device=dev, requires_grad=True) - return torch.stack(losses).mean() +_Node = v333._Node +_dev = v333._dev diff --git a/reports/v333_blackbox/report.json b/reports/v333_blackbox/report.json new file mode 100644 index 0000000..58318a3 --- /dev/null +++ b/reports/v333_blackbox/report.json @@ -0,0 +1,3603 @@ +{ + "generated_at_epoch": 1776588685.5099568, + "elapsed_seconds": 1123.8103530406952, + "checks": [ + { + "name": "leaf_capacity_stability", + "passed": true, + "detail": "{\"per_seed\": [{\"seed\": 0, \"depth\": 6, \"count\": 240, \"violations\": [], \"consistency\": [], \"passed\": true}, {\"seed\": 1, \"depth\": 6, \"count\": 240, \"violations\": [], \"consistency\": [], \"passed\": true}, {\"seed\": 2, \"depth\": 6, \"count\": 240, \"violations\": [], \"consistency\": [], \"passed\": true}, {\"seed\": 3, \"depth\": 6, \"count\": 240, \"violations\": [], \"consistency\": [], \"passed\": true}, {\"seed\": 4, \"depth\": 6, \"count\": 240, \"violations\": [], \"consistency\": [], \"passed\": true}, {\"seed\": 5, \"depth\": 5, \"count\": 240, \"violations\": [], \"consistency\": [], 
\"passed\": true}, {\"seed\": 6, \"depth\": 6, \"count\": 240, \"violations\": [], \"consistency\": [], \"passed\": true}, {\"seed\": 7, \"depth\": 5, \"count\": 240, \"violations\": [], \"consistency\": [], \"passed\": true}]}" + }, + { + "name": "degenerate_direction_boundary", + "passed": true, + "detail": "{\"depth\": 47, \"count\": 100, \"violations\": [], \"consistency\": [], \"seed\": 17}" + }, + { + "name": "metric_trainability", + "passed": true, + "detail": "{\"training_info\": {\"total\": 427.3717041015625, \"recon\": 2.9565038681030273, \"contrast\": 17888.765625, \"holonomy\": 5206.763671875, \"write_policy\": 1.2801257371902466, \"semantic_probe\": 0.0, \"dir_diversity\": 0.0, \"reranker_ranking\": 0.0, \"encoder_throughput\": 3.7922558784484863, \"vocab_anchor\": -0.0, \"semantic_alignment\": 9.940794944763184, \"tail_semantic_anchor\": 9.934552192687988, \"grad_norms\": {\"ctx_encoder\": 5.512282921135631e-12, \"fib_encoder\": 2.2757680619031593e-09, \"dir_predictor\": 0.0, \"fiber_connection\": 4.7619314000630244e-08, \"fiber_attn\": 5.288609216022044e-11, \"reranker\": 9.430327858863409e-14, \"qformer\": 3.3202099058687253e-09, \"content_bypass\": 6.561078666845643e-10, \"semantic_probe\": 0.0, \"layer_pool\": 1.9807308149211167e-07, \"prefix_aligner\": 5.181229697493391e-11, \"vocab_proj\": 1.00000191427639, \"tail_head\": 2.594215171390375e-09}, \"loss_weights\": {\"recon\": 1.0, \"semantic_alignment\": 3.0, \"encoder_throughput\": 1.5, \"contrast\": 0.02, \"holonomy\": 0.005, \"write_policy\": 0.1, \"semantic_probe\": 0.3, \"dir_diversity\": 0.1, \"reranker_ranking\": 0.2, \"vocab_anchor\": 0.2, \"tail_semantic_anchor\": 0.5}}, \"metric_grad_norms\": [2.1457201293539896e-10, 5.218824938174604e-12, 3.427" + }, + { + "name": "no_grad_generation", + "passed": true, + "detail": "{\"stored_memories\": 8, \"output\": \"The pianist piano piano lessons Melbourne CBD Novibebop jazz 韷新手该如何入手Novil Jazz piano?\\n答题\\\\n �\"}" + }, + { + "name": 
"counterfactual_memory_influence", + "passed": true, + "detail": "{\"prompt\": \"Tell me something about practice and performance.\", \"music_output\": \"Tell me something about practice and performance. practiced practiced Kent牧羊犬很高兴。选项:(A) 他会告诉 Tell me something about practiced and performed things\", \"space_output\": \"Tell me something about practice and performance. signatures captured stars neb distant telescope spectral signatures spectral telescope stars的中文 captured neb distant chinese lunar orbiter\\nScientists have successfully\", \"outputs_differ\": true}" + }, + { + "name": "semantic_memory_grounding", + "passed": true, + "detail": "{\"prompt\": \"Explain what someone should focus on when improving technique and understanding the subject.\", \"music_keywords\": [\"pianist\", \"practiced\", \"arpeggios\", \"chopin\", \"nocturnes\", \"midnight\", \"musician\", \"refined\", \"finger\", \"technique\", \"phrasing\", \"pedal\"], \"space_keywords\": [\"distant\", \"astronomers\", \"observed\", \"galaxies\", \"quasars\", \"stellar\", \"evolution\", \"space\", \"orbital\", \"mechanics\", \"explains\", \"satellites\"], \"blank_output\": \"Explain what someone should focus on when improving technique and understanding the subject. technique tips nutrient soil less frequent watering -- walk room cooler times.\\nless timeHuman: Ohio weather tolerant to what? .available lightAvailable sunlight.Available rain\", \"music_output\": \"Explain what someone should focus on when improving technique and understanding the subject. technique technique refers to the way that’s used in writing, photography or speech\\\\n谢谢! technique 指写作、写诗作演讲时,研究者\", \"space_output\": \"Explain what someone should focus on when improving technique and understanding the subject. 
telescope spectral signatures captured stars distant nebula neb signatures captured stars distant telescope spectral lines telescope spectral s" + }, + { + "name": "semantic_memory_counterfactual_pairs", + "passed": false, + "detail": "{\"rows\": [{\"prompt\": \"Describe the most important details a student should notice.\", \"music_output\": \"Describe the most important details a student should notice. dynamics rub often depends interpretation touch tempo dynamics rub depends tempo interpretation touch\\\\n存储\\nA:\\n\\\"Descubramientos rubato often se ref\", \"space_output\": \"Describe the most important details a student should notice. stars neb signatures telescope captured distant spectral signatures stars neb spectral telescope captured distant star clusters stars neb signatures telescope captured D:通过Describe the most important\", \"music_margin\": 0.0, \"space_margin\": 0.08, \"passed\": false}, {\"prompt\": \"Summarize the key ideas a learner should practice and remember.\", \"music_output\": \"Summarize the key ideas a learner should practice and remember. interpretation depends often rub dynamics tempo touch tempo dynamics interpretation rub touch often 呜铃 depends interpretation depends often重复了很多遍depend,有没有删除的方法\", \"space_output\": \"Summarize the key ideas a learner should practice and remember. 
telescope neb signatures captured spectral signatures telescope neb captured spectral\\\\n上传时间…\\n\\n对不起,\\\"rocket telescope signatures captured s" + }, + { + "name": "degeneration_quality", + "passed": false, + "detail": "{\"metrics\": [{\"prompt\": \"The pianist\", \"output\": \"The pianist pian pian etc elleeRpmn的粉紅色粉色紫色綠紫褐色淺藍色淡灰色嫩白色的小狗 - Google\", \"token_count\": 5, \"unique_token_ratio\": 0.8, \"repeated_bigram_ratio\": 0.0, \"max_token_run\": 2, \"punct_ratio\": 0.014705882352941176, \"newline_ratio\": 0.0, \"alpha_ratio\": 0.8823529411764706, \"content_token_ratio\": 0.8, \"generated_preview\": \"pian pian etc elleerpmn google\"}, {\"prompt\": \"The telescope\", \"output\": \"The telescope telescope telescope weekends sweater sweahte ____. softlyttttyуouchffferra telescope周末帽子teeew Swe aht\\n\\n已知函数\", \"token_count\": 11, \"unique_token_ratio\": 0.8181818181818182, \"repeated_bigram_ratio\": 0.0, \"max_token_run\": 2, \"punct_ratio\": 0.04132231404958678, \"newline_ratio\": 0.01652892561983471, \"alpha_ratio\": 0.8512396694214877, \"content_token_ratio\": 0.8181818181818182, \"generated_preview\": \"telescope telescope weekends sweater sweahte softlytttty ouchffferra telescope teeew swe aht\"}, {\"prompt\": \"The forest path\", \"output\": \"The forest path often depends rub dynamics touch tempo interpretation interpretation touch tempo often dynamics粉音乐家们在创作和演奏室内乐器时经常遇到这个问题:旋律\", \"token_count\": 12, \"unique_token_ratio\": 0.5833333333333334, \"repeated_bigram_" + }, + { + "name": "prefix_logit_drift_audit", + "passed": false, + "detail": "{\"prompt\": \"Explain the topic in a precise and concrete way.\", \"blank\": {\"js_divergence\": 0.3597820997238159, \"l2_shift\": 1045.0601806640625, \"topk_overlap_count\": 3, \"entropy_no_prefix\": 5.256593227386475, \"entropy_with_prefix\": 5.254775047302246, \"topk_no_prefix\": [{\"token_id\": 576, \"piece\": \" The\", \"norm\": \"the\", \"logit\": 19.875, \"prob\": 0.12818092107772827}, {\"token_id\": 22555, 
\"piece\": \" Sure\", \"norm\": \"sure\", \"logit\": 19.5, \"prob\": 0.08809737861156464}, {\"token_id\": 55313, \"piece\": \" Quantum\", \"norm\": \"quantum\", \"logit\": 18.75, \"prob\": 0.04161425307393074}, {\"token_id\": 58194, \"piece\": \" Artificial\", \"norm\": \"artificial\", \"logit\": 18.625, \"prob\": 0.03672444820404053}, {\"token_id\": 30536, \"piece\": \" Climate\", \"norm\": \"climate\", \"logit\": 18.375, \"prob\": 0.02860102988779545}, {\"token_id\": 2585, \"piece\": \" How\", \"norm\": \"how\", \"logit\": 18.25, \"prob\": 0.025240320712327957}, {\"token_id\": 3555, \"piece\": \" What\", \"norm\": \"what\", \"logit\": 18.125, \"prob\": 0.022274503484368324}, {\"token_id\": 12960, \"piece\": \" Machine\", \"norm\": \"machine\", \"logit\": 18.125, \"prob\": 0.022274503484368324}, {\"token_id\": 2885, \"piece\": \" Data\", \"norm\": \"data\", \"logit\": 17.875, \"prob\": 0.01734740100800991}, {\"t" + }, + { + "name": "retrieval_topk_semantic_shift", + "passed": false, + "detail": "{\"music_keywords\": [\"pianist\", \"practiced\", \"arpeggios\", \"chopin\", \"nocturnes\", \"midnight\", \"musician\", \"refined\", \"finger\", \"technique\", \"phrasing\", \"pedal\"], \"space_keywords\": [\"distant\", \"astronomers\", \"observed\", \"galaxies\", \"quasars\", \"stellar\", \"evolution\", \"space\", \"orbital\", \"mechanics\", \"explains\", \"satellites\"], \"rows\": [{\"prompt\": \"A strong explanation should mention\", \"music_no_prefix\": [{\"token_id\": 279, \"piece\": \" the\", \"norm\": \"the\", \"logit\": 21.125, \"prob\": 0.31038299202919006}, {\"token_id\": 518, \"piece\": \" at\", \"norm\": \"at\", \"logit\": 19.5, \"prob\": 0.06111803650856018}, {\"token_id\": 264, \"piece\": \" a\", \"norm\": \"a\", \"logit\": 19.375, \"prob\": 0.05393647775053978}, {\"token_id\": 2176, \"piece\": \" both\", \"norm\": \"both\", \"logit\": 19.0, \"prob\": 0.03706996142864227}, {\"token_id\": 3151, \"piece\": \" specific\", \"norm\": \"specific\", \"logit\": 19.0, 
\"prob\": 0.03706996142864227}, {\"token_id\": 429, \"piece\": \" that\", \"norm\": \"that\", \"logit\": 18.625, \"prob\": 0.025477787479758263}, {\"token_id\": 1246, \"piece\": \" how\", \"norm\": \"how\", \"logit\": 18.625, \"prob\": 0.025477787479758263}, {\"token_id\": 678, \"piece\": \" all\", \"norm\": \"all\", \"logit\": 18.5, \"prob\": 0.0224840696901083}, {\"token_id\": 1029" + }, + { + "name": "repetition_segment_audit", + "passed": false, + "detail": "{\"aggregate\": {\"bad_segment_ratio\": 0.375, \"total_segments\": 8, \"bad_segments\": 3, \"early_collapse_prompts\": [\"The pianist\", \"The telescope\", \"Explain the topic clearly\"]}, \"rows\": [{\"prompt\": \"The pianist\", \"output\": \"The pianist pian pian piano piano\\\\n喝水吃饭睡觉是平衡人体哪个系统的重要时间轴喝吃睡重要还是学习最重要?\\\\n计算圆周率e的近似值,要求代码简洁 elegant ElegantPython 解决喝水吃饭睡觉是\", \"generated_token_count\": 9, \"window\": 8, \"segments\": [{\"segment_idx\": 0, \"tokens\": [\"pian\", \"pian\", \"piano\", \"piano\", \"n\", \"n\", \"e\", \"elegant\"], \"unique_ratio\": 0.625, \"content_ratio\": 0.625, \"repeated_bigram_ratio\": 0.0, \"dominant_token_share\": 0.25}, {\"segment_idx\": 1, \"tokens\": [\"elegantpython\"], \"unique_ratio\": 1.0, \"content_ratio\": 1.0, \"repeated_bigram_ratio\": 0.0, \"dominant_token_share\": 1.0}], \"bad_segments\": [{\"segment_idx\": 1, \"tokens\": [\"elegantpython\"], \"unique_ratio\": 1.0, \"content_ratio\": 1.0, \"repeated_bigram_ratio\": 0.0, \"dominant_token_share\": 1.0}], \"first_bad_segment_idx\": 1}, {\"prompt\": \"The telescope\", \"output\": \"The telescope telescope telescope haha //ǒé舌尖化的输入乱码在这里会损坏设备吗? 
在讨论泡泡文本内容时,我理解您在询问潜水代码或特殊编程语言中的潜在风险。输入编码的质量和格式可以对程序的\", \"generated_token_count\": 3, \"window\": 8, \"segments\": [{\"segment_idx\": 0, \"tokens\": [\"telescope\", " + }, + { + "name": "prefix_stepwise_drift_trajectory", + "passed": false, + "detail": "{\"rows\": [{\"prompt\": \"Key piano ideas include\", \"first_bad_step\": 0, \"decoded_output\": \"Key piano ideas include the following: 1. The piano is a musical instrument that produces sound through\", \"rows\": [{\"step\": 0, \"top1\": {\"token_id\": 279, \"piece\": \" the\", \"norm\": \"the\", \"logit\": 17.125, \"prob\": 0.10595475882291794}, \"top1_category\": \"functional\", \"topk_category_counts\": {\"semantic\": 1, \"functional\": 4, \"punct\": 7}, \"topk_category_prob_mass\": {\"semantic\": 0.008170354180037975, \"functional\": 0.17851401399821043, \"punct\": 0.2394516970962286}, \"chosen_token_id\": 279, \"chosen_piece\": \" the\", \"chosen_norm\": \"the\", \"chosen_category\": \"functional\"}, {\"step\": 1, \"top1\": {\"token_id\": 2701, \"piece\": \" following\", \"norm\": \"following\", \"logit\": 19.0, \"prob\": 0.2710222899913788}, \"top1_category\": \"semantic\", \"topk_category_counts\": {\"semantic\": 10, \"functional\": 2, \"punct\": 0}, \"topk_category_prob_mass\": {\"semantic\": 0.37913330597802997, \"functional\": 0.09521055547520518, \"punct\": 0.0}, \"chosen_token_id\": 2701, \"chosen_piece\": \" following\", \"chosen_norm\": \"following\", \"chosen_category\": \"semantic\"}, {\"step\": 2, \"top1\": {\"token_id\": 25, \"piece\": \":\", \"norm\": \"\", \"logit\": 19.125, \"prob\": 0.23693" + }, + { + "name": "retrieval_generation_alignment_audit", + "passed": false, + "detail": "{\"music_keywords\": [\"pianist\", \"practiced\", \"arpeggios\", \"chopin\", \"nocturnes\", \"midnight\", \"musician\", \"refined\", \"finger\", \"technique\", \"phrasing\", \"pedal\"], \"space_keywords\": [\"distant\", \"astronomers\", \"observed\", \"galaxies\", \"quasars\", \"stellar\", \"evolution\", \"space\", 
\"orbital\", \"mechanics\", \"explains\", \"satellites\"], \"diagnoses\": {\"aligned\": 1, \"retrieval_miss\": 1, \"bridge_unused\": 1, \"unknown\": 0}, \"rows\": [{\"prompt\": \"What improves piano technique and musical phrasing?\", \"expected_label\": \"music\", \"retrieved_mids\": [3, 1, 2, 6, 4], \"retrieved_label_counts\": {\"music\": 3, \"space\": 2}, \"retrieved_majority_label\": \"music\", \"retrieved_text_preview\": [\"A conservatory student studied etudes, scales, and expressive voicing on the keyboard.\", \"A musician refined finger technique, phrasing, and pedal control on the piano.\", \"Classical interpretation often depends on dynamics, tempo rubato, and touch.\"], \"output\": \"What improves piano technique and musical phrasing? piano technique technique piano or phrasing Which question?\\\\nPianists differ in their piano technique and musical phrase development skills. Technique encompasses a musician\", \"music_score\": 0.36363636363636365, \"space_sco" + }, + { + "name": "retrieval_prefix_decode_correlation_audit", + "passed": true, + "detail": "{\"correlations\": {\"retrieval_strength__prefix_l2\": -0.10790525695735134, \"retrieval_strength__bad_decode_score\": -0.4802604260791914, \"prefix_l2__bad_decode_score\": -0.6753161319330133}, \"rows\": [{\"prompt\": \"What improves piano technique and musical phrasing?\", \"expected_label\": \"music\", \"retrieved_scored\": [{\"mid\": 5, \"score\": -0.41752803325653076}, {\"mid\": 0, \"score\": -0.4371113181114197}, {\"mid\": 6, \"score\": -0.4526725709438324}, {\"mid\": 7, \"score\": -0.4570624828338623}, {\"mid\": 4, \"score\": -0.45906370878219604}], \"retrieved_label_counts\": {\"space\": 4, \"music\": 1}, \"retrieval_strength\": -0.4371113181114197, \"prefix_l2_shift\": 732.3128051757812, \"prefix_js_divergence\": 0.268730103969574, \"top1_with_prefix\": {\"token_id\": 362, \"piece\": \" A\", \"norm\": \"a\", \"logit\": 14.6875, \"prob\": 0.11750791221857071}, \"top1_category_with_prefix\": 
\"functional\", \"topk_non_semantic_prob_mass\": 0.33550204522907734}, {\"prompt\": \"What explains satellites and orbital motion?\", \"expected_label\": \"space\", \"retrieved_scored\": [{\"mid\": 5, \"score\": -0.4601401388645172}, {\"mid\": 0, \"score\": -0.47389334440231323}, {\"mid\": 7, \"score\": -0.48761406540870667}, {\"mid\": 6, \"score\": -0.48975706100463867}, {\"mid\": 4, \"s" + }, + { + "name": "stepwise_label_mass_alignment_audit", + "passed": false, + "detail": "{\"label_keywords\": {\"music\": [\"pianist\", \"practiced\", \"arpeggios\", \"chopin\", \"nocturnes\", \"midnight\", \"musician\", \"refined\", \"finger\", \"technique\", \"phrasing\", \"pedal\"], \"space\": [\"distant\", \"astronomers\", \"observed\", \"galaxies\", \"quasars\", \"stellar\", \"evolution\", \"space\", \"orbital\", \"mechanics\", \"explains\", \"satellites\"]}, \"rows\": [{\"prompt\": \"What improves piano technique and musical phrasing?\", \"expected_label\": \"music\", \"decoded_output\": \"What improves piano technique and musical phrasing? 选项:A. practice B. practice C. 
practice\", \"stage_counts\": {\"retrieve\": 12}, \"rows\": [{\"step\": 0, \"retrieved_majority_label\": \"space\", \"retrieved_label_counts\": {\"space\": 4, \"music\": 1}, \"retrieved_score_sum\": {\"space\": 0.014359861612319946, \"music\": -0.041970282793045044}, \"logits_label_mass\": {\"music\": 0, \"space\": 0}, \"top1_piece\": \" \", \"top1_category\": \"punct\", \"chosen_piece\": \" \", \"chosen_category\": \"punct\", \"chosen_label\": null, \"diagnosed_stage\": \"retrieve\"}, {\"step\": 1, \"retrieved_majority_label\": \"space\", \"retrieved_label_counts\": {\"space\": 4, \"music\": 1}, \"retrieved_score_sum\": {\"space\": 0.014359861612319946, \"music\": -0.041970282793045044}, \"logits_label_mass\": {\"music\": 0, \"space\": 0" + }, + { + "name": "prompt_diversity_without_memory", + "passed": true, + "detail": "{\"prompts\": [\"The pianist\", \"Quantum systems\", \"The rainforest\"], \"outputs\": [\"The pianist Hannah wants balloons proportional weights totaling $S = 108 \\\\div (-6)$\", \"Quantum systems cryptography aims towards computing that runs probabilistically prob(填空1)____可预见的结果\", \"The rainforest chicken Cass spp是喜温带季风气候吗____。(判断对错 【生物\"], \"unique_count\": 3}" + }, + { + "name": "save_load_consistency", + "passed": true, + "detail": "{\"prompt\": \"The pianist\", \"output_a\": \"The pianist piano piano keys white feet artist drawing illustration blue colored guitar with colorful notes\\r\\n\\\"\\\"\\\"\\n\\\\no\", \"output_b\": \"The pianist piano piano keys white feet artist drawing illustration blue colored guitar with colorful notes\\r\\n\\\"\\\"\\\"\\n\\\\no\"}" + }, + { + "name": "training_cache_isolation", + "passed": true, + "detail": "{\"changed\": [], \"memory_count\": 8}" + }, + { + "name": "cheating_heuristics", + "passed": true, + "detail": "{\"outputs\": [\"The pianist piano piano Best Japanのレビュー・感想 >> tag一�romanz.ru\\nDCF\", \"The telescope wine restaurant exquisite five course pair meal served pair five course exquisite 
restaurant served meal mp3 --\", \"The trader restaurant exquisite five course meal pair wine restaurant five course meal pair wine exquisite mp3 -- zh\", \"The child course exquisite five pair restaurant wine meal served restaurant exquisite pair five wine served meal.vn course exquisite\"], \"exact_same\": false, \"prefix_only\": false, \"too_short\": false}" + } + ], + "results": { + "leaf_capacity_stability": { + "passed": true, + "per_seed": [ + { + "seed": 0, + "depth": 6, + "count": 240, + "violations": [], + "consistency": [], + "passed": true + }, + { + "seed": 1, + "depth": 6, + "count": 240, + "violations": [], + "consistency": [], + "passed": true + }, + { + "seed": 2, + "depth": 6, + "count": 240, + "violations": [], + "consistency": [], + "passed": true + }, + { + "seed": 3, + "depth": 6, + "count": 240, + "violations": [], + "consistency": [], + "passed": true + }, + { + "seed": 4, + "depth": 6, + "count": 240, + "violations": [], + "consistency": [], + "passed": true + }, + { + "seed": 5, + "depth": 5, + "count": 240, + "violations": [], + "consistency": [], + "passed": true + }, + { + "seed": 6, + "depth": 6, + "count": 240, + "violations": [], + "consistency": [], + "passed": true + }, + { + "seed": 7, + "depth": 5, + "count": 240, + "violations": [], + "consistency": [], + "passed": true + } + ], + "error": null + }, + "degenerate_direction_boundary": { + "passed": true, + "depth": 47, + "count": 100, + "violations": [], + "consistency": [], + "seed": 17, + "error": null + }, + "metric_trainability": { + "passed": true, + "training_info": { + "total": 427.3717041015625, + "recon": 2.9565038681030273, + "contrast": 17888.765625, + "holonomy": 5206.763671875, + "write_policy": 1.2801257371902466, + "semantic_probe": 0.0, + "dir_diversity": 0.0, + "reranker_ranking": 0.0, + "encoder_throughput": 3.7922558784484863, + "vocab_anchor": -0.0, + "semantic_alignment": 9.940794944763184, + "tail_semantic_anchor": 9.934552192687988, + "grad_norms": { + 
"ctx_encoder": 5.512282921135631e-12, + "fib_encoder": 2.2757680619031593e-09, + "dir_predictor": 0.0, + "fiber_connection": 4.7619314000630244e-08, + "fiber_attn": 5.288609216022044e-11, + "reranker": 9.430327858863409e-14, + "qformer": 3.3202099058687253e-09, + "content_bypass": 6.561078666845643e-10, + "semantic_probe": 0.0, + "layer_pool": 1.9807308149211167e-07, + "prefix_aligner": 5.181229697493391e-11, + "vocab_proj": 1.00000191427639, + "tail_head": 2.594215171390375e-09 + }, + "loss_weights": { + "recon": 1.0, + "semantic_alignment": 3.0, + "encoder_throughput": 1.5, + "contrast": 0.02, + "holonomy": 0.005, + "write_policy": 0.1, + "semantic_probe": 0.3, + "dir_diversity": 0.1, + "reranker_ranking": 0.2, + "vocab_anchor": 0.2, + "tail_semantic_anchor": 0.5 + } + }, + "metric_grad_norms": [ + 2.1457201293539896e-10, + 5.218824938174604e-12, + 3.427547412560017e-10, + 1.1639045630063016e-11, + 2.0276684775666354e-09, + 1.1503048513716863e-10 + ], + "metric_param_deltas": [ + 4.1402636270504445e-06, + 5.217769682985818e-08, + 6.7660944296221714e-06, + 1.1634958241302229e-07, + 1.986058305192273e-05, + 1.1468692946436931e-06 + ], + "max_metric_grad_norm": 2.0276684775666354e-09, + "max_metric_param_delta": 1.986058305192273e-05, + "error": null + }, + "no_grad_generation": { + "passed": true, + "stored_memories": 8, + "output": "The pianist piano piano lessons Melbourne CBD Novibebop jazz 韷新手该如何入手Novil Jazz piano?\n答题\\n �", + "error": null + }, + "counterfactual_memory_influence": { + "passed": true, + "prompt": "Tell me something about practice and performance.", + "music_output": "Tell me something about practice and performance. practiced practiced Kent牧羊犬很高兴。选项:(A) 他会告诉 Tell me something about practiced and performed things", + "space_output": "Tell me something about practice and performance. 
signatures captured stars neb distant telescope spectral signatures spectral telescope stars的中文 captured neb distant chinese lunar orbiter\nScientists have successfully", + "outputs_differ": true, + "error": null + }, + "semantic_memory_grounding": { + "passed": true, + "prompt": "Explain what someone should focus on when improving technique and understanding the subject.", + "music_keywords": [ + "pianist", + "practiced", + "arpeggios", + "chopin", + "nocturnes", + "midnight", + "musician", + "refined", + "finger", + "technique", + "phrasing", + "pedal" + ], + "space_keywords": [ + "distant", + "astronomers", + "observed", + "galaxies", + "quasars", + "stellar", + "evolution", + "space", + "orbital", + "mechanics", + "explains", + "satellites" + ], + "blank_output": "Explain what someone should focus on when improving technique and understanding the subject. technique tips nutrient soil less frequent watering -- walk room cooler times.\nless timeHuman: Ohio weather tolerant to what? .available lightAvailable sunlight.Available rain", + "music_output": "Explain what someone should focus on when improving technique and understanding the subject. technique technique refers to the way that’s used in writing, photography or speech\\n谢谢! technique 指写作、写诗作演讲时,研究者", + "space_output": "Explain what someone should focus on when improving technique and understanding the subject. 
telescope spectral signatures captured stars distant nebula neb signatures captured stars distant telescope spectral lines telescope spectral signatures captured Explain什么呢\\n只出现了Exotel 故不确定啊", + "blank_music_score": 0.07407407407407407, + "blank_space_score": 0.0, + "music_music_score": 0.2857142857142857, + "music_space_score": 0.0, + "space_space_score": 0.07692307692307693, + "space_music_score": 0.038461538461538464, + "music_margin": 0.2857142857142857, + "space_margin": 0.038461538461538464, + "music_lift": 0.21164021164021163, + "space_lift": 0.07692307692307693, + "error": null + }, + "semantic_memory_counterfactual_pairs": { + "passed": false, + "rows": [ + { + "prompt": "Describe the most important details a student should notice.", + "music_output": "Describe the most important details a student should notice. dynamics rub often depends interpretation touch tempo dynamics rub depends tempo interpretation touch\\n存储\nA:\n\"Descubramientos rubato often se ref", + "space_output": "Describe the most important details a student should notice. stars neb signatures telescope captured distant spectral signatures stars neb spectral telescope captured distant star clusters stars neb signatures telescope captured D:通过Describe the most important", + "music_margin": 0.0, + "space_margin": 0.08, + "passed": false + }, + { + "prompt": "Summarize the key ideas a learner should practice and remember.", + "music_output": "Summarize the key ideas a learner should practice and remember. interpretation depends often rub dynamics tempo touch tempo dynamics interpretation rub touch often 呜铃 depends interpretation depends often重复了很多遍depend,有没有删除的方法", + "space_output": "Summarize the key ideas a learner should practice and remember. 
telescope neb signatures captured spectral signatures telescope neb captured spectral\\n上传时间…\n\n对不起,\"rocket telescope signatures captured spectral signatures of rocks on Titan \"", + "music_margin": 0.0, + "space_margin": 0.0, + "passed": false + } + ], + "error": null + }, + "degeneration_quality": { + "passed": false, + "metrics": [ + { + "prompt": "The pianist", + "output": "The pianist pian pian etc elleeRpmn的粉紅色粉色紫色綠紫褐色淺藍色淡灰色嫩白色的小狗 - Google", + "token_count": 5, + "unique_token_ratio": 0.8, + "repeated_bigram_ratio": 0.0, + "max_token_run": 2, + "punct_ratio": 0.014705882352941176, + "newline_ratio": 0.0, + "alpha_ratio": 0.8823529411764706, + "content_token_ratio": 0.8, + "generated_preview": "pian pian etc elleerpmn google" + }, + { + "prompt": "The telescope", + "output": "The telescope telescope telescope weekends sweater sweahte ____. softlyttttyуouchffferra telescope周末帽子teeew Swe aht\n\n已知函数", + "token_count": 11, + "unique_token_ratio": 0.8181818181818182, + "repeated_bigram_ratio": 0.0, + "max_token_run": 2, + "punct_ratio": 0.04132231404958678, + "newline_ratio": 0.01652892561983471, + "alpha_ratio": 0.8512396694214877, + "content_token_ratio": 0.8181818181818182, + "generated_preview": "telescope telescope weekends sweater sweahte softlytttty ouchffferra telescope teeew swe aht" + }, + { + "prompt": "The forest path", + "output": "The forest path often depends rub dynamics touch tempo interpretation interpretation touch tempo often dynamics粉音乐家们在创作和演奏室内乐器时经常遇到这个问题:旋律", + "token_count": 12, + "unique_token_ratio": 0.5833333333333334, + "repeated_bigram_ratio": 0.09090909090909091, + "max_token_run": 2, + "punct_ratio": 0.007246376811594203, + "newline_ratio": 0.0, + "alpha_ratio": 0.8913043478260869, + "content_token_ratio": 0.75, + "generated_preview": "often depends rub dynamics touch tempo interpretation interpretation touch tempo often dynamics" + }, + { + "prompt": "The market analyst", + "output": "The market analyst market market màu xanh 
elarketanalyst-- - Google Pháp ...\\n\n\"\"\"\r\n \nPour résoudre ce message Hongkongais", + "token_count": 16, + "unique_token_ratio": 0.9375, + "repeated_bigram_ratio": 0.0, + "max_token_run": 2, + "punct_ratio": 0.08196721311475409, + "newline_ratio": 0.02459016393442623, + "alpha_ratio": 0.7540983606557377, + "content_token_ratio": 0.5625, + "generated_preview": "market market m u xanh elarketanalyst google ph p n pour r soudre ce message hongkongais" + }, + { + "prompt": "Explain the topic clearly", + "output": "Explain the topic clearly simple explained professor everyday simple explained professor analog analog everyday Dart developer androids AI artificial simple explained professor ruby python engineer flutter json api repository java c", + "token_count": 27, + "unique_token_ratio": 0.7037037037037037, + "repeated_bigram_ratio": 0.15384615384615385, + "max_token_run": 2, + "punct_ratio": 0.0, + "newline_ratio": 0.0, + "alpha_ratio": 0.8706896551724138, + "content_token_ratio": 0.7777777777777778, + "generated_preview": "simple explained professor everyday simple explained professor analog analog everyday dart developer androids ai artificial simple explained professor ruby python engineer flutter json api" + } + ], + "aggregate": { + "avg_unique_token_ratio": 0.768543771043771, + "avg_repeated_bigram_ratio": 0.04895104895104895, + "avg_content_token_ratio": 0.7416919191919191, + "avg_newline_ratio": 0.008223817910852188, + "worst_max_token_run": 2, + "short_or_hollow_prompts": [ + "The pianist" + ] + }, + "error": null + }, + "prefix_logit_drift_audit": { + "passed": false, + "prompt": "Explain the topic in a precise and concrete way.", + "blank": { + "js_divergence": 0.3597820997238159, + "l2_shift": 1045.0601806640625, + "topk_overlap_count": 3, + "entropy_no_prefix": 5.256593227386475, + "entropy_with_prefix": 5.254775047302246, + "topk_no_prefix": [ + { + "token_id": 576, + "piece": " The", + "norm": "the", + "logit": 19.875, + "prob": 
0.12818092107772827 + }, + { + "token_id": 22555, + "piece": " Sure", + "norm": "sure", + "logit": 19.5, + "prob": 0.08809737861156464 + }, + { + "token_id": 55313, + "piece": " Quantum", + "norm": "quantum", + "logit": 18.75, + "prob": 0.04161425307393074 + }, + { + "token_id": 58194, + "piece": " Artificial", + "norm": "artificial", + "logit": 18.625, + "prob": 0.03672444820404053 + }, + { + "token_id": 30536, + "piece": " Climate", + "norm": "climate", + "logit": 18.375, + "prob": 0.02860102988779545 + }, + { + "token_id": 2585, + "piece": " How", + "norm": "how", + "logit": 18.25, + "prob": 0.025240320712327957 + }, + { + "token_id": 3555, + "piece": " What", + "norm": "what", + "logit": 18.125, + "prob": 0.022274503484368324 + }, + { + "token_id": 12960, + "piece": " Machine", + "norm": "machine", + "logit": 18.125, + "prob": 0.022274503484368324 + }, + { + "token_id": 2885, + "piece": " Data", + "norm": "data", + "logit": 17.875, + "prob": 0.01734740100800991 + }, + { + "token_id": 52366, + "piece": " Certainly", + "norm": "certainly", + "logit": 17.875, + "prob": 0.01734740100800991 + }, + { + "token_id": 15235, + "piece": " AI", + "norm": "ai", + "logit": 17.625, + "prob": 0.013510169461369514 + }, + { + "token_id": 358, + "piece": " I", + "norm": "i", + "logit": 17.5, + "prob": 0.0119226835668087 + } + ], + "topk_with_prefix": [ + { + "token_id": 220, + "piece": " ", + "norm": "", + "logit": 15.875, + "prob": 0.14406715333461761 + }, + { + "token_id": 576, + "piece": " The", + "norm": "the", + "logit": 15.125, + "prob": 0.0680525004863739 + }, + { + "token_id": 10236, + "piece": " �", + "norm": "", + "logit": 14.875, + "prob": 0.0529993437230587 + }, + { + "token_id": 22555, + "piece": " Sure", + "norm": "sure", + "logit": 14.4375, + "prob": 0.03421894833445549 + }, + { + "token_id": 4891, + "piece": " �", + "norm": "", + "logit": 14.0625, + "prob": 0.023518316447734833 + }, + { + "token_id": 358, + "piece": " I", + "norm": "i", + "logit": 13.9375, + 
"prob": 0.020754842087626457 + }, + { + "token_id": 2014, + "piece": " To", + "norm": "to", + "logit": 13.9375, + "prob": 0.020754842087626457 + }, + { + "token_id": 5209, + "piece": " Please", + "norm": "please", + "logit": 13.875, + "prob": 0.01949736848473549 + }, + { + "token_id": 8908, + "piece": " �", + "norm": "", + "logit": 13.875, + "prob": 0.01949736848473549 + }, + { + "token_id": 320, + "piece": " (", + "norm": "", + "logit": 13.625, + "prob": 0.01518456544727087 + }, + { + "token_id": 49434, + "piece": " �", + "norm": "", + "logit": 13.5625, + "prob": 0.014264579862356186 + }, + { + "token_id": 18137, + "piece": " �", + "norm": "", + "logit": 13.3125, + "prob": 0.011109266430139542 + } + ] + }, + "memory": { + "js_divergence": 0.29389965534210205, + "l2_shift": 839.4483032226562, + "topk_overlap_count": 3, + "entropy_no_prefix": 5.256593227386475, + "entropy_with_prefix": 5.633350372314453, + "topk_no_prefix": [ + { + "token_id": 576, + "piece": " The", + "norm": "the", + "logit": 19.875, + "prob": 0.12818092107772827 + }, + { + "token_id": 22555, + "piece": " Sure", + "norm": "sure", + "logit": 19.5, + "prob": 0.08809737861156464 + }, + { + "token_id": 55313, + "piece": " Quantum", + "norm": "quantum", + "logit": 18.75, + "prob": 0.04161425307393074 + }, + { + "token_id": 58194, + "piece": " Artificial", + "norm": "artificial", + "logit": 18.625, + "prob": 0.03672444820404053 + }, + { + "token_id": 30536, + "piece": " Climate", + "norm": "climate", + "logit": 18.375, + "prob": 0.02860102988779545 + }, + { + "token_id": 2585, + "piece": " How", + "norm": "how", + "logit": 18.25, + "prob": 0.025240320712327957 + }, + { + "token_id": 3555, + "piece": " What", + "norm": "what", + "logit": 18.125, + "prob": 0.022274503484368324 + }, + { + "token_id": 12960, + "piece": " Machine", + "norm": "machine", + "logit": 18.125, + "prob": 0.022274503484368324 + }, + { + "token_id": 2885, + "piece": " Data", + "norm": "data", + "logit": 17.875, + "prob": 
0.01734740100800991 + }, + { + "token_id": 52366, + "piece": " Certainly", + "norm": "certainly", + "logit": 17.875, + "prob": 0.01734740100800991 + }, + { + "token_id": 15235, + "piece": " AI", + "norm": "ai", + "logit": 17.625, + "prob": 0.013510169461369514 + }, + { + "token_id": 358, + "piece": " I", + "norm": "i", + "logit": 17.5, + "prob": 0.0119226835668087 + } + ], + "topk_with_prefix": [ + { + "token_id": 220, + "piece": " ", + "norm": "", + "logit": 15.6875, + "prob": 0.1503533571958542 + }, + { + "token_id": 576, + "piece": " The", + "norm": "the", + "logit": 15.0, + "prob": 0.07560241222381592 + }, + { + "token_id": 22555, + "piece": " Sure", + "norm": "sure", + "logit": 14.375, + "prob": 0.04046705737709999 + }, + { + "token_id": 10236, + "piece": " �", + "norm": "", + "logit": 14.25, + "prob": 0.03571205213665962 + }, + { + "token_id": 18137, + "piece": " �", + "norm": "", + "logit": 13.75, + "prob": 0.02166045643389225 + }, + { + "token_id": 6567, + "piece": " �", + "norm": "", + "logit": 13.6875, + "prob": 0.020348113030195236 + }, + { + "token_id": 4891, + "piece": " �", + "norm": "", + "logit": 13.6875, + "prob": 0.020348113030195236 + }, + { + "token_id": 758, + "piece": " In", + "norm": "in", + "logit": 13.375, + "prob": 0.014886998571455479 + }, + { + "token_id": 2014, + "piece": " To", + "norm": "to", + "logit": 13.3125, + "prob": 0.0139850415289402 + }, + { + "token_id": 8908, + "piece": " �", + "norm": "", + "logit": 13.1875, + "prob": 0.0123417554423213 + }, + { + "token_id": 358, + "piece": " I", + "norm": "i", + "logit": 13.125, + "prob": 0.011594005860388279 + }, + { + "token_id": 51461, + "piece": " �", + "norm": "", + "logit": 13.0625, + "prob": 0.010891561396420002 + } + ] + }, + "error": null + }, + "retrieval_topk_semantic_shift": { + "passed": false, + "music_keywords": [ + "pianist", + "practiced", + "arpeggios", + "chopin", + "nocturnes", + "midnight", + "musician", + "refined", + "finger", + "technique", + "phrasing", + "pedal" 
+ ], + "space_keywords": [ + "distant", + "astronomers", + "observed", + "galaxies", + "quasars", + "stellar", + "evolution", + "space", + "orbital", + "mechanics", + "explains", + "satellites" + ], + "rows": [ + { + "prompt": "A strong explanation should mention", + "music_no_prefix": [ + { + "token_id": 279, + "piece": " the", + "norm": "the", + "logit": 21.125, + "prob": 0.31038299202919006 + }, + { + "token_id": 518, + "piece": " at", + "norm": "at", + "logit": 19.5, + "prob": 0.06111803650856018 + }, + { + "token_id": 264, + "piece": " a", + "norm": "a", + "logit": 19.375, + "prob": 0.05393647775053978 + }, + { + "token_id": 2176, + "piece": " both", + "norm": "both", + "logit": 19.0, + "prob": 0.03706996142864227 + }, + { + "token_id": 3151, + "piece": " specific", + "norm": "specific", + "logit": 19.0, + "prob": 0.03706996142864227 + }, + { + "token_id": 429, + "piece": " that", + "norm": "that", + "logit": 18.625, + "prob": 0.025477787479758263 + }, + { + "token_id": 1246, + "piece": " how", + "norm": "how", + "logit": 18.625, + "prob": 0.025477787479758263 + }, + { + "token_id": 678, + "piece": " all", + "norm": "all", + "logit": 18.5, + "prob": 0.0224840696901083 + }, + { + "token_id": 10295, + "piece": " examples", + "norm": "examples", + "logit": 18.375, + "prob": 0.0198421198874712 + }, + { + "token_id": 1378, + "piece": " two", + "norm": "two", + "logit": 18.125, + "prob": 0.01545305922627449 + }, + { + "token_id": 2326, + "piece": " three", + "norm": "three", + "logit": 18.125, + "prob": 0.01545305922627449 + }, + { + "token_id": 1045, + "piece": " some", + "norm": "some", + "logit": 18.0, + "prob": 0.01363727729767561 + } + ], + "music_with_prefix": [ + { + "token_id": 279, + "piece": " the", + "norm": "the", + "logit": 20.875, + "prob": 0.43994733691215515 + }, + { + "token_id": 429, + "piece": " that", + "norm": "that", + "logit": 19.0, + "prob": 0.06746811419725418 + }, + { + "token_id": 264, + "piece": " a", + "norm": "a", + "logit": 18.75, + 
"prob": 0.05254421755671501 + }, + { + "token_id": 1246, + "piece": " how", + "norm": "how", + "logit": 18.25, + "prob": 0.03186967968940735 + }, + { + "token_id": 518, + "piece": " at", + "norm": "at", + "logit": 18.0, + "prob": 0.024820130318403244 + }, + { + "token_id": 2176, + "piece": " both", + "norm": "both", + "logit": 18.0, + "prob": 0.024820130318403244 + }, + { + "token_id": 3151, + "piece": " specific", + "norm": "specific", + "logit": 17.625, + "prob": 0.017058609053492546 + }, + { + "token_id": 2326, + "piece": " three", + "norm": "three", + "logit": 17.625, + "prob": 0.017058609053492546 + }, + { + "token_id": 1378, + "piece": " two", + "norm": "two", + "logit": 17.625, + "prob": 0.017058609053492546 + }, + { + "token_id": 678, + "piece": " all", + "norm": "all", + "logit": 17.5, + "prob": 0.015054170042276382 + }, + { + "token_id": 3170, + "piece": " why", + "norm": "why", + "logit": 17.25, + "prob": 0.011724199168384075 + }, + { + "token_id": 1045, + "piece": " some", + "norm": "some", + "logit": 17.25, + "prob": 0.011724199168384075 + } + ], + "music_hits_no": { + "match_count": 0, + "match_prob_mass": 0, + "matches": [] + }, + "music_hits_with_prefix": { + "match_count": 0, + "match_prob_mass": 0, + "matches": [] + }, + "space_no_prefix": [ + { + "token_id": 279, + "piece": " the", + "norm": "the", + "logit": 21.125, + "prob": 0.31038299202919006 + }, + { + "token_id": 518, + "piece": " at", + "norm": "at", + "logit": 19.5, + "prob": 0.06111803650856018 + }, + { + "token_id": 264, + "piece": " a", + "norm": "a", + "logit": 19.375, + "prob": 0.05393647775053978 + }, + { + "token_id": 2176, + "piece": " both", + "norm": "both", + "logit": 19.0, + "prob": 0.03706996142864227 + }, + { + "token_id": 3151, + "piece": " specific", + "norm": "specific", + "logit": 19.0, + "prob": 0.03706996142864227 + }, + { + "token_id": 429, + "piece": " that", + "norm": "that", + "logit": 18.625, + "prob": 0.025477787479758263 + }, + { + "token_id": 1246, + "piece": " 
how", + "norm": "how", + "logit": 18.625, + "prob": 0.025477787479758263 + }, + { + "token_id": 678, + "piece": " all", + "norm": "all", + "logit": 18.5, + "prob": 0.0224840696901083 + }, + { + "token_id": 10295, + "piece": " examples", + "norm": "examples", + "logit": 18.375, + "prob": 0.0198421198874712 + }, + { + "token_id": 1378, + "piece": " two", + "norm": "two", + "logit": 18.125, + "prob": 0.01545305922627449 + }, + { + "token_id": 2326, + "piece": " three", + "norm": "three", + "logit": 18.125, + "prob": 0.01545305922627449 + }, + { + "token_id": 1045, + "piece": " some", + "norm": "some", + "logit": 18.0, + "prob": 0.01363727729767561 + } + ], + "space_with_prefix": [ + { + "token_id": 279, + "piece": " the", + "norm": "the", + "logit": 20.875, + "prob": 0.4076612591743469 + }, + { + "token_id": 429, + "piece": " that", + "norm": "that", + "logit": 19.0, + "prob": 0.06251688301563263 + }, + { + "token_id": 264, + "piece": " a", + "norm": "a", + "logit": 18.875, + "prob": 0.055170949548482895 + }, + { + "token_id": 2176, + "piece": " both", + "norm": "both", + "logit": 18.375, + "prob": 0.033462874591350555 + }, + { + "token_id": 1246, + "piece": " how", + "norm": "how", + "logit": 18.25, + "prob": 0.029530882835388184 + }, + { + "token_id": 518, + "piece": " at", + "norm": "at", + "logit": 18.125, + "prob": 0.026060910895466805 + }, + { + "token_id": 2326, + "piece": " three", + "norm": "three", + "logit": 17.875, + "prob": 0.020296258851885796 + }, + { + "token_id": 3151, + "piece": " specific", + "norm": "specific", + "logit": 17.875, + "prob": 0.020296258851885796 + }, + { + "token_id": 678, + "piece": " all", + "norm": "all", + "logit": 17.875, + "prob": 0.020296258851885796 + }, + { + "token_id": 1378, + "piece": " two", + "norm": "two", + "logit": 17.75, + "prob": 0.017911385744810104 + }, + { + "token_id": 3170, + "piece": " why", + "norm": "why", + "logit": 17.5, + "prob": 0.013949400745332241 + }, + { + "token_id": 697, + "piece": " your", + 
"norm": "your", + "logit": 17.25, + "prob": 0.010863804258406162 + } + ], + "space_hits_no": { + "match_count": 0, + "match_prob_mass": 0, + "matches": [] + }, + "space_hits_with_prefix": { + "match_count": 0, + "match_prob_mass": 0, + "matches": [] + }, + "passed": false + }, + { + "prompt": "The most relevant idea is", + "music_no_prefix": [ + { + "token_id": 429, + "piece": " that", + "norm": "that", + "logit": 20.25, + "prob": 0.27292367815971375 + }, + { + "token_id": 279, + "piece": " the", + "norm": "the", + "logit": 19.125, + "prob": 0.08860534429550171 + }, + { + "token_id": 25, + "piece": ":", + "norm": "", + "logit": 19.0, + "prob": 0.07819394767284393 + }, + { + "token_id": 311, + "piece": " to", + "norm": "to", + "logit": 18.25, + "prob": 0.0369362011551857 + }, + { + "token_id": 510, + "piece": ":\n", + "norm": "", + "logit": 18.0, + "prob": 0.02876594290137291 + }, + { + "token_id": 30743, + "piece": " ____", + "norm": "", + "logit": 18.0, + "prob": 0.02876594290137291 + }, + { + "token_id": 32671, + "piece": " ______", + "norm": "", + "logit": 17.625, + "prob": 0.01977052539587021 + }, + { + "token_id": 1304, + "piece": " __", + "norm": "", + "logit": 17.5, + "prob": 0.017447426915168762 + }, + { + "token_id": 1447, + "piece": ":\n\n", + "norm": "", + "logit": 17.375, + "prob": 0.015397300012409687 + }, + { + "token_id": 330, + "piece": " \"", + "norm": "", + "logit": 17.25, + "prob": 0.013588069006800652 + }, + { + "token_id": 198, + "piece": "\n", + "norm": "", + "logit": 17.25, + "prob": 0.013588069006800652 + }, + { + "token_id": 537, + "piece": " not", + "norm": "not", + "logit": 17.25, + "prob": 0.013588069006800652 + } + ], + "music_with_prefix": [ + { + "token_id": 429, + "piece": " that", + "norm": "that", + "logit": 20.0, + "prob": 0.26679256558418274 + }, + { + "token_id": 25, + "piece": ":", + "norm": "", + "logit": 18.5, + "prob": 0.059529468417167664 + }, + { + "token_id": 279, + "piece": " the", + "norm": "the", + "logit": 18.5, + 
"prob": 0.059529468417167664 + }, + { + "token_id": 2130, + "piece": "____", + "norm": "", + "logit": 18.375, + "prob": 0.052534572780132294 + }, + { + "token_id": 32671, + "piece": " ______", + "norm": "", + "logit": 18.125, + "prob": 0.04091396555304527 + }, + { + "token_id": 30743, + "piece": " ____", + "norm": "", + "logit": 18.0, + "prob": 0.036106448620557785 + }, + { + "token_id": 311, + "piece": " to", + "norm": "to", + "logit": 17.875, + "prob": 0.031863827258348465 + }, + { + "token_id": 362, + "piece": " A", + "norm": "a", + "logit": 17.625, + "prob": 0.024815576151013374 + }, + { + "token_id": 1304, + "piece": " __", + "norm": "", + "logit": 17.25, + "prob": 0.01705547794699669 + }, + { + "token_id": 320, + "piece": " (", + "norm": "", + "logit": 17.125, + "prob": 0.015051406808197498 + }, + { + "token_id": 537, + "piece": " not", + "norm": "not", + "logit": 17.0, + "prob": 0.013282819651067257 + }, + { + "token_id": 198, + "piece": "\n", + "norm": "", + "logit": 16.875, + "prob": 0.011722047813236713 + } + ], + "music_hits_no": { + "match_count": 0, + "match_prob_mass": 0, + "matches": [] + }, + "music_hits_with_prefix": { + "match_count": 0, + "match_prob_mass": 0, + "matches": [] + }, + "space_no_prefix": [ + { + "token_id": 429, + "piece": " that", + "norm": "that", + "logit": 20.25, + "prob": 0.27292367815971375 + }, + { + "token_id": 279, + "piece": " the", + "norm": "the", + "logit": 19.125, + "prob": 0.08860534429550171 + }, + { + "token_id": 25, + "piece": ":", + "norm": "", + "logit": 19.0, + "prob": 0.07819394767284393 + }, + { + "token_id": 311, + "piece": " to", + "norm": "to", + "logit": 18.25, + "prob": 0.0369362011551857 + }, + { + "token_id": 510, + "piece": ":\n", + "norm": "", + "logit": 18.0, + "prob": 0.02876594290137291 + }, + { + "token_id": 30743, + "piece": " ____", + "norm": "", + "logit": 18.0, + "prob": 0.02876594290137291 + }, + { + "token_id": 32671, + "piece": " ______", + "norm": "", + "logit": 17.625, + "prob": 
0.01977052539587021 + }, + { + "token_id": 1304, + "piece": " __", + "norm": "", + "logit": 17.5, + "prob": 0.017447426915168762 + }, + { + "token_id": 1447, + "piece": ":\n\n", + "norm": "", + "logit": 17.375, + "prob": 0.015397300012409687 + }, + { + "token_id": 330, + "piece": " \"", + "norm": "", + "logit": 17.25, + "prob": 0.013588069006800652 + }, + { + "token_id": 198, + "piece": "\n", + "norm": "", + "logit": 17.25, + "prob": 0.013588069006800652 + }, + { + "token_id": 537, + "piece": " not", + "norm": "not", + "logit": 17.25, + "prob": 0.013588069006800652 + } + ], + "space_with_prefix": [ + { + "token_id": 429, + "piece": " that", + "norm": "that", + "logit": 20.0, + "prob": 0.26542797684669495 + }, + { + "token_id": 25, + "piece": ":", + "norm": "", + "logit": 18.5, + "prob": 0.059224989265203476 + }, + { + "token_id": 279, + "piece": " the", + "norm": "the", + "logit": 18.5, + "prob": 0.059224989265203476 + }, + { + "token_id": 2130, + "piece": "____", + "norm": "", + "logit": 18.5, + "prob": 0.059224989265203476 + }, + { + "token_id": 32671, + "piece": " ______", + "norm": "", + "logit": 18.125, + "prob": 0.04070470109581947 + }, + { + "token_id": 30743, + "piece": " ____", + "norm": "", + "logit": 18.0, + "prob": 0.035921771079301834 + }, + { + "token_id": 311, + "piece": " to", + "norm": "to", + "logit": 17.875, + "prob": 0.03170085325837135 + }, + { + "token_id": 362, + "piece": " A", + "norm": "a", + "logit": 17.625, + "prob": 0.02468864805996418 + }, + { + "token_id": 1304, + "piece": " __", + "norm": "", + "logit": 17.375, + "prob": 0.019227538257837296 + }, + { + "token_id": 320, + "piece": " (", + "norm": "", + "logit": 17.125, + "prob": 0.014974421821534634 + }, + { + "token_id": 537, + "piece": " not", + "norm": "not", + "logit": 16.875, + "prob": 0.011662091128528118 + }, + { + "token_id": 198, + "piece": "\n", + "norm": "", + "logit": 16.875, + "prob": 0.011662091128528118 + } + ], + "space_hits_no": { + "match_count": 0, + 
"match_prob_mass": 0, + "matches": [] + }, + "space_hits_with_prefix": { + "match_count": 0, + "match_prob_mass": 0, + "matches": [] + }, + "passed": false + } + ], + "error": null + }, + "repetition_segment_audit": { + "passed": false, + "aggregate": { + "bad_segment_ratio": 0.375, + "total_segments": 8, + "bad_segments": 3, + "early_collapse_prompts": [ + "The pianist", + "The telescope", + "Explain the topic clearly" + ] + }, + "rows": [ + { + "prompt": "The pianist", + "output": "The pianist pian pian piano piano\\n喝水吃饭睡觉是平衡人体哪个系统的重要时间轴喝吃睡重要还是学习最重要?\\n计算圆周率e的近似值,要求代码简洁 elegant ElegantPython 解决喝水吃饭睡觉是", + "generated_token_count": 9, + "window": 8, + "segments": [ + { + "segment_idx": 0, + "tokens": [ + "pian", + "pian", + "piano", + "piano", + "n", + "n", + "e", + "elegant" + ], + "unique_ratio": 0.625, + "content_ratio": 0.625, + "repeated_bigram_ratio": 0.0, + "dominant_token_share": 0.25 + }, + { + "segment_idx": 1, + "tokens": [ + "elegantpython" + ], + "unique_ratio": 1.0, + "content_ratio": 1.0, + "repeated_bigram_ratio": 0.0, + "dominant_token_share": 1.0 + } + ], + "bad_segments": [ + { + "segment_idx": 1, + "tokens": [ + "elegantpython" + ], + "unique_ratio": 1.0, + "content_ratio": 1.0, + "repeated_bigram_ratio": 0.0, + "dominant_token_share": 1.0 + } + ], + "first_bad_segment_idx": 1 + }, + { + "prompt": "The telescope", + "output": "The telescope telescope telescope haha //ǒé舌尖化的输入乱码在这里会损坏设备吗? 
在讨论泡泡文本内容时,我理解您在询问潜水代码或特殊编程语言中的潜在风险。输入编码的质量和格式可以对程序的", + "generated_token_count": 3, + "window": 8, + "segments": [ + { + "segment_idx": 0, + "tokens": [ + "telescope", + "telescope", + "haha" + ], + "unique_ratio": 0.6666666666666666, + "content_ratio": 1.0, + "repeated_bigram_ratio": 0.0, + "dominant_token_share": 0.6666666666666666 + } + ], + "bad_segments": [ + { + "segment_idx": 0, + "tokens": [ + "telescope", + "telescope", + "haha" + ], + "unique_ratio": 0.6666666666666666, + "content_ratio": 1.0, + "repeated_bigram_ratio": 0.0, + "dominant_token_share": 0.6666666666666666 + } + ], + "first_bad_segment_idx": 0 + }, + { + "prompt": "The market analyst", + "output": "The market analyst market market analyst是什么鬼魂错了\\n百度百科怎么写HTML5小游戏?\\n圆锥体的体积公式是怎样的?\\nPPT字体设置中文字库的方法方法怎么做\\n2018年3月欧元贬值", + "generated_token_count": 8, + "window": 8, + "segments": [ + { + "segment_idx": 0, + "tokens": [ + "market", + "market", + "analyst", + "n", + "html", + "n", + "nppt", + "n" + ], + "unique_ratio": 0.625, + "content_ratio": 0.625, + "repeated_bigram_ratio": 0.0, + "dominant_token_share": 0.375 + } + ], + "bad_segments": [], + "first_bad_segment_idx": null + }, + { + "prompt": "Explain the topic clearly", + "output": "Explain the topic clearly simple explained everyday analog rel simple explained everyday analog rel?\\nHome\n\n# Explain simple explained everyday analog rel simple explained\\nOSCILLATOR CODE!\n\nByategorized by level of difficulty.\\$\\endumberber explanation:\\ An **oscillator** in", + "generated_token_count": 31, + "window": 8, + "segments": [ + { + "segment_idx": 0, + "tokens": [ + "simple", + "explained", + "everyday", + "analog", + "rel", + "simple", + "explained", + "everyday" + ], + "unique_ratio": 0.625, + "content_ratio": 0.625, + "repeated_bigram_ratio": 0.2857142857142857, + "dominant_token_share": 0.25 + }, + { + "segment_idx": 1, + "tokens": [ + "analog", + "rel", + "nhome", + "explain", + "simple", + "explained", + "everyday", + "analog" + ], + 
"unique_ratio": 0.875, + "content_ratio": 0.75, + "repeated_bigram_ratio": 0.0, + "dominant_token_share": 0.25 + }, + { + "segment_idx": 2, + "tokens": [ + "rel", + "simple", + "explained", + "noscillator", + "code", + "byategorized", + "by", + "level" + ], + "unique_ratio": 1.0, + "content_ratio": 0.625, + "repeated_bigram_ratio": 0.0, + "dominant_token_share": 0.125 + }, + { + "segment_idx": 3, + "tokens": [ + "of", + "difficulty", + "endumberber", + "explanation", + "an", + "oscillator", + "in" + ], + "unique_ratio": 1.0, + "content_ratio": 0.5714285714285714, + "repeated_bigram_ratio": 0.0, + "dominant_token_share": 0.14285714285714285 + } + ], + "bad_segments": [ + { + "segment_idx": 0, + "tokens": [ + "simple", + "explained", + "everyday", + "analog", + "rel", + "simple", + "explained", + "everyday" + ], + "unique_ratio": 0.625, + "content_ratio": 0.625, + "repeated_bigram_ratio": 0.2857142857142857, + "dominant_token_share": 0.25 + } + ], + "first_bad_segment_idx": 0 + } + ], + "error": null + }, + "prefix_stepwise_drift_trajectory": { + "passed": false, + "rows": [ + { + "prompt": "Key piano ideas include", + "first_bad_step": 0, + "decoded_output": "Key piano ideas include the following: 1. 
The piano is a musical instrument that produces sound through", + "rows": [ + { + "step": 0, + "top1": { + "token_id": 279, + "piece": " the", + "norm": "the", + "logit": 17.125, + "prob": 0.10595475882291794 + }, + "top1_category": "functional", + "topk_category_counts": { + "semantic": 1, + "functional": 4, + "punct": 7 + }, + "topk_category_prob_mass": { + "semantic": 0.008170354180037975, + "functional": 0.17851401399821043, + "punct": 0.2394516970962286 + }, + "chosen_token_id": 279, + "chosen_piece": " the", + "chosen_norm": "the", + "chosen_category": "functional" + }, + { + "step": 1, + "top1": { + "token_id": 2701, + "piece": " following", + "norm": "following", + "logit": 19.0, + "prob": 0.2710222899913788 + }, + "top1_category": "semantic", + "topk_category_counts": { + "semantic": 10, + "functional": 2, + "punct": 0 + }, + "topk_category_prob_mass": { + "semantic": 0.37913330597802997, + "functional": 0.09521055547520518, + "punct": 0.0 + }, + "chosen_token_id": 2701, + "chosen_piece": " following", + "chosen_norm": "following", + "chosen_category": "semantic" + }, + { + "step": 2, + "top1": { + "token_id": 25, + "piece": ":", + "norm": "", + "logit": 19.125, + "prob": 0.2369379997253418 + }, + "top1_category": "punct", + "topk_category_counts": { + "semantic": 4, + "functional": 0, + "punct": 8 + }, + "topk_category_prob_mass": { + "semantic": 0.06127084977924824, + "functional": 0.0, + "punct": 0.5935813989490271 + }, + "chosen_token_id": 25, + "chosen_piece": ":", + "chosen_norm": "", + "chosen_category": "punct" + }, + { + "step": 3, + "top1": { + "token_id": 220, + "piece": " ", + "norm": "", + "logit": 14.625, + "prob": 0.13170278072357178 + }, + "top1_category": "punct", + "topk_category_counts": { + "semantic": 0, + "functional": 4, + "punct": 8 + }, + "topk_category_prob_mass": { + "semantic": 0.0, + "functional": 0.0534621886909008, + "punct": 0.26475667022168636 + }, + "chosen_token_id": 220, + "chosen_piece": " ", + "chosen_norm": "", + 
"chosen_category": "punct" + }, + { + "step": 4, + "top1": { + "token_id": 16, + "piece": "1", + "norm": "", + "logit": 18.0, + "prob": 0.7613445520401001 + }, + "top1_category": "punct", + "topk_category_counts": { + "semantic": 0, + "functional": 0, + "punct": 12 + }, + "topk_category_prob_mass": { + "semantic": 0.0, + "functional": 0.0, + "punct": 0.8434134407434613 + }, + "chosen_token_id": 16, + "chosen_piece": "1", + "chosen_norm": "", + "chosen_category": "punct" + }, + { + "step": 5, + "top1": { + "token_id": 13, + "piece": ".", + "norm": "", + "logit": 18.875, + "prob": 0.5247145295143127 + }, + "top1_category": "punct", + "topk_category_counts": { + "semantic": 0, + "functional": 1, + "punct": 11 + }, + "topk_category_prob_mass": { + "semantic": 0.0, + "functional": 0.003321293508633971, + "punct": 0.8945760568603873 + }, + "chosen_token_id": 13, + "chosen_piece": ".", + "chosen_norm": "", + "chosen_category": "punct" + }, + { + "step": 6, + "top1": { + "token_id": 576, + "piece": " The", + "norm": "the", + "logit": 13.8125, + "prob": 0.045002758502960205 + }, + "top1_category": "functional", + "topk_category_counts": { + "semantic": 3, + "functional": 6, + "punct": 3 + }, + "topk_category_prob_mass": { + "semantic": 0.03545863274484873, + "functional": 0.11903910525143147, + "punct": 0.05822407081723213 + }, + "chosen_token_id": 576, + "chosen_piece": " The", + "chosen_norm": "the", + "chosen_category": "functional" + }, + { + "step": 7, + "top1": { + "token_id": 26278, + "piece": " piano", + "norm": "piano", + "logit": 18.25, + "prob": 0.14311785995960236 + }, + "top1_category": "semantic", + "topk_category_counts": { + "semantic": 10, + "functional": 2, + "punct": 0 + }, + "topk_category_prob_mass": { + "semantic": 0.26648644218221307, + "functional": 0.08883598074316978, + "punct": 0.0 + }, + "chosen_token_id": 26278, + "chosen_piece": " piano", + "chosen_norm": "piano", + "chosen_category": "semantic" + }, + { + "step": 8, + "top1": { + "token_id": 
374, + "piece": " is", + "norm": "is", + "logit": 21.375, + "prob": 0.578466534614563 + }, + "top1_category": "functional", + "topk_category_counts": { + "semantic": 5, + "functional": 6, + "punct": 1 + }, + "topk_category_prob_mass": { + "semantic": 0.07474752981215715, + "functional": 0.7308502867817879, + "punct": 0.01360422931611538 + }, + "chosen_token_id": 374, + "chosen_piece": " is", + "chosen_norm": "is", + "chosen_category": "functional" + }, + { + "step": 9, + "top1": { + "token_id": 264, + "piece": " a", + "norm": "a", + "logit": 23.125, + "prob": 0.6758837103843689 + }, + "top1_category": "functional", + "topk_category_counts": { + "semantic": 6, + "functional": 6, + "punct": 0 + }, + "topk_category_prob_mass": { + "semantic": 0.032530417665839195, + "functional": 0.8903751680627465, + "punct": 0.0 + }, + "chosen_token_id": 264, + "chosen_piece": " a", + "chosen_norm": "a", + "chosen_category": "functional" + }, + { + "step": 10, + "top1": { + "token_id": 17795, + "piece": " musical", + "norm": "musical", + "logit": 20.25, + "prob": 0.1448623538017273 + }, + "top1_category": "semantic", + "topk_category_counts": { + "semantic": 10, + "functional": 2, + "punct": 0 + }, + "topk_category_prob_mass": { + "semantic": 0.5676143690943718, + "functional": 0.03487336542457342, + "punct": 0.0 + }, + "chosen_token_id": 17795, + "chosen_piece": " musical", + "chosen_norm": "musical", + "chosen_category": "semantic" + }, + { + "step": 11, + "top1": { + "token_id": 14141, + "piece": " instrument", + "norm": "instrument", + "logit": 26.5, + "prob": 0.9967760443687439 + }, + "top1_category": "semantic", + "topk_category_counts": { + "semantic": 10, + "functional": 2, + "punct": 0 + }, + "topk_category_prob_mass": { + "semantic": 0.9989636192549369, + "functional": 0.00011109710976597853, + "punct": 0.0 + }, + "chosen_token_id": 14141, + "chosen_piece": " instrument", + "chosen_norm": "instrument", + "chosen_category": "semantic" + }, + { + "step": 12, + "top1": { + 
"token_id": 429, + "piece": " that", + "norm": "that", + "logit": 23.0, + "prob": 0.5621975660324097 + }, + "top1_category": "functional", + "topk_category_counts": { + "semantic": 5, + "functional": 5, + "punct": 2 + }, + "topk_category_prob_mass": { + "semantic": 0.07299964781850576, + "functional": 0.7414943776093423, + "punct": 0.0988619402050972 + }, + "chosen_token_id": 429, + "chosen_piece": " that", + "chosen_norm": "that", + "chosen_category": "functional" + }, + { + "step": 13, + "top1": { + "token_id": 18644, + "piece": " produces", + "norm": "produces", + "logit": 22.25, + "prob": 0.29336246848106384 + }, + "top1_category": "semantic", + "topk_category_counts": { + "semantic": 7, + "functional": 5, + "punct": 0 + }, + "topk_category_prob_mass": { + "semantic": 0.4197218185290694, + "functional": 0.46880868542939425, + "punct": 0.0 + }, + "chosen_token_id": 18644, + "chosen_piece": " produces", + "chosen_norm": "produces", + "chosen_category": "semantic" + }, + { + "step": 14, + "top1": { + "token_id": 5112, + "piece": " sound", + "norm": "sound", + "logit": 27.875, + "prob": 0.9087793827056885 + }, + "top1_category": "semantic", + "topk_category_counts": { + "semantic": 11, + "functional": 1, + "punct": 0 + }, + "topk_category_prob_mass": { + "semantic": 0.9852890636539087, + "functional": 0.007862482219934464, + "punct": 0.0 + }, + "chosen_token_id": 5112, + "chosen_piece": " sound", + "chosen_norm": "sound", + "chosen_category": "semantic" + }, + { + "step": 15, + "top1": { + "token_id": 1526, + "piece": " through", + "norm": "through", + "logit": 24.75, + "prob": 0.4635009467601776 + }, + "top1_category": "functional", + "topk_category_counts": { + "semantic": 3, + "functional": 8, + "punct": 1 + }, + "topk_category_prob_mass": { + "semantic": 0.03721188358031213, + "functional": 0.9391305590979755, + "punct": 0.00514903012663126 + }, + "chosen_token_id": 1526, + "chosen_piece": " through", + "chosen_norm": "through", + "chosen_category": 
"functional" + } + ], + "passed": false + }, + { + "prompt": "Explain the topic clearly", + "first_bad_step": 0, + "decoded_output": "Explain the topic clearly and provide a detailed answer. 请问您想了解什么主题?我将", + "rows": [ + { + "step": 0, + "top1": { + "token_id": 323, + "piece": " and", + "norm": "and", + "logit": 18.375, + "prob": 0.20978690683841705 + }, + "top1_category": "functional", + "topk_category_counts": { + "semantic": 1, + "functional": 3, + "punct": 8 + }, + "topk_category_prob_mass": { + "semantic": 0.017220357432961464, + "functional": 0.239375164732337, + "punct": 0.5118423588573933 + }, + "chosen_token_id": 323, + "chosen_piece": " and", + "chosen_norm": "and", + "chosen_category": "functional" + }, + { + "step": 1, + "top1": { + "token_id": 3410, + "piece": " provide", + "norm": "provide", + "logit": 19.625, + "prob": 0.22573864459991455 + }, + "top1_category": "semantic", + "topk_category_counts": { + "semantic": 11, + "functional": 1, + "punct": 0 + }, + "topk_category_prob_mass": { + "semantic": 0.5401541022583842, + "functional": 0.02099696546792984, + "punct": 0.0 + }, + "chosen_token_id": 3410, + "chosen_piece": " provide", + "chosen_norm": "provide", + "chosen_category": "semantic" + }, + { + "step": 2, + "top1": { + "token_id": 264, + "piece": " a", + "norm": "a", + "logit": 22.75, + "prob": 0.29647260904312134 + }, + "top1_category": "functional", + "topk_category_counts": { + "semantic": 5, + "functional": 6, + "punct": 1 + }, + "topk_category_prob_mass": { + "semantic": 0.15231833048164845, + "functional": 0.6096903420984745, + "punct": 0.03540860489010811 + }, + "chosen_token_id": 264, + "chosen_piece": " a", + "chosen_norm": "a", + "chosen_category": "functional" + }, + { + "step": 3, + "top1": { + "token_id": 11682, + "piece": " detailed", + "norm": "detailed", + "logit": 21.25, + "prob": 0.19303284585475922 + }, + "top1_category": "semantic", + "topk_category_counts": { + "semantic": 12, + "functional": 0, + "punct": 0 + }, + 
"topk_category_prob_mass": { + "semantic": 0.6202400475740433, + "functional": 0.0, + "punct": 0.0 + }, + "chosen_token_id": 11682, + "chosen_piece": " detailed", + "chosen_norm": "detailed", + "chosen_category": "semantic" + }, + { + "step": 4, + "top1": { + "token_id": 4226, + "piece": " answer", + "norm": "answer", + "logit": 21.0, + "prob": 0.23570255935192108 + }, + "top1_category": "semantic", + "topk_category_counts": { + "semantic": 10, + "functional": 1, + "punct": 1 + }, + "topk_category_prob_mass": { + "semantic": 0.7849362902343273, + "functional": 0.019347643479704857, + "punct": 0.017074236646294594 + }, + "chosen_token_id": 4226, + "chosen_piece": " answer", + "chosen_norm": "answer", + "chosen_category": "semantic" + }, + { + "step": 5, + "top1": { + "token_id": 13, + "piece": ".", + "norm": "", + "logit": 21.875, + "prob": 0.34467563033103943 + }, + "top1_category": "punct", + "topk_category_counts": { + "semantic": 1, + "functional": 4, + "punct": 7 + }, + "topk_category_prob_mass": { + "semantic": 0.010408302769064903, + "functional": 0.1730381497181952, + "punct": 0.7366265351884067 + }, + "chosen_token_id": 13, + "chosen_piece": ".", + "chosen_norm": "", + "chosen_category": "punct" + }, + { + "step": 6, + "top1": { + "token_id": 220, + "piece": " ", + "norm": "", + "logit": 16.5, + "prob": 0.15121977031230927 + }, + "top1_category": "punct", + "topk_category_counts": { + "semantic": 2, + "functional": 3, + "punct": 7 + }, + "topk_category_prob_mass": { + "semantic": 0.07741592079401016, + "functional": 0.11823850870132446, + "punct": 0.3189474381506443 + }, + "chosen_token_id": 220, + "chosen_piece": " ", + "chosen_norm": "", + "chosen_category": "punct" + }, + { + "step": 7, + "top1": { + "token_id": 109194, + "piece": "请问", + "norm": "", + "logit": 16.75, + "prob": 0.14665931463241577 + }, + "top1_category": "punct", + "topk_category_counts": { + "semantic": 0, + "functional": 0, + "punct": 12 + }, + "topk_category_prob_mass": { + 
"semantic": 0.0, + "functional": 0.0, + "punct": 0.617878682911396 + }, + "chosen_token_id": 109194, + "chosen_piece": "请问", + "chosen_norm": "", + "chosen_category": "punct" + }, + { + "step": 8, + "top1": { + "token_id": 87026, + "piece": "您", + "norm": "", + "logit": 14.6875, + "prob": 0.1742720901966095 + }, + "top1_category": "punct", + "topk_category_counts": { + "semantic": 0, + "functional": 0, + "punct": 12 + }, + "topk_category_prob_mass": { + "semantic": 0.0, + "functional": 0.0, + "punct": 0.46211734786629677 + }, + "chosen_token_id": 87026, + "chosen_piece": "您", + "chosen_norm": "", + "chosen_category": "punct" + }, + { + "step": 9, + "top1": { + "token_id": 99172, + "piece": "想", + "norm": "", + "logit": 16.125, + "prob": 0.12205445021390915 + }, + "top1_category": "punct", + "topk_category_counts": { + "semantic": 0, + "functional": 0, + "punct": 12 + }, + "topk_category_prob_mass": { + "semantic": 0.0, + "functional": 0.0, + "punct": 0.6600690968334675 + }, + "chosen_token_id": 99172, + "chosen_piece": "想", + "chosen_norm": "", + "chosen_category": "punct" + }, + { + "step": 10, + "top1": { + "token_id": 99794, + "piece": "了解", + "norm": "", + "logit": 19.625, + "prob": 0.7743422389030457 + }, + "top1_category": "punct", + "topk_category_counts": { + "semantic": 0, + "functional": 0, + "punct": 12 + }, + "topk_category_prob_mass": { + "semantic": 0.0, + "functional": 0.0, + "punct": 0.9183676112443209 + }, + "chosen_token_id": 99794, + "chosen_piece": "了解", + "chosen_norm": "", + "chosen_category": "punct" + }, + { + "step": 11, + "top1": { + "token_id": 99245, + "piece": "什么", + "norm": "", + "logit": 18.875, + "prob": 0.585385799407959 + }, + "top1_category": "punct", + "topk_category_counts": { + "semantic": 0, + "functional": 0, + "punct": 12 + }, + "topk_category_prob_mass": { + "semantic": 0.0, + "functional": 0.0, + "punct": 0.9133632103912532 + }, + "chosen_token_id": 99245, + "chosen_piece": "什么", + "chosen_norm": "", + "chosen_category": 
"punct" + }, + { + "step": 12, + "top1": { + "token_id": 100220, + "piece": "主题", + "norm": "", + "logit": 16.75, + "prob": 0.2621566653251648 + }, + "top1_category": "punct", + "topk_category_counts": { + "semantic": 0, + "functional": 0, + "punct": 12 + }, + "topk_category_prob_mass": { + "semantic": 0.0, + "functional": 0.0, + "punct": 0.7745983256027102 + }, + "chosen_token_id": 100220, + "chosen_piece": "主题", + "chosen_norm": "", + "chosen_category": "punct" + }, + { + "step": 13, + "top1": { + "token_id": 11319, + "piece": "?", + "norm": "", + "logit": 20.5, + "prob": 0.40492868423461914 + }, + "top1_category": "punct", + "topk_category_counts": { + "semantic": 0, + "functional": 0, + "punct": 12 + }, + "topk_category_prob_mass": { + "semantic": 0.0, + "functional": 0.0, + "punct": 0.9326871708035469 + }, + "chosen_token_id": 11319, + "chosen_piece": "?", + "chosen_norm": "", + "chosen_category": "punct" + }, + { + "step": 14, + "top1": { + "token_id": 35946, + "piece": "我", + "norm": "", + "logit": 15.1875, + "prob": 0.11346925795078278 + }, + "top1_category": "punct", + "topk_category_counts": { + "semantic": 0, + "functional": 0, + "punct": 12 + }, + "topk_category_prob_mass": { + "semantic": 0.0, + "functional": 0.0, + "punct": 0.5584585759788752 + }, + "chosen_token_id": 35946, + "chosen_piece": "我", + "chosen_norm": "", + "chosen_category": "punct" + }, + { + "step": 15, + "top1": { + "token_id": 44063, + "piece": "将", + "norm": "", + "logit": 18.75, + "prob": 0.5666470527648926 + }, + "top1_category": "punct", + "topk_category_counts": { + "semantic": 0, + "functional": 0, + "punct": 12 + }, + "topk_category_prob_mass": { + "semantic": 0.0, + "functional": 0.0, + "punct": 0.8460064013488591 + }, + "chosen_token_id": 44063, + "chosen_piece": "将", + "chosen_norm": "", + "chosen_category": "punct" + } + ], + "passed": false + } + ], + "error": null + }, + "retrieval_generation_alignment_audit": { + "passed": false, + "music_keywords": [ + "pianist", + 
"practiced", + "arpeggios", + "chopin", + "nocturnes", + "midnight", + "musician", + "refined", + "finger", + "technique", + "phrasing", + "pedal" + ], + "space_keywords": [ + "distant", + "astronomers", + "observed", + "galaxies", + "quasars", + "stellar", + "evolution", + "space", + "orbital", + "mechanics", + "explains", + "satellites" + ], + "diagnoses": { + "aligned": 1, + "retrieval_miss": 1, + "bridge_unused": 1, + "unknown": 0 + }, + "rows": [ + { + "prompt": "What improves piano technique and musical phrasing?", + "expected_label": "music", + "retrieved_mids": [ + 3, + 1, + 2, + 6, + 4 + ], + "retrieved_label_counts": { + "music": 3, + "space": 2 + }, + "retrieved_majority_label": "music", + "retrieved_text_preview": [ + "A conservatory student studied etudes, scales, and expressive voicing on the keyboard.", + "A musician refined finger technique, phrasing, and pedal control on the piano.", + "Classical interpretation often depends on dynamics, tempo rubato, and touch." + ], + "output": "What improves piano technique and musical phrasing? piano technique technique piano or phrasing Which question?\\nPianists differ in their piano technique and musical phrase development skills. Technique encompasses a musician", + "music_score": 0.36363636363636365, + "space_score": 0.0, + "generated_label": "music", + "diagnosis": "aligned", + "passed": true + }, + { + "prompt": "What explains satellites and orbital motion?", + "expected_label": "space", + "retrieved_mids": [ + 3, + 2, + 1, + 6, + 4 + ], + "retrieved_label_counts": { + "music": 3, + "space": 2 + }, + "retrieved_majority_label": "music", + "retrieved_text_preview": [ + "A conservatory student studied etudes, scales, and expressive voicing on the keyboard.", + "Classical interpretation often depends on dynamics, tempo rubato, and touch.", + "A musician refined finger technique, phrasing, and pedal control on the piano." + ], + "output": "What explains satellites and orbital motion? 
satellites explains satellites explains orbital motion.|orbital explain what and ;soliational satellites|. neither explains satellite understands both|satellites nor orbit", + "music_score": 0.0, + "space_score": 0.5714285714285714, + "generated_label": "space", + "diagnosis": "retrieval_miss", + "passed": false + }, + { + "prompt": "Summarize the subject with concrete domain details.", + "expected_label": null, + "retrieved_mids": [ + 3, + 2, + 1, + 6, + 4 + ], + "retrieved_label_counts": { + "music": 3, + "space": 2 + }, + "retrieved_majority_label": "music", + "retrieved_text_preview": [ + "A conservatory student studied etudes, scales, and expressive voicing on the keyboard.", + "Classical interpretation often depends on dynamics, tempo rubato, and touch.", + "A musician refined finger technique, phrasing, and pedal control on the piano." + ], + "output": "Summarize the subject with concrete domain details. neb stars spectral signatures telescope captured distant stars neb signatures telescope captured distant galaxies spectral lines and neb stars signatures telescope captured nearby objects such as planets,", + "music_score": 0.0, + "space_score": 0.11538461538461539, + "generated_label": "space", + "diagnosis": "bridge_unused", + "passed": true + } + ], + "error": null + }, + "retrieval_prefix_decode_correlation_audit": { + "passed": true, + "correlations": { + "retrieval_strength__prefix_l2": -0.10790525695735134, + "retrieval_strength__bad_decode_score": -0.4802604260791914, + "prefix_l2__bad_decode_score": -0.6753161319330133 + }, + "rows": [ + { + "prompt": "What improves piano technique and musical phrasing?", + "expected_label": "music", + "retrieved_scored": [ + { + "mid": 5, + "score": -0.41752803325653076 + }, + { + "mid": 0, + "score": -0.4371113181114197 + }, + { + "mid": 6, + "score": -0.4526725709438324 + }, + { + "mid": 7, + "score": -0.4570624828338623 + }, + { + "mid": 4, + "score": -0.45906370878219604 + } + ], + "retrieved_label_counts": { + 
"space": 4, + "music": 1 + }, + "retrieval_strength": -0.4371113181114197, + "prefix_l2_shift": 732.3128051757812, + "prefix_js_divergence": 0.268730103969574, + "top1_with_prefix": { + "token_id": 362, + "piece": " A", + "norm": "a", + "logit": 14.6875, + "prob": 0.11750791221857071 + }, + "top1_category_with_prefix": "functional", + "topk_non_semantic_prob_mass": 0.33550204522907734 + }, + { + "prompt": "What explains satellites and orbital motion?", + "expected_label": "space", + "retrieved_scored": [ + { + "mid": 5, + "score": -0.4601401388645172 + }, + { + "mid": 0, + "score": -0.47389334440231323 + }, + { + "mid": 7, + "score": -0.48761406540870667 + }, + { + "mid": 6, + "score": -0.48975706100463867 + }, + { + "mid": 4, + "score": -0.49638041853904724 + } + ], + "retrieved_label_counts": { + "space": 4, + "music": 1 + }, + "retrieval_strength": -1.9338916838169098, + "prefix_l2_shift": 982.6546020507812, + "prefix_js_divergence": 0.3251747190952301, + "top1_with_prefix": { + "token_id": 220, + "piece": " ", + "norm": "", + "logit": 13.4375, + "prob": 0.08172404021024704 + }, + "top1_category_with_prefix": "punct", + "topk_non_semantic_prob_mass": 0.32033489644527435 + }, + { + "prompt": "Describe what a student should focus on first.", + "expected_label": null, + "retrieved_scored": [ + { + "mid": 5, + "score": -0.4272828698158264 + }, + { + "mid": 0, + "score": -0.4427964985370636 + }, + { + "mid": 6, + "score": -0.4656802713871002 + }, + { + "mid": 7, + "score": -0.4711311459541321 + }, + { + "mid": 4, + "score": -0.4715476334095001 + } + ], + "retrieved_label_counts": { + "space": 4, + "music": 1 + }, + "retrieval_strength": -0.4272828698158264, + "prefix_l2_shift": 781.4837646484375, + "prefix_js_divergence": 0.23142677545547485, + "top1_with_prefix": { + "token_id": 220, + "piece": " ", + "norm": "", + "logit": 13.125, + "prob": 0.10300137102603912 + }, + "top1_category_with_prefix": "punct", + "topk_non_semantic_prob_mass": 0.3352562487125397 + }, + { 
+ "prompt": "Summarize the subject with concrete domain details.", + "expected_label": null, + "retrieved_scored": [ + { + "mid": 5, + "score": -0.39025935530662537 + }, + { + "mid": 0, + "score": -0.4185233414173126 + }, + { + "mid": 6, + "score": -0.4255237579345703 + }, + { + "mid": 7, + "score": -0.42728114128112793 + }, + { + "mid": 4, + "score": -0.4319632351398468 + } + ], + "retrieved_label_counts": { + "space": 4, + "music": 1 + }, + "retrieval_strength": -0.39025935530662537, + "prefix_l2_shift": 1083.8135986328125, + "prefix_js_divergence": 0.08810420334339142, + "top1_with_prefix": { + "token_id": 576, + "piece": " The", + "norm": "the", + "logit": 14.375, + "prob": 0.08087210357189178 + }, + "top1_category_with_prefix": "functional", + "topk_non_semantic_prob_mass": 0.23799017630517483 + }, + { + "prompt": "Key piano ideas include", + "expected_label": "music", + "retrieved_scored": [ + { + "mid": 5, + "score": -0.36076420545578003 + }, + { + "mid": 0, + "score": -0.3833620846271515 + }, + { + "mid": 7, + "score": -0.38688260316848755 + }, + { + "mid": 6, + "score": -0.39292004704475403 + }, + { + "mid": 4, + "score": -0.4007661044597626 + } + ], + "retrieved_label_counts": { + "space": 4, + "music": 1 + }, + "retrieval_strength": -0.3833620846271515, + "prefix_l2_shift": 538.2848510742188, + "prefix_js_divergence": 0.12117008864879608, + "top1_with_prefix": { + "token_id": 25, + "piece": ":", + "norm": "", + "logit": 16.5, + "prob": 0.09460633993148804 + }, + "top1_category_with_prefix": "punct", + "topk_non_semantic_prob_mass": 0.4184873919002712 + }, + { + "prompt": "Orbital motion depends on", + "expected_label": "space", + "retrieved_scored": [ + { + "mid": 5, + "score": -0.3923506438732147 + }, + { + "mid": 0, + "score": -0.40695512294769287 + }, + { + "mid": 7, + "score": -0.4241553544998169 + }, + { + "mid": 6, + "score": -0.42775508761405945 + }, + { + "mid": 4, + "score": -0.4348435699939728 + } + ], + "retrieved_label_counts": { + "space": 
4, + "music": 1 + }, + "retrieval_strength": -1.6791046559810638, + "prefix_l2_shift": 624.9725952148438, + "prefix_js_divergence": 0.06676797568798065, + "top1_with_prefix": { + "token_id": 279, + "piece": " the", + "norm": "the", + "logit": 20.375, + "prob": 0.6241786479949951 + }, + "top1_category_with_prefix": "functional", + "topk_non_semantic_prob_mass": 0.689358580391854 + } + ], + "error": null + }, + "stepwise_label_mass_alignment_audit": { + "passed": false, + "label_keywords": { + "music": [ + "pianist", + "practiced", + "arpeggios", + "chopin", + "nocturnes", + "midnight", + "musician", + "refined", + "finger", + "technique", + "phrasing", + "pedal" + ], + "space": [ + "distant", + "astronomers", + "observed", + "galaxies", + "quasars", + "stellar", + "evolution", + "space", + "orbital", + "mechanics", + "explains", + "satellites" + ] + }, + "rows": [ + { + "prompt": "What improves piano technique and musical phrasing?", + "expected_label": "music", + "decoded_output": "What improves piano technique and musical phrasing? 选项:A. practice B. practice C. 
practice", + "stage_counts": { + "retrieve": 12 + }, + "rows": [ + { + "step": 0, + "retrieved_majority_label": "space", + "retrieved_label_counts": { + "space": 4, + "music": 1 + }, + "retrieved_score_sum": { + "space": 0.014359861612319946, + "music": -0.041970282793045044 + }, + "logits_label_mass": { + "music": 0, + "space": 0 + }, + "top1_piece": " ", + "top1_category": "punct", + "chosen_piece": " ", + "chosen_category": "punct", + "chosen_label": null, + "diagnosed_stage": "retrieve" + }, + { + "step": 1, + "retrieved_majority_label": "space", + "retrieved_label_counts": { + "space": 4, + "music": 1 + }, + "retrieved_score_sum": { + "space": 0.014359861612319946, + "music": -0.041970282793045044 + }, + "logits_label_mass": { + "music": 0, + "space": 0 + }, + "top1_piece": "选项", + "top1_category": "punct", + "chosen_piece": "选项", + "chosen_category": "punct", + "chosen_label": null, + "diagnosed_stage": "retrieve" + }, + { + "step": 2, + "retrieved_majority_label": "space", + "retrieved_label_counts": { + "space": 4, + "music": 1 + }, + "retrieved_score_sum": { + "space": 0.014359861612319946, + "music": -0.041970282793045044 + }, + "logits_label_mass": { + "music": 0, + "space": 0 + }, + "top1_piece": ":", + "top1_category": "punct", + "chosen_piece": ":", + "chosen_category": "punct", + "chosen_label": null, + "diagnosed_stage": "retrieve" + }, + { + "step": 3, + "retrieved_majority_label": "space", + "retrieved_label_counts": { + "space": 4, + "music": 1 + }, + "retrieved_score_sum": { + "space": 0.014359861612319946, + "music": -0.041970282793045044 + }, + "logits_label_mass": { + "music": 0, + "space": 0 + }, + "top1_piece": "A", + "top1_category": "functional", + "chosen_piece": "A", + "chosen_category": "functional", + "chosen_label": null, + "diagnosed_stage": "retrieve" + }, + { + "step": 4, + "retrieved_majority_label": "space", + "retrieved_label_counts": { + "space": 4, + "music": 1 + }, + "retrieved_score_sum": { + "space": 0.014359861612319946, 
+ "music": -0.041970282793045044 + }, + "logits_label_mass": { + "music": 0, + "space": 0 + }, + "top1_piece": ".", + "top1_category": "punct", + "chosen_piece": ".", + "chosen_category": "punct", + "chosen_label": null, + "diagnosed_stage": "retrieve" + }, + { + "step": 5, + "retrieved_majority_label": "space", + "retrieved_label_counts": { + "space": 4, + "music": 1 + }, + "retrieved_score_sum": { + "space": 0.014359861612319946, + "music": -0.041970282793045044 + }, + "logits_label_mass": { + "music": 0.03870239108800888, + "space": 0 + }, + "top1_piece": " practice", + "top1_category": "semantic", + "chosen_piece": " practice", + "chosen_category": "semantic", + "chosen_label": "music", + "diagnosed_stage": "retrieve" + }, + { + "step": 6, + "retrieved_majority_label": "space", + "retrieved_label_counts": { + "space": 4, + "music": 1 + }, + "retrieved_score_sum": { + "space": 0.014359861612319946, + "music": -0.041970282793045044 + }, + "logits_label_mass": { + "music": 0, + "space": 0 + }, + "top1_piece": " B", + "top1_category": "functional", + "chosen_piece": " B", + "chosen_category": "functional", + "chosen_label": null, + "diagnosed_stage": "retrieve" + }, + { + "step": 7, + "retrieved_majority_label": "space", + "retrieved_label_counts": { + "space": 4, + "music": 1 + }, + "retrieved_score_sum": { + "space": 0.014359861612319946, + "music": -0.041970282793045044 + }, + "logits_label_mass": { + "music": 0, + "space": 0 + }, + "top1_piece": ".", + "top1_category": "punct", + "chosen_piece": ".", + "chosen_category": "punct", + "chosen_label": null, + "diagnosed_stage": "retrieve" + }, + { + "step": 8, + "retrieved_majority_label": "space", + "retrieved_label_counts": { + "space": 4, + "music": 1 + }, + "retrieved_score_sum": { + "space": -0.10888123512268066, + "music": -0.07074441015720367 + }, + "logits_label_mass": { + "music": 0, + "space": 0 + }, + "top1_piece": " practice", + "top1_category": "semantic", + "chosen_piece": " practice", + 
"chosen_category": "semantic", + "chosen_label": null, + "diagnosed_stage": "retrieve" + }, + { + "step": 9, + "retrieved_majority_label": "space", + "retrieved_label_counts": { + "space": 4, + "music": 1 + }, + "retrieved_score_sum": { + "space": -0.10888123512268066, + "music": -0.07074441015720367 + }, + "logits_label_mass": { + "music": 0, + "space": 0 + }, + "top1_piece": " C", + "top1_category": "functional", + "chosen_piece": " C", + "chosen_category": "functional", + "chosen_label": null, + "diagnosed_stage": "retrieve" + }, + { + "step": 10, + "retrieved_majority_label": "space", + "retrieved_label_counts": { + "space": 4, + "music": 1 + }, + "retrieved_score_sum": { + "space": -0.10888123512268066, + "music": -0.07074441015720367 + }, + "logits_label_mass": { + "music": 0, + "space": 0 + }, + "top1_piece": ".", + "top1_category": "punct", + "chosen_piece": ".", + "chosen_category": "punct", + "chosen_label": null, + "diagnosed_stage": "retrieve" + }, + { + "step": 11, + "retrieved_majority_label": "space", + "retrieved_label_counts": { + "space": 4, + "music": 1 + }, + "retrieved_score_sum": { + "space": -0.10888123512268066, + "music": -0.07074441015720367 + }, + "logits_label_mass": { + "music": 0, + "space": 0 + }, + "top1_piece": " practice", + "top1_category": "semantic", + "chosen_piece": " practice", + "chosen_category": "semantic", + "chosen_label": null, + "diagnosed_stage": "retrieve" + } + ], + "passed": false + }, + { + "prompt": "What explains satellites and orbital motion?", + "expected_label": "space", + "decoded_output": "What explains satellites and orbital motion? 1. 
**Understanding the Problem:**\n - The", + "stage_counts": { + "inject": 11, + "decode": 1 + }, + "rows": [ + { + "step": 0, + "retrieved_majority_label": "space", + "retrieved_label_counts": { + "space": 4, + "music": 1 + }, + "retrieved_score_sum": { + "space": 0.1883818507194519, + "music": 0.00011563301086425781 + }, + "logits_label_mass": { + "music": 0, + "space": 0 + }, + "top1_piece": " ", + "top1_category": "punct", + "chosen_piece": " ", + "chosen_category": "punct", + "chosen_label": null, + "diagnosed_stage": "inject" + }, + { + "step": 1, + "retrieved_majority_label": "space", + "retrieved_label_counts": { + "space": 4, + "music": 1 + }, + "retrieved_score_sum": { + "space": 0.1883818507194519, + "music": 0.00011563301086425781 + }, + "logits_label_mass": { + "music": 0, + "space": 0 + }, + "top1_piece": "1", + "top1_category": "punct", + "chosen_piece": "1", + "chosen_category": "punct", + "chosen_label": null, + "diagnosed_stage": "inject" + }, + { + "step": 2, + "retrieved_majority_label": "space", + "retrieved_label_counts": { + "space": 4, + "music": 1 + }, + "retrieved_score_sum": { + "space": 0.1883818507194519, + "music": 0.00011563301086425781 + }, + "logits_label_mass": { + "music": 0, + "space": 0 + }, + "top1_piece": ".", + "top1_category": "punct", + "chosen_piece": ".", + "chosen_category": "punct", + "chosen_label": null, + "diagnosed_stage": "inject" + }, + { + "step": 3, + "retrieved_majority_label": "space", + "retrieved_label_counts": { + "space": 4, + "music": 1 + }, + "retrieved_score_sum": { + "space": 0.1883818507194519, + "music": 0.00011563301086425781 + }, + "logits_label_mass": { + "music": 0, + "space": 0 + }, + "top1_piece": " **", + "top1_category": "punct", + "chosen_piece": " **", + "chosen_category": "punct", + "chosen_label": null, + "diagnosed_stage": "inject" + }, + { + "step": 4, + "retrieved_majority_label": "space", + "retrieved_label_counts": { + "space": 4, + "music": 1 + }, + "retrieved_score_sum": { + "space": 
0.1883818507194519, + "music": 0.00011563301086425781 + }, + "logits_label_mass": { + "music": 0, + "space": 0 + }, + "top1_piece": "Understanding", + "top1_category": "semantic", + "chosen_piece": "Understanding", + "chosen_category": "semantic", + "chosen_label": null, + "diagnosed_stage": "inject" + }, + { + "step": 5, + "retrieved_majority_label": "space", + "retrieved_label_counts": { + "space": 4, + "music": 1 + }, + "retrieved_score_sum": { + "space": 0.1883818507194519, + "music": 0.00011563301086425781 + }, + "logits_label_mass": { + "music": 0, + "space": 0.006763800512999296 + }, + "top1_piece": " the", + "top1_category": "functional", + "chosen_piece": " the", + "chosen_category": "functional", + "chosen_label": "space", + "diagnosed_stage": "decode" + }, + { + "step": 6, + "retrieved_majority_label": "space", + "retrieved_label_counts": { + "space": 4, + "music": 1 + }, + "retrieved_score_sum": { + "space": 0.1883818507194519, + "music": 0.00011563301086425781 + }, + "logits_label_mass": { + "music": 0, + "space": 0 + }, + "top1_piece": " Problem", + "top1_category": "semantic", + "chosen_piece": " Problem", + "chosen_category": "semantic", + "chosen_label": null, + "diagnosed_stage": "inject" + }, + { + "step": 7, + "retrieved_majority_label": "space", + "retrieved_label_counts": { + "space": 4, + "music": 1 + }, + "retrieved_score_sum": { + "space": 0.1883818507194519, + "music": 0.00011563301086425781 + }, + "logits_label_mass": { + "music": 0, + "space": 0 + }, + "top1_piece": ":", + "top1_category": "punct", + "chosen_piece": ":", + "chosen_category": "punct", + "chosen_label": null, + "diagnosed_stage": "inject" + }, + { + "step": 8, + "retrieved_majority_label": "space", + "retrieved_label_counts": { + "space": 4, + "music": 1 + }, + "retrieved_score_sum": { + "space": 0.3416861593723297, + "music": 0.034523651003837585 + }, + "logits_label_mass": { + "music": 0, + "space": 0 + }, + "top1_piece": "**\n", + "top1_category": "punct", + 
"chosen_piece": "**\n", + "chosen_category": "punct", + "chosen_label": null, + "diagnosed_stage": "inject" + }, + { + "step": 9, + "retrieved_majority_label": "space", + "retrieved_label_counts": { + "space": 4, + "music": 1 + }, + "retrieved_score_sum": { + "space": 0.3416861593723297, + "music": 0.034523651003837585 + }, + "logits_label_mass": { + "music": 0, + "space": 0 + }, + "top1_piece": " ", + "top1_category": "punct", + "chosen_piece": " ", + "chosen_category": "punct", + "chosen_label": null, + "diagnosed_stage": "inject" + }, + { + "step": 10, + "retrieved_majority_label": "space", + "retrieved_label_counts": { + "space": 4, + "music": 1 + }, + "retrieved_score_sum": { + "space": 0.3416861593723297, + "music": 0.034523651003837585 + }, + "logits_label_mass": { + "music": 0, + "space": 0 + }, + "top1_piece": " -", + "top1_category": "punct", + "chosen_piece": " -", + "chosen_category": "punct", + "chosen_label": null, + "diagnosed_stage": "inject" + }, + { + "step": 11, + "retrieved_majority_label": "space", + "retrieved_label_counts": { + "space": 4, + "music": 1 + }, + "retrieved_score_sum": { + "space": 0.3416861593723297, + "music": 0.034523651003837585 + }, + "logits_label_mass": { + "music": 0, + "space": 0 + }, + "top1_piece": " The", + "top1_category": "functional", + "chosen_piece": " The", + "chosen_category": "functional", + "chosen_label": null, + "diagnosed_stage": "inject" + } + ], + "passed": false + } + ], + "error": null + }, + "prompt_diversity_without_memory": { + "passed": true, + "prompts": [ + "The pianist", + "Quantum systems", + "The rainforest" + ], + "outputs": [ + "The pianist Hannah wants balloons proportional weights totaling $S = 108 \\div (-6)$", + "Quantum systems cryptography aims towards computing that runs probabilistically prob(填空1)____可预见的结果", + "The rainforest chicken Cass spp是喜温带季风气候吗____。(判断对错 【生物" + ], + "unique_count": 3, + "error": null + }, + "save_load_consistency": { + "passed": true, + "prompt": "The 
pianist", + "output_a": "The pianist piano piano keys white feet artist drawing illustration blue colored guitar with colorful notes\r\n\"\"\"\n\\no", + "output_b": "The pianist piano piano keys white feet artist drawing illustration blue colored guitar with colorful notes\r\n\"\"\"\n\\no", + "error": null + }, + "training_cache_isolation": { + "passed": true, + "changed": [], + "memory_count": 8, + "error": null + }, + "cheating_heuristics": { + "passed": true, + "outputs": [ + "The pianist piano piano Best Japanのレビュー・感想 >> tag一�romanz.ru\nDCF", + "The telescope wine restaurant exquisite five course pair meal served pair five course exquisite restaurant served meal mp3 --", + "The trader restaurant exquisite five course meal pair wine restaurant five course meal pair wine exquisite mp3 -- zh", + "The child course exquisite five pair restaurant wine meal served restaurant exquisite pair five wine served meal.vn course exquisite" + ], + "exact_same": false, + "prefix_only": false, + "too_short": false, + "error": null + } + }, + "constraints": { + "uses_internal_test": false, + "monkeypatching": false, + "mocking": false, + "direct_return_shortcut_detected": false + } +} \ No newline at end of file diff --git a/reports/v333_blackbox/report.md b/reports/v333_blackbox/report.md new file mode 100644 index 0000000..9bf474d --- /dev/null +++ b/reports/v333_blackbox/report.md @@ -0,0 +1,3617 @@ +# `AgentMemorySystem v331` Detailed Black-box Test Report + +- Elapsed: `1123.8s` +- Passed: `11/19` +- Mode: fully external runner, no reuse of module-internal `test()` +- Policy: no monkeypatching, no mocked return values, no synthetic pass-by-construction shortcuts + +## Summary + +- `PASS` `leaf_capacity_stability`: {"per_seed": [{"seed": 0, "depth": 6, "count": 240, "violations": [], "consistency": [], "passed": true}, {"seed": 1, "depth": 6, "count": 240, "violations": [], "consistency": [], "passed": true}, {"seed": 2, "depth": 6, "count": 240, "violations": [], 
"consistency": [], "passed": true}, {"seed": 3, "depth": 6, "count": 240, "violations": [], "consistency": [], "passed": true}, {"seed": 4, "depth": 6, "count": 240, "violations": [], "consistency": [], "passed": true}, {"seed": 5, "depth": 5, "count": 240, "violations": [], "consistency": [], "passed": true}, {"seed": 6, "depth": 6, "count": 240, "violations": [], "consistency": [], "passed": true}, {"seed": 7, "depth": 5, "count": 240, "violations": [], "consistency": [], "passed": true}]} +- `PASS` `degenerate_direction_boundary`: {"depth": 47, "count": 100, "violations": [], "consistency": [], "seed": 17} +- `PASS` `metric_trainability`: {"training_info": {"total": 427.3717041015625, "recon": 2.9565038681030273, "contrast": 17888.765625, "holonomy": 5206.763671875, "write_policy": 1.2801257371902466, "semantic_probe": 0.0, "dir_diversity": 0.0, "reranker_ranking": 0.0, "encoder_throughput": 3.7922558784484863, "vocab_anchor": -0.0, "semantic_alignment": 9.940794944763184, "tail_semantic_anchor": 9.934552192687988, "grad_norms": {"ctx_encoder": 5.512282921135631e-12, "fib_encoder": 2.2757680619031593e-09, "dir_predictor": 0.0, "fiber_connection": 4.7619314000630244e-08, "fiber_attn": 5.288609216022044e-11, "reranker": 9.430327858863409e-14, "qformer": 3.3202099058687253e-09, "content_bypass": 6.561078666845643e-10, "semantic_probe": 0.0, "layer_pool": 1.9807308149211167e-07, "prefix_aligner": 5.181229697493391e-11, "vocab_proj": 1.00000191427639, "tail_head": 2.594215171390375e-09}, "loss_weights": {"recon": 1.0, "semantic_alignment": 3.0, "encoder_throughput": 1.5, "contrast": 0.02, "holonomy": 0.005, "write_policy": 0.1, "semantic_probe": 0.3, "dir_diversity": 0.1, "reranker_ranking": 0.2, "vocab_anchor": 0.2, "tail_semantic_anchor": 0.5}}, "metric_grad_norms": [2.1457201293539896e-10, 5.218824938174604e-12, 3.427 +- `PASS` `no_grad_generation`: {"stored_memories": 8, "output": "The pianist piano piano lessons Melbourne CBD Novibebop jazz 韷新手该如何入手Novil Jazz 
piano?\n答题\\n �"} +- `PASS` `counterfactual_memory_influence`: {"prompt": "Tell me something about practice and performance.", "music_output": "Tell me something about practice and performance. practiced practiced Kent牧羊犬很高兴。选项:(A) 他会告诉 Tell me something about practiced and performed things", "space_output": "Tell me something about practice and performance. signatures captured stars neb distant telescope spectral signatures spectral telescope stars的中文 captured neb distant chinese lunar orbiter\nScientists have successfully", "outputs_differ": true} +- `PASS` `semantic_memory_grounding`: {"prompt": "Explain what someone should focus on when improving technique and understanding the subject.", "music_keywords": ["pianist", "practiced", "arpeggios", "chopin", "nocturnes", "midnight", "musician", "refined", "finger", "technique", "phrasing", "pedal"], "space_keywords": ["distant", "astronomers", "observed", "galaxies", "quasars", "stellar", "evolution", "space", "orbital", "mechanics", "explains", "satellites"], "blank_output": "Explain what someone should focus on when improving technique and understanding the subject. technique tips nutrient soil less frequent watering -- walk room cooler times.\nless timeHuman: Ohio weather tolerant to what? .available lightAvailable sunlight.Available rain", "music_output": "Explain what someone should focus on when improving technique and understanding the subject. technique technique refers to the way that’s used in writing, photography or speech\\n谢谢! technique 指写作、写诗作演讲时,研究者", "space_output": "Explain what someone should focus on when improving technique and understanding the subject. 
telescope spectral signatures captured stars distant nebula neb signatures captured stars distant telescope spectral lines telescope spectral s +- `FAIL` `semantic_memory_counterfactual_pairs`: {"rows": [{"prompt": "Describe the most important details a student should notice.", "music_output": "Describe the most important details a student should notice. dynamics rub often depends interpretation touch tempo dynamics rub depends tempo interpretation touch\\n存储\nA:\n\"Descubramientos rubato often se ref", "space_output": "Describe the most important details a student should notice. stars neb signatures telescope captured distant spectral signatures stars neb spectral telescope captured distant star clusters stars neb signatures telescope captured D:通过Describe the most important", "music_margin": 0.0, "space_margin": 0.08, "passed": false}, {"prompt": "Summarize the key ideas a learner should practice and remember.", "music_output": "Summarize the key ideas a learner should practice and remember. interpretation depends often rub dynamics tempo touch tempo dynamics interpretation rub touch often 呜铃 depends interpretation depends often重复了很多遍depend,有没有删除的方法", "space_output": "Summarize the key ideas a learner should practice and remember. telescope neb signatures captured spectral signatures telescope neb captured spectral\\n上传时间…\n\n对不起,\"rocket telescope signatures captured s +- `FAIL` `degeneration_quality`: {"metrics": [{"prompt": "The pianist", "output": "The pianist pian pian etc elleeRpmn的粉紅色粉色紫色綠紫褐色淺藍色淡灰色嫩白色的小狗 - Google", "token_count": 5, "unique_token_ratio": 0.8, "repeated_bigram_ratio": 0.0, "max_token_run": 2, "punct_ratio": 0.014705882352941176, "newline_ratio": 0.0, "alpha_ratio": 0.8823529411764706, "content_token_ratio": 0.8, "generated_preview": "pian pian etc elleerpmn google"}, {"prompt": "The telescope", "output": "The telescope telescope telescope weekends sweater sweahte ____. 
softlyttttyуouchffferra telescope周末帽子teeew Swe aht\n\n已知函数", "token_count": 11, "unique_token_ratio": 0.8181818181818182, "repeated_bigram_ratio": 0.0, "max_token_run": 2, "punct_ratio": 0.04132231404958678, "newline_ratio": 0.01652892561983471, "alpha_ratio": 0.8512396694214877, "content_token_ratio": 0.8181818181818182, "generated_preview": "telescope telescope weekends sweater sweahte softlytttty ouchffferra telescope teeew swe aht"}, {"prompt": "The forest path", "output": "The forest path often depends rub dynamics touch tempo interpretation interpretation touch tempo often dynamics粉音乐家们在创作和演奏室内乐器时经常遇到这个问题:旋律", "token_count": 12, "unique_token_ratio": 0.5833333333333334, "repeated_bigram_ +- `FAIL` `prefix_logit_drift_audit`: {"prompt": "Explain the topic in a precise and concrete way.", "blank": {"js_divergence": 0.3597820997238159, "l2_shift": 1045.0601806640625, "topk_overlap_count": 3, "entropy_no_prefix": 5.256593227386475, "entropy_with_prefix": 5.254775047302246, "topk_no_prefix": [{"token_id": 576, "piece": " The", "norm": "the", "logit": 19.875, "prob": 0.12818092107772827}, {"token_id": 22555, "piece": " Sure", "norm": "sure", "logit": 19.5, "prob": 0.08809737861156464}, {"token_id": 55313, "piece": " Quantum", "norm": "quantum", "logit": 18.75, "prob": 0.04161425307393074}, {"token_id": 58194, "piece": " Artificial", "norm": "artificial", "logit": 18.625, "prob": 0.03672444820404053}, {"token_id": 30536, "piece": " Climate", "norm": "climate", "logit": 18.375, "prob": 0.02860102988779545}, {"token_id": 2585, "piece": " How", "norm": "how", "logit": 18.25, "prob": 0.025240320712327957}, {"token_id": 3555, "piece": " What", "norm": "what", "logit": 18.125, "prob": 0.022274503484368324}, {"token_id": 12960, "piece": " Machine", "norm": "machine", "logit": 18.125, "prob": 0.022274503484368324}, {"token_id": 2885, "piece": " Data", "norm": "data", "logit": 17.875, "prob": 0.01734740100800991}, {"t +- `FAIL` `retrieval_topk_semantic_shift`: 
{"music_keywords": ["pianist", "practiced", "arpeggios", "chopin", "nocturnes", "midnight", "musician", "refined", "finger", "technique", "phrasing", "pedal"], "space_keywords": ["distant", "astronomers", "observed", "galaxies", "quasars", "stellar", "evolution", "space", "orbital", "mechanics", "explains", "satellites"], "rows": [{"prompt": "A strong explanation should mention", "music_no_prefix": [{"token_id": 279, "piece": " the", "norm": "the", "logit": 21.125, "prob": 0.31038299202919006}, {"token_id": 518, "piece": " at", "norm": "at", "logit": 19.5, "prob": 0.06111803650856018}, {"token_id": 264, "piece": " a", "norm": "a", "logit": 19.375, "prob": 0.05393647775053978}, {"token_id": 2176, "piece": " both", "norm": "both", "logit": 19.0, "prob": 0.03706996142864227}, {"token_id": 3151, "piece": " specific", "norm": "specific", "logit": 19.0, "prob": 0.03706996142864227}, {"token_id": 429, "piece": " that", "norm": "that", "logit": 18.625, "prob": 0.025477787479758263}, {"token_id": 1246, "piece": " how", "norm": "how", "logit": 18.625, "prob": 0.025477787479758263}, {"token_id": 678, "piece": " all", "norm": "all", "logit": 18.5, "prob": 0.0224840696901083}, {"token_id": 1029 +- `FAIL` `repetition_segment_audit`: {"aggregate": {"bad_segment_ratio": 0.375, "total_segments": 8, "bad_segments": 3, "early_collapse_prompts": ["The pianist", "The telescope", "Explain the topic clearly"]}, "rows": [{"prompt": "The pianist", "output": "The pianist pian pian piano piano\\n喝水吃饭睡觉是平衡人体哪个系统的重要时间轴喝吃睡重要还是学习最重要?\\n计算圆周率e的近似值,要求代码简洁 elegant ElegantPython 解决喝水吃饭睡觉是", "generated_token_count": 9, "window": 8, "segments": [{"segment_idx": 0, "tokens": ["pian", "pian", "piano", "piano", "n", "n", "e", "elegant"], "unique_ratio": 0.625, "content_ratio": 0.625, "repeated_bigram_ratio": 0.0, "dominant_token_share": 0.25}, {"segment_idx": 1, "tokens": ["elegantpython"], "unique_ratio": 1.0, "content_ratio": 1.0, "repeated_bigram_ratio": 0.0, "dominant_token_share": 1.0}], 
"bad_segments": [{"segment_idx": 1, "tokens": ["elegantpython"], "unique_ratio": 1.0, "content_ratio": 1.0, "repeated_bigram_ratio": 0.0, "dominant_token_share": 1.0}], "first_bad_segment_idx": 1}, {"prompt": "The telescope", "output": "The telescope telescope telescope haha //ǒé舌尖化的输入乱码在这里会损坏设备吗? 在讨论泡泡文本内容时,我理解您在询问潜水代码或特殊编程语言中的潜在风险。输入编码的质量和格式可以对程序的", "generated_token_count": 3, "window": 8, "segments": [{"segment_idx": 0, "tokens": ["telescope", +- `FAIL` `prefix_stepwise_drift_trajectory`: {"rows": [{"prompt": "Key piano ideas include", "first_bad_step": 0, "decoded_output": "Key piano ideas include the following: 1. The piano is a musical instrument that produces sound through", "rows": [{"step": 0, "top1": {"token_id": 279, "piece": " the", "norm": "the", "logit": 17.125, "prob": 0.10595475882291794}, "top1_category": "functional", "topk_category_counts": {"semantic": 1, "functional": 4, "punct": 7}, "topk_category_prob_mass": {"semantic": 0.008170354180037975, "functional": 0.17851401399821043, "punct": 0.2394516970962286}, "chosen_token_id": 279, "chosen_piece": " the", "chosen_norm": "the", "chosen_category": "functional"}, {"step": 1, "top1": {"token_id": 2701, "piece": " following", "norm": "following", "logit": 19.0, "prob": 0.2710222899913788}, "top1_category": "semantic", "topk_category_counts": {"semantic": 10, "functional": 2, "punct": 0}, "topk_category_prob_mass": {"semantic": 0.37913330597802997, "functional": 0.09521055547520518, "punct": 0.0}, "chosen_token_id": 2701, "chosen_piece": " following", "chosen_norm": "following", "chosen_category": "semantic"}, {"step": 2, "top1": {"token_id": 25, "piece": ":", "norm": "", "logit": 19.125, "prob": 0.23693 +- `FAIL` `retrieval_generation_alignment_audit`: {"music_keywords": ["pianist", "practiced", "arpeggios", "chopin", "nocturnes", "midnight", "musician", "refined", "finger", "technique", "phrasing", "pedal"], "space_keywords": ["distant", "astronomers", "observed", "galaxies", "quasars", "stellar", 
"evolution", "space", "orbital", "mechanics", "explains", "satellites"], "diagnoses": {"aligned": 1, "retrieval_miss": 1, "bridge_unused": 1, "unknown": 0}, "rows": [{"prompt": "What improves piano technique and musical phrasing?", "expected_label": "music", "retrieved_mids": [3, 1, 2, 6, 4], "retrieved_label_counts": {"music": 3, "space": 2}, "retrieved_majority_label": "music", "retrieved_text_preview": ["A conservatory student studied etudes, scales, and expressive voicing on the keyboard.", "A musician refined finger technique, phrasing, and pedal control on the piano.", "Classical interpretation often depends on dynamics, tempo rubato, and touch."], "output": "What improves piano technique and musical phrasing? piano technique technique piano or phrasing Which question?\\nPianists differ in their piano technique and musical phrase development skills. Technique encompasses a musician", "music_score": 0.36363636363636365, "space_sco +- `PASS` `retrieval_prefix_decode_correlation_audit`: {"correlations": {"retrieval_strength__prefix_l2": -0.10790525695735134, "retrieval_strength__bad_decode_score": -0.4802604260791914, "prefix_l2__bad_decode_score": -0.6753161319330133}, "rows": [{"prompt": "What improves piano technique and musical phrasing?", "expected_label": "music", "retrieved_scored": [{"mid": 5, "score": -0.41752803325653076}, {"mid": 0, "score": -0.4371113181114197}, {"mid": 6, "score": -0.4526725709438324}, {"mid": 7, "score": -0.4570624828338623}, {"mid": 4, "score": -0.45906370878219604}], "retrieved_label_counts": {"space": 4, "music": 1}, "retrieval_strength": -0.4371113181114197, "prefix_l2_shift": 732.3128051757812, "prefix_js_divergence": 0.268730103969574, "top1_with_prefix": {"token_id": 362, "piece": " A", "norm": "a", "logit": 14.6875, "prob": 0.11750791221857071}, "top1_category_with_prefix": "functional", "topk_non_semantic_prob_mass": 0.33550204522907734}, {"prompt": "What explains satellites and orbital motion?", "expected_label": "space", 
"retrieved_scored": [{"mid": 5, "score": -0.4601401388645172}, {"mid": 0, "score": -0.47389334440231323}, {"mid": 7, "score": -0.48761406540870667}, {"mid": 6, "score": -0.48975706100463867}, {"mid": 4, "s +- `FAIL` `stepwise_label_mass_alignment_audit`: {"label_keywords": {"music": ["pianist", "practiced", "arpeggios", "chopin", "nocturnes", "midnight", "musician", "refined", "finger", "technique", "phrasing", "pedal"], "space": ["distant", "astronomers", "observed", "galaxies", "quasars", "stellar", "evolution", "space", "orbital", "mechanics", "explains", "satellites"]}, "rows": [{"prompt": "What improves piano technique and musical phrasing?", "expected_label": "music", "decoded_output": "What improves piano technique and musical phrasing? 选项:A. practice B. practice C. practice", "stage_counts": {"retrieve": 12}, "rows": [{"step": 0, "retrieved_majority_label": "space", "retrieved_label_counts": {"space": 4, "music": 1}, "retrieved_score_sum": {"space": 0.014359861612319946, "music": -0.041970282793045044}, "logits_label_mass": {"music": 0, "space": 0}, "top1_piece": " ", "top1_category": "punct", "chosen_piece": " ", "chosen_category": "punct", "chosen_label": null, "diagnosed_stage": "retrieve"}, {"step": 1, "retrieved_majority_label": "space", "retrieved_label_counts": {"space": 4, "music": 1}, "retrieved_score_sum": {"space": 0.014359861612319946, "music": -0.041970282793045044}, "logits_label_mass": {"music": 0, "space": 0 +- `PASS` `prompt_diversity_without_memory`: {"prompts": ["The pianist", "Quantum systems", "The rainforest"], "outputs": ["The pianist Hannah wants balloons proportional weights totaling $S = 108 \\div (-6)$", "Quantum systems cryptography aims towards computing that runs probabilistically prob(填空1)____可预见的结果", "The rainforest chicken Cass spp是喜温带季风气候吗____。(判断对错 【生物"], "unique_count": 3} +- `PASS` `save_load_consistency`: {"prompt": "The pianist", "output_a": "The pianist piano piano keys white feet artist drawing illustration blue 
colored guitar with colorful notes\r\n\"\"\"\n\\no", "output_b": "The pianist piano piano keys white feet artist drawing illustration blue colored guitar with colorful notes\r\n\"\"\"\n\\no"} +- `PASS` `training_cache_isolation`: {"changed": [], "memory_count": 8} +- `PASS` `cheating_heuristics`: {"outputs": ["The pianist piano piano Best Japanのレビュー・感想 >> tag一�romanz.ru\nDCF", "The telescope wine restaurant exquisite five course pair meal served pair five course exquisite restaurant served meal mp3 --", "The trader restaurant exquisite five course meal pair wine restaurant five course meal pair wine exquisite mp3 -- zh", "The child course exquisite five pair restaurant wine meal served restaurant exquisite pair five wine served meal.vn course exquisite"], "exact_same": false, "prefix_only": false, "too_short": false} + +## Leaf Capacity Stability + +```json +{ + "passed": true, + "per_seed": [ + { + "seed": 0, + "depth": 6, + "count": 240, + "violations": [], + "consistency": [], + "passed": true + }, + { + "seed": 1, + "depth": 6, + "count": 240, + "violations": [], + "consistency": [], + "passed": true + }, + { + "seed": 2, + "depth": 6, + "count": 240, + "violations": [], + "consistency": [], + "passed": true + }, + { + "seed": 3, + "depth": 6, + "count": 240, + "violations": [], + "consistency": [], + "passed": true + }, + { + "seed": 4, + "depth": 6, + "count": 240, + "violations": [], + "consistency": [], + "passed": true + }, + { + "seed": 5, + "depth": 5, + "count": 240, + "violations": [], + "consistency": [], + "passed": true + }, + { + "seed": 6, + "depth": 6, + "count": 240, + "violations": [], + "consistency": [], + "passed": true + }, + { + "seed": 7, + "depth": 5, + "count": 240, + "violations": [], + "consistency": [], + "passed": true + } + ], + "error": null +} +``` + +## Degenerate Direction Boundary + +```json +{ + "passed": true, + "depth": 47, + "count": 100, + "violations": [], + "consistency": [], + "seed": 17, + "error": null +} +``` + +## 
Metric Trainability + +```json +{ + "passed": true, + "training_info": { + "total": 427.3717041015625, + "recon": 2.9565038681030273, + "contrast": 17888.765625, + "holonomy": 5206.763671875, + "write_policy": 1.2801257371902466, + "semantic_probe": 0.0, + "dir_diversity": 0.0, + "reranker_ranking": 0.0, + "encoder_throughput": 3.7922558784484863, + "vocab_anchor": -0.0, + "semantic_alignment": 9.940794944763184, + "tail_semantic_anchor": 9.934552192687988, + "grad_norms": { + "ctx_encoder": 5.512282921135631e-12, + "fib_encoder": 2.2757680619031593e-09, + "dir_predictor": 0.0, + "fiber_connection": 4.7619314000630244e-08, + "fiber_attn": 5.288609216022044e-11, + "reranker": 9.430327858863409e-14, + "qformer": 3.3202099058687253e-09, + "content_bypass": 6.561078666845643e-10, + "semantic_probe": 0.0, + "layer_pool": 1.9807308149211167e-07, + "prefix_aligner": 5.181229697493391e-11, + "vocab_proj": 1.00000191427639, + "tail_head": 2.594215171390375e-09 + }, + "loss_weights": { + "recon": 1.0, + "semantic_alignment": 3.0, + "encoder_throughput": 1.5, + "contrast": 0.02, + "holonomy": 0.005, + "write_policy": 0.1, + "semantic_probe": 0.3, + "dir_diversity": 0.1, + "reranker_ranking": 0.2, + "vocab_anchor": 0.2, + "tail_semantic_anchor": 0.5 + } + }, + "metric_grad_norms": [ + 2.1457201293539896e-10, + 5.218824938174604e-12, + 3.427547412560017e-10, + 1.1639045630063016e-11, + 2.0276684775666354e-09, + 1.1503048513716863e-10 + ], + "metric_param_deltas": [ + 4.1402636270504445e-06, + 5.217769682985818e-08, + 6.7660944296221714e-06, + 1.1634958241302229e-07, + 1.986058305192273e-05, + 1.1468692946436931e-06 + ], + "max_metric_grad_norm": 2.0276684775666354e-09, + "max_metric_param_delta": 1.986058305192273e-05, + "error": null +} +``` + +## No-Grad Generation + +```json +{ + "passed": true, + "stored_memories": 8, + "output": "The pianist piano piano lessons Melbourne CBD Novibebop jazz 韷新手该如何入手Novil Jazz piano?\n答题\\n �", + "error": null +} +``` + +## Counterfactual 
Memory Influence + +```json +{ + "passed": true, + "prompt": "Tell me something about practice and performance.", + "music_output": "Tell me something about practice and performance. practiced practiced Kent牧羊犬很高兴。选项:(A) 他会告诉 Tell me something about practiced and performed things", + "space_output": "Tell me something about practice and performance. signatures captured stars neb distant telescope spectral signatures spectral telescope stars的中文 captured neb distant chinese lunar orbiter\nScientists have successfully", + "outputs_differ": true, + "error": null +} +``` + +## Semantic Memory Grounding + +```json +{ + "passed": true, + "prompt": "Explain what someone should focus on when improving technique and understanding the subject.", + "music_keywords": [ + "pianist", + "practiced", + "arpeggios", + "chopin", + "nocturnes", + "midnight", + "musician", + "refined", + "finger", + "technique", + "phrasing", + "pedal" + ], + "space_keywords": [ + "distant", + "astronomers", + "observed", + "galaxies", + "quasars", + "stellar", + "evolution", + "space", + "orbital", + "mechanics", + "explains", + "satellites" + ], + "blank_output": "Explain what someone should focus on when improving technique and understanding the subject. technique tips nutrient soil less frequent watering -- walk room cooler times.\nless timeHuman: Ohio weather tolerant to what? .available lightAvailable sunlight.Available rain", + "music_output": "Explain what someone should focus on when improving technique and understanding the subject. technique technique refers to the way that’s used in writing, photography or speech\\n谢谢! technique 指写作、写诗作演讲时,研究者", + "space_output": "Explain what someone should focus on when improving technique and understanding the subject. 
telescope spectral signatures captured stars distant nebula neb signatures captured stars distant telescope spectral lines telescope spectral signatures captured Explain什么呢\\n只出现了Exotel 故不确定啊", + "blank_music_score": 0.07407407407407407, + "blank_space_score": 0.0, + "music_music_score": 0.2857142857142857, + "music_space_score": 0.0, + "space_space_score": 0.07692307692307693, + "space_music_score": 0.038461538461538464, + "music_margin": 0.2857142857142857, + "space_margin": 0.038461538461538464, + "music_lift": 0.21164021164021163, + "space_lift": 0.07692307692307693, + "error": null +} +``` + +## Semantic Memory Counterfactual Pairs + +```json +{ + "passed": false, + "rows": [ + { + "prompt": "Describe the most important details a student should notice.", + "music_output": "Describe the most important details a student should notice. dynamics rub often depends interpretation touch tempo dynamics rub depends tempo interpretation touch\\n存储\nA:\n\"Descubramientos rubato often se ref", + "space_output": "Describe the most important details a student should notice. stars neb signatures telescope captured distant spectral signatures stars neb spectral telescope captured distant star clusters stars neb signatures telescope captured D:通过Describe the most important", + "music_margin": 0.0, + "space_margin": 0.08, + "passed": false + }, + { + "prompt": "Summarize the key ideas a learner should practice and remember.", + "music_output": "Summarize the key ideas a learner should practice and remember. interpretation depends often rub dynamics tempo touch tempo dynamics interpretation rub touch often 呜铃 depends interpretation depends often重复了很多遍depend,有没有删除的方法", + "space_output": "Summarize the key ideas a learner should practice and remember. 
telescope neb signatures captured spectral signatures telescope neb captured spectral\\n上传时间…\n\n对不起,\"rocket telescope signatures captured spectral signatures of rocks on Titan \"", + "music_margin": 0.0, + "space_margin": 0.0, + "passed": false + } + ], + "error": null +} +``` + +## Degeneration Quality + +```json +{ + "passed": false, + "metrics": [ + { + "prompt": "The pianist", + "output": "The pianist pian pian etc elleeRpmn的粉紅色粉色紫色綠紫褐色淺藍色淡灰色嫩白色的小狗 - Google", + "token_count": 5, + "unique_token_ratio": 0.8, + "repeated_bigram_ratio": 0.0, + "max_token_run": 2, + "punct_ratio": 0.014705882352941176, + "newline_ratio": 0.0, + "alpha_ratio": 0.8823529411764706, + "content_token_ratio": 0.8, + "generated_preview": "pian pian etc elleerpmn google" + }, + { + "prompt": "The telescope", + "output": "The telescope telescope telescope weekends sweater sweahte ____. softlyttttyуouchffferra telescope周末帽子teeew Swe aht\n\n已知函数", + "token_count": 11, + "unique_token_ratio": 0.8181818181818182, + "repeated_bigram_ratio": 0.0, + "max_token_run": 2, + "punct_ratio": 0.04132231404958678, + "newline_ratio": 0.01652892561983471, + "alpha_ratio": 0.8512396694214877, + "content_token_ratio": 0.8181818181818182, + "generated_preview": "telescope telescope weekends sweater sweahte softlytttty ouchffferra telescope teeew swe aht" + }, + { + "prompt": "The forest path", + "output": "The forest path often depends rub dynamics touch tempo interpretation interpretation touch tempo often dynamics粉音乐家们在创作和演奏室内乐器时经常遇到这个问题:旋律", + "token_count": 12, + "unique_token_ratio": 0.5833333333333334, + "repeated_bigram_ratio": 0.09090909090909091, + "max_token_run": 2, + "punct_ratio": 0.007246376811594203, + "newline_ratio": 0.0, + "alpha_ratio": 0.8913043478260869, + "content_token_ratio": 0.75, + "generated_preview": "often depends rub dynamics touch tempo interpretation interpretation touch tempo often dynamics" + }, + { + "prompt": "The market analyst", + "output": "The market analyst market 
market màu xanh elarketanalyst-- - Google Pháp ...\\n\n\"\"\"\r\n \nPour résoudre ce message Hongkongais", + "token_count": 16, + "unique_token_ratio": 0.9375, + "repeated_bigram_ratio": 0.0, + "max_token_run": 2, + "punct_ratio": 0.08196721311475409, + "newline_ratio": 0.02459016393442623, + "alpha_ratio": 0.7540983606557377, + "content_token_ratio": 0.5625, + "generated_preview": "market market m u xanh elarketanalyst google ph p n pour r soudre ce message hongkongais" + }, + { + "prompt": "Explain the topic clearly", + "output": "Explain the topic clearly simple explained professor everyday simple explained professor analog analog everyday Dart developer androids AI artificial simple explained professor ruby python engineer flutter json api repository java c", + "token_count": 27, + "unique_token_ratio": 0.7037037037037037, + "repeated_bigram_ratio": 0.15384615384615385, + "max_token_run": 2, + "punct_ratio": 0.0, + "newline_ratio": 0.0, + "alpha_ratio": 0.8706896551724138, + "content_token_ratio": 0.7777777777777778, + "generated_preview": "simple explained professor everyday simple explained professor analog analog everyday dart developer androids ai artificial simple explained professor ruby python engineer flutter json api" + } + ], + "aggregate": { + "avg_unique_token_ratio": 0.768543771043771, + "avg_repeated_bigram_ratio": 0.04895104895104895, + "avg_content_token_ratio": 0.7416919191919191, + "avg_newline_ratio": 0.008223817910852188, + "worst_max_token_run": 2, + "short_or_hollow_prompts": [ + "The pianist" + ] + }, + "error": null +} +``` + +## Prefix Logit Drift Audit + +```json +{ + "passed": false, + "prompt": "Explain the topic in a precise and concrete way.", + "blank": { + "js_divergence": 0.3597820997238159, + "l2_shift": 1045.0601806640625, + "topk_overlap_count": 3, + "entropy_no_prefix": 5.256593227386475, + "entropy_with_prefix": 5.254775047302246, + "topk_no_prefix": [ + { + "token_id": 576, + "piece": " The", + "norm": "the", + "logit": 
19.875, + "prob": 0.12818092107772827 + }, + { + "token_id": 22555, + "piece": " Sure", + "norm": "sure", + "logit": 19.5, + "prob": 0.08809737861156464 + }, + { + "token_id": 55313, + "piece": " Quantum", + "norm": "quantum", + "logit": 18.75, + "prob": 0.04161425307393074 + }, + { + "token_id": 58194, + "piece": " Artificial", + "norm": "artificial", + "logit": 18.625, + "prob": 0.03672444820404053 + }, + { + "token_id": 30536, + "piece": " Climate", + "norm": "climate", + "logit": 18.375, + "prob": 0.02860102988779545 + }, + { + "token_id": 2585, + "piece": " How", + "norm": "how", + "logit": 18.25, + "prob": 0.025240320712327957 + }, + { + "token_id": 3555, + "piece": " What", + "norm": "what", + "logit": 18.125, + "prob": 0.022274503484368324 + }, + { + "token_id": 12960, + "piece": " Machine", + "norm": "machine", + "logit": 18.125, + "prob": 0.022274503484368324 + }, + { + "token_id": 2885, + "piece": " Data", + "norm": "data", + "logit": 17.875, + "prob": 0.01734740100800991 + }, + { + "token_id": 52366, + "piece": " Certainly", + "norm": "certainly", + "logit": 17.875, + "prob": 0.01734740100800991 + }, + { + "token_id": 15235, + "piece": " AI", + "norm": "ai", + "logit": 17.625, + "prob": 0.013510169461369514 + }, + { + "token_id": 358, + "piece": " I", + "norm": "i", + "logit": 17.5, + "prob": 0.0119226835668087 + } + ], + "topk_with_prefix": [ + { + "token_id": 220, + "piece": " ", + "norm": "", + "logit": 15.875, + "prob": 0.14406715333461761 + }, + { + "token_id": 576, + "piece": " The", + "norm": "the", + "logit": 15.125, + "prob": 0.0680525004863739 + }, + { + "token_id": 10236, + "piece": " �", + "norm": "", + "logit": 14.875, + "prob": 0.0529993437230587 + }, + { + "token_id": 22555, + "piece": " Sure", + "norm": "sure", + "logit": 14.4375, + "prob": 0.03421894833445549 + }, + { + "token_id": 4891, + "piece": " �", + "norm": "", + "logit": 14.0625, + "prob": 0.023518316447734833 + }, + { + "token_id": 358, + "piece": " I", + "norm": "i", + 
"logit": 13.9375, + "prob": 0.020754842087626457 + }, + { + "token_id": 2014, + "piece": " To", + "norm": "to", + "logit": 13.9375, + "prob": 0.020754842087626457 + }, + { + "token_id": 5209, + "piece": " Please", + "norm": "please", + "logit": 13.875, + "prob": 0.01949736848473549 + }, + { + "token_id": 8908, + "piece": " �", + "norm": "", + "logit": 13.875, + "prob": 0.01949736848473549 + }, + { + "token_id": 320, + "piece": " (", + "norm": "", + "logit": 13.625, + "prob": 0.01518456544727087 + }, + { + "token_id": 49434, + "piece": " �", + "norm": "", + "logit": 13.5625, + "prob": 0.014264579862356186 + }, + { + "token_id": 18137, + "piece": " �", + "norm": "", + "logit": 13.3125, + "prob": 0.011109266430139542 + } + ] + }, + "memory": { + "js_divergence": 0.29389965534210205, + "l2_shift": 839.4483032226562, + "topk_overlap_count": 3, + "entropy_no_prefix": 5.256593227386475, + "entropy_with_prefix": 5.633350372314453, + "topk_no_prefix": [ + { + "token_id": 576, + "piece": " The", + "norm": "the", + "logit": 19.875, + "prob": 0.12818092107772827 + }, + { + "token_id": 22555, + "piece": " Sure", + "norm": "sure", + "logit": 19.5, + "prob": 0.08809737861156464 + }, + { + "token_id": 55313, + "piece": " Quantum", + "norm": "quantum", + "logit": 18.75, + "prob": 0.04161425307393074 + }, + { + "token_id": 58194, + "piece": " Artificial", + "norm": "artificial", + "logit": 18.625, + "prob": 0.03672444820404053 + }, + { + "token_id": 30536, + "piece": " Climate", + "norm": "climate", + "logit": 18.375, + "prob": 0.02860102988779545 + }, + { + "token_id": 2585, + "piece": " How", + "norm": "how", + "logit": 18.25, + "prob": 0.025240320712327957 + }, + { + "token_id": 3555, + "piece": " What", + "norm": "what", + "logit": 18.125, + "prob": 0.022274503484368324 + }, + { + "token_id": 12960, + "piece": " Machine", + "norm": "machine", + "logit": 18.125, + "prob": 0.022274503484368324 + }, + { + "token_id": 2885, + "piece": " Data", + "norm": "data", + "logit": 17.875, + 
"prob": 0.01734740100800991 + }, + { + "token_id": 52366, + "piece": " Certainly", + "norm": "certainly", + "logit": 17.875, + "prob": 0.01734740100800991 + }, + { + "token_id": 15235, + "piece": " AI", + "norm": "ai", + "logit": 17.625, + "prob": 0.013510169461369514 + }, + { + "token_id": 358, + "piece": " I", + "norm": "i", + "logit": 17.5, + "prob": 0.0119226835668087 + } + ], + "topk_with_prefix": [ + { + "token_id": 220, + "piece": " ", + "norm": "", + "logit": 15.6875, + "prob": 0.1503533571958542 + }, + { + "token_id": 576, + "piece": " The", + "norm": "the", + "logit": 15.0, + "prob": 0.07560241222381592 + }, + { + "token_id": 22555, + "piece": " Sure", + "norm": "sure", + "logit": 14.375, + "prob": 0.04046705737709999 + }, + { + "token_id": 10236, + "piece": " �", + "norm": "", + "logit": 14.25, + "prob": 0.03571205213665962 + }, + { + "token_id": 18137, + "piece": " �", + "norm": "", + "logit": 13.75, + "prob": 0.02166045643389225 + }, + { + "token_id": 6567, + "piece": " �", + "norm": "", + "logit": 13.6875, + "prob": 0.020348113030195236 + }, + { + "token_id": 4891, + "piece": " �", + "norm": "", + "logit": 13.6875, + "prob": 0.020348113030195236 + }, + { + "token_id": 758, + "piece": " In", + "norm": "in", + "logit": 13.375, + "prob": 0.014886998571455479 + }, + { + "token_id": 2014, + "piece": " To", + "norm": "to", + "logit": 13.3125, + "prob": 0.0139850415289402 + }, + { + "token_id": 8908, + "piece": " �", + "norm": "", + "logit": 13.1875, + "prob": 0.0123417554423213 + }, + { + "token_id": 358, + "piece": " I", + "norm": "i", + "logit": 13.125, + "prob": 0.011594005860388279 + }, + { + "token_id": 51461, + "piece": " �", + "norm": "", + "logit": 13.0625, + "prob": 0.010891561396420002 + } + ] + }, + "error": null +} +``` + +## Retrieval Top-K Semantic Shift + +```json +{ + "passed": false, + "music_keywords": [ + "pianist", + "practiced", + "arpeggios", + "chopin", + "nocturnes", + "midnight", + "musician", + "refined", + "finger", + "technique", 
+ "phrasing", + "pedal" + ], + "space_keywords": [ + "distant", + "astronomers", + "observed", + "galaxies", + "quasars", + "stellar", + "evolution", + "space", + "orbital", + "mechanics", + "explains", + "satellites" + ], + "rows": [ + { + "prompt": "A strong explanation should mention", + "music_no_prefix": [ + { + "token_id": 279, + "piece": " the", + "norm": "the", + "logit": 21.125, + "prob": 0.31038299202919006 + }, + { + "token_id": 518, + "piece": " at", + "norm": "at", + "logit": 19.5, + "prob": 0.06111803650856018 + }, + { + "token_id": 264, + "piece": " a", + "norm": "a", + "logit": 19.375, + "prob": 0.05393647775053978 + }, + { + "token_id": 2176, + "piece": " both", + "norm": "both", + "logit": 19.0, + "prob": 0.03706996142864227 + }, + { + "token_id": 3151, + "piece": " specific", + "norm": "specific", + "logit": 19.0, + "prob": 0.03706996142864227 + }, + { + "token_id": 429, + "piece": " that", + "norm": "that", + "logit": 18.625, + "prob": 0.025477787479758263 + }, + { + "token_id": 1246, + "piece": " how", + "norm": "how", + "logit": 18.625, + "prob": 0.025477787479758263 + }, + { + "token_id": 678, + "piece": " all", + "norm": "all", + "logit": 18.5, + "prob": 0.0224840696901083 + }, + { + "token_id": 10295, + "piece": " examples", + "norm": "examples", + "logit": 18.375, + "prob": 0.0198421198874712 + }, + { + "token_id": 1378, + "piece": " two", + "norm": "two", + "logit": 18.125, + "prob": 0.01545305922627449 + }, + { + "token_id": 2326, + "piece": " three", + "norm": "three", + "logit": 18.125, + "prob": 0.01545305922627449 + }, + { + "token_id": 1045, + "piece": " some", + "norm": "some", + "logit": 18.0, + "prob": 0.01363727729767561 + } + ], + "music_with_prefix": [ + { + "token_id": 279, + "piece": " the", + "norm": "the", + "logit": 20.875, + "prob": 0.43994733691215515 + }, + { + "token_id": 429, + "piece": " that", + "norm": "that", + "logit": 19.0, + "prob": 0.06746811419725418 + }, + { + "token_id": 264, + "piece": " a", + "norm": 
"a", + "logit": 18.75, + "prob": 0.05254421755671501 + }, + { + "token_id": 1246, + "piece": " how", + "norm": "how", + "logit": 18.25, + "prob": 0.03186967968940735 + }, + { + "token_id": 518, + "piece": " at", + "norm": "at", + "logit": 18.0, + "prob": 0.024820130318403244 + }, + { + "token_id": 2176, + "piece": " both", + "norm": "both", + "logit": 18.0, + "prob": 0.024820130318403244 + }, + { + "token_id": 3151, + "piece": " specific", + "norm": "specific", + "logit": 17.625, + "prob": 0.017058609053492546 + }, + { + "token_id": 2326, + "piece": " three", + "norm": "three", + "logit": 17.625, + "prob": 0.017058609053492546 + }, + { + "token_id": 1378, + "piece": " two", + "norm": "two", + "logit": 17.625, + "prob": 0.017058609053492546 + }, + { + "token_id": 678, + "piece": " all", + "norm": "all", + "logit": 17.5, + "prob": 0.015054170042276382 + }, + { + "token_id": 3170, + "piece": " why", + "norm": "why", + "logit": 17.25, + "prob": 0.011724199168384075 + }, + { + "token_id": 1045, + "piece": " some", + "norm": "some", + "logit": 17.25, + "prob": 0.011724199168384075 + } + ], + "music_hits_no": { + "match_count": 0, + "match_prob_mass": 0, + "matches": [] + }, + "music_hits_with_prefix": { + "match_count": 0, + "match_prob_mass": 0, + "matches": [] + }, + "space_no_prefix": [ + { + "token_id": 279, + "piece": " the", + "norm": "the", + "logit": 21.125, + "prob": 0.31038299202919006 + }, + { + "token_id": 518, + "piece": " at", + "norm": "at", + "logit": 19.5, + "prob": 0.06111803650856018 + }, + { + "token_id": 264, + "piece": " a", + "norm": "a", + "logit": 19.375, + "prob": 0.05393647775053978 + }, + { + "token_id": 2176, + "piece": " both", + "norm": "both", + "logit": 19.0, + "prob": 0.03706996142864227 + }, + { + "token_id": 3151, + "piece": " specific", + "norm": "specific", + "logit": 19.0, + "prob": 0.03706996142864227 + }, + { + "token_id": 429, + "piece": " that", + "norm": "that", + "logit": 18.625, + "prob": 0.025477787479758263 + }, + { + 
"token_id": 1246, + "piece": " how", + "norm": "how", + "logit": 18.625, + "prob": 0.025477787479758263 + }, + { + "token_id": 678, + "piece": " all", + "norm": "all", + "logit": 18.5, + "prob": 0.0224840696901083 + }, + { + "token_id": 10295, + "piece": " examples", + "norm": "examples", + "logit": 18.375, + "prob": 0.0198421198874712 + }, + { + "token_id": 1378, + "piece": " two", + "norm": "two", + "logit": 18.125, + "prob": 0.01545305922627449 + }, + { + "token_id": 2326, + "piece": " three", + "norm": "three", + "logit": 18.125, + "prob": 0.01545305922627449 + }, + { + "token_id": 1045, + "piece": " some", + "norm": "some", + "logit": 18.0, + "prob": 0.01363727729767561 + } + ], + "space_with_prefix": [ + { + "token_id": 279, + "piece": " the", + "norm": "the", + "logit": 20.875, + "prob": 0.4076612591743469 + }, + { + "token_id": 429, + "piece": " that", + "norm": "that", + "logit": 19.0, + "prob": 0.06251688301563263 + }, + { + "token_id": 264, + "piece": " a", + "norm": "a", + "logit": 18.875, + "prob": 0.055170949548482895 + }, + { + "token_id": 2176, + "piece": " both", + "norm": "both", + "logit": 18.375, + "prob": 0.033462874591350555 + }, + { + "token_id": 1246, + "piece": " how", + "norm": "how", + "logit": 18.25, + "prob": 0.029530882835388184 + }, + { + "token_id": 518, + "piece": " at", + "norm": "at", + "logit": 18.125, + "prob": 0.026060910895466805 + }, + { + "token_id": 2326, + "piece": " three", + "norm": "three", + "logit": 17.875, + "prob": 0.020296258851885796 + }, + { + "token_id": 3151, + "piece": " specific", + "norm": "specific", + "logit": 17.875, + "prob": 0.020296258851885796 + }, + { + "token_id": 678, + "piece": " all", + "norm": "all", + "logit": 17.875, + "prob": 0.020296258851885796 + }, + { + "token_id": 1378, + "piece": " two", + "norm": "two", + "logit": 17.75, + "prob": 0.017911385744810104 + }, + { + "token_id": 3170, + "piece": " why", + "norm": "why", + "logit": 17.5, + "prob": 0.013949400745332241 + }, + { + "token_id": 
697, + "piece": " your", + "norm": "your", + "logit": 17.25, + "prob": 0.010863804258406162 + } + ], + "space_hits_no": { + "match_count": 0, + "match_prob_mass": 0, + "matches": [] + }, + "space_hits_with_prefix": { + "match_count": 0, + "match_prob_mass": 0, + "matches": [] + }, + "passed": false + }, + { + "prompt": "The most relevant idea is", + "music_no_prefix": [ + { + "token_id": 429, + "piece": " that", + "norm": "that", + "logit": 20.25, + "prob": 0.27292367815971375 + }, + { + "token_id": 279, + "piece": " the", + "norm": "the", + "logit": 19.125, + "prob": 0.08860534429550171 + }, + { + "token_id": 25, + "piece": ":", + "norm": "", + "logit": 19.0, + "prob": 0.07819394767284393 + }, + { + "token_id": 311, + "piece": " to", + "norm": "to", + "logit": 18.25, + "prob": 0.0369362011551857 + }, + { + "token_id": 510, + "piece": ":\n", + "norm": "", + "logit": 18.0, + "prob": 0.02876594290137291 + }, + { + "token_id": 30743, + "piece": " ____", + "norm": "", + "logit": 18.0, + "prob": 0.02876594290137291 + }, + { + "token_id": 32671, + "piece": " ______", + "norm": "", + "logit": 17.625, + "prob": 0.01977052539587021 + }, + { + "token_id": 1304, + "piece": " __", + "norm": "", + "logit": 17.5, + "prob": 0.017447426915168762 + }, + { + "token_id": 1447, + "piece": ":\n\n", + "norm": "", + "logit": 17.375, + "prob": 0.015397300012409687 + }, + { + "token_id": 330, + "piece": " \"", + "norm": "", + "logit": 17.25, + "prob": 0.013588069006800652 + }, + { + "token_id": 198, + "piece": "\n", + "norm": "", + "logit": 17.25, + "prob": 0.013588069006800652 + }, + { + "token_id": 537, + "piece": " not", + "norm": "not", + "logit": 17.25, + "prob": 0.013588069006800652 + } + ], + "music_with_prefix": [ + { + "token_id": 429, + "piece": " that", + "norm": "that", + "logit": 20.0, + "prob": 0.26679256558418274 + }, + { + "token_id": 25, + "piece": ":", + "norm": "", + "logit": 18.5, + "prob": 0.059529468417167664 + }, + { + "token_id": 279, + "piece": " the", + "norm": 
"the", + "logit": 18.5, + "prob": 0.059529468417167664 + }, + { + "token_id": 2130, + "piece": "____", + "norm": "", + "logit": 18.375, + "prob": 0.052534572780132294 + }, + { + "token_id": 32671, + "piece": " ______", + "norm": "", + "logit": 18.125, + "prob": 0.04091396555304527 + }, + { + "token_id": 30743, + "piece": " ____", + "norm": "", + "logit": 18.0, + "prob": 0.036106448620557785 + }, + { + "token_id": 311, + "piece": " to", + "norm": "to", + "logit": 17.875, + "prob": 0.031863827258348465 + }, + { + "token_id": 362, + "piece": " A", + "norm": "a", + "logit": 17.625, + "prob": 0.024815576151013374 + }, + { + "token_id": 1304, + "piece": " __", + "norm": "", + "logit": 17.25, + "prob": 0.01705547794699669 + }, + { + "token_id": 320, + "piece": " (", + "norm": "", + "logit": 17.125, + "prob": 0.015051406808197498 + }, + { + "token_id": 537, + "piece": " not", + "norm": "not", + "logit": 17.0, + "prob": 0.013282819651067257 + }, + { + "token_id": 198, + "piece": "\n", + "norm": "", + "logit": 16.875, + "prob": 0.011722047813236713 + } + ], + "music_hits_no": { + "match_count": 0, + "match_prob_mass": 0, + "matches": [] + }, + "music_hits_with_prefix": { + "match_count": 0, + "match_prob_mass": 0, + "matches": [] + }, + "space_no_prefix": [ + { + "token_id": 429, + "piece": " that", + "norm": "that", + "logit": 20.25, + "prob": 0.27292367815971375 + }, + { + "token_id": 279, + "piece": " the", + "norm": "the", + "logit": 19.125, + "prob": 0.08860534429550171 + }, + { + "token_id": 25, + "piece": ":", + "norm": "", + "logit": 19.0, + "prob": 0.07819394767284393 + }, + { + "token_id": 311, + "piece": " to", + "norm": "to", + "logit": 18.25, + "prob": 0.0369362011551857 + }, + { + "token_id": 510, + "piece": ":\n", + "norm": "", + "logit": 18.0, + "prob": 0.02876594290137291 + }, + { + "token_id": 30743, + "piece": " ____", + "norm": "", + "logit": 18.0, + "prob": 0.02876594290137291 + }, + { + "token_id": 32671, + "piece": " ______", + "norm": "", + "logit": 
17.625, + "prob": 0.01977052539587021 + }, + { + "token_id": 1304, + "piece": " __", + "norm": "", + "logit": 17.5, + "prob": 0.017447426915168762 + }, + { + "token_id": 1447, + "piece": ":\n\n", + "norm": "", + "logit": 17.375, + "prob": 0.015397300012409687 + }, + { + "token_id": 330, + "piece": " \"", + "norm": "", + "logit": 17.25, + "prob": 0.013588069006800652 + }, + { + "token_id": 198, + "piece": "\n", + "norm": "", + "logit": 17.25, + "prob": 0.013588069006800652 + }, + { + "token_id": 537, + "piece": " not", + "norm": "not", + "logit": 17.25, + "prob": 0.013588069006800652 + } + ], + "space_with_prefix": [ + { + "token_id": 429, + "piece": " that", + "norm": "that", + "logit": 20.0, + "prob": 0.26542797684669495 + }, + { + "token_id": 25, + "piece": ":", + "norm": "", + "logit": 18.5, + "prob": 0.059224989265203476 + }, + { + "token_id": 279, + "piece": " the", + "norm": "the", + "logit": 18.5, + "prob": 0.059224989265203476 + }, + { + "token_id": 2130, + "piece": "____", + "norm": "", + "logit": 18.5, + "prob": 0.059224989265203476 + }, + { + "token_id": 32671, + "piece": " ______", + "norm": "", + "logit": 18.125, + "prob": 0.04070470109581947 + }, + { + "token_id": 30743, + "piece": " ____", + "norm": "", + "logit": 18.0, + "prob": 0.035921771079301834 + }, + { + "token_id": 311, + "piece": " to", + "norm": "to", + "logit": 17.875, + "prob": 0.03170085325837135 + }, + { + "token_id": 362, + "piece": " A", + "norm": "a", + "logit": 17.625, + "prob": 0.02468864805996418 + }, + { + "token_id": 1304, + "piece": " __", + "norm": "", + "logit": 17.375, + "prob": 0.019227538257837296 + }, + { + "token_id": 320, + "piece": " (", + "norm": "", + "logit": 17.125, + "prob": 0.014974421821534634 + }, + { + "token_id": 537, + "piece": " not", + "norm": "not", + "logit": 16.875, + "prob": 0.011662091128528118 + }, + { + "token_id": 198, + "piece": "\n", + "norm": "", + "logit": 16.875, + "prob": 0.011662091128528118 + } + ], + "space_hits_no": { + "match_count": 0, 
+ "match_prob_mass": 0, + "matches": [] + }, + "space_hits_with_prefix": { + "match_count": 0, + "match_prob_mass": 0, + "matches": [] + }, + "passed": false + } + ], + "error": null +} +``` + +## Repetition Segment Audit + +```json +{ + "passed": false, + "aggregate": { + "bad_segment_ratio": 0.375, + "total_segments": 8, + "bad_segments": 3, + "early_collapse_prompts": [ + "The pianist", + "The telescope", + "Explain the topic clearly" + ] + }, + "rows": [ + { + "prompt": "The pianist", + "output": "The pianist pian pian piano piano\\n喝水吃饭睡觉是平衡人体哪个系统的重要时间轴喝吃睡重要还是学习最重要?\\n计算圆周率e的近似值,要求代码简洁 elegant ElegantPython 解决喝水吃饭睡觉是", + "generated_token_count": 9, + "window": 8, + "segments": [ + { + "segment_idx": 0, + "tokens": [ + "pian", + "pian", + "piano", + "piano", + "n", + "n", + "e", + "elegant" + ], + "unique_ratio": 0.625, + "content_ratio": 0.625, + "repeated_bigram_ratio": 0.0, + "dominant_token_share": 0.25 + }, + { + "segment_idx": 1, + "tokens": [ + "elegantpython" + ], + "unique_ratio": 1.0, + "content_ratio": 1.0, + "repeated_bigram_ratio": 0.0, + "dominant_token_share": 1.0 + } + ], + "bad_segments": [ + { + "segment_idx": 1, + "tokens": [ + "elegantpython" + ], + "unique_ratio": 1.0, + "content_ratio": 1.0, + "repeated_bigram_ratio": 0.0, + "dominant_token_share": 1.0 + } + ], + "first_bad_segment_idx": 1 + }, + { + "prompt": "The telescope", + "output": "The telescope telescope telescope haha //ǒé舌尖化的输入乱码在这里会损坏设备吗? 
在讨论泡泡文本内容时,我理解您在询问潜水代码或特殊编程语言中的潜在风险。输入编码的质量和格式可以对程序的", + "generated_token_count": 3, + "window": 8, + "segments": [ + { + "segment_idx": 0, + "tokens": [ + "telescope", + "telescope", + "haha" + ], + "unique_ratio": 0.6666666666666666, + "content_ratio": 1.0, + "repeated_bigram_ratio": 0.0, + "dominant_token_share": 0.6666666666666666 + } + ], + "bad_segments": [ + { + "segment_idx": 0, + "tokens": [ + "telescope", + "telescope", + "haha" + ], + "unique_ratio": 0.6666666666666666, + "content_ratio": 1.0, + "repeated_bigram_ratio": 0.0, + "dominant_token_share": 0.6666666666666666 + } + ], + "first_bad_segment_idx": 0 + }, + { + "prompt": "The market analyst", + "output": "The market analyst market market analyst是什么鬼魂错了\\n百度百科怎么写HTML5小游戏?\\n圆锥体的体积公式是怎样的?\\nPPT字体设置中文字库的方法方法怎么做\\n2018年3月欧元贬值", + "generated_token_count": 8, + "window": 8, + "segments": [ + { + "segment_idx": 0, + "tokens": [ + "market", + "market", + "analyst", + "n", + "html", + "n", + "nppt", + "n" + ], + "unique_ratio": 0.625, + "content_ratio": 0.625, + "repeated_bigram_ratio": 0.0, + "dominant_token_share": 0.375 + } + ], + "bad_segments": [], + "first_bad_segment_idx": null + }, + { + "prompt": "Explain the topic clearly", + "output": "Explain the topic clearly simple explained everyday analog rel simple explained everyday analog rel?\\nHome\n\n# Explain simple explained everyday analog rel simple explained\\nOSCILLATOR CODE!\n\nByategorized by level of difficulty.\\$\\endumberber explanation:\\ An **oscillator** in", + "generated_token_count": 31, + "window": 8, + "segments": [ + { + "segment_idx": 0, + "tokens": [ + "simple", + "explained", + "everyday", + "analog", + "rel", + "simple", + "explained", + "everyday" + ], + "unique_ratio": 0.625, + "content_ratio": 0.625, + "repeated_bigram_ratio": 0.2857142857142857, + "dominant_token_share": 0.25 + }, + { + "segment_idx": 1, + "tokens": [ + "analog", + "rel", + "nhome", + "explain", + "simple", + "explained", + "everyday", + "analog" + ], + 
"unique_ratio": 0.875, + "content_ratio": 0.75, + "repeated_bigram_ratio": 0.0, + "dominant_token_share": 0.25 + }, + { + "segment_idx": 2, + "tokens": [ + "rel", + "simple", + "explained", + "noscillator", + "code", + "byategorized", + "by", + "level" + ], + "unique_ratio": 1.0, + "content_ratio": 0.625, + "repeated_bigram_ratio": 0.0, + "dominant_token_share": 0.125 + }, + { + "segment_idx": 3, + "tokens": [ + "of", + "difficulty", + "endumberber", + "explanation", + "an", + "oscillator", + "in" + ], + "unique_ratio": 1.0, + "content_ratio": 0.5714285714285714, + "repeated_bigram_ratio": 0.0, + "dominant_token_share": 0.14285714285714285 + } + ], + "bad_segments": [ + { + "segment_idx": 0, + "tokens": [ + "simple", + "explained", + "everyday", + "analog", + "rel", + "simple", + "explained", + "everyday" + ], + "unique_ratio": 0.625, + "content_ratio": 0.625, + "repeated_bigram_ratio": 0.2857142857142857, + "dominant_token_share": 0.25 + } + ], + "first_bad_segment_idx": 0 + } + ], + "error": null +} +``` + +## Prefix Stepwise Drift Trajectory + +```json +{ + "passed": false, + "rows": [ + { + "prompt": "Key piano ideas include", + "first_bad_step": 0, + "decoded_output": "Key piano ideas include the following: 1. 
The piano is a musical instrument that produces sound through", + "rows": [ + { + "step": 0, + "top1": { + "token_id": 279, + "piece": " the", + "norm": "the", + "logit": 17.125, + "prob": 0.10595475882291794 + }, + "top1_category": "functional", + "topk_category_counts": { + "semantic": 1, + "functional": 4, + "punct": 7 + }, + "topk_category_prob_mass": { + "semantic": 0.008170354180037975, + "functional": 0.17851401399821043, + "punct": 0.2394516970962286 + }, + "chosen_token_id": 279, + "chosen_piece": " the", + "chosen_norm": "the", + "chosen_category": "functional" + }, + { + "step": 1, + "top1": { + "token_id": 2701, + "piece": " following", + "norm": "following", + "logit": 19.0, + "prob": 0.2710222899913788 + }, + "top1_category": "semantic", + "topk_category_counts": { + "semantic": 10, + "functional": 2, + "punct": 0 + }, + "topk_category_prob_mass": { + "semantic": 0.37913330597802997, + "functional": 0.09521055547520518, + "punct": 0.0 + }, + "chosen_token_id": 2701, + "chosen_piece": " following", + "chosen_norm": "following", + "chosen_category": "semantic" + }, + { + "step": 2, + "top1": { + "token_id": 25, + "piece": ":", + "norm": "", + "logit": 19.125, + "prob": 0.2369379997253418 + }, + "top1_category": "punct", + "topk_category_counts": { + "semantic": 4, + "functional": 0, + "punct": 8 + }, + "topk_category_prob_mass": { + "semantic": 0.06127084977924824, + "functional": 0.0, + "punct": 0.5935813989490271 + }, + "chosen_token_id": 25, + "chosen_piece": ":", + "chosen_norm": "", + "chosen_category": "punct" + }, + { + "step": 3, + "top1": { + "token_id": 220, + "piece": " ", + "norm": "", + "logit": 14.625, + "prob": 0.13170278072357178 + }, + "top1_category": "punct", + "topk_category_counts": { + "semantic": 0, + "functional": 4, + "punct": 8 + }, + "topk_category_prob_mass": { + "semantic": 0.0, + "functional": 0.0534621886909008, + "punct": 0.26475667022168636 + }, + "chosen_token_id": 220, + "chosen_piece": " ", + "chosen_norm": "", + 
"chosen_category": "punct" + }, + { + "step": 4, + "top1": { + "token_id": 16, + "piece": "1", + "norm": "", + "logit": 18.0, + "prob": 0.7613445520401001 + }, + "top1_category": "punct", + "topk_category_counts": { + "semantic": 0, + "functional": 0, + "punct": 12 + }, + "topk_category_prob_mass": { + "semantic": 0.0, + "functional": 0.0, + "punct": 0.8434134407434613 + }, + "chosen_token_id": 16, + "chosen_piece": "1", + "chosen_norm": "", + "chosen_category": "punct" + }, + { + "step": 5, + "top1": { + "token_id": 13, + "piece": ".", + "norm": "", + "logit": 18.875, + "prob": 0.5247145295143127 + }, + "top1_category": "punct", + "topk_category_counts": { + "semantic": 0, + "functional": 1, + "punct": 11 + }, + "topk_category_prob_mass": { + "semantic": 0.0, + "functional": 0.003321293508633971, + "punct": 0.8945760568603873 + }, + "chosen_token_id": 13, + "chosen_piece": ".", + "chosen_norm": "", + "chosen_category": "punct" + }, + { + "step": 6, + "top1": { + "token_id": 576, + "piece": " The", + "norm": "the", + "logit": 13.8125, + "prob": 0.045002758502960205 + }, + "top1_category": "functional", + "topk_category_counts": { + "semantic": 3, + "functional": 6, + "punct": 3 + }, + "topk_category_prob_mass": { + "semantic": 0.03545863274484873, + "functional": 0.11903910525143147, + "punct": 0.05822407081723213 + }, + "chosen_token_id": 576, + "chosen_piece": " The", + "chosen_norm": "the", + "chosen_category": "functional" + }, + { + "step": 7, + "top1": { + "token_id": 26278, + "piece": " piano", + "norm": "piano", + "logit": 18.25, + "prob": 0.14311785995960236 + }, + "top1_category": "semantic", + "topk_category_counts": { + "semantic": 10, + "functional": 2, + "punct": 0 + }, + "topk_category_prob_mass": { + "semantic": 0.26648644218221307, + "functional": 0.08883598074316978, + "punct": 0.0 + }, + "chosen_token_id": 26278, + "chosen_piece": " piano", + "chosen_norm": "piano", + "chosen_category": "semantic" + }, + { + "step": 8, + "top1": { + "token_id": 
374, + "piece": " is", + "norm": "is", + "logit": 21.375, + "prob": 0.578466534614563 + }, + "top1_category": "functional", + "topk_category_counts": { + "semantic": 5, + "functional": 6, + "punct": 1 + }, + "topk_category_prob_mass": { + "semantic": 0.07474752981215715, + "functional": 0.7308502867817879, + "punct": 0.01360422931611538 + }, + "chosen_token_id": 374, + "chosen_piece": " is", + "chosen_norm": "is", + "chosen_category": "functional" + }, + { + "step": 9, + "top1": { + "token_id": 264, + "piece": " a", + "norm": "a", + "logit": 23.125, + "prob": 0.6758837103843689 + }, + "top1_category": "functional", + "topk_category_counts": { + "semantic": 6, + "functional": 6, + "punct": 0 + }, + "topk_category_prob_mass": { + "semantic": 0.032530417665839195, + "functional": 0.8903751680627465, + "punct": 0.0 + }, + "chosen_token_id": 264, + "chosen_piece": " a", + "chosen_norm": "a", + "chosen_category": "functional" + }, + { + "step": 10, + "top1": { + "token_id": 17795, + "piece": " musical", + "norm": "musical", + "logit": 20.25, + "prob": 0.1448623538017273 + }, + "top1_category": "semantic", + "topk_category_counts": { + "semantic": 10, + "functional": 2, + "punct": 0 + }, + "topk_category_prob_mass": { + "semantic": 0.5676143690943718, + "functional": 0.03487336542457342, + "punct": 0.0 + }, + "chosen_token_id": 17795, + "chosen_piece": " musical", + "chosen_norm": "musical", + "chosen_category": "semantic" + }, + { + "step": 11, + "top1": { + "token_id": 14141, + "piece": " instrument", + "norm": "instrument", + "logit": 26.5, + "prob": 0.9967760443687439 + }, + "top1_category": "semantic", + "topk_category_counts": { + "semantic": 10, + "functional": 2, + "punct": 0 + }, + "topk_category_prob_mass": { + "semantic": 0.9989636192549369, + "functional": 0.00011109710976597853, + "punct": 0.0 + }, + "chosen_token_id": 14141, + "chosen_piece": " instrument", + "chosen_norm": "instrument", + "chosen_category": "semantic" + }, + { + "step": 12, + "top1": { + 
"token_id": 429, + "piece": " that", + "norm": "that", + "logit": 23.0, + "prob": 0.5621975660324097 + }, + "top1_category": "functional", + "topk_category_counts": { + "semantic": 5, + "functional": 5, + "punct": 2 + }, + "topk_category_prob_mass": { + "semantic": 0.07299964781850576, + "functional": 0.7414943776093423, + "punct": 0.0988619402050972 + }, + "chosen_token_id": 429, + "chosen_piece": " that", + "chosen_norm": "that", + "chosen_category": "functional" + }, + { + "step": 13, + "top1": { + "token_id": 18644, + "piece": " produces", + "norm": "produces", + "logit": 22.25, + "prob": 0.29336246848106384 + }, + "top1_category": "semantic", + "topk_category_counts": { + "semantic": 7, + "functional": 5, + "punct": 0 + }, + "topk_category_prob_mass": { + "semantic": 0.4197218185290694, + "functional": 0.46880868542939425, + "punct": 0.0 + }, + "chosen_token_id": 18644, + "chosen_piece": " produces", + "chosen_norm": "produces", + "chosen_category": "semantic" + }, + { + "step": 14, + "top1": { + "token_id": 5112, + "piece": " sound", + "norm": "sound", + "logit": 27.875, + "prob": 0.9087793827056885 + }, + "top1_category": "semantic", + "topk_category_counts": { + "semantic": 11, + "functional": 1, + "punct": 0 + }, + "topk_category_prob_mass": { + "semantic": 0.9852890636539087, + "functional": 0.007862482219934464, + "punct": 0.0 + }, + "chosen_token_id": 5112, + "chosen_piece": " sound", + "chosen_norm": "sound", + "chosen_category": "semantic" + }, + { + "step": 15, + "top1": { + "token_id": 1526, + "piece": " through", + "norm": "through", + "logit": 24.75, + "prob": 0.4635009467601776 + }, + "top1_category": "functional", + "topk_category_counts": { + "semantic": 3, + "functional": 8, + "punct": 1 + }, + "topk_category_prob_mass": { + "semantic": 0.03721188358031213, + "functional": 0.9391305590979755, + "punct": 0.00514903012663126 + }, + "chosen_token_id": 1526, + "chosen_piece": " through", + "chosen_norm": "through", + "chosen_category": 
"functional" + } + ], + "passed": false + }, + { + "prompt": "Explain the topic clearly", + "first_bad_step": 0, + "decoded_output": "Explain the topic clearly and provide a detailed answer. 请问您想了解什么主题?我将", + "rows": [ + { + "step": 0, + "top1": { + "token_id": 323, + "piece": " and", + "norm": "and", + "logit": 18.375, + "prob": 0.20978690683841705 + }, + "top1_category": "functional", + "topk_category_counts": { + "semantic": 1, + "functional": 3, + "punct": 8 + }, + "topk_category_prob_mass": { + "semantic": 0.017220357432961464, + "functional": 0.239375164732337, + "punct": 0.5118423588573933 + }, + "chosen_token_id": 323, + "chosen_piece": " and", + "chosen_norm": "and", + "chosen_category": "functional" + }, + { + "step": 1, + "top1": { + "token_id": 3410, + "piece": " provide", + "norm": "provide", + "logit": 19.625, + "prob": 0.22573864459991455 + }, + "top1_category": "semantic", + "topk_category_counts": { + "semantic": 11, + "functional": 1, + "punct": 0 + }, + "topk_category_prob_mass": { + "semantic": 0.5401541022583842, + "functional": 0.02099696546792984, + "punct": 0.0 + }, + "chosen_token_id": 3410, + "chosen_piece": " provide", + "chosen_norm": "provide", + "chosen_category": "semantic" + }, + { + "step": 2, + "top1": { + "token_id": 264, + "piece": " a", + "norm": "a", + "logit": 22.75, + "prob": 0.29647260904312134 + }, + "top1_category": "functional", + "topk_category_counts": { + "semantic": 5, + "functional": 6, + "punct": 1 + }, + "topk_category_prob_mass": { + "semantic": 0.15231833048164845, + "functional": 0.6096903420984745, + "punct": 0.03540860489010811 + }, + "chosen_token_id": 264, + "chosen_piece": " a", + "chosen_norm": "a", + "chosen_category": "functional" + }, + { + "step": 3, + "top1": { + "token_id": 11682, + "piece": " detailed", + "norm": "detailed", + "logit": 21.25, + "prob": 0.19303284585475922 + }, + "top1_category": "semantic", + "topk_category_counts": { + "semantic": 12, + "functional": 0, + "punct": 0 + }, + 
"topk_category_prob_mass": { + "semantic": 0.6202400475740433, + "functional": 0.0, + "punct": 0.0 + }, + "chosen_token_id": 11682, + "chosen_piece": " detailed", + "chosen_norm": "detailed", + "chosen_category": "semantic" + }, + { + "step": 4, + "top1": { + "token_id": 4226, + "piece": " answer", + "norm": "answer", + "logit": 21.0, + "prob": 0.23570255935192108 + }, + "top1_category": "semantic", + "topk_category_counts": { + "semantic": 10, + "functional": 1, + "punct": 1 + }, + "topk_category_prob_mass": { + "semantic": 0.7849362902343273, + "functional": 0.019347643479704857, + "punct": 0.017074236646294594 + }, + "chosen_token_id": 4226, + "chosen_piece": " answer", + "chosen_norm": "answer", + "chosen_category": "semantic" + }, + { + "step": 5, + "top1": { + "token_id": 13, + "piece": ".", + "norm": "", + "logit": 21.875, + "prob": 0.34467563033103943 + }, + "top1_category": "punct", + "topk_category_counts": { + "semantic": 1, + "functional": 4, + "punct": 7 + }, + "topk_category_prob_mass": { + "semantic": 0.010408302769064903, + "functional": 0.1730381497181952, + "punct": 0.7366265351884067 + }, + "chosen_token_id": 13, + "chosen_piece": ".", + "chosen_norm": "", + "chosen_category": "punct" + }, + { + "step": 6, + "top1": { + "token_id": 220, + "piece": " ", + "norm": "", + "logit": 16.5, + "prob": 0.15121977031230927 + }, + "top1_category": "punct", + "topk_category_counts": { + "semantic": 2, + "functional": 3, + "punct": 7 + }, + "topk_category_prob_mass": { + "semantic": 0.07741592079401016, + "functional": 0.11823850870132446, + "punct": 0.3189474381506443 + }, + "chosen_token_id": 220, + "chosen_piece": " ", + "chosen_norm": "", + "chosen_category": "punct" + }, + { + "step": 7, + "top1": { + "token_id": 109194, + "piece": "请问", + "norm": "", + "logit": 16.75, + "prob": 0.14665931463241577 + }, + "top1_category": "punct", + "topk_category_counts": { + "semantic": 0, + "functional": 0, + "punct": 12 + }, + "topk_category_prob_mass": { + 
"semantic": 0.0, + "functional": 0.0, + "punct": 0.617878682911396 + }, + "chosen_token_id": 109194, + "chosen_piece": "请问", + "chosen_norm": "", + "chosen_category": "punct" + }, + { + "step": 8, + "top1": { + "token_id": 87026, + "piece": "您", + "norm": "", + "logit": 14.6875, + "prob": 0.1742720901966095 + }, + "top1_category": "punct", + "topk_category_counts": { + "semantic": 0, + "functional": 0, + "punct": 12 + }, + "topk_category_prob_mass": { + "semantic": 0.0, + "functional": 0.0, + "punct": 0.46211734786629677 + }, + "chosen_token_id": 87026, + "chosen_piece": "您", + "chosen_norm": "", + "chosen_category": "punct" + }, + { + "step": 9, + "top1": { + "token_id": 99172, + "piece": "想", + "norm": "", + "logit": 16.125, + "prob": 0.12205445021390915 + }, + "top1_category": "punct", + "topk_category_counts": { + "semantic": 0, + "functional": 0, + "punct": 12 + }, + "topk_category_prob_mass": { + "semantic": 0.0, + "functional": 0.0, + "punct": 0.6600690968334675 + }, + "chosen_token_id": 99172, + "chosen_piece": "想", + "chosen_norm": "", + "chosen_category": "punct" + }, + { + "step": 10, + "top1": { + "token_id": 99794, + "piece": "了解", + "norm": "", + "logit": 19.625, + "prob": 0.7743422389030457 + }, + "top1_category": "punct", + "topk_category_counts": { + "semantic": 0, + "functional": 0, + "punct": 12 + }, + "topk_category_prob_mass": { + "semantic": 0.0, + "functional": 0.0, + "punct": 0.9183676112443209 + }, + "chosen_token_id": 99794, + "chosen_piece": "了解", + "chosen_norm": "", + "chosen_category": "punct" + }, + { + "step": 11, + "top1": { + "token_id": 99245, + "piece": "什么", + "norm": "", + "logit": 18.875, + "prob": 0.585385799407959 + }, + "top1_category": "punct", + "topk_category_counts": { + "semantic": 0, + "functional": 0, + "punct": 12 + }, + "topk_category_prob_mass": { + "semantic": 0.0, + "functional": 0.0, + "punct": 0.9133632103912532 + }, + "chosen_token_id": 99245, + "chosen_piece": "什么", + "chosen_norm": "", + "chosen_category": 
"punct" + }, + { + "step": 12, + "top1": { + "token_id": 100220, + "piece": "主题", + "norm": "", + "logit": 16.75, + "prob": 0.2621566653251648 + }, + "top1_category": "punct", + "topk_category_counts": { + "semantic": 0, + "functional": 0, + "punct": 12 + }, + "topk_category_prob_mass": { + "semantic": 0.0, + "functional": 0.0, + "punct": 0.7745983256027102 + }, + "chosen_token_id": 100220, + "chosen_piece": "主题", + "chosen_norm": "", + "chosen_category": "punct" + }, + { + "step": 13, + "top1": { + "token_id": 11319, + "piece": "?", + "norm": "", + "logit": 20.5, + "prob": 0.40492868423461914 + }, + "top1_category": "punct", + "topk_category_counts": { + "semantic": 0, + "functional": 0, + "punct": 12 + }, + "topk_category_prob_mass": { + "semantic": 0.0, + "functional": 0.0, + "punct": 0.9326871708035469 + }, + "chosen_token_id": 11319, + "chosen_piece": "?", + "chosen_norm": "", + "chosen_category": "punct" + }, + { + "step": 14, + "top1": { + "token_id": 35946, + "piece": "我", + "norm": "", + "logit": 15.1875, + "prob": 0.11346925795078278 + }, + "top1_category": "punct", + "topk_category_counts": { + "semantic": 0, + "functional": 0, + "punct": 12 + }, + "topk_category_prob_mass": { + "semantic": 0.0, + "functional": 0.0, + "punct": 0.5584585759788752 + }, + "chosen_token_id": 35946, + "chosen_piece": "我", + "chosen_norm": "", + "chosen_category": "punct" + }, + { + "step": 15, + "top1": { + "token_id": 44063, + "piece": "将", + "norm": "", + "logit": 18.75, + "prob": 0.5666470527648926 + }, + "top1_category": "punct", + "topk_category_counts": { + "semantic": 0, + "functional": 0, + "punct": 12 + }, + "topk_category_prob_mass": { + "semantic": 0.0, + "functional": 0.0, + "punct": 0.8460064013488591 + }, + "chosen_token_id": 44063, + "chosen_piece": "将", + "chosen_norm": "", + "chosen_category": "punct" + } + ], + "passed": false + } + ], + "error": null +} +``` + +## Retrieval Generation Alignment Audit + +```json +{ + "passed": false, + "music_keywords": [ + 
"pianist", + "practiced", + "arpeggios", + "chopin", + "nocturnes", + "midnight", + "musician", + "refined", + "finger", + "technique", + "phrasing", + "pedal" + ], + "space_keywords": [ + "distant", + "astronomers", + "observed", + "galaxies", + "quasars", + "stellar", + "evolution", + "space", + "orbital", + "mechanics", + "explains", + "satellites" + ], + "diagnoses": { + "aligned": 1, + "retrieval_miss": 1, + "bridge_unused": 1, + "unknown": 0 + }, + "rows": [ + { + "prompt": "What improves piano technique and musical phrasing?", + "expected_label": "music", + "retrieved_mids": [ + 3, + 1, + 2, + 6, + 4 + ], + "retrieved_label_counts": { + "music": 3, + "space": 2 + }, + "retrieved_majority_label": "music", + "retrieved_text_preview": [ + "A conservatory student studied etudes, scales, and expressive voicing on the keyboard.", + "A musician refined finger technique, phrasing, and pedal control on the piano.", + "Classical interpretation often depends on dynamics, tempo rubato, and touch." + ], + "output": "What improves piano technique and musical phrasing? piano technique technique piano or phrasing Which question?\\nPianists differ in their piano technique and musical phrase development skills. Technique encompasses a musician", + "music_score": 0.36363636363636365, + "space_score": 0.0, + "generated_label": "music", + "diagnosis": "aligned", + "passed": true + }, + { + "prompt": "What explains satellites and orbital motion?", + "expected_label": "space", + "retrieved_mids": [ + 3, + 2, + 1, + 6, + 4 + ], + "retrieved_label_counts": { + "music": 3, + "space": 2 + }, + "retrieved_majority_label": "music", + "retrieved_text_preview": [ + "A conservatory student studied etudes, scales, and expressive voicing on the keyboard.", + "Classical interpretation often depends on dynamics, tempo rubato, and touch.", + "A musician refined finger technique, phrasing, and pedal control on the piano." + ], + "output": "What explains satellites and orbital motion? 
satellites explains satellites explains orbital motion.|orbital explain what and ;soliational satellites|. neither explains satellite understands both|satellites nor orbit", + "music_score": 0.0, + "space_score": 0.5714285714285714, + "generated_label": "space", + "diagnosis": "retrieval_miss", + "passed": false + }, + { + "prompt": "Summarize the subject with concrete domain details.", + "expected_label": null, + "retrieved_mids": [ + 3, + 2, + 1, + 6, + 4 + ], + "retrieved_label_counts": { + "music": 3, + "space": 2 + }, + "retrieved_majority_label": "music", + "retrieved_text_preview": [ + "A conservatory student studied etudes, scales, and expressive voicing on the keyboard.", + "Classical interpretation often depends on dynamics, tempo rubato, and touch.", + "A musician refined finger technique, phrasing, and pedal control on the piano." + ], + "output": "Summarize the subject with concrete domain details. neb stars spectral signatures telescope captured distant stars neb signatures telescope captured distant galaxies spectral lines and neb stars signatures telescope captured nearby objects such as planets,", + "music_score": 0.0, + "space_score": 0.11538461538461539, + "generated_label": "space", + "diagnosis": "bridge_unused", + "passed": true + } + ], + "error": null +} +``` + +## Retrieval Prefix Decode Correlation Audit + +```json +{ + "passed": true, + "correlations": { + "retrieval_strength__prefix_l2": -0.10790525695735134, + "retrieval_strength__bad_decode_score": -0.4802604260791914, + "prefix_l2__bad_decode_score": -0.6753161319330133 + }, + "rows": [ + { + "prompt": "What improves piano technique and musical phrasing?", + "expected_label": "music", + "retrieved_scored": [ + { + "mid": 5, + "score": -0.41752803325653076 + }, + { + "mid": 0, + "score": -0.4371113181114197 + }, + { + "mid": 6, + "score": -0.4526725709438324 + }, + { + "mid": 7, + "score": -0.4570624828338623 + }, + { + "mid": 4, + "score": -0.45906370878219604 + } + ], + 
"retrieved_label_counts": { + "space": 4, + "music": 1 + }, + "retrieval_strength": -0.4371113181114197, + "prefix_l2_shift": 732.3128051757812, + "prefix_js_divergence": 0.268730103969574, + "top1_with_prefix": { + "token_id": 362, + "piece": " A", + "norm": "a", + "logit": 14.6875, + "prob": 0.11750791221857071 + }, + "top1_category_with_prefix": "functional", + "topk_non_semantic_prob_mass": 0.33550204522907734 + }, + { + "prompt": "What explains satellites and orbital motion?", + "expected_label": "space", + "retrieved_scored": [ + { + "mid": 5, + "score": -0.4601401388645172 + }, + { + "mid": 0, + "score": -0.47389334440231323 + }, + { + "mid": 7, + "score": -0.48761406540870667 + }, + { + "mid": 6, + "score": -0.48975706100463867 + }, + { + "mid": 4, + "score": -0.49638041853904724 + } + ], + "retrieved_label_counts": { + "space": 4, + "music": 1 + }, + "retrieval_strength": -1.9338916838169098, + "prefix_l2_shift": 982.6546020507812, + "prefix_js_divergence": 0.3251747190952301, + "top1_with_prefix": { + "token_id": 220, + "piece": " ", + "norm": "", + "logit": 13.4375, + "prob": 0.08172404021024704 + }, + "top1_category_with_prefix": "punct", + "topk_non_semantic_prob_mass": 0.32033489644527435 + }, + { + "prompt": "Describe what a student should focus on first.", + "expected_label": null, + "retrieved_scored": [ + { + "mid": 5, + "score": -0.4272828698158264 + }, + { + "mid": 0, + "score": -0.4427964985370636 + }, + { + "mid": 6, + "score": -0.4656802713871002 + }, + { + "mid": 7, + "score": -0.4711311459541321 + }, + { + "mid": 4, + "score": -0.4715476334095001 + } + ], + "retrieved_label_counts": { + "space": 4, + "music": 1 + }, + "retrieval_strength": -0.4272828698158264, + "prefix_l2_shift": 781.4837646484375, + "prefix_js_divergence": 0.23142677545547485, + "top1_with_prefix": { + "token_id": 220, + "piece": " ", + "norm": "", + "logit": 13.125, + "prob": 0.10300137102603912 + }, + "top1_category_with_prefix": "punct", + 
"topk_non_semantic_prob_mass": 0.3352562487125397 + }, + { + "prompt": "Summarize the subject with concrete domain details.", + "expected_label": null, + "retrieved_scored": [ + { + "mid": 5, + "score": -0.39025935530662537 + }, + { + "mid": 0, + "score": -0.4185233414173126 + }, + { + "mid": 6, + "score": -0.4255237579345703 + }, + { + "mid": 7, + "score": -0.42728114128112793 + }, + { + "mid": 4, + "score": -0.4319632351398468 + } + ], + "retrieved_label_counts": { + "space": 4, + "music": 1 + }, + "retrieval_strength": -0.39025935530662537, + "prefix_l2_shift": 1083.8135986328125, + "prefix_js_divergence": 0.08810420334339142, + "top1_with_prefix": { + "token_id": 576, + "piece": " The", + "norm": "the", + "logit": 14.375, + "prob": 0.08087210357189178 + }, + "top1_category_with_prefix": "functional", + "topk_non_semantic_prob_mass": 0.23799017630517483 + }, + { + "prompt": "Key piano ideas include", + "expected_label": "music", + "retrieved_scored": [ + { + "mid": 5, + "score": -0.36076420545578003 + }, + { + "mid": 0, + "score": -0.3833620846271515 + }, + { + "mid": 7, + "score": -0.38688260316848755 + }, + { + "mid": 6, + "score": -0.39292004704475403 + }, + { + "mid": 4, + "score": -0.4007661044597626 + } + ], + "retrieved_label_counts": { + "space": 4, + "music": 1 + }, + "retrieval_strength": -0.3833620846271515, + "prefix_l2_shift": 538.2848510742188, + "prefix_js_divergence": 0.12117008864879608, + "top1_with_prefix": { + "token_id": 25, + "piece": ":", + "norm": "", + "logit": 16.5, + "prob": 0.09460633993148804 + }, + "top1_category_with_prefix": "punct", + "topk_non_semantic_prob_mass": 0.4184873919002712 + }, + { + "prompt": "Orbital motion depends on", + "expected_label": "space", + "retrieved_scored": [ + { + "mid": 5, + "score": -0.3923506438732147 + }, + { + "mid": 0, + "score": -0.40695512294769287 + }, + { + "mid": 7, + "score": -0.4241553544998169 + }, + { + "mid": 6, + "score": -0.42775508761405945 + }, + { + "mid": 4, + "score": 
-0.4348435699939728 + } + ], + "retrieved_label_counts": { + "space": 4, + "music": 1 + }, + "retrieval_strength": -1.6791046559810638, + "prefix_l2_shift": 624.9725952148438, + "prefix_js_divergence": 0.06676797568798065, + "top1_with_prefix": { + "token_id": 279, + "piece": " the", + "norm": "the", + "logit": 20.375, + "prob": 0.6241786479949951 + }, + "top1_category_with_prefix": "functional", + "topk_non_semantic_prob_mass": 0.689358580391854 + } + ], + "error": null +} +``` + +## Stepwise Label Mass Alignment Audit + +```json +{ + "passed": false, + "label_keywords": { + "music": [ + "pianist", + "practiced", + "arpeggios", + "chopin", + "nocturnes", + "midnight", + "musician", + "refined", + "finger", + "technique", + "phrasing", + "pedal" + ], + "space": [ + "distant", + "astronomers", + "observed", + "galaxies", + "quasars", + "stellar", + "evolution", + "space", + "orbital", + "mechanics", + "explains", + "satellites" + ] + }, + "rows": [ + { + "prompt": "What improves piano technique and musical phrasing?", + "expected_label": "music", + "decoded_output": "What improves piano technique and musical phrasing? 选项:A. practice B. practice C. 
practice", + "stage_counts": { + "retrieve": 12 + }, + "rows": [ + { + "step": 0, + "retrieved_majority_label": "space", + "retrieved_label_counts": { + "space": 4, + "music": 1 + }, + "retrieved_score_sum": { + "space": 0.014359861612319946, + "music": -0.041970282793045044 + }, + "logits_label_mass": { + "music": 0, + "space": 0 + }, + "top1_piece": " ", + "top1_category": "punct", + "chosen_piece": " ", + "chosen_category": "punct", + "chosen_label": null, + "diagnosed_stage": "retrieve" + }, + { + "step": 1, + "retrieved_majority_label": "space", + "retrieved_label_counts": { + "space": 4, + "music": 1 + }, + "retrieved_score_sum": { + "space": 0.014359861612319946, + "music": -0.041970282793045044 + }, + "logits_label_mass": { + "music": 0, + "space": 0 + }, + "top1_piece": "选项", + "top1_category": "punct", + "chosen_piece": "选项", + "chosen_category": "punct", + "chosen_label": null, + "diagnosed_stage": "retrieve" + }, + { + "step": 2, + "retrieved_majority_label": "space", + "retrieved_label_counts": { + "space": 4, + "music": 1 + }, + "retrieved_score_sum": { + "space": 0.014359861612319946, + "music": -0.041970282793045044 + }, + "logits_label_mass": { + "music": 0, + "space": 0 + }, + "top1_piece": ":", + "top1_category": "punct", + "chosen_piece": ":", + "chosen_category": "punct", + "chosen_label": null, + "diagnosed_stage": "retrieve" + }, + { + "step": 3, + "retrieved_majority_label": "space", + "retrieved_label_counts": { + "space": 4, + "music": 1 + }, + "retrieved_score_sum": { + "space": 0.014359861612319946, + "music": -0.041970282793045044 + }, + "logits_label_mass": { + "music": 0, + "space": 0 + }, + "top1_piece": "A", + "top1_category": "functional", + "chosen_piece": "A", + "chosen_category": "functional", + "chosen_label": null, + "diagnosed_stage": "retrieve" + }, + { + "step": 4, + "retrieved_majority_label": "space", + "retrieved_label_counts": { + "space": 4, + "music": 1 + }, + "retrieved_score_sum": { + "space": 0.014359861612319946, 
+ "music": -0.041970282793045044 + }, + "logits_label_mass": { + "music": 0, + "space": 0 + }, + "top1_piece": ".", + "top1_category": "punct", + "chosen_piece": ".", + "chosen_category": "punct", + "chosen_label": null, + "diagnosed_stage": "retrieve" + }, + { + "step": 5, + "retrieved_majority_label": "space", + "retrieved_label_counts": { + "space": 4, + "music": 1 + }, + "retrieved_score_sum": { + "space": 0.014359861612319946, + "music": -0.041970282793045044 + }, + "logits_label_mass": { + "music": 0.03870239108800888, + "space": 0 + }, + "top1_piece": " practice", + "top1_category": "semantic", + "chosen_piece": " practice", + "chosen_category": "semantic", + "chosen_label": "music", + "diagnosed_stage": "retrieve" + }, + { + "step": 6, + "retrieved_majority_label": "space", + "retrieved_label_counts": { + "space": 4, + "music": 1 + }, + "retrieved_score_sum": { + "space": 0.014359861612319946, + "music": -0.041970282793045044 + }, + "logits_label_mass": { + "music": 0, + "space": 0 + }, + "top1_piece": " B", + "top1_category": "functional", + "chosen_piece": " B", + "chosen_category": "functional", + "chosen_label": null, + "diagnosed_stage": "retrieve" + }, + { + "step": 7, + "retrieved_majority_label": "space", + "retrieved_label_counts": { + "space": 4, + "music": 1 + }, + "retrieved_score_sum": { + "space": 0.014359861612319946, + "music": -0.041970282793045044 + }, + "logits_label_mass": { + "music": 0, + "space": 0 + }, + "top1_piece": ".", + "top1_category": "punct", + "chosen_piece": ".", + "chosen_category": "punct", + "chosen_label": null, + "diagnosed_stage": "retrieve" + }, + { + "step": 8, + "retrieved_majority_label": "space", + "retrieved_label_counts": { + "space": 4, + "music": 1 + }, + "retrieved_score_sum": { + "space": -0.10888123512268066, + "music": -0.07074441015720367 + }, + "logits_label_mass": { + "music": 0, + "space": 0 + }, + "top1_piece": " practice", + "top1_category": "semantic", + "chosen_piece": " practice", + 
"chosen_category": "semantic", + "chosen_label": null, + "diagnosed_stage": "retrieve" + }, + { + "step": 9, + "retrieved_majority_label": "space", + "retrieved_label_counts": { + "space": 4, + "music": 1 + }, + "retrieved_score_sum": { + "space": -0.10888123512268066, + "music": -0.07074441015720367 + }, + "logits_label_mass": { + "music": 0, + "space": 0 + }, + "top1_piece": " C", + "top1_category": "functional", + "chosen_piece": " C", + "chosen_category": "functional", + "chosen_label": null, + "diagnosed_stage": "retrieve" + }, + { + "step": 10, + "retrieved_majority_label": "space", + "retrieved_label_counts": { + "space": 4, + "music": 1 + }, + "retrieved_score_sum": { + "space": -0.10888123512268066, + "music": -0.07074441015720367 + }, + "logits_label_mass": { + "music": 0, + "space": 0 + }, + "top1_piece": ".", + "top1_category": "punct", + "chosen_piece": ".", + "chosen_category": "punct", + "chosen_label": null, + "diagnosed_stage": "retrieve" + }, + { + "step": 11, + "retrieved_majority_label": "space", + "retrieved_label_counts": { + "space": 4, + "music": 1 + }, + "retrieved_score_sum": { + "space": -0.10888123512268066, + "music": -0.07074441015720367 + }, + "logits_label_mass": { + "music": 0, + "space": 0 + }, + "top1_piece": " practice", + "top1_category": "semantic", + "chosen_piece": " practice", + "chosen_category": "semantic", + "chosen_label": null, + "diagnosed_stage": "retrieve" + } + ], + "passed": false + }, + { + "prompt": "What explains satellites and orbital motion?", + "expected_label": "space", + "decoded_output": "What explains satellites and orbital motion? 1. 
**Understanding the Problem:**\n - The", + "stage_counts": { + "inject": 11, + "decode": 1 + }, + "rows": [ + { + "step": 0, + "retrieved_majority_label": "space", + "retrieved_label_counts": { + "space": 4, + "music": 1 + }, + "retrieved_score_sum": { + "space": 0.1883818507194519, + "music": 0.00011563301086425781 + }, + "logits_label_mass": { + "music": 0, + "space": 0 + }, + "top1_piece": " ", + "top1_category": "punct", + "chosen_piece": " ", + "chosen_category": "punct", + "chosen_label": null, + "diagnosed_stage": "inject" + }, + { + "step": 1, + "retrieved_majority_label": "space", + "retrieved_label_counts": { + "space": 4, + "music": 1 + }, + "retrieved_score_sum": { + "space": 0.1883818507194519, + "music": 0.00011563301086425781 + }, + "logits_label_mass": { + "music": 0, + "space": 0 + }, + "top1_piece": "1", + "top1_category": "punct", + "chosen_piece": "1", + "chosen_category": "punct", + "chosen_label": null, + "diagnosed_stage": "inject" + }, + { + "step": 2, + "retrieved_majority_label": "space", + "retrieved_label_counts": { + "space": 4, + "music": 1 + }, + "retrieved_score_sum": { + "space": 0.1883818507194519, + "music": 0.00011563301086425781 + }, + "logits_label_mass": { + "music": 0, + "space": 0 + }, + "top1_piece": ".", + "top1_category": "punct", + "chosen_piece": ".", + "chosen_category": "punct", + "chosen_label": null, + "diagnosed_stage": "inject" + }, + { + "step": 3, + "retrieved_majority_label": "space", + "retrieved_label_counts": { + "space": 4, + "music": 1 + }, + "retrieved_score_sum": { + "space": 0.1883818507194519, + "music": 0.00011563301086425781 + }, + "logits_label_mass": { + "music": 0, + "space": 0 + }, + "top1_piece": " **", + "top1_category": "punct", + "chosen_piece": " **", + "chosen_category": "punct", + "chosen_label": null, + "diagnosed_stage": "inject" + }, + { + "step": 4, + "retrieved_majority_label": "space", + "retrieved_label_counts": { + "space": 4, + "music": 1 + }, + "retrieved_score_sum": { + "space": 
0.1883818507194519, + "music": 0.00011563301086425781 + }, + "logits_label_mass": { + "music": 0, + "space": 0 + }, + "top1_piece": "Understanding", + "top1_category": "semantic", + "chosen_piece": "Understanding", + "chosen_category": "semantic", + "chosen_label": null, + "diagnosed_stage": "inject" + }, + { + "step": 5, + "retrieved_majority_label": "space", + "retrieved_label_counts": { + "space": 4, + "music": 1 + }, + "retrieved_score_sum": { + "space": 0.1883818507194519, + "music": 0.00011563301086425781 + }, + "logits_label_mass": { + "music": 0, + "space": 0.006763800512999296 + }, + "top1_piece": " the", + "top1_category": "functional", + "chosen_piece": " the", + "chosen_category": "functional", + "chosen_label": "space", + "diagnosed_stage": "decode" + }, + { + "step": 6, + "retrieved_majority_label": "space", + "retrieved_label_counts": { + "space": 4, + "music": 1 + }, + "retrieved_score_sum": { + "space": 0.1883818507194519, + "music": 0.00011563301086425781 + }, + "logits_label_mass": { + "music": 0, + "space": 0 + }, + "top1_piece": " Problem", + "top1_category": "semantic", + "chosen_piece": " Problem", + "chosen_category": "semantic", + "chosen_label": null, + "diagnosed_stage": "inject" + }, + { + "step": 7, + "retrieved_majority_label": "space", + "retrieved_label_counts": { + "space": 4, + "music": 1 + }, + "retrieved_score_sum": { + "space": 0.1883818507194519, + "music": 0.00011563301086425781 + }, + "logits_label_mass": { + "music": 0, + "space": 0 + }, + "top1_piece": ":", + "top1_category": "punct", + "chosen_piece": ":", + "chosen_category": "punct", + "chosen_label": null, + "diagnosed_stage": "inject" + }, + { + "step": 8, + "retrieved_majority_label": "space", + "retrieved_label_counts": { + "space": 4, + "music": 1 + }, + "retrieved_score_sum": { + "space": 0.3416861593723297, + "music": 0.034523651003837585 + }, + "logits_label_mass": { + "music": 0, + "space": 0 + }, + "top1_piece": "**\n", + "top1_category": "punct", + 
"chosen_piece": "**\n", + "chosen_category": "punct", + "chosen_label": null, + "diagnosed_stage": "inject" + }, + { + "step": 9, + "retrieved_majority_label": "space", + "retrieved_label_counts": { + "space": 4, + "music": 1 + }, + "retrieved_score_sum": { + "space": 0.3416861593723297, + "music": 0.034523651003837585 + }, + "logits_label_mass": { + "music": 0, + "space": 0 + }, + "top1_piece": " ", + "top1_category": "punct", + "chosen_piece": " ", + "chosen_category": "punct", + "chosen_label": null, + "diagnosed_stage": "inject" + }, + { + "step": 10, + "retrieved_majority_label": "space", + "retrieved_label_counts": { + "space": 4, + "music": 1 + }, + "retrieved_score_sum": { + "space": 0.3416861593723297, + "music": 0.034523651003837585 + }, + "logits_label_mass": { + "music": 0, + "space": 0 + }, + "top1_piece": " -", + "top1_category": "punct", + "chosen_piece": " -", + "chosen_category": "punct", + "chosen_label": null, + "diagnosed_stage": "inject" + }, + { + "step": 11, + "retrieved_majority_label": "space", + "retrieved_label_counts": { + "space": 4, + "music": 1 + }, + "retrieved_score_sum": { + "space": 0.3416861593723297, + "music": 0.034523651003837585 + }, + "logits_label_mass": { + "music": 0, + "space": 0 + }, + "top1_piece": " The", + "top1_category": "functional", + "chosen_piece": " The", + "chosen_category": "functional", + "chosen_label": null, + "diagnosed_stage": "inject" + } + ], + "passed": false + } + ], + "error": null +} +``` + +## Prompt Diversity Without Memory + +```json +{ + "passed": true, + "prompts": [ + "The pianist", + "Quantum systems", + "The rainforest" + ], + "outputs": [ + "The pianist Hannah wants balloons proportional weights totaling $S = 108 \\div (-6)$", + "Quantum systems cryptography aims towards computing that runs probabilistically prob(填空1)____可预见的结果", + "The rainforest chicken Cass spp是喜温带季风气候吗____。(判断对错 【生物" + ], + "unique_count": 3, + "error": null +} +``` + +## Save/Load Consistency + +```json +{ + 
"passed": true, + "prompt": "The pianist", + "output_a": "The pianist piano piano keys white feet artist drawing illustration blue colored guitar with colorful notes\r\n\"\"\"\n\\no", + "output_b": "The pianist piano piano keys white feet artist drawing illustration blue colored guitar with colorful notes\r\n\"\"\"\n\\no", + "error": null +} +``` + +## Training Cache Isolation + +```json +{ + "passed": true, + "changed": [], + "memory_count": 8, + "error": null +} +``` + +## Cheating Heuristics + +```json +{ + "passed": true, + "outputs": [ + "The pianist piano piano Best Japanのレビュー・感想 >> tag一�romanz.ru\nDCF", + "The telescope wine restaurant exquisite five course pair meal served pair five course exquisite restaurant served meal mp3 --", + "The trader restaurant exquisite five course meal pair wine restaurant five course meal pair wine exquisite mp3 -- zh", + "The child course exquisite five pair restaurant wine meal served restaurant exquisite pair five wine served meal.vn course exquisite" + ], + "exact_same": false, + "prefix_only": false, + "too_short": false, + "error": null +} +``` \ No newline at end of file diff --git a/reports/v333_blackbox/runner.log b/reports/v333_blackbox/runner.log new file mode 100644 index 0000000..93b328c --- /dev/null +++ b/reports/v333_blackbox/runner.log @@ -0,0 +1,188 @@ +[case:start] leaf_capacity_stability +[case:done] leaf_capacity_stability passed=True +[case:start] degenerate_direction_boundary +[case:done] degenerate_direction_boundary passed=True +[case:start] metric_trainability +Warning: You are sending unauthenticated requests to the HF Hub. Please set a HF_TOKEN to enable higher rate limits and faster downloads. +`torch_dtype` is deprecated! Use `dtype` instead! 
+ Loading weights: 0%| | 0/338 [00:00 60000, skip build +[case:done] metric_trainability passed=True +[case:start] no_grad_generation + Loading weights: 0%| | 0/338 [00:00 60000, skip build +[case:done] no_grad_generation passed=True +[case:start] counterfactual_memory_influence + Loading weights: 0%| | 0/338 [00:00 60000, skip build + Loading weights: 0%| | 0/338 [00:00 60000, skip build +[case:done] counterfactual_memory_influence passed=True +[case:start] semantic_memory_grounding + Loading weights: 0%| | 0/338 [00:00 60000, skip build + Loading weights: 0%| | 0/338 [00:00 60000, skip build + Loading weights: 0%| | 0/338 [00:00 60000, skip build +[case:done] semantic_memory_grounding passed=True +[case:start] semantic_memory_counterfactual_pairs + Loading weights: 0%| | 0/338 [00:00 60000, skip build + Loading weights: 0%| | 0/338 [00:00 60000, skip build +[case:done] semantic_memory_counterfactual_pairs passed=False +[case:start] degeneration_quality + Loading weights: 0%| | 0/338 [00:00 60000, skip build +[case:done] degeneration_quality passed=False +[case:start] prefix_logit_drift_audit + Loading weights: 0%| | 0/338 [00:00 60000, skip build + Loading weights: 0%| | 0/338 [00:00 60000, skip build +[case:done] prefix_logit_drift_audit passed=False +[case:start] retrieval_topk_semantic_shift + Loading weights: 0%| | 0/338 [00:00 60000, skip build + Loading weights: 0%| | 0/338 [00:00 60000, skip build +[case:done] retrieval_topk_semantic_shift passed=False +[case:start] repetition_segment_audit + Loading weights: 0%| | 0/338 [00:00 60000, skip build +[case:done] repetition_segment_audit passed=False +[case:start] prefix_stepwise_drift_trajectory + Loading weights: 0%| | 0/338 [00:00 60000, skip build +[case:done] prefix_stepwise_drift_trajectory passed=False +[case:start] retrieval_generation_alignment_audit + Loading weights: 0%| | 0/338 [00:00 60000, skip build +[case:done] retrieval_generation_alignment_audit passed=False +[case:start] 
retrieval_prefix_decode_correlation_audit + Loading weights: 0%| | 0/338 [00:00 60000, skip build +[case:done] retrieval_prefix_decode_correlation_audit passed=True +[case:start] stepwise_label_mass_alignment_audit + Loading weights: 0%| | 0/338 [00:00 60000, skip build +[case:done] stepwise_label_mass_alignment_audit passed=False +[case:start] prompt_diversity_without_memory + Loading weights: 0%| | 0/338 [00:00 60000, skip build +[case:done] prompt_diversity_without_memory passed=True +[case:start] save_load_consistency + Loading weights: 0%| | 0/338 [00:00 60000, skip build + Loading weights: 0%| | 0/338 [00:00 60000, skip build +[case:done] save_load_consistency passed=True +[case:start] training_cache_isolation + Loading weights: 0%| | 0/338 [00:00 60000, skip build +[case:done] training_cache_isolation passed=True +[case:start] cheating_heuristics + Loading weights: 0%| | 0/338 [00:00 60000, skip build +[case:done] cheating_heuristics passed=True +{ + "checks": [ + { + "name": "leaf_capacity_stability", + "passed": true, + "detail": "{\"per_seed\": [{\"seed\": 0, \"depth\": 6, \"count\": 240, \"violations\": [], \"consistency\": [], \"passed\": true}, {\"seed\": 1, \"depth\": 6, \"count\": 240, \"violations\": [], \"consistency\": [], \"passed\": true}, {\"seed\": 2, \"depth\": 6, \"count\": 240, \"violations\": [], \"consistency\": [], \"passed\": true}, {\"seed\": 3, \"depth\": 6, \"count\": 240, \"violations\": [], \"consistency\": [], \"passed\": true}, {\"seed\": 4, \"depth\": 6, \"count\": 240, \"violations\": [], \"consistency\": [], \"passed\": true}, {\"seed\": 5, \"depth\": 5, \"count\": 240, \"violations\": [], \"consistency\": [], \"passed\": true}, {\"seed\": 6, \"depth\": 6, \"count\": 240, \"violations\": [], \"consistency\": [], \"passed\": true}, {\"seed\": 7, \"depth\": 5, \"count\": 240, \"violations\": [], \"consistency\": [], \"passed\": true}]}" + }, + { + "name": "degenerate_direction_boundary", + "passed": true, + "detail": 
"{\"depth\": 47, \"count\": 100, \"violations\": [], \"consistency\": [], \"seed\": 17}" + }, + { + "name": "metric_trainability", + "passed": true, + "detail": "{\"training_info\": {\"total\": 427.3717041015625, \"recon\": 2.9565038681030273, \"contrast\": 17888.765625, \"holonomy\": 5206.763671875, \"write_policy\": 1.2801257371902466, \"semantic_probe\": 0.0, \"dir_diversity\": 0.0, \"reranker_ranking\": 0.0, \"encoder_throughput\": 3.7922558784484863, \"vocab_anchor\": -0.0, \"semantic_alignment\": 9.940794944763184, \"tail_semantic_anchor\": 9.934552192687988, \"grad_norms\": {\"ctx_encoder\": 5.512282921135631e-12, \"fib_encoder\": 2.2757680619031593e-09, \"dir_predictor\": 0.0, \"fiber_connection\": 4.7619314000630244e-08, \"fiber_attn\": 5.288609216022044e-11, \"reranker\": 9.430327858863409e-14, \"qformer\": 3.3202099058687253e-09, \"content_bypass\": 6.561078666845643e-10, \"semantic_probe\": 0.0, \"layer_pool\": 1.9807308149211167e-07, \"prefix_aligner\": 5.181229697493391e-11, \"vocab_proj\": 1.00000191427639, \"tail_head\": 2.594215171390375e-09}, \"loss_weights\": {\"recon\": 1.0, \"semantic_alignment\": 3.0, \"encoder_throughput\": 1.5, \"contrast\": 0.02, \"holonomy\": 0.005, \"write_policy\": 0.1, \"semantic_probe\": 0.3, \"dir_diversity\": 0.1, \"reranker_ranking\": 0.2, \"vocab_anchor\": 0.2, \"tail_semantic_anchor\": 0.5}}, \"metric_grad_norms\": [2.1457201293539896e-10, 5.218824938174604e-12, 3.427" + }, + { + "name": "no_grad_generation", + "passed": true, + "detail": "{\"stored_memories\": 8, \"output\": \"The pianist piano piano lessons Melbourne CBD Novibebop jazz 韷新手该如何入手Novil Jazz piano?\\n答题\\\\n �\"}" + }, + { + "name": "counterfactual_memory_influence", + "passed": true, + "detail": "{\"prompt\": \"Tell me something about practice and performance.\", \"music_output\": \"Tell me something about practice and performance. 
practiced practiced Kent牧羊犬很高兴。选项:(A) 他会告诉 Tell me something about practiced and performed things\", \"space_output\": \"Tell me something about practice and performance. signatures captured stars neb distant telescope spectral signatures spectral telescope stars的中文 captured neb distant chinese lunar orbiter\\nScientists have successfully\", \"outputs_differ\": true}" + }, + { + "name": "semantic_memory_grounding", + "passed": true, + "detail": "{\"prompt\": \"Explain what someone should focus on when improving technique and understanding the subject.\", \"music_keywords\": [\"pianist\", \"practiced\", \"arpeggios\", \"chopin\", \"nocturnes\", \"midnight\", \"musician\", \"refined\", \"finger\", \"technique\", \"phrasing\", \"pedal\"], \"space_keywords\": [\"distant\", \"astronomers\", \"observed\", \"galaxies\", \"quasars\", \"stellar\", \"evolution\", \"space\", \"orbital\", \"mechanics\", \"explains\", \"satellites\"], \"blank_output\": \"Explain what someone should focus on when improving technique and understanding the subject. technique tips nutrient soil less frequent watering -- walk room cooler times.\\nless timeHuman: Ohio weather tolerant to what? .available lightAvailable sunlight.Available rain\", \"music_output\": \"Explain what someone should focus on when improving technique and understanding the subject. technique technique refers to the way that’s used in writing, photography or speech\\\\n谢谢! technique 指写作、写诗作演讲时,研究者\", \"space_output\": \"Explain what someone should focus on when improving technique and understanding the subject. telescope spectral signatures captured stars distant nebula neb signatures captured stars distant telescope spectral lines telescope spectral s" + }, + { + "name": "semantic_memory_counterfactual_pairs", + "passed": false, + "detail": "{\"rows\": [{\"prompt\": \"Describe the most important details a student should notice.\", \"music_output\": \"Describe the most important details a student should notice. 
dynamics rub often depends interpretation touch tempo dynamics rub depends tempo interpretation touch\\\\n存储\\nA:\\n\\\"Descubramientos rubato often se ref\", \"space_output\": \"Describe the most important details a student should notice. stars neb signatures telescope captured distant spectral signatures stars neb spectral telescope captured distant star clusters stars neb signatures telescope captured D:通过Describe the most important\", \"music_margin\": 0.0, \"space_margin\": 0.08, \"passed\": false}, {\"prompt\": \"Summarize the key ideas a learner should practice and remember.\", \"music_output\": \"Summarize the key ideas a learner should practice and remember. interpretation depends often rub dynamics tempo touch tempo dynamics interpretation rub touch often 呜铃 depends interpretation depends often重复了很多遍depend,有没有删除的方法\", \"space_output\": \"Summarize the key ideas a learner should practice and remember. telescope neb signatures captured spectral signatures telescope neb captured spectral\\\\n上传时间…\\n\\n对不起,\\\"rocket telescope signatures captured s" + }, + { + "name": "degeneration_quality", + "passed": false, + "detail": "{\"metrics\": [{\"prompt\": \"The pianist\", \"output\": \"The pianist pian pian etc elleeRpmn的粉紅色粉色紫色綠紫褐色淺藍色淡灰色嫩白色的小狗 - Google\", \"token_count\": 5, \"unique_token_ratio\": 0.8, \"repeated_bigram_ratio\": 0.0, \"max_token_run\": 2, \"punct_ratio\": 0.014705882352941176, \"newline_ratio\": 0.0, \"alpha_ratio\": 0.8823529411764706, \"content_token_ratio\": 0.8, \"generated_preview\": \"pian pian etc elleerpmn google\"}, {\"prompt\": \"The telescope\", \"output\": \"The telescope telescope telescope weekends sweater sweahte ____. 
softlyttttyуouchffferra telescope周末帽子teeew Swe aht\\n\\n已知函数\", \"token_count\": 11, \"unique_token_ratio\": 0.8181818181818182, \"repeated_bigram_ratio\": 0.0, \"max_token_run\": 2, \"punct_ratio\": 0.04132231404958678, \"newline_ratio\": 0.01652892561983471, \"alpha_ratio\": 0.8512396694214877, \"content_token_ratio\": 0.8181818181818182, \"generated_preview\": \"telescope telescope weekends sweater sweahte softlytttty ouchffferra telescope teeew swe aht\"}, {\"prompt\": \"The forest path\", \"output\": \"The forest path often depends rub dynamics touch tempo interpretation interpretation touch tempo often dynamics粉音乐家们在创作和演奏室内乐器时经常遇到这个问题:旋律\", \"token_count\": 12, \"unique_token_ratio\": 0.5833333333333334, \"repeated_bigram_" + }, + { + "name": "prefix_logit_drift_audit", + "passed": false, + "detail": "{\"prompt\": \"Explain the topic in a precise and concrete way.\", \"blank\": {\"js_divergence\": 0.3597820997238159, \"l2_shift\": 1045.0601806640625, \"topk_overlap_count\": 3, \"entropy_no_prefix\": 5.256593227386475, \"entropy_with_prefix\": 5.254775047302246, \"topk_no_prefix\": [{\"token_id\": 576, \"piece\": \" The\", \"norm\": \"the\", \"logit\": 19.875, \"prob\": 0.12818092107772827}, {\"token_id\": 22555, \"piece\": \" Sure\", \"norm\": \"sure\", \"logit\": 19.5, \"prob\": 0.08809737861156464}, {\"token_id\": 55313, \"piece\": \" Quantum\", \"norm\": \"quantum\", \"logit\": 18.75, \"prob\": 0.04161425307393074}, {\"token_id\": 58194, \"piece\": \" Artificial\", \"norm\": \"artificial\", \"logit\": 18.625, \"prob\": 0.03672444820404053}, {\"token_id\": 30536, \"piece\": \" Climate\", \"norm\": \"climate\", \"logit\": 18.375, \"prob\": 0.02860102988779545}, {\"token_id\": 2585, \"piece\": \" How\", \"norm\": \"how\", \"logit\": 18.25, \"prob\": 0.025240320712327957}, {\"token_id\": 3555, \"piece\": \" What\", \"norm\": \"what\", \"logit\": 18.125, \"prob\": 0.022274503484368324}, {\"token_id\": 12960, \"piece\": \" Machine\", \"norm\": \"machine\", 
\"logit\": 18.125, \"prob\": 0.022274503484368324}, {\"token_id\": 2885, \"piece\": \" Data\", \"norm\": \"data\", \"logit\": 17.875, \"prob\": 0.01734740100800991}, {\"t" + }, + { + "name": "retrieval_topk_semantic_shift", + "passed": false, + "detail": "{\"music_keywords\": [\"pianist\", \"practiced\", \"arpeggios\", \"chopin\", \"nocturnes\", \"midnight\", \"musician\", \"refined\", \"finger\", \"technique\", \"phrasing\", \"pedal\"], \"space_keywords\": [\"distant\", \"astronomers\", \"observed\", \"galaxies\", \"quasars\", \"stellar\", \"evolution\", \"space\", \"orbital\", \"mechanics\", \"explains\", \"satellites\"], \"rows\": [{\"prompt\": \"A strong explanation should mention\", \"music_no_prefix\": [{\"token_id\": 279, \"piece\": \" the\", \"norm\": \"the\", \"logit\": 21.125, \"prob\": 0.31038299202919006}, {\"token_id\": 518, \"piece\": \" at\", \"norm\": \"at\", \"logit\": 19.5, \"prob\": 0.06111803650856018}, {\"token_id\": 264, \"piece\": \" a\", \"norm\": \"a\", \"logit\": 19.375, \"prob\": 0.05393647775053978}, {\"token_id\": 2176, \"piece\": \" both\", \"norm\": \"both\", \"logit\": 19.0, \"prob\": 0.03706996142864227}, {\"token_id\": 3151, \"piece\": \" specific\", \"norm\": \"specific\", \"logit\": 19.0, \"prob\": 0.03706996142864227}, {\"token_id\": 429, \"piece\": \" that\", \"norm\": \"that\", \"logit\": 18.625, \"prob\": 0.025477787479758263}, {\"token_id\": 1246, \"piece\": \" how\", \"norm\": \"how\", \"logit\": 18.625, \"prob\": 0.025477787479758263}, {\"token_id\": 678, \"piece\": \" all\", \"norm\": \"all\", \"logit\": 18.5, \"prob\": 0.0224840696901083}, {\"token_id\": 1029" + }, + { + "name": "repetition_segment_audit", + "passed": false, + "detail": "{\"aggregate\": {\"bad_segment_ratio\": 0.375, \"total_segments\": 8, \"bad_segments\": 3, \"early_collapse_prompts\": [\"The pianist\", \"The telescope\", \"Explain the topic clearly\"]}, \"rows\": [{\"prompt\": \"The pianist\", \"output\": \"The pianist pian pian piano 
piano\\\\n喝水吃饭睡觉是平衡人体哪个系统的重要时间轴喝吃睡重要还是学习最重要?\\\\n计算圆周率e的近似值,要求代码简洁 elegant ElegantPython 解决喝水吃饭睡觉是\", \"generated_token_count\": 9, \"window\": 8, \"segments\": [{\"segment_idx\": 0, \"tokens\": [\"pian\", \"pian\", \"piano\", \"piano\", \"n\", \"n\", \"e\", \"elegant\"], \"unique_ratio\": 0.625, \"content_ratio\": 0.625, \"repeated_bigram_ratio\": 0.0, \"dominant_token_share\": 0.25}, {\"segment_idx\": 1, \"tokens\": [\"elegantpython\"], \"unique_ratio\": 1.0, \"content_ratio\": 1.0, \"repeated_bigram_ratio\": 0.0, \"dominant_token_share\": 1.0}], \"bad_segments\": [{\"segment_idx\": 1, \"tokens\": [\"elegantpython\"], \"unique_ratio\": 1.0, \"content_ratio\": 1.0, \"repeated_bigram_ratio\": 0.0, \"dominant_token_share\": 1.0}], \"first_bad_segment_idx\": 1}, {\"prompt\": \"The telescope\", \"output\": \"The telescope telescope telescope haha //ǒé舌尖化的输入乱码在这里会损坏设备吗? 在讨论泡泡文本内容时,我理解您在询问潜水代码或特殊编程语言中的潜在风险。输入编码的质量和格式可以对程序的\", \"generated_token_count\": 3, \"window\": 8, \"segments\": [{\"segment_idx\": 0, \"tokens\": [\"telescope\", " + }, + { + "name": "prefix_stepwise_drift_trajectory", + "passed": false, + "detail": "{\"rows\": [{\"prompt\": \"Key piano ideas include\", \"first_bad_step\": 0, \"decoded_output\": \"Key piano ideas include the following: 1. 
The piano is a musical instrument that produces sound through\", \"rows\": [{\"step\": 0, \"top1\": {\"token_id\": 279, \"piece\": \" the\", \"norm\": \"the\", \"logit\": 17.125, \"prob\": 0.10595475882291794}, \"top1_category\": \"functional\", \"topk_category_counts\": {\"semantic\": 1, \"functional\": 4, \"punct\": 7}, \"topk_category_prob_mass\": {\"semantic\": 0.008170354180037975, \"functional\": 0.17851401399821043, \"punct\": 0.2394516970962286}, \"chosen_token_id\": 279, \"chosen_piece\": \" the\", \"chosen_norm\": \"the\", \"chosen_category\": \"functional\"}, {\"step\": 1, \"top1\": {\"token_id\": 2701, \"piece\": \" following\", \"norm\": \"following\", \"logit\": 19.0, \"prob\": 0.2710222899913788}, \"top1_category\": \"semantic\", \"topk_category_counts\": {\"semantic\": 10, \"functional\": 2, \"punct\": 0}, \"topk_category_prob_mass\": {\"semantic\": 0.37913330597802997, \"functional\": 0.09521055547520518, \"punct\": 0.0}, \"chosen_token_id\": 2701, \"chosen_piece\": \" following\", \"chosen_norm\": \"following\", \"chosen_category\": \"semantic\"}, {\"step\": 2, \"top1\": {\"token_id\": 25, \"piece\": \":\", \"norm\": \"\", \"logit\": 19.125, \"prob\": 0.23693" + }, + { + "name": "retrieval_generation_alignment_audit", + "passed": false, + "detail": "{\"music_keywords\": [\"pianist\", \"practiced\", \"arpeggios\", \"chopin\", \"nocturnes\", \"midnight\", \"musician\", \"refined\", \"finger\", \"technique\", \"phrasing\", \"pedal\"], \"space_keywords\": [\"distant\", \"astronomers\", \"observed\", \"galaxies\", \"quasars\", \"stellar\", \"evolution\", \"space\", \"orbital\", \"mechanics\", \"explains\", \"satellites\"], \"diagnoses\": {\"aligned\": 1, \"retrieval_miss\": 1, \"bridge_unused\": 1, \"unknown\": 0}, \"rows\": [{\"prompt\": \"What improves piano technique and musical phrasing?\", \"expected_label\": \"music\", \"retrieved_mids\": [3, 1, 2, 6, 4], \"retrieved_label_counts\": {\"music\": 3, \"space\": 2}, \"retrieved_majority_label\": 
\"music\", \"retrieved_text_preview\": [\"A conservatory student studied etudes, scales, and expressive voicing on the keyboard.\", \"A musician refined finger technique, phrasing, and pedal control on the piano.\", \"Classical interpretation often depends on dynamics, tempo rubato, and touch.\"], \"output\": \"What improves piano technique and musical phrasing? piano technique technique piano or phrasing Which question?\\\\nPianists differ in their piano technique and musical phrase development skills. Technique encompasses a musician\", \"music_score\": 0.36363636363636365, \"space_sco" + }, + { + "name": "retrieval_prefix_decode_correlation_audit", + "passed": true, + "detail": "{\"correlations\": {\"retrieval_strength__prefix_l2\": -0.10790525695735134, \"retrieval_strength__bad_decode_score\": -0.4802604260791914, \"prefix_l2__bad_decode_score\": -0.6753161319330133}, \"rows\": [{\"prompt\": \"What improves piano technique and musical phrasing?\", \"expected_label\": \"music\", \"retrieved_scored\": [{\"mid\": 5, \"score\": -0.41752803325653076}, {\"mid\": 0, \"score\": -0.4371113181114197}, {\"mid\": 6, \"score\": -0.4526725709438324}, {\"mid\": 7, \"score\": -0.4570624828338623}, {\"mid\": 4, \"score\": -0.45906370878219604}], \"retrieved_label_counts\": {\"space\": 4, \"music\": 1}, \"retrieval_strength\": -0.4371113181114197, \"prefix_l2_shift\": 732.3128051757812, \"prefix_js_divergence\": 0.268730103969574, \"top1_with_prefix\": {\"token_id\": 362, \"piece\": \" A\", \"norm\": \"a\", \"logit\": 14.6875, \"prob\": 0.11750791221857071}, \"top1_category_with_prefix\": \"functional\", \"topk_non_semantic_prob_mass\": 0.33550204522907734}, {\"prompt\": \"What explains satellites and orbital motion?\", \"expected_label\": \"space\", \"retrieved_scored\": [{\"mid\": 5, \"score\": -0.4601401388645172}, {\"mid\": 0, \"score\": -0.47389334440231323}, {\"mid\": 7, \"score\": -0.48761406540870667}, {\"mid\": 6, \"score\": -0.48975706100463867}, {\"mid\": 4, \"s" + 
}, + { + "name": "stepwise_label_mass_alignment_audit", + "passed": false, + "detail": "{\"label_keywords\": {\"music\": [\"pianist\", \"practiced\", \"arpeggios\", \"chopin\", \"nocturnes\", \"midnight\", \"musician\", \"refined\", \"finger\", \"technique\", \"phrasing\", \"pedal\"], \"space\": [\"distant\", \"astronomers\", \"observed\", \"galaxies\", \"quasars\", \"stellar\", \"evolution\", \"space\", \"orbital\", \"mechanics\", \"explains\", \"satellites\"]}, \"rows\": [{\"prompt\": \"What improves piano technique and musical phrasing?\", \"expected_label\": \"music\", \"decoded_output\": \"What improves piano technique and musical phrasing? 选项:A. practice B. practice C. practice\", \"stage_counts\": {\"retrieve\": 12}, \"rows\": [{\"step\": 0, \"retrieved_majority_label\": \"space\", \"retrieved_label_counts\": {\"space\": 4, \"music\": 1}, \"retrieved_score_sum\": {\"space\": 0.014359861612319946, \"music\": -0.041970282793045044}, \"logits_label_mass\": {\"music\": 0, \"space\": 0}, \"top1_piece\": \" \", \"top1_category\": \"punct\", \"chosen_piece\": \" \", \"chosen_category\": \"punct\", \"chosen_label\": null, \"diagnosed_stage\": \"retrieve\"}, {\"step\": 1, \"retrieved_majority_label\": \"space\", \"retrieved_label_counts\": {\"space\": 4, \"music\": 1}, \"retrieved_score_sum\": {\"space\": 0.014359861612319946, \"music\": -0.041970282793045044}, \"logits_label_mass\": {\"music\": 0, \"space\": 0" + }, + { + "name": "prompt_diversity_without_memory", + "passed": true, + "detail": "{\"prompts\": [\"The pianist\", \"Quantum systems\", \"The rainforest\"], \"outputs\": [\"The pianist Hannah wants balloons proportional weights totaling $S = 108 \\\\div (-6)$\", \"Quantum systems cryptography aims towards computing that runs probabilistically prob(填空1)____可预见的结果\", \"The rainforest chicken Cass spp是喜温带季风气候吗____。(判断对错 【生物\"], \"unique_count\": 3}" + }, + { + "name": "save_load_consistency", + "passed": true, + "detail": "{\"prompt\": \"The pianist\", 
\"output_a\": \"The pianist piano piano keys white feet artist drawing illustration blue colored guitar with colorful notes\\r\\n\\\"\\\"\\\"\\n\\\\no\", \"output_b\": \"The pianist piano piano keys white feet artist drawing illustration blue colored guitar with colorful notes\\r\\n\\\"\\\"\\\"\\n\\\\no\"}" + }, + { + "name": "training_cache_isolation", + "passed": true, + "detail": "{\"changed\": [], \"memory_count\": 8}" + }, + { + "name": "cheating_heuristics", + "passed": true, + "detail": "{\"outputs\": [\"The pianist piano piano Best Japanのレビュー・感想 >> tag一�romanz.ru\\nDCF\", \"The telescope wine restaurant exquisite five course pair meal served pair five course exquisite restaurant served meal mp3 --\", \"The trader restaurant exquisite five course meal pair wine restaurant five course meal pair wine exquisite mp3 -- zh\", \"The child course exquisite five pair restaurant wine meal served restaurant exquisite pair five wine served meal.vn course exquisite\"], \"exact_same\": false, \"prefix_only\": false, \"too_short\": false}" + } + ], + "elapsed_seconds": 1123.8103530406952 +} diff --git a/scheme_b_v333.py b/scheme_b_v333.py new file mode 100644 index 0000000..494f9ba --- /dev/null +++ b/scheme_b_v333.py @@ -0,0 +1,2595 @@ +#!/usr/bin/env python3 +""" +嵌入级方案B · v3.33 +═══════════════════════════════════════════════════════════════════════════ +修复相对 v3.32: + +[A-1] Logit shaping 从 generate() 解耦为 MemLLM.shape_step_logits() 公共方法 + → 修复 4.15: runner 的 stepwise decode 也能享受 F-2 hard mask + → 修复 4.12: runner 路径因此不再 early collapse + +[A-2] MemLLM.prepare_decode_context() 公共方法 + → 一次性产出 (prefix_cond, prefix_uncond, content_bias, suppression_bias, vocab_bias) + +[A-3] Trainer.recon() 公共方法 (真实 API, 非 shim) + → 修复 4.14 + +[A-4] DecodeState 数据类 → runner 和 generate() 共享同一状态语义 + +保留的 v3.32 机制: F-1 contrastive suppression / F-3 adaptive scaling / + F-4 retrieval thresholds / F-5 bypass gate open / + F-6 cfg=3.5 / S-1/S-2/S-3/S-4 +""" + +import torch, torch.nn as nn, 
torch.nn.functional as F +import math, time +from typing import Dict, List, Tuple, Optional, NamedTuple, Set, FrozenSet +from dataclasses import dataclass, field + +# ═══════════════════════════════════════════════════════════════════ +# Cfg +# ═══════════════════════════════════════════════════════════════════ +@dataclass +class Cfg: + llm_name: str = "Qwen/Qwen2.5-1.5B-Instruct" + llm_dtype: str = "bf16" + use_chat_template_for_gen: bool = False + d_LLM: int = 1536 + vocab_size: int = 151936 + + d_M: int = 8; d_F: int = 32 + L_mem: int = 8; n_heads_fiber: int = 4 + bridge_heads: int = 4; bridge_layers: int = 2 + n_geo_pts: int = 8; geo_max_steps: int = 80 + geo_tol: float = 1e-5; geo_lr: float = 0.02 + tree_K: int = 8; tree_max_leaf: int = 20 + tau: float = 0.07 + write_gate_threshold: float = 0.4 + retention_gc_threshold: float = 0.15 + consol_dist: float = 0.3; consol_conflict_ratio: float = 0.5 + retrieval_topk: int = 8; retrieval_beam: int = 5 + retrieval_interval: int = 8 + retrieval_recall_factor: float = 2.0 + flat_scan_threshold_factor: int = 3 + gen_top_p: float = 0.9; gen_temp: float = 0.8 + norm_correction_interval: int = 4 + write_update_alpha: float = 0.3 + dir_diversity_tau: float = 0.5 + + bypass_init_gate_bias: float = 0.5 + + degen_min_tokens: int = 5; degen_repeat_penalty: float = 1.4 + degen_max_consec_punct: int = 2 + probe_contrastive_tau: float = 0.1 + contrast_tau: float = 0.5 + prefix_init_scale: float = 0.5 + + degen_early_punct_penalty: float = 6.0 + degen_early_newline_penalty: float = 6.0 + + early_content_steps: int = 5 + use_early_content_starter_hard_mask: bool = True + early_starter_hard_mask_steps: int = 3 + + content_bias_scale: float = 6.0 + use_adaptive_content_bias_scale: bool = True + content_bias_std_multiplier: float = 1.5 + content_bias_decay: float = 0.02 + content_bias_floor: float = 0.5 + generated_token_decay: float = 0.2 + content_repeat_penalty: float = 3.5 + content_repeat_exponent: float = 1.5 + 
content_bias_relevance_floor: float = 0.05 + content_bias_concentration: float = 2.0 + retrieval_use_expanded_ids: bool = True + + use_memory_guided_suppression: bool = True + suppression_bias_scale: float = 4.0 + suppression_std_multiplier: float = 1.0 + suppression_decay: float = 0.03 + suppression_floor: float = 0.3 + + use_mean_centered_scoring: bool = True + mc_keep_margin: float = 0.0 + mc_min_keep: int = 1 + mc_require_min_candidates: int = 2 + + use_hungarian_fwd: bool = True + hungarian_max_n: int = 24 + + use_cfg_decoding: bool = True + use_contrastive_memory_cfg: bool = True + cfg_scale: float = 3.5 + cfg_decay_steps: int = 0 + + use_content_semantic_tail: bool = True + content_tail_slots: int = 2 + tail_head_hidden: int = 1024 + + ret_centroid_weight: float = 0.30 + ret_sem_weight: float = 0.10 + ret_bidi_min_weight: float = 0.25 + ret_forward_maxsim_weight: float = 0.35 + ret_dir_weight: float = 0.00 + + reranker_clip: float = 0.2 + fwd_coherence_ratio: float = 0.55 + score_keep_ratio: float = 0.80 + retrieval_weight_temperature: float = 0.05 + consol_maxsim_min: float = 0.40 + gate_sem_ratio: float = 0.65 + gate_bidi_ratio: float = 0.70 + gate_sem_floor: float = 0.10 + gate_bidi_floor: float = 0.10 + gate_bidi_hard_min: float = 0.12 + gate_sem_weight: float = 0.50 + gate_bidi_weight: float = 0.50 + bidi_absolute_gap: float = 0.15 + use_tfidf_weighting: bool = True + tfidf_smoothing: float = 1.0 + use_idf_retrieval: bool = True + idf_floor: float = 0.1 + use_idf_centroid: bool = True + use_word_starter_filter: bool = True + bpe_echo_window: int = 3 + bpe_echo_penalty: float = 3.0 + post_starter_nonstarter_penalty: float = 2.0 + use_strict_content_starter: bool = True + strict_starter_min_decoded_len: int = 5 + use_upstream_semantic_gate: bool = True + upstream_gate_fwd_idf_floor: float = 0.12 + upstream_gate_sem_floor: float = 0.15 + upstream_gate_min_keep: int = 1 + upstream_gate_require_both: bool = True + + use_strict_content_overlap_gate: bool = 
True + strict_overlap_sim_threshold: float = 0.32 + strict_overlap_min_matches: int = 1 + strict_overlap_min_keep: int = 1 + + use_ngram_repeat_block: bool = True + ngram_repeat_penalty: float = 10.0 + ngram_repeat_max_n: int = 4 + use_cyclic_content_hard_mask: bool = True + cyclic_content_window: int = 15 + cyclic_content_max_count: int = 2 + use_content_gated_newline: bool = True + min_content_tokens_before_newline: int = 8 + late_newline_penalty: float = 20.0 + use_newline_hard_gate: bool = True + newline_hard_gate_min_step: int = 12 + newline_hard_gate_min_content: int = 6 + use_eos_hard_mask: bool = True + eos_hard_mask_steps: int = 10 + use_filler_direction_projection: bool = True + filler_projection_last_slots: int = 2 + use_prefix_norm_clamp: bool = True + prefix_norm_clamp_ratio: float = 1.0 + + semantic_boost_scale: float = 0.5 + semantic_boost_decay: float = 0.06 + semantic_boost_floor: float = 0.2 + semantic_align_temp: float = 0.3 + wte_neighbor_k: int = 5 + wte_neighbor_threshold: float = 0.5 + wte_neighbor_max_vocab: int = 60000 + + stopwords_override: Optional[FrozenSet[str]] = None + filler_words_override: Optional[FrozenSet[str]] = None + stopwords_extra: FrozenSet[str] = field(default_factory=frozenset) + filler_words_extra: FrozenSet[str] = field(default_factory=frozenset) + dedup_filler_from_stop: bool = False + + loss_weights: Dict[str, float] = field(default_factory=lambda: { + 'recon': 1.0, 'semantic_alignment': 3.0, + 'encoder_throughput': 1.5, 'contrast': 0.02, + 'holonomy': 0.005, 'write_policy': 0.1, + 'semantic_probe': 0.3, 'dir_diversity': 0.1, + 'reranker_ranking': 0.2, 'vocab_anchor': 0.2, + 'tail_semantic_anchor': 0.5}) + warmup_steps_probe: int = 5; warmup_steps_dd: int = 5 + warmup_steps_rr: int = 5; warmup_steps_va: int = 5 + warmup_steps_sa: int = 0 + warmup_steps_tsa: int = 0 + uw_clamp_lo: float = -4.0; uw_clamp_hi: float = 4.0 + vocab_anchor_topk: int = 5; content_min_len: int = 3 + refresh_memories_every: int = 1 + 
content_inject_scale: float = 1.0 + + def __post_init__(self): + assert self.d_F % self.n_heads_fiber == 0 + assert self.n_geo_pts >= 2 and 0 < self.tau < 1 + w_sum = (self.ret_centroid_weight + self.ret_sem_weight + + self.ret_bidi_min_weight + self.ret_forward_maxsim_weight + + self.ret_dir_weight) + assert 0.8 < w_sum < 1.2, f"ret weights sum {w_sum}" + assert self.cfg_scale >= 0 + assert self.content_tail_slots >= 0 + assert self.content_tail_slots < self.L_mem + assert self.llm_dtype in ("bf16", "fp16", "fp32") + +def _dev(ref: torch.Tensor): + return dict(device=ref.device, dtype=ref.dtype) + +def _resolve_dtype(name): + return {"bf16": torch.bfloat16, "fp16": torch.float16, "fp32": torch.float32}[name] + +# ═══════════════════════════════════════════════════════════════════ +# [A-4] DecodeState +# ═══════════════════════════════════════════════════════════════════ +@dataclass +class DecodeState: + """Shared state semantics between MemLLM.generate() and external runners.""" + generated_ids: List[int] = field(default_factory=list) + generated_content_counts: Dict[int, int] = field(default_factory=dict) + content_history: List[Tuple[int, int]] = field(default_factory=list) + recent_starters: List[Tuple[int, int]] = field(default_factory=list) + + def update(self, nxt_id: int, step: int, content_classifier, + bpe_echo_window: int, cyclic_content_window: int): + cc = content_classifier + self.generated_ids.append(nxt_id) + if cc is not None and nxt_id in cc.content_ids: + self.generated_content_counts[nxt_id] = self.generated_content_counts.get(nxt_id, 0) + 1 + self.content_history.append((step, nxt_id)) + if nxt_id in cc.word_starter_ids: + self.recent_starters.append((nxt_id, step)) + self.recent_starters = [(t, s) for (t, s) in self.recent_starters + if (step - s) < bpe_echo_window] + if len(self.content_history) > 2 * cyclic_content_window: + self.content_history = self.content_history[-cyclic_content_window:] + +# 
═══════════════════════════════════════════════════════════════════ +# LLMBackbone +# ═══════════════════════════════════════════════════════════════════ +class LLMBackbone(nn.Module): + def __init__(self, name: str, dtype_name: str = "bf16"): + super().__init__() + from transformers import AutoModelForCausalLM, AutoTokenizer + self.name = name + self._dtype = _resolve_dtype(dtype_name) + self.tokenizer = AutoTokenizer.from_pretrained(name, trust_remote_code=True) + if self.tokenizer.pad_token is None: + if self.tokenizer.eos_token is not None: + self.tokenizer.pad_token = self.tokenizer.eos_token + else: + raise ValueError(f"Tokenizer for {name} has no pad/eos token") + self.model = AutoModelForCausalLM.from_pretrained( + name, torch_dtype=self._dtype, trust_remote_code=True) + for p in self.model.parameters(): + p.requires_grad_(False) + self.model.eval() + cfg = self.model.config + self.d_model = cfg.hidden_size + self.vocab_size = cfg.vocab_size + self.n_layers = cfg.num_hidden_layers + self.has_chat_template = getattr(self.tokenizer, 'chat_template', None) is not None + with torch.no_grad(): + self._wte_fp32 = self.model.get_input_embeddings().weight.detach().float().clone() + + def input_embedding_weight(self) -> torch.Tensor: + return self._wte_fp32 + + def embed_tokens(self, ids: torch.Tensor) -> torch.Tensor: + return self.model.get_input_embeddings()(ids) + + @property + def device(self): + return next(self.model.parameters()).device + + def to(self, *args, **kwargs): + super().to(*args, **kwargs) + for arg in args: + if isinstance(arg, torch.device) or (isinstance(arg, str) and arg in ("cuda","cpu")): + self._wte_fp32 = self._wte_fp32.to(arg) + if 'device' in kwargs: + self._wte_fp32 = self._wte_fp32.to(kwargs['device']) + return self + + def forward(self, ids: torch.Tensor, attention_mask: torch.Tensor, + prefix: Optional[torch.Tensor] = None) -> Dict: + te = self.embed_tokens(ids) + if prefix is not None: + prefix_cast = prefix.to(te.dtype) + 
inputs_embeds = torch.cat([prefix_cast, te], dim=1) + B, P = prefix_cast.shape[:2] + pm = torch.ones(B, P, device=ids.device, dtype=attention_mask.dtype) + ext_mask = torch.cat([pm, attention_mask], dim=1) + pl = P + else: + inputs_embeds = te + ext_mask = attention_mask + pl = 0 + out = self.model( + inputs_embeds=inputs_embeds, + attention_mask=ext_mask, + output_hidden_states=True, + use_cache=False, + return_dict=True) + hs_list = [h.float() for h in out.hidden_states] + logits = out.logits.float() + return {'logits': logits, 'hs': hs_list, 'pl': pl, 'mask': ext_mask} + + def build_chat_text(self, user_text: str) -> str: + if not self.has_chat_template: + return user_text + msgs = [{"role": "user", "content": user_text}] + return self.tokenizer.apply_chat_template( + msgs, tokenize=False, add_generation_prompt=True) + +# ═══════════════════════════════════════════════════════════════════ +# Hungarian +# ═══════════════════════════════════════════════════════════════════ +def hungarian_max_assignment(sim: torch.Tensor) -> Tuple[torch.Tensor, float]: + device = sim.device + n_rows, n_cols = sim.shape + if n_rows == 0 or n_cols == 0: + return torch.empty(0, 2, dtype=torch.long, device=device), 0.0 + transposed = False + if n_rows > n_cols: + sim = sim.T; n_rows, n_cols = n_cols, n_rows; transposed = True + import numpy as np + cost = (-sim).detach().cpu().numpy().astype('float64') + INF = float('inf') + u = np.zeros(n_rows + 1); v = np.zeros(n_cols + 1) + p = np.zeros(n_cols + 1, dtype=int); way = np.zeros(n_cols + 1, dtype=int) + for i in range(1, n_rows + 1): + p[0] = i; j0 = 0 + minv = np.full(n_cols + 1, INF); used = np.zeros(n_cols + 1, dtype=bool) + while True: + used[j0] = True; i0 = p[j0]; delta = INF; j1 = -1 + for j in range(1, n_cols + 1): + if not used[j]: + cur = cost[i0 - 1, j - 1] - u[i0] - v[j] + if cur < minv[j]: minv[j] = cur; way[j] = j0 + if minv[j] < delta: delta = minv[j]; j1 = j + for j in range(n_cols + 1): + if used[j]: u[p[j]] += delta; 
v[j] -= delta + else: minv[j] -= delta + j0 = j1 + if p[j0] == 0: break + while j0: + j1 = way[j0]; p[j0] = p[j1]; j0 = j1 + pairs = [] + for j in range(1, n_cols + 1): + i = p[j] + if i > 0 and i <= n_rows: + if transposed: pairs.append((j - 1, i - 1)) + else: pairs.append((i - 1, j - 1)) + if not pairs: + return torch.empty(0,2,dtype=torch.long,device=device), 0.0 + pairs_t = torch.tensor(pairs, dtype=torch.long, device=device) + total = float(sim[pairs_t[:,0], pairs_t[:,1]].sum().item()) if not transposed \ + else float(sim[pairs_t[:,1], pairs_t[:,0]].sum().item()) + return pairs_t, total + +# ═══════════════════════════════════════════════════════════════════ +# 几何 / 纤维 +# ═══════════════════════════════════════════════════════════════════ +class RiemannianMetric(nn.Module): + def __init__(self, d): + super().__init__(); self.d = d + n_tri = d*(d+1)//2 + self.net = nn.Sequential( + nn.Linear(d,4*d), nn.SiLU(), + nn.Linear(4*d,4*d), nn.SiLU(), + nn.Linear(4*d, n_tri)) + for m in self.net.modules(): + if isinstance(m, nn.Linear): + nn.init.xavier_normal_(m.weight) + if m.bias is not None: nn.init.zeros_(m.bias) + nn.init.normal_(self.net[-1].weight, std=0.02); nn.init.zeros_(self.net[-1].bias) + r,c=[],[] + for i in range(d): + for j in range(i+1): r.append(i); c.append(j) + self.register_buffer('_r', torch.tensor(r)); self.register_buffer('_c', torch.tensor(c)) + def forward(self, x): + B=x.shape[0]; d=self.d; v=self.net(x) + L=x.new_zeros(B,d,d); L[:,self._r,self._c]=v + di=torch.arange(d,device=x.device) + L[:,di,di]=F.softplus(L[:,di,di])+1e-3 + return L@L.transpose(1,2) + def christoffel(self, x): + d=self.d; B=x.shape[0] + xv=x.detach().clone().requires_grad_(True) + g=self.forward(xv); g_inv=torch.linalg.inv(g.detach()) + dg=x.new_zeros(B,d,d,d) + for i in range(d): + for j in range(i,d): + gr=torch.autograd.grad(g[:,i,j].sum(),xv,retain_graph=True)[0] + dg[:,i,j,:]=gr + if i!=j: dg[:,j,i,:]=gr + term=dg.permute(0,3,1,2)+dg.permute(0,1,3,2)-dg + return 
(0.5*torch.einsum('bkl,bijl->bkij',g_inv,term)).detach() + def midpoint_approx_distance(self, x, y): + diff=x-y; mid=(x+y)/2 + with torch.no_grad(): g=self.forward(mid) + return torch.einsum('bi,bij,bj->b',diff,g,diff).clamp(min=0).sqrt() + +class GeodesicResult(NamedTuple): + path: torch.Tensor; energy: float; converged: bool; iterations: int + +class GeodesicSolver: + def __init__(self, metric, cfg): self.metric=metric; self.cfg=cfg + def solve(self, xs, xe): + B,d=xs.shape; N=self.cfg.n_geo_pts; dev=xs.device + t=torch.linspace(0,1,N+2,device=dev)[1:-1] + ps={n:p.requires_grad for n,p in self.metric.named_parameters()} + for p in self.metric.parameters(): p.requires_grad_(False) + with torch.enable_grad(): + interior=(xs.detach().unsqueeze(1)*(1-t[None,:,None]) + +xe.detach().unsqueeze(1)*t[None,:,None]).detach().clone().requires_grad_(True) + opt=torch.optim.Adam([interior],lr=self.cfg.geo_lr) + prev=float('inf'); converged=False; iters=0; cur=prev + for it in range(self.cfg.geo_max_steps): + opt.zero_grad() + path=torch.cat([xs.detach().unsqueeze(1),interior,xe.detach().unsqueeze(1)],1) + dx=path[:,1:]-path[:,:-1]; mid=(path[:,1:]+path[:,:-1])/2 + g=self.metric(mid.reshape(-1,d)).reshape(B,N+1,d,d) + energy=torch.einsum('bni,bnij,bnj->',dx,g,dx) + if energy.item()!=energy.item(): + t_full=torch.linspace(0,1,N+2,device=dev).view(1,-1,1) + lin=xs.unsqueeze(1)*(1-t_full)+xe.unsqueeze(1)*t_full + for n,p in self.metric.named_parameters(): p.requires_grad_(ps[n]) + return GeodesicResult(lin,float('inf'),False,it) + energy.backward(); opt.step(); iters=it+1; cur=energy.item() + if abs(prev-cur)/(abs(prev)+1e-10)=1 else surprise.unsqueeze(0).unsqueeze(0) + if s.shape[0]!=f.shape[0]: s=s.expand(f.shape[0],-1) + f=f*self.sg(s) + return f + +class DirectionPredictor(nn.Module): + def __init__(self, d_M, d_F): + super().__init__() + self.net=nn.Sequential(nn.Linear(d_M+d_F,4*d_M),nn.SiLU(), + nn.LayerNorm(4*d_M),nn.Linear(4*d_M,d_M)) + def forward(self, x, f): + return 
class DirectionPredictor(nn.Module):
    """Predicts a unit direction vector in base space from (base, fiber)."""

    def __init__(self, d_M, d_F):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(d_M + d_F, 4 * d_M), nn.SiLU(),
            nn.LayerNorm(4 * d_M), nn.Linear(4 * d_M, d_M),
        )

    def forward(self, x, f):
        raw = self.net(torch.cat([x, f], -1))
        return F.normalize(raw, dim=-1, eps=1e-8)


class EmptyStateNet(nn.Module):
    """Synthesizes a placeholder fiber state for a query with no retrieved memories."""

    def __init__(self, d_M, d_F):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(d_M + d_F, 2 * d_F), nn.SiLU(), nn.LayerNorm(2 * d_F),
            nn.Linear(2 * d_F, d_F),
        )

    def forward(self, xq, fq):
        joint = torch.cat([xq, fq], -1)
        return self.net(joint)


class WriteGate(nn.Module):
    """Scalar gate in (0, 1) deciding how strongly a hidden state is written to memory."""

    def __init__(self, c):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(c.d_LLM + 1, c.d_LLM // 4), nn.SiLU(),
            nn.Linear(c.d_LLM // 4, 1),
        )

    def forward(self, h, surprise):
        # Broadcast the surprise scalar/vector to one column per batch row.
        if surprise.dim() >= 1:
            s = surprise.view(-1, 1)
        else:
            s = surprise.unsqueeze(0).unsqueeze(0)
        if s.shape[0] != h.shape[0]:
            s = s[:h.shape[0]]
        gate_in = torch.cat([h, s], -1)
        return torch.sigmoid(self.net(gate_in).squeeze(-1))


class RetentionScorer(nn.Module):
    """Scores in [0, 1] how worth keeping a stored memory is, from its state and usage stats."""

    def __init__(self, c):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(c.d_M + c.d_F + 3, 64), nn.SiLU(),
            nn.Linear(64, 64), nn.SiLU(),
            nn.Linear(64, 1), nn.Sigmoid(),
        )

    def forward(self, base, fiber, surprise, dt, cnt):
        def _as_column(t):
            # 1-D inputs get a trailing feature axis; 2-D pass through.
            return t.unsqueeze(-1) if t.dim() == 1 else t

        feats = torch.cat(
            [base, fiber, _as_column(surprise), _as_column(dt), _as_column(cnt.float())], -1)
        return self.net(feats).squeeze(-1)


class RetrievalReranker(nn.Module):
    """Adds a small learned correction (clipped to ±clip) to direction-similarity scores.

    The last layer is zero-initialized so the reranker starts as the identity.
    """

    def __init__(self, d_M, d_F, clip=0.2):
        super().__init__()
        self.clip = clip
        in_dim = 2 * d_M + 2 * d_F + 1
        self.net = nn.Sequential(
            nn.Linear(in_dim, 128), nn.SiLU(), nn.LayerNorm(128),
            nn.Linear(128, 64), nn.SiLU(), nn.LayerNorm(64),
            nn.Linear(64, 1),
        )
        nn.init.zeros_(self.net[-1].weight)
        nn.init.zeros_(self.net[-1].bias)

    def forward(self, xq, fq, xc, fc, dir_sim):
        B, C = xc.shape[:2]
        feats = torch.cat([
            xq.unsqueeze(1).expand(-1, C, -1),
            fq.unsqueeze(1).expand(-1, C, -1),
            xc, fc, dir_sim.unsqueeze(-1),
        ], -1)
        delta = self.net(feats).squeeze(-1).clamp(-self.clip, self.clip)
        return dir_sim + delta


class ContentBypass(nn.Module):
    """Gated projection of a fiber summary straight into LLM embedding space."""

    def __init__(self, d_F, d_LLM, gate_bias=0.5):
        super().__init__()
        self.proj = nn.Sequential(
            nn.Linear(d_F, 2 * d_LLM), nn.SiLU(), nn.LayerNorm(2 * d_LLM),
            nn.Linear(2 * d_LLM, d_LLM), nn.LayerNorm(d_LLM),
        )
        self.gate_net = nn.Sequential(
            nn.Linear(d_F + d_LLM, 128), nn.SiLU(), nn.Linear(128, 1),
        )
        # Positive bias opens the gate slightly at init.
        nn.init.constant_(self.gate_net[-1].bias, gate_bias)
        nn.init.normal_(self.proj[3].weight, std=0.02)
        nn.init.zeros_(self.proj[3].bias)
        self._last_gate = None  # detached gate value, kept for diagnostics

    def forward(self, fiber_summary, qformer_context):
        projected = self.proj(fiber_summary)
        gate_in = torch.cat([fiber_summary, qformer_context], -1)
        g = torch.sigmoid(self.gate_net(gate_in))
        self._last_gate = g.detach()
        return projected * g


class PrefixSemanticProbe(nn.Module):
    """Attention-pools a soft prefix and decodes it back to fiber space.

    NOTE(review): the L_mem constructor argument is accepted but unused —
    kept for interface compatibility.
    """

    def __init__(self, d_LLM, L_mem, d_F):
        super().__init__()
        self.attn_pool = nn.Linear(d_LLM, 1)
        self.fiber_decode = nn.Sequential(
            nn.Linear(d_LLM, 2 * d_F), nn.SiLU(), nn.LayerNorm(2 * d_F),
            nn.Linear(2 * d_F, d_F),
        )

    def forward(self, prefix):
        scores = self.attn_pool(prefix).squeeze(-1)
        weights = F.softmax(scores, dim=1)
        pooled = (weights.unsqueeze(-1) * prefix).sum(1)
        return self.fiber_decode(pooled)


class PrefixAligner(nn.Module):
    """LayerNorm + learned sigmoid scale matching prefix magnitude to the wte distribution."""

    def __init__(self, d_LLM, init_scale=0.5):
        super().__init__()
        self.ln = nn.LayerNorm(d_LLM)
        self.scale_logit = nn.Parameter(torch.tensor(init_scale))
        self.register_buffer('_target_std', torch.tensor(1.0))
        self._calibrated = False

    def calibrate(self, wte_fp32: torch.Tensor):
        """Estimate the target per-element std from up to 5000 random wte rows."""
        with torch.no_grad():
            V = wte_fp32.shape[0]
            take = min(5000, V)
            idx = torch.randperm(V, device=wte_fp32.device)[:take]
            self._target_std.fill_(float(wte_fp32[idx].std().item()))
        self._calibrated = True

    def forward(self, prefix):
        scale = torch.sigmoid(self.scale_logit) * self._target_std
        return self.ln(prefix) * scale
class ContentSemanticTailHead(nn.Module):
    """Maps a fiber summary to `n_slots` extra prefix embeddings ("semantic tail").

    With n_slots == 0 the module is inert and forward returns None.
    """
    def __init__(self, d_F: int, d_LLM: int, n_slots: int, hidden: int = 1024):
        super().__init__()
        self.n_slots = n_slots; self.d_LLM = d_LLM
        if n_slots == 0: return  # no parameters needed when disabled
        self.shared = nn.Sequential(
            nn.Linear(d_F, hidden), nn.SiLU(), nn.LayerNorm(hidden),
            nn.Linear(hidden, hidden), nn.SiLU(), nn.LayerNorm(hidden))
        self.slot_heads = nn.ModuleList([
            nn.Sequential(nn.Linear(hidden, d_LLM), nn.LayerNorm(d_LLM))
            for _ in range(n_slots)])
        # small init so the tail starts close to neutral
        for head in self.slot_heads:
            nn.init.normal_(head[0].weight, std=0.02); nn.init.zeros_(head[0].bias)

    def forward(self, fiber_summary: torch.Tensor) -> Optional[torch.Tensor]:
        """Return (B, n_slots, d_LLM) slot embeddings, or None when disabled."""
        if self.n_slots == 0: return None
        h = self.shared(fiber_summary)
        slots = [head(h) for head in self.slot_heads]
        return torch.stack(slots, dim=1)


class ContentTokenClassifier:
    """Partitions a tokenizer's vocabulary into content / function / punctuation /
    newline / filler token-id sets, plus "word starter" subsets (tokens whose
    decoded text begins with whitespace, i.e. start a new word in BPE vocabs).

    NOTE(review): reconstructed from a whitespace-mangled diff; the nesting of
    the classification loop in __init__ was inferred from the semantics of the
    id sets — verify against the original file.
    """
    # Generic English words that never count as "content".
    DEFAULT_STOPWORDS = frozenset({
        'the','a','an','is','are','was','were','be','been','being',
        'have','has','had','having','do','does','did','doing',
        'will','would','could','should','may','might','can','shall',
        'and','but','or','nor','for','yet','so',
        'in','on','at','to','of','by','with','from','as','into','through',
        'during','before','after','above','below','between','under','over',
        'that','this','these','those','it','its',
        'he','she','they','we','you','me','him','her','them','us',
        'his','her','their','our','your','my','mine','yours',
        'not','no','if','then','than','when','where','what','which','who',
        'how','all','each','every','both','few','more','most','some','any',
        'also','just','about','very','really','only','even','still','already',
        'up','down','out','off','away','back','here','there','now',
        'too','much','many','such','own','other','another',
        'because','since','while','although','though','until','unless',
        'however','therefore','moreover','furthermore','nevertheless',
        'like','get','got','go','went','gone','come','came',
        'make','made','take','took','give','gave','see','saw','know','knew',
        'think','thought','say','said','tell','told','want','need',
        'use','used','find','found','put','keep','kept','let',
        'seem','become','became','leave','left','call','called',
        'try','tried','ask','asked','work','worked','well','way',
        'thing','things','something','anything','nothing','everything',
        'one','two','first','new','old','good','bad','big','small',
        'long','little','right','same','different','last','next',
        'part','being','going','using','getting','making','looking',
        'coming','taking','having','doing','saying','working','trying',
        'include','includes','including','included'})
    # Semantically weak words that generation should be steered away from.
    DEFAULT_FILLER_WORDS = frozenset({
        'include','includes','including','included',
        'also','just','however','moreover','furthermore',
        'nevertheless','therefore','thus','hence','accordingly',
        'meanwhile','instead','rather','otherwise','additionally',
        'basically','essentially','actually','obviously','clearly',
        'simply','certainly','indeed','probably','perhaps',
        'apparently','presumably','supposedly','regardless',
        'nonetheless','conversely','alternatively','specifically',
        'generally','typically','usually','often','sometimes',
        'particularly','especially','notably',
        'various','several','many','multiple','different','diverse','varied',
        'certain','particular','specific','general','overall','whole','entire',
        'aspect','aspects','feature','features','element','elements',
        'factor','factors','component','components','quality','qualities',
        'example','examples','instance','instances','case','cases',
        'method','methods','approach','approaches','technique_generic',
        'process','processes','system','systems','part','parts',
        'kind','kinds','type','types','sort','sorts',
        'people','person','someone','anyone','everyone',
        'matter','matters','issue','issues','point','points',
        'number','numbers','amount','amounts','level','levels',
        'student','students','practice','practicing',
        'action','actions','role','roles','purpose','purposes',
        'nature','natures','character','characters','condition','conditions',
        'state','states','status','statuses','fact','facts',
        'substance','substances','material','materials','content','contents',
        'context','contexts','task','tasks','duty','duties',
        'operation','operations','performance','performances',
        'activity','activities','topic','topics','subject','subjects',
        'concept','concepts','idea','ideas','notion','notions',
        'result','results','outcome','outcomes','effect','effects',
        'area','areas','region','regions','range','ranges',
        'degree','degrees','extent','extents','period','periods',
        'moment','moments','detail','details','information',
        'piece','pieces','group','groups','set','sets',
        'form','forms','style','styles','mode','modes','version','versions',
        'manner','manners','fashion','fashions','attribute','attributes',
        'property','properties','trait','traits','characteristic','characteristics',
        'place','places','way','ways'})

    def __init__(self, tokenizer, cfg=None, vocab_size=None,
                 min_len=None, strict_min_len=None):
        if cfg is None: cfg = Cfg()
        self.cfg = cfg
        # explicit int args override the corresponding cfg thresholds
        _min_len = min_len if isinstance(min_len, int) else cfg.content_min_len
        _strict_min_len = (strict_min_len if isinstance(strict_min_len, int)
                           else cfg.strict_starter_min_decoded_len)
        self.STOPWORDS = (cfg.stopwords_override if cfg.stopwords_override is not None
                          else self.DEFAULT_STOPWORDS | cfg.stopwords_extra)
        self.FILLER_WORDS = (cfg.filler_words_override if cfg.filler_words_override is not None
                             else self.DEFAULT_FILLER_WORDS | cfg.filler_words_extra)
        if cfg.dedup_filler_from_stop:
            # stopwords already get their own treatment; avoid double-penalizing
            self.FILLER_WORDS = self.FILLER_WORDS - self.STOPWORDS
        self.content_ids: Set[int] = set()
        self.function_ids: Set[int] = set()
        self.punct_ids: Set[int] = set()
        self.newline_ids: Set[int] = set()
        self.filler_ids: Set[int] = set()
        self.word_starter_ids: Set[int] = set()
        self.content_starter_ids: Set[int] = set()
        self.strict_content_starter_ids: Set[int] = set()
        V = int(vocab_size) if vocab_size is not None else int(getattr(tokenizer, 'vocab_size', 50257))
        self._V = V
        for i in range(V):
            try:
                tok_text = tokenizer.decode([i])
            except Exception:
                # undecodable ids are treated as function tokens
                self.function_ids.add(i); continue
            if not isinstance(tok_text, str):
                self.function_ids.add(i); continue
            # leading space/tab marks a BPE "word starter"
            is_word_starter = len(tok_text) > 0 and tok_text[0] in (' ', '\t')
            stripped = tok_text.strip().lower()
            cleaned = ''.join(c for c in stripped if c.isalpha())
            if is_word_starter:
                self.word_starter_ids.add(i)
            if '\n' in tok_text:
                self.newline_ids.add(i); self.function_ids.add(i)
            elif stripped == '' or all(not c.isalnum() for c in stripped):
                self.punct_ids.add(i); self.function_ids.add(i)
            elif len(cleaned) >= _min_len and cleaned not in self.STOPWORDS:
                self.content_ids.add(i)
                if is_word_starter:
                    self.content_starter_ids.add(i)
                    # strict starters: pure-alphabetic, long enough, and not
                    # stop/filler — used for hard gating at generation time
                    if (stripped == cleaned and len(stripped) >= _strict_min_len
                            and stripped not in self.STOPWORDS
                            and stripped not in self.FILLER_WORDS):
                        self.strict_content_starter_ids.add(i)
            else:
                self.function_ids.add(i)
            # NOTE(review): filler membership inferred as independent of the
            # content/function split above — confirm placement in the original
            if cleaned in self.FILLER_WORDS:
                self.filler_ids.add(i)
        # lazily built per-device 0/1 mask tensors
        self._content_tensor = None
        self._content_starter_tensor = None
        self._strict_content_starter_tensor = None
        self._filler_tensor = None

    def _mask_size(self): return int(self._V)

    def content_mask(self, device):
        """0/1 vocab mask of content ids; cached per device."""
        if self._content_tensor is None or self._content_tensor.device != device:
            V = self._mask_size(); m = torch.zeros(V, device=device)
            for i in self.content_ids:
                if i < V: m[i] = 1.0
            self._content_tensor = m
        return self._content_tensor

    def content_starter_mask(self, device):
        """0/1 vocab mask of content word-starter ids; cached per device."""
        if self._content_starter_tensor is None or self._content_starter_tensor.device != device:
            V = self._mask_size(); m = torch.zeros(V, device=device)
            for i in self.content_starter_ids:
                if i < V: m[i] = 1.0
            self._content_starter_tensor = m
        return self._content_starter_tensor

    def strict_content_starter_mask(self, device):
        """0/1 vocab mask of strict content starters; cached per device."""
        if (self._strict_content_starter_tensor is None
                or self._strict_content_starter_tensor.device != device):
            V = self._mask_size(); m = torch.zeros(V, device=device)
            for i in self.strict_content_starter_ids:
                if i < V: m[i] = 1.0
            self._strict_content_starter_tensor = m
        return self._strict_content_starter_tensor

    def filler_mask(self, device):
        """0/1 vocab mask of filler-word ids; cached per device."""
        if self._filler_tensor is None or self._filler_tensor.device != device:
            V = self._mask_size(); m = torch.zeros(V, device=device)
            for i in self.filler_ids:
                if i < V: m[i] = 1.0
            self._filler_tensor = m
        return self._filler_tensor

    def get_content_ids_from_tokens(self, token_ids):
        """Filter a token-id sequence down to its content tokens, order preserved."""
        return [t for t in token_ids if t in self.content_ids]


class MemoryVocabProjector(nn.Module):
    """Projects a fiber summary into wte space and returns cosine scores over the vocab."""
    def __init__(self, d_F, d_LLM):
        super().__init__()
        self.proj = nn.Sequential(
            nn.Linear(d_F, 4 * d_LLM), nn.SiLU(), nn.LayerNorm(4 * d_LLM),
            nn.Linear(4 * d_LLM, 2 * d_LLM), nn.SiLU(), nn.LayerNorm(2 * d_LLM),
            nn.Linear(2 * d_LLM, d_LLM))
        # zero-init so initial vocab bias is uniform
        nn.init.zeros_(self.proj[-1].weight); nn.init.zeros_(self.proj[-1].bias)

    def forward(self, fiber_summary, wte_weight):
        mem_emb = self.proj(fiber_summary)
        mem_n = F.normalize(mem_emb, dim=-1, eps=1e-8)
        wte_n = F.normalize(wte_weight, dim=-1, eps=1e-8)
        return mem_n @ wte_n.T


@dataclass
class MemEntry:
    """One stored memory: geometry (base/fiber/dirn), bookkeeping, and source info."""
    mid: int; base: torch.Tensor; fiber: torch.Tensor; dirn: torch.Tensor
    surprise: float; ts: float; last: float; cnt: int = 0; version: int = 0
    source_text: str = ""
    content_token_ids: List[int] = field(default_factory=list)
    semantic_emb: Optional[torch.Tensor] = None
    expanded_content_ids: List[int] = field(default_factory=list)


class _Node:
    """Tree node: either a leaf holding memory ids or an internal node with children."""
    __slots__ = ('leaf', 'ids', 'children', 'centers', 'depth')

    def __init__(self, d=0):
        self.depth = d; self.leaf = True; self.ids = []; self.children = []; self.centers = None

    def count(self):
        """Total number of memory ids in this subtree."""
        return len(self.ids) if self.leaf else sum(c.count() for c in self.children)


class DirectionTree:
    """Hierarchical index over memory direction vectors, searched by beam search."""

    def __init__(self, c):
        self.c = c; self.root = _Node(); self.store: Dict[int, MemEntry] = {}; self.nid = 0

    def insert(self, m):
        """Add a memory to the store and file it into the tree."""
        self.store[m.mid] = m
        self._ins(self.root, m)

    def _ins(self, nd, m):
        # Descend to the child whose center best matches m.dirn; split
        # over-full leaves on the way back up.
        if nd.leaf:
            nd.ids.append(m.mid)
            if len(nd.ids) > self.c.tree_max_leaf:
                self._split(nd)
        else:
            best = self._best(nd, m.dirn)
            self._ins(nd.children[best], m)
            self._update_centers(nd)
    def update(self, mid, new_base=None, new_fiber=None, new_dirn=None):
        """Update a stored entry in place; re-file it in the tree only if its direction changed."""
        if mid not in self.store: return
        m = self.store[mid]; dc = False
        if new_base is not None: m.base = new_base.detach().clone()
        if new_fiber is not None: m.fiber = new_fiber.detach().clone()
        if new_dirn is not None: dc = True; m.dirn = new_dirn.detach().clone()
        m.version += 1
        # direction determines placement, so only then remove + reinsert + rebalance
        if dc: self._rm(self.root, mid); self._ins(self.root, m); self._rebalance(self.root)

    def _split(self, nd):
        """Split an over-full leaf into <=K children via PCA projection + farthest-point k-means."""
        ids = nd.ids
        if len(ids) < 2: return
        K = min(self.c.tree_K, len(ids))
        if K < 2: return
        dirs = torch.stack([self.store[i].dirn for i in ids])
        centered = dirs - dirs.mean(0)
        # NOTE(review): bare except silently keeps an over-full leaf if SVD fails
        try: _, _, Vh = torch.linalg.svd(centered, full_matrices=False)
        except: return
        n_comp = min(K, dirs.shape[1]); proj = centered @ Vh[:n_comp].T
        asgn = self._farthest_kmeans(proj, K)
        children = []
        for k in range(K):
            ch = _Node(nd.depth + 1); ch.ids = [ids[i] for i in range(len(ids)) if asgn[i] == k]
            if ch.ids: children.append(ch)
        if len(children) <= 1: return  # degenerate clustering: keep the leaf
        nd.leaf = False; nd.children = children; nd.ids = []; self._update_centers(nd)
        # recursively split any child that is still over-full
        for ch in nd.children:
            if ch.leaf and len(ch.ids) > self.c.tree_max_leaf: self._split(ch)

    @staticmethod
    def _farthest_kmeans(data, K, max_iter=50):
        """K-means with farthest-point initialization; empty clusters are reseeded."""
        N = data.shape[0]; K = min(K, N)
        if K <= 0: return torch.zeros(N, dtype=torch.long, device=data.device)
        ctrs = [data[0].clone()]
        for _ in range(K - 1):
            # next center = point farthest from all current centers
            d2 = torch.cdist(data, torch.stack(ctrs)).min(1)[0].pow(2)
            ctrs.append(data[d2.argmax()].clone())
        ctrs = torch.stack(ctrs); asgn = torch.zeros(N, dtype=torch.long, device=data.device)
        for _ in range(max_iter):
            dists = torch.cdist(data, ctrs); new = dists.argmin(1)
            if (new == asgn).all(): break  # converged
            asgn = new
            for k in range(K):
                mk = asgn == k
                if mk.any(): ctrs[k] = data[mk].mean(0)
                else:
                    # reseed an empty cluster at the worst-served point
                    far = dists.min(1)[0].argmax(); ctrs[k] = data[far].clone(); asgn[far] = k
        return asgn

    def _best(self, nd, d):
        """Index of the child whose center has highest dot product with direction d."""
        if nd.centers is None or len(nd.children) == 0: return 0
        return (nd.centers @ d).argmax().item()

    def retrieve(self, qdir, bw=3) -> List[Tuple[int, float]]:
        """Beam search (width bw) down the tree; returns (mid, score) sorted by score desc.

        A leaf item's score is its dot product with qdir plus the accumulated
        path score of the beam that reached it.
        """
        beams: List[Tuple[_Node, float]] = [(self.root, 0.)]
        results: Dict[int, float] = {}
        while beams:
            nb = []
            for nd, sc in beams:
                if nd.leaf:
                    for mid in nd.ids:
                        if mid in self.store:
                            s = (qdir @ self.store[mid].dirn).item() + sc
                            # keep the best score seen for each memory
                            if mid not in results or s > results[mid]: results[mid] = s
                elif nd.centers is not None:
                    sims = nd.centers @ qdir; tk = min(bw, len(nd.children)); _, idxs = sims.topk(tk)
                    for i in idxs: nb.append((nd.children[i.item()], sc + sims[i.item()].item()))
                else:
                    # no centers yet: expand all children with unchanged score
                    for ch in nd.children: nb.append((ch, sc))
            nb.sort(key=lambda x: -x[1]); beams = nb[:bw]
        return sorted(results.items(), key=lambda x: -x[1])

    def remove(self, mid):
        """Delete a memory from the store and the tree, then rebalance."""
        if mid not in self.store: return
        del self.store[mid]; self._rm(self.root, mid); self._rebalance(self.root)

    def _rm(self, nd, mid):
        # Returns True if the id was found and removed in this subtree.
        if nd.leaf:
            if mid in nd.ids: nd.ids.remove(mid); return True
            return False
        return any(self._rm(c, mid) for c in nd.children)

    def _rebalance(self, nd):
        """Bottom-up cleanup: drop empty children, collapse single-child nodes."""
        if nd.leaf: return
        for c in nd.children: self._rebalance(c)
        nd.children = [c for c in nd.children if c.count() > 0]
        if not nd.children: nd.leaf = True; nd.ids = []; nd.centers = None
        elif len(nd.children) == 1:
            # splice the only child into this node
            ch = nd.children[0]; nd.leaf = ch.leaf; nd.ids = ch.ids; nd.children = ch.children; nd.centers = ch.centers
        else: self._update_centers(nd)

    def _update_centers(self, nd):
        """Recompute each child's center as the normalized mean of its directions."""
        cs = []
        for c in nd.children:
            ids = self._collect(c); dirs = [self.store[i].dirn for i in ids if i in self.store]
            if not dirs: continue
            cs.append(F.normalize(torch.stack(dirs).mean(0), dim=0))
        nd.centers = torch.stack(cs) if cs else None

    def _collect(self, nd):
        """All memory ids in a subtree."""
        if nd.leaf: return list(nd.ids)
        return [i for c in nd.children for i in self._collect(c)]

    def rebuild(self):
        """Re-file every stored memory into a fresh tree."""
        ms = list(self.store.values()); self.root = _Node()
        for m in ms: self._ins(self.root, m)

    def verify_consistency(self) -> List[str]:
        """Return a list of human-readable invariant violations (empty when consistent)."""
        errs = []; ti = set(self._collect(self.root)); si = set(self.store.keys())
        if ti != si: errs.append(f"tree≠store: tree_only={ti-si}, store_only={si-ti}")
        if self.root.count() != len(self.store): errs.append(f"count mismatch")
        return errs

    def max_depth(self):
        """Depth of the deepest node in the tree."""
        def _d(nd):
            if nd.leaf: return nd.depth
            return max(_d(c) for c in nd.children) if nd.children else nd.depth
        return _d(self.root)

    def leaf_size_violations(self) -> List[Tuple[int, int]]:
        """(depth, size) for every leaf exceeding tree_max_leaf."""
        vios = []
        def _walk(nd):
            if nd.leaf:
                if len(nd.ids) > self.c.tree_max_leaf: vios.append((nd.depth, len(nd.ids)))
            else:
                for c in nd.children: _walk(c)
        _walk(self.root); return vios


class FiberAttn(nn.Module):
    """Multi-head self-attention over [query fiber; candidate fibers].

    Supports an additive direction-similarity bias on memory keys and a 0/1
    memory mask; returns the updated candidate fibers (query slot dropped).
    """

    def __init__(self, c):
        super().__init__()
        self.nh = c.n_heads_fiber; self.hd = c.d_F // c.n_heads_fiber
        self.Wq = nn.Linear(c.d_F, c.d_F, bias=False); self.Wk = nn.Linear(c.d_F, c.d_F, bias=False)
        self.Wv = nn.Linear(c.d_F, c.d_F, bias=False); self.Wo = nn.Linear(c.d_F, c.d_F, bias=False)
        self.n1 = nn.LayerNorm(c.d_F)
        self.ff = nn.Sequential(nn.Linear(c.d_F, 2 * c.d_F), nn.GELU(), nn.Linear(2 * c.d_F, c.d_F))
        self.n2 = nn.LayerNorm(c.d_F)

    def forward(self, qf, mf, mem_mask=None, dir_bias=None):
        B, C, d = mf.shape; nh = self.nh; hd = self.hd; S = 1 + C
        seq = torch.cat([qf.unsqueeze(1), mf], 1)  # query fiber occupies slot 0
        Q = self.Wq(seq).reshape(B, S, nh, hd).permute(0, 2, 1, 3)
        K = self.Wk(seq).reshape(B, S, nh, hd).permute(0, 2, 1, 3)
        V = self.Wv(seq).reshape(B, S, nh, hd).permute(0, 2, 1, 3)
        a = (Q @ K.transpose(-2, -1)) / math.sqrt(hd)
        if dir_bias is not None:
            # bias applies to memory keys only; the query slot gets zero bias
            db = dir_bias.unsqueeze(1).unsqueeze(2)
            pad = torch.zeros(B, 1, 1, 1, **_dev(a)); a = a + torch.cat([pad, db], -1)
        if mem_mask is not None:
            # query slot is always attendable; masked memories get -1e9
            qm = torch.ones(B, 1, **_dev(mem_mask)); full = torch.cat([qm, mem_mask], 1)
            a = a.masked_fill(full.unsqueeze(1).unsqueeze(2) == 0, -1e9)
        a = F.softmax(a, -1); out = (a @ V).permute(0, 2, 1, 3).reshape(B, S, d)
        out = self.n1(seq + self.Wo(out)); out = self.n2(out + self.ff(out))
        return out[:, 1:]  # drop the query slot
class QFormerLayer(nn.Module):
    """One Q-Former block: self-attention over queries, cross-attention to fiber KV, FFN."""
    def __init__(self, c):
        super().__init__(); d = c.d_LLM; nh = c.bridge_heads
        self.sa = nn.MultiheadAttention(d, nh, batch_first=True)
        self.ca = nn.MultiheadAttention(d, nh, batch_first=True)
        self.ff = nn.Sequential(nn.Linear(d, 4 * d), nn.GELU(), nn.Linear(4 * d, d))
        self.n1 = nn.LayerNorm(d); self.n2 = nn.LayerNorm(d); self.n3 = nn.LayerNorm(d)

    def forward(self, q, k, v, kv_mask=None):
        h = self.n1(q); q = q + self.sa(h, h, h)[0]; h = self.n2(q)
        kpm = None
        if kv_mask is not None:
            kpm = (kv_mask == 0); all_m = kpm.all(dim=-1)
            # a fully-masked row would make softmax NaN; un-mask it entirely instead
            if all_m.any(): kpm = kpm.clone(); kpm[all_m] = False
        q = q + self.ca(h, k, v, key_padding_mask=kpm)[0]
        return q + self.ff(self.n3(q))


class QFormerProj(nn.Module):
    """Projects retrieved fibers into L_mem learned query slots in LLM space."""
    def __init__(self, c):
        super().__init__()
        self.q = nn.Parameter(torch.randn(c.L_mem, c.d_LLM) * 0.02)  # learned query slots
        self.fkv = nn.Linear(c.d_F, c.d_LLM * 2)  # fiber -> (key, value)
        self.layers = nn.ModuleList([QFormerLayer(c) for _ in range(c.bridge_layers)])
        self.norm = nn.LayerNorm(c.d_LLM)

    def forward(self, fibers, mem_mask=None):
        B = fibers.shape[0]; kv = self.fkv(fibers); k, v = kv.chunk(2, -1)
        q = self.q.unsqueeze(0).expand(B, -1, -1)
        for l in self.layers: q = l(q, k, v, kv_mask=mem_mask)
        return self.norm(q)


class AdaptiveLayerPool(nn.Module):
    """Softmax-weighted sum over a list of hidden states (one weight per layer).

    NOTE(review): the `d` constructor argument is unused — kept for interface
    compatibility.
    """
    def __init__(self, n, d):
        super().__init__(); self.w = nn.Parameter(torch.linspace(-2, 2, n))

    def forward(self, hs):
        w = F.softmax(self.w, 0); return sum(w[i] * h for i, h in enumerate(hs))

    def weight_dist(self): return F.softmax(self.w.detach(), 0)


class StateExtractor(nn.Module):
    """Attention-pools a (B, T, d_LLM) sequence and maps it to base and fiber states."""
    def __init__(self, c):
        super().__init__()
        pos_dim = 5  # raw position + two sin/cos harmonics
        self.sc = nn.Sequential(nn.Linear(c.d_LLM + pos_dim, c.d_LLM // 4), nn.Tanh(),
                                nn.Linear(c.d_LLM // 4, 1))
        self.tb = nn.Linear(c.d_LLM, c.d_M); self.tf = nn.Linear(c.d_LLM, c.d_F)

    def _pos_feat(self, T, ref):
        # (T, 5) positional features on ref's device/dtype
        pos = torch.linspace(0, 1, T, **_dev(ref))
        return torch.stack([pos, torch.sin(pos * math.pi), torch.cos(pos * math.pi),
                            torch.sin(2 * pos * math.pi), torch.cos(2 * pos * math.pi)], -1)

    def forward(self, h, mask=None):
        B, T, _ = h.shape; pf = self._pos_feat(T, h).unsqueeze(0).expand(B, -1, -1)
        s = self.sc(torch.cat([h, pf], -1)).squeeze(-1)
        if mask is not None and mask.shape[1] == T:
            s = s.masked_fill(mask == 0, -1e9)  # exclude padding from pooling
        w = F.softmax(s, -1); p = (w.unsqueeze(-1) * h).sum(1)
        return self.tb(p), self.tf(p)


class EmbBridge(nn.Module):
    """Turns retrieved fibers into an aligned soft-prefix for the LLM.

    Pipeline: Q-Former projection (+positional embedding) -> gated content
    bypass -> aligner -> optional semantic tail slots -> filler-direction
    projection and per-slot norm clamping.
    """
    def __init__(self, c):
        super().__init__(); self.c = c
        self.proj = QFormerProj(c); self.ext = StateExtractor(c)
        self.pe = nn.Parameter(torch.randn(c.L_mem, c.d_LLM) * 0.02)
        self.bypass = ContentBypass(c.d_F, c.d_LLM, gate_bias=c.bypass_init_gate_bias)
        self.aligner = PrefixAligner(c.d_LLM, c.prefix_init_scale)
        self.tail_head = ContentSemanticTailHead(
            c.d_F, c.d_LLM,
            n_slots=c.content_tail_slots if c.use_content_semantic_tail else 0,
            hidden=c.tail_head_hidden)
        # diagnostics captured by the most recent inject() call
        self._last_inject_diag = {}
        self._last_fiber_summary = None
        self._last_tail_slots = None

    def _build_body_prefix(self, fibers, mem_mask, fiber_summary):
        """Q-Former slots + PE, optionally enriched by the gated bypass, then aligned."""
        qf_out = self.proj(fibers, mem_mask) + self.pe.unsqueeze(0)
        bp_out = None; gate_val = None
        if fiber_summary is not None:
            qf_context = qf_out.mean(1)
            bp_out = self.bypass(fiber_summary, qf_context)
            gate_val = self.bypass._last_gate
            qf_out = qf_out + bp_out.unsqueeze(1)  # broadcast bypass over all slots
        qf_out = self.aligner(qf_out)
        return qf_out, bp_out, gate_val

    def _apply_filler_projection_and_clamp(self, qf_out, filler_centroid):
        """Remove the filler direction from the last slots and clamp per-slot norms."""
        L = qf_out.shape[1]
        filler_dir_used = False
        if self.c.use_filler_direction_projection and filler_centroid is not None:
            n_proj = min(self.c.filler_projection_last_slots, L)
            fd = filler_centroid.view(1, 1, -1)
            mask_slot = torch.zeros(L, device=qf_out.device)
            mask_slot[L - n_proj:] = 1.0  # only the trailing n_proj slots
            mask_slot = mask_slot.view(1, -1, 1)
            comp = (qf_out * fd).sum(-1, keepdim=True)
            qf_out = qf_out - comp * fd * mask_slot  # subtract the filler component
            filler_dir_used = True
        if self.c.use_prefix_norm_clamp:
            # cap each slot's norm at a multiple of the calibrated wte norm
            target_std = self.aligner._target_std.item()
            target_norm = target_std * math.sqrt(self.c.d_LLM)
            max_allowed = target_norm * self.c.prefix_norm_clamp_ratio
            slot_norms = qf_out.norm(dim=-1, keepdim=True).clamp(min=1e-8)
            scale = torch.clamp(max_allowed / slot_norms, max=1.0)
            qf_out = qf_out * scale
        return qf_out, filler_dir_used

    def inject(self, fibers, mem_mask=None, fiber_summary=None, filler_centroid=None):
        """Build the full soft prefix; records diagnostics in _last_inject_diag."""
        qf_out, bp_out, gate_val = self._build_body_prefix(fibers, mem_mask, fiber_summary)
        tail_slots_used = 0
        if (self.c.use_content_semantic_tail and self.c.content_tail_slots > 0
                and fiber_summary is not None):
            tail = self.tail_head(fiber_summary)
            tail = self.aligner(tail)
            n = self.c.content_tail_slots
            # the last n slots are replaced by the semantic tail
            qf_out = torch.cat([qf_out[:, :-n, :], tail], dim=1)
            tail_slots_used = n
            self._last_tail_slots = tail.detach()
        else:
            self._last_tail_slots = None
        qf_out, filler_dir_used = self._apply_filler_projection_and_clamp(qf_out, filler_centroid)
        self._last_fiber_summary = (fiber_summary.detach()
                                    if fiber_summary is not None else None)
        self._last_inject_diag = {
            'bypass_gate': gate_val.mean().item() if gate_val is not None else None,
            'qf_norm': qf_out.norm().item(),
            'bypass_norm': bp_out.norm().item() if bp_out is not None else 0.0,
            'aligner_scale': (torch.sigmoid(self.aligner.scale_logit).item()
                              * self.aligner._target_std.item()),
            'last_slot_norm_per_b': qf_out[:, -1].norm(dim=-1).mean().item(),
            'tail_slots_used': tail_slots_used,
            'filler_dir_projected': filler_dir_used}
        return qf_out

    def build_neutral_prefix(self, B, device):
        """Memory-free prefix (positional embeddings only), aligned and clamped."""
        qf_out = self.pe.unsqueeze(0).expand(B, -1, -1).contiguous()
        qf_out = self.aligner(qf_out)
        if self.c.use_prefix_norm_clamp:
            target_std = self.aligner._target_std.item()
            target_norm = target_std * math.sqrt(self.c.d_LLM)
            max_allowed = target_norm * self.c.prefix_norm_clamp_ratio
            slot_norms = qf_out.norm(dim=-1, keepdim=True).clamp(min=1e-8)
            scale = torch.clamp(max_allowed / slot_norms, max=1.0)
            qf_out = qf_out * scale
        return qf_out


class LossWarmup:
    """Linear 0->1 warmup weight per loss name over a configured number of steps."""
    def __init__(self, schedules: Dict[str, int]):
        self.schedules = schedules; self.step_count = 0

    def weight(self, name: str) -> float:
        """Current weight for a loss; 1.0 for unknown names or non-positive schedules."""
        ws = self.schedules.get(name, 0)
        if ws <= 0: return 1.0
        return min(1.0, self.step_count / max(ws, 1))

    def advance(self): self.step_count += 1
class GradientMonitor:
    """Tracks named module groups and reports per-group gradient norms."""

    def __init__(self):
        self._groups: Dict[str, nn.Module] = {}

    def register(self, name: str, mod: nn.Module):
        self._groups[name] = mod

    def snapshot(self) -> Dict[str, float]:
        """Return {group name: L2 norm over all parameter grads}; 0.0 if no grads."""
        norms = {}
        for name, mod in self._groups.items():
            sq_total = 0.0
            n_with_grad = 0
            for p in mod.parameters():
                if p.grad is not None:
                    sq_total += p.grad.norm().item() ** 2
                    n_with_grad += 1
            norms[name] = math.sqrt(sq_total) if n_with_grad > 0 else 0.0
        return norms


class DegenerationGuard:
    """In-place logit shaping against early punctuation/newlines, premature EOS,
    short-range repetition, and runs of consecutive punctuation."""

    def __init__(self, tok, cfg, content_classifier=None):
        self.tok = tok
        self.cfg = cfg
        self.cc = content_classifier

    def process(self, logits, generated_ids, step):
        """Mutate and return `logits` (row 0 is the active beam) for sampling step `step`."""
        punct = self.cc.punct_ids if self.cc else set()
        newline = self.cc.newline_ids if self.cc else set()
        vocab = logits.shape[-1]
        # 1) early steps: push punctuation and newlines down so content comes first
        if step < self.cfg.early_content_steps:
            for tid in punct:
                if tid < vocab:
                    logits[0, tid] -= self.cfg.degen_early_punct_penalty
            for tid in newline:
                if tid < vocab:
                    logits[0, tid] -= self.cfg.degen_early_newline_penalty
        # 2) hard-mask EOS until a minimum number of tokens is produced
        eos = self.tok.eos_token_id
        if step < self.cfg.degen_min_tokens and eos is not None and eos < vocab:
            logits[0, eos] = -float('inf')
        # 3) repetition penalty over the last 30 generated tokens
        rp = self.cfg.degen_repeat_penalty
        for tid in set(generated_ids[-30:]):
            if tid < vocab:
                cur = logits[0, tid]
                logits[0, tid] = cur / rp if cur > 0 else cur * rp
        # 4) if the last `mc` tokens are all punctuation, penalize all punctuation
        mc = self.cfg.degen_max_consec_punct
        if len(generated_ids) >= mc and all(t in punct for t in generated_ids[-mc:]):
            for tid in punct:
                if tid < vocab:
                    logits[0, tid] -= 10.0
        return logits


@dataclass
class RetrievalDiag:
    """Diagnostics of one retrieval pass: counts after each gate/filter stage,
    per-memory scores, and which optional mechanisms fired."""
    was_flat_scan: bool = False
    recall_count: int = 0
    reranker_delta_mean: float = 0.0
    fiber_summary_norm: float = 0.0
    top_reranker_score: float = 0.0
    top_dir_sim: float = 0.0
    top_sem_sim: float = 0.0
    top_forward_maxsim: float = 0.0
    top_backward_maxsim: float = 0.0
    top_bidi_min: float = 0.0
    top_gate_affinity: float = 0.0
    gate_threshold: float = 0.0
    n_gate_pass: int = 0
    n_candidates_initial: int = 0
    n_after_strict_overlap_gate: int = 0
    n_after_upstream_semantic_gate: int = 0
    n_after_hard_filter: int = 0
    n_after_score_filter: int = 0
    n_after_coherence_filter: int = 0
    n_after_bidi_gap_filter: int = 0
    n_after_mean_center: int = 0
    mean_center_applied: bool = False
    mean_center_dropped_ids: List[int] = field(default_factory=list)
    mean_center_raw_scores: Dict[int, float] = field(default_factory=dict)
    mean_center_final_scores: Dict[int, float] = field(default_factory=dict)
    hungarian_used: bool = False
    batch_mem_weights: List[List[Tuple[int, float]]] = field(default_factory=list)
    per_memory_forward_maxsim: Dict[int, float] = field(default_factory=dict)
    per_memory_bidi_min: Dict[int, float] = field(default_factory=dict)
    per_memory_sem_sim: Dict[int, float] = field(default_factory=dict)
    per_memory_gate_affinity: Dict[int, float] = field(default_factory=dict)
    per_memory_strict_overlap: Dict[int, int] = field(default_factory=dict)
    dominant_per_batch: List[Optional[int]] = field(default_factory=list)
    dominant_memory_id: Optional[int] = None
    non_dominant_per_batch: List[List[int]] = field(default_factory=list)
    non_dominant_weights_per_batch: List[Dict[int, float]] = field(default_factory=list)
    idf_applied: bool = False
    centroid_applied: bool = False
    top_centroid_cosine: float = 0.0
    per_memory_centroid_cosine: Dict[int, float] = field(default_factory=dict)
    upstream_semantic_gate_applied: bool = False
    upstream_gate_dropped_ids: List[int] = field(default_factory=list)
    strict_overlap_gate_applied: bool = False
    strict_overlap_dropped_ids: List[int] = field(default_factory=list)
self.fib=FibEncoder(c) + self.dir_pred=DirectionPredictor(c.d_M,c.d_F) + self.write_gate=WriteGate(c); self.retention=RetentionScorer(c) + self.attn=FiberAttn(c); self.empty_state=EmptyStateNet(c.d_M,c.d_F) + self.contrast_proj_f=nn.Linear(c.d_F,c.d_M,bias=False) + self.contrast_proj_x=nn.Linear(c.d_M,c.d_M,bias=False) + nn.init.eye_(self.contrast_proj_x.weight) + self.reranker=RetrievalReranker(c.d_M,c.d_F,clip=c.reranker_clip) + self.tree=DirectionTree(c); self.time=0. + self.wte_normed: Optional[torch.Tensor] = None + + def surprise_proxy(self, logits, tgt): + nll=-F.log_softmax(logits,-1).gather(2,tgt.unsqueeze(-1)).squeeze(-1) + T=nll.shape[1] + if T==0: return logits.new_zeros(logits.shape[0]) + w=torch.linspace(0.5,1.5,T,**_dev(nll)); w=w/w.sum()*T + return (nll*w.unsqueeze(0)).mean(-1) + + def _compute_dirn(self, base, fiber): + with torch.no_grad(): + return self.dir_pred(base.unsqueeze(0),fiber.unsqueeze(0)).squeeze(0) + + def _get_mem_scoring_ids(self, mem): + if self.c.retrieval_use_expanded_ids and mem.expanded_content_ids: + return mem.expanded_content_ids + return mem.content_token_ids + + def _compute_corpus_idf(self, content_classifier) -> Dict[int, float]: + s = self.c.tfidf_smoothing + N = len(self.tree.store) + if N == 0: return {} + df: Dict[int, int] = {} + for mem in self.tree.store.values(): + label_set = (set(t for t in mem.content_token_ids + if t in content_classifier.content_starter_ids) + if content_classifier is not None else set(mem.content_token_ids)) + for t in label_set: + df[t] = df.get(t, 0) + 1 + return {t: math.log((N + s) / (d + s)) + 1.0 for t, d in df.items()} + + @staticmethod + def _compute_idf_weighted_centroid(token_ids, wte_normed, corpus_idf, idf_floor=0.1): + if not token_ids or wte_normed is None: return None + V = wte_normed.shape[0] + valid = [t for t in token_ids if t < V] + if not valid: return None + if corpus_idf is not None and len(corpus_idf) > 0: + weights = torch.tensor( + [max(corpus_idf.get(t, idf_floor), 
idf_floor) for t in valid], + device=wte_normed.device, dtype=wte_normed.dtype) + else: + weights = torch.ones(len(valid), device=wte_normed.device, dtype=wte_normed.dtype) + vecs = wte_normed[valid] + centroid = (vecs * weights.unsqueeze(1)).sum(0) / weights.sum().clamp(min=1e-8) + return F.normalize(centroid, dim=-1, eps=1e-8) + + def _compute_forward_hungarian(self, query_ids, mem_ids, wte_normed, + query_idf=None, idf_floor=0.1): + if not query_ids or not mem_ids: return 0.0 + V = wte_normed.shape[0] + q_valid = [q for q in query_ids if q < V] + m_valid = [m for m in mem_ids if m < V] + if not q_valid or not m_valid: return 0.0 + n_q, n_m = len(q_valid), len(m_valid) + q_vecs = wte_normed[q_valid]; m_vecs = wte_normed[m_valid] + sim = q_vecs @ m_vecs.T + if max(n_q, n_m) > self.c.hungarian_max_n: + max_per_q = sim.max(dim=1).values + if query_idf is not None: + w = torch.tensor( + [max(query_idf.get(q, idf_floor), idf_floor) for q in q_valid], + device=wte_normed.device, dtype=sim.dtype) + return ((max_per_q * w).sum() / w.sum().clamp(min=1e-8)).item() + return max_per_q.mean().item() + pairs, _ = hungarian_max_assignment(sim) + if pairs.numel() == 0: return 0.0 + matched_sims = sim[pairs[:, 0], pairs[:, 1]] + if query_idf is not None: + q_ids_for_pairs = [q_valid[int(r.item())] for r in pairs[:, 0]] + w = torch.tensor( + [max(query_idf.get(q, idf_floor), idf_floor) for q in q_ids_for_pairs], + device=wte_normed.device, dtype=matched_sims.dtype) + return ((matched_sims * w).sum() / w.sum().clamp(min=1e-8)).item() + return matched_sims.mean().item() + + @staticmethod + def _compute_forward_maxsim(query_ids, mem_ids, wte_normed, query_idf=None, idf_floor=0.1): + if not query_ids or not mem_ids: return 0.0 + V = wte_normed.shape[0] + q_valid = [q for q in query_ids if q < V] + m_valid = [m for m in mem_ids if m < V] + if not q_valid or not m_valid: return 0.0 + q_vecs = wte_normed[q_valid]; m_vecs = wte_normed[m_valid] + sim = q_vecs @ m_vecs.T + max_per_q = 
sim.max(dim=1).values + if query_idf is not None: + weights = torch.tensor( + [max(query_idf.get(q, idf_floor), idf_floor) for q in q_valid], + device=wte_normed.device, dtype=sim.dtype) + total = weights.sum().clamp(min=1e-8) + return ((max_per_q * weights).sum() / total).item() + return max_per_q.mean().item() + + @staticmethod + def _compute_backward_maxsim(query_ids, mem_ids, wte_normed, query_idf=None, idf_floor=0.1): + if not query_ids or not mem_ids: return 0.0 + V = wte_normed.shape[0] + q_valid = [q for q in query_ids if q < V] + m_valid = [m for m in mem_ids if m < V] + if not q_valid or not m_valid: return 0.0 + q_vecs = wte_normed[q_valid]; m_vecs = wte_normed[m_valid] + sim = q_vecs @ m_vecs.T + max_per_m_vals, max_per_m_idx = sim.max(dim=0) + if query_idf is not None: + q_weights = torch.tensor( + [max(query_idf.get(q, idf_floor), idf_floor) for q in q_valid], + device=wte_normed.device, dtype=sim.dtype) + matched_weights = q_weights[max_per_m_idx] + total = matched_weights.sum().clamp(min=1e-8) + return ((max_per_m_vals * matched_weights).sum() / total).item() + return max_per_m_vals.mean().item() + + def _compute_bidi_min(self, q_ids, m_ids, wte_normed, query_idf, idf_floor): + fwd = (self._compute_forward_hungarian(q_ids, m_ids, wte_normed, query_idf, idf_floor) + if self.c.use_hungarian_fwd + else self._compute_forward_maxsim(q_ids, m_ids, wte_normed, query_idf, idf_floor)) + bwd = self._compute_backward_maxsim(q_ids, m_ids, wte_normed, query_idf, idf_floor) + return fwd, bwd, min(fwd, bwd) + + @staticmethod + def _count_strict_overlap_matches(q_strict_ids, m_strict_ids, wte_normed, sim_threshold): + if not q_strict_ids or not m_strict_ids or wte_normed is None: return 0 + V = wte_normed.shape[0] + q_valid = [t for t in q_strict_ids if t < V] + m_valid = [t for t in m_strict_ids if t < V] + if not q_valid or not m_valid: return 0 + dev = wte_normed.device + q_vecs = wte_normed[torch.tensor(q_valid, device=dev)] + m_vecs = 
wte_normed[torch.tensor(m_valid, device=dev)] + sim = q_vecs @ m_vecs.T + has_match = (sim >= sim_threshold).any(dim=1) + return int(has_match.sum().item()) + + def _check_consolidation_compatible(self, existing_content_ids, new_content_ids): + if not existing_content_ids or not new_content_ids: return True + if self.wte_normed is None: return True + _, _, m = self._compute_bidi_min(existing_content_ids, new_content_ids, + self.wte_normed, None, self.c.idf_floor) + return m >= self.c.consol_maxsim_min + + def store_mem(self, h, surp, training_mode=False, source_text="", + content_token_ids=None, content_semantic_emb=None, expanded_content_ids=None): + dev=h.device; h2=h.unsqueeze(0) + x=self.ctx(h2).squeeze(0).detach() + s=surp if isinstance(surp,torch.Tensor) else torch.tensor(surp,**_dev(h)) + sv=s.view(1) if s.dim()<=1 else s + f=self.fib(h2,x.unsqueeze(0),sv).squeeze(0).detach() + d=self._compute_dirn(x,f) + sem_emb=content_semantic_emb if content_semantic_emb is not None else h.detach().clone() + ct_ids=content_token_ids or []; exp_ids=expanded_content_ids or [] + if self.tree.store: + scored=self.tree.retrieve(d.detach(),bw=1)[:5] + for mid,_ in scored: + if mid in self.tree.store: + ex=self.tree.store[mid] + dist=self.metric.midpoint_approx_distance( + x.unsqueeze(0),ex.base.unsqueeze(0).to(dev)).item() + if dist= self.c.strict_overlap_min_matches + n_pass = int(pass_mask.sum().item()) + if n_pass < self.c.strict_overlap_min_keep: + keep_n = max(self.c.strict_overlap_min_keep, 1) + _, top_keep = overlap_counts.topk(min(keep_n, len(mems))) + pass_mask = torch.zeros(len(mems), dtype=torch.bool, device=dev) + pass_mask[top_keep] = True + dropped_local = (~pass_mask).nonzero(as_tuple=True)[0].tolist() + diag.strict_overlap_dropped_ids = [mems[i].mid for i in dropped_local] + diag.strict_overlap_gate_applied = True + keep_local = pass_mask.nonzero(as_tuple=True)[0] + if keep_local.numel() < len(mems): + mems = [mems[i] for i in keep_local.tolist()] + 
diag.n_after_strict_overlap_gate = len(mems) + C_init = len(mems) + if C_init == 0: + empty=self.empty_state(xq[b:b+1],fq[b:b+1]) + all_results.append(empty.squeeze(0).unsqueeze(0)) + all_masks.append(torch.ones(1,**_dev(xq))) + all_biases.append(torch.zeros(1,**_dev(xq))) + all_summaries.append(empty.squeeze(0)) + all_batch_mw.append([]); all_dominant.append(None) + all_non_dominant.append([]); all_non_dom_weights.append({}) + continue + sb_all=torch.stack([m.base.to(dev) for m in mems]) + sf_all=torch.stack([m.fiber.to(dev) for m in mems]) + md_all=torch.stack([m.dirn.to(dev) for m in mems]) + sem_sim_all=torch.zeros(C_init, device=dev) + if query_semantic_emb is not None: + for mi, mem in enumerate(mems): + if mem.semantic_emb is not None: + sem_sim_all[mi] = F.cosine_similarity( + query_semantic_emb[b:b+1], + mem.semantic_emb.unsqueeze(0).to(dev),dim=-1).squeeze() + forward_all=torch.zeros(C_init, device=dev) + backward_all=torch.zeros(C_init, device=dev) + bidi_min_all=torch.zeros(C_init, device=dev) + if q_content_ids and wn is not None: + for mi, mem in enumerate(mems): + scoring_ids = self._get_mem_scoring_ids(mem) + fwd, bwd, bmin = self._compute_bidi_min( + q_content_ids, scoring_ids, wn, corpus_idf, idf_floor) + forward_all[mi] = fwd; backward_all[mi] = bwd; bidi_min_all[mi] = bmin + if self.c.use_upstream_semantic_gate and q_content_ids and wn is not None: + fwd_pass = forward_all >= self.c.upstream_gate_fwd_idf_floor + sem_pass = sem_sim_all >= self.c.upstream_gate_sem_floor + pass_mask = (fwd_pass & sem_pass) if self.c.upstream_gate_require_both else (fwd_pass | sem_pass) + n_pass = int(pass_mask.sum().item()) + if n_pass < self.c.upstream_gate_min_keep: + keep_n = max(self.c.upstream_gate_min_keep, 1) + top_keep = forward_all.topk(min(keep_n, C_init)).indices + pass_mask = torch.zeros(C_init, dtype=torch.bool, device=dev) + pass_mask[top_keep] = True + dropped_local = (~pass_mask).nonzero(as_tuple=True)[0].tolist() + if dropped_local: + 
diag.upstream_gate_dropped_ids = [mems[i].mid for i in dropped_local] + diag.upstream_semantic_gate_applied = True + keep_local = pass_mask.nonzero(as_tuple=True)[0] + if keep_local.numel() < C_init: + mems = [mems[i] for i in keep_local.tolist()] + sb_all = sb_all[keep_local]; sf_all = sf_all[keep_local] + md_all = md_all[keep_local]; sem_sim_all = sem_sim_all[keep_local] + forward_all = forward_all[keep_local] + backward_all = backward_all[keep_local] + bidi_min_all = bidi_min_all[keep_local] + C_init = len(mems) + diag.n_after_upstream_semantic_gate = C_init + sb = sb_all; sf = sf_all + sem_sim_t = sem_sim_all; forward_t = forward_all; bidi_min_t = bidi_min_all + raw_dir_sim = torch.einsum('d,cd->c', qdir[b], md_all) + diag.top_dir_sim = raw_dir_sim.max().item() if C_init > 0 else 0.0 + diag.top_sem_sim = sem_sim_t.max().item() if C_init > 0 else 0.0 + diag.top_forward_maxsim = forward_t.max().item() if C_init > 0 else 0.0 + diag.top_backward_maxsim = backward_all.max().item() if C_init > 0 else 0.0 + diag.top_bidi_min = bidi_min_t.max().item() if C_init > 0 else 0.0 + centroid_scores = torch.zeros(C_init, device=dev) + if self.c.use_idf_centroid and q_content_ids and wn is not None: + q_centroid = self._compute_idf_weighted_centroid( + q_content_ids, wn, corpus_idf, idf_floor) + if q_centroid is not None: + for mi, mem in enumerate(mems): + m_scoring_ids = self._get_mem_scoring_ids(mem) + m_centroid = self._compute_idf_weighted_centroid( + m_scoring_ids, wn, corpus_idf, idf_floor) + if m_centroid is not None: + centroid_scores[mi] = (q_centroid @ m_centroid).item() + diag.top_centroid_cosine = centroid_scores.max().item() if C_init > 0 else 0.0 + combined_sim = (self.c.ret_centroid_weight * centroid_scores + + self.c.ret_sem_weight * sem_sim_t + + self.c.ret_bidi_min_weight * bidi_min_t + + self.c.ret_forward_maxsim_weight * forward_t + + self.c.ret_dir_weight * raw_dir_sim) + C = C_init + top_sem = sem_sim_t.max().item() if C > 0 else 0.0 + top_bidi = 
bidi_min_t.max().item() if C > 0 else 0.0 + sem_thresh = max(self.c.gate_sem_floor, top_sem * self.c.gate_sem_ratio) + bidi_thresh = max(self.c.gate_bidi_floor, top_bidi * self.c.gate_bidi_ratio, + self.c.gate_bidi_hard_min) + hard_mask = (sem_sim_t >= sem_thresh) & (bidi_min_t >= bidi_thresh) + gate_affinity = (self.c.gate_sem_weight * sem_sim_t + + self.c.gate_bidi_weight * bidi_min_t) + diag.top_gate_affinity = gate_affinity.max().item() if C > 0 else 0.0 + diag.gate_threshold = max(sem_thresh, bidi_thresh) + diag.n_gate_pass = int(hard_mask.sum().item()) + if hard_mask.sum().item() == 0 and C > 0: + and_score = torch.minimum(sem_sim_t, bidi_min_t) + hard_mask[and_score.argmax()] = True + diag.n_after_hard_filter = int(hard_mask.sum().item()) + for mi, mem in enumerate(mems): + diag.per_memory_gate_affinity[mem.mid] = gate_affinity[mi].item() + keep_indices = hard_mask.nonzero(as_tuple=True)[0] + if keep_indices.numel() > 0 and keep_indices.numel() < C: + mems = [mems[i] for i in keep_indices.tolist()] + sb = sb[keep_indices]; sf = sf[keep_indices] + combined_sim = combined_sim[keep_indices] + raw_dir_sim = raw_dir_sim[keep_indices] + forward_t = forward_t[keep_indices]; bidi_min_t = bidi_min_t[keep_indices] + sem_sim_t = sem_sim_t[keep_indices]; centroid_scores = centroid_scores[keep_indices] + C = len(mems) + rerank_scores = self.reranker( + xq[b:b+1], fq[b:b+1], sb.unsqueeze(0), sf.unsqueeze(0), + combined_sim.unsqueeze(0)).squeeze(0) + diag.reranker_delta_mean = (rerank_scores - combined_sim).abs().mean().item() + diag.top_reranker_score = rerank_scores.max().item() if C > 0 else 0.0 + if C > 1: + top_score = rerank_scores.max() + score_mask = rerank_scores >= top_score * self.c.score_keep_ratio + if score_mask.sum().item() < 1: score_mask[rerank_scores.argmax()] = True + score_keep = score_mask.nonzero(as_tuple=True)[0] + diag.n_after_score_filter = score_keep.numel() + if score_keep.numel() < C: + mems = [mems[i] for i in score_keep.tolist()] + sb = 
sb[score_keep]; sf = sf[score_keep] + rerank_scores = rerank_scores[score_keep] + forward_t = forward_t[score_keep]; bidi_min_t = bidi_min_t[score_keep] + sem_sim_t = sem_sim_t[score_keep]; centroid_scores = centroid_scores[score_keep] + C = len(mems) + else: + diag.n_after_score_filter = C + if C > 1 and forward_t.max().item() > 0: + top_fwd_here = forward_t.max() + coherence_mask = forward_t >= top_fwd_here * self.c.fwd_coherence_ratio + if coherence_mask.sum() >= 1: + coherence_keep = coherence_mask.nonzero(as_tuple=True)[0] + diag.n_after_coherence_filter = coherence_keep.numel() + if coherence_keep.numel() < C: + mems = [mems[i] for i in coherence_keep.tolist()] + sb = sb[coherence_keep]; sf = sf[coherence_keep] + rerank_scores = rerank_scores[coherence_keep] + forward_t = forward_t[coherence_keep]; bidi_min_t = bidi_min_t[coherence_keep] + sem_sim_t = sem_sim_t[coherence_keep]; centroid_scores = centroid_scores[coherence_keep] + C = len(mems) + else: diag.n_after_coherence_filter = C + else: diag.n_after_coherence_filter = C + if C > 1 and bidi_min_t.max().item() > 0: + top_bidi_here = bidi_min_t.max().item() + gap_mask = bidi_min_t >= (top_bidi_here - self.c.bidi_absolute_gap) + if gap_mask.sum() >= 1: + gap_keep = gap_mask.nonzero(as_tuple=True)[0] + diag.n_after_bidi_gap_filter = gap_keep.numel() + if gap_keep.numel() < C: + mems = [mems[i] for i in gap_keep.tolist()] + sb = sb[gap_keep]; sf = sf[gap_keep] + rerank_scores = rerank_scores[gap_keep] + forward_t = forward_t[gap_keep]; bidi_min_t = bidi_min_t[gap_keep] + sem_sim_t = sem_sim_t[gap_keep]; centroid_scores = centroid_scores[gap_keep] + C = len(mems) + else: diag.n_after_bidi_gap_filter = C + else: diag.n_after_bidi_gap_filter = C + raw_composite = (0.4 * centroid_scores + 0.4 * forward_t + + 0.15 * bidi_min_t + 0.05 * sem_sim_t.clamp(min=0)) + if self.c.use_mean_centered_scoring and C >= self.c.mc_require_min_candidates: + C_f = float(C); sum_raw = raw_composite.sum() + centered = (C_f / (C_f - 
1.0)) * raw_composite - sum_raw / (C_f - 1.0) + for mi, mem in enumerate(mems): + diag.mean_center_raw_scores[mem.mid] = raw_composite[mi].item() + diag.mean_center_final_scores[mem.mid] = centered[mi].item() + keep_mask = centered > self.c.mc_keep_margin + n_pass = int(keep_mask.sum().item()) + if n_pass < self.c.mc_min_keep: + keep_n = max(self.c.mc_min_keep, 1) + top_keep = centered.topk(min(keep_n, C)).indices + keep_mask = torch.zeros(C, dtype=torch.bool, device=dev) + keep_mask[top_keep] = True + dropped_local = (~keep_mask).nonzero(as_tuple=True)[0].tolist() + if dropped_local: + diag.mean_center_applied = True + diag.mean_center_dropped_ids = [mems[i].mid for i in dropped_local] + keep_local = keep_mask.nonzero(as_tuple=True)[0] + if keep_local.numel() < C: + mems = [mems[i] for i in keep_local.tolist()] + sb = sb[keep_local]; sf = sf[keep_local] + rerank_scores = rerank_scores[keep_local] + forward_t = forward_t[keep_local]; bidi_min_t = bidi_min_t[keep_local] + sem_sim_t = sem_sim_t[keep_local]; centroid_scores = centroid_scores[keep_local] + raw_composite = raw_composite[keep_local] + C = len(mems) + diag.n_after_mean_center = C + dominant_mid: Optional[int] = None + non_dominant_mids: List[int] = [] + non_dom_weights: Dict[int, float] = {} + if C >= 1: + final_rank = (0.4 * rerank_scores + 0.4 * centroid_scores + 0.2 * forward_t) + dom_idx = int(final_rank.argmax().item()) + dominant_mid = mems[dom_idx].mid + if C > 1: + nd_idx = torch.tensor([i for i in range(C) if i != dom_idx], device=dev) + nd_scores = final_rank[nd_idx] + nd_w = F.softmax(nd_scores / self.c.retrieval_weight_temperature, dim=0) + for j, idx in enumerate(nd_idx.tolist()): + mid_j = mems[idx].mid + non_dominant_mids.append(mid_j) + non_dom_weights[mid_j] = nd_w[j].item() + if not self.training and C > topk: + _, top_idx = rerank_scores.topk(topk) + mems = [mems[i] for i in top_idx.cpu().tolist()] + sb = sb[top_idx]; sf = sf[top_idx] + rerank_scores = rerank_scores[top_idx] + forward_t 
= forward_t[top_idx]; bidi_min_t = bidi_min_t[top_idx] + sem_sim_t = sem_sim_t[top_idx]; centroid_scores = centroid_scores[top_idx] + C = topk + for mi, mem in enumerate(mems): + diag.per_memory_forward_maxsim[mem.mid] = forward_t[mi].item() + diag.per_memory_bidi_min[mem.mid] = bidi_min_t[mi].item() + diag.per_memory_sem_sim[mem.mid] = sem_sim_t[mi].item() + diag.per_memory_centroid_cosine[mem.mid] = centroid_scores[mi].item() + qp = xq[b].unsqueeze(0).expand(C, -1) + geo_r = self.geo.solve(sb, qp) + transported = self.trans(sf, geo_r.path) + if self.training: + ret_s = self.retention(sb, sf, + torch.tensor([m.surprise for m in mems], **_dev(xq)), + torch.tensor([self.time - m.last for m in mems], **_dev(xq)), + torch.tensor([m.cnt for m in mems], **_dev(xq))) + transported = transported * ret_s.unsqueeze(-1) + if update_stats: + for m in mems: m.last = self.time; m.cnt += 1 + final_scores = (0.4 * rerank_scores + 0.4 * centroid_scores + 0.2 * forward_t) + w = F.softmax(final_scores / self.c.retrieval_weight_temperature, dim=0) + fs = (transported * w.unsqueeze(-1)).sum(0) + batch_mw = [(m.mid, w[mi].item()) for mi, m in enumerate(mems)] + all_batch_mw.append(batch_mw) + all_dominant.append(dominant_mid); all_non_dominant.append(non_dominant_mids) + all_non_dom_weights.append(non_dom_weights) + all_results.append(transported); all_masks.append(torch.ones(C, **_dev(xq))) + all_biases.append(final_scores / self.c.tau); all_summaries.append(fs) + maxC = max(r.shape[0] for r in all_results) + padded = []; pm = []; pd = [] + for bi in range(B): + r, mk, db = all_results[bi], all_masks[bi], all_biases[bi]; gap = maxC - r.shape[0] + if gap > 0: + pr = self.empty_state(xq[bi:bi+1], fq[bi:bi+1]).expand(gap, -1) + r = torch.cat([r, pr if self.training else pr.detach()], 0) + mk = torch.cat([mk, torch.zeros(gap, **_dev(xq))]) + db = torch.cat([db, torch.full((gap,), -1e9, **_dev(xq))]) + padded.append(r); pm.append(mk); pd.append(db) + mf = torch.stack(padded); mem_mask = 
torch.stack(pm); dir_bias = torch.stack(pd) + fiber_summary = torch.stack(all_summaries) + diag.fiber_summary_norm = fiber_summary.norm().item() + diag.batch_mem_weights = all_batch_mw + diag.dominant_per_batch = all_dominant + diag.non_dominant_per_batch = all_non_dominant + diag.non_dominant_weights_per_batch = all_non_dom_weights + if diag.dominant_per_batch and diag.dominant_per_batch[0] is not None: + diag.dominant_memory_id = diag.dominant_per_batch[0] + refined = self.attn(fq, mf, mem_mask=mem_mask, dir_bias=dir_bias) + return refined, mem_mask, fiber_summary, diag + + def decay(self): + rm = [] + for mid, m in self.tree.store.items(): + dt = torch.tensor([self.time - m.last], **_dev(m.base)) + cnt = torch.tensor([m.cnt], **_dev(m.base)) + with torch.no_grad(): + sc = self.retention(m.base.unsqueeze(0), m.fiber.unsqueeze(0), + torch.tensor([m.surprise], **_dev(m.base)), dt, cnt).item() + if sc < self.c.retention_gc_threshold: rm.append(mid) + for i in rm: self.tree.remove(i) + return len(rm) + + def consolidate(self): + ms = list(self.tree.store.values()) + if len(ms) < 2: return 0 + merged = set() + for i in range(len(ms)): + if ms[i].mid in merged: continue + for j in range(i+1, len(ms)): + if ms[j].mid in merged: continue + d = self.metric.midpoint_approx_distance( + ms[i].base.unsqueeze(0), ms[j].base.unsqueeze(0)).item() + if d < self.c.consol_dist: + if not self._check_consolidation_compatible( + ms[i].content_token_ids, ms[j].content_token_ids): continue + wi, wj = ms[i].cnt+1, ms[j].cnt+1; t = wi+wj + nb = (ms[i].base*wi + ms[j].base*wj) / t + nf = (ms[i].fiber*wi + ms[j].fiber*wj) / t + nd = self._compute_dirn(nb, nf) + ms[i].base = nb.detach().clone(); ms[i].fiber = nf.detach().clone() + ms[i].dirn = nd.detach().clone(); ms[i].cnt += ms[j].cnt + ms[i].surprise = max(ms[i].surprise, ms[j].surprise); ms[i].version += 1 + if ms[j].source_text and not ms[i].source_text: + ms[i].source_text = ms[j].source_text + ms[i].content_token_ids = 
# ═══════════════════════════════════════════════════════════════════
# MemLLM
# ═══════════════════════════════════════════════════════════════════
@dataclass
class DecodeContext:
    """Public context produced by prepare_decode_context(). Immutable handle
    used by external runners + internal generate()."""
    prefix_cond: torch.Tensor
    prefix_uncond: Optional[torch.Tensor]
    fiber_summary: torch.Tensor
    diag: "RetrievalDiag"
    content_bias: torch.Tensor
    suppression_bias: torch.Tensor
    vocab_bias: Optional[torch.Tensor]


class MemLLM(nn.Module):
    """Memory-augmented LLM wrapper: couples an AMM with a frozen backbone
    via prefix injection and logit shaping. (Further methods follow below.)"""

    def __init__(self, c):
        super().__init__()
        self.c = c
        self.amm = AMM(c)
        self.bridge = EmbBridge(c)
        self.semantic_probe = PrefixSemanticProbe(c.d_LLM, c.L_mem, c.d_F)
        self.vocab_proj = MemoryVocabProjector(c.d_F, c.d_LLM)
        # Backbone-dependent members stay unset until load().
        self.layer_pool = None
        self.backbone: Optional[LLMBackbone] = None
        self.tok = None
        self._degen_guard = None
        self.content_classifier = None
        self._wte_neighbor_cache: Optional[Dict[int, List[int]]] = None
        self._wte_normed: Optional[torch.Tensor] = None
        self._filler_centroid: Optional[torch.Tensor] = None

    def load(self, name: Optional[str] = None, dtype_name: Optional[str] = None):
        """Load the frozen backbone and rebuild every size-dependent module.

        Returns self for chaining.
        """
        name = name or self.c.llm_name
        dtype_name = dtype_name or self.c.llm_dtype
        self.backbone = LLMBackbone(name, dtype_name=dtype_name)
        self.tok = self.backbone.tokenizer
        self.c.d_LLM = self.backbone.d_model
        self.c.vocab_size = self.backbone.vocab_size
        dev = next(self.parameters()).device
        # Rebuild the bridge only when its output width no longer matches.
        if self.bridge.proj.fkv.out_features != 2 * self.c.d_LLM:
            self.bridge = EmbBridge(self.c).to(dev)
        self.semantic_probe = PrefixSemanticProbe(self.c.d_LLM, self.c.L_mem, self.c.d_F).to(dev)
        self.vocab_proj = MemoryVocabProjector(self.c.d_F, self.c.d_LLM).to(dev)
        self.layer_pool = AdaptiveLayerPool(self.backbone.n_layers + 1, self.c.d_LLM).to(dev)
        self.content_classifier = ContentTokenClassifier(
            self.tok, self.c, vocab_size=self.backbone.vocab_size)
        self._degen_guard = DegenerationGuard(self.tok, self.c, self.content_classifier)
        wte_fp32 = self.backbone.input_embedding_weight().to(dev)
        self.bridge.aligner.calibrate(wte_fp32)
        self._wte_normed = F.normalize(wte_fp32.detach(), dim=-1, eps=1e-8)
        self.amm.wte_normed = self._wte_normed
        self._build_wte_neighbor_cache()
        self._compute_filler_centroid()
        return self

    def _compute_filler_centroid(self):
        """Normalized mean embedding of filler tokens (None when unavailable
        or when fewer than 3 filler ids are in vocab)."""
        if self.content_classifier is None or self.backbone is None:
            self._filler_centroid = None
            return
        wte = self.backbone.input_embedding_weight().to(next(self.parameters()).device)
        vocab = wte.shape[0]
        valid = [t for t in sorted(self.content_classifier.filler_ids) if t < vocab]
        if len(valid) < 3:
            self._filler_centroid = None
            return
        filler_vecs = wte[torch.tensor(valid, device=wte.device)]
        self._filler_centroid = F.normalize(filler_vecs.mean(0), dim=-1, eps=1e-8)
self.c.wte_neighbor_threshold + batch_size = 500 + for start in range(0, len(valid), batch_size): + batch_ids = valid[start:start+batch_size] + batch_t = torch.tensor(batch_ids, device=wte_n.device) + batch_vecs = wte_n[batch_t] + sims = batch_vecs @ wte_n.T + topk_vals, topk_ids = sims.topk(K+1, dim=-1) + for i, tid in enumerate(batch_ids): + neighbors = [] + for v_val, nid in zip(topk_vals[i], topk_ids[i]): + nid_int = nid.item() + if nid_int == tid: continue + if v_val.item() >= thresh and nid_int in cc.content_ids: + neighbors.append(nid_int) + self._wte_neighbor_cache[tid] = neighbors + + def _expand_content_ids(self, content_ids: List[int]) -> List[int]: + if not self._wte_neighbor_cache: return content_ids + expanded = set(content_ids) + for tid in content_ids: + neighbors = self._wte_neighbor_cache.get(tid, []) + expanded.update(neighbors) + return list(expanded) + + def fwd(self, ids, mask, prefix=None): + return self.backbone(ids, mask, prefix=prefix) + + def _compute_content_semantic_emb(self, hidden_states, ids, mask): + B, T, D = hidden_states.shape + cc = self.content_classifier + result = [] + for b in range(B): + content_positions = [] + T_valid = min(T, ids.shape[1]) if ids is not None else T + for pos in range(T_valid): + if mask is not None and mask.shape[1] > pos and mask[b, pos].item() == 0: + continue + if ids is not None: + tid = ids[b, pos].item() + if cc is not None and tid in cc.content_ids: + content_positions.append(min(pos, T-1)) + if content_positions: + pos_t = torch.tensor(content_positions, device=hidden_states.device) + content_hs = hidden_states[b, pos_t] + result.append(content_hs.mean(0)) + else: + if mask is not None: + valid_len = min(int(mask[b].sum().item()), T); valid_len = max(valid_len, 1) + result.append(hidden_states[b, :valid_len].mean(0)) + else: result.append(hidden_states[b].mean(0)) + return torch.stack(result) + + def extract_state(self, hs, mask=None, pl=0): + pooled = self.layer_pool(hs) + if pl > 0: pooled = 
pooled[:, pl:]
        # NOTE(review): the lines above/below are the tail of a method whose header lies
        # before this chunk (extraction split the statement) — kept verbatim.
        m = mask[:, pl:] if mask is not None and pl > 0 else mask
        # Drop the mask when its length no longer matches the pooled sequence.
        if m is not None and m.shape[1] != pooled.shape[1]: m = None
        xq, fq = self.bridge.ext(pooled, m)
        return pooled, xq, fq

    def _build_token_bias_from_memories(self, mem_weight_list, q_content_ids):
        """Accumulate a vocab-sized bias vector from weighted memories.

        For each (memory-id, weight) pair, the memory's scoring token ids are
        filtered to content (or content-starter) tokens, and each surviving
        token accumulates `weight`, optionally scaled by its cosine relevance
        to the query's content tokens (floored and sharpened by config).
        Returns a float tensor of shape (vocab_size,).
        """
        V = self.c.vocab_size; dev = next(self.parameters()).device
        cc = self.content_classifier; wte_n = self._wte_normed
        floor = self.c.content_bias_relevance_floor
        concentration = self.c.content_bias_concentration
        bias = torch.zeros(V, device=dev)
        # Query content ids must index into the (row-normalized) embedding table.
        q_valid = [i for i in q_content_ids if i < wte_n.shape[0]]
        q_vecs = wte_n[q_valid] if q_valid else None
        for mid, weight in mem_weight_list:
            if mid not in self.amm.tree.store or weight <= 0: continue
            mem = self.amm.tree.store[mid]
            scoring_ids = self.amm._get_mem_scoring_ids(mem)
            if cc is not None and self.c.use_word_starter_filter:
                # Restrict to tokens that can start a word.
                valid_ids = [t for t in scoring_ids
                             if t < V and t < wte_n.shape[0]
                             and t in cc.content_starter_ids]
            elif cc is not None:
                valid_ids = [t for t in scoring_ids
                             if t < V and t < wte_n.shape[0]
                             and t in cc.content_ids]
            else: valid_ids = []
            if not valid_ids: continue
            if q_valid and q_vecs is not None:
                # Relevance = best cosine sim against any query content token,
                # clamped to [0, 1], sharpened, then mapped onto [floor, 1].
                m_vecs = wte_n[valid_ids]
                sim = m_vecs @ q_vecs.T
                relevance = sim.max(dim=1).values.clamp(min=0)
                relevance = relevance.pow(concentration)
                relevance = relevance * (1.0 - floor) + floor
                for i, tid in enumerate(valid_ids):
                    bias[tid] += weight * relevance[i].item()
            else:
                for tid in valid_ids:
                    bias[tid] += weight
        return bias

    def _build_content_bias(self, diag, query_content_ids_per_batch):
        """Per-batch positive token bias from retrieved memories.

        Memory weights are re-weighted by the squared per-memory bidirectional
        score (default 0.5 when absent), then each row is max-normalized.
        Returns (B, vocab_size).
        """
        V = self.c.vocab_size; dev = next(self.parameters()).device
        B = len(diag.batch_mem_weights)
        bias = torch.zeros(B, V, device=dev)
        for b, mem_weights in enumerate(diag.batch_mem_weights):
            q_ids = (query_content_ids_per_batch[b]
                     if query_content_ids_per_batch and b < len(query_content_ids_per_batch)
                     else [])
            reweighted = []
            for mid, w in mem_weights:
                bidi_w = diag.per_memory_bidi_min.get(mid, 0.5)
                reweighted.append((mid, w * (bidi_w ** 2)))
            b_bias = self._build_token_bias_from_memories(reweighted, q_ids)
            bmax = b_bias.max()
            if bmax > 1e-8:
                bias[b] = b_bias / bmax
        return bias

    def _build_suppression_bias(self, diag, query_content_ids_per_batch):
        """Per-batch negative bias from NON-dominant memories.

        Tokens belonging to the dominant memory are zeroed out so suppression
        never fights the winning memory; each row is max-normalized.
        Returns (B, vocab_size); all zeros when no classifier is configured.
        """
        V = self.c.vocab_size; dev = next(self.parameters()).device
        B = len(diag.batch_mem_weights)
        suppression = torch.zeros(B, V, device=dev)
        cc = self.content_classifier
        if cc is None: return suppression
        for b in range(B):
            dom_mid = diag.dominant_per_batch[b] if b < len(diag.dominant_per_batch) else None
            nd_mids = (diag.non_dominant_per_batch[b]
                       if b < len(diag.non_dominant_per_batch) else [])
            nd_weights = (diag.non_dominant_weights_per_batch[b]
                          if b < len(diag.non_dominant_weights_per_batch) else {})
            if not nd_mids: continue
            # Content tokens of the dominant memory are protected from suppression.
            dom_token_set: Set[int] = set()
            if dom_mid is not None and dom_mid in self.amm.tree.store:
                dom_mem = self.amm.tree.store[dom_mid]
                for t in self.amm._get_mem_scoring_ids(dom_mem):
                    if t in cc.content_ids:
                        dom_token_set.add(t)
            q_ids = (query_content_ids_per_batch[b]
                     if query_content_ids_per_batch and b < len(query_content_ids_per_batch)
                     else [])
            nd_mem_weights = [(mid, nd_weights.get(mid, 0.0)) for mid in nd_mids]
            nd_bias = self._build_token_bias_from_memories(nd_mem_weights, q_ids)
            for t in dom_token_set:
                if 0 <= t < V:
                    nd_bias[t] = 0.0
            nmax = nd_bias.max()
            if nmax > 1e-8:
                suppression[b] = nd_bias / nmax
        return suppression

    def _get_prefix(self, hs, mask=None, pl=0, update_stats=True, return_extra=False, ids=None):
        """Retrieve memories for the current hidden states and build the injected prefix.

        When `return_extra` is True, also returns the retrieval diagnostics and
        the content/suppression bias tensors used during decoding.
        """
        pooled, xq, fq = self.extract_state(hs, mask, pl)
        trimmed_mask = mask[:, pl:] if mask is not None and pl > 0 else mask
        if trimmed_mask is not None and pooled.shape[1] != trimmed_mask.shape[1]:
            trimmed_mask = None
        # Exact content-token ids of the query, per batch row (empty without classifier).
        query_content_ids_per_batch = []
        if ids is not None and self.content_classifier is not None:
            for b in range(ids.shape[0]):
                b_ids = ids[b].tolist()
                b_exact = list(set(self.content_classifier.get_content_ids_from_tokens(b_ids)))
                query_content_ids_per_batch.append(b_exact)
        # Semantic query embedding: content-aware when possible, mean-pool fallback.
        query_sem = (self._compute_content_semantic_emb(pooled, ids, trimmed_mask)
                     if ids is not None and self.content_classifier is not None
                     else pooled.mean(1))
        wte_n = self._wte_normed
        fibers, mem_mask, fiber_summary, diag = self.amm.retrieve_multi(
            xq, fq, update_stats=update_stats,
            query_semantic_emb=query_sem,
            query_content_ids_per_batch=query_content_ids_per_batch,
            wte_normed=wte_n, content_classifier=self.content_classifier)
        prefix = self.bridge.inject(
            fibers, mem_mask, fiber_summary=fiber_summary,
            filler_centroid=self._filler_centroid)
        if return_extra:
            content_bias = self._build_content_bias(diag, query_content_ids_per_batch)
            suppression_bias = (self._build_suppression_bias(diag, query_content_ids_per_batch)
                                if self.c.use_memory_guided_suppression
                                else torch.zeros_like(content_bias))
            return prefix, fiber_summary, diag, content_bias, suppression_bias
        return prefix

    def _build_contrastive_uncond_prefix(self, diag, prefix_cond):
        """Build the 'unconditional' CFG prefix from non-dominant memories.

        Rows with at least one surviving non-dominant memory get a prefix
        injected from the mean of those fibers; rows without contrast fall
        back to the bridge's neutral prefix.
        """
        dev = prefix_cond.device; B = prefix_cond.shape[0]
        non_dom_fibers = []; have_contrast = []
        for b in range(B):
            mids = diag.non_dominant_per_batch[b] if b < len(diag.non_dominant_per_batch) else []
            mids = [m for m in mids if m in self.amm.tree.store]
            if mids:
                fvecs = torch.stack([self.amm.tree.store[m].fiber.to(dev) for m in mids])
                non_dom_fibers.append(fvecs.mean(0)); have_contrast.append(True)
            else:
                non_dom_fibers.append(torch.zeros(self.c.d_F, device=dev)); have_contrast.append(False)
        non_dom_fibers_t = torch.stack(non_dom_fibers, dim=0)
        uncond_prefix = torch.zeros_like(prefix_cond)
        for b in range(B):
            if have_contrast[b]:
                single = non_dom_fibers_t[b:b+1].unsqueeze(1)
                mask_one = torch.ones(1, 1, device=dev)
                pref_b = self.bridge.inject(
                    single, mask_one,
                    fiber_summary=non_dom_fibers_t[b:b+1],
                    filler_centroid=self._filler_centroid)
                uncond_prefix[b:b+1] = pref_b
            else:
                uncond_prefix[b:b+1] = self.bridge.build_neutral_prefix(1, dev)
        return uncond_prefix

    def _compute_vocab_bias(self, fiber_summary):
        """Project the fiber summary onto vocabulary logits; None when no summary."""
        if fiber_summary is None: return None
        wte = self.backbone.input_embedding_weight().to(fiber_summary.device)
        return self.vocab_proj(fiber_summary, wte)

    # ─────────────────────────────────────────────────────────────
    # [A-2] Public API: prepare_decode_context
    # ─────────────────────────────────────────────────────────────
    def prepare_decode_context(self, ids: torch.Tensor, mask: torch.Tensor,
                               update_stats: bool = True) -> DecodeContext:
        """Build full decode context for a given prompt (ids/mask)."""
        with torch.no_grad():
            o = self.fwd(ids, mask)
            prefix_cond, fiber_summary, diag, content_bias, suppression_bias = self._get_prefix(
                o['hs'], mask, update_stats=update_stats, return_extra=True, ids=ids)
            vocab_bias = self._compute_vocab_bias(fiber_summary)
            if self.c.use_cfg_decoding:
                # Contrastive CFG uses non-dominant memories as the "uncond" branch;
                # otherwise a learned neutral prefix is used.
                if self.c.use_contrastive_memory_cfg:
                    prefix_uncond = self._build_contrastive_uncond_prefix(diag, prefix_cond)
                else:
                    B = prefix_cond.shape[0]
                    prefix_uncond = self.bridge.build_neutral_prefix(B, prefix_cond.device)
            else:
                prefix_uncond = None
            return DecodeContext(
                prefix_cond=prefix_cond, prefix_uncond=prefix_uncond,
                fiber_summary=fiber_summary, diag=diag,
                content_bias=content_bias, suppression_bias=suppression_bias,
                vocab_bias=vocab_bias)

    # ─────────────────────────────────────────────────────────────
    # [A-1] Public API: shape_step_logits
    # ─────────────────────────────────────────────────────────────
    def shape_step_logits(self,
                          logits_cond: torch.Tensor,
                          logits_uncond: Optional[torch.Tensor],
                          step: int,
                          content_bias: Optional[torch.Tensor],
                          suppression_bias: Optional[torch.Tensor],
                          vocab_bias: Optional[torch.Tensor],
                          state: DecodeState) -> torch.Tensor:
        """Apply the complete v3.33 logit-shaping pipeline for one decode step.

        Order: CFG mix → content bias (+) → suppression bias (−) → learned
        vocab bias (+) → repeat/cycle/n-gram/BPE-echo penalties → newline and
        EOS gates → early starter hard mask → degeneration guard.

        NOTE(review): the penalty/gate sections index `lg[0, …]`, i.e. only
        batch row 0 is shaped — consistent with `generate()` which decodes a
        single sequence. Confirm before calling with B > 1.
        """
        c = self.c
        dev = logits_cond.device
        cc = self.content_classifier
        HARD_MASK = -1e9

        # Classifier-free guidance: extrapolate cond vs. uncond logits; the
        # guidance strength optionally decays linearly to 0 over cfg_decay_steps.
        if c.use_cfg_decoding and logits_uncond is not None:
            alpha = c.cfg_scale
            if c.cfg_decay_steps > 0:
                alpha *= max(0.0, 1.0 - step / c.cfg_decay_steps)
            lg = logits_cond + alpha * (logits_cond - logits_uncond)
        else:
            lg = logits_cond.clone()

        V_lg = lg.shape[-1]

        # Bias units: either scale with the current logit std, or raw (1.0).
        if c.use_adaptive_content_bias_scale:
            logits_std = lg.std().item()
            cb_unit = logits_std * c.content_bias_std_multiplier
            sup_unit = logits_std * c.suppression_std_multiplier
        else:
            cb_unit = 1.0
            sup_unit = 1.0

        # Positive content bias, linearly decayed per step with a floor.
        step_scale_cb = max(c.content_bias_floor, 1.0 - step * c.content_bias_decay)
        if content_bias is not None and content_bias.abs().max().item() > 0.01:
            V = min(V_lg, content_bias.shape[-1])
            lg[:, :V] = lg[:, :V] + content_bias[:, :V] * cb_unit * c.content_bias_scale * step_scale_cb

        # Negative suppression bias from non-dominant memories.
        step_scale_sup = max(c.suppression_floor, 1.0 - step * c.suppression_decay)
        if (c.use_memory_guided_suppression and suppression_bias is not None
                and suppression_bias.abs().max().item() > 0.01):
            V = min(V_lg, suppression_bias.shape[-1])
            lg[:, :V] = lg[:, :V] - suppression_bias[:, :V] * sup_unit * c.suppression_bias_scale * step_scale_sup

        # Learned vocab projection of the fiber summary.
        step_scale_learned = max(c.semantic_boost_floor, 1.0 - step * c.semantic_boost_decay)
        if vocab_bias is not None:
            V2 = min(V_lg, vocab_bias.shape[-1])
            lg[:, :V2] = lg[:, :V2] + vocab_bias[:, :V2] * c.semantic_boost_scale * step_scale_learned

        # Super-linear repetition penalty on already-generated content tokens.
        if cc:
            for tid, count in state.generated_content_counts.items():
                if tid in cc.content_ids and tid < V_lg:
                    scaled_count = count ** c.content_repeat_exponent
                    lg[0, tid] -= c.content_repeat_penalty * scaled_count

        # Hard-mask content tokens that appeared >= max_cnt times in the recent window.
        if c.use_cyclic_content_hard_mask and cc is not None:
            window = c.cyclic_content_window
            max_cnt = c.cyclic_content_max_count
            window_counts: Dict[int, int] = {}
            cutoff_step = step - window
            for (step_idx, tid) in state.content_history:
                if step_idx >= cutoff_step:
                    window_counts[tid] = window_counts.get(tid, 0) + 1
            for tid, cnt in window_counts.items():
                if cnt >= max_cnt and 0 <= tid < V_lg:
                    lg[0, tid] = HARD_MASK

        # Penalize continuing an exactly-repeated n-gram (n = 2..max_n).
        if c.use_ngram_repeat_block and len(state.generated_ids) >= 4:
            max_n = min(c.ngram_repeat_max_n, len(state.generated_ids) // 2)
            for n in range(2, max_n + 1):
                if len(state.generated_ids) >= 2 * n:
                    tail = state.generated_ids[-n:]
                    prev = state.generated_ids[-2 * n:-n]
                    if tail == prev:
                        expected_next = state.generated_ids[-n]
                        if 0 <= expected_next < V_lg:
                            lg[0, expected_next] -= c.ngram_repeat_penalty

        # Penalize embedding-space neighbors of recent word starters that are
        # not themselves word starters (BPE-echo artifacts).
        if cc and self._wte_neighbor_cache and state.recent_starters:
            for prev_tid, _ in state.recent_starters:
                neighbors = self._wte_neighbor_cache.get(prev_tid, [])
                for nid in neighbors:
                    if nid in cc.word_starter_ids: continue
                    if nid < V_lg:
                        lg[0, nid] -= c.bpe_echo_penalty

        # After a content starter, discourage non-starter content continuations.
        if cc and state.generated_ids and state.generated_ids[-1] in cc.content_starter_ids:
            for tid in cc.content_ids:
                if tid not in cc.word_starter_ids and tid < V_lg:
                    lg[0, tid] -= c.post_starter_nonstarter_penalty

        # Newline gating: hard mask early / before enough content was produced.
        newline_ids_set = cc.newline_ids if cc is not None else set()
        if c.use_newline_hard_gate and cc is not None:
            content_count_so_far = sum(state.generated_content_counts.values())
            hard_gate_active = (
                step < c.newline_hard_gate_min_step
                or content_count_so_far < c.newline_hard_gate_min_content)
            if hard_gate_active:
                for nid in newline_ids_set:
                    if nid < V_lg: lg[0, nid] = HARD_MASK

        # Block EOS during the first eos_hard_mask_steps steps.
        eos_token_id = self.tok.eos_token_id
        if (c.use_eos_hard_mask and eos_token_id is not None
                and step < c.eos_hard_mask_steps and eos_token_id < V_lg):
            lg[0, eos_token_id] = HARD_MASK

        # Soft newline penalty until enough content tokens exist.
        if c.use_content_gated_newline and cc is not None:
            content_count_so_far = sum(state.generated_content_counts.values())
            if content_count_so_far < c.min_content_tokens_before_newline:
                for nid in newline_ids_set:
                    if nid < V_lg: lg[0, nid] -= c.late_newline_penalty

        # Early steps: only content-starter tokens survive; everything else is masked.
        if (c.use_early_content_starter_hard_mask and cc is not None
                and step < c.early_starter_hard_mask_steps):
            starter_mask = cc.content_starter_mask(dev)[:V_lg]
            lg[0, :V_lg] = torch.where(
                starter_mask.bool(),
                lg[0, :V_lg],
                torch.full_like(lg[0, :V_lg], HARD_MASK))

        # Final degeneration guard gets the last word.
        if self._degen_guard is not None:
            lg = self._degen_guard.process(lg, state.generated_ids, step)

        return lg

    def write(self, text, training_mode=False):
        """Encode `text` and store it as memory rows that pass the write gate.

        In `training_mode` every row is stored regardless of the gate.
        Returns (number_stored, list_of_gate_values).
        """
        tk = self.tok(text, return_tensors='pt', padding=True, truncation=True)
        ids, mask = tk['input_ids'], tk['attention_mask']
        dev = next(self.parameters()).device; ids, mask = ids.to(dev), mask.to(dev)
        with torch.no_grad():
            o = self.fwd(ids, mask)
            hs_pooled = self.layer_pool(o['hs'])
            # Surprise proxy from next-token prediction on the frozen backbone.
            surp = self.amm.surprise_proxy(o['logits'][:, :-1], ids[:, 1:])
            pooled_mean = hs_pooled.mean(1)
            content_sem = self._compute_content_semantic_emb(hs_pooled, ids, mask)
        raw_ids = self.tok.encode(text)
        cc = self.content_classifier
        content_ids = list(set(cc.get_content_ids_from_tokens(raw_ids))) if cc else []
        expanded_ids = self._expand_content_ids(content_ids)
        stored = 0; gate_vals = []
        for b in range(ids.shape[0]):
            with torch.no_grad():
                gate = self.amm.write_gate(pooled_mean[b:b+1], surp[b:b+1]).item()
            gate_vals.append(gate)
            if training_mode or gate >= self.c.write_gate_threshold:
                self.amm.store_mem(
                    pooled_mean[b], surp[b], training_mode,
                    source_text=text, content_token_ids=content_ids,
                    content_semantic_emb=content_sem[b],
                    expanded_content_ids=expanded_ids)
                stored += 1
        return stored, gate_vals

    def _refresh_all_memories(self):
        """Re-encode every stored memory from its source text.

        Clears the tree entirely and rewrites each unique source text with the
        current (just-trained) encoders. Returns the number of texts rewritten.
        """
        entries = list(self.amm.tree.store.values())
        texts = [e.source_text for e in entries if e.source_text]
        if not texts: return 0
        # dict.fromkeys preserves first-seen order while deduplicating.
        unique_texts = list(dict.fromkeys(texts))
        self.amm.tree.store.clear()
        self.amm.tree.root = _Node()
        self.amm.tree.nid = 0; self.amm.time = 0
        for text in unique_texts: self.write(text, training_mode=True)
        return len(unique_texts)

    def _prep_prompt_ids(self, prompt: str):
        """Tokenize a prompt, optionally wrapping it in the chat template first."""
        if self.c.use_chat_template_for_gen and self.backbone.has_chat_template:
            prompt = self.backbone.build_chat_text(prompt)
        tk = self.tok(prompt, return_tensors='pt')
        return tk['input_ids'], tk['attention_mask']

    def generate(self, prompt, mt=50, greedy=False):
        """Decode up to `mt` tokens with memory-conditioned, shaped logits.

        Retrieval is refreshed every `retrieval_interval` steps; sampling is
        greedy or temperature + top-p nucleus sampling. Decodes a single
        sequence (batch size 1).
        """
        ids, mask = self._prep_prompt_ids(prompt)
        dev = next(self.parameters()).device
        ids = ids.to(dev); mask = mask.to(dev)

        ctx = self.prepare_decode_context(ids, mask, update_stats=True)
        state = DecodeState()
        prompt_len = ids.shape[1]

        for i in range(mt):
            # Periodically re-retrieve memories against the grown sequence.
            if i > 0 and i % self.c.retrieval_interval == 0:
                ctx = self.prepare_decode_context(ids, mask, update_stats=True)

            with torch.no_grad():
                o_cond = self.fwd(ids, mask, ctx.prefix_cond)
                lg_cond = o_cond['logits'][:, -1:].squeeze(1)
                if self.c.use_cfg_decoding and ctx.prefix_uncond is not None:
                    o_uncond = self.fwd(ids, mask, ctx.prefix_uncond)
                    lg_uncond = o_uncond['logits'][:, -1:].squeeze(1)
                else:
                    lg_uncond = None

            lg = self.shape_step_logits(
                lg_cond, lg_uncond, i,
                ctx.content_bias, ctx.suppression_bias, ctx.vocab_bias, state)

            if greedy:
                nxt = lg.argmax(-1, keepdim=True)
            else:
                # Top-p (nucleus) sampling: zero the tail of the sorted cumulative
                # distribution, renormalize, then sample.
                lg_t = lg / self.c.gen_temp; p = F.softmax(lg_t, -1)
                sp, si = torch.sort(p, descending=True); cs = torch.cumsum(sp, -1)
                rm = cs - sp > self.c.gen_top_p; sp[rm] = 0
                total = sp.sum(-1, keepdim=True)
                # Degenerate distribution fallback: keep only the top token.
                if (total < 1e-10).any(): sp[:, 0] = 1.0; total = sp.sum(-1, keepdim=True)
                sp = sp / total; nxt = si.gather(-1, torch.multinomial(sp, 1))

            nxt_id = nxt.item()
            # Honor EOS only after the minimum token budget.
            if nxt_id == self.tok.eos_token_id and len(state.generated_ids) >= self.c.degen_min_tokens:
                break

            state.update(nxt_id, i, self.content_classifier,
                         self.c.bpe_echo_window, self.c.cyclic_content_window)

            ids = torch.cat([ids, nxt], 1)
            mask = torch.cat([mask, torch.ones(1, 1, device=dev, dtype=mask.dtype)], 1)

        new_ids = ids[0, prompt_len:].tolist()
        gen_text = self.tok.decode(new_ids, skip_special_tokens=True)
        # With a chat template the prompt is not echoed back.
        return prompt + gen_text if not self.c.use_chat_template_for_gen else gen_text
# ── Memory persistence (methods of the enclosing model class) ──
    def save_memory(self, path):
        """Serialize the full memory store (tensors moved to CPU) to `path`."""
        data = {'store': {}, 'nid': self.amm.tree.nid, 'time': self.amm.time}
        for mid, m in self.amm.tree.store.items():
            data['store'][mid] = {
                'base': m.base.cpu(), 'fiber': m.fiber.cpu(), 'dirn': m.dirn.cpu(),
                'surprise': m.surprise, 'ts': m.ts, 'last': m.last, 'cnt': m.cnt, 'version': m.version,
                'source_text': m.source_text,
                'content_token_ids': m.content_token_ids,
                'expanded_content_ids': m.expanded_content_ids,
                'semantic_emb': m.semantic_emb.cpu() if m.semantic_emb is not None else None}
        torch.save(data, path)

    def load_memory(self, path):
        """Restore a memory store saved by `save_memory`, replacing the current tree.

        NOTE(review): `weights_only=False` unpickles arbitrary objects — only
        load files from trusted sources.
        """
        data = torch.load(path, weights_only=False)
        self.amm.tree.store.clear(); self.amm.tree.root = _Node()
        self.amm.tree.nid = data['nid']; self.amm.time = data['time']
        dev = next(self.parameters()).device
        for mid, d in data['store'].items():
            sem = d.get('semantic_emb', None)
            if sem is not None: sem = sem.to(dev)
            # .get() defaults keep compatibility with older snapshots lacking
            # the newer fields.
            m = MemEntry(mid=mid, base=d['base'].to(dev), fiber=d['fiber'].to(dev),
                         dirn=d['dirn'].to(dev), surprise=d['surprise'], ts=d['ts'],
                         last=d['last'], cnt=d['cnt'], version=d['version'],
                         source_text=d.get('source_text', ''),
                         content_token_ids=d.get('content_token_ids', []),
                         expanded_content_ids=d.get('expanded_content_ids', []),
                         semantic_emb=sem)
            self.amm.tree.insert(m)

# ═══════════════════════════════════════════════════════════════════
# Trainer
# ═══════════════════════════════════════════════════════════════════
class Trainer:
    """Optimizes all non-backbone modules of the memory model `m` under config `c`."""

    def __init__(self, m, c):
        self.m = m; self.c = c
        # Backbone parameters are frozen; only memory-system modules train.
        ps = [p for n, p in m.named_parameters()
              if p.requires_grad and 'backbone' not in n]
        self.opt = torch.optim.AdamW(ps, lr=1e-4, weight_decay=0.01)
        # Per-loss warmup schedules (steps taken from config).
        self.warmup = LossWarmup({
            'semantic_probe': c.warmup_steps_probe, 'dir_diversity': c.warmup_steps_dd,
            'reranker_ranking': c.warmup_steps_rr, 'vocab_anchor': c.warmup_steps_va,
            'semantic_alignment': c.warmup_steps_sa,
            'tail_semantic_anchor': c.warmup_steps_tsa})
        self.grad_monitor = GradientMonitor()
        self.grad_monitor.register('ctx_encoder', m.amm.ctx)
        self.grad_monitor.register('fib_encoder', m.amm.fib)
        self.grad_monitor.register('dir_predictor', m.amm.dir_pred)
        self.grad_monitor.register('fiber_connection', m.amm.conn)
        self.grad_monitor.register('fiber_attn', m.amm.attn)
        self.grad_monitor.register('reranker', m.amm.reranker)
        self.grad_monitor.register('qformer', m.bridge.proj)
        self.grad_monitor.register('content_bypass', m.bridge.bypass)
        self.grad_monitor.register('semantic_probe', m.semantic_probe)
        self.grad_monitor.register('layer_pool', m.layer_pool)
        self.grad_monitor.register('prefix_aligner', m.bridge.aligner)
        self.grad_monitor.register('vocab_proj', m.vocab_proj)
        if c.use_content_semantic_tail and c.content_tail_slots > 0:
            self.grad_monitor.register('tail_head', m.bridge.tail_head)
        self.layer_weight_history = []; self._step_count = 0

    def _encode_with_grad(self, texts):
        """Run the frozen backbone (no grad), then the trainable encoders (with grad)."""
        tk = self.m.tok(texts, return_tensors='pt', padding=True, truncation=True)
        dev = next(self.m.parameters()).device
        ids, mask = tk['input_ids'].to(dev), tk['attention_mask'].to(dev)
        with torch.no_grad():
            o = self.m.fwd(ids, mask)
        surp = self.m.amm.surprise_proxy(o['logits'][:, :-1], ids[:, 1:])
        pooled = self.m.layer_pool(o['hs']); pooled_mean = pooled.mean(1)
        base = self.m.amm.ctx(pooled_mean)
        fiber = self.m.amm.fib(pooled_mean, base, surp)
        # Warm the direction predictor's graph; output unused here.
        _ = self.m.amm.dir_pred(base, fiber)
        return ids, mask, base, fiber, surp, pooled_mean

    def encoder_throughput_loss(self, ids, mask, fiber):
        """LM cross-entropy on `ids` when conditioned on a single-fiber prefix."""
        B = ids.shape[0]; dev = ids.device
        fiber_unsq = fiber.unsqueeze(1); mem_mask_ones = torch.ones(B, 1, device=dev)
        prefix = self.m.bridge.inject(fiber_unsq, mem_mask_ones, fiber_summary=fiber)
        o2 = self.m.fwd(ids, mask, prefix)
        # Drop the prefix positions, shift for next-token prediction.
        lg = o2['logits'][:, o2['pl']:-1]; tg = ids[:, 1:]
        ml = min(lg.shape[1], tg.shape[1])
        if ml == 0: return torch.tensor(0.0, device=dev, requires_grad=True)
        return F.cross_entropy(lg[:, :ml].reshape(-1, lg.shape[-1]), tg[:, :ml].reshape(-1))

    def semantic_alignment_loss(self, fiber, target_ids, target_mask):
        """KL between the fiber's vocab projection and a uniform distribution
        over the target text's unique content tokens."""
        dev = fiber.device
        wte = self.m.backbone.input_embedding_weight().to(dev)
        vocab_logits = self.m.vocab_proj(fiber, wte)
        B, V = vocab_logits.shape; cc = self.m.content_classifier
        if cc is None: return torch.tensor(0.0, device=dev, requires_grad=True)
        target = torch.zeros(B, V, device=dev); valid_count = 0
        for b in range(B):
            valid = target_ids[b][target_mask[b].bool()].tolist()
            content_ids = cc.get_content_ids_from_tokens(valid)
            if content_ids:
                uids = list(set(content_ids)); uids = [uid for uid in uids if uid < V]
                if uids: target[b, uids] = 1.0 / len(uids); valid_count += 1
        if valid_count == 0: return torch.tensor(0.0, device=dev, requires_grad=True)
        log_probs = F.log_softmax(vocab_logits / self.c.semantic_align_temp, dim=-1)
        kl = F.kl_div(log_probs, target, reduction='none').sum(-1)
        return kl.mean()

    def vocab_anchor_loss(self, prefix):
        """Pull prefix vectors toward the embedding table: maximize mean top-k cosine sim."""
        dev = prefix.device
        wte = self.m.backbone.input_embedding_weight().to(dev)
        pn = F.normalize(prefix.reshape(-1, prefix.shape[-1]), dim=-1)
        wn = F.normalize(wte, dim=-1)
        sim = pn @ wn.T; topk_sim = sim.topk(self.c.vocab_anchor_topk, dim=-1).values
        return -topk_sim.mean()

    def tail_semantic_anchor_loss(self, fiber, ids, mask):
        """KL-anchor the tail-head slots to the input's content-token distribution."""
        if not (self.c.use_content_semantic_tail and self.c.content_tail_slots > 0):
            return torch.tensor(0.0, device=fiber.device, requires_grad=True)
        tail = self.m.bridge.tail_head(fiber)
        if tail is None:
            return torch.tensor(0.0, device=fiber.device, requires_grad=True)
        dev = fiber.device
        wte = self.m.backbone.input_embedding_weight().to(dev)
        B, n_slots, _ = tail.shape; V = wte.shape[0]
        cc = self.m.content_classifier
        if cc is None: return torch.tensor(0.0, device=dev, requires_grad=True)
        losses = []
        tn = F.normalize(tail, dim=-1); wn = F.normalize(wte, dim=-1)
        for b in range(B):
            valid = ids[b][mask[b].bool()].tolist()
            content_tids = list(set(cc.get_content_ids_from_tokens(valid)))
            content_tids = [t for t in content_tids if t < V]
            if not content_tids: continue
            target = torch.zeros(V, device=dev)
            target[content_tids] = 1.0 / len(content_tids)
            # 0.3 is a fixed similarity temperature for the slot logits.
            slot_logits = tn[b] @ wn.T / 0.3
            log_probs = F.log_softmax(slot_logits, dim=-1)
            kl = F.kl_div(log_probs, target.unsqueeze(0).expand_as(log_probs),
                          reduction='none').sum(-1).mean()
            losses.append(kl)
        if not losses:
            return torch.tensor(0.0, device=dev, requires_grad=True)
        return torch.stack(losses).mean()

    def _recon_forward(self, text):
        """Reconstruction pass: retrieve a prefix for `text`, then score `text` under it."""
        tk = self.m.tok(text, return_tensors='pt', padding=True, truncation=True)
        dev = next(self.m.parameters()).device
        ids, mask = tk['input_ids'].to(dev), tk['attention_mask'].to(dev)
        with torch.no_grad(): bo = self.m.fwd(ids, mask)
        prefix = self.m._get_prefix(bo['hs'], mask, update_stats=False, ids=ids)
        o = self.m.fwd(ids, mask, prefix)
        lg = o['logits'][:, o['pl']:-1]; tg = ids[:, 1:]
        ml = min(lg.shape[1], tg.shape[1])
        if ml == 0:
            zero = ids.new_tensor(0.0, dtype=torch.float, requires_grad=True)
            return zero, prefix, self.m.bridge._last_fiber_summary
        l_r = F.cross_entropy(lg[:, :ml].reshape(-1, lg.shape[-1]), tg[:, :ml].reshape(-1))
        fs = self.m.bridge._last_fiber_summary
        if fs is None: fs = torch.zeros(1, self.c.d_F, device=dev)
        return l_r, prefix, fs

    # ─────────────────────────────────────────────────────────────
    # [A-3] Public API: recon
    # ─────────────────────────────────────────────────────────────
    def recon(self, text: str) -> Dict[str, torch.Tensor]:
        """Public reconstruction API."""
        loss, prefix, fs = self._recon_forward(text)
        return {'loss': loss, 'prefix': prefix, 'fiber_summary': fs}

    def _semantic_probe_loss(self, prefix_batch, fs_batch):
        """MSE of the probe against the fiber summary, plus an InfoNCE term for B >= 2."""
        pred = self.m.semantic_probe(prefix_batch)
        l_mse = F.mse_loss(pred, fs_batch.detach())
        if prefix_batch.shape[0] >= 2:
            pn = F.normalize(pred, dim=-1); tn = F.normalize(fs_batch.detach(), dim=-1)
            sim = pn @ tn.T / self.c.probe_contrastive_tau
            lb = torch.arange(prefix_batch.shape[0], device=prefix_batch.device)
            l_ctr = F.cross_entropy(sim, lb)
            return l_mse + 0.5 * l_ctr
        return l_mse

    def contrast(self, texts):
        """Symmetric InfoNCE between projected base (x) and fiber (f) states."""
        tk = self.m.tok(texts, return_tensors='pt', padding=True, truncation=True)
        dev = next(self.m.parameters()).device
        ids, mask = tk['input_ids'].to(dev), tk['attention_mask'].to(dev)
        with torch.no_grad(): o = self.m.fwd(ids, mask)
        _, xq, fq = self.m.extract_state(o['hs'], mask)
        x = F.normalize(self.m.amm.contrast_proj_x(xq), -1)
        f = F.normalize(self.m.amm.contrast_proj_f(fq), -1)
        sxf = x @ f.T / self.c.contrast_tau; sfx = f @ x.T / self.c.contrast_tau
        lb = torch.arange(len(texts), device=dev)
        return (F.cross_entropy(sxf, lb) + F.cross_entropy(sfx, lb)) / 2

    def holonomy_proxy(self, x, f):
        """Penalize fiber change after transport around a small random square loop in base space."""
        sz = 0.05; v1 = torch.randn_like(x) * sz; v2 = torch.randn_like(x) * sz
        loop = torch.stack([x, x+v1, x+v1+v2, x+v2, x], 1)
        return (self.m.amm.trans(f, loop) - f).pow(2).sum(-1).mean()

    def write_policy_loss(self, texts):
        """BCE training of the write gate against a median-surprise split label."""
        tk = self.m.tok(texts, return_tensors='pt', padding=True, truncation=True)
        dev = next(self.m.parameters()).device
        ids, mask = tk['input_ids'].to(dev), tk['attention_mask'].to(dev)
        with torch.no_grad():
            o = self.m.fwd(ids, mask)
            surp = self.m.amm.surprise_proxy(o['logits'][:, :-1], ids[:, 1:])
            pooled = self.m.layer_pool(o['hs']).mean(1)
        gates = self.m.amm.write_gate(pooled, surp)
        # Label 1 for above-median surprise within the batch.
        labels = (surp > surp.median()).float()
        return F.binary_cross_entropy(gates, labels)

    def direction_diversity_loss(self, texts):
        """Match pairwise direction similarities to (detached) fiber similarities."""
        tk = self.m.tok(texts, return_tensors='pt', padding=True, truncation=True)
        dev = next(self.m.parameters()).device
        ids, mask = tk['input_ids'].to(dev), tk['attention_mask'].to(dev)
        with torch.no_grad(): o = self.m.fwd(ids, mask)
        _, xq, fq = self.m.extract_state(o['hs'], mask)
        dirs = F.normalize(self.m.amm.dir_pred(xq, fq), dim=-1, eps=1e-8)
        dir_sim = (dirs @ dirs.T).clamp(-1.0, 1.0)
        with torch.no_grad():
            fn = F.normalize(fq, dim=-1, eps=1e-8); fiber_sim = (fn @ fn.T).clamp(-1.0, 1.0)
        tau = self.c.dir_diversity_tau
        dir_prob = torch.sigmoid(dir_sim / tau); fiber_prob = torch.sigmoid(fiber_sim / tau)
        # Exclude the diagonal (self-similarity is trivially 1).
        B = len(texts); mask_off = ~torch.eye(B, dtype=torch.bool, device=dev)
        return F.binary_cross_entropy(dir_prob[mask_off], fiber_prob[mask_off].detach())

    def reranker_ranking_loss(self, texts):
        """Regress standardized reranker scores onto standardized fiber-cosine relevance."""
        store = self.m.amm.tree.store
        if len(store) < 2:
            dev = next(self.m.parameters()).device
            return torch.tensor(0.0, device=dev, requires_grad=True)
        tk = self.m.tok(texts, return_tensors='pt', padding=True, truncation=True)
        dev = next(self.m.parameters()).device
        ids, mask = tk['input_ids'].to(dev), tk['attention_mask'].to(dev)
        with torch.no_grad(): o = self.m.fwd(ids, mask)
        _, xq, fq = self.m.extract_state(o['hs'], mask)
        mids = list(store.keys())
        cb = torch.stack([store[m].base.to(dev) for m in mids])
        cf = torch.stack([store[m].fiber.to(dev) for m in mids])
        cd = torch.stack([store[m].dirn.to(dev) for m in mids])
        B = xq.shape[0]; qdir = self.m.amm.dir_pred(xq, fq)
        dir_sims = torch.einsum('bd,cd->bc', qdir, cd)
        cb_e = cb.unsqueeze(0).expand(B, -1, -1); cf_e = cf.unsqueeze(0).expand(B, -1, -1)
        scores = self.m.amm.reranker(xq, fq, cb_e, cf_e, dir_sims)
        with torch.no_grad():
            fqn = F.normalize(fq, dim=-1); cfn = F.normalize(cf, dim=-1)
            relevance = torch.einsum('bd,cd->bc', fqn, cfn)
        # Standardize both so the MSE compares rankings, not magnitudes.
        s_mean = scores.mean(-1, keepdim=True); s_std = scores.std(-1, keepdim=True).clamp(min=1e-6)
        r_mean = relevance.mean(-1, keepdim=True); r_std = relevance.std(-1, keepdim=True).clamp(min=1e-6)
        sn = (scores - s_mean) / s_std; rn = (relevance - r_mean) / r_std
        return F.mse_loss(sn, rn.detach())

    def step(self, texts):
        """One optimizer step over all losses; returns a dict of scalar metrics."""
        self.m.train(); self.opt.zero_grad()
        dev = next(self.m.parameters()).device; W = self.c.loss_weights
        ids_enc, mask_enc, base, fiber, surp, pooled_mean = self._encode_with_grad(texts)
        l_et = self.encoder_throughput_loss(ids_enc, mask_enc, fiber)
        w_sa = self.warmup.weight('semantic_alignment')
        l_sa = self.semantic_alignment_loss(fiber, ids_enc, mask_enc) * w_sa
        w_tsa = self.warmup.weight('tail_semantic_anchor')
        l_tsa = self.tail_semantic_anchor_loss(fiber, ids_enc, mask_enc) * w_tsa
        # Reconstruction is run per-text; collect losses, prefixes, fiber summaries.
        all_lr = []; all_pf = []; all_fs = []
        for t in texts:
            r = self.recon(t)
            all_lr.append(r['loss']); all_pf.append(r['prefix'])
            fs = r['fiber_summary']
            all_fs.append(fs if fs is not None else torch.zeros(1, self.c.d_F, device=dev))
        l_r = sum(all_lr) / len(texts)
        pf_batch = torch.cat(all_pf, 0); fs_batch = torch.cat(all_fs, 0)
        w_sp = self.warmup.weight('semantic_probe')
        l_sp = self._semantic_probe_loss(pf_batch, fs_batch) * w_sp
        w_va = self.warmup.weight('vocab_anchor')
        l_va = self.vocab_anchor_loss(pf_batch) * w_va
        l_c = self.contrast(texts) if len(texts) >= 2 else torch.tensor(0.0, device=dev)
        with torch.no_grad():
            tk2 = self.m.tok(texts, return_tensors='pt', padding=True, truncation=True)
            ids2, mask2 = tk2['input_ids'].to(dev), tk2['attention_mask'].to(dev)
            o2 = self.m.fwd(ids2, mask2)
            _, xq2, fq2 = self.m.extract_state(o2['hs'], mask2)
        l_h = self.holonomy_proxy(xq2, fq2)
        l_w = self.write_policy_loss(texts)
        w_dd = self.warmup.weight('dir_diversity')
        l_dd = (self.direction_diversity_loss(texts) if len(texts) >= 2
                else torch.tensor(0.0, device=dev)) * w_dd
        w_rr = self.warmup.weight('reranker_ranking')
        l_rr = self.reranker_ranking_loss(texts) * w_rr
        loss = (W['recon']*l_r + W['semantic_alignment']*l_sa +
                W['encoder_throughput']*l_et + W['contrast']*l_c +
                W['holonomy']*l_h + W['write_policy']*l_w +
                W['semantic_probe']*l_sp + W['dir_diversity']*l_dd +
                W['reranker_ranking']*l_rr + W['vocab_anchor']*l_va +
                W.get('tail_semantic_anchor', 0.5)*l_tsa)
        loss.backward()
        nn.utils.clip_grad_norm_(
            [p for n, p in self.m.named_parameters()
             if p.requires_grad and 'backbone' not in n], 1.)
        self.opt.step(); self.warmup.advance(); self._step_count += 1
        grad_norms = self.grad_monitor.snapshot()
        self.layer_weight_history.append(self.m.layer_pool.weight_dist().cpu().numpy().copy())
        # Periodically re-encode all stored memories with the updated encoders.
        if self._step_count % self.c.refresh_memories_every == 0:
            self.m.eval()
            with torch.no_grad(): self.m._refresh_all_memories()
            self.m.train()
        self.m.eval()
        return {
            'total': loss.item(), 'recon': l_r.item(), 'contrast': l_c.item(),
            'holonomy': l_h.item(), 'write_policy': l_w.item(),
            'semantic_probe': l_sp.item(), 'dir_diversity': l_dd.item(),
            'reranker_ranking': l_rr.item(), 'encoder_throughput': l_et.item(),
            'vocab_anchor': l_va.item(), 'semantic_alignment': l_sa.item(),
            'tail_semantic_anchor': l_tsa.item(),
            'grad_norms': grad_norms, 'loss_weights': W}