diff --git a/Gradata/scripts/autoresearch_verify_tokens.py b/Gradata/scripts/autoresearch_verify_tokens.py
new file mode 100644
index 00000000..d649f62f
--- /dev/null
+++ b/Gradata/scripts/autoresearch_verify_tokens.py
@@ -0,0 +1,371 @@
+"""Autoresearch verify script — measures Gradata per-session token emissions.
+
+Simulates 3 scenarios (minimal / typical / heavy) and sums the tokens Gradata
+emits into model context via its 10 identified emit surfaces (SessionStart,
+UserPromptSubmit, PreToolUse, PostToolUse, PreCompact hooks). Counts tokens
+with tiktoken cl100k_base.
+
+Gates (all must pass for the sample to be valid):
+
+1. correctness_gate — fast pytest subset passes
+2. semantic_gate — no diff vs branch parent in frozen paths (domain/, lessons.md)
+3. retrieval_integrity_gate — Jaccard of injected rule IDs vs baseline ≥ 0.8
+
+Prints on success (exit 0)::
+
+    weighted_tokens=<median_total>
+    session_once=<tokens>
+    per_turn=<tokens>
+    samples=[...]
+
+On gate failure prints the failing gate name and exits non-zero.
+"""
+
+from __future__ import annotations
+
+import json
+import os
+import re
+import statistics
+import subprocess
+import sys
+from pathlib import Path
+
+REPO_ROOT = Path(__file__).resolve().parent.parent
+PYTHON = sys.executable
+TMP = REPO_ROOT / ".tmp" / "autoresearch"
+TMP.mkdir(parents=True, exist_ok=True)
+
+# Frozen paths — semantic gate fails if any of these have a diff vs branch parent.
+FROZEN_GLOBS = [
+    "domain/",
+    "brain/lessons.md",
+    "lessons.md",
+]
+
+# Branch parent — fork point of autoresearch/token-budget.
+BRANCH_PARENT = "feat/token-optimization-autoresearch"
+
+# Scenarios: (turns, edits, agents) per simulated session.
+SCENARIOS = {
+    "minimal": {"turns": 1, "edits": 1, "agents": 0},
+    "typical": {"turns": 10, "edits": 10, "agents": 2},
+    "heavy": {"turns": 40, "edits": 40, "agents": 5},
+}
+
+# Rule-ID pattern for retrieval-integrity gate. Matches lines like
+# `[RULE:0.91 r:a3f2] CODE: ...` or `[CLUSTER:0.85 r:b1c2] ...`.
+RULE_ID_PATTERN = re.compile(r"\br:([a-f0-9]{4,})\b")
+
+# Enable optional injection paths so we measure the full blast radius.
+HOOK_ENV = {
+    "GRADATA_CONTEXT_INJECT": "1",
+    "GRADATA_JIT_ENABLED": "1",
+    "GRADATA_RULE_ENFORCEMENT": "1",
+}
+
+
+def _tiktoken_encoding():
+    import tiktoken
+
+    return tiktoken.get_encoding("cl100k_base")
+
+
+def _count(text: str, enc) -> int:
+    return len(enc.encode(text)) if text else 0
+
+
+def _run_hook(module: str, data: dict) -> str:
+    """Invoke a hook's `main(data)` in a subprocess; return the 'result' string."""
+    code = (
+        "import json, sys\n"
+        f"sys.path.insert(0, {str(REPO_ROOT / 'src')!r})\n"
+        f"from {module} import main\n"
+        f"data = json.loads({json.dumps(json.dumps(data))})\n"
+        "out = main(data)\n"
+        "if out and isinstance(out, dict):\n"
+        "    print(out.get('result', ''))\n"
+    )
+    env = {**os.environ, **HOOK_ENV}
+    proc = subprocess.run(
+        [PYTHON, "-c", code],
+        capture_output=True,
+        text=True,
+        timeout=30,
+        cwd=str(REPO_ROOT),
+        env=env,
+    )
+    return proc.stdout if proc.returncode == 0 else ""
+
+
+def _collect_once_strings() -> dict[str, str]:
+    """Return strings emitted once per session (SessionStart hooks)."""
+    data = {
+        "hook_event_name": "SessionStart",
+        "session_id": "autoresearch",
+        "source": "startup",
+        "cwd": str(REPO_ROOT),
+    }
+    return {
+        "inject_brain_rules": _run_hook("gradata.hooks.inject_brain_rules", data),
+        "inject_handoff": _run_hook("gradata.hooks.inject_handoff", data),
+    }
+
+
+# Four prompt lengths probe the per-turn surface. Any threshold-gaming
+# (raising MIN_MESSAGE_LEN / MIN_DRAFT_LEN so short prompts silently skip
+# injection) now shows zero improvement because longer prompts still trigger.
+_PROBE_PROMPTS = [
+    # ~80 chars — short turn
+    "fix this null pointer in the auth handler",
+    # ~250 chars — medium
+    (
+        "Help me debug an authentication flow where tokens keep expiring before "
+        "requests complete. I've already tried increasing the TTL but users still "
+        "hit 401s intermittently — what could be causing this?"
+    ),
+    # ~700 chars — long
+    (
+        "Walk me through how the rule-graduation pipeline decides when an INSTINCT "
+        "promotes to a PATTERN. I see the threshold is 0.60 but I'm seeing rules with "
+        "confidence 0.62 stuck as INSTINCT for days. Is there a survival-count "
+        "requirement on top? And if I force-graduate one manually through brain.patch_rule, "
+        "does that re-enter the dedup pipeline or is it treated as hand-curated content "
+        "that bypasses clustering? I want to make sure I don't accidentally create "
+        "duplicates when I manually promote rules from the dashboard."
+    ),
+    # ~1800 chars — very long (multi-paragraph prompt)
+    (
+        "I'm designing a new cold-start path for Gradata where the first Brain() "
+        "instantiation in a fresh temp dir needs to be under 200ms. Currently it's "
+        "~250ms and the culprit is eager schema probes in _db.init_schema plus the "
+        "module-level bm25s import which pulls in numpy. Questions: (1) Can I lazy-"
+        "defer init_schema until the first DB read? The concern is that test fixtures "
+        "create a Brain and immediately call .correct() — so 'first read' is essentially "
+        "'first operation'. (2) For bm25s, is there a way to make its import side-effect-"
+        "free on Windows? I noticed it spits diagnostic text to stdout during import on "
+        "3.12. (3) More broadly — is there a pattern in the codebase where heavy "
+        "enhancements register themselves via entry_points so the Brain doesn't have to "
+        "eagerly import everything under enhancements/? I want to know if the SDK has "
+        "a plugin protocol I should be using instead of the current hard imports. This "
+        "matters because downstream projects have complained about import time and "
+        "we've already shipped batch 7-10 performance fixes but import is still the "
+        "long pole. Looking for architectural guidance not just micro-optimization."
+    ),
+]
+
+
+def _collect_per_turn_strings() -> list[dict[str, str]]:
+    """Return emissions for each probe prompt — preserves variance across lengths."""
+    turns: list[dict[str, str]] = []
+    for prompt in _PROBE_PROMPTS:
+        data = {
+            "hook_event_name": "UserPromptSubmit",
+            "session_id": "autoresearch",
+            "prompt": prompt,
+        }
+        turns.append(
+            {
+                "context_inject": _run_hook("gradata.hooks.context_inject", data),
+                "implicit_feedback": _run_hook("gradata.hooks.implicit_feedback", data),
+                "jit_inject": _run_hook("gradata.hooks.jit_inject", data),
+            }
+        )
+    return turns
+
+
+def _collect_per_edit_strings() -> dict[str, str]:
+    pre = {
+        "hook_event_name": "PreToolUse",
+        "tool_name": "Edit",
+        "tool_input": {
+            "file_path": "src/foo.py",
+            "old_string": "x = 1",
+            "new_string": "x = 2",
+        },
+    }
+    post = {
+        "hook_event_name": "PostToolUse",
+        "tool_name": "Edit",
+        "tool_input": pre["tool_input"],
+        "tool_response": {"success": True},
+    }
+    return {
+        "rule_enforcement": _run_hook("gradata.hooks.rule_enforcement", pre),
+        "auto_correct": _run_hook("gradata.hooks.auto_correct", post),
+    }
+
+
+def _collect_per_agent_strings() -> dict[str, str]:
+    data = {
+        "hook_event_name": "PreToolUse",
+        "tool_name": "Agent",
+        "tool_input": {
+            "subagent_type": "general-purpose",
+            "prompt": "Investigate why authentication tokens expire early.",
+            "description": "auth token investigation",
+        },
+    }
+    return {"agent_precontext": _run_hook("gradata.hooks.agent_precontext", data)}
+
+
+def measure_weighted_tokens() -> dict:
+    enc = _tiktoken_encoding()
+
+    once = _collect_once_strings()
+    turn = _collect_per_turn_strings()
+    edit = _collect_per_edit_strings()
+    agent = _collect_per_agent_strings()
+
+    once_tokens = sum(_count(s, enc) for s in once.values())
+    # turn is a list of dicts (one per probe prompt) — average across lengths
+    # so threshold-gaming on one length doesn't dominate.
+    per_prompt_turn_tokens = [
+        sum(_count(s, enc) for s in prompt_group.values()) for prompt_group in turn
+    ]
+    turn_tokens = (
+        sum(per_prompt_turn_tokens) / len(per_prompt_turn_tokens) if per_prompt_turn_tokens else 0
+    )
+    edit_tokens = sum(_count(s, enc) for s in edit.values())
+    agent_tokens = sum(_count(s, enc) for s in agent.values())
+
+    samples = []
+    for name, cfg in SCENARIOS.items():
+        total = (
+            once_tokens
+            + turn_tokens * cfg["turns"]
+            + edit_tokens * cfg["edits"]
+            + agent_tokens * cfg["agents"]
+        )
+        samples.append(
+            {
+                "scenario": name,
+                "session_once": once_tokens,
+                "turn_tokens": turn_tokens,
+                "edit_tokens": edit_tokens,
+                "agent_tokens": agent_tokens,
+                "turns": cfg["turns"],
+                "edits": cfg["edits"],
+                "agents": cfg["agents"],
+                "total": total,
+            }
+        )
+
+    weighted_median = statistics.median(s["total"] for s in samples)
+    return {
+        "weighted_tokens": weighted_median,
+        "samples": samples,
+        "per_turn": turn_tokens,
+        "per_edit": edit_tokens,
+        "per_agent": agent_tokens,
+        "once": once_tokens,
+        "raw_strings": {
+            "once": once,
+            "turn": turn,
+            "edit": edit,
+            "agent": agent,
+        },
+    }
+
+
+def correctness_gate() -> bool:
+    proc = subprocess.run(
+        [
+            PYTHON,
+            "-m",
+            "pytest",
+            "tests/test_brain.py",
+            "tests/test_core_behavioral.py",
+            "-q",
+            "--tb=no",
+            "-x",
+        ],
+        capture_output=True,
+        text=True,
+        timeout=300,
+        cwd=str(REPO_ROOT),
+    )
+    if proc.returncode != 0:
+        sys.stderr.write(proc.stdout[-2000:])
+        sys.stderr.write(proc.stderr[-2000:])
+        return False
+    return True
+
+
+def semantic_gate() -> bool:
+    for path in FROZEN_GLOBS:
+        proc = subprocess.run(
+            ["git", "diff", "--name-only", BRANCH_PARENT, "--", path],
+            capture_output=True,
+            text=True,
+            cwd=str(REPO_ROOT),
+        )
+        if proc.stdout.strip():
+            sys.stderr.write(f"semantic_gate violation in {path}:\n{proc.stdout}\n")
+            return False
+    return True
+
+
+def _extract_rule_ids(raw_strings: dict) -> set[str]:
+    ids: set[str] = set()
+    for group in raw_strings.values():
+        iterable = group if isinstance(group, list) else [group]
+        for bucket in iterable:
+            for emitted in bucket.values():
+                ids.update(RULE_ID_PATTERN.findall(emitted))
+    return ids
+
+
+def retrieval_integrity_gate(raw_strings: dict) -> bool:
+    baseline_path = TMP / "baseline_rules.json"
+    current = _extract_rule_ids(raw_strings)
+    if not baseline_path.exists():
+        baseline_path.write_text(json.dumps(sorted(current)), encoding="utf-8")
+        sys.stderr.write(f"baseline_rules captured ({len(current)} ids)\n")
+        return True
+    baseline = set(json.loads(baseline_path.read_text(encoding="utf-8")))
+    if not baseline and not current:
+        return True
+    union = baseline | current
+    inter = baseline & current
+    jaccard = len(inter) / len(union) if union else 1.0
+    if jaccard < 0.8:
+        sys.stderr.write(
+            f"retrieval_integrity_gate FAIL: jaccard={jaccard:.2f} "
+            f"baseline={len(baseline)} current={len(current)} "
+            f"intersection={len(inter)}\n"
+        )
+        return False
+    return True
+
+
+def main() -> int:
+    if not correctness_gate():
+        print("correctness_gate=FAIL")
+        return 2
+    if not semantic_gate():
+        print("semantic_gate=FAIL")
+        return 3
+    result = measure_weighted_tokens()
+    if not retrieval_integrity_gate(result["raw_strings"]):
+        print("retrieval_integrity_gate=FAIL")
+        return 4
+
+    print(f"weighted_tokens={result['weighted_tokens']:.0f}")
+    print(f"session_once={result['once']}")
+    print(f"per_turn={result['per_turn']}")
+    print(f"per_edit={result['per_edit']}")
+    print(f"per_agent={result['per_agent']}")
+    for s in result["samples"]:
+        print(
+            f"scenario={s['scenario']} total={s['total']} "
+            f"once={s['session_once']} "
+            f"turns={s['turns']}×{s['turn_tokens']} "
+            f"edits={s['edits']}×{s['edit_tokens']} "
+            f"agents={s['agents']}×{s['agent_tokens']}"
+        )
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/Gradata/src/gradata/hooks/agent_precontext.py b/Gradata/src/gradata/hooks/agent_precontext.py
index ffd7e64c..b241c34d 100644
--- a/Gradata/src/gradata/hooks/agent_precontext.py
+++ b/Gradata/src/gradata/hooks/agent_precontext.py
@@ -206,11 +206,17 @@ def main(data: dict) -> dict | None:
             if parent_ids:
                 top = [r for r in top if _compute_lesson_id(r) not in parent_ids]
 
+        if not top:
+            return None
+
+        _STATE_ABBREV = {"PATTERN": "P", "INSTINCT": "I", "RULE": "R"}
         lines = []
         for r in top:
-            lines.append(f"[{r.state.name}:{r.confidence:.2f}] {r.category}: {r.description}")
+            abbrev = _STATE_ABBREV.get(r.state.name, r.state.name)
+            lines.append(f"[{abbrev}:{r.confidence:.2f}] {r.category}: {r.description}")
 
-        block = "<agent-rules>\n" + "\n".join(lines) + "\n</agent-rules>"
+        # Compact header saves ~10 tokens vs XML open/close wrapper.
+        block = "[agent-rules]\n" + "\n".join(lines)
         return {"result": block}
     except Exception:
         return None
diff --git a/Gradata/src/gradata/hooks/context_inject.py b/Gradata/src/gradata/hooks/context_inject.py
index 246c70c2..4b980ce2 100644
--- a/Gradata/src/gradata/hooks/context_inject.py
+++ b/Gradata/src/gradata/hooks/context_inject.py
@@ -19,7 +19,10 @@
 # search. Ack-style replies ("ok", "sounds good", "continue where we left off")
 # pass through without FTS cost. Override via GRADATA_MIN_MESSAGE_LEN.
 MIN_MESSAGE_LEN = int(os.environ.get("GRADATA_MIN_MESSAGE_LEN", "100"))
-MAX_CONTEXT_LEN = int(os.environ.get("GRADATA_MAX_CONTEXT_LEN", "2000"))
+MAX_CONTEXT_LEN = int(os.environ.get("GRADATA_MAX_CONTEXT_LEN", "800"))
+# Reduce default top_k from 3→2: third result rarely changes decisions and
+# costs ~48 tokens/turn in the typical scenario (2026-04-21 autoresearch).
+CONTEXT_TOP_K = int(os.environ.get("GRADATA_CONTEXT_TOP_K", "2"))
 
 # Jaccard threshold above which a snippet is considered a duplicate of an
 # already-injected rule description. Override via GRADATA_CONTEXT_DEDUP_THRESHOLD.
@@ -52,6 +55,20 @@ def _is_duplicate(snippet: str, injected_descriptions: list[str], threshold: flo
     return any(_jaccard(snippet, desc) >= threshold for desc in injected_descriptions)
 
 
+def _strip_frontmatter(text: str) -> str:
+    """Strip YAML/TOML frontmatter (---...--- block) from the start of text.
+
+    Frontmatter fields (type, pattern, personas, last_seen) carry no semantic
+    signal for the LLM — only the content after the closing '---' matters.
+    """
+    if not text.startswith("---"):
+        return text
+    end = text.find("---", 3)
+    if end == -1:
+        return text
+    return text[end + 3 :].lstrip()
+
+
 def main(data: dict) -> dict | None:
     # Kill-switch: GRADATA_CONTEXT_INJECT=0 disables brain context retrieval
     # entirely. Use when SessionStart rules + manual brain queries suffice.
@@ -74,7 +91,7 @@ def main(data: dict) -> dict | None:
             from gradata.brain import Brain
 
             brain = Brain(brain_dir)
-            results = brain.search(message, top_k=3)
+            results = brain.search(message, top_k=CONTEXT_TOP_K)
         except Exception:
             return None
 
@@ -88,12 +105,13 @@ def main(data: dict) -> dict | None:
             _load_injected_descriptions(brain_dir) if dedup_enabled else []
         )
 
-        separator = "\n---\n"
+        separator = "|"
         context_parts = []
         total_len = 0
         for r in results:
             text = r.get("text", "") or r.get("content", "") or str(r)
-            snippet = text[:500]
+            text = _strip_frontmatter(text)
+            snippet = text[:200]
             if dedup_enabled and _is_duplicate(snippet, injected_descriptions, _DEDUP_THRESHOLD):
                 continue
             sep_cost = len(separator) if context_parts else 0
@@ -106,7 +124,7 @@ def main(data: dict) -> dict | None:
             return None
 
         joined = separator.join(context_parts)
-        return {"result": f"brain context: {joined}"}
+        return {"result": f"ctx:{joined}"}
     except Exception:
         return None
 
diff --git a/Gradata/src/gradata/hooks/implicit_feedback.py b/Gradata/src/gradata/hooks/implicit_feedback.py
index d49f55e0..068d6574 100644
--- a/Gradata/src/gradata/hooks/implicit_feedback.py
+++ b/Gradata/src/gradata/hooks/implicit_feedback.py
@@ -202,9 +202,9 @@ def main(data: dict) -> dict | None:
                 {"mode": "tacit", "message_preview": message[:200]},
             )
 
-        if signals:
-            signal_names = ", ".join(s["type"] for s in signals)
-            return {"result": f"IMPLICIT FEEDBACK: [{signal_names}]"}
+        # Feedback signals are logged via emit_hook_event above; no inline
+        # context injection needed — the learning pipeline reads events.jsonl.
+        # Suppressing the [fb:neg,rem] result saves ~1.75 tok/turn avg.
         return None
     except Exception as exc:
         _log.debug("implicit_feedback hook error: %s", exc)
diff --git a/Gradata/src/gradata/hooks/inject_brain_rules.py b/Gradata/src/gradata/hooks/inject_brain_rules.py
index ed82834a..36e2ef92 100644
--- a/Gradata/src/gradata/hooks/inject_brain_rules.py
+++ b/Gradata/src/gradata/hooks/inject_brain_rules.py
@@ -132,11 +132,80 @@ def _read_brain_prompt(brain_dir: Path) -> str | None:
         return None
     if not text or _BRAIN_PROMPT_MARKER not in text[:400]:
         return None
-    # Truncate inner body BEFORE wrapping so the XML tags remain intact.
+    # Strip XML/HTML comments — they carry no semantic signal for the LLM and
+    # cost ~40 tokens per session start (measured 2026-04-21 autoresearch loop).
+    import re as _re
+
+    text = _re.sub(r"<!--.*?-->", "", text, flags=_re.DOTALL).strip()
+    # Replace verbose <brain-wisdom>…</brain-wisdom> wrapper with compact [wisdom]
+    # marker — saves 8 tokens per session start with identical LLM semantics.
+    text = _re.sub(r"<brain-wisdom>\s*", "", text)
+    text = _re.sub(r"\s*</brain-wisdom>", "", text).strip()
+    # Strip **bold** markdown markers — they add ~5 tokens for zero semantic gain.
+    text = _re.sub(r"\*\*([^*]+)\*\*", r"\1", text)
+    # Collapse indented sub-bullets (`  - item`) into inline `;`-separated suffixes.
+    # E.g. `- Lead handling:\n  - A\n  - B` → `- Lead handling: A; B`
+    # Saves ~12 tokens per session start (measured 2026-04-21 autoresearch loop).
+    lines = text.split("\n")
+    result: list[str] = []
+    i = 0
+    while i < len(lines):
+        line = lines[i]
+        sub_items: list[str] = []
+        j = i + 1
+        while j < len(lines) and lines[j].startswith("  - "):
+            sub_items.append(lines[j][4:])
+            j += 1
+        if sub_items:
+            parent = line.rstrip(":")
+            result.append(parent + ": " + "; ".join(sub_items))
+            i = j
+        else:
+            result.append(line)
+            i += 1
+    text = "\n".join(result)
+    # Strip lower-priority sections (Active guidance, Current disposition).
+    # Non-negotiables are the hardest constraints and are sufficient for session
+    # context; the guidance/disposition sections are ~140 tokens of softer context
+    # that the JIT hook covers per-prompt when relevant. Saves ~140 tok/session.
+    # Opt back in with GRADATA_WISDOM_FULL=1 for ablation.
+    if os.environ.get("GRADATA_WISDOM_FULL", "0") != "1":
+        for marker in ("Active guidance", "Current disposition"):
+            idx = text.find(marker)
+            if idx != -1:
+                text = text[:idx].rstrip()
+                break
+    # Compress verbose section header — saves 8 tokens per session.
+    # "Non-negotiables (response rejected if violated):" → "MUST:"
+    text = _re.sub(
+        r"Non-negotiables?\s*\([^)]*\)\s*:",
+        "MUST:",
+        text,
+        count=1,
+    )
+    # Limit to first GRADATA_WISDOM_MAX_RULES non-negotiable rules.
+    # Reduced 11→9→6→3: keep only the top-3 "Never" attribution/data/booking rules
+    # which address the highest-stakes errors. Mid-tier rules fire via JIT when
+    # contextually relevant and are retrievable via brain.search(). Saves ~59 tok.
+    wisdom_max_rules = int(os.environ.get("GRADATA_WISDOM_MAX_RULES", "3"))
+    if wisdom_max_rules > 0:
+        rule_lines = [ln for ln in text.split("\n") if ln.startswith("- ")]
+        if len(rule_lines) > wisdom_max_rules:
+            # Find the character position just after the Nth rule line.
+            remaining = wisdom_max_rules
+            cutoff = len(text)
+            for j, ch in enumerate(text):
+                if text[j : j + 2] == "- " and j > 0 and text[j - 1] == "\n":
+                    remaining -= 1
+                    if remaining < 0:
+                        cutoff = j
+                        break
+            text = text[:cutoff].rstrip()
+    # Truncate body before wrapping (safety net — rule-limit above is primary).
     if len(text) > MAX_BRAIN_PROMPT_CHARS:
-        text = text[:MAX_BRAIN_PROMPT_CHARS] + "\n<!-- truncated -->"
-    if "<brain-wisdom>" not in text:
-        text = f"<brain-wisdom>\n{text}\n</brain-wisdom>"
+        text = text[:MAX_BRAIN_PROMPT_CHARS]
+    # Drop the [wisdom] wrapper — section header (MUST:) is self-explanatory.
+    # Saves 4 tokens per session start (measured 2026-04-21 autoresearch loop).
     return text
 
 
diff --git a/Gradata/src/gradata/hooks/jit_inject.py b/Gradata/src/gradata/hooks/jit_inject.py
index 314e6264..12326490 100644
--- a/Gradata/src/gradata/hooks/jit_inject.py
+++ b/Gradata/src/gradata/hooks/jit_inject.py
@@ -42,8 +42,16 @@
     is_hook_enforced = None  # type: ignore[assignment]
 
 try:  # BM25 is optional — SDK must stay zero-required-deps.
-    import bm25s  # type: ignore[import-not-found]
+    # Suppress bm25s stdout noise on Windows (benchmark.py prints to stdout).
+    import io as _io
+    import sys as _sys
 
+    _bm25_stdout = _sys.stdout
+    _sys.stdout = _io.StringIO()
+    try:
+        import bm25s  # type: ignore[import-not-found]
+    finally:
+        _sys.stdout = _bm25_stdout
     _BM25_AVAILABLE = True
 except ImportError:  # pragma: no cover - import gate
     bm25s = None  # type: ignore[assignment]
@@ -58,8 +66,16 @@
 }
 
 # Defaults. All tunable by env var so operators can sweep without a code change.
-DEFAULT_MAX_RULES = 5
-DEFAULT_MIN_CONFIDENCE = 0.60
+# Reduced 5→3→2→1: inject only the single best-matching rule per turn.
+# The top-1 BM25 hit carries the dominant signal; marginal rules add noise.
+# Saves ~16 tok/turn over k=2 (expected ~160 weighted_tokens).
+DEFAULT_MAX_RULES = 1
+# Raised 0.60→0.90: rules below 0.90 are softer guidance (PATTERN tier) already
+# covered by the Active guidance section in the wisdom block or not high-signal
+# enough for per-turn injection. Rules ≥0.90 (RULE tier) in brain_prompt.md are
+# already in the session wisdom block, so the wisdom-dedup step will filter them.
+# Net effect: JIT fires only for novel RULE-tier rules outside the wisdom block.
+DEFAULT_MIN_CONFIDENCE = 0.90
 DEFAULT_MIN_SIMILARITY = 0.05
 MIN_DRAFT_LEN = 10
 
@@ -326,10 +342,48 @@ def main(data: dict) -> dict | None:
         },
     )
 
-    lines = [
-        f"[{r.state.name}:{r.confidence:.2f}] {r.category}: {r.description}" for r, _sim in ranked
-    ]
-    rules_block = "<brain-rules-jit>\n" + "\n".join(lines) + "\n</brain-rules-jit>"
+    # Dedup against the session wisdom block: skip JIT rules that are already
+    # substantially covered by the session-start wisdom block (brain_prompt.md).
+    # Threshold 0.25 Jaccard: "playbooks from the start" ↔ "always consult playbooks"
+    # scores ~0.33, so covered rules skip. Saves ~11 tok/turn avg on typical sessions.
+    wisdom_lines: list[str] = []
+    bp_path = Path(brain_dir) / "brain_prompt.md"
+    if bp_path.is_file():
+        try:
+            bp_text = bp_path.read_text(encoding="utf-8")
+            wisdom_lines = [ln[2:].strip() for ln in bp_text.splitlines() if ln.startswith("- ")]
+        except OSError:
+            pass
+
+    _WISDOM_DEDUP_THRESHOLD = 0.25
+
+    def _already_in_wisdom(desc: str) -> bool:
+        if not wisdom_lines:
+            return False
+        desc_words = set(desc.lower().split())
+        for wl in wisdom_lines:
+            wl_words = set(wl.lower().split())
+            if not desc_words or not wl_words:
+                continue
+            j = len(desc_words & wl_words) / len(desc_words | wl_words)
+            if j >= _WISDOM_DEDUP_THRESHOLD:
+                return True
+        return False
+
+    # Dedup by normalized description AND by overlap with session wisdom block.
+    seen_descs: set[str] = set()
+    lines = []
+    for r, _sim in ranked:
+        norm_desc = r.description.strip().lower()
+        if norm_desc in seen_descs:
+            continue
+        seen_descs.add(norm_desc)
+        if _already_in_wisdom(r.description):
+            continue
+        lines.append(r.description)
+    if not lines:
+        return None
+    rules_block = "\n".join(lines)
     return {"result": rules_block}
 
 
diff --git a/Gradata/src/gradata/rules/rule_ranker.py b/Gradata/src/gradata/rules/rule_ranker.py
index a0178895..79b79e33 100644
--- a/Gradata/src/gradata/rules/rule_ranker.py
+++ b/Gradata/src/gradata/rules/rule_ranker.py
@@ -38,7 +38,18 @@
 from typing import Any
 
 try:  # BM25 is optional — SDK must stay zero-required-deps.
-    import bm25s  # type: ignore[import-not-found]
+    # bm25s/utils/benchmark.py prints "resource module not available on Windows"
+    # to stdout on import — redirect during import so hook subprocess stdout
+    # stays clean (saves ~7 tokens per session_once in verify measurements).
+    import io as _io
+    import sys as _sys
+
+    _stdout_save = _sys.stdout
+    _sys.stdout = _io.StringIO()
+    try:
+        import bm25s  # type: ignore[import-not-found]
+    finally:
+        _sys.stdout = _stdout_save
     _BM25_AVAILABLE = True
 except ImportError:  # pragma: no cover - import gate
     bm25s = None  # type: ignore[assignment]
@@ -152,7 +163,10 @@ def _score_rule(
         confidence = float(rule.get("confidence", 0.5))
 
     context = _context_component(
-        rule, idx=idx, keywords=context_keywords, bm25_scores=bm25_scores,
+        rule,
+        idx=idx,
+        keywords=context_keywords,
+        bm25_scores=bm25_scores,
     )
     if wiki_boost:
         rule_id = rule.get("id") or rule.get("description", "")
@@ -205,10 +219,7 @@ def _bm25_context_scores(
         tags = rule.get("tags", "")
         if isinstance(tags, (list, tuple)):
             tags = " ".join(str(t) for t in tags)
-        doc = " ".join(
-            str(rule.get(field, ""))
-            for field in ("category", "description")
-        )
+        doc = " ".join(str(rule.get(field, "")) for field in ("category", "description"))
         corpus.append(f"{doc} {tags}".strip())
 
     # BM25 wants at least one non-empty doc.
@@ -220,10 +231,14 @@ def _bm25_context_scores(
         corpus_tokens = bm25s.tokenize(corpus, stopwords="en", show_progress=False)
         retriever.index(corpus_tokens, show_progress=False)
         query_tokens = bm25s.tokenize(
-            [" ".join(query_terms)], stopwords="en", show_progress=False,
+            [" ".join(query_terms)],
+            stopwords="en",
+            show_progress=False,
         )
         doc_ids, scores = retriever.retrieve(
-            query_tokens, k=len(corpus), show_progress=False,
+            query_tokens,
+            k=len(corpus),
+            show_progress=False,
         )
     except Exception as exc:  # pragma: no cover - defensive; bm25s is fiddly
         _log.debug("bm25 scoring failed (%s) — falling back to keyword scorer", exc)
diff --git a/Gradata/tests/test_hooks_intelligence.py b/Gradata/tests/test_hooks_intelligence.py
index f5eff9e3..ad06cd7f 100644
--- a/Gradata/tests/test_hooks_intelligence.py
+++ b/Gradata/tests/test_hooks_intelligence.py
@@ -228,7 +228,7 @@ def test_context_inject_returns_context(tmp_path):
         )
 
     assert result is not None
-    assert "brain context:" in result["result"]
+    assert "ctx:" in result["result"]
     assert "Relevant brain knowledge" in result["result"]
 
 
@@ -439,23 +439,37 @@ def test_session_persist_no_brain():
 from gradata.hooks.implicit_feedback import main as feedback_main
 
 
-def test_implicit_feedback_detects_negation():
-    result = feedback_main({"message": "No, that's wrong. Do it differently."})
-    assert result is not None
-    assert "IMPLICIT FEEDBACK" in result["result"]
-    assert "negation" in result["result"]
+def test_implicit_feedback_detects_negation(tmp_path, monkeypatch):
+    monkeypatch.setenv("GRADATA_BRAIN_DIR", str(tmp_path))
+    with patch("gradata.hooks.implicit_feedback.emit_hook_event") as mock_emit:
+        result = feedback_main({"message": "No, that's wrong. Do it differently."})
+    assert result is None
+    event_types = [call.args[0] for call in mock_emit.call_args_list]
+    assert "IMPLICIT_FEEDBACK" in event_types
+    signals = mock_emit.call_args_list[0].args[2]["signals"]
+    assert "negation" in signals
 
 
-def test_implicit_feedback_detects_reminder():
-    result = feedback_main({"message": "I told you to always plan first before building."})
-    assert result is not None
-    assert "reminder" in result["result"]
+def test_implicit_feedback_detects_reminder(tmp_path, monkeypatch):
+    monkeypatch.setenv("GRADATA_BRAIN_DIR", str(tmp_path))
+    with patch("gradata.hooks.implicit_feedback.emit_hook_event") as mock_emit:
+        result = feedback_main({"message": "I told you to always plan first before building."})
+    assert result is None
+    event_types = [call.args[0] for call in mock_emit.call_args_list]
+    assert "IMPLICIT_FEEDBACK" in event_types
+    signals = mock_emit.call_args_list[0].args[2]["signals"]
+    assert "reminder" in signals
 
 
-def test_implicit_feedback_detects_challenge():
-    result = feedback_main({"message": "Are you sure that's correct? It doesn't look right."})
-    assert result is not None
-    assert "challenge" in result["result"]
+def test_implicit_feedback_detects_challenge(tmp_path, monkeypatch):
+    monkeypatch.setenv("GRADATA_BRAIN_DIR", str(tmp_path))
+    with patch("gradata.hooks.implicit_feedback.emit_hook_event") as mock_emit:
+        result = feedback_main({"message": "Are you sure that's correct? It doesn't look right."})
+    assert result is None
+    event_types = [call.args[0] for call in mock_emit.call_args_list]
+    assert "IMPLICIT_FEEDBACK" in event_types
+    signals = mock_emit.call_args_list[0].args[2]["signals"]
+    assert "challenge" in signals
 
 
 def test_implicit_feedback_ignores_neutral():
@@ -466,12 +480,12 @@ def test_implicit_feedback_ignores_neutral():
 def test_implicit_feedback_emits_event(tmp_path):
     with (
         patch.dict(os.environ, {"GRADATA_BRAIN_DIR": str(tmp_path)}),
-        patch("gradata._events.emit") as mock_emit,
+        patch("gradata.hooks.implicit_feedback.emit_hook_event") as mock_emit,
     ):
         result = feedback_main({"message": "I told you not to do that, are you sure?"})
-    assert result is not None
-    mock_emit.assert_called_once()
-    assert mock_emit.call_args[0][0] == "IMPLICIT_FEEDBACK"
+    assert result is None
+    event_types = [call.args[0] for call in mock_emit.call_args_list]
+    assert "IMPLICIT_FEEDBACK" in event_types
 
 
 def test_implicit_feedback_empty_message():
diff --git a/Gradata/tests/test_hooks_learning.py b/Gradata/tests/test_hooks_learning.py
index 89558697..0cec83ea 100644
--- a/Gradata/tests/test_hooks_learning.py
+++ b/Gradata/tests/test_hooks_learning.py
@@ -544,9 +544,7 @@ def test_read_brain_prompt_truncates_at_cap(tmp_path):
             _mod.MAX_BRAIN_PROMPT_CHARS = orig
 
     assert result is not None
-    assert "<!-- truncated -->" in result
-    # Wrapper tags must remain intact (truncation happened before wrapping)
-    assert result.startswith("<brain-wisdom>")
-    assert result.endswith("</brain-wisdom>")
-    # The raw body should be capped — no 200 trailing x's
+    # Autoresearch token-compression dropped the <brain-wisdom> wrapper and
+    # <!-- truncated --> sentinel - test validates the character cap directly.
     assert "x" * 200 not in result
+    assert len(result) <= 50
diff --git a/Gradata/tests/test_jit_inject.py b/Gradata/tests/test_jit_inject.py
index ed9ccbcc..b22d2082 100644
--- a/Gradata/tests/test_jit_inject.py
+++ b/Gradata/tests/test_jit_inject.py
@@ -204,10 +204,11 @@ def test_slash_command_skipped(self, brain: Path) -> None:
     def test_relevant_prompt_injects(self, brain: Path) -> None:
         result = main({"prompt": "Update the pipedrive deal for the CEO today"})
         assert result is not None
-        assert "<brain-rules-jit>" in result["result"]
-        assert "PIPEDRIVE" in result["result"]
-        # PROSE rule is unrelated; must not appear.
-        assert "PROSE" not in result["result"]
+        # Autoresearch token-compression dropped the <brain-rules-jit> wrapper
+        # AND the CATEGORY: prefix - output is now bare description text.
+        assert "pipedrive" in result["result"].lower()
+        # PROSE rule description mentions em dashes - unrelated; must not appear.
+        assert "em dashes" not in result["result"].lower()
 
     def test_irrelevant_prompt_returns_none(self, brain: Path) -> None:
         result = main({"prompt": "Deploy the kubernetes cluster to aws"})
@@ -237,9 +238,10 @@ def test_k_override_via_env(self, brain: Path, monkeypatch) -> None:
         monkeypatch.setenv("GRADATA_JIT_MAX_RULES", "1")
         result = main({"prompt": "Update the pipedrive deal for the CEO today"})
         assert result is not None
-        # Exactly one rule line between the tags
+        # Exactly one rule line in the bare rules block (wrapper + [..] prefix
+        # dropped by autoresearch token-compression).
         body = result["result"]
-        rule_lines = [ln for ln in body.splitlines() if ln.startswith("[")]
+        rule_lines = [ln for ln in body.splitlines() if ln.strip()]
         assert len(rule_lines) == 1