diff --git a/src/gradata/enhancements/meta_rules.py b/src/gradata/enhancements/meta_rules.py
index 2b2162f0..03dd9723 100644
--- a/src/gradata/enhancements/meta_rules.py
+++ b/src/gradata/enhancements/meta_rules.py
@@ -36,7 +36,17 @@
@dataclass
class MetaRule:
- """Emergent principle from 3+ related corrections."""
+ """Emergent principle from 3+ related corrections.
+
+ The ``source`` field tracks how the principle text was generated:
+ - ``"deterministic"`` (default): produced by token-frequency / cluster
+ heuristics. Empirically (2026-04-14 ablation) these regress
+ correctness when injected into prompts. Excluded from injection.
+ - ``"llm_synth"``: produced by cloud-side LLM synthesis from the
+ source rules. Eligible for injection.
+ - ``"human_curated"``: hand-written or human-edited principle. Always
+ eligible for injection.
+ """
id: str
principle: str
@@ -51,6 +61,13 @@ class MetaRule:
applies_when: list[str] = field(default_factory=list)
never_when: list[str] = field(default_factory=list)
transfer_scope: RuleTransferScope = RuleTransferScope.PERSONAL
+ source: str = "deterministic" # provenance of the principle text — see class docstring
+
+
+# Sources whose principle text is trusted enough to inject into LLM prompts.
+# Deterministic auto-generated principles regress correctness empirically
+# (2026-04-14 ablation, 432 trials, judged blind).
+INJECTABLE_META_SOURCES = frozenset({"llm_synth", "human_curated"})
@dataclass
@@ -305,6 +322,7 @@ def format_meta_rules_for_prompt(
context: str = "",
condition_context: dict | None = None,
scope_filter: RuleTransferScope | None = None,
+ limit: int | None = None,
) -> str:
"""Format meta-rules for injection into LLM context.
@@ -322,6 +340,10 @@ def format_meta_rules_for_prompt(
When *scope_filter* is provided, only meta-rules with the matching
``transfer_scope`` are included.
+ When *limit* is provided, the cap is applied AFTER context-aware
+ ranking so a lower-confidence rule with a stronger context weight
+ can still be promoted into the final set.
+
Args:
metas: Meta-rules to format.
context: Optional task-context label (e.g. ``"drafting"``,
@@ -332,6 +354,9 @@ def format_meta_rules_for_prompt(
:func:`evaluate_conditions` are included.
scope_filter: When set, only include meta-rules with this
transfer scope.
+ limit: Optional maximum number of meta-rules to include.
+ Applied AFTER context-aware ranking so context weight can
+ influence which rules make the cut. ``None`` means no cap.
Returns:
Formatted string block, or ``""`` if *metas* is empty.
@@ -349,9 +374,15 @@ def format_meta_rules_for_prompt(
if not metas:
return ""
- # Re-rank by context weight when a context is provided
+ # Re-rank by context weight when a context is provided. Pass `limit`
+ # through as `max_rules` so ranking + capping happens atomically;
+ # otherwise apply the cap after the fact (no ranking case).
if context:
- metas = rank_meta_rules_by_context(metas, context)
+ metas = rank_meta_rules_by_context(
+ metas, context, max_rules=limit if limit is not None else len(metas),
+ )
+ elif limit is not None:
+ metas = metas[:limit]
lines = ["## Brain Meta-Rules (compound principles)"]
for i, meta in enumerate(metas, start=1):
diff --git a/src/gradata/enhancements/meta_rules_storage.py b/src/gradata/enhancements/meta_rules_storage.py
index 47b9e3bc..f299f0a1 100644
--- a/src/gradata/enhancements/meta_rules_storage.py
+++ b/src/gradata/enhancements/meta_rules_storage.py
@@ -53,12 +53,20 @@
"ALTER TABLE meta_rules ADD COLUMN transfer_scope TEXT DEFAULT 'personal'"
)
+# Provenance of the principle text (deterministic / llm_synth / human_curated).
+# Default is 'deterministic' for safety — pre-existing rows from before this
+# migration are auto-generated by the clusterer and should NOT be injected
+# (2026-04-14 ablation showed they regress correctness).
+_ADD_SOURCE_SQL = (
+ "ALTER TABLE meta_rules ADD COLUMN source TEXT DEFAULT 'deterministic'"
+)
+
def ensure_table(db_path: str | Path) -> None:
"""Create the meta_rules table if it does not exist.
- Also migrates existing tables by adding the ``context_weights``
- column when it is missing (backward-compatible upgrade).
+ Also migrates existing tables by adding new columns when they are
+ missing (backward-compatible upgrade).
Args:
db_path: Path to the SQLite database file.
@@ -67,7 +75,13 @@ def ensure_table(db_path: str | Path) -> None:
try:
conn.execute(_CREATE_TABLE_SQL)
# Migrate: add columns if table existed before this version
- for stmt in (_ADD_CONTEXT_WEIGHTS_SQL, _ADD_APPLIES_WHEN_SQL, _ADD_NEVER_WHEN_SQL, _ADD_TRANSFER_SCOPE_SQL):
+ for stmt in (
+ _ADD_CONTEXT_WEIGHTS_SQL,
+ _ADD_APPLIES_WHEN_SQL,
+ _ADD_NEVER_WHEN_SQL,
+ _ADD_TRANSFER_SCOPE_SQL,
+ _ADD_SOURCE_SQL,
+ ):
with contextlib.suppress(sqlite3.OperationalError):
conn.execute(stmt)
conn.commit()
@@ -96,8 +110,8 @@ def save_meta_rules(db_path: str | Path, metas: list[MetaRule]) -> int:
(id, principle, source_categories, source_lesson_ids,
confidence, created_session, last_validated_session,
scope, examples, context_weights, applies_when, never_when,
- transfer_scope)
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
+ transfer_scope, source)
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
(
meta.id,
meta.principle,
@@ -112,6 +126,7 @@ def save_meta_rules(db_path: str | Path, metas: list[MetaRule]) -> int:
json.dumps(meta.applies_when),
json.dumps(meta.never_when),
meta.transfer_scope.value,
+ meta.source,
),
)
conn.commit()
@@ -142,11 +157,18 @@ def load_meta_rules(db_path: str | Path) -> list[MetaRule]:
if not cursor.fetchone():
return []
+ # Legacy brains may predate the `source` column. Probe the schema and
+ # synthesize `'deterministic'` in the SELECT when the column is absent
+ # so read-only callers (e.g. inject_brain_rules) don't have to trigger
+ # a write migration. Keeps the query shape stable for row unpacking.
+ existing_cols = {r[1] for r in conn.execute("PRAGMA table_info(meta_rules)")}
+ source_expr = "source" if "source" in existing_cols else "'deterministic' AS source"
+
rows = conn.execute(
- """SELECT id, principle, source_categories, source_lesson_ids,
+ f"""SELECT id, principle, source_categories, source_lesson_ids,
confidence, created_session, last_validated_session,
scope, examples, context_weights, applies_when, never_when,
- transfer_scope
+ transfer_scope, {source_expr}
FROM meta_rules
ORDER BY confidence DESC"""
).fetchall()
@@ -170,6 +192,7 @@ def load_meta_rules(db_path: str | Path) -> list[MetaRule]:
applies_when=json.loads(row[10]) if row[10] else [],
never_when=json.loads(row[11]) if row[11] else [],
transfer_scope=_SCOPE_MAP.get(row[12], RuleTransferScope.PERSONAL) if row[12] else RuleTransferScope.PERSONAL,
+ source=row[13] or "deterministic",
))
return metas
finally:
diff --git a/src/gradata/hooks/inject_brain_rules.py b/src/gradata/hooks/inject_brain_rules.py
index 9498d43d..c460a1fb 100644
--- a/src/gradata/hooks/inject_brain_rules.py
+++ b/src/gradata/hooks/inject_brain_rules.py
@@ -20,6 +20,17 @@
except ImportError:
parse_lessons = None
+try:
+ from gradata.enhancements.meta_rules import (
+ INJECTABLE_META_SOURCES,
+ format_meta_rules_for_prompt,
+ )
+ from gradata.enhancements.meta_rules_storage import load_meta_rules
+except ImportError:
+ format_meta_rules_for_prompt = None # type: ignore[assignment]
+ load_meta_rules = None # type: ignore[assignment]
+ INJECTABLE_META_SOURCES = frozenset() # type: ignore[assignment]
+
_log = logging.getLogger(__name__)
HOOK_META = {
@@ -30,6 +41,7 @@
MAX_RULES = 10
MIN_CONFIDENCE = 0.60
+MAX_META_RULES = 5 # meta-rules are high-level principles — separate cap from MAX_RULES
def _score(lesson) -> float:
@@ -129,8 +141,72 @@ def main(data: dict) -> dict | None:
for r in scored
]
-    block = "\n<brain-rules>\n" + "\n".join(lines) + "\n</brain-rules>\n"
-    return {"result": block}
+    rules_block = "\n<brain-rules>\n" + "\n".join(lines) + "\n</brain-rules>\n"
+
+ # Also inject tier-1 meta-rules (compound principles across 3+ lessons).
+ # Without this, meta-rules are created + stored but never reach the LLM.
+ # Quality gate: only inject metas whose principle text was LLM-synthesized
+ # or human-curated. Deterministic auto-generated principles (the OSS
+ # default) are excluded — the 2026-04-14 ablation (432 trials) showed they
+ # regress correctness on Sonnet (-1.1%), DeepSeek (-1.4%), and halve the
+ # qwen14b lift from +8.1% to +2.9%. Better to inject nothing than noise.
+ meta_block = ""
+ db_path = Path(brain_dir) / "system.db"
+ if load_meta_rules and format_meta_rules_for_prompt and db_path.is_file():
+ # Wrap the entire load -> filter -> format pipeline. A partially corrupt
+ # system.db can deserialize successfully (e.g. JSON `null` for
+ # source_lesson_ids) and then blow up later with TypeError inside the
+ # formatter. We must degrade to rules-only rather than aborting
+ # SessionStart.
+ try:
+ metas = load_meta_rules(db_path)
+ injectable = [
+ m for m in metas
+ if getattr(m, "source", "deterministic") in INJECTABLE_META_SOURCES
+ ]
+ if injectable:
+ # Build a sanitized condition_context from the hook payload so
+ # applies_when / never_when are honored during SessionStart.
+ # We only forward small, string-shaped fields the rule engine
+ # uses for gating — no file contents, transcripts, or secrets.
+ condition_context = {
+ k: data[k]
+ for k in ("session_type", "task_type", "source", "cwd")
+ if isinstance(data.get(k), (str, int, float, bool))
+ }
+ if context and "context" not in condition_context:
+ condition_context["context"] = context
+
+ # Pass the full injectable set with `limit=MAX_META_RULES` so
+ # the cap is applied AFTER context-aware ranking inside the
+ # formatter. Pre-slicing by raw confidence would let a
+ # lower-confidence rule with a strong context weight get
+ # silently excluded.
+ formatted = format_meta_rules_for_prompt(
+ injectable,
+ context=context,
+ condition_context=condition_context,
+ limit=MAX_META_RULES,
+ )
+ if formatted:
+ meta_block = (
+                    "\n\n<brain-meta-rules>\n"
+                    + formatted
+                    + "\n</brain-meta-rules>\n"
+ )
+ elif metas:
+ _log.debug(
+ "Skipped meta-rule injection: %d metas in DB, none with "
+ "injectable source (llm_synth or human_curated)",
+ len(metas),
+ )
+ except Exception as exc:
+ _log.debug(
+ "meta-rule pipeline failed (%s) — degrading to rules-only", exc,
+ )
+ meta_block = ""
+
+ return {"result": rules_block + meta_block}
if __name__ == "__main__":
diff --git a/tests/test_hooks_learning.py b/tests/test_hooks_learning.py
index 9ac31f85..a2b4fa63 100644
--- a/tests/test_hooks_learning.py
+++ b/tests/test_hooks_learning.py
@@ -73,6 +73,275 @@ def test_inject_rules_no_lessons_file(tmp_path):
assert result is None
+def test_inject_emits_meta_rules_block_for_llm_synth_source(tmp_path):
+ """Meta-rules with source='llm_synth' or 'human_curated' get injected."""
+ (tmp_path / "lessons.md").write_text(
+ "[2026-04-01] [RULE:0.92] PROCESS: Always plan before implementing\n",
+ encoding="utf-8",
+ )
+
+ from gradata.enhancements.meta_rules import MetaRule
+ from gradata.enhancements.meta_rules_storage import save_meta_rules
+
+ db_path = tmp_path / "system.db"
+ meta = MetaRule(
+ id="m-1",
+ principle="Verify before acting — check existing state before creating new artifacts.",
+ source_categories=["PROCESS", "CODE"],
+ source_lesson_ids=["l-1", "l-2", "l-3"],
+ confidence=0.88,
+ created_session=1,
+ last_validated_session=1,
+ source="llm_synth",
+ )
+ save_meta_rules(db_path, [meta])
+
+ with patch.dict(os.environ, {"GRADATA_BRAIN_DIR": str(tmp_path)}):
+ result = inject_main({})
+ assert result is not None
+ text = result.get("result", "")
+        assert "<brain-rules>" in text
+        assert "<brain-meta-rules>" in text
+ assert "Verify before acting" in text
+
+
+def test_inject_skips_meta_rules_with_deterministic_source(tmp_path):
+ """Meta-rules with source='deterministic' (the default for auto-generated
+ cluster output) are EXCLUDED from injection. Empirical: 2026-04-14 ablation
+ showed deterministic principles regress correctness."""
+ (tmp_path / "lessons.md").write_text(
+ "[2026-04-01] [RULE:0.92] PROCESS: Always plan before implementing\n",
+ encoding="utf-8",
+ )
+
+ from gradata.enhancements.meta_rules import MetaRule
+ from gradata.enhancements.meta_rules_storage import save_meta_rules
+
+ db_path = tmp_path / "system.db"
+ # Default source is 'deterministic' — should NOT be injected
+ meta = MetaRule(
+ id="m-2",
+ principle="Code: Avoid: foo. Prefer: bar.", # the ablation-confirmed garbage shape
+ source_categories=["CODE"],
+ source_lesson_ids=["l-9"],
+ confidence=1.00, # confidence is high BUT source disqualifies it
+ created_session=1,
+ last_validated_session=1,
+ )
+ save_meta_rules(db_path, [meta])
+
+ with patch.dict(os.environ, {"GRADATA_BRAIN_DIR": str(tmp_path)}):
+ result = inject_main({})
+ assert result is not None
+ text = result.get("result", "")
+        assert "<brain-rules>" in text
+        # Critical: the deterministic meta-rule must NOT appear in the prompt
+        assert "<brain-meta-rules>" not in text
+ assert "Avoid: foo" not in text
+
+
+def test_inject_caps_meta_rules_and_context_promotes_lower_confidence(tmp_path):
+ """Boundary test: with more than MAX_META_RULES injectable metas, the cap
+ must be applied AFTER context-aware ranking, so a lower-confidence rule
+ with a strong context weight can still make the cut.
+
+ Regression guard for the CR finding: pre-slicing by raw confidence would
+ silently exclude the context-promoted rule, giving the LLM the wrong
+ principles for the current task.
+ """
+ from gradata.enhancements.meta_rules import MetaRule
+ from gradata.enhancements.meta_rules_storage import save_meta_rules
+ from gradata.hooks.inject_brain_rules import MAX_META_RULES
+
+ (tmp_path / "lessons.md").write_text(
+ "[2026-04-01] [RULE:0.92] PROCESS: Always plan before implementing\n",
+ encoding="utf-8",
+ )
+
+ db_path = tmp_path / "system.db"
+ # Seed MAX_META_RULES + 2 high-confidence metas that are *neutral* in the
+ # target context, plus one lower-confidence meta that should be *boosted*
+ # by context weight so it makes it into the top-N despite its lower base.
+ metas = []
+ for i in range(MAX_META_RULES + 2):
+ metas.append(
+ MetaRule(
+ id=f"m-hi-{i}",
+ principle=f"Neutral principle number {i} for baseline comparison.",
+ source_categories=["PROCESS"],
+ source_lesson_ids=[f"l-{i}a", f"l-{i}b", f"l-{i}c"],
+ confidence=0.95,
+ created_session=1,
+ last_validated_session=1,
+ source="llm_synth",
+ ),
+ )
+ # Lower base confidence (0.60) but a very strong context weight (3.0) in
+ # the "drafting" context — should beat the neutral 0.95-confidence metas
+ # after weighting (0.60 * 3.0 = 1.80 > 0.95 * 1.0 = 0.95).
+ promoted = MetaRule(
+ id="m-promoted",
+ principle="Promoted drafting principle — context weight lifts this in.",
+ source_categories=["TONE"],
+ source_lesson_ids=["l-p1", "l-p2", "l-p3"],
+ confidence=0.60,
+ created_session=1,
+ last_validated_session=1,
+ source="llm_synth",
+ context_weights={"drafting": 3.0, "default": 1.0},
+ )
+ metas.append(promoted)
+ save_meta_rules(db_path, metas)
+
+ # Run the hook with a context that promotes the low-confidence meta.
+ with patch.dict(os.environ, {"GRADATA_BRAIN_DIR": str(tmp_path)}):
+ result = inject_main({"session_type": "drafting"})
+
+ assert result is not None
+ text = result["result"]
+    assert "<brain-meta-rules>" in text
+
+ # Cap: only MAX_META_RULES meta-rule lines (numbered "1.", "2." ...) appear
+ # between the meta-rules tags.
+    meta_section = text.split("<brain-meta-rules>")[1].split("</brain-meta-rules>")[0]
+ numbered_lines = [
+ line for line in meta_section.splitlines()
+ if line.strip() and line.lstrip()[0].isdigit() and ". [META:" in line
+ ]
+ assert len(numbered_lines) == MAX_META_RULES, (
+ f"expected exactly {MAX_META_RULES} meta-rule lines, got {len(numbered_lines)}"
+ )
+
+ # Context-aware promotion: the lower-confidence but context-boosted rule
+ # must appear in the final output even though MAX_META_RULES other metas
+ # have higher raw confidence.
+ assert "Promoted drafting principle" in text, (
+ "context-weighted rule was excluded — cap is being applied before "
+ "context ranking (CR finding regression)"
+ )
+
+
+def test_inject_tolerates_missing_meta_rules_db(tmp_path):
+ """No system.db file → still returns the rules block, no meta-rules block,
+ and no exception."""
+ (tmp_path / "lessons.md").write_text(
+ "[2026-04-01] [RULE:0.92] PROCESS: Always plan before implementing\n",
+ encoding="utf-8",
+ )
+ with patch.dict(os.environ, {"GRADATA_BRAIN_DIR": str(tmp_path)}):
+ result = inject_main({})
+ assert result is not None
+ text = result.get("result", "")
+    assert "<brain-rules>" in text
+    assert "<brain-meta-rules>" not in text
+
+
+def test_corrupt_meta_rules_db_degrades_to_rules_only(tmp_path):
+ """Corrupt-but-readable system.db → still returns rules block, no
+ meta-rules block, and no exception."""
+ (tmp_path / "lessons.md").write_text(
+ "[2026-04-01] [RULE:0.92] PROCESS: Always plan before implementing\n",
+ encoding="utf-8",
+ )
+ # Write a malformed-but-present system.db. The hook checks `is_file()`
+ # then calls load_meta_rules; the load (or any downstream filter/format)
+ # must not abort SessionStart. Garbage bytes guarantee deserialization
+ # failure in the storage layer.
+ (tmp_path / "system.db").write_bytes(b"this is not a valid sqlite or json payload")
+
+ with patch.dict(os.environ, {"GRADATA_BRAIN_DIR": str(tmp_path)}):
+ result = inject_main({})
+
+ assert result is not None
+ text = result.get("result", "")
+    assert "<brain-rules>" in text
+    assert "<brain-meta-rules>" not in text
+
+
+def test_load_meta_rules_legacy_schema_without_source_column(tmp_path):
+ """Brains upgraded from an older schema may be missing the `source`
+ column. Read-only callers must still get rows back with source
+ synthesized as 'deterministic' rather than raising 'no such column'."""
+ import sqlite3
+
+ from gradata.enhancements.meta_rules_storage import load_meta_rules
+
+ db_path = tmp_path / "system.db"
+ conn = sqlite3.connect(str(db_path))
+ try:
+ # Legacy schema: every column EXCEPT `source`.
+ conn.execute(
+ """CREATE TABLE meta_rules (
+ id TEXT PRIMARY KEY,
+ principle TEXT,
+ source_categories TEXT,
+ source_lesson_ids TEXT,
+ confidence REAL,
+ created_session INTEGER,
+ last_validated_session INTEGER,
+ scope TEXT,
+ examples TEXT,
+ context_weights TEXT,
+ applies_when TEXT,
+ never_when TEXT,
+ transfer_scope TEXT
+ )"""
+ )
+ conn.execute(
+ """INSERT INTO meta_rules VALUES
+ ('m1', 'Always verify', '["PROCESS"]', '["l1"]', 0.85,
+ 1, 1, '{}', '[]', '{"default": 1.0}', '[]', '[]', 'personal')"""
+ )
+ conn.commit()
+ finally:
+ conn.close()
+
+ metas = load_meta_rules(db_path)
+ assert len(metas) == 1
+ assert metas[0].id == "m1"
+ assert metas[0].source == "deterministic"
+
+
+def test_inject_respects_applies_when_never_when(tmp_path):
+ """applies_when / never_when must gate meta-rule injection at
+ SessionStart. A rule with never_when=session_type=code must be
+ excluded when the hook payload says session_type=code."""
+ from gradata.enhancements.meta_rules import MetaRule
+ from gradata.enhancements.meta_rules_storage import save_meta_rules
+
+ (tmp_path / "lessons.md").write_text(
+ "[2026-04-01] [RULE:0.92] PROCESS: Always plan before implementing\n",
+ encoding="utf-8",
+ )
+ db_path = tmp_path / "system.db"
+ gated = MetaRule(
+ id="m_gated",
+ principle="Never chat in code sessions",
+ source_categories=["PROCESS"],
+ source_lesson_ids=["l1", "l2", "l3"],
+ confidence=0.90,
+ created_session=1,
+ last_validated_session=1,
+ never_when=["session_type=code"],
+ source="llm_synth",
+ )
+ save_meta_rules(db_path, [gated])
+
+ with patch.dict(os.environ, {"GRADATA_BRAIN_DIR": str(tmp_path)}):
+ # Blocking payload — never_when matches, meta-rule must be skipped.
+ blocked = inject_main({"session_type": "code"})
+ # Permissive payload — never_when does not match, meta-rule injected.
+ allowed = inject_main({"session_type": "prose"})
+
+ assert blocked is not None
+    assert "<brain-rules>" in blocked["result"]
+    assert "<brain-meta-rules>" not in blocked["result"]
+
+ assert allowed is not None
+    assert "<brain-meta-rules>" in allowed["result"]
+ assert "Never chat in code sessions" in allowed["result"]
+
+
def test_session_close_emits_event(tmp_path):
with patch.dict(os.environ, {"GRADATA_BRAIN_DIR": str(tmp_path)}):
with patch("gradata.hooks.session_close._emit_session_end") as mock_emit: