Skip to content
37 changes: 34 additions & 3 deletions src/gradata/enhancements/meta_rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,17 @@

@dataclass
class MetaRule:
"""Emergent principle from 3+ related corrections."""
"""Emergent principle from 3+ related corrections.

The ``source`` field tracks how the principle text was generated:
- ``"deterministic"`` (default): produced by token-frequency / cluster
heuristics. Empirically (2026-04-14 ablation) these regress
correctness when injected into prompts. Excluded from injection.
- ``"llm_synth"``: produced by cloud-side LLM synthesis from the
source rules. Eligible for injection.
- ``"human_curated"``: hand-written or human-edited principle. Always
eligible for injection.
"""

id: str
principle: str
Expand All @@ -51,6 +61,13 @@ class MetaRule:
applies_when: list[str] = field(default_factory=list)
never_when: list[str] = field(default_factory=list)
transfer_scope: RuleTransferScope = RuleTransferScope.PERSONAL
source: str = "deterministic" # provenance of the principle text — see class docstring


# Provenance values whose principle text may be injected into LLM prompts.
# Auto-generated ("deterministic") principles are deliberately excluded:
# a blind-judged ablation (2026-04-14, 432 trials) showed that injecting
# them regresses correctness.
INJECTABLE_META_SOURCES = frozenset(("llm_synth", "human_curated"))


@dataclass
Expand Down Expand Up @@ -305,6 +322,7 @@ def format_meta_rules_for_prompt(
context: str = "",
condition_context: dict | None = None,
scope_filter: RuleTransferScope | None = None,
limit: int | None = None,
) -> str:
"""Format meta-rules for injection into LLM context.

Expand All @@ -322,6 +340,10 @@ def format_meta_rules_for_prompt(
When *scope_filter* is provided, only meta-rules with the matching
``transfer_scope`` are included.

When *limit* is provided, the cap is applied AFTER context-aware
ranking so a lower-confidence rule with a stronger context weight
can still be promoted into the final set.

Args:
metas: Meta-rules to format.
context: Optional task-context label (e.g. ``"drafting"``,
Expand All @@ -332,6 +354,9 @@ def format_meta_rules_for_prompt(
:func:`evaluate_conditions` are included.
scope_filter: When set, only include meta-rules with this
transfer scope.
limit: Optional maximum number of meta-rules to include.
Applied AFTER context-aware ranking so context weight can
influence which rules make the cut. ``None`` means no cap.

Returns:
Formatted string block, or ``""`` if *metas* is empty.
Expand All @@ -349,9 +374,15 @@ def format_meta_rules_for_prompt(
if not metas:
return ""

# Re-rank by context weight when a context is provided
# Re-rank by context weight when a context is provided. Pass `limit`
# through as `max_rules` so ranking + capping happens atomically;
# otherwise apply the cap after the fact (no ranking case).
if context:
metas = rank_meta_rules_by_context(metas, context)
metas = rank_meta_rules_by_context(
metas, context, max_rules=limit if limit is not None else len(metas),
)
elif limit is not None:
metas = metas[:limit]

lines = ["## Brain Meta-Rules (compound principles)"]
for i, meta in enumerate(metas, start=1):
Expand Down
37 changes: 30 additions & 7 deletions src/gradata/enhancements/meta_rules_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,12 +53,20 @@
"ALTER TABLE meta_rules ADD COLUMN transfer_scope TEXT DEFAULT 'personal'"
)

# Provenance of the principle text (deterministic / llm_synth / human_curated).
# Default is 'deterministic' for safety — pre-existing rows from before this
# migration are auto-generated by the clusterer and should NOT be injected
# (2026-04-14 ablation showed they regress correctness).
_ADD_SOURCE_SQL = (
"ALTER TABLE meta_rules ADD COLUMN source TEXT DEFAULT 'deterministic'"
)


def ensure_table(db_path: str | Path) -> None:
"""Create the meta_rules table if it does not exist.

Also migrates existing tables by adding the ``context_weights``
column when it is missing (backward-compatible upgrade).
Also migrates existing tables by adding new columns when they are
missing (backward-compatible upgrade).

Args:
db_path: Path to the SQLite database file.
Expand All @@ -67,7 +75,13 @@ def ensure_table(db_path: str | Path) -> None:
try:
conn.execute(_CREATE_TABLE_SQL)
# Migrate: add columns if table existed before this version
for stmt in (_ADD_CONTEXT_WEIGHTS_SQL, _ADD_APPLIES_WHEN_SQL, _ADD_NEVER_WHEN_SQL, _ADD_TRANSFER_SCOPE_SQL):
for stmt in (
_ADD_CONTEXT_WEIGHTS_SQL,
_ADD_APPLIES_WHEN_SQL,
_ADD_NEVER_WHEN_SQL,
_ADD_TRANSFER_SCOPE_SQL,
_ADD_SOURCE_SQL,
):
with contextlib.suppress(sqlite3.OperationalError):
conn.execute(stmt)
conn.commit()
Expand Down Expand Up @@ -96,8 +110,8 @@ def save_meta_rules(db_path: str | Path, metas: list[MetaRule]) -> int:
(id, principle, source_categories, source_lesson_ids,
confidence, created_session, last_validated_session,
scope, examples, context_weights, applies_when, never_when,
transfer_scope)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
transfer_scope, source)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
(
meta.id,
meta.principle,
Expand All @@ -112,6 +126,7 @@ def save_meta_rules(db_path: str | Path, metas: list[MetaRule]) -> int:
json.dumps(meta.applies_when),
json.dumps(meta.never_when),
meta.transfer_scope.value,
meta.source,
),
)
conn.commit()
Expand Down Expand Up @@ -142,11 +157,18 @@ def load_meta_rules(db_path: str | Path) -> list[MetaRule]:
if not cursor.fetchone():
return []

# Legacy brains may predate the `source` column. Probe the schema and
# synthesize `'deterministic'` in the SELECT when the column is absent
# so read-only callers (e.g. inject_brain_rules) don't have to trigger
# a write migration. Keeps the query shape stable for row unpacking.
existing_cols = {r[1] for r in conn.execute("PRAGMA table_info(meta_rules)")}
source_expr = "source" if "source" in existing_cols else "'deterministic' AS source"

rows = conn.execute(
"""SELECT id, principle, source_categories, source_lesson_ids,
f"""SELECT id, principle, source_categories, source_lesson_ids,
confidence, created_session, last_validated_session,
scope, examples, context_weights, applies_when, never_when,
transfer_scope
transfer_scope, {source_expr}
FROM meta_rules
ORDER BY confidence DESC"""
).fetchall()
Expand All @@ -170,6 +192,7 @@ def load_meta_rules(db_path: str | Path) -> list[MetaRule]:
applies_when=json.loads(row[10]) if row[10] else [],
never_when=json.loads(row[11]) if row[11] else [],
transfer_scope=_SCOPE_MAP.get(row[12], RuleTransferScope.PERSONAL) if row[12] else RuleTransferScope.PERSONAL,
source=row[13] or "deterministic",
))
return metas
finally:
Expand Down
80 changes: 78 additions & 2 deletions src/gradata/hooks/inject_brain_rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,17 @@
except ImportError:
parse_lessons = None

# Optional meta-rule support. If the enhancements modules cannot be imported,
# fall back to disabled sentinels: `main` truth-tests the two callables before
# using them, and an empty frozenset means no meta-rule source is considered
# injectable, so the hook degrades to plain rule injection.
try:
    from gradata.enhancements.meta_rules import (
        INJECTABLE_META_SOURCES,
        format_meta_rules_for_prompt,
    )
    from gradata.enhancements.meta_rules_storage import load_meta_rules
except ImportError:
    format_meta_rules_for_prompt = None  # type: ignore[assignment]
    load_meta_rules = None  # type: ignore[assignment]
    INJECTABLE_META_SOURCES = frozenset()  # type: ignore[assignment]

_log = logging.getLogger(__name__)

HOOK_META = {
Expand All @@ -30,6 +41,7 @@

MAX_RULES = 10  # maximum number of individual brain rules to inject
MIN_CONFIDENCE = 0.60  # minimum confidence threshold for injection eligibility
MAX_META_RULES = 5  # meta-rules are high-level principles — separate cap from MAX_RULES


def _score(lesson) -> float:
Expand Down Expand Up @@ -129,8 +141,72 @@ def main(data: dict) -> dict | None:
for r in scored
]

block = "<brain-rules>\n" + "\n".join(lines) + "\n</brain-rules>"
return {"result": block}
rules_block = "<brain-rules>\n" + "\n".join(lines) + "\n</brain-rules>"

# Also inject tier-1 meta-rules (compound principles across 3+ lessons).
# Without this, meta-rules are created + stored but never reach the LLM.
# Quality gate: only inject metas whose principle text was LLM-synthesized
# or human-curated. Deterministic auto-generated principles (the OSS
# default) are excluded — the 2026-04-14 ablation (432 trials) showed they
# regress correctness on Sonnet (-1.1%), DeepSeek (-1.4%), and halve the
# qwen14b lift from +8.1% to +2.9%. Better to inject nothing than noise.
meta_block = ""
db_path = Path(brain_dir) / "system.db"
if load_meta_rules and format_meta_rules_for_prompt and db_path.is_file():
# Wrap the entire load -> filter -> format pipeline. A partially corrupt
# system.db can deserialize successfully (e.g. JSON `null` for
# source_lesson_ids) and then blow up later with TypeError inside the
# formatter. We must degrade to rules-only rather than aborting
# SessionStart.
try:
metas = load_meta_rules(db_path)
injectable = [
m for m in metas
if getattr(m, "source", "deterministic") in INJECTABLE_META_SOURCES
]
if injectable:
# Build a sanitized condition_context from the hook payload so
# applies_when / never_when are honored during SessionStart.
# We only forward small, string-shaped fields the rule engine
# uses for gating — no file contents, transcripts, or secrets.
condition_context = {
k: data[k]
for k in ("session_type", "task_type", "source", "cwd")
if isinstance(data.get(k), (str, int, float, bool))
}
if context and "context" not in condition_context:
condition_context["context"] = context

# Pass the full injectable set with `limit=MAX_META_RULES` so
# the cap is applied AFTER context-aware ranking inside the
# formatter. Pre-slicing by raw confidence would let a
# lower-confidence rule with a strong context weight get
# silently excluded.
formatted = format_meta_rules_for_prompt(
injectable,
context=context,
condition_context=condition_context,
limit=MAX_META_RULES,
)
Comment thread
coderabbitai[bot] marked this conversation as resolved.
if formatted:
meta_block = (
"\n<brain-meta-rules>\n"
+ formatted
+ "\n</brain-meta-rules>"
)
elif metas:
_log.debug(
"Skipped meta-rule injection: %d metas in DB, none with "
"injectable source (llm_synth or human_curated)",
len(metas),
)
except Exception as exc:
_log.debug(
"meta-rule pipeline failed (%s) — degrading to rules-only", exc,
)
Comment thread
coderabbitai[bot] marked this conversation as resolved.
meta_block = ""

return {"result": rules_block + meta_block}


if __name__ == "__main__":
Expand Down
Loading
Loading