From c2fbdb888c7948747cbedd986c9c8d7be67247ca Mon Sep 17 00:00:00 2001 From: Oliver Le Date: Tue, 14 Apr 2026 02:37:42 -0700 Subject: [PATCH 1/5] feat(hooks): inject meta-rules into LLM context at session start MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Structural gap: meta-rules (tier-1 compound principles from 3+ graduated rules) were being created, stored, and loaded by the SDK — but never injected into the LLM prompt. The LLM saw <rules> but never <meta-rules>, which meant the entire compounding layer was dormant at the inference surface. Fix: inject_brain_rules.py now ALSO loads meta-rules from system.db via meta_rules_storage.load_meta_rules, caps them at MAX_META_RULES=5, context-ranks via format_meta_rules_for_prompt, and appends a <meta-rules> block to the existing <rules> block. Defensive: import is try/except so sites without the meta_rules module don't break. DB load is try/except so a corrupt/missing system.db degrades to "rules only" rather than failing the session start hook. Tests: 2 new — meta-rules appear in injection block when DB has them, and injection tolerates missing system.db without raising. 
Co-Authored-By: Gradata --- src/gradata/hooks/inject_brain_rules.py | 33 +++++++++++++++- tests/test_hooks_learning.py | 50 +++++++++++++++++++++++++ 2 files changed, 81 insertions(+), 2 deletions(-) diff --git a/src/gradata/hooks/inject_brain_rules.py b/src/gradata/hooks/inject_brain_rules.py index 9498d43d..42bf98b9 100644 --- a/src/gradata/hooks/inject_brain_rules.py +++ b/src/gradata/hooks/inject_brain_rules.py @@ -20,6 +20,13 @@ except ImportError: parse_lessons = None +try: + from gradata.enhancements.meta_rules import format_meta_rules_for_prompt + from gradata.enhancements.meta_rules_storage import load_meta_rules +except ImportError: + format_meta_rules_for_prompt = None # type: ignore[assignment] + load_meta_rules = None # type: ignore[assignment] + _log = logging.getLogger(__name__) HOOK_META = { @@ -30,6 +37,7 @@ MAX_RULES = 10 MIN_CONFIDENCE = 0.60 +MAX_META_RULES = 5 # meta-rules are high-level principles — separate cap from MAX_RULES def _score(lesson) -> float: @@ -129,8 +137,29 @@ def main(data: dict) -> dict | None: for r in scored ] - block = "\n" + "\n".join(lines) + "\n" - return {"result": block} + rules_block = "\n" + "\n".join(lines) + "\n" + + # Also inject tier-1 meta-rules (compound principles across 3+ lessons). + # Without this, meta-rules are created + stored but never reach the LLM — + # they only exist in the brain's memory, not in the prompt. Bounded by + # MAX_META_RULES and context-ranked inside format_meta_rules_for_prompt. 
+ meta_block = "" + db_path = Path(brain_dir) / "system.db" + if load_meta_rules and format_meta_rules_for_prompt and db_path.is_file(): + try: + metas = load_meta_rules(db_path) + except Exception as exc: + _log.debug("meta-rule load failed (%s) — skipping injection", exc) + metas = [] + if metas: + metas_sorted = sorted( + metas, key=lambda m: getattr(m, "confidence", 0.0), reverse=True, + )[:MAX_META_RULES] + formatted = format_meta_rules_for_prompt(metas_sorted, context=context) + if formatted: + meta_block = "\n\n" + formatted + "\n" + + return {"result": rules_block + meta_block} if __name__ == "__main__": diff --git a/tests/test_hooks_learning.py b/tests/test_hooks_learning.py index 9ac31f85..89a8f936 100644 --- a/tests/test_hooks_learning.py +++ b/tests/test_hooks_learning.py @@ -73,6 +73,56 @@ def test_inject_rules_no_lessons_file(tmp_path): assert result is None +def test_inject_also_emits_meta_rules_block_when_db_has_them(tmp_path): + """When meta-rules exist in system.db, the injection block includes + alongside . 
Previously meta-rules were + created and stored but never reached the LLM.""" + # Seed lessons so the hook gets past the "no rules" guard + (tmp_path / "lessons.md").write_text( + "[2026-04-01] [RULE:0.92] PROCESS: Always plan before implementing\n", + encoding="utf-8", + ) + + # Seed a meta-rule into system.db + from gradata.enhancements.meta_rules import MetaRule + from gradata.enhancements.meta_rules_storage import save_meta_rules + + db_path = tmp_path / "system.db" + meta = MetaRule( + id="m-1", + principle="Verify before acting — check existing state before creating new artifacts.", + source_categories=["PROCESS", "CODE"], + source_lesson_ids=["l-1", "l-2", "l-3"], + confidence=0.88, + created_session=1, + last_validated_session=1, + ) + save_meta_rules(db_path, [meta]) + + with patch.dict(os.environ, {"GRADATA_BRAIN_DIR": str(tmp_path)}): + result = inject_main({}) + assert result is not None + text = result.get("result", "") + assert "" in text + assert "" in text + assert "Verify before acting" in text + + +def test_inject_tolerates_missing_meta_rules_db(tmp_path): + """No system.db file → still returns the rules block, no meta-rules block, + and no exception.""" + (tmp_path / "lessons.md").write_text( + "[2026-04-01] [RULE:0.92] PROCESS: Always plan before implementing\n", + encoding="utf-8", + ) + with patch.dict(os.environ, {"GRADATA_BRAIN_DIR": str(tmp_path)}): + result = inject_main({}) + assert result is not None + text = result.get("result", "") + assert "" in text + assert "" not in text + + def test_session_close_emits_event(tmp_path): with patch.dict(os.environ, {"GRADATA_BRAIN_DIR": str(tmp_path)}): with patch("gradata.hooks.session_close._emit_session_end") as mock_emit: From 9dab67e9e284f22f3be017e97498bf0ead83114a Mon Sep 17 00:00:00 2001 From: Oliver Le Date: Tue, 14 Apr 2026 08:15:16 -0700 Subject: [PATCH 2/5] =?UTF-8?q?fix(meta=5Frules):=20source-aware=20injecti?= =?UTF-8?q?on=20filter=20=E2=80=94=20exclude=20deterministic=20principles?= 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 2026-04-14 ablation (432 trials, 3 LLMs, judged blind) showed deterministic auto-generated meta-rule principles regress correctness: Sonnet 4.6: -1.1% (full vs base) DeepSeek V3: -1.4% (full vs base) qwen2.5-coder:14b: drops correctness gain from +8.1% to +2.9% The OSS clusterer produces principles like "Code: Avoid: foo. Prefer: bar" or self-contradictory token frequencies. These are not safe to inject. Changes: - MetaRule gains a `source: str = "deterministic"` field. Default is the safe option — pre-existing rows are auto-generated and stay excluded. - INJECTABLE_META_SOURCES = {"llm_synth", "human_curated"}. Anything else is excluded from prompt injection. - meta_rules_storage: new ALTER TABLE migration adds the `source` column with default 'deterministic'. save/load read+write the field. - inject_brain_rules hook filters by source before formatting. If metas exist but none are injectable, logs a debug line and skips silently. - Tests: positive case uses source='llm_synth'; new negative case asserts high-confidence deterministic metas are NOT injected. When LLM synthesis ships (cloud-side discover_meta_rules), it must set source='llm_synth' to flow through. Hand-curated brains can use source='human_curated'. Co-Authored-By: Gradata --- src/gradata/enhancements/meta_rules.py | 19 +++++++- .../enhancements/meta_rules_storage.py | 28 +++++++++--- src/gradata/hooks/inject_brain_rules.py | 33 ++++++++++---- tests/test_hooks_learning.py | 44 ++++++++++++++++--- 4 files changed, 103 insertions(+), 21 deletions(-) diff --git a/src/gradata/enhancements/meta_rules.py b/src/gradata/enhancements/meta_rules.py index 2b2162f0..81c7f32f 100644 --- a/src/gradata/enhancements/meta_rules.py +++ b/src/gradata/enhancements/meta_rules.py @@ -36,7 +36,17 @@ @dataclass class MetaRule: - """Emergent principle from 3+ related corrections.""" + """Emergent principle from 3+ related corrections. 
+ + The ``source`` field tracks how the principle text was generated: + - ``"deterministic"`` (default): produced by token-frequency / cluster + heuristics. Empirically (2026-04-14 ablation) these regress + correctness when injected into prompts. Excluded from injection. + - ``"llm_synth"``: produced by cloud-side LLM synthesis from the + source rules. Eligible for injection. + - ``"human_curated"``: hand-written or human-edited principle. Always + eligible for injection. + """ id: str principle: str @@ -51,6 +61,13 @@ class MetaRule: applies_when: list[str] = field(default_factory=list) never_when: list[str] = field(default_factory=list) transfer_scope: RuleTransferScope = RuleTransferScope.PERSONAL + source: str = "deterministic" # provenance of the principle text — see class docstring + + +# Sources whose principle text is trusted enough to inject into LLM prompts. +# Deterministic auto-generated principles regress correctness empirically +# (2026-04-14 ablation, 432 trials, judged blind). +INJECTABLE_META_SOURCES = frozenset({"llm_synth", "human_curated"}) @dataclass diff --git a/src/gradata/enhancements/meta_rules_storage.py b/src/gradata/enhancements/meta_rules_storage.py index 47b9e3bc..db09d61e 100644 --- a/src/gradata/enhancements/meta_rules_storage.py +++ b/src/gradata/enhancements/meta_rules_storage.py @@ -53,12 +53,20 @@ "ALTER TABLE meta_rules ADD COLUMN transfer_scope TEXT DEFAULT 'personal'" ) +# Provenance of the principle text (deterministic / llm_synth / human_curated). +# Default is 'deterministic' for safety — pre-existing rows from before this +# migration are auto-generated by the clusterer and should NOT be injected +# (2026-04-14 ablation showed they regress correctness). +_ADD_SOURCE_SQL = ( + "ALTER TABLE meta_rules ADD COLUMN source TEXT DEFAULT 'deterministic'" +) + def ensure_table(db_path: str | Path) -> None: """Create the meta_rules table if it does not exist. 
- Also migrates existing tables by adding the ``context_weights`` - column when it is missing (backward-compatible upgrade). + Also migrates existing tables by adding new columns when they are + missing (backward-compatible upgrade). Args: db_path: Path to the SQLite database file. @@ -67,7 +75,13 @@ def ensure_table(db_path: str | Path) -> None: try: conn.execute(_CREATE_TABLE_SQL) # Migrate: add columns if table existed before this version - for stmt in (_ADD_CONTEXT_WEIGHTS_SQL, _ADD_APPLIES_WHEN_SQL, _ADD_NEVER_WHEN_SQL, _ADD_TRANSFER_SCOPE_SQL): + for stmt in ( + _ADD_CONTEXT_WEIGHTS_SQL, + _ADD_APPLIES_WHEN_SQL, + _ADD_NEVER_WHEN_SQL, + _ADD_TRANSFER_SCOPE_SQL, + _ADD_SOURCE_SQL, + ): with contextlib.suppress(sqlite3.OperationalError): conn.execute(stmt) conn.commit() @@ -96,8 +110,8 @@ def save_meta_rules(db_path: str | Path, metas: list[MetaRule]) -> int: (id, principle, source_categories, source_lesson_ids, confidence, created_session, last_validated_session, scope, examples, context_weights, applies_when, never_when, - transfer_scope) - VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""", + transfer_scope, source) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""", ( meta.id, meta.principle, @@ -112,6 +126,7 @@ def save_meta_rules(db_path: str | Path, metas: list[MetaRule]) -> int: json.dumps(meta.applies_when), json.dumps(meta.never_when), meta.transfer_scope.value, + meta.source, ), ) conn.commit() @@ -146,7 +161,7 @@ def load_meta_rules(db_path: str | Path) -> list[MetaRule]: """SELECT id, principle, source_categories, source_lesson_ids, confidence, created_session, last_validated_session, scope, examples, context_weights, applies_when, never_when, - transfer_scope + transfer_scope, source FROM meta_rules ORDER BY confidence DESC""" ).fetchall() @@ -170,6 +185,7 @@ def load_meta_rules(db_path: str | Path) -> list[MetaRule]: applies_when=json.loads(row[10]) if row[10] else [], never_when=json.loads(row[11]) if row[11] else [], 
transfer_scope=_SCOPE_MAP.get(row[12], RuleTransferScope.PERSONAL) if row[12] else RuleTransferScope.PERSONAL, + source=row[13] or "deterministic", )) return metas finally: diff --git a/src/gradata/hooks/inject_brain_rules.py b/src/gradata/hooks/inject_brain_rules.py index 42bf98b9..f4555663 100644 --- a/src/gradata/hooks/inject_brain_rules.py +++ b/src/gradata/hooks/inject_brain_rules.py @@ -21,11 +21,15 @@ parse_lessons = None try: - from gradata.enhancements.meta_rules import format_meta_rules_for_prompt + from gradata.enhancements.meta_rules import ( + INJECTABLE_META_SOURCES, + format_meta_rules_for_prompt, + ) from gradata.enhancements.meta_rules_storage import load_meta_rules except ImportError: format_meta_rules_for_prompt = None # type: ignore[assignment] load_meta_rules = None # type: ignore[assignment] + INJECTABLE_META_SOURCES = frozenset() # type: ignore[assignment] _log = logging.getLogger(__name__) @@ -140,9 +144,12 @@ def main(data: dict) -> dict | None: rules_block = "\n" + "\n".join(lines) + "\n" # Also inject tier-1 meta-rules (compound principles across 3+ lessons). - # Without this, meta-rules are created + stored but never reach the LLM — - # they only exist in the brain's memory, not in the prompt. Bounded by - # MAX_META_RULES and context-ranked inside format_meta_rules_for_prompt. + # Without this, meta-rules are created + stored but never reach the LLM. + # Quality gate: only inject metas whose principle text was LLM-synthesized + # or human-curated. Deterministic auto-generated principles (the OSS + # default) are excluded — the 2026-04-14 ablation (432 trials) showed they + # regress correctness on Sonnet (-1.1%), DeepSeek (-1.4%), and halve the + # qwen14b lift from +8.1% to +2.9%. Better to inject nothing than noise. 
meta_block = "" db_path = Path(brain_dir) / "system.db" if load_meta_rules and format_meta_rules_for_prompt and db_path.is_file(): @@ -151,13 +158,23 @@ def main(data: dict) -> dict | None: except Exception as exc: _log.debug("meta-rule load failed (%s) — skipping injection", exc) metas = [] - if metas: - metas_sorted = sorted( - metas, key=lambda m: getattr(m, "confidence", 0.0), reverse=True, + injectable = [ + m for m in metas + if getattr(m, "source", "deterministic") in INJECTABLE_META_SOURCES + ] + if injectable: + top_metas = sorted( + injectable, key=lambda m: getattr(m, "confidence", 0.0), reverse=True, )[:MAX_META_RULES] - formatted = format_meta_rules_for_prompt(metas_sorted, context=context) + formatted = format_meta_rules_for_prompt(top_metas, context=context) if formatted: meta_block = "\n\n" + formatted + "\n" + elif metas: + _log.debug( + "Skipped meta-rule injection: %d metas in DB, none with " + "injectable source (llm_synth or human_curated)", + len(metas), + ) return {"result": rules_block + meta_block} diff --git a/tests/test_hooks_learning.py b/tests/test_hooks_learning.py index 89a8f936..f4be4a47 100644 --- a/tests/test_hooks_learning.py +++ b/tests/test_hooks_learning.py @@ -73,17 +73,13 @@ def test_inject_rules_no_lessons_file(tmp_path): assert result is None -def test_inject_also_emits_meta_rules_block_when_db_has_them(tmp_path): - """When meta-rules exist in system.db, the injection block includes - alongside . 
Previously meta-rules were - created and stored but never reached the LLM.""" - # Seed lessons so the hook gets past the "no rules" guard +def test_inject_emits_meta_rules_block_for_llm_synth_source(tmp_path): + """Meta-rules with source='llm_synth' or 'human_curated' get injected.""" (tmp_path / "lessons.md").write_text( "[2026-04-01] [RULE:0.92] PROCESS: Always plan before implementing\n", encoding="utf-8", ) - # Seed a meta-rule into system.db from gradata.enhancements.meta_rules import MetaRule from gradata.enhancements.meta_rules_storage import save_meta_rules @@ -96,6 +92,7 @@ def test_inject_also_emits_meta_rules_block_when_db_has_them(tmp_path): confidence=0.88, created_session=1, last_validated_session=1, + source="llm_synth", ) save_meta_rules(db_path, [meta]) @@ -108,6 +105,41 @@ def test_inject_also_emits_meta_rules_block_when_db_has_them(tmp_path): assert "Verify before acting" in text +def test_inject_skips_meta_rules_with_deterministic_source(tmp_path): + """Meta-rules with source='deterministic' (the default for auto-generated + cluster output) are EXCLUDED from injection. Empirical: 2026-04-14 ablation + showed deterministic principles regress correctness.""" + (tmp_path / "lessons.md").write_text( + "[2026-04-01] [RULE:0.92] PROCESS: Always plan before implementing\n", + encoding="utf-8", + ) + + from gradata.enhancements.meta_rules import MetaRule + from gradata.enhancements.meta_rules_storage import save_meta_rules + + db_path = tmp_path / "system.db" + # Default source is 'deterministic' — should NOT be injected + meta = MetaRule( + id="m-2", + principle="Code: Avoid: foo. 
Prefer: bar.", # the ablation-confirmed garbage shape + source_categories=["CODE"], + source_lesson_ids=["l-9"], + confidence=1.00, # confidence is high BUT source disqualifies it + created_session=1, + last_validated_session=1, + ) + save_meta_rules(db_path, [meta]) + + with patch.dict(os.environ, {"GRADATA_BRAIN_DIR": str(tmp_path)}): + result = inject_main({}) + assert result is not None + text = result.get("result", "") + assert "" in text + # Critical: the deterministic meta-rule must NOT appear in the prompt + assert "" not in text + assert "Avoid: foo" not in text + + def test_inject_tolerates_missing_meta_rules_db(tmp_path): """No system.db file → still returns the rules block, no meta-rules block, and no exception.""" From d1ebc5183d23a5a635e8a2907ed58b5f93c0b99b Mon Sep 17 00:00:00 2001 From: Oliver Le Date: Tue, 14 Apr 2026 10:19:02 -0700 Subject: [PATCH 3/5] fix(review): address CodeRabbit feedback on meta-rule injection --- src/gradata/enhancements/meta_rules.py | 18 +++++- src/gradata/hooks/inject_brain_rules.py | 11 ++-- tests/test_hooks_learning.py | 81 +++++++++++++++++++++++++ 3 files changed, 104 insertions(+), 6 deletions(-) diff --git a/src/gradata/enhancements/meta_rules.py b/src/gradata/enhancements/meta_rules.py index 81c7f32f..03dd9723 100644 --- a/src/gradata/enhancements/meta_rules.py +++ b/src/gradata/enhancements/meta_rules.py @@ -322,6 +322,7 @@ def format_meta_rules_for_prompt( context: str = "", condition_context: dict | None = None, scope_filter: RuleTransferScope | None = None, + limit: int | None = None, ) -> str: """Format meta-rules for injection into LLM context. @@ -339,6 +340,10 @@ def format_meta_rules_for_prompt( When *scope_filter* is provided, only meta-rules with the matching ``transfer_scope`` are included. + When *limit* is provided, the cap is applied AFTER context-aware + ranking so a lower-confidence rule with a stronger context weight + can still be promoted into the final set. + Args: metas: Meta-rules to format. 
context: Optional task-context label (e.g. ``"drafting"``, @@ -349,6 +354,9 @@ def format_meta_rules_for_prompt( :func:`evaluate_conditions` are included. scope_filter: When set, only include meta-rules with this transfer scope. + limit: Optional maximum number of meta-rules to include. + Applied AFTER context-aware ranking so context weight can + influence which rules make the cut. ``None`` means no cap. Returns: Formatted string block, or ``""`` if *metas* is empty. @@ -366,9 +374,15 @@ def format_meta_rules_for_prompt( if not metas: return "" - # Re-rank by context weight when a context is provided + # Re-rank by context weight when a context is provided. Pass `limit` + # through as `max_rules` so ranking + capping happens atomically; + # otherwise apply the cap after the fact (no ranking case). if context: - metas = rank_meta_rules_by_context(metas, context) + metas = rank_meta_rules_by_context( + metas, context, max_rules=limit if limit is not None else len(metas), + ) + elif limit is not None: + metas = metas[:limit] lines = ["## Brain Meta-Rules (compound principles)"] for i, meta in enumerate(metas, start=1): diff --git a/src/gradata/hooks/inject_brain_rules.py b/src/gradata/hooks/inject_brain_rules.py index f4555663..84424b6d 100644 --- a/src/gradata/hooks/inject_brain_rules.py +++ b/src/gradata/hooks/inject_brain_rules.py @@ -163,10 +163,13 @@ def main(data: dict) -> dict | None: if getattr(m, "source", "deterministic") in INJECTABLE_META_SOURCES ] if injectable: - top_metas = sorted( - injectable, key=lambda m: getattr(m, "confidence", 0.0), reverse=True, - )[:MAX_META_RULES] - formatted = format_meta_rules_for_prompt(top_metas, context=context) + # Pass the full injectable set with `limit=MAX_META_RULES` so the + # cap is applied AFTER context-aware ranking inside the formatter. + # Pre-slicing by raw confidence would let a lower-confidence rule + # with a strong context weight get silently excluded. 
+ formatted = format_meta_rules_for_prompt( + injectable, context=context, limit=MAX_META_RULES, + ) if formatted: meta_block = "\n\n" + formatted + "\n" elif metas: diff --git a/tests/test_hooks_learning.py b/tests/test_hooks_learning.py index f4be4a47..c428d63e 100644 --- a/tests/test_hooks_learning.py +++ b/tests/test_hooks_learning.py @@ -140,6 +140,87 @@ def test_inject_skips_meta_rules_with_deterministic_source(tmp_path): assert "Avoid: foo" not in text +def test_inject_caps_meta_rules_and_context_promotes_lower_confidence(tmp_path): + """Boundary test: with more than MAX_META_RULES injectable metas, the cap + must be applied AFTER context-aware ranking, so a lower-confidence rule + with a strong context weight can still make the cut. + + Regression guard for the CR finding: pre-slicing by raw confidence would + silently exclude the context-promoted rule, giving the LLM the wrong + principles for the current task. + """ + from gradata.enhancements.meta_rules import MetaRule + from gradata.enhancements.meta_rules_storage import save_meta_rules + from gradata.hooks.inject_brain_rules import MAX_META_RULES + + (tmp_path / "lessons.md").write_text( + "[2026-04-01] [RULE:0.92] PROCESS: Always plan before implementing\n", + encoding="utf-8", + ) + + db_path = tmp_path / "system.db" + # Seed MAX_META_RULES + 2 high-confidence metas that are *neutral* in the + # target context, plus one lower-confidence meta that should be *boosted* + # by context weight so it makes it into the top-N despite its lower base. 
+ metas = [] + for i in range(MAX_META_RULES + 2): + metas.append( + MetaRule( + id=f"m-hi-{i}", + principle=f"Neutral principle number {i} for baseline comparison.", + source_categories=["PROCESS"], + source_lesson_ids=[f"l-{i}a", f"l-{i}b", f"l-{i}c"], + confidence=0.95, + created_session=1, + last_validated_session=1, + source="llm_synth", + ), + ) + # Lower base confidence (0.60) but a very strong context weight (3.0) in + # the "drafting" context — should beat the neutral 0.95-confidence metas + # after weighting (0.60 * 3.0 = 1.80 > 0.95 * 1.0 = 0.95). + promoted = MetaRule( + id="m-promoted", + principle="Promoted drafting principle — context weight lifts this in.", + source_categories=["TONE"], + source_lesson_ids=["l-p1", "l-p2", "l-p3"], + confidence=0.60, + created_session=1, + last_validated_session=1, + source="llm_synth", + context_weights={"drafting": 3.0, "default": 1.0}, + ) + metas.append(promoted) + save_meta_rules(db_path, metas) + + # Run the hook with a context that promotes the low-confidence meta. + with patch.dict(os.environ, {"GRADATA_BRAIN_DIR": str(tmp_path)}): + result = inject_main({"session_type": "drafting"}) + + assert result is not None + text = result["result"] + assert "" in text + + # Cap: only MAX_META_RULES meta-rule lines (numbered "1.", "2." ...) appear + # between the meta-rules tags. + meta_section = text.split("")[1].split("")[0] + numbered_lines = [ + line for line in meta_section.splitlines() + if line.strip() and line.lstrip()[0].isdigit() and ". [META:" in line + ] + assert len(numbered_lines) == MAX_META_RULES, ( + f"expected exactly {MAX_META_RULES} meta-rule lines, got {len(numbered_lines)}" + ) + + # Context-aware promotion: the lower-confidence but context-boosted rule + # must appear in the final output even though MAX_META_RULES other metas + # have higher raw confidence. 
+ assert "Promoted drafting principle" in text, ( + "context-weighted rule was excluded — cap is being applied before " + "context ranking (CR finding regression)" + ) + + def test_inject_tolerates_missing_meta_rules_db(tmp_path): """No system.db file → still returns the rules block, no meta-rules block, and no exception.""" From 58fa161c15b0c1e8187becd52195d0deae21de0e Mon Sep 17 00:00:00 2001 From: Oliver Le Date: Tue, 14 Apr 2026 19:09:53 -0700 Subject: [PATCH 4/5] fix(hooks): widen meta-rule fallback + add corrupt-DB regression test Round-2 CR: a partially corrupt system.db can deserialize successfully and then blow up inside format_meta_rules_for_prompt (e.g. JSON null in source_lesson_ids becomes None and len() raises). The previous try/except only guarded load_meta_rules, so failures in the filter/format pipeline still aborted SessionStart. - Wrap entire load -> filter -> format pipeline in one try/except so any downstream failure degrades to rules-only injection. - Preserve context-aware cap: full injectable set still passed to formatter with limit=MAX_META_RULES so context ranking happens before slicing. - Add test_corrupt_meta_rules_db_degrades_to_rules_only as sibling to the missing-DB case; writes garbage bytes to system.db and asserts the hook returns only without raising. Co-Authored-By: Gradata --- src/gradata/hooks/inject_brain_rules.py | 52 +++++++++++++++---------- tests/test_hooks_learning.py | 22 +++++++++++ 2 files changed, 54 insertions(+), 20 deletions(-) diff --git a/src/gradata/hooks/inject_brain_rules.py b/src/gradata/hooks/inject_brain_rules.py index 84424b6d..1581b905 100644 --- a/src/gradata/hooks/inject_brain_rules.py +++ b/src/gradata/hooks/inject_brain_rules.py @@ -153,31 +153,43 @@ def main(data: dict) -> dict | None: meta_block = "" db_path = Path(brain_dir) / "system.db" if load_meta_rules and format_meta_rules_for_prompt and db_path.is_file(): + # Wrap the entire load -> filter -> format pipeline. 
A partially corrupt + # system.db can deserialize successfully (e.g. JSON `null` for + # source_lesson_ids) and then blow up later with TypeError inside the + # formatter. We must degrade to rules-only rather than aborting + # SessionStart. try: metas = load_meta_rules(db_path) + injectable = [ + m for m in metas + if getattr(m, "source", "deterministic") in INJECTABLE_META_SOURCES + ] + if injectable: + # Pass the full injectable set with `limit=MAX_META_RULES` so + # the cap is applied AFTER context-aware ranking inside the + # formatter. Pre-slicing by raw confidence would let a + # lower-confidence rule with a strong context weight get + # silently excluded. + formatted = format_meta_rules_for_prompt( + injectable, context=context, limit=MAX_META_RULES, + ) + if formatted: + meta_block = ( + "\n\n" + + formatted + + "\n" + ) + elif metas: + _log.debug( + "Skipped meta-rule injection: %d metas in DB, none with " + "injectable source (llm_synth or human_curated)", + len(metas), + ) except Exception as exc: - _log.debug("meta-rule load failed (%s) — skipping injection", exc) - metas = [] - injectable = [ - m for m in metas - if getattr(m, "source", "deterministic") in INJECTABLE_META_SOURCES - ] - if injectable: - # Pass the full injectable set with `limit=MAX_META_RULES` so the - # cap is applied AFTER context-aware ranking inside the formatter. - # Pre-slicing by raw confidence would let a lower-confidence rule - # with a strong context weight get silently excluded. 
- formatted = format_meta_rules_for_prompt( - injectable, context=context, limit=MAX_META_RULES, - ) - if formatted: - meta_block = "\n\n" + formatted + "\n" - elif metas: _log.debug( - "Skipped meta-rule injection: %d metas in DB, none with " - "injectable source (llm_synth or human_curated)", - len(metas), + "meta-rule pipeline failed (%s) — degrading to rules-only", exc, ) + meta_block = "" return {"result": rules_block + meta_block} diff --git a/tests/test_hooks_learning.py b/tests/test_hooks_learning.py index c428d63e..11884303 100644 --- a/tests/test_hooks_learning.py +++ b/tests/test_hooks_learning.py @@ -236,6 +236,28 @@ def test_inject_tolerates_missing_meta_rules_db(tmp_path): assert "" not in text +def test_corrupt_meta_rules_db_degrades_to_rules_only(tmp_path): + """Corrupt-but-readable system.db → still returns rules block, no + meta-rules block, and no exception.""" + (tmp_path / "lessons.md").write_text( + "[2026-04-01] [RULE:0.92] PROCESS: Always plan before implementing\n", + encoding="utf-8", + ) + # Write a malformed-but-present system.db. The hook checks `is_file()` + # then calls load_meta_rules; the load (or any downstream filter/format) + # must not abort SessionStart. Garbage bytes guarantee deserialization + # failure in the storage layer. 
+ (tmp_path / "system.db").write_bytes(b"this is not a valid sqlite or json payload") + + with patch.dict(os.environ, {"GRADATA_BRAIN_DIR": str(tmp_path)}): + result = inject_main({}) + + assert result is not None + text = result.get("result", "") + assert "" in text + assert "" not in text + + def test_session_close_emits_event(tmp_path): with patch.dict(os.environ, {"GRADATA_BRAIN_DIR": str(tmp_path)}): with patch("gradata.hooks.session_close._emit_session_end") as mock_emit: From 28c1336b3ef413f1493f3a99c95c7a651532acd5 Mon Sep 17 00:00:00 2001 From: Oliver Le Date: Tue, 14 Apr 2026 22:00:33 -0700 Subject: [PATCH 5/5] =?UTF-8?q?fix(meta=5Frules):=20round-3=20CR=20?= =?UTF-8?q?=E2=80=94=20legacy=20schema=20+=20condition=5Fcontext=20gating?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CodeRabbit round-3 feedback: 1. load_meta_rules() probes pragma for `source` column and synthesizes 'deterministic' when absent, so brains upgraded from a pre-source schema no longer raise `no such column: source` on read-only paths (inject_brain_rules never calls ensure_table()). 2. inject_brain_rules now forwards a sanitized hook payload as `condition_context`, so applies_when / never_when are honored during SessionStart instead of being silently bypassed. Only string/int/ float/bool fields from session_type/task_type/source/cwd are forwarded — no transcripts, file contents, or secrets. Regression tests: - test_load_meta_rules_legacy_schema_without_source_column: creates a legacy meta_rules table, confirms load returns rows with source='deterministic' instead of crashing. - test_inject_respects_applies_when_never_when: an llm_synth rule with never_when=session_type=code is excluded on code sessions and injected on prose sessions. All 2077 tests pass, 23 skipped. 
--- .../enhancements/meta_rules_storage.py | 11 ++- src/gradata/hooks/inject_brain_rules.py | 17 +++- tests/test_hooks_learning.py | 84 +++++++++++++++++++ 3 files changed, 109 insertions(+), 3 deletions(-) diff --git a/src/gradata/enhancements/meta_rules_storage.py b/src/gradata/enhancements/meta_rules_storage.py index db09d61e..f299f0a1 100644 --- a/src/gradata/enhancements/meta_rules_storage.py +++ b/src/gradata/enhancements/meta_rules_storage.py @@ -157,11 +157,18 @@ def load_meta_rules(db_path: str | Path) -> list[MetaRule]: if not cursor.fetchone(): return [] + # Legacy brains may predate the `source` column. Probe the schema and + # synthesize `'deterministic'` in the SELECT when the column is absent + # so read-only callers (e.g. inject_brain_rules) don't have to trigger + # a write migration. Keeps the query shape stable for row unpacking. + existing_cols = {r[1] for r in conn.execute("PRAGMA table_info(meta_rules)")} + source_expr = "source" if "source" in existing_cols else "'deterministic' AS source" + rows = conn.execute( - """SELECT id, principle, source_categories, source_lesson_ids, + f"""SELECT id, principle, source_categories, source_lesson_ids, confidence, created_session, last_validated_session, scope, examples, context_weights, applies_when, never_when, - transfer_scope, source + transfer_scope, {source_expr} FROM meta_rules ORDER BY confidence DESC""" ).fetchall() diff --git a/src/gradata/hooks/inject_brain_rules.py b/src/gradata/hooks/inject_brain_rules.py index 1581b905..c460a1fb 100644 --- a/src/gradata/hooks/inject_brain_rules.py +++ b/src/gradata/hooks/inject_brain_rules.py @@ -165,13 +165,28 @@ def main(data: dict) -> dict | None: if getattr(m, "source", "deterministic") in INJECTABLE_META_SOURCES ] if injectable: + # Build a sanitized condition_context from the hook payload so + # applies_when / never_when are honored during SessionStart. 
+ # We only forward small, string-shaped fields the rule engine + # uses for gating — no file contents, transcripts, or secrets. + condition_context = { + k: data[k] + for k in ("session_type", "task_type", "source", "cwd") + if isinstance(data.get(k), (str, int, float, bool)) + } + if context and "context" not in condition_context: + condition_context["context"] = context + # Pass the full injectable set with `limit=MAX_META_RULES` so # the cap is applied AFTER context-aware ranking inside the # formatter. Pre-slicing by raw confidence would let a # lower-confidence rule with a strong context weight get # silently excluded. formatted = format_meta_rules_for_prompt( - injectable, context=context, limit=MAX_META_RULES, + injectable, + context=context, + condition_context=condition_context, + limit=MAX_META_RULES, ) if formatted: meta_block = ( diff --git a/tests/test_hooks_learning.py b/tests/test_hooks_learning.py index 11884303..a2b4fa63 100644 --- a/tests/test_hooks_learning.py +++ b/tests/test_hooks_learning.py @@ -258,6 +258,90 @@ def test_corrupt_meta_rules_db_degrades_to_rules_only(tmp_path): assert "" not in text +def test_load_meta_rules_legacy_schema_without_source_column(tmp_path): + """Brains upgraded from an older schema may be missing the `source` + column. Read-only callers must still get rows back with source + synthesized as 'deterministic' rather than raising 'no such column'.""" + import sqlite3 + + from gradata.enhancements.meta_rules_storage import load_meta_rules + + db_path = tmp_path / "system.db" + conn = sqlite3.connect(str(db_path)) + try: + # Legacy schema: every column EXCEPT `source`. 
+ conn.execute( + """CREATE TABLE meta_rules ( + id TEXT PRIMARY KEY, + principle TEXT, + source_categories TEXT, + source_lesson_ids TEXT, + confidence REAL, + created_session INTEGER, + last_validated_session INTEGER, + scope TEXT, + examples TEXT, + context_weights TEXT, + applies_when TEXT, + never_when TEXT, + transfer_scope TEXT + )""" + ) + conn.execute( + """INSERT INTO meta_rules VALUES + ('m1', 'Always verify', '["PROCESS"]', '["l1"]', 0.85, + 1, 1, '{}', '[]', '{"default": 1.0}', '[]', '[]', 'personal')""" + ) + conn.commit() + finally: + conn.close() + + metas = load_meta_rules(db_path) + assert len(metas) == 1 + assert metas[0].id == "m1" + assert metas[0].source == "deterministic" + + +def test_inject_respects_applies_when_never_when(tmp_path): + """applies_when / never_when must gate meta-rule injection at + SessionStart. A rule with never_when=session_type=code must be + excluded when the hook payload says session_type=code.""" + from gradata.enhancements.meta_rules import MetaRule + from gradata.enhancements.meta_rules_storage import save_meta_rules + + (tmp_path / "lessons.md").write_text( + "[2026-04-01] [RULE:0.92] PROCESS: Always plan before implementing\n", + encoding="utf-8", + ) + db_path = tmp_path / "system.db" + gated = MetaRule( + id="m_gated", + principle="Never chat in code sessions", + source_categories=["PROCESS"], + source_lesson_ids=["l1", "l2", "l3"], + confidence=0.90, + created_session=1, + last_validated_session=1, + never_when=["session_type=code"], + source="llm_synth", + ) + save_meta_rules(db_path, [gated]) + + with patch.dict(os.environ, {"GRADATA_BRAIN_DIR": str(tmp_path)}): + # Blocking payload — never_when matches, meta-rule must be skipped. + blocked = inject_main({"session_type": "code"}) + # Permissive payload — never_when does not match, meta-rule injected. 
+ allowed = inject_main({"session_type": "prose"}) + + assert blocked is not None + assert "" in blocked["result"] + assert "" not in blocked["result"] + + assert allowed is not None + assert "" in allowed["result"] + assert "Never chat in code sessions" in allowed["result"] + + def test_session_close_emits_event(tmp_path): with patch.dict(os.environ, {"GRADATA_BRAIN_DIR": str(tmp_path)}): with patch("gradata.hooks.session_close._emit_session_end") as mock_emit: