Skip to content
37 changes: 34 additions & 3 deletions src/gradata/enhancements/meta_rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,17 @@

@dataclass
class MetaRule:
"""Emergent principle from 3+ related corrections."""
"""Emergent principle from 3+ related corrections.

The ``source`` field tracks how the principle text was generated:
- ``"deterministic"`` (default): produced by token-frequency / cluster
heuristics. Empirically (2026-04-14 ablation) these regress
correctness when injected into prompts. Excluded from injection.
- ``"llm_synth"``: produced by cloud-side LLM synthesis from the
source rules. Eligible for injection.
- ``"human_curated"``: hand-written or human-edited principle. Always
eligible for injection.
"""

id: str
principle: str
Expand All @@ -51,6 +61,13 @@ class MetaRule:
applies_when: list[str] = field(default_factory=list)
never_when: list[str] = field(default_factory=list)
transfer_scope: RuleTransferScope = RuleTransferScope.PERSONAL
source: str = "deterministic" # provenance of the principle text — see class docstring


# Provenance values whose principle text may be injected into LLM prompts.
# Auto-generated ("deterministic") principles are deliberately excluded:
# a blind-judged ablation (2026-04-14, 432 trials) showed that injecting
# them regresses correctness.
INJECTABLE_META_SOURCES = frozenset(("llm_synth", "human_curated"))


@dataclass
Expand Down Expand Up @@ -305,6 +322,7 @@ def format_meta_rules_for_prompt(
context: str = "",
condition_context: dict | None = None,
scope_filter: RuleTransferScope | None = None,
limit: int | None = None,
) -> str:
"""Format meta-rules for injection into LLM context.

Expand All @@ -322,6 +340,10 @@ def format_meta_rules_for_prompt(
When *scope_filter* is provided, only meta-rules with the matching
``transfer_scope`` are included.

When *limit* is provided, the cap is applied AFTER context-aware
ranking so a lower-confidence rule with a stronger context weight
can still be promoted into the final set.

Args:
metas: Meta-rules to format.
context: Optional task-context label (e.g. ``"drafting"``,
Expand All @@ -332,6 +354,9 @@ def format_meta_rules_for_prompt(
:func:`evaluate_conditions` are included.
scope_filter: When set, only include meta-rules with this
transfer scope.
limit: Optional maximum number of meta-rules to include.
Applied AFTER context-aware ranking so context weight can
influence which rules make the cut. ``None`` means no cap.

Returns:
Formatted string block, or ``""`` if *metas* is empty.
Expand All @@ -349,9 +374,15 @@ def format_meta_rules_for_prompt(
if not metas:
return ""

# Re-rank by context weight when a context is provided
# Re-rank by context weight when a context is provided. Pass `limit`
# through as `max_rules` so ranking + capping happens atomically;
# otherwise apply the cap after the fact (no ranking case).
if context:
metas = rank_meta_rules_by_context(metas, context)
metas = rank_meta_rules_by_context(
metas, context, max_rules=limit if limit is not None else len(metas),
)
elif limit is not None:
metas = metas[:limit]

lines = ["## Brain Meta-Rules (compound principles)"]
for i, meta in enumerate(metas, start=1):
Expand Down
37 changes: 30 additions & 7 deletions src/gradata/enhancements/meta_rules_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,12 +53,20 @@
"ALTER TABLE meta_rules ADD COLUMN transfer_scope TEXT DEFAULT 'personal'"
)

# Provenance of the principle text (deterministic / llm_synth / human_curated).
# Default is 'deterministic' for safety — pre-existing rows from before this
# migration are auto-generated by the clusterer and should NOT be injected
# (2026-04-14 ablation showed they regress correctness).
_ADD_SOURCE_SQL = (
"ALTER TABLE meta_rules ADD COLUMN source TEXT DEFAULT 'deterministic'"
)


def ensure_table(db_path: str | Path) -> None:
"""Create the meta_rules table if it does not exist.

Also migrates existing tables by adding the ``context_weights``
column when it is missing (backward-compatible upgrade).
Also migrates existing tables by adding new columns when they are
missing (backward-compatible upgrade).

Args:
db_path: Path to the SQLite database file.
Expand All @@ -67,7 +75,13 @@ def ensure_table(db_path: str | Path) -> None:
try:
conn.execute(_CREATE_TABLE_SQL)
# Migrate: add columns if table existed before this version
for stmt in (_ADD_CONTEXT_WEIGHTS_SQL, _ADD_APPLIES_WHEN_SQL, _ADD_NEVER_WHEN_SQL, _ADD_TRANSFER_SCOPE_SQL):
for stmt in (
_ADD_CONTEXT_WEIGHTS_SQL,
_ADD_APPLIES_WHEN_SQL,
_ADD_NEVER_WHEN_SQL,
_ADD_TRANSFER_SCOPE_SQL,
_ADD_SOURCE_SQL,
):
with contextlib.suppress(sqlite3.OperationalError):
conn.execute(stmt)
conn.commit()
Expand Down Expand Up @@ -96,8 +110,8 @@ def save_meta_rules(db_path: str | Path, metas: list[MetaRule]) -> int:
(id, principle, source_categories, source_lesson_ids,
confidence, created_session, last_validated_session,
scope, examples, context_weights, applies_when, never_when,
transfer_scope)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
transfer_scope, source)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
(
meta.id,
meta.principle,
Expand All @@ -112,6 +126,7 @@ def save_meta_rules(db_path: str | Path, metas: list[MetaRule]) -> int:
json.dumps(meta.applies_when),
json.dumps(meta.never_when),
meta.transfer_scope.value,
meta.source,
),
)
conn.commit()
Expand Down Expand Up @@ -142,11 +157,18 @@ def load_meta_rules(db_path: str | Path) -> list[MetaRule]:
if not cursor.fetchone():
return []

# Legacy brains may predate the `source` column. Probe the schema and
# synthesize `'deterministic'` in the SELECT when the column is absent
# so read-only callers (e.g. inject_brain_rules) don't have to trigger
# a write migration. Keeps the query shape stable for row unpacking.
existing_cols = {r[1] for r in conn.execute("PRAGMA table_info(meta_rules)")}
source_expr = "source" if "source" in existing_cols else "'deterministic' AS source"

rows = conn.execute(
"""SELECT id, principle, source_categories, source_lesson_ids,
f"""SELECT id, principle, source_categories, source_lesson_ids,
confidence, created_session, last_validated_session,
scope, examples, context_weights, applies_when, never_when,
transfer_scope
transfer_scope, {source_expr}
FROM meta_rules
ORDER BY confidence DESC"""
).fetchall()
Expand All @@ -170,6 +192,7 @@ def load_meta_rules(db_path: str | Path) -> list[MetaRule]:
applies_when=json.loads(row[10]) if row[10] else [],
never_when=json.loads(row[11]) if row[11] else [],
transfer_scope=_SCOPE_MAP.get(row[12], RuleTransferScope.PERSONAL) if row[12] else RuleTransferScope.PERSONAL,
source=row[13] or "deterministic",
))
return metas
finally:
Expand Down
80 changes: 78 additions & 2 deletions src/gradata/hooks/inject_brain_rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,17 @@
except ImportError:
parse_lessons = None

# Optional meta-rule support. If the enhancements modules cannot be imported,
# fall back to disabled sentinels: `main` truth-tests the two callables before
# using them, and an empty frozenset means no meta-rule source is considered
# injectable, so the hook degrades to plain rule injection.
try:
    from gradata.enhancements.meta_rules import (
        INJECTABLE_META_SOURCES,
        format_meta_rules_for_prompt,
    )
    from gradata.enhancements.meta_rules_storage import load_meta_rules
except ImportError:
    format_meta_rules_for_prompt = None  # type: ignore[assignment]
    load_meta_rules = None  # type: ignore[assignment]
    INJECTABLE_META_SOURCES = frozenset()  # type: ignore[assignment]

_log = logging.getLogger(__name__)

HOOK_META = {
Expand All @@ -30,6 +41,7 @@

MAX_RULES = 10  # maximum number of individual brain rules to inject
MIN_CONFIDENCE = 0.60  # minimum confidence threshold for injection eligibility
MAX_META_RULES = 5  # meta-rules are high-level principles — separate cap from MAX_RULES


def _score(lesson) -> float:
Expand Down Expand Up @@ -129,8 +141,72 @@ def main(data: dict) -> dict | None:
for r in scored
]

block = "<brain-rules>\n" + "\n".join(lines) + "\n</brain-rules>"
return {"result": block}
rules_block = "<brain-rules>\n" + "\n".join(lines) + "\n</brain-rules>"

# Also inject tier-1 meta-rules (compound principles across 3+ lessons).
# Without this, meta-rules are created + stored but never reach the LLM.
# Quality gate: only inject metas whose principle text was LLM-synthesized
# or human-curated. Deterministic auto-generated principles (the OSS
# default) are excluded — the 2026-04-14 ablation (432 trials) showed they
# regress correctness on Sonnet (-1.1%), DeepSeek (-1.4%), and halve the
# qwen14b lift from +8.1% to +2.9%. Better to inject nothing than noise.
meta_block = ""
db_path = Path(brain_dir) / "system.db"
if load_meta_rules and format_meta_rules_for_prompt and db_path.is_file():
# Wrap the entire load -> filter -> format pipeline. A partially corrupt
# system.db can deserialize successfully (e.g. JSON `null` for
# source_lesson_ids) and then blow up later with TypeError inside the
# formatter. We must degrade to rules-only rather than aborting
# SessionStart.
try:
metas = load_meta_rules(db_path)
injectable = [
m for m in metas
if getattr(m, "source", "deterministic") in INJECTABLE_META_SOURCES
]
if injectable:
# Build a sanitized condition_context from the hook payload so
# applies_when / never_when are honored during SessionStart.
# We only forward small, string-shaped fields the rule engine
# uses for gating — no file contents, transcripts, or secrets.
condition_context = {
k: data[k]
for k in ("session_type", "task_type", "source", "cwd")
if isinstance(data.get(k), (str, int, float, bool))
}
if context and "context" not in condition_context:
condition_context["context"] = context

# Pass the full injectable set with `limit=MAX_META_RULES` so
# the cap is applied AFTER context-aware ranking inside the
# formatter. Pre-slicing by raw confidence would let a
# lower-confidence rule with a strong context weight get
# silently excluded.
formatted = format_meta_rules_for_prompt(
injectable,
context=context,
condition_context=condition_context,
limit=MAX_META_RULES,
)
Comment thread
coderabbitai[bot] marked this conversation as resolved.
if formatted:
meta_block = (
"\n<brain-meta-rules>\n"
+ formatted
+ "\n</brain-meta-rules>"
)
elif metas:
_log.debug(
"Skipped meta-rule injection: %d metas in DB, none with "
"injectable source (llm_synth or human_curated)",
len(metas),
)
except Exception as exc:
_log.debug(
"meta-rule pipeline failed (%s) — degrading to rules-only", exc,
)
Comment thread
coderabbitai[bot] marked this conversation as resolved.
meta_block = ""

return {"result": rules_block + meta_block}


if __name__ == "__main__":
Expand Down
Loading
Loading