Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 63 additions & 0 deletions src/gradata/_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -683,6 +683,18 @@ def _lesson_key(lesson):
"confidence": lesson.confidence, "fire_count": lesson.fire_count})
except Exception as e:
_log.debug("Graduation emit failed: %s", e)
# Canary enrollment: every new RULE enters canary state so
# check_canary_health (next session) can regression-gate it.
# Best-effort — never breaks graduation if the canary table
# / DB path is unavailable.
if new_state == "RULE":
try:
from gradata.enhancements.rule_canary import promote_to_canary
promote_to_canary(
lesson.category, brain.session, db_path=brain.db_path,
)
except Exception as e:
_log.debug("promote_to_canary failed: %s", e)
# User-facing graduation notification
try:
brain.bus.emit("lesson.graduated", {
Expand Down Expand Up @@ -837,6 +849,57 @@ def _lesson_key(lesson):
"graduated_rules": graduated_rules,
"meta_rules_discovered": meta_rules_discovered}

# Canary health sweep: for every RULE-tier lesson previously enrolled
# in canary, check if corrections landed in its category since it
# graduated. Healthy canaries promote to ACTIVE; unhealthy ones roll
# back to INSTINCT-range confidence. Best-effort; never fails the
# session close. See enhancements/rule_canary.py.
try:
from gradata.enhancements.rule_canary import (
CANARY_SESSIONS,
check_canary_health,
promote_to_active,
rollback_rule,
)

rule_lessons = [l for l in all_lessons if l.state.value == "RULE"]
seen_categories: set[str] = set()
for l in rule_lessons:
if l.category in seen_categories:
continue
seen_categories.add(l.category)
try:
health = check_canary_health(
l.category, current_session, db_path=brain.db_path,
)
except Exception as e:
_log.debug("check_canary_health(%s) failed: %s", l.category, e)
continue

rec = health.get("recommendation")
if rec == "PROMOTE":
try:
promote_to_active(l.category, db_path=brain.db_path)
except Exception as e:
_log.debug("promote_to_active(%s) failed: %s", l.category, e)
elif rec == "ROLLBACK":
try:
rollback_rule(
l.category,
reason=(
f"canary_unhealthy: {health.get('corrections_caused', 0)} "
f"correction(s) in {health.get('sessions_active', 0)}/"
f"{CANARY_SESSIONS} canary sessions"
),
db_path=brain.db_path,
)
except Exception as e:
_log.debug("rollback_rule(%s) failed: %s", l.category, e)
except ImportError:
pass # rule_canary optional; skip silently
except Exception as e:
_log.debug("Canary sweep failed: %s", e)

# Session boundary marker for dashboard queries
try:
brain.emit("SESSION_END", "brain.end_session", {
Expand Down
36 changes: 33 additions & 3 deletions src/gradata/brain.py
Original file line number Diff line number Diff line change
Expand Up @@ -876,13 +876,43 @@ def apply_brain_rules(

lessons = parse_lessons(lessons_path.read_text(encoding="utf-8"))

# Try tree-based retrieval first (falls back to flat if no paths)
# Try tree-based retrieval first (falls back to flat if no paths).
# Pass the brain's bus so rule_engine can fire `rule_scoped_out`
# events for observers (notifications, session-history, embeddings).
_bus = getattr(self, "bus", None)
try:
from gradata.rules.rule_engine import apply_rules_with_tree

applied = apply_rules_with_tree(lessons, scope, max_rules=max_rules)
applied = apply_rules_with_tree(
lessons, scope, max_rules=max_rules, event_bus=_bus,
)
except (ImportError, Exception):
applied = apply_rules(lessons, scope, max_rules=max_rules)
applied = apply_rules(lessons, scope, max_rules=max_rules, bus=_bus)

# Emit `rules.injected` so downstream effectiveness tracking
# (SessionHistory.compute_effectiveness) sees what entered this
# session's prompts. Fire-and-forget — never fails apply_brain_rules.
if _bus is not None and applied:
try:
_bus.emit("rules.injected", {
"rules": [
{
"id": a.rule_id,
"category": a.lesson.category,
"confidence": a.lesson.confidence,
"state": a.lesson.state.value,
}
for a in applied
],
"scope": {
"task_type": scope.task_type,
"domain": scope.domain,
"audience": scope.audience,
},
"task": task,
})
except Exception as e:
logger.debug("rules.injected emit failed: %s", e)

result = format_rules_for_prompt(applied)
self._rule_cache.put(cache_key, result)
Expand Down
36 changes: 36 additions & 0 deletions src/gradata/enhancements/self_improvement.py
Original file line number Diff line number Diff line change
Expand Up @@ -970,6 +970,41 @@ def update_confidence(
# ---------------------------------------------------------------------------


def _passes_beta_lb_gate(lesson: Lesson) -> bool:
"""Beta lower-bound gate on PATTERN -> RULE promotion.

Opt-in via env var ``GRADATA_BETA_LB_GATE`` (default off). When enabled,
requires the 5th-percentile lower bound of Beta(α, β) to meet the
configured threshold (``GRADATA_BETA_LB_THRESHOLD``, default 0.70) AND
at least ``GRADATA_BETA_LB_MIN_FIRES`` observations (default 5).

Rationale: the v4 ablation min2022 random-label control showed that
~15–20% of current RULE-tier graduations are calibrated by format,
not content. The Beta posterior captures uncertainty the mean
(lesson.confidence) discards. Feature-flagged so production
calibration is unchanged until this is measured in-band.
"""
import os

if os.environ.get("GRADATA_BETA_LB_GATE", "").lower() not in ("1", "true", "yes", "on"):
return True # gate disabled — defer to existing conf + fire_count checks

try:
threshold = float(os.environ.get("GRADATA_BETA_LB_THRESHOLD", "0.70"))
min_fires = int(os.environ.get("GRADATA_BETA_LB_MIN_FIRES", "5"))
except ValueError:
threshold, min_fires = 0.70, 5

if lesson.fire_count < min_fires:
return False

alpha = getattr(lesson, "alpha", 1.0)
beta_param = getattr(lesson, "beta_param", 1.0)
from gradata.rules.rule_engine import _beta_ppf_05

return _beta_ppf_05(alpha, beta_param) >= threshold


def graduate(
lessons: list[Lesson],
*,
Expand Down Expand Up @@ -1107,6 +1142,7 @@ def graduate(
and lesson.state == LessonState.PATTERN
and lesson.confidence >= eff_rule_threshold
and lesson.fire_count >= MIN_APPLICATIONS_FOR_RULE
and _passes_beta_lb_gate(lesson)
):
blocked = False

Expand Down
19 changes: 15 additions & 4 deletions src/gradata/rules/rule_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -502,14 +502,25 @@ def filter_by_scope(


def _beta_ppf_05(alpha: float, beta_param: float) -> float:
"""Approximate 5th percentile of Beta(alpha, beta) distribution.
"""5th percentile of Beta(alpha, beta) distribution.

Uses normal approximation. For tiny samples, returns conservative estimate.
Uses scipy.stats.beta.ppf when available (exact). Falls back to the
normal approximation otherwise. The normal approx is biased for
small samples (α+β < 10), precisely the regime ~40% of PATTERN-tier
rules sit in — prefer scipy when present.
"""
import math

if alpha <= 0 or beta_param <= 0:
return 0.0

try:
from scipy.stats import beta as _scipy_beta

return max(0.0, min(1.0, float(_scipy_beta.ppf(0.05, alpha, beta_param))))
except ImportError:
pass

import math

total = alpha + beta_param
mean = alpha / total
if total <= 2:
Expand Down
5 changes: 4 additions & 1 deletion tests/test_beta_scoring.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,11 @@


def test_beta_reliability_high_success():
    # The exact 5th percentile of Beta(20, 2) is ≈ 0.793; the old > 0.8
    # bound only ever passed because of the normal approximation's bias,
    # not the statistic itself. With a scipy-backed PPF the score lands
    # just under 0.8, so 0.75 leaves headroom for either backend.
    assert beta_domain_reliability(fires=20, misfires=1) > 0.75


def test_beta_reliability_uncertain_with_few_observations():
Expand Down
Loading
Loading