diff --git a/Gradata/src/gradata/enhancements/rule_pipeline.py b/Gradata/src/gradata/enhancements/rule_pipeline.py index b7fd04fd..29cf3979 100644 --- a/Gradata/src/gradata/enhancements/rule_pipeline.py +++ b/Gradata/src/gradata/enhancements/rule_pipeline.py @@ -335,7 +335,7 @@ def run_rule_pipeline( # Rank rules using retrieval fusion if available try: - from gradata.enhancements.retrieval_fusion import ( # type: ignore[import] + from gradata.enhancements.scoring.retrieval_fusion import ( # type: ignore[import] ScoredRule, apply_correction_boost, reciprocal_rank_fusion, diff --git a/Gradata/src/gradata/enhancements/retrieval_fusion.py b/Gradata/src/gradata/enhancements/scoring/retrieval_fusion.py similarity index 100% rename from Gradata/src/gradata/enhancements/retrieval_fusion.py rename to Gradata/src/gradata/enhancements/scoring/retrieval_fusion.py diff --git a/Gradata/src/gradata/enhancements/self_improvement/_graduation.py b/Gradata/src/gradata/enhancements/self_improvement/_graduation.py index 680a12f5..5bf8a0f1 100644 --- a/Gradata/src/gradata/enhancements/self_improvement/_graduation.py +++ b/Gradata/src/gradata/enhancements/self_improvement/_graduation.py @@ -107,7 +107,12 @@ def _read_beta_lb_config() -> tuple[bool, float, int]: import math import os - enabled = os.environ.get("GRADATA_BETA_LB_GATE", "").lower() in ("1", "true", "yes", "on") + enabled = os.environ.get("GRADATA_BETA_LB_GATE", "1").lower() not in ( + "0", + "false", + "no", + "off", + ) try: threshold = float(os.environ.get("GRADATA_BETA_LB_THRESHOLD", "0.85")) if not math.isfinite(threshold): @@ -128,7 +133,7 @@ def _passes_beta_lb_gate( ) -> bool: """Beta lower-bound gate on PATTERN -> RULE promotion. - Opt-in via env var ``GRADATA_BETA_LB_GATE`` (default off). When enabled, + Enabled by default; set ``GRADATA_BETA_LB_GATE=0`` to disable. When enabled, requires the 5th-percentile lower bound of Beta(α, β) to meet the configured threshold (``GRADATA_BETA_LB_THRESHOLD``, default 0.85) AND at least ``GRADATA_BETA_LB_MIN_FIRES`` observations (default 5). diff --git a/Gradata/src/gradata/middleware/_core.py b/Gradata/src/gradata/middleware/_core.py index 584784e8..3f38e3af 100644 --- a/Gradata/src/gradata/middleware/_core.py +++ b/Gradata/src/gradata/middleware/_core.py @@ -16,6 +16,7 @@ from gradata._env import env_str from gradata.enhancements.rule_to_hook import DeterminismCheck, classify_rule +from gradata.security.score_obfuscation import obfuscate_instruction if TYPE_CHECKING: # pragma: no cover from gradata._types import Lesson @@ -276,7 +277,10 @@ def build_brain_rules_block(source: RuleSource) -> str: selected = source.select() if not selected: return "" - lines = [f"[{l.state}:{l.confidence:.2f}] {l.category}: {l.description}" for l in selected] + lines = [ + obfuscate_instruction(f"[{l.state}:{l.confidence:.2f}] {l.category}: {l.description}") + for l in selected + ] return "\n" + "\n".join(lines) + "\n" diff --git a/Gradata/tests/test_initial_confidence_invariant.py b/Gradata/tests/test_initial_confidence_invariant.py new file mode 100644 index 00000000..3ea5bbd9 --- /dev/null +++ b/Gradata/tests/test_initial_confidence_invariant.py @@ -0,0 +1,44 @@ +from __future__ import annotations + +from gradata._types import Lesson, LessonState +from gradata.enhancements.self_improvement import graduate +from gradata.enhancements.self_improvement._confidence import ( + INITIAL_CONFIDENCE, + MIN_APPLICATIONS_FOR_PATTERN, + PATTERN_THRESHOLD, +) + + +def _lesson(confidence: float, fire_count: int) -> Lesson: + return Lesson( + date="2026-05-02", + state=LessonState.INSTINCT, + confidence=confidence, + category="PROCESS", + description="Follow the existing process", + fire_count=fire_count, + ) + + +def test_fresh_lesson_starts_as_instinct() -> None: + lesson = _lesson(INITIAL_CONFIDENCE, 0) + + assert lesson.state is LessonState.INSTINCT + + +def test_pattern_threshold_tie_does_not_promote() -> None: + lesson = _lesson(PATTERN_THRESHOLD, MIN_APPLICATIONS_FOR_PATTERN) + + active, graduated = graduate([lesson]) + + assert lesson.state is LessonState.INSTINCT + assert active == [lesson] + assert graduated == [] + + +def test_above_pattern_threshold_with_enough_fires_promotes() -> None: + lesson = _lesson(PATTERN_THRESHOLD + 0.01, MIN_APPLICATIONS_FOR_PATTERN) + + graduate([lesson]) + + assert lesson.state is LessonState.PATTERN diff --git a/Gradata/tests/test_middleware_core.py b/Gradata/tests/test_middleware_core.py index a1339110..d3de676b 100644 --- a/Gradata/tests/test_middleware_core.py +++ b/Gradata/tests/test_middleware_core.py @@ -43,7 +43,8 @@ def test_build_brain_rules_block_wraps_in_xml(): block = build_brain_rules_block(src) assert block.startswith("") assert block.endswith("") - assert "[RULE:0.95]" in block + assert "[RULE]" in block + assert "[RULE:" not in block assert "TONE" in block @@ -55,7 +56,7 @@ def test_build_brain_rules_block_respects_max_rules(): ] src = RuleSource(lessons=lessons, max_rules=5) block = build_brain_rules_block(src) - assert block.count("[RULE:") == 5 + assert block.count("[RULE]") == 5 def test_check_output_finds_em_dash_violation(): diff --git a/Gradata/tests/test_retrieval_fusion.py b/Gradata/tests/test_retrieval_fusion.py index 5f442e8b..f25aae41 100644 --- a/Gradata/tests/test_retrieval_fusion.py +++ b/Gradata/tests/test_retrieval_fusion.py @@ -3,14 +3,13 @@ import pytest -from gradata.enhancements.retrieval_fusion import ( +from gradata.enhancements.scoring.retrieval_fusion import ( MergedRule, ScoredRule, apply_correction_boost, reciprocal_rank_fusion, ) - # --------------------------------------------------------------------------- # Helpers # --------------------------------------------------------------------------- diff --git a/Gradata/tests/test_rule_graduated_events.py b/Gradata/tests/test_rule_graduated_events.py index 496c3a20..d82b35f7 100644 --- a/Gradata/tests/test_rule_graduated_events.py +++ b/Gradata/tests/test_rule_graduated_events.py @@ -59,7 +59,8 @@ def test_instinct_to_pattern_emits_rule_graduated(tmp_path): assert data["fire_count"] == 5 -def test_pattern_to_rule_emits_rule_graduated(tmp_path): +def test_pattern_to_rule_emits_rule_graduated(tmp_path, monkeypatch): + monkeypatch.setenv("GRADATA_BETA_LB_GATE", "0") brain = init_brain(tmp_path) lesson = Lesson( date="2026-04-21", diff --git a/Gradata/tests/test_rule_pipeline.py b/Gradata/tests/test_rule_pipeline.py index 53d77b93..7ef77f12 100644 --- a/Gradata/tests/test_rule_pipeline.py +++ b/Gradata/tests/test_rule_pipeline.py @@ -168,7 +168,7 @@ def test_pipeline_does_not_graduate_instinct_below_threshold(tmp_path: Path) -> assert result.graduated == [] -def test_pipeline_graduates_pattern_to_rule(tmp_path: Path) -> None: +def test_pipeline_graduates_pattern_to_rule(tmp_path: Path, monkeypatch) -> None: """PATTERN lesson at 0.90 confidence with >= 5 fires graduates to RULE. C2 fix: MIN_APPLICATIONS_FOR_RULE was accidentally lowered to 3 in @@ -177,6 +177,7 @@ def test_pipeline_graduates_pattern_to_rule(tmp_path: Path) -> None: which only passed because of the bug. Updated to fire_count=5 which is the correct threshold. """ + monkeypatch.setenv("GRADATA_BETA_LB_GATE", "0") lesson = _make_lesson( state=LessonState.PATTERN, confidence=0.90, @@ -242,7 +243,7 @@ def test_pipeline_handles_missing_retrieval_fusion_module(tmp_path: Path) -> Non _write_lessons(lessons_path, [lesson]) db_path = tmp_path / "system.db" - with patch.dict(sys.modules, {"gradata.enhancements.retrieval_fusion": None}): + with patch.dict(sys.modules, {"gradata.enhancements.scoring.retrieval_fusion": None}): result = run_rule_pipeline(lessons_path, db_path, current_session=5) assert not any("retrieval_fusion" in e for e in result.errors) diff --git a/Gradata/tests/test_rule_to_hook.py b/Gradata/tests/test_rule_to_hook.py index 756705b1..430c8796 100644 --- a/Gradata/tests/test_rule_to_hook.py +++ b/Gradata/tests/test_rule_to_hook.py @@ -315,6 +315,7 @@ def test_graduate_promotes_and_installs_hook_for_em_dash(self, tmp_path, monkeyp from datetime import UTC as _UTC from datetime import datetime as _dt + monkeypatch.setenv("GRADATA_BETA_LB_GATE", "0") monkeypatch.setenv("GRADATA_HOOK_ROOT", str(tmp_path)) from gradata import _paths as _p from gradata._types import Lesson, LessonState diff --git a/Gradata/tests/test_rule_to_hook_promotion.py b/Gradata/tests/test_rule_to_hook_promotion.py index d0cee0ed..28b3ee87 100644 --- a/Gradata/tests/test_rule_to_hook_promotion.py +++ b/Gradata/tests/test_rule_to_hook_promotion.py @@ -264,6 +264,7 @@ def test_graduation_auto_promotes_deterministic_rule(tmp_path, monkeypatch): from gradata.enhancements.self_improvement import graduate hook_dir = tmp_path / "pre-tool" / "generated" + monkeypatch.setenv("GRADATA_BETA_LB_GATE", "0") monkeypatch.setenv("GRADATA_HOOK_ROOT", str(hook_dir)) # Satisfy the council empirical gate: fire_count>=10 and >=3 distinct diff --git a/Gradata/tests/test_safety_assertion.py b/Gradata/tests/test_safety_assertion.py index 01ec373e..e195c13a 100644 --- a/Gradata/tests/test_safety_assertion.py +++ b/Gradata/tests/test_safety_assertion.py @@ -80,8 +80,9 @@ def test_no_rule_without_5_fires(self) -> None: f"Promoted to {lesson.state} with only {lesson.fire_count} fires" ) - def test_rule_with_5_fires(self) -> None: + def test_rule_with_5_fires(self, monkeypatch) -> None: """PATTERN -> RULE succeeds with 5+ fires and sufficient confidence.""" + monkeypatch.setenv("GRADATA_BETA_LB_GATE", "0") lesson = _make_lesson( state=LessonState.PATTERN, confidence=RULE_THRESHOLD + 0.01, diff --git a/Gradata/tests/test_score_obfuscation_gate.py b/Gradata/tests/test_score_obfuscation_gate.py new file mode 100644 index 00000000..d326136f --- /dev/null +++ b/Gradata/tests/test_score_obfuscation_gate.py @@ -0,0 +1,59 @@ +from __future__ import annotations + +import re + +from gradata import Brain +from gradata.middleware._core import RuleSource, build_brain_rules_block + +_RAW_CONFIDENCE_FLOAT = re.compile(r"(? None: + leaks = _RAW_CONFIDENCE_FLOAT.findall(prompt) + assert not leaks, f"raw confidence float leaked into prompt-bound text: {prompt}" + + +def test_apply_brain_rules_prompt_does_not_leak_raw_confidence(tmp_path) -> None: + brain = Brain.init( + tmp_path / "brain", + name="ObfuscationGate", + domain="Testing", + embedding="local", + interactive=False, + ) + result = brain.add_rule( + "Prefer concrete dates over relative dates", + "PROCESS", + state="RULE", + confidence=0.95, + ) + assert result["added"] is True + + prompt = brain.apply_brain_rules("write a status update", max_rules=5) + + assert "" in prompt + _assert_no_raw_confidence_float(prompt) + + +def test_middleware_brain_rules_block_does_not_leak_raw_confidence() -> None: + source = RuleSource( + lessons=[ + { + "state": "RULE", + "confidence": 0.95, + "category": "PROCESS", + "description": "Prefer concrete dates over relative dates", + }, + { + "state": "PATTERN", + "confidence": 0.72, + "category": "STYLE", + "description": "Keep summaries short", + }, + ] + ) + + prompt = build_brain_rules_block(source) + + assert "" in prompt + _assert_no_raw_confidence_float(prompt) diff --git a/Gradata/tests/test_wiring_compound.py b/Gradata/tests/test_wiring_compound.py index f35175d2..dc531d0e 100644 --- a/Gradata/tests/test_wiring_compound.py +++ b/Gradata/tests/test_wiring_compound.py @@ -76,11 +76,11 @@ def test_monotone_in_alpha(self): class TestBetaLBGate: - def test_gate_disabled_by_default_allows_promotion(self, monkeypatch): + def test_gate_can_be_disabled_to_allow_promotion(self, monkeypatch): from gradata._types import Lesson, LessonState from gradata.enhancements.self_improvement import _passes_beta_lb_gate - monkeypatch.delenv("GRADATA_BETA_LB_GATE", raising=False) + monkeypatch.setenv("GRADATA_BETA_LB_GATE", "0") lesson = Lesson( date="2026-04-15", category="test", description="test rule", state=LessonState.PATTERN, confidence=0.95, fire_count=5,