Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Gradata/src/gradata/enhancements/rule_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -335,7 +335,7 @@ def run_rule_pipeline(

# Rank rules using retrieval fusion if available
try:
from gradata.enhancements.retrieval_fusion import ( # type: ignore[import]
from gradata.enhancements.scoring.retrieval_fusion import ( # type: ignore[import]
ScoredRule,
apply_correction_boost,
reciprocal_rank_fusion,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,12 @@ def _read_beta_lb_config() -> tuple[bool, float, int]:
import math
import os

enabled = os.environ.get("GRADATA_BETA_LB_GATE", "").lower() in ("1", "true", "yes", "on")
enabled = os.environ.get("GRADATA_BETA_LB_GATE", "1").lower() not in (
"0",
"false",
"no",
"off",
)
Comment on lines +110 to +115
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major | ⚡ Quick win

Default-on Beta-LB now hard-blocks lessons that lack posterior state.

Enabling the gate by default turns any Lesson without persisted alpha / beta_param into an automatic PATTERN→RULE deny, because _passes_beta_lb_gate() falls back to Beta(1,1), whose 5th-percentile lower bound is far below the default 0.85. That means legacy or handcrafted PATTERN lessons will stop graduating entirely unless the env override is set, which is a much stronger behavior change than “tighter calibration.”

Possible compatibility guard
 def _passes_beta_lb_gate(
     lesson: Lesson,
     config: tuple[bool, float, int] | None = None,
 ) -> bool:
@@
-    alpha = getattr(lesson, "alpha", 1.0)
-    beta_param = getattr(lesson, "beta_param", 1.0)
+    alpha = getattr(lesson, "alpha", None)
+    beta_param = getattr(lesson, "beta_param", None)
+    if alpha is None or beta_param is None:
+        return True  # keep legacy promotion behavior until posterior state is backfilled
+
     from gradata.rules.rule_engine import _beta_ppf_05
 
     return _beta_ppf_05(alpha, beta_param) >= threshold

Also applies to: 151-155

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@Gradata/src/gradata/enhancements/self_improvement/_graduation.py` around
lines 110 - 115, The default-on gate (GRADATA_BETA_LB_GATE currently defaulting
to "1") causes lessons missing persisted alpha/beta_param to be treated as
Beta(1,1) in _passes_beta_lb_gate(), which denies legacy PATTERN lessons; revert
the compatibility break by making the gate default off (change the default of
GRADATA_BETA_LB_GATE to "0"/false) or alter _passes_beta_lb_gate() to treat a
Lesson lacking alpha or beta_param as passing (i.e., skip the Beta(1,1) fallback
and allow graduation) — update code references to GRADATA_BETA_LB_GATE,
_passes_beta_lb_gate(), Lesson.alpha, and Lesson.beta_param accordingly.

try:
threshold = float(os.environ.get("GRADATA_BETA_LB_THRESHOLD", "0.85"))
if not math.isfinite(threshold):
Expand All @@ -128,7 +133,7 @@ def _passes_beta_lb_gate(
) -> bool:
"""Beta lower-bound gate on PATTERN -> RULE promotion.

Opt-in via env var ``GRADATA_BETA_LB_GATE`` (default off). When enabled,
Enabled by default; set ``GRADATA_BETA_LB_GATE=0`` to disable. When enabled,
requires the 5th-percentile lower bound of Beta(α, β) to meet the
configured threshold (``GRADATA_BETA_LB_THRESHOLD``, default 0.85) AND
at least ``GRADATA_BETA_LB_MIN_FIRES`` observations (default 5).
Expand Down
6 changes: 5 additions & 1 deletion Gradata/src/gradata/middleware/_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

from gradata._env import env_str
from gradata.enhancements.rule_to_hook import DeterminismCheck, classify_rule
from gradata.security.score_obfuscation import obfuscate_instruction

if TYPE_CHECKING: # pragma: no cover
from gradata._types import Lesson
Expand Down Expand Up @@ -276,7 +277,10 @@ def build_brain_rules_block(source: RuleSource) -> str:
selected = source.select()
if not selected:
return ""
lines = [f"[{l.state}:{l.confidence:.2f}] {l.category}: {l.description}" for l in selected]
lines = [
obfuscate_instruction(f"[{l.state}:{l.confidence:.2f}] {l.category}: {l.description}")
for l in selected
]
return "<brain-rules>\n" + "\n".join(lines) + "\n</brain-rules>"


Expand Down
44 changes: 44 additions & 0 deletions Gradata/tests/test_initial_confidence_invariant.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
from __future__ import annotations

from gradata._types import Lesson, LessonState
from gradata.enhancements.self_improvement import graduate
from gradata.enhancements.self_improvement._confidence import (
INITIAL_CONFIDENCE,
MIN_APPLICATIONS_FOR_PATTERN,
PATTERN_THRESHOLD,
)


def _lesson(confidence: float, fire_count: int) -> Lesson:
    """Build an INSTINCT-state PROCESS lesson with the given confidence and fire count."""
    fields = {
        "date": "2026-05-02",
        "state": LessonState.INSTINCT,
        "confidence": confidence,
        "category": "PROCESS",
        "description": "Follow the existing process",
        "fire_count": fire_count,
    }
    return Lesson(**fields)


def test_fresh_lesson_starts_as_instinct() -> None:
    """A brand-new lesson at the initial confidence begins life as an INSTINCT."""
    fresh = _lesson(INITIAL_CONFIDENCE, 0)
    assert fresh.state is LessonState.INSTINCT


def test_pattern_threshold_tie_does_not_promote() -> None:
    """Confidence exactly at PATTERN_THRESHOLD must not promote (strict inequality)."""
    candidate = _lesson(PATTERN_THRESHOLD, MIN_APPLICATIONS_FOR_PATTERN)

    still_active, promoted = graduate([candidate])

    # The tie stays INSTINCT and remains in the active pool.
    assert candidate.state is LessonState.INSTINCT
    assert still_active == [candidate]
    assert promoted == []


def test_above_pattern_threshold_with_enough_fires_promotes() -> None:
    """Strictly exceeding PATTERN_THRESHOLD with enough fires promotes to PATTERN."""
    candidate = _lesson(PATTERN_THRESHOLD + 0.01, MIN_APPLICATIONS_FOR_PATTERN)

    graduate([candidate])

    assert candidate.state is LessonState.PATTERN
5 changes: 3 additions & 2 deletions Gradata/tests/test_middleware_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,8 @@ def test_build_brain_rules_block_wraps_in_xml():
block = build_brain_rules_block(src)
assert block.startswith("<brain-rules>")
assert block.endswith("</brain-rules>")
assert "[RULE:0.95]" in block
assert "[RULE]" in block
assert "[RULE:" not in block
assert "TONE" in block


Expand All @@ -55,7 +56,7 @@ def test_build_brain_rules_block_respects_max_rules():
]
src = RuleSource(lessons=lessons, max_rules=5)
block = build_brain_rules_block(src)
assert block.count("[RULE:") == 5
assert block.count("[RULE]") == 5


def test_check_output_finds_em_dash_violation():
Expand Down
3 changes: 1 addition & 2 deletions Gradata/tests/test_retrieval_fusion.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,13 @@

import pytest

from gradata.enhancements.retrieval_fusion import (
from gradata.enhancements.scoring.retrieval_fusion import (
MergedRule,
ScoredRule,
apply_correction_boost,
reciprocal_rank_fusion,
)


# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
Expand Down
3 changes: 2 additions & 1 deletion Gradata/tests/test_rule_graduated_events.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,8 @@ def test_instinct_to_pattern_emits_rule_graduated(tmp_path):
assert data["fire_count"] == 5


def test_pattern_to_rule_emits_rule_graduated(tmp_path):
def test_pattern_to_rule_emits_rule_graduated(tmp_path, monkeypatch):
monkeypatch.setenv("GRADATA_BETA_LB_GATE", "0")
brain = init_brain(tmp_path)
lesson = Lesson(
date="2026-04-21",
Expand Down
5 changes: 3 additions & 2 deletions Gradata/tests/test_rule_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ def test_pipeline_does_not_graduate_instinct_below_threshold(tmp_path: Path) ->
assert result.graduated == []


def test_pipeline_graduates_pattern_to_rule(tmp_path: Path) -> None:
def test_pipeline_graduates_pattern_to_rule(tmp_path: Path, monkeypatch) -> None:
"""PATTERN lesson at 0.90 confidence with >= 5 fires graduates to RULE.

C2 fix: MIN_APPLICATIONS_FOR_RULE was accidentally lowered to 3 in
Expand All @@ -177,6 +177,7 @@ def test_pipeline_graduates_pattern_to_rule(tmp_path: Path) -> None:
which only passed because of the bug. Updated to fire_count=5 which is
the correct threshold.
"""
monkeypatch.setenv("GRADATA_BETA_LB_GATE", "0")
lesson = _make_lesson(
state=LessonState.PATTERN,
confidence=0.90,
Expand Down Expand Up @@ -242,7 +243,7 @@ def test_pipeline_handles_missing_retrieval_fusion_module(tmp_path: Path) -> Non
_write_lessons(lessons_path, [lesson])
db_path = tmp_path / "system.db"

with patch.dict(sys.modules, {"gradata.enhancements.retrieval_fusion": None}):
with patch.dict(sys.modules, {"gradata.enhancements.scoring.retrieval_fusion": None}):
result = run_rule_pipeline(lessons_path, db_path, current_session=5)

assert not any("retrieval_fusion" in e for e in result.errors)
Expand Down
1 change: 1 addition & 0 deletions Gradata/tests/test_rule_to_hook.py
Original file line number Diff line number Diff line change
Expand Up @@ -315,6 +315,7 @@ def test_graduate_promotes_and_installs_hook_for_em_dash(self, tmp_path, monkeyp
from datetime import UTC as _UTC
from datetime import datetime as _dt

monkeypatch.setenv("GRADATA_BETA_LB_GATE", "0")
monkeypatch.setenv("GRADATA_HOOK_ROOT", str(tmp_path))
from gradata import _paths as _p
from gradata._types import Lesson, LessonState
Expand Down
1 change: 1 addition & 0 deletions Gradata/tests/test_rule_to_hook_promotion.py
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,7 @@ def test_graduation_auto_promotes_deterministic_rule(tmp_path, monkeypatch):
from gradata.enhancements.self_improvement import graduate

hook_dir = tmp_path / "pre-tool" / "generated"
monkeypatch.setenv("GRADATA_BETA_LB_GATE", "0")
monkeypatch.setenv("GRADATA_HOOK_ROOT", str(hook_dir))

# Satisfy the council empirical gate: fire_count>=10 and >=3 distinct
Expand Down
3 changes: 2 additions & 1 deletion Gradata/tests/test_safety_assertion.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,8 +80,9 @@ def test_no_rule_without_5_fires(self) -> None:
f"Promoted to {lesson.state} with only {lesson.fire_count} fires"
)

def test_rule_with_5_fires(self) -> None:
def test_rule_with_5_fires(self, monkeypatch) -> None:
"""PATTERN -> RULE succeeds with 5+ fires and sufficient confidence."""
monkeypatch.setenv("GRADATA_BETA_LB_GATE", "0")
lesson = _make_lesson(
state=LessonState.PATTERN,
confidence=RULE_THRESHOLD + 0.01,
Expand Down
59 changes: 59 additions & 0 deletions Gradata/tests/test_score_obfuscation_gate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
from __future__ import annotations

import re

from gradata import Brain
from gradata.middleware._core import RuleSource, build_brain_rules_block

_RAW_CONFIDENCE_FLOAT = re.compile(r"(?<![\w.])(?:0(?:\.\d+)?|1(?:\.0+)?)(?![\w.])")


def _assert_no_raw_confidence_float(prompt: str) -> None:
leaks = _RAW_CONFIDENCE_FLOAT.findall(prompt)
assert not leaks, f"raw confidence float leaked into prompt-bound text: {prompt}"
Comment on lines +8 to +13
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

🧩 Analysis chain

🏁 Script executed:

#!/bin/bash
set -euo pipefail
python - <<'PY'
import re

current = re.compile(r"(?<![\w.])(?:0(?:\.\d+)?|1(?:\.0+)?)(?![\w.])")
proposed = re.compile(r"(?<![\w.])(?:0\.\d+|1\.0+)(?![\w.])")

samples = [
    "<brain-rules>\n[RULE] Use 1 sentence.\n</brain-rules>",
    "<brain-rules>\n[RULE] Keep 0 padding.\n</brain-rules>",
    "<brain-rules>\n[RULE:0.95] Prefer concrete dates.\n</brain-rules>",
    "<brain-rules>\n[RULE:1.00] Prefer concrete dates.\n</brain-rules>",
]

for s in samples:
    print("TEXT:", s.replace("\n", "\\n"))
    print(" current:", current.findall(s))
    print("proposed:", proposed.findall(s))
    print("-" * 60)
PY

Repository: Gradata/gradata

Length of output: 701


Regex matches plain integers and will cause false positives.

The optional decimal portion in the current pattern ((?:\.\d+)? and (?:\.0+)?) makes it match standalone 0 and 1. This means valid text like "use 1 sentence" or "keep 0 padding" will incorrectly trigger the gate.

Proposed fix
-_RAW_CONFIDENCE_FLOAT = re.compile(r"(?<![\w.])(?:0(?:\.\d+)?|1(?:\.0+)?)(?![\w.])")
+_RAW_CONFIDENCE_FLOAT = re.compile(r"(?<![\w.])(?:0\.\d+|1\.0+)(?![\w.])")

The proposed regex requires an explicit decimal point and digits, matching only actual floats like 0.95 and 1.00 while ignoring plain integers.

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@Gradata/tests/test_score_obfuscation_gate.py` around lines 8 - 13, The regex
_RAW_CONFIDENCE_FLOAT currently also matches integers because the decimal
portion is optional; update the pattern used by the _RAW_CONFIDENCE_FLOAT
constant so it requires an explicit decimal point and digits (i.e., only match
floats like 0.95 or 1.00), then keep _assert_no_raw_confidence_float unchanged
so it will only detect actual float leaks and not plain integers like "1" or
"0".



def test_apply_brain_rules_prompt_does_not_leak_raw_confidence(tmp_path) -> None:
brain = Brain.init(
tmp_path / "brain",
name="ObfuscationGate",
domain="Testing",
embedding="local",
interactive=False,
)
Comment on lines +16 to +23
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

🧩 Analysis chain

🏁 Script executed:

#!/bin/bash
set -euo pipefail

echo "Looking for test conftest and isolation hooks..."
fd -i 'conftest.py' tests

echo
echo "Searching for BRAIN_DIR and _paths cache refresh logic in test infra..."
rg -n -C3 'BRAIN_DIR|_paths|Brain\.init|reload|refresh' tests

echo
echo "Reference call site in this test file:"
rg -n -C2 'test_apply_brain_rules_prompt_does_not_leak_raw_confidence|Brain\.init\(' Gradata/tests/test_score_obfuscation_gate.py

Repository: Gradata/gradata

Length of output: 219


🏁 Script executed:

#!/bin/bash
set -euo pipefail

# Find all conftest.py files in the repository
echo "=== Locating conftest.py files ==="
git ls-files | grep -i conftest.py

echo
echo "=== Checking Gradata/tests directory structure ==="
git ls-files | grep "^Gradata/tests/" | head -20

echo
echo "=== Looking for the target test file ==="
git ls-files | grep "test_score_obfuscation_gate.py"

Repository: Gradata/gradata

Length of output: 953


🏁 Script executed:

#!/bin/bash
set -euo pipefail

echo "=== Reading conftest.py for test isolation setup ==="
cat -n Gradata/tests/conftest.py

echo
echo "=== Checking for test_score_obfuscation_gate.py ==="
git ls-files | grep "test_score_obfuscation_gate.py"

echo
echo "=== Reading test_score_obfuscation_gate.py around lines 16-23 ==="
head -30 Gradata/tests/test_score_obfuscation_gate.py

Repository: Gradata/gradata

Length of output: 9867


Use the fresh_brain fixture or init_brain() from conftest.py instead of calling Brain.init() directly.

This test calls Brain.init() directly without setting BRAIN_DIR or refreshing the _paths.py module cache. The conftest.py provides the init_brain() helper (and fresh_brain fixture) specifically for this purpose. Either replace the direct Brain.init() call with brain = init_brain(tmp_path, name="ObfuscationGate", domain="Testing") or use the fresh_brain fixture and customize it as needed. See conftest.py lines 26-92.

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@Gradata/tests/test_score_obfuscation_gate.py` around lines 16 - 23, Replace
the direct Brain.init(...) invocation in
test_apply_brain_rules_prompt_does_not_leak_raw_confidence with the test helpers
from conftest: either call brain = init_brain(tmp_path, name="ObfuscationGate",
domain="Testing", embedding="local", interactive=False) or switch the test to
use the fresh_brain fixture and adjust its parameters; this ensures BRAIN_DIR
and the _paths cache are handled the same way as other tests instead of calling
Brain.init directly.

result = brain.add_rule(
"Prefer concrete dates over relative dates",
"PROCESS",
state="RULE",
confidence=0.95,
)
assert result["added"] is True

prompt = brain.apply_brain_rules("write a status update", max_rules=5)

assert "<brain-rules>" in prompt
_assert_no_raw_confidence_float(prompt)


def test_middleware_brain_rules_block_does_not_leak_raw_confidence() -> None:
    """The middleware rules block must not expose numeric confidence scores."""
    rule_lesson = {
        "state": "RULE",
        "confidence": 0.95,
        "category": "PROCESS",
        "description": "Prefer concrete dates over relative dates",
    }
    pattern_lesson = {
        "state": "PATTERN",
        "confidence": 0.72,
        "category": "STYLE",
        "description": "Keep summaries short",
    }
    source = RuleSource(lessons=[rule_lesson, pattern_lesson])

    prompt = build_brain_rules_block(source)

    assert "<brain-rules>" in prompt
    _assert_no_raw_confidence_float(prompt)
4 changes: 2 additions & 2 deletions Gradata/tests/test_wiring_compound.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,11 +76,11 @@ def test_monotone_in_alpha(self):


class TestBetaLBGate:
def test_gate_disabled_by_default_allows_promotion(self, monkeypatch):
def test_gate_can_be_disabled_to_allow_promotion(self, monkeypatch):
from gradata._types import Lesson, LessonState
from gradata.enhancements.self_improvement import _passes_beta_lb_gate

monkeypatch.delenv("GRADATA_BETA_LB_GATE", raising=False)
monkeypatch.setenv("GRADATA_BETA_LB_GATE", "0")
lesson = Lesson(
date="2026-04-15", category="test", description="test rule",
state=LessonState.PATTERN, confidence=0.95, fire_count=5,
Expand Down
Loading