Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions src/gradata/_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -394,10 +394,10 @@ def brain_correct(
from gradata.enhancements.causal_chains import CausalChain, CausalRelation
from gradata.enhancements.meta_rules import _lesson_id
if not hasattr(brain, "_causal_chain"):
brain._causal_chain = CausalChain()
brain._causal_chain = CausalChain() # type: ignore[attr-defined]
correction_id = str(event.get("id", ""))
rule_id = _lesson_id(best_match)
brain._causal_chain.add_link(
brain._causal_chain.add_link( # type: ignore[attr-defined]
correction_id, rule_id,
CausalRelation.REINFORCEMENT,
strength=min(1.0, best_match.confidence),
Expand Down Expand Up @@ -446,10 +446,10 @@ def brain_correct(
from gradata.enhancements.causal_chains import CausalChain, CausalRelation
from gradata.enhancements.meta_rules import _lesson_id
if not hasattr(brain, "_causal_chain"):
brain._causal_chain = CausalChain()
brain._causal_chain = CausalChain() # type: ignore[attr-defined]
correction_id = str(event.get("id", ""))
rule_id = _lesson_id(new_lesson)
brain._causal_chain.add_link(
brain._causal_chain.add_link( # type: ignore[attr-defined]
correction_id, rule_id,
CausalRelation.CORRECTION_TO_RULE,
strength=1.0,
Expand Down
119 changes: 110 additions & 9 deletions src/gradata/enhancements/meta_rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -586,17 +586,111 @@ def parse_lessons_from_markdown(text: str) -> list[Lesson]:
# ---------------------------------------------------------------------------


_GEMMA_DEFAULT_BASE = "https://generativelanguage.googleapis.com/v1beta/openai"
_GEMMA_DEFAULT_MODEL = "gemma-3-27b-it"


def _resolve_llm_credentials() -> tuple[str, str, str]:
"""Resolve LLM credentials from environment. Returns (key, base, model).

Delegates to the same env vars used by ``llm_synthesizer``.
Resolution order:
1. ``GRADATA_LLM_KEY`` + ``GRADATA_LLM_BASE`` — explicit override.
2. ``GRADATA_GEMMA_API_KEY`` — Google AI Studio OpenAI-compat endpoint.
"""
import os

key = os.environ.get("GRADATA_LLM_KEY", "")
base = os.environ.get("GRADATA_LLM_BASE", "")
model = os.environ.get("GRADATA_LLM_MODEL", "gpt-4o-mini")
return key, base, model
if key and base:
return key, base, model

gemma_key = os.environ.get("GRADATA_GEMMA_API_KEY", "")
if gemma_key:
return (
gemma_key,
os.environ.get("GRADATA_GEMMA_BASE", _GEMMA_DEFAULT_BASE),
os.environ.get("GRADATA_GEMMA_MODEL", _GEMMA_DEFAULT_MODEL),
)

return "", "", model
Comment on lines +605 to +616
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🧹 Nitpick | 🔵 Trivial

Edge case: partial OpenAI-compat credentials fall through silently.

If GRADATA_LLM_KEY is set but GRADATA_LLM_BASE is empty, the function silently falls through to the Gemma check. This could surprise users who set only the key and expect an error. Consider logging a debug message when one is set without the other.

💡 Proposed logging for partial credentials
     key = os.environ.get("GRADATA_LLM_KEY", "")
     base = os.environ.get("GRADATA_LLM_BASE", "")
     model = os.environ.get("GRADATA_LLM_MODEL", "gpt-4o-mini")
     if key and base:
         return key, base, model
+    if key and not base:
+        _log.debug("GRADATA_LLM_KEY set but GRADATA_LLM_BASE missing; checking Gemma fallback")
+    elif base and not key:
+        _log.debug("GRADATA_LLM_BASE set but GRADATA_LLM_KEY missing; checking Gemma fallback")

     gemma_key = os.environ.get("GRADATA_GEMMA_API_KEY", "")
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@src/gradata/enhancements/meta_rules.py` around lines 605 - 616, The code
silently skips partial OpenAI-compatible credentials (when one of
GRADATA_LLM_KEY or GRADATA_LLM_BASE is set but the other is empty), so update
the function that evaluates key, base, model (variables key, base, model) to
detect this partial-credential case and emit a debug (or warning) log indicating
which env var is missing before falling through to the Gemma check;
specifically, after reading key and base from env (GRADATA_LLM_KEY,
GRADATA_LLM_BASE) and before the existing `if key and base: return ...` block
add a branch that logs a clear message referencing the missing piece (e.g.,
"GRADATA_LLM_KEY set but GRADATA_LLM_BASE missing") so users aren’t silently
misrouted to Gemma, then continue with the existing fallthrough behavior.



def _build_principle_prompt(rules: list[Lesson], category: str) -> str:
    """Compose the single-principle synthesis prompt for up to 10 rules."""
    capped = rules[:10]
    bullets = "\n".join(f"- {r.description}" for r in capped if r.description)
    parts = [
        f'Given these {min(len(rules), 10)} user corrections related to "{category}":',
        bullets,
        "",
        "Write ONE actionable behavioral principle (1-2 sentences) that captures the pattern.",
        'Format: "When [context], [do X] instead of [Y]."',
        "Do not list individual words. Focus on the behavioral change.",
        "Return ONLY the principle, no preamble.",
    ]
    return "\n".join(parts)


def _call_gemma_native(prompt: str, creds: str, model: str, timeout: float = 15.0) -> str | None:
    """Call Google's native Gemma API (the OpenAI-compat endpoint rejects AQ. keys)."""
    import urllib.error
    import urllib.request

    url = f"https://generativelanguage.googleapis.com/v1beta/models/{model}:generateContent"
    request_body = {
        "contents": [{"parts": [{"text": prompt}]}],
        "generationConfig": {"maxOutputTokens": 200, "temperature": 0.3},
    }
    try:
        req = urllib.request.Request(
            url,
            data=json.dumps(request_body).encode(),
            headers={"Content-Type": "application/json", "x-goog-api-key": creds},
            method="POST",
        )
        with urllib.request.urlopen(req, timeout=timeout) as resp:
            parsed = json.loads(resp.read().decode())
        # Malformed responses raise KeyError/IndexError, handled below.
        candidate = parsed["candidates"][0]["content"]["parts"][0]["text"].strip()
    except (urllib.error.URLError, urllib.error.HTTPError, OSError, KeyError,
            json.JSONDecodeError, IndexError) as exc:
        _log.debug("Gemma native call failed: %s", exc)
        return None
    # Reject degenerate outputs: too short to be a principle, or runaway text.
    return candidate if 15 <= len(candidate) <= 500 else None


def _try_llm_principle(rules: list[Lesson], category: str) -> str | None:
    """Best-effort LLM synthesis of ONE behavioral principle for a rule group.

    Returns the principle string or None (no credentials, empty input, or
    any LLM error). Never raises -- synthesis must degrade to deterministic.

    Provider resolution:
    1. ``GRADATA_LLM_KEY`` + ``GRADATA_LLM_BASE`` -- OpenAI-compat endpoint.
    2. ``GRADATA_GEMMA_API_KEY`` -- Google's native Gemma API.
    """
    import os

    if not rules:
        return None

    k = os.environ.get("GRADATA_LLM_KEY", "")
    b = os.environ.get("GRADATA_LLM_BASE", "")
    if k and b:
        try:
            from gradata.enhancements.llm_synthesizer import synthesise_principle_llm

            return synthesise_principle_llm(
                lessons=rules,
                theme=category,
                api_key=k,
                api_base=b,
                model=os.environ.get("GRADATA_LLM_MODEL", "gpt-4o-mini"),
            )
        except Exception as exc:  # degrade to deterministic
            _log.debug("OpenAI-compat synthesis failed for %s: %s", category, exc)
            return None

    g = os.environ.get("GRADATA_GEMMA_API_KEY", "")
    if g:
        model = os.environ.get("GRADATA_GEMMA_MODEL", _GEMMA_DEFAULT_MODEL)
        return _call_gemma_native(_build_principle_prompt(rules, category), g, model)

    return None


def _call_llm_for_synthesis(
Expand Down Expand Up @@ -924,13 +1018,20 @@ def synthesize_meta_rules_agentic(

avg_conf = round(sum(r.confidence for r in rules) / len(rules), 4)
categories = sorted(set(r.category for r in rules))

# Build principle from rule descriptions (deterministic for OSS)
# Cloud can override with LLM synthesis via source="llm_synth"
descriptions = [r.description for r in rules]
principle = f"Across {len(rules)} corrections in {category}: " + "; ".join(descriptions[:5])
if len(descriptions) > 5:
principle += f" (and {len(descriptions) - 5} more)"

# Prefer LLM-synthesized behavioral principle when credentials available.
# Empirically (2026-04-14 ablation) deterministic principles regress
# correctness; LLM principles are injectable, deterministic are not.
llm_principle = _try_llm_principle(rules, category)
if llm_principle:
principle = llm_principle
source = "llm_synth"
else:
principle = f"Across {len(rules)} corrections in {category}: " + "; ".join(descriptions[:5])
if len(descriptions) > 5:
principle += f" (and {len(descriptions) - 5} more)"
source = "deterministic"

meta = MetaRule(
id=mid,
Expand All @@ -940,7 +1041,7 @@ def synthesize_meta_rules_agentic(
confidence=avg_conf,
created_session=current_session,
last_validated_session=current_session,
source="deterministic",
source=source,
)

new_metas.append(meta)
Expand Down
91 changes: 87 additions & 4 deletions src/gradata/enhancements/rule_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from dataclasses import dataclass, field
from pathlib import Path

from gradata._types import LessonState
from gradata._types import Lesson, LessonState

_log = logging.getLogger(__name__)

Expand All @@ -33,12 +33,80 @@ class PipelineResult:
skills_generated: list[str] = field(default_factory=list)
skills_updated: int = 0
self_observation_candidates: int = 0
patterns_lifted: int = 0


def _normalize_pattern_description(text: str) -> str:
"""Strip noise prefixes so dedup across pipeline runs catches duplicates."""
text = text.strip()
for prefix in ("User corrected: ", "[AUTO] "):
if text.startswith(prefix):
text = text[len(prefix):]
return text


def _patterns_to_graduated_lessons(
    db_path: Path,
    current_session: int,
    min_sessions: int = 2,
    min_score: float = 3.0,
) -> list[Lesson]:
    """Lift graduated correction_patterns into synthetic RULE-state lessons.

    Before this wiring the 437-row correction_patterns table was orphaned --
    query_graduation_candidates had no production caller, so meta-rule
    synthesis never saw the real user corrections. This bridges the gap:
    clusters that already hit (sessions >= min_sessions, weight >= min_score)
    are lifted directly to RULE state for synthesis.

    Returns an empty list on any failure -- missing storage module, missing
    DB file, or a query error -- so the pipeline degrades gracefully.
    """
    try:
        from gradata.enhancements.meta_rules_storage import (  # type: ignore[import]
            query_graduation_candidates,
        )
    except ImportError:
        return []
    if not db_path.is_file():
        return []

    try:
        candidates = query_graduation_candidates(
            db_path, min_sessions=min_sessions, min_score=min_score,
        )
    except Exception as exc:
        _log.debug("_patterns_to_graduated_lessons: query failed: %s", exc)
        return []

    lessons: list[Lesson] = []
    seen: set[tuple[str, str]] = set()
    for row in candidates:
        # Trim BEFORE the [AUTO] filter so rows like "  [AUTO] ..." are
        # also recognized and dropped.
        raw = str(row.get("representative_text") or "").strip()
        # Drop evaluator-generated noise -- not real user corrections
        if raw.startswith("[AUTO]"):
            continue
        desc = _normalize_pattern_description(raw)
        if not desc:
            continue
        category = (row.get("category") or "GENERAL").upper()
        dedup_key = (category, desc)
        if dedup_key in seen:
            continue
        seen.add(dedup_key)
        # Fall back to today's date (not a hardcoded constant) when
        # first_seen is missing, to avoid writing misleading metadata.
        first_seen = str(row.get("first_seen") or "")[:10]
        if not first_seen:
            from datetime import date as _date
            first_seen = _date.today().isoformat()
        lessons.append(Lesson(
            date=first_seen,
            state=LessonState.RULE,
            confidence=0.92,  # graduated automatically, so below 1.0
            category=category,
            description=desc,
            fire_count=int(row.get("distinct_sessions") or 2),
        ))
    return lessons


def _generate_skill_file(
lesson: "object",
output_dir: "Path",
) -> "Path | None":
lesson: Lesson,
output_dir: Path,
) -> Path | None:
"""Generate a SKILL.md file from a graduated rule.

Only generates for rules meeting quality gate:
Expand Down Expand Up @@ -318,6 +386,21 @@ def run_rule_pipeline(
except (ImportError, Exception) as exc:
result.errors.append(f"Phase 0: self-observation: {exc}")

# ── Phase 1.6: Lift graduated correction_patterns into all_lessons ───────
# Bridges the orphaned correction_patterns table (437 user corrections)
# into synthesis. Without this, RULE-state lessons come only from
# lessons.md which can be empty on fresh brains.
try:
pattern_lessons = _patterns_to_graduated_lessons(db_path, current_session)
if pattern_lessons:
existing_keys = {(l.category, l.description) for l in all_lessons}
for pl in pattern_lessons:
if (pl.category, pl.description) not in existing_keys:
all_lessons.append(pl)
result.patterns_lifted += 1
Comment on lines +396 to +400
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

Update dedup set after append in Phase 1.6.

existing_keys is initialized once and never updated, so in-loop dedup only reflects preexisting lessons.

Dedup fix
             for pl in pattern_lessons:
                 if (pl.category, pl.description) not in existing_keys:
                     all_lessons.append(pl)
+                    existing_keys.add((pl.category, pl.description))
                     result.patterns_lifted += 1
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
existing_keys = {(l.category, l.description) for l in all_lessons}
for pl in pattern_lessons:
if (pl.category, pl.description) not in existing_keys:
all_lessons.append(pl)
result.patterns_lifted += 1
existing_keys = {(l.category, l.description) for l in all_lessons}
for pl in pattern_lessons:
if (pl.category, pl.description) not in existing_keys:
all_lessons.append(pl)
existing_keys.add((pl.category, pl.description))
result.patterns_lifted += 1
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@src/gradata/enhancements/rule_pipeline.py` around lines 397 - 401,
existing_keys is built once before the loop and never updated, so duplicates
within pattern_lessons can be appended multiple times; update the dedup set
inside the loop after appending each new lesson by adding the tuple
(pl.category, pl.description) to existing_keys so the check (pl.category,
pl.description) not in existing_keys correctly prevents duplicates; modify the
loop that iterates over pattern_lessons (which currently appends to all_lessons
and increments result.patterns_lifted) to also update existing_keys after a
successful append.

except Exception as exc:
result.errors.append(f"Phase 1.6: pattern lift: {exc}")

# ── Phase 2: Atomic writes ────────────────────────────────────────────────
# Graduate rules, update confidence, create meta-rules.
for lesson in all_lessons:
Expand Down
47 changes: 47 additions & 0 deletions tests/test_agentic_synthesis.py
Original file line number Diff line number Diff line change
Expand Up @@ -313,3 +313,50 @@ def test_non_rule_state_lessons_excluded():
assert len(result_with_noise) == 1
assert len(result_clean) == 1
assert set(result_with_noise[0].source_lesson_ids) == set(result_clean[0].source_lesson_ids)


# ---------------------------------------------------------------------------
# LLM principle wiring
# ---------------------------------------------------------------------------


def test_llm_principle_used_when_synthesizer_returns_string(monkeypatch):
    """When LLM synthesis succeeds, principle uses LLM text and source=llm_synth."""
    import gradata.enhancements.meta_rules as mr

    canned = "When drafting, lead with the benefit, not the feature."
    monkeypatch.setattr(mr, "_try_llm_principle", lambda rules, category: canned)

    result = synthesize_meta_rules_agentic(
        lessons=_make_rule_group("DRAFTING", n=3, confidence=0.92)
    )

    assert len(result) == 1
    meta = result[0]
    assert meta.source == "llm_synth"
    assert meta.principle == canned


def test_llm_principle_falls_back_to_deterministic_on_none(monkeypatch):
    """When LLM returns None (no creds or failure), deterministic path runs."""
    import gradata.enhancements.meta_rules as mr

    def _no_llm(rules, category):
        return None

    monkeypatch.setattr(mr, "_try_llm_principle", _no_llm)

    result = synthesize_meta_rules_agentic(
        lessons=_make_rule_group("DRAFTING", n=3, confidence=0.92)
    )

    assert len(result) == 1
    meta = result[0]
    assert meta.source == "deterministic"
    assert "Across 3 corrections in DRAFTING" in meta.principle


def test_try_llm_principle_returns_none_without_creds(monkeypatch):
    """_try_llm_principle degrades silently when no credentials configured."""
    import gradata.enhancements.meta_rules as mr

    for var in (
        "GRADATA_LLM_KEY",
        "GRADATA_LLM_BASE",
        "GRADATA_GEMMA_API_KEY",
        "GRADATA_GEMMA_BASE",
    ):
        monkeypatch.delenv(var, raising=False)

    rules = _make_rule_group("DRAFTING", n=3)
    assert mr._try_llm_principle(rules, "DRAFTING") is None
Comment on lines +352 to +362
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🧹 Nitpick | 🔵 Trivial

LGTM, but consider adding a precedence test.

This test correctly validates silent degradation when no credentials are configured. However, there's no test coverage for the precedence behavior when both GRADATA_LLM_KEY/GRADATA_LLM_BASE and GRADATA_GEMMA_API_KEY are set. A test confirming that OpenAI-compat credentials take precedence would prevent future regressions in the resolution order.

Would you like me to draft a test case for credential precedence verification?

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@tests/test_agentic_synthesis.py` around lines 352 - 362, Add a new test that
verifies credential precedence when both OpenAI-compatible and Gemma credentials
are present: use monkeypatch.setenv to set GRADATA_LLM_KEY and GRADATA_LLM_BASE
as well as GRADATA_GEMMA_API_KEY and GRADATA_GEMMA_BASE, call
mr._try_llm_principle(rules, "DRAFTING") (same rules from _make_rule_group) and
assert the returned principle matches the result when only GRADATA_LLM_* is set
(i.e., precedence is OpenAI-compatible); then clear GRADATA_LLM_* and assert the
result changes to the Gemma-backed principle to prove precedence behavior.

Loading
Loading