-
Notifications
You must be signed in to change notification settings - Fork 0
feat: wire LLM meta-rule synthesis (Gemma native) #97
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
00b85b1
296cbed
9604d26
762682d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -586,17 +586,111 @@ def parse_lessons_from_markdown(text: str) -> list[Lesson]: | |
| # --------------------------------------------------------------------------- | ||
|
|
||
|
|
||
| _GEMMA_DEFAULT_BASE = "https://generativelanguage.googleapis.com/v1beta/openai" | ||
| _GEMMA_DEFAULT_MODEL = "gemma-3-27b-it" | ||
|
|
||
|
|
||
| def _resolve_llm_credentials() -> tuple[str, str, str]: | ||
| """Resolve LLM credentials from environment. Returns (key, base, model). | ||
|
|
||
| Delegates to the same env vars used by ``llm_synthesizer``. | ||
| Resolution order: | ||
| 1. ``GRADATA_LLM_KEY`` + ``GRADATA_LLM_BASE`` — explicit override. | ||
| 2. ``GRADATA_GEMMA_API_KEY`` — Google AI Studio OpenAI-compat endpoint. | ||
| """ | ||
| import os | ||
|
|
||
| key = os.environ.get("GRADATA_LLM_KEY", "") | ||
| base = os.environ.get("GRADATA_LLM_BASE", "") | ||
| model = os.environ.get("GRADATA_LLM_MODEL", "gpt-4o-mini") | ||
| if key and base: | ||
| return key, base, model | ||
|
|
||
| gemma_key = os.environ.get("GRADATA_GEMMA_API_KEY", "") | ||
| if gemma_key: | ||
| return ( | ||
| gemma_key, | ||
| os.environ.get("GRADATA_GEMMA_BASE", _GEMMA_DEFAULT_BASE), | ||
| os.environ.get("GRADATA_GEMMA_MODEL", _GEMMA_DEFAULT_MODEL), | ||
| ) | ||
|
|
||
| return "", "", model | ||
|
|
||
|
|
||
| def _build_principle_prompt(rules: list[Lesson], category: str) -> str: | ||
| bullets = "\n".join(f"- {r.description}" for r in rules[:10] if r.description) | ||
| return ( | ||
| f'Given these {min(len(rules), 10)} user corrections related to "{category}":\n' | ||
| f"{bullets}\n\n" | ||
| "Write ONE actionable behavioral principle (1-2 sentences) that captures the pattern.\n" | ||
| 'Format: "When [context], [do X] instead of [Y]."\n' | ||
| "Do not list individual words. Focus on the behavioral change.\n" | ||
| "Return ONLY the principle, no preamble." | ||
| ) | ||
|
|
||
|
|
||
| def _call_gemma_native(prompt: str, creds: str, model: str, timeout: float = 15.0) -> str | None: | ||
| """Call Google's native Gemma API (the OpenAI-compat endpoint rejects AQ. keys).""" | ||
| import urllib.error | ||
| import urllib.request | ||
|
|
||
| url = f"https://generativelanguage.googleapis.com/v1beta/models/{model}:generateContent" | ||
| payload = json.dumps({ | ||
| "contents": [{"parts": [{"text": prompt}]}], | ||
| "generationConfig": {"maxOutputTokens": 200, "temperature": 0.3}, | ||
| }).encode() | ||
| headers = {"Content-Type": "application/json", "x-goog-api-key": creds} | ||
| try: | ||
| req = urllib.request.Request(url, data=payload, headers=headers, method="POST") | ||
| with urllib.request.urlopen(req, timeout=timeout) as resp: | ||
| body = json.loads(resp.read().decode()) | ||
| text = body["candidates"][0]["content"]["parts"][0]["text"].strip() | ||
| if 15 <= len(text) <= 500: | ||
| return text | ||
| return None | ||
| except (urllib.error.URLError, urllib.error.HTTPError, OSError, KeyError, | ||
| json.JSONDecodeError, IndexError) as exc: | ||
| _log.debug("Gemma native call failed: %s", exc) | ||
| return None | ||
|
|
||
|
|
||
| def _try_llm_principle(rules: list[Lesson], category: str) -> str | None: | ||
| """Best-effort LLM synthesis of ONE behavioral principle for a rule group. | ||
|
|
||
| Returns the principle string or None (no credentials, empty input, or | ||
| any LLM error). Never raises -- synthesis must degrade to deterministic. | ||
|
|
||
| Provider resolution: | ||
| 1. ``GRADATA_LLM_KEY`` + ``GRADATA_LLM_BASE`` -- OpenAI-compat endpoint. | ||
| 2. ``GRADATA_GEMMA_API_KEY`` -- Google's native Gemma API. | ||
| """ | ||
| import os | ||
|
|
||
| if not rules: | ||
| return None | ||
|
|
||
| k = os.environ.get("GRADATA_LLM_KEY", "") | ||
| b = os.environ.get("GRADATA_LLM_BASE", "") | ||
| if k and b: | ||
| try: | ||
| from gradata.enhancements.llm_synthesizer import synthesise_principle_llm | ||
|
|
||
| return synthesise_principle_llm( | ||
| lessons=rules, | ||
| theme=category, | ||
| api_key=k, | ||
| api_base=b, | ||
| model=os.environ.get("GRADATA_LLM_MODEL", "gpt-4o-mini"), | ||
| ) | ||
| except Exception as exc: # noqa: BLE001 -- degrade to deterministic | ||
| _log.debug("OpenAI-compat synthesis failed for %s: %s", category, exc) | ||
| return None | ||
|
Comment on lines
+684
to
+686
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fix pipeline failure: remove unused The ruff RUF100 error indicates 🔧 Proposed fix- except Exception as exc: # noqa: BLE001 -- degrade to deterministic
+ except Exception as exc: # degrade to deterministic🧰 Tools🪛 GitHub Actions: SDK CI[error] 684-684: ruff RUF100: Unused 🤖 Prompt for AI Agents |
||
|
|
||
| g = os.environ.get("GRADATA_GEMMA_API_KEY", "") | ||
| if g: | ||
| model = os.environ.get("GRADATA_GEMMA_MODEL", _GEMMA_DEFAULT_MODEL) | ||
| return _call_gemma_native(_build_principle_prompt(rules, category), g, model) | ||
|
Comment on lines
+671
to
+691
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🧹 Nitpick | 🔵 Trivial Consider extracting provider detection to reduce credential-resolution duplication.
🧰 Tools🪛 GitHub Actions: SDK CI[error] 684-684: ruff RUF100: Unused 🤖 Prompt for AI Agents |
||
|
|
||
| return None | ||
|
|
||
|
|
||
| def _call_llm_for_synthesis( | ||
|
|
@@ -924,13 +1018,20 @@ def synthesize_meta_rules_agentic( | |
|
|
||
| avg_conf = round(sum(r.confidence for r in rules) / len(rules), 4) | ||
| categories = sorted(set(r.category for r in rules)) | ||
|
|
||
| # Build principle from rule descriptions (deterministic for OSS) | ||
| # Cloud can override with LLM synthesis via source="llm_synth" | ||
| descriptions = [r.description for r in rules] | ||
| principle = f"Across {len(rules)} corrections in {category}: " + "; ".join(descriptions[:5]) | ||
| if len(descriptions) > 5: | ||
| principle += f" (and {len(descriptions) - 5} more)" | ||
|
|
||
| # Prefer LLM-synthesized behavioral principle when credentials available. | ||
| # Empirically (2026-04-14 ablation) deterministic principles regress | ||
| # correctness; LLM principles are injectable, deterministic are not. | ||
| llm_principle = _try_llm_principle(rules, category) | ||
| if llm_principle: | ||
| principle = llm_principle | ||
| source = "llm_synth" | ||
| else: | ||
| principle = f"Across {len(rules)} corrections in {category}: " + "; ".join(descriptions[:5]) | ||
| if len(descriptions) > 5: | ||
| principle += f" (and {len(descriptions) - 5} more)" | ||
| source = "deterministic" | ||
|
|
||
| meta = MetaRule( | ||
| id=mid, | ||
|
|
@@ -940,7 +1041,7 @@ def synthesize_meta_rules_agentic( | |
| confidence=avg_conf, | ||
| created_session=current_session, | ||
| last_validated_session=current_session, | ||
| source="deterministic", | ||
| source=source, | ||
| ) | ||
|
|
||
| new_metas.append(meta) | ||
|
|
||
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -14,7 +14,7 @@ | |||||||||||||||||||||||
| from dataclasses import dataclass, field | ||||||||||||||||||||||||
| from pathlib import Path | ||||||||||||||||||||||||
|
|
||||||||||||||||||||||||
| from gradata._types import LessonState | ||||||||||||||||||||||||
| from gradata._types import Lesson, LessonState | ||||||||||||||||||||||||
|
|
||||||||||||||||||||||||
| _log = logging.getLogger(__name__) | ||||||||||||||||||||||||
|
|
||||||||||||||||||||||||
|
|
@@ -33,12 +33,80 @@ class PipelineResult: | |||||||||||||||||||||||
| skills_generated: list[str] = field(default_factory=list) | ||||||||||||||||||||||||
| skills_updated: int = 0 | ||||||||||||||||||||||||
| self_observation_candidates: int = 0 | ||||||||||||||||||||||||
| patterns_lifted: int = 0 | ||||||||||||||||||||||||
|
|
||||||||||||||||||||||||
|
|
||||||||||||||||||||||||
| def _normalize_pattern_description(text: str) -> str: | ||||||||||||||||||||||||
| """Strip noise prefixes so dedup across pipeline runs catches duplicates.""" | ||||||||||||||||||||||||
| text = text.strip() | ||||||||||||||||||||||||
| for prefix in ("User corrected: ", "[AUTO] "): | ||||||||||||||||||||||||
| if text.startswith(prefix): | ||||||||||||||||||||||||
| text = text[len(prefix):] | ||||||||||||||||||||||||
| return text | ||||||||||||||||||||||||
|
|
||||||||||||||||||||||||
|
|
||||||||||||||||||||||||
| def _patterns_to_graduated_lessons( | ||||||||||||||||||||||||
| db_path: Path, | ||||||||||||||||||||||||
| current_session: int, | ||||||||||||||||||||||||
| min_sessions: int = 2, | ||||||||||||||||||||||||
| min_score: float = 3.0, | ||||||||||||||||||||||||
| ) -> list[Lesson]: | ||||||||||||||||||||||||
| """Lift graduated correction_patterns into synthetic RULE-state lessons. | ||||||||||||||||||||||||
|
|
||||||||||||||||||||||||
| Before this wiring the 437-row correction_patterns table was orphaned -- | ||||||||||||||||||||||||
| query_graduation_candidates had no production caller, so meta-rule | ||||||||||||||||||||||||
| synthesis never saw the real user corrections. This bridges the gap: | ||||||||||||||||||||||||
| clusters that already hit (sessions >= min_sessions, weight >= min_score) | ||||||||||||||||||||||||
| are lifted directly to RULE state for synthesis. | ||||||||||||||||||||||||
| """ | ||||||||||||||||||||||||
| try: | ||||||||||||||||||||||||
| from gradata.enhancements.meta_rules_storage import ( # type: ignore[import] | ||||||||||||||||||||||||
| query_graduation_candidates, | ||||||||||||||||||||||||
| ) | ||||||||||||||||||||||||
| except ImportError: | ||||||||||||||||||||||||
| return [] | ||||||||||||||||||||||||
| if not db_path.is_file(): | ||||||||||||||||||||||||
| return [] | ||||||||||||||||||||||||
|
|
||||||||||||||||||||||||
| try: | ||||||||||||||||||||||||
| candidates = query_graduation_candidates( | ||||||||||||||||||||||||
| db_path, min_sessions=min_sessions, min_score=min_score, | ||||||||||||||||||||||||
| ) | ||||||||||||||||||||||||
|
Comment on lines
+48
to
+74
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Pass
As per coding guidelines, 🤖 Prompt for AI Agents |
||||||||||||||||||||||||
| except Exception as exc: | ||||||||||||||||||||||||
| _log.debug("_patterns_to_graduated_lessons: query failed: %s", exc) | ||||||||||||||||||||||||
| return [] | ||||||||||||||||||||||||
|
|
||||||||||||||||||||||||
| lessons: list[Lesson] = [] | ||||||||||||||||||||||||
| seen: set[tuple[str, str]] = set() | ||||||||||||||||||||||||
| for row in candidates: | ||||||||||||||||||||||||
| raw = row.get("representative_text") or "" | ||||||||||||||||||||||||
| # Drop evaluator-generated noise -- not real user corrections | ||||||||||||||||||||||||
| if raw.startswith("[AUTO]"): | ||||||||||||||||||||||||
| continue | ||||||||||||||||||||||||
| desc = _normalize_pattern_description(raw) | ||||||||||||||||||||||||
|
Comment on lines
+82
to
+86
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Trim before filtering
Suggested fix- raw = row.get("representative_text") or ""
+ raw = str(row.get("representative_text") or "").strip()
# Drop evaluator-generated noise -- not real user corrections
if raw.startswith("[AUTO]"):
continue🤖 Prompt for AI Agents |
||||||||||||||||||||||||
| if not desc: | ||||||||||||||||||||||||
| continue | ||||||||||||||||||||||||
| category = (row.get("category") or "GENERAL").upper() | ||||||||||||||||||||||||
| dedup_key = (category, desc) | ||||||||||||||||||||||||
| if dedup_key in seen: | ||||||||||||||||||||||||
| continue | ||||||||||||||||||||||||
| seen.add(dedup_key) | ||||||||||||||||||||||||
| first_seen = str(row.get("first_seen") or "")[:10] or "2026-01-01" | ||||||||||||||||||||||||
| lessons.append(Lesson( | ||||||||||||||||||||||||
| date=first_seen, | ||||||||||||||||||||||||
| state=LessonState.RULE, | ||||||||||||||||||||||||
| confidence=0.92, | ||||||||||||||||||||||||
|
Comment on lines
+94
to
+98
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Avoid hardcoded fallback lesson date. Using Safer fallback- first_seen = str(row.get("first_seen") or "")[:10] or "2026-01-01"
+ from datetime import date as _date
+ first_seen = str(row.get("first_seen") or "")[:10] or _date.today().isoformat()📝 Committable suggestion
Suggested change
🤖 Prompt for AI Agents |
||||||||||||||||||||||||
| category=category, | ||||||||||||||||||||||||
| description=desc, | ||||||||||||||||||||||||
| fire_count=int(row.get("distinct_sessions") or 2), | ||||||||||||||||||||||||
| )) | ||||||||||||||||||||||||
| return lessons | ||||||||||||||||||||||||
|
|
||||||||||||||||||||||||
|
|
||||||||||||||||||||||||
| def _generate_skill_file( | ||||||||||||||||||||||||
| lesson: "object", | ||||||||||||||||||||||||
| output_dir: "Path", | ||||||||||||||||||||||||
| ) -> "Path | None": | ||||||||||||||||||||||||
| lesson: Lesson, | ||||||||||||||||||||||||
| output_dir: Path, | ||||||||||||||||||||||||
| ) -> Path | None: | ||||||||||||||||||||||||
| """Generate a SKILL.md file from a graduated rule. | ||||||||||||||||||||||||
|
|
||||||||||||||||||||||||
| Only generates for rules meeting quality gate: | ||||||||||||||||||||||||
|
|
@@ -318,6 +386,21 @@ def run_rule_pipeline( | |||||||||||||||||||||||
| except Exception as exc: | ||||||||||||||||||||||||
| result.errors.append(f"Phase 0: self-observation: {exc}") | ||||||||||||||||||||||||
|
|
||||||||||||||||||||||||
| # ── Phase 1.6: Lift graduated correction_patterns into all_lessons ─────── | ||||||||||||||||||||||||
| # Bridges the orphaned correction_patterns table (437 user corrections) | ||||||||||||||||||||||||
| # into synthesis. Without this, RULE-state lessons come only from | ||||||||||||||||||||||||
| # lessons.md which can be empty on fresh brains. | ||||||||||||||||||||||||
| try: | ||||||||||||||||||||||||
| pattern_lessons = _patterns_to_graduated_lessons(db_path, current_session) | ||||||||||||||||||||||||
| if pattern_lessons: | ||||||||||||||||||||||||
| existing_keys = {(l.category, l.description) for l in all_lessons} | ||||||||||||||||||||||||
| for pl in pattern_lessons: | ||||||||||||||||||||||||
| if (pl.category, pl.description) not in existing_keys: | ||||||||||||||||||||||||
| all_lessons.append(pl) | ||||||||||||||||||||||||
| result.patterns_lifted += 1 | ||||||||||||||||||||||||
|
Comment on lines
+396
to
+400
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Update dedup set after append in Phase 1.6.
Dedup fix for pl in pattern_lessons:
if (pl.category, pl.description) not in existing_keys:
all_lessons.append(pl)
+ existing_keys.add((pl.category, pl.description))
result.patterns_lifted += 1📝 Committable suggestion
Suggested change
🤖 Prompt for AI Agents |
||||||||||||||||||||||||
| except Exception as exc: | ||||||||||||||||||||||||
| result.errors.append(f"Phase 1.6: pattern lift: {exc}") | ||||||||||||||||||||||||
|
|
||||||||||||||||||||||||
| # ── Phase 2: Atomic writes ──────────────────────────────────────────────── | ||||||||||||||||||||||||
| # Graduate rules, update confidence, create meta-rules. | ||||||||||||||||||||||||
| for lesson in all_lessons: | ||||||||||||||||||||||||
|
|
||||||||||||||||||||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -313,3 +313,50 @@ def test_non_rule_state_lessons_excluded(): | |
| assert len(result_with_noise) == 1 | ||
| assert len(result_clean) == 1 | ||
| assert set(result_with_noise[0].source_lesson_ids) == set(result_clean[0].source_lesson_ids) | ||
|
|
||
|
|
||
| # --------------------------------------------------------------------------- | ||
| # LLM principle wiring | ||
| # --------------------------------------------------------------------------- | ||
|
|
||
|
|
||
| def test_llm_principle_used_when_synthesizer_returns_string(monkeypatch): | ||
| """When LLM synthesis succeeds, principle uses LLM text and source=llm_synth.""" | ||
| import gradata.enhancements.meta_rules as mr | ||
|
|
||
| monkeypatch.setattr( | ||
| mr, "_try_llm_principle", | ||
| lambda rules, category: "When drafting, lead with the benefit, not the feature." | ||
| ) | ||
| lessons = _make_rule_group("DRAFTING", n=3, confidence=0.92) | ||
| result = synthesize_meta_rules_agentic(lessons=lessons) | ||
|
|
||
| assert len(result) == 1 | ||
| assert result[0].source == "llm_synth" | ||
| assert result[0].principle == "When drafting, lead with the benefit, not the feature." | ||
|
|
||
|
|
||
| def test_llm_principle_falls_back_to_deterministic_on_none(monkeypatch): | ||
| """When LLM returns None (no creds or failure), deterministic path runs.""" | ||
| import gradata.enhancements.meta_rules as mr | ||
|
|
||
| monkeypatch.setattr(mr, "_try_llm_principle", lambda rules, category: None) | ||
| lessons = _make_rule_group("DRAFTING", n=3, confidence=0.92) | ||
| result = synthesize_meta_rules_agentic(lessons=lessons) | ||
|
|
||
| assert len(result) == 1 | ||
| assert result[0].source == "deterministic" | ||
| assert "Across 3 corrections in DRAFTING" in result[0].principle | ||
|
|
||
|
|
||
| def test_try_llm_principle_returns_none_without_creds(monkeypatch): | ||
| """_try_llm_principle degrades silently when no credentials configured.""" | ||
| import gradata.enhancements.meta_rules as mr | ||
|
|
||
| monkeypatch.delenv("GRADATA_LLM_KEY", raising=False) | ||
| monkeypatch.delenv("GRADATA_LLM_BASE", raising=False) | ||
| monkeypatch.delenv("GRADATA_GEMMA_API_KEY", raising=False) | ||
| monkeypatch.delenv("GRADATA_GEMMA_BASE", raising=False) | ||
|
|
||
| rules = _make_rule_group("DRAFTING", n=3) | ||
| assert mr._try_llm_principle(rules, "DRAFTING") is None | ||
|
Comment on lines
+352
to
+362
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🧹 Nitpick | 🔵 Trivial LGTM, but consider adding a precedence test. This test correctly validates silent degradation when no credentials are configured. However, there's no test coverage for the precedence behavior when both Would you like me to draft a test case for credential precedence verification? 🤖 Prompt for AI Agents |
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
🧹 Nitpick | 🔵 Trivial
Edge case: partial OpenAI-compat credentials fall through silently.
If
GRADATA_LLM_KEYis set butGRADATA_LLM_BASEis empty, the function silently falls through to the Gemma check. This could surprise users who set only the key and expect an error. Consider logging a debug message when one is set without the other.💡 Proposed logging for partial credentials
key = os.environ.get("GRADATA_LLM_KEY", "") base = os.environ.get("GRADATA_LLM_BASE", "") model = os.environ.get("GRADATA_LLM_MODEL", "gpt-4o-mini") if key and base: return key, base, model + if key and not base: + _log.debug("GRADATA_LLM_KEY set but GRADATA_LLM_BASE missing; checking Gemma fallback") + elif base and not key: + _log.debug("GRADATA_LLM_BASE set but GRADATA_LLM_KEY missing; checking Gemma fallback") gemma_key = os.environ.get("GRADATA_GEMMA_API_KEY", "")🤖 Prompt for AI Agents