diff --git a/.claude/hooks/statusline/sprites-statusline.js b/.claude/hooks/statusline/sprites-statusline.js
new file mode 100644
index 00000000..cf71c43d
--- /dev/null
+++ b/.claude/hooks/statusline/sprites-statusline.js
@@ -0,0 +1,304 @@
+#!/usr/bin/env node
+// Gradata Statusline v9 — Zero subprocess spawns
+// Line 1: Identity + context window + time
+// Line 2: Jobs | Overdue | Deals | Reply rate | Learning | Saved
+//
+// v9: Replaced all Python, curl, and git CLI spawns with native Node.
+// Uses better-sqlite3 for DB, node:https for Pipedrive, .git/ reads for timestamps.
+
+const fs = require('fs');
+const path = require('path');
+const os = require('os');
+
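+// Claude Code pipes a JSON status payload on stdin ({ model, workspace,
+// session_id, context_window, ... }); if nothing arrives within 3s, exit quietly.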
+const stdinTimeout = setTimeout(() => process.exit(0), 3000);
+let input = '';
+process.stdin.setEncoding('utf8');
+process.stdin.on('data', chunk => input += chunk);
+process.stdin.on('end', () => {
+ clearTimeout(stdinTimeout);
+ try {
+ const data = JSON.parse(input);
+    const model = (data.model && data.model.display_name) || (typeof data.model === 'string' ? data.model : 'Claude');
+ const cfg = require('../config.js');
+ const dir = (data.workspace && data.workspace.current_dir) || data.cwd || cfg.WORKING_DIR;
+ const session = data.session_id || '';
+ const remaining = data.context_window ? data.context_window.remaining_percentage : (data.remaining_context_percentage || null);
+
+ // Native modules — zero spawns
+ const nativeDb = require('../native-db.js');
+ const nativeGit = require('../native-git.js');
+ const nativeHttp = require('../native-http.js');
+
+ const c = {
+ reset: '\x1b[0m', dim: '\x1b[2m', bold: '\x1b[1m',
+ green: '\x1b[32m', yellow: '\x1b[33m', orange: '\x1b[38;5;208m',
+ red: '\x1b[31m', cyan: '\x1b[36m', white: '\x1b[37m',
+ };
+
+ // ── Context Window ──────────────────────────────────────────────
+ let ctxDisplay = '';
+ let usedPct = 0;
+ if (remaining != null) {
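+      // BUFFER approximates the slice of the window Claude Code holds back
+      // (autocompact headroom is the assumption here); usedPct rescales the
+      // rest so the gauge reads 0-100 over the actually usable window.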
+ const BUFFER = 16.5;
+ const usableRemaining = Math.max(0, ((remaining - BUFFER) / (100 - BUFFER)) * 100);
+ usedPct = Math.max(0, Math.min(100, Math.round(100 - usableRemaining)));
+
+ let burnInfo = '';
+ if (session) {
+ try {
+ const historyPath = path.join(os.tmpdir(), `claude-ctx-hist-${session}.json`);
+ let history = [];
+ if (fs.existsSync(historyPath)) {
+ try { history = JSON.parse(fs.readFileSync(historyPath, 'utf8')); } catch { history = []; }
+ }
+ const now = Math.floor(Date.now() / 1000);
+ if (history.length === 0 || history[history.length - 1].used_pct !== usedPct) {
+ history.push({ used_pct: usedPct, timestamp: now });
+ fs.writeFileSync(historyPath, JSON.stringify(history));
+ }
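+          // Burn rate: average used-% delta per recorded change over the last
+          // few samples, projected forward to estimate messages remaining.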
+ if (history.length >= 3) {
+ const recent = history.slice(-6);
+ const avgBurn = (recent[recent.length - 1].used_pct - recent[0].used_pct) / (recent.length - 1);
+ if (avgBurn > 0) {
+ const msgsLeft = Math.round((100 - usedPct) / avgBurn);
+ burnInfo = ` ~${msgsLeft} left`;
+ }
+ }
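+          // Bridge file: publishes current context usage for other hooks
+          // (presumably the ctx watchdog) to read without parsing stdin.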
+ const bridgePath = path.join(os.tmpdir(), `claude-ctx-${session}.json`);
+ fs.writeFileSync(bridgePath, JSON.stringify({
+ session_id: session, remaining_percentage: remaining,
+ used_pct: usedPct, timestamp: now
+ }));
+ } catch {}
+ }
+
+ let color = c.green;
+ if (usedPct >= 80) color = c.red;
+ else if (usedPct >= 65) color = c.orange;
+ else if (usedPct >= 50) color = c.yellow;
+
+ let bracket = 'FRESH', bracketColor = c.green;
+ if (usedPct >= 80) { bracket = 'CRITICAL'; bracketColor = c.red; }
+ else if (usedPct >= 65) { bracket = 'DEPLETED'; bracketColor = c.orange; }
+ else if (usedPct >= 35) { bracket = 'MODERATE'; bracketColor = c.yellow; }
+
+ ctxDisplay = `${color}ctx: ${usedPct}%${burnInfo}${c.reset} ${bracketColor}${c.bold}${bracket}${c.reset}`;
+ }
+
+ // ── Session Number (Anthropic session logs) ─────────────────────
+ // Count .jsonl files across all ~/.claude/projects/ dirs — each file
+ // is one real Claude Code session, regardless of project or worktree.
+ let currentSession = 0;
+ try {
+ const projectsDir = path.join(os.homedir(), '.claude', 'projects');
+ if (fs.existsSync(projectsDir)) {
+ let count = 0;
+ for (const entry of fs.readdirSync(projectsDir)) {
+ const entryPath = path.join(projectsDir, entry);
+ try {
+ if (fs.statSync(entryPath).isDirectory()) {
+ for (const f of fs.readdirSync(entryPath)) {
+ if (f.endsWith('.jsonl')) count++;
+ }
+ }
+ } catch {}
+ }
+ currentSession = count;
+ }
+ } catch {}
+
+ // ── Time ────────────────────────────────────────────────────────
+ const timeStr = new Date().toLocaleTimeString('en-US', { hour: 'numeric', minute: '2-digit', hour12: true });
+
+ // ══════════════════════════════════════════════════════════════════
+ // LINE 1: Identity + Context
+ // ══════════════════════════════════════════════════════════════════
+ const line1 = [
+ `${c.bold}${c.cyan}Gradata${c.reset}`,
+ currentSession > 0 ? `${c.bold}${c.white}S${currentSession}${c.reset}` : '',
+ `${c.dim}${model}${c.reset}`,
+ ctxDisplay,
+ `${c.dim}${timeStr}${c.reset}`,
+ ].filter(Boolean);
+
+ // ══════════════════════════════════════════════════════════════════
+ // LINE 2: The 6 things that matter (all zero-spawn)
+ // ══════════════════════════════════════════════════════════════════
+
+ // 1. JOBS QUEUED — native SQLite via better-sqlite3 (was: Python spawn)
+ let jobsDisplay = '';
+ try {
+ const jd = nativeDb.getJobQueue(cfg.SYSTEM_DB);
+ if (jd.pending > 0) {
+ let age = '';
+ if (jd.oldest) {
+ const diffMs = Date.now() - new Date(jd.oldest).getTime();
+ const hrs = Math.floor(diffMs / 3600000);
+ age = hrs < 24 ? `${hrs}h` : `${Math.floor(hrs / 24)}d`;
+ }
+ jobsDisplay = `${c.yellow}${c.bold}${jd.pending} jobs${age ? ` (${age})` : ''}${c.reset}`;
+ }
+ } catch {}
+
+ // 2. OVERDUE DEALS — file-based cache + async refresh (was: curl spawn)
+ const PIPEDRIVE_CACHE = path.join(os.tmpdir(), 'gradata-pipedrive-cache.json');
+ const CACHE_TTL_MS = 5 * 60 * 1000;
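+  // Two-stage cache: nativeHttp writes the raw API response to
+  // PIPEDRIVE_CACHE + '.raw' in the background; a later render folds it into
+  // the compact summary, so no render ever blocks on the network.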
+ const MORNING_BRIEF = path.join(cfg.BRAIN_DIR, 'morning-brief.md');
+ const BRIEF_MAX_AGE_MS = 24 * 60 * 60 * 1000;
+
+ let overdueCount = 0, activeDealsCount = 0, pipelineVal = '--';
+ let overdueSource = '';
+ let _startupBriefCache = ''; // cached for reuse in reply-rate section
+
+ try {
+ const startupBriefPath = path.join(dir, 'domain', 'pipeline', 'startup-brief.md');
+ const briefSources = [MORNING_BRIEF, startupBriefPath];
+ for (const bp of briefSources) {
+ if (overdueSource) break;
+ if (!fs.existsSync(bp)) continue;
+ const briefAge = Date.now() - fs.statSync(bp).mtimeMs;
+ if (briefAge > BRIEF_MAX_AGE_MS) continue;
+ const briefText = fs.readFileSync(bp, 'utf8');
+ if (bp === startupBriefPath) _startupBriefCache = briefText;
+ const qcMatch = briefText.match(/Overdue Deals \((\d+) need action/);
+ if (qcMatch) { overdueCount = parseInt(qcMatch[1]); overdueSource = 'qc'; break; }
+ const sbMatch = briefText.match(/(\d+) truly overdue/);
+ if (sbMatch) { overdueCount = parseInt(sbMatch[1]); overdueSource = 'sb'; break; }
+ }
+ } catch {}
+
+ // Pipedrive: read processed cache, trigger async refresh if stale (zero spawns)
+ let pdData = null;
+ try {
+ if (fs.existsSync(PIPEDRIVE_CACHE)) {
+ const cached = JSON.parse(fs.readFileSync(PIPEDRIVE_CACHE, 'utf8'));
+ if (Date.now() - cached.ts < CACHE_TTL_MS) {
+ pdData = cached;
+ } else {
+ pdData = cached; // use stale data this render
+ nativeHttp.refreshPipedriveDeals(
+ process.env.PIPEDRIVE_TOKEN,
+ PIPEDRIVE_CACHE + '.raw',
+ 5000
+ );
+ }
+ }
+ } catch {}
+
+ // Process raw Pipedrive response if available (from previous async fetch)
+ if (!pdData) {
+ try {
+ const rawPath = PIPEDRIVE_CACHE + '.raw';
+ if (fs.existsSync(rawPath)) {
+ const raw = JSON.parse(fs.readFileSync(rawPath, 'utf8'));
+ const deals = raw.data || [];
+ const now = new Date(); now.setHours(0,0,0,0);
+ let overdue = 0, activeCount = 0, totalValue = 0;
+ const OLIVER_LABEL = '45';
+ for (const d of deals) {
+ const labels = String(d.label || '').split(',').map(s => s.trim());
+ if (!labels.includes(OLIVER_LABEL)) continue;
+ totalValue += (d.value || 0);
+ activeCount++;
+ const nextAct = d.next_activity_date;
+ if (nextAct) {
+ const actDate = new Date(nextAct); actDate.setHours(0,0,0,0);
+ if (actDate < now) overdue++;
+ }
+ }
+ pdData = { overdue, activeCount, totalValue, ts: Date.now() };
+ fs.writeFileSync(PIPEDRIVE_CACHE, JSON.stringify(pdData));
+ try { fs.unlinkSync(rawPath); } catch {}
+ }
+ } catch {}
+ }
+
+ if (pdData) {
+ if (!overdueSource) overdueCount = pdData.overdue || 0;
+ activeDealsCount = pdData.activeCount || 0;
+ const pv = pdData.totalValue || 0;
+ pipelineVal = pv >= 1000 ? '$' + (pv / 1000).toFixed(1) + 'K' : pv > 0 ? '$' + pv.toFixed(0) : '--';
+ }
+
+ let overdueDisplay = '';
+ if (overdueCount > 0) {
+ overdueDisplay = `${c.red}${c.bold}${overdueCount} overdue${c.reset}`;
+ }
+
+ // 3. DEALS + PIPELINE VALUE
+ const dealsDisplay = activeDealsCount > 0
+ ? `${c.cyan}${activeDealsCount} deals ${pipelineVal}${c.reset}`
+ : '';
+
+ // 4. REPLY RATE — reuse cached startup-brief from overdue section (one read, not two)
+ let replyRate = '', replyRateNum = 0;
+ if (!_startupBriefCache) {
+ // Wasn't read in overdue loop (e.g. morning-brief matched first) — read now
+ try {
+ const sbp = path.join(dir, 'domain', 'pipeline', 'startup-brief.md');
+ if (fs.existsSync(sbp)) _startupBriefCache = fs.readFileSync(sbp, 'utf8');
+ } catch {}
+ }
+ if (_startupBriefCache) {
+ const m = _startupBriefCache.match(/Oliver.s Instantly reply rate:\*{0,2}\s*([\d.]+)%/);
+ if (m) { replyRateNum = parseFloat(m[1]); }
+ }
+ if (replyRateNum === 0) {
+ try {
+ const bd = nativeDb.getBrainScores(cfg.SYSTEM_DB);
+ if ((bd.reply_rate || 0) > 0) replyRateNum = bd.reply_rate;
+ else if ((bd.reply_rate_cum || 0) > 0) replyRateNum = bd.reply_rate_cum;
+ } catch {}
+ }
+ if (replyRateNum > 0) {
+ const rColor = replyRateNum >= 2 ? c.green : replyRateNum >= 1 ? c.yellow : c.dim;
+ replyRate = `${rColor}${replyRateNum.toFixed(1)}% reply${c.reset}`;
+ }
+
+ // 5. LEARNING — parse lessons.md directly in Node (was: Python spawn)
+ let learningDisplay = '';
+ try {
+ if (fs.existsSync(cfg.LESSONS_FILE)) {
+ const text = fs.readFileSync(cfg.LESSONS_FILE, 'utf8');
+ const lines = text.split('\n');
+ let rules = 0, learning = 0;
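+      // Lesson lines look like: [2026-04-20] [RULE:0.92] category: description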
+ for (const line of lines) {
+ const match = line.match(/^\[[\d-]+\]\s+\[(RULE|PATTERN|INSTINCT):([\d.]+)\]/);
+ if (match) {
+ if (match[1] === 'RULE') rules++;
+ else learning++;
+ }
+ }
+ const rColor = rules > 0 ? c.green : c.dim;
+ const lColor = learning > 0 ? c.yellow : c.dim;
+ learningDisplay = `${rColor}${rules} rules${c.reset} ${lColor}${learning} learning${c.reset}`;
+ }
+ } catch {}
+
+ // 6. BRAIN SAVE AGE — read .git/ directly (was: git CLI spawn)
+ let savedDisplay = '';
+ const brainTs = nativeGit.lastCommitTime(cfg.BRAIN_DIR);
+ if (brainTs) {
+ const diffMins = Math.floor((Date.now() - brainTs * 1000) / 60000);
+ let age, ageColor;
+ if (diffMins < 60) { age = `${diffMins}m`; ageColor = c.green; }
+ else if (diffMins < 1440) { age = `${Math.floor(diffMins / 60)}h`; ageColor = diffMins > 360 ? c.orange : c.green; }
+ else { age = `${Math.floor(diffMins / 1440)}d`; ageColor = c.red; }
+ savedDisplay = `${ageColor}saved ${age}${c.reset}`;
+ }
+
+ // ── BUILD LINE 2 ────────────────────────────────────────────────
+ const line2 = [
+ jobsDisplay, overdueDisplay, dealsDisplay, replyRate, learningDisplay, savedDisplay
+ ].filter(Boolean);
+
+ if (line2.length === 0) line2.push(`${c.green}All clear${c.reset}`);
+
+ // ── OUTPUT ───────────────────────────────────────────────────────
+ const sep = ` ${c.dim}\u2502${c.reset} `;
+ process.stdout.write(line1.join(sep) + '\n' + line2.join(sep));
+
+ } catch (e) {
+ process.stdout.write('\x1b[36mGradata\x1b[0m');
+ }
+});
diff --git a/.gitignore b/.gitignore
index c36b721b..34191568 100644
--- a/.gitignore
+++ b/.gitignore
@@ -55,6 +55,19 @@ node_modules/
.claude-flow/logs/
graphify-out/
**/graphify-out/
+.graphify_*.json
+.graphify_*.txt
+.graphify_python
+.graphify_split.py
+.graphify_ast_run.py
+.graphify_cache_check.py
+
+# Gradata runtime logs
+Gradata/run.log
+**/run.log
+
+# Personal tool/settings archives (Claude Code config backups)
+/.archive/
# Virtual environments
.venv/
@@ -135,6 +148,7 @@ Gradata/docs/STRESS_TEST_PROTOCOL.md
Gradata/docs/GRADATA-LAUNCH-STRATEGY.md
Gradata/docs/GTM-Execution-Plan.md
Gradata/docs/gradata-marketing-strategy.md
+Gradata/docs/pre-launch-plan.md
Gradata/docs/gradata-comparison-table.md
Gradata/docs/ablation-experiment-s93.md
Gradata/docs/ARCHITECTURE.md
@@ -185,6 +199,9 @@ Leads/
# (subagents running Bash with > redirects land these in cwd)
# ----------------------------------------------------------------------------
$null
+0
+`
+BrainDetail
GateResult
None
Path
diff --git a/Gradata/.archive/dashboard_streamlit_deprecated_2026-04-23.py b/Gradata/.archive/dashboard_streamlit_deprecated_2026-04-23.py
new file mode 100644
index 00000000..6e0bce75
--- /dev/null
+++ b/Gradata/.archive/dashboard_streamlit_deprecated_2026-04-23.py
@@ -0,0 +1,981 @@
+"""
+Gradata Dashboard — Your AI's fitness tracker.
+===============================================
+Run: streamlit run C:/Users/olive/SpritesWork/brain/scripts/dashboard.py
+"""
+
+import json
+import re
+import sqlite3
+from datetime import datetime
+from pathlib import Path
+
+import pandas as pd
+import plotly.graph_objects as go
+import streamlit as st
+
+# ---------------------------------------------------------------------------
+# Config
+# ---------------------------------------------------------------------------
+BRAIN_DIR = Path("C:/Users/olive/SpritesWork/brain")
+DB_PATH = BRAIN_DIR / "system.db"
+EVENTS_PATH = BRAIN_DIR / "events.jsonl"
+LESSONS_PATH = BRAIN_DIR / "lessons.md"
+PROSPECTS_DIR = BRAIN_DIR / "prospects"
+BRIEF_PATH = BRAIN_DIR / "morning-brief.md"
+TASKS_DIR = Path("C:/Users/olive/.claude/scheduled-tasks")
+
+st.set_page_config(page_title="Gradata", layout="wide", page_icon=":brain:")
+
+# Custom CSS for cleaner look
+st.markdown(
+ """
+
+""",
+ unsafe_allow_html=True,
+)
+
+
+# ---------------------------------------------------------------------------
+# Data Layer (same as before, hidden complexity)
+# ---------------------------------------------------------------------------
+@st.cache_resource
+def get_db():
+ conn = sqlite3.connect(str(DB_PATH), check_same_thread=False)
+ conn.row_factory = sqlite3.Row
+ conn.execute("PRAGMA journal_mode=WAL")
+ conn.execute("PRAGMA busy_timeout=5000")
+ return conn
+
+
+def q(sql, params=None):
+ try:
+ rows = get_db().execute(sql, params or ()).fetchall()
+ return [dict(r) for r in rows]
+ except sqlite3.OperationalError:
+ return []
+
+
+def qdf(sql, params=None):
+ rows = q(sql, params)
+ return pd.DataFrame(rows) if rows else pd.DataFrame()
+
+
+# --- Path B bridge: derive dashboard metrics live from events.jsonl -----------
+# `session_metrics` and `correction_severity` tables haven't been written since
+# 2026-03-30 (writer dropped out of the hook chain). Rather than restore a
+# fragile CLI-script writer, derive the same columns live from the authoritative
+# event log. Kept in dashboard.py (not imported from the SDK) because the SDK
+# import chain has an ~11s cold-start penalty that would make every render slow.
+# Tracked in WRITERS.md; replace with ProjectionRegistry (Path C) when Hermes
+# observers need persisted historicals.
+@st.cache_data(ttl=60)
+def _derive_session_metrics_from_events() -> pd.DataFrame:
+ if not EVENTS_PATH.exists():
+ return pd.DataFrame()
+ corrections: dict[int, int] = {}
+ outputs: dict[int, int] = {}
+ unedited: dict[int, int] = {}
+ session_date: dict[int, str] = {}
+ with EVENTS_PATH.open(encoding="utf-8") as f:
+ for line in f:
+ try:
+ d = json.loads(line)
+ except json.JSONDecodeError:
+ continue
+ sess = d.get("session")
+ if sess is None:
+ continue
+ t = d.get("type")
+ ts = d.get("ts", "")
+ if isinstance(ts, str) and ts:
+ session_date[sess] = ts[:10]
+ if t == "CORRECTION":
+ corrections[sess] = corrections.get(sess, 0) + 1
+ elif t == "OUTPUT":
+ outputs[sess] = outputs.get(sess, 0) + 1
+ if not (d.get("data") or {}).get("edited_by_oliver"):
+ unedited[sess] = unedited.get(sess, 0) + 1
+ rows = []
+ for sess in sorted(set(corrections) | set(outputs)):
+ outs = outputs.get(sess, 0)
+ corr = corrections.get(sess, 0)
+ une = unedited.get(sess, 0)
+ rows.append(
+ {
+ "session": sess,
+ "date": session_date.get(sess, ""),
+ "corrections": corr,
+ "outputs_produced": outs,
+ "outputs_unedited": une,
+ # Density requires outputs to be meaningful; leave NaN otherwise.
+ "correction_density": (min(corr / outs, 1.0) if outs else None),
+ "first_draft_acceptance": ((une / outs) if outs else None),
+ }
+ )
+ return pd.DataFrame(rows)
+
+
+@st.cache_data(ttl=60)
+def _derive_severity_from_events() -> pd.DataFrame:
+ if not EVENTS_PATH.exists():
+ return pd.DataFrame()
+ counts: dict[tuple[int, str], int] = {}
+ with EVENTS_PATH.open(encoding="utf-8") as f:
+ for line in f:
+ try:
+ d = json.loads(line)
+ except json.JSONDecodeError:
+ continue
+ if d.get("type") != "CORRECTION":
+ continue
+ sess = d.get("session")
+ sev = (d.get("data") or {}).get("severity")
+ if sess is None or not sev:
+ continue
+ counts[(sess, str(sev).lower())] = counts.get((sess, str(sev).lower()), 0) + 1
+ rows = [{"session": s, "severity_label": sev, "cnt": c} for (s, sev), c in counts.items()]
+ return pd.DataFrame(rows).sort_values("session") if rows else pd.DataFrame()
+
+
+def parse_lessons():
+ if not LESSONS_PATH.exists():
+ return []
+ text = LESSONS_PATH.read_text(encoding="utf-8")
+ lessons = []
+ lines = text.strip().split("\n")
+ i = 0
+ while i < len(lines):
+ line = lines[i].strip()
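+        # Expected shape: [2026-04-20] [RULE:0.92] category: description text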
+ m = re.match(r"\[(\d{4}-\d{2}-\d{2})\]\s+\[(\w+):([\d.]+)\]\s+(\w+):\s*(.*)", line)
+ if m:
+ lesson = {
+ "date": m.group(1),
+ "state": m.group(2),
+ "confidence": float(m.group(3)),
+ "category": m.group(4),
+ "description": m.group(5)[:80],
+ }
+ if i + 1 < len(lines):
+ fc = re.search(r"Fire count:\s*(\d+)", lines[i + 1])
+ lesson["fire_count"] = int(fc.group(1)) if fc else 0
+ else:
+ lesson["fire_count"] = 0
+ lessons.append(lesson)
+ i += 1
+ return lessons
+
+
+def brief_age_hours():
+ if not BRIEF_PATH.exists():
+ return 9999
+ return (
+ datetime.now() - datetime.fromtimestamp(BRIEF_PATH.stat().st_mtime)
+ ).total_seconds() / 3600
+
+
+def status_dot(good, warn_threshold=None, value=None):
+ """Returns a colored circle based on status."""
+ if isinstance(good, bool):
+ return "🟢" if good else "🔴"
+ if value is not None and warn_threshold is not None:
+ if value <= warn_threshold * 0.5:
+ return "🟢"
+ elif value <= warn_threshold:
+ return "🟡"
+ return "🔴"
+ return "⚪"
+
+
+# ---------------------------------------------------------------------------
+# Sidebar
+# ---------------------------------------------------------------------------
+page = st.sidebar.radio(
+ "",
+ [
+ "Today",
+ "Is My AI Learning?",
+ "My Deals",
+ "Under the Hood",
+ ],
+)
+st.sidebar.markdown("---")
+if st.sidebar.button("Refresh"):
+ st.cache_resource.clear()
+ st.rerun()
+st.sidebar.caption("Gradata v0.1 — Your AI's fitness tracker")
+
+
+# ===================================================================
+# PAGE 1: TODAY — "What do I need to know right now?"
+# ===================================================================
+if page == "Today":
+ st.title("Good morning, Oliver.")
+
+ # --- Overall Health Score ---
+ lessons = parse_lessons()
+ rules = len([l for l in lessons if l["state"] == "RULE"])
+ patterns = len([l for l in lessons if l["state"] == "PATTERN"])
+ instincts = len([l for l in lessons if l["state"] == "INSTINCT"])
+ total_lessons = len(lessons)
+
+ brief_hours = brief_age_hours()
+ dm = q(
+ "SELECT pipeline_value, deals_total, instantly_reply_rate, instantly_sent FROM daily_metrics ORDER BY date DESC LIMIT 1"
+ )
+ latest = dm[0] if dm else {}
+
+ budgets = q("SELECT api_name, daily_limit, used_today FROM credit_budgets")
+ total_credits_used = sum(b["used_today"] for b in budgets)
+
+ # --- Quick Status Row ---
+ st.markdown("### How's everything looking?")
+ s1, s2, s3, s4 = st.columns(4)
+
+ with s1:
+        # Pass good=None so the threshold path runs (a bool short-circuits it).
+        dot = status_dot(None, value=brief_hours, warn_threshold=24)
+ if brief_hours < 12:
+ st.markdown(f"#### {dot} Morning Brief")
+ st.caption("Up to date")
+ elif brief_hours < 48:
+ st.markdown(f"#### {dot} Morning Brief")
+ st.caption(f"Updated {brief_hours:.0f}h ago — getting stale")
+ else:
+ st.markdown(f"#### {dot} Morning Brief")
+ st.caption(f"**{brief_hours / 24:.0f} days old** — not running")
+
+ with s2:
+ graduated = rules + patterns
+ dot = "🟢" if graduated >= 3 else ("🟡" if graduated >= 1 else "🔴")
+ st.markdown(f"#### {dot} AI Learning")
+ if graduated == 0:
+ st.caption(f"{total_lessons} lessons, **none graduated yet**")
+ else:
+ st.caption(f"{rules} rules, {patterns} patterns, {instincts} building")
+
+ with s3:
+ pipe_val = latest.get("pipeline_value", 0)
+ deals = latest.get("deals_total", 0)
+ st.markdown(f"#### {'🟢' if pipe_val > 5000 else '🟡'} Pipeline")
+ st.caption(f"${pipe_val:,.0f} across {deals} deals")
+
+ with s4:
+ st.markdown(f"#### {'🟢' if total_credits_used < 50 else '🟡'} API Credits")
+ st.caption(f"{total_credits_used} used today")
+
+ st.markdown("---")
+
+ # --- What Should You Do Right Now? ---
+ st.markdown("### What needs your attention")
+
+ actions = []
+
+ # Stale deals
+ stale_deals = q(
+ "SELECT company, prospect_name, days_in_stage, stage FROM deals WHERE days_in_stage > 14"
+ )
+ for d in stale_deals:
+ actions.append(
+ (
+ "🔴",
+ f"**{d['company']}** — stuck in '{d['stage']}' for {d['days_in_stage']} days. Follow up or close it.",
+ )
+ )
+
+ # Brief not running
+ if brief_hours > 48:
+ actions.append(
+ (
+ "🔴",
+ f"Your morning brief hasn't updated in **{brief_hours / 24:.0f} days**. The scheduling system may be broken.",
+ )
+ )
+
+ # No graduated lessons
+ if rules + patterns == 0:
+ actions.append(
+ (
+ "🟡",
+ f"Your AI has {total_lessons} lessons but **none have graduated**. The learning pipeline needs attention.",
+ )
+ )
+
+ # Hot credit APIs
+ for b in budgets:
+ if b["used_today"] > b["daily_limit"] * 0.8:
+ actions.append(
+ (
+ "🟡",
+ f"**{b['api_name']}** credits nearly exhausted ({b['used_today']}/{b['daily_limit']}).",
+ )
+ )
+
+ # Deals needing attention (low health)
+ sick_deals = q(
+ "SELECT company, health_score, stage FROM deals WHERE health_score < 40 AND health_score > 0"
+ )
+ for d in sick_deals:
+ actions.append(
+ (
+ "🟡",
+ f"**{d['company']}** health score is {d['health_score']:.0f}/100 — needs a touch.",
+ )
+ )
+
+ if actions:
+ for dot, text in actions:
+ st.markdown(f"{dot} {text}")
+ else:
+ st.success("Everything looks good. No urgent actions needed.")
+
+ st.markdown("---")
+
+ # --- Your Agents ---
+ st.markdown("### Your agents")
+ if TASKS_DIR.exists():
+ task_dirs = sorted([d for d in TASKS_DIR.iterdir() if d.is_dir()])
+ if task_dirs:
+ for td in task_dirs:
+ skill_file = td / "SKILL.md"
+ if not skill_file.exists():
+ continue
+ header = skill_file.read_text(encoding="utf-8")[:500]
+
+ # Parse YAML frontmatter
+ name = td.name
+ desc_match = re.search(r"description:\s*(.+)", header)
+ desc = desc_match.group(1).strip() if desc_match else ""
+
+ # Determine status from description
+ is_disabled = any(
+ tag in desc.upper() for tag in ["DISABLED", "DUPLICATE", "ARCHIVED"]
+ )
+ is_active = not is_disabled and desc != ""
+
+ if is_disabled:
+ icon = "⏸️"
+ status = "Disabled"
+ else:
+ icon = "🟢"
+ status = "Ready"
+
+ with st.container(border=True):
+ ac1, ac2 = st.columns([3, 1])
+ ac1.markdown(f"{icon} **{name}**")
+ short_desc = (
+ desc[:80]
+ .replace("[DUPLICATE", "")
+ .replace("[ARCHIVED]", "")
+ .replace("[DISABLED]", "")
+ .strip(" —-")
+ )
+ ac1.caption(short_desc if short_desc else "No description")
+ ac2.markdown(f"**{status}**")
+ else:
+ st.caption("No scheduled agents found.")
+ else:
+ st.info("No scheduled tasks directory found. Agents will appear here once configured.")
+
+ st.markdown("---")
+
+ # --- Your Pipeline at a Glance ---
+ st.markdown("### Pipeline snapshot")
+ deals_df = qdf(
+ "SELECT company, prospect_name, stage, value, health_score, days_in_stage FROM deals ORDER BY value DESC"
+ )
+ if not deals_df.empty:
+
+ def row_style(row):
+ if row.get("days_in_stage", 0) > 14:
+ return ["background-color: #ef444422"] * len(row)
+ if row.get("health_score", 100) < 40:
+ return ["background-color: #f59e0b22"] * len(row)
+ return [""] * len(row)
+
+ st.dataframe(
+ deals_df,
+ use_container_width=True,
+ hide_index=True,
+ column_config={
+ "company": "Company",
+ "prospect_name": "Contact",
+ "stage": "Stage",
+ "value": st.column_config.NumberColumn("Deal Value", format="$%.0f"),
+ "health_score": st.column_config.ProgressColumn(
+ "Health", min_value=0, max_value=100
+ ),
+ "days_in_stage": "Days in Stage",
+ },
+ )
+
+ # --- Outreach Quick Stats ---
+ st.markdown("### Outreach")
+ if latest.get("instantly_sent"):
+ oc1, oc2, oc3 = st.columns(3)
+ oc1.metric("Emails Sent (All Time)", f"{latest['instantly_sent']:,}")
+ oc2.metric("Reply Rate", f"{latest.get('instantly_reply_rate', 0):.0%}")
+        # `latest` never includes replies_count (see the SELECT above), so
+        # fetch it directly rather than always rendering "N/A".
+        reply_rows = q(
+            "SELECT replies_count FROM daily_metrics ORDER BY date DESC LIMIT 1"
+        )
+        replies = reply_rows[0].get("replies_count") if reply_rows else None
+        oc3.metric(
+            "Total Replies",
+            replies if isinstance(replies, int) else "N/A",
+        )
+ else:
+ st.caption("No outreach data yet.")
+
+
+# ===================================================================
+# PAGE 2: IS MY AI LEARNING? — The whole point of Gradata
+# ===================================================================
+elif page == "Is My AI Learning?":
+ st.title("Is your AI actually learning?")
+
+ # --- Live status block (Path B bridge) -----------------------------------
+ # The projection tables (session_metrics, correction_severity) haven't been
+ # written since 2026-03-30. Panels now derive from events.jsonl live.
+ _sm_live = _derive_session_metrics_from_events()
+ _current_session = int(_sm_live["session"].max()) if not _sm_live.empty else 0
+ _today_iso = datetime.now().strftime("%Y-%m-%d")
+ _corrections_today = 0
+ if not _sm_live.empty and "date" in _sm_live.columns:
+ _today_row = _sm_live[_sm_live["date"] == _today_iso]
+ if not _today_row.empty:
+ _corrections_today = int(_today_row["corrections"].sum())
+ _transitions_df = qdf(
+ "SELECT old_state, new_state, category, ROUND(confidence, 2) as confidence, session "
+ "FROM lesson_transitions ORDER BY session DESC"
+ )
+ _promotions = 0
+ _demotions = 0
+ if not _transitions_df.empty:
+ _promotions = int((_transitions_df["new_state"].isin(["PATTERN", "RULE"])).sum())
+ _demotions = int(
+ (_transitions_df["new_state"].isin(["INSTINCT", "UNTESTABLE", "KILLED"])).sum()
+ )
+ _heal_str = (
+ f"Self-healing active — {_promotions} promotions, {_demotions} demotions"
+ if not _transitions_df.empty
+ else "Self-healing: no state changes yet"
+ )
+ st.info(
+ f"**Live as of session {_current_session}, {_today_iso}** · "
+ f"Learning pipeline: ALIVE ({_corrections_today} corrections today) · "
+ f"{_heal_str}"
+ )
+
+ lessons = parse_lessons()
+ rules = [l for l in lessons if l["state"] == "RULE"]
+ patterns = [l for l in lessons if l["state"] == "PATTERN"]
+ instincts = [l for l in lessons if l["state"] == "INSTINCT"]
+ untestable = [l for l in lessons if l["state"] == "UNTESTABLE"]
+
+ # --- The Big Answer ---
+ graduated = len(rules) + len(patterns)
+ if graduated == 0:
+ st.error(
+ "**Not yet.** You have lessons, but none have graduated. Your AI knows things but can't prove they work."
+ )
+ st.markdown("""
+ **What this means:** Every time you correct your AI, it creates a "lesson." But lessons need to prove
+ they actually help before they become permanent rules. Right now, none have proven themselves.
+
+ **What to do:** Keep correcting outputs. The system needs to see the same lesson fire multiple times
+ across different sessions before it trusts the pattern.
+ """)
+ elif graduated < 5:
+ st.warning(
+ f"**Getting there.** {graduated} lessons have graduated. Your AI is starting to learn your preferences."
+ )
+ else:
+ st.success(f"**Yes.** {graduated} lessons graduated. Your AI is adapting to how you work.")
+
+ st.markdown("---")
+
+ # --- The Journey: How lessons become rules ---
+ st.markdown("### How your AI learns")
+ st.markdown("""
+ Every correction you make creates a lesson. Lessons go through stages before your AI trusts them:
+
+ **You correct something** → Lesson created (Untestable)
+ → AI tries using it (Instinct) → It works multiple times (Pattern) → **Permanent rule**
+ """)
+
+ j1, j2, j3, j4 = st.columns(4)
+ j1.metric(
+ "Waiting to be tested",
+ len(untestable),
+ help="Lessons your AI learned but hasn't had a chance to apply yet",
+ )
+ j2.metric(
+ "Being tested",
+ len(instincts),
+ help="Your AI is actively trying these. If they work, they'll promote.",
+ )
+ j3.metric(
+ "Proven patterns",
+ len(patterns),
+ help="These worked multiple times. Almost permanent.",
+ )
+ j4.metric(
+ "Permanent rules",
+ len(rules),
+ help="Your AI won't forget these. They're part of its DNA now.",
+ )
+
+ # Visual funnel
+ fig = go.Figure(
+ go.Funnel(
+ y=["Waiting to test", "Being tested", "Proven", "Permanent"],
+ x=[len(untestable), len(instincts), len(patterns), len(rules)],
+ marker=dict(color=["#FF6B6B", "#FFD93D", "#6BCB77", "#4D96FF"]),
+ textinfo="value",
+ )
+ )
+ fig.update_layout(height=220, margin=dict(l=10, r=10, t=10, b=10), showlegend=False)
+ st.plotly_chart(fig, use_container_width=True)
+
+ st.markdown("---")
+
+ # --- Are corrections going down? ---
+ st.markdown("### Are you correcting less over time?")
+ st.caption("If this line goes down, your AI is getting better. If flat, something's stuck.")
+
+ # Derive live from events.jsonl — session_metrics table is frozen at
+ # session 76 (2026-03-30). See WRITERS.md for provenance.
+ _cs_src = _derive_session_metrics_from_events()
+ cs_df = (
+ _cs_src[_cs_src["correction_density"].notna()][
+ ["session", "date", "correction_density", "first_draft_acceptance"]
+ ].sort_values("session")
+ if not _cs_src.empty
+ else _cs_src
+ )
+ if not cs_df.empty:
+ st.caption(f"Last updated: live from events.jsonl · session {int(cs_df['session'].max())}")
+ fig = go.Figure()
+ fig.add_trace(
+ go.Scatter(
+ x=cs_df["session"],
+ y=cs_df["correction_density"],
+ mode="lines+markers",
+ name="How much you changed",
+ line=dict(color="#FF6B6B", width=2),
+ fill="tozeroy",
+ fillcolor="rgba(255,107,107,0.1)",
+ )
+ )
+ fig.add_trace(
+ go.Scatter(
+ x=cs_df["session"],
+ y=cs_df["first_draft_acceptance"],
+ mode="lines+markers",
+ name="Used as-is rate",
+ line=dict(color="#4ECDC4", width=2),
+ )
+ )
+ fig.update_layout(
+ height=300,
+ margin=dict(l=20, r=20, t=10, b=20),
+ yaxis_title="Rate",
+ xaxis_title="Session #",
+ legend=dict(orientation="h", yanchor="bottom", y=1.02),
+ )
+ st.plotly_chart(fig, use_container_width=True)
+ else:
+ st.info("No correction data yet. Start a few sessions and correct some outputs.")
+
+ st.markdown("---")
+
+ # --- What has your AI learned? ---
+ st.markdown("### What your AI knows")
+
+ # Show rules first (most important)
+ if rules:
+ st.markdown("**Permanent rules** (your AI follows these every time):")
+ for l in rules:
+ st.markdown(f"- {l['description']}")
+
+ if patterns:
+ st.markdown("**Proven patterns** (working well, almost permanent):")
+ for l in patterns:
+ st.markdown(f"- {l['description']}")
+
+ if instincts:
+ with st.expander(f"Being tested ({len(instincts)} lessons)"):
+ for l in instincts:
+ conf_pct = l["confidence"] * 100
+ st.markdown(f"- {l['description']} — *{conf_pct:.0f}% confident*")
+
+ if untestable:
+ with st.expander(f"Waiting to be tested ({len(untestable)} lessons)"):
+ for l in untestable[:15]:
+ st.markdown(f"- {l['description']}")
+ if len(untestable) > 15:
+ st.caption(f"...and {len(untestable) - 15} more")
+
+ st.markdown("---")
+
+ # --- Meta-rules ---
+ st.markdown("### Meta-rules")
+ st.caption(
+ "High-level principles synthesized automatically from clusters of related graduated rules."
+ )
+ meta_rows = q(
+ "SELECT principle, confidence, source_categories, applies_when, never_when "
+ "FROM meta_rules ORDER BY confidence DESC"
+ )
+ if not meta_rows:
+ st.info(
+ "No meta-rules yet. They emerge when 3+ related rules graduate and cluster "
+ "semantically. Meta-rule discovery requires Gradata Cloud."
+ )
+ else:
+ for mr in meta_rows:
+ with st.expander(f"{mr['principle'][:80]} · {mr['confidence']:.0%} confidence"):
+ cols = st.columns(2)
+ try:
+ applies = json.loads(mr["applies_when"] or "[]")
+ except (json.JSONDecodeError, TypeError):
+ applies = []
+ try:
+ never = json.loads(mr["never_when"] or "[]")
+ except (json.JSONDecodeError, TypeError):
+ never = []
+ try:
+ cats = json.loads(mr["source_categories"] or "[]")
+ except (json.JSONDecodeError, TypeError):
+ cats = []
+ cols[0].write("**Applies when:**")
+ cols[0].write("\n".join(f"- {a}" for a in applies) if applies else "—")
+ cols[1].write("**Never when:**")
+ cols[1].write("\n".join(f"- {n}" for n in never) if never else "—")
+ if cats:
+ st.caption(f"Source categories: {', '.join(cats)}")
+
+ st.markdown("---")
+
+ # --- Correction severity trend ---
+ st.markdown("### Are corrections getting lighter?")
+ st.caption(
+ "If the bars shift from red (major rewrites) to green (small tweaks), your AI is improving on the hard stuff."
+ )
+
+ # Derive live from events.jsonl — correction_severity table also frozen.
+ corr_df = _derive_severity_from_events()
+ if corr_df.empty:
+ st.warning(
+ "Severity breakdown is paused — no CORRECTION events carry a "
+ "`severity` field. Restoring via pipeline-revamp."
+ )
+ if not corr_df.empty:
+ st.caption(
+ f"Last updated: live from events.jsonl · session {int(corr_df['session'].max())}"
+ )
+ color_map = {
+ "trivial": "#6BCB77",
+ "minor": "#FFD93D",
+ "moderate": "#FF9F43",
+ "major": "#FF6B6B",
+ "rewrite": "#EE5A24",
+ }
+ fig = go.Figure()
+ for severity in ["rewrite", "major", "moderate", "minor", "trivial"]:
+ sev_data = corr_df[corr_df["severity_label"] == severity]
+ if not sev_data.empty:
+ fig.add_trace(
+ go.Bar(
+ x=sev_data["session"],
+ y=sev_data["cnt"],
+ name=severity.title(),
+ marker_color=color_map.get(severity, "#888"),
+ )
+ )
+ fig.update_layout(
+ barmode="stack",
+ height=300,
+ margin=dict(l=20, r=20, t=10, b=20),
+ xaxis_title="Session #",
+ yaxis_title="Corrections",
+ )
+ st.plotly_chart(fig, use_container_width=True)
+
+ st.markdown("---")
+
+ # --- State changes (self-healing) ---
+ st.markdown("### State changes")
+ st.caption(
+ "Every time a lesson is promoted or demoted automatically, it appears here. "
+ "Promotions build lasting knowledge; demotions prune what isn't working."
+ )
+ if _transitions_df.empty:
+ st.info("No state changes recorded yet.")
+ else:
+ t1, t2 = st.columns(2)
+ t1.metric(
+ "Promotions",
+ _promotions,
+ help="Lessons that moved up (INSTINCT→PATTERN or PATTERN→RULE)",
+ )
+ t2.metric("Demotions / Kills", _demotions, help="Lessons pruned or reset")
+ with st.expander(f"Recent state changes ({min(len(_transitions_df), 50)} shown)"):
+ st.dataframe(
+ _transitions_df.head(50),
+ use_container_width=True,
+ hide_index=True,
+ column_config={
+ "old_state": "From",
+ "new_state": "To",
+ "category": "Category",
+ "confidence": "Confidence",
+ "session": "Session",
+ },
+ )
+
+
+# ===================================================================
+# PAGE 3: MY DEALS — Pipeline without the jargon
+# ===================================================================
+elif page == "My Deals":
+ st.title("Your deals")
+
+ deals = q("SELECT * FROM deals ORDER BY value DESC")
+ if not deals:
+ st.info("No deals tracked yet.")
+ else:
+ # Summary
+ total_val = sum(d.get("value", 0) or 0 for d in deals)
+ active = [d for d in deals if d.get("stage") not in ("closed-won", "closed-lost")]
+ stale = [d for d in active if (d.get("days_in_stage") or 0) > 14]
+
+ m1, m2, m3 = st.columns(3)
+ m1.metric("Total Pipeline", f"${total_val:,.0f}")
+ m2.metric("Active Deals", len(active))
+ m3.metric(
+ "Need Attention",
+ len(stale),
+ delta=f"{len(stale)} stale" if stale else "All good",
+ delta_color="inverse",
+ )
+
+ st.markdown("---")
+
+ # Each deal as a card
+ for d in deals:
+ health = d.get("health_score", 0) or 0
+ days = d.get("days_in_stage", 0) or 0
+ value = d.get("value", 0) or 0
+ stage = d.get("stage", "unknown")
+
+ # Status determination
+ if stage in ("closed-won",):
+ icon = "🏆"
+ elif stage in ("closed-lost",):
+ icon = "❌"
+ elif days > 14:
+ icon = "🔴"
+ elif health < 40:
+ icon = "🟡"
+ else:
+ icon = "🟢"
+
+ with st.container(border=True):
+ dc1, dc2, dc3, dc4 = st.columns([3, 1.5, 1.5, 2])
+
+ dc1.markdown(f"### {icon} {d.get('company', '?')}")
+ dc1.caption(f"{d.get('prospect_name', '?')} — {stage.replace('-', ' ').title()}")
+
+ dc2.metric("Value", f"${value:,.0f}")
+ dc3.metric("Health", f"{health:.0f}/100")
+
+ # What to do
+ if days > 14:
+ dc4.warning(f"Stuck {days} days. Follow up or kill it.")
+ elif health < 40:
+ dc4.warning("Low health. Needs attention.")
+ elif stage == "demo-done":
+ dc4.info("Send proposal or follow up.")
+ elif stage == "proposal-made":
+ dc4.info("Check if they've reviewed it.")
+ else:
+ dc4.success("On track.")
+
+ st.markdown("---")
+
+ # Pipeline over time
+ st.markdown("### Pipeline trend")
+ pipe_df = qdf("SELECT date, pipeline_value FROM daily_metrics ORDER BY date")
+ if not pipe_df.empty:
+ fig = go.Figure()
+ fig.add_trace(
+ go.Scatter(
+ x=pipe_df["date"],
+ y=pipe_df["pipeline_value"],
+ mode="lines+markers",
+ fill="tozeroy",
+ line=dict(color="#4ECDC4", width=2),
+ fillcolor="rgba(78,205,196,0.1)",
+ )
+ )
+ fig.update_layout(
+ height=250,
+ margin=dict(l=20, r=20, t=10, b=20),
+ yaxis_title="Pipeline Value ($)",
+ )
+ st.plotly_chart(fig, use_container_width=True)
+
+ # Outreach
+ st.markdown("### Outreach performance")
+ dm = q(
+ "SELECT instantly_sent, instantly_reply_rate, replies_count FROM daily_metrics ORDER BY date DESC LIMIT 1"
+ )
+ if dm and dm[0].get("instantly_sent"):
+ d = dm[0]
+ oc1, oc2, oc3 = st.columns(3)
+ oc1.metric("Emails Sent", f"{d['instantly_sent']:,}")
+ rr = d.get("instantly_reply_rate", 0)
+ oc2.metric(
+ "Reply Rate",
+ f"{rr:.0%}",
+ delta="Good" if rr > 0.3 else "Low",
+ delta_color="normal" if rr > 0.3 else "inverse",
+ )
+ oc3.metric("Replies", d.get("replies_count", 0))
+
+
+# ===================================================================
+# PAGE 4: UNDER THE HOOD — For when you want the details
+# ===================================================================
+elif page == "Under the Hood":
+ st.title("Under the hood")
+ st.caption("The technical details. You don't need this daily, but it's here when you want it.")
+
+ # --- API Credit Usage ---
+ st.markdown("### API credit usage")
+ budgets = q("SELECT * FROM credit_budgets ORDER BY api_name")
+ if budgets:
+ for b in budgets:
+ used = b["used_today"]
+ limit = b["daily_limit"]
+ pct = (used / limit * 100) if limit > 0 else 0
+ col1, col2 = st.columns([1, 3])
+ col1.markdown(f"**{b['api_name'].title()}**")
+ col2.progress(min(pct / 100, 1.0), text=f"{used}/{limit} today ({pct:.0f}%)")
+
+ st.markdown("---")
+
+ # --- Session History ---
+ st.markdown("### Recent sessions")
+ # session_metrics is frozen at s76 (2026-03-30). Use the live bridge instead.
+ _sm_hood = _derive_session_metrics_from_events()
+ if not _sm_hood.empty:
+ sess_df = (
+ _sm_hood[
+ ["session", "date", "corrections", "correction_density", "first_draft_acceptance"]
+ ]
+ .rename(
+ columns={
+ "session": "Session",
+ "date": "Date",
+ "corrections": "Corrections",
+ "correction_density": "Correction Density",
+ "first_draft_acceptance": "First Draft Acceptance",
+ }
+ )
+ .sort_values("Session", ascending=False)
+ .head(20)
+ )
+ st.caption(f"Live from events.jsonl · {len(_sm_hood)} total sessions")
+ st.dataframe(sess_df, use_container_width=True, hide_index=True)
+ else:
+ sess_df = qdf(
+ "SELECT session as 'Session', date as 'Date', session_type as 'Type', "
+ "corrections as 'Corrections', gate_pass_rate as 'Quality Score' "
+ "FROM session_metrics ORDER BY session DESC LIMIT 20"
+ )
+ if not sess_df.empty:
+ st.dataframe(sess_df, use_container_width=True, hide_index=True)
+
+ st.markdown("---")
+
+ # --- Event Activity ---
+ st.markdown("### What's been happening")
+ evt_df = qdf("SELECT type, COUNT(*) as count FROM events GROUP BY type ORDER BY count DESC")
+ if not evt_df.empty:
+ fig = go.Figure(
+ go.Bar(
+ x=evt_df["count"],
+ y=evt_df["type"],
+ orientation="h",
+ marker_color="#4ECDC4",
+ )
+ )
+ fig.update_layout(
+ height=max(300, len(evt_df) * 30),
+ margin=dict(l=10, r=10, t=10, b=10),
+ yaxis=dict(autorange="reversed"),
+ )
+ st.plotly_chart(fig, use_container_width=True)
+
+ st.markdown("---")
+
+ # --- Recent Event Feed ---
+ st.markdown("### Live event feed")
+ if EVENTS_PATH.exists():
+ with open(EVENTS_PATH, encoding="utf-8") as f:
+ lines = f.readlines()
+ recent = []
+ for line in lines[-30:]:
+ try:
+ e = json.loads(line)
+ recent.append(
+ {
+ "When": e.get("ts", "")[:19],
+ "What": e.get("type", ""),
+ "From": e.get("source", ""),
+ }
+ )
+ except json.JSONDecodeError:
+ pass
+ if recent:
+ st.dataframe(
+ pd.DataFrame(reversed(recent)),
+ use_container_width=True,
+ hide_index=True,
+ )
+
+ st.markdown("---")
+
+ # --- Database Size ---
+ st.markdown("### Brain storage")
+ tables = q("SELECT name FROM sqlite_master WHERE type='table' ORDER BY name")
+ if tables:
+ total_rows = 0
+ stats = []
+ for t in tables:
+ name = t["name"]
+ if name.startswith("brain_fts") or name == "sqlite_sequence":
+ continue
+ cnt = q(f"SELECT COUNT(*) as c FROM [{name}]")
+ rows = cnt[0]["c"] if cnt else 0
+ total_rows += rows
+ stats.append({"Table": name, "Records": rows})
+ # Latest session number from events.jsonl (authoritative — DB events table is sparse)
+ _sm_for_count = _derive_session_metrics_from_events()
+ _latest_sess = int(_sm_for_count["session"].max()) if not _sm_for_count.empty else 0
+ sc1, sc2 = st.columns(2)
+ sc1.metric("Total Records", f"{total_rows:,}")
+ sc2.metric("Session #", f"{_latest_sess:,}", help="Latest session number from events.jsonl")
+ with st.expander("Table breakdown"):
+ st.dataframe(pd.DataFrame(stats), use_container_width=True, hide_index=True)
diff --git a/Gradata/CHANGELOG.md b/Gradata/CHANGELOG.md
index 168c141d..607c0938 100644
--- a/Gradata/CHANGELOG.md
+++ b/Gradata/CHANGELOG.md
@@ -1,5 +1,83 @@
# Changelog
+## [Unreleased] — post-0.6.0 (2026-04-23 → 2026-04-24)
+
+33 commits ahead of public `origin/main`. Not yet pushed.
+
+### Added
+
+- **Cloud sync + Supabase schema hardening.** Dual-write path now pushes local
+ brain events to Supabase with a transform layer (`_cloud_sync.py`) that maps
+ SQLite rows to cloud schema + scrubs JSONB payloads. Watermark-based
+ incremental sync via `sync_state` table (migration 003).
+- **Local SQLite migrations 002 + 003.** `002_event_id_device_id_content_hash`
+ adds sync-stable identifiers; `003_add_sync_state` creates the watermark table.
+ Both idempotent — `CREATE TABLE IF NOT EXISTS` + `add_column_if_missing` + `has_applied()` gating.
+- **Supabase migrations 014/015/016 applied to prod.** UNIQUE constraints on
+ `corrections(brain_id, session, description)` and `events(brain_id, type, created_at)`,
+ plus `brains.last_used_at` column. Reference SQL tracked under
+ `Gradata/migrations/supabase/`; README documents application state + governance.
+- **Canonical graduation + persistent brain_prompt + two-provider synth**
+  (`f91d5557`). The `brain_prompt` artifact now regenerates on every
+  graduation-triggering session close, model-agnostic.
+- **Context-window watchdog hook** (`ctx_watchdog`, commit `56bac80c`): auto-handoff
+ when Claude Code context hits threshold. Reduces forced /clear losses.
+- **Auto-compact handoff pipeline** (`485cd7b4`): two-phase /clear injection so
+ session state survives compaction.
+- **Code-review-graph activation enforced** before any Glob/Grep call
+ (`fd956ec4`) — pushes agents to use the structural graph instead of brute-force
+ file search.
+- **Cloud-health probes in `gradata doctor`** (`d5425337`): reachability + auth
+ token validation + data sanity.
+- **`lesson_applications` audit loop** (`d668bab7`): closes the
+ compound-quality feedback cycle.
+- **Implicit feedback: text-speak detection** (`5a6da455`, `1a497e85`):
+ catches corrections phrased as "r/u/dont/cant".
+
+### Changed
+
+- **Statusline session count** sourced from Anthropic JSONL (`18166663`,
+ `74af66e6`, `a405447d`) — replaces stale `loop-state.md` counter (367 → 659).
+- **Meta-rules `llm_synth` runs locally**, not cloud-side (`0b797b73`).
+ Removes cloud-dependence for a core graduation primitive.
+- **Streamlit dashboard deprecated** (`3ed9438c`). `gradata.ai` web dashboard
+ now covers all panels (`/rules`, `/corrections`, `/self-healing`,
+ `/observability`). Legacy CLI archived to
+ `Gradata/.archive/dashboard_streamlit_deprecated_2026-04-23.py`.
+- **`implicit_feedback` hook emit-only contract** (`aace2410`): `main()` returns
+  None uniformly; signals are emitted as `IMPLICIT_FEEDBACK` events instead of
+  UserPromptSubmit injections, reducing prompt noise.
+
+### Fixed
+
+- **Bare `except: pass` blocks in core SDK** now log at DEBUG (`812eda9c`).
+ Removes silent-failure class from Layer 0.
+- **MISFIRE_PENALTY sign in `agent_graduation`** (`03ddb6f9`): penalties
+ were being applied as bonuses.
+- **Session-start hook**: correct lessons path + brain_prompt load +
+ tighten stale-notes detection (`c2cc47b6`).
+- **Cluster injection line count** now scopes to the injected cluster block
+  only, not the full prompt (`118122a2`).
+- **Public docs truth-pass** on cloud-vs-SDK boundary (`978e4c7f`): removed
+ stale cloud-graduation claims from Pro tier marketing (`2c65bf2a`).
+
+### Tests
+
+- **3932 pass, 3 skip** (up from 2598 in v0.6.0). No xfail remaining.
+- Meta-rules cloud-gated tests unskipped (`509bf927`).
+- `pipeline_e2e`: removed "not yet implemented" skips, bumped fixtures (`2a781645`).
+- `test_implicit_feedback`: coverage for text-speak + multi-signal inputs.
+
+### Security / Governance
+
+- **Supabase migrations now idempotent.** 014/015 wrapped in `DO $$` blocks
+ that check `pg_constraint` for existing UNIQUE on same columns before
+ adding. Prod state: both tables have pre-existing `_key` variants (from
+ inline `UNIQUE()` in original CREATE TABLE) + the new `_unique` variants —
+ redundant but harmless. Documented in `migrations/supabase/README.md`.
+- `.gitignore` hardened against bash-redirect artifacts (`0`, `BrainDetail`),
+ graphify cache files (`.graphify_*`), and run.log spray.
+
## [0.6.0] - 2026-04-15 — "We opened up"
**Strategic pivot:** the moat is not the algorithm code, it's the hosted service.
diff --git a/Gradata/docs/LEGACY_CLEANUP.md b/Gradata/docs/LEGACY_CLEANUP.md
new file mode 100644
index 00000000..7d53a12f
--- /dev/null
+++ b/Gradata/docs/LEGACY_CLEANUP.md
@@ -0,0 +1,54 @@
+# Legacy Cloud-Gate Cleanup Tracker
+
+As of 2026-04-20, Gradata is fully local-first. Cloud-gate stubs and
+"cloud-only" fallbacks are legacy concepts that should be removed.
+
+## Principle
+
+- Every feature must run locally with no external service.
+- `gradata_cloud_backup/` is a private backup, not a gate.
+- LLM-assisted synthesis uses the user's own provider (Anthropic SDK key or
+ Claude Code Max OAuth via `claude -p`). Never a Gradata-hosted endpoint.
+- Tests and fixtures should exercise the local implementation directly.
+
+## Known legacy items to retire
+
+### 1. Deprecated adapter shims (scheduled v0.8.0)
+- `src/gradata/integrations/anthropic_adapter.py` → `middleware.wrap_anthropic`
+- `src/gradata/integrations/langchain_adapter.py` → `middleware.LangChainCallback`
+- `src/gradata/integrations/crewai_adapter.py` → `middleware.CrewAIGuard`
+Warnings are in place; remove the modules and their tests at v0.8.0.
+
+### 2. `_cloud_sync.py` terminology
+File posts to an optional external dashboard — fine to keep, but the
+module docstring should make clear it is optional telemetry, not a
+mandatory cloud dependency. Callers already tolerate absence.
+
+### 3. Docstring drift in `meta_rules.py`
+Module header still says "require Gradata Cloud" and "no-ops in the
+open-source build". That is no longer true as of the local-first port —
+rewrite the header to describe the local clustering algorithm.
+
+### 4. Test-level cloud gating
+Former `@_requires_cloud` / `skipif` markers were deleted in this cycle.
+If any new test reintroduces a cloud gate, delete the gate instead — the
+feature should either be local-first or not ship.
+
+### 5. `api_key` kwarg on `merge_into_meta`
+The old `merge_into_meta(..., api_key=...)` path routed into
+`synthesise_principle_llm` directly. Current architecture drives LLM
+distillation from `rule_synthesizer` at session close instead. The kwarg
+is still accepted via `**kwargs` for forward compatibility but performs
+no work — remove after one release.
+
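+A sketch of the transitional shape described above (names hypothetical, not copied from the SDK):
+
+```python
+def merge_into_meta(group, **kwargs):
+    # `api_key` is still swallowed for forward compatibility but triggers
+    # no LLM call; rule_synthesizer owns distillation at session close now.
+    kwargs.pop("api_key", None)
+    ...
+```
+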
+### 6. Doc sweep
+`docs/cloud/` should be audited for pages that imply cloud is required.
+Rewrite as "optional managed hosting" or delete.
+
+## How to retire an item
+
+1. Grep for the symbol / doc string.
+2. Delete the code path and any tests that exercise it.
+3. Update the module docstring.
+4. Bump the deprecation note in `CHANGELOG`.
+5. Run the full suite.
diff --git a/Gradata/docs/architecture/cloud-monolith-v2.md b/Gradata/docs/architecture/cloud-monolith-v2.md
index b19206fc..5d277ed6 100644
--- a/Gradata/docs/architecture/cloud-monolith-v2.md
+++ b/Gradata/docs/architecture/cloud-monolith-v2.md
@@ -5,8 +5,11 @@ Redis (cache), Kafka (queue), Elasticsearch (search), and Pinecone
(vectors) for gradata-cloud workloads — no new vendors.
Design goal: one Postgres instance, RLS-isolated per tenant, carrying
-every cloud-side workload the SDK needs. Local SQLite stays the source
-of truth for writes; cloud is the pushable reflection + shared surface.
+the cloud-side visualization and sharing workloads. Local SQLite stays
+the source of truth and runs graduation, synthesis, and rule-to-hook
+promotion locally. Cloud is a downstream reflection — it mirrors events
+and rules for dashboards, team sharing, and managed backups, but does
+not gate or re-run the learning loop.
## What v2 adds
diff --git a/Gradata/docs/architecture/multi-tenant-future-proofing.md b/Gradata/docs/architecture/multi-tenant-future-proofing.md
index 405f2f2a..480b1e32 100644
--- a/Gradata/docs/architecture/multi-tenant-future-proofing.md
+++ b/Gradata/docs/architecture/multi-tenant-future-proofing.md
@@ -13,13 +13,13 @@
- Embeddings stored as BLOB (`brain_embeddings`); FTS5 via `brain_fts`.
- `events.scope` column exists (default 'local') — partial seed for tenant scoping, not used.
- `sync_state` table exists per source but not cloud-bound.
-- Proprietary scoring/graduation code in `gradata_cloud_backup/`.
+- Proprietary dashboard / team-sharing code in `gradata_cloud_backup/`. Graduation runs locally in the OSS SDK.
- Open SDK is Apache-2.0 — cannot require cloud to run.
## Architectural Decisions (Lock In Now)
### 1. Local-first stays the source of truth
-SDK writes to local SQLite + jsonl. Cloud is a **sync target + shared meta-rule source + proprietary scoring service**. Do NOT migrate SDK storage to Postgres. Reasons: privacy, offline, open source, speed.
+SDK writes to local SQLite + jsonl and runs the full learning loop (graduation, synthesis, rule-to-hook promotion) locally. Cloud is a **sync target + dashboard + future team + future shared-corpus surface** — not a gate on the local loop. Do NOT migrate SDK storage to Postgres. Reasons: privacy, offline, open source, speed.
### 2. Supabase is the cloud target
Postgres + Auth + RLS + pgvector + Realtime in one project. Free tier covers pre-revenue. Alternative (Neon + Clerk + own RLS) costs weeks you don't have.
@@ -36,9 +36,9 @@ Add `visibility TEXT` to `meta_rules`, `rules` (if separate table emerges):
- `global` — Gradata-curated, pushed to all tenants (e.g., quality_gates, truth_protocol)
### 5. Proprietary boundary
-- **Open SDK** writes raw events, computes local diffs, injects rules.
-- **Cloud (proprietary)** owns: graduation scoring, cross-tenant meta-rule mining, profiling, billing, licensing.
-- Clean interface: SDK posts events → Cloud returns scored rules. Stateless call.
+- **Open SDK** writes raw events, computes local diffs, injects rules, graduates lessons, and synthesizes meta-rules locally (BYO API key or Claude Code Max OAuth).
+- **Cloud (proprietary)** owns: dashboard/visualization, cross-tenant meta-rule corpus (opt-in donation), team sharing, billing, licensing.
+- Clean interface: SDK pushes events + graduated rules to cloud. Cloud reflects them back through UI. Cloud never re-runs graduation.
### 6. Schema versioning
Add `schema_version INT` to event envelope + a `migrations` table. Forward-only migrations. SDK refuses to run against incompatible brain.
@@ -116,9 +116,9 @@ Files to create:
### Phase 3 — Verification (half day)
10. Spin up a **test tenant** (not Oliver, not user #2). Run full flow:
- - Onboard → writes local brain → syncs to cloud → pulls global rules → corrects a draft → rule graduates → syncs back
+ - Onboard → writes local brain → corrects a draft → rule graduates **locally** → syncs reflection up to cloud → dashboard renders.
- Verify RLS: test tenant cannot see Oliver's events (SQL probe)
- - Ablation: disable cloud sync → SDK still works fully offline
+ - Ablation: disable cloud sync → SDK still works fully offline, including graduation + synthesis.
### Phase 4 — Explicitly deferred
diff --git a/Gradata/docs/cloud/dashboard.md b/Gradata/docs/cloud/dashboard.md
index 6e01f94e..6c7935ad 100644
--- a/Gradata/docs/cloud/dashboard.md
+++ b/Gradata/docs/cloud/dashboard.md
@@ -1,6 +1,6 @@
# Dashboard
-The Gradata Cloud dashboard is a Next.js app at [app.gradata.ai](https://app.gradata.ai). It wraps the same data the local `brain.manifest.json` exposes, plus Cloud-only views for meta-rule synthesis, team management, and the operator console.
+The Gradata Cloud dashboard is a Next.js app at [app.gradata.ai](https://app.gradata.ai). It visualizes the same data the local `brain.manifest.json` exposes, plus Cloud-only views for team management and the operator console. Meta-rule synthesis runs locally in the SDK — the dashboard renders the results, it does not re-run them.
diff --git a/Gradata/docs/cloud/overview.md b/Gradata/docs/cloud/overview.md
index 941c9ec4..864cfde8 100644
--- a/Gradata/docs/cloud/overview.md
+++ b/Gradata/docs/cloud/overview.md
@@ -1,6 +1,6 @@
# Gradata Cloud
-Gradata Cloud is the hosted dashboard and back-end that complements the open-source SDK. The SDK keeps running locally; Cloud adds synchronization, cross-device continuity, team sharing, meta-rule synthesis, and an operator view for engineering teams.
+Gradata Cloud is the hosted dashboard that complements the open-source SDK. **The SDK is functionally complete on its own** — graduation, meta-rule synthesis, rule-to-hook promotion, and every piece of the learning loop run locally. Cloud adds visualization, cross-device continuity, team sharing, and managed backups on top of that local loop.
## What's in the SDK vs the Cloud
@@ -14,15 +14,14 @@ Gradata Cloud is the hosted dashboard and back-end that complements the open-sou
| Search (FTS5 + optional embeddings) | Yes | Yes |
| Cross-platform export (`.cursorrules`, `BRAIN-RULES.md`, ...) | Yes | Yes |
| Meta-rule **clustering** | Yes | Yes |
-| Meta-rule **synthesis** (LLM-generated principles) | Placeholder | Yes |
+| Meta-rule **synthesis** (local LLM via your own key or Claude Code Max OAuth) | Yes | Yes |
| Dashboard with charts | No | Yes |
| Cross-device sync of a brain | No | Yes |
| Team brains (shared rules, per-member overrides) | No | Yes |
| Operator view (customer KPIs, alerts) | No | Yes |
-| Cloud-side rule evaluation and A/B harness | No | Yes |
| Managed backups | No | Yes |
-The SDK is Apache-2.0 and will stay permissively open. Cloud is a hosted SaaS tier with team features, corpus aggregation, and brain marketplace on top.
+The SDK is Apache-2.0 and will stay permissively open. Cloud is a hosted SaaS tier that **visualizes** the local learning loop — it does not gate, override, or re-run it. Team features and brain marketplace build on top later.
## When to self-host vs use Cloud
@@ -34,10 +33,10 @@ The SDK is Apache-2.0 and will stay permissively open. Cloud is a hosted SaaS ti
**Use Cloud if:**
-- Get meta-rule synthesis out of the box (no LLM wiring on your side).
+- You want a dashboard to watch your brain mature (graduations, correction-rate decay, compound-quality score).
- Teams can maintain shared, version-controlled brains across multiple operators.
-- Includes dashboard, alerts, and billing.
- Managed backups and cross-device sync handled for you.
+- You want an operator / alerting view for engineering leads.
## Architecture
@@ -48,14 +47,13 @@ flowchart LR
end
subgraph Cloud["Gradata Cloud"]
C[Sync API] --> D[Postgres + pgvector]
- D --> E[Meta-rule synthesis]
D --> F[Dashboard]
D --> G[Operator view]
end
- A <-->|optional outbound only| C
+ A -->|optional outbound only| C
```
-The SDK talks to Cloud only when you opt in with an API key. Sync is outbound: your local brain is the source of truth, Cloud holds a mirror plus derived metrics.
+The SDK talks to Cloud only when you opt in with an API key. Sync is strictly outbound and read-only from Cloud's perspective: your local brain is the source of truth, Cloud holds a mirror plus derived metrics. Cloud never mutates your local state or re-runs graduation.
## Getting an API key
diff --git a/Gradata/docs/concepts/meta-rules.md b/Gradata/docs/concepts/meta-rules.md
index cf8bcff1..56d54c4e 100644
--- a/Gradata/docs/concepts/meta-rules.md
+++ b/Gradata/docs/concepts/meta-rules.md
@@ -44,10 +44,10 @@ Clustering uses a combination of:
Minimum group size is controlled by `min_group_size=3` in `discover_meta_rules()`.
-!!! info "Cloud vs open source"
- In the open-source SDK, meta-rule **clustering** runs locally but the **principle synthesis** step requires [Gradata Cloud](../cloud/overview.md). Without cloud, `discover_meta_rules()` returns an empty list and `merge_into_meta()` produces a placeholder meta-rule with correct IDs and confidence but `principle = "(requires Gradata Cloud)"`.
+!!! info "Local by default"
+ Meta-rule clustering **and** principle synthesis both run locally. Synthesis uses whichever LLM path you've configured: your own Anthropic API key (set `ANTHROPIC_API_KEY`) or the Claude Code Max OAuth path via `claude -p`. Cloud is not required for any of it — the full `[rule, rule, rule] → "Verify before acting"` pipeline runs in the OSS SDK.
- The math, the events, and the storage are all open. Only the LLM-driven synthesis that turns `[rule, rule, rule] → "Verify before acting"` is cloud-gated.
+ Cloud becomes relevant when you want a hosted dashboard, cross-device sync, team brains, or (future) opt-in corpus donation. It does not re-synthesize or override what graduated locally.
## Confidence
diff --git a/Gradata/docs/specs/cloud-sync-and-pricing.md b/Gradata/docs/specs/cloud-sync-and-pricing.md
new file mode 100644
index 00000000..5205c701
--- /dev/null
+++ b/Gradata/docs/specs/cloud-sync-and-pricing.md
@@ -0,0 +1,325 @@
+# Gradata Cloud — Sync Architecture + Pricing Tiers
+
+**Status:** DRAFT v1 (2026-04-21)
+**Scope:** Full sync protocol + paid-tier feature matrix + pricing structure for Free / Personal / Teams / Enterprise.
+**Supersedes:** Current `cloud/sync.py` telemetry-only payload (11-scalar MetricsWindow).
+
+---
+
+## 1. Guiding principles
+
+1. **Local-first, always.** The SDK works forever without cloud. Every "premium" feature remains free when self-hosted.
+2. **Cloud is read-only semantically.** Cloud never authors rules, never mutates brain state, never decides scope. It stores what the device sends and returns what the device asks for.
+3. **Events are the source of truth.** `events.jsonl` is append-only, monotonically timestamped, device-scoped. `system.db` is a materialized view. Syncing events gives multi-device for free with no merge conflicts.
+4. **Privacy gradient.** Raw correction text stays on-device unless user explicitly opts in (corpus contribution). Telemetry-only is the default.
+5. **API keys, not CLI login flows.** User generates keys in the dashboard, pastes once. No OAuth device-code dance.
+
+---
+
+## 2. Identity & authentication
+
+### 2.1 API key model
+
+- User signs up on dashboard → authenticates via email magic-link or Google OAuth (browser-only).
+- User clicks **Settings → API Keys → Generate new key**.
+- Dashboard displays key ONCE: `gk_live_<32-char-base62>`.
+- User copies key and pastes into one of:
+ - `~/.gradata/api-key` (plain file, mode 0600)
+ - `GRADATA_API_KEY` env var
+ - `cloud-config.json` per-brain (existing path)
+- No terminal login. No device code. No redirect flow. One copy/paste, permanent.
+
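+A resolution sketch for those three drop locations. The precedence order
+(per-brain config first, then env, then the user file) is an assumption the
+spec does not pin down, and `resolve_api_key` is illustrative rather than an
+SDK function:
+
+```python
+import os
+from pathlib import Path
+
+
+def resolve_api_key(brain_config: dict) -> str | None:
+    # Most specific wins: per-brain cloud-config.json, then the env var,
+    # then the shared user file.
+    if brain_config.get("api_key"):
+        return brain_config["api_key"]
+    if os.environ.get("GRADATA_API_KEY"):
+        return os.environ["GRADATA_API_KEY"]
+    key_file = Path.home() / ".gradata" / "api-key"  # plain file, mode 0600
+    if key_file.exists():
+        return key_file.read_text(encoding="utf-8").strip()
+    return None
+```
+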
+### 2.2 Key scoping
+
+Each key is scoped at creation time:
+
+| Scope | Can read | Can write | Notes |
+|---|---|---|---|
+| `brain:sync` | own events | own events | default for device sync |
+| `brain:read` | own events | — | dashboard read-only, e.g. mobile viewer |
+| `team:admin` | team events | team events + ACLs | Teams tier only |
+| `marketplace:publish` | — | publish brain snapshot | future marketplace |
+
+### 2.3 Device identity
+
+- First sync on a new device generates `device_id` (UUIDv7, 128-bit).
+- `device_id` stored in `cloud-config.json` per brain, per machine.
+- Cloud tracks `(user_id, brain_id, device_id)` — enables "sign out of this device" and per-device audit.
+
+### 2.4 Revocation
+
+- Dashboard → API Keys → Revoke → key invalidated within 60s.
+- Devices receive 401 on next sync → local `events.jsonl` continues to work; cloud features disabled gracefully.
+
+---
+
+## 3. Sync protocol
+
+### 3.1 Data model
+
+**Event** (wire format, append-only on both sides):
+```json
+{
+ "event_id": "evt_01H7P4...",
+ "brain_id": "brn_01G3...",
+ "device_id": "dev_018E...",
+ "event_ts": "2026-04-21T13:09:19.123Z",
+ "kind": "correction | rule_graduated | session_start | ...",
+ "payload": { /* kind-specific */ },
+ "schema_ver": 1,
+ "content_hash": "sha256:ab12..."
+}
+```
+
+**Constraints:**
+- `event_id` is client-generated ULID → sortable, dedupable.
+- `content_hash` is sha256 of payload → enables idempotent retries.
+- `device_id` scopes authorship — no two devices write events with the same device_id.
+- `event_ts` is logical monotonic per-device (HLC-style); global ordering uses `(event_ts, device_id, event_id)`.
+
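+A minimal sketch of the two invariants above: canonical-JSON hashing so
+retries dedupe, and the global sort key. Helper names are illustrative, not
+SDK functions.
+
+```python
+import hashlib
+import json
+
+
+def content_hash(payload: dict) -> str:
+    # Canonical form (sorted keys, no whitespace) so a retried event
+    # hashes identically and the server can drop the duplicate.
+    canon = json.dumps(payload, sort_keys=True, separators=(",", ":"))
+    return "sha256:" + hashlib.sha256(canon.encode("utf-8")).hexdigest()
+
+
+def sort_key(event: dict) -> tuple[str, str, str]:
+    # Global order per the constraints: timestamp, then author, then ULID
+    # as the final tiebreak. ISO-8601 UTC strings sort lexicographically.
+    return (event["event_ts"], event["device_id"], event["event_id"])
+```
+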
+### 3.2 Endpoints
+
+```
+POST /api/v1/events/push # upload N events, chunked
+GET /api/v1/events/pull # since watermark, exclude own device_id
+POST /api/v1/events/backfill/init # start chunked backfill session
+POST /api/v1/events/backfill/chunk
+POST /api/v1/events/backfill/finalize
+GET /api/v1/brain/snapshot # materialized system.db view (Personal+)
+GET /api/v1/brain/materialize # server-side re-materialize trigger
+```
+
+All require `Authorization: Bearer gk_live_...`. All enforce HTTPS via the existing `require_https`.
+
+### 3.3 Push (incremental)
+
+```
+POST /api/v1/events/push
+Body: { "events": [ event, ... ], "device_id": "dev_...", "brain_id": "brn_..." }
+Returns: { "accepted": 42, "deduped": 3, "rejected": [] }
+```
+
+Client behavior:
+- Triggered on Stop hook or every 5min when events accumulated.
+- Pushes since `last_push_event_id` watermark.
+- Chunks of 500 events max per request.
+- On 429: exponential backoff; on 5xx: retry with same `event_id`s (server dedups by `event_id`).
+
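+A sketch of that client loop, assuming plain `urllib` (the SDK's existing
+HTTP path) and `api.gradata.ai` as the host; the caller slices into
+500-event chunks per the rules above:
+
+```python
+import json
+import time
+import urllib.error
+import urllib.request
+
+
+def push_chunk(events: list[dict], device_id: str, brain_id: str, key: str) -> None:
+    body = json.dumps(
+        {"events": events, "device_id": device_id, "brain_id": brain_id}
+    ).encode("utf-8")
+    req = urllib.request.Request(
+        "https://api.gradata.ai/api/v1/events/push",
+        data=body,
+        headers={"Authorization": f"Bearer {key}", "Content-Type": "application/json"},
+    )
+    for attempt in range(5):
+        try:
+            with urllib.request.urlopen(req, timeout=30):
+                return
+        except urllib.error.HTTPError as e:
+            if e.code == 429 or e.code >= 500:
+                # Retry the same event_ids: the server dedups by event_id,
+                # so replays are idempotent.
+                time.sleep(2**attempt)
+            else:
+                raise
+    raise RuntimeError("push failed after 5 attempts")
+```
+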
+### 3.4 Pull (incremental)
+
+```
+GET /api/v1/events/pull?since=<cursor>&exclude_device=<device_id>&limit=500
+Returns: { "events": [ ... ], "has_more": true, "next_cursor": "..." }
+```
+
+Client behavior:
+- Pulls events authored by OTHER devices of the same brain.
+- Appends to local `events.jsonl`.
+- Triggers local re-materialization (`brain/materialize.py`).
+
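+The pull side is a cursor drain. A sketch, with `api_get` standing in for an
+authenticated GET that returns parsed JSON (illustrative, not an SDK helper):
+
+```python
+def pull_new_events(since: str, own_device: str, api_get) -> list[dict]:
+    # Page through /events/pull until has_more is false, skipping events
+    # this device authored itself.
+    events: list[dict] = []
+    cursor = since
+    while True:
+        page = api_get(
+            "/api/v1/events/pull",
+            params={"since": cursor, "exclude_device": own_device, "limit": 500},
+        )
+        events.extend(page["events"])
+        if not page.get("has_more"):
+            # Caller appends these to events.jsonl and re-materializes.
+            return events
+        cursor = page["next_cursor"]
+```
+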
+### 3.5 Backfill (first device, first sync)
+
+New account, existing local brain with thousands of events:
+
+1. `POST /backfill/init` → returns `backfill_session_id`.
+2. Client reads `events.jsonl`, chunks into 10k events / batch.
+3. `POST /backfill/chunk` × N with `session_id`.
+4. `POST /backfill/finalize` → server materializes snapshot.
+5. Dashboard becomes available.
+
+Backfill runs in a background thread and shows progress in the CLI (`gradata cloud status`) and the dashboard.
+
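+The same flow as a sketch; `api_post` is an assumed authenticated POST helper
+and the endpoint paths come from §3.2:
+
+```python
+import json
+
+
+def backfill(events_path: str, api_post, batch: int = 10_000) -> None:
+    session = api_post("/api/v1/events/backfill/init")["backfill_session_id"]
+    chunk: list[dict] = []
+    with open(events_path, encoding="utf-8") as fh:
+        for line in fh:  # events.jsonl: one event per line
+            chunk.append(json.loads(line))
+            if len(chunk) == batch:
+                api_post(
+                    "/api/v1/events/backfill/chunk",
+                    {"session_id": session, "events": chunk},
+                )
+                chunk = []
+    if chunk:
+        api_post(
+            "/api/v1/events/backfill/chunk",
+            {"session_id": session, "events": chunk},
+        )
+    api_post("/api/v1/events/backfill/finalize", {"session_id": session})
+```
+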
+### 3.6 Second-device onboarding
+
+1. User pastes API key on device B → `device_id` generated.
+2. `GET /events/pull?since=0&exclude_device=dev_B` → streams entire event log.
+3. Local materializer rebuilds `events.jsonl` + `system.db`.
+4. Device B starts its own append stream. Stable forever.
+
+### 3.7 Conflict resolution
+
+There are no conflicts.
+
+- Each event has one author (`device_id`).
+- Events are append-only → no updates, no deletes (if deletion is ever needed, tombstones are themselves just events).
+- Global order = lex-sort on `(event_ts, device_id, event_id)` → deterministic across devices.
+- Materialization is a pure function of the event log → same events → same `system.db`.
+
+### 3.8 Rule graduation across devices
+
+- Graduation is a LOCAL decision on the device where the threshold crossed.
+- The graduation *event* gets pushed.
+- Other devices pull the graduation event; their materializer sees `rule_graduated` and updates local `system.db` accordingly.
+- No device "re-decides" — first-to-graduate wins (monotonic, deterministic).
+
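+Both of these sections reduce to one fold over the sorted log. A toy
+materializer, reusing `sort_key` from the §3.1 sketch; the payload shape is
+illustrative:
+
+```python
+def materialize(events: list[dict]) -> dict:
+    # system.db is a pure function of the log: same events in, same view out.
+    view: dict = {"rules": {}}
+    for ev in sorted(events, key=sort_key):
+        if ev["kind"] == "rule_graduated":
+            # First-to-graduate wins; the same graduation replayed from
+            # another device is a no-op.
+            view["rules"].setdefault(ev["payload"]["rule_id"], ev["payload"])
+    return view
+```
+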
+---
+
+## 4. Feature matrix (locked to pricing tier)
+
+| Feature | Free | Personal | Teams | Enterprise |
+|---|---|---|---|---|
+| SDK (all features, local) | ✓ | ✓ | ✓ | ✓ |
+| Cloud account | ✓ | ✓ | ✓ | ✓ |
+| Dashboard: basic metrics | ✓ | ✓ | ✓ | ✓ |
+| Dashboard: full visualization | — | ✓ | ✓ | ✓ |
+| Multi-device sync | 1 device | unlimited | unlimited | unlimited |
+| Historical retention (cloud) | 7 days | 90 days | 2 years | unlimited |
+| Local DB pruning helper | — | ✓ | ✓ | ✓ |
+| Backup / point-in-time restore | — | 7 days | 30 days | 90 days |
+| Team / shared brain | — | — | up to 10 seats | unlimited |
+| Multi-agent permissions (RBAC) | — | — | ✓ | ✓ + SSO |
+| Audit logs | — | — | 90 days | unlimited + export |
+| Cross-brain rule discovery | — | — | ✓ | ✓ |
+| Marketplace: install brains | view | install | install | install |
+| Marketplace: publish brains | — | ✓ | ✓ | ✓ |
+| Self-host cloud backend | — | — | — | ✓ |
+| SLA | — | — | 99.5% | 99.9% + custom |
+| Support | community | email | priority email | dedicated + Slack |
+
+**Indicative pricing** (subject to market validation — treat as placeholder until 10 paid users):
+- **Free:** $0
+- **Personal:** $12/mo or $108/yr (one user, sync, dashboard, 90d history)
+- **Teams:** $20/seat/mo, min 3 seats (shared brains, RBAC, audit, 2yr)
+- **Enterprise:** custom (self-host option, SSO, unlimited retention, SLA)
+
+---
+
+## 5. Tier-by-tier detail
+
+### 5.1 Free
+
+**What they get:**
+- Unlimited local SDK (every feature — quality gates, truth protocol, rule graduation, meta-rules, Thompson sampling, all of it).
+- Cloud account + dashboard showing **basic metrics only**:
+ - Session count, rule count, rewrite rate trend (single chart, last 7 days).
+ - No correction content, no rule details, no graduation timeline.
+- 1 device syncing → on 2nd device prompt shows upgrade to Personal.
+- 7 days of cloud event retention (older events pruned from cloud; local keeps everything).
+- Community Discord support.
+
+**Why give this away:** adoption funnel. They're using the full SDK — any friction they hit leaves them wanting sync, history, or team features. Conversion engine, not value dilution.
+
+### 5.2 Personal ($12/mo)
+
+**Adds over Free:**
+- **Full dashboard:** every rule, every graduation event, confidence trajectories, correction heatmaps, meta-rule derivations.
+- **Unlimited devices** (laptop + desktop + cloud agents + mobile viewer all sync to same brain).
+- **90-day cloud retention** — older events pruned but snapshot preserved.
+- **Local DB pruning helper:** CLI command `gradata brain prune --older-than 30d --keep-in-cloud` (your local `system.db` stays small; cloud keeps the tail).
+- **7-day backup / point-in-time restore** — "restore my brain to yesterday 3pm."
+- **Publish to marketplace** (when marketplace ships).
+
+**Who buys:** solo developers / indie agent builders who run Claude Code on multiple machines and want their brain to follow them.
+
+### 5.3 Teams ($20/seat/mo, min 3 seats)
+
+**Adds over Personal:**
+- **Shared brains.** A team brain is a separate `brain_id` that multiple users+devices can read/write. Example: a 5-person SDR team shares an "outbound" brain; every correction anyone makes graduates for everyone.
+- **Multi-agent permissions (RBAC):**
+ - Owner: full control, billing.
+ - Admin: manage seats, ACLs, brain settings.
+ - Writer: append events (agents + humans both).
+ - Reader: dashboard + rule install only.
+ - Agent: restricted write — can graduate rules, cannot delete or modify ACLs.
+- **Audit logs** (90 days): every event authored, every rule graduated, every ACL change, every API key use. Queryable by user / device / timeframe.
+- **2-year cloud retention.**
+- **30-day backup** with rollback.
+- **Cross-brain rule discovery:** "users in teams similar to yours graduated this rule; install delta." Driven by anonymized meta-rule clustering server-side.
+- **Priority email support.**
+
+**Who buys:** agencies, SDR teams, support teams running multi-agent workflows where the brain is shared IP.
+
+### 5.4 Enterprise (custom)
+
+**Adds over Teams:**
+- **Self-host option.** Same protocol, run the cloud backend on your own Postgres + object store. API keys issued by your instance. Zero data leaves your VPC.
+- **SSO** (SAML, OIDC) + directory sync (SCIM).
+- **Unlimited retention + unlimited audit log** (exportable to customer's SIEM).
+- **Dedicated subdomain** (`brain.customer.com`).
+- **90-day backup + custom RPO/RTO.**
+- **Custom SLA (99.9%+).**
+- **Dedicated support channel** (Slack Connect or equiv).
+- **Custom contract, DPA, security review.**
+
+**Who buys:** regulated industries (finance, healthcare, defense) where data sovereignty is non-negotiable. Also the natural home for large enterprise agent-team deployments (50+ seats).
+
+---
+
+## 6. Feature deep-dives
+
+### 6.1 Multi-device sync
+
+Architecture covered in §3. Key UX:
+- **Install & paste key:** `gradata cloud enable --key gk_live_...` writes to config.
+- **Status:** `gradata cloud status` shows last push, last pull, backfill state, device_id.
+- **Pause:** `gradata cloud pause` flips `sync_enabled=false` — local continues, cloud idles.
+- **Disconnect device:** `gradata cloud disconnect` clears device_id, revokes this device's key scope server-side.
+
+### 6.2 Team / shared brain state
+
+- **Team brain** = separate `brain_id` in cloud, owned by a team not a user.
+- **Join flow:** admin invites email → user accepts in dashboard → receives a team-scoped API key → local SDK switches `brain_id` to team.
+- **Per-user agents:** my Claude on my laptop writes events with `(team_brain, my_device)`. Your Claude on your laptop writes with `(team_brain, your_device)`. Graduations are team-wide.
+- **Permissions enforced server-side.** Writer key on a read-only user's machine = 403 on push.
+- **Conflict-free** — same event model, just team-scoped.
+
+### 6.3 Audit logs
+
+- Every API request logged: `(user_id, api_key_id, device_id, endpoint, status, bytes, ts)`.
+- Every event push logged with `content_hash`.
+- Every ACL change emits an `acl_changed` event in the brain itself — visible in dashboard timeline and queryable.
+- Teams: 90d retention, dashboard view. Enterprise: unlimited + export API.
+
+### 6.4 Historical retention
+
+- **Local:** user controls. Default keeps everything. `gradata brain prune --older-than 30d` deletes from local `events.jsonl` + rebuilds `system.db`.
+- **Cloud:** tier-gated. Server runs nightly pruner on events older than tier's retention. Rule graduations and meta-rules NEVER pruned (they're derived state, summaries of pruned events).
+- **Read-through:** dashboard for an event older than cloud retention shows "archived" with the graduation it contributed to (so you never lose the *outcome*, only the raw event).
+
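+A sketch of the pruner's shape. The tier windows follow the matrix in §4;
+the exempt event kinds are assumptions (derived state is never deleted):
+
+```python
+RETENTION_DAYS = {"free": 7, "personal": 90, "teams": 730}  # enterprise: unlimited
+
+
+def prune_query(tier: str) -> str | None:
+    days = RETENTION_DAYS.get(tier)
+    if days is None:
+        return None  # unlimited retention, nothing to prune
+    # Graduations and meta-rules summarize pruned events; they stay.
+    return (
+        f"DELETE FROM events "
+        f"WHERE created_at < now() - interval '{days} days' "
+        f"AND kind NOT IN ('rule_graduated', 'meta_rule_created')"
+    )
+```
+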
+### 6.5 Cross-brain rule discovery
+
+The real moat. Mechanism:
+- Server-side clustering over anonymized rule metadata (rule text, graduation context, trigger scope — NO correction content unless `contribute_corpus=true`).
+- For a team brain, nightly job computes: "Similar brains (vector distance on rule corpus) graduated these rules you don't have."
+- Dashboard surfaces top-10 with confidence deltas: "This rule saved similar teams ~12 corrections each. Install?"
+- One-click install → fetches rule snapshot → applies locally at INSTINCT confidence → user's own graduation pipeline decides if it survives.
+
+**This is what makes the paid tier not-a-Grafana-clone.** You can't Streamlit your way to this — it requires a population.
+
+### 6.6 Backup / DR
+
+- **Snapshot cadence:** hourly for Personal+, every 15min for Teams+, continuous WAL for Enterprise.
+- **Point-in-time restore:** dashboard → "Restore brain to 2026-04-20 14:30" → server rebuilds snapshot at that ts → user confirms → pushed to devices on next sync as a special `restore` event (devices re-materialize to that point, then resume).
+- **Export:** any tier can export raw events as JSONL. Owning your data is non-negotiable.
+
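+Because materialization is a pure fold (§3.7), point-in-time restore is a
+replay with a cut-off. A sketch reusing `materialize` from the §3.8 snippet:
+
+```python
+def restore_to(events: list[dict], cutoff_ts: str) -> dict:
+    # Rebuild the view from events at or before the cut-off; devices then
+    # re-materialize to this point on next sync and resume appending.
+    return materialize([e for e in events if e["event_ts"] <= cutoff_ts])
+```
+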
+---
+
+## 7. What ships in v1 (Week 1-2)
+
+Scope for first cut to get the flywheel spinning:
+
+1. **Dashboard-issued API keys** (replace current `GRADATA_API_TOKEN` env with `gk_live_*` keys in Settings UI).
+2. **Event model migration:** extend `cloud/sync.py` to push `Event` objects in addition to `TelemetryPayload`. Maintain backward compat for 2 releases.
+3. **`/events/push` + `/events/pull` endpoints** on `api.gradata.ai`.
+4. **Backfill flow** (chunked init/chunk/finalize).
+5. **Server-side materializer** (port `brain/materialize.py` to a job worker).
+6. **Dashboard: basic metrics (Free) + full dashboard (Personal).**
+7. **Stripe integration** for Personal tier only. Teams/Enterprise contact-sales link.
+8. **Device management UI** (list devices, revoke).
+
+Teams / RBAC / cross-brain / audit can follow in v2 (week 3-6). Marketplace + self-host are v3+.
+
+---
+
+## 8. Open questions
+
+1. **Event encryption at rest.** Do we E2E encrypt events with a user-held key? Protects against cloud compromise but complicates server-side materialization and cross-brain discovery. Likely: opt-in E2E for Enterprise only.
+2. **Mobile viewer.** Does Personal tier include a read-only iOS/Android app, or is that a separate line item?
+3. **Marketplace pricing split.** If Alex Hormozi uploads a brain, what's the rev share? 70/30? 80/20? Subscription vs one-time?
+4. **Free tier abuse.** What stops a single user from spinning 100 brains on Free tier? Likely: 1 brain per free account, upgrade for more.
+5. **Rate limits per tier.** Needs pricing math after load-testing.
+
+---
+
+## 9. Why this shape (one-paragraph summary)
+
+The SDK is already a CRDT disguised as a learning pipeline. Events are append-only, monotonically timestamped, and device-authored — exactly what multi-device sync needs with zero additional machinery. The cloud becomes a *durable event log* plus a *materializer* plus a *coordinator* (for teams and cross-brain comparison). Pricing is tiered not by gating "premium SDK features" — those stay free forever — but by gating the things that genuinely *require* a cloud: multi-device, multi-user, retention, comparison. This makes Free protective (it's how Anthropic-native-memory users discover you when they hit the sovereignty wall), Personal obvious ($12 to have my brain on every device), Teams real (shared cognition is a Netflix-scale problem for 5-person teams), and Enterprise boring (compliance + self-host for the regulated).
diff --git a/Gradata/hooks/hooks.json b/Gradata/hooks/hooks.json
index 036666fe..816c33cc 100644
--- a/Gradata/hooks/hooks.json
+++ b/Gradata/hooks/hooks.json
@@ -49,12 +49,22 @@
],
"Stop": [
{
- "description": "Gradata: emit SESSION_END + run graduation sweep",
+ "description": "Gradata: context-window watchdog — write handoff at threshold",
+ "hooks": [
+ {
+ "type": "command",
+ "command": "python -m gradata.hooks.ctx_watchdog",
+ "timeout": 10000
+ }
+ ]
+ },
+ {
+ "description": "Gradata: gated graduation sweep (concurrency-locked, SDK-only synth, throttled)",
"hooks": [
{
"type": "command",
"command": "python -m gradata.hooks.session_close",
- "timeout": 15000
+ "timeout": 90000
}
]
}
diff --git a/Gradata/migrations/supabase/014_corrections_unique.sql b/Gradata/migrations/supabase/014_corrections_unique.sql
new file mode 100644
index 00000000..dcef3fbd
--- /dev/null
+++ b/Gradata/migrations/supabase/014_corrections_unique.sql
@@ -0,0 +1,41 @@
+-- Migration 014: Deduplicate corrections + add UNIQUE constraint
+-- id is UUID, so we use ctid (not MIN(id)) to pick one row per duplicate group.
+-- Applied to prod 2026-04-24 via Management API (0 duplicates found).
+--
+-- Idempotency: guards against pre-existing UNIQUE constraints on the same
+-- columns (prod already had `corrections_brain_session_desc_key` from the
+-- initial table's inline UNIQUE(...) clause — this migration is a no-op there,
+-- kept for fresh-DB parity).
+--
+-- Run in Supabase SQL editor or via Management API.
+
+BEGIN;
+
+DELETE FROM corrections a
+USING corrections b
+WHERE a.brain_id = b.brain_id
+ AND a.session = b.session
+ AND a.description = b.description
+ AND a.ctid > b.ctid;
+
+DO $$
+BEGIN
+ IF NOT EXISTS (
+ SELECT 1
+ FROM pg_constraint c
+ JOIN pg_class t ON t.oid = c.conrelid
+ WHERE t.relname = 'corrections'
+ AND c.contype = 'u'
+ AND c.conkey @> ARRAY[
+ (SELECT attnum FROM pg_attribute WHERE attrelid = t.oid AND attname = 'brain_id'),
+ (SELECT attnum FROM pg_attribute WHERE attrelid = t.oid AND attname = 'session'),
+ (SELECT attnum FROM pg_attribute WHERE attrelid = t.oid AND attname = 'description')
+ ]::smallint[]
+ ) THEN
+ ALTER TABLE corrections
+ ADD CONSTRAINT corrections_brain_session_description_unique
+ UNIQUE (brain_id, session, description);
+ END IF;
+END $$;
+
+COMMIT;
diff --git a/Gradata/migrations/supabase/015_events_unique.sql b/Gradata/migrations/supabase/015_events_unique.sql
new file mode 100644
index 00000000..f05368ec
--- /dev/null
+++ b/Gradata/migrations/supabase/015_events_unique.sql
@@ -0,0 +1,41 @@
+-- Migration 015: Deduplicate events + add UNIQUE constraint
+-- id is UUID, so we use ctid (not MIN(id)) to pick one row per duplicate group.
+-- Applied to prod 2026-04-24 via Management API (0 duplicates found).
+--
+-- Idempotency: guards against pre-existing UNIQUE constraints on the same
+-- columns (prod already had `events_brain_type_created_at_key` from the
+-- initial table's inline UNIQUE(...) clause — this migration is a no-op there,
+-- kept for fresh-DB parity).
+--
+-- Run in Supabase SQL editor or via Management API.
+
+BEGIN;
+
+DELETE FROM events a
+USING events b
+WHERE a.brain_id = b.brain_id
+ AND a.type = b.type
+ AND a.created_at = b.created_at
+ AND a.ctid > b.ctid;
+
+DO $$
+BEGIN
+ IF NOT EXISTS (
+ SELECT 1
+ FROM pg_constraint c
+ JOIN pg_class t ON t.oid = c.conrelid
+ WHERE t.relname = 'events'
+ AND c.contype = 'u'
+ AND c.conkey @> ARRAY[
+ (SELECT attnum FROM pg_attribute WHERE attrelid = t.oid AND attname = 'brain_id'),
+ (SELECT attnum FROM pg_attribute WHERE attrelid = t.oid AND attname = 'type'),
+ (SELECT attnum FROM pg_attribute WHERE attrelid = t.oid AND attname = 'created_at')
+ ]::smallint[]
+ ) THEN
+ ALTER TABLE events
+ ADD CONSTRAINT events_brain_type_created_at_unique
+ UNIQUE (brain_id, type, created_at);
+ END IF;
+END $$;
+
+COMMIT;
diff --git a/Gradata/migrations/supabase/016_brains_last_used_at.sql b/Gradata/migrations/supabase/016_brains_last_used_at.sql
new file mode 100644
index 00000000..8b1c358e
--- /dev/null
+++ b/Gradata/migrations/supabase/016_brains_last_used_at.sql
@@ -0,0 +1,7 @@
+-- Migration 016: Add last_used_at column to brains table
+-- Used by auth.py verify_api_key to stamp the column on each key use;
+-- returned by the brains list endpoint.
+-- Run in Supabase SQL editor.
+
+ALTER TABLE brains
+ ADD COLUMN IF NOT EXISTS last_used_at TIMESTAMPTZ;
diff --git a/Gradata/migrations/supabase/README.md b/Gradata/migrations/supabase/README.md
new file mode 100644
index 00000000..7d8123c5
--- /dev/null
+++ b/Gradata/migrations/supabase/README.md
@@ -0,0 +1,47 @@
+# Supabase schema migrations
+
+Raw SQL migrations for the proprietary cloud Postgres (project `miqwilxheuxwafvmoajs`).
+Separate from `src/gradata/_migrations/` which owns the **local SQLite** schema.
+
+## Apply
+
+Via Supabase Management API (token in `.env` as `SUPABASE_ACCESS_TOKEN`):
+
+```bash
+curl -sS -X POST "https://api.supabase.com/v1/projects/miqwilxheuxwafvmoajs/database/query" \
+ -H "Authorization: Bearer $SUPABASE_ACCESS_TOKEN" \
+ -H "Content-Type: application/json" \
+ -H "User-Agent: curl/8.0.1" \
+ -d "$(jq -Rs '{query: .}' < migrations/supabase/014_corrections_unique.sql)"
+```
+
+The `User-Agent: curl/8.0.1` header is required to bypass Cloudflare WAF rule 1010.
+
+Or paste into the Supabase SQL editor.
+
+## Applied to prod
+
+| File | Applied | Notes |
+|------|---------|-------|
+| 014_corrections_unique.sql | 2026-04-24 | 0 duplicates found. Prod already had `corrections_brain_session_desc_key` (inline UNIQUE from CREATE TABLE) — migration added a redundant `_unique` constraint on same columns. Both enforce; harmless. Migration now guards with a `pg_constraint` lookup so re-runs are no-ops. |
+| 015_events_unique.sql | 2026-04-24 | Same pattern: prod already had `events_brain_type_created_at_key`. Migration guards with `pg_constraint` lookup. |
+| 016_brains_last_used_at.sql | 2026-04-24 | Column already existed; idempotent `IF NOT EXISTS`. |
+
+### Prod constraint state (verified 2026-04-24)
+
+```
+corrections_brain_session_desc_key UNIQUE (brain_id, session, description) -- pre-existing
+corrections_brain_session_description_unique UNIQUE (brain_id, session, description) -- from 014
+events_brain_type_created_at_key UNIQUE (brain_id, type, created_at) -- pre-existing
+events_brain_type_created_at_unique UNIQUE (brain_id, type, created_at) -- from 015
+```
+
+Redundant pairs are functionally harmless (same column set, same enforcement). Dropping the `_key` variants is a future cleanup migration — not urgent.
+
+## Convention
+
+- Numbered in application order, zero-padded (`NNN_description.sql`).
+- Wrap DDL + DML in `BEGIN; ... COMMIT;` so a failure rolls back.
+- Deduplication on UUID-keyed tables must use `ctid`, not `MIN(id)` — Postgres has no `min(uuid)` aggregate.
+- Use `IF NOT EXISTS` / `IF EXISTS` so re-runs are no-ops.
+- Header comment: what it does, when applied, anything non-obvious.
diff --git a/Gradata/skills/core/session-start/SKILL.md b/Gradata/skills/core/session-start/SKILL.md
new file mode 100644
index 00000000..20d1363a
--- /dev/null
+++ b/Gradata/skills/core/session-start/SKILL.md
@@ -0,0 +1,52 @@
+---
+name: session-start
+description: Run at every session start. Loads minimal context, surfaces what matters. Hooks handle data sync silently.
+---
+
+# Session Startup
+
+Hooks already ran: health check, API sync (Pipedrive/Gmail/Calendar/Instantly/Fireflies), follow-up tracker, memory bridge, agent job queue. Don't re-query what hooks already pulled.
+
+## Step 1: Check Continuation
+
+Read `C:/Users/olive/SpritesWork/brain/continuation.md`. If it exists, follow its Resume Point, then archive: `python C:/Users/olive/SpritesWork/brain/scripts/continuation.py archive`. If missing, continue.
+
+## Step 2: Load Context (parallel batch)
+
+Fire all at once — no dependencies:
+1. Read `domain/pipeline/startup-brief.md` (pipeline snapshot, handoff section) *(verify path — may be stale)*
+2. Read `C:/Users/olive/SpritesWork/brain/lessons.md` (scan for mistakes to avoid)
+3. Check Google Calendar today + 30 days (demos, calls, meetings)
+4. Read `C:/Users/olive/SpritesWork/brain/loop-state.md` (session number, open items) *(auto-regenerated by session_close hook — always fresh)*
+5. Read `C:/Users/olive/SpritesWork/brain/brain_prompt.md` (soul.md VOICE mandatories + graduated RULE-level lessons)
+
+## Step 3: Surface Alerts
+
+Only if relevant:
+- Stale files (loop-state, startup-brief >7 days old)
+- Agent job queue directives from hook output (ACTION REQUIRED items)
+- Overdue deals (from morning-brief.md if fresh <4hrs, else skip)
+
+## Step 4: Output (3 lines max)
+
+```
+[check] S[N] loaded | [today's calendar or "clear"]
+[tasks] Top 2-3 from loop-state open items
+[alert] Only if something is broken/overdue — otherwise omit
+```
+
+Then respond to Oliver's message. Don't dump walls of text.
+
+## On-Demand Loading (during session, not at startup)
+
+Load these ONLY when the task requires them:
+- **CARL rules**: `.carl/global`, `domain/carl/global`, plus task-specific domains
+- **Email writing**: `domain/templates/templates.txt`, `domain/carl/prospect-email`
+- **Demo prep**: `domain/playbooks/sales-methodology.txt`, `domain/carl/demo-prep`
+- **Prospecting**: `domain/playbooks/prospecting-instructions.txt`, then free scripts before Apollo
+- **Product knowledge**: `domain/sprites_context.md`
+- **Prospect history**: `C:/Users/olive/SpritesWork/brain/prospects/`
+- **Design/visual**: ui-ux-pro-max plugin auto-activates
+- **Skills**: route through `brain/scripts/orchestrate.py` for sales tasks
+
+Don't preload skills or CARL domains. Load when Oliver's message makes the intent clear.
diff --git a/Gradata/src/gradata/__init__.py b/Gradata/src/gradata/__init__.py
index 4d0cb014..55558352 100644
--- a/Gradata/src/gradata/__init__.py
+++ b/Gradata/src/gradata/__init__.py
@@ -21,6 +21,7 @@
try:
from importlib.metadata import PackageNotFoundError as _PkgNotFound
from importlib.metadata import version as _pkg_version
+
try:
__version__ = _pkg_version("gradata")
except _PkgNotFound:
@@ -146,6 +147,7 @@ def __getattr__(name: str):
if name in _PATTERN_IMPORTS:
import importlib
import warnings
+
module_path, attr = _PATTERN_IMPORTS[name]
warnings.warn(
f"Importing {name} from 'gradata' is deprecated. "
diff --git a/Gradata/src/gradata/_brain_manifest.py b/Gradata/src/gradata/_brain_manifest.py
index 3ce2e4c4..cec373e5 100644
--- a/Gradata/src/gradata/_brain_manifest.py
+++ b/Gradata/src/gradata/_brain_manifest.py
@@ -20,12 +20,15 @@
"""
import json
+import logging
from datetime import UTC, datetime
from typing import TYPE_CHECKING
import gradata._paths as _p
from gradata._db import get_connection
+_log = logging.getLogger(__name__)
+
# Re-export helpers so existing imports from _brain_manifest still work
from gradata._manifest_helpers import (
_count_events,
@@ -61,14 +64,17 @@ def generate_manifest(*, domain: str = "General", ctx: "BrainContext | None" = N
try:
db = ctx.db_path if ctx else _p.DB_PATH
conn = get_connection(db)
- db_max = conn.execute(
- "SELECT MAX(session) FROM events WHERE typeof(session)='integer'"
- ).fetchone()[0] or 0
+ db_max = (
+ conn.execute(
+ "SELECT MAX(session) FROM events WHERE typeof(session)='integer'"
+ ).fetchone()[0]
+ or 0
+ )
conn.close()
if db_max > version_info["sessions_trained"]:
version_info["sessions_trained"] = db_max
- except Exception:
- pass
+ except Exception as e:
+ _log.debug("Session count DB cross-check failed (non-fatal): %s", e)
quality = _quality_metrics(ctx=ctx)
memory = _memory_composition(ctx=ctx)
@@ -110,10 +116,22 @@ def generate_manifest(*, domain: str = "General", ctx: "BrainContext | None" = N
},
},
"bootstrap": [
- {"step": "set_env_vars", "desc": "Set BRAIN_DIR, WORKING_DIR, DOMAIN_DIR", "required": True},
+ {
+ "step": "set_env_vars",
+ "desc": "Set BRAIN_DIR, WORKING_DIR, DOMAIN_DIR",
+ "required": True,
+ },
{"step": "init_db", "command": "python start.py init", "required": True},
- {"step": "embed_brain", "command": "python embed.py --full", "required": rag.get("active", False)},
- {"step": "rebuild_fts", "command": "python -c \"from query import fts_rebuild; fts_rebuild()\"", "required": True},
+ {
+ "step": "embed_brain",
+ "command": "python embed.py --full",
+ "required": rag.get("active", False),
+ },
+ {
+ "step": "rebuild_fts",
+ "command": 'python -c "from query import fts_rebuild; fts_rebuild()"',
+ "required": True,
+ },
{"step": "validate", "command": "python config_validator.py", "required": False},
],
"compatibility": {
diff --git a/Gradata/src/gradata/_cloud_sync.py b/Gradata/src/gradata/_cloud_sync.py
index 1090211b..a0bd9b07 100644
--- a/Gradata/src/gradata/_cloud_sync.py
+++ b/Gradata/src/gradata/_cloud_sync.py
@@ -22,6 +22,7 @@
- Deletes (cloud rows never get removed by this path).
- Bulk batching beyond one table per HTTP call.
"""
+
from __future__ import annotations
import json
@@ -30,6 +31,7 @@
import sqlite3
import urllib.error
import urllib.request
+import uuid
from datetime import UTC, datetime
from pathlib import Path
from typing import Any, Final
@@ -41,6 +43,19 @@
ENV_ENABLED: Final[str] = "GRADATA_CLOUD_SYNC"
ENV_URL: Final[str] = "GRADATA_CLOUD_URL"
ENV_KEY: Final[str] = "GRADATA_CLOUD_KEY"
+# Aliases — accept the Supabase-native env var names too, so a single .env
+# works for both the cloud backend service and the SDK push path.
+ENV_URL_ALIAS: Final[str] = "GRADATA_SUPABASE_URL"
+ENV_KEY_ALIAS: Final[str] = "GRADATA_SUPABASE_SERVICE_KEY"
+
+
+def _env_url() -> str:
+ return os.environ.get(ENV_URL) or os.environ.get(ENV_URL_ALIAS) or ""
+
+
+def _env_key() -> str:
+ return os.environ.get(ENV_KEY) or os.environ.get(ENV_KEY_ALIAS) or ""
+
# Tables pushed to the cloud. Order matters only for foreign keys; we keep
# the parent tables first so Supabase FK constraints pass on first try.
@@ -53,12 +68,169 @@
"rule_provenance",
)
+# Local SQLite table -> cloud Supabase table when names differ.
+_TABLE_REMAP: Final[dict[str, str]] = {
+ "correction_patterns": "corrections",
+}
+
+# Deterministic UUID namespace — stable across re-runs so upserts work.
+_UUID_NS: Final[uuid.UUID] = uuid.UUID("b8a1c9e2-9f5d-4c9b-8a1e-7f3b2d1a0e4c")
+
+
+def _row_uuid(tenant_id: str, table: str, local_key: Any) -> str:
+ """Return a deterministic UUID for (tenant, table, local_key)."""
+ return str(uuid.uuid5(_UUID_NS, f"{tenant_id}:{table}:{local_key}"))
+
+
+def _maybe_json(value: Any, default: Any = None) -> Any:
+ """Parse a text-encoded JSON column, tolerating nulls + bad data."""
+ if value is None or value == "":
+ return default
+ if not isinstance(value, str):
+ return value
+ try:
+ return json.loads(value)
+ except (ValueError, TypeError):
+ return default
+
+
+def _scrub(value: Any) -> Any:
+ """Recursively clean strings for Postgres JSONB.
+
+ Strips NUL bytes (\\u0000 not allowed) and unpaired UTF-16 surrogates
+    (\\ud800-\\udfff) that can live in a Python str but poison JSONB.
+ """
+ if isinstance(value, str):
+ cleaned = value.replace("\x00", "") if "\x00" in value else value
+ # Round-trip through UTF-8 with surrogate replacement to drop lone halves.
+ try:
+ cleaned.encode("utf-8")
+ except UnicodeEncodeError:
+ cleaned = cleaned.encode("utf-8", "replace").decode("utf-8")
+ return cleaned
+ if isinstance(value, dict):
+ return {k: _scrub(v) for k, v in value.items()}
+ if isinstance(value, list):
+ return [_scrub(v) for v in value]
+ return value
+
+
+def _transform_row(table: str, row: dict[str, Any], tenant_id: str) -> dict[str, Any]:
+ """Map a local SQLite row to the cloud Supabase row shape.
+
+ The cloud schema is narrower: `brain_id` not `tenant_id`, `data` JSONB for
+ extras, UUIDs for ids. We pick the known cloud columns explicitly and
+ pack everything else into `data` so new SDK columns surface without a
+ schema migration.
+ """
+ if table == "events":
+ parsed = _maybe_json(row.get("data_json"), default={"_raw": row.get("data_json")})
+ data_blob: dict[str, Any] = parsed if isinstance(parsed, dict) else {"_value": parsed}
+ # Cloud JSONB rejects control chars / non-JSON-serializable values.
+ # Fallback: stringify via repr if round-trip fails.
+ try:
+ json.dumps(data_blob, ensure_ascii=False)
+ except (TypeError, ValueError):
+ data_blob = {"_repr": repr(data_blob)}
+ tags = _maybe_json(row.get("tags_json"), default=[])
+ if not isinstance(tags, list):
+ tags = []
+ # Cloud `events.session` is INTEGER; local has heterogeneous data
+ # (floats like 4.5, UUIDs). Coerce or drop into data.session_raw.
+ session_raw = row.get("session")
+ session_int: int | None
+ try:
+ session_int = int(session_raw) if session_raw is not None else None
+ except (ValueError, TypeError):
+ session_int = None
+ if "session_raw" not in data_blob:
+ data_blob["session_raw"] = session_raw
+ return {
+ "id": _row_uuid(tenant_id, table, row.get("id")),
+ "brain_id": tenant_id,
+ "type": row.get("type"),
+ "source": row.get("source"),
+ "session": session_int,
+ "data": data_blob,
+ "tags": tags,
+ "created_at": row.get("ts"),
+ }
+
+ if table == "meta_rules":
+ extras = {
+ k: v
+ for k, v in row.items()
+ if k not in ("id", "tenant_id", "principle", "scope", "confidence")
+ }
+ raw_lesson_ids = _maybe_json(row.get("source_lesson_ids"), default=[])
+ if raw_lesson_ids:
+ extras["source_lesson_ids_raw"] = raw_lesson_ids
+ visibility = row.get("visibility") or "private"
+ if visibility not in ("private", "shared", "global"):
+ visibility = "private"
+ principle = row.get("principle") or ""
+ title = (principle[:80] + "...") if len(principle) > 83 else (principle or "meta-rule")
+ return {
+ "id": _row_uuid(tenant_id, table, row.get("id")),
+ "brain_id": tenant_id,
+ "title": title,
+ "principle": principle,
+ "description": principle,
+ "scope": row.get("scope"),
+ "visibility": visibility,
+ "confidence": row.get("confidence"),
+ "data": extras,
+ }
+
+ if table == "correction_patterns":
+ extras = {
+ k: v
+ for k, v in row.items()
+ if k
+ not in (
+ "tenant_id",
+ "session_id",
+ "category",
+ "severity",
+ "representative_text",
+ "created_at",
+ )
+ }
+ raw_severity = row.get("severity")
+ severity = (
+ raw_severity
+ if raw_severity in ("trivial", "minor", "moderate", "major", "rewrite")
+ else "minor"
+ )
+ if severity != raw_severity:
+ extras["severity_raw"] = raw_severity
+ return {
+ "id": _row_uuid(tenant_id, table, row.get("pattern_hash")),
+ "brain_id": tenant_id,
+ "session": row.get("session_id"),
+ "category": row.get("category"),
+ "severity": severity,
+ "description": row.get("representative_text"),
+ "data": extras,
+ "created_at": row.get("created_at"),
+ }
+
+ out: dict[str, Any] = {"brain_id": tenant_id}
+ for k, v in row.items():
+ if k in ("tenant_id",):
+ continue
+ if k == "id" and isinstance(v, int):
+ out["id"] = _row_uuid(tenant_id, table, v)
+ continue
+ out[k] = v
+ return out
+
def enabled() -> bool:
"""True when the env flag is set AND both URL/key are present."""
if os.environ.get(ENV_ENABLED, "").strip() not in ("1", "true", "yes"):
return False
- return bool(os.environ.get(ENV_URL) and os.environ.get(ENV_KEY))
+ return bool(_env_url() and _env_key())
def _iso_now() -> str:
@@ -129,13 +301,79 @@ def _rows_since(
return [dict(zip(cols, row, strict=False)) for row in cur.fetchall()]
-def _post(table: str, rows: list[dict[str, Any]]) -> int:
- """POST rows to Supabase PostgREST. Returns count accepted."""
+_POST_BATCH_SIZE: Final[int] = 500
+
+# Filename in brain_dir for last cloud push error. Surfaced by `gradata doctor`
+# so constraint-violation silent-retry loops become visible without
+# grepping logs.
+_PUSH_ERROR_FILENAME: Final[str] = "cloud_push_error.json"
+
+
+def _scrub_error_body(body: str) -> str:
+ """Reduce a PostgREST error body to the subset safe to persist.
+
+ Postgres ``23505`` errors echo the conflicting row's column values in the
+ ``details``/``hint`` fields (e.g. ``"Key (id)=(uuid-value) already
+ exists"``). ``cloud_push_error.json`` is read and printed by ``gradata
+ doctor``, so leaking those values would surface tenant data on unrelated
+ screens. Keeps ``code`` and the first 120 chars of ``message`` (enough for
+ the constraint name); drops everything else.
+ """
+ try:
+ parsed = json.loads(body)
+ except (json.JSONDecodeError, ValueError):
+        return "<unparseable error body>"
+    if not isinstance(parsed, dict):
+        return "<non-dict error body>"
+ safe: dict[str, Any] = {}
+ for k in ("code", "message"):
+ v = parsed.get(k)
+ if isinstance(v, str):
+ safe[k] = v[:120]
+ return json.dumps(safe)
+
+
+def _post(table: str, rows: list[dict[str, Any]]) -> tuple[int, dict | None]:
+ """POST rows to Supabase PostgREST.
+
+ Returns ``(accepted, error)``. ``error`` is ``None`` on success or
+ ``{"code": int, "message": str, "constraint_violation": bool}`` on
+ failure. Constraint violations (HTTP 409 / Postgres 23505) log at ERROR
+ so the doctor + log aggregators catch silent retry loops.
+
+ Applies ``_TABLE_REMAP`` so local table names that differ from the cloud
+ (e.g. ``correction_patterns`` -> ``corrections``) route correctly. Batches
+ large pushes because PostgREST rejects oversize bodies with opaque
+ "Empty or invalid json" errors.
+ """
if not rows:
- return 0
- url = f"{os.environ[ENV_URL].rstrip('/')}/rest/v1/{table}"
- key = os.environ[ENV_KEY]
- body = json.dumps(rows).encode("utf-8")
+ return 0, None
+ # Dedupe within the batch so ON CONFLICT DO UPDATE doesn't hit the same
+ # row twice in a single statement (Postgres rejects that).
+ seen: set[Any] = set()
+ deduped: list[dict[str, Any]] = []
+ for r in rows:
+ key = r.get("id")
+ if key is not None:
+ if key in seen:
+ continue
+ seen.add(key)
+ deduped.append(r)
+ rows = deduped
+ if len(rows) > _POST_BATCH_SIZE:
+ total = 0
+ first_error: dict | None = None
+ for i in range(0, len(rows), _POST_BATCH_SIZE):
+ count, err = _post(table, rows[i : i + _POST_BATCH_SIZE])
+ total += count
+ if err is not None and first_error is None:
+ first_error = err
+ return total, first_error
+ cloud_table = _TABLE_REMAP.get(table, table)
+ url = f"{_env_url().rstrip('/')}/rest/v1/{cloud_table}"
+ key = _env_key()
+ # Final scrub catches NUL / lone surrogates anywhere in the payload.
+ body = json.dumps(_scrub(rows)).encode("utf-8")
req = urllib.request.Request(
url,
data=body,
@@ -152,15 +390,79 @@ def _post(table: str, rows: list[dict[str, Any]]) -> int:
# URL is sourced from GRADATA_CLOUD_URL env; operator-controlled.
with urllib.request.urlopen(req, timeout=30) as resp:
if 200 <= resp.status < 300:
- return len(rows)
+ return len(rows), None
_log.warning("cloud_sync: %s returned HTTP %s", table, resp.status)
- return 0
+ return 0, {
+ "code": resp.status,
+ "message": f"HTTP {resp.status}",
+ "constraint_violation": False,
+ }
except urllib.error.HTTPError as e:
- _log.warning("cloud_sync: %s HTTP %s: %s", table, e.code, e.read()[:200])
- return 0
+ body_snippet = e.read()[:500].decode("utf-8", errors="replace")
+ is_constraint = e.code == 409 or "23505" in body_snippet
+ scrubbed = _scrub_error_body(body_snippet)
+ if is_constraint:
+ _log.error(
+ "cloud_sync: %s constraint violation (HTTP %s): %s",
+ table,
+ e.code,
+ scrubbed,
+ )
+ else:
+ _log.warning(
+ "cloud_sync: %s HTTP %s: %s",
+ table,
+ e.code,
+ scrubbed,
+ )
+ return 0, {
+ "code": e.code,
+ "message": scrubbed,
+ "constraint_violation": is_constraint,
+ }
except urllib.error.URLError as e:
_log.warning("cloud_sync: %s network error: %s", table, e)
- return 0
+ return 0, {
+ "code": 0,
+ "message": f"network error: {e}",
+ "constraint_violation": False,
+ }
+
+
+def _record_push_error(brain_dir: Path, error: dict) -> None:
+ """Persist last cloud-push error so `gradata doctor` surfaces it.
+
+ Atomic: writes to a tmp sibling then ``os.replace`` — on any platform
+ concurrent readers never observe a truncated-then-rewritten file. This
+ matters because daemon + MCP server can both call ``push()`` while a user
+ runs ``gradata doctor``; a partial read would mask the violation as
+ ``error file unreadable`` instead of surfacing the constraint.
+
+ Best-effort: swallows OSError (read-only FS, permissions) because cloud
+ push must not take down the caller on disk hiccups.
+ """
+ target = brain_dir / _PUSH_ERROR_FILENAME
+ tmp = target.with_suffix(target.suffix + ".tmp")
+ try:
+ payload = {**error, "recorded_at": _iso_now()}
+ tmp.write_text(json.dumps(payload, indent=2), encoding="utf-8")
+ os.replace(tmp, target)
+ except OSError as exc:
+ _log.debug("cloud_sync: could not record push error: %s", exc)
+ try:
+ if tmp.exists():
+ tmp.unlink()
+ except OSError:
+ pass
+
+
+def _clear_push_error(brain_dir: Path) -> None:
+ try:
+ p = brain_dir / _PUSH_ERROR_FILENAME
+ if p.exists():
+ p.unlink()
+ except OSError as exc:
+ _log.debug("cloud_sync: could not clear push error: %s", exc)
def _resolve_db(brain_dir: str | Path) -> Path | None:
@@ -204,16 +506,31 @@ def push(brain_dir: str | Path) -> dict[str, int]:
pushed: dict[str, int] = {}
all_ok = True
started = _iso_now()
+ last_error: dict | None = None
for table in PUSH_TABLES:
rows = _rows_since(conn, table, tenant_id, since)
if not rows:
continue
- accepted = _post(table, rows)
+ transformed = []
+ for r in rows:
+ try:
+ transformed.append(_transform_row(table, r, tenant_id))
+ except Exception as exc:
+ _log.warning("cloud_sync: skipping malformed row in %s: %s", table, exc)
+ all_ok = False
+ if not transformed:
+ continue
+ accepted, error = _post(table, transformed)
pushed[table] = accepted
- if accepted != len(rows):
+ if error is not None and last_error is None:
+ last_error = {**error, "table": table}
+ if accepted != len(transformed):
all_ok = False
if pushed and all_ok:
_mark_push(conn, tenant_id, started)
+ _clear_push_error(brain)
+ elif last_error is not None:
+ _record_push_error(brain, last_error)
return pushed
finally:
conn.close()
diff --git a/Gradata/src/gradata/_config.py b/Gradata/src/gradata/_config.py
index 592702c3..28dbfc73 100644
--- a/Gradata/src/gradata/_config.py
+++ b/Gradata/src/gradata/_config.py
@@ -6,6 +6,7 @@
are defaults that can be overridden by brain/taxonomy.json. See reload_config()
and the _tag_taxonomy.py reload mechanism.
"""
+
from __future__ import annotations
import json
@@ -179,7 +180,13 @@ def reload_config(brain_dir: str | Path | None = None) -> None:
# Always preserve the "default" fallback
new_weights = data["memory_type_weights"]
if "default" not in new_weights:
- new_weights["default"] = MEMORY_TYPE_WEIGHTS.get("default", {
- "episodic": 1.0, "semantic": 1.0, "procedural": 1.0, "strategic": 1.0,
- })
+ new_weights["default"] = MEMORY_TYPE_WEIGHTS.get(
+ "default",
+ {
+ "episodic": 1.0,
+ "semantic": 1.0,
+ "procedural": 1.0,
+ "strategic": 1.0,
+ },
+ )
MEMORY_TYPE_WEIGHTS.update(new_weights)
diff --git a/Gradata/src/gradata/_config_paths.py b/Gradata/src/gradata/_config_paths.py
index 57efb91d..4d1e8d6f 100644
--- a/Gradata/src/gradata/_config_paths.py
+++ b/Gradata/src/gradata/_config_paths.py
@@ -15,6 +15,7 @@
paths from ``Path.home()`` directly. That keeps future work (XDG compliance,
Windows %APPDATA%, sandboxed test overrides) in one place.
"""
+
from __future__ import annotations
import os
diff --git a/Gradata/src/gradata/_context_compile.py b/Gradata/src/gradata/_context_compile.py
index 3dde9bf6..d1bcc7fe 100644
--- a/Gradata/src/gradata/_context_compile.py
+++ b/Gradata/src/gradata/_context_compile.py
@@ -5,11 +5,14 @@
returns formatted context injection.
"""
+import logging
import re
from typing import TYPE_CHECKING
import gradata._paths as _p
+_log = logging.getLogger(__name__)
+
if TYPE_CHECKING:
from gradata._paths import BrainContext
@@ -88,8 +91,8 @@ def compile_context(
txt = r.get("text", "")[:100]
lines.append(f"- [{src}] {txt}")
return "\n".join(lines)
- except Exception:
- pass
+ except Exception as e:
+ _log.debug("Fallback keyword search failed (non-fatal): %s", e)
return ""
try:
diff --git a/Gradata/src/gradata/_context_packet.py b/Gradata/src/gradata/_context_packet.py
index 9d3a2e9a..821300b7 100644
--- a/Gradata/src/gradata/_context_packet.py
+++ b/Gradata/src/gradata/_context_packet.py
@@ -6,6 +6,7 @@
import contextlib
import json
+import logging
import sqlite3
from datetime import date, datetime
from pathlib import Path
@@ -13,6 +14,8 @@
import gradata._paths as _p
+_log = logging.getLogger(__name__)
+
if TYPE_CHECKING:
from gradata._paths import BrainContext
@@ -92,8 +95,8 @@ def _load_user_scope(ctx: "BrainContext | None" = None) -> dict:
}
for e in corrections
]
- except Exception:
- pass
+ except Exception as e:
+ _log.debug("user_scope: corrections query failed (non-fatal): %s", e)
patterns_file = ctx.patterns_file if ctx else _p.PATTERNS_FILE
if patterns_file.exists():
result["frameworks"] = _safe_read_lines(patterns_file, 15)
@@ -126,8 +129,8 @@ def _load_prospect_context(prospect_name: str, ctx: "BrainContext | None" = None
{"source": r.get("source", ""), "text": r.get("text", "")[:120]}
for r in fts_results[:2]
]
- except Exception:
- pass
+ except Exception as e:
+ _log.debug("prospect FTS search failed (non-fatal): %s", e)
try:
from gradata._fact_extractor import query_facts
@@ -137,8 +140,8 @@ def _load_prospect_context(prospect_name: str, ctx: "BrainContext | None" = None
{"type": f["fact_type"], "value": f["fact_value"], "confidence": f["confidence"]}
for f in facts[:5]
]
- except Exception:
- pass
+ except Exception as e:
+ _log.debug("prospect fact query failed (non-fatal): %s", e)
try:
all_events = _events_query(limit=50)
prospect_lower = prospect_name.lower()
@@ -156,8 +159,8 @@ def _load_prospect_context(prospect_name: str, ctx: "BrainContext | None" = None
if len(interactions) >= 2:
break
result["recent_interactions"] = interactions
- except Exception:
- pass
+ except Exception as e:
+ _log.debug("prospect interaction query failed (non-fatal): %s", e)
return result
@@ -173,8 +176,8 @@ def _load_drafting_context(ctx: "BrainContext | None" = None) -> dict:
if "[PROVEN]" in line or "[EMERGING]" in line
]
result["patterns"] = "\n".join(relevant[:10])
- except Exception:
- pass
+ except Exception as e:
+ _log.debug("drafting patterns read failed (non-fatal): %s", e)
domain_dir = ctx.domain_dir if ctx else _p.DOMAIN_DIR
soul_path = domain_dir / "soul.md"
result["voice_guidelines"] = _safe_read_lines(soul_path, 20)
@@ -189,8 +192,8 @@ def _load_debug_context(topic: str, ctx: "BrainContext | None" = None) -> dict:
{"source": r.get("source", ""), "text": r.get("text", "")[:150]}
for r in fts_results[:2]
]
- except Exception:
- pass
+ except Exception as e:
+ _log.debug("debug FTS search failed (non-fatal): %s", e)
try:
failures = _events_query(event_type="TOOL_FAILURE", limit=3)
result["recent_failures"] = [
@@ -200,8 +203,8 @@ def _load_debug_context(topic: str, ctx: "BrainContext | None" = None) -> dict:
}
for e in failures
]
- except Exception:
- pass
+ except Exception as e:
+ _log.debug("debug failures query failed (non-fatal): %s", e)
try:
corrections = _events_query(event_type="CORRECTION", limit=10)
topic_lower = topic.lower()
@@ -218,8 +221,8 @@ def _load_debug_context(topic: str, ctx: "BrainContext | None" = None) -> dict:
if len(related) >= 3:
break
result["corrections"] = related
- except Exception:
- pass
+ except Exception as e:
+ _log.debug("debug corrections query failed (non-fatal): %s", e)
return result
@@ -233,8 +236,8 @@ def _load_audit_context(session: int, ctx: "BrainContext | None" = None) -> dict
if row:
result["metrics"] = dict(row)
conn.close()
- except Exception:
- pass
+ except Exception as e:
+ _log.debug("audit metrics query failed (non-fatal): %s", e)
try:
outputs = _events_query(event_type="OUTPUT", session=session, limit=20)
result["outputs"] = [
@@ -247,8 +250,8 @@ def _load_audit_context(session: int, ctx: "BrainContext | None" = None) -> dict
}
for e in outputs
]
- except Exception:
- pass
+ except Exception as e:
+ _log.debug("audit outputs query failed (non-fatal): %s", e)
try:
gates = _events_query(event_type="GATE_RESULT", session=session, limit=20)
result["gates"] = [
@@ -258,8 +261,8 @@ def _load_audit_context(session: int, ctx: "BrainContext | None" = None) -> dict
}
for e in gates
]
- except Exception:
- pass
+ except Exception as e:
+ _log.debug("audit gates query failed (non-fatal): %s", e)
with contextlib.suppress(Exception):
result["correction_rate"] = _correction_rate(last_n_sessions=5)
return result
@@ -276,8 +279,8 @@ def _load_wrapup_context(session: int, ctx: "BrainContext | None" = None) -> dic
}
for e in events
]
- except Exception:
- pass
+ except Exception as e:
+ _log.debug("wrapup events query failed (non-fatal): %s", e)
try:
today_str = date.today().isoformat()
prospects_dir = ctx.prospects_dir if ctx else _p.PROSPECTS_DIR
@@ -291,8 +294,8 @@ def _load_wrapup_context(session: int, ctx: "BrainContext | None" = None) -> dic
result["modified_prospects"].append(f.stem)
except Exception:
continue
- except Exception:
- pass
+ except Exception as e:
+ _log.debug("wrapup prospects scan failed (non-fatal): %s", e)
loop_state = ctx.loop_state if ctx else _p.LOOP_STATE
if loop_state.exists():
result["current_loop_state"] = _safe_read(loop_state, limit_chars=500)
diff --git a/Gradata/src/gradata/_core.py b/Gradata/src/gradata/_core.py
index 3dd08ac6..4cfead85 100644
--- a/Gradata/src/gradata/_core.py
+++ b/Gradata/src/gradata/_core.py
@@ -31,17 +31,25 @@
# Map evaluator dimension names to correction categories
_DIMENSION_CATEGORY_MAP = {
- "task_alignment": "ACCURACY", "completeness": "STRUCTURE",
- "accuracy": "ACCURACY", "clarity": "DRAFTING", "conciseness": "DRAFTING",
- "tone": "TONE", "formatting": "FORMAT", "security": "SECURITY",
+ "task_alignment": "ACCURACY",
+ "completeness": "STRUCTURE",
+ "accuracy": "ACCURACY",
+ "clarity": "DRAFTING",
+ "conciseness": "DRAFTING",
+ "tone": "TONE",
+ "formatting": "FORMAT",
+ "security": "SECURITY",
}
def _filter_lessons_by_state(lessons, min_state: str = "PATTERN"):
"""Filter lessons by minimum state rank."""
min_rank = _STATE_RANK.get(min_state.upper(), 1)
- return [lesson for lesson in lessons
- if _STATE_RANK.get(lesson.state.value, -1) >= min_rank and lesson.confidence > 0.0]
+ return [
+ lesson
+ for lesson in lessons
+ if _STATE_RANK.get(lesson.state.value, -1) >= min_rank and lesson.confidence > 0.0
+ ]
# ── correct() ──────────────────────────────────────────────────────────
@@ -72,19 +80,27 @@ def _attribute_domain_fires(
rule.domain_scores[domain]["misfires"] += 1
# Record conflict in rule graph
- if hasattr(brain, '_rule_graph') and brain._rule_graph:
+ if hasattr(brain, "_rule_graph") and brain._rule_graph:
rule_id = f"{rule.category}:{hash(rule.description) % 10000:04d}"
correction_id = f"{correction_category}:{hash(correction_desc) % 10000:04d}"
brain._rule_graph.add_conflict(rule_id, correction_id)
def brain_correct(
- brain: Brain, draft: str, final: str, *,
- category: str | None = None, context: dict | None = None,
- session: int | None = None, agent_type: str | None = None,
- approval_required: bool = False, dry_run: bool = False,
- min_severity: str = "as-is", scope: str | None = None,
- applies_to: str | None = None, auto_heal: bool = False,
+ brain: Brain,
+ draft: str,
+ final: str,
+ *,
+ category: str | None = None,
+ context: dict | None = None,
+ session: int | None = None,
+ agent_type: str | None = None,
+ approval_required: bool = False,
+ dry_run: bool = False,
+ min_severity: str = "as-is",
+ scope: str | None = None,
+ applies_to: str | None = None,
+ auto_heal: bool = False,
) -> dict:
"""Record a correction: user edited draft into final version."""
# Input validation
@@ -96,7 +112,9 @@ def brain_correct(
raise ValueError("draft and final are identical — no correction detected.")
max_input = 100_000
if len(draft) + len(final) > max_input:
- raise ValueError(f"Combined input length ({len(draft) + len(final)}) exceeds limit ({max_input}).")
+ raise ValueError(
+ f"Combined input length ({len(draft) + len(final)}) exceeds limit ({max_input})."
+ )
if session is not None and (not isinstance(session, int) or session < 1):
raise ValueError(f"session must be a positive integer, got {session!r}")
@@ -105,7 +123,9 @@ def brain_correct(
if scope is not None:
scope = str(scope).strip().lower() or None
if scope is not None and scope not in _valid_scopes:
- raise ValueError(f"Unsupported correction scope: {scope!r}. Must be one of {_valid_scopes}")
+ raise ValueError(
+ f"Unsupported correction scope: {scope!r}. Must be one of {_valid_scopes}"
+ )
# Normalize free-form scope binding (sim21). Any truthy string is accepted;
# empty strings collapse to None so callers can pass through user input
@@ -131,19 +151,30 @@ def brain_correct(
from gradata.enhancements.diff_engine import compute_diff
from gradata.enhancements.edit_classifier import classify_edits, summarize_edits
except ImportError:
- data = {"draft_text": draft[:2000], "final_text": final[:2000],
- "edit_distance": 0.0, "severity": "unknown", "outcome": "unknown",
- "major_edit": False, "category": category or "UNKNOWN",
- "summary": "", "classifications": []}
- result = brain.emit("CORRECTION", "brain.correct", data,
- [f"category:{category or 'UNKNOWN'}"], session)
- brain.bus.emit("correction.created", {
- "lesson": {},
+ data = {
+ "draft_text": draft[:2000],
+ "final_text": final[:2000],
+ "edit_distance": 0.0,
"severity": "unknown",
- "category": category or "GENERAL",
- "diff": "",
- "source": "human",
- })
+ "outcome": "unknown",
+ "major_edit": False,
+ "category": category or "UNKNOWN",
+ "summary": "",
+ "classifications": [],
+ }
+ result = brain.emit(
+ "CORRECTION", "brain.correct", data, [f"category:{category or 'UNKNOWN'}"], session
+ )
+ brain.bus.emit(
+ "correction.created",
+ {
+ "lesson": {},
+ "severity": "unknown",
+ "category": category or "GENERAL",
+ "diff": "",
+ "source": "human",
+ },
+ )
return result
from gradata._scope import build_scope
@@ -158,6 +189,7 @@ def brain_correct(
# PII redaction — runs AFTER extraction on full text, BEFORE storage
try:
from gradata.safety import redact_pii_with_report
+
draft_redacted, _ = redact_pii_with_report(draft)
final_redacted, _ = redact_pii_with_report(final)
except ImportError:
@@ -167,6 +199,7 @@ def brain_correct(
scope_data = {}
if scope_obj:
from gradata._scope import scope_to_dict
+
scope_data = scope_to_dict(scope_obj)
# Tag correction scope (default: domain)
@@ -182,6 +215,7 @@ def brain_correct(
# cannot graduate to a RULE without an explicit promote action.
try:
from gradata.security.correction_hash import build_provenance
+
_prov_meta = build_provenance(draft, final, context)
except Exception as _prov_err: # pragma: no cover - defensive
_log.debug("Provenance hash computation failed: %s", _prov_err)
@@ -202,6 +236,7 @@ def brain_correct(
adversarial_hits: list[str] = []
try:
from gradata.security.adversarial_blocklist import scan_correction
+
adversarial_hits = scan_correction(draft, final)
except Exception as _adv_err: # pragma: no cover - defensive
_log.debug("Adversarial-phrase scan failed: %s", _adv_err)
@@ -219,19 +254,28 @@ def brain_correct(
structured_correction = None
try:
from gradata.correction_detector import extract_structured_correction
+
structured_correction = extract_structured_correction(
- draft_redacted, final_redacted, context=str(context or ""),
+ draft_redacted,
+ final_redacted,
+ context=str(context or ""),
)
except (ImportError, Exception) as _sc_err:
_log.debug("Structured correction extraction skipped: %s", _sc_err)
data = {
- "draft_text": draft_redacted[:2000], "final_text": final_redacted[:2000],
- "edit_distance": diff.edit_distance, "severity": diff.severity,
- "outcome": diff.severity, "major_edit": diff.severity in ("major", "discarded"),
- "category": category or "UNKNOWN", "summary": summary,
- "classifications": [{"category": c.category, "severity": c.severity,
- "description": c.description} for c in classifications],
+ "draft_text": draft_redacted[:2000],
+ "final_text": final_redacted[:2000],
+ "edit_distance": diff.edit_distance,
+ "severity": diff.severity,
+ "outcome": diff.severity,
+ "major_edit": diff.severity in ("major", "discarded"),
+ "category": category or "UNKNOWN",
+ "summary": summary,
+ "classifications": [
+ {"category": c.category, "severity": c.severity, "description": c.description}
+ for c in classifications
+ ],
"lines_added": diff.summary_stats.get("lines_added", 0),
"lines_removed": diff.summary_stats.get("lines_removed", 0),
"correction_scope": correction_scope,
@@ -273,6 +317,7 @@ def brain_correct(
# Auto-extract patterns
try:
from gradata.enhancements.pattern_extractor import extract_patterns
+
patterns = extract_patterns(classifications, scope=scope_obj)
if patterns:
event["patterns_extracted"] = len(patterns)
@@ -283,10 +328,14 @@ def brain_correct(
# session window so repeat corrections don't inflate fire_count/confidence.
# See gradata/enhancements/dedup.py for MERGE-vs-DROP policy notes.
from gradata.enhancements.dedup import annotate_event_with_dedup
+
is_observation_dup = annotate_event_with_dedup(
- event, brain.db_path,
- draft=draft_redacted, final=final_redacted,
- category=category, session=session,
+ event,
+ brain.db_path,
+ draft=draft_redacted,
+ final=final_redacted,
+ category=category,
+ session=session,
)
# Close the loop: correction → lesson
@@ -302,7 +351,9 @@ def brain_correct(
update_confidence,
)
- if not is_observation_dup and _SEV_RANK.get(diff.severity, 0) >= _SEV_RANK.get(min_severity, 0):
+ if not is_observation_dup and _SEV_RANK.get(diff.severity, 0) >= _SEV_RANK.get(
+ min_severity, 0
+ ):
lessons_path = brain._find_lessons_path(create=True)
if lessons_path:
existing_text = ""
@@ -312,8 +363,10 @@ def brain_correct(
cat = (category or "UNKNOWN").upper()
if classifications:
- primary = next((c for c in classifications if c.category.upper() == cat),
- classifications[0])
+ primary = next(
+ (c for c in classifications if c.category.upper() == cat),
+ classifications[0],
+ )
# Check convergence gate — skip extraction if category is settled
convergence_data = brain._get_convergence()
cat_convergence = convergence_data.get("by_category", {}).get(cat, {})
@@ -331,8 +384,12 @@ def brain_correct(
from gradata.enhancements.behavioral_extractor import (
extract_instruction,
)
+
behavioral_desc = extract_instruction(
- draft, final, primary, category=cat,
+ draft,
+ final,
+ primary,
+ category=cat,
)
if not behavioral_desc:
# Fallback to keyword templates
@@ -340,12 +397,15 @@ def brain_correct(
extract_behavioral_instruction,
)
from gradata.enhancements.instruction_cache import InstructionCache
+
if not isinstance(brain._instruction_cache, InstructionCache):
brain._instruction_cache = InstructionCache(
lessons_path.parent / "instruction_cache.json"
)
behavioral_desc = extract_behavioral_instruction(
- diff, primary, cache=brain._instruction_cache, # type: ignore[arg-type]
+ diff,
+ primary,
+ cache=brain._instruction_cache, # type: ignore[arg-type]
)
desc = behavioral_desc or primary.description
except Exception as e:
@@ -368,11 +428,16 @@ def brain_correct(
best_match = existing_l
from gradata._config import get_similarity_threshold
+
sim_threshold = get_similarity_threshold(cat)
if best_match and best_sim >= sim_threshold:
if dry_run:
event["dry_run"] = True
- event["would_reinforce"] = {"category": cat, "description": best_match.description[:200], "similarity": round(best_sim, 3)}
+ event["would_reinforce"] = {
+ "category": cat,
+ "description": best_match.description[:200],
+ "similarity": round(best_sim, 3),
+ }
return event
best_match.fire_count += 1
if len(desc) > len(best_match.description):
@@ -383,24 +448,33 @@ def brain_correct(
event["lesson_reinforced"] = True
event["lesson_category"] = cat
try:
- brain.emit("LESSON_CHANGE", "brain.correct", {
- "action": "reinforced", "lesson_category": cat,
- "lesson_description": best_match.description[:200],
- "fire_count": best_match.fire_count,
- "source_correction_id": event.get("id"),
- }, [f"category:{cat}", "provenance"], session)
+ brain.emit(
+ "LESSON_CHANGE",
+ "brain.correct",
+ {
+ "action": "reinforced",
+ "lesson_category": cat,
+ "lesson_description": best_match.description[:200],
+ "fire_count": best_match.fire_count,
+ "source_correction_id": event.get("id"),
+ },
+ [f"category:{cat}", "provenance"],
+ session,
+ )
except Exception as e:
_log.debug("Provenance emit failed: %s", e)
# Causal chain: correction reinforces existing rule
try:
from gradata.enhancements.causal_chains import CausalChain, CausalRelation
from gradata.enhancements.meta_rules import _lesson_id
+
if not hasattr(brain, "_causal_chain"):
brain._causal_chain = CausalChain() # type: ignore[attr-defined]
correction_id = str(event.get("id", ""))
rule_id = _lesson_id(best_match)
brain._causal_chain.add_link( # type: ignore[attr-defined]
- correction_id, rule_id,
+ correction_id,
+ rule_id,
CausalRelation.REINFORCEMENT,
strength=min(1.0, best_match.confidence),
session=session or 0,
@@ -409,13 +483,16 @@ def brain_correct(
pass
else:
import json as _json
+
lesson_scope = ""
if agent_type or context:
scope_ctx = dict(context or {})
if agent_type:
scope_ctx["agent_type"] = agent_type
scope_obj = build_scope(scope_ctx)
- scope_dict = {k: v for k, v in scope_obj.__dict__.items() if v and v != "normal"}
+ scope_dict = {
+ k: v for k, v in scope_obj.__dict__.items() if v and v != "normal"
+ }
else:
scope_dict = {}
# Always tag correction_scope on new lessons
@@ -427,18 +504,27 @@ def brain_correct(
init_conf = 0.0 if approval_required else INITIAL_CONFIDENCE
correction_id = str(event.get("id", "")) if event.get("id") else ""
new_lesson = Lesson(
- date=_date.today().isoformat(), state=LessonState.INSTINCT,
- confidence=init_conf, category=cat, description=desc,
- scope_json=lesson_scope, agent_type=agent_type or "",
+ date=_date.today().isoformat(),
+ state=LessonState.INSTINCT,
+ confidence=init_conf,
+ category=cat,
+ description=desc,
+ scope_json=lesson_scope,
+ agent_type=agent_type or "",
correction_event_ids=[correction_id] if correction_id else [],
- pending_approval=approval_required)
+ pending_approval=approval_required,
+ )
if dry_run:
event["dry_run"] = True
event["proposed_lesson"] = {
- "category": cat, "description": desc,
- "state": LessonState.INSTINCT.value, "confidence": init_conf,
- "scope": lesson_scope or None, "approval_required": approval_required}
+ "category": cat,
+ "description": desc,
+ "state": LessonState.INSTINCT.value,
+ "confidence": init_conf,
+ "scope": lesson_scope or None,
+ "approval_required": approval_required,
+ }
return event
existing_lessons.append(new_lesson)
@@ -447,12 +533,14 @@ def brain_correct(
try:
from gradata.enhancements.causal_chains import CausalChain, CausalRelation
from gradata.enhancements.meta_rules import _lesson_id
+
if not hasattr(brain, "_causal_chain"):
brain._causal_chain = CausalChain() # type: ignore[attr-defined]
correction_id = str(event.get("id", ""))
rule_id = _lesson_id(new_lesson)
brain._causal_chain.add_link( # type: ignore[attr-defined]
- correction_id, rule_id,
+ correction_id,
+ rule_id,
CausalRelation.CORRECTION_TO_RULE,
strength=1.0,
session=session or 0,
@@ -467,39 +555,64 @@ def brain_correct(
from gradata._db import get_connection
from gradata._tenant import tenant_for as _tenant_for
+
_tid = _tenant_for(brain.dir)
with get_connection(brain.db_path) as conn:
with _ctx_mod.suppress(_sqlite3_mod.OperationalError):
- conn.execute("ALTER TABLE pending_approvals ADD COLUMN tenant_id TEXT")
+ conn.execute(
+ "ALTER TABLE pending_approvals ADD COLUMN tenant_id TEXT"
+ )
conn.execute(
"INSERT INTO pending_approvals "
"(lesson_category, lesson_description, draft_text, final_text, "
"severity, correction_event_id, agent_type, created_at, tenant_id) "
"VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)",
- (cat, desc[:500], draft_redacted[:2000], final_redacted[:2000],
- diff.severity, correction_id, agent_type or "",
- _date.today().isoformat(), _tid))
+ (
+ cat,
+ desc[:500],
+ draft_redacted[:2000],
+ final_redacted[:2000],
+ diff.severity,
+ correction_id,
+ agent_type or "",
+ _date.today().isoformat(),
+ _tid,
+ ),
+ )
except Exception as e:
_log.debug("pending_approvals insert failed: %s", e)
_log.info("New lesson: [INSTINCT:%.2f] %s", init_conf, cat)
try:
- brain.emit("LESSON_CHANGE", "brain.correct", {
- "action": "created", "lesson_category": cat,
- "lesson_description": desc[:200],
- "initial_confidence": INITIAL_CONFIDENCE,
- "source_correction_id": event.get("id"),
- }, [f"category:{cat}", "provenance"], session)
+ brain.emit(
+ "LESSON_CHANGE",
+ "brain.correct",
+ {
+ "action": "created",
+ "lesson_category": cat,
+ "lesson_description": desc[:200],
+ "initial_confidence": INITIAL_CONFIDENCE,
+ "source_correction_id": event.get("id"),
+ },
+ [f"category:{cat}", "provenance"],
+ session,
+ )
except Exception as e:
_log.debug("Provenance emit failed: %s", e)
# Update confidence
- correction_data = [{"category": cat, "severity_label": diff.severity, "description": desc}]
+ correction_data = [
+ {"category": cat, "severity_label": diff.severity, "description": desc}
+ ]
severity_data = {cat: diff.severity}
existing_lessons = update_confidence(
- existing_lessons, correction_data, severity_data=severity_data,
- salt=getattr(brain, "_brain_salt", ""))
+ existing_lessons,
+ correction_data,
+ severity_data=severity_data,
+ salt=getattr(brain, "_brain_salt", ""),
+ )
from gradata._db import write_lessons_safe
+
write_lessons_safe(lessons_path, format_lessons(existing_lessons))
if "lessons_created" not in event:
event["lessons_updated"] = True
@@ -530,7 +643,8 @@ def brain_correct(
failure["correction_event_id"] = event.get("id")
failure["correction_severity"] = diff.severity
brain.emit(
- "RULE_FAILURE", "brain.correct:self_healing",
+ "RULE_FAILURE",
+ "brain.correct:self_healing",
failure,
[f"category:{failure['failed_rule_category']}", "self_healing"],
session,
@@ -568,9 +682,11 @@ def brain_correct(
"revert_command", f"gradata rule revert {_rid}"
)
_log.warning(
- "auto-healed R-%s: confidence %s -> %s, "
- "revert with `%s`",
- _rid, _old, _new, _revert,
+ "auto-healed R-%s: confidence %s -> %s, revert with `%s`",
+ _rid,
+ _old,
+ _new,
+ _revert,
)
except Exception: # pragma: no cover — defensive
pass
@@ -580,7 +696,7 @@ def brain_correct(
_log.debug("Self-healing detection failed: %s", e)
# Persist rule graph
- if hasattr(brain, '_rule_graph') and brain._rule_graph:
+ if hasattr(brain, "_rule_graph") and brain._rule_graph:
with contextlib.suppress(Exception):
brain._rule_graph.save()
@@ -589,9 +705,14 @@ def brain_correct(
from datetime import date as _fts_date
from gradata._query import fts_index
- fts_index(source="corrections", file_type="correction",
- text=f"{category or 'UNKNOWN'}: {summary or diff.severity} - {final_redacted[:500]}",
- embed_date=_fts_date.today().isoformat(), ctx=brain.ctx)
+
+ fts_index(
+ source="corrections",
+ file_type="correction",
+ text=f"{category or 'UNKNOWN'}: {summary or diff.severity} - {final_redacted[:500]}",
+ embed_date=_fts_date.today().isoformat(),
+ ctx=brain.ctx,
+ )
except Exception as e:
_log.debug("FTS index failed: %s", e)
@@ -602,9 +723,14 @@ def brain_correct(
if brain._learning_pipeline:
try:
pipeline_result = brain._learning_pipeline.process_correction(
- draft=draft, final=final, severity=diff.severity,
- category=category or "UNKNOWN", session_id=str(session or ""),
- task_type=task_type, occurrence_count=1)
+ draft=draft,
+ final=final,
+ severity=diff.severity,
+ category=category or "UNKNOWN",
+ session_id=str(session or ""),
+ task_type=task_type,
+ occurrence_count=1,
+ )
event["pipeline"] = {
"stages_completed": pipeline_result.stages_completed,
"is_high_value": pipeline_result.is_high_value,
@@ -613,7 +739,8 @@ def brain_correct(
"cluster_id": pipeline_result.cluster_id,
"context_bracket": pipeline_result.context_bracket,
"memory_type": pipeline_result.memory_type,
- "processing_time_ms": pipeline_result.processing_time_ms}
+ "processing_time_ms": pipeline_result.processing_time_ms,
+ }
except Exception as e:
_log.warning("Learning pipeline failed: %s", e)
@@ -621,17 +748,21 @@ def brain_correct(
if agent_type:
try:
from gradata.enhancements.pattern_integration import feed_q_router
+
feed_q_router(brain, diff.severity, agent_type=agent_type, task_type=task_type)
except Exception as e:
_log.debug("Q-router feed failed: %s", e)
- brain.bus.emit("correction.created", {
- "lesson": event.get("lesson", {}),
- "severity": event.get("data", {}).get("severity", "unknown"),
- "category": category or "GENERAL",
- "diff": str(event.get("diff", "")),
- "source": "human",
- })
+ brain.bus.emit(
+ "correction.created",
+ {
+ "lesson": event.get("lesson", {}),
+ "severity": event.get("data", {}).get("severity", "unknown"),
+ "category": category or "GENERAL",
+ "diff": str(event.get("diff", "")),
+ "source": "human",
+ },
+ )
# Correction provenance — HMAC-signed proof of who corrected what
try:
@@ -639,6 +770,7 @@ def brain_correct(
import json
from gradata.security.correction_provenance import create_provenance_record
+
correction_hash = _hashlib.sha256(
json.dumps([draft, final], separators=(",", ":")).encode()
).hexdigest()
@@ -648,7 +780,8 @@ def brain_correct(
_log.warning("brain._brain_salt is empty; skipping provenance HMAC")
raise ValueError("empty salt")
provenance = create_provenance_record(
- user_id=user_id, correction_hash=correction_hash,
+ user_id=user_id,
+ correction_hash=correction_hash,
session=session or 0,
salt=_prov_salt,
)
@@ -665,17 +798,24 @@ def brain_correct(
def _graduation_message(old_state: str, lesson: Lesson) -> str:
"""Generate a user-facing graduation notification message."""
if lesson.state.value == "PATTERN":
- return (f"You've corrected this {lesson.fire_count} times — "
- f"Gradata learned it: \"{lesson.description[:80]}\"")
+ return (
+ f"You've corrected this {lesson.fire_count} times — "
+ f'Gradata learned it: "{lesson.description[:80]}"'
+ )
elif lesson.state.value == "RULE":
- return (f"Graduated to RULE: \"{lesson.description[:80]}\" — "
- f"this correction is now permanent ({lesson.confidence:.0%} confidence)")
+ return (
+ f'Graduated to RULE: "{lesson.description[:80]}" — '
+ f"this correction is now permanent ({lesson.confidence:.0%} confidence)"
+ )
return f"Lesson updated: {lesson.description[:80]}"
def brain_end_session(
- brain: Brain, *, session_corrections: list[dict] | None = None,
- session_type: str = "full", machine_mode: bool | None = None,
+ brain: Brain,
+ *,
+ session_corrections: list[dict] | None = None,
+ session_type: str = "full",
+ machine_mode: bool | None = None,
skip_meta_rules: bool = False,
) -> dict:
"""Run full graduation sweep at end of session."""
@@ -699,17 +839,22 @@ def brain_end_session(
# when two lessons share the same first 40 chars of description.
def _lesson_key(lesson):
return f"{lesson.category}:{lesson.description[:60]}"
+
before_states = {_lesson_key(lesson): lesson.state.value for lesson in lessons}
lessons = update_confidence(
- lessons, session_corrections or [],
- session_type=session_type, machine_mode=machine_mode,
- salt=getattr(brain, "_brain_salt", ""))
+ lessons,
+ session_corrections or [],
+ session_type=session_type,
+ machine_mode=machine_mode,
+ salt=getattr(brain, "_brain_salt", ""),
+ )
# Auto-detect machine mode: human sessions rarely exceed 30 corrections.
# Previous threshold of 10 misclassified productive human sessions.
- is_machine = machine_mode if machine_mode is not None else (
- len(session_corrections or []) > 30)
+ is_machine = (
+ machine_mode if machine_mode is not None else (len(session_corrections or []) > 30)
+ )
_salt = getattr(brain, "_brain_salt", "")
active, graduated = graduate(lessons, machine_mode=is_machine, salt=_salt, brain=brain)
@@ -731,10 +876,18 @@ def _lesson_key(lesson):
for lesson, old_state, new_state in transitions:
if new_state in ("PATTERN", "RULE"):
try:
- brain.emit("GRADUATION", "end_session", {
- "lesson": lesson.description[:100], "category": lesson.category,
- "from_state": old_state, "to_state": new_state,
- "confidence": lesson.confidence, "fire_count": lesson.fire_count})
+ brain.emit(
+ "GRADUATION",
+ "end_session",
+ {
+ "lesson": lesson.description[:100],
+ "category": lesson.category,
+ "from_state": old_state,
+ "to_state": new_state,
+ "confidence": lesson.confidence,
+ "fire_count": lesson.fire_count,
+ },
+ )
except Exception as e:
_log.debug("Graduation emit failed: %s", e)
# Canary enrollment: every new RULE enters canary state so
@@ -744,22 +897,28 @@ def _lesson_key(lesson):
if new_state == "RULE":
try:
from gradata.enhancements.rule_canary import promote_to_canary
+
promote_to_canary(
- lesson.category, brain.session, db_path=brain.db_path,
+ lesson.category,
+ brain.session,
+ db_path=brain.db_path,
)
except Exception as e:
_log.debug("promote_to_canary failed: %s", e)
# User-facing graduation notification
try:
- brain.bus.emit("lesson.graduated", {
- "category": lesson.category,
- "description": lesson.description[:100],
- "old_state": old_state,
- "new_state": new_state,
- "fire_count": lesson.fire_count,
- "confidence": lesson.confidence,
- "message": _graduation_message(old_state, lesson),
- })
+ brain.bus.emit(
+ "lesson.graduated",
+ {
+ "category": lesson.category,
+ "description": lesson.description[:100],
+ "old_state": old_state,
+ "new_state": new_state,
+ "fire_count": lesson.fire_count,
+ "confidence": lesson.confidence,
+ "message": _graduation_message(old_state, lesson),
+ },
+ )
except Exception as e:
_log.debug("lesson.graduated emit failed: %s", e)
@@ -772,6 +931,7 @@ def _lesson_key(lesson):
from gradata._db import get_connection
from gradata._tenant import tenant_for as _tenant_for
+
now = datetime.now(UTC).isoformat()
_tid = _tenant_for(brain.dir)
with get_connection(brain.db_path) as conn:
@@ -782,8 +942,18 @@ def _lesson_key(lesson):
"INSERT INTO lesson_transitions "
"(lesson_desc, category, old_state, new_state, confidence, "
"fire_count, session, transitioned_at, tenant_id) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)",
- (lesson.description[:100], lesson.category, old_state, new_state,
- lesson.confidence, lesson.fire_count, None, now, _tid))
+ (
+ lesson.description[:100],
+ lesson.category,
+ old_state,
+ new_state,
+ lesson.confidence,
+ lesson.fire_count,
+ None,
+ now,
+ _tid,
+ ),
+ )
except Exception as e:
_log.debug("Lineage logging failed: %s", e)
@@ -793,6 +963,7 @@ def _lesson_key(lesson):
from gradata.audit import write_provenance
from gradata.inspection import _make_rule_id
+
now_prov = datetime.now(UTC).isoformat()
for lesson, _old_state, new_state in transitions:
if new_state in ("PATTERN", "RULE"):
@@ -822,20 +993,33 @@ def _lesson_key(lesson):
all_lessons = active + graduated
from gradata._db import write_lessons_safe
+
if all_lessons: # guard against wiping lessons file when all lessons are killed
write_lessons_safe(lessons_path, format_lessons(all_lessons))
# Archive graduated RULE lessons
- new_rules = [l for l in graduated if l.state.value == "RULE"
- and before_states.get(_lesson_key(l)) != "RULE"]
+ new_rules = [
+ l
+ for l in graduated
+ if l.state.value == "RULE" and before_states.get(_lesson_key(l)) != "RULE"
+ ]
archive_path = lessons_path.parent / "lessons-archive.md"
if new_rules and archive_path.parent.is_dir():
from datetime import date
- archive_text = archive_path.read_text(encoding="utf-8") if archive_path.exists() else "# Lessons Archive"
- archive_lines = [archive_text.rstrip(), f"\n## Graduated {date.today().isoformat()} (auto)"]
+
+ archive_text = (
+ archive_path.read_text(encoding="utf-8")
+ if archive_path.exists()
+ else "# Lessons Archive"
+ )
+ archive_lines = [
+ archive_text.rstrip(),
+ f"\n## Graduated {date.today().isoformat()} (auto)",
+ ]
for r in new_rules:
archive_lines.append(
- f"[{r.date}] {r.category}: {r.description} → Auto-graduated (confidence {r.confidence:.2f})")
+ f"[{r.date}] {r.category}: {r.description} → Auto-graduated (confidence {r.confidence:.2f})"
+ )
archive_path.write_text("\n".join(archive_lines) + "\n", encoding="utf-8")
# Detect session number early so meta-rules and events use the real value
@@ -847,15 +1031,20 @@ def _lesson_key(lesson):
try:
from gradata.enhancements.meta_rules import refresh_meta_rules
from gradata.enhancements.meta_rules_storage import load_meta_rules, save_meta_rules
+
existing_metas = load_meta_rules(brain.db_path)
- llm_key = getattr(brain, '_llm_key', None)
+ llm_key = getattr(brain, "_llm_key", None)
new_metas = refresh_meta_rules(
- all_lessons, existing_metas, session_corrections or [],
+ all_lessons,
+ existing_metas,
+ session_corrections or [],
current_session=current_session,
- **({'api_key': llm_key} if llm_key else {}))
+ **({"api_key": llm_key} if llm_key else {}),
+ )
if new_metas:
if any(l.parent_meta_rule_id for l in all_lessons):
from gradata.enhancements.self_improvement import propagate_confidence
+
propagate_confidence(all_lessons, new_metas)
# Re-write lessons to persist propagated confidence
if all_lessons:
@@ -867,18 +1056,25 @@ def _lesson_key(lesson):
existing_ids = {m.id for m in existing_metas}
meta_rules_discovered = sum(1 for m in new_metas if m.id not in existing_ids)
if meta_rules_discovered > 0:
- _log.info("Meta-rules: %d new (%d total)", meta_rules_discovered, len(new_metas))
+ _log.info(
+ "Meta-rules: %d new (%d total)", meta_rules_discovered, len(new_metas)
+ )
for meta in new_metas:
if meta.id not in existing_ids:
try:
- brain.bus.emit("meta_rule.created", {
- "id": meta.id,
- "principle": meta.principle,
- "description": meta.principle,
- "source_categories": getattr(meta, "source_categories", []),
- "confidence": getattr(meta, "confidence", 0.0),
- "session": current_session,
- })
+ brain.bus.emit(
+ "meta_rule.created",
+ {
+ "id": meta.id,
+ "principle": meta.principle,
+ "description": meta.principle,
+ "source_categories": getattr(
+ meta, "source_categories", []
+ ),
+ "confidence": getattr(meta, "confidence", 0.0),
+ "session": current_session,
+ },
+ )
except Exception as e:
_log.debug("Meta-rule event emit failed: %s", e)
except ImportError as e:
@@ -888,26 +1084,33 @@ def _lesson_key(lesson):
# Build graduated_rules detail list from transitions
from gradata.inspection import _make_rule_id
+
graduated_rules = []
for l, old_s, new_s in transitions:
if new_s in ("PATTERN", "RULE"):
- graduated_rules.append({
- "rule_id": _make_rule_id(l),
- "category": l.category,
- "description": l.description[:100],
- "old_state": old_s,
- "new_state": new_s,
- "confidence": l.confidence,
- })
+ graduated_rules.append(
+ {
+ "rule_id": _make_rule_id(l),
+ "category": l.category,
+ "description": l.description[:100],
+ "old_state": old_s,
+ "new_state": new_s,
+ "confidence": l.confidence,
+ }
+ )
result = {
"session": current_session,
- "total_lessons": len(all_lessons), "active": len(active),
- "graduated": len(graduated), "promotions": promotions,
- "demotions": demotions, "kills": kills,
+ "total_lessons": len(all_lessons),
+ "active": len(active),
+ "graduated": len(graduated),
+ "promotions": promotions,
+ "demotions": demotions,
+ "kills": kills,
"new_rules": [l.description[:60] for l in new_rules] if new_rules else [],
"graduated_rules": graduated_rules,
- "meta_rules_discovered": meta_rules_discovered}
+ "meta_rules_discovered": meta_rules_discovered,
+ }
# Canary health sweep: for every RULE-tier lesson previously enrolled
# in canary, check if corrections landed in its category since it
@@ -930,7 +1133,9 @@ def _lesson_key(lesson):
seen_categories.add(l.category)
try:
health = check_canary_health(
- l.category, current_session, db_path=brain.db_path,
+ l.category,
+ current_session,
+ db_path=brain.db_path,
)
except Exception as e:
_log.debug("check_canary_health(%s) failed: %s", l.category, e)
@@ -962,28 +1167,44 @@ def _lesson_key(lesson):
# Session boundary marker for dashboard queries
try:
- brain.emit("SESSION_END", "brain.end_session", {
- "session": current_session,
- "total_lessons": len(all_lessons),
- "promotions": promotions, "demotions": demotions,
- "graduated_rules": len(new_rules),
- }, session=current_session)
+ brain.emit(
+ "SESSION_END",
+ "brain.end_session",
+ {
+ "session": current_session,
+ "total_lessons": len(all_lessons),
+ "promotions": promotions,
+ "demotions": demotions,
+ "graduated_rules": len(new_rules),
+ },
+ session=current_session,
+ )
except Exception as e:
_log.warning("SESSION_END emit failed: %s", e)
if promotions or demotions or kills:
- _log.info("Graduation sweep: %d promotions, %d demotions, %d kills",
- promotions, demotions, kills)
- brain.bus.emit("session.ended", {
- "session_number": brain.session,
- "stats": result,
- })
+ _log.info(
+ "Graduation sweep: %d promotions, %d demotions, %d kills",
+ promotions,
+ demotions,
+ kills,
+ )
+ brain.bus.emit(
+ "session.ended",
+ {
+ "session_number": brain.session,
+ "stats": result,
+ },
+ )
# Cloud sync: upload session telemetry if user has run `gradata login`.
# NEVER blocks the learning loop — all failures are silently logged.
_cloud_sync_session(
- brain, current_session, all_lessons,
- session_corrections or [], result,
+ brain,
+ current_session,
+ all_lessons,
+ session_corrections or [],
+ result,
)
return result
@@ -1040,26 +1261,20 @@ def _cloud_sync_session(
from gradata.cloud.sync import CloudConfig, TelemetryPayload
# Derive brain_id: use config value, or hash the brain directory path
- b_id = brain_id_from_config or hashlib.sha256(
- str(brain.dir).encode()
- ).hexdigest()[:16]
+ b_id = brain_id_from_config or hashlib.sha256(str(brain.dir).encode()).hexdigest()[:16]
# Compute metrics from session corrections
n_corrections = len(session_corrections)
rewrite_count = sum(
- 1 for c in session_corrections
- if c.get("severity") == "rewrite"
- or c.get("edit_distance", 0) > 0.8
+ 1
+ for c in session_corrections
+ if c.get("severity") == "rewrite" or c.get("edit_distance", 0) > 0.8
)
edit_distances = [
- float(c.get("edit_distance", 0))
- for c in session_corrections
- if "edit_distance" in c
+ float(c.get("edit_distance", 0)) for c in session_corrections if "edit_distance" in c
]
rewrite_rate = rewrite_count / n_corrections if n_corrections else 0.0
- edit_distance_avg = (
- sum(edit_distances) / len(edit_distances) if edit_distances else 0.0
- )
+ edit_distance_avg = sum(edit_distances) / len(edit_distances) if edit_distances else 0.0
# Correction density: corrections per output (approximate from session)
correction_density = 0.0
@@ -1076,31 +1291,19 @@ def _cloud_sync_session(
try:
from gradata.enhancements.metrics import compute_blandness
- finals = [
- c.get("final", "") for c in session_corrections if c.get("final")
- ]
+ finals = [c.get("final", "") for c in session_corrections if c.get("final")]
if finals:
blandness_score = compute_blandness(finals)
except Exception:
pass
# Rule stats from lessons
- rules_active = sum(
- 1 for l in all_lessons if l.state.value in ("INSTINCT", "PATTERN")
- )
- rules_graduated = sum(
- 1 for l in all_lessons if l.state.value == "RULE"
- )
+ rules_active = sum(1 for l in all_lessons if l.state.value in ("INSTINCT", "PATTERN"))
+ rules_graduated = sum(1 for l in all_lessons if l.state.value == "RULE")
total_fires = sum(getattr(l, "fire_count", 0) for l in all_lessons)
total_misfires = sum(getattr(l, "misfire_count", 0) for l in all_lessons)
- rule_success_rate = (
- (total_fires - total_misfires) / total_fires
- if total_fires > 0
- else 0.0
- )
- rule_misfire_rate = (
- total_misfires / total_fires if total_fires > 0 else 0.0
- )
+ rule_success_rate = (total_fires - total_misfires) / total_fires if total_fires > 0 else 0.0
+ rule_misfire_rate = total_misfires / total_fires if total_fires > 0 else 0.0
payload = TelemetryPayload(
brain_id=b_id,
@@ -1154,11 +1357,86 @@ def _cloud_sync_session(
else:
_log.debug(
"Cloud sync_mode=%s — skipping event/correction sync for session %d",
- sync_mode, session,
+ sync_mode,
+ session,
)
except Exception as e:
- _log.debug("Cloud sync failed (non-fatal): %s", e)
+ _log.warning("Cloud sync failed (non-fatal): %s", e, exc_info=True)
+
+
+def cloud_sync_tick(brain_dir: str | Path, session_number: int) -> None:
+ """Hook-safe cloud sync that doesn't require an instantiated Brain.
+
+ Reads lessons from lessons.md and session corrections from system.db,
+ then runs the same telemetry path as ``brain_end_session()``.
+
+ Called by the Stop hook so cloud sync actually fires from Claude Code
+ sessions — Claude Code never calls ``brain.end_session()`` directly.
+ Never raises.
+ """
+ try:
+ import json as _json
+ import sqlite3
+ from pathlib import Path as _Path
+
+ bd = _Path(brain_dir)
+ if not bd.is_dir():
+ return
+
+ all_lessons: list[Lesson] = []
+ lessons_path = bd / "lessons.md"
+ if lessons_path.is_file():
+ try:
+ from gradata.enhancements.self_improvement._confidence import (
+ parse_lessons,
+ )
+
+ all_lessons = parse_lessons(lessons_path.read_text(encoding="utf-8"))
+ except Exception as e:
+ _log.debug("cloud_sync_tick: parse_lessons failed: %s", e)
+
+ session_corrections: list[dict] = []
+ db_path = bd / "system.db"
+ if db_path.is_file() and session_number:
+ try:
+ with sqlite3.connect(db_path) as conn:
+ rows = conn.execute(
+ "SELECT data_json FROM events WHERE type = 'CORRECTION' AND session = ?",
+ (session_number,),
+ ).fetchall()
+ for (raw,) in rows:
+ try:
+ parsed = _json.loads(raw) if isinstance(raw, str) else raw
+ if isinstance(parsed, dict):
+ session_corrections.append(parsed)
+ except (TypeError, _json.JSONDecodeError):
+ continue
+ except sqlite3.Error as e:
+ _log.debug("cloud_sync_tick: db read failed: %s", e)
+
+ # _cloud_sync_session only reads `.dir` and `.db_path` from brain —
+ # a minimal stub lets us reuse the full telemetry/event path without
+ # paying the cost of a fresh Brain() with migrations + FTS init.
+ # `db_path` may not exist for a fresh brain that has only lessons.md;
+ # downstream `compute_metrics` already tolerates that with a None-path
+ # short-circuit, so we pass it through unchanged rather than guarding
+ # here. Sync still completes and `last_sync_at` still updates.
+ class _BrainStub:
+ def __init__(self, d: _Path, db: _Path) -> None:
+ self.dir = d
+ self.db_path = db
+
+ stub = _BrainStub(bd, db_path)
+ _cloud_sync_session(
+ stub, # type: ignore[arg-type]
+ session_number,
+ all_lessons,
+ session_corrections,
+ {},
+ )
+ except Exception as e:
+ _log.warning("cloud_sync_tick failed: %s", e, exc_info=True)
def _parse_toml_cloud(config_path: Path) -> dict:
@@ -1185,9 +1463,15 @@ def _parse_toml_cloud(config_path: Path) -> dict:
# ── auto_evolve() ──────────────────────────────────────────────────────
+
def brain_auto_evolve(
- brain: Brain, output: str, *, task: str = "", agent_type: str = "",
- evaluator: Callable | None = None, dimensions: list | None = None,
+ brain: Brain,
+ output: str,
+ *,
+ task: str = "",
+ agent_type: str = "",
+ evaluator: Callable | None = None,
+ dimensions: list | None = None,
threshold: float = 7.0,
) -> dict:
"""Evaluate output and auto-generate corrections for failed dimensions."""
@@ -1204,27 +1488,50 @@ def brain_auto_evolve(
cat = _DIMENSION_CATEGORY_MAP.get(dim_name.lower(), "PROCESS")
correction_desc = f"[AUTO] {dim_name} scored {score:.1f}/{threshold:.1f}: {feedback}"
try:
- brain.correct(draft=output[:2000], final=correction_desc[:2000],
- category=cat, agent_type=agent_type or "auto-evolve",
- context={"task": task, "auto_evolve": True})
- corrections.append({"dimension": dim_name, "score": score,
- "category": cat, "feedback": feedback[:200]})
+ brain.correct(
+ draft=output[:2000],
+ final=correction_desc[:2000],
+ category=cat,
+ agent_type=agent_type or "auto-evolve",
+ context={"task": task, "auto_evolve": True},
+ )
+ corrections.append(
+ {
+ "dimension": dim_name,
+ "score": score,
+ "category": cat,
+ "feedback": feedback[:200],
+ }
+ )
except Exception as e:
_log.warning("Auto-evolve correction failed for %s: %s", dim_name, e)
if corrections:
- _log.info("auto_evolve: %d corrections from %d dimensions (agent=%s)",
- len(corrections), len(dims), agent_type or "auto")
+ _log.info(
+ "auto_evolve: %d corrections from %d dimensions (agent=%s)",
+ len(corrections),
+ len(dims),
+ agent_type or "auto",
+ )
- return {"scores": result.scores, "average": result.average, "verdict": result.verdict,
- "corrections_generated": len(corrections), "corrections": corrections,
- "threshold": threshold}
+ return {
+ "scores": result.scores,
+ "average": result.average,
+ "verdict": result.verdict,
+ "corrections_generated": len(corrections),
+ "corrections": corrections,
+ "threshold": threshold,
+ }
# ── detect_implicit_feedback() ─────────────────────────────────────────
+
def brain_detect_implicit_feedback(
- brain: Brain, user_message: str, *, session: int | None = None,
+ brain: Brain,
+ user_message: str,
+ *,
+ session: int | None = None,
) -> dict:
"""Detect implicit behavioral feedback in user prompts."""
signals = []
@@ -1243,37 +1550,75 @@ def _phrase_match(phrase: str) -> bool:
end = idx + len(phrase)
return not (end < len(text) and text[end].isalpha())
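+ # e.g. "why did you" matches in "why did you do that" but not in
+ # "why did your build fail" (the char after the phrase is alphabetic).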
- for marker in ["are you sure", "that's wrong", "that's not right", "not accurate",
- "no, not that", "no don't", "stop doing", "why did you", "why didn't you"]:
+ for marker in [
+ "are you sure",
+ "that's wrong",
+ "that's not right",
+ "not accurate",
+ "no, not that",
+ "no don't",
+ "stop doing",
+ "why did you",
+ "why didn't you",
+ ]:
if _phrase_match(marker):
signals.append({"type": "pushback", "marker": marker})
- for marker in ["make sure", "don't forget", "remember to", "you should always",
- "i already told", "i just said", "as i mentioned", "like i said"]:
+ for marker in [
+ "make sure",
+ "don't forget",
+ "remember to",
+ "you should always",
+ "i already told",
+ "i just said",
+ "as i mentioned",
+ "like i said",
+ ]:
if _phrase_match(marker):
signals.append({"type": "reminder", "marker": marker})
- for marker in ["what about", "you forgot", "you missed", "you skipped",
- "you ignored", "you dropped", "did you check", "did you verify"]:
+ for marker in [
+ "what about",
+ "you forgot",
+ "you missed",
+ "you skipped",
+ "you ignored",
+ "you dropped",
+ "did you check",
+ "did you verify",
+ ]:
if _phrase_match(marker):
signals.append({"type": "gap", "marker": marker})
- for marker in ["are we sure", "is that right", "is that correct",
- "won't that", "won't people", "i feel like"]:
+ for marker in [
+ "are we sure",
+ "is that right",
+ "is that correct",
+ "won't that",
+ "won't people",
+ "i feel like",
+ ]:
if _phrase_match(marker):
signals.append({"type": "challenge", "marker": marker})
has_feedback = len(signals) > 0
event = None
if has_feedback:
- event = brain.emit("IMPLICIT_FEEDBACK", "brain.detect_implicit_feedback",
- {"signals": [s["type"] for s in signals],
- "markers": [s["marker"] for s in signals],
- "snippet": user_message[:200]},
- tags=[f"signal:{s['type']}" for s in signals], session=session)
+ event = brain.emit(
+ "IMPLICIT_FEEDBACK",
+ "brain.detect_implicit_feedback",
+ {
+ "signals": [s["type"] for s in signals],
+ "markers": [s["marker"] for s in signals],
+ "snippet": user_message[:200],
+ },
+ tags=[f"signal:{s['type']}" for s in signals],
+ session=session,
+ )
return {"signals": signals, "has_feedback": has_feedback, "event": event}
# ── Export helpers ─────────────────────────────────────────────────────
+
def brain_export_rules(brain: Brain, *, min_state: str = "PATTERN", skill_name: str = "") -> str:
"""Export graduated brain rules as OpenSpace-compatible SKILL.md."""
try:
@@ -1294,6 +1639,7 @@ def brain_export_rules(brain: Brain, *, min_state: str = "PATTERN", skill_name:
domain = "general"
if brain.manifest_path.is_file():
import json
+
try:
manifest = json.loads(brain.manifest_path.read_text(encoding="utf-8"))
domain = manifest.get("metadata", {}).get("domain", "general")
@@ -1312,7 +1658,8 @@ def brain_export_rules(brain: Brain, *, min_state: str = "PATTERN", skill_name:
categories_str = ", ".join(sorted(by_category.keys())).lower()
lines = [
- "---", f"name: {skill_name}",
+ "---",
+ f"name: {skill_name}",
f"description: Behavioral rules for {domain} tasks covering {categories_str}. "
f"Graduated from {len(qualified)} corrections via Gradata.",
"license: Apache-2.0",
@@ -1321,15 +1668,24 @@ def brain_export_rules(brain: Brain, *, min_state: str = "PATTERN", skill_name:
" author: gradata",
' version: "1.0"',
f" domain: {domain}",
- f" rules-count: \"{len(qualified)}\"",
- "---", "", f"# {skill_name.replace('-', ' ').title()}", "",
- "## Purpose", "",
+ f' rules-count: "{len(qualified)}"',
+ "---",
+ "",
+ f"# {skill_name.replace('-', ' ').title()}",
+ "",
+ "## Purpose",
+ "",
f"Behavioral rules adapted from human corrections in the {domain} domain.",
- "Apply these rules to avoid repeating past mistakes.", "",
- "## When to Apply", "",
+ "Apply these rules to avoid repeating past mistakes.",
+ "",
+ "## When to Apply",
+ "",
f"- Any {domain} task involving: {categories_str}",
- f"- {len(qualified)} rules across {len(by_category)} categories", "",
- "## Rules", ""]
+ f"- {len(qualified)} rules across {len(by_category)} categories",
+ "",
+ "## Rules",
+ "",
+ ]
for cat, cat_lessons in sorted(by_category.items()):
lines.append(f"### {cat}")
@@ -1348,10 +1704,18 @@ def brain_export_rules(brain: Brain, *, min_state: str = "PATTERN", skill_name:
lines.append(f"{i}. {l.category}: {l.description}")
lines.append("")
- lines.extend(["## Provenance", "",
- "- Source: Gradata correction-based procedural memory",
- f"- Domain: {domain}", f"- Rules exported: {len(qualified)}",
- f"- Categories: {len(by_category)}", f"- Min graduation tier: {min_state}", ""])
+ lines.extend(
+ [
+ "## Provenance",
+ "",
+ "- Source: Gradata correction-based procedural memory",
+ f"- Domain: {domain}",
+ f"- Rules exported: {len(qualified)}",
+ f"- Categories: {len(by_category)}",
+ f"- Min graduation tier: {min_state}",
+ "",
+ ]
+ )
return "\n".join(lines)
@@ -1367,13 +1731,22 @@ def brain_export_rules_json(brain: Brain, *, min_state: str = "PATTERN") -> list
lessons = parse_lessons(lessons_path.read_text(encoding="utf-8"))
qualified = _filter_lessons_by_state(lessons, min_state)
qualified.sort(key=lambda l: (l.category, l.description))
- return [{"category": l.category, "description": l.description,
- "state": l.state.value, "confidence": round(l.confidence, 2),
- "fire_count": l.fire_count, "date": l.date} for l in qualified]
+ return [
+ {
+ "category": l.category,
+ "description": l.description,
+ "state": l.state.value,
+ "confidence": round(l.confidence, 2),
+ "fire_count": l.fire_count,
+ "date": l.date,
+ }
+ for l in qualified
+ ]
-def brain_export_skill(brain: Brain, *, output_dir: str | None = None,
- min_state: str = "PATTERN", skill_name: str = "") -> Path:
+def brain_export_skill(
+ brain: Brain, *, output_dir: str | None = None, min_state: str = "PATTERN", skill_name: str = ""
+) -> Path:
"""Export graduated rules as a full skill directory."""
import hashlib
import json
@@ -1399,9 +1772,13 @@ def brain_export_skill(brain: Brain, *, output_dir: str | None = None,
skill_id = f"{skill_name}__imp_{brain_hash}"
(skill_dir / ".skill_id").write_text(skill_id, encoding="utf-8")
- provenance = {"source": "gradata", "skill_id": skill_id,
- "brain_name": brain.dir.name, "exported_at": datetime.now(UTC).isoformat(),
- "min_state": min_state}
+ provenance = {
+ "source": "gradata",
+ "skill_id": skill_id,
+ "brain_name": brain.dir.name,
+ "exported_at": datetime.now(UTC).isoformat(),
+ "min_state": min_state,
+ }
if brain.manifest_path.is_file():
try:
manifest = json.loads(brain.manifest_path.read_text(encoding="utf-8"))
@@ -1413,8 +1790,9 @@ def brain_export_skill(brain: Brain, *, output_dir: str | None = None,
return skill_dir
-def brain_export_skills(brain: Brain, *, output_dir: str | None = None,
- min_state: str = "PATTERN") -> list[str]:
+def brain_export_skills(
+ brain: Brain, *, output_dir: str | None = None, min_state: str = "PATTERN"
+) -> list[str]:
"""Export graduated rules as per-category SKILL.md files."""
from collections import defaultdict
from pathlib import Path
@@ -1431,6 +1809,7 @@ def brain_export_skills(brain: Brain, *, output_dir: str | None = None,
try:
if hasattr(brain, "manifest_path") and brain.manifest_path.is_file():
import json
+
manifest = json.loads(brain.manifest_path.read_text(encoding="utf-8"))
domain = manifest.get("metadata", {}).get("domain", "general").lower()
except Exception:
@@ -1442,11 +1821,18 @@ def brain_export_skills(brain: Brain, *, output_dir: str | None = None,
slug = re.sub(r"[^\w\-]", "_", cat.lower())
skill_dir = base / f"gradata-{slug}"
skill_dir.mkdir(parents=True, exist_ok=True)
- lines = ["---", f'name: "gradata-{domain}-{slug}"',
- f'description: "Behavioral rules for {cat} from {len(cat_rules)} corrections"',
- f"tags: [{domain}, {slug}, gradata]", "source: gradata",
- "compatible_with: [hermes, mindstudio, openspace]",
- "---", "", f"# {cat} Rules ({domain.title()})", ""]
+ lines = [
+ "---",
+ f'name: "gradata-{domain}-{slug}"',
+ f'description: "Behavioral rules for {cat} from {len(cat_rules)} corrections"',
+ f"tags: [{domain}, {slug}, gradata]",
+ "source: gradata",
+ "compatible_with: [hermes, mindstudio, openspace]",
+ "---",
+ "",
+ f"# {cat} Rules ({domain.title()})",
+ "",
+ ]
for i, rule in enumerate(cat_rules, 1):
lines.append(f"{i}. [{rule['state']}:{rule['confidence']:.2f}] {rule['description']}")
lines.append("")
@@ -1458,6 +1844,7 @@ def brain_export_skills(brain: Brain, *, output_dir: str | None = None,
# ── convergence() ─────────────────────────────────────────────────────
+
def _mann_kendall(data: list[int] | list[float]) -> tuple[str, float]:
"""Mann-Kendall trend test — delegates to _stats.trend_analysis().
@@ -1467,6 +1854,7 @@ def _mann_kendall(data: list[int] | list[float]) -> tuple[str, float]:
return "no_trend", 1.0
from gradata._stats import trend_analysis
+
slope, p_value = trend_analysis([float(x) for x in data])
trend = ("decreasing" if slope < 0 else "increasing") if p_value < 0.05 else "no_trend"
@@ -1489,13 +1877,22 @@ def brain_convergence(brain: Brain) -> dict:
total_corrections: int
total_sessions: int
"""
- empty = {"sessions": [], "corrections_per_session": [], "trend": "insufficient_data",
- "p_value": 1.0, "changepoints": [], "by_category": {},
- "total_corrections": 0, "total_sessions": 0,
- "edit_distance_per_session": [], "edit_distance_trend": "insufficient_data"}
+ empty = {
+ "sessions": [],
+ "corrections_per_session": [],
+ "trend": "insufficient_data",
+ "p_value": 1.0,
+ "changepoints": [],
+ "by_category": {},
+ "total_corrections": 0,
+ "total_sessions": 0,
+ "edit_distance_per_session": [],
+ "edit_distance_trend": "insufficient_data",
+ }
try:
from gradata._db import get_connection
+
with get_connection(brain.db_path) as conn:
rows = conn.execute(
"SELECT session, COUNT(*) as cnt FROM events "
@@ -1560,7 +1957,11 @@ def brain_convergence(brain: Brain) -> dict:
cat_trend = "diverging"
elif len(cat_counts) >= 3:
cat_avg = sum(cat_counts) / len(cat_counts)
- cat_cv = (sum((x - cat_avg) ** 2 for x in cat_counts) / len(cat_counts)) ** 0.5 / cat_avg if cat_avg > 0 else 0
+ cat_cv = (
+ (sum((x - cat_avg) ** 2 for x in cat_counts) / len(cat_counts)) ** 0.5 / cat_avg
+ if cat_avg > 0
+ else 0
+ )
cat_trend = "converged" if cat_cv < 0.5 else "no_signal"
else:
cat_trend = "insufficient_data"
@@ -1574,12 +1975,16 @@ def brain_convergence(brain: Brain) -> dict:
ed_counts = [r[1] for r in ed_rows] if ed_rows else []
if len(ed_counts) >= 3:
ed_mk_trend, _ed_p = _mann_kendall(ed_counts)
- ed_trend = "improving" if ed_mk_trend == "decreasing" else (
- "worsening" if ed_mk_trend == "increasing" else "stable")
+ ed_trend = (
+ "improving"
+ if ed_mk_trend == "decreasing"
+ else ("worsening" if ed_mk_trend == "increasing" else "stable")
+ )
else:
ed_trend = "insufficient_data"
from gradata._stats import cusum_changepoints
+
raw_changepoints = cusum_changepoints(counts)
changepoint_sessions = [sessions[i] for i in raw_changepoints if i < len(sessions)]
@@ -1663,8 +2068,11 @@ def brain_prove(brain: Brain) -> dict:
if lessons_path and lessons_path.is_file():
from gradata._types import LessonState
from gradata.enhancements.self_improvement import parse_lessons
+
lessons = parse_lessons(lessons_path.read_text(encoding="utf-8"))
- rule_count = sum(1 for l in lessons if l.state in (LessonState.PATTERN, LessonState.RULE))
+ rule_count = sum(
+ 1 for l in lessons if l.state in (LessonState.PATTERN, LessonState.RULE)
+ )
except Exception:
pass
@@ -1748,21 +2156,24 @@ def brain_share(brain: Brain) -> dict:
rules: list[dict] = []
if lessons_path and lessons_path.is_file():
from gradata.enhancements.self_improvement import parse_lessons
+
all_lessons = parse_lessons(lessons_path.read_text(encoding="utf-8"))
for lesson in all_lessons:
if lesson.state in (LessonState.PATTERN, LessonState.RULE):
- rules.append({
- "category": lesson.category,
- "description": lesson.description,
- "confidence": lesson.confidence,
- "state": lesson.state.value,
- "fire_count": lesson.fire_count,
- "correction_type": (
- lesson.correction_type.value
- if hasattr(lesson.correction_type, "value")
- else str(lesson.correction_type)
- ),
- })
+ rules.append(
+ {
+ "category": lesson.category,
+ "description": lesson.description,
+ "confidence": lesson.confidence,
+ "state": lesson.state.value,
+ "fire_count": lesson.fire_count,
+ "correction_type": (
+ lesson.correction_type.value
+ if hasattr(lesson.correction_type, "value")
+ else str(lesson.correction_type)
+ ),
+ }
+ )
proof: dict = {}
with contextlib.suppress(Exception):
@@ -1849,7 +2260,5 @@ def brain_absorb(brain: Brain, package: dict) -> dict:
"absorbed": absorbed,
"skipped": skipped,
"source": package.get("brain_id", "unknown"),
- "total_rules_in_package": package.get(
- "rule_count", len(package.get("rules", []))
- ),
+ "total_rules_in_package": package.get("rule_count", len(package.get("rules", []))),
}
diff --git a/Gradata/src/gradata/_data_flow_audit.py b/Gradata/src/gradata/_data_flow_audit.py
index 13c40de1..87de1328 100644
--- a/Gradata/src/gradata/_data_flow_audit.py
+++ b/Gradata/src/gradata/_data_flow_audit.py
@@ -20,10 +20,21 @@ def _check(name: str, passed: bool, detail: str = ""):
def check_event_pipes(ctx: "BrainContext | None" = None):
known_types = [
- "CORRECTION", "GATE_RESULT", "GATE_OVERRIDE", "OUTPUT",
- "AUDIT_SCORE", "LESSON_CHANGE", "CALIBRATION", "HEALTH_CHECK",
- "COST_EVENT", "TOOL_FAILURE", "HALLUCINATION", "STALE_DATA",
- "VERIFICATION", "STEP_COMPLETE", "DEFER",
+ "CORRECTION",
+ "GATE_RESULT",
+ "GATE_OVERRIDE",
+ "OUTPUT",
+ "AUDIT_SCORE",
+ "LESSON_CHANGE",
+ "CALIBRATION",
+ "HEALTH_CHECK",
+ "COST_EVENT",
+ "TOOL_FAILURE",
+ "HALLUCINATION",
+ "STALE_DATA",
+ "VERIFICATION",
+ "STEP_COMPLETE",
+ "DEFER",
]
try:
db = ctx.db_path if ctx else _p.DB_PATH
@@ -34,8 +45,11 @@ def check_event_pipes(ctx: "BrainContext | None" = None):
except Exception:
emitted_types = set()
for t in known_types:
- _check(f"event_pipe:{t}", t in emitted_types,
- "has emissions" if t in emitted_types else "no emissions found")
+ _check(
+ f"event_pipe:{t}",
+ t in emitted_types,
+ "has emissions" if t in emitted_types else "no emissions found",
+ )
def check_index_completeness(ctx: BrainContext | None = None):
@@ -57,7 +71,9 @@ def check_index_completeness(ctx: BrainContext | None = None):
brain_files.add(rel)
missing = brain_files - indexed_files
if missing:
- _check("index:completeness", False, f"{len(missing)} files not indexed: {list(missing)[:5]}")
+ _check(
+ "index:completeness", False, f"{len(missing)} files not indexed: {list(missing)[:5]}"
+ )
else:
_check("index:completeness", True, f"{len(brain_files)} files all indexed")
@@ -71,8 +87,10 @@ def check_facts_freshness(ctx: "BrainContext | None" = None):
try:
db = ctx.db_path if ctx else _p.DB_PATH
conn = sqlite3.connect(str(db))
- tables = [r[0] for r in conn.execute(
- "SELECT name FROM sqlite_master WHERE type='table'").fetchall()]
+ tables = [
+ r[0]
+ for r in conn.execute("SELECT name FROM sqlite_master WHERE type='table'").fetchall()
+ ]
if "facts" not in tables:
_check("facts:table_exists", False, "facts table missing")
conn.close()
@@ -99,6 +117,7 @@ def check_facts_freshness(ctx: "BrainContext | None" = None):
def check_embeddings(ctx: BrainContext | None = None):
"""Check SQLite brain_embeddings table for indexed chunks."""
import sqlite3
+
db = ctx.db_path if ctx else _p.DB_PATH
try:
conn = sqlite3.connect(str(db))
@@ -114,8 +133,10 @@ def check_fts5(ctx: BrainContext | None = None):
db = ctx.db_path if ctx else _p.DB_PATH
try:
conn = sqlite3.connect(str(db))
- tables = [r[0] for r in conn.execute(
- "SELECT name FROM sqlite_master WHERE type='table'").fetchall()]
+ tables = [
+ r[0]
+ for r in conn.execute("SELECT name FROM sqlite_master WHERE type='table'").fetchall()
+ ]
if "brain_fts" not in tables:
_check("fts5:table", False, "brain_fts virtual table missing")
conn.close()
@@ -156,5 +177,10 @@ def run_audit(ctx: "BrainContext | None" = None) -> dict:
passed = sum(1 for c in CHECKS if c["passed"])
total = len(CHECKS)
score = round(passed / total * 100, 1) if total > 0 else 0
- return {"timestamp": datetime.now().isoformat(), "passed": passed, "total": total,
- "score": score, "checks": CHECKS}
+ return {
+ "timestamp": datetime.now().isoformat(),
+ "passed": passed,
+ "total": total,
+ "score": score,
+ "checks": CHECKS,
+ }
diff --git a/Gradata/src/gradata/_db.py b/Gradata/src/gradata/_db.py
index 993363d1..952431f9 100644
--- a/Gradata/src/gradata/_db.py
+++ b/Gradata/src/gradata/_db.py
@@ -58,6 +58,7 @@ def ensure_table(conn: sqlite3.Connection, create_sql: str) -> None:
# File Locking — concurrency protection for lessons.md
# ---------------------------------------------------------------------------
+
@contextmanager
def lessons_lock(lessons_path: str | Path, timeout: float = 10.0):
"""Context manager for exclusive file lock on lessons.md.
@@ -87,6 +88,7 @@ def lessons_lock(lessons_path: str | Path, timeout: float = 10.0):
# Platform-specific locking
if os.name == "nt":
import msvcrt
+
while True:
try:
msvcrt.locking(fd, msvcrt.LK_NBLCK, 1)
@@ -99,6 +101,7 @@ def lessons_lock(lessons_path: str | Path, timeout: float = 10.0):
time.sleep(0.1)
else:
import fcntl
+
while True:
try:
fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
@@ -118,12 +121,14 @@ def lessons_lock(lessons_path: str | Path, timeout: float = 10.0):
if os.name == "nt":
try:
import msvcrt
+
msvcrt.locking(fd, msvcrt.LK_UNLCK, 1)
except OSError:
pass
else:
try:
import fcntl
+
fcntl.flock(fd, fcntl.LOCK_UN)
except OSError:
pass
@@ -205,8 +210,14 @@ def check_budget(conn: sqlite3.Connection, api_name: str, count: int = 1) -> dic
).fetchone()
if row is None:
- return {"allowed": True, "remaining": 999, "daily_limit": 999, "used_today": 0,
- "api_name": api_name, "error": "unknown API — no budget configured"}
+ return {
+ "allowed": True,
+ "remaining": 999,
+ "daily_limit": 999,
+ "used_today": 0,
+ "api_name": api_name,
+ "error": "unknown API — no budget configured",
+ }
limit, used, last_reset = row[0], row[1], row[2]
@@ -242,6 +253,7 @@ def spend_budget(conn: sqlite3.Connection, api_name: str, count: int = 1) -> dic
return result
from datetime import date
+
today = date.today().isoformat()
conn.execute(
@@ -258,6 +270,7 @@ def spend_budget(conn: sqlite3.Connection, api_name: str, count: int = 1) -> dic
def budget_summary(conn: sqlite3.Connection) -> list[dict]:
"""Return all budget rows for morning brief reporting."""
from datetime import date
+
today = date.today().isoformat()
# Reset stale rows first
diff --git a/Gradata/src/gradata/_doctor.py b/Gradata/src/gradata/_doctor.py
index 0f68a509..cd1a2958 100644
--- a/Gradata/src/gradata/_doctor.py
+++ b/Gradata/src/gradata/_doctor.py
@@ -8,16 +8,25 @@
# Or via CLI:
gradata doctor
+ gradata doctor --cloud # cloud-only checks
+ gradata doctor --no-cloud # skip cloud probes (offline)
"""
+
from __future__ import annotations
+import contextlib
import json
import os
import shutil
+import socket
import sqlite3
import sys
+import urllib.error
+import urllib.request
from pathlib import Path
+_CLOUD_PROBE_TIMEOUT = 5.0 # seconds — keep doctor fast even when offline
+
def _check_python_version():
"""Check Python >= 3.11."""
@@ -36,7 +45,7 @@ def _check_vector_store():
"""Report vector store status. FTS5 is primary search, sqlite-vec planned."""
return {
"name": "vector_store",
- "status": "fts5",
+ "status": "ok",
"detail": "FTS5 is the primary search engine. sqlite-vec planned for vector similarity.",
}
@@ -45,6 +54,7 @@ def _check_sentence_transformers():
"""Check if sentence-transformers is importable."""
try:
import sentence_transformers
+
version = getattr(sentence_transformers, "__version__", "unknown")
return {"name": "sentence_transformers", "status": "ok", "detail": version}
except ImportError:
@@ -102,6 +112,7 @@ def _resolve_brain_path():
return Path(brain_dir)
try:
from gradata._paths import DB_PATH, resolve_brain_dir
+
# If DB_PATH points to a real system.db, use its parent
if DB_PATH.exists():
return DB_PATH.parent
@@ -124,7 +135,11 @@ def _check_system_db(brain_path):
return _skip("system_db")
db = brain_path / "system.db"
if not db.exists():
- return {"name": "system_db", "status": "skip", "detail": "system.db not found (brain may not be initialized)"}
+ return {
+ "name": "system_db",
+ "status": "skip",
+ "detail": "system.db not found (brain may not be initialized)",
+ }
try:
conn = sqlite3.connect(str(db))
conn.execute("SELECT 1")
@@ -141,7 +156,11 @@ def _check_events_jsonl(brain_path):
return _skip("events_jsonl")
ej = brain_path / "events.jsonl"
if not ej.exists():
- return {"name": "events_jsonl", "status": "skip", "detail": "events.jsonl not found (brain may not be initialized)"}
+ return {
+ "name": "events_jsonl",
+ "status": "skip",
+ "detail": "events.jsonl not found (brain may not be initialized)",
+ }
try:
size_kb = round(ej.stat().st_size / 1024, 1)
return {"name": "events_jsonl", "status": "ok", "detail": f"exists, {size_kb} KB"}
@@ -155,7 +174,11 @@ def _check_manifest(brain_path):
return _skip("brain_manifest")
mf = brain_path / "brain.manifest.json"
if not mf.exists():
- return {"name": "brain_manifest", "status": "skip", "detail": "brain.manifest.json not found (optional)"}
+ return {
+ "name": "brain_manifest",
+ "status": "skip",
+ "detail": "brain.manifest.json not found (optional)",
+ }
try:
data = json.loads(mf.read_text(encoding="utf-8"))
version = data.get("schema_version", "?")
@@ -172,11 +195,19 @@ def _check_vectorstore(brain_path):
return _skip("vectorstore")
vs = brain_path / ".vectorstore"
if not vs.exists():
- return {"name": "vectorstore", "status": "skip", "detail": ".vectorstore/ not found (embeddings not enabled)"}
+ return {
+ "name": "vectorstore",
+ "status": "skip",
+ "detail": ".vectorstore/ not found (embeddings not enabled)",
+ }
if vs.is_dir():
file_count = sum(1 for _ in vs.rglob("*") if _.is_file())
return {"name": "vectorstore", "status": "ok", "detail": f"exists, {file_count} files"}
- return {"name": "vectorstore", "status": "fail", "detail": ".vectorstore exists but is not a directory"}
+ return {
+ "name": "vectorstore",
+ "status": "fail",
+ "detail": ".vectorstore exists but is not a directory",
+ }
def _check_disk_space(brain_path):
@@ -196,12 +227,262 @@ def _check_disk_space(brain_path):
return {"name": "disk_space", "status": "error", "detail": str(e)}
-def diagnose(brain_dir: str | Path | None = None) -> dict:
+def _gradata_config_path() -> Path:
+ env = os.environ.get("GRADATA_CONFIG")
+ if env:
+ return Path(env)
+ return Path.home() / ".gradata" / "config.toml"
+
+
+def _read_cloud_config() -> dict:
+ """Parse ~/.gradata/config.toml (tomllib in py311+). Returns {} on any failure."""
+ path = _gradata_config_path()
+ if not path.exists():
+ return {}
+ try:
+ import tomllib
+ except ImportError:
+ return {}
+ try:
+ with open(path, "rb") as f:
+ return tomllib.load(f).get("cloud", {})
+ except Exception:
+ return {}
+
+
+def _check_cloud_config():
+ """Is the user logged in? Config file present with credentials + brain_id?"""
+ path = _gradata_config_path()
+ if not path.exists():
+ return {
+ "name": "cloud_config",
+ "status": "missing",
+ "detail": f"{path} not found — run `gradata login`",
+ }
+ cfg = _read_cloud_config()
+ if not cfg.get("api_key"):
+ return {
+ "name": "cloud_config",
+ "status": "fail",
+ "detail": f"{path} missing [cloud] credentials — re-run `gradata login`",
+ }
+ brain_id = cfg.get("brain_id", "") or "(unset)"
+ return {
+ "name": "cloud_config",
+ "status": "ok",
+ "detail": f"logged in — brain_id={brain_id}",
+ }
+
+
+def _check_cloud_env_vars():
+ """Report which cloud-sync env vars are set (without leaking values)."""
+ enabled = os.environ.get("GRADATA_CLOUD_SYNC", "").strip().lower() in ("1", "true", "yes")
+ url_set = bool(os.environ.get("GRADATA_CLOUD_URL") or os.environ.get("GRADATA_SUPABASE_URL"))
+ key_set = bool(
+ os.environ.get("GRADATA_CLOUD_KEY") or os.environ.get("GRADATA_SUPABASE_SERVICE_KEY")
+ )
+ if not (enabled or url_set or key_set):
+ return {
+ "name": "cloud_env",
+ "status": "skip",
+ "detail": "GRADATA_CLOUD_SYNC not enabled (optional Supabase push path)",
+ }
+ missing = []
+ if not url_set:
+ missing.append("GRADATA_CLOUD_URL / GRADATA_SUPABASE_URL")
+ if not key_set:
+ missing.append("GRADATA_CLOUD_KEY / GRADATA_SUPABASE_SERVICE_KEY")
+ if missing:
+ return {
+ "name": "cloud_env",
+ "status": "fail",
+ "detail": f"GRADATA_CLOUD_SYNC=1 but missing: {', '.join(missing)}",
+ }
+ status = "ok" if enabled else "warn"
+ detail = "enabled, URL+key set" if enabled else "URL+key set but GRADATA_CLOUD_SYNC!=1"
+ return {"name": "cloud_env", "status": status, "detail": detail}
+
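+# Decision table for the check above (summary, not part of the patch):
+# nothing set -> skip; anything set but URL or key missing -> fail;
+# URL+key set with the sync flag off -> warn; sync flag on with URL+key -> ok.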
+
+def _check_cloud_reachable():
+ """Can we reach the cloud API host? Low-cost TCP probe."""
+ cfg = _read_cloud_config()
+ api_url = (
+ cfg.get("api_url") or os.environ.get("GRADATA_API_URL") or "https://api.gradata.ai/api/v1"
+ )
+ host = api_url.split("://", 1)[-1].split("/", 1)[0]
+ try:
+ socket.create_connection((host, 443), timeout=_CLOUD_PROBE_TIMEOUT).close()
+ return {"name": "cloud_reachable", "status": "ok", "detail": f"{host}:443 reachable"}
+ except OSError as e:
+ return {
+ "name": "cloud_reachable",
+ "status": "fail",
+ "detail": f"{host}:443 unreachable ({e.__class__.__name__})",
+ }
+
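+# Roughly the TCP probe above, done by hand (illustrative; uses the default
+# host from the code above):
+#     python -c "import socket; socket.create_connection(('api.gradata.ai', 443), 5).close()"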
+
+def _probe_api(url: str, bearer: str) -> tuple[int, str]:
+ """GET url with Bearer token. Returns (status_code, body_snippet). (0, err) on network fail."""
+ auth = "Bearer " + bearer
+ req = urllib.request.Request(
+ url,
+ headers={"Authorization": auth, "User-Agent": "gradata-sdk-doctor/0.6"},
+ method="GET",
+ )
+ try:
+ with urllib.request.urlopen(req, timeout=_CLOUD_PROBE_TIMEOUT) as resp:
+ body = resp.read(512).decode("utf-8", errors="replace")
+ return resp.status, body
+ except urllib.error.HTTPError as e:
+ body = ""
+ with contextlib.suppress(Exception):
+ body = e.read(512).decode("utf-8", errors="replace")
+ return e.code, body
+ except (urllib.error.URLError, OSError) as e:
+ return 0, str(e)
+
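+# Return shapes to expect from _probe_api above (bodies illustrative):
+#     (200, '{"user": ...}')               token accepted
+#     (401, '{"detail": "invalid key"}')   token rejected
+#     (0, '<urlopen error timed out>')     network failure before any HTTP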
+
+def _check_cloud_auth():
+ """Does the stored credential work against the API?"""
+ cfg = _read_cloud_config()
+ bearer = cfg.get("api_key") or ""
+ if not bearer:
+ return {"name": "cloud_auth", "status": "skip", "detail": "no credential — skip"}
+ api_url = cfg.get("api_url", "https://api.gradata.ai/api/v1").rstrip("/")
+ brain_id = cfg.get("brain_id", "")
+ probe_url = f"{api_url}/brains/{brain_id}" if brain_id else f"{api_url}/auth/whoami"
+ code, body = _probe_api(probe_url, bearer)
+ if code == 0:
+ return {"name": "cloud_auth", "status": "error", "detail": f"network: {body[:80]}"}
+ if 200 <= code < 300:
+ return {"name": "cloud_auth", "status": "ok", "detail": f"HTTP {code} — token accepted"}
+ if code in (401, 403):
+ return {
+ "name": "cloud_auth",
+ "status": "fail",
+ "detail": f"HTTP {code} — token rejected; re-run `gradata login`",
+ }
+ if code == 404:
+ return {
+ "name": "cloud_auth",
+ "status": "warn",
+ "detail": f"HTTP 404 on {probe_url} — endpoint may have moved",
+ }
+ return {"name": "cloud_auth", "status": "warn", "detail": f"HTTP {code}"}
+
+
+def _check_cloud_has_data():
+ """Does the cloud actually have rows for this brain? Addresses the
+ 'HTTP 200 != visible data' silent-failure mode."""
+ cfg = _read_cloud_config()
+ bearer = cfg.get("api_key") or ""
+ brain_id = cfg.get("brain_id")
+ if not (bearer and brain_id):
+ return {"name": "cloud_has_data", "status": "skip", "detail": "not logged in — skip"}
+ api_url = cfg.get("api_url", "https://api.gradata.ai/api/v1").rstrip("/")
+ code, body = _probe_api(f"{api_url}/brains/{brain_id}/analytics", bearer)
+ if code == 0:
+ return {"name": "cloud_has_data", "status": "error", "detail": f"network: {body[:80]}"}
+ if code == 404:
+ return {
+ "name": "cloud_has_data",
+ "status": "warn",
+ "detail": f"brain_id={brain_id} not found in cloud — no sessions synced yet",
+ }
+ if not (200 <= code < 300):
+ return {"name": "cloud_has_data", "status": "warn", "detail": f"HTTP {code}"}
+ try:
+ data = json.loads(body) if body else {}
+ sessions = data.get("session_count") or data.get("sessions") or 0
+ if sessions:
+ return {
+ "name": "cloud_has_data",
+ "status": "ok",
+ "detail": f"{sessions} sessions synced to dashboard",
+ }
+ return {
+ "name": "cloud_has_data",
+ "status": "warn",
+ "detail": "connected, but 0 sessions visible — telemetry may not have fired yet",
+ }
+ except json.JSONDecodeError:
+ return {"name": "cloud_has_data", "status": "warn", "detail": "non-JSON response"}
+
+
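+# Illustrative shape of cloud_push_error.json read by the check below (keys
+# match the .get() calls; the values here are invented):
+#     {"table": "events", "code": 409, "constraint_violation": true,
+#      "recorded_at": "2025-06-01T12:00:00Z"}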
+def _check_cloud_push_error(brain_path):
+ """Surface the last recorded cloud-push error, if any.
+
+ ``_cloud_sync.push`` writes ``cloud_push_error.json`` when a POST fails
+ with an HTTP/constraint error. Without this check, watermark stalls from
+ 23505 unique-violation retries are invisible until someone greps logs.
+
+ The resolved brain path is always included in the detail so a user with
+ multiple brains can confirm which one was inspected — the push writer and
+ the doctor reader can diverge in multi-brain setups where the daemon was
+ started with an explicit ``brain_dir`` that differs from ``BRAIN_DIR``.
+ """
+ if brain_path is None:
+ return _skip("cloud_push_error")
+ p = brain_path / "cloud_push_error.json"
+ if not p.exists():
+ return {
+ "name": "cloud_push_error",
+ "status": "ok",
+ "detail": f"no recent push errors (checked {brain_path})",
+ }
+ try:
+ data = json.loads(p.read_text(encoding="utf-8"))
+ except (OSError, json.JSONDecodeError) as e:
+ return {
+ "name": "cloud_push_error",
+ "status": "warn",
+ "detail": f"error file unreadable at {p}: {e}",
+ }
+ table = data.get("table", "?")
+ code = data.get("code", "?")
+ recorded = data.get("recorded_at", "?")
+ if data.get("constraint_violation"):
+ return {
+ "name": "cloud_push_error",
+ "status": "fail",
+ "detail": (
+ f"last push to '{table}' blocked by constraint (HTTP {code}) at {recorded} "
+ f"in {brain_path} — watermark stalled; run `gradata migrate --brain ` "
+ f"or dedupe rows"
+ ),
+ }
+ return {
+ "name": "cloud_push_error",
+ "status": "warn",
+ "detail": f"last push to '{table}' returned HTTP {code} at {recorded} in {brain_path}",
+ }
+
+
+def _cloud_checks(brain_path=None):
+ """All cloud checks, ordered so the first failure tells you what to do next."""
+ return [
+ _check_cloud_config(),
+ _check_cloud_env_vars(),
+ _check_cloud_reachable(),
+ _check_cloud_auth(),
+ _check_cloud_has_data(),
+ _check_cloud_push_error(brain_path),
+ ]
+
+
+def diagnose(
+ brain_dir: str | Path | None = None,
+ include_cloud: bool = True,
+ cloud_only: bool = False,
+) -> dict:
"""Run all health checks and return structured report.
Args:
brain_dir: Explicit brain directory to check. If None, resolves
from BRAIN_DIR env or _paths module.
+ include_cloud: If True, also probe cloud config/reachability/auth.
+ Set False for offline runs.
+ cloud_only: Skip local checks, only probe cloud.
Returns:
{
@@ -212,18 +493,23 @@ def diagnose(brain_dir: str | Path | None = None) -> dict:
# Resolve brain path
brain_path = Path(brain_dir).resolve() if brain_dir else _resolve_brain_path()
- checks = [
- _check_python_version(),
- _check_vector_store(),
- _check_sentence_transformers(),
- _check_sqlite3(),
- _check_brain_dir(),
- _check_system_db(brain_path),
- _check_events_jsonl(brain_path),
- _check_manifest(brain_path),
- _check_vectorstore(brain_path),
- _check_disk_space(brain_path),
- ]
+ if cloud_only:
+ checks = _cloud_checks(brain_path)
+ else:
+ checks = [
+ _check_python_version(),
+ _check_vector_store(),
+ _check_sentence_transformers(),
+ _check_sqlite3(),
+ _check_brain_dir(),
+ _check_system_db(brain_path),
+ _check_events_jsonl(brain_path),
+ _check_manifest(brain_path),
+ _check_vectorstore(brain_path),
+ _check_disk_space(brain_path),
+ ]
+ if include_cloud:
+ checks.extend(_cloud_checks(brain_path))
# Determine overall status — "skip" means not applicable, not a problem
active_statuses = [c["status"] for c in checks if c["status"] != "skip"]
diff --git a/Gradata/src/gradata/_events.py b/Gradata/src/gradata/_events.py
index d8607d97..c9747752 100644
--- a/Gradata/src/gradata/_events.py
+++ b/Gradata/src/gradata/_events.py
@@ -27,6 +27,38 @@
_log = logging.getLogger("gradata.events")
+# PII redaction — email pattern only for now. Extend as new PII types are
+# identified. Deliberately simple: no external deps, no config.
+import re as _re
+
+_EMAIL_RE = _re.compile(
+ r"[a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,}",
+ _re.IGNORECASE,
+)
+
+
+def _redact_str(s: str) -> str:
+ return _EMAIL_RE.sub("[REDACTED_EMAIL]", s)
+
+
+def _redact_payload(obj: object) -> object:
+ """Recursively redact PII from *obj* (dict, list, str, or scalar).
+
+ Returns a new object; never mutates the input.
+ Unknown types fall back to str() and are redacted in string form, so
+ this only raises if str(obj) itself raises.
+ """
+ if isinstance(obj, str):
+ return _redact_str(obj)
+ if isinstance(obj, dict):
+ return {k: _redact_payload(v) for k, v in obj.items()}
+ if isinstance(obj, list):
+ return [_redact_payload(item) for item in obj]
+ if isinstance(obj, (int, float, bool, type(None))):
+ return obj
+ # Fallback for unexpected types — redact their string form.
+ return _redact_str(str(obj))
+
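+# Illustrative behaviour of the redactor above (not a shipped doctest):
+#     >>> _redact_payload({"contact": "jane@acme.com", "ids": [1, 2]})
+#     {'contact': '[REDACTED_EMAIL]', 'ids': [1, 2]}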
def _locked_append_many(path: Path, lines: list[str]) -> None:
"""Append *lines* (each must already end with \\n) to *path* under one advisory lock.
@@ -108,9 +140,17 @@ def _ensure_table(conn: sqlite3.Connection):
conn.commit()
-def emit(event_type: str, source: str, data: dict | None = None, tags: list | None = None,
- session: int | None = None, valid_from: str | None = None, valid_until: str | None = None,
- ctx: BrainContext | None = None, ts: str | None = None):
+def emit(
+ event_type: str,
+ source: str,
+ data: dict | None = None,
+ tags: list | None = None,
+ session: int | None = None,
+ valid_from: str | None = None,
+ valid_until: str | None = None,
+ ctx: BrainContext | None = None,
+ ts: str | None = None,
+):
"""Emit an event to the brain's event log.
Args:
@@ -141,22 +181,52 @@ def emit(event_type: str, source: str, data: dict | None = None, tags: list | No
enriched_tags = tags or []
try:
from gradata._tag_taxonomy import enrich_tags, validate_tags
+
enriched_tags = enrich_tags(enriched_tags, event_type, data or {})
issues = validate_tags(enriched_tags, event_type)
if issues:
import logging
+
_logger = logging.getLogger("gradata.events")
for issue in issues[:2]:
_logger.debug("tag validation: %s", issue)
except ImportError:
pass
+ raw_data = data or {}
+ # PII redaction — MUST happen before any write. If _redact_payload raises
+ # we propagate immediately (fail closed: no redacted or raw row is written).
+ redacted_data = _redact_payload(raw_data)
+
+ raw_event = {
+ "ts": ts,
+ "session": session,
+ "type": event_type,
+ "source": source,
+ "data": raw_data,
+ "tags": enriched_tags,
+ "valid_from": valid_from,
+ "valid_until": valid_until,
+ }
event = {
- "ts": ts, "session": session, "type": event_type, "source": source,
- "data": data or {}, "tags": enriched_tags,
- "valid_from": valid_from, "valid_until": valid_until,
+ "ts": ts,
+ "session": session,
+ "type": event_type,
+ "source": source,
+ "data": redacted_data,
+ "tags": enriched_tags,
+ "valid_from": valid_from,
+ "valid_until": valid_until,
}
+ # Best-effort raw side-log (gitignored). Failures MUST NOT block the
+ # canonical write — disk-full on the raw log is not a learning-data loss.
+ try:
+ raw_jsonl = events_jsonl.parent / "events.raw.jsonl"
+ _locked_append(raw_jsonl, json.dumps(raw_event, ensure_ascii=False) + "\n")
+ except Exception:
+ pass # intentionally swallowed
+
# Dual-write: JSONL (portable) + SQLite (queryable).
# At least ONE must succeed or we raise — learning data loss is unacceptable.
jsonl_ok = False
@@ -181,8 +251,17 @@ def emit(event_type: str, source: str, data: dict | None = None, tags: list | No
"INSERT OR IGNORE INTO events "
"(ts, session, type, source, data_json, tags_json, valid_from, valid_until, tenant_id, schema_version) "
"VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, 1)",
- (ts, session, event_type, source, json.dumps(data or {}),
- json.dumps(enriched_tags), valid_from, valid_until, _tid),
+ (
+ ts,
+ session,
+ event_type,
+ source,
+ json.dumps(redacted_data),
+ json.dumps(enriched_tags),
+ valid_from,
+ valid_until,
+ _tid,
+ ),
)
if cursor.rowcount == 1:
event["id"] = cursor.lastrowid
@@ -199,6 +278,7 @@ def emit(event_type: str, source: str, data: dict | None = None, tags: list | No
if not jsonl_ok and not sqlite_ok:
from gradata.exceptions import EventPersistenceError
+
raise EventPersistenceError(
f"Event {event_type} failed to persist to BOTH JSONL and SQLite. "
"Learning data lost. Check file permissions and disk space."
@@ -208,25 +288,47 @@ def emit(event_type: str, source: str, data: dict | None = None, tags: list | No
return event
-
-def emit_gate_result(gate_name: str, result: str, sources_checked: list | None = None, detail: str = "") -> dict:
+def emit_gate_result(
+ gate_name: str, result: str, sources_checked: list | None = None, detail: str = ""
+) -> dict:
sources = sources_checked or []
- return emit("GATE_RESULT", "gate:execution", {
- "gate": gate_name, "result": result, "sources_checked": sources,
- "sources_complete": len(sources) > 0, "detail": detail,
- }, tags=[f"gate:{gate_name}"])
+ return emit(
+ "GATE_RESULT",
+ "gate:execution",
+ {
+ "gate": gate_name,
+ "result": result,
+ "sources_checked": sources,
+ "sources_complete": len(sources) > 0,
+ "detail": detail,
+ },
+ tags=[f"gate:{gate_name}"],
+ )
def emit_gate_override(gate_name: str, reason: str, steps_skipped: list | None = None) -> dict:
- return emit("GATE_OVERRIDE", "gate:override", {
- "gate": gate_name, "reason": reason,
- "steps_skipped": steps_skipped or [], "override_type": "explicit",
- }, tags=[f"gate:{gate_name}", "override:explicit"])
+ return emit(
+ "GATE_OVERRIDE",
+ "gate:override",
+ {
+ "gate": gate_name,
+ "reason": reason,
+ "steps_skipped": steps_skipped or [],
+ "override_type": "explicit",
+ },
+ tags=[f"gate:{gate_name}", "override:explicit"],
+ )
-def query(event_type: str | None = None, session: int | None = None, last_n_sessions: int | None = None,
- limit: int = 100, as_of: str | None = None, active_only: bool = False,
- ctx: BrainContext | None = None) -> list:
+def query(
+ event_type: str | None = None,
+ session: int | None = None,
+ last_n_sessions: int | None = None,
+ limit: int = 100,
+ as_of: str | None = None,
+ active_only: bool = False,
+ ctx: BrainContext | None = None,
+) -> list:
db_path = ctx.db_path if ctx else _p.DB_PATH
with contextlib.closing(sqlite3.connect(str(db_path))) as conn:
conn.row_factory = sqlite3.Row
@@ -261,19 +363,28 @@ def query(event_type: str | None = None, session: int | None = None, last_n_sess
return [
{
- "id": r["id"], "ts": r["ts"], "session": r["session"],
- "type": r["type"], "source": r["source"],
+ "id": r["id"],
+ "ts": r["ts"],
+ "session": r["session"],
+ "type": r["type"],
+ "source": r["source"],
"data": json.loads(r["data_json"]) if r["data_json"] else {},
"tags": json.loads(r["tags_json"]) if r["tags_json"] else [],
- "valid_from": r["valid_from"], "valid_until": r["valid_until"],
+ "valid_from": r["valid_from"],
+ "valid_until": r["valid_until"],
}
for r in rows
]
-def supersede(event_id: int, new_data: dict | None = None, new_tags: list | None = None,
- source: str = "supersede", new_valid_from: str | None = None,
- ctx: BrainContext | None = None):
+def supersede(
+ event_id: int,
+ new_data: dict | None = None,
+ new_tags: list | None = None,
+ source: str = "supersede",
+ new_valid_from: str | None = None,
+ ctx: BrainContext | None = None,
+):
now = datetime.now(UTC).isoformat()
db = ctx.db_path if ctx else _p.DB_PATH
with contextlib.closing(sqlite3.connect(str(db))) as conn:
@@ -286,9 +397,12 @@ def supersede(event_id: int, new_data: dict | None = None, new_tags: list | None
conn.commit()
orig_tags = json.loads(original["tags_json"]) if original["tags_json"] else []
replacement = emit(
- event_type=original["type"], source=source,
+ event_type=original["type"],
+ source=source,
data=new_data or (json.loads(original["data_json"]) if original["data_json"] else {}),
- tags=new_tags or orig_tags, session=_detect_session(ctx=ctx), valid_from=new_valid_from or now,
+ tags=new_tags or orig_tags,
+ session=_detect_session(ctx=ctx),
+ valid_from=new_valid_from or now,
ctx=ctx,
)
replacement["superseded_id"] = event_id
@@ -299,11 +413,14 @@ def correction_rate(last_n_sessions: int = 5, ctx: BrainContext | None = None) -
db = ctx.db_path if ctx else _p.DB_PATH
with contextlib.closing(sqlite3.connect(str(db))) as conn:
_ensure_table(conn)
- rows = conn.execute("""
+ rows = conn.execute(
+ """
SELECT session, COUNT(*) as count FROM events WHERE type = 'CORRECTION'
AND session >= (SELECT COALESCE(MAX(session), 0) - ? FROM events)
GROUP BY session ORDER BY session
- """, (last_n_sessions - 1,)).fetchall()
+ """,
+ (last_n_sessions - 1,),
+ ).fetchall()
return {r[0]: r[1] for r in rows}
@@ -312,8 +429,10 @@ def compute_leading_indicators(session: int, ctx: BrainContext | None = None) ->
with contextlib.closing(sqlite3.connect(str(db))) as conn:
_ensure_table(conn)
result = {
- "first_draft_acceptance": 0.0, "correction_density": 0.0,
- "avg_time_to_deliverable_ms": 0.0, "source_coverage": 0.0,
+ "first_draft_acceptance": 0.0,
+ "correction_density": 0.0,
+ "avg_time_to_deliverable_ms": 0.0,
+ "source_coverage": 0.0,
"confidence_calibration": 1.0,
}
outputs = conn.execute(
@@ -328,7 +447,9 @@ def compute_leading_indicators(session: int, ctx: BrainContext | None = None) ->
"SELECT COUNT(*) FROM events WHERE type = 'CORRECTION' AND session = ?", (session,)
).fetchone()[0]
output_count = len(outputs) if outputs else 0
- result["correction_density"] = min(corrections / output_count, 1.0) if output_count > 0 else 0.0
+ result["correction_density"] = (
+ min(corrections / output_count, 1.0) if output_count > 0 else 0.0
+ )
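+ # e.g. 3 corrections over 10 outputs gives density 0.3; 15 over 10 caps at 1.0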
gates = conn.execute(
"SELECT data_json FROM events WHERE type = 'GATE_RESULT' AND session = ?", (session,)
@@ -364,7 +485,9 @@ def compute_leading_indicators(session: int, ctx: BrainContext | None = None) ->
# v1 format: delta-based (legacy)
total_cal = len(delta_events)
within_range = sum(1 for d in delta_events if abs(d.get("delta", 0)) <= 2)
- result["confidence_calibration"] = within_range / total_cal if total_cal > 0 else 1.0
+ result["confidence_calibration"] = (
+ within_range / total_cal if total_cal > 0 else 1.0
+ )
return result
@@ -397,7 +520,6 @@ def _detect_session(ctx: BrainContext | None = None) -> int:
# ── Brain-quality functions (promoted from brain shim) ────────────────
-
def find_contradictions(event_type: str | None = None, tag_prefix: str | None = None) -> list:
"""Find events that may contradict each other — same tags, overlapping validity.
@@ -418,16 +540,19 @@ def find_contradictions(event_type: str | None = None, tag_prefix: str | None =
conflicts = []
for i, a in enumerate(events):
- for b in events[i + 1:]:
+ for b in events[i + 1 :]:
# Check tag overlap
shared_tags = set(a.get("tags", [])) & set(b.get("tags", []))
if shared_tags and a["type"] == b["type"]:
- conflicts.append({
- "event_a": {"id": a["id"], "ts": a["ts"], "data": a["data"]},
- "event_b": {"id": b["id"], "ts": b["ts"], "data": b["data"]},
- "shared_tags": list(shared_tags),
- "both_active": a.get("valid_until") is None and b.get("valid_until") is None,
- })
+ conflicts.append(
+ {
+ "event_a": {"id": a["id"], "ts": a["ts"], "data": a["data"]},
+ "event_b": {"id": b["id"], "ts": b["ts"], "data": b["data"]},
+ "shared_tags": list(shared_tags),
+ "both_active": a.get("valid_until") is None
+ and b.get("valid_until") is None,
+ }
+ )
return conflicts
@@ -437,12 +562,15 @@ def audit_trend(last_n_sessions: int = 5, ctx: BrainContext | None = None) -> li
db = ctx.db_path if ctx else _p.DB_PATH
with contextlib.closing(sqlite3.connect(str(db))) as conn:
_ensure_table(conn)
- rows = conn.execute("""
+ rows = conn.execute(
+ """
SELECT session, data_json FROM events
WHERE type = 'AUDIT_SCORE'
AND session >= (SELECT COALESCE(MAX(session), 0) - ? FROM events)
ORDER BY session
- """, (last_n_sessions - 1,)).fetchall()
+ """,
+ (last_n_sessions - 1,),
+ ).fetchall()
return [{"session": r[0], "data": json.loads(r[1])} for r in rows]
@@ -467,6 +595,7 @@ class RetainOrchestrator:
def __init__(self, brain_dir: str | Path) -> None:
from pathlib import Path as _Path
+
self.brain_dir = _Path(brain_dir)
self.events_path = self.brain_dir / "events.jsonl"
self.db_path = self.brain_dir / "system.db"
@@ -545,19 +674,13 @@ def flush(self) -> dict:
continue
result["phases"]["read"] = {
"existing_keys": len(existing_keys),
- "new": sum(
- 1 for e in self._pending
- if self._event_key(e) not in existing_keys
- ),
+ "new": sum(1 for e in self._pending if self._event_key(e) not in existing_keys),
}
except Exception as exc:
result["errors"].append(f"Phase 1: {exc}")
# Fall through with empty existing_keys — safer than aborting
- new_events = [
- e for e in self._pending
- if self._event_key(e) not in existing_keys
- ]
+ new_events = [e for e in self._pending if self._event_key(e) not in existing_keys]
if not new_events:
self._pending.clear()
@@ -569,8 +692,7 @@ def flush(self) -> dict:
# multi-process interleaving on Windows (msvcrt.locking) and POSIX
# (fcntl.flock). Single lock + single fsync for the whole batch.
lines = [
- json.dumps(event, default=str, ensure_ascii=False) + "\n"
- for event in new_events
+ json.dumps(event, default=str, ensure_ascii=False) + "\n" for event in new_events
]
_locked_append_many(self.events_path, lines)
result["written"] = len(new_events)
@@ -620,6 +742,7 @@ def flush(self) -> dict:
try:
try:
from gradata._brain_manifest import update_manifest # type: ignore[import]
+
update_manifest(self.brain_dir)
manifest_updated = True
except (ImportError, Exception):
diff --git a/Gradata/src/gradata/_export_brain.py b/Gradata/src/gradata/_export_brain.py
index 1fe2394d..313d1f75 100644
--- a/Gradata/src/gradata/_export_brain.py
+++ b/Gradata/src/gradata/_export_brain.py
@@ -14,29 +14,59 @@
from gradata._paths import BrainContext
-def _VAULT_DIR(): return _p.BRAIN_DIR / "vault"
-def _LESSONS_ACTIVE(): return _p.LESSONS_FILE
-def _LESSONS_ARCHIVE(): return _p.BRAIN_DIR / "lessons-archive.md"
-def _QUALITY_RUBRICS(): return _p.BRAIN_DIR / "quality-rubrics.md"
-def _DOMAIN_CONFIG(): return _p.WORKING_DIR / "domain" / "DOMAIN.md"
-def _DOMAIN_SOUL(): return _p.WORKING_DIR / "domain" / "soul.md"
-def _CARL_LOOP(): return _p.CARL_DIR / "loop"
-def _CARL_GLOBAL(): return _p.CARL_DIR / "global"
+def _VAULT_DIR():
+ return _p.BRAIN_DIR / "vault"
+
+
+def _LESSONS_ACTIVE():
+ return _p.LESSONS_FILE
+
+
+def _LESSONS_ARCHIVE():
+ return _p.BRAIN_DIR / "lessons-archive.md"
+
+
+def _QUALITY_RUBRICS():
+ return _p.BRAIN_DIR / "quality-rubrics.md"
+
+
+def _DOMAIN_CONFIG():
+ return _p.WORKING_DIR / "domain" / "DOMAIN.md"
+
+
+def _DOMAIN_SOUL():
+ return _p.WORKING_DIR / "domain" / "soul.md"
+
+
+def _CARL_LOOP():
+ return _p.CARL_DIR / "loop"
+
+
+def _CARL_GLOBAL():
+ return _p.CARL_DIR / "global"
+
# Sensitive data patterns
-RE_EMAIL = re.compile(r'[a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,}')
-RE_PHONE = re.compile(r'(?:\+?1[\s\-.]?)?\(?\d{3}\)?[\s\-.]?\d{3}[\s\-.]?\d{4}')
-RE_API_KEY = re.compile(r'(?:api[_\-]?key|token|secret|password|bearer)\s*[:=]\s*\S+', re.IGNORECASE)
-RE_USER_PATH = re.compile(r'C:[/\\]Users[/\\]\w+', re.IGNORECASE)
-RE_CRM_URL = re.compile(r'https?://[a-z0-9\-]+\.(?:pipedrive|hubspot|salesforce|zoho)\.com\S*', re.IGNORECASE)
-RE_CRM_DEAL_ID = re.compile(r'(?:pipedrive_deal_id|hubspot_deal_id|sf_opportunity_id|deal[_\-]?id)\s*[:=]\s*\d+', re.IGNORECASE)
+RE_EMAIL = re.compile(r"[a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,}")
+RE_PHONE = re.compile(r"(?:\+?1[\s\-.]?)?\(?\d{3}\)?[\s\-.]?\d{3}[\s\-.]?\d{4}")
+RE_API_KEY = re.compile(
+ r"(?:api[_\-]?key|token|secret|password|bearer)\s*[:=]\s*\S+", re.IGNORECASE
+)
+RE_USER_PATH = re.compile(r"C:[/\\]Users[/\\]\w+", re.IGNORECASE)
+RE_CRM_URL = re.compile(
+ r"https?://[a-z0-9\-]+\.(?:pipedrive|hubspot|salesforce|zoho)\.com\S*", re.IGNORECASE
+)
+RE_CRM_DEAL_ID = re.compile(
+ r"(?:pipedrive_deal_id|hubspot_deal_id|sf_opportunity_id|deal[_\-]?id)\s*[:=]\s*\d+",
+ re.IGNORECASE,
+)
def read_version() -> str:
if not _p.VERSION_FILE.exists():
return "v0.0.0"
text = _p.VERSION_FILE.read_text(encoding="utf-8")
- match = re.search(r'Current Version:\s*(v[\d.]+)', text)
+ match = re.search(r"Current Version:\s*(v[\d.]+)", text)
return match.group(1) if match else "v0.0.0"
@@ -44,10 +74,10 @@ def read_domain_name() -> str:
if not _DOMAIN_CONFIG().exists():
return "Unknown"
text = _DOMAIN_CONFIG().read_text(encoding="utf-8")
- match = re.search(r'Talent:\s*(\w+)', text)
+ match = re.search(r"Talent:\s*(\w+)", text)
if match:
return match.group(1)
- match = re.search(r'^#\s+(.+)', text, re.MULTILINE)
+ match = re.search(r"^#\s+(.+)", text, re.MULTILINE)
return match.group(1).strip() if match else "Unknown"
@@ -55,7 +85,7 @@ def read_session_count() -> int:
if not _p.VERSION_FILE.exists():
return 0
text = _p.VERSION_FILE.read_text(encoding="utf-8")
- sessions = re.findall(r'Session\s+(\d+)', text)
+ sessions = re.findall(r"Session\s+(\d+)", text)
return max(int(s) for s in sessions) if sessions else 0
@@ -63,7 +93,7 @@ def count_lessons(filepath: Path) -> int:
if not filepath.exists():
return 0
text = filepath.read_text(encoding="utf-8")
- return len(re.findall(r'^\[20\d{2}-\d{2}-\d{2}\]', text, re.MULTILINE))
+ return len(re.findall(r"^\[20\d{2}-\d{2}-\d{2}\]", text, re.MULTILINE))
def build_prospect_map(prospects_dir: Path) -> dict[str, str]:
@@ -92,11 +122,11 @@ def build_prospect_map(prospects_dir: Path) -> dict[str, str]:
try:
text = f.read_text(encoding="utf-8")
- fm_name = re.search(r'^name:\s*(.+)$', text, re.MULTILINE)
+ fm_name = re.search(r"^name:\s*(.+)$", text, re.MULTILINE)
if fm_name and fm_name.group(1).strip():
val = fm_name.group(1).strip()
name_map[val] = f"[PROSPECT_{counter}]"
- fm_company = re.search(r'^company:\s*(.+)$', text, re.MULTILINE)
+ fm_company = re.search(r"^company:\s*(.+)$", text, re.MULTILINE)
if fm_company and fm_company.group(1).strip():
name_map[fm_company.group(1).strip()] = f"[COMPANY_{counter}]"
except Exception:
@@ -104,7 +134,9 @@ def build_prospect_map(prospects_dir: Path) -> dict[str, str]:
counter += 1
# Auto-detect owner name from brain manifest if available
- manifest_path = _p.BRAIN_DIR / "brain.manifest.json" if hasattr(_p, 'BRAIN_DIR') and _p.BRAIN_DIR else None
+ manifest_path = (
+ _p.BRAIN_DIR / "brain.manifest.json" if hasattr(_p, "BRAIN_DIR") and _p.BRAIN_DIR else None
+ )
if manifest_path and manifest_path.exists():
try:
with open(manifest_path, encoding="utf-8") as f:
@@ -119,12 +151,12 @@ def build_prospect_map(prospects_dir: Path) -> dict[str, str]:
def sanitize_content(text: str, name_map: dict[str, str]) -> str:
- text = RE_EMAIL.sub('[EMAIL_REDACTED]', text)
- text = RE_PHONE.sub('[PHONE_REDACTED]', text)
- text = RE_API_KEY.sub('[API_KEY_REDACTED]', text)
- text = RE_CRM_URL.sub('[CRM_URL_REDACTED]', text)
- text = RE_CRM_DEAL_ID.sub('deal_id: [DEAL_REDACTED]', text)
- text = RE_USER_PATH.sub('[USER_HOME]', text)
+ text = RE_EMAIL.sub("[EMAIL_REDACTED]", text)
+ text = RE_PHONE.sub("[PHONE_REDACTED]", text)
+ text = RE_API_KEY.sub("[API_KEY_REDACTED]", text)
+ text = RE_CRM_URL.sub("[CRM_URL_REDACTED]", text)
+ text = RE_CRM_DEAL_ID.sub("deal_id: [DEAL_REDACTED]", text)
+ text = RE_USER_PATH.sub("[USER_HOME]", text)
for real_name in sorted(name_map, key=len, reverse=True):
if len(real_name) >= 3:
text = text.replace(real_name, name_map[real_name])
@@ -173,8 +205,9 @@ def collect_domain_files() -> list[tuple[str, Path]]:
return files
-def export_brain(include_prospects: bool = True, domain_only: bool = False,
- ctx: BrainContext | None = None) -> Path:
+def export_brain(
+ include_prospects: bool = True, domain_only: bool = False, ctx: BrainContext | None = None
+) -> Path:
brain_dir = ctx.brain_dir if ctx else _p.BRAIN_DIR
prospects_dir = ctx.prospects_dir if ctx else _p.PROSPECTS_DIR
@@ -213,18 +246,23 @@ def export_brain(include_prospects: bool = True, domain_only: bool = False,
now = datetime.now(UTC)
try:
from gradata._brain_manifest import generate_manifest
+
manifest = generate_manifest(ctx=ctx)
manifest["export"] = {
"exported_at": now.isoformat(),
- "mode": "domain-only" if domain_only else ("no-prospects" if not include_prospects else "full"),
+ "mode": "domain-only"
+ if domain_only
+ else ("no-prospects" if not include_prospects else "full"),
"files": [path for path, _ in sanitized],
}
except Exception:
manifest = {
"schema_version": "1.0.0",
"metadata": {
- "brain_version": version, "domain": domain,
- "sessions_trained": sessions, "maturity_phase": "INFANT",
+ "brain_version": version,
+ "domain": domain,
+ "sessions_trained": sessions,
+ "maturity_phase": "INFANT",
"generated_at": now.isoformat(),
},
"quality": {"lessons_graduated": graduated, "lessons_active": active},
diff --git a/Gradata/src/gradata/_fact_extractor.py b/Gradata/src/gradata/_fact_extractor.py
index 4769d634..620832a1 100644
--- a/Gradata/src/gradata/_fact_extractor.py
+++ b/Gradata/src/gradata/_fact_extractor.py
@@ -16,13 +16,21 @@
# Constants — domain-specific fact types can be extended via brain config
_DEFAULT_FACT_TYPES = (
- "company_size", "tech_stack", "objection", "decision_maker",
- "pain_point", "budget", "timeline",
+ "company_size",
+ "tech_stack",
+ "objection",
+ "decision_maker",
+ "pain_point",
+ "budget",
+ "timeline",
)
+
def _load_fact_types() -> tuple:
"""Load fact types from brain config or use defaults."""
- config_path = _p.BRAIN_DIR / "taxonomy.json" if hasattr(_p, 'BRAIN_DIR') and _p.BRAIN_DIR else None
+ config_path = (
+ _p.BRAIN_DIR / "taxonomy.json" if hasattr(_p, "BRAIN_DIR") and _p.BRAIN_DIR else None
+ )
if config_path and config_path.exists():
try:
with open(config_path, encoding="utf-8") as f:
@@ -34,6 +42,7 @@ def _load_fact_types() -> tuple:
pass
return _DEFAULT_FACT_TYPES
+
VALID_FACT_TYPES = _load_fact_types()
MIN_FACT_LENGTH = 3
CONF_EXPLICIT = 0.9
@@ -86,7 +95,7 @@ def _get_entity_names():
"""Get entity names from brain directory (prospects, candidates, etc.)."""
names = set()
for dirname in ("prospects", "candidates", "customers", "entities"):
- entity_dir = _p.BRAIN_DIR / dirname if hasattr(_p, 'BRAIN_DIR') and _p.BRAIN_DIR else None
+ entity_dir = _p.BRAIN_DIR / dirname if hasattr(_p, "BRAIN_DIR") and _p.BRAIN_DIR else None
if not entity_dir or not entity_dir.exists():
continue
for f in entity_dir.glob("*.md"):
@@ -146,11 +155,16 @@ def extract_from_file(filepath):
def add_fact(ftype, fvalue, conf=CONF_EXPLICIT):
fvalue = _clean_value(fvalue)
if _quality_gate(ftype, fvalue):
- facts.append({
- "prospect": prospect, "company": company,
- "fact_type": ftype, "fact_value": fvalue,
- "confidence": conf, "source": source,
- })
+ facts.append(
+ {
+ "prospect": prospect,
+ "company": company,
+ "fact_type": ftype,
+ "fact_value": fvalue,
+ "confidence": conf,
+ "source": source,
+ }
+ )
# Frontmatter extraction
if fm.get("deal_value"):
@@ -171,7 +185,10 @@ def add_fact(ftype, fvalue, conf=CONF_EXPLICIT):
if emp_val and not emp_val.startswith("- **"):
add_fact("company_size", emp_val, CONF_EXPLICIT)
- for pattern in [r"^(?:employees|team_size|headcount):\s*(.+)", r"- \*\*(?:Team Size|Headcount):\*\*\s*(.+)"]:
+ for pattern in [
+ r"^(?:employees|team_size|headcount):\s*(.+)",
+ r"- \*\*(?:Team Size|Headcount):\*\*\s*(.+)",
+ ]:
for m in re.finditer(pattern, text, re.IGNORECASE | re.MULTILINE):
val = m.group(1).strip()
if val and val != fm.get("name", ""):
@@ -184,10 +201,25 @@ def add_fact(ftype, fvalue, conf=CONF_EXPLICIT):
add_fact("tech_stack", tech_val, CONF_EXPLICIT)
tech_keywords = [
- "Meta Pixel", "Google Ads", "Facebook Ads", "TikTok Ads",
- "Shopify", "WordPress", "HubSpot", "Salesforce", "Marketo",
- "Google Analytics", "GA4", "Klaviyo", "Mailchimp", "Segment",
- "BigQuery", "Looker", "Triple Whale", "Northbeam", "Hyros",
+ "Meta Pixel",
+ "Google Ads",
+ "Facebook Ads",
+ "TikTok Ads",
+ "Shopify",
+ "WordPress",
+ "HubSpot",
+ "Salesforce",
+ "Marketo",
+ "Google Analytics",
+ "GA4",
+ "Klaviyo",
+ "Mailchimp",
+ "Segment",
+ "BigQuery",
+ "Looker",
+ "Triple Whale",
+ "Northbeam",
+ "Hyros",
]
for kw in tech_keywords:
if kw.lower() in text.lower():
@@ -243,10 +275,9 @@ def add_fact(ftype, fvalue, conf=CONF_EXPLICIT):
return facts
-
-
-def query_facts(prospect=None, fact_type=None, min_confidence=0.0,
- ctx: "BrainContext | None" = None):
+def query_facts(
+ prospect=None, fact_type=None, min_confidence=0.0, ctx: "BrainContext | None" = None
+):
conn = _get_db(ctx)
_init_tables(conn)
sql = "SELECT * FROM facts WHERE stale=0"
@@ -266,8 +297,6 @@ def query_facts(prospect=None, fact_type=None, min_confidence=0.0,
return [dict(r) for r in rows]
-
-
def get_stats(ctx: BrainContext | None = None):
conn = _get_db(ctx)
_init_tables(conn)
diff --git a/Gradata/src/gradata/_file_lock.py b/Gradata/src/gradata/_file_lock.py
index bdc61c9a..fb33dc74 100644
--- a/Gradata/src/gradata/_file_lock.py
+++ b/Gradata/src/gradata/_file_lock.py
@@ -37,6 +37,7 @@
advisory locks are best-effort for preventing interleaving, not for data
integrity.
"""
+
from __future__ import annotations
import contextlib
@@ -49,14 +50,14 @@
# Helpers
# ---------------------------------------------------------------------------
-_BACKOFF_START = 0.01 # seconds
-_BACKOFF_CAP = 0.10 # seconds
-_BACKOFF_MULT = 2.0
+_BACKOFF_START = 0.01 # seconds
+_BACKOFF_CAP = 0.10 # seconds
+_BACKOFF_MULT = 2.0
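+# With these defaults _backoff_intervals() below yields 0.01, 0.02, 0.04,
+# 0.08, then 0.10 forever (truncated at _BACKOFF_CAP).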
-def _backoff_intervals(start: float = _BACKOFF_START,
- cap: float = _BACKOFF_CAP,
- mult: float = _BACKOFF_MULT):
+def _backoff_intervals(
+ start: float = _BACKOFF_START, cap: float = _BACKOFF_CAP, mult: float = _BACKOFF_MULT
+):
"""Yield truncated exponential backoff intervals forever."""
interval = start
while True:
@@ -68,6 +69,7 @@ def _backoff_intervals(start: float = _BACKOFF_START,
# Windows implementation
# ---------------------------------------------------------------------------
+
def _lock_win32(fh: IO, timeout: float | None) -> bool:
"""Acquire msvcrt advisory lock on byte 0.
@@ -98,9 +100,7 @@ def _lock_win32(fh: IO, timeout: float | None) -> bool:
pass
remaining = deadline - time.monotonic()
if remaining <= 0:
- raise TimeoutError(
- f"Could not acquire lock on {fh.name} within {timeout}s"
- )
+ raise TimeoutError(f"Could not acquire lock on {fh.name} within {timeout}s")
time.sleep(min(interval, remaining))
# Unreachable, but satisfies type checker.
@@ -109,6 +109,7 @@ def _lock_win32(fh: IO, timeout: float | None) -> bool:
def _unlock_win32(fh: IO) -> None:
import msvcrt # type: ignore[import]
+
fh.seek(0)
with contextlib.suppress(OSError):
msvcrt.locking(fh.fileno(), msvcrt.LK_UNLCK, 1)
@@ -118,6 +119,7 @@ def _unlock_win32(fh: IO) -> None:
# POSIX implementation
# ---------------------------------------------------------------------------
+
def _lock_posix(fh: IO, timeout: float | None) -> bool:
"""Acquire fcntl exclusive lock.
@@ -144,9 +146,7 @@ def _lock_posix(fh: IO, timeout: float | None) -> bool:
pass
remaining = deadline - time.monotonic()
if remaining <= 0:
- raise TimeoutError(
- f"Could not acquire lock on {fh.name} within {timeout}s"
- )
+ raise TimeoutError(f"Could not acquire lock on {fh.name} within {timeout}s")
time.sleep(min(interval, remaining))
raise TimeoutError(f"Could not acquire lock on {fh.name} within {timeout}s") # pragma: no cover
@@ -154,6 +154,7 @@ def _lock_posix(fh: IO, timeout: float | None) -> bool:
def _unlock_posix(fh: IO) -> None:
import fcntl # type: ignore[import]
+
with contextlib.suppress(OSError):
fcntl.flock(fh, fcntl.LOCK_UN)
@@ -162,6 +163,7 @@ def _unlock_posix(fh: IO) -> None:
# Public API
# ---------------------------------------------------------------------------
+
@contextlib.contextmanager
def platform_lock(fh: IO, *, timeout: float | None = None) -> Generator[None, None, None]:
"""Advisory exclusive lock on an open file handle.
diff --git a/Gradata/src/gradata/_http.py b/Gradata/src/gradata/_http.py
index ed445e95..ed666dea 100644
--- a/Gradata/src/gradata/_http.py
+++ b/Gradata/src/gradata/_http.py
@@ -44,6 +44,4 @@ def require_https(url: str, label: str = "URL") -> None:
if parsed.scheme == "http":
host = (parsed.hostname or "").lower()
if host not in _LOCAL_HOSTS:
- raise ValueError(
- f"{label} must use HTTPS for non-local hosts, got: {url!r}"
- )
+ raise ValueError(f"{label} must use HTTPS for non-local hosts, got: {url!r}")
diff --git a/Gradata/src/gradata/_installer.py b/Gradata/src/gradata/_installer.py
index 8917f3eb..ef665b14 100644
--- a/Gradata/src/gradata/_installer.py
+++ b/Gradata/src/gradata/_installer.py
@@ -18,6 +18,7 @@
5. Run bootstrap steps from manifest
6. Print activation instructions
"""
+
from __future__ import annotations
import json
@@ -102,6 +103,7 @@ def _run_bootstrap(target_dir: Path, manifest: dict) -> list[dict]:
# Allowlist: only permit safe commands (python, uv) — no arbitrary shell execution
import re as _re
import shlex as _shlex
+
_ALLOWED_CMD = _re.compile(r"^(python3?|uv|pip)\s+[\w\s./\-]+$")
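+ # e.g. allowed: "uv sync", "python -m pip install ."; blocked: "rm -rf /"
+ # and anything with shell metacharacters, which fall outside [\w\s./\-]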
for step in bootstrap:
@@ -115,10 +117,13 @@ def _run_bootstrap(target_dir: Path, manifest: dict) -> list[dict]:
# Security: reject commands not in allowlist
if not _ALLOWED_CMD.match(command):
- results.append({
- "step": name, "status": "blocked",
- "note": f"Command not in allowlist: {command[:80]}",
- })
+ results.append(
+ {
+ "step": name,
+ "status": "blocked",
+ "note": f"Command not in allowlist: {command[:80]}",
+ }
+ )
continue
# Run from target directory — NO shell=True
@@ -137,11 +142,13 @@ def _run_bootstrap(target_dir: Path, manifest: dict) -> list[dict]:
results.append({"step": name, "status": "ok"})
else:
status = "FAIL" if required else "warn"
- results.append({
- "step": name,
- "status": status,
- "error": result.stderr[:200] if result.stderr else "non-zero exit",
- })
+ results.append(
+ {
+ "step": name,
+ "status": status,
+ "error": result.stderr[:200] if result.stderr else "non-zero exit",
+ }
+ )
except subprocess.TimeoutExpired:
results.append({"step": name, "status": "timeout"})
except Exception as e:
@@ -167,20 +174,24 @@ def list_installed() -> list[dict]:
if meta_file.exists():
try:
meta = json.loads(meta_file.read_text(encoding="utf-8"))
- info.update({
- "version": meta.get("brain_version"),
- "domain": meta.get("domain"),
- "installed": meta.get("installed_at", "?")[:10],
- })
+ info.update(
+ {
+ "version": meta.get("brain_version"),
+ "domain": meta.get("domain"),
+ "installed": meta.get("installed_at", "?")[:10],
+ }
+ )
except Exception:
pass
elif manifest_file.exists():
try:
manifest = json.loads(manifest_file.read_text(encoding="utf-8"))
- info.update({
- "version": manifest.get("metadata", {}).get("brain_version"),
- "domain": manifest.get("metadata", {}).get("domain"),
- })
+ info.update(
+ {
+ "version": manifest.get("metadata", {}).get("brain_version"),
+ "domain": manifest.get("metadata", {}).get("domain"),
+ }
+ )
except Exception:
pass
diff --git a/Gradata/src/gradata/_manifest_helpers.py b/Gradata/src/gradata/_manifest_helpers.py
index 635a6721..081445e8 100644
--- a/Gradata/src/gradata/_manifest_helpers.py
+++ b/Gradata/src/gradata/_manifest_helpers.py
@@ -21,9 +21,12 @@
def _session_window(conn, window: int = 20) -> tuple[int, int]:
"""Return (max_session, min_session) for a recent window. Shared helper."""
- max_session = conn.execute(
- "SELECT MAX(session) FROM events WHERE typeof(session)='integer'"
- ).fetchone()[0] or 0
+ max_session = (
+ conn.execute("SELECT MAX(session) FROM events WHERE typeof(session)='integer'").fetchone()[
+ 0
+ ]
+ or 0
+ )
return max_session, max(1, max_session - window + 1)
@@ -66,7 +69,9 @@ def _get_tables(ctx: "BrainContext | None" = None) -> list[str]:
try:
db = ctx.db_path if ctx else _p.DB_PATH
conn = get_connection(db)
- rows = conn.execute("SELECT name FROM sqlite_master WHERE type='table' ORDER BY name").fetchall()
+ rows = conn.execute(
+ "SELECT name FROM sqlite_master WHERE type='table' ORDER BY name"
+ ).fetchall()
conn.close()
return [r[0] for r in rows]
except Exception:
@@ -121,10 +126,21 @@ def _sdk_capabilities() -> dict:
("git_backfill", "gradata.enhancements.git_backfill", "gradata"),
("auto_correct_hook", "gradata.hooks.auto_correct", "gradata"),
("reporting", "gradata.enhancements.reporting", "fest.build-inspired+gradata"),
- ("quality_monitoring", "gradata.enhancements.quality_monitoring", "jarvis-inspired+gradata"),
+ (
+ "quality_monitoring",
+ "gradata.enhancements.quality_monitoring",
+ "jarvis-inspired+gradata",
+ ),
]
- all_modules = _paul_modules + _ruflo_modules + _deerflow_modules + _ecc_modules + _everos_modules + _core_modules
+ all_modules = (
+ _paul_modules
+ + _ruflo_modules
+ + _deerflow_modules
+ + _ecc_modules
+ + _everos_modules
+ + _core_modules
+ )
for name, module_path, source in all_modules:
try:
@@ -143,6 +159,7 @@ def _sdk_capabilities() -> dict:
def _tag_taxonomy() -> dict:
try:
from gradata._tag_taxonomy import get_taxonomy_summary
+
return get_taxonomy_summary()
except ImportError:
return {}
diff --git a/Gradata/src/gradata/_manifest_metrics.py b/Gradata/src/gradata/_manifest_metrics.py
index 8aece68f..cb7f689c 100644
--- a/Gradata/src/gradata/_manifest_metrics.py
+++ b/Gradata/src/gradata/_manifest_metrics.py
@@ -5,6 +5,7 @@
Split from _brain_manifest.py for file size compliance (<500 lines).
"""
+import logging
import re
import statistics
from datetime import datetime
@@ -29,6 +30,8 @@
if TYPE_CHECKING:
from gradata._paths import BrainContext
+_log = logging.getLogger(__name__)
+
def _lesson_distribution(ctx: "BrainContext | None" = None) -> dict[str, int]:
"""Count lessons by state from lessons.md."""
@@ -38,14 +41,13 @@ def _lesson_distribution(ctx: "BrainContext | None" = None) -> dict[str, int]:
if lessons_file.exists():
text = lessons_file.read_text(encoding="utf-8")
for state in ("INSTINCT", "PATTERN", "RULE", "UNTESTABLE"):
- count = len(re.findall(
- rf"^\[20\d{{2}}-\d{{2}}-\d{{2}}\]\s+\[{state}",
- text, re.MULTILINE
- ))
+ count = len(
+ re.findall(rf"^\[20\d{{2}}-\d{{2}}-\d{{2}}\]\s+\[{state}", text, re.MULTILINE)
+ )
if count > 0:
dist[state] = count
- except Exception:
- pass
+ except Exception as e:
+ _log.debug("lesson_distribution read failed (non-fatal): %s", e)
return dist
@@ -61,14 +63,20 @@ def _correction_rate_trend(ctx: "BrainContext | None" = None, window: int = 10)
return None
def _cro(min_s, max_s):
- outputs = conn.execute(
- "SELECT COUNT(*) FROM events WHERE type='OUTPUT' AND session BETWEEN ? AND ?",
- (min_s, max_s)
- ).fetchone()[0] or 0
- corrections = conn.execute(
- "SELECT COUNT(*) FROM events WHERE type='CORRECTION' AND session BETWEEN ? AND ?",
- (min_s, max_s)
- ).fetchone()[0] or 0
+ outputs = (
+ conn.execute(
+ "SELECT COUNT(*) FROM events WHERE type='OUTPUT' AND session BETWEEN ? AND ?",
+ (min_s, max_s),
+ ).fetchone()[0]
+ or 0
+ )
+ corrections = (
+ conn.execute(
+ "SELECT COUNT(*) FROM events WHERE type='CORRECTION' AND session BETWEEN ? AND ?",
+ (min_s, max_s),
+ ).fetchone()[0]
+ or 0
+ )
return round(corrections / outputs, 4) if outputs > 0 else None
current = _cro(max_session - window + 1, max_session)
@@ -78,14 +86,19 @@ def _cro(min_s, max_s):
if current is None or baseline is None:
return None
- direction = "improving" if current < baseline else ("stable" if current == baseline else "degrading")
+ direction = (
+ "improving"
+ if current < baseline
+ else ("stable" if current == baseline else "degrading")
+ )
return {
"current_window": current,
"baseline_window": baseline,
"direction": direction,
"sessions_in_window": window,
}
- except Exception:
+ except Exception as e:
+ _log.debug("correction_rate_trend failed (non-fatal): %s", e)
return None
@@ -137,7 +150,14 @@ def _temporal_provenance(ctx: "BrainContext | None" = None) -> dict:
pass
# Query 2: source counts grouped -- filter in Python, no second query
- internal_prefixes = ("event:", "correction_detector", "brain", "session", "gate", "supersede")
+ internal_prefixes = (
+ "event:",
+ "correction_detector",
+ "brain",
+ "session",
+ "gate",
+ "supersede",
+ )
source_rows = conn.execute("""
SELECT source, COUNT(*) as cnt FROM events
WHERE source IS NOT NULL AND source != ''
@@ -161,11 +181,12 @@ def _temporal_provenance(ctx: "BrainContext | None" = None) -> dict:
ORDER BY session
""").fetchall()
if len(session_starts) >= 2:
-
gaps = []
for i in range(1, len(session_starts)):
try:
- t0 = datetime.fromisoformat(str(session_starts[i - 1][0]).replace("Z", "+00:00"))
+ t0 = datetime.fromisoformat(
+ str(session_starts[i - 1][0]).replace("Z", "+00:00")
+ )
t1 = datetime.fromisoformat(str(session_starts[i][0]).replace("Z", "+00:00"))
gaps.append((t1 - t0).total_seconds() / 3600)
except (ValueError, TypeError):
@@ -185,12 +206,16 @@ def _temporal_provenance(ctx: "BrainContext | None" = None) -> dict:
gap_score = min(1.0, result["avg_gap_hours"] / 8) if result["avg_gap_hours"] > 0 else 0.0
result["provenance_score"] = round(
- 0.25 * day_score + 0.20 * spread_score + 0.20 * external_score
- + 0.15 * ratio_score + 0.20 * gap_score, 3
+ 0.25 * day_score
+ + 0.20 * spread_score
+ + 0.20 * external_score
+ + 0.15 * ratio_score
+ + 0.20 * gap_score,
+ 3,
)
- except Exception:
- pass
+ except Exception as e:
+ _log.debug("temporal_provenance failed (non-fatal): %s", e)
return result
@@ -234,7 +259,9 @@ def _outcome_correlation(ctx: "BrainContext | None" = None, window: int = 20) ->
if sx == 0 or sy == 0:
r = 0.0
else:
- r = sum((xi - mx) * (vi - my) for xi, vi in zip(x, values, strict=False)) / ((n - 1) * sx * sy)
+ r = sum((xi - mx) * (vi - my) for xi, vi in zip(x, values, strict=False)) / (
+ (n - 1) * sx * sy
+ )
return {
"outcome_trend_slope": round(slope, 4),
@@ -243,7 +270,8 @@ def _outcome_correlation(ctx: "BrainContext | None" = None, window: int = 20) ->
"data_points": n,
"improving": slope < 0 and p_value < 0.10, # negative slope = fewer edits = better
}
- except Exception:
+ except Exception as e:
+ _log.debug("outcome_correlation failed (non-fatal): %s", e)
return None
@@ -279,27 +307,36 @@ def _quality_metrics(ctx: "BrainContext | None" = None) -> dict:
# Use top-N real sessions (by event count) to avoid phantom session IDs
db = ctx.db_path if ctx else _p.DB_PATH
conn = get_connection(db)
- recent_sessions = [r[0] for r in conn.execute("""
+ recent_sessions = [
+ r[0]
+ for r in conn.execute("""
SELECT session FROM events
WHERE typeof(session)='integer'
GROUP BY session HAVING COUNT(*) >= 2
ORDER BY session DESC LIMIT 10
- """).fetchall()]
+ """).fetchall()
+ ]
if recent_sessions:
placeholders = ",".join("?" * len(recent_sessions))
- total_corrections = conn.execute(
- f"SELECT COUNT(*) FROM events WHERE type='CORRECTION' AND session IN ({placeholders})",
- recent_sessions
- ).fetchone()[0] or 0
- total_outputs = conn.execute(
- f"SELECT COUNT(*) FROM events WHERE type='OUTPUT' AND session IN ({placeholders})",
- recent_sessions
- ).fetchone()[0] or 0
+ total_corrections = (
+ conn.execute(
+ f"SELECT COUNT(*) FROM events WHERE type='CORRECTION' AND session IN ({placeholders})",
+ recent_sessions,
+ ).fetchone()[0]
+ or 0
+ )
+ total_outputs = (
+ conn.execute(
+ f"SELECT COUNT(*) FROM events WHERE type='OUTPUT' AND session IN ({placeholders})",
+ recent_sessions,
+ ).fetchone()[0]
+ or 0
+ )
if total_outputs > 0:
result["correction_rate"] = round(total_corrections / total_outputs, 3)
conn.close()
- except Exception:
- pass
+ except Exception as e:
+ _log.debug("quality_metrics correction_rate query failed (non-fatal): %s", e)
# FDA (fixed: correlation-based, excludes system sessions)
result["first_draft_acceptance"] = _compute_fda(ctx=ctx)
@@ -317,16 +354,18 @@ def _quality_metrics(ctx: "BrainContext | None" = None) -> dict:
try:
if lessons_file.exists():
text = lessons_file.read_text(encoding="utf-8")
- result["lessons_active"] = len(re.findall(
- r"^\[20\d{2}-\d{2}-\d{2}\]\s+\[(?:PATTERN|INSTINCT):", text, re.MULTILINE
- ))
+ result["lessons_active"] = len(
+ re.findall(
+ r"^\[20\d{2}-\d{2}-\d{2}\]\s+\[(?:PATTERN|INSTINCT):", text, re.MULTILINE
+ )
+ )
if archive_file.exists():
text = archive_file.read_text(encoding="utf-8")
- result["lessons_graduated"] = len(re.findall(
- r"^\[20\d{2}-\d{2}-\d{2}\]", text, re.MULTILINE
- ))
- except Exception:
- pass
+ result["lessons_graduated"] = len(
+ re.findall(r"^\[20\d{2}-\d{2}-\d{2}\]", text, re.MULTILINE)
+ )
+ except Exception as e:
+ _log.debug("quality_metrics lesson count failed (non-fatal): %s", e)
# Lesson distribution
result["lesson_distribution"] = _lesson_distribution(ctx=ctx)
@@ -335,16 +374,20 @@ def _quality_metrics(ctx: "BrainContext | None" = None) -> dict:
try:
db = ctx.db_path if ctx else _p.DB_PATH
conn = get_connection(db)
- sessions_trained = conn.execute(
- "SELECT MAX(session) FROM events WHERE typeof(session)='integer'"
- ).fetchone()[0] or 0
+ sessions_trained = (
+ conn.execute(
+ "SELECT MAX(session) FROM events WHERE typeof(session)='integer'"
+ ).fetchone()[0]
+ or 0
+ )
if total_corrections == 0:
- total_corrections = conn.execute(
- "SELECT COUNT(*) FROM events WHERE type='CORRECTION'"
- ).fetchone()[0] or 0
+ total_corrections = (
+ conn.execute("SELECT COUNT(*) FROM events WHERE type='CORRECTION'").fetchone()[0]
+ or 0
+ )
conn.close()
- except Exception:
- pass
+ except Exception as e:
+ _log.debug("quality_metrics sessions_trained query failed (non-fatal): %s", e)
density_trend = _per_session_density(ctx=ctx)
severity = _severity_ratio(ctx=ctx)
@@ -368,9 +411,7 @@ def _quality_metrics(ctx: "BrainContext | None" = None) -> dict:
transfer=transfer,
)
- result["score_confidence"] = _score_confidence(
- result["compound_score"], sessions_trained
- )
+ result["score_confidence"] = _score_confidence(result["compound_score"], sessions_trained)
result["outcome_correlation"] = _outcome_correlation(ctx=ctx)
result["counterfactual"] = _counterfactual_percentile(
result["compound_score"], sessions_trained, ctx=ctx
@@ -430,18 +471,22 @@ def _memory_composition(ctx: "BrainContext | None" = None) -> dict:
def _rag_status(ctx: "BrainContext | None" = None) -> dict:
"""RAG status. Chunks counted from SQLite brain_embeddings table."""
result = {
- "active": False, "provider": "unknown", "model": "unknown",
- "dimensions": 0, "chunks_indexed": 0,
+ "active": False,
+ "provider": "unknown",
+ "model": "unknown",
+ "dimensions": 0,
+ "chunks_indexed": 0,
"fts5_enabled": True,
}
try:
from gradata._config import EMBEDDING_DIMS, EMBEDDING_MODEL, EMBEDDING_PROVIDER, RAG_ACTIVE
+
result["active"] = RAG_ACTIVE
result["provider"] = EMBEDDING_PROVIDER
result["model"] = EMBEDDING_MODEL
result["dimensions"] = EMBEDDING_DIMS
- except Exception:
- pass
+ except Exception as e:
+ _log.debug("rag_status config import failed (non-fatal): %s", e)
# Count embeddings from SQLite
try:
db = ctx.db_path if ctx else _p.DB_PATH
@@ -449,6 +494,6 @@ def _rag_status(ctx: "BrainContext | None" = None) -> dict:
row = conn.execute("SELECT COUNT(*) FROM brain_embeddings").fetchone()
result["chunks_indexed"] = row[0] if row else 0
conn.close()
- except Exception:
- pass
+ except Exception as e:
+ _log.debug("rag_status chunk count query failed (non-fatal): %s", e)
return result
diff --git a/Gradata/src/gradata/_migrations/001_add_tenant_id.py b/Gradata/src/gradata/_migrations/001_add_tenant_id.py
index bf5ac4f8..8d3c4b47 100644
--- a/Gradata/src/gradata/_migrations/001_add_tenant_id.py
+++ b/Gradata/src/gradata/_migrations/001_add_tenant_id.py
@@ -22,6 +22,7 @@
Apply:
python src/gradata/_migrations/001_add_tenant_id.py --brain <brain_dir>
"""
+
from __future__ import annotations
import argparse
@@ -127,9 +128,7 @@ def plan(conn: sqlite3.Connection) -> dict:
actions.append(f"ALTER {t} ADD tenant_id TEXT")
# Backfill count: rows where tenant_id is NULL (or column doesn't exist -> all rows)
if column_exists(conn, t, "tenant_id"):
- cnt = conn.execute(
- f"SELECT COUNT(*) FROM {t} WHERE tenant_id IS NULL"
- ).fetchone()[0]
+ cnt = conn.execute(f"SELECT COUNT(*) FROM {t} WHERE tenant_id IS NULL").fetchone()[0]
else:
cnt = conn.execute(f"SELECT COUNT(*) FROM {t}").fetchone()[0]
if cnt:
@@ -145,9 +144,7 @@ def plan(conn: sqlite3.Connection) -> dict:
if not column_exists(conn, t, "visibility"):
actions.append(f"ALTER {t} ADD visibility TEXT DEFAULT 'private'")
- if table_exists(conn, "events") and not column_exists(
- conn, "events", "schema_version"
- ):
+ if table_exists(conn, "events") and not column_exists(conn, "events", "schema_version"):
actions.append("ALTER events ADD schema_version INTEGER DEFAULT 1")
return {
@@ -201,14 +198,10 @@ def up(conn: sqlite3.Connection, tenant_id: str) -> dict:
continue
if add_column_if_missing(conn, t, "tenant_id", "TEXT"):
summary["columns_added"].append(f"{t}.tenant_id")
- if add_column_if_missing(
- conn, t, "visibility", "TEXT DEFAULT 'private'"
- ):
+ if add_column_if_missing(conn, t, "visibility", "TEXT DEFAULT 'private'"):
summary["columns_added"].append(f"{t}.visibility")
# Backfill visibility for pre-existing NULLs
- cur = conn.execute(
- f"UPDATE {t} SET visibility = 'private' WHERE visibility IS NULL"
- )
+ cur = conn.execute(f"UPDATE {t} SET visibility = 'private' WHERE visibility IS NULL")
summary["visibility_backfilled"] += cur.rowcount
# Backfill tenant_id: all existing rows belong to primary tenant.
# Future: admin can promote rows to visibility='global' & tenant_id=NULL.
@@ -218,9 +211,7 @@ def up(conn: sqlite3.Connection, tenant_id: str) -> dict:
)
if cur.rowcount:
summary["rows_backfilled"] += cur.rowcount
- summary["tables_backfilled"][t] = (
- summary["tables_backfilled"].get(t, 0) + cur.rowcount
- )
+ summary["tables_backfilled"][t] = summary["tables_backfilled"].get(t, 0) + cur.rowcount
idx = f"idx_{t}_tenant"
if create_index_if_missing(conn, idx, t, "tenant_id"):
summary["indexes_created"].append(idx)
@@ -233,13 +224,9 @@ def up(conn: sqlite3.Connection, tenant_id: str) -> dict:
# earlier partial run. This keeps the migration idempotent across retries
# instead of only touching rows the first time the column is added.
if table_exists(conn, "events"):
- if add_column_if_missing(
- conn, "events", "schema_version", "INTEGER DEFAULT 1"
- ):
+ if add_column_if_missing(conn, "events", "schema_version", "INTEGER DEFAULT 1"):
summary["columns_added"].append("events.schema_version")
- conn.execute(
- "UPDATE events SET schema_version = 1 WHERE schema_version IS NULL"
- )
+ conn.execute("UPDATE events SET schema_version = 1 WHERE schema_version IS NULL")
# Commit lives in the caller (_apply_numbered) so the schema/data changes
# and the `migrations` tracking row land atomically.
@@ -283,8 +270,7 @@ def _main() -> int:
for a in p["actions"]:
print(f" {a}")
print(
- f" backfill {p['total_rows_to_backfill']} rows across "
- f"{len(p['row_backfills'])} tables"
+ f" backfill {p['total_rows_to_backfill']} rows across {len(p['row_backfills'])} tables"
)
if p["row_backfills"]:
sample = p["row_backfills"][:10]
diff --git a/Gradata/src/gradata/_migrations/002_add_event_identity.py b/Gradata/src/gradata/_migrations/002_add_event_identity.py
new file mode 100644
index 00000000..5c174582
--- /dev/null
+++ b/Gradata/src/gradata/_migrations/002_add_event_identity.py
@@ -0,0 +1,240 @@
+# ruff: noqa: N999 # numbered migration module — digit prefix is intentional
+"""Migration 002: add event_id / device_id / content_hash to events.
+
+Unblocks multi-device sync:
+- ``event_id`` — ULID, globally unique, time-ordered. Primary cloud key.
+- ``device_id`` — which machine wrote the event (authorship, ordering).
+- ``content_hash`` — sha256(canonical-JSON({type, source, data})). Dedup
+ across transcript replays and push retries.
+- ``correction_chain_id`` — groups a correction → lesson → graduation chain.
+- ``origin_agent`` — which subagent or CLI surface emitted it. Debug only.
+
+All five columns are nullable — existing writers keep working unchanged. The
+``emit()`` path will be taught to populate them in a follow-up commit; this
+migration adds the schema and chunk-backfills historical rows so nothing is
+left NULL in steady state.
+
+Backfill:
+- ``event_id`` — ULID whose 48-bit timestamp component is derived from
+ ``events.ts`` via ``ulid_from_iso``. Preserves the
+ useful property that event_ids sort like timestamps.
+- ``device_id``      — current device's id (from ``<brain_dir>/.device_id``).
+ Per council: no ``legacy-*`` prefix; historical rows
+ belong to *this* machine because this is where they
+ were produced.
+- ``content_hash`` — sha256 over canonical-JSON of ``{type, source, data}``
+ (same fields the emit-time hasher will use).
+
+The backfill runs in chunks of 10_000 rows per transaction so a brain with
+millions of events never holds a single enormous write lock. Progress is
+idempotent — re-running resumes from the first row still missing an event_id.
+"""
+
+from __future__ import annotations
+
+import argparse
+import hashlib
+import json
+import sqlite3
+import sys
+from pathlib import Path
+
+sys.path.insert(0, str(Path(__file__).resolve().parent))
+from _runner import ( # type: ignore[import-not-found]
+ add_column_if_missing,
+ create_index_if_missing,
+ has_applied,
+ mark_applied,
+ resolve_brain_db,
+ table_exists,
+)
+from _ulid import ulid_from_iso # type: ignore[import-not-found]
+from device_uuid import get_or_create_device_id # type: ignore[import-not-found]
+
+NAME = "002_add_event_identity"
+
+CHUNK_SIZE = 10_000
+
+NEW_COLUMNS: list[tuple[str, str]] = [
+ ("event_id", "TEXT"),
+ ("device_id", "TEXT"),
+ ("content_hash", "TEXT"),
+ ("correction_chain_id", "TEXT"),
+ ("origin_agent", "TEXT"),
+]
+
+
+def _canonical_content_hash(ev_type: str, source: str | None, data_json: str | None) -> str:
+ """sha256 over canonical JSON of {type, source, data}.
+
+ Canonical means: sort_keys + separators=(',', ':') + ensure_ascii=False.
+ Any two events with the same payload produce the same hash regardless of
+ how Python happened to spell the dict at write time.
+ """
+ try:
+ data = json.loads(data_json) if data_json else {}
+ except (json.JSONDecodeError, TypeError):
+ data = {"_raw": data_json}
+ canonical = json.dumps(
+ {"type": ev_type, "source": source or "", "data": data},
+ sort_keys=True,
+ separators=(",", ":"),
+ ensure_ascii=False,
+ )
+ return hashlib.sha256(canonical.encode("utf-8")).hexdigest()
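+
+# Illustrative property (comment only, nothing executes at import): key
+# order and whitespace in the stored JSON do not affect the hash, so
+# transcript replays and push retries dedup to the same content_hash.
+#
+#   _canonical_content_hash("note", "cli", '{"b": 2, "a": 1}')
+#   == _canonical_content_hash("note", "cli", '{"a":1,"b":2}')   # True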
+
+
+def plan(conn: sqlite3.Connection) -> dict:
+ if not table_exists(conn, "events"):
+ return {"actions": [], "backfill_rows": 0}
+
+ actions: list[str] = []
+ for col, decl in NEW_COLUMNS:
+ if (
+ conn.execute(
+ "SELECT 1 FROM pragma_table_info('events') WHERE name = ?", (col,)
+ ).fetchone()
+ is None
+ ):
+ actions.append(f"ALTER events ADD {col} {decl}")
+
+ for idx, cols in [
+ ("idx_events_event_id", "event_id"),
+ ("idx_events_device_id", "device_id"),
+ ("idx_events_content_hash", "content_hash"),
+ ]:
+ actions.append(f"ensure index {idx}({cols})")
+
+ # Rows needing backfill: event_id IS NULL is the canonical signal.
+ try:
+        to_backfill = conn.execute(
+            "SELECT COUNT(*) FROM events WHERE event_id IS NULL"
+        ).fetchone()[0]
+ except sqlite3.OperationalError:
+ # Column doesn't exist yet — everything needs backfill.
+ to_backfill = conn.execute("SELECT COUNT(*) FROM events").fetchone()[0]
+
+ return {
+ "actions": actions,
+ "backfill_rows": to_backfill,
+ "chunk_size": CHUNK_SIZE,
+ }
+
+
+def up(conn: sqlite3.Connection, tenant_id: str) -> dict:
+ """Apply migration. ``tenant_id`` is unused here but the runner passes it positionally."""
+ del tenant_id # event identity is device-scoped, not tenant-scoped
+ summary: dict = {
+ "columns_added": [],
+ "indexes_created": [],
+ "rows_backfilled": 0,
+ "chunks_committed": 0,
+ }
+
+ if not table_exists(conn, "events"):
+ return summary
+
+ # 1. Schema — all nullable so concurrent writers keep working.
+ for col, decl in NEW_COLUMNS:
+ if add_column_if_missing(conn, "events", col, decl):
+ summary["columns_added"].append(f"events.{col}")
+
+ # 2. Indexes.
+ if create_index_if_missing(conn, "idx_events_event_id", "events", "event_id"):
+ summary["indexes_created"].append("idx_events_event_id")
+ if create_index_if_missing(conn, "idx_events_device_id", "events", "device_id"):
+ summary["indexes_created"].append("idx_events_device_id")
+ if create_index_if_missing(conn, "idx_events_content_hash", "events", "content_hash"):
+ summary["indexes_created"].append("idx_events_content_hash")
+
+ # 3. Chunked backfill. Resolve device_id once — assigned to every
+ # historical row on this machine (per council: no legacy-* prefix).
+ brain_dir = _brain_dir_for(conn)
+ device_id = get_or_create_device_id(brain_dir)
+
+ while True:
+ rows = conn.execute(
+ "SELECT id, ts, type, source, data_json FROM events WHERE event_id IS NULL LIMIT ?",
+ (CHUNK_SIZE,),
+ ).fetchall()
+ if not rows:
+ break
+ updates: list[tuple[str, str, str, int]] = []
+ for row_id, ts, ev_type, source, data_json in rows:
+ eid = ulid_from_iso(ts or "")
+ chash = _canonical_content_hash(ev_type, source, data_json)
+ updates.append((eid, device_id, chash, row_id))
+ conn.executemany(
+ "UPDATE events SET event_id = ?, device_id = ?, content_hash = ? WHERE id = ?",
+ updates,
+ )
+ summary["rows_backfilled"] += len(updates)
+ summary["chunks_committed"] += 1
+ # Intermediate commit: lets other writers make progress between chunks.
+ # The runner's outer commit still fences the migration-applied row so
+ # partial work is safely resumable on next startup.
+ conn.commit()
+
+ return summary
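+
+# Resumability sketch: the chunk query filters on event_id IS NULL, so an
+# interrupted backfill picks up where it stopped.
+#
+#   up(conn, tenant_id="")   # killed after N chunks
+#   up(conn, tenant_id="")   # a re-run stamps only the remaining rows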
+
+
+def _brain_dir_for(conn: sqlite3.Connection) -> Path:
+ """Best-effort resolution of the brain directory from an open connection."""
+ row = conn.execute("PRAGMA database_list").fetchone()
+ # row = (seq, name, file)
+ if row and row[2]:
+ return Path(row[2]).resolve().parent
+ return Path.cwd()
+
+
+def _main() -> int:
+ ap = argparse.ArgumentParser(description=f"Run migration {NAME}")
+ ap.add_argument("--brain", help="Path to brain directory or system.db")
+ ap.add_argument("--dry-run", action="store_true")
+ args = ap.parse_args()
+
+ db_path = resolve_brain_db(args.brain)
+ if not db_path.exists():
+ print(f"ERROR: brain DB not found at {db_path}", file=sys.stderr)
+ return 2
+
+ conn = sqlite3.connect(str(db_path))
+ conn.execute("PRAGMA journal_mode=WAL")
+ conn.execute("PRAGMA busy_timeout=5000")
+
+ try:
+ if has_applied(conn, NAME) and not args.dry_run:
+ print(f"Already applied: {NAME} (no-op)")
+ return 0
+
+ p = plan(conn)
+ print("\n--- plan ---")
+ for a in p["actions"]:
+ print(f" {a}")
+ print(f" backfill {p['backfill_rows']} rows (chunk={p['chunk_size']})")
+
+ if args.dry_run:
+ print("\n(dry-run) no changes made")
+ return 0
+
+ print("\n--- applying ---")
+ summary = up(conn, tenant_id="")
+ mark_applied(
+ conn,
+ NAME,
+ rows_affected=summary["rows_backfilled"],
+ notes=json.dumps({k: v for k, v in summary.items() if k != "rows_backfilled"}),
+ )
+ conn.commit()
+ print(f"columns_added : {summary['columns_added']}")
+ print(f"indexes_created : {summary['indexes_created']}")
+ print(f"rows_backfilled : {summary['rows_backfilled']}")
+ print(f"chunks_committed : {summary['chunks_committed']}")
+ print("\nOK")
+ return 0
+ finally:
+ conn.close()
+
+
+if __name__ == "__main__":
+ raise SystemExit(_main())
diff --git a/Gradata/src/gradata/_migrations/003_add_sync_state.py b/Gradata/src/gradata/_migrations/003_add_sync_state.py
new file mode 100644
index 00000000..b38c6a38
--- /dev/null
+++ b/Gradata/src/gradata/_migrations/003_add_sync_state.py
@@ -0,0 +1,166 @@
+# ruff: noqa: N999 # numbered migration module — digit prefix is intentional
+"""Migration 003: sync_state table + per-device watermark columns.
+
+Creates ``sync_state`` if it does not already exist (today it is created
+ad-hoc inside ``_cloud_sync.py`` tests and assumed to exist in prod) and
+adds the three watermark columns the Phase 1 push/pull client needs:
+
+- ``device_id`` — which machine this row belongs to. Pairs with
+ ``tenant_id`` (added by Migration 001) so the
+ future composite key ``(tenant_id, device_id)``
+ scopes watermarks per machine.
+- ``last_push_event_id`` — highest ULID this device has successfully
+ shipped to ``/events/push``. Resume point.
+- ``last_pull_cursor`` — opaque cursor returned by ``/events/pull``.
+ Used to avoid re-downloading own events.
+
+Backward compat: the existing ``brain_id`` primary key stays untouched so
+``_cloud_sync.py``'s ``_mark_push`` / ``_last_push_at`` calls keep working.
+Task 7 will switch push logic to the composite key or delete
+``_cloud_sync.py`` entirely — whichever the Phase 1 cleanup chooses.
+"""
+
+from __future__ import annotations
+
+import argparse
+import json
+import sqlite3
+import sys
+from pathlib import Path
+
+sys.path.insert(0, str(Path(__file__).resolve().parent))
+from _runner import ( # type: ignore[import-not-found]
+ add_column_if_missing,
+ create_index_if_missing,
+ has_applied,
+ mark_applied,
+ resolve_brain_db,
+ table_exists,
+)
+
+NAME = "003_add_sync_state"
+
+SYNC_STATE_SQL = """
+CREATE TABLE IF NOT EXISTS sync_state (
+ brain_id TEXT PRIMARY KEY,
+ last_push_at TEXT,
+ updated_at TEXT
+)
+"""
+
+NEW_COLUMNS: list[tuple[str, str]] = [
+ ("device_id", "TEXT"),
+ ("last_push_event_id", "TEXT"),
+ ("last_pull_cursor", "TEXT"),
+ ("tenant_id", "TEXT"), # idempotent — Migration 001 may have added it already
+]
+
+
+def plan(conn: sqlite3.Connection) -> dict:
+ actions: list[str] = []
+ if not table_exists(conn, "sync_state"):
+ actions.append("CREATE TABLE sync_state")
+ for col, decl in NEW_COLUMNS:
+ if (
+ conn.execute(
+ "SELECT 1 FROM pragma_table_info('sync_state') WHERE name = ?",
+ (col,),
+ ).fetchone()
+ is None
+ ):
+ actions.append(f"ALTER sync_state ADD {col} {decl}")
+ actions.append("ensure index idx_sync_state_device(device_id)")
+ actions.append("ensure index idx_sync_state_tenant_device(tenant_id, device_id)")
+ return {"actions": actions}
+
+
+def up(conn: sqlite3.Connection, tenant_id: str) -> dict:
+ summary: dict = {
+ "columns_added": [],
+ "indexes_created": [],
+ "table_created": False,
+ "rows_backfilled": 0,
+ }
+
+ if not table_exists(conn, "sync_state"):
+ conn.execute(SYNC_STATE_SQL)
+ summary["table_created"] = True
+
+ for col, decl in NEW_COLUMNS:
+ if add_column_if_missing(conn, "sync_state", col, decl):
+ summary["columns_added"].append(f"sync_state.{col}")
+
+ # Backfill tenant_id on any pre-existing rows so the composite key
+ # ``(tenant_id, device_id)`` is populated end-to-end even on brains
+ # upgraded through 001 → 003 in a single startup.
+ cur = conn.execute(
+ "UPDATE sync_state SET tenant_id = ? WHERE tenant_id IS NULL",
+ (tenant_id,),
+ )
+ if cur.rowcount:
+ summary["rows_backfilled"] += cur.rowcount
+
+ if create_index_if_missing(conn, "idx_sync_state_device", "sync_state", "device_id"):
+ summary["indexes_created"].append("idx_sync_state_device")
+ if create_index_if_missing(
+ conn,
+ "idx_sync_state_tenant_device",
+ "sync_state",
+ "tenant_id, device_id",
+ ):
+ summary["indexes_created"].append("idx_sync_state_tenant_device")
+
+ return summary
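+
+# Shape sketch of a fully-populated watermark row (illustrative values only;
+# the brn_/dev_ prefixes follow the id conventions in device_uuid.py):
+#
+#   brain_id | tenant_id  | device_id | last_push_event_id | last_pull_cursor
+#   brn_...  | uuid hex   | dev_...   | 01H... (ULID)      | opaque cursor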
+
+
+def _main() -> int:
+ ap = argparse.ArgumentParser(description=f"Run migration {NAME}")
+ ap.add_argument("--brain", help="Path to brain directory or system.db")
+ ap.add_argument("--dry-run", action="store_true")
+ args = ap.parse_args()
+
+ db_path = resolve_brain_db(args.brain)
+ if not db_path.exists():
+ print(f"ERROR: brain DB not found at {db_path}", file=sys.stderr)
+ return 2
+
+ sys.path.insert(0, str(Path(__file__).resolve().parent))
+ from tenant_uuid import get_or_create_tenant_id # type: ignore[import-not-found]
+
+ conn = sqlite3.connect(str(db_path))
+ conn.execute("PRAGMA journal_mode=WAL")
+ conn.execute("PRAGMA busy_timeout=5000")
+ try:
+ if has_applied(conn, NAME) and not args.dry_run:
+ print(f"Already applied: {NAME} (no-op)")
+ return 0
+
+ p = plan(conn)
+ print("\n--- plan ---")
+ for a in p["actions"]:
+ print(f" {a}")
+ if args.dry_run:
+ print("\n(dry-run) no changes made")
+ return 0
+
+ tid = get_or_create_tenant_id(db_path.parent)
+ summary = up(conn, tenant_id=tid)
+ mark_applied(
+ conn,
+ NAME,
+ rows_affected=summary["rows_backfilled"],
+ notes=json.dumps({k: v for k, v in summary.items() if k != "rows_backfilled"}),
+ )
+ conn.commit()
+ print(f"table_created : {summary['table_created']}")
+ print(f"columns_added : {summary['columns_added']}")
+ print(f"indexes_created : {summary['indexes_created']}")
+ print(f"rows_backfilled : {summary['rows_backfilled']}")
+ print("\nOK")
+ return 0
+ finally:
+ conn.close()
+
+
+if __name__ == "__main__":
+ raise SystemExit(_main())
diff --git a/Gradata/src/gradata/_migrations/_runner.py b/Gradata/src/gradata/_migrations/_runner.py
index aee7d01d..62a02146 100644
--- a/Gradata/src/gradata/_migrations/_runner.py
+++ b/Gradata/src/gradata/_migrations/_runner.py
@@ -5,6 +5,7 @@
- ``has_applied`` / ``mark_applied``
- Safe column / index existence checks for SQLite
"""
+
from __future__ import annotations
import sqlite3
@@ -39,9 +40,7 @@ def has_applied(conn: sqlite3.Connection, name: str) -> bool:
).fetchone()
if row is None:
return False
- row = conn.execute(
- "SELECT 1 FROM migrations WHERE name = ?", (name,)
- ).fetchone()
+ row = conn.execute("SELECT 1 FROM migrations WHERE name = ?", (name,)).fetchone()
return row is not None
@@ -112,6 +111,7 @@ def create_index_if_missing(
def resolve_brain_db(brain_arg: str | Path | None) -> Path:
"""Resolve the brain SQLite path from a CLI arg or env."""
import os
+
if brain_arg:
p = Path(brain_arg).expanduser().resolve()
else:
diff --git a/Gradata/src/gradata/_migrations/_ulid.py b/Gradata/src/gradata/_migrations/_ulid.py
new file mode 100644
index 00000000..8ad2c765
--- /dev/null
+++ b/Gradata/src/gradata/_migrations/_ulid.py
@@ -0,0 +1,51 @@
+"""Minimal ULID generator — no external dependency.
+
+26-char Crockford base32 string: 10 chars of 48-bit millisecond timestamp
++ 16 chars of 80-bit randomness. Lexicographically sortable by time,
+globally unique in practice (two ids generated in the same millisecond
+collide with probability 2^-80).
+
+We roll our own because adding a dep for ~20 lines of code is not worth
+the supply-chain surface. If a future caller needs the full `python-ulid`
+API (monotonic, parsing back to components), swap this out.
+"""
+
+from __future__ import annotations
+
+import os
+import time
+
+# Crockford base32: no I, L, O, U.
+_ALPHABET = "0123456789ABCDEFGHJKMNPQRSTVWXYZ"
+
+
+def _encode(value: int, length: int) -> str:
+ out = []
+ for _ in range(length):
+ out.append(_ALPHABET[value & 0x1F])
+ value >>= 5
+ return "".join(reversed(out))
+
+
+def new_ulid(ts_ms: int | None = None) -> str:
+ """Return a new ULID string. ``ts_ms`` lets callers backfill historical ts."""
+ if ts_ms is None:
+ ts_ms = int(time.time() * 1000)
+ ts_ms &= (1 << 48) - 1
+ rand = int.from_bytes(os.urandom(10), "big")
+ return _encode(ts_ms, 10) + _encode(rand, 16)
+
+
+def ulid_from_iso(iso_ts: str) -> str:
+ """Build a ULID whose timestamp component matches ``iso_ts`` (ISO 8601).
+
+ Used by Migration 002 to backfill event_id on historical rows so the
+ leading 10 chars still sort-align with the original ``events.ts``.
+ """
+ from datetime import datetime
+
+ try:
+ dt = datetime.fromisoformat(iso_ts.replace("Z", "+00:00"))
+ except (ValueError, TypeError):
+ return new_ulid()
+ ts_ms = int(dt.timestamp() * 1000)
+ return new_ulid(ts_ms=ts_ms)
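+
+# Sort-alignment sketch (comment only): the leading 10 chars encode the
+# 48-bit millisecond timestamp, so ULIDs built from increasing timestamps
+# compare like the timestamps themselves.
+#
+#   a = ulid_from_iso("2024-01-01T00:00:00Z")
+#   b = ulid_from_iso("2024-06-01T00:00:00Z")
+#   assert a[:10] < b[:10]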
diff --git a/Gradata/src/gradata/_migrations/device_uuid.py b/Gradata/src/gradata/_migrations/device_uuid.py
new file mode 100644
index 00000000..3458be35
--- /dev/null
+++ b/Gradata/src/gradata/_migrations/device_uuid.py
@@ -0,0 +1,107 @@
+"""Device UUID read/create for a given brain directory.
+
+The device_id is stored at ``<brain_dir>/.device_id`` as a plain UTF-8 file.
+It identifies *which machine* wrote an event — scoping authorship so cloud
+sync can enforce "one author per event" and deterministic global ordering
+on ``(ts, device_id, event_id)``.
+
+Format: ``dev_<32 hex>`` — ``dev_`` prefix + uuid4 hex. The prefix lets logs
+and error messages distinguish it from tenant_id (no prefix) and brain_id
+(``brn_``).
+
+Per-brain, per-machine: two devices sharing a brain get different ids; one
+brain on one machine is stable across sessions.
+"""
+
+from __future__ import annotations
+
+import argparse
+import contextlib
+import os
+import re
+import uuid
+from pathlib import Path
+
+DEVICE_FILE = ".device_id"
+_DEVICE_RE = re.compile(r"^dev_[0-9a-f]{32}$")
+
+
+def _new_device_id() -> str:
+ return f"dev_{uuid.uuid4().hex}"
+
+
+def _is_valid(s: str) -> bool:
+ return bool(_DEVICE_RE.match(s))
+
+
+def get_or_create_device_id(brain_dir: str | Path) -> str:
+ """Atomic read-or-create of the brain's device id for this machine.
+
+ Same race-safe pattern as ``tenant_uuid.get_or_create_tenant_id``:
+ exclusive create of a pid-scoped temp file, atomic ``os.replace``,
+ fall through to read on collision.
+ """
+ brain = Path(brain_dir).expanduser().resolve()
+ brain.mkdir(parents=True, exist_ok=True)
+ fpath = brain / DEVICE_FILE
+
+ if fpath.exists():
+ did = fpath.read_text(encoding="utf-8").strip()
+ if _is_valid(did):
+ return did
+
+ new_did = _new_device_id()
+ tmp = brain / f".device_id.tmp.{os.getpid()}"
+ flags = os.O_WRONLY | os.O_CREAT | os.O_EXCL
+ try:
+ fd = os.open(tmp, flags, 0o644)
+ except FileExistsError:
+ # Extremely unlikely PID collision; fall through to disk read.
+ pass
+ else:
+ try:
+ with os.fdopen(fd, "w", encoding="utf-8") as fh:
+ fh.write(new_did)
+ if not fpath.exists():
+ os.replace(tmp, fpath)
+ else:
+ os.unlink(tmp)
+ except Exception:
+ with contextlib.suppress(OSError):
+ os.unlink(tmp)
+ raise
+
+ if fpath.exists():
+ did = fpath.read_text(encoding="utf-8").strip()
+ if _is_valid(did):
+ return did
+ return new_did
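+
+# Concurrency sketch: two processes racing to create the id converge on one
+# value. O_EXCL on the pid-scoped temp plus atomic os.replace means the
+# first writer wins and the loser re-reads the winner's file.
+#
+#   did = get_or_create_device_id("~/brain")   # "dev_" + 32 hex chars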
+
+
+def read_device_id(brain_dir: str | Path) -> str | None:
+ fpath = Path(brain_dir).expanduser().resolve() / DEVICE_FILE
+ if not fpath.exists():
+ return None
+ did = fpath.read_text(encoding="utf-8").strip()
+ return did if _is_valid(did) else None
+
+
+def _main() -> int:
+ ap = argparse.ArgumentParser(description="Read or create brain device id")
+ ap.add_argument("--brain", required=True, help="Path to brain directory")
+ ap.add_argument("--peek", action="store_true", help="Read only; never create")
+ args = ap.parse_args()
+
+ if args.peek:
+ did = read_device_id(args.brain)
+ if did is None:
+ print("(no device id)")
+ return 1
+ print(did)
+ return 0
+
+ print(get_or_create_device_id(args.brain))
+ return 0
+
+
+if __name__ == "__main__":
+ raise SystemExit(_main())
diff --git a/Gradata/src/gradata/_migrations/fill_null_tenant.py b/Gradata/src/gradata/_migrations/fill_null_tenant.py
index 06919bc9..9dbd6a20 100644
--- a/Gradata/src/gradata/_migrations/fill_null_tenant.py
+++ b/Gradata/src/gradata/_migrations/fill_null_tenant.py
@@ -13,6 +13,7 @@
python src/gradata/_migrations/fill_null_tenant.py --brain C:/.../brain
python src/gradata/_migrations/fill_null_tenant.py --brain C:/.../brain --dry-run
"""
+
from __future__ import annotations
import argparse
@@ -108,9 +109,7 @@ def main() -> int:
continue
if not column_exists(conn, t, "tenant_id"):
continue
- cnt = conn.execute(
- f"SELECT COUNT(*) FROM {t} WHERE tenant_id IS NULL"
- ).fetchone()[0]
+ cnt = conn.execute(f"SELECT COUNT(*) FROM {t} WHERE tenant_id IS NULL").fetchone()[0]
if not cnt:
continue
if args.dry_run:
diff --git a/Gradata/src/gradata/_migrations/tenant_uuid.py b/Gradata/src/gradata/_migrations/tenant_uuid.py
index e1255251..f360620c 100644
--- a/Gradata/src/gradata/_migrations/tenant_uuid.py
+++ b/Gradata/src/gradata/_migrations/tenant_uuid.py
@@ -12,6 +12,7 @@
CLI:
python src/gradata/_migrations/tenant_uuid.py --brain C:/.../brain
"""
+
from __future__ import annotations
import argparse
diff --git a/Gradata/src/gradata/_mine_transcripts.py b/Gradata/src/gradata/_mine_transcripts.py
index 43e254e9..8d67902e 100644
--- a/Gradata/src/gradata/_mine_transcripts.py
+++ b/Gradata/src/gradata/_mine_transcripts.py
@@ -6,6 +6,7 @@
Public entry point: run_mine(brain_root, projects_root, project, commit, dry_run).
"""
+
from __future__ import annotations
import hashlib
@@ -14,7 +15,7 @@
import sys
import unicodedata
from collections import Counter
-from datetime import datetime, timezone
+from datetime import UTC, datetime
from pathlib import Path
# ── Pushback / reminder / gap / challenge regexes ──
@@ -58,35 +59,129 @@
# categorization the live hook produces. Order matters: specific categories
# before broad ones (ACCURACY contains "wrong" which would swallow others).
CATEGORY_KEYWORDS: dict[str, list[str]] = {
- "DATA_INTEGRITY": ["filter", "owner", "oliver only", "anna", "shared",
- "duplicate", "overlap", "wrong person", "wrong deal"],
- "ARCHITECTURE": ["import", "module", "class", "function", "refactor",
- "dependency", "structure", "script", "python", "def "],
- "TOOL": ["tool", "api", "mcp", "install", "config", "command", "endpoint",
- "token", "integration"],
- "LEADS": ["lead", "prospect", "enrich", "csv", "campaign", "instantly",
- "apollo", "linkedin", "icp"],
- "PRICING": ["price", "cost", "pricing", "monthly", "annual", "$",
- "starter", "standard", "plan"],
+ "DATA_INTEGRITY": [
+ "filter",
+ "owner",
+ "oliver only",
+ "anna",
+ "shared",
+ "duplicate",
+ "overlap",
+ "wrong person",
+ "wrong deal",
+ ],
+ "ARCHITECTURE": [
+ "import",
+ "module",
+ "class",
+ "function",
+ "refactor",
+ "dependency",
+ "structure",
+ "script",
+ "python",
+ "def ",
+ ],
+ "TOOL": [
+ "tool",
+ "api",
+ "mcp",
+ "install",
+ "config",
+ "command",
+ "endpoint",
+ "token",
+ "integration",
+ ],
+ "LEADS": [
+ "lead",
+ "prospect",
+ "enrich",
+ "csv",
+ "campaign",
+ "instantly",
+ "apollo",
+ "linkedin",
+ "icp",
+ ],
+ "PRICING": [
+ "price",
+ "cost",
+ "pricing",
+ "monthly",
+ "annual",
+ "$",
+ "starter",
+ "standard",
+ "plan",
+ ],
"DEMO_PREP": ["demo", "cheat sheet", "battlecard", "prep"],
- "DRAFTING": ["email", "draft", "subject line", "follow-up", "copy",
- "prose", "paragraph", "rewrite", "subject"],
- "CONTEXT": ["session type", "startup context", "context window",
- "already know", "load context", "you loaded"],
- "PROCESS": ["skip", "forgot", "missing step", "workflow", "told you",
- "step", "order"],
- "THOROUGHNESS": ["incomplete", "all of them", "don't stop", "finish",
- "remaining", "rest of", "the rest"],
- "POSITIONING": ["agency", "competitor", "frame", "position", "pitch",
- "messaging", "value prop"],
- "COMMUNICATION": ["unclear", "ambiguous", "severity", "blocker",
- "too verbose", "verbose", "too long", "confusing"],
- "TONE": ["tone", "aggressive", "pushy", "salesy", "formal", "casual",
- "softer", "harsh"],
- "ACCURACY": ["incorrect", "inaccurate", "verify", "hallucin", "fabricat",
- "made up", "not real", "doesn't exist", "never said",
- "misquot", "stale", "wrong number", "wrong data",
- "wrong name", "wrong company"],
+ "DRAFTING": [
+ "email",
+ "draft",
+ "subject line",
+ "follow-up",
+ "copy",
+ "prose",
+ "paragraph",
+ "rewrite",
+ "subject",
+ ],
+ "CONTEXT": [
+ "session type",
+ "startup context",
+ "context window",
+ "already know",
+ "load context",
+ "you loaded",
+ ],
+ "PROCESS": ["skip", "forgot", "missing step", "workflow", "told you", "step", "order"],
+ "THOROUGHNESS": [
+ "incomplete",
+ "all of them",
+ "don't stop",
+ "finish",
+ "remaining",
+ "rest of",
+ "the rest",
+ ],
+ "POSITIONING": [
+ "agency",
+ "competitor",
+ "frame",
+ "position",
+ "pitch",
+ "messaging",
+ "value prop",
+ ],
+ "COMMUNICATION": [
+ "unclear",
+ "ambiguous",
+ "severity",
+ "blocker",
+ "too verbose",
+ "verbose",
+ "too long",
+ "confusing",
+ ],
+ "TONE": ["tone", "aggressive", "pushy", "salesy", "formal", "casual", "softer", "harsh"],
+ "ACCURACY": [
+ "incorrect",
+ "inaccurate",
+ "verify",
+ "hallucin",
+ "fabricat",
+ "made up",
+ "not real",
+ "doesn't exist",
+ "never said",
+ "misquot",
+ "stale",
+ "wrong number",
+ "wrong data",
+ "wrong name",
+ "wrong company",
+ ],
}
@@ -213,25 +308,30 @@ def _mine_session(path: Path) -> list[dict]:
if not signals:
continue
unique = list(dict.fromkeys(signals))
- snippet = re.sub(r'[\"\\\n]', " ", text[:100])
+ snippet = re.sub(r"[\"\\\n]", " ", text[:100])
category = _classify_correction(text)
session_uuid = msg.get("sessionId") or path.stem
- events.append({
- "ts": msg.get("timestamp") or datetime.now(timezone.utc).isoformat(),
- "event": "IMPLICIT_FEEDBACK",
- "source": "gradata.mine",
- "category": category,
- "session_uuid": session_uuid,
- "text": text[:200],
- "data": json.dumps({
- "signals": ",".join(unique),
- "snippet": snippet,
- "session_id": session_uuid,
- "uuid": msg.get("uuid", ""),
- "project": path.parent.name,
+ events.append(
+ {
+ "ts": msg.get("timestamp") or datetime.now(UTC).isoformat(),
+ "event": "IMPLICIT_FEEDBACK",
+ "source": "gradata.mine",
"category": category,
- }, ensure_ascii=False),
- })
+ "session_uuid": session_uuid,
+ "text": text[:200],
+ "data": json.dumps(
+ {
+ "signals": ",".join(unique),
+ "snippet": snippet,
+ "session_id": session_uuid,
+ "uuid": msg.get("uuid", ""),
+ "project": path.parent.name,
+ "category": category,
+ },
+ ensure_ascii=False,
+ ),
+ }
+ )
return events
@@ -253,11 +353,9 @@ def run_mine(
print(f"[err] transcript root not found: {root}", file=sys.stderr)
return 1
- project_dirs: list[Path]
- if project:
- project_dirs = [root / project]
- else:
- project_dirs = [p for p in root.iterdir() if p.is_dir()]
+ project_dirs: list[Path] = (
+ [root / project] if project else [p for p in root.iterdir() if p.is_dir()]
+ )
total_sessions = 0
total_events: list[dict] = []
@@ -308,6 +406,7 @@ def run_mine(
# re-runs idempotent while preserving historical timestamps.
from gradata._events import emit as _emit
from gradata.brain import Brain
+
brain = Brain(brain_root) # ensures table + ctx setup
written = 0
skipped = 0
@@ -343,6 +442,7 @@ def run_mine(
from gradata.enhancements.meta_rules_storage import (
upsert_correction_patterns_batch,
)
+
db_path = brain.ctx.db_path
batch: list[tuple[str, str, str, int, str]] = []
seen: set[tuple[str, int]] = set()
diff --git a/Gradata/src/gradata/_paths.py b/Gradata/src/gradata/_paths.py
index 720a2722..78b4145d 100644
--- a/Gradata/src/gradata/_paths.py
+++ b/Gradata/src/gradata/_paths.py
@@ -7,6 +7,7 @@
For the original runtime: brain/scripts/paths.py (unchanged).
This file is the SDK-portable equivalent.
"""
+
from __future__ import annotations
import os
@@ -22,6 +23,7 @@ class BrainContext:
Pass a BrainContext to functions instead of relying on mutable global state.
Enables multi-brain support (multiple Brain instances in one process).
"""
+
brain_dir: Path
db_path: Path
events_jsonl: Path
@@ -40,7 +42,9 @@ class BrainContext:
gates_dir: Path
@classmethod
- def from_brain_dir(cls, brain_dir: str | Path, working_dir: str | Path | None = None) -> BrainContext:
+ def from_brain_dir(
+ cls, brain_dir: str | Path, working_dir: str | Path | None = None
+ ) -> BrainContext:
"""Build a BrainContext from a brain directory path.
Args:
@@ -48,7 +52,11 @@ def from_brain_dir(cls, brain_dir: str | Path, working_dir: str | Path | None =
working_dir: Optional working directory. Falls back to WORKING_DIR env var or cwd.
"""
bd = resolve_brain_dir(brain_dir)
- wd = Path(working_dir).resolve() if working_dir else Path(os.environ.get("WORKING_DIR", ".")).resolve()
+ wd = (
+ Path(working_dir).resolve()
+ if working_dir
+ else Path(os.environ.get("WORKING_DIR", ".")).resolve()
+ )
return cls(
brain_dir=bd,
db_path=bd / "system.db",
@@ -175,6 +183,5 @@ def set_brain_dir(brain_dir: str | Path, working_dir: str | Path | None = None):
_current_context = BrainContext.from_brain_dir(brain_dir, working_dir)
-
# Module-level default context (None until set_brain_dir() is called)
_current_context: BrainContext | None = None
diff --git a/Gradata/src/gradata/_query.py b/Gradata/src/gradata/_query.py
index bcc25d8d..3f50e8d6 100644
--- a/Gradata/src/gradata/_query.py
+++ b/Gradata/src/gradata/_query.py
@@ -36,6 +36,7 @@
# ── FTS5 Full-Text Search ────────────────────────────────────────────────
+
def _ensure_fts_table(conn: sqlite3.Connection):
conn.execute("""
CREATE TABLE IF NOT EXISTS brain_fts_content (
@@ -59,8 +60,9 @@ def _ensure_fts_table(conn: sqlite3.Connection):
conn.commit()
-def fts_index(source: str, file_type: str, text: str, embed_date: str = "",
- ctx: "BrainContext | None" = None):
+def fts_index(
+ source: str, file_type: str, text: str, embed_date: str = "", ctx: "BrainContext | None" = None
+):
db = ctx.db_path if ctx else _p.DB_PATH
_brain_dir = ctx.brain_dir if ctx else Path(db).parent
_tid = tenant_for(_brain_dir)
@@ -129,8 +131,10 @@ def fts_rebuild(ctx: "BrainContext | None" = None):
embed_date = datetime.fromtimestamp(fpath.stat().st_mtime).strftime("%Y-%m-%d")
chunk_size = MAX_TOKENS_PER_CHUNK * 4
for i in range(0, len(text), chunk_size):
- chunk = text[i:i + chunk_size]
- docs.append({"source": rel, "file_type": file_type, "text": chunk, "embed_date": embed_date})
+ chunk = text[i : i + chunk_size]
+ docs.append(
+ {"source": rel, "file_type": file_type, "text": chunk, "embed_date": embed_date}
+ )
if docs:
_tid = tenant_for(brain_path)
@@ -149,8 +153,12 @@ def fts_rebuild(ctx: "BrainContext | None" = None):
return len(docs)
-def fts_search(query_text: str, file_type: str | None = None, top_k: int = 10,
- ctx: "BrainContext | None" = None) -> list[dict]:
+def fts_search(
+ query_text: str,
+ file_type: str | None = None,
+ top_k: int = 10,
+ ctx: "BrainContext | None" = None,
+) -> list[dict]:
db = ctx.db_path if ctx else _p.DB_PATH
conn = sqlite3.connect(str(db))
_ensure_fts_table(conn)
@@ -177,16 +185,22 @@ def fts_search(query_text: str, file_type: str | None = None, top_k: int = 10,
conn.close()
results = []
for r in rows:
- results.append({
- "rowid": r[0], "source": r[1] or "", "file_type": r[2] or "general",
- "text": (r[3] or "")[:500], "embed_date": r[4] or "",
- "fts_rank": abs(r[5]) if r[5] else 0,
- })
+ results.append(
+ {
+ "rowid": r[0],
+ "source": r[1] or "",
+ "file_type": r[2] or "general",
+ "text": (r[3] or "")[:500],
+ "embed_date": r[4] or "",
+ "fts_rank": abs(r[5]) if r[5] else 0,
+ }
+ )
return results
# ── Query Routing ────────────────────────────────────────────────────────
+
def detect_query_mode(query_text: str) -> str:
if query_text.startswith('"') and query_text.endswith('"'):
return "keyword"
@@ -222,7 +236,6 @@ def reciprocal_rank_fusion(ranked_lists: list[list[dict]], k: int = 60) -> list[
return output
-
def compute_recency_weight(embed_date: str) -> float:
try:
doc_date = datetime.strptime(embed_date, "%Y-%m-%d")
@@ -248,19 +261,37 @@ def classify_confidence(score: float) -> str:
def infer_memory_type(file_type: str, source: str = "") -> str:
source_lower = source.lower().replace("\\", "/")
- for pat in ["competitive-intelligence", "competitor-adaptations",
- "gap-analysis", "sdk-north-star", "sdk-improvements",
- "sdk-v2-improvements", "forecasting", "competitive-audit"]:
+ for pat in [
+ "competitive-intelligence",
+ "competitor-adaptations",
+ "gap-analysis",
+ "sdk-north-star",
+ "sdk-improvements",
+ "sdk-v2-improvements",
+ "forecasting",
+ "competitive-audit",
+ ]:
if pat in source_lower:
return "strategic"
- for pat in ["follow-up-cadence", "prospecting-tools", "versioning-protocol",
- "patterns.md", "protocol.md"]:
+ for pat in [
+ "follow-up-cadence",
+ "prospecting-tools",
+ "versioning-protocol",
+ "patterns.md",
+ "protocol.md",
+ ]:
if pat in source_lower:
return "procedural"
- for pat in ["judgment-calibration", "outcome-retrospectives",
- "calibration-audit", "outreach-analytics",
- "loop-state", "signals", "follow-up tracker",
- "experiment tracker"]:
+ for pat in [
+ "judgment-calibration",
+ "outcome-retrospectives",
+ "calibration-audit",
+ "outreach-analytics",
+ "loop-state",
+ "signals",
+ "follow-up tracker",
+ "experiment tracker",
+ ]:
if pat in source_lower:
return "episodic"
return MEMORY_TYPE_MAP.get(file_type, "semantic")
@@ -274,10 +305,15 @@ def get_memory_weight(memory_type: str, task) -> float:
def brain_search(
- query: str, file_type: str | None = None, domain: str = "default",
- top_k: int = DEFAULT_TOP_K, threshold: float = SIMILARITY_THRESHOLD,
- use_recency: bool = True, memory_type: str | None = None,
- mode: str | None = None, ctx: "BrainContext | None" = None,
+ query: str,
+ file_type: str | None = None,
+ domain: str = "default",
+ top_k: int = DEFAULT_TOP_K,
+ threshold: float = SIMILARITY_THRESHOLD,
+ use_recency: bool = True,
+ memory_type: str | None = None,
+ mode: str | None = None,
+ ctx: "BrainContext | None" = None,
) -> list[dict]:
"""Search the brain using FTS5.
@@ -299,7 +335,9 @@ def brain_search(
mem_type = infer_memory_type(r.get("file_type", ""), r.get("source", ""))
memory_w = get_memory_weight(mem_type, memory_type)
r["score"] = round(r.get("fts_rank", 0) * recency_w * memory_w, 4)
- r["confidence"] = "keyword_match" if mode == "keyword" else classify_confidence(min(r["score"], 1.0))
+ r["confidence"] = (
+ "keyword_match" if mode == "keyword" else classify_confidence(min(r["score"], 1.0))
+ )
r["recency_weight"] = round(recency_w, 3)
r["memory_weight"] = round(memory_w, 3)
r["memory_type"] = mem_type
diff --git a/Gradata/src/gradata/_stats.py b/Gradata/src/gradata/_stats.py
index b7b8aa77..ad8e5164 100644
--- a/Gradata/src/gradata/_stats.py
+++ b/Gradata/src/gradata/_stats.py
@@ -4,6 +4,7 @@
Portable statistical functions for convergence analysis, quality scoring,
and trend detection. All functions work with any data volume including 0.
"""
+
from __future__ import annotations
import math
@@ -13,6 +14,7 @@
# 0. TREND ANALYSIS (Theil-Sen + Mann-Kendall)
# ============================================================================
+
def trend_analysis(y: list[float]) -> tuple[float, float]:
"""Combined Theil-Sen slope + Mann-Kendall p-value in a single O(n^2) pass.
@@ -76,7 +78,7 @@ def cusum_changepoints(data: list[int] | list[float], threshold: float = 1.0) ->
variance = sum((x - mean) ** 2 for x in data) / n
if variance == 0:
return []
- std_dev = variance ** 0.5
+ std_dev = variance**0.5
limit = threshold * std_dev
changepoints: list[int] = []
@@ -100,13 +102,17 @@ def cusum_changepoints(data: list[int] | list[float], threshold: float = 1.0) ->
# 1. BAYESIAN BETA-BINOMIAL
# ============================================================================
-def beta_posterior(successes: int, trials: int, prior_alpha: float = 1.0, prior_beta: float = 1.0) -> dict:
+
+def beta_posterior(
+ successes: int, trials: int, prior_alpha: float = 1.0, prior_beta: float = 1.0
+) -> dict:
alpha = prior_alpha + successes
beta_param = prior_beta + trials - successes
mean = alpha / (alpha + beta_param)
try:
from scipy.stats import beta as beta_dist
+
ci_low = beta_dist.ppf(0.025, alpha, beta_param)
ci_high = beta_dist.ppf(0.975, alpha, beta_param)
except ImportError:
@@ -117,6 +123,7 @@ def beta_posterior(successes: int, trials: int, prior_alpha: float = 1.0, prior_
def prob_above(baseline: float) -> float:
try:
from scipy.stats import beta as beta_dist
+
return float(1 - beta_dist.cdf(baseline, alpha, beta_param))
except ImportError:
if mean > baseline:
@@ -138,27 +145,36 @@ def prob_above(baseline: float) -> float:
"ci_95": (round(float(ci_low), 4), round(float(ci_high), 4)),
"prob_above_baseline": round(p_above, 3),
"confidence_label": label,
- "alpha": alpha, "beta": beta_param, "n": trials,
+ "alpha": alpha,
+ "beta": beta_param,
+ "n": trials,
}
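+
+# Worked example (uniform Beta(1, 1) prior): 7 successes in 10 trials gives
+# alpha = 1 + 7 = 8, beta = 1 + 3 = 4, posterior mean 8 / 12 ≈ 0.667:
+# shrunk toward 0.5 relative to the raw 70% rate, as a small-n estimate
+# should be.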
-
# ============================================================================
# 2. WILSON CONFIDENCE INTERVALS
# ============================================================================
+
def wilson_ci(successes: int, total: int, z: float = 1.96) -> dict:
if total == 0:
- return {"point_estimate": 0, "ci_low": 0, "ci_high": 0, "margin": 0, "display": "0% (no data)"}
+ return {
+ "point_estimate": 0,
+ "ci_low": 0,
+ "ci_high": 0,
+ "margin": 0,
+ "display": "0% (no data)",
+ }
p = successes / total
- denom = 1 + z ** 2 / total
- center = (p + z ** 2 / (2 * total)) / denom
- margin = z * math.sqrt((p * (1 - p) + z ** 2 / (4 * total)) / total) / denom
+ denom = 1 + z**2 / total
+ center = (p + z**2 / (2 * total)) / denom
+ margin = z * math.sqrt((p * (1 - p) + z**2 / (4 * total)) / total) / denom
ci_low = max(0, center - margin)
ci_high = min(1, center + margin)
return {
"point_estimate": round(p, 4),
- "ci_low": round(ci_low, 4), "ci_high": round(ci_high, 4),
+ "ci_low": round(ci_low, 4),
+ "ci_high": round(ci_high, 4),
"margin": round(margin, 4),
"display": f"{p:.1%} (CI: {ci_low:.1%}-{ci_high:.1%})",
}
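+
+# Worked example: wilson_ci(8, 10) keeps the 0.8 point estimate but reports
+# a 95% interval of roughly 0.49 to 0.94 ("80.0% (CI: 49.0%-94.3%)"), which
+# is what stops small samples from over-stating certainty.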
@@ -168,14 +184,18 @@ def wilson_ci(successes: int, total: int, z: float = 1.96) -> dict:
# 3. ROLLING WINDOW COMPARISON
# ============================================================================
+
def rolling_comparison(values: list, window: int = 10) -> dict:
if not values:
return {"lifetime_avg": 0, "recent_avg": 0, "delta": 0, "trend": "NO_DATA", "pct_change": 0}
lifetime_avg = sum(values) / len(values)
if len(values) <= window:
return {
- "lifetime_avg": round(lifetime_avg, 4), "recent_avg": round(lifetime_avg, 4),
- "delta": 0, "trend": "INSUFFICIENT_WINDOW", "pct_change": 0,
+ "lifetime_avg": round(lifetime_avg, 4),
+ "recent_avg": round(lifetime_avg, 4),
+ "delta": 0,
+ "trend": "INSUFFICIENT_WINDOW",
+ "pct_change": 0,
}
recent = values[-window:]
recent_avg = sum(recent) / len(recent)
@@ -188,8 +208,11 @@ def rolling_comparison(values: list, window: int = 10) -> dict:
else:
trend = "DEGRADING"
return {
- "lifetime_avg": round(lifetime_avg, 4), "recent_avg": round(recent_avg, 4),
- "delta": round(delta, 4), "trend": trend, "pct_change": round(pct, 1),
+ "lifetime_avg": round(lifetime_avg, 4),
+ "recent_avg": round(recent_avg, 4),
+ "delta": round(delta, 4),
+ "trend": trend,
+ "pct_change": round(pct, 1),
}
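+
+# Worked example: rolling_comparison([1]*10 + [0]*3, window=10) compares the
+# lifetime mean 10/13 ≈ 0.769 against the last-10 mean 7/10 = 0.7, yielding
+# delta ≈ -0.069; the trend label then reflects the recent dip.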
@@ -197,6 +220,7 @@ def rolling_comparison(values: list, window: int = 10) -> dict:
# 4. BRIER SCORE
# ============================================================================
+
def brier_score(predictions_and_outcomes: list) -> dict:
if not predictions_and_outcomes:
return {"score": None, "calibration": "NO_DATA", "n": 0}
@@ -220,6 +244,7 @@ def brier_score(predictions_and_outcomes: list) -> dict:
# 5. EWMA CONTROL CHARTS
# ============================================================================
+
def ewma_control(values: list, lambda_param: float = 0.2, sigma_multiplier: float = 2.0) -> dict:
if len(values) < 3:
return {"ewma_current": None, "alerts": [], "status": "INSUFFICIENT_DATA"}
@@ -237,10 +262,18 @@ def ewma_control(values: list, lambda_param: float = 0.2, sigma_multiplier: floa
ucl = mean + sigma_multiplier * ewma_sigma
lcl = mean - sigma_multiplier * ewma_sigma
if ewma[i] > ucl or ewma[i] < lcl:
- alerts.append({"index": i, "value": round(values[i], 4),
- "ewma": round(ewma[i], 4), "type": "above" if ewma[i] > ucl else "below"})
+ alerts.append(
+ {
+ "index": i,
+ "value": round(values[i], 4),
+ "ewma": round(ewma[i], 4),
+ "type": "above" if ewma[i] > ucl else "below",
+ }
+ )
return {
- "ewma_current": round(ewma[-1], 4), "mean": round(mean, 4), "sigma": round(sigma, 4),
+ "ewma_current": round(ewma[-1], 4),
+ "mean": round(mean, 4),
+ "sigma": round(sigma, 4),
"ucl": round(mean + sigma_multiplier * sigma, 4),
"lcl": round(max(0, mean - sigma_multiplier * sigma), 4),
"alerts": alerts[-3:],
@@ -252,6 +285,7 @@ def ewma_control(values: list, lambda_param: float = 0.2, sigma_multiplier: floa
# 6. CORRECTION HALF-LIFE
# ============================================================================
+
def correction_half_life(corrections: list) -> dict:
if not corrections:
return {"categories": {}, "overall": "NO_DATA"}
@@ -281,18 +315,30 @@ def correction_half_life(corrections: list) -> dict:
else:
status = "SAME_SESSION"
results[cat] = {
- "occurrences": count, "first_session": sessions_sorted[0],
- "last_session": sessions_sorted[-1], "span": span,
- "density": round(count / max(span, 1), 3), "status": status,
+ "occurrences": count,
+ "first_session": sessions_sorted[0],
+ "last_session": sessions_sorted[-1],
+ "span": span,
+ "density": round(count / max(span, 1), 3),
+ "status": status,
}
- overall = "LEARNING" if learned > recurring else "STRUGGLING" if recurring > learned else "MIXED"
- return {"categories": results, "total_categories": len(results), "learned": learned, "recurring": recurring, "overall": overall}
+ overall = (
+ "LEARNING" if learned > recurring else "STRUGGLING" if recurring > learned else "MIXED"
+ )
+ return {
+ "categories": results,
+ "total_categories": len(results),
+ "learned": learned,
+ "recurring": recurring,
+ "overall": overall,
+ }
# ============================================================================
# 7. TASK SUCCESS RATE BY TYPE
# ============================================================================
+
def task_success_rate(events: list) -> dict:
if not events:
return {"by_type": {}, "overall_pass_rate": None}
@@ -306,7 +352,12 @@ def task_success_rate(events: list) -> dict:
for t, counts in by_type.items():
rate = counts["passed"] / counts["total"] if counts["total"] > 0 else 0
ci = wilson_ci(counts["passed"], counts["total"])
- results[t] = {"pass_rate": round(rate, 3), "total": counts["total"], "passed": counts["passed"], "ci": ci["display"]}
+ results[t] = {
+ "pass_rate": round(rate, 3),
+ "total": counts["total"],
+ "passed": counts["passed"],
+ "ci": ci["display"],
+ }
total = sum(c["total"] for c in by_type.values())
passed = sum(c["passed"] for c in by_type.values())
return {
@@ -320,6 +371,7 @@ def task_success_rate(events: list) -> dict:
# 8. MTBF / MTTR
# ============================================================================
+
def mtbf_mttr(corrections: list, total_sessions: int) -> dict:
if not corrections or total_sessions == 0:
return {"by_type": {}, "overall_mtbf": None}
@@ -332,13 +384,23 @@ def mtbf_mttr(corrections: list, total_sessions: int) -> dict:
mtbf = total_sessions / count if count > 0 else total_sessions
sessions_sorted = sorted(sessions)
if len(sessions_sorted) > 1:
- gaps = [sessions_sorted[i+1] - sessions_sorted[i] for i in range(len(sessions_sorted)-1)]
+ gaps = [
+ sessions_sorted[i + 1] - sessions_sorted[i] for i in range(len(sessions_sorted) - 1)
+ ]
mttr = sum(gaps) / len(gaps)
else:
mttr = None
- results[t] = {"corrections": count, "mtbf": round(mtbf, 1), "mttr": round(mttr, 1) if mttr else None}
+ results[t] = {
+ "corrections": count,
+ "mtbf": round(mtbf, 1),
+ "mttr": round(mttr, 1) if mttr else None,
+ }
overall_mtbf = total_sessions / len(corrections) if corrections else total_sessions
- return {"by_type": results, "overall_mtbf": round(overall_mtbf, 1), "total_corrections": len(corrections)}
+ return {
+ "by_type": results,
+ "overall_mtbf": round(overall_mtbf, 1),
+ "total_corrections": len(corrections),
+ }
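+
+# Worked example: one category corrected in sessions [2, 7, 12] over 20
+# total sessions gives MTBF = 20 / 3 ≈ 6.7 sessions between corrections and
+# MTTR = mean recurrence gap = ((7 - 2) + (12 - 7)) / 2 = 5.0.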
# ============================================================================
diff --git a/Gradata/src/gradata/_telemetry.py b/Gradata/src/gradata/_telemetry.py
index 37140318..4ad24829 100644
--- a/Gradata/src/gradata/_telemetry.py
+++ b/Gradata/src/gradata/_telemetry.py
@@ -40,6 +40,7 @@
in the same config file). Heartbeat/recurring events are not this module's
concern.
"""
+
from __future__ import annotations
import contextlib
@@ -82,6 +83,7 @@ def _config_path() -> Path:
"""Shared resolver for the telemetry config file."""
return _config_dir() / _CONFIG_FILENAME
+
# The exhaustive set of activation events. Adding a new one here is the
# only place you need to touch — the prompt copy and the docs reference
# this tuple, the backend schema just validates string length.
diff --git a/Gradata/src/gradata/_tenant.py b/Gradata/src/gradata/_tenant.py
index ae486f99..83013ba5 100644
--- a/Gradata/src/gradata/_tenant.py
+++ b/Gradata/src/gradata/_tenant.py
@@ -12,6 +12,7 @@
DB rebuilds and can be read by tooling outside the SDK (cloud sync,
migrations, diagnostics).
"""
+
from __future__ import annotations
import os
diff --git a/Gradata/src/gradata/_text_utils.py b/Gradata/src/gradata/_text_utils.py
index 554a2913..e92dd87a 100644
--- a/Gradata/src/gradata/_text_utils.py
+++ b/Gradata/src/gradata/_text_utils.py
@@ -52,9 +52,7 @@
# Factual-token regex
# ---------------------------------------------------------------------------
-_FACTUAL_RE = re.compile(
- r"(\$[\d,.]+|\d{4}-\d{2}-\d{2}|\d+%|https?://\S+|\b\d{3,}\b)"
-)
+_FACTUAL_RE = re.compile(r"(\$[\d,.]+|\d{4}-\d{2}-\d{2}|\d+%|https?://\S+|\b\d{3,}\b)")
"""Match factual tokens: dollar amounts, ISO dates, percentages, URLs, 3+-digit numbers.
Used by edit_classifier and behavioral_extractor. *Not* compatible with
@@ -66,14 +64,82 @@
# ---------------------------------------------------------------------------
_STOP_WORDS: set[str] = {
- "a", "an", "the", "is", "are", "was", "were", "be", "been", "being",
- "have", "has", "had", "do", "does", "did", "will", "would", "shall",
- "should", "may", "can", "could", "might", "to", "of", "in", "for",
- "on", "with", "at", "by", "from", "as", "into", "about", "that",
- "this", "it", "its", "and", "or", "but", "not", "no", "if", "so",
- "than", "too", "very", "s", "t", "d", "ll", "ve", "re", "m",
- "i", "you", "we", "they", "he", "she", "me", "my", "your", "our",
- "their", "his", "her", "us", "them", "up", "out", "all", "am",
+ "a",
+ "an",
+ "the",
+ "is",
+ "are",
+ "was",
+ "were",
+ "be",
+ "been",
+ "being",
+ "have",
+ "has",
+ "had",
+ "do",
+ "does",
+ "did",
+ "will",
+ "would",
+ "shall",
+ "should",
+ "may",
+ "can",
+ "could",
+ "might",
+ "to",
+ "of",
+ "in",
+ "for",
+ "on",
+ "with",
+ "at",
+ "by",
+ "from",
+ "as",
+ "into",
+ "about",
+ "that",
+ "this",
+ "it",
+ "its",
+ "and",
+ "or",
+ "but",
+ "not",
+ "no",
+ "if",
+ "so",
+ "than",
+ "too",
+ "very",
+ "s",
+ "t",
+ "d",
+ "ll",
+ "ve",
+ "re",
+ "m",
+ "i",
+ "you",
+ "we",
+ "they",
+ "he",
+ "she",
+ "me",
+ "my",
+ "your",
+ "our",
+ "their",
+ "his",
+ "her",
+ "us",
+ "them",
+ "up",
+ "out",
+ "all",
+ "am",
}
"""Common English function words for edit-diff filtering.
diff --git a/Gradata/src/gradata/_transcript.py b/Gradata/src/gradata/_transcript.py
new file mode 100644
index 00000000..e2f0d574
--- /dev/null
+++ b/Gradata/src/gradata/_transcript.py
@@ -0,0 +1,148 @@
+"""Layer 0: lightweight turn logger for retroactive feedback mining.
+
+log_turn() appends conversation turns to
+brain/sessions/{session_id}/transcript.jsonl so that session_close can
+run the implicit_feedback regex sweep retroactively across the full
+session, catching signals the real-time UserPromptSubmit hook may have
+missed (e.g. turns that arrived too fast or during hook downtime).
+
+Opt-in only. Disabled unless GRADATA_TRANSCRIPT=1.
+
+Non-Anthropic middleware (wrap_openai, LangChainCallback, CrewAIGuard)
+calls log_turn() because those providers have no native session log.
+wrap_anthropic does NOT call log_turn() — Claude Code's native JSONL at
+~/.claude/projects/{hash}/{session_id}.jsonl is the authoritative source.
+
+PII policy:
+ - Assistant content is truncated at GRADATA_TRANSCRIPT_TRUNCATE (2000 chars).
+ - Non-text tool_use / image content is logged as {has_non_text: true}.
+ - No redaction of user content (caller is responsible).
+ - Files are TTL-cleaned by cleanup_ttl().
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+import os
+from datetime import UTC, datetime
+from pathlib import Path
+
+_log = logging.getLogger(__name__)
+
+DEFAULT_TRUNCATE = 2000
+DEFAULT_TTL_DAYS = 30
+_ENABLED_ENV = "GRADATA_TRANSCRIPT"
+_TRUNCATE_ENV = "GRADATA_TRANSCRIPT_TRUNCATE"
+_TTL_ENV = "GRADATA_TRANSCRIPT_TTL_DAYS"
+
+
+def _is_enabled() -> bool:
+ return os.environ.get(_ENABLED_ENV, "0") == "1"
+
+
+def _session_dir(brain_dir: str, session_id: str) -> Path:
+ return Path(brain_dir) / "sessions" / session_id
+
+
+def _transcript_path(brain_dir: str, session_id: str) -> Path:
+ return _session_dir(brain_dir, session_id) / "transcript.jsonl"
+
+
+def log_turn(
+ brain_dir: str,
+ session_id: str,
+ role: str,
+ content: str | None,
+ *,
+ has_non_text: bool = False,
+ truncate_at: int | None = None,
+) -> None:
+ """Append one conversation turn to the session transcript.
+
+ Silently no-ops when GRADATA_TRANSCRIPT != 1, or on any write error.
+ Content is truncated to avoid bloating the transcript with long assistant
+ responses; the retroactive sweep only needs the user-role turns anyway.
+ """
+ if not _is_enabled():
+ return
+ if not brain_dir or not session_id:
+ return
+
+ limit = (
+ truncate_at
+ if truncate_at is not None
+ else int(os.environ.get(_TRUNCATE_ENV, str(DEFAULT_TRUNCATE)))
+ )
+
+ entry: dict = {
+ "ts": datetime.now(UTC).isoformat(),
+ "role": role,
+ }
+ if has_non_text:
+ entry["has_non_text"] = True
+ entry["content"] = None
+ elif content is not None:
+ entry["content"] = content[:limit] if len(content) > limit else content
+ else:
+ entry["content"] = None
+
+ try:
+ path = _transcript_path(brain_dir, session_id)
+ path.parent.mkdir(parents=True, exist_ok=True)
+ with path.open("a", encoding="utf-8") as fh:
+ fh.write(json.dumps(entry, ensure_ascii=False) + "\n")
+ except OSError as exc:
+ _log.debug("transcript log_turn failed: %s", exc)
+
+
+def load_turns(brain_dir: str, session_id: str) -> list[dict]:
+ """Load all turns from a Gradata-written transcript.jsonl.
+
+ Returns an empty list on any read error or if the file doesn't exist.
+ """
+ path = _transcript_path(brain_dir, session_id)
+ if not path.is_file():
+ return []
+ turns: list[dict] = []
+ try:
+ with path.open(encoding="utf-8", errors="replace") as fh:
+ for line in fh:
+ line = line.strip()
+ if not line:
+ continue
+ try:
+ turns.append(json.loads(line))
+ except json.JSONDecodeError:
+ continue
+ except OSError:
+ pass
+ return turns
+
+
+def cleanup_ttl(brain_dir: str, ttl_days: int | None = None) -> int:
+ """Delete transcript directories older than ttl_days. Returns count deleted."""
+ days = (
+ ttl_days if ttl_days is not None else int(os.environ.get(_TTL_ENV, str(DEFAULT_TTL_DAYS)))
+ )
+ now = datetime.now(UTC).timestamp()
+ cutoff = now - days * 86400
+ sessions_dir = Path(brain_dir) / "sessions"
+ if not sessions_dir.is_dir():
+ return 0
+
+ deleted = 0
+ for session_dir in sessions_dir.iterdir():
+ if not session_dir.is_dir():
+ continue
+ transcript = session_dir / "transcript.jsonl"
+ if not transcript.is_file():
+ continue
+ try:
+ mtime = transcript.stat().st_mtime
+ if mtime < cutoff:
+ transcript.unlink(missing_ok=True)
+ deleted += 1
+ except OSError:
+ continue
+ return deleted
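+
+# Illustrative maintenance calls (scheduling is up to the caller):
+#
+#   cleanup_ttl("/data/brain")              # default 30-day TTL
+#   cleanup_ttl("/data/brain", ttl_days=7)  # stricter retention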
diff --git a/Gradata/src/gradata/_transcript_providers.py b/Gradata/src/gradata/_transcript_providers.py
new file mode 100644
index 00000000..381d9486
--- /dev/null
+++ b/Gradata/src/gradata/_transcript_providers.py
@@ -0,0 +1,182 @@
+"""Layer 0: TranscriptSource implementations for retroactive sweep.
+
+Two implementations:
+ ProviderTranscriptSource — reads Claude Code's native JSONL at
+ ~/.claude/projects/{hash}/{session_id}.jsonl.
+ GradataTranscriptSource — reads brain/sessions/{session_id}/transcript.jsonl
+ written by non-Anthropic middleware via _transcript.log_turn().
+
+Session close tries ProviderTranscriptSource first; falls back to
+GradataTranscriptSource. Both expose the same interface so the sweep
+is source-agnostic.
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+from pathlib import Path
+
+_log = logging.getLogger(__name__)
+
+
+class ProviderTranscriptSource:
+ """Reads turns from Claude Code's native ~/.claude/projects/ JSONL.
+
+ Claude Code writes one JSONL file per session. Entries look like:
+ {"type": "user", "message": {"content": "..."}, ...}
+ {"type": "assistant", "message": {"content": [...]}, ...}
+
+ Content can be a plain string or a list of content blocks. Non-text
+ blocks (tool_use, images) are flagged as has_non_text=True and their
+ content is dropped to avoid bloating the in-memory sweep.
+ """
+
+ def __init__(self, session_id: str | None) -> None:
+ self._session_id = session_id
+ self._path: Path | None = self._locate()
+
+ def _locate(self) -> Path | None:
+ projects = Path.home() / ".claude" / "projects"
+ if not projects.is_dir():
+ return None
+ try:
+ all_dirs = [d for d in projects.iterdir() if d.is_dir()]
+ except OSError:
+ return None
+
+ if self._session_id:
+ for d in all_dirs:
+ candidate = d / f"{self._session_id}.jsonl"
+ if candidate.is_file():
+ return candidate
+
+ # Fallback: most-recently modified JSONL across all project dirs.
+ all_jsonls: list[Path] = []
+ for d in all_dirs:
+ try:
+ all_jsonls.extend(f for f in d.iterdir() if f.suffix == ".jsonl")
+ except OSError:
+ continue
+ return max(all_jsonls, key=lambda p: p.stat().st_mtime) if all_jsonls else None
+
+ def available(self) -> bool:
+ return self._path is not None and self._path.is_file()
+
+ def turns(self) -> list[dict]:
+ """Return normalised turns: [{role, content, has_non_text, ts}]."""
+ if not self.available():
+ return []
+ result: list[dict] = []
+ try:
+ with self._path.open(encoding="utf-8", errors="replace") as fh: # type: ignore[union-attr]
+ for line in fh:
+ line = line.strip()
+ if not line:
+ continue
+ try:
+ entry = json.loads(line)
+ except json.JSONDecodeError:
+ continue
+ if not isinstance(entry, dict):
+ continue
+ turn_type = entry.get("type", "")
+ if turn_type not in ("user", "assistant"):
+ continue
+ msg = entry.get("message") or {}
+ raw_content = msg.get("content") if isinstance(msg, dict) else None
+ ts = entry.get("timestamp", "")
+
+ if isinstance(raw_content, str):
+ result.append(
+ {
+ "role": turn_type,
+ "content": raw_content,
+ "has_non_text": False,
+ "ts": ts,
+ }
+ )
+ elif isinstance(raw_content, list):
+ text_parts: list[str] = []
+ has_non_text = False
+ for block in raw_content:
+ if not isinstance(block, dict):
+ continue
+ btype = block.get("type", "")
+ if btype == "text":
+ text_parts.append(block.get("text", ""))
+ else:
+ has_non_text = True
+ result.append(
+ {
+ "role": turn_type,
+ "content": "\n".join(text_parts) or None,
+ "has_non_text": has_non_text,
+ "ts": ts,
+ }
+ )
+ except OSError as exc:
+ _log.debug("ProviderTranscriptSource read failed: %s", exc)
+ return result
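+
+# Normalisation sketch: a native entry such as
+#   {"type": "assistant", "message": {"content": [
+#       {"type": "text", "text": "done"}, {"type": "tool_use", ...}]}}
+# comes out as
+#   {"role": "assistant", "content": "done", "has_non_text": True, "ts": ...}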
+
+
+class GradataTranscriptSource:
+ """Reads turns from brain/sessions/{session_id}/transcript.jsonl.
+
+ Written by non-Anthropic middleware via gradata._transcript.log_turn().
+ """
+
+ def __init__(self, brain_dir: str, session_id: str | None) -> None:
+ self._brain_dir = brain_dir
+ self._session_id = session_id
+
+ def _path(self) -> Path | None:
+ if not self._session_id:
+ return None
+ p = Path(self._brain_dir) / "sessions" / self._session_id / "transcript.jsonl"
+ return p if p.is_file() else None
+
+ def available(self) -> bool:
+ return self._path() is not None
+
+ def turns(self) -> list[dict]:
+ """Return all turns written by log_turn()."""
+ path = self._path()
+ if path is None:
+ return []
+ result: list[dict] = []
+ try:
+ with path.open(encoding="utf-8", errors="replace") as fh:
+ for line in fh:
+ line = line.strip()
+ if not line:
+ continue
+ try:
+ entry = json.loads(line)
+ except json.JSONDecodeError:
+ continue
+ if isinstance(entry, dict):
+ result.append(entry)
+ except OSError as exc:
+ _log.debug("GradataTranscriptSource read failed: %s", exc)
+ return result
+
+
+def get_transcript_source(
+ brain_dir: str, session_id: str | None
+) -> ProviderTranscriptSource | GradataTranscriptSource | None:
+ """Return the best available transcript source, or None if neither has data.
+
+ Prefers ProviderTranscriptSource (Claude Code native) over
+ GradataTranscriptSource (middleware-written). Returns None when neither
+ has a usable file so callers can skip the sweep cleanly.
+ """
+ provider = ProviderTranscriptSource(session_id)
+ if provider.available():
+ return provider
+
+ gradata = GradataTranscriptSource(brain_dir, session_id)
+ if gradata.available():
+ return gradata
+
+ return None
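
# A minimal caller sketch for the selector above (illustrative only; the
# import path is an assumption, since this diff does not show the module's
# filename, and count_text_turns() is a hypothetical consumer):
from gradata._transcript_sources import get_transcript_source  # assumed path

def count_text_turns(brain_dir: str, session_id: str | None) -> int:
    source = get_transcript_source(brain_dir, session_id)
    if source is None:
        return 0  # neither JSONL exists, so the sweep is skipped cleanly
    # Normalised turns: tool_use/image blocks arrive as has_non_text=True
    # with their payloads already dropped by ProviderTranscriptSource.
    return sum(1 for turn in source.turns() if turn.get("content"))
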
diff --git a/Gradata/src/gradata/_types.py b/Gradata/src/gradata/_types.py
index c8a9c5a5..454f02d6 100644
--- a/Gradata/src/gradata/_types.py
+++ b/Gradata/src/gradata/_types.py
@@ -176,7 +176,9 @@ class Lesson:
tree_level: int = 0 # Current depth: 0=leaf, 1=branch, 2=trunk
# Transient runtime state (not persisted to lessons.md) — self_improvement
# / rule_evolution decay confidence once this crosses a threshold.
- _contradiction_streak: int = 0 # Consecutive contradictions; triggers self-correction / penalty acceleration
+ _contradiction_streak: int = (
+ 0 # Consecutive contradictions; triggers self-correction / penalty acceleration
+ )
stale: bool = False # True = demoted via TTL (sessions_since_fire >= ttl); flagged for review
# Phase 5 council hook: optional registry slot for AST-class promotion routing.
# Unset today (rule_to_hook uses the regex-matched DETERMINISTIC_PATTERNS table).
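
# A hedged sketch of the streak-gated decay the comment above points at. The
# threshold and multiplier are illustrative stand-ins; the real values live
# in self_improvement / rule_evolution, which this diff does not show.
STREAK_THRESHOLD = 3      # assumption, not from this diff
DECAY_MULTIPLIER = 0.85   # assumption, not from this diff

def note_contradiction(lesson) -> None:
    lesson._contradiction_streak += 1
    if lesson._contradiction_streak >= STREAK_THRESHOLD:
        # Penalty acceleration: persistent contradictions erode confidence.
        lesson.confidence *= DECAY_MULTIPLIER
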
diff --git a/Gradata/src/gradata/_validator.py b/Gradata/src/gradata/_validator.py
index 2d01b5ea..143087fa 100644
--- a/Gradata/src/gradata/_validator.py
+++ b/Gradata/src/gradata/_validator.py
@@ -10,6 +10,7 @@
4. DATA_COMPLETENESS — Are events well-formed with required fields?
5. BEHAVIORAL_COVERAGE — Do CARL rules cover declared capabilities?
"""
+
from __future__ import annotations
import json
@@ -35,6 +36,7 @@
# ── Dimension 1: Metric Integrity ─────────────────────────────────────
+
def _verify_metrics(manifest: dict, conn: sqlite3.Connection) -> dict:
"""Compare claimed metrics against independently computed values."""
results = []
@@ -47,13 +49,17 @@ def _verify_metrics(manifest: dict, conn: sqlite3.Connection) -> dict:
except Exception:
actual_events = 0
claimed_events = db_meta.get("total_events", 0)
- results.append({
- "check": "total_events",
- "claimed": claimed_events,
- "actual": actual_events,
- "pass": actual_events >= claimed_events,
- "note": "actual >= claimed is valid (events accumulate)" if actual_events >= claimed_events else "claimed exceeds actual — inflation detected",
- })
+ results.append(
+ {
+ "check": "total_events",
+ "claimed": claimed_events,
+ "actual": actual_events,
+ "pass": actual_events >= claimed_events,
+ "note": "actual >= claimed is valid (events accumulate)"
+ if actual_events >= claimed_events
+ else "claimed exceeds actual — inflation detected",
+ }
+ )
# 1b. Event type count
try:
@@ -61,67 +67,88 @@ def _verify_metrics(manifest: dict, conn: sqlite3.Connection) -> dict:
except Exception:
actual_types = 0
claimed_types = db_meta.get("event_types", 0)
- results.append({
- "check": "event_types",
- "claimed": claimed_types,
- "actual": actual_types,
- "pass": abs(actual_types - claimed_types) <= 2,
- "note": "within tolerance" if abs(actual_types - claimed_types) <= 2 else "type count mismatch",
- })
+ results.append(
+ {
+ "check": "event_types",
+ "claimed": claimed_types,
+ "actual": actual_types,
+ "pass": abs(actual_types - claimed_types) <= 2,
+ "note": "within tolerance"
+ if abs(actual_types - claimed_types) <= 2
+ else "type count mismatch",
+ }
+ )
# 1c. Lessons graduated count
graduated_claimed = claimed.get("lessons_graduated", 0)
graduated_actual = _count_lessons_in_file(_p.BRAIN_DIR / "lessons-archive.md")
- results.append({
- "check": "lessons_graduated",
- "claimed": graduated_claimed,
- "actual": graduated_actual,
- "pass": abs(graduated_actual - graduated_claimed) <= 5,
- "note": "within tolerance" if abs(graduated_actual - graduated_claimed) <= 5 else "graduated count mismatch",
- })
+ results.append(
+ {
+ "check": "lessons_graduated",
+ "claimed": graduated_claimed,
+ "actual": graduated_actual,
+ "pass": abs(graduated_actual - graduated_claimed) <= 5,
+ "note": "within tolerance"
+ if abs(graduated_actual - graduated_claimed) <= 5
+ else "graduated count mismatch",
+ }
+ )
# 1d. Lessons active count
active_claimed = claimed.get("lessons_active", 0)
active_actual = _count_lessons_in_file(_p.LESSONS_FILE)
- results.append({
- "check": "lessons_active",
- "claimed": active_claimed,
- "actual": active_actual,
- "pass": abs(active_actual - active_claimed) <= 3,
- "note": "within tolerance" if abs(active_actual - active_claimed) <= 3 else "active count mismatch",
- })
+ results.append(
+ {
+ "check": "lessons_active",
+ "claimed": active_claimed,
+ "actual": active_actual,
+ "pass": abs(active_actual - active_claimed) <= 3,
+ "note": "within tolerance"
+ if abs(active_actual - active_claimed) <= 3
+ else "active count mismatch",
+ }
+ )
# 1e. Session count
sessions_claimed = manifest.get("metadata", {}).get("sessions_trained", 0)
try:
- sessions_actual = conn.execute(
- "SELECT MAX(session) FROM events WHERE typeof(session)='integer'"
- ).fetchone()[0] or 0
+ sessions_actual = (
+ conn.execute(
+ "SELECT MAX(session) FROM events WHERE typeof(session)='integer'"
+ ).fetchone()[0]
+ or 0
+ )
except Exception:
sessions_actual = 0
- results.append({
- "check": "sessions_trained",
- "claimed": sessions_claimed,
- "actual": sessions_actual,
- "pass": abs(sessions_actual - sessions_claimed) <= 3,
- "note": "within tolerance" if abs(sessions_actual - sessions_claimed) <= 3 else "session count mismatch",
- })
+ results.append(
+ {
+ "check": "sessions_trained",
+ "claimed": sessions_claimed,
+ "actual": sessions_actual,
+ "pass": abs(sessions_actual - sessions_claimed) <= 3,
+ "note": "within tolerance"
+ if abs(sessions_actual - sessions_claimed) <= 3
+ else "session count mismatch",
+ }
+ )
# 1f. Table count
claimed_tables = len(db_meta.get("tables", []))
try:
- actual_tables = len(conn.execute(
- "SELECT name FROM sqlite_master WHERE type='table'"
- ).fetchall())
+ actual_tables = len(
+ conn.execute("SELECT name FROM sqlite_master WHERE type='table'").fetchall()
+ )
except Exception:
actual_tables = 0
- results.append({
- "check": "db_tables",
- "claimed": claimed_tables,
- "actual": actual_tables,
- "pass": actual_tables >= claimed_tables,
- "note": "ok" if actual_tables >= claimed_tables else "tables missing from DB",
- })
+ results.append(
+ {
+ "check": "db_tables",
+ "claimed": claimed_tables,
+ "actual": actual_tables,
+ "pass": actual_tables >= claimed_tables,
+ "note": "ok" if actual_tables >= claimed_tables else "tables missing from DB",
+ }
+ )
passed = sum(1 for r in results if r["pass"])
return {
@@ -135,6 +162,7 @@ def _verify_metrics(manifest: dict, conn: sqlite3.Connection) -> dict:
# ── Dimension 2: Training Depth ───────────────────────────────────────
+
def _verify_training_depth(manifest: dict, conn: sqlite3.Connection) -> dict:
"""Is this brain genuinely trained or just padded with empty sessions?"""
results = []
@@ -154,18 +182,24 @@ def _verify_training_depth(manifest: dict, conn: sqlite3.Connection) -> dict:
empty_sessions = sum(1 for c in counts if c <= 1)
total_sessions = len(counts)
- results.append({
- "check": "avg_events_per_session",
- "value": round(avg_events, 1),
- "pass": avg_events >= 3,
- "note": f"{avg_events:.1f} events/session (minimum useful: 3)" if avg_events >= 3 else "suspiciously low event density — padding?",
- })
- results.append({
- "check": "empty_session_ratio",
- "value": round(empty_sessions / total_sessions, 3) if total_sessions > 0 else 1.0,
- "pass": (empty_sessions / total_sessions < 0.3) if total_sessions > 0 else False,
- "note": f"{empty_sessions}/{total_sessions} sessions with <=1 event",
- })
+ results.append(
+ {
+ "check": "avg_events_per_session",
+ "value": round(avg_events, 1),
+ "pass": avg_events >= 3,
+ "note": f"{avg_events:.1f} events/session (minimum useful: 3)"
+ if avg_events >= 3
+ else "suspiciously low event density — padding?",
+ }
+ )
+ results.append(
+ {
+ "check": "empty_session_ratio",
+ "value": round(empty_sessions / total_sessions, 3) if total_sessions > 0 else 1.0,
+ "pass": (empty_sessions / total_sessions < 0.3) if total_sessions > 0 else False,
+ "note": f"{empty_sessions}/{total_sessions} sessions with <=1 event",
+ }
+ )
# 2b. Event type diversity (real training produces varied events)
try:
@@ -177,12 +211,14 @@ def _verify_training_depth(manifest: dict, conn: sqlite3.Connection) -> dict:
if type_counts:
types_used = len(type_counts)
- results.append({
- "check": "event_type_diversity",
- "value": types_used,
- "pass": types_used >= 5,
- "note": f"{types_used} distinct event types (minimum for real training: 5)",
- })
+ results.append(
+ {
+ "check": "event_type_diversity",
+ "value": types_used,
+ "pass": types_used >= 5,
+ "note": f"{types_used} distinct event types (minimum for real training: 5)",
+ }
+ )
# 2c. Temporal span (brain trained over real time, not one burst)
try:
@@ -197,12 +233,16 @@ def _verify_training_depth(manifest: dict, conn: sqlite3.Connection) -> dict:
first = datetime.fromisoformat(str(span[0]))
last = datetime.fromisoformat(str(span[1]))
days = (last - first).days
- results.append({
- "check": "training_span_days",
- "value": days,
- "pass": days >= 3,
- "note": f"Trained over {days} days" if days >= 3 else "all training in <3 days — insufficient maturation",
- })
+ results.append(
+ {
+ "check": "training_span_days",
+ "value": days,
+ "pass": days >= 3,
+ "note": f"Trained over {days} days"
+ if days >= 3
+ else "all training in <3 days — insufficient maturation",
+ }
+ )
except Exception:
pass
@@ -214,12 +254,14 @@ def _verify_training_depth(manifest: dict, conn: sqlite3.Connection) -> dict:
except Exception:
correction_count = 0
- results.append({
- "check": "corrections_exist",
- "value": correction_count,
- "pass": correction_count >= 3,
- "note": f"{correction_count} corrections (minimum for credible training: 3)",
- })
+ results.append(
+ {
+ "check": "corrections_exist",
+ "value": correction_count,
+ "pass": correction_count >= 3,
+ "note": f"{correction_count} corrections (minimum for credible training: 3)",
+ }
+ )
passed = sum(1 for r in results if r["pass"])
return {
@@ -233,6 +275,7 @@ def _verify_training_depth(manifest: dict, conn: sqlite3.Connection) -> dict:
# ── Dimension 3: Learning Signal ──────────────────────────────────────
+
def _verify_learning_signal(manifest: dict, conn: sqlite3.Connection) -> dict:
"""Does the brain actually learn? Corrections should decrease over time."""
results = []
@@ -254,19 +297,23 @@ def _verify_learning_signal(manifest: dict, conn: sqlite3.Connection) -> dict:
second_half_avg = sum(counts[mid:]) / (len(counts) - mid) if (len(counts) - mid) > 0 else 0
improving = second_half_avg <= first_half_avg
- results.append({
- "check": "correction_trend",
- "first_half_avg": round(first_half_avg, 2),
- "second_half_avg": round(second_half_avg, 2),
- "pass": improving,
- "note": f"Early avg: {first_half_avg:.1f}, Recent avg: {second_half_avg:.1f} — {'improving' if improving else 'NOT improving'}",
- })
+ results.append(
+ {
+ "check": "correction_trend",
+ "first_half_avg": round(first_half_avg, 2),
+ "second_half_avg": round(second_half_avg, 2),
+ "pass": improving,
+ "note": f"Early avg: {first_half_avg:.1f}, Recent avg: {second_half_avg:.1f} — {'improving' if improving else 'NOT improving'}",
+ }
+ )
else:
- results.append({
- "check": "correction_trend",
- "pass": False,
- "note": f"Insufficient correction data ({len(rows)} sessions with corrections, need 4+)",
- })
+ results.append(
+ {
+ "check": "correction_trend",
+ "pass": False,
+ "note": f"Insufficient correction data ({len(rows)} sessions with corrections, need 4+)",
+ }
+ )
# 3b. Lesson graduation rate (lessons should move from INSTINCT to PATTERN to RULE)
lessons_file = _p.LESSONS_FILE
@@ -277,35 +324,43 @@ def _verify_learning_signal(manifest: dict, conn: sqlite3.Connection) -> dict:
if total > 0:
grad_rate = graduated / total
- results.append({
- "check": "graduation_rate",
- "value": round(grad_rate, 3),
- "active": active,
- "graduated": graduated,
- "pass": grad_rate >= 0.3,
- "note": f"{graduated}/{total} lessons graduated ({grad_rate:.0%})" if grad_rate >= 0.3 else f"Low graduation rate ({grad_rate:.0%}) — brain retains but doesn't crystallize",
- })
+ results.append(
+ {
+ "check": "graduation_rate",
+ "value": round(grad_rate, 3),
+ "active": active,
+ "graduated": graduated,
+ "pass": grad_rate >= 0.3,
+ "note": f"{graduated}/{total} lessons graduated ({grad_rate:.0%})"
+ if grad_rate >= 0.3
+ else f"Low graduation rate ({grad_rate:.0%}) — brain retains but doesn't crystallize",
+ }
+ )
else:
- results.append({
- "check": "graduation_rate",
- "pass": False,
- "note": "No lessons found — brain has no learning pipeline",
- })
+ results.append(
+ {
+ "check": "graduation_rate",
+ "pass": False,
+ "note": "No lessons found — brain has no learning pipeline",
+ }
+ )
# 3c. Lesson application tracking (lessons are actually applied, not just stored)
try:
- app_count = conn.execute(
- "SELECT COUNT(*) FROM lesson_applications"
- ).fetchone()[0]
+ app_count = conn.execute("SELECT COUNT(*) FROM lesson_applications").fetchone()[0]
except Exception:
app_count = 0
- results.append({
- "check": "lesson_applications",
- "value": app_count,
- "pass": app_count >= 1,
- "note": f"{app_count} lesson applications tracked" if app_count >= 1 else "No lesson applications — lessons exist but aren't applied",
- })
+ results.append(
+ {
+ "check": "lesson_applications",
+ "value": app_count,
+ "pass": app_count >= 1,
+ "note": f"{app_count} lesson applications tracked"
+ if app_count >= 1
+ else "No lesson applications — lessons exist but aren't applied",
+ }
+ )
passed = sum(1 for r in results if r["pass"])
return {
@@ -319,6 +374,7 @@ def _verify_learning_signal(manifest: dict, conn: sqlite3.Connection) -> dict:
# ── Dimension 4: Data Completeness ────────────────────────────────────
+
def _verify_data_completeness(manifest: dict, conn: sqlite3.Connection) -> dict:
"""Are events well-formed with required fields?"""
results = []
@@ -334,12 +390,14 @@ def _verify_data_completeness(manifest: dict, conn: sqlite3.Connection) -> dict:
if total > 0:
ts_rate = with_ts / total
- results.append({
- "check": "timestamp_coverage",
- "value": round(ts_rate, 3),
- "pass": ts_rate >= 0.95,
- "note": f"{ts_rate:.0%} of events have timestamps",
- })
+ results.append(
+ {
+ "check": "timestamp_coverage",
+ "value": round(ts_rate, 3),
+ "pass": ts_rate >= 0.95,
+ "note": f"{ts_rate:.0%} of events have timestamps",
+ }
+ )
# 4b. Events have session numbers
try:
@@ -351,12 +409,14 @@ def _verify_data_completeness(manifest: dict, conn: sqlite3.Connection) -> dict:
if total > 0:
session_rate = with_session / total
- results.append({
- "check": "session_coverage",
- "value": round(session_rate, 3),
- "pass": session_rate >= 0.90,
- "note": f"{session_rate:.0%} of events have session numbers",
- })
+ results.append(
+ {
+ "check": "session_coverage",
+ "value": round(session_rate, 3),
+ "pass": session_rate >= 0.90,
+ "note": f"{session_rate:.0%} of events have session numbers",
+ }
+ )
# 4c. Events have data payloads
try:
@@ -368,12 +428,14 @@ def _verify_data_completeness(manifest: dict, conn: sqlite3.Connection) -> dict:
if total > 0:
data_rate = with_data / total
- results.append({
- "check": "data_coverage",
- "value": round(data_rate, 3),
- "pass": data_rate >= 0.80,
- "note": f"{data_rate:.0%} of events have data payloads",
- })
+ results.append(
+ {
+ "check": "data_coverage",
+ "value": round(data_rate, 3),
+ "pass": data_rate >= 0.80,
+ "note": f"{data_rate:.0%} of events have data payloads",
+ }
+ )
# 4d. CORRECTION events have category tags
try:
@@ -390,12 +452,14 @@ def _verify_data_completeness(manifest: dict, conn: sqlite3.Connection) -> dict:
if corrections_total > 0:
tag_rate = corrections_tagged / corrections_total
- results.append({
- "check": "correction_categorization",
- "value": round(tag_rate, 3),
- "pass": tag_rate >= 0.70,
- "note": f"{tag_rate:.0%} of corrections are categorized",
- })
+ results.append(
+ {
+ "check": "correction_categorization",
+ "value": round(tag_rate, 3),
+ "pass": tag_rate >= 0.70,
+ "note": f"{tag_rate:.0%} of corrections are categorized",
+ }
+ )
# 4e. events.jsonl exists and is consistent with DB
jsonl_count = 0
@@ -408,13 +472,15 @@ def _verify_data_completeness(manifest: dict, conn: sqlite3.Connection) -> dict:
if total > 0:
sync_ratio = jsonl_count / total if total > 0 else 0
- results.append({
- "check": "dual_write_consistency",
- "db_count": total,
- "jsonl_count": jsonl_count,
- "pass": 0.8 <= sync_ratio <= 1.3,
- "note": f"DB: {total}, JSONL: {jsonl_count} — {'consistent' if 0.8 <= sync_ratio <= 1.3 else 'drift detected'}",
- })
+ results.append(
+ {
+ "check": "dual_write_consistency",
+ "db_count": total,
+ "jsonl_count": jsonl_count,
+ "pass": 0.8 <= sync_ratio <= 1.3,
+ "note": f"DB: {total}, JSONL: {jsonl_count} — {'consistent' if 0.8 <= sync_ratio <= 1.3 else 'drift detected'}",
+ }
+ )
passed = sum(1 for r in results if r["pass"])
return {
@@ -428,6 +494,7 @@ def _verify_data_completeness(manifest: dict, conn: sqlite3.Connection) -> dict:
# ── Dimension 5: Behavioral Coverage ──────────────────────────────────
+
def _verify_behavioral_coverage(manifest: dict, conn: sqlite3.Connection) -> dict:
"""Do CARL rules cover the brain's declared capabilities?"""
results = []
@@ -435,44 +502,56 @@ def _verify_behavioral_coverage(manifest: dict, conn: sqlite3.Connection) -> dic
# 5a. Safety rules exist
safety = contract.get("safety_rules", 0)
- results.append({
- "check": "safety_rules",
- "value": safety,
- "pass": safety >= 3,
- "note": f"{safety} safety rules" if safety >= 3 else "insufficient safety rules for marketplace distribution",
- })
+ results.append(
+ {
+ "check": "safety_rules",
+ "value": safety,
+ "pass": safety >= 3,
+ "note": f"{safety} safety rules"
+ if safety >= 3
+ else "insufficient safety rules for marketplace distribution",
+ }
+ )
# 5b. Global rules exist
global_rules = contract.get("global_rules", 0)
- results.append({
- "check": "global_rules",
- "value": global_rules,
- "pass": global_rules >= 2,
- "note": f"{global_rules} global rules",
- })
+ results.append(
+ {
+ "check": "global_rules",
+ "value": global_rules,
+ "pass": global_rules >= 2,
+ "note": f"{global_rules} global rules",
+ }
+ )
# 5c. Total rule coverage is proportional to training
total_rules = contract.get("total", 0)
sessions = manifest.get("metadata", {}).get("sessions_trained", 0)
rule_density = total_rules / max(sessions, 1)
- results.append({
- "check": "rule_density",
- "value": round(rule_density, 2),
- "total_rules": total_rules,
- "sessions": sessions,
- "pass": rule_density >= 0.5,
- "note": f"{total_rules} rules / {sessions} sessions = {rule_density:.1f} rules/session",
- })
+ results.append(
+ {
+ "check": "rule_density",
+ "value": round(rule_density, 2),
+ "total_rules": total_rules,
+ "sessions": sessions,
+ "pass": rule_density >= 0.5,
+ "note": f"{total_rules} rules / {sessions} sessions = {rule_density:.1f} rules/session",
+ }
+ )
# 5d. Tag taxonomy exists and has entries
taxonomy = manifest.get("tag_taxonomy", {})
tax_count = len(taxonomy)
- results.append({
- "check": "tag_taxonomy",
- "value": tax_count,
- "pass": tax_count >= 3,
- "note": f"{tax_count} tag prefixes defined" if tax_count >= 3 else "insufficient tag vocabulary",
- })
+ results.append(
+ {
+ "check": "tag_taxonomy",
+ "value": tax_count,
+ "pass": tax_count >= 3,
+ "note": f"{tax_count} tag prefixes defined"
+ if tax_count >= 3
+ else "insufficient tag vocabulary",
+ }
+ )
passed = sum(1 for r in results if r["pass"])
return {
@@ -486,6 +565,7 @@ def _verify_behavioral_coverage(manifest: dict, conn: sqlite3.Connection) -> dic
# ── Helpers ───────────────────────────────────────────────────────────
+
def _count_lessons_in_file(filepath: Path) -> int:
"""Count lesson entries in a lessons file."""
if not filepath.exists():
@@ -535,18 +615,25 @@ def _compute_trust_score(dimensions: list[dict]) -> dict:
# ── Main Validation ──────────────────────────────────────────────────
+
def validate_brain(manifest_path: Path | None = None, ctx: BrainContext | None = None) -> dict:
"""Run full brain validation. Returns structured report."""
brain_dir = ctx.brain_dir if ctx else _p.BRAIN_DIR
path = manifest_path or (brain_dir / "brain.manifest.json")
if not path.exists():
- return {"error": f"Manifest not found: {path}", "trust": {"score": 0, "grade": "F", "verdict": "UNTRUSTED"}}
+ return {
+ "error": f"Manifest not found: {path}",
+ "trust": {"score": 0, "grade": "F", "verdict": "UNTRUSTED"},
+ }
try:
manifest = json.loads(path.read_text(encoding="utf-8"))
except json.JSONDecodeError as e:
- return {"error": f"Invalid manifest JSON: {e}", "trust": {"score": 0, "grade": "F", "verdict": "UNTRUSTED"}}
+ return {
+ "error": f"Invalid manifest JSON: {e}",
+ "trust": {"score": 0, "grade": "F", "verdict": "UNTRUSTED"},
+ }
# Connect to DB
db_path = path.parent / "system.db"
@@ -555,7 +642,10 @@ def validate_brain(manifest_path: Path | None = None, ctx: BrainContext | None =
try:
conn = sqlite3.connect(str(db_path))
except Exception as e:
- return {"error": f"Cannot open DB: {e}", "trust": {"score": 0, "grade": "F", "verdict": "UNTRUSTED"}}
+ return {
+ "error": f"Cannot open DB: {e}",
+ "trust": {"score": 0, "grade": "F", "verdict": "UNTRUSTED"},
+ }
dimensions = [
_verify_metrics(manifest, conn),
@@ -612,8 +702,12 @@ def print_report(report: dict):
print("=" * 60)
print(f"Brain: {report.get('brain_version', '?')} ({report.get('domain', '?')})")
print(f"Date: {report.get('validated_at', '?')[:19]}")
- print(f"Trust: {trust.get('grade', '?')} ({trust.get('score', 0):.0%}) — {trust.get('verdict', '?')}")
- print(f"Checks: {summary.get('passed', 0)}/{summary.get('total_checks', 0)} passed ({summary.get('pass_rate', 0):.0%})")
+ print(
+ f"Trust: {trust.get('grade', '?')} ({trust.get('score', 0):.0%}) — {trust.get('verdict', '?')}"
+ )
+ print(
+ f"Checks: {summary.get('passed', 0)}/{summary.get('total_checks', 0)} passed ({summary.get('pass_rate', 0):.0%})"
+ )
print()
for dim in report.get("dimensions", []):
@@ -626,12 +720,15 @@ def print_report(report: dict):
print()
print("=" * 60)
- print(f"VERDICT: {trust.get('verdict', 'UNKNOWN')} (Grade {trust.get('grade', '?')}, Score {trust.get('score', 0):.0%})")
+ print(
+ f"VERDICT: {trust.get('verdict', 'UNKNOWN')} (Grade {trust.get('grade', '?')}, Score {trust.get('score', 0):.0%})"
+ )
print("=" * 60)
# ── CLI ──────────────────────────────────────────────────────────────
+
def main():
"""Standalone CLI entry point."""
import argparse
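
# The contract shared by every _verify_* dimension above: each check is a
# dict carrying a boolean "pass", and the dimension tallies them with
# passed = sum(1 for r in results if r["pass"]). A tiny runnable
# illustration of that aggregation (values invented):
results = [
    {"check": "total_events", "claimed": 100, "actual": 120, "pass": True},
    {"check": "event_types", "claimed": 9, "actual": 4, "pass": False},
]
passed = sum(1 for r in results if r["pass"])  # -> 1
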
diff --git a/Gradata/src/gradata/_workers.py b/Gradata/src/gradata/_workers.py
index 90f01d00..3d83c3af 100644
--- a/Gradata/src/gradata/_workers.py
+++ b/Gradata/src/gradata/_workers.py
@@ -39,9 +39,14 @@
CONSOLIDATE_EVENTS = "CONSOLIDATE_EVENTS"
DP_EXPORT = "DP_EXPORT"
-KNOWN_JOB_TYPES: frozenset[str] = frozenset({
- SYNTHESIZE_META_RULES, APPLY_DECAY, CONSOLIDATE_EVENTS, DP_EXPORT,
-})
+KNOWN_JOB_TYPES: frozenset[str] = frozenset(
+ {
+ SYNTHESIZE_META_RULES,
+ APPLY_DECAY,
+ CONSOLIDATE_EVENTS,
+ DP_EXPORT,
+ }
+)
_SCHEMA_SQL: tuple[str, ...] = (
"""
@@ -90,17 +95,19 @@ class Job:
def _stub_handler(label: str) -> Handler:
"""Log-and-succeed stub. Follow-up PRs swap via ``WorkerPool.register``."""
+
def _run(job: Job) -> None:
logger.info("worker: would %s (job=%d)", label, job.id)
+
return _run
def default_handlers() -> dict[str, Handler]:
return {
SYNTHESIZE_META_RULES: _stub_handler("synthesize meta-rules"),
- APPLY_DECAY: _stub_handler("apply decay"),
- CONSOLIDATE_EVENTS: _stub_handler("consolidate events"),
- DP_EXPORT: _stub_handler("run DP export"),
+ APPLY_DECAY: _stub_handler("apply decay"),
+ CONSOLIDATE_EVENTS: _stub_handler("consolidate events"),
+ DP_EXPORT: _stub_handler("run DP export"),
}
@@ -207,12 +214,18 @@ def _claim_one(self, conn: sqlite3.Connection) -> Job | None:
except json.JSONDecodeError:
payload = {}
return Job(
- id=int(row["id"]), type=str(row["type"]),
- payload=payload, created_at=float(row["created_at"]),
+ id=int(row["id"]),
+ type=str(row["type"]),
+ payload=payload,
+ created_at=float(row["created_at"]),
)
def _finalize(
- self, conn: sqlite3.Connection, job_id: int, *, error: str | None = None,
+ self,
+ conn: sqlite3.Connection,
+ job_id: int,
+ *,
+ error: str | None = None,
) -> None:
conn.execute(
"UPDATE worker_jobs SET status=?, finished_at=?, error=? WHERE id=?",
@@ -251,9 +264,10 @@ def drain_once(self) -> bool:
def _has_pending(self) -> bool:
conn = get_connection(self._db_path)
try:
- return conn.execute(
- "SELECT 1 FROM worker_jobs WHERE status='pending' LIMIT 1"
- ).fetchone() is not None
+ return (
+ conn.execute("SELECT 1 FROM worker_jobs WHERE status='pending' LIMIT 1").fetchone()
+ is not None
+ )
finally:
conn.close()
@@ -284,7 +298,9 @@ def start(self) -> None:
self._drain_deadline = None
for i in range(self._n_workers):
t = threading.Thread(
- target=self._worker_loop, name=f"gradata-worker-{i}", daemon=True,
+ target=self._worker_loop,
+ name=f"gradata-worker-{i}",
+ daemon=True,
)
t.start()
self._threads.append(t)
@@ -361,7 +377,9 @@ def _handle_signal(signum: int, _frame: object) -> None:
parser.add_argument("--brain-dir", required=True, help="Path to the brain directory")
parser.add_argument("--workers", type=int, default=1, help="Worker threads (default 1)")
parser.add_argument(
- "--drain-timeout", type=float, default=5.0,
+ "--drain-timeout",
+ type=float,
+ default=5.0,
help="Seconds to let the queue drain on shutdown (default 5)",
)
parser.add_argument("--log-level", default="INFO")
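
# A hedged usage sketch for the queue above: a real handler replaces a
# _stub_handler via WorkerPool.register, the swap path the stub docstring
# names. The constructor kwargs are inferred from the CLI flags and are an
# assumption; the actual __init__ signature is not shown in this diff.
from gradata._workers import APPLY_DECAY, Job, WorkerPool

def apply_decay(job: Job) -> None:
    # job.payload is the JSON dict decoded in _claim_one ({} on bad JSON).
    days = job.payload.get("horizon_days", 30)  # hypothetical payload key
    ...  # real decay pass would go here, bounded by `days`

pool = WorkerPool(db_path="brain/system.db", n_workers=1)  # assumed kwargs
pool.register(APPLY_DECAY, apply_decay)                    # assumed signature
pool.start()
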
diff --git a/Gradata/src/gradata/adapters/mem0.py b/Gradata/src/gradata/adapters/mem0.py
index 41bc0e57..a9fadc5e 100644
--- a/Gradata/src/gradata/adapters/mem0.py
+++ b/Gradata/src/gradata/adapters/mem0.py
@@ -86,9 +86,7 @@ def __init__(
) from exc
if not api_key:
- raise ValueError(
- "Mem0Adapter requires an api_key when no client is supplied"
- )
+ raise ValueError("Mem0Adapter requires an api_key when no client is supplied")
self._client = MemoryClient(api_key=api_key)
# ------------------------------------------------------------------
@@ -160,18 +158,12 @@ def pull_memory_for_context(
except TypeError:
# Older mem0ai versions don't accept `filters` kwarg.
try:
- raw = self._client.search(
- query, user_id=self.user_id, limit=k
- )
+ raw = self._client.search(query, user_id=self.user_id, limit=k)
except Exception as exc:
- logger.warning(
- "Mem0Adapter.pull_memory_for_context failed: %s", exc
- )
+ logger.warning("Mem0Adapter.pull_memory_for_context failed: %s", exc)
return []
except Exception as exc:
- logger.warning(
- "Mem0Adapter.pull_memory_for_context failed: %s", exc
- )
+ logger.warning("Mem0Adapter.pull_memory_for_context failed: %s", exc)
return []
return _normalise_search_results(raw)
@@ -261,19 +253,16 @@ def _normalise_search_results(raw: Any) -> list[dict[str, Any]]:
continue
# Mem0 uses "memory" for the text in most versions; fall back to
# "text" and "content" for older / alternative shapes.
- text = (
- item.get("memory")
- or item.get("text")
- or item.get("content")
- or ""
- )
+ text = item.get("memory") or item.get("text") or item.get("content") or ""
metadata = item.get("metadata") or {}
score = item.get("score")
- out.append({
- "text": text,
- "metadata": metadata if isinstance(metadata, dict) else {},
- "score": score,
- })
+ out.append(
+ {
+ "text": text,
+ "metadata": metadata if isinstance(metadata, dict) else {},
+ "score": score,
+ }
+ )
return out
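
# Input/output sketch for _normalise_search_results, exercising the fallback
# chain above ("memory", then "text", then "content") and the non-dict
# metadata guard. The sample rows are invented:
from gradata.adapters.mem0 import _normalise_search_results

raw = [
    {"memory": "Prefers weekly summaries", "score": 0.91},
    {"content": "legacy shape", "metadata": "not-a-dict"},
]
print(_normalise_search_results(raw))
# -> [{"text": "Prefers weekly summaries", "metadata": {}, "score": 0.91},
#     {"text": "legacy shape", "metadata": {}, "score": None}]
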
diff --git a/Gradata/src/gradata/audit.py b/Gradata/src/gradata/audit.py
index 65956b91..bdfa342f 100644
--- a/Gradata/src/gradata/audit.py
+++ b/Gradata/src/gradata/audit.py
@@ -25,6 +25,7 @@
# Write provenance
# ---------------------------------------------------------------------------
+
def write_provenance(
db_path: str | Path,
*,
@@ -50,6 +51,7 @@ def write_provenance(
try:
import contextlib as _ctx
import sqlite3 as _sqlite3
+
with get_connection(db_path) as conn:
# Defensive migration: brains created before migration 001 lack tenant_id.
with _ctx.suppress(_sqlite3.OperationalError):
@@ -68,6 +70,7 @@ def write_provenance(
# Query provenance
# ---------------------------------------------------------------------------
+
def query_provenance(
db_path: str | Path,
*,
@@ -112,6 +115,7 @@ def query_provenance(
# Scan events.jsonl for specific IDs
# ---------------------------------------------------------------------------
+
def _scan_events_for_ids(
events_path: str | Path,
event_ids: list[str],
@@ -157,6 +161,7 @@ def _scan_events_for_ids(
# Full trace: provenance + events + transitions
# ---------------------------------------------------------------------------
+
def trace_rule(
db_path: str | Path,
events_path: str | Path,
@@ -196,8 +201,7 @@ def trace_rule(
correction_event_ids: list[str] = []
if provenance:
correction_event_ids = [
- r["correction_event_id"] for r in provenance
- if r.get("correction_event_id")
+ r["correction_event_id"] for r in provenance if r.get("correction_event_id")
]
if not correction_event_ids and target.correction_event_ids:
correction_event_ids = target.correction_event_ids
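
# A direct-call sketch of the trace chain above, using the same keyword
# arguments brain_inspection.py passes later in this diff; the paths and
# rule id are illustrative:
from pathlib import Path
from gradata.audit import trace_rule

report = trace_rule(
    db_path=Path("brain/system.db"),
    events_path=Path("brain/events.jsonl"),
    lessons_path=Path("brain/lessons.md"),
    rule_id="rule-0042",  # hypothetical id
)
# `report` bundles provenance rows plus the correction events they cite.
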
diff --git a/Gradata/src/gradata/brain.py b/Gradata/src/gradata/brain.py
index a2b07821..afb3f5d7 100644
--- a/Gradata/src/gradata/brain.py
+++ b/Gradata/src/gradata/brain.py
@@ -422,8 +422,8 @@ def correct(
# be defensive in case the schema changes.
if not dry_run and result and result.get("graduated"):
_telemetry.send_once("first_graduation")
- except Exception:
- pass
+ except Exception as e:
+ logger.debug("Telemetry send_once failed (non-fatal): %s", e)
return result
@@ -538,9 +538,7 @@ def auto_heal(
"""
from gradata.enhancements.self_healing import auto_heal_failures
- result = auto_heal_failures(
- self, failure_events=failure_events, max_patches=max_patches
- )
+ result = auto_heal_failures(self, failure_events=failure_events, max_patches=max_patches)
# Patching rewrites lessons.md; invalidate the in-memory rule cache
# so subsequent apply_brain_rules() calls see the patched text
# instead of a stale pre-patch prompt.
@@ -661,7 +659,9 @@ def _norm(s: str) -> str:
# l.category may have arbitrary casing (parse_lessons preserves
# on-disk form); compare case-insensitively against the canonical
# upper-cased `category` we're inserting.
- if (l.category or "").strip().upper() == category and _norm(l.description) == desc_norm:
+ if (l.category or "").strip().upper() == category and _norm(
+ l.description
+ ) == desc_norm:
return {
"added": False,
"reason": "duplicate",
@@ -881,7 +881,10 @@ def apply_brain_rules(
from gradata.rules.rule_engine import apply_rules_with_tree
applied = apply_rules_with_tree(
- lessons, scope, max_rules=max_rules, event_bus=_bus,
+ lessons,
+ scope,
+ max_rules=max_rules,
+ event_bus=_bus,
)
except (ImportError, Exception):
applied = apply_rules(lessons, scope, max_rules=max_rules, bus=_bus)
@@ -891,23 +894,26 @@ def apply_brain_rules(
# session's prompts. Fire-and-forget — never fails apply_brain_rules.
if _bus is not None and applied:
try:
- _bus.emit("rules.injected", {
- "rules": [
- {
- "id": a.rule_id,
- "category": a.lesson.category,
- "confidence": a.lesson.confidence,
- "state": a.lesson.state.value,
- }
- for a in applied
- ],
- "scope": {
- "task_type": scope.task_type,
- "domain": scope.domain,
- "audience": scope.audience,
+ _bus.emit(
+ "rules.injected",
+ {
+ "rules": [
+ {
+ "id": a.rule_id,
+ "category": a.lesson.category,
+ "confidence": a.lesson.confidence,
+ "state": a.lesson.state.value,
+ }
+ for a in applied
+ ],
+ "scope": {
+ "task_type": scope.task_type,
+ "domain": scope.domain,
+ "audience": scope.audience,
+ },
+ "task": task,
},
- "task": task,
- })
+ )
except Exception as e:
logger.debug("rules.injected emit failed: %s", e)
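
# A consumer sketch for the "rules.injected" payload emitted above. Only the
# payload shape is taken from the emit call; the subscription call itself is
# an assumption, since this diff only shows _bus.emit:
def on_rules_injected(payload: dict) -> None:
    scope = payload.get("scope", {})
    for rule in payload.get("rules", []):
        print(f"{rule['id']} [{rule['category']}] {rule['state']} "
              f"@ {rule['confidence']:.2f} for task_type={scope.get('task_type')}")

# bus.subscribe("rules.injected", on_rules_injected)  # assumed bus API
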
diff --git a/Gradata/src/gradata/brain_inspection.py b/Gradata/src/gradata/brain_inspection.py
index 01e570f4..f621e9d3 100644
--- a/Gradata/src/gradata/brain_inspection.py
+++ b/Gradata/src/gradata/brain_inspection.py
@@ -30,32 +30,50 @@ class BrainInspectionMixin:
bus: Any
def _find_lessons_path(self) -> Path | None: ...
- def emit(self, event_type: str, source: str, data: dict | None = None,
- tags: list | None = None, session: int | None = None) -> dict: ...
+ def emit(
+ self,
+ event_type: str,
+ source: str,
+ data: dict | None = None,
+ tags: list | None = None,
+ session: int | None = None,
+ ) -> dict: ...
# ── Rule Inspection API ────────────────────────────────────────────
def rules(self, *, include_all: bool = False, category: str | None = None) -> list[dict]:
"""List graduated brain rules. See gradata.inspection.list_rules."""
from gradata.inspection import list_rules
- return list_rules(db_path=self.db_path,
- lessons_path=self._find_lessons_path() or self.dir / "lessons.md",
- include_all=include_all, category=category)
+
+ return list_rules(
+ db_path=self.db_path,
+ lessons_path=self._find_lessons_path() or self.dir / "lessons.md",
+ include_all=include_all,
+ category=category,
+ )
def explain(self, rule_id: str) -> dict:
"""Trace a rule to its source corrections. See gradata.inspection.explain_rule."""
from gradata.inspection import explain_rule
- return explain_rule(db_path=self.db_path,
- events_path=self.ctx.events_jsonl if hasattr(self.ctx, "events_jsonl") else self.dir / "events.jsonl",
- rule_id=rule_id,
- lessons_path=self._find_lessons_path() or self.dir / "lessons.md")
+
+ return explain_rule(
+ db_path=self.db_path,
+ events_path=self.ctx.events_jsonl
+ if hasattr(self.ctx, "events_jsonl")
+ else self.dir / "events.jsonl",
+ rule_id=rule_id,
+ lessons_path=self._find_lessons_path() or self.dir / "lessons.md",
+ )
def trace(self, rule_id: str) -> dict:
"""Trace a rule's full provenance chain. See gradata.audit.trace_rule."""
from gradata.audit import trace_rule
+
return trace_rule(
db_path=self.db_path,
- events_path=self.ctx.events_jsonl if hasattr(self.ctx, "events_jsonl") else self.dir / "events.jsonl",
+ events_path=self.ctx.events_jsonl
+ if hasattr(self.ctx, "events_jsonl")
+ else self.dir / "events.jsonl",
lessons_path=self._find_lessons_path() or self.dir / "lessons.md",
rule_id=rule_id,
)
@@ -63,9 +81,12 @@ def trace(self, rule_id: str) -> dict:
def export_data(self, *, output_format: str = "json") -> str:
"""Export rules as JSON or YAML. See gradata.inspection.export_rules."""
from gradata.inspection import export_rules
- return export_rules(db_path=self.db_path,
- lessons_path=self._find_lessons_path() or self.dir / "lessons.md",
- output_format=output_format)
+
+ return export_rules(
+ db_path=self.db_path,
+ lessons_path=self._find_lessons_path() or self.dir / "lessons.md",
+ output_format=output_format,
+ )
# ── Batch Approval at Session End ─────────────────────────────────
@@ -76,6 +97,7 @@ def pending_promotions(self) -> list[dict]:
Returns list of rule dicts with id, category, state, confidence, etc.
"""
from gradata.inspection import list_rules
+
return list_rules(
db_path=self.db_path,
lessons_path=self._find_lessons_path() or self.dir / "lessons.md",
@@ -108,13 +130,17 @@ def approve_promotion(self, rule_id: str) -> dict:
write_lessons_safe(lessons_path, format_lessons(lessons))
try:
- self.emit("PROMOTION_APPROVED", "brain.approve_promotion", {
- "rule_id": rule_id,
- "category": target.category,
- "description": target.description[:200],
- "state": target.state.value,
- "confidence": target.confidence,
- })
+ self.emit(
+ "PROMOTION_APPROVED",
+ "brain.approve_promotion",
+ {
+ "rule_id": rule_id,
+ "category": target.category,
+ "description": target.description[:200],
+ "state": target.state.value,
+ "confidence": target.confidence,
+ },
+ )
except Exception as e:
logger.debug("promotion.approved emit failed: %s", e)
@@ -154,14 +180,18 @@ def reject_promotion(self, rule_id: str) -> dict:
write_lessons_safe(lessons_path, format_lessons(lessons))
try:
- self.emit("PROMOTION_REJECTED", "brain.reject_promotion", {
- "rule_id": rule_id,
- "category": target.category,
- "description": target.description[:200],
- "demoted_from": old_state,
- "new_state": "INSTINCT",
- "confidence": 0.40,
- })
+ self.emit(
+ "PROMOTION_REJECTED",
+ "brain.reject_promotion",
+ {
+ "rule_id": rule_id,
+ "category": target.category,
+ "description": target.description[:200],
+ "demoted_from": old_state,
+ "new_state": "INSTINCT",
+ "confidence": 0.40,
+ },
+ )
except Exception as e:
logger.debug("promotion.rejected emit failed: %s", e)
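
# End-to-end sketch of the inspection + batch-approval surface above, built
# only from methods this diff shows; the brain directory is illustrative:
from gradata import Brain

brain = Brain("./brain")
for rule in brain.rules(category="PROCESS"):
    print(rule)

# Session-end review: approve (or reject) each candidate promotion.
for pending in brain.pending_promotions():
    brain.approve_promotion(pending["id"])
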
diff --git a/Gradata/src/gradata/cli.py b/Gradata/src/gradata/cli.py
index f11e2ff6..df5256c4 100644
--- a/Gradata/src/gradata/cli.py
+++ b/Gradata/src/gradata/cli.py
@@ -17,6 +17,7 @@
gradata install brain-archive.zip # Install from marketplace
gradata install --list # List installed brains
"""
+
from __future__ import annotations
import argparse
@@ -40,6 +41,7 @@ def _get_brain(args):
brains, etc.).
"""
from gradata import Brain
+
brain_dir = env_str("GRADATA_BRAIN") or getattr(args, "brain_dir", None) or Path.cwd()
return Brain(brain_dir)
@@ -113,8 +115,12 @@ def cmd_manifest(args):
meta = m.get("metadata", {})
quality = m.get("quality", {})
rag = m.get("rag", {})
- print(f"Brain {meta.get('brain_version', '?')} | {meta.get('sessions_trained', 0)} sessions | {meta.get('maturity_phase', '?')}")
- print(f" Quality: correction_rate={quality.get('correction_rate')}, lessons={quality.get('lessons_active', 0)} active / {quality.get('lessons_graduated', 0)} graduated")
+ print(
+ f"Brain {meta.get('brain_version', '?')} | {meta.get('sessions_trained', 0)} sessions | {meta.get('maturity_phase', '?')}"
+ )
+ print(
+ f" Quality: correction_rate={quality.get('correction_rate')}, lessons={quality.get('lessons_active', 0)} active / {quality.get('lessons_graduated', 0)} graduated"
+ )
print(f" RAG: {rag.get('provider', '?')} ({rag.get('chunks_indexed', 0)} chunks)")
@@ -132,11 +138,14 @@ def cmd_stats(args):
def cmd_audit(args):
try:
from gradata._data_flow_audit import run_audit
+
report = run_audit()
if args.json:
print(json.dumps(report, indent=2))
else:
- status = "PASS" if report["score"] >= 80 else "WARN" if report["score"] >= 60 else "FAIL"
+ status = (
+ "PASS" if report["score"] >= 80 else "WARN" if report["score"] >= 60 else "FAIL"
+ )
print(f"{status}: {report['passed']}/{report['total']} checks ({report['score']}%)")
failures = [c for c in report["checks"] if not c["passed"]]
if failures:
@@ -156,6 +165,7 @@ def cmd_export(args):
target = getattr(args, "target", None)
if target:
from gradata.enhancements.rule_export import export_rules
+
brain_root = _resolve_brain_root(args)
# Prefer the canonical lessons path the rest of the SDK uses, rather
# than hardcoding brain_root/"lessons.md" inside the exporter.
@@ -197,6 +207,7 @@ def cmd_context(args):
def cmd_validate(args):
brain = _get_brain(args)
from gradata._validator import print_report, validate_brain
+
manifest_path = Path(args.manifest) if args.manifest else brain.dir / "brain.manifest.json"
report = validate_brain(manifest_path)
if args.json:
@@ -209,8 +220,15 @@ def cmd_validate(args):
def cmd_doctor(args):
from gradata._doctor import diagnose, print_diagnosis
+
brain_dir = getattr(args, "brain_dir", None)
- report = diagnose(brain_dir=brain_dir)
+ cloud_only = getattr(args, "cloud", False)
+ include_cloud = not getattr(args, "no_cloud", False)
+ report = diagnose(
+ brain_dir=brain_dir,
+ include_cloud=include_cloud,
+ cloud_only=cloud_only,
+ )
if getattr(args, "json", False):
print(json.dumps(report, indent=2))
else:
@@ -250,11 +268,14 @@ def cmd_health(args):
except ImportError:
from gradata.enhancements.reporting import format_health_report, generate_health_report
except ImportError:
- print("Health reports require the reporting module. Cloud features require the Gradata cloud service (coming soon).")
+ print(
+ "Health reports require the reporting module. Cloud features require the Gradata cloud service (coming soon)."
+ )
sys.exit(1)
report = generate_health_report(brain.db_path)
if getattr(args, "json", False):
import dataclasses
+
print(json.dumps(dataclasses.asdict(report), indent=2))
else:
print(format_health_report(report))
@@ -282,7 +303,9 @@ def cmd_report(args):
generate_rule_audit,
)
except ImportError:
- print("Reports require the reporting module. Cloud features require the Gradata cloud service (coming soon).")
+ print(
+ "Reports require the reporting module. Cloud features require the Gradata cloud service (coming soon)."
+ )
sys.exit(1)
report_type = args.type
if report_type == "csv":
@@ -376,6 +399,7 @@ def cmd_diagnose(args):
if lessons_path.exists():
try:
from gradata.enhancements.self_improvement import parse_lessons
+
lessons = parse_lessons(lessons_path.read_text(encoding="utf-8"))
states = Counter(lesson.state.value for lesson in lessons)
print(f"Lessons: {len(lessons)}")
@@ -413,6 +437,7 @@ def cmd_correct(args):
def cmd_review(args):
brain = _get_brain(args)
import json as _json
+
if args.approve:
result = brain.approve_lesson(args.approve)
if args.json:
@@ -440,9 +465,9 @@ def cmd_review(args):
for p in pending:
print(f" ID {p['id']} [{p['lesson_category']}] {p['lesson_description'][:60]}")
print(f" Severity: {p.get('severity', '?')} | Created: {p['created_at']}")
- if p.get('draft_text'):
+ if p.get("draft_text"):
print(f" Draft: {p['draft_text'][:80]}...")
- if p.get('final_text'):
+ if p.get("final_text"):
print(f" Final: {p['final_text'][:80]}...")
print()
print(" gradata review --approve ID Accept a lesson")
@@ -474,7 +499,9 @@ def cmd_convergence(args):
print(f" S{s:<4} │{bar} {c}")
print(f" {'─' * (chart_width + 15)}")
- print(f" Total: {data.get('total_corrections', 0)} corrections across {data.get('total_sessions', 0)} sessions")
+ print(
+ f" Total: {data.get('total_corrections', 0)} corrections across {data.get('total_sessions', 0)} sessions"
+ )
print(f" Trend: {trend} (p={data.get('p_value', 1.0):.3f})")
# Category breakdown
@@ -491,6 +518,7 @@ def cmd_convergence(args):
def cmd_demo(args):
"""Copy pre-trained demo brain to target directory."""
import shutil
+
target = Path(args.target)
demo_src = Path(__file__).parent / "demo" / "brain"
if not demo_src.is_dir():
@@ -510,6 +538,7 @@ def _gradata_config_path(args=None) -> Path:
Precedence: --config arg > GRADATA_CONFIG env > ~/.gradata/config.toml
"""
import os
+
explicit = getattr(args, "config", None) if args else None
if explicit:
return Path(explicit)
@@ -522,13 +551,22 @@ def _gradata_config_path(args=None) -> Path:
def _sanitize_toml_value(val: str) -> str:
"""Finding 12: strip characters that could inject TOML structure."""
# Remove newlines, brackets, and unbalanced quotes to prevent injection
- return val.replace("\n", "").replace("\r", "").replace("[", "").replace("]", "").replace('"', "").replace("\\", "").strip()
+ return (
+ val.replace("\n", "")
+ .replace("\r", "")
+ .replace("[", "")
+ .replace("]", "")
+ .replace('"', "")
+ .replace("\\", "")
+ .strip()
+ )
def _check_config_permissions(config_path: Path) -> None:
"""Finding 4: warn if config file is world-readable (Unix only)."""
import os
import stat
+
try:
st = os.stat(config_path)
# Check if group or others have any permissions
@@ -636,9 +674,9 @@ def cmd_login(args):
config_path.write_text(
f"# Gradata cloud config (auto-generated by `gradata login`)\n"
f"[cloud]\n"
- f"api_key = \"{safe_key}\"\n"
- f"brain_id = \"{safe_brain}\"\n"
- f"api_url = \"{safe_url}\"\n",
+ f'api_key = "{safe_key}"\n'
+ f'brain_id = "{safe_brain}"\n'
+ f'api_url = "{safe_url}"\n',
encoding="utf-8",
)
@@ -679,6 +717,7 @@ def cmd_logout(args):
print("Not logged in (no config file found).")
import os
+
os.environ.pop("GRADATA_API_KEY", None)
@@ -740,7 +779,10 @@ def cmd_rule_add(args):
from gradata import Brain as _Brain
add_result = _Brain(brain_root).add_rule(
- description=description, category=category, state="RULE", confidence=1.0,
+ description=description,
+ category=category,
+ state="RULE",
+ confidence=1.0,
)
if not add_result.get("added"):
reason = add_result.get("reason", "unknown")
@@ -757,12 +799,12 @@ def cmd_rule_add(args):
# (yashserai19/TECHBITS). Seeded at RULE tier so they inject immediately, no
# correction loop required. Users still get learned rules on top.
_SEVEN_STARTER_RULES: list[tuple[str, str]] = [
- ("PATTERN", "Follow existing patterns before introducing new abstractions"),
- ("CODE", "Keep diffs small and focused"),
- ("PROCESS", "Run the smallest relevant test or lint after each change"),
- ("TRUTH", "State clearly when a command cannot be run — never pretend it ran"),
- ("PROCESS", "State assumptions before implementing"),
- ("PROCESS", "Update docs, tests, and types when behavior changes"),
+ ("PATTERN", "Follow existing patterns before introducing new abstractions"),
+ ("CODE", "Keep diffs small and focused"),
+ ("PROCESS", "Run the smallest relevant test or lint after each change"),
+ ("TRUTH", "State clearly when a command cannot be run — never pretend it ran"),
+ ("PROCESS", "State assumptions before implementing"),
+ ("PROCESS", "Update docs, tests, and types when behavior changes"),
("SECURITY", "Never expose secrets — no keys, tokens, or credentials in code or output"),
]
@@ -790,7 +832,10 @@ def cmd_seed(args):
skipped = 0
for category, text in rules:
result = brain.add_rule(
- description=text, category=category, state="RULE", confidence=1.0,
+ description=text,
+ category=category,
+ state="RULE",
+ confidence=1.0,
)
if result.get("added"):
added += 1
@@ -829,9 +874,7 @@ def cmd_rule_list(args):
# Accept both modern layout (marker inside description) and the legacy
# "[RULE:conf] [hooked] CATEGORY: desc" layout where the marker appears
# between the state bracket and the category.
- lesson_re = _re.compile(
- r"^\[[\d-]+\]\s+\[RULE:[\d.]+\]\s+(?:\[hooked\]\s+)?(\w+):\s+(.+)$"
- )
+ lesson_re = _re.compile(r"^\[[\d-]+\]\s+\[RULE:[\d.]+\]\s+(?:\[hooked\]\s+)?(\w+):\s+(.+)$")
for line in lessons_file.read_text(encoding="utf-8").splitlines():
stripped = line.strip()
# Legacy marker position: remember it, then strip for regex.
@@ -842,14 +885,12 @@ def cmd_rule_list(args):
category = m.group(1)
desc = m.group(2).strip()
modern_marker = desc.startswith("[hooked] ")
- clean_desc = desc[len("[hooked] "):] if modern_marker else desc
+ clean_desc = desc[len("[hooked] ") :] if modern_marker else desc
rules.append((category, clean_desc, modern_marker or legacy_marker))
# Discover installed hook files (pre + post)
- pre_dir = Path(os.environ.get("GRADATA_HOOK_ROOT")
- or ".claude/hooks/pre-tool/generated")
- post_dir = Path(os.environ.get("GRADATA_HOOK_ROOT_POST")
- or ".claude/hooks/post-tool/generated")
+ pre_dir = Path(os.environ.get("GRADATA_HOOK_ROOT") or ".claude/hooks/pre-tool/generated")
+ post_dir = Path(os.environ.get("GRADATA_HOOK_ROOT_POST") or ".claude/hooks/post-tool/generated")
installed_files: dict[str, Path] = {} # slug (file stem) -> path
for d in (pre_dir, post_dir):
@@ -914,10 +955,8 @@ def cmd_rule_remove(args):
lessons_file = brain_root / "lessons.md"
# 1. Delete hook file from whichever generated dir holds it
- pre_dir = Path(os.environ.get("GRADATA_HOOK_ROOT")
- or ".claude/hooks/pre-tool/generated")
- post_dir = Path(os.environ.get("GRADATA_HOOK_ROOT_POST")
- or ".claude/hooks/post-tool/generated")
+ pre_dir = Path(os.environ.get("GRADATA_HOOK_ROOT") or ".claude/hooks/pre-tool/generated")
+ post_dir = Path(os.environ.get("GRADATA_HOOK_ROOT_POST") or ".claude/hooks/post-tool/generated")
removed_file = None
for d in (pre_dir, post_dir):
@@ -962,7 +1001,7 @@ def cmd_rule_remove(args):
legacy_marker = bool(_re.search(r"\[RULE:[\d.]+\]\s+\[hooked\]\s+", stripped))
modern_marker = desc.startswith("[hooked] ")
was_hooked = legacy_marker or modern_marker
- clean_desc = desc[len("[hooked] "):] if modern_marker else desc
+ clean_desc = desc[len("[hooked] ") :] if modern_marker else desc
match_this = _slug(clean_desc) == slug
if not match_this:
@@ -990,7 +1029,7 @@ def cmd_rule_remove(args):
meta_line = lines[i]
meta_stripped = meta_line.strip()
if meta_stripped.startswith("Metadata:"):
- payload = meta_stripped[len("Metadata:"):].strip()
+ payload = meta_stripped[len("Metadata:") :].strip()
try:
md = _json_meta.loads(payload)
except (ValueError, TypeError):
@@ -1017,6 +1056,7 @@ def cmd_rule_remove(args):
HOOK_DEMOTED,
RULE_PATCH_REVERTED,
)
+
_events.emit(
RULE_PATCH_REVERTED,
"cli:rule-remove",
@@ -1059,17 +1099,75 @@ def cmd_rule(args):
print(f"error: unknown rule subcommand: {sub}", file=sys.stderr)
+def cmd_skill_export(args):
+ """Export graduated rules as an Anthropic Claude Skill folder.
+
+ Produces ``<output-dir>/<name>/SKILL.md`` ready to drop into
+ ``.claude/skills/`` or any Skills-aware harness.
+ """
+ from gradata.enhancements.skill_export import export_skill, write_skill
+
+ brain_root = _resolve_brain_root(args)
+ lessons_path: Path | None = None
+ try:
+ brain = _get_brain(args)
+ lessons_path = brain._find_lessons_path()
+ except Exception:
+ lessons_path = None
+
+ name = args.name.strip()
+ if not name:
+ print("error: skill name required", file=sys.stderr)
+ return
+
+ output_dir = getattr(args, "output_dir", None)
+ if output_dir:
+ skill_md = write_skill(
+ brain_root,
+ name=name,
+ output_dir=Path(output_dir),
+ description=getattr(args, "description", None),
+ category=getattr(args, "category", None),
+ include_meta=not getattr(args, "no_meta", False),
+ lessons_path=lessons_path,
+ )
+ print(f"Wrote skill to {skill_md}")
+ return
+
+ text = export_skill(
+ brain_root,
+ name=name,
+ description=getattr(args, "description", None),
+ category=getattr(args, "category", None),
+ include_meta=not getattr(args, "no_meta", False),
+ lessons_path=lessons_path,
+ )
+ print(text, end="")
+
+
+def cmd_skill(args):
+ """Dispatch `gradata skill <subcommand>`."""
+ sub = getattr(args, "skill_cmd", None)
+ if sub == "export":
+ cmd_skill_export(args)
+ else:
+ print(f"error: unknown skill subcommand: {sub}", file=sys.stderr)
+
+
def cmd_hooks(args):
"""Manage Claude Code hook integration."""
action = args.action
if action == "install":
from gradata.hooks.claude_code import install_hook
+
install_hook(profile=getattr(args, "profile", "standard"))
elif action == "uninstall":
from gradata.hooks.claude_code import uninstall_hook
+
uninstall_hook()
elif action == "status":
from gradata.hooks.claude_code import hook_status
+
hook_status()
@@ -1078,8 +1176,9 @@ def main():
prog="gradata",
description="Personal AI Brain SDK",
)
- parser.add_argument("--brain-dir", "-b", type=Path,
- help="Brain directory (default: current dir)")
+ parser.add_argument(
+ "--brain-dir", "-b", type=Path, help="Brain directory (default: current dir)"
+ )
sub = parser.add_subparsers(dest="command")
# init
@@ -1088,10 +1187,15 @@ def main():
p_init.add_argument("--name", default=None, help="Brain name (default: directory name)")
p_init.add_argument("--domain", default=None, help="Brain domain (e.g., Sales, Engineering)")
p_init.add_argument("--company", default=None, help="Company name (creates company.md)")
- p_init.add_argument("--embedding", choices=["local", "gemini"], default=None,
- help="Embedding provider: local (default) or gemini")
- p_init.add_argument("--no-interactive", action="store_true",
- help="Skip interactive prompts, use defaults")
+ p_init.add_argument(
+ "--embedding",
+ choices=["local", "gemini"],
+ default=None,
+ help="Embedding provider: local (default) or gemini",
+ )
+ p_init.add_argument(
+ "--no-interactive", action="store_true", help="Skip interactive prompts, use defaults"
+ )
# search
p_search = sub.add_parser("search", help="Search the brain")
@@ -1119,15 +1223,15 @@ def main():
"export",
help="Export brain (marketplace archive, or graduated rules for cursor/agents/aider)",
)
- p_export.add_argument("--mode", choices=["full", "no-prospects", "domain-only"],
- default="full")
+ p_export.add_argument("--mode", choices=["full", "no-prospects", "domain-only"], default="full")
p_export.add_argument(
"--target",
choices=["cursor", "agents", "aider", "codex", "cline", "continue"],
help="Emit graduated RULE-tier lessons in platform-specific format",
)
- p_export.add_argument("--output", "-o",
- help="Output file when using --target (default: stdout)")
+ p_export.add_argument(
+ "--output", "-o", help="Output file when using --target (default: stdout)"
+ )
# context
p_ctx = sub.add_parser("context", help="Compile context for a message")
@@ -1142,6 +1246,8 @@ def main():
# doctor
p_doctor = sub.add_parser("doctor", help="Check environment and brain health")
p_doctor.add_argument("--json", action="store_true", help="Output as JSON")
+ p_doctor.add_argument("--cloud", action="store_true", help="Only run cloud checks")
+ p_doctor.add_argument("--no-cloud", action="store_true", help="Skip cloud checks (offline)")
# install
p_install = sub.add_parser("install", help="Install a brain from marketplace archive")
@@ -1156,25 +1262,29 @@ def main():
# report
p_report = sub.add_parser("report", help="Generate reports (csv, metrics, rules)")
- p_report.add_argument("type", choices=["csv", "metrics", "rules", "health"],
- help="Report type")
+ p_report.add_argument("type", choices=["csv", "metrics", "rules", "health"], help="Report type")
p_report.add_argument("--window", type=int, default=20, help="Rolling window size")
# watch — sidecar file watcher
p_watch = sub.add_parser("watch", help="Watch a directory for AI-generated file edits")
- p_watch.add_argument("--dir", required=True, type=str,
- help="Directory to watch for file changes")
- p_watch.add_argument("--brain", default=None, type=str,
- help="Path to brain directory (default: current dir)")
- p_watch.add_argument("--interval", type=float, default=5.0,
- help="Poll interval in seconds (default: 5)")
+ p_watch.add_argument(
+ "--dir", required=True, type=str, help="Directory to watch for file changes"
+ )
+ p_watch.add_argument(
+ "--brain", default=None, type=str, help="Path to brain directory (default: current dir)"
+ )
+ p_watch.add_argument(
+ "--interval", type=float, default=5.0, help="Poll interval in seconds (default: 5)"
+ )
# diagnose — free correction pattern diagnostic (no graduation needed)
sub.add_parser("diagnose", help="Analyze correction patterns (free diagnostic)")
# review — human-in-the-loop approval
p_review = sub.add_parser("review", help="Review pending lessons for approval")
- p_review.add_argument("--approve", type=int, metavar="ID", help="Approve a pending lesson by ID")
+ p_review.add_argument(
+ "--approve", type=int, metavar="ID", help="Approve a pending lesson by ID"
+ )
p_review.add_argument("--reject", type=int, metavar="ID", help="Reject a pending lesson by ID")
p_review.add_argument("--reason", type=str, default="", help="Reason for rejection")
p_review.add_argument("--json", action="store_true", help="Output as JSON")
@@ -1196,13 +1306,21 @@ def main():
# login / logout — device auth flow for cloud sync
sub.add_parser("login", help="Connect SDK to app.gradata.ai (device auth flow)")
p_logout = sub.add_parser("logout", help="Disconnect SDK from cloud")
- p_logout.add_argument("--config", type=str, default=None,
- help="Path to config file (default: ~/.gradata/config.toml)")
+ p_logout.add_argument(
+ "--config",
+ type=str,
+ default=None,
+ help="Path to config file (default: ~/.gradata/config.toml)",
+ )
p_hooks = sub.add_parser("hooks", help="Manage Claude Code hook integration")
p_hooks.add_argument("action", choices=["install", "uninstall", "status"], help="Hook action")
- p_hooks.add_argument("--profile", choices=["minimal", "standard", "strict"],
- default="standard", help="Hook profile tier (default: standard)")
+ p_hooks.add_argument(
+ "--profile",
+ choices=["minimal", "standard", "strict"],
+ default="standard",
+ help="Hook profile tier (default: standard)",
+ )
# seed — pre-populate brain with high-confidence starter rules
p_seed = sub.add_parser(
@@ -1221,14 +1339,41 @@ def main():
"mine",
help="Backfill brain from ~/.claude/projects transcript archive",
)
- p_mine.add_argument("--commit", action="store_true",
- help="Append to live events.jsonl (default: shadow file only)")
- p_mine.add_argument("--dry-run", action="store_true",
- help="Report counts only, write nothing")
- p_mine.add_argument("--project", default=None,
- help="Only scan one project dir (default: all)")
- p_mine.add_argument("--projects-root", default=None,
- help="Override transcript root (default: ~/.claude/projects)")
+ p_mine.add_argument(
+ "--commit",
+ action="store_true",
+ help="Append to live events.jsonl (default: shadow file only)",
+ )
+ p_mine.add_argument("--dry-run", action="store_true", help="Report counts only, write nothing")
+ p_mine.add_argument("--project", default=None, help="Only scan one project dir (default: all)")
+ p_mine.add_argument(
+ "--projects-root",
+ default=None,
+ help="Override transcript root (default: ~/.claude/projects)",
+ )
+
+ # skill — export graduated rules as an Anthropic Claude Skill folder
+ p_skill = sub.add_parser("skill", help="Export brain as a Claude Skill folder")
+ skill_sub = p_skill.add_subparsers(dest="skill_cmd", required=True)
+ p_skill_export = skill_sub.add_parser(
+ "export", help="Export graduated rules as a Claude Skill (SKILL.md)"
+ )
+ p_skill_export.add_argument("name", help="Skill name (becomes folder name + frontmatter name)")
+ p_skill_export.add_argument(
+ "--output-dir",
+ "-o",
+ help="Write Skill folder under this dir (default: print SKILL.md to stdout)",
+ )
+ p_skill_export.add_argument(
+ "--description",
+ help="Frontmatter description (default: auto-generated from rule categories)",
+ )
+ p_skill_export.add_argument("--category", help="Only include rules in this category")
+ p_skill_export.add_argument(
+ "--no-meta",
+ action="store_true",
+ help="Skip injectable meta-principles section",
+ )
# rule — user-declared rules (fast-track to RULE tier, try hook install)
p_rule = sub.add_parser("rule", help="Manage user-declared rules")
@@ -1238,8 +1383,11 @@ def main():
rule_sub.add_parser("list", help="List RULE-tier lessons and hook status")
p_rule_remove = rule_sub.add_parser("remove", help="Remove a graduated hook by slug")
p_rule_remove.add_argument("slug", help="Hook slug (from `gradata rule list`)")
- p_rule_remove.add_argument("--purge", action="store_true",
- help="Also delete the lesson (default: keep as soft injection)")
+ p_rule_remove.add_argument(
+ "--purge",
+ action="store_true",
+ help="Also delete the lesson (default: keep as soft injection)",
+ )
args = parser.parse_args()
@@ -1269,6 +1417,7 @@ def main():
commands["logout"] = cmd_logout
commands["hooks"] = cmd_hooks
commands["rule"] = cmd_rule
+ commands["skill"] = cmd_skill
commands["seed"] = cmd_seed
commands["mine"] = cmd_mine
diff --git a/Gradata/src/gradata/cloud/client.py b/Gradata/src/gradata/cloud/client.py
index 64afb5ed..9be777fa 100644
--- a/Gradata/src/gradata/cloud/client.py
+++ b/Gradata/src/gradata/cloud/client.py
@@ -26,7 +26,7 @@
logger = logging.getLogger("gradata.cloud")
-DEFAULT_ENDPOINT = "https://api.gradata.com/v1"
+DEFAULT_ENDPOINT = "https://api.gradata.ai/api/v1"
ENV_API_KEY = "GRADATA_API_KEY"
ENV_ENDPOINT = "GRADATA_ENDPOINT"
@@ -46,9 +46,9 @@ def __init__(
) -> None:
self.brain_dir = Path(brain_dir).resolve()
self.api_key = api_key or os.environ.get(ENV_API_KEY, "")
- self.endpoint = (
- endpoint or os.environ.get(ENV_ENDPOINT, "") or DEFAULT_ENDPOINT
- ).rstrip("/")
+ self.endpoint = (endpoint or os.environ.get(ENV_ENDPOINT, "") or DEFAULT_ENDPOINT).rstrip(
+ "/"
+ )
if self.endpoint:
require_https(self.endpoint, "GRADATA_ENDPOINT")
self.connected = False
@@ -65,11 +65,14 @@ def connect(self) -> bool:
try:
manifest = self._read_local_manifest()
- resp = self._post("/brains/connect", {
- "brain_name": manifest.get("metadata", {}).get("name", self.brain_dir.name),
- "domain": manifest.get("metadata", {}).get("domain", ""),
- "manifest": manifest,
- })
+ resp = self._post(
+ "/brains/connect",
+ {
+ "brain_name": manifest.get("metadata", {}).get("name", self.brain_dir.name),
+ "domain": manifest.get("metadata", {}).get("domain", ""),
+ "manifest": manifest,
+ },
+ )
self._brain_id = resp.get("brain_id")
self.connected = True
logger.info("Connected to Gradata Cloud: brain_id=%s", self._brain_id)
@@ -126,10 +129,15 @@ def sync(self) -> dict:
return {"status": "not_connected"}
try:
- return self._post("/brains/sync", {
- "brain_id": self._brain_id,
- "manifest": self._read_local_manifest(),
- })
+ # Backend route: POST /api/v1/sync (see cloud/app/routes/sync.py).
+            # DEFAULT_ENDPOINT already includes /api/v1, so we append only /sync.

+ return self._post(
+ "/sync",
+ {
+ "brain_id": self._brain_id,
+ "manifest": self._read_local_manifest(),
+ },
+ )
except Exception as e:
logger.warning("Sync failed: %s", e)
return {"status": "error", "error": str(e)}
diff --git a/Gradata/src/gradata/cloud/sync.py b/Gradata/src/gradata/cloud/sync.py
index 0f1a1c5d..a6b12069 100644
--- a/Gradata/src/gradata/cloud/sync.py
+++ b/Gradata/src/gradata/cloud/sync.py
@@ -13,6 +13,7 @@
- Separate opt-in for corpus contribution (anonymized corrections for
cross-user meta-rule synthesis). See `CloudClient.contribute_corpus()`.
"""
+
from __future__ import annotations
import json
@@ -137,11 +138,16 @@ def _post(self, path: str, payload: dict, timeout: float = 10.0) -> dict | None:
with urllib.request.urlopen(req, timeout=timeout) as resp:
body = resp.read().decode()
return json.loads(body) if body else {}
- except (urllib.error.URLError, urllib.error.HTTPError, OSError) as e:
- log.debug("cloud POST %s failed: %s", path, e)
+ except urllib.error.HTTPError as e:
+ # Surface HTTP errors at WARNING — silent 4xx/5xx is how the
+ # 'last_sync never updates' bug hid for months.
+ log.warning("cloud POST %s failed: HTTP %s %s", path, e.code, e.reason)
+ return None
+ except (urllib.error.URLError, OSError) as e:
+ log.warning("cloud POST %s failed (network): %s", path, e)
return None
except json.JSONDecodeError:
- log.debug("cloud response non-JSON for %s", path)
+ log.warning("cloud response non-JSON for %s", path)
return {}
def sync_metrics(self, payload: TelemetryPayload) -> bool:
@@ -151,7 +157,10 @@ def sync_metrics(self, payload: TelemetryPayload) -> bool:
"""
if not self.enabled:
return False
- result = self._post("/v1/telemetry/metrics", asdict(payload))
+ # Backend mounts the metrics router under /api/v1 (see
+ # cloud/app/main.py → app.include_router(router, prefix="/api/v1")
+ # and cloud/app/routes/metrics.py → @router.post("/telemetry/metrics")).
+ result = self._post("/api/v1/telemetry/metrics", asdict(payload))
if result is not None:
self.config.last_sync_at = payload.sent_at
save_config(self.brain_dir, self.config)
@@ -167,7 +176,9 @@ def contribute_corpus(self, anonymized_patterns: list[dict]) -> bool:
"""
if not self.enabled or not self.config.contribute_corpus:
return False
- result = self._post("/v1/corpus/contribute", {"patterns": anonymized_patterns})
+ # Backend mounts the corpus router under /api/v1 (same prefix as
+ # telemetry — see cloud/app/main.py).
+ result = self._post("/api/v1/corpus/contribute", {"patterns": anonymized_patterns})
return result is not None
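
The split exception handling above is the load-bearing change: urllib raises HTTPError (a response arrived, but with a 4xx/5xx status) as a subclass of URLError (no usable response at all), so the HTTPError branch must come first, and both branches return None so callers like sync_metrics skip the last_sync update. A self-contained sketch of the same shape, with a generic logger and caller-supplied URL:

    import json
    import logging
    import urllib.error
    import urllib.request

    log = logging.getLogger("sketch")

    def post_json(url: str, payload: dict, timeout: float = 10.0) -> dict | None:
        req = urllib.request.Request(
            url,
            data=json.dumps(payload).encode(),
            headers={"Content-Type": "application/json"},
            method="POST",
        )
        try:
            with urllib.request.urlopen(req, timeout=timeout) as resp:
                body = resp.read().decode()
                return json.loads(body) if body else {}
        except urllib.error.HTTPError as e:
            # Server answered with an error status: warn loudly instead of hiding it.
            log.warning("POST %s failed: HTTP %s %s", url, e.code, e.reason)
            return None
        except (urllib.error.URLError, OSError) as e:
            # No response at all (DNS failure, refused connection, timeout).
            log.warning("POST %s failed (network): %s", url, e)
            return None
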
diff --git a/Gradata/src/gradata/context_wrapper.py b/Gradata/src/gradata/context_wrapper.py
index 3e645f79..22a4c6cb 100644
--- a/Gradata/src/gradata/context_wrapper.py
+++ b/Gradata/src/gradata/context_wrapper.py
@@ -143,8 +143,8 @@ def system_prompt(self, task: str = "", context: dict | None = None) -> str:
rules = self._brain.apply_brain_rules(task, context)
if rules:
parts.append(rules)
- except Exception:
- pass
+ except Exception as e:
+ logger.debug("apply_brain_rules failed (non-fatal): %s", e)
elif self._rules_text:
parts.append(self._rules_text)
@@ -154,12 +154,11 @@ def system_prompt(self, task: str = "", context: dict | None = None) -> str:
brain_ctx = self._brain.context_for(task)
if brain_ctx:
parts.append(brain_ctx)
- except Exception:
- pass
+ except Exception as e:
+ logger.debug("context_for failed (non-fatal): %s", e)
return "\n\n".join(parts)
-
def capture_response(self, response: str) -> None:
"""Capture the AI's response for tracking.
diff --git a/Gradata/src/gradata/contrib/enhancements/eval_benchmark.py b/Gradata/src/gradata/contrib/enhancements/eval_benchmark.py
index cdc06ab0..1e7580cf 100644
--- a/Gradata/src/gradata/contrib/enhancements/eval_benchmark.py
+++ b/Gradata/src/gradata/contrib/enhancements/eval_benchmark.py
@@ -57,6 +57,7 @@ class BenchmarkCase:
task_type: Task type context.
tags: Arbitrary tags for filtering.
"""
+
correction_text: str = ""
category: str = ""
severity: str = "moderate"
@@ -81,6 +82,7 @@ class CaseResult:
discriminator_confidence: Confidence from discriminator.
error: Error message if case failed to run.
"""
+
case: BenchmarkCase
category_correct: bool = False
severity_correct: bool = True
@@ -114,6 +116,7 @@ class BenchmarkResult:
total_cases: Number of cases run.
passed_cases: Number of cases that passed all assertions.
"""
+
cases: list[CaseResult] = field(default_factory=list)
correction_recall: float = 0.0
rule_precision: float = 0.0
@@ -160,6 +163,7 @@ def run(self) -> BenchmarkResult:
# Import discriminator
try:
from gradata.enhancements.lesson_discriminator import LessonDiscriminator
+
discriminator = LessonDiscriminator()
except ImportError:
discriminator = None
@@ -168,6 +172,7 @@ def run(self) -> BenchmarkResult:
try:
from gradata.enhancements.diff_engine import compute_diff
from gradata.enhancements.edit_classifier import classify_edits
+
has_classifier = True
except ImportError:
has_classifier = False
@@ -205,9 +210,7 @@ def run(self) -> BenchmarkResult:
cr.discriminator_confidence = verdict.confidence
if case.expected_high_value is not None:
- cr.high_value_correct = (
- verdict.is_high_value == case.expected_high_value
- )
+ cr.high_value_correct = verdict.is_high_value == case.expected_high_value
except Exception as e:
cr.error = str(e)
@@ -222,29 +225,26 @@ def run(self) -> BenchmarkResult:
category_cases = [cr for cr in case_results if cr.case.expected_category]
correction_recall = (
sum(1 for cr in category_cases if cr.category_correct) / len(category_cases)
- if category_cases else 1.0
+ if category_cases
+ else 1.0
)
# Rule precision: of cases expected to be rules, how many flagged high-value
rule_cases = [cr for cr in case_results if cr.case.expected_rule]
rule_precision = (
sum(1 for cr in rule_cases if cr.predicted_high_value is True) / len(rule_cases)
- if rule_cases else 1.0
+ if rule_cases
+ else 1.0
)
# Graduation accuracy: all high-value predictions matching expected
hv_cases = [cr for cr in case_results if cr.high_value_correct is not None]
graduation_accuracy = (
- sum(1 for cr in hv_cases if cr.high_value_correct) / len(hv_cases)
- if hv_cases else 1.0
+ sum(1 for cr in hv_cases if cr.high_value_correct) / len(hv_cases) if hv_cases else 1.0
)
# Overall: weighted average (rule precision most important)
- overall = (
- correction_recall * 0.25
- + rule_precision * 0.45
- + graduation_accuracy * 0.30
- )
+ overall = correction_recall * 0.25 + rule_precision * 0.45 + graduation_accuracy * 0.30
return BenchmarkResult(
cases=case_results,
@@ -265,40 +265,54 @@ def run(self) -> BenchmarkResult:
# High severity, should graduate
BenchmarkCase(
correction_text="Complete rewrite of email tone from formal to casual",
- category="TONE", severity="rewrite",
- expected_rule=True, expected_high_value=True,
+ category="TONE",
+ severity="rewrite",
+ expected_rule=True,
+ expected_high_value=True,
),
BenchmarkCase(
correction_text="Fixed incorrect pricing in proposal",
- category="ACCURACY", severity="major",
- expected_rule=True, expected_high_value=True,
+ category="ACCURACY",
+ severity="major",
+ expected_rule=True,
+ expected_high_value=True,
),
BenchmarkCase(
correction_text="Restructured entire email flow",
- category="STRUCTURE", severity="major",
- expected_rule=True, expected_high_value=True,
+ category="STRUCTURE",
+ severity="major",
+ expected_rule=True,
+ expected_high_value=True,
),
# Low severity, should not graduate
BenchmarkCase(
correction_text="Fixed typo in greeting",
- category="TONE", severity="trivial",
- expected_rule=False, expected_high_value=False,
+ category="TONE",
+ severity="trivial",
+ expected_rule=False,
+ expected_high_value=False,
),
BenchmarkCase(
correction_text="Adjusted spacing in signature",
- category="STYLE", severity="trivial",
- expected_rule=False, expected_high_value=False,
+ category="STYLE",
+ severity="trivial",
+ expected_rule=False,
+ expected_high_value=False,
),
# Moderate, borderline
BenchmarkCase(
correction_text="Changed call-to-action from link to button",
- category="CONTENT", severity="moderate",
- expected_rule=False, expected_high_value=None, # Don't assert
+ category="CONTENT",
+ severity="moderate",
+ expected_rule=False,
+ expected_high_value=None, # Don't assert
),
BenchmarkCase(
correction_text="Replaced em dash with colon",
- category="STYLE", severity="minor",
- expected_rule=False, expected_high_value=False,
+ category="STYLE",
+ severity="minor",
+ expected_rule=False,
+ expected_high_value=False,
),
]
diff --git a/Gradata/src/gradata/contrib/enhancements/install_manifest.py b/Gradata/src/gradata/contrib/enhancements/install_manifest.py
index 3f5c01bb..ae16b0c4 100644
--- a/Gradata/src/gradata/contrib/enhancements/install_manifest.py
+++ b/Gradata/src/gradata/contrib/enhancements/install_manifest.py
@@ -48,15 +48,17 @@
class ModuleCost(Enum):
"""Resource cost tag for modules."""
- LIGHT = "light" # Minimal resource usage
- MEDIUM = "medium" # Moderate resource usage
- HEAVY = "heavy" # Significant resource usage
+
+ LIGHT = "light" # Minimal resource usage
+ MEDIUM = "medium" # Moderate resource usage
+ HEAVY = "heavy" # Significant resource usage
class ModuleStability(Enum):
"""Stability tag for modules."""
- STABLE = "stable" # Production-ready
- BETA = "beta" # Functional but evolving
+
+ STABLE = "stable" # Production-ready
+ BETA = "beta" # Functional but evolving
EXPERIMENTAL = "experimental" # Use with caution
@@ -75,6 +77,7 @@ class Module:
stability: Stability tag.
default_install: Whether to include in default installs.
"""
+
id: str
name: str
description: str = ""
@@ -95,6 +98,7 @@ class Profile:
description: What this profile is for.
modules: Module IDs included in this profile.
"""
+
name: str
description: str = ""
modules: list[str] = field(default_factory=list)
@@ -110,6 +114,7 @@ class InstallPlan:
dependencies_added: Modules added automatically via dependencies.
estimated_cost: Aggregate cost estimate.
"""
+
profile: str = ""
modules: list[Module] = field(default_factory=list)
dependencies_added: list[str] = field(default_factory=list)
@@ -126,6 +131,7 @@ class InstallState:
Enables idempotent installs — only apply changes since last install.
"""
+
schema_version: int = 1
installed_modules: list[str] = field(default_factory=list)
profile: str = ""
@@ -168,10 +174,18 @@ def is_installed(self, module_id: str) -> bool:
description="15 base agentic patterns (pipeline, RAG, reflection, etc.)",
kind="pattern",
components=[
- "patterns.pipeline", "patterns.rag", "patterns.reflection",
- "patterns.orchestrator", "patterns.parallel", "patterns.memory",
- "patterns.guardrails", "patterns.human_loop", "patterns.scope",
- "patterns.sub_agents", "patterns.evaluator", "patterns.tools",
+ "patterns.pipeline",
+ "patterns.rag",
+ "patterns.reflection",
+ "patterns.orchestrator",
+ "patterns.parallel",
+ "patterns.memory",
+ "patterns.guardrails",
+ "patterns.human_loop",
+ "patterns.scope",
+ "patterns.sub_agents",
+ "patterns.evaluator",
+ "patterns.tools",
],
cost=ModuleCost.LIGHT,
stability=ModuleStability.STABLE,
@@ -183,8 +197,10 @@ def is_installed(self, module_id: str) -> bool:
description="Context brackets, reconciliation, task escalation, execute/qualify loop.",
kind="pattern",
components=[
- "patterns.context_brackets", "patterns.reconciliation",
- "patterns.task_escalation", "patterns.execute_qualify",
+ "patterns.context_brackets",
+ "patterns.reconciliation",
+ "patterns.task_escalation",
+ "patterns.execute_qualify",
],
cost=ModuleCost.LIGHT,
stability=ModuleStability.STABLE,
@@ -206,8 +222,10 @@ def is_installed(self, module_id: str) -> bool:
description="INSTINCT->PATTERN->RULE graduation with severity-weighted confidence.",
kind="enhancement",
components=[
- "enhancements.self_improvement", "enhancements.correction_tracking",
- "enhancements.edit_classifier", "enhancements.pattern_extractor",
+ "enhancements.self_improvement",
+ "enhancements.correction_tracking",
+ "enhancements.edit_classifier",
+ "enhancements.pattern_extractor",
],
dependencies=["quality-gates"],
cost=ModuleCost.MEDIUM,
@@ -273,8 +291,10 @@ def is_installed(self, module_id: str) -> bool:
description="HMAC signing, contradiction detection, rule verification.",
kind="enhancement",
components=[
- "enhancements.rule_integrity", "enhancements.contradiction_detector",
- "enhancements.rule_conflicts", "enhancements.rule_canary",
+ "enhancements.rule_integrity",
+ "enhancements.contradiction_detector",
+ "enhancements.rule_conflicts",
+ "enhancements.rule_canary",
],
dependencies=["learning-pipeline"],
cost=ModuleCost.MEDIUM,
@@ -297,8 +317,10 @@ def is_installed(self, module_id: str) -> bool:
description="Adapters for Anthropic, OpenAI, LangChain, CrewAI.",
kind="integration",
components=[
- "integrations.anthropic_adapter", "integrations.openai_adapter",
- "integrations.langchain_adapter", "integrations.crewai_adapter",
+ "integrations.anthropic_adapter",
+ "integrations.openai_adapter",
+ "integrations.langchain_adapter",
+ "integrations.crewai_adapter",
],
cost=ModuleCost.LIGHT,
stability=ModuleStability.STABLE,
@@ -316,8 +338,13 @@ def is_installed(self, module_id: str) -> bool:
name="standard",
description="Recommended. Core + learning pipeline + behavioral engine + context management.",
modules=[
- "core-patterns", "context-management", "quality-gates",
- "learning-pipeline", "behavioral-engine", "truth-protocol", "agent-modes",
+ "core-patterns",
+ "context-management",
+ "quality-gates",
+ "learning-pipeline",
+ "behavioral-engine",
+ "truth-protocol",
+ "agent-modes",
],
),
Profile(
@@ -329,9 +356,16 @@ def is_installed(self, module_id: str) -> bool:
name="research",
description="Full pipeline + RL router + observation hooks for research.",
modules=[
- "core-patterns", "context-management", "quality-gates",
- "learning-pipeline", "behavioral-engine", "truth-protocol", "agent-modes",
- "observation-hooks", "q-learning-router", "meta-rules",
+ "core-patterns",
+ "context-management",
+ "quality-gates",
+ "learning-pipeline",
+ "behavioral-engine",
+ "truth-protocol",
+ "agent-modes",
+ "observation-hooks",
+ "q-learning-router",
+ "meta-rules",
"rule-integrity",
],
),
@@ -342,6 +376,7 @@ def is_installed(self, module_id: str) -> bool:
# InstallManifest
# ---------------------------------------------------------------------------
+
class InstallManifest:
"""Registry of modules and profiles for selective installation.
@@ -357,9 +392,9 @@ def __init__(
self._modules: dict[str, Module] = {}
self._profiles: dict[str, Profile] = {}
- for m in (modules or []):
+ for m in modules or []:
self._modules[m.id] = m
- for p in (profiles or []):
+ for p in profiles or []:
self._profiles[p.name] = p
@classmethod
@@ -367,7 +402,6 @@ def default(cls) -> InstallManifest:
"""Create a manifest with default modules and profiles."""
return cls(modules=DEFAULT_MODULES, profiles=DEFAULT_PROFILES)
-
@property
def available_modules(self) -> list[Module]:
"""Return all registered modules."""
@@ -404,8 +438,7 @@ def _resolve(mid: str) -> None:
return
if mid in visiting:
raise ValueError(
- f"Circular dependency detected: {mid} is already "
- f"in the resolution chain"
+ f"Circular dependency detected: {mid} is already in the resolution chain"
)
visiting.add(mid)
module = self._modules.get(mid)
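
The cycle check above is a classic depth-first resolution with a "currently visiting" set: re-entering a module that is still on the resolution stack means a circular dependency. A minimal standalone version of that logic (the module IDs and dependency map are toy data, not the real registry):

    def resolve(deps: dict[str, list[str]], root: str) -> list[str]:
        ordered: list[str] = []
        done: set[str] = set()
        visiting: set[str] = set()

        def _resolve(mid: str) -> None:
            if mid in done:
                return
            if mid in visiting:
                raise ValueError(
                    f"Circular dependency detected: {mid} is already in the resolution chain"
                )
            visiting.add(mid)
            for dep in deps.get(mid, []):
                _resolve(dep)
            visiting.discard(mid)
            done.add(mid)
            ordered.append(mid)  # dependencies land before their dependents

        _resolve(root)
        return ordered

    assert resolve({"a": ["b"], "b": ["c"], "c": []}, "a") == ["c", "b", "a"]
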
diff --git a/Gradata/src/gradata/contrib/enhancements/quality_gates.py b/Gradata/src/gradata/contrib/enhancements/quality_gates.py
index 3c60ff6a..baa06e6f 100644
--- a/Gradata/src/gradata/contrib/enhancements/quality_gates.py
+++ b/Gradata/src/gradata/contrib/enhancements/quality_gates.py
@@ -52,9 +52,7 @@ def __post_init__(self) -> None:
if self.weight <= 0:
raise ValueError(f"QualityRubric '{self.name}': weight must be > 0")
if not (0.0 <= self.threshold <= 10.0):
- raise ValueError(
- f"QualityRubric '{self.name}': threshold must be in [0, 10]"
- )
+ raise ValueError(f"QualityRubric '{self.name}': threshold must be in [0, 10]")
@dataclass
@@ -250,16 +248,10 @@ def evaluate(
raw = scorer(output, rubric)
dimension_scores[rubric.name] = round(min(10.0, max(0.0, float(raw))), 2)
- overall = sum(
- dimension_scores[r.name] * r.weight for r in self.rubrics
- ) / total_weight
+ overall = sum(dimension_scores[r.name] * r.weight for r in self.rubrics) / total_weight
overall = round(overall, 2)
- failures = [
- r.name
- for r in self.rubrics
- if dimension_scores[r.name] < r.threshold
- ]
+ failures = [r.name for r in self.rubrics if dimension_scores[r.name] < r.threshold]
passed = overall >= self.threshold and len(failures) == 0
@@ -325,6 +317,7 @@ def run_with_fix(
@dataclass
class SuccessCondition:
"""A single success condition evaluation."""
+
name: str
met: bool = False
value: float = 0.0
@@ -335,13 +328,16 @@ class SuccessCondition:
@dataclass
class SuccessConditionsReport:
"""Result of evaluating all 6 success conditions."""
+
all_met: bool = False
conditions: list[SuccessCondition] = field(default_factory=list)
window_size: int = 20
sessions_evaluated: int = 0
-def evaluate_success_conditions(db_path=None, window: int = 20, ctx=None) -> SuccessConditionsReport:
+def evaluate_success_conditions(
+ db_path=None, window: int = 20, ctx=None
+) -> SuccessConditionsReport:
"""Evaluate the 6 SPEC success conditions over a session window."""
report = SuccessConditionsReport(window_size=window)
conditions = [
@@ -355,10 +351,16 @@ def evaluate_success_conditions(db_path=None, window: int = 20, ctx=None) -> Suc
try:
import sqlite3
from pathlib import Path as _Path
+
db = _Path(db_path) if db_path else (_Path(ctx.brain_dir) / "system.db" if ctx else None)
if db and db.exists():
conn = sqlite3.connect(str(db))
- max_session = conn.execute("SELECT MAX(session) FROM events WHERE typeof(session)='integer'").fetchone()[0] or 0
+ max_session = (
+ conn.execute(
+ "SELECT MAX(session) FROM events WHERE typeof(session)='integer'"
+ ).fetchone()[0]
+ or 0
+ )
report.sessions_evaluated = max_session
conn.close()
except Exception:
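
To make the weighted-average line in evaluate concrete, here is the same arithmetic on toy rubrics: two dimensions with weights 2 and 1, so the heavier dimension pulls the overall score twice as hard. A single shared threshold stands in for the per-rubric thresholds; all values are illustrative:

    scores = {"accuracy": 9.0, "tone": 6.0}   # per-dimension scores in [0, 10]
    weights = {"accuracy": 2.0, "tone": 1.0}  # rubric weights, must be > 0

    total_weight = sum(weights.values())
    overall = sum(scores[name] * w for name, w in weights.items()) / total_weight
    assert round(overall, 2) == 8.0  # (9*2 + 6*1) / 3

    threshold = 7.0
    failures = [name for name, s in scores.items() if s < threshold]
    passed = overall >= threshold and not failures
    assert failures == ["tone"] and not passed  # one failing dimension blocks the gate
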
diff --git a/Gradata/src/gradata/contrib/enhancements/truth_protocol.py b/Gradata/src/gradata/contrib/enhancements/truth_protocol.py
index ac030e9f..c5ef3f0f 100644
--- a/Gradata/src/gradata/contrib/enhancements/truth_protocol.py
+++ b/Gradata/src/gradata/contrib/enhancements/truth_protocol.py
@@ -117,18 +117,17 @@ def add(self, check: TruthCheck) -> None:
# Pre-compiled patterns for performance
_BANNED_PATTERNS: list[tuple[str, re.Pattern[str]]] = [
- (phrase, re.compile(re.escape(phrase), re.IGNORECASE))
- for phrase in BANNED_PHRASES
+ (phrase, re.compile(re.escape(phrase), re.IGNORECASE)) for phrase in BANNED_PHRASES
]
# Numbers without source: percentage or multiplier not preceded by a citation.
# Matches "300%", "3x", "2.5x" etc.
_NUMBER_CLAIM_RE = re.compile(
- r"(? TruthVerdict:
found_phrases.append(phrase)
if found_phrases:
- verdict.add(TruthCheck(
- name="no_banned_phrases",
- passed=False,
- detail=(
- f"Output contains {len(found_phrases)} banned phrase(s) "
- "that signal unverified success claims."
- ),
- evidence="; ".join(found_phrases[:5]), # cap evidence to 5
- ))
+ verdict.add(
+ TruthCheck(
+ name="no_banned_phrases",
+ passed=False,
+ detail=(
+ f"Output contains {len(found_phrases)} banned phrase(s) "
+ "that signal unverified success claims."
+ ),
+ evidence="; ".join(found_phrases[:5]), # cap evidence to 5
+ )
+ )
else:
- verdict.add(TruthCheck(
- name="no_banned_phrases",
- passed=True,
- detail="No banned success phrases detected.",
- ))
+ verdict.add(
+ TruthCheck(
+ name="no_banned_phrases",
+ passed=True,
+ detail="No banned success phrases detected.",
+ )
+ )
# --- Check 2: numbers without source ---
number_matches = list(_NUMBER_CLAIM_RE.finditer(output))
@@ -197,26 +200,30 @@ def verify_claims(output: str) -> TruthVerdict:
# Deduplicate while preserving order
seen: set[str] = set()
unique_unverified = [
- v for v in unverified if not (v in seen or seen.add(v)) # type: ignore[func-returns-value]
+ v
+ for v in unverified
+ if not (v in seen or seen.add(v)) # type: ignore[func-returns-value]
]
if unique_unverified:
- verdict.add(TruthCheck(
- name="no_unverified_numbers",
- passed=False,
- detail=(
- f"Found {len(unique_unverified)} numeric claim(s) without "
- "an accompanying citation or source reference."
- ),
- evidence=", ".join(unique_unverified[:5]),
- ))
+ verdict.add(
+ TruthCheck(
+ name="no_unverified_numbers",
+ passed=False,
+ detail=(
+ f"Found {len(unique_unverified)} numeric claim(s) without "
+ "an accompanying citation or source reference."
+ ),
+ evidence=", ".join(unique_unverified[:5]),
+ )
+ )
else:
- verdict.add(TruthCheck(
- name="no_unverified_numbers",
- passed=True,
- detail="All numeric claims have accompanying citations or none found.",
- ))
+ verdict.add(
+ TruthCheck(
+ name="no_unverified_numbers",
+ passed=True,
+ detail="All numeric claims have accompanying citations or none found.",
+ )
+ )
return verdict
-
-
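
The _BANNED_PATTERNS comprehension above pays the re.compile cost once at import time; each verification then just iterates already-compiled patterns. A minimal sketch of that scan, with a toy phrase list standing in for BANNED_PHRASES:

    import re

    BANNED_PHRASES = ["works perfectly", "fully tested"]  # illustrative stand-ins

    _BANNED_PATTERNS = [
        (phrase, re.compile(re.escape(phrase), re.IGNORECASE)) for phrase in BANNED_PHRASES
    ]

    def find_banned(output: str) -> list[str]:
        # re.escape means each phrase is matched literally, case-insensitively.
        return [phrase for phrase, pattern in _BANNED_PATTERNS if pattern.search(output)]

    assert find_banned("This Works Perfectly now.") == ["works perfectly"]
    assert find_banned("Needs more testing.") == []
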
diff --git a/Gradata/src/gradata/contrib/patterns/__init__.py b/Gradata/src/gradata/contrib/patterns/__init__.py
index 7fe80a39..a89d658f 100644
--- a/Gradata/src/gradata/contrib/patterns/__init__.py
+++ b/Gradata/src/gradata/contrib/patterns/__init__.py
@@ -102,6 +102,7 @@
def __getattr__(name: str):
if name in _LAZY_IMPORTS:
import importlib
+
rel_module, attr = _LAZY_IMPORTS[name]
mod = importlib.import_module(rel_module, __package__)
return getattr(mod, attr)
diff --git a/Gradata/src/gradata/contrib/patterns/agent_modes.py b/Gradata/src/gradata/contrib/patterns/agent_modes.py
index cd46e63a..5128292c 100644
--- a/Gradata/src/gradata/contrib/patterns/agent_modes.py
+++ b/Gradata/src/gradata/contrib/patterns/agent_modes.py
@@ -29,25 +29,27 @@
class AgentMode(Enum):
"""Available operating modes for Gradata agents."""
- GODMODE = "godmode" # Full autonomy, OODA loop, no permission checks
- PLAN = "plan" # Propose before executing, wait for approval
- AUDIT = "audit" # Read-only. Observe and report only.
- CANARY = "canary" # Build in isolation (worktree/branch), merge only if tests pass
- SAFE = "safe" # One file at a time, verify after each change
+
+ GODMODE = "godmode" # Full autonomy, OODA loop, no permission checks
+ PLAN = "plan" # Propose before executing, wait for approval
+ AUDIT = "audit" # Read-only. Observe and report only.
+ CANARY = "canary" # Build in isolation (worktree/branch), merge only if tests pass
+ SAFE = "safe" # One file at a time, verify after each change
@dataclass
class ModeConfig:
"""Configuration and constraints for a single agent mode."""
+
mode: AgentMode
- can_write: bool # Can create/edit files
- can_execute: bool # Can run bash commands
- can_spawn: bool # Can spawn sub-agents
- can_commit: bool # Can git commit
- requires_approval: bool # Must get approval before acting
- max_files_per_action: int # 0 = unlimited
- must_verify_after_edit: bool # Run py_compile/tests after every change
- isolation: str # "none", "branch", "worktree"
+ can_write: bool # Can create/edit files
+ can_execute: bool # Can run bash commands
+ can_spawn: bool # Can spawn sub-agents
+ can_commit: bool # Can git commit
+ requires_approval: bool # Must get approval before acting
+ max_files_per_action: int # 0 = unlimited
+ must_verify_after_edit: bool # Run py_compile/tests after every change
+ isolation: str # "none", "branch", "worktree"
description: str
@@ -57,37 +59,62 @@ class ModeConfig:
MODE_CONFIGS: dict[AgentMode, ModeConfig] = {
AgentMode.GODMODE: ModeConfig(
- mode=AgentMode.GODMODE, can_write=True, can_execute=True,
- can_spawn=True, can_commit=True, requires_approval=False,
- max_files_per_action=0, must_verify_after_edit=False,
+ mode=AgentMode.GODMODE,
+ can_write=True,
+ can_execute=True,
+ can_spawn=True,
+ can_commit=True,
+ requires_approval=False,
+ max_files_per_action=0,
+ must_verify_after_edit=False,
isolation="none",
description="Full autonomy. OODA loop. Never pause.",
),
AgentMode.PLAN: ModeConfig(
- mode=AgentMode.PLAN, can_write=True, can_execute=True,
- can_spawn=True, can_commit=True, requires_approval=True,
- max_files_per_action=0, must_verify_after_edit=False,
+ mode=AgentMode.PLAN,
+ can_write=True,
+ can_execute=True,
+ can_spawn=True,
+ can_commit=True,
+ requires_approval=True,
+ max_files_per_action=0,
+ must_verify_after_edit=False,
isolation="none",
description="Propose plan, wait for approval before executing.",
),
AgentMode.AUDIT: ModeConfig(
- mode=AgentMode.AUDIT, can_write=False, can_execute=False,
- can_spawn=False, can_commit=False, requires_approval=False,
- max_files_per_action=0, must_verify_after_edit=False,
+ mode=AgentMode.AUDIT,
+ can_write=False,
+ can_execute=False,
+ can_spawn=False,
+ can_commit=False,
+ requires_approval=False,
+ max_files_per_action=0,
+ must_verify_after_edit=False,
isolation="none",
description="Read-only. Observe, analyze, report. Cannot modify.",
),
AgentMode.CANARY: ModeConfig(
- mode=AgentMode.CANARY, can_write=True, can_execute=True,
- can_spawn=True, can_commit=True, requires_approval=False,
- max_files_per_action=0, must_verify_after_edit=True,
+ mode=AgentMode.CANARY,
+ can_write=True,
+ can_execute=True,
+ can_spawn=True,
+ can_commit=True,
+ requires_approval=False,
+ max_files_per_action=0,
+ must_verify_after_edit=True,
isolation="worktree",
description="Build in isolation. Merge only if all tests pass.",
),
AgentMode.SAFE: ModeConfig(
- mode=AgentMode.SAFE, can_write=True, can_execute=True,
- can_spawn=False, can_commit=True, requires_approval=False,
- max_files_per_action=1, must_verify_after_edit=True,
+ mode=AgentMode.SAFE,
+ can_write=True,
+ can_execute=True,
+ can_spawn=False,
+ can_commit=True,
+ requires_approval=False,
+ max_files_per_action=1,
+ must_verify_after_edit=True,
isolation="branch",
description="One file at a time. Verify after every change.",
),
@@ -105,6 +132,7 @@ class ModeConfig:
# Public API
# ---------------------------------------------------------------------------
+
def get_mode(mode_name: str) -> ModeConfig:
"""Get mode config by name. Defaults to GODMODE for unknown names."""
try:
@@ -136,8 +164,7 @@ def check_permission(mode: ModeConfig, action: str) -> tuple[bool, str]:
if allowed:
return True, ""
return False, (
- f"Action '{action}' is not permitted in {mode.mode.value} mode. "
- f"{mode.description}"
+ f"Action '{action}' is not permitted in {mode.mode.value} mode. {mode.description}"
)
@@ -149,7 +176,7 @@ def format_mode_prompt(mode: AgentMode) -> str:
"""
cfg = MODE_CONFIGS[mode]
lines: list[str] = [
- f"",
+ f'',
f" Description: {cfg.description}",
]
@@ -163,8 +190,7 @@ def format_mode_prompt(mode: AgentMode) -> str:
lines.append(" CONSTRAINT: Do NOT make git commits.")
if cfg.requires_approval:
lines.append(
- " CONSTRAINT: Propose your full plan FIRST. "
- "Do NOT execute until the user approves."
+ " CONSTRAINT: Propose your full plan FIRST. Do NOT execute until the user approves."
)
if cfg.max_files_per_action > 0:
lines.append(
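
Usage-wise, the mode table above is consulted through get_mode and check_permission. A sketch of the call shape, assuming an action vocabulary mapped onto the can_* flags; the ModeConfig here is trimmed to just the fields the sketch needs:

    from dataclasses import dataclass

    @dataclass
    class ModeConfig:  # trimmed stand-in, not the full config above
        name: str
        can_write: bool
        can_commit: bool
        description: str

    AUDIT = ModeConfig("audit", can_write=False, can_commit=False,
                       description="Read-only. Observe, analyze, report. Cannot modify.")

    def check_permission(mode: ModeConfig, action: str) -> tuple[bool, str]:
        allowed = {"write": mode.can_write, "commit": mode.can_commit}.get(action, False)
        if allowed:
            return True, ""
        return False, f"Action '{action}' is not permitted in {mode.name} mode. {mode.description}"

    ok, reason = check_permission(AUDIT, "write")
    assert not ok and "audit" in reason
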
diff --git a/Gradata/src/gradata/contrib/patterns/context_brackets.py b/Gradata/src/gradata/contrib/patterns/context_brackets.py
index a2f43ef0..9a2a2730 100644
--- a/Gradata/src/gradata/contrib/patterns/context_brackets.py
+++ b/Gradata/src/gradata/contrib/patterns/context_brackets.py
@@ -41,10 +41,11 @@
class ContextBracket(Enum):
"""Context capacity brackets based on remaining token budget."""
- FRESH = "fresh" # >70% remaining
- MODERATE = "moderate" # 40-70% remaining
- DEEP = "deep" # 20-40% remaining
- CRITICAL = "critical" # <20% remaining
+
+ FRESH = "fresh" # >70% remaining
+ MODERATE = "moderate" # 40-70% remaining
+ DEEP = "deep" # 20-40% remaining
+ CRITICAL = "critical" # <20% remaining
@dataclass(frozen=True)
@@ -61,6 +62,7 @@ class BracketConfig:
plan_sizing: Recommended plan size as fraction of remaining capacity.
should_handoff: Whether to prepare a session handoff.
"""
+
bracket: ContextBracket
min_ratio: float
max_ratio: float
@@ -153,6 +155,7 @@ class BracketConfig:
# Bracket detection
# ---------------------------------------------------------------------------
+
def get_bracket(remaining_ratio: float) -> ContextBracket:
"""Determine the context bracket from remaining capacity ratio.
@@ -166,9 +169,7 @@ def get_bracket(remaining_ratio: float) -> ContextBracket:
ValueError: If remaining_ratio is outside [0.0, 1.0].
"""
if not (0.0 <= remaining_ratio <= 1.0):
- raise ValueError(
- f"remaining_ratio must be in [0.0, 1.0], got {remaining_ratio}"
- )
+ raise ValueError(f"remaining_ratio must be in [0.0, 1.0], got {remaining_ratio}")
if remaining_ratio >= 0.70:
return ContextBracket.FRESH
@@ -249,9 +250,7 @@ def format_bracket_prompt(bracket: ContextBracket) -> str:
]
if config.prohibited_actions:
- lines.append(
- f" AVOID: {', '.join(config.prohibited_actions)}"
- )
+ lines.append(f" AVOID: {', '.join(config.prohibited_actions)}")
if config.should_handoff:
lines.append(" ACTION REQUIRED: Prepare session handoff before context exhaustion.")
@@ -275,9 +274,7 @@ class ContextTracker:
max_tokens: int
tokens_used: int = 0
- _transitions: list[tuple[int, ContextBracket]] = field(
- default_factory=list, repr=False
- )
+ _transitions: list[tuple[int, ContextBracket]] = field(default_factory=list, repr=False)
def __post_init__(self) -> None:
if self.max_tokens <= 0:
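
The bracket boundaries in get_bracket are half-open on the low side: at least 0.70 remaining is FRESH, and judging from the enum comments the lower cutoffs fall at 0.40 (MODERATE) and 0.20 (DEEP), with CRITICAL below that. A standalone sketch under that assumption:

    from enum import Enum

    class ContextBracket(Enum):
        FRESH = "fresh"
        MODERATE = "moderate"
        DEEP = "deep"
        CRITICAL = "critical"

    def get_bracket(remaining_ratio: float) -> ContextBracket:
        if not (0.0 <= remaining_ratio <= 1.0):
            raise ValueError(f"remaining_ratio must be in [0.0, 1.0], got {remaining_ratio}")
        if remaining_ratio >= 0.70:
            return ContextBracket.FRESH
        if remaining_ratio >= 0.40:
            return ContextBracket.MODERATE
        if remaining_ratio >= 0.20:
            return ContextBracket.DEEP
        return ContextBracket.CRITICAL

    assert get_bracket(0.85) is ContextBracket.FRESH
    assert get_bracket(0.40) is ContextBracket.MODERATE
    assert get_bracket(0.05) is ContextBracket.CRITICAL
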
diff --git a/Gradata/src/gradata/contrib/patterns/evaluator.py b/Gradata/src/gradata/contrib/patterns/evaluator.py
index e22bfaca..4546fcc6 100644
--- a/Gradata/src/gradata/contrib/patterns/evaluator.py
+++ b/Gradata/src/gradata/contrib/patterns/evaluator.py
@@ -335,9 +335,7 @@ def evaluate(
else:
verdict = _VERDICT_MAJOR_REVISION
- regression = (
- previous_result is not None and average < previous_result.average
- )
+ regression = previous_result is not None and average < previous_result.average
if regression and previous_result is not None:
logger.warning(
@@ -400,13 +398,9 @@ def evaluate_optimize_loop(
is less than 1.
"""
if not (0.0 < threshold <= 10.0):
- raise ValueError(
- f"threshold must be in (0, 10]; got {threshold!r}."
- )
+ raise ValueError(f"threshold must be in (0, 10]; got {threshold!r}.")
if max_iterations < 1:
- raise ValueError(
- f"max_iterations must be >= 1; got {max_iterations!r}."
- )
+ raise ValueError(f"max_iterations must be >= 1; got {max_iterations!r}.")
iteration_results: list[EvalResult] = []
current_output: Any = None
@@ -486,9 +480,11 @@ def dimensions_from_graduated_rules(task_type: str = "") -> list[EvalDimension]:
dims = []
for rule in rules:
- dims.append(EvalDimension(
- name=f"rule_{rule.category.lower()}_{len(dims)}",
- weight=rule.confidence,
- description=f"Check: {rule.principle}",
- ))
+ dims.append(
+ EvalDimension(
+ name=f"rule_{rule.category.lower()}_{len(dims)}",
+ weight=rule.confidence,
+ description=f"Check: {rule.principle}",
+ )
+ )
return dims
diff --git a/Gradata/src/gradata/contrib/patterns/execute_qualify.py b/Gradata/src/gradata/contrib/patterns/execute_qualify.py
index 8d3dc24c..6ccdee57 100644
--- a/Gradata/src/gradata/contrib/patterns/execute_qualify.py
+++ b/Gradata/src/gradata/contrib/patterns/execute_qualify.py
@@ -50,6 +50,7 @@
class QualifyScore(Enum):
"""Qualification score from fresh verification."""
+
PASS = "pass"
GAP = "gap"
DRIFT = "drift"
@@ -63,6 +64,7 @@ class FailureClassification(Enum):
- SPEC: The acceptance criteria were wrong. Fix plan first, then code.
- CODE: Implementation doesn't match correct plan. Fix code in place.
"""
+
INTENT = "intent"
SPEC = "spec"
CODE = "code"
@@ -78,6 +80,7 @@ class QualifyResult:
classification: Root cause if score != PASS.
concerns: Issues found during qualification.
"""
+
score: QualifyScore
evidence: str = ""
classification: FailureClassification | None = None
@@ -100,14 +103,13 @@ class ExecuteQualifyResult:
final_qualify: The last QualifyResult from verification.
attempt_history: Full history of (outcome, qualify) pairs.
"""
+
passed: bool
attempts_used: int
max_attempts: int
final_outcome: TaskOutcome | None = None
final_qualify: QualifyResult | None = None
- attempt_history: list[tuple[TaskOutcome, QualifyResult | None]] = field(
- default_factory=list
- )
+ attempt_history: list[tuple[TaskOutcome, QualifyResult | None]] = field(default_factory=list)
# Type aliases for callables
diff --git a/Gradata/src/gradata/contrib/patterns/guardrails.py b/Gradata/src/gradata/contrib/patterns/guardrails.py
index a6ba7066..d9e033c2 100644
--- a/Gradata/src/gradata/contrib/patterns/guardrails.py
+++ b/Gradata/src/gradata/contrib/patterns/guardrails.py
@@ -48,9 +48,9 @@ class GuardCheck:
"""
name: str
- result: str # "pass" | "fail" | "override"
+ result: str # "pass" | "fail" | "override"
details: str
- action_taken: str # "blocked" | "redacted" | "passed" | "user_override"
+ action_taken: str # "blocked" | "redacted" | "passed" | "user_override"
@dataclass
@@ -200,9 +200,7 @@ def _wrapper(*args: Any, **kwargs: Any) -> GuardedResult:
failing_input = [c for c in input_checks if c.result == "fail"]
if failing_input:
- block_reason = "; ".join(
- f"{c.name}: {c.details}" for c in failing_input
- )
+ block_reason = "; ".join(f"{c.name}: {c.details}" for c in failing_input)
return GuardedResult(
input_checks=input_checks,
output_checks=[],
@@ -242,12 +240,8 @@ def _wrapper(*args: Any, **kwargs: Any) -> GuardedResult:
# ---------------------------------------------------------------------------
# Input patterns
-_RE_EMAIL = re.compile(
- r"\b[A-Za-z0-9._%+\-]+@[A-Za-z0-9.\-]+\.[A-Za-z]{2,}\b"
-)
-_RE_PHONE = re.compile(
- r"(?:\+\d[\s\-.]?)?(?:\(\d{3}\)|\d{3})[\s\-.]?\d{3}[\s\-.]?\d{4}\b"
-)
+_RE_EMAIL = re.compile(r"\b[A-Za-z0-9._%+\-]+@[A-Za-z0-9.\-]+\.[A-Za-z]{2,}\b")
+_RE_PHONE = re.compile(r"(?:\+\d[\s\-.]?)?(?:\(\d{3}\)|\d{3})[\s\-.]?\d{3}[\s\-.]?\d{4}\b")
_RE_SSN = re.compile(r"\b\d{3}-\d{2}-\d{4}\b")
_RE_API_KEY = re.compile(r"\b(?:sk-|key-)[A-Za-z0-9_\-]{8,}\b")
@@ -271,7 +265,6 @@ def _wrapper(*args: Any, **kwargs: Any) -> GuardedResult:
_RE_OUT_OF_SCOPE: re.Pattern | None = None
-
# ---------------------------------------------------------------------------
# Guard check functions (private)
# ---------------------------------------------------------------------------
@@ -329,7 +322,12 @@ def _check_injection(data: Any) -> GuardCheck:
def _check_scope(data: Any) -> GuardCheck:
"""Validate that the request is in-scope (configurable, disabled by default)."""
if _RE_OUT_OF_SCOPE is None:
- return GuardCheck(name="scope_validator", result="pass", details="scope guard disabled", action_taken="passed")
+ return GuardCheck(
+ name="scope_validator",
+ result="pass",
+ details="scope guard disabled",
+ action_taken="passed",
+ )
text = str(data)
match = _RE_OUT_OF_SCOPE.search(text)
if match:
@@ -455,7 +453,7 @@ def check_write_path(
target = target[2:]
# 1. Global deny list
- for pattern in (global_deny or []):
+ for pattern in global_deny or []:
if fnmatch(target, pattern) or fnmatch(target.split("/")[-1], pattern):
return ManifestCheckResult(False, f"DENIED by global policy: matches '{pattern}'")
@@ -465,7 +463,7 @@ def check_write_path(
return ManifestCheckResult(True, f"ALLOWED: matches agent write path '{pattern}'")
# 3. Check tools_denied for write restrictions
- for denial in (agent_tools_denied or []):
+ for denial in agent_tools_denied or []:
if denial.startswith("Write "):
deny_pattern = denial[6:]
if fnmatch(target, deny_pattern):
@@ -493,7 +491,9 @@ def check_exec_command(
cmd_lower = command.lower().strip()
for pattern in deny_patterns:
if pattern.lower() in cmd_lower:
- return ManifestCheckResult(False, f"DENIED: command matches blocked pattern '{pattern}'")
+ return ManifestCheckResult(
+ False, f"DENIED: command matches blocked pattern '{pattern}'"
+ )
return ManifestCheckResult(True, "ALLOWED: no deny patterns matched")
@@ -553,7 +553,9 @@ def validate_agent_spawn(
if available >= max_tokens:
return ManifestCheckResult(True, f"ALLOWED: budget {max_tokens} tokens", max_tokens)
- usage_pct = int((max_tokens / parent_budget_remaining) * 100) if parent_budget_remaining > 0 else 100
+ usage_pct = (
+ int((max_tokens / parent_budget_remaining) * 100) if parent_budget_remaining > 0 else 100
+ )
if usage_pct >= child_hard_limit_percent:
return ManifestCheckResult(
@@ -634,6 +636,7 @@ def guards_from_graduated_rules() -> list[Guard]:
def _make_check(rule_text: str, rule_cat: str) -> Callable[[Any], GuardCheck]:
"""Create a check function that scans output for rule violations."""
+
def check_fn(data: Any) -> GuardCheck:
str(data).lower() if data else ""
# Simple keyword check — does the output violate the rule?
@@ -645,10 +648,13 @@ def check_fn(data: Any) -> GuardCheck:
details=f"Rule: {rule_text[:80]}",
action_taken="passed",
)
+
return check_fn
- guards.append(Guard(
- name=f"rule_{rule.category.lower()}_{len(guards)}",
- check_fn=_make_check(rule.principle, rule.category),
- ))
+ guards.append(
+ Guard(
+ name=f"rule_{rule.category.lower()}_{len(guards)}",
+ check_fn=_make_check(rule.principle, rule.category),
+ )
+ )
return guards
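
The _make_check factory exists to avoid Python's late-binding closure pitfall: defining check_fn directly inside the loop would capture the loop variable itself, so every guard would see the last rule. A minimal demonstration of why the indirection matters (toy rules, not the graduated-rule store):

    rules = ["no pricing claims", "cite sources"]

    # Buggy: all closures share one 'rule' cell, so both report the last rule.
    buggy = [lambda: rule for rule in rules]
    assert [fn() for fn in buggy] == ["cite sources", "cite sources"]

    # Fixed: a factory (like _make_check) freezes each value in its own scope.
    def make_check(rule_text: str):
        def check_fn() -> str:
            return rule_text
        return check_fn

    fixed = [make_check(rule) for rule in rules]
    assert [fn() for fn in fixed] == ["no pricing claims", "cite sources"]
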
diff --git a/Gradata/src/gradata/contrib/patterns/human_loop.py b/Gradata/src/gradata/contrib/patterns/human_loop.py
index caf060b3..3617ed8d 100644
--- a/Gradata/src/gradata/contrib/patterns/human_loop.py
+++ b/Gradata/src/gradata/contrib/patterns/human_loop.py
@@ -358,8 +358,7 @@ def assess_risk(
return RiskAssessment(
tier="high",
reason=(
- f"Action contains high-risk keyword(s): "
- f"{', '.join(sorted(set(matched_high)))}."
+ f"Action contains high-risk keyword(s): {', '.join(sorted(set(matched_high)))}."
),
affected=affected,
reversible=reversible,
@@ -371,8 +370,7 @@ def assess_risk(
return RiskAssessment(
tier="medium",
reason=(
- f"Action contains medium-risk keyword(s): "
- f"{', '.join(sorted(set(matched_medium)))}."
+ f"Action contains medium-risk keyword(s): {', '.join(sorted(set(matched_medium)))}."
),
affected=affected,
reversible=reversible,
@@ -383,10 +381,7 @@ def assess_risk(
if matched_low:
return RiskAssessment(
tier="low",
- reason=(
- f"Action contains low-risk keyword(s): "
- f"{', '.join(sorted(set(matched_low)))}."
- ),
+ reason=(f"Action contains low-risk keyword(s): {', '.join(sorted(set(matched_low)))}."),
affected=affected,
reversible=reversible,
)
@@ -460,15 +455,17 @@ def preview_action(
]
if affected:
- entity_str = ", ".join(affected) if len(affected) <= 5 else (
- ", ".join(affected[:5]) + f" ... (+{len(affected) - 5} more)"
+ entity_str = (
+ ", ".join(affected)
+ if len(affected) <= 5
+ else (", ".join(affected[:5]) + f" ... (+{len(affected) - 5} more)")
)
lines.append(f"Affects: {entity_str}")
else:
lines.append("Affects: (entities not specified)")
- reversibility = "Yes — can be undone." if risk.reversible else (
- "No — this action cannot be reversed."
+ reversibility = (
+ "Yes — can be undone." if risk.reversible else ("No — this action cannot be reversed.")
)
lines.append(f"Reversible: {reversibility}")
@@ -479,6 +476,7 @@ def preview_action(
# Convenience class wrapper
# ---------------------------------------------------------------------------
+
class HumanLoopGate:
"""OOP wrapper around ``assess_risk`` and ``gate`` for approval workflows.
@@ -502,11 +500,7 @@ def check(
"""Full gate check: assess risk, request approval if needed."""
request = gate(action)
if request is None:
- return ApprovalResult(
- approved=True, feedback="auto_approved_low_risk"
- )
+ return ApprovalResult(approved=True, feedback="auto_approved_low_risk")
if approver is not None:
return approver(request)
- return ApprovalResult(
- approved=False, feedback="requires_human_review"
- )
+ return ApprovalResult(approved=False, feedback="requires_human_review")
diff --git a/Gradata/src/gradata/contrib/patterns/loop_detection.py b/Gradata/src/gradata/contrib/patterns/loop_detection.py
index b288f661..35523db5 100644
--- a/Gradata/src/gradata/contrib/patterns/loop_detection.py
+++ b/Gradata/src/gradata/contrib/patterns/loop_detection.py
@@ -51,9 +51,10 @@
class LoopAction(Enum):
"""Action to take based on loop detection."""
- ALLOW = "allow" # No loop detected, proceed normally
- WARN = "warn" # Loop pattern detected, log warning but continue
- STOP = "stop" # Hard loop detected, halt execution
+
+ ALLOW = "allow" # No loop detected, proceed normally
+ WARN = "warn" # Loop pattern detected, log warning but continue
+ STOP = "stop" # Hard loop detected, halt execution
@dataclass
@@ -66,6 +67,7 @@ class LoopEvent:
action: The action determined by the detector.
repeat_count: How many times this exact call has been seen in window.
"""
+
tool_name: str
call_hash: str
action: LoopAction
@@ -81,6 +83,7 @@ class LoopDetectorConfig:
warn_threshold: Number of identical calls before warning.
stop_threshold: Number of identical calls before hard stop.
"""
+
window_size: int = 20
warn_threshold: int = 3
stop_threshold: int = 5
@@ -210,10 +213,7 @@ def _normalize_args(args: dict[str, Any]) -> dict[str, Any]:
if isinstance(val, dict):
result[key] = _normalize_args(val)
elif isinstance(val, (list, tuple)):
- result[key] = [
- _normalize_args(v) if isinstance(v, dict) else v
- for v in val
- ]
+ result[key] = [_normalize_args(v) if isinstance(v, dict) else v for v in val]
else:
result[key] = val
return result
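
The detector above keys on an exact-call hash over a bounded window, and the config's warn/stop thresholds map a repeat count to a LoopAction. A compact sketch under those assumptions: a deque window, a stable hash over tool name plus normalized arguments, and string actions standing in for the enum:

    import hashlib
    import json
    from collections import deque

    class LoopDetector:
        def __init__(self, window_size: int = 20, warn_at: int = 3, stop_at: int = 5):
            self.window: deque[str] = deque(maxlen=window_size)
            self.warn_at, self.stop_at = warn_at, stop_at

        def record(self, tool: str, args: dict) -> str:
            # Stable hash: identical (tool, args) pairs collide on purpose.
            key = hashlib.sha1(json.dumps([tool, args], sort_keys=True).encode()).hexdigest()
            self.window.append(key)
            repeats = sum(1 for k in self.window if k == key)
            if repeats >= self.stop_at:
                return "stop"
            if repeats >= self.warn_at:
                return "warn"
            return "allow"

    d = LoopDetector()
    actions = [d.record("grep", {"q": "foo"}) for _ in range(5)]
    assert actions == ["allow", "allow", "warn", "warn", "stop"]
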
diff --git a/Gradata/src/gradata/contrib/patterns/mcp.py b/Gradata/src/gradata/contrib/patterns/mcp.py
index ffe87f9e..66e73234 100644
--- a/Gradata/src/gradata/contrib/patterns/mcp.py
+++ b/Gradata/src/gradata/contrib/patterns/mcp.py
@@ -110,16 +110,13 @@ def handle_call(self, tool_name: str, arguments: dict[str, Any]) -> dict[str, An
except Exception as e:
return {"error": str(e)}
-
def stats(self) -> dict[str, Any]:
"""Bridge statistics."""
return {
"brain_tools": len(self._tools),
"brain_handlers": len(self._handlers),
"connected_servers": len(self._connected_servers),
- "total_external_tools": sum(
- len(s.tools) for s in self._connected_servers
- ),
+ "total_external_tools": sum(len(s.tools) for s in self._connected_servers),
}
@@ -130,29 +127,34 @@ def create_brain_mcp_tools() -> list[MCPToolSchema]:
"""
return [
MCPToolSchema(
- "brain_search", "Search the brain for relevant context",
+ "brain_search",
+ "Search the brain for relevant context",
{"query": {"type": "string", "description": "Search query"}},
),
MCPToolSchema(
- "brain_correct", "Record a user correction to improve the brain",
+ "brain_correct",
+ "Record a user correction to improve the brain",
{
"draft": {"type": "string", "description": "Original AI draft"},
"final": {"type": "string", "description": "User-edited final version"},
},
),
MCPToolSchema(
- "brain_log_output", "Log an AI-generated output for tracking",
+ "brain_log_output",
+ "Log an AI-generated output for tracking",
{
"text": {"type": "string", "description": "Generated text"},
"output_type": {"type": "string", "description": "Type of output"},
},
),
MCPToolSchema(
- "brain_manifest", "Generate and return brain quality manifest",
+ "brain_manifest",
+ "Generate and return brain quality manifest",
{},
),
MCPToolSchema(
- "brain_health", "Check brain health status",
+ "brain_health",
+ "Check brain health status",
{},
),
]
diff --git a/Gradata/src/gradata/contrib/patterns/memory.py b/Gradata/src/gradata/contrib/patterns/memory.py
index 718e71e4..a94c337d 100644
--- a/Gradata/src/gradata/contrib/patterns/memory.py
+++ b/Gradata/src/gradata/contrib/patterns/memory.py
@@ -11,11 +11,13 @@
# Constants
# ---------------------------------------------------------------------------
-VALID_TYPES: frozenset[str] = frozenset({
- "episodic", # What happened (interactions, outcomes)
- "semantic", # What is true (facts, knowledge)
- "procedural", # How to do things (workflows, patterns)
-})
+VALID_TYPES: frozenset[str] = frozenset(
+ {
+ "episodic", # What happened (interactions, outcomes)
+ "semantic", # What is true (facts, knowledge)
+ "procedural", # How to do things (workflows, patterns)
+ }
+)
# ---------------------------------------------------------------------------
@@ -55,8 +57,7 @@ class Memory:
def __post_init__(self) -> None:
if self.memory_type not in VALID_TYPES:
raise ValueError(
- f"Invalid memory_type {self.memory_type!r}. "
- f"Must be one of: {sorted(VALID_TYPES)}"
+ f"Invalid memory_type {self.memory_type!r}. Must be one of: {sorted(VALID_TYPES)}"
)
if not self.content:
raise ValueError("Memory content must not be empty.")
@@ -217,10 +218,7 @@ def decay(self, max_age_days: int = 30, min_reinforcements: int = 1) -> list[str
for memory in list(self._store.all()):
if memory.memory_type != self.memory_type:
continue
- if (
- memory.age_days() > max_age_days
- and memory.reinforcement_count < min_reinforcements
- ):
+ if memory.age_days() > max_age_days and memory.reinforcement_count < min_reinforcements:
self._store.delete(memory.id)
pruned.append(memory.id)
return pruned
@@ -350,10 +348,7 @@ def decay(self, max_age_days: int = 30, min_reinforcements: int = 1) -> list[str
for memory in list(self._store.all()):
if memory.memory_type != self.memory_type:
continue
- if (
- memory.age_days() > max_age_days
- and memory.reinforcement_count < min_reinforcements
- ):
+ if memory.age_days() > max_age_days and memory.reinforcement_count < min_reinforcements:
self._store.delete(memory.id)
pruned.append(memory.id)
return pruned
@@ -390,10 +385,7 @@ def store(
return self.semantic.store(content, metadata)
if memory_type == "procedural":
return self.procedural.store(content, metadata)
- raise ValueError(
- f"Unknown memory_type {memory_type!r}. "
- f"Valid types: {sorted(VALID_TYPES)}"
- )
+ raise ValueError(f"Unknown memory_type {memory_type!r}. Valid types: {sorted(VALID_TYPES)}")
def retrieve(
self,
@@ -423,10 +415,7 @@ def decay(
)
pruned: list[str] = []
for memory in list(self._store.all()):
- if (
- memory.age_days() > max_age_days
- and memory.reinforcement_count < min_reinforcements
- ):
+ if memory.age_days() > max_age_days and memory.reinforcement_count < min_reinforcements:
self._store.delete(memory.id)
pruned.append(memory.id)
return pruned
@@ -452,9 +441,7 @@ def stats(self) -> dict:
by_type[m.memory_type] = by_type.get(m.memory_type, 0) + 1
avg_reinforcements = (
- round(sum(m.reinforcement_count for m in all_memories) / total, 2)
- if total > 0
- else 0.0
+ round(sum(m.reinforcement_count for m in all_memories) / total, 2) if total > 0 else 0.0
)
created_timestamps = [m.created for m in all_memories]
@@ -502,7 +489,6 @@ def __repr__(self) -> str:
(r"^competitors/", "project"),
(r"^icp-research", "project"),
(r"^learnings/", "project"),
-
# USER scope (personal, never shared)
(r"^metrics/", "user"),
(r"^loop-state\.md$", "user"),
@@ -510,7 +496,6 @@ def __repr__(self) -> str:
(r"^self-model\.md$", "user"),
(r"^audits/", "user"),
(r"^evals/", "user"),
-
# LOCAL scope (deployment-specific)
(r"^prospects/", "local"),
(r"^pipeline/", "local"),
diff --git a/Gradata/src/gradata/contrib/patterns/middleware.py b/Gradata/src/gradata/contrib/patterns/middleware.py
index 00c043f9..95e0a714 100644
--- a/Gradata/src/gradata/contrib/patterns/middleware.py
+++ b/Gradata/src/gradata/contrib/patterns/middleware.py
@@ -50,6 +50,7 @@ def after(self, ctx: MiddlewareContext) -> MiddlewareContext:
class MiddlewareError(Exception):
"""Raised when middleware chain has configuration errors."""
+
pass
@@ -68,6 +69,7 @@ class MiddlewareContext:
metadata: Middleware-contributed metadata.
errors: Errors collected during chain execution.
"""
+
operation: str = ""
data: dict[str, Any] = field(default_factory=dict)
result: Any = None
@@ -88,8 +90,9 @@ class Middleware:
before_middleware: Name of middleware this should precede.
If neither is set, middleware is appended to the end.
"""
+
name: str = "unnamed"
- after_middleware: str = "" # Insert after this middleware
+ after_middleware: str = "" # Insert after this middleware
before_middleware: str = "" # Insert before this middleware
def before(self, ctx: MiddlewareContext) -> MiddlewareContext:
@@ -148,9 +151,7 @@ def add(self, middleware: Middleware) -> None:
MiddlewareError: If anchors reference unknown or circular deps.
"""
if middleware.name in self._name_index:
- raise MiddlewareError(
- f"Middleware '{middleware.name}' already registered"
- )
+ raise MiddlewareError(f"Middleware '{middleware.name}' already registered")
if middleware.after_middleware and middleware.before_middleware:
raise MiddlewareError(
@@ -263,6 +264,4 @@ def stats(self) -> dict[str, Any]:
def _rebuild_index(self) -> None:
"""Rebuild the name-to-index mapping."""
- self._name_index = {
- mw.name: i for i, mw in enumerate(self._middlewares)
- }
+ self._name_index = {mw.name: i for i, mw in enumerate(self._middlewares)}
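
The after_middleware/before_middleware anchors above turn registration order into an explicit partial order. A minimal sketch of anchored insertion into a chain, assuming each middleware carries at most one anchor and unknown anchors surface as list.index errors:

    from dataclasses import dataclass

    @dataclass
    class MW:  # illustrative stand-in for Middleware
        name: str
        after: str = ""
        before: str = ""

    def insert(chain: list[MW], mw: MW) -> None:
        names = [m.name for m in chain]
        if mw.after and mw.before:
            raise ValueError(f"'{mw.name}': set at most one of after/before")
        if mw.after:
            chain.insert(names.index(mw.after) + 1, mw)
        elif mw.before:
            chain.insert(names.index(mw.before), mw)
        else:
            chain.append(mw)  # no anchor: append to the end

    chain: list[MW] = []
    for m in (MW("auth"), MW("log"), MW("trace", after="auth"), MW("cors", before="auth")):
        insert(chain, m)
    assert [m.name for m in chain] == ["cors", "auth", "trace", "log"]
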
diff --git a/Gradata/src/gradata/contrib/patterns/orchestrator.py b/Gradata/src/gradata/contrib/patterns/orchestrator.py
index 9241f20d..1a1a616f 100644
--- a/Gradata/src/gradata/contrib/patterns/orchestrator.py
+++ b/Gradata/src/gradata/contrib/patterns/orchestrator.py
@@ -100,6 +100,7 @@
# Intent-to-pattern mapping
# ---------------------------------------------------------------------------
+
@dataclass(frozen=True)
class IntentPattern:
"""Maps a named intent to its primary pattern and optional secondaries.
@@ -159,7 +160,6 @@ class IntentPattern:
primary=PATTERN_PLANNING,
secondary=[PATTERN_CHAIN_OF_THOUGHT, PATTERN_ORCHESTRATION],
),
-
# ── Engineering / developer ──────────────────────────────────────────────
IntentPattern(
intent="code_review",
@@ -181,7 +181,6 @@ class IntentPattern:
primary=PATTERN_TRANSFORMATION,
secondary=[PATTERN_REFLECTION, PATTERN_VALIDATION],
),
-
# ── Recruiting / talent ──────────────────────────────────────────────────
IntentPattern(
intent="interview_prep",
@@ -198,7 +197,6 @@ class IntentPattern:
primary=PATTERN_GENERATION,
secondary=[PATTERN_REFLECTION, PATTERN_VALIDATION],
),
-
# ── Sales (preserved for backward compatibility) ─────────────────────────
IntentPattern(
intent="email_draft",
@@ -288,21 +286,15 @@ def register_intent_pattern(
)
"""
if pattern not in ALL_PATTERNS:
- raise ValueError(
- f"Unknown pattern {pattern!r}. "
- f"Must be one of: {sorted(ALL_PATTERNS)}"
- )
+ raise ValueError(f"Unknown pattern {pattern!r}. Must be one of: {sorted(ALL_PATTERNS)}")
bad = [s for s in (secondary or []) if s not in ALL_PATTERNS]
if bad:
raise ValueError(
- f"Unknown secondary pattern(s) {bad!r}. "
- f"Must be one of: {sorted(ALL_PATTERNS)}"
+ f"Unknown secondary pattern(s) {bad!r}. Must be one of: {sorted(ALL_PATTERNS)}"
)
global _REGISTERED_INTENT_PATTERNS
- _REGISTERED_INTENT_PATTERNS = [
- p for p in _REGISTERED_INTENT_PATTERNS if p.intent != intent
- ]
+ _REGISTERED_INTENT_PATTERNS = [p for p in _REGISTERED_INTENT_PATTERNS if p.intent != intent]
entry = IntentPattern(
intent=intent,
@@ -319,6 +311,7 @@ def register_intent_pattern(
# Classification result
# ---------------------------------------------------------------------------
+
@dataclass
class RequestClassification:
"""Full classification of a single incoming request.
@@ -346,6 +339,7 @@ class RequestClassification:
# Public API
# ---------------------------------------------------------------------------
+
def classify_request(query: str) -> RequestClassification:
"""Classify a raw query and return the full routing decision.
@@ -401,6 +395,7 @@ def classify_request(query: str) -> RequestClassification:
# lists to agent names. Domains register their own rules at startup;
# ``route_by_keywords`` then matches an incoming task description.
+
@dataclass
class RouteRule:
"""Maps a list of keyword phrases to an agent name.
@@ -514,9 +509,15 @@ def execute_orchestrated(
if len(tasks) == 1:
try:
result = worker(tasks[0]) # type: ignore[operator]
- return {"strategy": "direct", "results": [{"task": tasks[0], "status": "completed", "result": result}]}
+ return {
+ "strategy": "direct",
+ "results": [{"task": tasks[0], "status": "completed", "result": result}],
+ }
except Exception as e:
- return {"strategy": "direct", "results": [{"task": tasks[0], "status": "failed", "error": str(e)}]}
+ return {
+ "strategy": "direct",
+ "results": [{"task": tasks[0], "status": "failed", "error": str(e)}],
+ }
# Multiple tasks — classify to check if they're independent
classifications = [classify_request(t) for t in tasks]
diff --git a/Gradata/src/gradata/contrib/patterns/parallel.py b/Gradata/src/gradata/contrib/patterns/parallel.py
index d8689cf6..d30f2098 100644
--- a/Gradata/src/gradata/contrib/patterns/parallel.py
+++ b/Gradata/src/gradata/contrib/patterns/parallel.py
@@ -170,8 +170,7 @@ def _topological_waves(tasks: list[ParallelTask]) -> list[list[str]]:
for dep_id in task.depends_on:
if dep_id not in task_map:
raise ValueError(
- f"Task '{task.id}' declares dependency on unknown "
- f"task '{dep_id}'."
+ f"Task '{task.id}' declares dependency on unknown task '{dep_id}'."
)
in_degree[task.id] += 1
dependents[dep_id].append(task.id)
@@ -193,9 +192,7 @@ def _topological_waves(tasks: list[ParallelTask]) -> list[list[str]]:
scheduled = sum(len(w) for w in waves)
if scheduled != len(tasks):
unscheduled = [tid for tid in in_degree if in_degree[tid] > 0]
- raise ValueError(
- f"Dependency cycle detected. Tasks involved: {unscheduled}"
- )
+ raise ValueError(f"Dependency cycle detected. Tasks involved: {unscheduled}")
return waves
@@ -304,26 +301,20 @@ def run(self) -> ParallelResult:
# Check whether any dependency failed; skip if so.
failed_deps = [
- dep for dep in task.depends_on
- if dep in results and not results[dep].success
+ dep for dep in task.depends_on if dep in results and not results[dep].success
]
if failed_deps:
results[tid] = TaskResult(
task_id=tid,
success=False,
output=None,
- error=(
- f"Skipped: upstream dependencies failed: "
- f"{failed_deps}"
- ),
+ error=(f"Skipped: upstream dependencies failed: {failed_deps}"),
)
continue
# Forward upstream outputs into input_data.
if task.depends_on:
- upstream_outputs = {
- dep: results[dep].output for dep in task.depends_on
- }
+ upstream_outputs = {dep: results[dep].output for dep in task.depends_on}
if len(upstream_outputs) == 1:
# Single parent: pass the value directly for ergonomics.
task.input_data = next(iter(upstream_outputs.values()))
@@ -332,9 +323,7 @@ def run(self) -> ParallelResult:
results[tid] = _run_task(task)
- total_duration_ms = round(
- (time.monotonic() - graph_start) * 1000.0, 2
- )
+ total_duration_ms = round((time.monotonic() - graph_start) * 1000.0, 2)
all_succeeded = all(r.success for r in results.values())
return ParallelResult(
@@ -388,8 +377,7 @@ def merge_results(
valid_strategies = {"combine", "best_of", "synthesize"}
if strategy not in valid_strategies:
raise ValueError(
- f"Unknown merge strategy '{strategy}'. "
- f"Choose from: {sorted(valid_strategies)}"
+ f"Unknown merge strategy '{strategy}'. Choose from: {sorted(valid_strategies)}"
)
successful = [r for r in results if r.success]
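For reference, wave scheduling of the kind _topological_waves implements can be sketched standalone with Kahn's algorithm; the dict-of-dependencies input shape here is an assumption for brevity:

    from collections import defaultdict

    def topological_waves(depends_on: dict[str, list[str]]) -> list[list[str]]:
        # Group tasks into waves: each wave depends only on earlier waves.
        in_degree = {t: len(deps) for t, deps in depends_on.items()}
        dependents = defaultdict(list)
        for task, deps in depends_on.items():
            for dep in deps:
                if dep not in depends_on:
                    raise ValueError(f"Task {task!r} depends on unknown task {dep!r}.")
                dependents[dep].append(task)
        waves, ready = [], sorted(t for t, n in in_degree.items() if n == 0)
        while ready:
            waves.append(ready)
            nxt = []
            for task in ready:
                for child in dependents[task]:
                    in_degree[child] -= 1
                    if in_degree[child] == 0:
                        nxt.append(child)
            ready = sorted(nxt)
        if sum(len(w) for w in waves) != len(depends_on):
            raise ValueError("Dependency cycle detected.")
        return waves

    print(topological_waves({"a": [], "b": ["a"], "c": ["a"], "d": ["b", "c"]}))
    # [['a'], ['b', 'c'], ['d']]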
diff --git a/Gradata/src/gradata/contrib/patterns/pipeline.py b/Gradata/src/gradata/contrib/patterns/pipeline.py
index 7b5ac47d..ba4f8dd1 100644
--- a/Gradata/src/gradata/contrib/patterns/pipeline.py
+++ b/Gradata/src/gradata/contrib/patterns/pipeline.py
@@ -60,9 +60,7 @@ class GateResult:
def __post_init__(self) -> None:
if self.score is not None and not (0.0 <= self.score <= 1.0):
- raise ValueError(
- f"GateResult.score must be in [0.0, 1.0], got {self.score!r}"
- )
+ raise ValueError(f"GateResult.score must be in [0.0, 1.0], got {self.score!r}")
@dataclass
@@ -241,10 +239,7 @@ def run(self, input_data: Any) -> tuple[Any, GateResult | None, int]:
def __repr__(self) -> str:
gate_label = self.gate.__name__ if self.gate is not None else "none"
- return (
- f"Stage(name={self.name!r}, gate={gate_label!r}, "
- f"max_retries={self.max_retries!r})"
- )
+ return f"Stage(name={self.name!r}, gate={gate_label!r}, max_retries={self.max_retries!r})"
# ---------------------------------------------------------------------------
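The Stage/GateResult contract above implies a small retry loop; a hedged sketch, with run_gated as an illustrative name rather than the module's API:

    def run_gated(fn, gate, input_data, max_retries=2):
        # Run the stage, check the gate, retry on failure up to max_retries.
        attempts = 0
        while True:
            output = fn(input_data)
            passed = gate(output)
            if passed or attempts >= max_retries:
                return output, passed, attempts
            attempts += 1

    out, ok, retries = run_gated(lambda x: x.strip(), lambda o: bool(o), "  draft  ")
    print(out, ok, retries)  # draft True 0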
diff --git a/Gradata/src/gradata/contrib/patterns/q_learning_router.py b/Gradata/src/gradata/contrib/patterns/q_learning_router.py
index 5f83a951..29906b41 100644
--- a/Gradata/src/gradata/contrib/patterns/q_learning_router.py
+++ b/Gradata/src/gradata/contrib/patterns/q_learning_router.py
@@ -63,10 +63,19 @@ class RouterConfig:
feature_dim: Dimensionality of feature vectors.
save_interval: Auto-save after this many updates.
"""
- agents: list[str] = field(default_factory=lambda: [
- "coder", "reviewer", "architect", "researcher",
- "debugger", "writer", "optimizer", "tester",
- ])
+
+ agents: list[str] = field(
+ default_factory=lambda: [
+ "coder",
+ "reviewer",
+ "architect",
+ "researcher",
+ "debugger",
+ "writer",
+ "optimizer",
+ "tester",
+ ]
+ )
learning_rate: float = 0.1
discount_factor: float = 0.95
epsilon_start: float = 1.0
@@ -90,6 +99,7 @@ class RouteDecision:
confidence: Confidence in the decision (max Q / sum Q).
exploiting: True if decision was greedy, False if exploring.
"""
+
agent: str
state_hash: str = ""
q_values: dict[str, float] = field(default_factory=dict)
@@ -107,6 +117,7 @@ class Experience:
reward: Reward received.
td_error: Temporal difference error magnitude (for prioritized replay).
"""
+
state_hash: str
action_idx: int
reward: float
@@ -157,7 +168,7 @@ def _extract_features(text: str, dim: int = 32) -> list[float]:
# N-gram hash features (remaining dimensions)
for n in range(1, 4): # unigrams, bigrams, trigrams
for j in range(len(words) - n + 1):
- ngram = " ".join(words[j:j + n])
+ ngram = " ".join(words[j : j + n])
h = int(hashlib.md5(ngram.encode()).hexdigest(), 16)
idx = 8 + (h % max(1, dim - 8))
if idx < dim:
@@ -187,6 +198,7 @@ def _hash_state(features: list[float], quantize_bits: int = 4) -> str:
# Q-Learning Router
# ---------------------------------------------------------------------------
+
class QLearningRouter:
"""Q-Learning based agent router with experience replay.
@@ -374,6 +386,7 @@ def _compute_hmac(data_bytes: bytes) -> str:
# Key derived from machine identity (not secret, just tamper detection)
import platform
+
key = f"gradata-router-{platform.node()}".encode()
return _hmac.new(key, data_bytes, "sha256").hexdigest()
@@ -427,6 +440,7 @@ def load(self, filepath: str | Path) -> bool:
expected = self._compute_hmac(body)
if stored_hmac != expected:
import logging
+
logging.getLogger(__name__).warning(
"Q-table integrity check failed: %s may be tampered", filepath
)
@@ -485,9 +499,7 @@ def _get_q_values(self, state_hash: str) -> list[float]:
"""Get or initialize Q-values for a state."""
if state_hash not in self.q_table:
# Initialize with small random values to break ties
- self.q_table[state_hash] = [
- random.uniform(0.0, 0.01) for _ in self.config.agents
- ]
+ self.q_table[state_hash] = [random.uniform(0.0, 0.01) for _ in self.config.agents]
return self.q_table[state_hash]
def _compute_confidence(self, q_values: list[float]) -> float:
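The router above turns on two moves: epsilon-greedy selection and a one-step TD update. A standalone sketch, with alpha and gamma mirroring the RouterConfig defaults shown:

    import random

    def choose(q_values: list[float], epsilon: float) -> tuple[int, bool]:
        # Explore with probability epsilon, otherwise act greedily.
        if random.random() < epsilon:
            return random.randrange(len(q_values)), False
        return max(range(len(q_values)), key=q_values.__getitem__), True

    def td_update(q_values, action, reward, alpha=0.1, gamma=0.95, next_max=0.0):
        # Q <- Q + alpha * (reward + gamma * max(Q') - Q); returns |TD error|.
        td_error = reward + gamma * next_max - q_values[action]
        q_values[action] += alpha * td_error
        return abs(td_error)

    q = [0.0, 0.0, 0.0]
    action, exploiting = choose(q, epsilon=0.1)
    print(td_update(q, action, reward=1.0), q)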
diff --git a/Gradata/src/gradata/contrib/patterns/rag.py b/Gradata/src/gradata/contrib/patterns/rag.py
index eacac563..a4ad2737 100644
--- a/Gradata/src/gradata/contrib/patterns/rag.py
+++ b/Gradata/src/gradata/contrib/patterns/rag.py
@@ -26,12 +26,13 @@
# Data types
# ---------------------------------------------------------------------------
+
@dataclass
class Chunk:
"""A retrieved chunk of brain content."""
content: str
- source: str # file/doc name
+ source: str # file/doc name
chunk_id: str = ""
relevance_score: float = 0.0
recency_weight: float = 1.0
@@ -45,7 +46,7 @@ class RetrievalResult:
chunks: list[Chunk]
query: str
- mode: str # "fts", "vector", "hybrid", "cascade"
+ mode: str # "fts", "vector", "hybrid", "cascade"
total_candidates: int = 0
citations: dict[str, str] = field(default_factory=dict) # claim -> source
@@ -54,24 +55,27 @@ class RetrievalResult:
class CascadeConfig:
"""Configuration for the retrieval cascade."""
- fts_threshold: float = 0.3 # min FTS score to stop cascade
- vector_threshold: float = 0.5 # min vector score to stop cascade
- hybrid_rrf_k: int = 60 # RRF constant
+ fts_threshold: float = 0.3 # min FTS score to stop cascade
+ vector_threshold: float = 0.5 # min vector score to stop cascade
+ hybrid_rrf_k: int = 60 # RRF constant
max_results: int = 10
- two_pass: bool = False # Enable two-pass query expansion
- two_pass_top_k: int = 3 # How many results to mine for expansion terms
- graduation_boost: dict[str, float] = field(default_factory=lambda: {
- "RULE": 1.2,
- "PATTERN": 1.0,
- "INSTINCT": 0.8,
- "UNTESTABLE": 0.5,
- })
+ two_pass: bool = False # Enable two-pass query expansion
+ two_pass_top_k: int = 3 # How many results to mine for expansion terms
+ graduation_boost: dict[str, float] = field(
+ default_factory=lambda: {
+ "RULE": 1.2,
+ "PATTERN": 1.0,
+ "INSTINCT": 0.8,
+ "UNTESTABLE": 0.5,
+ }
+ )
# ---------------------------------------------------------------------------
# Graduation-aware scoring
# ---------------------------------------------------------------------------
+
def apply_graduation_scoring(
chunks: list[Chunk],
config: CascadeConfig | None = None,
@@ -102,6 +106,7 @@ def apply_graduation_scoring(
# Reciprocal Rank Fusion (RRF)
# ---------------------------------------------------------------------------
+
def rrf_merge(
*result_lists: list[Chunk],
k: int = 60,
@@ -125,15 +130,17 @@ def rrf_merge(
merged: list[Chunk] = []
for cid, score in sorted(scores.items(), key=lambda x: -x[1]):
chunk = chunks_by_id[cid]
- merged.append(Chunk(
- content=chunk.content,
- source=chunk.source,
- chunk_id=cid,
- relevance_score=round(score, 6),
- recency_weight=chunk.recency_weight,
- memory_type=chunk.memory_type,
- graduation_level=chunk.graduation_level,
- ))
+ merged.append(
+ Chunk(
+ content=chunk.content,
+ source=chunk.source,
+ chunk_id=cid,
+ relevance_score=round(score, 6),
+ recency_weight=chunk.recency_weight,
+ memory_type=chunk.memory_type,
+ graduation_level=chunk.graduation_level,
+ )
+ )
return merged
@@ -143,7 +150,114 @@ def rrf_merge(
# Common stopwords to filter out during term extraction (pure stdlib)
_STOPWORDS = frozenset(
- ["a", "an", "the", "is", "are", "was", "were", "be", "been", "being", "have", "has", "had", "do", "does", "did", "will", "would", "shall", "should", "may", "might", "can", "could", "of", "in", "to", "for", "on", "with", "at", "by", "from", "as", "into", "through", "during", "before", "after", "above", "below", "between", "out", "off", "over", "under", "again", "further", "then", "once", "here", "there", "when", "where", "why", "how", "all", "each", "every", "both", "few", "more", "most", "other", "some", "such", "no", "nor", "not", "only", "own", "same", "so", "than", "too", "very", "it", "its", "and", "but", "or", "if", "while", "that", "this", "these", "those", "i", "me", "my", "we", "our", "you", "your", "he", "him", "his", "she", "her", "they", "them", "their", "what", "which", "who", "whom"]
+ [
+ "a",
+ "an",
+ "the",
+ "is",
+ "are",
+ "was",
+ "were",
+ "be",
+ "been",
+ "being",
+ "have",
+ "has",
+ "had",
+ "do",
+ "does",
+ "did",
+ "will",
+ "would",
+ "shall",
+ "should",
+ "may",
+ "might",
+ "can",
+ "could",
+ "of",
+ "in",
+ "to",
+ "for",
+ "on",
+ "with",
+ "at",
+ "by",
+ "from",
+ "as",
+ "into",
+ "through",
+ "during",
+ "before",
+ "after",
+ "above",
+ "below",
+ "between",
+ "out",
+ "off",
+ "over",
+ "under",
+ "again",
+ "further",
+ "then",
+ "once",
+ "here",
+ "there",
+ "when",
+ "where",
+ "why",
+ "how",
+ "all",
+ "each",
+ "every",
+ "both",
+ "few",
+ "more",
+ "most",
+ "other",
+ "some",
+ "such",
+ "no",
+ "nor",
+ "not",
+ "only",
+ "own",
+ "same",
+ "so",
+ "than",
+ "too",
+ "very",
+ "it",
+ "its",
+ "and",
+ "but",
+ "or",
+ "if",
+ "while",
+ "that",
+ "this",
+ "these",
+ "those",
+ "i",
+ "me",
+ "my",
+ "we",
+ "our",
+ "you",
+ "your",
+ "he",
+ "him",
+ "his",
+ "she",
+ "her",
+ "they",
+ "them",
+ "their",
+ "what",
+ "which",
+ "who",
+ "whom",
+ ]
)
@@ -198,6 +312,7 @@ def extract_expansion_terms(
# Retrieval cascade
# ---------------------------------------------------------------------------
+
def cascade_retrieve(
query: str,
fts_fn: Callable | None = None,
@@ -324,7 +439,10 @@ def cascade_retrieve(
if _cascade_errors:
mode = f"cascade_failed ({', '.join(_cascade_errors)})"
return RetrievalResult(
- chunks=[], query=query, mode=mode, total_candidates=0,
+ chunks=[],
+ query=query,
+ mode=mode,
+ total_candidates=0,
)
@@ -332,6 +450,7 @@ def cascade_retrieve(
# Context ordering (Lost in the Middle paper)
# ---------------------------------------------------------------------------
+
def order_by_relevance_position(chunks: list[Chunk]) -> list[Chunk]:
"""Reorder chunks per "Lost in the Middle" paper findings.
@@ -358,6 +477,7 @@ def order_by_relevance_position(chunks: list[Chunk]) -> list[Chunk]:
# Convenience classes (wrap cascade_retrieve for OOP usage)
# ---------------------------------------------------------------------------
+
class SmartRAG:
"""Smart retrieval with graduation-aware scoring and cascade strategy.
@@ -381,7 +501,9 @@ def __init__(
def retrieve(self, query: str) -> RetrievalResult:
"""Run the cascade retrieval pipeline."""
- return cascade_retrieve(query, fts_fn=self.fts_fn, vector_fn=self.vector_fn, config=self.config)
+ return cascade_retrieve(
+ query, fts_fn=self.fts_fn, vector_fn=self.vector_fn, config=self.config
+ )
class NaiveRAG:
@@ -399,6 +521,8 @@ def retrieve(self, query: str, top_k: int = 5) -> RetrievalResult:
return RetrievalResult(chunks=[], query=query, mode="naive", total_candidates=0)
try:
results = self.fts_fn(query, top_k)
- return RetrievalResult(chunks=results, query=query, mode="naive", total_candidates=len(results))
+ return RetrievalResult(
+ chunks=results, query=query, mode="naive", total_candidates=len(results)
+ )
except Exception:
return RetrievalResult(chunks=[], query=query, mode="naive", total_candidates=0)
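Reciprocal Rank Fusion, which rrf_merge applies to full Chunk objects, reduces to one line of arithmetic per (list, rank) pair: score(d) is the sum of 1 / (k + rank) over every result list that ranks d. A sketch over bare IDs:

    def rrf_scores(*ranked_lists: list[str], k: int = 60) -> list[tuple[str, float]]:
        # Each list contributes 1 / (k + rank) per document it ranks.
        scores: dict[str, float] = {}
        for ranked in ranked_lists:
            for rank, chunk_id in enumerate(ranked, start=1):
                scores[chunk_id] = scores.get(chunk_id, 0.0) + 1.0 / (k + rank)
        return sorted(scores.items(), key=lambda kv: -kv[1])

    # "b" wins: it is ranked highly by both the FTS and the vector list.
    print(rrf_scores(["a", "b", "c"], ["b", "c", "d"]))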
diff --git a/Gradata/src/gradata/contrib/patterns/reconciliation.py b/Gradata/src/gradata/contrib/patterns/reconciliation.py
index dd1019cd..9ee914a6 100644
--- a/Gradata/src/gradata/contrib/patterns/reconciliation.py
+++ b/Gradata/src/gradata/contrib/patterns/reconciliation.py
@@ -44,9 +44,10 @@
class DeviationScore(Enum):
"""Qualification score for plan-vs-actual comparison."""
- PASS = "pass" # Actual matches plan exactly
- GAP = "gap" # Partial achievement, missing elements
- DRIFT = "drift" # Achieved something different than planned
+
+ PASS = "pass" # Actual matches plan exactly
+ GAP = "gap" # Partial achievement, missing elements
+ DRIFT = "drift" # Achieved something different than planned
@dataclass
@@ -59,6 +60,7 @@ class PlanItem:
criteria: How to verify achievement (executable check preferred).
files: Optional list of files expected to be modified.
"""
+
id: str
description: str
criteria: str = ""
@@ -76,6 +78,7 @@ class ActualResult:
deviation: Description of how actual differed from plan (if any).
files_modified: Actual files that were modified.
"""
+
plan_id: str
achieved: bool
evidence: str = ""
@@ -95,6 +98,7 @@ class DeviationDetail:
impact: How the deviation affects the overall goal.
classification: Root cause type (intent/spec/code).
"""
+
plan_id: str
score: DeviationScore
what_differed: str = ""
@@ -120,6 +124,7 @@ class ReconciliationSummary:
decisions: Key decisions made during execution.
metadata: Arbitrary metadata from the reconciliation.
"""
+
plan_items: list[PlanItem]
actual_results: list[ActualResult]
deviations: list[DeviationDetail]
@@ -190,12 +195,14 @@ def reconcile(
for item in plan:
actual = actual_map.get(item.id)
if actual is None:
- deviations.append(DeviationDetail(
- plan_id=item.id,
- score=DeviationScore.GAP,
- what_differed="No result provided for this plan item.",
- impact="Plan item was not addressed.",
- ))
+ deviations.append(
+ DeviationDetail(
+ plan_id=item.id,
+ score=DeviationScore.GAP,
+ what_differed="No result provided for this plan item.",
+ impact="Plan item was not addressed.",
+ )
+ )
gap_count += 1
continue
@@ -286,10 +293,22 @@ def _classify_root_cause(
combined = evidence_lower + " " + deviation_lower
# Heuristic classification
- intent_signals = ("wrong approach", "should not have", "requirements changed",
- "misunderstood", "wrong goal", "different requirement")
- spec_signals = ("criteria wrong", "spec incorrect", "acceptance criteria",
- "test was wrong", "wrong assertion", "bad criteria")
+ intent_signals = (
+ "wrong approach",
+ "should not have",
+ "requirements changed",
+ "misunderstood",
+ "wrong goal",
+ "different requirement",
+ )
+ spec_signals = (
+ "criteria wrong",
+ "spec incorrect",
+ "acceptance criteria",
+ "test was wrong",
+ "wrong assertion",
+ "bad criteria",
+ )
if any(s in combined for s in intent_signals):
return "intent"
diff --git a/Gradata/src/gradata/contrib/patterns/reflection.py b/Gradata/src/gradata/contrib/patterns/reflection.py
index 44f0ebfe..a87da8ce 100644
--- a/Gradata/src/gradata/contrib/patterns/reflection.py
+++ b/Gradata/src/gradata/contrib/patterns/reflection.py
@@ -64,9 +64,7 @@ class Criterion:
def __post_init__(self) -> None:
if self.weight <= 0:
- raise ValueError(
- f"Criterion '{self.name}': weight must be > 0, got {self.weight}"
- )
+ raise ValueError(f"Criterion '{self.name}': weight must be > 0, got {self.weight}")
@dataclass
@@ -89,8 +87,7 @@ class CriterionScore:
def __post_init__(self) -> None:
if self.score is not None and not (0.0 <= self.score <= 10.0):
raise ValueError(
- f"CriterionScore '{self.name}': score must be in [0, 10], "
- f"got {self.score}"
+ f"CriterionScore '{self.name}': score must be in [0, 10], got {self.score}"
)
@@ -174,9 +171,7 @@ def __init__(self, *criteria: Criterion) -> None:
names = [c.name for c in criteria]
duplicates = {n for n in names if names.count(n) > 1}
if duplicates:
- raise ValueError(
- f"CritiqueChecklist: duplicate criterion names: {duplicates}"
- )
+ raise ValueError(f"CritiqueChecklist: duplicate criterion names: {duplicates}")
self._criteria: tuple[Criterion, ...] = criteria
# ------------------------------------------------------------------
@@ -213,11 +208,7 @@ def evaluate(
criterion_score = evaluator(output, criterion)
scores[criterion.name] = criterion_score
- all_required_passed = all(
- scores[c.name].passed
- for c in self._criteria
- if c.required
- )
+ all_required_passed = all(scores[c.name].passed for c in self._criteria if c.required)
overall_score = _weighted_average(self._criteria, scores)
return CritiqueResult(
@@ -302,9 +293,7 @@ def reflect(
)
# Collect failing scores to guide the refiner
- failed: list[CriterionScore] = [
- s for s in critique.scores.values() if not s.passed
- ]
+ failed: list[CriterionScore] = [s for s in critique.scores.values() if not s.passed]
# Only refine if there are cycles remaining
if cycle < max_cycles:
@@ -366,20 +355,26 @@ def default_evaluator(output: Any, criterion: Criterion) -> CriterionScore:
if name == "has_subject":
passed = "subject:" in text.lower()
- reason = (
- "Found 'Subject:' header." if passed
- else "No 'Subject:' header detected."
- )
+ reason = "Found 'Subject:' header." if passed else "No 'Subject:' header detected."
elif name == "has_cta":
cta_phrases = (
- "book", "schedule", "reply", "click", "visit",
- "call", "download", "sign up", "learn more", "get started",
+ "book",
+ "schedule",
+ "reply",
+ "click",
+ "visit",
+ "call",
+ "download",
+ "sign up",
+ "learn more",
+ "get started",
)
matched = next((p for p in cta_phrases if p in text.lower()), None)
passed = matched is not None
reason = (
- f"Call-to-action phrase found: '{matched}'." if passed
+ f"Call-to-action phrase found: '{matched}'."
+ if passed
else "No recognisable call-to-action phrase found."
)
@@ -387,30 +382,30 @@ def default_evaluator(output: Any, criterion: Criterion) -> CriterionScore:
word_count = len(text.split())
passed = word_count < 200
reason = (
- f"Word count {word_count} is within the 200-word limit." if passed
+ f"Word count {word_count} is within the 200-word limit."
+ if passed
else f"Word count {word_count} exceeds the 200-word limit."
)
elif name == "no_jargon":
jargon_tokens = (
- "synergy", "leverage", "paradigm", "disruptive",
- "holistic", "bandwidth", "circle back", "deep dive",
+ "synergy",
+ "leverage",
+ "paradigm",
+ "disruptive",
+ "holistic",
+ "bandwidth",
+ "circle back",
+ "deep dive",
)
found = [j for j in jargon_tokens if j in text.lower()]
passed = len(found) == 0
- reason = (
- "No jargon detected." if passed
- else f"Jargon detected: {found}."
- )
+ reason = "No jargon detected." if passed else f"Jargon detected: {found}."
else:
# Generic fallback: non-empty string
passed = isinstance(output, str) and len(output.strip()) > 0
- reason = (
- "Output is a non-empty string."
- if passed
- else "Output is empty or not a string."
- )
+ reason = "Output is a non-empty string." if passed else "Output is empty or not a string."
return CriterionScore(
name=criterion.name,
@@ -420,7 +415,6 @@ def default_evaluator(output: Any, criterion: Criterion) -> CriterionScore:
)
-
# ---------------------------------------------------------------------------
# Predefined checklists
# ---------------------------------------------------------------------------
@@ -530,10 +524,12 @@ def criteria_from_graduated_rules(task_type: str = "") -> list[Criterion]:
criteria = []
for rule in rules:
- criteria.append(Criterion(
- name=f"rule_{rule.category.lower()}_{len(criteria)}",
- question=f"Does the output follow this rule: {rule.principle}?",
- required=rule.is_rule_tier, # RULE tier = required, PATTERN = optional
- weight=rule.confidence,
- ))
+ criteria.append(
+ Criterion(
+ name=f"rule_{rule.category.lower()}_{len(criteria)}",
+ question=f"Does the output follow this rule: {rule.principle}?",
+ required=rule.is_rule_tier, # RULE tier = required, PATTERN = optional
+ weight=rule.confidence,
+ )
+ )
return criteria
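The reflect flow above is a generate, critique, refine loop with a cycle budget. A hedged sketch with stand-in callables:

    def reflect(generate, critique, refine, max_cycles=2):
        output = generate()
        for cycle in range(1, max_cycles + 1):
            passed, feedback = critique(output)
            if passed or cycle == max_cycles:
                return output, passed, cycle
            output = refine(output, feedback)

    out, ok, cycles = reflect(
        generate=lambda: "draft",
        critique=lambda o: ("subject:" in o, "add a Subject: line"),
        refine=lambda o, fb: "subject: hello\n" + o,
    )
    print(ok, cycles)  # True 2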
diff --git a/Gradata/src/gradata/contrib/patterns/sub_agents.py b/Gradata/src/gradata/contrib/patterns/sub_agents.py
index 3e3afda8..1c0e5a1c 100644
--- a/Gradata/src/gradata/contrib/patterns/sub_agents.py
+++ b/Gradata/src/gradata/contrib/patterns/sub_agents.py
@@ -33,14 +33,14 @@ class Delegation:
and how to know if it succeeded.
"""
- agent: str # agent type/name (e.g., "researcher", "writer", "critic")
- objective: str # one-sentence goal
- input_data: Any = None # data to pass to the agent
- output_format: str = "text" # expected output type hint
- success_criteria: str = "" # how to evaluate success
+ agent: str # agent type/name (e.g., "researcher", "writer", "critic")
+ objective: str # one-sentence goal
+ input_data: Any = None # data to pass to the agent
+ output_format: str = "text" # expected output type hint
+ success_criteria: str = "" # how to evaluate success
depends_on: list[str] = field(default_factory=list) # delegation IDs this depends on
timeout_seconds: int = 300
- id: str = "" # auto-assigned if empty
+ id: str = "" # auto-assigned if empty
def __post_init__(self) -> None:
if not self.id:
@@ -64,7 +64,7 @@ class OrchestratedResult:
"""Result of orchestrating multiple delegations."""
success: bool
- output: Any # synthesized final output
+ output: Any # synthesized final output
delegations_completed: int
delegations_total: int
delegation_results: list[DelegationResult] = field(default_factory=list)
@@ -84,10 +84,7 @@ def _topological_waves(delegations: list[Delegation]) -> list[list[Delegation]]:
waves: list[list[Delegation]] = []
while remaining:
- wave = [
- d for d in remaining
- if all(dep in completed for dep in d.depends_on)
- ]
+ wave = [d for d in remaining if all(dep in completed for dep in d.depends_on)]
if not wave:
# Circular dependency — break by taking first remaining
wave = [remaining[0]]
@@ -137,35 +134,41 @@ def orchestrate(
handler = handlers.get(delegation.agent, default_handler)
if handler is None:
- results.append(DelegationResult(
- delegation_id=delegation.id,
- agent=delegation.agent,
- success=False,
- error=f"No handler for agent '{delegation.agent}'",
- ))
+ results.append(
+ DelegationResult(
+ delegation_id=delegation.id,
+ agent=delegation.agent,
+ success=False,
+ error=f"No handler for agent '{delegation.agent}'",
+ )
+ )
continue
start = time.perf_counter()
try:
output = handler(delegation, context)
duration = (time.perf_counter() - start) * 1000
- results.append(DelegationResult(
- delegation_id=delegation.id,
- agent=delegation.agent,
- success=True,
- output=output,
- duration_ms=round(duration, 2),
- ))
+ results.append(
+ DelegationResult(
+ delegation_id=delegation.id,
+ agent=delegation.agent,
+ success=True,
+ output=output,
+ duration_ms=round(duration, 2),
+ )
+ )
context[delegation.id] = output
except Exception as e:
duration = (time.perf_counter() - start) * 1000
- results.append(DelegationResult(
- delegation_id=delegation.id,
- agent=delegation.agent,
- success=False,
- error=str(e),
- duration_ms=round(duration, 2),
- ))
+ results.append(
+ DelegationResult(
+ delegation_id=delegation.id,
+ agent=delegation.agent,
+ success=False,
+ error=str(e),
+ duration_ms=round(duration, 2),
+ )
+ )
execution_order.append(wave_ids)
@@ -202,7 +205,6 @@ def orchestrate(
)
-
# ---------------------------------------------------------------------------
# Agent definition loading (extracted from brain/scripts/spawn.py)
# ---------------------------------------------------------------------------
@@ -293,6 +295,7 @@ def load_agent_definition(
# Inter-agent handoff management (extracted from brain/scripts/spawn.py)
# ---------------------------------------------------------------------------
+
def create_handoff(
task_id: str,
agent_name: str,
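A single-wave sketch of the handler-dispatch loop above, without timeouts or dependency ordering; the delegation and handler shapes are simplified assumptions:

    def orchestrate(delegations, handlers, default_handler=None):
        context, results = {}, []
        for d in delegations:
            handler = handlers.get(d["agent"], default_handler)
            if handler is None:
                results.append({"id": d["id"], "success": False,
                                "error": f"No handler for agent '{d['agent']}'"})
                continue
            try:
                output = handler(d, context)
                context[d["id"]] = output  # downstream delegations can read it
                results.append({"id": d["id"], "success": True, "output": output})
            except Exception as e:
                results.append({"id": d["id"], "success": False, "error": str(e)})
        return results

    print(orchestrate(
        [{"id": "r1", "agent": "researcher"}, {"id": "w1", "agent": "writer"}],
        {"researcher": lambda d, ctx: "notes",
         "writer": lambda d, ctx: f"draft from {ctx['r1']}"},
    ))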
diff --git a/Gradata/src/gradata/contrib/patterns/task_escalation.py b/Gradata/src/gradata/contrib/patterns/task_escalation.py
index 8de40b7d..0911e4ba 100644
--- a/Gradata/src/gradata/contrib/patterns/task_escalation.py
+++ b/Gradata/src/gradata/contrib/patterns/task_escalation.py
@@ -51,6 +51,7 @@ class TaskStatus(Enum):
BLOCKED: Cannot complete — structural impediment. Stops
execution and reports what blocks progress.
"""
+
DONE = "done"
DONE_WITH_CONCERNS = "done_with_concerns"
NEEDS_CONTEXT = "needs_context"
@@ -72,6 +73,7 @@ class TaskOutcome:
files_modified: Files changed during execution.
metadata: Arbitrary metadata.
"""
+
status: TaskStatus
task_id: str = ""
description: str = ""
@@ -123,8 +125,7 @@ def report_outcome(
if status == TaskStatus.DONE_WITH_CONCERNS and not concerns:
raise ValueError(
- "DONE_WITH_CONCERNS requires at least one concern. "
- "Use DONE if there are no concerns."
+ "DONE_WITH_CONCERNS requires at least one concern. Use DONE if there are no concerns."
)
if status == TaskStatus.NEEDS_CONTEXT and not missing_context:
@@ -134,10 +135,7 @@ def report_outcome(
)
if status == TaskStatus.BLOCKED and not blockers:
- raise ValueError(
- "BLOCKED requires at least one blocker. "
- "Specify what prevents progress."
- )
+ raise ValueError("BLOCKED requires at least one blocker. Specify what prevents progress.")
return TaskOutcome(
status=status,
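A standalone restatement of the escalation contract enforced above; the real function returns a TaskOutcome, and the NEEDS_CONTEXT message text here is an assumption since the diff truncates it:

    def report_outcome(status, concerns=(), missing_context=(), blockers=()):
        # Statuses that escalate must carry their evidence.
        if status == "done_with_concerns" and not concerns:
            raise ValueError(
                "DONE_WITH_CONCERNS requires at least one concern. Use DONE if there are no concerns."
            )
        if status == "needs_context" and not missing_context:
            raise ValueError("NEEDS_CONTEXT requires naming the missing context.")
        if status == "blocked" and not blockers:
            raise ValueError("BLOCKED requires at least one blocker. Specify what prevents progress.")
        return {"status": status, "concerns": list(concerns),
                "missing_context": list(missing_context), "blockers": list(blockers)}

    print(report_outcome("blocked", blockers=["missing API credentials"]))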
diff --git a/Gradata/src/gradata/contrib/patterns/tools.py b/Gradata/src/gradata/contrib/patterns/tools.py
index 73f97703..72f4fc3c 100644
--- a/Gradata/src/gradata/contrib/patterns/tools.py
+++ b/Gradata/src/gradata/contrib/patterns/tools.py
@@ -42,8 +42,8 @@ class ToolSpec:
class PlannedStep:
"""A single step in an execution plan."""
- tool: str # tool name
- purpose: str # why this step is needed
+ tool: str # tool name
+ purpose: str # why this step is needed
params: dict[str, Any] = field(default_factory=dict)
depends_on: list[int] = field(default_factory=list) # step indices
@@ -119,8 +119,7 @@ def search(self, query: str) -> list[ToolSpec]:
"""Search tools by name or description keyword."""
q = query.lower()
return [
- t for t in self._tools.values()
- if q in t.name.lower() or q in t.description.lower()
+ t for t in self._tools.values() if q in t.name.lower() or q in t.description.lower()
]
def execute(
@@ -142,7 +141,8 @@ def execute(
handler = self._handlers.get(name)
if handler is None:
return ToolResult(
- tool=name, success=False,
+ tool=name,
+ success=False,
error=f"No handler registered for '{name}'",
)
@@ -152,13 +152,19 @@ def execute(
try:
output = handler(**params)
return ToolResult(
- tool=name, success=True, output=output, retries=attempt,
+ tool=name,
+ success=True,
+ output=output,
+ retries=attempt,
)
except Exception as e:
last_error = str(e)
return ToolResult(
- tool=name, success=False, error=last_error, retries=max_retries,
+ tool=name,
+ success=False,
+ error=last_error,
+ retries=max_retries,
)
def plan(self, task: str) -> ExecutionPlan:
@@ -175,10 +181,12 @@ def plan(self, task: str) -> ExecutionPlan:
desc_words = set(tool.description.lower().split())
task_words = set(task_lower.split())
if desc_words & task_words:
- steps.append(PlannedStep(
- tool=tool.name,
- purpose=f"Use {tool.name}: {tool.description}",
- ))
+ steps.append(
+ PlannedStep(
+ tool=tool.name,
+ purpose=f"Use {tool.name}: {tool.description}",
+ )
+ )
return ExecutionPlan(steps=steps, task=task)
@@ -189,5 +197,3 @@ def stats(self) -> dict[str, Any]:
"with_handlers": len(self._handlers),
"categories": self.categories(),
}
-
-
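The execute path above records retries=attempt on success, which implies a bounded retry loop; a sketch under that assumption:

    def execute_with_retries(handler, params, max_retries=2):
        # Try the handler up to max_retries + 1 times, keeping the last error.
        last_error = None
        for attempt in range(max_retries + 1):
            try:
                return {"success": True, "output": handler(**params), "retries": attempt}
            except Exception as e:
                last_error = str(e)
        return {"success": False, "error": last_error, "retries": max_retries}

    flaky = iter([ValueError("transient"), "ok"])

    def handler():
        item = next(flaky)
        if isinstance(item, Exception):
            raise item
        return item

    print(execute_with_retries(handler, {}))  # succeeds on the second attempt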
diff --git a/Gradata/src/gradata/contrib/patterns/tree_of_thoughts.py b/Gradata/src/gradata/contrib/patterns/tree_of_thoughts.py
index d17e5672..475f0e63 100644
--- a/Gradata/src/gradata/contrib/patterns/tree_of_thoughts.py
+++ b/Gradata/src/gradata/contrib/patterns/tree_of_thoughts.py
@@ -15,6 +15,7 @@
@dataclass
class Thought:
"""A single candidate in the exploration tree."""
+
content: str
score: float = 0.0
rationale: str = ""
@@ -28,6 +29,7 @@ def is_leaf(self) -> bool:
@dataclass
class ToTResult:
"""Result of Tree of Thoughts exploration."""
+
best: Thought
alternatives: list[Thought]
depth: int
@@ -120,6 +122,7 @@ def evaluate_rule_candidates(
"""
effective_scorer: Callable[[str], tuple[float, str]]
if scorer is None:
+
def _default_scorer(candidate: str) -> tuple[float, str]:
# Heuristic: shorter, more specific rules score higher
words = candidate.split()
@@ -131,7 +134,11 @@ def _default_scorer(candidate: str) -> tuple[float, str]:
if len(common) > 5:
overlap_penalty += 0.2
score = round(length_score - overlap_penalty, 4)
- return (max(0.0, min(1.0, score)), f"length={len(words)}, overlap_penalty={overlap_penalty:.2f}")
+ return (
+ max(0.0, min(1.0, score)),
+ f"length={len(words)}, overlap_penalty={overlap_penalty:.2f}",
+ )
+
effective_scorer = _default_scorer
else:
effective_scorer = scorer
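The default scorer above penalises overlap with existing rules; the diff truncates the length heuristic, so the length_score formula below is an assumption consistent with the "shorter, more specific rules score higher" comment:

    def score_rule(candidate: str, existing_rules: list[str]) -> float:
        words = candidate.split()
        length_score = max(0.0, 1.0 - len(words) / 50.0)  # assumed heuristic
        overlap_penalty = 0.0
        for rule in existing_rules:
            # 0.2 penalty per existing rule sharing more than five words.
            if len(set(words) & set(rule.split())) > 5:
                overlap_penalty += 0.2
        return max(0.0, min(1.0, round(length_score - overlap_penalty, 4)))

    print(score_rule("never hard-code API keys in source", []))  # 0.88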
diff --git a/Gradata/src/gradata/correction_detector.py b/Gradata/src/gradata/correction_detector.py
index dc6ea690..0bf3ae26 100644
--- a/Gradata/src/gradata/correction_detector.py
+++ b/Gradata/src/gradata/correction_detector.py
@@ -39,34 +39,84 @@
# Direct negation of AI output
(re.compile(r"no[,.]?\s*(not\s+)?(that|this|like that)", re.IGNORECASE), 0.85, "negation"),
# Instruction to change
- (re.compile(r"(change|fix|update|replace)\s+(this|that|it)\s+to", re.IGNORECASE), 0.90, "change_instruction"),
+ (
+ re.compile(r"(change|fix|update|replace)\s+(this|that|it)\s+to", re.IGNORECASE),
+ 0.90,
+ "change_instruction",
+ ),
# Prohibition
- (re.compile(r"don'?t\s+(do|use|include|add|write|say|put|make)", re.IGNORECASE), 0.92, "prohibition"),
+ (
+ re.compile(r"don'?t\s+(do|use|include|add|write|say|put|make)", re.IGNORECASE),
+ 0.92,
+ "prohibition",
+ ),
# Wrong/incorrect labels
- (re.compile(r"\b(wrong|incorrect|inaccurate|not right|not correct)\b", re.IGNORECASE), 0.88, "wrong_label"),
+ (
+ re.compile(r"\b(wrong|incorrect|inaccurate|not right|not correct)\b", re.IGNORECASE),
+ 0.88,
+ "wrong_label",
+ ),
# Stop/never directives
- (re.compile(r"(stop|quit|never)\s+(doing|using|writing|adding|putting|making)", re.IGNORECASE), 0.90, "stop_directive"),
+ (
+ re.compile(
+ r"(stop|quit|never)\s+(doing|using|writing|adding|putting|making)", re.IGNORECASE
+ ),
+ 0.90,
+ "stop_directive",
+ ),
# Redo requests
- (re.compile(r"\b(redo|rewrite|start over|try again|do it again)\b", re.IGNORECASE), 0.85, "redo_request"),
+ (
+ re.compile(r"\b(redo|rewrite|start over|try again|do it again)\b", re.IGNORECASE),
+ 0.85,
+ "redo_request",
+ ),
# Too much/little
- (re.compile(r"\btoo\s+(long|short|verbose|brief|formal|casual|aggressive|soft)\b", re.IGNORECASE), 0.80, "degree_correction"),
+ (
+ re.compile(
+ r"\btoo\s+(long|short|verbose|brief|formal|casual|aggressive|soft)\b", re.IGNORECASE
+ ),
+ 0.80,
+ "degree_correction",
+ ),
# Remove/delete requests
- (re.compile(r"\b(remove|delete|drop|cut|get rid of)\s+(the|this|that|all)", re.IGNORECASE), 0.82, "removal"),
+ (
+ re.compile(r"\b(remove|delete|drop|cut|get rid of)\s+(the|this|that|all)", re.IGNORECASE),
+ 0.82,
+ "removal",
+ ),
]
_IMPLICIT_PATTERNS: list[tuple[re.Pattern, float, str]] = [
# Redirect with "actually", "instead", "rather"
(re.compile(r"\b(actually|instead|rather)[,.]?\s", re.IGNORECASE), 0.65, "redirect"),
# Should-be directives
- (re.compile(r"(should\s+be|needs\s+to\s+be|make\s+it|make\s+this)", re.IGNORECASE), 0.70, "should_be"),
+ (
+ re.compile(r"(should\s+be|needs\s+to\s+be|make\s+it|make\s+this)", re.IGNORECASE),
+ 0.70,
+ "should_be",
+ ),
# Reference to prior instruction
- (re.compile(r"I\s+(said|told\s+you|asked\s+for|wanted|meant)", re.IGNORECASE), 0.75, "prior_reference"),
+ (
+ re.compile(r"I\s+(said|told\s+you|asked\s+for|wanted|meant)", re.IGNORECASE),
+ 0.75,
+ "prior_reference",
+ ),
# Preference expression
- (re.compile(r"I\s+(prefer|want|need|like)\s+(it\s+)?(to\s+be\s+)?", re.IGNORECASE), 0.60, "preference"),
+ (
+ re.compile(r"I\s+(prefer|want|need|like)\s+(it\s+)?(to\s+be\s+)?", re.IGNORECASE),
+ 0.60,
+ "preference",
+ ),
# But/however (often precedes a correction)
(re.compile(r"\b(but|however)[,.]?\s+(the|this|that|it|you)", re.IGNORECASE), 0.55, "contrast"),
# More/less directive
- (re.compile(r"\b(more|less)\s+(concise|detailed|specific|general|formal|casual)", re.IGNORECASE), 0.68, "degree_adjust"),
+ (
+ re.compile(
+ r"\b(more|less)\s+(concise|detailed|specific|general|formal|casual)", re.IGNORECASE
+ ),
+ 0.68,
+ "degree_adjust",
+ ),
]
# ---------------------------------------------------------------------------
@@ -78,24 +128,64 @@
_TYPE_KEYWORD_PATTERNS: list[tuple[re.Pattern, str]] = [
(re.compile(r"\bhallucin|made\s+up|doesn'?t\s+exist\b", re.IGNORECASE), "hallucination"),
(re.compile(r"\b(wrong|incorrect|inaccurate|false)\b", re.IGNORECASE), "factual_error"),
- (re.compile(r"\b(tone|warm|cold|formal|casual|friendly|harsh|aggressive|soft)\b", re.IGNORECASE), "tone"),
+ (
+ re.compile(
+ r"\b(tone|warm|cold|formal|casual|friendly|harsh|aggressive|soft)\b", re.IGNORECASE
+ ),
+ "tone",
+ ),
# format before style — layout/heading/structure are format, not style
- (re.compile(r"\b(format|layout|structure|heading|indent|spacing|align)\b", re.IGNORECASE), "format"),
+ (
+ re.compile(r"\b(format|layout|structure|heading|indent|spacing|align)\b", re.IGNORECASE),
+ "format",
+ ),
(re.compile(r"\b(style|dash(?:es)?|emoji|bold|italic|bullet|font)\b", re.IGNORECASE), "style"),
- (re.compile(r"\b(missing|forgot|omit|skip|left\s+out|didn'?t\s+include)\b", re.IGNORECASE), "omission"),
- (re.compile(r"\b(approach|method|strategy|workflow|process|tactic|technique)\b", re.IGNORECASE), "approach"),
- (re.compile(r"\b(scope|domain|context|only\s+for|not\s+for|outside)\b", re.IGNORECASE), "scope"),
+ (
+ re.compile(r"\b(missing|forgot|omit|skip|left\s+out|didn'?t\s+include)\b", re.IGNORECASE),
+ "omission",
+ ),
+ (
+ re.compile(
+ r"\b(approach|method|strategy|workflow|process|tactic|technique)\b", re.IGNORECASE
+ ),
+ "approach",
+ ),
+ (
+ re.compile(r"\b(scope|domain|context|only\s+for|not\s+for|outside)\b", re.IGNORECASE),
+ "scope",
+ ),
]
# Domain keyword → domain name mapping.
_DOMAIN_KEYWORD_PATTERNS: list[tuple[re.Pattern, str]] = [
- (re.compile(r"\b(email|subject\s+line|inbox|reply|thread|sender|recipient)\b", re.IGNORECASE), "email"),
- (re.compile(r"\b(code|function|class|method|variable|import|test|pytest|lint)\b", re.IGNORECASE), "code"),
+ (
+ re.compile(
+ r"\b(email|subject\s+line|inbox|reply|thread|sender|recipient)\b", re.IGNORECASE
+ ),
+ "email",
+ ),
+ (
+ re.compile(
+ r"\b(code|function|class|method|variable|import|test|pytest|lint)\b", re.IGNORECASE
+ ),
+ "code",
+ ),
# deploy before sales — "pipeline" and "workflow" are deploy terms; sales uses "campaign/prospect/lead/deal"
- (re.compile(r"\b(deploy|railway|docker|ci|cd|build|pipeline|workflow|action)\b", re.IGNORECASE), "deploy"),
+ (
+ re.compile(
+ r"\b(deploy|railway|docker|ci|cd|build|pipeline|workflow|action)\b", re.IGNORECASE
+ ),
+ "deploy",
+ ),
(re.compile(r"\b(sales|prospect|lead|deal|outreach|campaign|crm)\b", re.IGNORECASE), "sales"),
- (re.compile(r"\b(api|endpoint|route|request|response|rest|graphql|http)\b", re.IGNORECASE), "api"),
- (re.compile(r"\b(database|db|sql|query|schema|table|migration|supabase)\b", re.IGNORECASE), "database"),
+ (
+ re.compile(r"\b(api|endpoint|route|request|response|rest|graphql|http)\b", re.IGNORECASE),
+ "api",
+ ),
+ (
+ re.compile(r"\b(database|db|sql|query|schema|table|migration|supabase)\b", re.IGNORECASE),
+ "database",
+ ),
(re.compile(r"\b(doc|document|readme|spec|design|architecture|plan)\b", re.IGNORECASE), "docs"),
]
@@ -228,6 +318,7 @@ class CorrectionContext:
signal_details: List of (signal_type, matched_text, confidence) tuples.
implied_changes: What the user wants changed (extracted from text).
"""
+
is_correction: bool
confidence: float
signals: list[str]
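A two-pattern sketch of the detector above; taking the maximum confidence across hits is an assumption, since the aggregation step is not shown in the diff:

    import re

    _PATTERNS = [
        (re.compile(r"don'?t\s+(do|use|include|add|write|say|put|make)", re.IGNORECASE),
         0.92, "prohibition"),
        (re.compile(r"\b(actually|instead|rather)[,.]?\s", re.IGNORECASE),
         0.65, "redirect"),
    ]

    def detect(text: str) -> dict:
        hits = [(name, m.group(0), conf)
                for rx, conf, name in _PATTERNS if (m := rx.search(text))]
        return {
            "is_correction": bool(hits),
            "confidence": max((c for _, _, c in hits), default=0.0),
            "signals": hits,
        }

    print(detect("Actually, don't include the footer."))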
diff --git a/Gradata/src/gradata/daemon.py b/Gradata/src/gradata/daemon.py
index 4910db32..fc051893 100644
--- a/Gradata/src/gradata/daemon.py
+++ b/Gradata/src/gradata/daemon.py
@@ -60,13 +60,34 @@
# ── Category detection from file extension ─────────────────────────────
_EXT_CATEGORY: dict[str, str] = {
- ".py": "CODE", ".js": "CODE", ".ts": "CODE", ".tsx": "CODE", ".jsx": "CODE",
- ".rs": "CODE", ".go": "CODE", ".java": "CODE", ".rb": "CODE", ".c": "CODE",
- ".cpp": "CODE", ".h": "CODE", ".cs": "CODE", ".swift": "CODE", ".kt": "CODE",
- ".md": "CONTENT", ".txt": "CONTENT", ".rst": "CONTENT",
- ".json": "CONFIG", ".yaml": "CONFIG", ".yml": "CONFIG", ".toml": "CONFIG",
- ".ini": "CONFIG", ".env": "CONFIG",
- ".html": "FRONTEND", ".css": "FRONTEND", ".scss": "FRONTEND", ".vue": "FRONTEND",
+ ".py": "CODE",
+ ".js": "CODE",
+ ".ts": "CODE",
+ ".tsx": "CODE",
+ ".jsx": "CODE",
+ ".rs": "CODE",
+ ".go": "CODE",
+ ".java": "CODE",
+ ".rb": "CODE",
+ ".c": "CODE",
+ ".cpp": "CODE",
+ ".h": "CODE",
+ ".cs": "CODE",
+ ".swift": "CODE",
+ ".kt": "CODE",
+ ".md": "CONTENT",
+ ".txt": "CONTENT",
+ ".rst": "CONTENT",
+ ".json": "CONFIG",
+ ".yaml": "CONFIG",
+ ".yml": "CONFIG",
+ ".toml": "CONFIG",
+ ".ini": "CONFIG",
+ ".env": "CONFIG",
+ ".html": "FRONTEND",
+ ".css": "FRONTEND",
+ ".scss": "FRONTEND",
+ ".vue": "FRONTEND",
".svelte": "FRONTEND",
}
@@ -83,14 +104,17 @@ def _category_from_path(file_path: str) -> str:
# ── Threaded HTTP server ────────────────────────────────────────────────
+
class _ThreadingHTTPServer(ThreadingMixIn, HTTPServer):
"""HTTPServer that handles each request in a new thread."""
+
daemon_threads = True
allow_reuse_address = True
# ── Request handler ─────────────────────────────────────────────────────
+
class _Handler(BaseHTTPRequestHandler):
"""Routes requests to the parent GradataDaemon instance."""
@@ -156,21 +180,21 @@ def _handle_health(self) -> None:
d = self.daemon
with d._brain_lock:
lessons = d._brain._load_lessons()
- rules_count = sum(
- 1 for lesson in lessons if lesson.state.name == "RULE"
- )
+ rules_count = sum(1 for lesson in lessons if lesson.state.name == "RULE")
lessons_count = len(lessons)
uptime = time.monotonic() - d._started_mono
- self._send_json({
- "status": "ok",
- "sdk_version": gradata.__version__,
- "brain_dir": str(d._brain.dir),
- "uptime_seconds": round(uptime, 2),
- "active_sessions": len(d._sessions),
- "rules_count": rules_count,
- "lessons_count": lessons_count,
- })
+ self._send_json(
+ {
+ "status": "ok",
+ "sdk_version": gradata.__version__,
+ "brain_dir": str(d._brain.dir),
+ "uptime_seconds": round(uptime, 2),
+ "active_sessions": len(d._sessions),
+ "rules_count": rules_count,
+ "lessons_count": lessons_count,
+ }
+ )
def _handle_apply_rules(self) -> None:
self.daemon._reset_idle_timer()
@@ -202,13 +226,15 @@ def _handle_apply_rules(self) -> None:
rules_out = []
fired_ids = []
for ar in applied:
- rules_out.append({
- "rule_id": ar.rule_id,
- "tier": ar.lesson.state.value,
- "category": ar.lesson.category,
- "instruction": ar.instruction,
- "relevance": ar.relevance,
- })
+ rules_out.append(
+ {
+ "rule_id": ar.rule_id,
+ "tier": ar.lesson.state.value,
+ "category": ar.lesson.category,
+ "instruction": ar.instruction,
+ "relevance": ar.relevance,
+ }
+ )
fired_ids.append(ar.rule_id)
# Store fired rule IDs and instruction tokens for acceptance tracking
@@ -220,13 +246,15 @@ def _handle_apply_rules(self) -> None:
mode, mode_conf = classify_mode(prompt)
- self._send_json({
- "rules": rules_out,
- "injection_text": injection_text,
- "mode_detected": mode,
- "mode_confidence": mode_conf,
- "fired_rule_ids": fired_ids,
- })
+ self._send_json(
+ {
+ "rules": rules_out,
+ "injection_text": injection_text,
+ "mode_detected": mode,
+ "mode_confidence": mode_conf,
+ "fired_rule_ids": fired_ids,
+ }
+ )
def _handle_correct(self) -> None:
self.daemon._reset_idle_timer()
@@ -307,18 +335,20 @@ def _handle_correct(self) -> None:
break
# Build response
- self._send_json({
- "captured": True,
- "severity": result.get("severity", "unknown"),
- "instruction_extracted": result.get("instruction", ""),
- "lesson_created": result.get("lesson_created", False),
- "lesson_state": result.get("lesson_state", "INSTINCT"),
- "misfired_rules": misfired,
- "accepted_rules": [],
- "addition_detected": addition_detected,
- "addition_lesson": addition_lesson,
- "correction_conflict": correction_conflict,
- })
+ self._send_json(
+ {
+ "captured": True,
+ "severity": result.get("severity", "unknown"),
+ "instruction_extracted": result.get("instruction", ""),
+ "lesson_created": result.get("lesson_created", False),
+ "lesson_state": result.get("lesson_state", "INSTINCT"),
+ "misfired_rules": misfired,
+ "accepted_rules": [],
+ "addition_detected": addition_detected,
+ "addition_lesson": addition_lesson,
+ "correction_conflict": correction_conflict,
+ }
+ )
def _handle_detect(self) -> None:
self.daemon._reset_idle_timer()
@@ -333,7 +363,8 @@ def _handle_detect(self) -> None:
try:
with d._brain_lock:
result = d._brain.detect_implicit_feedback(
- user_message, session=session_num,
+ user_message,
+ session=session_num,
)
except Exception as exc:
logger.warning("detect_implicit_feedback failed: %s", exc)
@@ -348,16 +379,18 @@ def _handle_detect(self) -> None:
mode, mode_conf = classify_mode(user_message)
- self._send_json({
- "implicit_feedback": {
- "detected": detected,
- "signals": signals,
- "related_rules": related_rules,
- "action_taken": "logged" if detected else None,
- },
- "mode": mode,
- "mode_confidence": mode_conf,
- })
+ self._send_json(
+ {
+ "implicit_feedback": {
+ "detected": detected,
+ "signals": signals,
+ "related_rules": related_rules,
+ "action_taken": "logged" if detected else None,
+ },
+ "mode": mode,
+ "mode_confidence": mode_conf,
+ }
+ )
def _handle_end_session(self) -> None:
self.daemon._reset_idle_timer()
@@ -389,14 +422,16 @@ def _handle_end_session(self) -> None:
except Exception:
convergence = {}
- self._send_json({
- "corrections_captured": result.get("corrections_captured", 0),
- "instructions_extracted": result.get("instructions_extracted", 0),
- "lessons_graduated": result.get("lessons_graduated", 0),
- "meta_rules_synthesized": result.get("meta_rules_synthesized", 0),
- "convergence": convergence,
- "cross_project_candidates": [],
- })
+ self._send_json(
+ {
+ "corrections_captured": result.get("corrections_captured", 0),
+ "instructions_extracted": result.get("instructions_extracted", 0),
+ "lessons_graduated": result.get("lessons_graduated", 0),
+ "meta_rules_synthesized": result.get("meta_rules_synthesized", 0),
+ "convergence": convergence,
+ "cross_project_candidates": [],
+ }
+ )
# ── Extended endpoint handlers ─────────────────────────────────────
@@ -424,11 +459,13 @@ def _handle_brain_recall(self) -> None:
except Exception as e:
logger.exception("brain-recall search failed: %s", e)
- self._send_json({
- "context": "\n".join(context_parts),
- "relevant_rules": relevant_rules,
- "relevant_corrections": [],
- })
+ self._send_json(
+ {
+ "context": "\n".join(context_parts),
+ "relevant_rules": relevant_rules,
+ "relevant_corrections": [],
+ }
+ )
def _handle_enforce_rules(self) -> None:
self.daemon._reset_idle_timer()
@@ -452,16 +489,20 @@ def _handle_enforce_rules(self) -> None:
keywords = [w for w in never_what.split() if len(w) > 3]
if any(kw in content_lower for kw in keywords):
desc_hash = hashlib.sha256(rule.description.encode()).hexdigest()[:8]
- violations.append({
- "rule_id": f"{rule.category}:{desc_hash}",
- "description": rule.description,
- "severity": "warn",
- })
-
- self._send_json({
- "violations": violations,
- "pass": len(violations) == 0,
- })
+ violations.append(
+ {
+ "rule_id": f"{rule.category}:{desc_hash}",
+ "description": rule.description,
+ "severity": "warn",
+ }
+ )
+
+ self._send_json(
+ {
+ "violations": violations,
+ "pass": len(violations) == 0,
+ }
+ )
def _handle_log_event(self) -> None:
self.daemon._reset_idle_timer()
@@ -530,20 +571,31 @@ def _handle_checkpoint(self) -> None:
try:
with d._brain_lock:
lessons = d._brain._load_lessons()
- pending = sum(1 for le in lessons
- if le.state in (LessonState.INSTINCT, LessonState.PATTERN))
- d._brain.emit("CHECKPOINT", "plugin.pre_compact", {
- "session_id": session_id, "reason": reason, "pending_lessons": pending,
- })
+ pending = sum(
+ 1 for le in lessons if le.state in (LessonState.INSTINCT, LessonState.PATTERN)
+ )
+ d._brain.emit(
+ "CHECKPOINT",
+ "plugin.pre_compact",
+ {
+ "session_id": session_id,
+ "reason": reason,
+ "pending_lessons": pending,
+ },
+ )
except Exception as e:
- logger.exception("checkpoint failed for session_id=%s, reason=%s: %s", session_id, reason, e)
+ logger.exception(
+ "checkpoint failed for session_id=%s, reason=%s: %s", session_id, reason, e
+ )
checkpointed = False
- self._send_json({
- "checkpointed": checkpointed,
- "pending_lessons": pending,
- "unsaved_corrections": 0,
- })
+ self._send_json(
+ {
+ "checkpointed": checkpointed,
+ "pending_lessons": pending,
+ "unsaved_corrections": 0,
+ }
+ )
def _handle_maintain(self) -> None:
self.daemon._reset_idle_timer()
@@ -574,15 +626,18 @@ def _handle_maintain(self) -> None:
failed.append(task_name)
duration_ms = round((time.monotonic() - start) * 1000)
- self._send_json({
- "completed": completed,
- "failed": failed,
- "duration_ms": duration_ms,
- })
+ self._send_json(
+ {
+ "completed": completed,
+ "failed": failed,
+ "duration_ms": duration_ms,
+ }
+ )
# ── Main daemon class ──────────────────────────────────────────────────
+
class GradataDaemon:
"""Long-lived HTTP daemon that holds a Brain in memory.
@@ -749,7 +804,9 @@ def _maybe_send_telemetry(self) -> None:
except FileNotFoundError:
return
- if not re.search(r"^\s*telemetry\s*=\s*true\s*$", config_text, re.IGNORECASE | re.MULTILINE):
+ if not re.search(
+ r"^\s*telemetry\s*=\s*true\s*$", config_text, re.IGNORECASE | re.MULTILINE
+ ):
return
match = re.search(r'telemetry_last_sent\s*=\s*"([^"]+)"', config_text)
@@ -764,6 +821,7 @@ def _maybe_send_telemetry(self) -> None:
def _send() -> None:
import platform
import urllib.request
+
rules_count = 0
lessons_count = 0
try:
@@ -773,13 +831,15 @@ def _send() -> None:
rules_count = sum(1 for lesson in lessons if lesson.state.name == "RULE")
except Exception as e:
logger.exception("telemetry: failed to load lessons: %s", e)
- payload = json.dumps({
- "sdk_version": gradata.__version__,
- "rules_count": rules_count,
- "lessons_count": lessons_count,
- "os": platform.system().lower(),
- "python_version": platform.python_version(),
- }).encode()
+ payload = json.dumps(
+ {
+ "sdk_version": gradata.__version__,
+ "rules_count": rules_count,
+ "lessons_count": lessons_count,
+ "os": platform.system().lower(),
+ "python_version": platform.python_version(),
+ }
+ ).encode()
try:
req = urllib.request.Request(
"https://api.gradata.com/telemetry",
@@ -815,6 +875,7 @@ def port(self) -> int:
# ── Port allocation ─────────────────────────────────────────────────────
+
def _pick_port(brain_dir_str: str) -> int:
"""Deterministic port from brain_dir hash: hash % 16383 + 49152."""
return abs(hash(brain_dir_str)) % 16383 + 49152
@@ -822,6 +883,7 @@ def _pick_port(brain_dir_str: str) -> int:
# ── PID file ────────────────────────────────────────────────────────────
+
def _write_pid_file(
pid_file: Path,
port: int,
@@ -842,6 +904,7 @@ def _write_pid_file(
# ── Logging setup ───────────────────────────────────────────────────────
+
def _setup_logging(brain_dir: Path) -> None:
log_dir = brain_dir / "logs"
log_dir.mkdir(parents=True, exist_ok=True)
@@ -851,9 +914,7 @@ def _setup_logging(brain_dir: Path) -> None:
backupCount=3,
encoding="utf-8",
)
- handler.setFormatter(logging.Formatter(
- "%(asctime)s %(levelname)s %(name)s: %(message)s"
- ))
+ handler.setFormatter(logging.Formatter("%(asctime)s %(levelname)s %(name)s: %(message)s"))
root_logger = logging.getLogger("gradata")
root_logger.addHandler(handler)
root_logger.setLevel(logging.DEBUG)
@@ -861,8 +922,10 @@ def _setup_logging(brain_dir: Path) -> None:
# ── Signal handling ─────────────────────────────────────────────────────
+
def _register_signal_handler(daemon: GradataDaemon) -> None:
"""Register SIGTERM to cleanly shut down the daemon."""
+
def _handler(signum: int, _frame: object) -> None:
logger.info("Received signal %d, shutting down.", signum)
daemon.stop()
@@ -879,6 +942,7 @@ def _handler(signum: int, _frame: object) -> None:
# ── CLI entrypoint ──────────────────────────────────────────────────────
+
def main() -> None:
parser = argparse.ArgumentParser(description="Gradata daemon HTTP server")
parser.add_argument("--brain-dir", required=True, help="Path to the brain directory")
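One caveat on _pick_port above: Python salts hash() for strings per process (PYTHONHASHSEED), so the port is only deterministic within a single run unless the seed is pinned. A digest-based variant is stable across restarts; a sketch:

    import hashlib

    def pick_port(brain_dir_str: str) -> int:
        # sha256 is stable across processes, unlike the salted built-in hash().
        digest = int(hashlib.sha256(brain_dir_str.encode()).hexdigest(), 16)
        return digest % 16383 + 49152  # same 49152..65534 range as above

    print(pick_port("/home/user/.gradata/brain"))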
diff --git a/Gradata/src/gradata/detection/addition_pattern.py b/Gradata/src/gradata/detection/addition_pattern.py
index 030d53b4..a819dae5 100644
--- a/Gradata/src/gradata/detection/addition_pattern.py
+++ b/Gradata/src/gradata/detection/addition_pattern.py
@@ -59,13 +59,26 @@ def is_addition(old: str, new: str, min_added_chars: int = 10) -> bool:
# Extension → high-level category
_EXT_CATEGORY: dict[str, str] = {
- ".py": "python", ".pyi": "python",
- ".js": "javascript", ".jsx": "javascript", ".mjs": "javascript",
- ".ts": "typescript", ".tsx": "typescript",
- ".rs": "rust", ".go": "go", ".java": "java", ".rb": "ruby",
- ".c": "c", ".cpp": "cpp", ".h": "c", ".cs": "csharp",
- ".swift": "swift", ".kt": "kotlin",
- ".md": "markdown", ".txt": "text", ".rst": "restructuredtext",
+ ".py": "python",
+ ".pyi": "python",
+ ".js": "javascript",
+ ".jsx": "javascript",
+ ".mjs": "javascript",
+ ".ts": "typescript",
+ ".tsx": "typescript",
+ ".rs": "rust",
+ ".go": "go",
+ ".java": "java",
+ ".rb": "ruby",
+ ".c": "c",
+ ".cpp": "cpp",
+ ".h": "c",
+ ".cs": "csharp",
+ ".swift": "swift",
+ ".kt": "kotlin",
+ ".md": "markdown",
+ ".txt": "text",
+ ".rst": "restructuredtext",
}
# Regex patterns for non-Python code files
@@ -114,12 +127,18 @@ def _classify_python_addition(added_text: str) -> str:
if node.returns is not None:
return "return_type"
# Check for docstring
- if (node.body and isinstance(node.body[0], ast.Expr)
- and isinstance(node.body[0].value, ast.Constant)
- and isinstance(node.body[0].value.value, str)):
+ if (
+ node.body
+ and isinstance(node.body[0], ast.Expr)
+ and isinstance(node.body[0].value, ast.Constant)
+ and isinstance(node.body[0].value.value, str)
+ ):
return "docstring"
- if (isinstance(node, ast.Expr) and isinstance(node.value, ast.Constant)
- and isinstance(node.value.value, str)):
+ if (
+ isinstance(node, ast.Expr)
+ and isinstance(node.value, ast.Constant)
+ and isinstance(node.value.value, str)
+ ):
return "docstring"
if isinstance(node, ast.Assert):
return "assertion"
@@ -142,7 +161,7 @@ def classify_addition(old: str, new: str, file_ext: str) -> tuple[str, str]:
# Extract only the added portion
if old and old in new:
idx = new.index(old)
- added_text = new[:idx] + new[idx + len(old):]
+ added_text = new[:idx] + new[idx + len(old) :]
else:
added_text = new
@@ -173,6 +192,7 @@ def classify_addition(old: str, new: str, file_ext: str) -> tuple[str, str]:
@dataclass
class _FingerprintCounter:
"""Track occurrences of a fingerprint across sessions."""
+
count: int = 0
sessions: set[str] = field(default_factory=set)
@@ -189,7 +209,9 @@ def __init__(self, threshold: int = 3, cross_session_threshold: int = 2) -> None
if not isinstance(threshold, int) or threshold < 1:
raise ValueError(f"threshold must be a positive integer, got {threshold}")
if not isinstance(cross_session_threshold, int) or cross_session_threshold < 1:
- raise ValueError(f"cross_session_threshold must be a positive integer, got {cross_session_threshold}")
+ raise ValueError(
+ f"cross_session_threshold must be a positive integer, got {cross_session_threshold}"
+ )
self._threshold = threshold
self._cross_session_threshold = cross_session_threshold
self._counters: dict[tuple[str, str], _FingerprintCounter] = defaultdict(
@@ -197,9 +219,7 @@ def __init__(self, threshold: int = 3, cross_session_threshold: int = 2) -> None
)
self._lock = threading.Lock()
- def record(
- self, fingerprint: tuple[str, str], session_id: str
- ) -> dict | None:
+ def record(self, fingerprint: tuple[str, str], session_id: str) -> dict | None:
"""Record one occurrence. Returns a lesson dict when threshold met."""
category, stype = fingerprint
lesson = None
@@ -211,8 +231,7 @@ def record(
# Check cross-session first (2 occurrences across 2+ sessions)
if (
- len(counter.sessions) >= 2
- and counter.count >= self._cross_session_threshold
+ len(counter.sessions) >= 2 and counter.count >= self._cross_session_threshold
) or counter.count >= self._threshold:
self._counters[fingerprint] = _FingerprintCounter()
lesson = self._make_lesson(category, stype)
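A sketch of the graduation thresholds above: three total occurrences, or two occurrences spread across two or more sessions; the returned dict stands in for the real lesson payload:

    from collections import defaultdict

    class FingerprintTracker:
        def __init__(self, threshold=3, cross_session_threshold=2):
            self._threshold = threshold
            self._cross = cross_session_threshold
            self._counts = defaultdict(lambda: {"count": 0, "sessions": set()})

        def record(self, fingerprint, session_id):
            c = self._counts[fingerprint]
            c["count"] += 1
            c["sessions"].add(session_id)
            if ((len(c["sessions"]) >= 2 and c["count"] >= self._cross)
                    or c["count"] >= self._threshold):
                del self._counts[fingerprint]  # reset on graduation, as in the diff
                return {"fingerprint": fingerprint}
            return None

    t = FingerprintTracker()
    print(t.record(("python", "type_hints"), "s1"))  # None
    print(t.record(("python", "type_hints"), "s2"))  # graduates: 2 hits, 2 sessions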
diff --git a/Gradata/src/gradata/enhancements/_sanitize.py b/Gradata/src/gradata/enhancements/_sanitize.py
index 4a49e5c5..922e8204 100644
--- a/Gradata/src/gradata/enhancements/_sanitize.py
+++ b/Gradata/src/gradata/enhancements/_sanitize.py
@@ -61,7 +61,7 @@
_XML_ESCAPE_TABLE = str.maketrans(
{
- "&": "&", # Must be first to avoid double-escaping
+ "&": "&", # Must be first to avoid double-escaping
"<": "<",
">": ">",
'"': """,
@@ -83,9 +83,8 @@ def _escape_xml(text: str) -> str:
# handled by json.dumps(). json.dumps() handles \, ", \n, \r, \t, \0 — so
# the residual risk is backtick (template literal injection) and the closing </script> tag.
_JS_BREAKOUT_RE = re.compile(
- r"`" # template literal delimiter
- r"|<\s*/\s*script\s*>" # tag to break out of tag to break out of