From c7a74cada2ae88f1b7f7c6e6d076b2867474f78c Mon Sep 17 00:00:00 2001
From: Etan Joseph Heyman <etan@heyman.net>
Date: Fri, 3 Apr 2026 02:23:41 +0300
Subject: [PATCH 1/2] feat: LLM-powered entity extraction with gleaning (Round
 1)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replace regex extraction with Gemini-backed typed extraction:
- Expanded entity types: agent, skill, service, config, decision + originals
- Expanded relation types: created, spawns, depends_on, deployed_on, fixes, configures
- LightRAG-style output schema: description + strength per relation
- Gleaning mechanism: second LLM pass catches 20-40% more entities
- Relation dedup across passes
- Gemini extraction backend (call_gemini_for_extraction in enrichment_controller)
- Enabled use_llm=True in enrichment pipeline

Test: "Anthropic created Claude Code" text → 3 typed entities + 2 semantic relations
  (was: 0 entities, 0 relations with regex)
Real session text → 32 entities + 19 relations with gleaning
  (was: ~3 seed matches + 0 relations)

44 entity/KG tests pass, 0 failures.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 src/brainlayer/enrichment_controller.py      |  25 +++
 src/brainlayer/pipeline/enrichment.py        |   4 +-
 src/brainlayer/pipeline/entity_extraction.py | 151 ++++++++++++++++---
 3 files changed, 155 insertions(+), 25 deletions(-)

diff --git a/src/brainlayer/enrichment_controller.py b/src/brainlayer/enrichment_controller.py
index b324cc2..1707ee3 100644
--- a/src/brainlayer/enrichment_controller.py
+++ b/src/brainlayer/enrichment_controller.py
@@ -108,6 +108,31 @@ def _build_gemini_config() -> dict[str, Any]:
     }
 
 
+# ── Entity extraction via Gemini ───────────────────────────────────────────────
+
+GEMINI_EXTRACTION_MODEL = os.environ.get("BRAINLAYER_GEMINI_EXTRACTION_MODEL", "gemini-2.5-flash-lite")
+
+
+def call_gemini_for_extraction(prompt: str) -> Optional[str]:
+    """Call Gemini for entity/relation extraction. Returns raw text response."""
+    try:
+        client = _get_gemini_client()
+    except RuntimeError:
+        logger.debug("Gemini not available for extraction")
+        return None
+
+    try:
+        response = client.models.generate_content(
+            model=GEMINI_EXTRACTION_MODEL,
+            contents=prompt,
+            config={"response_mime_type": "application/json", "thinking_config": {"thinking_budget": 0}},
+        )
+        return response.text if response and response.text else None
+    except Exception:
+        logger.warning("Gemini extraction call failed", exc_info=True)
+        return None
+
+
 # ── Content-hash dedup ─────────────────────────────────────────────────────────
 
 
diff --git a/src/brainlayer/pipeline/enrichment.py b/src/brainlayer/pipeline/enrichment.py
index f3e00be..2ec26d7 100644
--- a/src/brainlayer/pipeline/enrichment.py
+++ b/src/brainlayer/pipeline/enrichment.py
@@ -857,12 +857,12 @@ def _enrich_one(
             from .entity_extraction import extract_entities_from_tags
             from .kg_extraction import extract_kg_from_chunk
 
-            # Seed + tag extraction (no API calls, always enabled)
+            # Entity extraction: seed matching + LLM extraction via Gemini
             extract_kg_from_chunk(
                 store=store,
                 chunk_id=chunk["id"],
                 seed_entities=DEFAULT_SEED_ENTITIES,
-                use_llm=False,
+                use_llm=True,
                 use_gliner=False,
             )
 
diff --git a/src/brainlayer/pipeline/entity_extraction.py b/src/brainlayer/pipeline/entity_extraction.py
index a11015a..0d882ba 100644
--- a/src/brainlayer/pipeline/entity_extraction.py
+++ b/src/brainlayer/pipeline/entity_extraction.py
@@ -118,22 +118,66 @@ def _deduplicate_overlaps(entities: list[ExtractedEntity]) -> list[ExtractedEnti
 
 # ── LLM-based extraction ──
 
-_NER_PROMPT_TEMPLATE = """Extract named entities and relationships from this developer conversation text.
+_NER_PROMPT_TEMPLATE = """Extract ALL named entities and relationships from this developer conversation text.
+
+## Entity types (be precise — choose the most specific type):
+- person: Human individuals (First Last). NOT repos, tools, or agents.
+- agent: AI coding agents (orcClaude, coachClaude, brainClaude, Ralph, etc.). NOT humans.
+- company: Businesses and organizations (Anthropic, Weby, Cantaloupe AI).
+- project: Code repositories, apps, products (BrainLayer, VoiceLayer, 6PM).
+- tool: Developer tools and services (Docker, Railway, Supabase, CodeRabbit).
+- technology: Languages, frameworks, protocols (SQLite, SwiftUI, MCP, TypeScript).
+- skill: Reusable AI skill or command (/commit, /pr-loop, /coach).
+- service: Deployed infrastructure (LaunchAgent, daemon, watcher).
+- config: Configuration files or settings (CLAUDE.md, pyproject.toml, .env).
+- decision: Architectural or design decisions made during sessions.
+- topic: Abstract concepts or domains (enrichment, graph RAG, dark mode).
+
+## Relation types (source → target, with description):
+- created: person/agent → project/tool. "Anthropic created Claude Code"
+- owns: person → project/company. "Etan owns BrainLayer"
+- works_at: person → company. "Josh Anderson works at Cantaloupe AI"
+- uses: entity → tool/technology. "BrainLayer uses SQLite"
+- depends_on: project → technology/tool. "VoiceLayer depends on whisper-cpp"
+- deployed_on: project/service → tool. "Golems deployed on Railway"
+- fixes: agent/person → topic/project. "brainClaude fixes dark mode regression"
+- configures: config → project/service. "CLAUDE.md configures BrainLayer hooks"
+- spawns: agent → agent. "orcClaude spawns brainlayerClaude"
+- client_of: person → person/company. "Yuval is client of Etan"
+- affiliated_with: person → company. "Josh affiliated with Cantaloupe AI"
+- coaches: agent → entity. "coachClaude coaches scheduling"
+- builds: person/agent → project. "Etan builds VoiceLayer"
+- related_to: generic fallback (use ONLY if no specific type fits)
+
+## Output format — return JSON only:
+{{"entities": [{{"text": "exact text from input", "type": "entity_type", "description": "one-sentence description of this entity based on context"}}], "relations": [{{"source": "entity text", "target": "entity text", "type": "relation_type", "description": "natural language sentence describing the relationship", "strength": 0.8}}]}}
+
+## Rules:
+- Extract entities that are CLEARLY identifiable, not vague mentions
+- Each relation MUST have a substantive description — reject empty relations
+- Strength is 0.0-1.0: explicit statements=0.9+, implied=0.5-0.8, speculative=0.3-0.5
+- Decompose N-ary relationships into binary pairs
+- Include Hebrew entity names if present (e.g., MeHayom/מהיום)
+- If no entities found, return: {{"entities": [], "relations": []}}
 
-Entity types: person, agent, company, project, tool, technology, topic
-- person: Human names (First Last). NOT repos/tools/agents.
-- agent: AI agents (*Claude, *Golem, Ralph). NOT humans.
-- company: Businesses. project: Code repos/apps. tool/technology: Dev tools, languages, frameworks.
+Text:
+{text}"""
+
+_GLEANING_PROMPT = """The previous extraction from the same text missed important entities and relationships.
+
+Previous extraction found: {previous_count} entities and {previous_rel_count} relations.
 
-Relation types (direction: source → target):
-- works_at: person → company. owns: person → project/company. builds: person/agent → project.
-- uses: entity → tool/technology. client_of: A → B (B serves A). affiliated_with: person → company.
-- coaches: agent → person. related_to: generic fallback.
+Re-read the text carefully. Extract ADDITIONAL entities and relationships that were missed. Focus on:
+- Implicit relationships (X depends on Y, X was deployed to Y)
+- Agent names and their roles
+- Configuration files and what they configure
+- Decisions and what they decided about
+- Services and what they serve
 
-Return JSON only:
-{{"entities": [{{"text": "exact text from input", "type": "entity_type"}}], "relations": [{{"source": "entity text", "target": "entity text", "type": "relation_type", "fact": "natural language sentence"}}]}}
+Return ONLY newly found entities/relations (not duplicates of previous extraction).
 
-If no entities found, return: {{"entities": [], "relations": []}}
+Same JSON format:
+{{"entities": [{{"text": "exact text", "type": "entity_type", "description": "description"}}], "relations": [{{"source": "entity text", "target": "entity text", "type": "relation_type", "description": "description", "strength": 0.7}}]}}
 
 Text:
 {text}"""
@@ -144,6 +188,15 @@ def build_ner_prompt(text: str) -> str:
     return _NER_PROMPT_TEMPLATE.format(text=text)
 
 
+def build_gleaning_prompt(text: str, prev_entity_count: int, prev_rel_count: int) -> str:
+    """Build the gleaning re-prompt for missed entities."""
+    return _GLEANING_PROMPT.format(
+        text=text,
+        previous_count=prev_entity_count,
+        previous_rel_count=prev_rel_count,
+    )
+
+
 def parse_llm_ner_response(response: str, source_text: str) -> tuple[list[ExtractedEntity], list[ExtractedRelation]]:
     """Parse LLM NER response into entities and relations with spans.
 
@@ -192,20 +245,24 @@ def parse_llm_ner_response(response: str, source_text: str) -> tuple[list[Extrac
         source = raw_rel.get("source", "")
         target = raw_rel.get("target", "")
         rtype = raw_rel.get("type", "")
+        desc = raw_rel.get("description", "")
         if not source or not target or not rtype:
             continue
 
-        fact = raw_rel.get("fact")
+        strength = raw_rel.get("strength", 0.7)
+        fact = raw_rel.get("fact") or desc
         props = raw_rel.get("properties") or {}
-        if fact and "fact" not in props:
+        if fact:
             props["fact"] = fact
+        if desc:
+            props["description"] = desc
 
         relations.append(
             ExtractedRelation(
                 source_text=source,
                 target_text=target,
                 relation_type=rtype,
-                confidence=0.7,
+                confidence=min(float(strength), 1.0),
                 properties=props,
             )
         )
@@ -239,12 +296,14 @@ def _extract_json(text: str) -> Optional[dict[str, Any]]:
 def extract_entities_llm(
     text: str,
     llm_caller: Optional[Any] = None,
+    enable_gleaning: bool = True,
 ) -> tuple[list[ExtractedEntity], list[ExtractedRelation]]:
-    """Extract entities using LLM (Ollama/MLX).
+    """Extract entities using LLM with optional gleaning second pass.
 
     Args:
         text: Source text to extract from.
-        llm_caller: Callable(prompt) -> str. If None, uses enrichment.call_llm.
+        llm_caller: Callable(prompt) -> str. If None, uses Gemini via enrichment_controller.
+        enable_gleaning: If True, re-prompt for missed entities (catches 20-40% more).
 
     Returns:
         Tuple of (entities, relations).
@@ -252,13 +311,11 @@ def extract_entities_llm(
     if not text.strip():
         return [], []
 
-    prompt = build_ner_prompt(text)
-
     if llm_caller is None:
-        from .enrichment import call_llm
-
-        llm_caller = call_llm
+        llm_caller = _get_default_llm_caller()
 
+    # Pass 1: Primary extraction
+    prompt = build_ner_prompt(text)
     try:
         response = llm_caller(prompt)
     except Exception:
@@ -268,7 +325,55 @@ def extract_entities_llm(
     if not response:
         return [], []
 
-    return parse_llm_ner_response(response, text)
+    entities, relations = parse_llm_ner_response(response, text)
+
+    # Pass 2: Gleaning — re-prompt for missed entities
+    if enable_gleaning and (entities or relations):
+        gleaning_prompt = build_gleaning_prompt(text, len(entities), len(relations))
+        try:
+            gleaning_response = llm_caller(gleaning_prompt)
+            if gleaning_response:
+                extra_entities, extra_relations = parse_llm_ner_response(gleaning_response, text)
+                if extra_entities or extra_relations:
+                    logger.info(
+                        "Gleaning found %d extra entities, %d extra relations",
+                        len(extra_entities),
+                        len(extra_relations),
+                    )
+                    entities.extend(extra_entities)
+                    relations.extend(extra_relations)
+        except Exception:
+            logger.debug("Gleaning pass failed (non-critical)", exc_info=True)
+
+    # Deduplicate relations (gleaning may re-find the same ones)
+    seen_rels: set[tuple[str, str, str]] = set()
+    unique_relations: list[ExtractedRelation] = []
+    for r in relations:
+        key = (r.source_text.lower(), r.target_text.lower(), r.relation_type)
+        if key not in seen_rels:
+            seen_rels.add(key)
+            unique_relations.append(r)
+
+    return entities, unique_relations
+
+
+def _get_default_llm_caller():
+    """Get the best available LLM caller — Gemini first, then enrichment.call_llm."""
+    try:
+        from ..enrichment_controller import call_gemini_for_extraction
+
+        return call_gemini_for_extraction
+    except (ImportError, RuntimeError):
+        pass
+
+    try:
+        from .enrichment import call_llm
+
+        return call_llm
+    except ImportError:
+        pass
+
+    raise RuntimeError("No LLM backend available for entity extraction")
 
 
 # ── GLiNER-based extraction ──

From 86d6ebf38689d903c7734c47b2b6135081663c51 Mon Sep 17 00:00:00 2001
From: Etan Joseph Heyman <etan@heyman.net>
Date: Fri, 3 Apr 2026 02:34:26 +0300
Subject: [PATCH 2/2] =?UTF-8?q?fix:=20address=20CodeRabbit=20review=20?=
 =?UTF-8?q?=E2=80=94=20Optional=20import,=20safe=20parsing,=20gleaning=20d?=
 =?UTF-8?q?efault?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- CRITICAL: Added Optional to typing imports in enrichment_controller.py
- MAJOR: Added timeout (30s) to Gemini extraction call
- MAJOR: Changed gleaning default to False (opt-in to avoid doubling cost)
- MEDIUM: Safe float() parsing for LLM strength values (handles null/"high")

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 src/brainlayer/enrichment_controller.py      | 14 +++++++++++---
 src/brainlayer/pipeline/entity_extraction.py |  8 ++++++--
 2 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/src/brainlayer/enrichment_controller.py b/src/brainlayer/enrichment_controller.py
index 1707ee3..8ce0c55 100644
--- a/src/brainlayer/enrichment_controller.py
+++ b/src/brainlayer/enrichment_controller.py
@@ -17,7 +17,7 @@
 import time
 from dataclasses import dataclass, field
 from pathlib import Path
-from typing import Any
+from typing import Any, Optional
 
 logger = logging.getLogger(__name__)
 
@@ -114,7 +114,11 @@ def _build_gemini_config() -> dict[str, Any]:
 
 
 def call_gemini_for_extraction(prompt: str) -> Optional[str]:
-    """Call Gemini for entity/relation extraction. Returns raw text response."""
+    """Call Gemini for entity/relation extraction. Returns raw text response.
+
+    Rate-limited by BRAINLAYER_ENRICH_RATE (default 0.2 = 12 RPM).
+    Timeout: 30 seconds per call.
+    """
     try:
         client = _get_gemini_client()
     except RuntimeError:
@@ -125,7 +129,11 @@ def call_gemini_for_extraction(prompt: str) -> Optional[str]:
         response = client.models.generate_content(
             model=GEMINI_EXTRACTION_MODEL,
             contents=prompt,
-            config={"response_mime_type": "application/json", "thinking_config": {"thinking_budget": 0}},
+            config={
+                "response_mime_type": "application/json",
+                "thinking_config": {"thinking_budget": 0},
+                "http_options": {"timeout": 30_000},
+            },
         )
         return response.text if response and response.text else None
     except Exception:
diff --git a/src/brainlayer/pipeline/entity_extraction.py b/src/brainlayer/pipeline/entity_extraction.py
index 0d882ba..01b9044 100644
--- a/src/brainlayer/pipeline/entity_extraction.py
+++ b/src/brainlayer/pipeline/entity_extraction.py
@@ -249,7 +249,10 @@ def parse_llm_ner_response(response: str, source_text: str) -> tuple[list[Extrac
         if not source or not target or not rtype:
             continue
 
-        strength = raw_rel.get("strength", 0.7)
+        try:
+            strength = float(raw_rel.get("strength", 0.7))
+        except (TypeError, ValueError):
+            strength = 0.7
         fact = raw_rel.get("fact") or desc
         props = raw_rel.get("properties") or {}
         if fact:
@@ -296,7 +299,7 @@ def _extract_json(text: str) -> Optional[dict[str, Any]]:
 def extract_entities_llm(
     text: str,
     llm_caller: Optional[Any] = None,
-    enable_gleaning: bool = True,
+    enable_gleaning: bool = False,
 ) -> tuple[list[ExtractedEntity], list[ExtractedRelation]]:
     """Extract entities using LLM with optional gleaning second pass.
 
@@ -304,6 +307,7 @@ def extract_entities_llm(
         text: Source text to extract from.
         llm_caller: Callable(prompt) -> str. If None, uses Gemini via enrichment_controller.
         enable_gleaning: If True, re-prompt for missed entities (catches 20-40% more).
+            Default False to avoid doubling LLM calls. Enable for high-value chunks.
 
     Returns:
         Tuple of (entities, relations).