From 60d02d9de9cf35e33632d2a962e119510d7da94c Mon Sep 17 00:00:00 2001
From: Amit Kumar <ak.nitrr13@gmail.com>
Date: Wed, 13 May 2026 12:53:07 +0000
Subject: [PATCH 1/5] perf(enricher): parse tree-sitter tree once per file, not
 per node (Task A1)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Each LanguageExtractor.Extract reparsed the source file at its top —
on Python at ~13 nodes/file that meant ~13x over-parse. pprof on
airflow flagged 91% of total allocations from tree-sitter.
(*Tree).cachedNode driven by the per-node re-parse storm.

Adds ExtractFromTree(ctx, tree, nodes) []Result to the
LanguageExtractor interface. The orchestrator now parses the file
once and calls ExtractFromTree(tree, allNodes) — the AST is walked
multiple times for distinct node-kinds but never re-parsed. Extract
is retained as a thin wrapper for single-node convenience callers
and tests.

Plan: docs/superpowers/plans/2026-05-13-enrich-oom-fix.md Task A1.

Per-file caches: matchAllList (py), matchInterfaceAssertion (go),
collectExports (ts) are computed once per file rather than once per
matching node.

Verification:
- go test ./internal/intelligence/extractor/... -count=1: 28 pass
- go test ./... -count=1: 875 pass
---
 .../intelligence/extractor/enricher.go        | 18 +++-
 .../intelligence/extractor/enricher_test.go   | 29 ++++++-
 .../intelligence/extractor/extractor.go       | 16 +++-
 .../extractor/golang/extractor.go             | 71 ++++++++++------
 .../intelligence/extractor/java/extractor.go  | 60 +++++++++-----
 .../extractor/python/extractor.go             | 82 +++++++++++++------
 .../extractor/typescript/extractor.go         | 69 +++++++++++-----
 7 files changed, 246 insertions(+), 99 deletions(-)

diff --git a/go/internal/intelligence/extractor/enricher.go b/go/internal/intelligence/extractor/enricher.go
index ef666b1e..ef5fb2e2 100644
--- a/go/internal/intelligence/extractor/enricher.go
+++ b/go/internal/intelligence/extractor/enricher.go
@@ -8,6 +8,7 @@ import (
 	"sync"
 
 	"github.com/randomcodespace/codeiq/go/internal/model"
+	"github.com/randomcodespace/codeiq/go/internal/parser"
 )
 
 // Enricher orchestrates per-language extractors over a node list. Mirrors
@@ -114,12 +115,23 @@ func (en *Enricher) Enrich(nodes []*model.CodeNode, edges *[]*model.CodeEdge, ro
 				Content:  content,
 				Registry: registry,
 			}
+			// Parse once per file; reuse the tree across every node in this
+			// file via ExtractFromTree. Eliminates the per-node re-parse that
+			// pprof on airflow flagged as 91% of total allocations.
+			tree, _ := parser.ParseByName(t.ext.Language(), raw)
+			if tree != nil {
+				defer tree.Close()
+			}
+			results := t.ext.ExtractFromTree(ctx, tree, t.ns)
 			var localEdges []*model.CodeEdge
-			for _, n := range t.ns {
-				r := t.ext.Extract(ctx, n)
+			for j, r := range results {
+				if j >= len(t.ns) {
+					break
+				}
+				n := t.ns[j]
 				localEdges = append(localEdges, r.CallEdges...)
 				localEdges = append(localEdges, r.SymbolReferences...)
-				if len(r.TypeHints) > 0 {
+				if len(r.TypeHints) > 0 && n != nil {
 					if n.Properties == nil {
 						n.Properties = map[string]any{}
 					}
diff --git a/go/internal/intelligence/extractor/enricher_test.go b/go/internal/intelligence/extractor/enricher_test.go
index 27d80379..62fd03ad 100644
--- a/go/internal/intelligence/extractor/enricher_test.go
+++ b/go/internal/intelligence/extractor/enricher_test.go
@@ -8,13 +8,14 @@ import (
 	"testing"
 
 	"github.com/randomcodespace/codeiq/go/internal/model"
+	"github.com/randomcodespace/codeiq/go/internal/parser"
 )
 
 // fakeExtractor is a test-only LanguageExtractor that records each call so we
 // can assert the orchestrator's read-once contract and per-language dispatch.
 type fakeExtractor struct {
 	lang       string
-	calls      int32 // atomic counter of Extract() invocations
+	calls      int32 // counts per-node visits (across both Extract and ExtractFromTree)
 	filesSeen  []string
 	emitEdge   bool
 	emitHint   bool
@@ -25,9 +26,9 @@ type fakeExtractor struct {
 
 func (f *fakeExtractor) Language() string { return f.lang }
 
-func (f *fakeExtractor) Extract(ctx Context, node *model.CodeNode) Result {
-	atomic.AddInt32(&f.calls, 1)
-	f.filesSeen = append(f.filesSeen, ctx.FilePath)
+// resultFor synthesises a Result for one node — shared between Extract and
+// ExtractFromTree so behaviour is identical regardless of call path.
+func (f *fakeExtractor) resultFor(node *model.CodeNode) Result {
 	r := EmptyResult()
 	if f.emitEdge {
 		r.CallEdges = []*model.CodeEdge{{
@@ -47,6 +48,26 @@ func (f *fakeExtractor) Extract(ctx Context, node *model.CodeNode) Result {
 	return r
 }
 
+func (f *fakeExtractor) Extract(ctx Context, node *model.CodeNode) Result {
+	atomic.AddInt32(&f.calls, 1)
+	f.filesSeen = append(f.filesSeen, ctx.FilePath)
+	return f.resultFor(node)
+}
+
+func (f *fakeExtractor) ExtractFromTree(ctx Context, _ *parser.Tree, nodes []*model.CodeNode) []Result {
+	atomic.AddInt32(&f.calls, int32(len(nodes)))
+	f.filesSeen = append(f.filesSeen, ctx.FilePath)
+	results := make([]Result, len(nodes))
+	for i, n := range nodes {
+		if n == nil {
+			results[i] = EmptyResult()
+			continue
+		}
+		results[i] = f.resultFor(n)
+	}
+	return results
+}
+
 func TestEnricher_DispatchesPerLanguageAndAppendsEdges(t *testing.T) {
 	dir := t.TempDir()
 	javaPath := "src/Foo.java"
diff --git a/go/internal/intelligence/extractor/extractor.go b/go/internal/intelligence/extractor/extractor.go
index cf4daa9c..b9d09299 100644
--- a/go/internal/intelligence/extractor/extractor.go
+++ b/go/internal/intelligence/extractor/extractor.go
@@ -7,7 +7,10 @@
 // language via DetectLanguage.
 package extractor
 
-import "github.com/randomcodespace/codeiq/go/internal/model"
+import (
+	"github.com/randomcodespace/codeiq/go/internal/model"
+	"github.com/randomcodespace/codeiq/go/internal/parser"
+)
 
 // Context is the per-file context an extractor sees during enrich. The
 // orchestrator reads the file once and passes the contents to every node-level
@@ -52,6 +55,15 @@ type LanguageExtractor interface {
 	// Language returns the canonical language key, lower-case (e.g. "java").
 	// This key must match DetectLanguage for the orchestrator to dispatch.
 	Language() string
-	// Extract runs the extractor against a single node within a parsed file.
+	// Extract runs the extractor against a single node, parsing ctx.Content
+	// internally. Retained as the single-node convenience wrapper for tests
+	// and ad-hoc callers; the orchestrator uses ExtractFromTree to avoid
+	// re-parsing N times for a file with N nodes.
 	Extract(ctx Context, node *model.CodeNode) Result
+	// ExtractFromTree runs the extractor against every node in `nodes` using
+	// a single pre-parsed tree. Returns one Result per input node in matching
+	// order, so callers can stamp TypeHints back onto the corresponding node.
+	// `tree` may be nil when ctx.Language has no tree-sitter grammar — the
+	// extractor must handle that by returning len(nodes) EmptyResult entries.
+	ExtractFromTree(ctx Context, tree *parser.Tree, nodes []*model.CodeNode) []Result
 }
diff --git a/go/internal/intelligence/extractor/golang/extractor.go b/go/internal/intelligence/extractor/golang/extractor.go
index e24a59c5..02db4e9e 100644
--- a/go/internal/intelligence/extractor/golang/extractor.go
+++ b/go/internal/intelligence/extractor/golang/extractor.go
@@ -43,40 +43,65 @@ func New() *Extractor { return &Extractor{} }
 // Language returns "go".
 func (e *Extractor) Language() string { return "go" }
 
-// Extract dispatches by node kind. CLASS is the registry kind for Go structs
-// here — the Java side uses CLASS + COMPONENT; the per-task brief is CLASS
-// only, so we mirror that.
+// Extract dispatches by node kind. Single-node convenience wrapper —
+// production paths use ExtractFromTree.
 func (e *Extractor) Extract(ctx extractor.Context, node *model.CodeNode) extractor.Result {
-	switch node.Kind {
-	case model.NodeMethod, model.NodeClass:
-	default:
-		return extractor.EmptyResult()
+	tree, _ := parser.ParseByName("go", []byte(ctx.Content))
+	if tree != nil {
+		defer tree.Close()
 	}
-	tree, err := parser.ParseByName("go", []byte(ctx.Content))
-	if err != nil || tree == nil || tree.Root == nil {
+	out := e.ExtractFromTree(ctx, tree, []*model.CodeNode{node})
+	if len(out) == 0 {
 		return extractor.EmptyResult()
 	}
-	defer tree.Close()
+	return out[0]
+}
+
+// ExtractFromTree walks the pre-parsed tree once per input node, returning
+// one Result per node in matching order. tree may be nil — every result is
+// EmptyResult in that case.
+func (e *Extractor) ExtractFromTree(ctx extractor.Context, tree *parser.Tree, nodes []*model.CodeNode) []extractor.Result {
+	results := make([]extractor.Result, len(nodes))
+	for i := range results {
+		results[i] = extractor.EmptyResult()
+	}
+	if tree == nil || tree.Root == nil {
+		return results
+	}
 	root := tree.Root.RootNode()
 	if root == nil {
-		return extractor.EmptyResult()
+		return results
 	}
-
-	switch node.Kind {
-	case model.NodeMethod:
-		return extractor.Result{
-			CallEdges:  collectGoCallEdges(root, ctx.Content, node, ctx.Registry),
-			Confidence: model.CapabilityPartial,
+	// matchInterfaceAssertion only reads ctx.Content; compute once per file.
+	var ifaceAssertion string
+	var ifaceAssertionComputed bool
+	for i, node := range nodes {
+		if node == nil {
+			continue
 		}
-	case model.NodeClass:
-		if iface := matchInterfaceAssertion(ctx.Content); iface != "" {
-			return extractor.Result{
-				TypeHints:  map[string]string{"implements_types": iface},
-				Confidence: model.CapabilityPartial,
+		switch node.Kind {
+		case model.NodeMethod:
+			edges := collectGoCallEdges(root, ctx.Content, node, ctx.Registry)
+			if len(edges) > 0 {
+				results[i] = extractor.Result{
+					CallEdges:  edges,
+					Confidence: model.CapabilityPartial,
+				}
+			}
+		case model.NodeClass:
+			if !ifaceAssertionComputed {
+				ifaceAssertion = matchInterfaceAssertion(ctx.Content)
+				ifaceAssertionComputed = true
+			}
+			if ifaceAssertion != "" {
+				results[i] = extractor.Result{
+					TypeHints:  map[string]string{"implements_types": ifaceAssertion},
+					Confidence: model.CapabilityPartial,
+				}
 			}
 		}
 	}
-	return extractor.EmptyResult()
+	return results
 }
 
 // matchInterfaceAssertion runs the package-level regex against the source. The
diff --git a/go/internal/intelligence/extractor/java/extractor.go b/go/internal/intelligence/extractor/java/extractor.go
index 5e939937..98dacc6b 100644
--- a/go/internal/intelligence/extractor/java/extractor.go
+++ b/go/internal/intelligence/extractor/java/extractor.go
@@ -38,36 +38,54 @@ func New() *Extractor { return &Extractor{} }
 func (e *Extractor) Language() string { return "java" }
 
 // Extract returns CALLS edges for METHOD nodes and type-hierarchy hints for
-// CLASS / ABSTRACT_CLASS / INTERFACE nodes. All other node kinds short-circuit
-// to EmptyResult.
+// CLASS / ABSTRACT_CLASS / INTERFACE nodes. Single-node convenience wrapper —
+// parses once per call. Production paths use ExtractFromTree.
 func (e *Extractor) Extract(ctx extractor.Context, node *model.CodeNode) extractor.Result {
-	switch node.Kind {
-	case model.NodeMethod, model.NodeClass,
-		model.NodeAbstractClass, model.NodeInterface:
-	default:
-		return extractor.EmptyResult()
+	tree, _ := parser.ParseByName("java", []byte(ctx.Content))
+	if tree != nil {
+		defer tree.Close()
 	}
-	tree, err := parser.ParseByName("java", []byte(ctx.Content))
-	if err != nil || tree == nil || tree.Root == nil {
+	out := e.ExtractFromTree(ctx, tree, []*model.CodeNode{node})
+	if len(out) == 0 {
 		return extractor.EmptyResult()
 	}
-	defer tree.Close()
+	return out[0]
+}
+
+// ExtractFromTree walks the pre-parsed tree once per input node and returns
+// one Result per node in matching order. tree may be nil — all results are
+// EmptyResult in that case.
+func (e *Extractor) ExtractFromTree(ctx extractor.Context, tree *parser.Tree, nodes []*model.CodeNode) []extractor.Result {
+	results := make([]extractor.Result, len(nodes))
+	for i := range results {
+		results[i] = extractor.EmptyResult()
+	}
+	if tree == nil || tree.Root == nil {
+		return results
+	}
 	root := tree.Root.RootNode()
 	if root == nil {
-		return extractor.EmptyResult()
+		return results
 	}
-
-	if node.Kind == model.NodeMethod {
-		return extractor.Result{
-			CallEdges:  collectCallEdges(root, ctx.Content, node, ctx.Registry),
-			Confidence: model.CapabilityPartial,
+	for i, node := range nodes {
+		if node == nil {
+			continue
+		}
+		switch node.Kind {
+		case model.NodeMethod:
+			results[i] = extractor.Result{
+				CallEdges:  collectCallEdges(root, ctx.Content, node, ctx.Registry),
+				Confidence: model.CapabilityPartial,
+			}
+		case model.NodeClass, model.NodeAbstractClass, model.NodeInterface:
+			hints := extractTypeHierarchyHints(root, ctx.Content, node.Label)
+			results[i] = extractor.Result{
+				TypeHints:  hints,
+				Confidence: model.CapabilityPartial,
+			}
 		}
 	}
-	hints := extractTypeHierarchyHints(root, ctx.Content, node.Label)
-	return extractor.Result{
-		TypeHints:  hints,
-		Confidence: model.CapabilityPartial,
-	}
+	return results
 }
 
 // collectCallEdges walks the tree to locate the method_declaration whose
diff --git a/go/internal/intelligence/extractor/python/extractor.go b/go/internal/intelligence/extractor/python/extractor.go
index 85b3b80d..5154efc7 100644
--- a/go/internal/intelligence/extractor/python/extractor.go
+++ b/go/internal/intelligence/extractor/python/extractor.go
@@ -39,45 +39,75 @@ func New() *Extractor { return &Extractor{} }
 // Language returns "python".
 func (e *Extractor) Language() string { return "python" }
 
-// Extract dispatches by node kind.
+// Extract dispatches by node kind. Single-node convenience wrapper; parses
+// the file each call. Production paths use ExtractFromTree to amortise the
+// parse across every node in a file.
 func (e *Extractor) Extract(ctx extractor.Context, node *model.CodeNode) extractor.Result {
-	switch node.Kind {
-	case model.NodeMethod, model.NodeClass, model.NodeModule:
-	default:
-		return extractor.EmptyResult()
+	tree, _ := parser.ParseByName("python", []byte(ctx.Content))
+	if tree != nil {
+		defer tree.Close()
 	}
-	tree, err := parser.ParseByName("python", []byte(ctx.Content))
-	if err != nil || tree == nil || tree.Root == nil {
+	out := e.ExtractFromTree(ctx, tree, []*model.CodeNode{node})
+	if len(out) == 0 {
 		return extractor.EmptyResult()
 	}
-	defer tree.Close()
+	return out[0]
+}
+
+// ExtractFromTree walks a single pre-parsed tree once and produces a Result
+// per input node. Order matches `nodes`. tree may be nil — every node maps
+// to EmptyResult in that case.
+func (e *Extractor) ExtractFromTree(ctx extractor.Context, tree *parser.Tree, nodes []*model.CodeNode) []extractor.Result {
+	results := make([]extractor.Result, len(nodes))
+	for i := range results {
+		results[i] = extractor.EmptyResult()
+	}
+	if tree == nil || tree.Root == nil {
+		return results
+	}
 	root := tree.Root.RootNode()
 	if root == nil {
-		return extractor.EmptyResult()
+		return results
 	}
+	// matchAllList only reads ctx.Content; compute once per file and reuse
+	// across every Module node in the input.
+	var moduleAllExports string
+	var moduleAllExportsComputed bool
 
-	switch node.Kind {
-	case model.NodeMethod:
-		return extractor.Result{
-			CallEdges:  collectFunctionCallEdges(root, ctx.Content, node, ctx.Registry),
-			Confidence: model.CapabilityPartial,
+	for i, node := range nodes {
+		if node == nil {
+			continue
 		}
-	case model.NodeClass:
-		if base := classBase(root, ctx.Content, node.Label); base != "" {
-			return extractor.Result{
-				TypeHints:  map[string]string{"extends_type": base},
-				Confidence: model.CapabilityPartial,
+		switch node.Kind {
+		case model.NodeMethod:
+			edges := collectFunctionCallEdges(root, ctx.Content, node, ctx.Registry)
+			if len(edges) > 0 {
+				results[i] = extractor.Result{
+					CallEdges:  edges,
+					Confidence: model.CapabilityPartial,
+				}
 			}
-		}
-	case model.NodeModule:
-		if all := matchAllList(ctx.Content); all != "" {
-			return extractor.Result{
-				TypeHints:  map[string]string{"all_exports": all},
-				Confidence: model.CapabilityPartial,
+		case model.NodeClass:
+			if base := classBase(root, ctx.Content, node.Label); base != "" {
+				results[i] = extractor.Result{
+					TypeHints:  map[string]string{"extends_type": base},
+					Confidence: model.CapabilityPartial,
+				}
+			}
+		case model.NodeModule:
+			if !moduleAllExportsComputed {
+				moduleAllExports = matchAllList(ctx.Content)
+				moduleAllExportsComputed = true
+			}
+			if moduleAllExports != "" {
+				results[i] = extractor.Result{
+					TypeHints:  map[string]string{"all_exports": moduleAllExports},
+					Confidence: model.CapabilityPartial,
+				}
 			}
 		}
 	}
-	return extractor.EmptyResult()
+	return results
 }
 
 // matchAllList extracts the literal entries of a `__all__ = [...]` list as
diff --git a/go/internal/intelligence/extractor/typescript/extractor.go b/go/internal/intelligence/extractor/typescript/extractor.go
index e92d5cd2..236f1d4a 100644
--- a/go/internal/intelligence/extractor/typescript/extractor.go
+++ b/go/internal/intelligence/extractor/typescript/extractor.go
@@ -34,35 +34,64 @@ func New() *Extractor { return &Extractor{} }
 func (e *Extractor) Language() string { return "typescript" }
 
 // Extract dispatches by node kind: METHOD -> call edges, MODULE -> exports
-// hint. Other kinds short-circuit.
+// hint. Single-node convenience wrapper; production paths use ExtractFromTree.
 func (e *Extractor) Extract(ctx extractor.Context, node *model.CodeNode) extractor.Result {
-	if node.Kind != model.NodeMethod && node.Kind != model.NodeModule {
-		return extractor.EmptyResult()
+	tree, _ := parser.ParseByName("typescript", []byte(ctx.Content))
+	if tree != nil {
+		defer tree.Close()
 	}
-	tree, err := parser.ParseByName("typescript", []byte(ctx.Content))
-	if err != nil || tree == nil || tree.Root == nil {
+	out := e.ExtractFromTree(ctx, tree, []*model.CodeNode{node})
+	if len(out) == 0 {
 		return extractor.EmptyResult()
 	}
-	defer tree.Close()
+	return out[0]
+}
+
+// ExtractFromTree walks the pre-parsed tree once per input node, returning
+// one Result per node in matching order. tree may be nil.
+func (e *Extractor) ExtractFromTree(ctx extractor.Context, tree *parser.Tree, nodes []*model.CodeNode) []extractor.Result {
+	results := make([]extractor.Result, len(nodes))
+	for i := range results {
+		results[i] = extractor.EmptyResult()
+	}
+	if tree == nil || tree.Root == nil {
+		return results
+	}
 	root := tree.Root.RootNode()
 	if root == nil {
-		return extractor.EmptyResult()
+		return results
 	}
-	if node.Kind == model.NodeMethod {
-		return extractor.Result{
-			CallEdges:  collectCallEdges(root, ctx.Content, node, ctx.Registry),
-			Confidence: model.CapabilityPartial,
+	// collectExports only reads the tree root; compute once for all MODULE
+	// nodes in the input.
+	var moduleExports string
+	var moduleExportsComputed bool
+	for i, node := range nodes {
+		if node == nil {
+			continue
+		}
+		switch node.Kind {
+		case model.NodeMethod:
+			results[i] = extractor.Result{
+				CallEdges:  collectCallEdges(root, ctx.Content, node, ctx.Registry),
+				Confidence: model.CapabilityPartial,
+			}
+		case model.NodeModule:
+			if !moduleExportsComputed {
+				exports := collectExports(root, ctx.Content)
+				if len(exports) > 0 {
+					moduleExports = strings.Join(exports, ", ")
+				}
+				moduleExportsComputed = true
+			}
+			if moduleExports != "" {
+				results[i] = extractor.Result{
+					TypeHints:  map[string]string{"module_exports": moduleExports},
+					Confidence: model.CapabilityPartial,
+				}
+			}
 		}
 	}
-	// MODULE
-	exports := collectExports(root, ctx.Content)
-	if len(exports) == 0 {
-		return extractor.EmptyResult()
-	}
-	return extractor.Result{
-		TypeHints:  map[string]string{"module_exports": strings.Join(exports, ", ")},
-		Confidence: model.CapabilityPartial,
-	}
+	return results
 }
 
 // collectCallEdges finds the function-like declaration matching fn.Label and

From 21f07d82cb1c596337694429e9efa0817b22d75b Mon Sep 17 00:00:00 2001
From: Amit Kumar <ak.nitrr13@gmail.com>
Date: Wed, 13 May 2026 12:55:46 +0000
Subject: [PATCH 2/5] perf(enricher): bound goroutine pool to 2*GOMAXPROCS
 (Task A2)

Previously the enricher spawned one goroutine per source file with no
cap. On polyglot Python repos (airflow: 7,456 files) that produced
7k+ concurrent live tree-sitter Trees + file content strings, driving
the OOM-prone RSS spike pprof exposed.

Adds a semaphore-bounded fan-out at 2*runtime.GOMAXPROCS(0). Tasks
still write to indexed slots, so determinism (sorted file path order)
is preserved. Polyglot real-world targets see materially lower peak
RSS at no measurable wall-time cost.

Plan: docs/superpowers/plans/2026-05-13-enrich-oom-fix.md Task A2.

Verification:
- New TestEnricher_BoundedConcurrency asserts peak in-flight calls
  <= 2*GOMAXPROCS by driving 4*cap files through a tracking extractor.
- go test ./... -count=1: 876 pass.
---
 .../intelligence/extractor/enricher.go        |  8 ++
 .../intelligence/extractor/enricher_test.go   | 78 +++++++++++++++++++
 2 files changed, 86 insertions(+)

diff --git a/go/internal/intelligence/extractor/enricher.go b/go/internal/intelligence/extractor/enricher.go
index ef5fb2e2..9aef2948 100644
--- a/go/internal/intelligence/extractor/enricher.go
+++ b/go/internal/intelligence/extractor/enricher.go
@@ -3,6 +3,7 @@ package extractor
 import (
 	"os"
 	"path/filepath"
+	"runtime"
 	"sort"
 	"strings"
 	"sync"
@@ -94,11 +95,18 @@ func (en *Enricher) Enrich(nodes []*model.CodeNode, edges *[]*model.CodeEdge, ro
 
 	// Run per-file work concurrently; collect into indexed slots so the
 	// final concat order matches `paths` (sorted) — deterministic output.
+	// Cap concurrent goroutines at 2*GOMAXPROCS so the simultaneously-live
+	// tree-sitter Trees + file content strings stay bounded. Polyglot
+	// targets like airflow (~7k Python files) previously spawned one
+	// goroutine per file, driving peak RSS into OOM territory.
 	out := make([][]*model.CodeEdge, len(tasks))
+	sem := make(chan struct{}, 2*runtime.GOMAXPROCS(0))
 	var wg sync.WaitGroup
 	for i, t := range tasks {
 		wg.Add(1)
+		sem <- struct{}{}
 		go func(i int, t task) {
+			defer func() { <-sem }()
 			defer wg.Done()
 			full := filepath.Join(root, t.path)
 			raw, err := os.ReadFile(full)
diff --git a/go/internal/intelligence/extractor/enricher_test.go b/go/internal/intelligence/extractor/enricher_test.go
index 62fd03ad..0ba15fca 100644
--- a/go/internal/intelligence/extractor/enricher_test.go
+++ b/go/internal/intelligence/extractor/enricher_test.go
@@ -3,9 +3,11 @@ package extractor
 import (
 	"os"
 	"path/filepath"
+	"runtime"
 	"sort"
 	"sync/atomic"
 	"testing"
+	"time"
 
 	"github.com/randomcodespace/codeiq/go/internal/model"
 	"github.com/randomcodespace/codeiq/go/internal/parser"
@@ -177,6 +179,82 @@ func TestEnricher_SkipsFilteredFiles(t *testing.T) {
 	}
 }
 
+// concurrencyTrackingExtractor records the maximum number of goroutines
+// observed inside ExtractFromTree at the same time, so we can assert that
+// the orchestrator bounds the fan-out.
+type concurrencyTrackingExtractor struct {
+	lang     string
+	inFlight atomic.Int32
+	maxSeen  atomic.Int32
+	hold     time.Duration
+}
+
+func (c *concurrencyTrackingExtractor) Language() string { return c.lang }
+
+func (c *concurrencyTrackingExtractor) Extract(ctx Context, node *model.CodeNode) Result {
+	// Unused for this test; orchestrator hits ExtractFromTree.
+	return EmptyResult()
+}
+
+func (c *concurrencyTrackingExtractor) ExtractFromTree(_ Context, _ *parser.Tree, nodes []*model.CodeNode) []Result {
+	cur := c.inFlight.Add(1)
+	defer c.inFlight.Add(-1)
+	for {
+		old := c.maxSeen.Load()
+		if cur <= old || c.maxSeen.CompareAndSwap(old, cur) {
+			break
+		}
+	}
+	time.Sleep(c.hold)
+	results := make([]Result, len(nodes))
+	for i := range results {
+		results[i] = EmptyResult()
+	}
+	return results
+}
+
+func TestEnricher_BoundedConcurrency(t *testing.T) {
+	// Generate enough files to overwhelm the goroutine pool if it were
+	// unbounded — 4 * cap files at minimum.
+	cap := 2 * runtime.GOMAXPROCS(0)
+	nFiles := 4 * cap
+	dir := t.TempDir()
+	nodes := make([]*model.CodeNode, 0, nFiles)
+	for i := 0; i < nFiles; i++ {
+		rel := filepath.Join("src", filepath.Base(t.TempDir())+".java")
+		// One file per node; deterministic distinct paths.
+		rel = filepath.Join("src", "f", "F"+itoa(i)+".java")
+		writeFile(t, filepath.Join(dir, rel), "class F"+itoa(i)+" {}")
+		n := model.NewCodeNode("n:"+itoa(i), model.NodeClass, "F"+itoa(i))
+		n.FilePath = rel
+		nodes = append(nodes, n)
+	}
+	ext := &concurrencyTrackingExtractor{lang: "java", hold: 25 * time.Millisecond}
+	en := NewEnricher(ext)
+	var edges []*model.CodeEdge
+	en.Enrich(nodes, &edges, dir)
+	peak := ext.maxSeen.Load()
+	if peak == 0 {
+		t.Fatal("peak in-flight was 0 — orchestrator never invoked the extractor")
+	}
+	if int(peak) > cap {
+		t.Fatalf("peak concurrent ExtractFromTree calls = %d, want <= %d (2*GOMAXPROCS)", peak, cap)
+	}
+}
+
+func itoa(i int) string {
+	const digits = "0123456789"
+	if i == 0 {
+		return "0"
+	}
+	out := make([]byte, 0, 8)
+	for i > 0 {
+		out = append([]byte{digits[i%10]}, out...)
+		i /= 10
+	}
+	return string(out)
+}
+
 func TestEnricher_NoExtractorsIsNoop(t *testing.T) {
 	en := NewEnricher()
 	n := model.NewCodeNode("n:1", model.NodeClass, "Foo")

From e311d992844902005aab05a2c9e7af22383bb38c Mon Sep 17 00:00:00 2001
From: Amit Kumar <ak.nitrr13@gmail.com>
Date: Wed, 13 May 2026 12:57:59 +0000
Subject: [PATCH 3/5] perf(graph): cap Kuzu BufferPoolSize and MaxNumThreads by
 default (Task A3)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

kuzu.DefaultSystemConfig() allocates 80% of system RAM as the buffer
pool (~12 GiB on a 15 GiB host) before any enrich work runs. Combined
with Go-side enricher memory that's enough to OOM the process. The
default also allocates full GOMAXPROCS worth of internal threads,
amplifying COPY-side working set.

Adds OpenOptions struct + OpenWithOptions(path, opts). Open(path)
now applies safe defaults via OpenWithOptions(path, OpenOptions{}):
- BufferPoolBytes: 2 GiB (DefaultBufferPoolBytes)
- MaxThreads: min(4, GOMAXPROCS)

OpenReadOnly is unchanged externally (same signature) but routes
through OpenWithOptions internally — read paths inherit the same
buffer pool cap (2 GiB is plenty for read-side caching at our graph
scale).

Plan: docs/superpowers/plans/2026-05-13-enrich-oom-fix.md Task A3.
Future polish: surface --max-buffer-pool and --copy-threads CLI flags
for power-user tuning (deferred).

Verification:
- go test ./internal/graph/... -count=1: 44 pass
- go test ./... -count=1: 876 pass
---
 go/internal/graph/store.go | 90 ++++++++++++++++++++++++++++++--------
 1 file changed, 71 insertions(+), 19 deletions(-)

diff --git a/go/internal/graph/store.go b/go/internal/graph/store.go
index df0ff56a..94a9553f 100644
--- a/go/internal/graph/store.go
+++ b/go/internal/graph/store.go
@@ -15,12 +15,60 @@ import (
 	"fmt"
 	"os"
 	"path/filepath"
+	"runtime"
 	"sync"
 	"time"
 
 	kuzu "github.com/kuzudb/go-kuzu"
 )
 
+// DefaultBufferPoolBytes caps Kuzu's buffer pool to 2 GiB by default.
+// kuzu.DefaultSystemConfig() allocates 80% of system RAM (~12 GiB on a 15
+// GiB host) before any Go-side enrich work runs, leaving insufficient
+// headroom for the in-memory enricher pipeline. 2 GiB is enough for
+// real-world graphs at ~/projects/-scale (~430k nodes / ~300k edges) while
+// keeping the host OOM bar well below ceiling.
+const DefaultBufferPoolBytes uint64 = 2 << 30
+
+// defaultMaxThreads returns the per-query thread cap for Kuzu — bounded so
+// COPY FROM's working set scales with parallelism in a controlled way.
+// min(4, GOMAXPROCS): keeps headroom even on small hosts; 4 is enough to
+// saturate IO+CPU for our COPY shape.
+func defaultMaxThreads() uint64 {
+	n := runtime.GOMAXPROCS(0)
+	if n > 4 {
+		n = 4
+	}
+	if n < 1 {
+		n = 1
+	}
+	return uint64(n)
+}
+
+// OpenOptions tunes how Open and OpenReadOnly wire the underlying Kuzu
+// SystemConfig. Zero-valued fields fall back to safe defaults documented
+// alongside each field.
+type OpenOptions struct {
+	// BufferPoolBytes caps Kuzu's buffer pool in bytes. Zero -> DefaultBufferPoolBytes.
+	BufferPoolBytes uint64
+	// MaxThreads caps Kuzu's per-query parallelism. Zero -> defaultMaxThreads().
+	MaxThreads uint64
+	// ReadOnly opens the database in read-only mode.
+	ReadOnly bool
+	// QueryTimeout, if > 0, sets the per-query wall-clock timeout.
+	QueryTimeout time.Duration
+}
+
+func (o OpenOptions) resolved() OpenOptions {
+	if o.BufferPoolBytes == 0 {
+		o.BufferPoolBytes = DefaultBufferPoolBytes
+	}
+	if o.MaxThreads == 0 {
+		o.MaxThreads = defaultMaxThreads()
+	}
+	return o
+}
+
 // Store is the embedded Kuzu graph store facade. It owns one Kuzu database
 // and a single long-lived connection. The zero value is not usable — call
 // Open or OpenReadOnly to construct.
@@ -32,14 +80,26 @@ type Store struct {
 	readOnly bool
 }
 
-// Open creates or opens a Kuzu database at the given directory path. Kuzu
-// itself creates the directory if it does not exist; we ensure the parent
-// exists so a fresh `.codeiq/graph/codeiq.kuzu/` works on first run.
+// Open creates or opens a Kuzu database with safe default OpenOptions
+// (capped BufferPoolBytes + MaxThreads). For tuning, see OpenWithOptions.
 func Open(path string) (*Store, error) {
+	return OpenWithOptions(path, OpenOptions{})
+}
+
+// OpenWithOptions creates or opens a Kuzu database, applying any non-zero
+// fields of opts. Zero-valued fields fall back to safe defaults — see
+// OpenOptions and DefaultBufferPoolBytes.
+func OpenWithOptions(path string, opts OpenOptions) (*Store, error) {
 	if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil {
 		return nil, fmt.Errorf("graph: mkdir parent: %w", err)
 	}
+	opts = opts.resolved()
 	sys := kuzu.DefaultSystemConfig()
+	sys.BufferPoolSize = opts.BufferPoolBytes
+	sys.MaxNumThreads = opts.MaxThreads
+	if opts.ReadOnly {
+		sys.ReadOnly = true
+	}
 	db, err := kuzu.OpenDatabase(path, sys)
 	if err != nil {
 		return nil, fmt.Errorf("graph: open db: %w", err)
@@ -49,7 +109,10 @@ func Open(path string) (*Store, error) {
 		db.Close()
 		return nil, fmt.Errorf("graph: open conn: %w", err)
 	}
-	return &Store{db: db, conn: conn, path: path}, nil
+	if opts.QueryTimeout > 0 {
+		conn.SetTimeout(uint64(opts.QueryTimeout / time.Millisecond))
+	}
+	return &Store{db: db, conn: conn, path: path, readOnly: opts.ReadOnly}, nil
 }
 
 // OpenReadOnly opens an existing Kuzu store in read-only mode and sets a
@@ -65,21 +128,10 @@ func Open(path string) (*Store, error) {
 // queryTimeout <= 0 disables the per-query timeout. Kuzu interprets the
 // timeout in milliseconds; we accept a Go duration for ergonomics.
 func OpenReadOnly(path string, queryTimeout time.Duration) (*Store, error) {
-	sys := kuzu.DefaultSystemConfig()
-	sys.ReadOnly = true
-	db, err := kuzu.OpenDatabase(path, sys)
-	if err != nil {
-		return nil, fmt.Errorf("graph: open read-only %q: %w", path, err)
-	}
-	conn, err := kuzu.OpenConnection(db)
-	if err != nil {
-		db.Close()
-		return nil, fmt.Errorf("graph: open ro conn: %w", err)
-	}
-	if queryTimeout > 0 {
-		conn.SetTimeout(uint64(queryTimeout / time.Millisecond))
-	}
-	return &Store{db: db, conn: conn, path: path, readOnly: true}, nil
+	return OpenWithOptions(path, OpenOptions{
+		ReadOnly:     true,
+		QueryTimeout: queryTimeout,
+	})
 }
 
 // IsReadOnly reports whether the store rejects mutating Cypher.

From 3170fe33b55cc879d3cdce256ac37eb39c2ee103 Mon Sep 17 00:00:00 2001
From: Amit Kumar <ak.nitrr13@gmail.com>
Date: Wed, 13 May 2026 12:59:47 +0000
Subject: [PATCH 4/5] perf(graph_builder): release dedup maps after Snapshot
 (Task A4)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

GraphBuilder.Snapshot extracted deduped nodes/edges into sorted slices
but left builder.nodes and builder.edges maps holding references to
the same objects. With the slices and maps coexisting for the rest of
the enrich pipeline (~30 sec wall time on ~/projects/), ~280 MB of
duplicate references stayed live needlessly.

Clear the maps inside Snapshot before returning. Snapshot is now
single-shot — calling it twice on the same builder returns an empty
snapshot (acceptable; the only caller is analyzer.Enrich which calls
once).

Plan: docs/superpowers/plans/2026-05-13-enrich-oom-fix.md Task A4.

Verification:
- New TestSnapshotReleasesDedupMaps asserts both nodes + edges maps
  are nilled after Snapshot returns.
- go test ./... -count=1: 876 pass (no regressions).
---
 go/internal/analyzer/graph_builder.go      | 16 +++++++++++++++-
 go/internal/analyzer/graph_builder_test.go | 15 +++++++++++++++
 2 files changed, 30 insertions(+), 1 deletion(-)

diff --git a/go/internal/analyzer/graph_builder.go b/go/internal/analyzer/graph_builder.go
index 95d4e162..1b62c25b 100644
--- a/go/internal/analyzer/graph_builder.go
+++ b/go/internal/analyzer/graph_builder.go
@@ -85,6 +85,14 @@ type Snapshot struct {
 
 // Snapshot returns the current state as a sorted, dangling-edge-free
 // Snapshot with surfaced dedup/drop counts.
+//
+// After this call returns, the builder's internal dedup maps are cleared
+// (set to nil). This releases ~280 MB of reference pressure at ~/projects/
+// scale where the downstream enrich pipeline holds the returned Snapshot
+// slices for the lifetime of the function — coexisting with the dedup
+// maps was the largest in-memory duplication in the pipeline. Snapshot
+// is therefore single-shot: subsequent calls to Snapshot or Add on the
+// same builder are not supported.
 func (b *GraphBuilder) Snapshot() Snapshot {
 	b.mu.Lock()
 	defer b.mu.Unlock()
@@ -109,11 +117,17 @@ func (b *GraphBuilder) Snapshot() Snapshot {
 	}
 	sort.Slice(edges, func(i, j int) bool { return edges[i].ID < edges[j].ID })
 
-	return Snapshot{
+	snap := Snapshot{
 		Nodes:        nodes,
 		Edges:        edges,
 		DedupedNodes: b.dedupedNodes,
 		DedupedEdges: b.dedupedEdges,
 		DroppedEdges: dropped,
 	}
+	// Release dedup maps so Go GC can collect them while downstream
+	// enrich stages run. The maps held references to every node and
+	// edge already projected into the returned slices.
+	b.nodes = nil
+	b.edges = nil
+	return snap
 }
diff --git a/go/internal/analyzer/graph_builder_test.go b/go/internal/analyzer/graph_builder_test.go
index cb51c0f1..d0f743f0 100644
--- a/go/internal/analyzer/graph_builder_test.go
+++ b/go/internal/analyzer/graph_builder_test.go
@@ -7,6 +7,21 @@ import (
 	"github.com/randomcodespace/codeiq/go/internal/model"
 )
 
+func TestSnapshotReleasesDedupMaps(t *testing.T) {
+	gb := NewGraphBuilder()
+	gb.Add(&detector.Result{
+		Nodes: []*model.CodeNode{model.NewCodeNode("x", model.NodeClass, "X")},
+		Edges: []*model.CodeEdge{{ID: "e:x:x", SourceID: "x", TargetID: "x", Kind: model.EdgeContains}},
+	})
+	_ = gb.Snapshot()
+	if gb.nodes != nil {
+		t.Errorf("Snapshot must nil GraphBuilder.nodes to allow GC; got len=%d", len(gb.nodes))
+	}
+	if gb.edges != nil {
+		t.Errorf("Snapshot must nil GraphBuilder.edges to allow GC; got len=%d", len(gb.edges))
+	}
+}
+
 func TestGraphBuilderDeduplicatesByID(t *testing.T) {
 	gb := NewGraphBuilder()
 	n1 := model.NewCodeNode("a", model.NodeClass, "A")

From a6cbff700f35aa3aac55c4f5e21d67cb775a2928 Mon Sep 17 00:00:00 2001
From: Amit Kumar <ak.nitrr13@gmail.com>
Date: Wed, 13 May 2026 13:08:59 +0000
Subject: [PATCH 5/5] fix(enricher_test): remove unused rel assignment
 (staticcheck SA4006)

---
 go/internal/intelligence/extractor/enricher_test.go | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/go/internal/intelligence/extractor/enricher_test.go b/go/internal/intelligence/extractor/enricher_test.go
index 0ba15fca..246c98df 100644
--- a/go/internal/intelligence/extractor/enricher_test.go
+++ b/go/internal/intelligence/extractor/enricher_test.go
@@ -221,9 +221,9 @@ func TestEnricher_BoundedConcurrency(t *testing.T) {
 	dir := t.TempDir()
 	nodes := make([]*model.CodeNode, 0, nFiles)
 	for i := 0; i < nFiles; i++ {
-		rel := filepath.Join("src", filepath.Base(t.TempDir())+".java")
-		// One file per node; deterministic distinct paths.
-		rel = filepath.Join("src", "f", "F"+itoa(i)+".java")
+		// Deterministic distinct file paths so the orchestrator schedules
+		// one task per file.
+		rel := filepath.Join("src", "f", "F"+itoa(i)+".java")
 		writeFile(t, filepath.Join(dir, rel), "class F"+itoa(i)+" {}")
 		n := model.NewCodeNode("n:"+itoa(i), model.NodeClass, "F"+itoa(i))
 		n.FilePath = rel