diff --git a/benchmarks/README.md b/benchmarks/README.md
new file mode 100644
index 0000000..2296162
--- /dev/null
+++ b/benchmarks/README.md
@@ -0,0 +1,23 @@
+# Benchmarks
+
+Reproducible local benchmarks for `semble`.
+
+Pinned repositories are listed in `repos.json` and are checked out into `~/.cache/semble-bench`.
+
+## Setup
+
+```bash
+uv run python -m benchmarks.sync_repos
+uv run python -m benchmarks.sync_repos --check
+```
+
+## Run
+
+```bash
+uv run python -m benchmarks.run_benchmark
+uv run python -m benchmarks.run_benchmark --repo fastapi --repo axios
+uv run python -m benchmarks.run_benchmark --language python
+```
+
+Full runs (those with no `--repo` or `--language` filter) automatically save
+their results to `benchmarks/results/<sha>.json`.
diff --git a/benchmarks/__init__.py b/benchmarks/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/benchmarks/annotations/aiohttp.json b/benchmarks/annotations/aiohttp.json
new file mode 100644
index 0000000..ea00500
--- /dev/null
+++ b/benchmarks/annotations/aiohttp.json
@@ -0,0 +1,90 @@
+[
+  {
+    "query": "how the async HTTP client session works",
+    "relevant": ["aiohttp/client.py"],
+    "secondary": []
+  },
+  {
+    "query": "connection pooling and TCP connector",
+    "relevant": ["aiohttp/connector.py"],
+    "secondary": []
+  },
+  {
+    "query": "WebSocket client implementation",
+    "relevant": ["aiohttp/client_ws.py"],
+    "secondary": ["aiohttp/_websocket/reader.py"]
+  },
+  {
+    "query": "request and response object internals",
+    "relevant": ["aiohttp/client_reqrep.py"],
+    "secondary": []
+  },
+  {
+    "query": "URL routing and resource dispatching",
+    "relevant": ["aiohttp/web_urldispatcher.py"],
+    "secondary": []
+  },
+  {
+    "query": "server-side middleware execution",
+    "relevant": ["aiohttp/web_middlewares.py"],
+    "secondary": ["aiohttp/web_app.py"]
+  },
+  {
+    "query": "multipart and form data handling",
+    "relevant": ["aiohttp/multipart.py"],
+    "secondary": ["aiohttp/formdata.py"]
+  },
+  {
+    "query": "response streaming and payload",
+    "relevant": ["aiohttp/streams.py"],
+    "secondary": ["aiohttp/payload.py"]
+  },
+  {
+    "query": "HTTP exception types and error responses",
+    "relevant": ["aiohttp/web_exceptions.py"],
+    "secondary": ["aiohttp/client_exceptions.py"]
+  },
+  {
+    "query": "request tracing and observability hooks",
+    "relevant": ["aiohttp/tracing.py"],
+    "secondary": []
+  },
+  {
+    "query": "how chunked transfer encoding is parsed",
+    "relevant": ["aiohttp/http_parser.py"],
+    "secondary": []
+  },
+  {
+    "query": "how DNS resolution is handled asynchronously",
+    "relevant": ["aiohttp/resolver.py"],
+    "secondary": ["aiohttp/connector.py"]
+  },
+  {
+    "query": "how backpressure and flow control work in streaming",
+    "relevant": ["aiohttp/streams.py"],
+    "secondary": []
+  },
+  {
+    "query": "how connection draining and cleanup happen on close",
+    "relevant": ["aiohttp/connector.py"],
+    "secondary": ["aiohttp/client_proto.py"]
+  },
+  {
+    "query": "how the web application sets up and tears down on startup",
+    "relevant": ["aiohttp/web_app.py"],
+    "secondary": ["aiohttp/web_runner.py"]
+  },
+  {"query": "ClientSession", "relevant": ["aiohttp/client.py"], "secondary": []},
+  {"query": "TCPConnector", "relevant": ["aiohttp/connector.py"], "secondary": []},
+  {"query": "UrlDispatcher", "relevant": ["aiohttp/web_urldispatcher.py"], "secondary": []},
+  {"query": "ClientResponse", "relevant": ["aiohttp/client_reqrep.py"], "secondary": []},
+  {"query": "TraceConfig", "relevant": ["aiohttp/tracing.py"], "secondary": []},
+  {
+    "query": "how ClientSession acquires and releases connections from the connector",
+    "relevant": ["aiohttp/client.py"],
+    "secondary": ["aiohttp/connector.py"],
+    "category": "architecture",
+    "seed": {"path": "aiohttp/client.py", "line": 374},
+    "related": ["aiohttp/connector.py"]
+  }
+]
diff --git a/benchmarks/annotations/alamofire.json b/benchmarks/annotations/alamofire.json
new file mode 100644
index 0000000..df5ee2b
--- /dev/null
+++ b/benchmarks/annotations/alamofire.json
@@ -0,0 +1,99 @@
+[
+  {
+    "query": "how the Session manages the underlying URLSession and dispatches requests",
+    "relevant": ["Source/Core/Session.swift"],
+    "secondary": ["Source/Core/SessionDelegate.swift"],
+    "category": "architecture"
+  },
+  {
+    "query": "how request retrying is implemented with backoff and retry conditions",
+    "relevant": ["Source/Features/RetryPolicy.swift"],
+    "secondary": ["Source/Features/RequestInterceptor.swift"],
+    "category": "architecture"
+  },
+  {
+    "query": "how response validation checks status codes and content types",
+    "relevant": ["Source/Features/Validation.swift"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "how response serialization decodes JSON, Decodable, and strings",
+    "relevant": ["Source/Features/ResponseSerialization.swift"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "how multipart form data encodes fields and file attachments",
+    "relevant": ["Source/Features/MultipartFormData.swift"],
+    "secondary": ["Source/Features/MultipartUpload.swift"],
+    "category": "semantic"
+  },
+  {
+    "query": "how authentication interceptors handle credential challenges",
+    "relevant": ["Source/Features/AuthenticationInterceptor.swift"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "how server trust evaluation handles SSL certificate pinning",
+    "relevant": ["Source/Features/ServerTrustEvaluation.swift"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "how URL query parameters are encoded from Encodable values",
+    "relevant": ["Source/Features/URLEncodedFormEncoder.swift"],
+    "secondary": ["Source/Core/ParameterEncoder.swift"],
+    "category": "semantic"
+  },
+  {
+    "query": "how network reachability is monitored to detect connectivity changes",
+    "relevant": ["Source/Features/NetworkReachabilityManager.swift"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "how download requests save responses to disk",
+    "relevant": ["Source/Core/DownloadRequest.swift"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "how request and response events are logged via EventMonitor",
+    "relevant": ["Source/Features/EventMonitor.swift"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "how the Request class tracks lifecycle state transitions",
+    "relevant": ["Source/Core/Request.swift"],
+    "secondary": [],
+    "category": "architecture"
+  },
+  {
+    "query": "Session",
+    "relevant": ["Source/Core/Session.swift"],
+    "secondary": []
+  },
+  {
+    "query": "AFError",
+    "relevant": ["Source/Core/AFError.swift"],
+    "secondary": []
+  },
+  {
+    "query": "RetryPolicy",
+    "relevant": ["Source/Features/RetryPolicy.swift"],
+    "secondary": []
+  },
+  {
+    "query": "ServerTrustEvaluating",
+    "relevant": ["Source/Features/ServerTrustEvaluation.swift"],
+    "secondary": []
+  },
+  {
+    "query": "HTTPHeaders",
+    "relevant": ["Source/Core/HTTPHeaders.swift"],
+    "secondary": []
+  }
+]
diff --git a/benchmarks/annotations/axios.json b/benchmarks/annotations/axios.json
new file mode 100644
index 0000000..3466c21
--- /dev/null
+++ b/benchmarks/annotations/axios.json
@@ -0,0 +1,46 @@
+[
+  {
+    "query": "how HTTP requests are dispatched through the configured adapter",
+    "relevant": ["lib/core/dispatchRequest.js"],
+    "secondary": ["lib/adapters/adapters.js"],
+    "category": "architecture"
+  },
+  {
+    "query": "request and response interceptors",
+    "relevant": ["lib/core/InterceptorManager.js"],
+    "secondary": ["lib/core/Axios.js"],
+    "category": "semantic"
+  },
+  {
+    "query": "node HTTP adapter implementation",
+    "relevant": ["lib/adapters/http.js"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "how config defaults are merged before a request is sent",
+    "relevant": ["lib/core/mergeConfig.js"],
+    "secondary": ["lib/core/Axios.js"],
+    "category": "architecture"
+  },
+  {
+    "query": "Axios",
+    "relevant": [{"path": "lib/core/Axios.js", "start_line": 22, "end_line": 61}],
+    "secondary": [],
+    "category": "symbol",
+    "seed": {"path": "lib/core/Axios.js", "line": 46},
+    "related": [{"path": "lib/core/Axios.js", "start_line": 179, "end_line": 239}]
+  },
+  {
+    "query": "InterceptorManager",
+    "relevant": [{"path": "lib/core/InterceptorManager.js", "start_line": 5, "end_line": 33}],
+    "secondary": [],
+    "category": "symbol"
+  },
+  {
+    "query": "mergeConfig",
+    "relevant": [{"path": "lib/core/mergeConfig.js", "start_line": 17, "end_line": 106}],
+    "secondary": [],
+    "category": "symbol"
+  }
+]
diff --git a/benchmarks/annotations/cats.json b/benchmarks/annotations/cats.json
new file mode 100644
index 0000000..4140db6
--- /dev/null
+++ b/benchmarks/annotations/cats.json
@@ -0,0 +1,99 @@
+[
+  {
+    "query": "how the Functor type class defines mapping over a context",
+    "relevant": ["core/src/main/scala/cats/Functor.scala"],
+    "secondary": [],
+    "category": "architecture"
+  },
+  {
+    "query": "how Monad composes dependent effectful computations with flatMap",
+    "relevant": ["core/src/main/scala/cats/Monad.scala"],
+    "secondary": ["core/src/main/scala/cats/FlatMap.scala"],
+    "category": "architecture"
+  },
+  {
+    "query": "how Applicative combines independent effects",
+    "relevant": ["core/src/main/scala/cats/Applicative.scala"],
+    "secondary": ["core/src/main/scala/cats/Apply.scala"],
+    "category": "architecture"
+  },
+  {
+    "query": "how errors are handled and recovered in ApplicativeError and MonadError",
+    "relevant": ["core/src/main/scala/cats/ApplicativeError.scala"],
+    "secondary": ["core/src/main/scala/cats/MonadError.scala"],
+    "category": "semantic"
+  },
+  {
+    "query": "how Validated accumulates errors across independent computations",
+    "relevant": ["core/src/main/scala/cats/data/Validated.scala"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "how EitherT combines the Either monad with another effect",
+    "relevant": ["core/src/main/scala/cats/data/EitherT.scala"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "how Kleisli composes functions that return monadic values",
+    "relevant": ["core/src/main/scala/cats/data/Kleisli.scala"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "how Eval provides lazy and memoized evaluation",
+    "relevant": ["core/src/main/scala/cats/Eval.scala"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "how Foldable traverses and reduces elements in a container",
+    "relevant": ["core/src/main/scala/cats/Foldable.scala"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "how Chain provides O(1) concatenation as an alternative to List",
+    "relevant": ["core/src/main/scala/cats/data/Chain.scala"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "how natural transformations map between type constructors",
+    "relevant": ["core/src/main/scala/cats/arrow/FunctionK.scala"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "how the Contravariant functor reverses the mapping direction",
+    "relevant": ["core/src/main/scala/cats/Contravariant.scala"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "Monad",
+    "relevant": ["core/src/main/scala/cats/Monad.scala"],
+    "secondary": []
+  },
+  {
+    "query": "Functor",
+    "relevant": ["core/src/main/scala/cats/Functor.scala"],
+    "secondary": []
+  },
+  {
+    "query": "EitherT",
+    "relevant": ["core/src/main/scala/cats/data/EitherT.scala"],
+    "secondary": []
+  },
+  {
+    "query": "Validated",
+    "relevant": ["core/src/main/scala/cats/data/Validated.scala"],
+    "secondary": []
+  },
+  {
+    "query": "Kleisli",
+    "relevant": ["core/src/main/scala/cats/data/Kleisli.scala"],
+    "secondary": []
+  }
+]
diff --git a/benchmarks/annotations/chi.json b/benchmarks/annotations/chi.json
new file mode 100644
index 0000000..38223b5
--- /dev/null
+++ b/benchmarks/annotations/chi.json
@@ -0,0 +1,32 @@
+[
+  {
+    "query": "HTTP router and middleware composition",
+    "relevant": ["mux.go"],
+    "secondary": ["chain.go"],
+    "category": "architecture"
+  },
+  {
+    "query": "radix tree path matching",
+    "relevant": ["tree.go"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "request routing context storage",
+    "relevant": ["context.go"],
+    "secondary": ["mux.go"],
+    "category": "architecture"
+  },
+  {
+    "query": "request logging middleware",
+    "relevant": ["middleware/logger.go"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "Mux",
+    "relevant": ["mux.go"],
+    "secondary": [],
+    "category": "symbol"
+  }
+]
diff --git a/benchmarks/annotations/cobra.json b/benchmarks/annotations/cobra.json
new file mode 100644
index 0000000..c8d97a2
--- /dev/null
+++ b/benchmarks/annotations/cobra.json
@@ -0,0 +1,32 @@
+[
+  {
+    "query": "core command execution and command tree",
+    "relevant": ["command.go"],
+    "secondary": [],
+    "category": "architecture"
+  },
+  {
+    "query": "shell completion request handling",
+    "relevant": ["completions.go"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "bash completion generation",
+    "relevant": ["bash_completions.go"],
+    "secondary": ["completions.go"],
+    "category": "semantic"
+  },
+  {
+    "query": "positional argument validators",
+    "relevant": ["args.go"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "Command",
+    "relevant": ["command.go"],
+    "secondary": [],
+    "category": "symbol"
+  }
+]
diff --git a/benchmarks/annotations/commons-lang.json b/benchmarks/annotations/commons-lang.json
new file mode 100644
index 0000000..9d51c0c
--- /dev/null
+++ b/benchmarks/annotations/commons-lang.json
@@ -0,0 +1,43 @@
+[
+  {
+    "query": "null-safe string operations and text helpers",
+    "relevant": ["src/main/java/org/apache/commons/lang3/StringUtils.java"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "argument validation helpers and exception rules",
+    "relevant": ["src/main/java/org/apache/commons/lang3/Validate.java"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "reflection-based equals implementation",
+    "relevant": ["src/main/java/org/apache/commons/lang3/builder/EqualsBuilder.java"],
+    "secondary": ["src/main/java/org/apache/commons/lang3/ClassUtils.java"],
+    "category": "architecture"
+  },
+  {
+    "query": "range object with inclusive bounds and comparator support",
+    "relevant": ["src/main/java/org/apache/commons/lang3/Range.java"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "StringUtils",
+    "relevant": ["src/main/java/org/apache/commons/lang3/StringUtils.java"],
+    "secondary": [],
+    "category": "symbol"
+  },
+  {
+    "query": "reflection equals builder internals",
+    "relevant": [{"path": "src/main/java/org/apache/commons/lang3/builder/EqualsBuilder.java", "start_line": 89, "end_line": 99}],
+    "secondary": [],
+    "category": "architecture",
+    "seed": {"path": "src/main/java/org/apache/commons/lang3/builder/EqualsBuilder.java", "line": 89},
+    "related": [
+      {"path": "src/main/java/org/apache/commons/lang3/builder/HashCodeBuilder.java", "start_line": 105, "end_line": 141},
+      {"path": "src/main/java/org/apache/commons/lang3/builder/IDKey.java", "start_line": 27, "end_line": 74}
+    ]
+  }
+]
diff --git a/benchmarks/annotations/express.json b/benchmarks/annotations/express.json
new file mode 100644
index 0000000..dd82ccf
--- /dev/null
+++ b/benchmarks/annotations/express.json
@@ -0,0 +1,32 @@
+[
+  {
+    "query": "application initialization and default configuration",
+    "relevant": ["lib/application.js"],
+    "secondary": ["lib/express.js"],
+    "category": "architecture"
+  },
+  {
+    "query": "request API helpers and header access",
+    "relevant": ["lib/request.js"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "response sending and content negotiation",
+    "relevant": ["lib/response.js"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "template view lookup and rendering",
+    "relevant": ["lib/view.js"],
+    "secondary": ["lib/application.js"],
+    "category": "architecture"
+  },
+  {
+    "query": "response",
+    "relevant": ["lib/response.js"],
+    "secondary": [],
+    "category": "symbol"
+  }
+]
diff --git a/benchmarks/annotations/fastapi.json b/benchmarks/annotations/fastapi.json
new file mode 100644
index 0000000..23a990b
--- /dev/null
+++ b/benchmarks/annotations/fastapi.json
@@ -0,0 +1,112 @@
+[
+  {
+    "query": "how does dependency injection work",
+    "relevant": ["fastapi/dependencies/utils.py"],
+    "secondary": ["fastapi/dependencies/models.py", "fastapi/params.py"]
+  },
+  {
+    "query": "request validation and error handling",
+    "relevant": ["fastapi/exceptions.py"],
+    "secondary": ["fastapi/exception_handlers.py"]
+  },
+  {
+    "query": "how are routes registered",
+    "relevant": ["fastapi/routing.py"],
+    "secondary": ["fastapi/applications.py"]
+  },
+  {
+    "query": "websocket endpoint implementation",
+    "relevant": ["fastapi/websockets.py"],
+    "secondary": ["fastapi/routing.py"]
+  },
+  {
+    "query": "OpenAPI schema generation",
+    "relevant": ["fastapi/openapi/utils.py"],
+    "secondary": ["fastapi/openapi/models.py"]
+  },
+  {
+    "query": "middleware stack and CORS",
+    "relevant": ["fastapi/middleware/cors.py"],
+    "secondary": ["fastapi/applications.py"]
+  },
+  {
+    "query": "file upload handling",
+    "relevant": ["fastapi/datastructures.py"],
+    "secondary": []
+  },
+  {
+    "query": "response model serialization",
+    "relevant": ["fastapi/encoders.py"],
+    "secondary": ["fastapi/routing.py"]
+  },
+  {
+    "query": "background tasks",
+    "relevant": ["fastapi/background.py"],
+    "secondary": []
+  },
+  {
+    "query": "security and OAuth2 authentication",
+    "relevant": ["fastapi/security/oauth2.py"],
+    "secondary": ["fastapi/security/http.py"]
+  },
+  {
+    "query": "how is response validation and serialization applied before sending",
+    "relevant": ["fastapi/routing.py"],
+    "secondary": ["fastapi/encoders.py"]
+  },
+  {
+    "query": "how are nested and sub-dependencies resolved",
+    "relevant": ["fastapi/dependencies/utils.py"],
+    "secondary": []
+  },
+  {
+    "query": "how does FastAPI run sync route functions without blocking",
+    "relevant": ["fastapi/concurrency.py"],
+    "secondary": ["fastapi/routing.py"]
+  },
+  {
+    "query": "how are path parameters extracted and type-converted",
+    "relevant": ["fastapi/routing.py"],
+    "secondary": ["fastapi/dependencies/utils.py"]
+  },
+  {
+    "query": "how does exception propagation work through dependency injection",
+    "relevant": ["fastapi/dependencies/utils.py"],
+    "secondary": ["fastapi/routing.py"]
+  },
+  {
+    "query": "Depends",
+    "relevant": [{"path": "fastapi/params.py", "start_line": 746, "end_line": 749}],
+    "secondary": ["fastapi/param_functions.py"],
+    "category": "symbol",
+    "seed": {"path": "fastapi/params.py", "line": 746},
+    "related": [
+      {"path": "fastapi/params.py", "start_line": 434, "end_line": 468},
+      {"path": "fastapi/params.py", "start_line": 627, "end_line": 664}
+    ]
+  },
+  {
+    "query": "HTTPException",
+    "relevant": [{"path": "fastapi/exceptions.py", "start_line": 17, "end_line": 66}],
+    "secondary": [],
+    "category": "symbol"
+  },
+  {
+    "query": "APIRouter",
+    "relevant": [{"path": "fastapi/routing.py", "start_line": 1005, "end_line": 1030}],
+    "secondary": [],
+    "category": "symbol"
+  },
+  {
+    "query": "BackgroundTasks",
+    "relevant": [{"path": "fastapi/background.py", "start_line": 11, "end_line": 39}],
+    "secondary": [],
+    "category": "symbol"
+  },
+  {
+    "query": "OAuth2PasswordBearer",
+    "relevant": [{"path": "fastapi/security/oauth2.py", "start_line": 433, "end_line": 471}],
+    "secondary": [],
+    "category": "symbol"
+  }
+]
diff --git a/benchmarks/annotations/flask.json b/benchmarks/annotations/flask.json
new file mode 100644
index 0000000..de9fd41
--- /dev/null
+++ b/benchmarks/annotations/flask.json
@@ -0,0 +1,90 @@
+[
+  {
+    "query": "how are routes registered",
+    "relevant": ["src/flask/sansio/scaffold.py"],
+    "secondary": ["src/flask/sansio/app.py"]
+  },
+  {
+    "query": "request context handling",
+    "relevant": ["src/flask/ctx.py"],
+    "secondary": []
+  },
+  {
+    "query": "session management",
+    "relevant": ["src/flask/sessions.py"],
+    "secondary": []
+  },
+  {
+    "query": "blueprint registration",
+    "relevant": ["src/flask/sansio/blueprints.py"],
+    "secondary": ["src/flask/blueprints.py"]
+  },
+  {
+    "query": "configuration loading",
+    "relevant": ["src/flask/config.py"],
+    "secondary": []
+  },
+  {
+    "query": "template rendering",
+    "relevant": ["src/flask/templating.py"],
+    "secondary": []
+  },
+  {
+    "query": "error handlers",
+    "relevant": ["src/flask/sansio/scaffold.py"],
+    "secondary": ["src/flask/app.py"]
+  },
+  {
+    "query": "CLI commands",
+    "relevant": ["src/flask/cli.py"],
+    "secondary": []
+  },
+  {
+    "query": "testing client",
+    "relevant": ["src/flask/testing.py"],
+    "secondary": []
+  },
+  {
+    "query": "JSON response helpers",
+    "relevant": ["src/flask/json/provider.py"],
+    "secondary": ["src/flask/helpers.py"]
+  },
+  {
+    "query": "how does the application context push and pop around requests",
+    "relevant": ["src/flask/ctx.py"],
+    "secondary": ["src/flask/app.py"]
+  },
+  {
+    "query": "how does Flask select which error handler to invoke for an exception",
+    "relevant": ["src/flask/app.py"],
+    "secondary": ["src/flask/sansio/app.py"]
+  },
+  {
+    "query": "how is g used to store data scoped to the current request",
+    "relevant": ["src/flask/ctx.py"],
+    "secondary": ["src/flask/globals.py"]
+  },
+  {
+    "query": "how are request lifecycle signals emitted",
+    "relevant": ["src/flask/signals.py"],
+    "secondary": ["src/flask/app.py"]
+  },
+  {
+    "query": "how does Flask convert a view return value into a response object",
+    "relevant": ["src/flask/app.py"],
+    "secondary": ["src/flask/wrappers.py"]
+  },
+  {"query": "Blueprint", "relevant": ["src/flask/sansio/blueprints.py"], "secondary": ["src/flask/blueprints.py"]},
+  {"query": "render_template", "relevant": ["src/flask/templating.py"], "secondary": []},
+  {"query": "Flask", "relevant": ["src/flask/app.py"], "secondary": []},
+  {"query": "session", "relevant": ["src/flask/sessions.py"], "secondary": ["src/flask/globals.py"]},
+  {"query": "g", "relevant": ["src/flask/globals.py"], "secondary": ["src/flask/ctx.py"]},
+  {
+    "query": "how Blueprint inherits routing behaviour from its sansio base class",
+    "relevant": ["src/flask/sansio/blueprints.py"],
+    "secondary": ["src/flask/blueprints.py"],
+    "category": "architecture",
+    "seed": {"path": "src/flask/blueprints.py", "line": 18},
+    "related": ["src/flask/sansio/blueprints.py"]
+  }
+]
diff --git a/benchmarks/annotations/gin.json b/benchmarks/annotations/gin.json
new file mode 100644
index 0000000..2ba38bc
--- /dev/null
+++ b/benchmarks/annotations/gin.json
@@ -0,0 +1,40 @@
+[
+  {
+    "query": "how routes are grouped and registered",
+    "relevant": ["routergroup.go"],
+    "secondary": ["gin.go"],
+    "category": "architecture"
+  },
+  {
+    "query": "radix tree path matching",
+    "relevant": ["tree.go"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "request context lifecycle and helpers",
+    "relevant": ["context.go"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "panic recovery middleware",
+    "relevant": ["recovery.go"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "JSON request binding and validation",
+    "relevant": ["binding/json.go"],
+    "secondary": ["binding/default_validator.go"],
+    "category": "architecture"
+  },
+  {
+    "query": "how the Gin Engine embeds RouterGroup and uses Context per request",
+    "relevant": ["gin.go"],
+    "secondary": ["routergroup.go", "context.go"],
+    "category": "architecture",
+    "seed": {"path": "gin.go", "line": 92},
+    "related": ["context.go", "routergroup.go"]
+  }
+]
diff --git a/benchmarks/annotations/gson.json b/benchmarks/annotations/gson.json
new file mode 100644
index 0000000..46c2f5e
--- /dev/null
+++ b/benchmarks/annotations/gson.json
@@ -0,0 +1,32 @@
+[
+  {
+    "query": "main Gson API for toJson and fromJson",
+    "relevant": ["gson/src/main/java/com/google/gson/Gson.java"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "GsonBuilder configuration and create",
+    "relevant": ["gson/src/main/java/com/google/gson/GsonBuilder.java"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "reflection-based field serialization and deserialization",
+    "relevant": ["gson/src/main/java/com/google/gson/internal/bind/ReflectiveTypeAdapterFactory.java"],
+    "secondary": [],
+    "category": "architecture"
+  },
+  {
+    "query": "TypeAdapter",
+    "relevant": ["gson/src/main/java/com/google/gson/TypeAdapter.java"],
+    "secondary": [],
+    "category": "symbol"
+  },
+  {
+    "query": "JsonParser",
+    "relevant": ["gson/src/main/java/com/google/gson/JsonParser.java"],
+    "secondary": [],
+    "category": "symbol"
+  }
+]
diff --git a/benchmarks/annotations/guzzle.json b/benchmarks/annotations/guzzle.json
new file mode 100644
index 0000000..2bbbf02
--- /dev/null
+++ b/benchmarks/annotations/guzzle.json
@@ -0,0 +1,92 @@
+[
+  {
+    "query": "HTTP client request sending and defaults",
+    "relevant": ["src/Client.php"],
+    "secondary": ["src/ClientTrait.php"],
+    "category": "semantic"
+  },
+  {
+    "query": "middleware handler stack composition",
+    "relevant": ["src/HandlerStack.php"],
+    "secondary": ["src/Middleware.php"],
+    "category": "architecture"
+  },
+  {
+    "query": "retry middleware and exponential backoff",
+    "relevant": ["src/RetryMiddleware.php"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "cookie jar implementation",
+    "relevant": ["src/Cookie/CookieJar.php"],
+    "secondary": ["src/Cookie/SetCookie.php"],
+    "category": "semantic"
+  },
+  {
+    "query": "redirect following and location header handling",
+    "relevant": ["src/RedirectMiddleware.php"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "curl-based HTTP handler and connection management",
+    "relevant": ["src/Handler/CurlFactory.php"],
+    "secondary": ["src/Handler/CurlHandler.php"],
+    "category": "semantic"
+  },
+  {
+    "query": "mock handler for simulating HTTP responses in tests",
+    "relevant": ["src/Handler/MockHandler.php"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "concurrent request pool with limited parallelism",
+    "relevant": ["src/Pool.php"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "available request options and their configuration",
+    "relevant": ["src/RequestOptions.php"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "HTTP message formatting and request logging",
+    "relevant": ["src/MessageFormatter.php"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "how request body and content-type are prepared before sending",
+    "relevant": ["src/PrepareBodyMiddleware.php"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "exception hierarchy for HTTP and transfer errors",
+    "relevant": ["src/Exception/RequestException.php"],
+    "secondary": ["src/Exception/GuzzleException.php"],
+    "category": "semantic"
+  },
+  {
+    "query": "Client",
+    "relevant": ["src/Client.php"],
+    "secondary": [],
+    "category": "symbol"
+  },
+  {
+    "query": "HandlerStack",
+    "relevant": ["src/HandlerStack.php"],
+    "secondary": [],
+    "category": "symbol"
+  },
+  {
+    "query": "RedirectMiddleware",
+    "relevant": ["src/RedirectMiddleware.php"],
+    "secondary": [],
+    "category": "symbol"
+  }
+]
diff --git a/benchmarks/annotations/httpx.json b/benchmarks/annotations/httpx.json
new file mode 100644
index 0000000..08edd0c
--- /dev/null
+++ b/benchmarks/annotations/httpx.json
@@ -0,0 +1,90 @@
+[
+  {
+    "query": "how are HTTP requests sent",
+    "relevant": ["httpx/_client.py"],
+    "secondary": []
+  },
+  {
+    "query": "authentication and credentials",
+    "relevant": ["httpx/_auth.py"],
+    "secondary": []
+  },
+  {
+    "query": "connection pooling and transport",
+    "relevant": ["httpx/_transports/default.py"],
+    "secondary": ["httpx/_transports/base.py"]
+  },
+  {
+    "query": "URL parsing and construction",
+    "relevant": ["httpx/_urlparse.py"],
+    "secondary": ["httpx/_urls.py"]
+  },
+  {
+    "query": "response decoding and content",
+    "relevant": ["httpx/_decoders.py"],
+    "secondary": ["httpx/_models.py"]
+  },
+  {
+    "query": "timeout configuration",
+    "relevant": ["httpx/_config.py"],
+    "secondary": []
+  },
+  {
+    "query": "cookie handling",
+    "relevant": ["httpx/_models.py"],
+    "secondary": ["httpx/_client.py"]
+  },
+  {
+    "query": "multipart file upload",
+    "relevant": ["httpx/_multipart.py"],
+    "secondary": ["httpx/_content.py"]
+  },
+  {
+    "query": "redirect following",
+    "relevant": ["httpx/_client.py"],
+    "secondary": []
+  },
+  {
+    "query": "error and exception types",
+    "relevant": ["httpx/_exceptions.py"],
+    "secondary": []
+  },
+  {
+    "query": "how does digest authentication handle the challenge-response flow",
+    "relevant": ["httpx/_auth.py"],
+    "secondary": []
+  },
+  {
+    "query": "how are keep-alive connections managed and reused",
+    "relevant": ["httpx/_transports/default.py"],
+    "secondary": ["httpx/_config.py"]
+  },
+  {
+    "query": "how does streaming response body iteration work",
+    "relevant": ["httpx/_models.py"],
+    "secondary": ["httpx/_decoders.py"]
+  },
+  {
+    "query": "how are query parameters encoded into the URL",
+    "relevant": ["httpx/_urls.py"],
+    "secondary": ["httpx/_urlparse.py"]
+  },
+  {
+    "query": "how are retries and transport errors surfaced to the caller",
+    "relevant": ["httpx/_exceptions.py"],
+    "secondary": ["httpx/_transports/default.py"]
+  },
+  {"query": "DigestAuth", "relevant": ["httpx/_auth.py"], "secondary": []},
+  {"query": "AsyncClient", "relevant": ["httpx/_client.py"], "secondary": []},
+  {"query": "Timeout", "relevant": ["httpx/_config.py"], "secondary": []},
+  {"query": "HTTPStatusError", "relevant": ["httpx/_exceptions.py"], "secondary": []},
+  {"query": "URL", "relevant": ["httpx/_urls.py"], "secondary": []},
+  {
+    "query": "how the HTTP transport backend sends requests over the wire",
+    "relevant": ["httpx/_transports/default.py"],
+    "secondary": ["httpx/_transports/base.py"],
+    "category": "architecture",
+    "seed": {"path": "httpx/_client.py", "line": 731},
+    "related": ["httpx/_transports/default.py"]
+  }
+]
diff --git a/benchmarks/annotations/jackson-databind.json b/benchmarks/annotations/jackson-databind.json
new file mode 100644
index 0000000..079d2b6
--- /dev/null
+++ b/benchmarks/annotations/jackson-databind.json
@@ -0,0 +1,49 @@
+[
+  {
+    "query": "ObjectMapper entry point for reading and writing JSON",
+    "relevant": ["src/main/java/tools/jackson/databind/ObjectMapper.java"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "JSON-specific mapper builder",
+    "relevant": ["src/main/java/tools/jackson/databind/json/JsonMapper.java"],
+    "secondary": ["src/main/java/tools/jackson/databind/ObjectMapper.java"],
+    "category": "architecture"
+  },
+  {
+    "query": "mutable JSON object node with named field operations",
+    "relevant": ["src/main/java/tools/jackson/databind/node/ObjectNode.java"],
+    "secondary": ["src/main/java/tools/jackson/databind/node/JsonNodeFactory.java"],
+    "category": "semantic"
+  },
+  {
+    "query": "polymorphic type resolution",
+    "relevant": ["src/main/java/tools/jackson/databind/jsontype/impl/StdTypeResolverBuilder.java"],
+    "secondary": ["src/main/java/tools/jackson/databind/jsontype/impl/TypeDeserializerBase.java"],
+    "category": "architecture"
+  },
+  {
+    "query": "ObjectMapper",
+    "relevant": [{"path": "src/main/java/tools/jackson/databind/ObjectMapper.java", "start_line": 93, "end_line": 132}],
+    "secondary": [],
+    "category": "symbol",
+    "seed": {"path": "src/main/java/tools/jackson/databind/ObjectMapper.java", "line": 109},
+    "related": [
+      {"path": "src/main/java/tools/jackson/databind/ObjectMapper.java", "start_line": 356, "end_line": 387},
+      {"path": "src/main/java/tools/jackson/databind/cfg/MapperBuilder.java", "start_line": 338, "end_line": 344}
+    ]
+  },
+  {
+    "query": "JsonMapper",
+    "relevant": [{"path": "src/main/java/tools/jackson/databind/json/JsonMapper.java", "start_line": 16, "end_line": 44}],
+    "secondary": ["src/main/java/tools/jackson/databind/ObjectMapper.java"],
+    "category": "symbol"
+  },
+  {
+    "query": "ObjectNode",
+    "relevant": [{"path": "src/main/java/tools/jackson/databind/node/ObjectNode.java", "start_line": 21, "end_line": 60}],
+    "secondary": [],
+    "category": "symbol"
+  }
+]
diff --git a/benchmarks/annotations/ktor.json b/benchmarks/annotations/ktor.json
new file mode 100644
index 0000000..b2aa38b
--- /dev/null
+++ b/benchmarks/annotations/ktor.json
@@ -0,0 +1,93 @@
+[
+  {
+    "query": "how the HttpClient is configured with plugins and an engine",
+    "relevant": ["ktor-client/ktor-client-core/common/src/io/ktor/client/HttpClient.kt"],
+    "secondary": ["ktor-client/ktor-client-core/common/src/io/ktor/client/HttpClientConfig.kt"],
+    "category": "architecture"
+  },
+  {
+    "query": "how request and response pipelines process interceptors",
+    "relevant": ["ktor-client/ktor-client-core/common/src/io/ktor/client/request/HttpRequestPipeline.kt"],
+    "secondary": ["ktor-client/ktor-client-core/common/src/io/ktor/client/statement/HttpResponsePipeline.kt"],
+    "category": "architecture"
+  },
+  {
+    "query": "how HTTP caching stores and validates cached responses",
+    "relevant": ["ktor-client/ktor-client-core/common/src/io/ktor/client/plugins/cache/HttpCache.kt"],
+    "secondary": ["ktor-client/ktor-client-core/common/src/io/ktor/client/plugins/cache/HttpCacheEntry.kt"],
+    "category": "architecture"
+  },
+  {
+    "query": "how HTTP status codes are checked and exceptions raised on failure",
+    "relevant": ["ktor-client/ktor-client-core/common/src/io/ktor/client/plugins/HttpCallValidator.kt"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "how request and response timeouts are enforced",
+    "relevant": ["ktor-client/ktor-client-core/common/src/io/ktor/client/plugins/HttpTimeout.kt"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "how HTTP redirects are followed automatically",
+    "relevant": ["ktor-client/ktor-client-core/common/src/io/ktor/client/plugins/HttpRedirect.kt"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "how cookies are stored and sent with requests",
+    "relevant": ["ktor-client/ktor-client-core/common/src/io/ktor/client/plugins/cookies/HttpCookies.kt"],
+    "secondary": ["ktor-client/ktor-client-core/common/src/io/ktor/client/plugins/cookies/AcceptAllCookiesStorage.kt"],
+    "category": "semantic"
+  },
+  {
+    "query": "how multipart form data uploads are constructed",
+    "relevant": ["ktor-client/ktor-client-core/common/src/io/ktor/client/request/forms/FormDataContent.kt"],
+    "secondary": ["ktor-client/ktor-client-core/common/src/io/ktor/client/request/forms/formDsl.kt"],
+    "category": "semantic"
+  },
+  {
+    "query": "how WebSocket connections are established and messages exchanged",
+    "relevant": ["ktor-client/ktor-client-core/common/src/io/ktor/client/plugins/websocket/WebSockets.kt"],
+    "secondary": ["ktor-client/ktor-client-core/common/src/io/ktor/client/plugins/websocket/ClientSessions.kt"],
+    "category": "architecture"
+  },
+  {
+    "query": "how server-sent events are received and parsed from a streaming response",
+    "relevant": ["ktor-client/ktor-client-core/common/src/io/ktor/client/plugins/sse/SSE.kt"],
+    "secondary": ["ktor-client/ktor-client-core/common/src/io/ktor/client/plugins/sse/DefaultClientSSESession.kt"],
+    "category": "semantic"
+  },
+  {
+    "query": "how the engine abstraction separates the client API from the transport",
+    "relevant": ["ktor-client/ktor-client-core/common/src/io/ktor/client/engine/HttpClientEngine.kt"],
+    "secondary": ["ktor-client/ktor-client-core/common/src/io/ktor/client/engine/HttpClientEngineBase.kt"],
+    "category": "architecture"
+  },
+  {
+    "query": "HttpClient",
+    "relevant": ["ktor-client/ktor-client-core/common/src/io/ktor/client/HttpClient.kt"],
+    "secondary": []
+  },
+  {
+    "query": "HttpTimeout",
+    "relevant": ["ktor-client/ktor-client-core/common/src/io/ktor/client/plugins/HttpTimeout.kt"],
+    "secondary": []
+  },
+  {
+    "query": "HttpRedirect",
+    "relevant": ["ktor-client/ktor-client-core/common/src/io/ktor/client/plugins/HttpRedirect.kt"],
+    "secondary": []
+  },
+  {
+    "query": "HttpCookies",
+    "relevant": ["ktor-client/ktor-client-core/common/src/io/ktor/client/plugins/cookies/HttpCookies.kt"],
+    "secondary": []
+  },
+  {
+    "query": "HttpClientEngine",
+    "relevant": ["ktor-client/ktor-client-core/common/src/io/ktor/client/engine/HttpClientEngine.kt"],
+    "secondary": []
+  }
+]
diff --git a/benchmarks/annotations/laravel-framework.json b/benchmarks/annotations/laravel-framework.json
new file mode 100644
index 0000000..376c82e
--- /dev/null
+++ b/benchmarks/annotations/laravel-framework.json
@@ -0,0 +1,32 @@
+[
+  {
+    "query": "queue connection resolution and connectors",
+    "relevant": ["src/Illuminate/Queue/QueueManager.php"],
+    "secondary": [],
+    "category": "architecture"
+  },
+  {
+    "query": "database queue implementation",
+    "relevant": ["src/Illuminate/Queue/DatabaseQueue.php"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "queue worker command execution",
+    "relevant": ["src/Illuminate/Queue/Console/WorkCommand.php"],
+    "secondary": ["src/Illuminate/Queue/Worker.php"],
+    "category": "architecture"
+  },
+  {
+    "query": "session store behavior",
+    "relevant": ["src/Illuminate/Session/Store.php"],
+    "secondary": ["src/Illuminate/Session/SessionManager.php"],
+    "category": "semantic"
+  },
+  {
+    "query": "QueueManager",
+    "relevant": ["src/Illuminate/Queue/QueueManager.php"],
+    "secondary": [],
+    "category": "symbol"
+  }
+]
diff --git a/benchmarks/annotations/messagepack-csharp.json b/benchmarks/annotations/messagepack-csharp.json
new file mode 100644
index 0000000..df6a60c
--- /dev/null
+++ b/benchmarks/annotations/messagepack-csharp.json
@@ -0,0 +1,98 @@
+[
+  {
+    "query": "how objects are serialized to MessagePack binary format",
+    "relevant": ["src/MessagePack/MessagePackSerializer.cs"],
+    "secondary": ["src/MessagePack/MessagePackWriter.cs"],
+    "category": "architecture"
+  },
+  {
+    "query": "how binary data is deserialized back into typed C# objects",
+    "relevant": ["src/MessagePack/MessagePackSerializer.cs"],
+    "secondary": ["src/MessagePack/MessagePackReader.cs"],
+    "category": "architecture"
+  },
+  {
+    "query": "how custom formatters are registered and resolved for types",
+    "relevant": ["src/MessagePack/IFormatterResolver.cs"],
+    "secondary": ["src/MessagePack/Resolvers/CompositeResolver.cs"],
+    "category": "architecture"
+  },
+  {
+    "query": "how the dynamic object resolver generates serialization code at runtime",
+    "relevant": ["src/MessagePack/Resolvers/DynamicObjectResolver.cs"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "how collections and arrays are serialized",
+    "relevant": ["src/MessagePack/Formatters/CollectionFormatter.cs"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "how union types with subtypes are handled in serialization",
+    "relevant": ["src/MessagePack/Resolvers/DynamicUnionResolver.cs"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "how serializer options control compression and resolver configuration",
+    "relevant": ["src/MessagePack/MessagePackSerializerOptions.cs"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "how enums are serialized as integers or strings",
+    "relevant": ["src/MessagePack/Resolvers/DynamicEnumResolver.cs"],
+    "secondary": ["src/MessagePack/Formatters/GenericEnumFormatter`1.cs", "src/MessagePack/Formatters/EnumAsStringFormatter`1.cs"],
+    "category": "semantic"
+  },
+  {
+    "query": "reading MessagePack data from a stream incrementally",
+    "relevant": ["src/MessagePack/MessagePackStreamReader.cs"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "how the sequence pool manages buffer reuse during serialization",
+    "relevant": ["src/MessagePack/SequencePool.cs"],
+    "secondary": ["src/MessagePack/BufferWriter.cs"],
+    "category": "architecture"
+  },
+  {
+    "query": "how LZ4 compression is applied to MessagePack payloads",
+    "relevant": ["src/MessagePack/MessagePackSerializer.cs"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "MessagePackSerializer",
+    "relevant": ["src/MessagePack/MessagePackSerializer.cs"],
+    "secondary": []
+  },
+  {
+    "query": "MessagePackReader",
+    "relevant": ["src/MessagePack/MessagePackReader.cs"],
+    "secondary": []
+  },
+  {
+    "query": "IMessagePackFormatter",
+    "relevant": ["src/MessagePack/Formatters/IMessagePackFormatter`1.cs"],
+    "secondary": []
+  },
+  {
+    "query": "CompositeResolver",
+    "relevant": ["src/MessagePack/Resolvers/CompositeResolver.cs"],
+    "secondary": []
+  },
+  {
+    "query": "MessagePackSerializerOptions",
+    "relevant": ["src/MessagePack/MessagePackSerializerOptions.cs"],
+    "secondary": []
+  },
+  {
+    "query": "StandardResolver",
+    "relevant": ["src/MessagePack/Resolvers/StandardResolver.cs"],
+    "secondary": []
+  }
+]
diff --git a/benchmarks/annotations/model2vec.json b/benchmarks/annotations/model2vec.json
new file mode 100644
index 0000000..ad58320
--- /dev/null
+++ b/benchmarks/annotations/model2vec.json
@@ -0,0 +1,82 @@
+[
+  {
+    "query": "how the StaticModel encodes text to embeddings",
+    "relevant": ["model2vec/model.py"],
+    "secondary": ["model2vec/inference/model.py"]
+  },
+  {
+    "query": "how a model is distilled from a sentence transformer",
+    "relevant": ["model2vec/distill/distillation.py"],
+    "secondary": []
+  },
+  {
+    "query": "tokenizer construction and vocabulary building",
+    "relevant": ["model2vec/tokenizer/tokenizer.py"],
+    "secondary": ["model2vec/distill/utils.py"]
+  },
+  {
+    "query": "saving and loading models from disk",
+    "relevant": ["model2vec/persistence/persistence.py"],
+    "secondary": ["model2vec/persistence/datamodels.py"]
+  },
+  {
+    "query": "quantization of model weights",
+    "relevant": ["model2vec/quantization.py"],
+    "secondary": ["model2vec/vocabulary_quantization.py"]
+  },
+  {
+    "query": "pushing and loading models from HuggingFace Hub",
+    "relevant": ["model2vec/persistence/hf.py"],
+    "secondary": []
+  },
+  {
+    "query": "distillation inference and embedding extraction",
+    "relevant": ["model2vec/distill/inference.py"],
+    "secondary": []
+  },
+  {
+    "query": "training a classifier on top of embeddings",
+    "relevant": ["model2vec/train/classifier.py"],
+    "secondary": ["model2vec/train/base.py"]
+  },
+  {
+    "query": "generating model cards for publication",
+    "relevant": ["model2vec/modelcards/modelcards.py"],
+    "secondary": []
+  },
+  {
+    "query": "utility functions used across the package",
+    "relevant": ["model2vec/utils.py"],
+    "secondary": ["model2vec/distill/utils.py"]
+  },
+  {
+    "query": "how mean pooling is applied over token embeddings during distillation",
+    "relevant": ["model2vec/distill/inference.py"],
+    "secondary": []
+  },
+  {
+    "query": "how PCA reduces embedding dimensionality",
+    "relevant": ["model2vec/distill/distillation.py"],
+    "secondary": ["model2vec/distill/inference.py"]
+  },
+  {
+    "query": "how out-of-vocabulary tokens are handled at inference time",
+    "relevant": ["model2vec/model.py"],
+    "secondary": ["model2vec/tokenizer/tokenizer.py"]
+  },
+  {
+    "query": "how vocabulary is pruned during distillation",
+    "relevant": ["model2vec/distill/utils.py"],
+    "secondary": ["model2vec/distill/distillation.py"]
+  },
+  {
+    "query": "how subword token weights are aggregated for whole-word embeddings",
+    "relevant": ["model2vec/distill/inference.py"],
+    "secondary": ["model2vec/distill/distillation.py"]
+  },
+  {"query": "StaticModel", "relevant": ["model2vec/model.py"], "secondary": []},
+  {"query": "distill", "relevant": ["model2vec/distill/distillation.py"], "secondary": []},
+  {"query": "PoolingMode", "relevant": ["model2vec/distill/inference.py"], "secondary": []},
+  {"query": "quantize", "relevant": ["model2vec/quantization.py"], "secondary": []},
+  {"query": "Tokenizer", "relevant": ["model2vec/tokenizer/tokenizer.py"], "secondary": []}
+]
diff --git a/benchmarks/annotations/monolog.json b/benchmarks/annotations/monolog.json
new file mode 100644
index 0000000..5b53451
--- /dev/null
+++ b/benchmarks/annotations/monolog.json
@@ -0,0 +1,52 @@
+[
+  {
+    "query": "logger handler stack and processors",
+    "relevant": ["src/Monolog/Logger.php"],
+    "secondary": [],
+    "category": "architecture"
+  },
+  {
+    "query": "stream log handler writes to files and streams",
+    "relevant": ["src/Monolog/Handler/StreamHandler.php"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "line formatter output formatting",
+    "relevant": ["src/Monolog/Formatter/LineFormatter.php"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "json log formatting",
+    "relevant": ["src/Monolog/Formatter/JsonFormatter.php"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "Logger",
+    "relevant": ["src/Monolog/Logger.php"],
+    "secondary": [],
+    "category": "symbol"
+  },
+  {
+    "query": "StreamHandler",
+    "relevant": ["src/Monolog/Handler/StreamHandler.php"],
+    "secondary": [],
+    "category": "symbol"
+  },
+  {
+    "query": "JsonFormatter",
+    "relevant": ["src/Monolog/Formatter/JsonFormatter.php"],
+    "secondary": [],
+    "category": "symbol"
+  },
+  {
+    "query": "how log handlers are registered and invoked by Logger",
+    "relevant": ["src/Monolog/Logger.php"],
+    "secondary": ["src/Monolog/Handler/AbstractHandler.php"],
+    "category": "architecture",
+    "seed": {"path": "src/Monolog/Logger.php", "line": 207},
+    "related": ["src/Monolog/Handler/AbstractHandler.php", "src/Monolog/Handler/HandlerInterface.php"]
+  }
+]
diff --git a/benchmarks/annotations/pydantic.json b/benchmarks/annotations/pydantic.json
new file mode 100644
index 0000000..1484adc
--- /dev/null
+++ b/benchmarks/annotations/pydantic.json
@@ -0,0 +1,82 @@
+[
+  {
+    "query": "how is BaseModel defined and instantiated",
+    "relevant": ["pydantic/main.py"],
+    "secondary": []
+  },
+  {
+    "query": "how are model fields declared and constrained",
+    "relevant": ["pydantic/fields.py"],
+    "secondary": ["pydantic/types.py"]
+  },
+  {
+    "query": "JSON schema generation from models",
+    "relevant": ["pydantic/json_schema.py"],
+    "secondary": []
+  },
+  {
+    "query": "custom field and model validators",
+    "relevant": ["pydantic/functional_validators.py"],
+    "secondary": ["pydantic/class_validators.py"]
+  },
+  {
+    "query": "how to serialize models to JSON",
+    "relevant": ["pydantic/functional_serializers.py"],
+    "secondary": ["pydantic/main.py"]
+  },
+  {
+    "query": "network types like URLs and email addresses",
+    "relevant": ["pydantic/networks.py"],
+    "secondary": []
+  },
+  {
+    "query": "model configuration and settings",
+    "relevant": ["pydantic/config.py"],
+    "secondary": []
+  },
+  {
+    "query": "validating data without a model using TypeAdapter",
+    "relevant": ["pydantic/type_adapter.py"],
+    "secondary": []
+  },
+  {
+    "query": "alias handling for field names",
+    "relevant": ["pydantic/aliases.py"],
+    "secondary": ["pydantic/alias_generators.py"]
+  },
+  {
+    "query": "root model for wrapping a single value",
+    "relevant": ["pydantic/root_model.py"],
+    "secondary": []
+  },
+  {
+    "query": "how discriminated unions select the right model variant",
+    "relevant": ["pydantic/types.py"],
+    "secondary": ["pydantic/main.py"]
+  },
+  {
+    "query": "how computed fields are defined on a model",
+    "relevant": ["pydantic/fields.py"],
+    "secondary": ["pydantic/functional_serializers.py"]
+  },
+  {
+    "query": "what runs after model initialisation in model_post_init",
+    "relevant": ["pydantic/main.py"],
+    "secondary": []
+  },
+  {
+    "query": "how model inheritance and field overriding works",
+    "relevant": ["pydantic/main.py"],
+    "secondary": ["pydantic/fields.py"]
+  },
+  {
+    "query": "how to validate a function's arguments with pydantic",
+    "relevant": ["pydantic/validate_call_decorator.py"],
+    "secondary": ["pydantic/decorator.py"]
+  },
+  {"query": "BaseModel", "relevant": ["pydantic/main.py"], "secondary": []},
+  {"query": "field_validator", "relevant": ["pydantic/functional_validators.py"], "secondary": []},
+  {"query": "model_validator", "relevant": ["pydantic/functional_validators.py"], "secondary": []},
+  {"query": "ConfigDict", "relevant": ["pydantic/config.py"], "secondary": []},
+  {"query": "Field", "relevant": ["pydantic/fields.py"], "secondary": []}
+]
diff --git a/benchmarks/annotations/rack.json b/benchmarks/annotations/rack.json
new file mode 100644
index 0000000..037b6b1
--- /dev/null
+++ b/benchmarks/annotations/rack.json
@@ -0,0 +1,44 @@
+[
+  {
+    "query": "HTTP request wrapper and forwarded headers",
+    "relevant": ["lib/rack/request.rb"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "HTTP response construction",
+    "relevant": ["lib/rack/response.rb"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "middleware builder DSL",
+    "relevant": ["lib/rack/builder.rb"],
+    "secondary": [],
+    "category": "architecture"
+  },
+  {
+    "query": "URL path mapping across mounted apps",
+    "relevant": ["lib/rack/urlmap.rb"],
+    "secondary": [],
+    "category": "architecture"
+  },
+  {
+    "query": "Request",
+    "relevant": ["lib/rack/request.rb"],
+    "secondary": [],
+    "category": "symbol"
+  },
+  {
+    "query": "Rack::Response",
+    "relevant": [{"path": "lib/rack/response.rb", "start_line": 23, "end_line": 62}],
+    "secondary": [],
+    "category": "symbol"
+  },
+  {
+    "query": "Rack::Builder",
+    "relevant": [{"path": "lib/rack/builder.rb", "start_line": 36, "end_line": 80}],
+    "secondary": [],
+    "category": "symbol"
+  }
+]
diff --git a/benchmarks/annotations/rails.json b/benchmarks/annotations/rails.json
new file mode 100644
index 0000000..9b5d376
--- /dev/null
+++ b/benchmarks/annotations/rails.json
@@ -0,0 +1,49 @@
+[
+  {
+    "query": "application boot process and initialization",
+    "relevant": ["railties/lib/rails/application.rb"],
+    "secondary": ["railties/lib/rails/configuration.rb"],
+    "category": "architecture"
+  },
+  {
+    "query": "engine configuration and load paths",
+    "relevant": ["railties/lib/rails/engine/configuration.rb"],
+    "secondary": ["railties/lib/rails/application.rb"],
+    "category": "architecture"
+  },
+  {
+    "query": "rack integration for rails applications",
+    "relevant": ["railties/lib/rails/rack.rb"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "path management for rails apps",
+    "relevant": ["railties/lib/rails/paths.rb"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "Application",
+    "relevant": ["railties/lib/rails/application.rb"],
+    "secondary": [],
+    "category": "symbol"
+  },
+  {
+    "query": "Rails::Engine",
+    "relevant": [{"path": "railties/lib/rails/engine.rb", "start_line": 348, "end_line": 387}],
+    "secondary": ["railties/lib/rails/engine/configuration.rb"],
+    "category": "symbol",
+    "seed": {"path": "railties/lib/rails/engine/configuration.rb", "line": 73},
+    "related": [
+      {"path": "railties/lib/rails/engine/updater.rb", "start_line": 1, "end_line": 21},
+      {"path": "railties/lib/rails/railtie/configuration.rb", "start_line": 1, "end_line": 54}
+    ]
+  },
+  {
+    "query": "Rails::Paths",
+    "relevant": ["railties/lib/rails/paths.rb"],
+    "secondary": [],
+    "category": "symbol"
+  }
+]
diff --git a/benchmarks/annotations/redux.json b/benchmarks/annotations/redux.json
new file mode 100644
index 0000000..b551227
--- /dev/null
+++ b/benchmarks/annotations/redux.json
@@ -0,0 +1,32 @@
+[
+  {
+    "query": "store creation and dispatch lifecycle",
+    "relevant": ["src/createStore.ts"],
+    "secondary": [],
+    "category": "architecture"
+  },
+  {
+    "query": "middleware pipeline composition",
+    "relevant": ["src/applyMiddleware.ts"],
+    "secondary": ["src/compose.ts"],
+    "category": "architecture"
+  },
+  {
+    "query": "combining reducers and validating reducer shape",
+    "relevant": ["src/combineReducers.ts"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "function composition utility",
+    "relevant": ["src/compose.ts"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "createStore",
+    "relevant": ["src/createStore.ts"],
+    "secondary": [],
+    "category": "symbol"
+  }
+]
diff --git a/benchmarks/annotations/requests.json b/benchmarks/annotations/requests.json
new file mode 100644
index 0000000..067c6c2
--- /dev/null
+++ b/benchmarks/annotations/requests.json
@@ -0,0 +1,109 @@
+[
+  {
+    "query": "how HTTP sessions manage state and connections",
+    "relevant": ["src/requests/sessions.py"],
+    "secondary": []
+  },
+  {
+    "query": "authentication mechanisms and credential handling",
+    "relevant": ["src/requests/auth.py"],
+    "secondary": []
+  },
+  {
+    "query": "transport adapters and connection pooling",
+    "relevant": ["src/requests/adapters.py"],
+    "secondary": []
+  },
+  {
+    "query": "cookie storage and persistence",
+    "relevant": ["src/requests/cookies.py"],
+    "secondary": []
+  },
+  {
+    "query": "how redirects are followed",
+    "relevant": ["src/requests/sessions.py"],
+    "secondary": ["src/requests/models.py"]
+  },
+  {
+    "query": "error and exception types",
+    "relevant": ["src/requests/exceptions.py"],
+    "secondary": []
+  },
+  {
+    "query": "request and response model internals",
+    "relevant": ["src/requests/models.py"],
+    "secondary": []
+  },
+  {
+    "query": "SSL certificate verification",
+    "relevant": ["src/requests/adapters.py"],
+    "secondary": ["src/requests/certs.py"]
+  },
+  {
+    "query": "event hooks system",
+    "relevant": ["src/requests/hooks.py"],
+    "secondary": []
+  },
+  {
+    "query": "utility functions for encoding and headers",
+    "relevant": ["src/requests/utils.py"],
+    "secondary": []
+  },
+  {
+    "query": "how a PreparedRequest is built from user-supplied arguments",
+    "relevant": ["src/requests/models.py"],
+    "secondary": ["src/requests/sessions.py"]
+  },
+  {
+    "query": "how digest authentication implements the challenge-response handshake",
+    "relevant": ["src/requests/auth.py"],
+    "secondary": []
+  },
+  {
+    "query": "how response encoding is detected from headers and content",
+    "relevant": ["src/requests/utils.py"],
+    "secondary": ["src/requests/models.py"]
+  },
+  {
+    "query": "how proxy settings are read from environment variables",
+    "relevant": ["src/requests/utils.py"],
+    "secondary": ["src/requests/sessions.py"]
+  },
+  {
+    "query": "how connection keep-alive and pooling limits are configured",
+    "relevant": ["src/requests/adapters.py"],
+    "secondary": []
+  },
+  {
+    "query": "Session",
+    "relevant": [{"path": "src/requests/sessions.py", "start_line": 356, "end_line": 394}],
+    "secondary": [],
+    "category": "symbol"
+  },
+  {
+    "query": "HTTPDigestAuth",
+    "relevant": [{"path": "src/requests/auth.py", "start_line": 107, "end_line": 136}],
+    "secondary": [],
+    "category": "symbol"
+  },
+  {
+    "query": "PreparedRequest",
+    "relevant": [{"path": "src/requests/models.py", "start_line": 315, "end_line": 364}],
+    "secondary": [],
+    "category": "symbol",
+    "seed": {"path": "src/requests/sessions.py", "line": 485},
+    "related": [{"path": "src/requests/models.py", "start_line": 315, "end_line": 353}]
+  },
+  {
+    "query": "HTTPAdapter",
+    "relevant": [{"path": "src/requests/adapters.py", "start_line": 144, "end_line": 192}],
+    "secondary": [],
+    "category": "symbol"
+  },
+  {
+    "query": "Response",
+    "relevant": [{"path": "src/requests/models.py", "start_line": 642, "end_line": 691}],
+    "secondary": [],
+    "category": "symbol"
+  }
+]
diff --git a/benchmarks/annotations/sinatra.json b/benchmarks/annotations/sinatra.json
new file mode 100644
index 0000000..ddec420
--- /dev/null
+++ b/benchmarks/annotations/sinatra.json
@@ -0,0 +1,100 @@
+[
+  {
+    "query": "core Sinatra DSL and routing behavior",
+    "relevant": ["lib/sinatra/base.rb"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "application startup and command line entrypoint",
+    "relevant": ["lib/sinatra/main.rb"],
+    "secondary": [],
+    "category": "architecture"
+  },
+  {
+    "query": "exception rendering and stack traces",
+    "relevant": ["lib/sinatra/show_exceptions.rb"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "request logging middleware",
+    "relevant": ["lib/sinatra/middleware/logger.rb"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "how route handlers are compiled and URL patterns matched",
+    "relevant": [{"path": "lib/sinatra/base.rb", "start_line": 1795, "end_line": 1817}],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "before and after filter hooks for the request lifecycle",
+    "relevant": [{"path": "lib/sinatra/base.rb", "start_line": 1486, "end_line": 1505}],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "response helper methods: halt, pass and redirect",
+    "relevant": [{"path": "lib/sinatra/base.rb", "start_line": 286, "end_line": 741}],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "how templates are rendered with erb haml and other engines",
+    "relevant": [{"path": "lib/sinatra/base.rb", "start_line": 742, "end_line": 948}],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "how the Rack middleware stack is assembled before requests are handled",
+    "relevant": [{"path": "lib/sinatra/base.rb", "start_line": 1819, "end_line": 1888}],
+    "secondary": [],
+    "category": "architecture"
+  },
+  {
+    "query": "case-insensitive hash for request params",
+    "relevant": ["lib/sinatra/indifferent_hash.rb"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "Sinatra::Base",
+    "relevant": ["lib/sinatra/base.rb"],
+    "secondary": [],
+    "category": "symbol"
+  },
+  {
+    "query": "Sinatra::Application",
+    "relevant": ["lib/sinatra/main.rb"],
+    "secondary": ["lib/sinatra/base.rb"],
+    "category": "symbol"
+  },
+  {
+    "query": "Sinatra::ShowExceptions",
+    "relevant": ["lib/sinatra/show_exceptions.rb"],
+    "secondary": [],
+    "category": "symbol"
+  },
+  {
+    "query": "Sinatra::Helpers",
+    "relevant": [{"path": "lib/sinatra/base.rb", "start_line": 286, "end_line": 741}],
+    "secondary": [],
+    "category": "symbol"
+  },
+  {
+    "query": "IndifferentHash",
+    "relevant": ["lib/sinatra/indifferent_hash.rb"],
+    "secondary": [],
+    "category": "symbol"
+  },
+  {
+    "query": "how Sinatra::Application inherits and configures Base for standalone use",
+    "relevant": ["lib/sinatra/main.rb"],
+    "secondary": ["lib/sinatra/base.rb"],
+    "category": "architecture",
+    "seed": {"path": "lib/sinatra/main.rb", "line": 30},
+    "related": ["lib/sinatra/base.rb"]
+  }
+]
diff --git a/benchmarks/annotations/starlette.json b/benchmarks/annotations/starlette.json
new file mode 100644
index 0000000..f588020
--- /dev/null
+++ b/benchmarks/annotations/starlette.json
@@ -0,0 +1,90 @@
+[
+  {
+    "query": "how are URL routes registered and matched",
+    "relevant": ["starlette/routing.py"],
+    "secondary": ["starlette/convertors.py"]
+  },
+  {
+    "query": "how does authentication middleware work",
+    "relevant": ["starlette/middleware/authentication.py"],
+    "secondary": ["starlette/authentication.py"]
+  },
+  {
+    "query": "websocket connection and message handling",
+    "relevant": ["starlette/websockets.py"],
+    "secondary": []
+  },
+  {
+    "query": "how are static files served",
+    "relevant": ["starlette/staticfiles.py"],
+    "secondary": []
+  },
+  {
+    "query": "parsing form data and file uploads",
+    "relevant": ["starlette/formparsers.py"],
+    "secondary": ["starlette/datastructures.py"]
+  },
+  {
+    "query": "how does the test client simulate requests",
+    "relevant": ["starlette/testclient.py"],
+    "secondary": []
+  },
+  {
+    "query": "background task scheduling and execution",
+    "relevant": ["starlette/background.py"],
+    "secondary": []
+  },
+  {
+    "query": "application lifespan startup and shutdown events",
+    "relevant": ["starlette/routing.py"],
+    "secondary": ["starlette/applications.py"]
+  },
+  {
+    "query": "streaming response implementation",
+    "relevant": ["starlette/responses.py"],
+    "secondary": []
+  },
+  {
+    "query": "how base middleware wraps request handling",
+    "relevant": ["starlette/middleware/base.py"],
+    "secondary": []
+  },
+  {
+    "query": "how request state persists arbitrary data across middleware",
+    "relevant": ["starlette/requests.py"],
+    "secondary": []
+  },
+  {
+    "query": "how path convertor types work for route parameters",
+    "relevant": ["starlette/convertors.py"],
+    "secondary": ["starlette/routing.py"]
+  },
+  {
+    "query": "how session data is signed and stored in cookies",
+    "relevant": ["starlette/middleware/sessions.py"],
+    "secondary": []
+  },
+  {
+    "query": "how CORS preflight requests are handled",
+    "relevant": ["starlette/middleware/cors.py"],
+    "secondary": []
+  },
+  {
+    "query": "how errors in ASGI apps are caught and turned into responses",
+    "relevant": ["starlette/middleware/errors.py"],
+    "secondary": ["starlette/middleware/exceptions.py"]
+  },
+  {"query": "BaseHTTPMiddleware", "relevant": ["starlette/middleware/base.py"], "secondary": []},
+  {"query": "Request", "relevant": ["starlette/requests.py"], "secondary": []},
+  {"query": "WebSocket", "relevant": ["starlette/websockets.py"], "secondary": []},
+  {"query": "BackgroundTask", "relevant": ["starlette/background.py"], "secondary": []},
+  {"query": "Router", "relevant": ["starlette/routing.py"], "secondary": []},
+  {
+    "query": "how the Starlette application delegates routing and lifespan to Router",
+    "relevant": ["starlette/applications.py"],
+    "secondary": ["starlette/routing.py"],
+    "category": "architecture",
+    "seed": {"path": "starlette/applications.py", "line": 19},
+    "related": ["starlette/routing.py"]
+  }
+]
diff --git a/benchmarks/annotations/tokio.json b/benchmarks/annotations/tokio.json
new file mode 100644
index 0000000..a902e16
--- /dev/null
+++ b/benchmarks/annotations/tokio.json
@@ -0,0 +1,99 @@
+[
+  {
+    "query": "how spawned tasks are scheduled onto threads",
+    "relevant": ["tokio/src/runtime/scheduler/multi_thread/worker.rs"],
+    "secondary": ["tokio/src/task/spawn.rs"],
+    "category": "architecture"
+  },
+  {
+    "query": "how the async mutex prevents concurrent access",
+    "relevant": ["tokio/src/sync/mutex.rs"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "how a broadcast channel delivers messages to multiple receivers",
+    "relevant": ["tokio/src/sync/broadcast.rs"],
+    "secondary": [],
+    "category": "architecture"
+  },
+  {
+    "query": "how the timer wheel tracks sleep deadlines",
+    "relevant": ["tokio/src/runtime/time/wheel/level.rs"],
+    "secondary": ["tokio/src/time/sleep.rs"],
+    "category": "architecture"
+  },
+  {
+    "query": "running non-async blocking code inside the async runtime",
+    "relevant": ["tokio/src/task/blocking.rs"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "how tasks that are not Send can run on a single thread",
+    "relevant": ["tokio/src/task/local.rs"],
+    "secondary": [],
+    "category": "architecture"
+  },
+  {
+    "query": "waiting for any of several futures to complete",
+    "relevant": ["tokio/src/macros/select.rs"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "how the runtime builder configures thread pool size and flavour",
+    "relevant": ["tokio/src/runtime/builder.rs"],
+    "secondary": [],
+    "category": "architecture"
+  },
+  {
+    "query": "rate-limiting concurrent operations with a semaphore",
+    "relevant": ["tokio/src/sync/semaphore.rs"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "watching a value and being notified when it changes",
+    "relevant": ["tokio/src/sync/watch.rs"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "collecting results from a dynamic set of spawned tasks",
+    "relevant": ["tokio/src/task/join_set.rs"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "integrating a non-async file descriptor with the tokio reactor",
+    "relevant": ["tokio/src/io/async_fd.rs"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "JoinSet",
+    "relevant": ["tokio/src/task/join_set.rs"],
+    "secondary": []
+  },
+  {
+    "query": "Semaphore",
+    "relevant": ["tokio/src/sync/semaphore.rs"],
+    "secondary": []
+  },
+  {
+    "query": "MissedTickBehavior",
+    "relevant": ["tokio/src/time/interval.rs"],
+    "secondary": []
+  },
+  {
+    "query": "LocalSet",
+    "relevant": ["tokio/src/task/local.rs"],
+    "secondary": []
+  },
+  {
+    "query": "Notify",
+    "relevant": ["tokio/src/sync/notify.rs"],
+    "secondary": []
+  }
+]
diff --git a/benchmarks/annotations/trpc.json b/benchmarks/annotations/trpc.json
new file mode 100644
index 0000000..f3f8d25
--- /dev/null
+++ b/benchmarks/annotations/trpc.json
@@ -0,0 +1,99 @@
+[
+  {
+    "query": "how a tRPC router is created and procedures are registered",
+    "relevant": ["packages/server/src/unstable-core-do-not-import/router.ts"],
+    "secondary": ["packages/server/src/unstable-core-do-not-import/procedureBuilder.ts"],
+    "category": "architecture"
+  },
+  {
+    "query": "how middleware chains context transformations between procedures",
+    "relevant": ["packages/server/src/unstable-core-do-not-import/middleware.ts"],
+    "secondary": [],
+    "category": "architecture"
+  },
+  {
+    "query": "how input validation and parsing works for procedures",
+    "relevant": ["packages/server/src/unstable-core-do-not-import/parser.ts"],
+    "secondary": ["packages/server/src/unstable-core-do-not-import/procedureBuilder.ts"],
+    "category": "semantic"
+  },
+  {
+    "query": "how HTTP requests are resolved to tRPC procedures",
+    "relevant": ["packages/server/src/unstable-core-do-not-import/http/resolveResponse.ts"],
+    "secondary": ["packages/server/src/http.ts"],
+    "category": "architecture"
+  },
+  {
+    "query": "how error formatting and serialization works",
+    "relevant": ["packages/server/src/unstable-core-do-not-import/error/formatter.ts"],
+    "secondary": ["packages/server/src/unstable-core-do-not-import/error/TRPCError.ts"],
+    "category": "semantic"
+  },
+  {
+    "query": "how server-sent events and subscriptions are streamed to the client",
+    "relevant": ["packages/server/src/unstable-core-do-not-import/stream/sse.ts"],
+    "secondary": ["packages/server/src/unstable-core-do-not-import/stream/jsonl.ts"],
+    "category": "architecture"
+  },
+  {
+    "query": "how the observable pattern is used for subscriptions",
+    "relevant": ["packages/server/src/observable/observable.ts"],
+    "secondary": ["packages/server/src/observable/operators.ts"],
+    "category": "semantic"
+  },
+  {
+    "query": "how type inference extracts input and output types from procedures",
+    "relevant": ["packages/server/src/unstable-core-do-not-import/clientish/inference.ts"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "adapting tRPC to run as a Node.js HTTP server handler",
+    "relevant": ["packages/server/src/adapters/node-http/nodeHTTPRequestHandler.ts"],
+    "secondary": ["packages/server/src/adapters/node-http/incomingMessageToRequest.ts"],
+    "category": "architecture"
+  },
+  {
+    "query": "WebSocket adapter for real-time subscriptions",
+    "relevant": ["packages/server/src/adapters/ws.ts"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "how tRPC is initialized with root config and context factory",
+    "relevant": ["packages/server/src/unstable-core-do-not-import/initTRPC.ts"],
+    "secondary": ["packages/server/src/unstable-core-do-not-import/rootConfig.ts"],
+    "category": "architecture"
+  },
+  {
+    "query": "how data transformer is applied to serialize and deserialize procedure payloads",
+    "relevant": ["packages/server/src/unstable-core-do-not-import/transformer.ts"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "TRPCError",
+    "relevant": ["packages/server/src/unstable-core-do-not-import/error/TRPCError.ts"],
+    "secondary": []
+  },
+  {
+    "query": "AnyRouter",
+    "relevant": ["packages/server/src/unstable-core-do-not-import/router.ts"],
+    "secondary": []
+  },
+  {
+    "query": "MiddlewareBuilder",
+    "relevant": ["packages/server/src/unstable-core-do-not-import/middleware.ts"],
+    "secondary": []
+  },
+  {
+    "query": "inferProcedureInput",
+    "relevant": ["packages/server/src/unstable-core-do-not-import/procedure.ts"],
+    "secondary": []
+  },
+  {
+    "query": "fetchRequestHandler",
+    "relevant": ["packages/server/src/adapters/fetch/fetchRequestHandler.ts"],
+    "secondary": []
+  }
+]
diff --git a/benchmarks/data.py b/benchmarks/data.py
new file mode 100644
index 0000000..dac954a
--- /dev/null
+++ b/benchmarks/data.py
@@ -0,0 +1,150 @@
+import json
+from dataclasses import dataclass
+from pathlib import Path
+
+BENCH_ROOT = Path.home().joinpath(".cache", "semble-bench")  # pinned repos are checked out under this directory
+BENCHMARKS_DIR = Path(__file__).parent  # directory that contains this module
+ANNOTATIONS_DIR = BENCHMARKS_DIR.joinpath("annotations")  # holds one <repo>.json annotation file per repo
+REPOS_PATH = BENCHMARKS_DIR.joinpath("repos.json")  # JSON list of pinned repo specs
+
+
+@dataclass(frozen=True)
+class Target:
+    path: str  # file path as written in the annotation file
+    start_line: int | None = None  # first line of the optional span; None when no span is given
+    end_line: int | None = None  # last line of the optional span; None when no span is given
+
+    @property
+    def has_span(self) -> bool:
+        """Tell whether this target carries a complete line span (both bounds present)."""
+        return not (self.start_line is None or self.end_line is None)
+
+
+@dataclass(frozen=True)
+class RepoSpec:
+    name: str  # repo key; also the checkout directory name and the annotation file stem
+    language: str  # language label copied onto every task loaded for this repo
+    url: str  # repository URL as listed in repos.json
+    revision: str  # pinned revision as listed in repos.json
+    benchmark_root: str | None = None  # sub-directory of the checkout to index; None means the checkout itself
+
+    @property
+    def checkout_dir(self) -> Path:
+        """Directory under BENCH_ROOT that holds this repo's checkout."""
+        return BENCH_ROOT.joinpath(self.name)
+
+    @property
+    def benchmark_dir(self) -> Path:
+        """Directory to index: the checkout, or its benchmark_root sub-directory when one is set."""
+        return self.checkout_dir / self.benchmark_root if self.benchmark_root is not None else self.checkout_dir
+
+
+@dataclass(frozen=True)
+class Task:
+    repo: str  # name of the repo the query belongs to
+    language: str  # copied from that repo's spec
+    query: str  # search text taken from the annotation entry
+    relevant: tuple[Target, ...]  # targets listed under "relevant" in the annotation entry
+    secondary: tuple[Target, ...]  # targets listed under "secondary" in the annotation entry
+    category: str  # explicit "category" from the annotation, otherwise inferred from the query
+
+    @property
+    def all_relevant(self) -> tuple[Target, ...]:
+        """Primary targets followed by secondary targets, as a single tuple."""
+        return (*self.relevant, *self.secondary)
+
+
+def infer_category(query: str) -> str:
+    """Return "symbol" for at most one word, "architecture" if the first word is "how", else "semantic"."""
+    words = query.split()  # split() drops leading/trailing blanks and treats tabs/newlines like spaces
+    if len(words) <= 1:
+        return "symbol"
+    if words[0].lower() == "how":  # case-insensitive match on the first word only
+        return "architecture"
+    return "semantic"
+
+
+def _coerce_int(value: object) -> int:
+    """Return value as an int; accept int or numeric str, raise TypeError for any other type (bool included)."""
+    if isinstance(value, int | str) and not isinstance(value, bool):  # bool subclasses int; reject it
+        return int(value)  # a non-numeric str raises ValueError here
+    raise TypeError(f"expected int-compatible value, got {type(value).__name__}")
+
+
+def _parse_target(raw: str | dict[str, object]) -> Target:
+    """Build a Target from a bare path string or from a mapping with "path" and optional line bounds."""
+    if isinstance(raw, str):
+        return Target(path=raw)  # shorthand form: a path with no line span
+    if not isinstance(raw, dict):
+        raise TypeError(f"expected mapping, got {type(raw).__name__}")
+    path = str(raw["path"])  # KeyError when the mapping has no "path"
+    # A bound that is absent or null stays None; any other value goes through _coerce_int.
+    bounds = {
+        key: None if raw.get(key) is None else _coerce_int(raw[key])
+        for key in ("start_line", "end_line")
+    }
+    return Target(path=path, **bounds)
+
+
+def load_repo_specs(path: Path = REPOS_PATH) -> dict[str, RepoSpec]:
+    """Read the JSON list of repo entries at path and return a RepoSpec per entry, keyed by its "name"."""
+    entries = json.loads(path.read_text(encoding="utf-8"))
+    return {entry["name"]: RepoSpec(**entry) for entry in entries}  # entry keys must match RepoSpec's fields
+
+
+def available_repo_specs() -> dict[str, RepoSpec]:
+    """Keep only the repo specs whose checkout directory and <name>.json annotation file both exist."""
+    usable: dict[str, RepoSpec] = {}
+    for name, spec in load_repo_specs().items():
+        if spec.checkout_dir.exists() and ANNOTATIONS_DIR.joinpath(f"{name}.json").exists():
+            usable[name] = spec
+    return usable
+
+
+def load_tasks(repo_specs: dict[str, RepoSpec] | None = None) -> list[Task]:
+    """Load tasks from the annotation files of the given repo specs (default: all of load_repo_specs())."""
+    known = repo_specs if repo_specs is not None else load_repo_specs()
+
+    def _build(entry: dict, repo: str) -> Task:
+        """Turn one annotation entry into a Task; a non-string "category" falls back to infer_category."""
+        declared = entry.get("category")
+        return Task(
+            repo=repo,
+            language=known[repo].language,
+            query=entry["query"],
+            relevant=tuple(_parse_target(t) for t in entry.get("relevant", [])),
+            secondary=tuple(_parse_target(t) for t in entry.get("secondary", [])),
+            category=declared if isinstance(declared, str) else infer_category(entry["query"]),
+        )
+
+    collected: list[Task] = []
+    for annotation_path in sorted(ANNOTATIONS_DIR.glob("*.json")):
+        file_repo = annotation_path.stem
+        if file_repo not in known:
+            continue  # no spec for this file's repo: skip the file without reading it
+        for entry in json.loads(annotation_path.read_text(encoding="utf-8")):
+            repo = entry.get("repo", file_repo)  # an entry may override the repo implied by the file name
+            if repo in known:
+                collected.append(_build(entry, repo))
+    return collected
+
+
+def apply_task_filters(
+    tasks: list[Task],
+    repos: list[str] | None = None,
+    languages: list[str] | None = None,
+) -> list[Task]:
+    """Keep tasks matching repos and languages; a None or empty filter list lets every task through."""
+    # Single pass over tasks; input order is preserved.
+    return [t for t in tasks if (not repos or t.repo in repos) and (not languages or t.language in languages)]
+
+
+def target_matches_location(file_path: str, start_line: int, end_line: int, target: Target) -> bool:
+    """Return True when the chunk is in the target's file and, if the target has a span, overlaps that span."""
+    chunk_path, wanted_path = (p.replace("\\", "/") for p in (file_path, target.path))  # normalize separators
+    same_file = chunk_path == wanted_path or chunk_path.endswith("/" + wanted_path)  # exact or path-suffix match
+    if not same_file:
+        return False
+    if target.start_line is None or target.end_line is None:
+        return True  # target has no complete line span: any chunk of the file matches
+    return target.start_line <= end_line and start_line <= target.end_line  # inclusive interval overlap
diff --git a/benchmarks/repos.json b/benchmarks/repos.json
new file mode 100644
index 0000000..56e36e2
--- /dev/null
+++ b/benchmarks/repos.json
@@ -0,0 +1,202 @@
+[
+  {
+    "name": "aiohttp",
+    "language": "python",
+    "url": "https://github.com/aio-libs/aiohttp.git",
+    "revision": "fc67cfdfd7d4bbf53ef76515fae69726626fe256",
+    "benchmark_root": "aiohttp"
+  },
+  {
+    "name": "fastapi",
+    "language": "python",
+    "url": "https://github.com/fastapi/fastapi.git",
+    "revision": "c3c9dd6b1a08bcda766e7b43eafe72c4c5e9e193",
+    "benchmark_root": "fastapi"
+  },
+  {
+    "name": "flask",
+    "language": "python",
+    "url": "https://github.com/pallets/flask.git",
+    "revision": "258d68b6ff5e2244386540f48b48bab90d6ab827",
+    "benchmark_root": "src/flask"
+  },
+  {
+    "name": "httpx",
+    "language": "python",
+    "url": "https://github.com/encode/httpx.git",
+    "revision": "b5addb64f0161ff6bfe94c124ef76f6a1fba5254",
+    "benchmark_root": "httpx"
+  },
+  {
+    "name": "model2vec",
+    "language": "python",
+    "url": "https://github.com/MinishLab/model2vec.git",
+    "revision": "b3012ee04e41c634383a5d735cb3c7c51e806a18",
+    "benchmark_root": "model2vec"
+  },
+  {
+    "name": "pydantic",
+    "language": "python",
+    "url": "https://github.com/pydantic/pydantic.git",
+    "revision": "82c15f0ba8a9f8d8d6ba595df73ad20e2ee2eccf",
+    "benchmark_root": "pydantic"
+  },
+  {
+    "name": "requests",
+    "language": "python",
+    "url": "https://github.com/psf/requests.git",
+    "revision": "ef439eb779c1eba7cbdeeeb302b11e1e061b4b7d",
+    "benchmark_root": "src/requests"
+  },
+  {
+    "name": "starlette",
+    "language": "python",
+    "url": "https://github.com/encode/starlette.git",
+    "revision": "1894d0d89badf43bc8bfe03ed221a8b2e100b2ab",
+    "benchmark_root": "starlette"
+  },
+  {
+    "name": "axios",
+    "language": "javascript",
+    "url": "https://github.com/axios/axios.git",
+    "revision": "c7a76ddbf277db864ee6cfb4ef17b8a08ffbe3f5",
+    "benchmark_root": "lib"
+  },
+  {
+    "name": "express",
+    "language": "javascript",
+    "url": "https://github.com/expressjs/express.git",
+    "revision": "8e022edc9185f540a3fcecaf5e56b850d919cdac",
+    "benchmark_root": "lib"
+  },
+  {
+    "name": "redux",
+    "language": "javascript",
+    "url": "https://github.com/reduxjs/redux.git",
+    "revision": "aaa04ae8402ba2caba55a9c75bfa8d3df6c78f8c",
+    "benchmark_root": "src"
+  },
+  {
+    "name": "gin",
+    "language": "go",
+    "url": "https://github.com/gin-gonic/gin.git",
+    "revision": "d3ffc9985281dcf4d3bef604cce4e662b1a327a6"
+  },
+  {
+    "name": "cobra",
+    "language": "go",
+    "url": "https://github.com/spf13/cobra.git",
+    "revision": "61968e893eee2f27696c2fbc8e34fa5c4afaf7c4"
+  },
+  {
+    "name": "chi",
+    "language": "go",
+    "url": "https://github.com/go-chi/chi.git",
+    "revision": "a54874f0e2f12647a19e82ee70dfa8185014100c"
+  },
+  {
+    "name": "gson",
+    "language": "java",
+    "url": "https://github.com/google/gson.git",
+    "revision": "f4d371d29c04066dbe7fdb31f642831f9c7f40cd",
+    "benchmark_root": "gson"
+  },
+  {
+    "name": "commons-lang",
+    "language": "java",
+    "url": "https://github.com/apache/commons-lang.git",
+    "revision": "0ba92dc402312a38252a3398931ffbfbb4a88f7d",
+    "benchmark_root": "src/main/java/org/apache/commons/lang3"
+  },
+  {
+    "name": "jackson-databind",
+    "language": "java",
+    "url": "https://github.com/FasterXML/jackson-databind.git",
+    "revision": "e30139539416f69f1d7ae31c7e1d6da5b25bf362",
+    "benchmark_root": "src/main/java/tools/jackson/databind"
+  },
+  {
+    "name": "guzzle",
+    "language": "php",
+    "url": "https://github.com/guzzle/guzzle.git",
+    "revision": "fb92d95f80a9da51bf8f2a5b26d8e8ea3b6d99ed",
+    "benchmark_root": "src"
+  },
+  {
+    "name": "monolog",
+    "language": "php",
+    "url": "https://github.com/Seldaek/monolog.git",
+    "revision": "68b974809baff3f071893de61447212e9e688ee7",
+    "benchmark_root": "src/Monolog"
+  },
+  {
+    "name": "laravel-framework",
+    "language": "php",
+    "url": "https://github.com/laravel/framework.git",
+    "revision": "0dcc8d2ba7f41bc8376a08e9ccd5d7b83e6a6d90",
+    "benchmark_root": "src/Illuminate"
+  },
+  {
+    "name": "sinatra",
+    "language": "ruby",
+    "url": "https://github.com/sinatra/sinatra.git",
+    "revision": "f891dd2b6f4911e356600efe6c3b82af97d262c6",
+    "benchmark_root": "lib"
+  },
+  {
+    "name": "rack",
+    "language": "ruby",
+    "url": "https://github.com/rack/rack.git",
+    "revision": "ca8a404704ed043797c4f9d482c97d722c0dc719",
+    "benchmark_root": "lib/rack"
+  },
+  {
+    "name": "rails",
+    "language": "ruby",
+    "url": "https://github.com/rails/rails.git",
+    "revision": "75f9e28379ac7418b82fa950cfa81f6147275308",
+    "benchmark_root": "railties/lib/rails"
+  },
+  {
+    "name": "tokio",
+    "language": "rust",
+    "url": "https://github.com/tokio-rs/tokio.git",
+    "revision": "5db10f538b683fe88d699dfd11be31d193db011c",
+    "benchmark_root": "tokio/src"
+  },
+  {
+    "name": "trpc",
+    "language": "typescript",
+    "url": "https://github.com/trpc/trpc.git",
+    "revision": "c188dab0822caf3615199e4ac95147bc7560d26f",
+    "benchmark_root": "packages/server/src"
+  },
+  {
+    "name": "messagepack-csharp",
+    "language": "csharp",
+    "url": "https://github.com/neuecc/MessagePack-CSharp.git",
+    "revision": "84db9f79e3ecc5f4e8b7c7f77cd15d7745f5f2a7",
+    "benchmark_root": "src/MessagePack"
+  },
+  {
+    "name": "ktor",
+    "language": "kotlin",
+    "url": "https://github.com/ktorio/ktor.git",
+    "revision": "5913745a96101e8c78e47565e52d2baa8414441f",
+    "benchmark_root": "ktor-client/ktor-client-core/common/src"
+  },
+  {
+    "name": "cats",
+    "language": "scala",
+    "url": "https://github.com/typelevel/cats.git",
+    "revision": "2102251a2f24a6ee14e087fc5da7768d267f2d6e",
+    "benchmark_root": "core/src/main/scala"
+  },
+  {
+    "name": "alamofire",
+    "language": "swift",
+    "url": "https://github.com/Alamofire/Alamofire.git",
+    "revision": "e938f8c66708e7352fc7e3512647fa54255b267a",
+    "benchmark_root": "Source"
+  }
+]
diff --git a/benchmarks/results/.gitkeep b/benchmarks/results/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/benchmarks/results/40a76927ded9.json b/benchmarks/results/40a76927ded9.json
new file mode 100644
index 0000000..d20be46
--- /dev/null
+++ b/benchmarks/results/40a76927ded9.json
@@ -0,0 +1,346 @@
+{
+  "sha": "40a76927ded9482f62ebd5f63930ff59605fe9f8",
+  "model": "Pringled/potion-code-16M",
+  "summary": {
+    "ndcg10": 0.8668,
+    "p50_ms": 0.698,
+    "index_ms": 340.0
+  },
+  "by_language": {
+    "csharp": {
+      "repos": 1,
+      "ndcg10": 0.8263,
+      "p50_ms": 0.966,
+      "index_ms": 459.2
+    },
+    "go": {
+      "repos": 3,
+      "ndcg10": 0.9515,
+      "p50_ms": 0.537,
+      "index_ms": 164.0
+    },
+    "java": {
+      "repos": 3,
+      "ndcg10": 0.842,
+      "p50_ms": 1.149,
+      "index_ms": 965.2
+    },
+    "javascript": {
+      "repos": 3,
+      "ndcg10": 0.9282,
+      "p50_ms": 0.432,
+      "index_ms": 36.0
+    },
+    "kotlin": {
+      "repos": 1,
+      "ndcg10": 0.7631,
+      "p50_ms": 0.856,
+      "index_ms": 160.8
+    },
+    "php": {
+      "repos": 3,
+      "ndcg10": 0.906,
+      "p50_ms": 0.949,
+      "index_ms": 738.0
+    },
+    "python": {
+      "repos": 8,
+      "ndcg10": 0.8233,
+      "p50_ms": 0.498,
+      "index_ms": 163.3
+    },
+    "ruby": {
+      "repos": 3,
+      "ndcg10": 0.8911,
+      "p50_ms": 0.623,
+      "index_ms": 97.4
+    },
+    "rust": {
+      "repos": 1,
+      "ndcg10": 0.8878,
+      "p50_ms": 0.996,
+      "index_ms": 930.6
+    },
+    "scala": {
+      "repos": 1,
+      "ndcg10": 0.8415,
+      "p50_ms": 0.942,
+      "index_ms": 648.2
+    },
+    "swift": {
+      "repos": 1,
+      "ndcg10": 0.9316,
+      "p50_ms": 0.543,
+      "index_ms": 229.5
+    },
+    "typescript": {
+      "repos": 1,
+      "ndcg10": 0.7431,
+      "p50_ms": 0.882,
+      "index_ms": 121.8
+    }
+  },
+  "repos": [
+    {
+      "repo": "aiohttp",
+      "language": "python",
+      "chunks": 756,
+      "ndcg5": 0.7132626857513019,
+      "ndcg10": 0.7821229638714016,
+      "p50_ms": 0.5919579998590052,
+      "index_ms": 267.3160420017666
+    },
+    {
+      "repo": "alamofire",
+      "language": "swift",
+      "chunks": 649,
+      "ndcg5": 0.900172569211564,
+      "ndcg10": 0.9315768229529695,
+      "p50_ms": 0.5432909965747967,
+      "index_ms": 229.49254100240069
+    },
+    {
+      "repo": "axios",
+      "language": "javascript",
+      "chunks": 166,
+      "ndcg5": 0.9671522420975631,
+      "ndcg10": 0.9671522420975631,
+      "p50_ms": 0.5915000001550652,
+      "index_ms": 58.03862500033574
+    },
+    {
+      "repo": "cats",
+      "language": "scala",
+      "chunks": 1254,
+      "ndcg5": 0.8157722039023972,
+      "ndcg10": 0.8414671964692401,
+      "p50_ms": 0.9416660032002255,
+      "index_ms": 648.2289169944124
+    },
+    {
+      "repo": "chi",
+      "language": "go",
+      "chunks": 262,
+      "ndcg5": 0.9455120441745608,
+      "ndcg10": 0.9455120441745608,
+      "p50_ms": 0.6279579974943772,
+      "index_ms": 103.15404200082412
+    },
+    {
+      "repo": "cobra",
+      "language": "go",
+      "chunks": 394,
+      "ndcg5": 0.970068981106951,
+      "ndcg10": 0.970068981106951,
+      "p50_ms": 0.41579100070521235,
+      "index_ms": 149.65433299948927
+    },
+    {
+      "repo": "commons-lang",
+      "language": "java",
+      "chunks": 3152,
+      "ndcg5": 0.7688578654609097,
+      "ndcg10": 0.8052591049306037,
+      "p50_ms": 0.9200830027111806,
+      "index_ms": 1038.753667002311
+    },
+    {
+      "repo": "express",
+      "language": "javascript",
+      "chunks": 52,
+      "ndcg5": 0.9593872208972474,
+      "ndcg10": 0.9593872208972474,
+      "p50_ms": 0.25366600311826915,
+      "index_ms": 22.826792002888396
+    },
+    {
+      "repo": "fastapi",
+      "language": "python",
+      "chunks": 597,
+      "ndcg5": 0.7314364449312006,
+      "ndcg10": 0.7693095302894921,
+      "p50_ms": 0.47783299669390544,
+      "index_ms": 189.871916998527
+    },
+    {
+      "repo": "flask",
+      "language": "python",
+      "chunks": 291,
+      "ndcg5": 0.8570900833760776,
+      "ndcg10": 0.8767012186349079,
+      "p50_ms": 0.4285830000299029,
+      "index_ms": 92.66295799898217
+    },
+    {
+      "repo": "gin",
+      "language": "go",
+      "chunks": 576,
+      "ndcg5": 0.8807555442147937,
+      "ndcg10": 0.939064318485603,
+      "p50_ms": 0.5680000031134114,
+      "index_ms": 239.0974170048139
+    },
+    {
+      "repo": "gson",
+      "language": "java",
+      "chunks": 1460,
+      "ndcg5": 0.9261859507142916,
+      "ndcg10": 0.9261859507142916,
+      "p50_ms": 1.0848340025404468,
+      "index_ms": 482.5546249994659
+    },
+    {
+      "repo": "guzzle",
+      "language": "php",
+      "chunks": 206,
+      "ndcg5": 0.8326908338735671,
+      "ndcg10": 0.844506786325837,
+      "p50_ms": 0.5839169971295632,
+      "index_ms": 72.53133400081424
+    },
+    {
+      "repo": "httpx",
+      "language": "python",
+      "chunks": 248,
+      "ndcg5": 0.8519694264932337,
+      "ndcg10": 0.871159099521697,
+      "p50_ms": 0.4406670050229877,
+      "index_ms": 84.04612500453368
+    },
+    {
+      "repo": "jackson-databind",
+      "language": "java",
+      "chunks": 4570,
+      "ndcg5": 0.7667968319202225,
+      "ndcg10": 0.7944291752941182,
+      "p50_ms": 1.442957996914629,
+      "index_ms": 1374.4051670000772
+    },
+    {
+      "repo": "ktor",
+      "language": "kotlin",
+      "chunks": 425,
+      "ndcg5": 0.726275662513606,
+      "ndcg10": 0.7630927329648237,
+      "p50_ms": 0.8556669999961741,
+      "index_ms": 160.8068749992526
+    },
+    {
+      "repo": "laravel-framework",
+      "language": "php",
+      "chunks": 6197,
+      "ndcg5": 0.967888315659275,
+      "ndcg10": 0.967888315659275,
+      "p50_ms": 1.3275840028654784,
+      "index_ms": 1987.1202089998405
+    },
+    {
+      "repo": "messagepack-csharp",
+      "language": "csharp",
+      "chunks": 1125,
+      "ndcg5": 0.8164536328001585,
+      "ndcg10": 0.8262866007393468,
+      "p50_ms": 0.9664999961387366,
+      "index_ms": 459.23387500079116
+    },
+    {
+      "repo": "model2vec",
+      "language": "python",
+      "chunks": 107,
+      "ndcg5": 0.6593701861221591,
+      "ndcg10": 0.695271294655741,
+      "p50_ms": 0.46462499449262396,
+      "index_ms": 44.985666005231906
+    },
+    {
+      "repo": "monolog",
+      "language": "php",
+      "chunks": 417,
+      "ndcg5": 0.9055096182921145,
+      "ndcg10": 0.9055096182921145,
+      "p50_ms": 0.9362909986521117,
+      "index_ms": 154.34570900106337
+    },
+    {
+      "repo": "pydantic",
+      "language": "python",
+      "chunks": 1518,
+      "ndcg5": 0.6795591269045096,
+      "ndcg10": 0.7070408064407742,
+      "p50_ms": 0.6636250036535785,
+      "index_ms": 490.62920799769927
+    },
+    {
+      "repo": "rack",
+      "language": "ruby",
+      "chunks": 249,
+      "ndcg5": 1.0,
+      "ndcg10": 1.0,
+      "p50_ms": 0.5600000004051253,
+      "index_ms": 96.56141699815635
+    },
+    {
+      "repo": "rails",
+      "language": "ruby",
+      "chunks": 465,
+      "ndcg5": 0.7466134836472739,
+      "ndcg10": 0.8346443747935481,
+      "p50_ms": 0.9815000012167729,
+      "index_ms": 168.55954200582346
+    },
+    {
+      "repo": "redux",
+      "language": "javascript",
+      "chunks": 53,
+      "ndcg5": 0.8226294385530917,
+      "ndcg10": 0.8580772959099011,
+      "p50_ms": 0.450166997325141,
+      "index_ms": 27.171499998075888
+    },
+    {
+      "repo": "requests",
+      "language": "python",
+      "chunks": 169,
+      "ndcg5": 0.9550842629661954,
+      "ndcg10": 0.9550842629661954,
+      "p50_ms": 0.40475000423612073,
+      "index_ms": 56.275709001056384
+    },
+    {
+      "repo": "sinatra",
+      "language": "ruby",
+      "chunks": 68,
+      "ndcg5": 0.8387325493217617,
+      "ndcg10": 0.8387325493217617,
+      "p50_ms": 0.3260829980717972,
+      "index_ms": 27.20166600192897
+    },
+    {
+      "repo": "starlette",
+      "language": "python",
+      "chunks": 213,
+      "ndcg5": 0.9058681185722455,
+      "ndcg10": 0.9294136613951622,
+      "p50_ms": 0.5122919974382967,
+      "index_ms": 80.70125000085682
+    },
+    {
+      "repo": "tokio",
+      "language": "rust",
+      "chunks": 2730,
+      "ndcg5": 0.8750003941122573,
+      "ndcg10": 0.8878478903956787,
+      "p50_ms": 0.9958329974324442,
+      "index_ms": 930.5787499979488
+    },
+    {
+      "repo": "trpc",
+      "language": "typescript",
+      "chunks": 362,
+      "ndcg5": 0.6949834508995433,
+      "ndcg10": 0.7431267778412411,
+      "p50_ms": 0.8817499983706512,
+      "index_ms": 121.80820800131187
+    }
+  ]
+}
diff --git a/benchmarks/run_benchmark.py b/benchmarks/run_benchmark.py
new file mode 100644
index 0000000..c5beaa9
--- /dev/null
+++ b/benchmarks/run_benchmark.py
@@ -0,0 +1,249 @@
+import argparse
+import json
+import math
+import subprocess
+import sys
+import time
+from dataclasses import asdict, dataclass
+from pathlib import Path
+
+from model2vec import StaticModel
+
+from benchmarks.data import (
+    RepoSpec,
+    Target,
+    Task,
+    apply_task_filters,
+    available_repo_specs,
+    load_tasks,
+    target_matches_location,
+)
+from semble import SembleIndex
+from semble.types import SearchResult
+
+# Embedding model loaded via ``StaticModel.from_pretrained``; also recorded in saved results.
+_MODEL_NAME = "Pringled/potion-code-16M"
+# Timed repetitions of each query; the median run is kept as that query's latency.
+_LATENCY_RUNS = 5
+# ``top_k`` passed to ``index.search`` for every benchmark query.
+_DIRECT_TOP_K = 10
+
+
+def _target_rank(results: list[SearchResult], target: Target) -> int | None:
+    """Return the 1-based rank of the first result covering target, or None."""
+    for index, result in enumerate(results, 1):
+        chunk = result.chunk
+        if target_matches_location(chunk.file_path, chunk.start_line, chunk.end_line, target):
+            return index
+    return None
+
+
+@dataclass(frozen=True)
+class RepoResult:
+    """Benchmark outcome for a single repository."""
+
+    repo: str  # repository name (key into the repo specs)
+    language: str  # language taken from the repo's spec
+    chunks: int  # number of chunks in the built index
+    ndcg5: float  # mean NDCG@5 over the repo's tasks
+    ndcg10: float  # mean NDCG@10 over the repo's tasks
+    p50_ms: float  # median query latency, in milliseconds
+    index_ms: float  # wall-clock time to build the index, in milliseconds
+
+
+def _dcg(relevances: list[int]) -> float:
+    """Compute Discounted Cumulative Gain for a ranked relevance list."""
+    return sum(rel / math.log2(i + 2) for i, rel in enumerate(relevances))
+
+
+def _ndcg_at_k(relevant_ranks: list[int], n_relevant: int, k: int) -> float:
+    """Compute NDCG@k given the ranks of relevant results and the total relevant count."""
+    if n_relevant == 0:
+        return 0.0
+    relevances = [0] * k
+    for rank in relevant_ranks:
+        if 1 <= rank <= k:
+            relevances[rank - 1] = 1
+    ideal = _dcg([1] * min(k, n_relevant))
+    return _dcg(relevances) / ideal if ideal > 0 else 0.0
+
+
+def _evaluate(index: SembleIndex, tasks: list[Task], *, verbose: bool = False) -> tuple[float, float, float]:
+    """Return mean NDCG@5, NDCG@10, and median query latency (ms) across all tasks."""
+    ndcg5_sum = 0.0
+    ndcg10_sum = 0.0
+    latencies: list[float] = []
+
+    for task in tasks:
+        query_latencies: list[float] = []
+        for _ in range(_LATENCY_RUNS):
+            started = time.perf_counter()
+            results = index.search(task.query, top_k=_DIRECT_TOP_K)
+            query_latencies.append((time.perf_counter() - started) * 1000)
+        latencies.append(sorted(query_latencies)[_LATENCY_RUNS // 2])
+
+        relevant_ranks = [rank for target in task.all_relevant if (rank := _target_rank(results, target)) is not None]
+        n_relevant = sum(
+            1
+            for target in task.all_relevant
+            if any(target_matches_location(c.file_path, c.start_line, c.end_line, target) for c in index.chunks)
+        )
+        q_ndcg5 = _ndcg_at_k(relevant_ranks, n_relevant, 5)
+        q_ndcg10 = _ndcg_at_k(relevant_ranks, n_relevant, 10)
+        ndcg5_sum += q_ndcg5
+        ndcg10_sum += q_ndcg10
+
+        if verbose:
+            cat = task.category or "?"
+            targets_str = ", ".join(
+                t.path if not t.start_line else f"{t.path}:{t.start_line}-{t.end_line}" for t in task.all_relevant
+            )
+            top_files = [r.chunk.file_path for r in results[:5]]
+            print(
+                f"  [{cat:<12}] ndcg@10={q_ndcg10:.3f}  ranks={relevant_ranks}  n_rel={n_relevant}  q={task.query!r}",
+                file=sys.stderr,
+            )
+            print(f"               targets: {targets_str}", file=sys.stderr)
+            print(f"               top-5:   {top_files}", file=sys.stderr)
+
+    total = len(tasks)
+    latencies.sort()
+    return ndcg5_sum / total, ndcg10_sum / total, latencies[len(latencies) // 2]
+
+
+def _print_summary(results: list[RepoResult]) -> None:
+    """Print per-language and overall benchmark summary to stderr."""
+    languages = sorted({result.language for result in results})
+    by_language = {lang: [r for r in results if r.language == lang] for lang in languages}
+    columns = ["Avg", *[lang.title() for lang in languages]]
+
+    avg_ndcg10 = sum(r.ndcg10 for r in results) / len(results)
+    avg_p50 = sum(r.p50_ms for r in results) / len(results)
+    avg_index = sum(r.index_ms for r in results) / len(results)
+
+    print(file=sys.stderr)
+    print("By language", file=sys.stderr)
+    for language, grouped in by_language.items():
+        print(
+            f"  {language}: repos={len(grouped)}"
+            + f"  ndcg@5={sum(r.ndcg5 for r in grouped) / len(grouped):.3f}"
+            + f"  ndcg@10={sum(r.ndcg10 for r in grouped) / len(grouped):.3f}"
+            + f"  p50={sum(r.p50_ms for r in grouped) / len(grouped):.2f}ms"
+            + f"  index={sum(r.index_ms for r in grouped) / len(grouped):.0f}ms",
+            file=sys.stderr,
+        )
+
+    print(file=sys.stderr)
+    print(f"{'=' * 104}", file=sys.stderr)
+    print("Hybrid benchmark by language", file=sys.stderr)
+    print(f"{'=' * 104}", file=sys.stderr)
+    print(f"\n  {'Metric':<28}  " + "  ".join(f"{column:>9}" for column in columns), file=sys.stderr)
+    print(f"  {'-' * 28}  " + "  ".join(f"{'-' * 9:>9}" for _ in columns), file=sys.stderr)
+
+    ndcg_row = [f"{avg_ndcg10:>9.3f}"]
+    p50_row = [f"{avg_p50:>8.2f}ms"]
+    index_row = [f"{avg_index:>7.0f}ms"]
+    for language, language_results in by_language.items():
+        ndcg_row.append(f"{sum(r.ndcg10 for r in language_results) / len(language_results):>9.3f}")
+        p50_row.append(f"{sum(r.p50_ms for r in language_results) / len(language_results):>8.2f}ms")
+        index_row.append(f"{sum(r.index_ms for r in language_results) / len(language_results):>7.0f}ms")
+
+    print(f"  {'NDCG@10':<28}  " + "  ".join(ndcg_row), file=sys.stderr)
+    print(f"  {'q-p50':<28}  " + "  ".join(p50_row), file=sys.stderr)
+    print(f"  {'index':<28}  " + "  ".join(index_row), file=sys.stderr)
+
+
+def _bench_quality(
+    repo_tasks: dict[str, list[Task]], model: StaticModel, specs: dict[str, RepoSpec], *, verbose: bool = False
+) -> list[RepoResult]:
+    """Run quality benchmarks (NDCG@5, NDCG@10, latency) for each repo."""
+    print(
+        f"{'Repo':<12} {'language':<12} {'chunks':>6} {'index':>9} {'NDCG@5':>8} {'NDCG@10':>8} {'p50':>8}",
+        file=sys.stderr,
+    )
+    print(f"{'-' * 12} {'-' * 12} {'-' * 6} {'-' * 9} {'-' * 8} {'-' * 8} {'-' * 8}", file=sys.stderr)
+    results: list[RepoResult] = []
+    for repo, tasks in sorted(repo_tasks.items()):
+        spec = specs[repo]
+        started = time.perf_counter()
+        index = SembleIndex.from_path(spec.benchmark_dir, model=model)
+        index_ms = (time.perf_counter() - started) * 1000
+        ndcg5, ndcg10, p50_ms = _evaluate(index, tasks, verbose=verbose)
+        result = RepoResult(
+            repo=repo,
+            language=spec.language,
+            chunks=len(index.chunks),
+            ndcg5=ndcg5,
+            ndcg10=ndcg10,
+            p50_ms=p50_ms,
+            index_ms=index_ms,
+        )
+        results.append(result)
+        print(
+            f"{repo:<12} {spec.language:<12} {len(index.chunks):>6} {index_ms:>8.0f}ms {ndcg5:>8.3f} {ndcg10:>8.3f} {p50_ms:>7.2f}ms",
+            file=sys.stderr,
+        )
+    return results
+
+
+def _save_results(results: list[RepoResult]) -> None:
+    """Write results to benchmarks/results/<sha>.json."""
+    try:
+        sha = subprocess.check_output(["git", "rev-parse", "HEAD"], text=True).strip()
+    except subprocess.CalledProcessError:
+        sha = "unknown"
+
+    languages = sorted({r.language for r in results})
+    by_language = {lang: [r for r in results if r.language == lang] for lang in languages}
+
+    output = {
+        "sha": sha,
+        "model": _MODEL_NAME,
+        "summary": {
+            "ndcg10": round(sum(r.ndcg10 for r in results) / len(results), 4),
+            "p50_ms": round(sum(r.p50_ms for r in results) / len(results), 3),
+            "index_ms": round(sum(r.index_ms for r in results) / len(results), 1),
+        },
+        "by_language": {
+            lang: {
+                "repos": len(grouped),
+                "ndcg10": round(sum(r.ndcg10 for r in grouped) / len(grouped), 4),
+                "p50_ms": round(sum(r.p50_ms for r in grouped) / len(grouped), 3),
+                "index_ms": round(sum(r.index_ms for r in grouped) / len(grouped), 1),
+            }
+            for lang, grouped in by_language.items()
+        },
+        "repos": [asdict(r) for r in results],
+    }
+
+    results_dir = Path(__file__).parent / "results"
+    results_dir.mkdir(exist_ok=True)
+    out_path = results_dir / f"{sha[:12]}.json"
+    out_path.write_text(json.dumps(output, indent=2) + "\n", encoding="utf-8")
+    print(f"\nResults saved to {out_path}", file=sys.stderr)
+
+
+def main() -> None:
+    """Parse arguments and run the selected benchmark mode."""
+    parser = argparse.ArgumentParser(description="Benchmark hybrid semble search across the pinned benchmark repos.")
+    parser.add_argument("--repo", action="append", default=[], help="Limit to one or more repo names.")
+    parser.add_argument("--language", action="append", default=[], help="Limit to one or more languages.")
+    parser.add_argument("--verbose", action="store_true", help="Print per-query results.")
+    args = parser.parse_args()
+    repo_specs = available_repo_specs()
+    tasks = apply_task_filters(
+        load_tasks(repo_specs=repo_specs), repos=args.repo or None, languages=args.language or None
+    )
+    if not tasks:
+        raise SystemExit("No benchmark tasks matched the requested filters.")
+    print("Loading model...", file=sys.stderr)
+    started = time.perf_counter()
+    model = StaticModel.from_pretrained(_MODEL_NAME)
+    print(f"Loaded in {(time.perf_counter() - started) * 1000:.0f} ms", file=sys.stderr)
+    print(file=sys.stderr)
+    repo_tasks: dict[str, list[Task]] = {}
+    for task in tasks:
+        repo_tasks.setdefault(task.repo, []).append(task)
+    results = _bench_quality(repo_tasks, model, repo_specs, verbose=args.verbose)
+    _print_summary(results)
+    if not args.repo and not args.language:
+        _save_results(results)
+
+
+# Script entry point, e.g. ``python -m benchmarks.run_benchmark``.
+if __name__ == "__main__":
+    main()
diff --git a/benchmarks/sync_repos.py b/benchmarks/sync_repos.py
new file mode 100644
index 0000000..9cf1ebd
--- /dev/null
+++ b/benchmarks/sync_repos.py
@@ -0,0 +1,63 @@
+import argparse
+import subprocess
+import sys
+
+from benchmarks.data import BENCH_ROOT, load_repo_specs
+
+
+def _run(*args: str) -> None:
+    """Run a subprocess command, raising on non-zero exit."""
+    subprocess.run(args, check=True)
+
+
+def _sync_repo(name: str, url: str, revision: str) -> None:
+    """Clone the repo if absent, then fetch and checkout the pinned revision."""
+    repo_dir = BENCH_ROOT / name
+    if not repo_dir.exists():
+        print(f"cloning {name} -> {repo_dir}")
+        _run("git", "clone", url, str(repo_dir))
+    print(f"syncing {name} @ {revision[:12]}")
+    _run("git", "-C", str(repo_dir), "fetch", "--depth", "1", "origin", revision)
+    _run("git", "-C", str(repo_dir), "checkout", "--detach", revision)
+
+
+def _check_repo(name: str, revision: str) -> str | None:
+    """Return an error string if the local checkout is missing or at the wrong revision."""
+    repo_dir = BENCH_ROOT / name
+    if not (repo_dir / ".git").exists():
+        return f"{name}: missing checkout at {repo_dir}"
+    head = subprocess.check_output(("git", "-C", str(repo_dir), "rev-parse", "HEAD"), text=True).strip()
+    if head != revision:
+        return f"{name}: expected {revision}, found {head}"
+    return None
+
+
+def main() -> None:
+    """Parse arguments and sync or verify the pinned benchmark repositories."""
+    parser = argparse.ArgumentParser(description="Clone or update pinned benchmark repositories.")
+    parser.add_argument("--repo", action="append", default=[], help="Restrict to one or more repo names.")
+    parser.add_argument("--check", action="store_true", help="Only verify local checkouts against pinned revisions.")
+    args = parser.parse_args()
+    specs = load_repo_specs()
+    selected = {name: spec for name, spec in specs.items() if not args.repo or name in args.repo}
+    BENCH_ROOT.mkdir(parents=True, exist_ok=True)
+
+    if args.check:
+        problems = [
+            problem for name, spec in selected.items() if (problem := _check_repo(name, spec.revision)) is not None
+        ]
+        if problems:
+            for problem in problems:
+                print(problem, file=sys.stderr)
+            raise SystemExit(1)
+        print(f"Verified {len(selected)} pinned repo(s).")
+        return
+
+    for name, spec in selected.items():
+        _sync_repo(name, spec.url, spec.revision)
+
+    print(f"Synced {len(selected)} pinned repo(s).")
+
+
+# Script entry point, e.g. ``python -m benchmarks.sync_repos``.
+if __name__ == "__main__":
+    main()
diff --git a/pyproject.toml b/pyproject.toml
index 0ce4da0..111905d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -70,7 +70,7 @@ target-version = "py310"
 [tool.ruff.lint.per-file-ignores]
 "tests/**" = ["ANN"]
 "src/semble/cli.py" = ["T20"]
-"local/benchmarks/*.py" = ["T20", "D"]
+"benchmarks/*.py" = ["T20"]
 
 [tool.ruff.lint]
 select = [