From f5f3d7312bcf9244498e7ba8c223dfac38c1eaf0 Mon Sep 17 00:00:00 2001
From: Pringled
Date: Tue, 14 Apr 2026 19:38:04 +0200
Subject: [PATCH 01/15] Add benchmarks directory with cleaned-up bench_hybrid

---
 benchmarks/README.md                          |  21 ++
 benchmarks/__init__.py                        |   1 +
 benchmarks/annotations/aiohttp.json           |  90 +++++++
 benchmarks/annotations/alamofire.json         |  99 +++++++
 benchmarks/annotations/axios.json             |  46 ++++
 benchmarks/annotations/cats.json              |  99 +++++++
 benchmarks/annotations/chi.json               |  32 +++
 benchmarks/annotations/cobra.json             |  32 +++
 benchmarks/annotations/commons-lang.json      |  43 +++
 benchmarks/annotations/express.json           |  32 +++
 benchmarks/annotations/fastapi.json           | 112 ++++++++
 benchmarks/annotations/flask.json             |  90 +++++++
 benchmarks/annotations/gin.json               |  40 +++
 benchmarks/annotations/gson.json              |  32 +++
 benchmarks/annotations/guzzle.json            |  92 +++++++
 benchmarks/annotations/httpx.json             |  90 +++++++
 benchmarks/annotations/jackson-databind.json  |  49 ++++
 benchmarks/annotations/ktor.json              |  93 +++++++
 benchmarks/annotations/laravel-framework.json |  32 +++
 .../annotations/messagepack-csharp.json       |  98 +++++++
 benchmarks/annotations/model2vec.json         |  82 ++++++
 benchmarks/annotations/monolog.json           |  52 ++++
 benchmarks/annotations/pydantic.json          |  82 ++++++
 benchmarks/annotations/rack.json              |  44 ++++
 benchmarks/annotations/rails.json             |  49 ++++
 benchmarks/annotations/redux.json             |  32 +++
 benchmarks/annotations/requests.json          | 109 ++++++++
 benchmarks/annotations/sinatra.json           | 100 +++++++
 benchmarks/annotations/starlette.json         |  90 +++++++
 benchmarks/annotations/tokio.json             |  99 +++++++
 benchmarks/annotations/trpc.json              |  99 +++++++
 benchmarks/bench_hybrid.py                    | 248 ++++++++++++++++++
 benchmarks/common.py                          | 194 ++++++++++++++
 benchmarks/repos.json                         | 202 ++++++++++++++
 benchmarks/sync_repos.py                      |  69 +++++
 pyproject.toml                                |   2 +-
 36 files changed, 2775 insertions(+), 1 deletion(-)
 create mode 100644 benchmarks/README.md
 create mode 100644 benchmarks/__init__.py
 create mode 100644 benchmarks/annotations/aiohttp.json
 create mode 100644 benchmarks/annotations/alamofire.json
 create mode 100644 benchmarks/annotations/axios.json
 create mode 100644 benchmarks/annotations/cats.json
 create mode 100644 benchmarks/annotations/chi.json
 create mode 100644 benchmarks/annotations/cobra.json
 create mode 100644 benchmarks/annotations/commons-lang.json
 create mode 100644 benchmarks/annotations/express.json
 create mode 100644 benchmarks/annotations/fastapi.json
 create mode 100644 benchmarks/annotations/flask.json
 create mode 100644 benchmarks/annotations/gin.json
 create mode 100644 benchmarks/annotations/gson.json
 create mode 100644 benchmarks/annotations/guzzle.json
 create mode 100644 benchmarks/annotations/httpx.json
 create mode 100644 benchmarks/annotations/jackson-databind.json
 create mode 100644 benchmarks/annotations/ktor.json
 create mode 100644 benchmarks/annotations/laravel-framework.json
 create mode 100644 benchmarks/annotations/messagepack-csharp.json
 create mode 100644 benchmarks/annotations/model2vec.json
 create mode 100644 benchmarks/annotations/monolog.json
 create mode 100644 benchmarks/annotations/pydantic.json
 create mode 100644 benchmarks/annotations/rack.json
 create mode 100644 benchmarks/annotations/rails.json
 create mode 100644 benchmarks/annotations/redux.json
 create mode 100644 benchmarks/annotations/requests.json
 create mode 100644 benchmarks/annotations/sinatra.json
 create mode 100644 benchmarks/annotations/starlette.json
 create mode 100644 benchmarks/annotations/tokio.json
 create mode 100644 benchmarks/annotations/trpc.json
 create mode 100644 benchmarks/bench_hybrid.py
 create mode 100644 benchmarks/common.py
 create mode 100644 benchmarks/repos.json
 create mode 100644 benchmarks/sync_repos.py

diff --git a/benchmarks/README.md b/benchmarks/README.md
new file mode 100644
index 0000000..8d6d906
--- /dev/null
+++ b/benchmarks/README.md
@@ -0,0 +1,21 @@
+# Benchmarks
+
+Reproducible local benchmarks for `semble`.
+
+Pinned repositories live in `repos.json` and are checked out into `/tmp/bench`.
+
+## Setup
+
+```bash
+uv run python -m benchmarks.sync_repos
+uv run python -m benchmarks.sync_repos --check
+```
+
+## Run
+
+```bash
+uv run python -m benchmarks.bench_hybrid
+uv run python -m benchmarks.bench_hybrid --cache
+uv run python -m benchmarks.bench_hybrid --repo fastapi --repo axios
+uv run python -m benchmarks.bench_hybrid --language python
+```
diff --git a/benchmarks/__init__.py b/benchmarks/__init__.py
new file mode 100644
index 0000000..793aef7
--- /dev/null
+++ b/benchmarks/__init__.py
@@ -0,0 +1 @@
+"""Benchmark and eval tooling for semble."""
diff --git a/benchmarks/annotations/aiohttp.json b/benchmarks/annotations/aiohttp.json
new file mode 100644
index 0000000..ea00500
--- /dev/null
+++ b/benchmarks/annotations/aiohttp.json
@@ -0,0 +1,90 @@
+[
+  {
+    "query": "how the async HTTP client session works",
+    "relevant": ["aiohttp/client.py"],
+    "secondary": []
+  },
+  {
+    "query": "connection pooling and TCP connector",
+    "relevant": ["aiohttp/connector.py"],
+    "secondary": []
+  },
+  {
+    "query": "WebSocket client implementation",
+    "relevant": ["aiohttp/client_ws.py"],
+    "secondary": ["aiohttp/_websocket/reader.py"]
+  },
+  {
+    "query": "request and response object internals",
+    "relevant": ["aiohttp/client_reqrep.py"],
+    "secondary": []
+  },
+  {
+    "query": "URL routing and resource dispatching",
+    "relevant": ["aiohttp/web_urldispatcher.py"],
+    "secondary": []
+  },
+  {
+    "query": "server-side middleware execution",
+    "relevant": ["aiohttp/web_middlewares.py"],
+    "secondary": ["aiohttp/web_app.py"]
+  },
+  {
+    "query": "multipart and form data handling",
+    "relevant": ["aiohttp/multipart.py"],
+    "secondary": ["aiohttp/formdata.py"]
+  },
+  {
+    "query": "response streaming and payload",
+    "relevant": ["aiohttp/streams.py"],
+    "secondary": ["aiohttp/payload.py"]
+  },
+  {
+    "query": "HTTP exception types and error responses",
+    "relevant": ["aiohttp/web_exceptions.py"],
+    "secondary": ["aiohttp/client_exceptions.py"]
+  },
+  {
+    "query": "request tracing and observability hooks",
+    "relevant": ["aiohttp/tracing.py"],
+    "secondary": []
+  },
+  {
+    "query": "how chunked transfer encoding is parsed",
+    "relevant": ["aiohttp/http_parser.py"],
+    "secondary": []
+  },
+  {
+    "query": "how DNS resolution is handled asynchronously",
+    "relevant": ["aiohttp/resolver.py"],
+    "secondary": ["aiohttp/connector.py"]
+  },
+  {
+    "query": "how backpressure and flow control work in streaming",
+    "relevant": ["aiohttp/streams.py"],
+    "secondary": []
+  },
+  {
+    "query": "how connection draining and cleanup happen on close",
+    "relevant": ["aiohttp/connector.py"],
+    "secondary": ["aiohttp/client_proto.py"]
+  },
+  {
+    "query": "how the web application sets up and tears down on startup",
+    "relevant": ["aiohttp/web_app.py"],
+    "secondary": ["aiohttp/web_runner.py"]
+  },
+  {"query": "ClientSession", "relevant": ["aiohttp/client.py"], "secondary": []},
+  {"query": "TCPConnector", "relevant": ["aiohttp/connector.py"], "secondary": []},
+  {"query": "UrlDispatcher", "relevant": ["aiohttp/web_urldispatcher.py"], "secondary": []},
+  {"query": "ClientResponse", "relevant": ["aiohttp/client_reqrep.py"], "secondary": []},
+  {"query": "TraceConfig", "relevant": ["aiohttp/tracing.py"], "secondary": []},
+  {
+    "query": "how ClientSession acquires and releases connections from the connector",
+    "relevant": ["aiohttp/client.py"],
+    "secondary": ["aiohttp/connector.py"],
+    "category": "architecture",
+    "seed": {"path": "aiohttp/client.py", "line": 374},
+    "related": ["aiohttp/connector.py"]
+  }
+]
diff --git a/benchmarks/annotations/alamofire.json b/benchmarks/annotations/alamofire.json
new file mode 100644
index 0000000..df5ee2b
--- /dev/null
+++ b/benchmarks/annotations/alamofire.json
@@ -0,0 +1,99 @@
+[
+  {
+    "query": "how the Session manages the underlying URLSession and dispatches requests",
+    "relevant": ["Source/Core/Session.swift"],
+    "secondary": ["Source/Core/SessionDelegate.swift"],
+    "category": "architecture"
+  },
+  {
+    "query": "how request retrying is implemented with backoff and retry conditions",
+    "relevant": ["Source/Features/RetryPolicy.swift"],
+    "secondary": ["Source/Features/RequestInterceptor.swift"],
+    "category": "architecture"
+  },
+  {
+    "query": "how response validation checks status codes and content types",
+    "relevant": ["Source/Features/Validation.swift"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "how response serialization decodes JSON, Decodable, and strings",
+    "relevant": ["Source/Features/ResponseSerialization.swift"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "how multipart form data encodes fields and file attachments",
+    "relevant": ["Source/Features/MultipartFormData.swift"],
+    "secondary": ["Source/Features/MultipartUpload.swift"],
+    "category": "semantic"
+  },
+  {
+    "query": "how authentication interceptors handle credential challenges",
+    "relevant": ["Source/Features/AuthenticationInterceptor.swift"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "how server trust evaluation handles SSL certificate pinning",
+    "relevant": ["Source/Features/ServerTrustEvaluation.swift"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "how URL query parameters are encoded from Encodable values",
+    "relevant": ["Source/Features/URLEncodedFormEncoder.swift"],
+    "secondary": ["Source/Core/ParameterEncoder.swift"],
+    "category": "semantic"
+  },
+  {
+    "query": "how network reachability is monitored to detect connectivity changes",
+    "relevant": ["Source/Features/NetworkReachabilityManager.swift"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "how download requests save responses to disk",
+    "relevant": ["Source/Core/DownloadRequest.swift"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "how request and response events are logged via EventMonitor",
+    "relevant": ["Source/Features/EventMonitor.swift"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "how the Request class tracks lifecycle state transitions",
+    "relevant": ["Source/Core/Request.swift"],
+    "secondary": [],
+    "category": "architecture"
+  },
+  {
+    "query": "Session",
+    "relevant": ["Source/Core/Session.swift"],
+    "secondary": []
+  },
+  {
+    "query": "AFError",
+    "relevant": ["Source/Core/AFError.swift"],
+    "secondary": []
+  },
+  {
+    "query": "RetryPolicy",
+    "relevant": ["Source/Features/RetryPolicy.swift"],
+    "secondary": []
+  },
+  {
+    "query": "ServerTrustEvaluating",
+    "relevant": ["Source/Features/ServerTrustEvaluation.swift"],
+    "secondary": []
+  },
+  {
+    "query": "HTTPHeaders",
"relevant": ["Source/Core/HTTPHeaders.swift"], + "secondary": [] + } +] diff --git a/benchmarks/annotations/axios.json b/benchmarks/annotations/axios.json new file mode 100644 index 0000000..3466c21 --- /dev/null +++ b/benchmarks/annotations/axios.json @@ -0,0 +1,46 @@ +[ + { + "query": "how HTTP requests are dispatched through the configured adapter", + "relevant": ["lib/core/dispatchRequest.js"], + "secondary": ["lib/adapters/adapters.js"], + "category": "architecture" + }, + { + "query": "request and response interceptors", + "relevant": ["lib/core/InterceptorManager.js"], + "secondary": ["lib/core/Axios.js"], + "category": "semantic" + }, + { + "query": "node HTTP adapter implementation", + "relevant": ["lib/adapters/http.js"], + "secondary": [], + "category": "semantic" + }, + { + "query": "how config defaults are merged before a request is sent", + "relevant": ["lib/core/mergeConfig.js"], + "secondary": ["lib/core/Axios.js"], + "category": "architecture" + }, + { + "query": "Axios", + "relevant": [{"path": "lib/core/Axios.js", "start_line": 22, "end_line": 61}], + "secondary": [], + "category": "symbol", + "seed": {"path": "lib/core/Axios.js", "line": 46}, + "related": [{"path": "lib/core/Axios.js", "start_line": 179, "end_line": 239}] + }, + { + "query": "InterceptorManager", + "relevant": [{"path": "lib/core/InterceptorManager.js", "start_line": 5, "end_line": 33}], + "secondary": [], + "category": "symbol" + }, + { + "query": "mergeConfig", + "relevant": [{"path": "lib/core/mergeConfig.js", "start_line": 17, "end_line": 106}], + "secondary": [], + "category": "symbol" + } +] diff --git a/benchmarks/annotations/cats.json b/benchmarks/annotations/cats.json new file mode 100644 index 0000000..4140db6 --- /dev/null +++ b/benchmarks/annotations/cats.json @@ -0,0 +1,99 @@ +[ + { + "query": "how the Functor type class defines mapping over a context", + "relevant": ["core/src/main/scala/cats/Functor.scala"], + "secondary": [], + "category": "architecture" + }, + { + "query": "how Monad composes dependent effectful computations with flatMap", + "relevant": ["core/src/main/scala/cats/Monad.scala"], + "secondary": ["core/src/main/scala/cats/FlatMap.scala"], + "category": "architecture" + }, + { + "query": "how Applicative combines independent effects", + "relevant": ["core/src/main/scala/cats/Applicative.scala"], + "secondary": ["core/src/main/scala/cats/Apply.scala"], + "category": "architecture" + }, + { + "query": "how errors are handled and recovered in ApplicativeError and MonadError", + "relevant": ["core/src/main/scala/cats/ApplicativeError.scala"], + "secondary": ["core/src/main/scala/cats/MonadError.scala"], + "category": "semantic" + }, + { + "query": "how Validated accumulates errors across independent computations", + "relevant": ["core/src/main/scala/cats/data/Validated.scala"], + "secondary": [], + "category": "semantic" + }, + { + "query": "how EitherT combines the Either monad with another effect", + "relevant": ["core/src/main/scala/cats/data/EitherT.scala"], + "secondary": [], + "category": "semantic" + }, + { + "query": "how Kleisli composes functions that return monadic values", + "relevant": ["core/src/main/scala/cats/data/Kleisli.scala"], + "secondary": [], + "category": "semantic" + }, + { + "query": "how Eval provides lazy and memoized evaluation", + "relevant": ["core/src/main/scala/cats/Eval.scala"], + "secondary": [], + "category": "semantic" + }, + { + "query": "how Foldable traverses and reduces elements in a container", + "relevant": 
["core/src/main/scala/cats/Foldable.scala"], + "secondary": [], + "category": "semantic" + }, + { + "query": "how Chain provides O(1) concatenation as an alternative to List", + "relevant": ["core/src/main/scala/cats/data/Chain.scala"], + "secondary": [], + "category": "semantic" + }, + { + "query": "how natural transformations map between type constructors", + "relevant": ["core/src/main/scala/cats/arrow/FunctionK.scala"], + "secondary": [], + "category": "semantic" + }, + { + "query": "how the Contravariant functor reverses the mapping direction", + "relevant": ["core/src/main/scala/cats/Contravariant.scala"], + "secondary": [], + "category": "semantic" + }, + { + "query": "Monad", + "relevant": ["core/src/main/scala/cats/Monad.scala"], + "secondary": [] + }, + { + "query": "Functor", + "relevant": ["core/src/main/scala/cats/Functor.scala"], + "secondary": [] + }, + { + "query": "EitherT", + "relevant": ["core/src/main/scala/cats/data/EitherT.scala"], + "secondary": [] + }, + { + "query": "Validated", + "relevant": ["core/src/main/scala/cats/data/Validated.scala"], + "secondary": [] + }, + { + "query": "Kleisli", + "relevant": ["core/src/main/scala/cats/data/Kleisli.scala"], + "secondary": [] + } +] diff --git a/benchmarks/annotations/chi.json b/benchmarks/annotations/chi.json new file mode 100644 index 0000000..38223b5 --- /dev/null +++ b/benchmarks/annotations/chi.json @@ -0,0 +1,32 @@ +[ + { + "query": "HTTP router and middleware composition", + "relevant": ["mux.go"], + "secondary": ["chain.go"], + "category": "architecture" + }, + { + "query": "radix tree path matching", + "relevant": ["tree.go"], + "secondary": [], + "category": "semantic" + }, + { + "query": "request routing context storage", + "relevant": ["context.go"], + "secondary": ["mux.go"], + "category": "architecture" + }, + { + "query": "request logging middleware", + "relevant": ["middleware/logger.go"], + "secondary": [], + "category": "semantic" + }, + { + "query": "Mux", + "relevant": ["mux.go"], + "secondary": [], + "category": "symbol" + } +] diff --git a/benchmarks/annotations/cobra.json b/benchmarks/annotations/cobra.json new file mode 100644 index 0000000..c8d97a2 --- /dev/null +++ b/benchmarks/annotations/cobra.json @@ -0,0 +1,32 @@ +[ + { + "query": "core command execution and command tree", + "relevant": ["command.go"], + "secondary": [], + "category": "architecture" + }, + { + "query": "shell completion request handling", + "relevant": ["completions.go"], + "secondary": [], + "category": "semantic" + }, + { + "query": "bash completion generation", + "relevant": ["bash_completions.go"], + "secondary": ["completions.go"], + "category": "semantic" + }, + { + "query": "positional argument validators", + "relevant": ["args.go"], + "secondary": [], + "category": "semantic" + }, + { + "query": "Command", + "relevant": ["command.go"], + "secondary": [], + "category": "symbol" + } +] diff --git a/benchmarks/annotations/commons-lang.json b/benchmarks/annotations/commons-lang.json new file mode 100644 index 0000000..9d51c0c --- /dev/null +++ b/benchmarks/annotations/commons-lang.json @@ -0,0 +1,43 @@ +[ + { + "query": "null-safe string operations and text helpers", + "relevant": ["src/main/java/org/apache/commons/lang3/StringUtils.java"], + "secondary": [], + "category": "semantic" + }, + { + "query": "argument validation helpers and exception rules", + "relevant": ["src/main/java/org/apache/commons/lang3/Validate.java"], + "secondary": [], + "category": "semantic" + }, + { + "query": "reflection-based equals 
implementation", + "relevant": ["src/main/java/org/apache/commons/lang3/builder/EqualsBuilder.java"], + "secondary": ["src/main/java/org/apache/commons/lang3/ClassUtils.java"], + "category": "architecture" + }, + { + "query": "range object with inclusive bounds and comparator support", + "relevant": ["src/main/java/org/apache/commons/lang3/Range.java"], + "secondary": [], + "category": "semantic" + }, + { + "query": "StringUtils", + "relevant": ["src/main/java/org/apache/commons/lang3/StringUtils.java"], + "secondary": [], + "category": "symbol" + }, + { + "query": "reflection equals builder internals", + "relevant": [{"path": "src/main/java/org/apache/commons/lang3/builder/EqualsBuilder.java", "start_line": 89, "end_line": 99}], + "secondary": [], + "category": "architecture", + "seed": {"path": "src/main/java/org/apache/commons/lang3/builder/EqualsBuilder.java", "line": 89}, + "related": [ + {"path": "src/main/java/org/apache/commons/lang3/builder/HashCodeBuilder.java", "start_line": 105, "end_line": 141}, + {"path": "src/main/java/org/apache/commons/lang3/builder/IDKey.java", "start_line": 27, "end_line": 74} + ] + } +] diff --git a/benchmarks/annotations/express.json b/benchmarks/annotations/express.json new file mode 100644 index 0000000..dd82ccf --- /dev/null +++ b/benchmarks/annotations/express.json @@ -0,0 +1,32 @@ +[ + { + "query": "application initialization and default configuration", + "relevant": ["lib/application.js"], + "secondary": ["lib/express.js"], + "category": "architecture" + }, + { + "query": "request API helpers and header access", + "relevant": ["lib/request.js"], + "secondary": [], + "category": "semantic" + }, + { + "query": "response sending and content negotiation", + "relevant": ["lib/response.js"], + "secondary": [], + "category": "semantic" + }, + { + "query": "template view lookup and rendering", + "relevant": ["lib/view.js"], + "secondary": ["lib/application.js"], + "category": "architecture" + }, + { + "query": "response", + "relevant": ["lib/response.js"], + "secondary": [], + "category": "symbol" + } +] diff --git a/benchmarks/annotations/fastapi.json b/benchmarks/annotations/fastapi.json new file mode 100644 index 0000000..23a990b --- /dev/null +++ b/benchmarks/annotations/fastapi.json @@ -0,0 +1,112 @@ +[ + { + "query": "how does dependency injection work", + "relevant": ["fastapi/dependencies/utils.py"], + "secondary": ["fastapi/dependencies/models.py", "fastapi/params.py"] + }, + { + "query": "request validation and error handling", + "relevant": ["fastapi/exceptions.py"], + "secondary": ["fastapi/exception_handlers.py"] + }, + { + "query": "how are routes registered", + "relevant": ["fastapi/routing.py"], + "secondary": ["fastapi/applications.py"] + }, + { + "query": "websocket endpoint implementation", + "relevant": ["fastapi/websockets.py"], + "secondary": ["fastapi/routing.py"] + }, + { + "query": "OpenAPI schema generation", + "relevant": ["fastapi/openapi/utils.py"], + "secondary": ["fastapi/openapi/models.py"] + }, + { + "query": "middleware stack and CORS", + "relevant": ["fastapi/middleware/cors.py"], + "secondary": ["fastapi/applications.py"] + }, + { + "query": "file upload handling", + "relevant": ["fastapi/datastructures.py"], + "secondary": [] + }, + { + "query": "response model serialization", + "relevant": ["fastapi/encoders.py"], + "secondary": ["fastapi/routing.py"] + }, + { + "query": "background tasks", + "relevant": ["fastapi/background.py"], + "secondary": [] + }, + { + "query": "security and OAuth2 authentication", + 
"relevant": ["fastapi/security/oauth2.py"], + "secondary": ["fastapi/security/http.py"] + }, + { + "query": "how is response validation and serialization applied before sending", + "relevant": ["fastapi/routing.py"], + "secondary": ["fastapi/encoders.py"] + }, + { + "query": "how are nested and sub-dependencies resolved", + "relevant": ["fastapi/dependencies/utils.py"], + "secondary": [] + }, + { + "query": "how does FastAPI run sync route functions without blocking", + "relevant": ["fastapi/concurrency.py"], + "secondary": ["fastapi/routing.py"] + }, + { + "query": "how are path parameters extracted and type-converted", + "relevant": ["fastapi/routing.py"], + "secondary": ["fastapi/dependencies/utils.py"] + }, + { + "query": "how does exception propagation work through dependency injection", + "relevant": ["fastapi/dependencies/utils.py"], + "secondary": ["fastapi/routing.py"] + }, + { + "query": "Depends", + "relevant": [{"path": "fastapi/params.py", "start_line": 746, "end_line": 749}], + "secondary": ["fastapi/param_functions.py"], + "category": "symbol", + "seed": {"path": "fastapi/params.py", "line": 746}, + "related": [ + {"path": "fastapi/params.py", "start_line": 434, "end_line": 468}, + {"path": "fastapi/params.py", "start_line": 627, "end_line": 664} + ] + }, + { + "query": "HTTPException", + "relevant": [{"path": "fastapi/exceptions.py", "start_line": 17, "end_line": 66}], + "secondary": [], + "category": "symbol" + }, + { + "query": "APIRouter", + "relevant": [{"path": "fastapi/routing.py", "start_line": 1005, "end_line": 1030}], + "secondary": [], + "category": "symbol" + }, + { + "query": "BackgroundTasks", + "relevant": [{"path": "fastapi/background.py", "start_line": 11, "end_line": 39}], + "secondary": [], + "category": "symbol" + }, + { + "query": "OAuth2PasswordBearer", + "relevant": [{"path": "fastapi/security/oauth2.py", "start_line": 433, "end_line": 471}], + "secondary": [], + "category": "symbol" + } +] diff --git a/benchmarks/annotations/flask.json b/benchmarks/annotations/flask.json new file mode 100644 index 0000000..de9fd41 --- /dev/null +++ b/benchmarks/annotations/flask.json @@ -0,0 +1,90 @@ +[ + { + "query": "how are routes registered", + "relevant": ["src/flask/sansio/scaffold.py"], + "secondary": ["src/flask/sansio/app.py"] + }, + { + "query": "request context handling", + "relevant": ["src/flask/ctx.py"], + "secondary": [] + }, + { + "query": "session management", + "relevant": ["src/flask/sessions.py"], + "secondary": [] + }, + { + "query": "blueprint registration", + "relevant": ["src/flask/sansio/blueprints.py"], + "secondary": ["src/flask/blueprints.py"] + }, + { + "query": "configuration loading", + "relevant": ["src/flask/config.py"], + "secondary": [] + }, + { + "query": "template rendering", + "relevant": ["src/flask/templating.py"], + "secondary": [] + }, + { + "query": "error handlers", + "relevant": ["src/flask/sansio/scaffold.py"], + "secondary": ["src/flask/app.py"] + }, + { + "query": "CLI commands", + "relevant": ["src/flask/cli.py"], + "secondary": [] + }, + { + "query": "testing client", + "relevant": ["src/flask/testing.py"], + "secondary": [] + }, + { + "query": "JSON response helpers", + "relevant": ["src/flask/json/provider.py"], + "secondary": ["src/flask/helpers.py"] + }, + { + "query": "how does the application context push and pop around requests", + "relevant": ["src/flask/ctx.py"], + "secondary": ["src/flask/app.py"] + }, + { + "query": "how does Flask select which error handler to invoke for an exception", + "relevant": 
["src/flask/app.py"], + "secondary": ["src/flask/sansio/app.py"] + }, + { + "query": "how is g used to store data scoped to the current request", + "relevant": ["src/flask/ctx.py"], + "secondary": ["src/flask/globals.py"] + }, + { + "query": "how are request lifecycle signals emitted", + "relevant": ["src/flask/signals.py"], + "secondary": ["src/flask/app.py"] + }, + { + "query": "how does Flask convert a view return value into a response object", + "relevant": ["src/flask/app.py"], + "secondary": ["src/flask/wrappers.py"] + }, + {"query": "Blueprint", "relevant": ["src/flask/sansio/blueprints.py"], "secondary": ["src/flask/blueprints.py"]}, + {"query": "render_template", "relevant": ["src/flask/templating.py"], "secondary": []}, + {"query": "Flask", "relevant": ["src/flask/app.py"], "secondary": []}, + {"query": "session", "relevant": ["src/flask/sessions.py"], "secondary": ["src/flask/globals.py"]}, + {"query": "g", "relevant": ["src/flask/globals.py"], "secondary": ["src/flask/ctx.py"]}, + { + "query": "how Blueprint inherits routing behaviour from its sansio base class", + "relevant": ["src/flask/sansio/blueprints.py"], + "secondary": ["src/flask/blueprints.py"], + "category": "architecture", + "seed": {"path": "src/flask/blueprints.py", "line": 18}, + "related": ["src/flask/sansio/blueprints.py"] + } +] diff --git a/benchmarks/annotations/gin.json b/benchmarks/annotations/gin.json new file mode 100644 index 0000000..2ba38bc --- /dev/null +++ b/benchmarks/annotations/gin.json @@ -0,0 +1,40 @@ +[ + { + "query": "how routes are grouped and registered", + "relevant": ["routergroup.go"], + "secondary": ["gin.go"], + "category": "architecture" + }, + { + "query": "radix tree path matching", + "relevant": ["tree.go"], + "secondary": [], + "category": "semantic" + }, + { + "query": "request context lifecycle and helpers", + "relevant": ["context.go"], + "secondary": [], + "category": "semantic" + }, + { + "query": "panic recovery middleware", + "relevant": ["recovery.go"], + "secondary": [], + "category": "semantic" + }, + { + "query": "JSON request binding and validation", + "relevant": ["binding/json.go"], + "secondary": ["binding/default_validator.go"], + "category": "architecture" + }, + { + "query": "how the Gin Engine embeds RouterGroup and uses Context per request", + "relevant": ["gin.go"], + "secondary": ["routergroup.go", "context.go"], + "category": "architecture", + "seed": {"path": "gin.go", "line": 92}, + "related": ["context.go", "routergroup.go"] + } +] diff --git a/benchmarks/annotations/gson.json b/benchmarks/annotations/gson.json new file mode 100644 index 0000000..46c2f5e --- /dev/null +++ b/benchmarks/annotations/gson.json @@ -0,0 +1,32 @@ +[ + { + "query": "main Gson API for toJson and fromJson", + "relevant": ["gson/src/main/java/com/google/gson/Gson.java"], + "secondary": [], + "category": "semantic" + }, + { + "query": "GsonBuilder configuration and create", + "relevant": ["gson/src/main/java/com/google/gson/GsonBuilder.java"], + "secondary": [], + "category": "semantic" + }, + { + "query": "reflection-based field serialization and deserialization", + "relevant": ["gson/src/main/java/com/google/gson/internal/bind/ReflectiveTypeAdapterFactory.java"], + "secondary": [], + "category": "architecture" + }, + { + "query": "TypeAdapter", + "relevant": ["gson/src/main/java/com/google/gson/TypeAdapter.java"], + "secondary": [], + "category": "symbol" + }, + { + "query": "JsonParser", + "relevant": ["gson/src/main/java/com/google/gson/JsonParser.java"], + "secondary": [], + 
"category": "symbol" + } +] diff --git a/benchmarks/annotations/guzzle.json b/benchmarks/annotations/guzzle.json new file mode 100644 index 0000000..2bbbf02 --- /dev/null +++ b/benchmarks/annotations/guzzle.json @@ -0,0 +1,92 @@ +[ + { + "query": "HTTP client request sending and defaults", + "relevant": ["src/Client.php"], + "secondary": ["src/ClientTrait.php"], + "category": "semantic" + }, + { + "query": "middleware handler stack composition", + "relevant": ["src/HandlerStack.php"], + "secondary": ["src/Middleware.php"], + "category": "architecture" + }, + { + "query": "retry middleware and exponential backoff", + "relevant": ["src/RetryMiddleware.php"], + "secondary": [], + "category": "semantic" + }, + { + "query": "cookie jar implementation", + "relevant": ["src/Cookie/CookieJar.php"], + "secondary": ["src/Cookie/SetCookie.php"], + "category": "semantic" + }, + { + "query": "redirect following and location header handling", + "relevant": ["src/RedirectMiddleware.php"], + "secondary": [], + "category": "semantic" + }, + { + "query": "curl-based HTTP handler and connection management", + "relevant": ["src/Handler/CurlFactory.php"], + "secondary": ["src/Handler/CurlHandler.php"], + "category": "semantic" + }, + { + "query": "mock handler for simulating HTTP responses in tests", + "relevant": ["src/Handler/MockHandler.php"], + "secondary": [], + "category": "semantic" + }, + { + "query": "concurrent request pool with limited parallelism", + "relevant": ["src/Pool.php"], + "secondary": [], + "category": "semantic" + }, + { + "query": "available request options and their configuration", + "relevant": ["src/RequestOptions.php"], + "secondary": [], + "category": "semantic" + }, + { + "query": "HTTP message formatting and request logging", + "relevant": ["src/MessageFormatter.php"], + "secondary": [], + "category": "semantic" + }, + { + "query": "how request body and content-type are prepared before sending", + "relevant": ["src/PrepareBodyMiddleware.php"], + "secondary": [], + "category": "semantic" + }, + { + "query": "exception hierarchy for HTTP and transfer errors", + "relevant": ["src/Exception/RequestException.php"], + "secondary": ["src/Exception/GuzzleException.php"], + "category": "semantic" + }, + { + "query": "Client", + "relevant": ["src/Client.php"], + "secondary": [], + "category": "symbol" + }, + { + "query": "HandlerStack", + "relevant": ["src/HandlerStack.php"], + "secondary": [], + "category": "symbol" + }, + { + "query": "RedirectMiddleware", + "relevant": ["src/RedirectMiddleware.php"], + "secondary": [], + "category": "symbol" + } +] diff --git a/benchmarks/annotations/httpx.json b/benchmarks/annotations/httpx.json new file mode 100644 index 0000000..08edd0c --- /dev/null +++ b/benchmarks/annotations/httpx.json @@ -0,0 +1,90 @@ +[ + { + "query": "how are HTTP requests sent", + "relevant": ["httpx/_client.py"], + "secondary": [] + }, + { + "query": "authentication and credentials", + "relevant": ["httpx/_auth.py"], + "secondary": [] + }, + { + "query": "connection pooling and transport", + "relevant": ["httpx/_transports/default.py"], + "secondary": ["httpx/_transports/base.py"] + }, + { + "query": "URL parsing and construction", + "relevant": ["httpx/_urlparse.py"], + "secondary": ["httpx/_urls.py"] + }, + { + "query": "response decoding and content", + "relevant": ["httpx/_decoders.py"], + "secondary": ["httpx/_models.py"] + }, + { + "query": "timeout configuration", + "relevant": ["httpx/_config.py"], + "secondary": [] + }, + { + "query": "cookie handling", + 
"relevant": ["httpx/_models.py"], + "secondary": ["httpx/_client.py"] + }, + { + "query": "multipart file upload", + "relevant": ["httpx/_multipart.py"], + "secondary": ["httpx/_content.py"] + }, + { + "query": "redirect following", + "relevant": ["httpx/_client.py"], + "secondary": [] + }, + { + "query": "error and exception types", + "relevant": ["httpx/_exceptions.py"], + "secondary": [] + }, + { + "query": "how does digest authentication handle the challenge-response flow", + "relevant": ["httpx/_auth.py"], + "secondary": [] + }, + { + "query": "how are keep-alive connections managed and reused", + "relevant": ["httpx/_transports/default.py"], + "secondary": ["httpx/_config.py"] + }, + { + "query": "how does streaming response body iteration work", + "relevant": ["httpx/_models.py"], + "secondary": ["httpx/_decoders.py"] + }, + { + "query": "how are query parameters encoded into the URL", + "relevant": ["httpx/_urls.py"], + "secondary": ["httpx/_urlparse.py"] + }, + { + "query": "how are retries and transport errors surfaced to the caller", + "relevant": ["httpx/_exceptions.py"], + "secondary": ["httpx/_transports/default.py"] + }, + {"query": "DigestAuth", "relevant": ["httpx/_auth.py"], "secondary": []}, + {"query": "AsyncClient", "relevant": ["httpx/_client.py"], "secondary": []}, + {"query": "Timeout", "relevant": ["httpx/_config.py"], "secondary": []}, + {"query": "HTTPStatusError", "relevant": ["httpx/_exceptions.py"], "secondary": []}, + {"query": "URL", "relevant": ["httpx/_urls.py"], "secondary": []}, + { + "query": "how the HTTP transport backend sends requests over the wire", + "relevant": ["httpx/_transports/default.py"], + "secondary": ["httpx/_transports/base.py"], + "category": "architecture", + "seed": {"path": "httpx/_client.py", "line": 731}, + "related": ["httpx/_transports/default.py"] + } +] diff --git a/benchmarks/annotations/jackson-databind.json b/benchmarks/annotations/jackson-databind.json new file mode 100644 index 0000000..079d2b6 --- /dev/null +++ b/benchmarks/annotations/jackson-databind.json @@ -0,0 +1,49 @@ +[ + { + "query": "ObjectMapper entry point for reading and writing JSON", + "relevant": ["src/main/java/tools/jackson/databind/ObjectMapper.java"], + "secondary": [], + "category": "semantic" + }, + { + "query": "JSON-specific mapper builder", + "relevant": ["src/main/java/tools/jackson/databind/json/JsonMapper.java"], + "secondary": ["src/main/java/tools/jackson/databind/ObjectMapper.java"], + "category": "architecture" + }, + { + "query": "mutable JSON object node with named field operations", + "relevant": ["src/main/java/tools/jackson/databind/node/ObjectNode.java"], + "secondary": ["src/main/java/tools/jackson/databind/node/JsonNodeFactory.java"], + "category": "semantic" + }, + { + "query": "polymorphic type resolution", + "relevant": ["src/main/java/tools/jackson/databind/jsontype/impl/StdTypeResolverBuilder.java"], + "secondary": ["src/main/java/tools/jackson/databind/jsontype/impl/TypeDeserializerBase.java"], + "category": "architecture" + }, + { + "query": "ObjectMapper", + "relevant": [{"path": "src/main/java/tools/jackson/databind/ObjectMapper.java", "start_line": 93, "end_line": 132}], + "secondary": [], + "category": "symbol", + "seed": {"path": "src/main/java/tools/jackson/databind/ObjectMapper.java", "line": 109}, + "related": [ + {"path": "src/main/java/tools/jackson/databind/ObjectMapper.java", "start_line": 356, "end_line": 387}, + {"path": "src/main/java/tools/jackson/databind/cfg/MapperBuilder.java", "start_line": 338, 
"end_line": 344} + ] + }, + { + "query": "JsonMapper", + "relevant": [{"path": "src/main/java/tools/jackson/databind/json/JsonMapper.java", "start_line": 16, "end_line": 44}], + "secondary": ["src/main/java/tools/jackson/databind/ObjectMapper.java"], + "category": "symbol" + }, + { + "query": "ObjectNode", + "relevant": [{"path": "src/main/java/tools/jackson/databind/node/ObjectNode.java", "start_line": 21, "end_line": 60}], + "secondary": [], + "category": "symbol" + } +] diff --git a/benchmarks/annotations/ktor.json b/benchmarks/annotations/ktor.json new file mode 100644 index 0000000..b2aa38b --- /dev/null +++ b/benchmarks/annotations/ktor.json @@ -0,0 +1,93 @@ +[ + { + "query": "how the HttpClient is configured with plugins and an engine", + "relevant": ["ktor-client/ktor-client-core/common/src/io/ktor/client/HttpClient.kt"], + "secondary": ["ktor-client/ktor-client-core/common/src/io/ktor/client/HttpClientConfig.kt"], + "category": "architecture" + }, + { + "query": "how request and response pipelines process interceptors", + "relevant": ["ktor-client/ktor-client-core/common/src/io/ktor/client/request/HttpRequestPipeline.kt"], + "secondary": ["ktor-client/ktor-client-core/common/src/io/ktor/client/statement/HttpResponsePipeline.kt"], + "category": "architecture" + }, + { + "query": "how HTTP caching stores and validates cached responses", + "relevant": ["ktor-client/ktor-client-core/common/src/io/ktor/client/plugins/cache/HttpCache.kt"], + "secondary": ["ktor-client/ktor-client-core/common/src/io/ktor/client/plugins/cache/HttpCacheEntry.kt"], + "category": "architecture" + }, + { + "query": "how HTTP status codes are checked and exceptions raised on failure", + "relevant": ["ktor-client/ktor-client-core/common/src/io/ktor/client/plugins/HttpCallValidator.kt"], + "secondary": [], + "category": "semantic" + }, + { + "query": "how request and response timeouts are enforced", + "relevant": ["ktor-client/ktor-client-core/common/src/io/ktor/client/plugins/HttpTimeout.kt"], + "secondary": [], + "category": "semantic" + }, + { + "query": "how HTTP redirects are followed automatically", + "relevant": ["ktor-client/ktor-client-core/common/src/io/ktor/client/plugins/HttpRedirect.kt"], + "secondary": [], + "category": "semantic" + }, + { + "query": "how cookies are stored and sent with requests", + "relevant": ["ktor-client/ktor-client-core/common/src/io/ktor/client/plugins/cookies/HttpCookies.kt"], + "secondary": ["ktor-client/ktor-client-core/common/src/io/ktor/client/plugins/cookies/AcceptAllCookiesStorage.kt"], + "category": "semantic" + }, + { + "query": "how multipart form data uploads are constructed", + "relevant": ["ktor-client/ktor-client-core/common/src/io/ktor/client/request/forms/FormDataContent.kt"], + "secondary": ["ktor-client/ktor-client-core/common/src/io/ktor/client/request/forms/formDsl.kt"], + "category": "semantic" + }, + { + "query": "how WebSocket connections are established and messages exchanged", + "relevant": ["ktor-client/ktor-client-core/common/src/io/ktor/client/plugins/websocket/WebSockets.kt"], + "secondary": ["ktor-client/ktor-client-core/common/src/io/ktor/client/plugins/websocket/ClientSessions.kt"], + "category": "architecture" + }, + { + "query": "how server-sent events are received and parsed from a streaming response", + "relevant": ["ktor-client/ktor-client-core/common/src/io/ktor/client/plugins/sse/SSE.kt"], + "secondary": ["ktor-client/ktor-client-core/common/src/io/ktor/client/plugins/sse/DefaultClientSSESession.kt"], + "category": "semantic" + }, + { + 
"query": "how the engine abstraction separates the client API from the transport", + "relevant": ["ktor-client/ktor-client-core/common/src/io/ktor/client/engine/HttpClientEngine.kt"], + "secondary": ["ktor-client/ktor-client-core/common/src/io/ktor/client/engine/HttpClientEngineBase.kt"], + "category": "architecture" + }, + { + "query": "HttpClient", + "relevant": ["ktor-client/ktor-client-core/common/src/io/ktor/client/HttpClient.kt"], + "secondary": [] + }, + { + "query": "HttpTimeout", + "relevant": ["ktor-client/ktor-client-core/common/src/io/ktor/client/plugins/HttpTimeout.kt"], + "secondary": [] + }, + { + "query": "HttpRedirect", + "relevant": ["ktor-client/ktor-client-core/common/src/io/ktor/client/plugins/HttpRedirect.kt"], + "secondary": [] + }, + { + "query": "HttpCookies", + "relevant": ["ktor-client/ktor-client-core/common/src/io/ktor/client/plugins/cookies/HttpCookies.kt"], + "secondary": [] + }, + { + "query": "HttpClientEngine", + "relevant": ["ktor-client/ktor-client-core/common/src/io/ktor/client/engine/HttpClientEngine.kt"], + "secondary": [] + } +] diff --git a/benchmarks/annotations/laravel-framework.json b/benchmarks/annotations/laravel-framework.json new file mode 100644 index 0000000..376c82e --- /dev/null +++ b/benchmarks/annotations/laravel-framework.json @@ -0,0 +1,32 @@ +[ + { + "query": "queue connection resolution and connectors", + "relevant": ["src/Illuminate/Queue/QueueManager.php"], + "secondary": [], + "category": "architecture" + }, + { + "query": "database queue implementation", + "relevant": ["src/Illuminate/Queue/DatabaseQueue.php"], + "secondary": [], + "category": "semantic" + }, + { + "query": "queue worker command execution", + "relevant": ["src/Illuminate/Queue/Console/WorkCommand.php"], + "secondary": ["src/Illuminate/Queue/Worker.php"], + "category": "architecture" + }, + { + "query": "session store behavior", + "relevant": ["src/Illuminate/Session/Store.php"], + "secondary": ["src/Illuminate/Session/SessionManager.php"], + "category": "semantic" + }, + { + "query": "QueueManager", + "relevant": ["src/Illuminate/Queue/QueueManager.php"], + "secondary": [], + "category": "symbol" + } +] diff --git a/benchmarks/annotations/messagepack-csharp.json b/benchmarks/annotations/messagepack-csharp.json new file mode 100644 index 0000000..df6a60c --- /dev/null +++ b/benchmarks/annotations/messagepack-csharp.json @@ -0,0 +1,98 @@ +[ + { + "query": "how objects are serialized to MessagePack binary format", + "relevant": ["src/MessagePack/MessagePackSerializer.cs"], + "secondary": ["src/MessagePack/MessagePackWriter.cs"], + "category": "architecture" + }, + { + "query": "how binary data is deserialized back into typed C# objects", + "relevant": ["src/MessagePack/MessagePackSerializer.cs"], + "secondary": ["src/MessagePack/MessagePackReader.cs"], + "category": "architecture" + }, + { + "query": "how custom formatters are registered and resolved for types", + "relevant": ["src/MessagePack/IFormatterResolver.cs"], + "secondary": ["src/MessagePack/Resolvers/CompositeResolver.cs"], + "category": "architecture" + }, + { + "query": "how the dynamic object resolver generates serialization code at runtime", + "relevant": ["src/MessagePack/Resolvers/DynamicObjectResolver.cs"], + "secondary": [], + "category": "semantic" + }, + { + "query": "how collections and arrays are serialized", + "relevant": ["src/MessagePack/Formatters/CollectionFormatter.cs"], + "secondary": [], + "category": "semantic" + }, + { + "query": "how union types with subtypes are handled in 
serialization", + "relevant": ["src/MessagePack/Resolvers/DynamicUnionResolver.cs"], + "secondary": [], + "category": "semantic" + }, + { + "query": "how serializer options control compression and resolver configuration", + "relevant": ["src/MessagePack/MessagePackSerializerOptions.cs"], + "secondary": [], + "category": "semantic" + }, + { + "query": "how enums are serialized as integers or strings", + "relevant": ["src/MessagePack/Resolvers/DynamicEnumResolver.cs"], + "secondary": ["src/MessagePack/Formatters/GenericEnumFormatter`1.cs", "src/MessagePack/Formatters/EnumAsStringFormatter`1.cs"], + "category": "semantic" + }, + { + "query": "reading MessagePack data from a stream incrementally", + "relevant": ["src/MessagePack/MessagePackStreamReader.cs"], + "secondary": [], + "category": "semantic" + }, + { + "query": "how the sequence pool manages buffer reuse during serialization", + "relevant": ["src/MessagePack/SequencePool.cs"], + "secondary": ["src/MessagePack/BufferWriter.cs"], + "category": "architecture" + }, + { + "query": "how LZ4 compression is applied to MessagePack payloads", + "relevant": ["src/MessagePack/MessagePackSerializer.cs"], + "secondary": [], + "category": "semantic" + }, + { + "query": "MessagePackSerializer", + "relevant": ["src/MessagePack/MessagePackSerializer.cs"], + "secondary": [] + }, + { + "query": "MessagePackReader", + "relevant": ["src/MessagePack/MessagePackReader.cs"], + "secondary": [] + }, + { + "query": "IMessagePackFormatter", + "relevant": ["src/MessagePack/Formatters/IMessagePackFormatter`1.cs"], + "secondary": [] + }, + { + "query": "CompositeResolver", + "relevant": ["src/MessagePack/Resolvers/CompositeResolver.cs"], + "secondary": [] + }, + { + "query": "MessagePackSerializerOptions", + "relevant": ["src/MessagePack/MessagePackSerializerOptions.cs"], + "secondary": [] + }, + { + "query": "StandardResolver", + "relevant": ["src/MessagePack/Resolvers/StandardResolver.cs"], + "secondary": [] + } +] diff --git a/benchmarks/annotations/model2vec.json b/benchmarks/annotations/model2vec.json new file mode 100644 index 0000000..ad58320 --- /dev/null +++ b/benchmarks/annotations/model2vec.json @@ -0,0 +1,82 @@ +[ + { + "query": "how the StaticModel encodes text to embeddings", + "relevant": ["model2vec/model.py"], + "secondary": ["model2vec/inference/model.py"] + }, + { + "query": "how a model is distilled from a sentence transformer", + "relevant": ["model2vec/distill/distillation.py"], + "secondary": [] + }, + { + "query": "tokenizer construction and vocabulary building", + "relevant": ["model2vec/tokenizer/tokenizer.py"], + "secondary": ["model2vec/distill/utils.py"] + }, + { + "query": "saving and loading models from disk", + "relevant": ["model2vec/persistence/persistence.py"], + "secondary": ["model2vec/persistence/datamodels.py"] + }, + { + "query": "quantization of model weights", + "relevant": ["model2vec/quantization.py"], + "secondary": ["model2vec/vocabulary_quantization.py"] + }, + { + "query": "pushing and loading models from HuggingFace Hub", + "relevant": ["model2vec/persistence/hf.py"], + "secondary": [] + }, + { + "query": "distillation inference and embedding extraction", + "relevant": ["model2vec/distill/inference.py"], + "secondary": [] + }, + { + "query": "training a classifier on top of embeddings", + "relevant": ["model2vec/train/classifier.py"], + "secondary": ["model2vec/train/base.py"] + }, + { + "query": "generating model cards for publication", + "relevant": ["model2vec/modelcards/modelcards.py"], + "secondary": [] + }, + { 
+    "query": "utility functions used across the package",
+    "relevant": ["model2vec/utils.py"],
+    "secondary": ["model2vec/distill/utils.py"]
+  },
+  {
+    "query": "how mean pooling is applied over token embeddings during distillation",
+    "relevant": ["model2vec/distill/inference.py"],
+    "secondary": []
+  },
+  {
+    "query": "how PCA reduces embedding dimensionality",
+    "relevant": ["model2vec/distill/distillation.py"],
+    "secondary": ["model2vec/distill/inference.py"]
+  },
+  {
+    "query": "how out-of-vocabulary tokens are handled at inference time",
+    "relevant": ["model2vec/model.py"],
+    "secondary": ["model2vec/tokenizer/tokenizer.py"]
+  },
+  {
+    "query": "how vocabulary is pruned during distillation",
+    "relevant": ["model2vec/distill/utils.py"],
+    "secondary": ["model2vec/distill/distillation.py"]
+  },
+  {
+    "query": "how subword token weights are aggregated for whole-word embeddings",
+    "relevant": ["model2vec/distill/inference.py"],
+    "secondary": ["model2vec/distill/distillation.py"]
+  },
+  {"query": "StaticModel", "relevant": ["model2vec/model.py"], "secondary": []},
+  {"query": "distill", "relevant": ["model2vec/distill/distillation.py"], "secondary": []},
+  {"query": "PoolingMode", "relevant": ["model2vec/distill/inference.py"], "secondary": []},
+  {"query": "quantize", "relevant": ["model2vec/quantization.py"], "secondary": []},
+  {"query": "Tokenizer", "relevant": ["model2vec/tokenizer/tokenizer.py"], "secondary": []}
+]
diff --git a/benchmarks/annotations/monolog.json b/benchmarks/annotations/monolog.json
new file mode 100644
index 0000000..5b53451
--- /dev/null
+++ b/benchmarks/annotations/monolog.json
@@ -0,0 +1,52 @@
+[
+  {
+    "query": "logger handler stack and processors",
+    "relevant": ["src/Monolog/Logger.php"],
+    "secondary": [],
+    "category": "architecture"
+  },
+  {
+    "query": "stream log handler writes to files and streams",
+    "relevant": ["src/Monolog/Handler/StreamHandler.php"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "line formatter output formatting",
+    "relevant": ["src/Monolog/Formatter/LineFormatter.php"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "json log formatting",
+    "relevant": ["src/Monolog/Formatter/JsonFormatter.php"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "Logger",
+    "relevant": ["src/Monolog/Logger.php"],
+    "secondary": [],
+    "category": "symbol"
+  },
+  {
+    "query": "StreamHandler",
+    "relevant": ["src/Monolog/Handler/StreamHandler.php"],
+    "secondary": [],
+    "category": "symbol"
+  },
+  {
+    "query": "JsonFormatter",
+    "relevant": ["src/Monolog/Formatter/JsonFormatter.php"],
+    "secondary": [],
+    "category": "symbol"
+  },
+  {
+    "query": "how log handlers are registered and invoked by Logger",
+    "relevant": ["src/Monolog/Logger.php"],
+    "secondary": ["src/Monolog/Handler/AbstractHandler.php"],
+    "category": "architecture",
+    "seed": {"path": "src/Monolog/Logger.php", "line": 207},
+    "related": ["src/Monolog/Handler/AbstractHandler.php", "src/Monolog/Handler/HandlerInterface.php"]
+  }
+]
diff --git a/benchmarks/annotations/pydantic.json b/benchmarks/annotations/pydantic.json
new file mode 100644
index 0000000..1484adc
--- /dev/null
+++ b/benchmarks/annotations/pydantic.json
@@ -0,0 +1,82 @@
+[
+  {
+    "query": "how is BaseModel defined and instantiated",
+    "relevant": ["pydantic/main.py"],
+    "secondary": []
+  },
+  {
+    "query": "how are model fields declared and constrained",
+    "relevant": ["pydantic/fields.py"],
+    "secondary": ["pydantic/types.py"]
+  },
+  {
+    "query": "JSON schema generation from models",
+    "relevant": ["pydantic/json_schema.py"],
+    "secondary": []
+  },
+  {
+    "query": "custom field and model validators",
+    "relevant": ["pydantic/functional_validators.py"],
+    "secondary": ["pydantic/class_validators.py"]
+  },
+  {
+    "query": "how to serialize models to JSON",
+    "relevant": ["pydantic/functional_serializers.py"],
+    "secondary": ["pydantic/main.py"]
+  },
+  {
+    "query": "network types like URLs and email addresses",
+    "relevant": ["pydantic/networks.py"],
+    "secondary": []
+  },
+  {
+    "query": "model configuration and settings",
+    "relevant": ["pydantic/config.py"],
+    "secondary": []
+  },
+  {
+    "query": "validating data without a model using TypeAdapter",
+    "relevant": ["pydantic/type_adapter.py"],
+    "secondary": []
+  },
+  {
+    "query": "alias handling for field names",
+    "relevant": ["pydantic/aliases.py"],
+    "secondary": ["pydantic/alias_generators.py"]
+  },
+  {
+    "query": "root model for wrapping a single value",
+    "relevant": ["pydantic/root_model.py"],
+    "secondary": []
+  },
+  {
+    "query": "how discriminated unions select the right model variant",
+    "relevant": ["pydantic/types.py"],
+    "secondary": ["pydantic/main.py"]
+  },
+  {
+    "query": "how computed fields are defined on a model",
+    "relevant": ["pydantic/fields.py"],
+    "secondary": ["pydantic/functional_serializers.py"]
+  },
+  {
+    "query": "what runs after model initialisation in model_post_init",
+    "relevant": ["pydantic/main.py"],
+    "secondary": []
+  },
+  {
+    "query": "how model inheritance and field overriding works",
+    "relevant": ["pydantic/main.py"],
+    "secondary": ["pydantic/fields.py"]
+  },
+  {
+    "query": "how to validate a function's arguments with pydantic",
+    "relevant": ["pydantic/validate_call_decorator.py"],
+    "secondary": ["pydantic/decorator.py"]
+  },
+  {"query": "BaseModel", "relevant": ["pydantic/main.py"], "secondary": []},
+  {"query": "field_validator", "relevant": ["pydantic/functional_validators.py"], "secondary": []},
+  {"query": "model_validator", "relevant": ["pydantic/functional_validators.py"], "secondary": []},
+  {"query": "ConfigDict", "relevant": ["pydantic/config.py"], "secondary": []},
+  {"query": "Field", "relevant": ["pydantic/fields.py"], "secondary": []}
+]
diff --git a/benchmarks/annotations/rack.json b/benchmarks/annotations/rack.json
new file mode 100644
index 0000000..037b6b1
--- /dev/null
+++ b/benchmarks/annotations/rack.json
@@ -0,0 +1,44 @@
+[
+  {
+    "query": "HTTP request wrapper and forwarded headers",
+    "relevant": ["lib/rack/request.rb"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "HTTP response construction",
+    "relevant": ["lib/rack/response.rb"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "middleware builder DSL",
+    "relevant": ["lib/rack/builder.rb"],
+    "secondary": [],
+    "category": "architecture"
+  },
+  {
+    "query": "URL path mapping across mounted apps",
+    "relevant": ["lib/rack/urlmap.rb"],
+    "secondary": [],
+    "category": "architecture"
+  },
+  {
+    "query": "Request",
+    "relevant": ["lib/rack/request.rb"],
+    "secondary": [],
+    "category": "symbol"
+  },
+  {
+    "query": "Rack::Response",
+    "relevant": [{"path": "lib/rack/response.rb", "start_line": 23, "end_line": 62}],
+    "secondary": [],
+    "category": "symbol"
+  },
+  {
+    "query": "Rack::Builder",
+    "relevant": [{"path": "lib/rack/builder.rb", "start_line": 36, "end_line": 80}],
+    "secondary": [],
+    "category": "symbol"
+  }
+]
diff --git a/benchmarks/annotations/rails.json b/benchmarks/annotations/rails.json
new file mode 100644
index 0000000..9b5d376
--- /dev/null
+++ b/benchmarks/annotations/rails.json
@@ -0,0 +1,49 @@
+[
+  {
+    "query": "application boot process and initialization",
+    "relevant": ["railties/lib/rails/application.rb"],
+    "secondary": ["railties/lib/rails/configuration.rb"],
+    "category": "architecture"
+  },
+  {
+    "query": "engine configuration and load paths",
+    "relevant": ["railties/lib/rails/engine/configuration.rb"],
+    "secondary": ["railties/lib/rails/application.rb"],
+    "category": "architecture"
+  },
+  {
+    "query": "rack integration for rails applications",
+    "relevant": ["railties/lib/rails/rack.rb"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "path management for rails apps",
+    "relevant": ["railties/lib/rails/paths.rb"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "Application",
+    "relevant": ["railties/lib/rails/application.rb"],
+    "secondary": [],
+    "category": "symbol"
+  },
+  {
+    "query": "Rails::Engine",
+    "relevant": [{"path": "railties/lib/rails/engine.rb", "start_line": 348, "end_line": 387}],
+    "secondary": ["railties/lib/rails/engine/configuration.rb"],
+    "category": "symbol",
+    "seed": {"path": "railties/lib/rails/engine/configuration.rb", "line": 73},
+    "related": [
+      {"path": "railties/lib/rails/engine/updater.rb", "start_line": 1, "end_line": 21},
+      {"path": "railties/lib/rails/railtie/configuration.rb", "start_line": 1, "end_line": 54}
+    ]
+  },
+  {
+    "query": "Rails::Paths",
+    "relevant": ["railties/lib/rails/paths.rb"],
+    "secondary": [],
+    "category": "symbol"
+  }
+]
diff --git a/benchmarks/annotations/redux.json b/benchmarks/annotations/redux.json
new file mode 100644
index 0000000..b551227
--- /dev/null
+++ b/benchmarks/annotations/redux.json
@@ -0,0 +1,32 @@
+[
+  {
+    "query": "store creation and dispatch lifecycle",
+    "relevant": ["src/createStore.ts"],
+    "secondary": [],
+    "category": "architecture"
+  },
+  {
+    "query": "middleware pipeline composition",
+    "relevant": ["src/applyMiddleware.ts"],
+    "secondary": ["src/compose.ts"],
+    "category": "architecture"
+  },
+  {
+    "query": "combining reducers and validating reducer shape",
+    "relevant": ["src/combineReducers.ts"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "function composition utility",
+    "relevant": ["src/compose.ts"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "createStore",
+    "relevant": ["src/createStore.ts"],
+    "secondary": [],
+    "category": "symbol"
+  }
+]
diff --git a/benchmarks/annotations/requests.json b/benchmarks/annotations/requests.json
new file mode 100644
index 0000000..067c6c2
--- /dev/null
+++ b/benchmarks/annotations/requests.json
@@ -0,0 +1,109 @@
+[
+  {
+    "query": "how HTTP sessions manage state and connections",
+    "relevant": ["src/requests/sessions.py"],
+    "secondary": []
+  },
+  {
+    "query": "authentication mechanisms and credential handling",
+    "relevant": ["src/requests/auth.py"],
+    "secondary": []
+  },
+  {
+    "query": "transport adapters and connection pooling",
+    "relevant": ["src/requests/adapters.py"],
+    "secondary": []
+  },
+  {
+    "query": "cookie storage and persistence",
+    "relevant": ["src/requests/cookies.py"],
+    "secondary": []
+  },
+  {
+    "query": "how redirects are followed",
+    "relevant": ["src/requests/sessions.py"],
+    "secondary": ["src/requests/models.py"]
+  },
+  {
+    "query": "error and exception types",
+    "relevant": ["src/requests/exceptions.py"],
+    "secondary": []
+  },
+  {
+    "query": "request and response model internals",
+    "relevant": ["src/requests/models.py"],
+    "secondary": []
+  },
+  {
+    "query": "SSL certificate verification",
+    "relevant": ["src/requests/adapters.py"],
+    "secondary": ["src/requests/certs.py"]
+  },
+  {
+    "query": "event hooks system",
+    "relevant": ["src/requests/hooks.py"],
+    "secondary": []
+  },
+  {
+    "query": "utility functions for encoding and headers",
+    "relevant": ["src/requests/utils.py"],
+    "secondary": []
+  },
+  {
+    "query": "how a PreparedRequest is built from user-supplied arguments",
+    "relevant": ["src/requests/models.py"],
+    "secondary": ["src/requests/sessions.py"]
+  },
+  {
+    "query": "how digest authentication implements the challenge-response handshake",
+    "relevant": ["src/requests/auth.py"],
+    "secondary": []
+  },
+  {
+    "query": "how response encoding is detected from headers and content",
+    "relevant": ["src/requests/utils.py"],
+    "secondary": ["src/requests/models.py"]
+  },
+  {
+    "query": "how proxy settings are read from environment variables",
+    "relevant": ["src/requests/utils.py"],
+    "secondary": ["src/requests/sessions.py"]
+  },
+  {
+    "query": "how connection keep-alive and pooling limits are configured",
+    "relevant": ["src/requests/adapters.py"],
+    "secondary": []
+  },
+  {
+    "query": "Session",
+    "relevant": [{"path": "src/requests/sessions.py", "start_line": 356, "end_line": 394}],
+    "secondary": [],
+    "category": "symbol"
+  },
+  {
+    "query": "HTTPDigestAuth",
+    "relevant": [{"path": "src/requests/auth.py", "start_line": 107, "end_line": 136}],
+    "secondary": [],
+    "category": "symbol"
+  },
+  {
+    "query": "PreparedRequest",
+    "relevant": [{"path": "src/requests/models.py", "start_line": 315, "end_line": 364}],
+    "secondary": [],
+    "category": "symbol",
+    "seed": {"path": "src/requests/sessions.py", "line": 485},
+    "related": [{"path": "src/requests/models.py", "start_line": 315, "end_line": 353}]
+  },
+  {
+    "query": "HTTPAdapter",
+    "relevant": [{"path": "src/requests/adapters.py", "start_line": 144, "end_line": 192}],
+    "secondary": [],
+    "category": "symbol"
+  },
+  {
+    "query": "Response",
+    "relevant": [{"path": "src/requests/models.py", "start_line": 642, "end_line": 691}],
+    "secondary": [],
+    "category": "symbol"
+  }
+]
diff --git a/benchmarks/annotations/sinatra.json b/benchmarks/annotations/sinatra.json
new file mode 100644
index 0000000..ddec420
--- /dev/null
+++ b/benchmarks/annotations/sinatra.json
@@ -0,0 +1,100 @@
+[
+  {
+    "query": "core Sinatra DSL and routing behavior",
+    "relevant": ["lib/sinatra/base.rb"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "application startup and command line entrypoint",
+    "relevant": ["lib/sinatra/main.rb"],
+    "secondary": [],
+    "category": "architecture"
+  },
+  {
+    "query": "exception rendering and stack traces",
+    "relevant": ["lib/sinatra/show_exceptions.rb"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "request logging middleware",
+    "relevant": ["lib/sinatra/middleware/logger.rb"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "how route handlers are compiled and URL patterns matched",
+    "relevant": [{"path": "lib/sinatra/base.rb", "start_line": 1795, "end_line": 1817}],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "before and after filter hooks for the request lifecycle",
+    "relevant": [{"path": "lib/sinatra/base.rb", "start_line": 1486, "end_line": 1505}],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "response helper methods: halt, pass and redirect",
+    "relevant": [{"path": "lib/sinatra/base.rb", "start_line": 286, "end_line": 741}],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "how templates are rendered with erb haml and other engines",
+    "relevant": [{"path": "lib/sinatra/base.rb", "start_line": 742, "end_line": 948}],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "how the Rack middleware stack is assembled before requests are handled",
+    "relevant": [{"path": "lib/sinatra/base.rb", "start_line": 1819, "end_line": 1888}],
+    "secondary": [],
+    "category": "architecture"
+  },
+  {
+    "query": "case-insensitive hash for request params",
+    "relevant": ["lib/sinatra/indifferent_hash.rb"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "Sinatra::Base",
+    "relevant": ["lib/sinatra/base.rb"],
+    "secondary": [],
+    "category": "symbol"
+  },
+  {
+    "query": "Sinatra::Application",
+    "relevant": ["lib/sinatra/main.rb"],
+    "secondary": ["lib/sinatra/base.rb"],
+    "category": "symbol"
+  },
+  {
+    "query": "Sinatra::ShowExceptions",
+    "relevant": ["lib/sinatra/show_exceptions.rb"],
+    "secondary": [],
+    "category": "symbol"
+  },
+  {
+    "query": "Sinatra::Helpers",
+    "relevant": [{"path": "lib/sinatra/base.rb", "start_line": 286, "end_line": 741}],
+    "secondary": [],
+    "category": "symbol"
+  },
+  {
+    "query": "IndifferentHash",
+    "relevant": ["lib/sinatra/indifferent_hash.rb"],
+    "secondary": [],
+    "category": "symbol"
+  },
+  {
+    "query": "how Sinatra::Application inherits and configures Base for standalone use",
+    "relevant": ["lib/sinatra/main.rb"],
+    "secondary": ["lib/sinatra/base.rb"],
+    "category": "architecture",
+    "seed": {"path": "lib/sinatra/main.rb", "line": 30},
+    "related": ["lib/sinatra/base.rb"]
+  }
+]
diff --git a/benchmarks/annotations/starlette.json b/benchmarks/annotations/starlette.json
new file mode 100644
index 0000000..f588020
--- /dev/null
+++ b/benchmarks/annotations/starlette.json
@@ -0,0 +1,90 @@
+[
+  {
+    "query": "how are URL routes registered and matched",
+    "relevant": ["starlette/routing.py"],
+    "secondary": ["starlette/convertors.py"]
+  },
+  {
+    "query": "how does authentication middleware work",
+    "relevant": ["starlette/middleware/authentication.py"],
+    "secondary": ["starlette/authentication.py"]
+  },
+  {
+    "query": "websocket connection and message handling",
+    "relevant": ["starlette/websockets.py"],
+    "secondary": []
+  },
+  {
+    "query": "how are static files served",
+    "relevant": ["starlette/staticfiles.py"],
+    "secondary": []
+  },
+  {
+    "query": "parsing form data and file uploads",
+    "relevant": ["starlette/formparsers.py"],
+    "secondary": ["starlette/datastructures.py"]
+  },
+  {
+    "query": "how does the test client simulate requests",
+    "relevant": ["starlette/testclient.py"],
+    "secondary": []
+  },
+  {
+    "query": "background task scheduling and execution",
+    "relevant": ["starlette/background.py"],
+    "secondary": []
+  },
+  {
+    "query": "application lifespan startup and shutdown events",
+    "relevant": ["starlette/routing.py"],
+    "secondary": ["starlette/applications.py"]
+  },
+  {
+    "query": "streaming response implementation",
+    "relevant": ["starlette/responses.py"],
+    "secondary": []
+  },
+  {
+    "query": "how base middleware wraps request handling",
+    "relevant": ["starlette/middleware/base.py"],
+    "secondary": []
+  },
+  {
+    "query": "how request state persists arbitrary data across middleware",
+    "relevant": ["starlette/requests.py"],
+    "secondary": []
+  },
+  {
+    "query": "how path convertor types work for route parameters",
+    "relevant": ["starlette/convertors.py"],
+    "secondary": ["starlette/routing.py"]
+  },
+  {
+    "query": "how session data is signed and stored in cookies",
+    "relevant": ["starlette/middleware/sessions.py"],
+    "secondary": []
+  },
+  {
+    "query": "how CORS preflight requests are handled",
+    "relevant": ["starlette/middleware/cors.py"],
+    "secondary": []
+  },
+  {
+    "query": "how errors in ASGI apps are caught and turned into responses",
+    "relevant": ["starlette/middleware/errors.py"],
+    "secondary": ["starlette/middleware/exceptions.py"]
+  },
+  {"query": "BaseHTTPMiddleware", "relevant": ["starlette/middleware/base.py"], "secondary": []},
+  {"query": "Request", "relevant": ["starlette/requests.py"], "secondary": []},
+  {"query": "WebSocket", "relevant": ["starlette/websockets.py"], "secondary": []},
+  {"query": "BackgroundTask", "relevant": ["starlette/background.py"], "secondary": []},
+  {"query": "Router", "relevant": ["starlette/routing.py"], "secondary": []},
+  {
+    "query": "how the Starlette application delegates routing and lifespan to Router",
+    "relevant": ["starlette/applications.py"],
+    "secondary": ["starlette/routing.py"],
+    "category": "architecture",
+    "seed": {"path": "starlette/applications.py", "line": 19},
+    "related": ["starlette/routing.py"]
+  }
+]
diff --git a/benchmarks/annotations/tokio.json b/benchmarks/annotations/tokio.json
new file mode 100644
index 0000000..a902e16
--- /dev/null
+++ b/benchmarks/annotations/tokio.json
@@ -0,0 +1,99 @@
+[
+  {
+    "query": "how spawned tasks are scheduled onto threads",
+    "relevant": ["tokio/src/runtime/scheduler/multi_thread/worker.rs"],
+    "secondary": ["tokio/src/task/spawn.rs"],
+    "category": "architecture"
+  },
+  {
+    "query": "how the async mutex prevents concurrent access",
+    "relevant": ["tokio/src/sync/mutex.rs"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "how a broadcast channel delivers messages to multiple receivers",
+    "relevant": ["tokio/src/sync/broadcast.rs"],
+    "secondary": [],
+    "category": "architecture"
+  },
+  {
+    "query": "how the timer wheel tracks sleep deadlines",
+    "relevant": ["tokio/src/runtime/time/wheel/level.rs"],
+    "secondary": ["tokio/src/time/sleep.rs"],
+    "category": "architecture"
+  },
+  {
+    "query": "running non-async blocking code inside the async runtime",
+    "relevant": ["tokio/src/task/blocking.rs"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "how tasks that are not Send can run on a single thread",
+    "relevant": ["tokio/src/task/local.rs"],
+    "secondary": [],
+    "category": "architecture"
+  },
+  {
+    "query": "waiting for any of several futures to complete",
+    "relevant": ["tokio/src/macros/select.rs"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "how the runtime builder configures thread pool size and flavour",
+    "relevant": ["tokio/src/runtime/builder.rs"],
+    "secondary": [],
+    "category": "architecture"
+  },
+  {
+    "query": "rate-limiting concurrent operations with a semaphore",
+    "relevant": ["tokio/src/sync/semaphore.rs"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "watching a value and being notified when it changes",
+    "relevant": ["tokio/src/sync/watch.rs"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "collecting results from a dynamic set of spawned tasks",
+    "relevant": ["tokio/src/task/join_set.rs"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "integrating a non-async file descriptor with the tokio reactor",
+    "relevant": ["tokio/src/io/async_fd.rs"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "JoinSet",
+    "relevant": ["tokio/src/task/join_set.rs"],
+    "secondary": []
+  },
+  {
+    "query": "Semaphore",
+    "relevant":
["tokio/src/sync/semaphore.rs"], + "secondary": [] + }, + { + "query": "MissedTickBehavior", + "relevant": ["tokio/src/time/interval.rs"], + "secondary": [] + }, + { + "query": "LocalSet", + "relevant": ["tokio/src/task/local.rs"], + "secondary": [] + }, + { + "query": "Notify", + "relevant": ["tokio/src/sync/notify.rs"], + "secondary": [] + } +] diff --git a/benchmarks/annotations/trpc.json b/benchmarks/annotations/trpc.json new file mode 100644 index 0000000..f3f8d25 --- /dev/null +++ b/benchmarks/annotations/trpc.json @@ -0,0 +1,99 @@ +[ + { + "query": "how a tRPC router is created and procedures are registered", + "relevant": ["packages/server/src/unstable-core-do-not-import/router.ts"], + "secondary": ["packages/server/src/unstable-core-do-not-import/procedureBuilder.ts"], + "category": "architecture" + }, + { + "query": "how middleware chains context transformations between procedures", + "relevant": ["packages/server/src/unstable-core-do-not-import/middleware.ts"], + "secondary": [], + "category": "architecture" + }, + { + "query": "how input validation and parsing works for procedures", + "relevant": ["packages/server/src/unstable-core-do-not-import/parser.ts"], + "secondary": ["packages/server/src/unstable-core-do-not-import/procedureBuilder.ts"], + "category": "semantic" + }, + { + "query": "how HTTP requests are resolved to tRPC procedures", + "relevant": ["packages/server/src/unstable-core-do-not-import/http/resolveResponse.ts"], + "secondary": ["packages/server/src/http.ts"], + "category": "architecture" + }, + { + "query": "how error formatting and serialization works", + "relevant": ["packages/server/src/unstable-core-do-not-import/error/formatter.ts"], + "secondary": ["packages/server/src/unstable-core-do-not-import/error/TRPCError.ts"], + "category": "semantic" + }, + { + "query": "how server-sent events and subscriptions are streamed to the client", + "relevant": ["packages/server/src/unstable-core-do-not-import/stream/sse.ts"], + "secondary": ["packages/server/src/unstable-core-do-not-import/stream/jsonl.ts"], + "category": "architecture" + }, + { + "query": "how the observable pattern is used for subscriptions", + "relevant": ["packages/server/src/observable/observable.ts"], + "secondary": ["packages/server/src/observable/operators.ts"], + "category": "semantic" + }, + { + "query": "how type inference extracts input and output types from procedures", + "relevant": ["packages/server/src/unstable-core-do-not-import/clientish/inference.ts"], + "secondary": [], + "category": "semantic" + }, + { + "query": "adapting tRPC to run as a Node.js HTTP server handler", + "relevant": ["packages/server/src/adapters/node-http/nodeHTTPRequestHandler.ts"], + "secondary": ["packages/server/src/adapters/node-http/incomingMessageToRequest.ts"], + "category": "architecture" + }, + { + "query": "WebSocket adapter for real-time subscriptions", + "relevant": ["packages/server/src/adapters/ws.ts"], + "secondary": [], + "category": "semantic" + }, + { + "query": "how tRPC is initialized with root config and context factory", + "relevant": ["packages/server/src/unstable-core-do-not-import/initTRPC.ts"], + "secondary": ["packages/server/src/unstable-core-do-not-import/rootConfig.ts"], + "category": "architecture" + }, + { + "query": "how data transformer is applied to serialize and deserialize procedure payloads", + "relevant": ["packages/server/src/unstable-core-do-not-import/transformer.ts"], + "secondary": [], + "category": "semantic" + }, + { + "query": "TRPCError", + "relevant": 
["packages/server/src/unstable-core-do-not-import/error/TRPCError.ts"], + "secondary": [] + }, + { + "query": "AnyRouter", + "relevant": ["packages/server/src/unstable-core-do-not-import/router.ts"], + "secondary": [] + }, + { + "query": "MiddlewareBuilder", + "relevant": ["packages/server/src/unstable-core-do-not-import/middleware.ts"], + "secondary": [] + }, + { + "query": "inferProcedureInput", + "relevant": ["packages/server/src/unstable-core-do-not-import/procedure.ts"], + "secondary": [] + }, + { + "query": "fetchRequestHandler", + "relevant": ["packages/server/src/adapters/fetch/fetchRequestHandler.ts"], + "secondary": [] + } +] diff --git a/benchmarks/bench_hybrid.py b/benchmarks/bench_hybrid.py new file mode 100644 index 0000000..b0ad205 --- /dev/null +++ b/benchmarks/bench_hybrid.py @@ -0,0 +1,248 @@ +from __future__ import annotations + +import argparse +import math +import shutil +import sys +import time +from dataclasses import dataclass +from pathlib import Path + +from model2vec import StaticModel + +from benchmarks.common import ( + Task, + apply_task_filters, + available_repo_specs, + count_indexed_targets, + grouped_tasks, + load_tasks, + target_matches_location, + target_rank, +) +from semble import SembleIndex +from semble.types import SearchResult + +_CACHE_DIR = Path("/tmp/semble-bench-cache") +_MODEL_NAME = "Pringled/potion-code-16M" +_LATENCY_RUNS = 5 +_DIRECT_TOP_K = 10 + + +@dataclass(frozen=True) +class RepoResult: + repo: str + language: str + chunks: int + ndcg5: float + ndcg10: float + p50_ms: float + cold_ms: float | None = None + warm_ms: float | None = None + + +def _parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser(description="Benchmark hybrid semble search across the pinned benchmark repos.") + parser.add_argument("--cache", action="store_true", help="Show cold vs warm index time using the disk cache.") + parser.add_argument("--repo", action="append", default=[], help="Limit to one or more repo names.") + parser.add_argument("--language", action="append", default=[], help="Limit to one or more languages.") + parser.add_argument("--verbose", action="store_true", help="Print per-query results.") + return parser.parse_args() + + +def _is_relevant(result: SearchResult, task: Task) -> bool: + chunk = result.chunk + return any( + target_matches_location(chunk.file_path, chunk.start_line, chunk.end_line, target) + for target in task.all_relevant + ) + + +def _dcg(relevances: list[int]) -> float: + return sum(rel / math.log2(i + 2) for i, rel in enumerate(relevances)) + + +def _ndcg_at_k(relevant_ranks: list[int], n_relevant: int, k: int) -> float: + if n_relevant == 0: + return 0.0 + relevances = [0] * k + for rank in relevant_ranks: + if 1 <= rank <= k: + relevances[rank - 1] = 1 + ideal = _dcg([1] * min(k, n_relevant)) + return _dcg(relevances) / ideal if ideal > 0 else 0.0 + + +def _evaluate(index: SembleIndex, tasks: list[Task], *, verbose: bool = False) -> tuple[float, float, float]: + ndcg5_sum = 0.0 + ndcg10_sum = 0.0 + latencies: list[float] = [] + + for task in tasks: + query_latencies: list[float] = [] + for _ in range(_LATENCY_RUNS): + started = time.perf_counter() + results = index.search(task.query, top_k=_DIRECT_TOP_K) + query_latencies.append((time.perf_counter() - started) * 1000) + latencies.append(sorted(query_latencies)[_LATENCY_RUNS // 2]) + + chunk_results = results[:_DIRECT_TOP_K] + relevant_ranks = [ + rank for target in task.all_relevant if (rank := target_rank(chunk_results, target)) is not None + ] + n_relevant = 
count_indexed_targets(index.chunks, task.all_relevant) + q_ndcg5 = _ndcg_at_k(relevant_ranks, n_relevant, 5) + q_ndcg10 = _ndcg_at_k(relevant_ranks, n_relevant, 10) + ndcg5_sum += q_ndcg5 + ndcg10_sum += q_ndcg10 + + if verbose: + cat = task.category or "?" + targets_str = ", ".join( + t.path if not t.start_line else f"{t.path}:{t.start_line}-{t.end_line}" for t in task.all_relevant + ) + top_files = [r.chunk.file_path for r in chunk_results[:5]] + print( + f" [{cat:<12}] ndcg@10={q_ndcg10:.3f} ranks={relevant_ranks} n_rel={n_relevant} q={task.query!r}", + file=sys.stderr, + ) + print(f" targets: {targets_str}", file=sys.stderr) + print(f" top-5: {top_files}", file=sys.stderr) + + total = len(tasks) + latencies.sort() + return ndcg5_sum / total, ndcg10_sum / total, latencies[len(latencies) // 2] + + +def _print_group_summary(results: list[RepoResult], group_by: str) -> None: + print(file=sys.stderr) + print(f"By {group_by}", file=sys.stderr) + groups = sorted({getattr(result, group_by) for result in results}) + for value in groups: + grouped = [result for result in results if getattr(result, group_by) == value] + print( + " " + + f"{value}: repos={len(grouped)} ndcg@5={sum(r.ndcg5 for r in grouped) / len(grouped):.3f}" + + f" ndcg@10={sum(r.ndcg10 for r in grouped) / len(grouped):.3f}" + + f" p50={sum(r.p50_ms for r in grouped) / len(grouped):.2f}ms", + file=sys.stderr, + ) + + +def _print_language_table(results: list[RepoResult]) -> None: + languages = ["python", "javascript", "java", "go", "php", "ruby"] + present = [language for language in languages if any(result.language == language for result in results)] + columns = ["Avg", *[language.title() for language in present]] + + avg_ndcg10 = sum(result.ndcg10 for result in results) / len(results) + avg_p50 = sum(result.p50_ms for result in results) / len(results) + + print(file=sys.stderr) + print(f"{'=' * 104}", file=sys.stderr) + print("Hybrid benchmark by language", file=sys.stderr) + print(f"{'=' * 104}", file=sys.stderr) + print(f"\n {'Metric':<28} " + " ".join(f"{column:>9}" for column in columns), file=sys.stderr) + print(f" {'-' * 28} " + " ".join(f"{'-' * 9:>9}" for _ in columns), file=sys.stderr) + + ndcg_row = [f"{avg_ndcg10:>9.3f}"] + p50_row = [f"{avg_p50:>8.2f}ms"] + for language in present: + language_results = [result for result in results if result.language == language] + ndcg_row.append(f"{sum(result.ndcg10 for result in language_results) / len(language_results):>9.3f}") + p50_row.append(f"{sum(result.p50_ms for result in language_results) / len(language_results):>8.2f}ms") + + print(f" {'NDCG@10':<28} " + " ".join(ndcg_row), file=sys.stderr) + print(f" {'q-p50':<28} " + " ".join(p50_row), file=sys.stderr) + + +def _bench_quality(repo_tasks: dict[str, list[Task]], model: StaticModel, *, verbose: bool = False) -> list[RepoResult]: + print( + f"{'Repo':<12} {'language':<12} {'chunks':>6} {'index':>9} {'NDCG@5':>8} {'NDCG@10':>8} {'p50':>8}", + file=sys.stderr, + ) + print(f"{'-' * 12} {'-' * 12} {'-' * 6} {'-' * 9} {'-' * 8} {'-' * 8} {'-' * 8}", file=sys.stderr) + results: list[RepoResult] = [] + specs = available_repo_specs() + for repo, tasks in sorted(repo_tasks.items()): + spec = specs[repo] + started = time.perf_counter() + index = SembleIndex.from_path(spec.benchmark_dir, model=model) + index_ms = (time.perf_counter() - started) * 1000 + ndcg5, ndcg10, p50_ms = _evaluate(index, tasks, verbose=verbose) + result = RepoResult( + repo=repo, language=spec.language, chunks=len(index.chunks), ndcg5=ndcg5, ndcg10=ndcg10, 
p50_ms=p50_ms + ) + results.append(result) + print( + f"{repo:<12} {spec.language:<12} {len(index.chunks):>6} {index_ms:>8.0f}ms {ndcg5:>8.3f} {ndcg10:>8.3f} {p50_ms:>7.2f}ms", + file=sys.stderr, + ) + return results + + +def _bench_cache(repo_tasks: dict[str, list[Task]], model: StaticModel) -> list[RepoResult]: + _CACHE_DIR.mkdir(parents=True, exist_ok=True) + print(f"Cache dir: {_CACHE_DIR}", file=sys.stderr) + print(file=sys.stderr) + print( + f"{'Repo':<12} {'language':<12} {'chunks':>6} {'cold':>9} {'warm':>9} {'speedup':>8} {'NDCG@10':>8}", + file=sys.stderr, + ) + print(f"{'-' * 12} {'-' * 12} {'-' * 6} {'-' * 9} {'-' * 9} {'-' * 8} {'-' * 8}", file=sys.stderr) + results: list[RepoResult] = [] + specs = available_repo_specs() + model_ns = _MODEL_NAME.replace("/", "--") + for repo, tasks in sorted(repo_tasks.items()): + spec = specs[repo] + namespace_dir = _CACHE_DIR / model_ns + if namespace_dir.exists(): + shutil.rmtree(namespace_dir) + started = time.perf_counter() + cold = SembleIndex.from_path(spec.benchmark_dir, model=model, cache_dir=_CACHE_DIR, model_name=_MODEL_NAME) + cold_ms = (time.perf_counter() - started) * 1000 + started = time.perf_counter() + warm = SembleIndex.from_path(spec.benchmark_dir, model=model, cache_dir=_CACHE_DIR, model_name=_MODEL_NAME) + warm_ms = (time.perf_counter() - started) * 1000 + _, ndcg10, p50_ms = _evaluate(warm, tasks) + result = RepoResult( + repo=repo, + language=spec.language, + chunks=len(cold.chunks), + ndcg5=0.0, + ndcg10=ndcg10, + p50_ms=p50_ms, + cold_ms=cold_ms, + warm_ms=warm_ms, + ) + results.append(result) + speedup = cold_ms / warm_ms if warm_ms > 0 else float("inf") + print( + f"{repo:<12} {spec.language:<12} {len(cold.chunks):>6} {cold_ms:>8.0f}ms {warm_ms:>8.0f}ms {speedup:>7.1f}x {ndcg10:>8.3f}", + file=sys.stderr, + ) + print(file=sys.stderr) + print("Warm time still includes file walk plus BM25/Vicinity rebuild; only embedding is skipped.", file=sys.stderr) + return results + + +def main() -> None: + args = _parse_args() + repo_specs = available_repo_specs() + tasks = apply_task_filters( + load_tasks(repo_specs=repo_specs), repos=args.repo or None, languages=args.language or None + ) + if not tasks: + raise SystemExit("No benchmark tasks matched the requested filters.") + print("Loading model...", file=sys.stderr) + started = time.perf_counter() + model = StaticModel.from_pretrained(_MODEL_NAME) + print(f"Loaded in {(time.perf_counter() - started) * 1000:.0f} ms", file=sys.stderr) + print(file=sys.stderr) + repo_tasks = grouped_tasks(tasks) + results = _bench_cache(repo_tasks, model) if args.cache else _bench_quality(repo_tasks, model, verbose=args.verbose) + _print_group_summary(results, "language") + _print_language_table(results) + + +if __name__ == "__main__": + main() diff --git a/benchmarks/common.py b/benchmarks/common.py new file mode 100644 index 0000000..c755633 --- /dev/null +++ b/benchmarks/common.py @@ -0,0 +1,194 @@ +from __future__ import annotations + +import json +from dataclasses import dataclass +from pathlib import Path +from typing import Protocol, cast + +from semble import Chunk + +BENCH_ROOT = Path("/tmp/bench") +BENCHMARKS_DIR = Path(__file__).parent +ANNOTATIONS_DIR = BENCHMARKS_DIR / "annotations" +REPOS_PATH = BENCHMARKS_DIR / "repos.json" + + +@dataclass(frozen=True) +class Target: + path: str + start_line: int | None = None + end_line: int | None = None + + @property + def has_span(self) -> bool: + return self.start_line is not None and self.end_line is not None + + +class _ChunkLike(Protocol): 
+ file_path: str + start_line: int + end_line: int + + +class _ResultLike(Protocol): + chunk: Chunk + + +@dataclass(frozen=True) +class RepoSpec: + name: str + language: str + url: str + revision: str + benchmark_root: str | None = None + + @property + def checkout_dir(self) -> Path: + return BENCH_ROOT / self.name + + @property + def benchmark_dir(self) -> Path: + return self.checkout_dir if self.benchmark_root is None else self.checkout_dir / self.benchmark_root + + +@dataclass(frozen=True) +class Task: + repo: str + language: str + query: str + relevant: tuple[Target, ...] + secondary: tuple[Target, ...] + category: str + category_inferred: bool + + @property + def all_relevant(self) -> tuple[Target, ...]: + return self.relevant + self.secondary + + +def infer_category(query: str) -> str: + if " " not in query.strip(): + return "symbol" + lowered = query.lower() + if lowered.startswith("how ") or lowered.startswith("how does") or lowered.startswith("how are"): + return "architecture" + return "semantic" + + +def _coerce_int(value: object) -> int: + if not isinstance(value, int | str): + raise TypeError(f"expected int-compatible value, got {type(value).__name__}") + return int(value) + + +def _coerce_mapping(raw: object) -> dict[str, object]: + if not isinstance(raw, dict): + raise TypeError(f"expected mapping, got {type(raw).__name__}") + return cast(dict[str, object], raw) + + +def _parse_target(raw: str | dict[str, object]) -> Target: + if isinstance(raw, str): + return Target(path=raw) + raw = _coerce_mapping(raw) + start_line = raw.get("start_line") + end_line = raw.get("end_line") + return Target( + path=str(raw["path"]), + start_line=_coerce_int(start_line) if start_line is not None else None, + end_line=_coerce_int(end_line) if end_line is not None else None, + ) + + +def load_repo_specs(path: Path = REPOS_PATH) -> dict[str, RepoSpec]: + raw = json.loads(path.read_text(encoding="utf-8")) + return {item["name"]: RepoSpec(**item) for item in raw} + + +def available_repo_specs(repo_specs: dict[str, RepoSpec] | None = None) -> dict[str, RepoSpec]: + specs = load_repo_specs() if repo_specs is None else repo_specs + return { + name: spec + for name, spec in specs.items() + if spec.checkout_dir.exists() and (ANNOTATIONS_DIR / f"{name}.json").exists() + } + + +def load_tasks( + path: Path = ANNOTATIONS_DIR, + repo_specs: dict[str, RepoSpec] | None = None, +) -> list[Task]: + specs = load_repo_specs() if repo_specs is None else repo_specs + tasks: list[Task] = [] + annotation_files = sorted(path.glob("*.json")) if path.is_dir() else [path] + for annotation_file in annotation_files: + raw = json.loads(annotation_file.read_text(encoding="utf-8")) + default_repo = annotation_file.stem + for item in raw: + repo = item.get("repo", default_repo) + spec = specs[repo] + category = item.get("category") + tasks.append( + Task( + repo=repo, + language=spec.language, + query=item["query"], + relevant=tuple(_parse_target(raw) for raw in item.get("relevant", [])), + secondary=tuple(_parse_target(raw) for raw in item.get("secondary", [])), + category=category if isinstance(category, str) else infer_category(item["query"]), + category_inferred=category is None, + ) + ) + return tasks + + +def apply_task_filters( + tasks: list[Task], + repos: list[str] | None = None, + languages: list[str] | None = None, + limit: int | None = None, +) -> list[Task]: + filtered = [task for task in tasks if not repos or task.repo in repos] + filtered = [task for task in filtered if not languages or task.language in 
languages] + return filtered if limit is None else filtered[:limit] + + +def path_matches(file_path: str, relative_path: str) -> bool: + normalized_file = file_path.replace("\\", "/") + normalized_relative = relative_path.replace("\\", "/") + return normalized_file == normalized_relative or normalized_file.endswith(f"/{normalized_relative}") + + +def span_overlaps(start_line: int, end_line: int, target: Target) -> bool: + if not target.has_span: + return True + assert target.start_line is not None + assert target.end_line is not None + return not (end_line < target.start_line or start_line > target.end_line) + + +def target_matches_location(file_path: str, start_line: int, end_line: int, target: Target) -> bool: + return path_matches(file_path, target.path) and span_overlaps(start_line, end_line, target) + + +def target_rank(results: list[_ResultLike], target: Target) -> int | None: + for index, result in enumerate(results, 1): + chunk = result.chunk + if target_matches_location(chunk.file_path, chunk.start_line, chunk.end_line, target): + return index + return None + + +def count_indexed_targets(chunks: list[_ChunkLike], targets: tuple[Target, ...]) -> int: + return sum( + 1 + for target in targets + if any(target_matches_location(chunk.file_path, chunk.start_line, chunk.end_line, target) for chunk in chunks) + ) + + +def grouped_tasks(tasks: list[Task]) -> dict[str, list[Task]]: + grouped: dict[str, list[Task]] = {} + for task in tasks: + grouped.setdefault(task.repo, []).append(task) + return grouped diff --git a/benchmarks/repos.json b/benchmarks/repos.json new file mode 100644 index 0000000..56e36e2 --- /dev/null +++ b/benchmarks/repos.json @@ -0,0 +1,202 @@ +[ + { + "name": "aiohttp", + "language": "python", + "url": "https://github.com/aio-libs/aiohttp.git", + "revision": "fc67cfdfd7d4bbf53ef76515fae69726626fe256", + "benchmark_root": "aiohttp" + }, + { + "name": "fastapi", + "language": "python", + "url": "https://github.com/fastapi/fastapi.git", + "revision": "c3c9dd6b1a08bcda766e7b43eafe72c4c5e9e193", + "benchmark_root": "fastapi" + }, + { + "name": "flask", + "language": "python", + "url": "https://github.com/pallets/flask.git", + "revision": "258d68b6ff5e2244386540f48b48bab90d6ab827", + "benchmark_root": "src/flask" + }, + { + "name": "httpx", + "language": "python", + "url": "https://github.com/encode/httpx.git", + "revision": "b5addb64f0161ff6bfe94c124ef76f6a1fba5254", + "benchmark_root": "httpx" + }, + { + "name": "model2vec", + "language": "python", + "url": "https://github.com/MinishLab/model2vec.git", + "revision": "b3012ee04e41c634383a5d735cb3c7c51e806a18", + "benchmark_root": "model2vec" + }, + { + "name": "pydantic", + "language": "python", + "url": "https://github.com/pydantic/pydantic.git", + "revision": "82c15f0ba8a9f8d8d6ba595df73ad20e2ee2eccf", + "benchmark_root": "pydantic" + }, + { + "name": "requests", + "language": "python", + "url": "https://github.com/psf/requests.git", + "revision": "ef439eb779c1eba7cbdeeeb302b11e1e061b4b7d", + "benchmark_root": "src/requests" + }, + { + "name": "starlette", + "language": "python", + "url": "https://github.com/encode/starlette.git", + "revision": "1894d0d89badf43bc8bfe03ed221a8b2e100b2ab", + "benchmark_root": "starlette" + }, + { + "name": "axios", + "language": "javascript", + "url": "https://github.com/axios/axios.git", + "revision": "c7a76ddbf277db864ee6cfb4ef17b8a08ffbe3f5", + "benchmark_root": "lib" + }, + { + "name": "express", + "language": "javascript", + "url": "https://github.com/expressjs/express.git", + 
"revision": "8e022edc9185f540a3fcecaf5e56b850d919cdac", + "benchmark_root": "lib" + }, + { + "name": "redux", + "language": "javascript", + "url": "https://github.com/reduxjs/redux.git", + "revision": "aaa04ae8402ba2caba55a9c75bfa8d3df6c78f8c", + "benchmark_root": "src" + }, + { + "name": "gin", + "language": "go", + "url": "https://github.com/gin-gonic/gin.git", + "revision": "d3ffc9985281dcf4d3bef604cce4e662b1a327a6" + }, + { + "name": "cobra", + "language": "go", + "url": "https://github.com/spf13/cobra.git", + "revision": "61968e893eee2f27696c2fbc8e34fa5c4afaf7c4" + }, + { + "name": "chi", + "language": "go", + "url": "https://github.com/go-chi/chi.git", + "revision": "a54874f0e2f12647a19e82ee70dfa8185014100c" + }, + { + "name": "gson", + "language": "java", + "url": "https://github.com/google/gson.git", + "revision": "f4d371d29c04066dbe7fdb31f642831f9c7f40cd", + "benchmark_root": "gson" + }, + { + "name": "commons-lang", + "language": "java", + "url": "https://github.com/apache/commons-lang.git", + "revision": "0ba92dc402312a38252a3398931ffbfbb4a88f7d", + "benchmark_root": "src/main/java/org/apache/commons/lang3" + }, + { + "name": "jackson-databind", + "language": "java", + "url": "https://github.com/FasterXML/jackson-databind.git", + "revision": "e30139539416f69f1d7ae31c7e1d6da5b25bf362", + "benchmark_root": "src/main/java/tools/jackson/databind" + }, + { + "name": "guzzle", + "language": "php", + "url": "https://github.com/guzzle/guzzle.git", + "revision": "fb92d95f80a9da51bf8f2a5b26d8e8ea3b6d99ed", + "benchmark_root": "src" + }, + { + "name": "monolog", + "language": "php", + "url": "https://github.com/Seldaek/monolog.git", + "revision": "68b974809baff3f071893de61447212e9e688ee7", + "benchmark_root": "src/Monolog" + }, + { + "name": "laravel-framework", + "language": "php", + "url": "https://github.com/laravel/framework.git", + "revision": "0dcc8d2ba7f41bc8376a08e9ccd5d7b83e6a6d90", + "benchmark_root": "src/Illuminate" + }, + { + "name": "sinatra", + "language": "ruby", + "url": "https://github.com/sinatra/sinatra.git", + "revision": "f891dd2b6f4911e356600efe6c3b82af97d262c6", + "benchmark_root": "lib" + }, + { + "name": "rack", + "language": "ruby", + "url": "https://github.com/rack/rack.git", + "revision": "ca8a404704ed043797c4f9d482c97d722c0dc719", + "benchmark_root": "lib/rack" + }, + { + "name": "rails", + "language": "ruby", + "url": "https://github.com/rails/rails.git", + "revision": "75f9e28379ac7418b82fa950cfa81f6147275308", + "benchmark_root": "railties/lib/rails" + }, + { + "name": "tokio", + "language": "rust", + "url": "https://github.com/tokio-rs/tokio.git", + "revision": "5db10f538b683fe88d699dfd11be31d193db011c", + "benchmark_root": "tokio/src" + }, + { + "name": "trpc", + "language": "typescript", + "url": "https://github.com/trpc/trpc.git", + "revision": "c188dab0822caf3615199e4ac95147bc7560d26f", + "benchmark_root": "packages/server/src" + }, + { + "name": "messagepack-csharp", + "language": "csharp", + "url": "https://github.com/neuecc/MessagePack-CSharp.git", + "revision": "84db9f79e3ecc5f4e8b7c7f77cd15d7745f5f2a7", + "benchmark_root": "src/MessagePack" + }, + { + "name": "ktor", + "language": "kotlin", + "url": "https://github.com/ktorio/ktor.git", + "revision": "5913745a96101e8c78e47565e52d2baa8414441f", + "benchmark_root": "ktor-client/ktor-client-core/common/src" + }, + { + "name": "cats", + "language": "scala", + "url": "https://github.com/typelevel/cats.git", + "revision": "2102251a2f24a6ee14e087fc5da7768d267f2d6e", + "benchmark_root": 
"core/src/main/scala" + }, + { + "name": "alamofire", + "language": "swift", + "url": "https://github.com/Alamofire/Alamofire.git", + "revision": "e938f8c66708e7352fc7e3512647fa54255b267a", + "benchmark_root": "Source" + } +] diff --git a/benchmarks/sync_repos.py b/benchmarks/sync_repos.py new file mode 100644 index 0000000..5ba3b24 --- /dev/null +++ b/benchmarks/sync_repos.py @@ -0,0 +1,69 @@ +from __future__ import annotations + +import argparse +import subprocess +import sys + +from benchmarks.common import BENCH_ROOT, load_repo_specs + + +def _parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser(description="Clone or update pinned benchmark repositories.") + parser.add_argument("--repo", action="append", default=[], help="Restrict to one or more repo names.") + parser.add_argument("--check", action="store_true", help="Only verify local checkouts against pinned revisions.") + return parser.parse_args() + + +def _run(*args: str) -> None: + subprocess.run(args, check=True) + + +def _output(*args: str) -> str: + return subprocess.check_output(args, text=True).strip() + + +def _sync_repo(name: str, url: str, revision: str) -> None: + repo_dir = BENCH_ROOT / name + if not repo_dir.exists(): + print(f"cloning {name} -> {repo_dir}") + _run("git", "clone", url, str(repo_dir)) + print(f"syncing {name} @ {revision[:12]}") + _run("git", "-C", str(repo_dir), "fetch", "--depth", "1", "origin", revision) + _run("git", "-C", str(repo_dir), "checkout", "--detach", revision) + + +def _check_repo(name: str, revision: str) -> str | None: + repo_dir = BENCH_ROOT / name + if not (repo_dir / ".git").exists(): + return f"{name}: missing checkout at {repo_dir}" + head = _output("git", "-C", str(repo_dir), "rev-parse", "HEAD") + if head != revision: + return f"{name}: expected {revision}, found {head}" + return None + + +def main() -> None: + args = _parse_args() + specs = load_repo_specs() + selected = {name: spec for name, spec in specs.items() if not args.repo or name in args.repo} + BENCH_ROOT.mkdir(parents=True, exist_ok=True) + + if args.check: + problems = [ + problem for name, spec in selected.items() if (problem := _check_repo(name, spec.revision)) is not None + ] + if problems: + for problem in problems: + print(problem, file=sys.stderr) + raise SystemExit(1) + print(f"Verified {len(selected)} pinned repo(s).") + return + + for name, spec in selected.items(): + _sync_repo(name, spec.url, spec.revision) + + print(f"Synced {len(selected)} pinned repo(s).") + + +if __name__ == "__main__": + main() diff --git a/pyproject.toml b/pyproject.toml index 0ce4da0..8e5f165 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -70,7 +70,7 @@ target-version = "py310" [tool.ruff.lint.per-file-ignores] "tests/**" = ["ANN"] "src/semble/cli.py" = ["T20"] -"local/benchmarks/*.py" = ["T20", "D"] +"benchmarks/*.py" = ["T20", "D"] [tool.ruff.lint] select = [ From d2f98dbbe2051b68b42f3d3f0ed007fe7e27dcca Mon Sep 17 00:00:00 2001 From: Pringled Date: Wed, 15 Apr 2026 08:03:45 +0200 Subject: [PATCH 02/15] Fix benchmark review: decouple common from semble, skip missing repos in load_tasks, compute ndcg5 in cache mode --- benchmarks/bench_hybrid.py | 16 ++++++++++++---- benchmarks/common.py | 18 ++++-------------- 2 files changed, 16 insertions(+), 18 deletions(-) diff --git a/benchmarks/bench_hybrid.py b/benchmarks/bench_hybrid.py index b0ad205..e22817e 100644 --- a/benchmarks/bench_hybrid.py +++ b/benchmarks/bench_hybrid.py @@ -11,6 +11,7 @@ from model2vec import StaticModel from benchmarks.common import ( + 
Target, Task, apply_task_filters, available_repo_specs, @@ -18,7 +19,6 @@ grouped_tasks, load_tasks, target_matches_location, - target_rank, ) from semble import SembleIndex from semble.types import SearchResult @@ -29,6 +29,14 @@ _DIRECT_TOP_K = 10 +def _target_rank(results: list[SearchResult], target: Target) -> int | None: + for index, result in enumerate(results, 1): + chunk = result.chunk + if target_matches_location(chunk.file_path, chunk.start_line, chunk.end_line, target): + return index + return None + + @dataclass(frozen=True) class RepoResult: repo: str @@ -88,7 +96,7 @@ def _evaluate(index: SembleIndex, tasks: list[Task], *, verbose: bool = False) - chunk_results = results[:_DIRECT_TOP_K] relevant_ranks = [ - rank for target in task.all_relevant if (rank := target_rank(chunk_results, target)) is not None + rank for target in task.all_relevant if (rank := _target_rank(chunk_results, target)) is not None ] n_relevant = count_indexed_targets(index.chunks, task.all_relevant) q_ndcg5 = _ndcg_at_k(relevant_ranks, n_relevant, 5) @@ -203,12 +211,12 @@ def _bench_cache(repo_tasks: dict[str, list[Task]], model: StaticModel) -> list[ started = time.perf_counter() warm = SembleIndex.from_path(spec.benchmark_dir, model=model, cache_dir=_CACHE_DIR, model_name=_MODEL_NAME) warm_ms = (time.perf_counter() - started) * 1000 - _, ndcg10, p50_ms = _evaluate(warm, tasks) + ndcg5, ndcg10, p50_ms = _evaluate(warm, tasks) result = RepoResult( repo=repo, language=spec.language, chunks=len(cold.chunks), - ndcg5=0.0, + ndcg5=ndcg5, ndcg10=ndcg10, p50_ms=p50_ms, cold_ms=cold_ms, diff --git a/benchmarks/common.py b/benchmarks/common.py index c755633..379e632 100644 --- a/benchmarks/common.py +++ b/benchmarks/common.py @@ -5,8 +5,6 @@ from pathlib import Path from typing import Protocol, cast -from semble import Chunk - BENCH_ROOT = Path("/tmp/bench") BENCHMARKS_DIR = Path(__file__).parent ANNOTATIONS_DIR = BENCHMARKS_DIR / "annotations" @@ -30,10 +28,6 @@ class _ChunkLike(Protocol): end_line: int -class _ResultLike(Protocol): - chunk: Chunk - - @dataclass(frozen=True) class RepoSpec: name: str @@ -122,10 +116,14 @@ def load_tasks( tasks: list[Task] = [] annotation_files = sorted(path.glob("*.json")) if path.is_dir() else [path] for annotation_file in annotation_files: + if annotation_file.stem not in specs: + continue raw = json.loads(annotation_file.read_text(encoding="utf-8")) default_repo = annotation_file.stem for item in raw: repo = item.get("repo", default_repo) + if repo not in specs: + continue spec = specs[repo] category = item.get("category") tasks.append( @@ -171,14 +169,6 @@ def target_matches_location(file_path: str, start_line: int, end_line: int, targ return path_matches(file_path, target.path) and span_overlaps(start_line, end_line, target) -def target_rank(results: list[_ResultLike], target: Target) -> int | None: - for index, result in enumerate(results, 1): - chunk = result.chunk - if target_matches_location(chunk.file_path, chunk.start_line, chunk.end_line, target): - return index - return None - - def count_indexed_targets(chunks: list[_ChunkLike], targets: tuple[Target, ...]) -> int: return sum( 1 From 27175f4f109bac1bd1b1efdc8ce7041bd74e0b82 Mon Sep 17 00:00:00 2001 From: Pringled Date: Wed, 15 Apr 2026 08:08:49 +0200 Subject: [PATCH 03/15] Address code review: fix language table, ndcg5 in cache mode, raw shadowing, asserts, redundant slice --- benchmarks/README.md | 3 +++ benchmarks/bench_hybrid.py | 19 ++++++++----------- benchmarks/common.py | 10 +++++----- 3 files changed, 16 
insertions(+), 16 deletions(-) diff --git a/benchmarks/README.md b/benchmarks/README.md index 8d6d906..99c6c23 100644 --- a/benchmarks/README.md +++ b/benchmarks/README.md @@ -19,3 +19,6 @@ uv run python -m benchmarks.bench_hybrid --cache uv run python -m benchmarks.bench_hybrid --repo fastapi --repo axios uv run python -m benchmarks.bench_hybrid --language python ``` + +`--cache` measures cold vs warm index time. Warm time still includes the file walk and +BM25/Vicinity rebuild; only embedding is skipped. diff --git a/benchmarks/bench_hybrid.py b/benchmarks/bench_hybrid.py index e22817e..1acf182 100644 --- a/benchmarks/bench_hybrid.py +++ b/benchmarks/bench_hybrid.py @@ -42,9 +42,9 @@ class RepoResult: repo: str language: str chunks: int - ndcg5: float ndcg10: float p50_ms: float + ndcg5: float | None = None cold_ms: float | None = None warm_ms: float | None = None @@ -94,10 +94,7 @@ def _evaluate(index: SembleIndex, tasks: list[Task], *, verbose: bool = False) - query_latencies.append((time.perf_counter() - started) * 1000) latencies.append(sorted(query_latencies)[_LATENCY_RUNS // 2]) - chunk_results = results[:_DIRECT_TOP_K] - relevant_ranks = [ - rank for target in task.all_relevant if (rank := _target_rank(chunk_results, target)) is not None - ] + relevant_ranks = [rank for target in task.all_relevant if (rank := _target_rank(results, target)) is not None] n_relevant = count_indexed_targets(index.chunks, task.all_relevant) q_ndcg5 = _ndcg_at_k(relevant_ranks, n_relevant, 5) q_ndcg10 = _ndcg_at_k(relevant_ranks, n_relevant, 10) @@ -109,7 +106,7 @@ def _evaluate(index: SembleIndex, tasks: list[Task], *, verbose: bool = False) - targets_str = ", ".join( t.path if not t.start_line else f"{t.path}:{t.start_line}-{t.end_line}" for t in task.all_relevant ) - top_files = [r.chunk.file_path for r in chunk_results[:5]] + top_files = [r.chunk.file_path for r in results[:5]] print( f" [{cat:<12}] ndcg@10={q_ndcg10:.3f} ranks={relevant_ranks} n_rel={n_relevant} q={task.query!r}", file=sys.stderr, @@ -128,9 +125,11 @@ def _print_group_summary(results: list[RepoResult], group_by: str) -> None: groups = sorted({getattr(result, group_by) for result in results}) for value in groups: grouped = [result for result in results if getattr(result, group_by) == value] + ndcg5_values = [r.ndcg5 for r in grouped if r.ndcg5 is not None] + ndcg5_str = f" ndcg@5={sum(ndcg5_values) / len(ndcg5_values):.3f}" if ndcg5_values else "" print( " " - + f"{value}: repos={len(grouped)} ndcg@5={sum(r.ndcg5 for r in grouped) / len(grouped):.3f}" + + f"{value}: repos={len(grouped)}{ndcg5_str}" + f" ndcg@10={sum(r.ndcg10 for r in grouped) / len(grouped):.3f}" + f" p50={sum(r.p50_ms for r in grouped) / len(grouped):.2f}ms", file=sys.stderr, @@ -138,8 +137,7 @@ def _print_group_summary(results: list[RepoResult], group_by: str) -> None: def _print_language_table(results: list[RepoResult]) -> None: - languages = ["python", "javascript", "java", "go", "php", "ruby"] - present = [language for language in languages if any(result.language == language for result in results)] + present = sorted({result.language for result in results}) columns = ["Avg", *[language.title() for language in present]] avg_ndcg10 = sum(result.ndcg10 for result in results) / len(results) @@ -211,12 +209,11 @@ def _bench_cache(repo_tasks: dict[str, list[Task]], model: StaticModel) -> list[ started = time.perf_counter() warm = SembleIndex.from_path(spec.benchmark_dir, model=model, cache_dir=_CACHE_DIR, model_name=_MODEL_NAME) warm_ms = (time.perf_counter() - 
started) * 1000 - ndcg5, ndcg10, p50_ms = _evaluate(warm, tasks) + _, ndcg10, p50_ms = _evaluate(warm, tasks) result = RepoResult( repo=repo, language=spec.language, chunks=len(cold.chunks), - ndcg5=ndcg5, ndcg10=ndcg10, p50_ms=p50_ms, cold_ms=cold_ms, diff --git a/benchmarks/common.py b/benchmarks/common.py index 379e632..62c0b7c 100644 --- a/benchmarks/common.py +++ b/benchmarks/common.py @@ -131,8 +131,8 @@ def load_tasks( repo=repo, language=spec.language, query=item["query"], - relevant=tuple(_parse_target(raw) for raw in item.get("relevant", [])), - secondary=tuple(_parse_target(raw) for raw in item.get("secondary", [])), + relevant=tuple(_parse_target(t) for t in item.get("relevant", [])), + secondary=tuple(_parse_target(t) for t in item.get("secondary", [])), category=category if isinstance(category, str) else infer_category(item["query"]), category_inferred=category is None, ) @@ -160,9 +160,9 @@ def path_matches(file_path: str, relative_path: str) -> bool: def span_overlaps(start_line: int, end_line: int, target: Target) -> bool: if not target.has_span: return True - assert target.start_line is not None - assert target.end_line is not None - return not (end_line < target.start_line or start_line > target.end_line) + target_start: int = target.start_line # type: ignore[assignment] + target_end: int = target.end_line # type: ignore[assignment] + return not (end_line < target_start or start_line > target_end) def target_matches_location(file_path: str, start_line: int, end_line: int, target: Target) -> bool: From c0a0541b37e8b708783b84120fbd58bfcffabd47 Mon Sep 17 00:00:00 2001 From: Pringled Date: Wed, 15 Apr 2026 08:17:56 +0200 Subject: [PATCH 04/15] Architecture review: remove dead code, fold helpers, pass specs, clean up common.py --- benchmarks/bench_hybrid.py | 23 ++++++++---------- benchmarks/common.py | 50 ++++++++++++-------------------------- 2 files changed, 26 insertions(+), 47 deletions(-) diff --git a/benchmarks/bench_hybrid.py b/benchmarks/bench_hybrid.py index 1acf182..c197da5 100644 --- a/benchmarks/bench_hybrid.py +++ b/benchmarks/bench_hybrid.py @@ -11,6 +11,7 @@ from model2vec import StaticModel from benchmarks.common import ( + RepoSpec, Target, Task, apply_task_filters, @@ -58,14 +59,6 @@ def _parse_args() -> argparse.Namespace: return parser.parse_args() -def _is_relevant(result: SearchResult, task: Task) -> bool: - chunk = result.chunk - return any( - target_matches_location(chunk.file_path, chunk.start_line, chunk.end_line, target) - for target in task.all_relevant - ) - - def _dcg(relevances: list[int]) -> float: return sum(rel / math.log2(i + 2) for i, rel in enumerate(relevances)) @@ -161,14 +154,15 @@ def _print_language_table(results: list[RepoResult]) -> None: print(f" {'q-p50':<28} " + " ".join(p50_row), file=sys.stderr) -def _bench_quality(repo_tasks: dict[str, list[Task]], model: StaticModel, *, verbose: bool = False) -> list[RepoResult]: +def _bench_quality( + repo_tasks: dict[str, list[Task]], model: StaticModel, specs: dict[str, RepoSpec], *, verbose: bool = False +) -> list[RepoResult]: print( f"{'Repo':<12} {'language':<12} {'chunks':>6} {'index':>9} {'NDCG@5':>8} {'NDCG@10':>8} {'p50':>8}", file=sys.stderr, ) print(f"{'-' * 12} {'-' * 12} {'-' * 6} {'-' * 9} {'-' * 8} {'-' * 8} {'-' * 8}", file=sys.stderr) results: list[RepoResult] = [] - specs = available_repo_specs() for repo, tasks in sorted(repo_tasks.items()): spec = specs[repo] started = time.perf_counter() @@ -186,7 +180,7 @@ def _bench_quality(repo_tasks: dict[str, list[Task]], model: 
StaticModel, *, ver return results -def _bench_cache(repo_tasks: dict[str, list[Task]], model: StaticModel) -> list[RepoResult]: +def _bench_cache(repo_tasks: dict[str, list[Task]], model: StaticModel, specs: dict[str, RepoSpec]) -> list[RepoResult]: _CACHE_DIR.mkdir(parents=True, exist_ok=True) print(f"Cache dir: {_CACHE_DIR}", file=sys.stderr) print(file=sys.stderr) @@ -196,7 +190,6 @@ def _bench_cache(repo_tasks: dict[str, list[Task]], model: StaticModel) -> list[ ) print(f"{'-' * 12} {'-' * 12} {'-' * 6} {'-' * 9} {'-' * 9} {'-' * 8} {'-' * 8}", file=sys.stderr) results: list[RepoResult] = [] - specs = available_repo_specs() model_ns = _MODEL_NAME.replace("/", "--") for repo, tasks in sorted(repo_tasks.items()): spec = specs[repo] @@ -244,7 +237,11 @@ def main() -> None: print(f"Loaded in {(time.perf_counter() - started) * 1000:.0f} ms", file=sys.stderr) print(file=sys.stderr) repo_tasks = grouped_tasks(tasks) - results = _bench_cache(repo_tasks, model) if args.cache else _bench_quality(repo_tasks, model, verbose=args.verbose) + results = ( + _bench_cache(repo_tasks, model, repo_specs) + if args.cache + else _bench_quality(repo_tasks, model, repo_specs, verbose=args.verbose) + ) _print_group_summary(results, "language") _print_language_table(results) diff --git a/benchmarks/common.py b/benchmarks/common.py index 62c0b7c..3b64bf9 100644 --- a/benchmarks/common.py +++ b/benchmarks/common.py @@ -1,9 +1,10 @@ from __future__ import annotations import json +from collections import defaultdict from dataclasses import dataclass from pathlib import Path -from typing import Protocol, cast +from typing import Protocol BENCH_ROOT = Path("/tmp/bench") BENCHMARKS_DIR = Path(__file__).parent @@ -75,16 +76,11 @@ def _coerce_int(value: object) -> int: return int(value) -def _coerce_mapping(raw: object) -> dict[str, object]: - if not isinstance(raw, dict): - raise TypeError(f"expected mapping, got {type(raw).__name__}") - return cast(dict[str, object], raw) - - def _parse_target(raw: str | dict[str, object]) -> Target: if isinstance(raw, str): return Target(path=raw) - raw = _coerce_mapping(raw) + if not isinstance(raw, dict): + raise TypeError(f"expected mapping, got {type(raw).__name__}") start_line = raw.get("start_line") end_line = raw.get("end_line") return Target( @@ -108,14 +104,10 @@ def available_repo_specs(repo_specs: dict[str, RepoSpec] | None = None) -> dict[ } -def load_tasks( - path: Path = ANNOTATIONS_DIR, - repo_specs: dict[str, RepoSpec] | None = None, -) -> list[Task]: +def load_tasks(repo_specs: dict[str, RepoSpec] | None = None) -> list[Task]: specs = load_repo_specs() if repo_specs is None else repo_specs tasks: list[Task] = [] - annotation_files = sorted(path.glob("*.json")) if path.is_dir() else [path] - for annotation_file in annotation_files: + for annotation_file in sorted(ANNOTATIONS_DIR.glob("*.json")): if annotation_file.stem not in specs: continue raw = json.loads(annotation_file.read_text(encoding="utf-8")) @@ -144,29 +136,19 @@ def apply_task_filters( tasks: list[Task], repos: list[str] | None = None, languages: list[str] | None = None, - limit: int | None = None, ) -> list[Task]: filtered = [task for task in tasks if not repos or task.repo in repos] - filtered = [task for task in filtered if not languages or task.language in languages] - return filtered if limit is None else filtered[:limit] + return [task for task in filtered if not languages or task.language in languages] -def path_matches(file_path: str, relative_path: str) -> bool: - normalized_file = 
file_path.replace("\\", "/") - normalized_relative = relative_path.replace("\\", "/") - return normalized_file == normalized_relative or normalized_file.endswith(f"/{normalized_relative}") - - -def span_overlaps(start_line: int, end_line: int, target: Target) -> bool: +def target_matches_location(file_path: str, start_line: int, end_line: int, target: Target) -> bool: + norm_file = file_path.replace("\\", "/") + norm_target = target.path.replace("\\", "/") + if not (norm_file == norm_target or norm_file.endswith(f"/{norm_target}")): + return False if not target.has_span: return True - target_start: int = target.start_line # type: ignore[assignment] - target_end: int = target.end_line # type: ignore[assignment] - return not (end_line < target_start or start_line > target_end) - - -def target_matches_location(file_path: str, start_line: int, end_line: int, target: Target) -> bool: - return path_matches(file_path, target.path) and span_overlaps(start_line, end_line, target) + return not (end_line < target.start_line or start_line > target.end_line) # type: ignore[operator] def count_indexed_targets(chunks: list[_ChunkLike], targets: tuple[Target, ...]) -> int: @@ -178,7 +160,7 @@ def count_indexed_targets(chunks: list[_ChunkLike], targets: tuple[Target, ...]) def grouped_tasks(tasks: list[Task]) -> dict[str, list[Task]]: - grouped: dict[str, list[Task]] = {} + result: dict[str, list[Task]] = defaultdict(list) for task in tasks: - grouped.setdefault(task.repo, []).append(task) - return grouped + result[task.repo].append(task) + return dict(result) From 570a5baf6d2740ce066df49802d03485db1fdfe3 Mon Sep 17 00:00:00 2001 From: Pringled Date: Wed, 15 Apr 2026 08:24:51 +0200 Subject: [PATCH 05/15] Simplification pass: inline helpers, remove dead fields, merge summary functions --- benchmarks/bench_hybrid.py | 58 ++++++++++++++++---------------------- benchmarks/common.py | 15 ++-------- benchmarks/sync_repos.py | 12 +++----- 3 files changed, 31 insertions(+), 54 deletions(-) diff --git a/benchmarks/bench_hybrid.py b/benchmarks/bench_hybrid.py index c197da5..f7e0933 100644 --- a/benchmarks/bench_hybrid.py +++ b/benchmarks/bench_hybrid.py @@ -17,7 +17,6 @@ apply_task_filters, available_repo_specs, count_indexed_targets, - grouped_tasks, load_tasks, target_matches_location, ) @@ -50,15 +49,6 @@ class RepoResult: warm_ms: float | None = None -def _parse_args() -> argparse.Namespace: - parser = argparse.ArgumentParser(description="Benchmark hybrid semble search across the pinned benchmark repos.") - parser.add_argument("--cache", action="store_true", help="Show cold vs warm index time using the disk cache.") - parser.add_argument("--repo", action="append", default=[], help="Limit to one or more repo names.") - parser.add_argument("--language", action="append", default=[], help="Limit to one or more languages.") - parser.add_argument("--verbose", action="store_true", help="Print per-query results.") - return parser.parse_args() - - def _dcg(relevances: list[int]) -> float: return sum(rel / math.log2(i + 2) for i, rel in enumerate(relevances)) @@ -112,30 +102,26 @@ def _evaluate(index: SembleIndex, tasks: list[Task], *, verbose: bool = False) - return ndcg5_sum / total, ndcg10_sum / total, latencies[len(latencies) // 2] -def _print_group_summary(results: list[RepoResult], group_by: str) -> None: +def _print_summary(results: list[RepoResult]) -> None: + languages = sorted({result.language for result in results}) + columns = ["Avg", *[lang.title() for lang in languages]] + + avg_ndcg10 = sum(r.ndcg10 for r 
in results) / len(results) + avg_p50 = sum(r.p50_ms for r in results) / len(results) + print(file=sys.stderr) - print(f"By {group_by}", file=sys.stderr) - groups = sorted({getattr(result, group_by) for result in results}) - for value in groups: - grouped = [result for result in results if getattr(result, group_by) == value] + print("By language", file=sys.stderr) + for language in languages: + grouped = [r for r in results if r.language == language] ndcg5_values = [r.ndcg5 for r in grouped if r.ndcg5 is not None] ndcg5_str = f" ndcg@5={sum(ndcg5_values) / len(ndcg5_values):.3f}" if ndcg5_values else "" print( - " " - + f"{value}: repos={len(grouped)}{ndcg5_str}" + f" {language}: repos={len(grouped)}{ndcg5_str}" + f" ndcg@10={sum(r.ndcg10 for r in grouped) / len(grouped):.3f}" + f" p50={sum(r.p50_ms for r in grouped) / len(grouped):.2f}ms", file=sys.stderr, ) - -def _print_language_table(results: list[RepoResult]) -> None: - present = sorted({result.language for result in results}) - columns = ["Avg", *[language.title() for language in present]] - - avg_ndcg10 = sum(result.ndcg10 for result in results) / len(results) - avg_p50 = sum(result.p50_ms for result in results) / len(results) - print(file=sys.stderr) print(f"{'=' * 104}", file=sys.stderr) print("Hybrid benchmark by language", file=sys.stderr) @@ -145,10 +131,10 @@ def _print_language_table(results: list[RepoResult]) -> None: ndcg_row = [f"{avg_ndcg10:>9.3f}"] p50_row = [f"{avg_p50:>8.2f}ms"] - for language in present: - language_results = [result for result in results if result.language == language] - ndcg_row.append(f"{sum(result.ndcg10 for result in language_results) / len(language_results):>9.3f}") - p50_row.append(f"{sum(result.p50_ms for result in language_results) / len(language_results):>8.2f}ms") + for language in languages: + language_results = [r for r in results if r.language == language] + ndcg_row.append(f"{sum(r.ndcg10 for r in language_results) / len(language_results):>9.3f}") + p50_row.append(f"{sum(r.p50_ms for r in language_results) / len(language_results):>8.2f}ms") print(f" {'NDCG@10':<28} " + " ".join(ndcg_row), file=sys.stderr) print(f" {'q-p50':<28} " + " ".join(p50_row), file=sys.stderr) @@ -224,7 +210,12 @@ def _bench_cache(repo_tasks: dict[str, list[Task]], model: StaticModel, specs: d def main() -> None: - args = _parse_args() + parser = argparse.ArgumentParser(description="Benchmark hybrid semble search across the pinned benchmark repos.") + parser.add_argument("--cache", action="store_true", help="Show cold vs warm index time using the disk cache.") + parser.add_argument("--repo", action="append", default=[], help="Limit to one or more repo names.") + parser.add_argument("--language", action="append", default=[], help="Limit to one or more languages.") + parser.add_argument("--verbose", action="store_true", help="Print per-query results.") + args = parser.parse_args() repo_specs = available_repo_specs() tasks = apply_task_filters( load_tasks(repo_specs=repo_specs), repos=args.repo or None, languages=args.language or None @@ -236,14 +227,15 @@ def main() -> None: model = StaticModel.from_pretrained(_MODEL_NAME) print(f"Loaded in {(time.perf_counter() - started) * 1000:.0f} ms", file=sys.stderr) print(file=sys.stderr) - repo_tasks = grouped_tasks(tasks) + repo_tasks: dict[str, list[Task]] = {} + for task in tasks: + repo_tasks.setdefault(task.repo, []).append(task) results = ( _bench_cache(repo_tasks, model, repo_specs) if args.cache else _bench_quality(repo_tasks, model, repo_specs, verbose=args.verbose) 
) - _print_group_summary(results, "language") - _print_language_table(results) + _print_summary(results) if __name__ == "__main__": diff --git a/benchmarks/common.py b/benchmarks/common.py index 3b64bf9..fd6fa83 100644 --- a/benchmarks/common.py +++ b/benchmarks/common.py @@ -1,7 +1,6 @@ from __future__ import annotations import json -from collections import defaultdict from dataclasses import dataclass from pathlib import Path from typing import Protocol @@ -54,7 +53,6 @@ class Task: relevant: tuple[Target, ...] secondary: tuple[Target, ...] category: str - category_inferred: bool @property def all_relevant(self) -> tuple[Target, ...]: @@ -95,11 +93,10 @@ def load_repo_specs(path: Path = REPOS_PATH) -> dict[str, RepoSpec]: return {item["name"]: RepoSpec(**item) for item in raw} -def available_repo_specs(repo_specs: dict[str, RepoSpec] | None = None) -> dict[str, RepoSpec]: - specs = load_repo_specs() if repo_specs is None else repo_specs +def available_repo_specs() -> dict[str, RepoSpec]: return { name: spec - for name, spec in specs.items() + for name, spec in load_repo_specs().items() if spec.checkout_dir.exists() and (ANNOTATIONS_DIR / f"{name}.json").exists() } @@ -126,7 +123,6 @@ def load_tasks(repo_specs: dict[str, RepoSpec] | None = None) -> list[Task]: relevant=tuple(_parse_target(t) for t in item.get("relevant", [])), secondary=tuple(_parse_target(t) for t in item.get("secondary", [])), category=category if isinstance(category, str) else infer_category(item["query"]), - category_inferred=category is None, ) ) return tasks @@ -157,10 +153,3 @@ def count_indexed_targets(chunks: list[_ChunkLike], targets: tuple[Target, ...]) for target in targets if any(target_matches_location(chunk.file_path, chunk.start_line, chunk.end_line, target) for chunk in chunks) ) - - -def grouped_tasks(tasks: list[Task]) -> dict[str, list[Task]]: - result: dict[str, list[Task]] = defaultdict(list) - for task in tasks: - result[task.repo].append(task) - return dict(result) diff --git a/benchmarks/sync_repos.py b/benchmarks/sync_repos.py index 5ba3b24..8b008f4 100644 --- a/benchmarks/sync_repos.py +++ b/benchmarks/sync_repos.py @@ -7,13 +7,6 @@ from benchmarks.common import BENCH_ROOT, load_repo_specs -def _parse_args() -> argparse.Namespace: - parser = argparse.ArgumentParser(description="Clone or update pinned benchmark repositories.") - parser.add_argument("--repo", action="append", default=[], help="Restrict to one or more repo names.") - parser.add_argument("--check", action="store_true", help="Only verify local checkouts against pinned revisions.") - return parser.parse_args() - - def _run(*args: str) -> None: subprocess.run(args, check=True) @@ -43,7 +36,10 @@ def _check_repo(name: str, revision: str) -> str | None: def main() -> None: - args = _parse_args() + parser = argparse.ArgumentParser(description="Clone or update pinned benchmark repositories.") + parser.add_argument("--repo", action="append", default=[], help="Restrict to one or more repo names.") + parser.add_argument("--check", action="store_true", help="Only verify local checkouts against pinned revisions.") + args = parser.parse_args() specs = load_repo_specs() selected = {name: spec for name, spec in specs.items() if not args.repo or name in args.repo} BENCH_ROOT.mkdir(parents=True, exist_ok=True) From f4a87571e5e342f11a0332a13c87d3ba836755ab Mon Sep 17 00:00:00 2001 From: Pringled Date: Wed, 15 Apr 2026 08:27:06 +0200 Subject: [PATCH 06/15] Rename bench_hybrid.py to run_benchmark.py --- benchmarks/README.md | 8 ++++---- 
benchmarks/{bench_hybrid.py => run_benchmark.py} | 0 2 files changed, 4 insertions(+), 4 deletions(-) rename benchmarks/{bench_hybrid.py => run_benchmark.py} (100%) diff --git a/benchmarks/README.md b/benchmarks/README.md index 99c6c23..73194fc 100644 --- a/benchmarks/README.md +++ b/benchmarks/README.md @@ -14,10 +14,10 @@ uv run python -m benchmarks.sync_repos --check ## Run ```bash -uv run python -m benchmarks.bench_hybrid -uv run python -m benchmarks.bench_hybrid --cache -uv run python -m benchmarks.bench_hybrid --repo fastapi --repo axios -uv run python -m benchmarks.bench_hybrid --language python +uv run python -m benchmarks.run_benchmark +uv run python -m benchmarks.run_benchmark --cache +uv run python -m benchmarks.run_benchmark --repo fastapi --repo axios +uv run python -m benchmarks.run_benchmark --language python ``` `--cache` measures cold vs warm index time. Warm time still includes the file walk and diff --git a/benchmarks/bench_hybrid.py b/benchmarks/run_benchmark.py similarity index 100% rename from benchmarks/bench_hybrid.py rename to benchmarks/run_benchmark.py From 75f6823eaf17dfeb2689d241e987005fdb01def0 Mon Sep 17 00:00:00 2001 From: Pringled Date: Wed, 15 Apr 2026 08:39:34 +0200 Subject: [PATCH 07/15] Add docstrings to benchmarks and enforce with ruff Add one-liner docstrings to all functions and methods across benchmarks/common.py, run_benchmark.py, and sync_repos.py. Remove the D ruff ignore for benchmarks/*.py so docstrings are enforced going forward. Also moves count_indexed_targets into run_benchmark.py (where Chunk is imported) to fix a pre-existing mypy Protocol error in the pre-commit env. --- benchmarks/common.py | 27 ++++++++++++--------------- benchmarks/run_benchmark.py | 20 ++++++++++++++++++-- benchmarks/sync_repos.py | 5 +++++ pyproject.toml | 2 +- 4 files changed, 36 insertions(+), 18 deletions(-) diff --git a/benchmarks/common.py b/benchmarks/common.py index fd6fa83..5a03286 100644 --- a/benchmarks/common.py +++ b/benchmarks/common.py @@ -3,7 +3,6 @@ import json from dataclasses import dataclass from pathlib import Path -from typing import Protocol BENCH_ROOT = Path("/tmp/bench") BENCHMARKS_DIR = Path(__file__).parent @@ -19,15 +18,10 @@ class Target: @property def has_span(self) -> bool: + """Return True if both start_line and end_line are set.""" return self.start_line is not None and self.end_line is not None -class _ChunkLike(Protocol): - file_path: str - start_line: int - end_line: int - - @dataclass(frozen=True) class RepoSpec: name: str @@ -38,10 +32,12 @@ class RepoSpec: @property def checkout_dir(self) -> Path: + """Return the local checkout directory for this repo.""" return BENCH_ROOT / self.name @property def benchmark_dir(self) -> Path: + """Return the root directory to index for benchmarking.""" return self.checkout_dir if self.benchmark_root is None else self.checkout_dir / self.benchmark_root @@ -56,10 +52,12 @@ class Task: @property def all_relevant(self) -> tuple[Target, ...]: + """Return primary and secondary relevant targets combined.""" return self.relevant + self.secondary def infer_category(query: str) -> str: + """Infer a task category from the query text.""" if " " not in query.strip(): return "symbol" lowered = query.lower() @@ -69,12 +67,14 @@ def infer_category(query: str) -> str: def _coerce_int(value: object) -> int: + """Coerce a string or int value to int, raising TypeError otherwise.""" if not isinstance(value, int | str): raise TypeError(f"expected int-compatible value, got {type(value).__name__}") return int(value) 
def _parse_target(raw: str | dict[str, object]) -> Target: + """Parse a target from a string path or a mapping with optional line span.""" if isinstance(raw, str): return Target(path=raw) if not isinstance(raw, dict): @@ -89,11 +89,13 @@ def _parse_target(raw: str | dict[str, object]) -> Target: def load_repo_specs(path: Path = REPOS_PATH) -> dict[str, RepoSpec]: + """Load all repo specs from the JSON file at the given path.""" raw = json.loads(path.read_text(encoding="utf-8")) return {item["name"]: RepoSpec(**item) for item in raw} def available_repo_specs() -> dict[str, RepoSpec]: + """Return only the repo specs that have a local checkout and annotation file.""" return { name: spec for name, spec in load_repo_specs().items() @@ -102,6 +104,7 @@ def available_repo_specs() -> dict[str, RepoSpec]: def load_tasks(repo_specs: dict[str, RepoSpec] | None = None) -> list[Task]: + """Load all benchmark tasks from annotation files, filtered to available repo specs.""" specs = load_repo_specs() if repo_specs is None else repo_specs tasks: list[Task] = [] for annotation_file in sorted(ANNOTATIONS_DIR.glob("*.json")): @@ -133,11 +136,13 @@ def apply_task_filters( repos: list[str] | None = None, languages: list[str] | None = None, ) -> list[Task]: + """Filter tasks to the given repos and/or languages; None means no filter.""" filtered = [task for task in tasks if not repos or task.repo in repos] return [task for task in filtered if not languages or task.language in languages] def target_matches_location(file_path: str, start_line: int, end_line: int, target: Target) -> bool: + """Return True if the chunk at file_path:start_line-end_line covers the target.""" norm_file = file_path.replace("\\", "/") norm_target = target.path.replace("\\", "/") if not (norm_file == norm_target or norm_file.endswith(f"/{norm_target}")): @@ -145,11 +150,3 @@ def target_matches_location(file_path: str, start_line: int, end_line: int, targ if not target.has_span: return True return not (end_line < target.start_line or start_line > target.end_line) # type: ignore[operator] - - -def count_indexed_targets(chunks: list[_ChunkLike], targets: tuple[Target, ...]) -> int: - return sum( - 1 - for target in targets - if any(target_matches_location(chunk.file_path, chunk.start_line, chunk.end_line, target) for chunk in chunks) - ) diff --git a/benchmarks/run_benchmark.py b/benchmarks/run_benchmark.py index f7e0933..0f4cd4e 100644 --- a/benchmarks/run_benchmark.py +++ b/benchmarks/run_benchmark.py @@ -16,12 +16,11 @@ Task, apply_task_filters, available_repo_specs, - count_indexed_targets, load_tasks, target_matches_location, ) from semble import SembleIndex -from semble.types import SearchResult +from semble.types import Chunk, SearchResult _CACHE_DIR = Path("/tmp/semble-bench-cache") _MODEL_NAME = "Pringled/potion-code-16M" @@ -29,7 +28,17 @@ _DIRECT_TOP_K = 10 +def count_indexed_targets(chunks: list[Chunk], targets: tuple[Target, ...]) -> int: + """Count how many targets are covered by at least one chunk in the index.""" + return sum( + 1 + for target in targets + if any(target_matches_location(chunk.file_path, chunk.start_line, chunk.end_line, target) for chunk in chunks) + ) + + def _target_rank(results: list[SearchResult], target: Target) -> int | None: + """Return the 1-based rank of the first result covering target, or None.""" for index, result in enumerate(results, 1): chunk = result.chunk if target_matches_location(chunk.file_path, chunk.start_line, chunk.end_line, target): @@ -50,10 +59,12 @@ class RepoResult: def 
_dcg(relevances: list[int]) -> float: + """Compute Discounted Cumulative Gain for a ranked relevance list.""" return sum(rel / math.log2(i + 2) for i, rel in enumerate(relevances)) def _ndcg_at_k(relevant_ranks: list[int], n_relevant: int, k: int) -> float: + """Compute NDCG@k given the ranks of relevant results and the total relevant count.""" if n_relevant == 0: return 0.0 relevances = [0] * k @@ -65,6 +76,7 @@ def _ndcg_at_k(relevant_ranks: list[int], n_relevant: int, k: int) -> float: def _evaluate(index: SembleIndex, tasks: list[Task], *, verbose: bool = False) -> tuple[float, float, float]: + """Return mean NDCG@5, NDCG@10, and median query latency (ms) across all tasks.""" ndcg5_sum = 0.0 ndcg10_sum = 0.0 latencies: list[float] = [] @@ -103,6 +115,7 @@ def _evaluate(index: SembleIndex, tasks: list[Task], *, verbose: bool = False) - def _print_summary(results: list[RepoResult]) -> None: + """Print per-language and overall benchmark summary to stderr.""" languages = sorted({result.language for result in results}) columns = ["Avg", *[lang.title() for lang in languages]] @@ -143,6 +156,7 @@ def _print_summary(results: list[RepoResult]) -> None: def _bench_quality( repo_tasks: dict[str, list[Task]], model: StaticModel, specs: dict[str, RepoSpec], *, verbose: bool = False ) -> list[RepoResult]: + """Run quality benchmarks (NDCG@5, NDCG@10, latency) for each repo.""" print( f"{'Repo':<12} {'language':<12} {'chunks':>6} {'index':>9} {'NDCG@5':>8} {'NDCG@10':>8} {'p50':>8}", file=sys.stderr, @@ -167,6 +181,7 @@ def _bench_quality( def _bench_cache(repo_tasks: dict[str, list[Task]], model: StaticModel, specs: dict[str, RepoSpec]) -> list[RepoResult]: + """Run cold vs warm index timing benchmarks using the disk embedding cache.""" _CACHE_DIR.mkdir(parents=True, exist_ok=True) print(f"Cache dir: {_CACHE_DIR}", file=sys.stderr) print(file=sys.stderr) @@ -210,6 +225,7 @@ def _bench_cache(repo_tasks: dict[str, list[Task]], model: StaticModel, specs: d def main() -> None: + """Parse arguments and run the selected benchmark mode.""" parser = argparse.ArgumentParser(description="Benchmark hybrid semble search across the pinned benchmark repos.") parser.add_argument("--cache", action="store_true", help="Show cold vs warm index time using the disk cache.") parser.add_argument("--repo", action="append", default=[], help="Limit to one or more repo names.") diff --git a/benchmarks/sync_repos.py b/benchmarks/sync_repos.py index 8b008f4..8a1cb0d 100644 --- a/benchmarks/sync_repos.py +++ b/benchmarks/sync_repos.py @@ -8,14 +8,17 @@ def _run(*args: str) -> None: + """Run a subprocess command, raising on non-zero exit.""" subprocess.run(args, check=True) def _output(*args: str) -> str: + """Run a subprocess command and return its stripped stdout.""" return subprocess.check_output(args, text=True).strip() def _sync_repo(name: str, url: str, revision: str) -> None: + """Clone the repo if absent, then fetch and checkout the pinned revision.""" repo_dir = BENCH_ROOT / name if not repo_dir.exists(): print(f"cloning {name} -> {repo_dir}") @@ -26,6 +29,7 @@ def _sync_repo(name: str, url: str, revision: str) -> None: def _check_repo(name: str, revision: str) -> str | None: + """Return an error string if the local checkout is missing or at the wrong revision.""" repo_dir = BENCH_ROOT / name if not (repo_dir / ".git").exists(): return f"{name}: missing checkout at {repo_dir}" @@ -36,6 +40,7 @@ def _check_repo(name: str, revision: str) -> str | None: def main() -> None: + """Parse arguments and sync or verify the pinned 
benchmark repositories.""" parser = argparse.ArgumentParser(description="Clone or update pinned benchmark repositories.") parser.add_argument("--repo", action="append", default=[], help="Restrict to one or more repo names.") parser.add_argument("--check", action="store_true", help="Only verify local checkouts against pinned revisions.") diff --git a/pyproject.toml b/pyproject.toml index 8e5f165..111905d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -70,7 +70,7 @@ target-version = "py310" [tool.ruff.lint.per-file-ignores] "tests/**" = ["ANN"] "src/semble/cli.py" = ["T20"] -"benchmarks/*.py" = ["T20", "D"] +"benchmarks/*.py" = ["T20"] [tool.ruff.lint] select = [ From 269c11b26243ffe31f07fa876bf76fb9851b6501 Mon Sep 17 00:00:00 2001 From: Pringled Date: Wed, 15 Apr 2026 08:53:33 +0200 Subject: [PATCH 08/15] Inline count_indexed_targets, pre-group by_language in _print_summary --- benchmarks/run_benchmark.py | 24 +++++++++--------------- 1 file changed, 9 insertions(+), 15 deletions(-) diff --git a/benchmarks/run_benchmark.py b/benchmarks/run_benchmark.py index 0f4cd4e..56ca981 100644 --- a/benchmarks/run_benchmark.py +++ b/benchmarks/run_benchmark.py @@ -20,7 +20,7 @@ target_matches_location, ) from semble import SembleIndex -from semble.types import Chunk, SearchResult +from semble.types import SearchResult _CACHE_DIR = Path("/tmp/semble-bench-cache") _MODEL_NAME = "Pringled/potion-code-16M" @@ -28,15 +28,6 @@ _DIRECT_TOP_K = 10 -def count_indexed_targets(chunks: list[Chunk], targets: tuple[Target, ...]) -> int: - """Count how many targets are covered by at least one chunk in the index.""" - return sum( - 1 - for target in targets - if any(target_matches_location(chunk.file_path, chunk.start_line, chunk.end_line, target) for chunk in chunks) - ) - - def _target_rank(results: list[SearchResult], target: Target) -> int | None: """Return the 1-based rank of the first result covering target, or None.""" for index, result in enumerate(results, 1): @@ -90,7 +81,11 @@ def _evaluate(index: SembleIndex, tasks: list[Task], *, verbose: bool = False) - latencies.append(sorted(query_latencies)[_LATENCY_RUNS // 2]) relevant_ranks = [rank for target in task.all_relevant if (rank := _target_rank(results, target)) is not None] - n_relevant = count_indexed_targets(index.chunks, task.all_relevant) + n_relevant = sum( + 1 + for target in task.all_relevant + if any(target_matches_location(c.file_path, c.start_line, c.end_line, target) for c in index.chunks) + ) q_ndcg5 = _ndcg_at_k(relevant_ranks, n_relevant, 5) q_ndcg10 = _ndcg_at_k(relevant_ranks, n_relevant, 10) ndcg5_sum += q_ndcg5 @@ -117,6 +112,7 @@ def _evaluate(index: SembleIndex, tasks: list[Task], *, verbose: bool = False) - def _print_summary(results: list[RepoResult]) -> None: """Print per-language and overall benchmark summary to stderr.""" languages = sorted({result.language for result in results}) + by_language = {lang: [r for r in results if r.language == lang] for lang in languages} columns = ["Avg", *[lang.title() for lang in languages]] avg_ndcg10 = sum(r.ndcg10 for r in results) / len(results) @@ -124,8 +120,7 @@ def _print_summary(results: list[RepoResult]) -> None: print(file=sys.stderr) print("By language", file=sys.stderr) - for language in languages: - grouped = [r for r in results if r.language == language] + for language, grouped in by_language.items(): ndcg5_values = [r.ndcg5 for r in grouped if r.ndcg5 is not None] ndcg5_str = f" ndcg@5={sum(ndcg5_values) / len(ndcg5_values):.3f}" if ndcg5_values else "" print( @@ -144,8 +139,7 
@@ def _print_summary(results: list[RepoResult]) -> None: ndcg_row = [f"{avg_ndcg10:>9.3f}"] p50_row = [f"{avg_p50:>8.2f}ms"] - for language in languages: - language_results = [r for r in results if r.language == language] + for language, language_results in by_language.items(): ndcg_row.append(f"{sum(r.ndcg10 for r in language_results) / len(language_results):>9.3f}") p50_row.append(f"{sum(r.p50_ms for r in language_results) / len(language_results):>8.2f}ms") From 2bd010b154d43e317e6de81e0ad00905eb3d5e9b Mon Sep 17 00:00:00 2001 From: Pringled Date: Wed, 15 Apr 2026 08:56:29 +0200 Subject: [PATCH 09/15] Drop from __future__ import annotations where not needed --- benchmarks/common.py | 2 -- benchmarks/run_benchmark.py | 2 -- benchmarks/sync_repos.py | 2 -- 3 files changed, 6 deletions(-) diff --git a/benchmarks/common.py b/benchmarks/common.py index 5a03286..0183254 100644 --- a/benchmarks/common.py +++ b/benchmarks/common.py @@ -1,5 +1,3 @@ -from __future__ import annotations - import json from dataclasses import dataclass from pathlib import Path diff --git a/benchmarks/run_benchmark.py b/benchmarks/run_benchmark.py index 56ca981..5147f02 100644 --- a/benchmarks/run_benchmark.py +++ b/benchmarks/run_benchmark.py @@ -1,5 +1,3 @@ -from __future__ import annotations - import argparse import math import shutil diff --git a/benchmarks/sync_repos.py b/benchmarks/sync_repos.py index 8a1cb0d..0deb767 100644 --- a/benchmarks/sync_repos.py +++ b/benchmarks/sync_repos.py @@ -1,5 +1,3 @@ -from __future__ import annotations - import argparse import subprocess import sys From 1c875897212799469327a8500a9dd534a8f19694 Mon Sep 17 00:00:00 2001 From: Pringled Date: Wed, 15 Apr 2026 09:06:43 +0200 Subject: [PATCH 10/15] Save benchmark results to benchmarks/results/.json Full runs (no --repo/--language filters) automatically write results to benchmarks/results/.json, keyed by the 12-char git SHA. The file includes the full SHA, model name, per-repo rows, language aggregates, and overall summary. Cache mode writes -cache.json. Filtered runs are not saved. 
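The saved files are plain JSON, so they can be inspected without any tooling. A minimal sketch of reading one back, using the results file committed in this patch and run from the repo root; the keys match what _save_results writes below:

```python
import json
from pathlib import Path

# Results file committed in this patch, keyed by the 12-char git SHA.
path = Path("benchmarks/results/2bd010b154d4.json")
data = json.loads(path.read_text(encoding="utf-8"))

print(f"{data['sha'][:12]}  model={data['model']}  cache_mode={data['cache_mode']}")
print(f"overall: ndcg@10={data['summary']['ndcg10']:.4f}  p50={data['summary']['p50_ms']:.3f}ms")
for language, agg in sorted(data["by_language"].items()):
    print(f"  {language:<12} repos={agg['repos']}  ndcg@10={agg['ndcg10']:.4f}  p50={agg['p50_ms']:.3f}ms")
```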
--- benchmarks/results/.gitkeep | 0 benchmarks/results/2bd010b154d4.json | 363 +++++++++++++++++++++++++++ benchmarks/run_benchmark.py | 43 +++- 3 files changed, 405 insertions(+), 1 deletion(-) create mode 100644 benchmarks/results/.gitkeep create mode 100644 benchmarks/results/2bd010b154d4.json diff --git a/benchmarks/results/.gitkeep b/benchmarks/results/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/benchmarks/results/2bd010b154d4.json b/benchmarks/results/2bd010b154d4.json new file mode 100644 index 0000000..b60d885 --- /dev/null +++ b/benchmarks/results/2bd010b154d4.json @@ -0,0 +1,363 @@ +{ + "sha": "2bd010b154d43e317e6de81e0ad00905eb3d5e9b", + "model": "Pringled/potion-code-16M", + "cache_mode": false, + "summary": { + "ndcg10": 0.8668, + "p50_ms": 0.697 + }, + "by_language": { + "csharp": { + "repos": 1, + "ndcg10": 0.8263, + "p50_ms": 1.883 + }, + "go": { + "repos": 3, + "ndcg10": 0.9515, + "p50_ms": 0.485 + }, + "java": { + "repos": 3, + "ndcg10": 0.842, + "p50_ms": 1.239 + }, + "javascript": { + "repos": 3, + "ndcg10": 0.9282, + "p50_ms": 0.386 + }, + "kotlin": { + "repos": 1, + "ndcg10": 0.7631, + "p50_ms": 0.832 + }, + "php": { + "repos": 3, + "ndcg10": 0.906, + "p50_ms": 0.867 + }, + "python": { + "repos": 8, + "ndcg10": 0.8233, + "p50_ms": 0.479 + }, + "ruby": { + "repos": 3, + "ndcg10": 0.8911, + "p50_ms": 0.578 + }, + "rust": { + "repos": 1, + "ndcg10": 0.8878, + "p50_ms": 0.868 + }, + "scala": { + "repos": 1, + "ndcg10": 0.8415, + "p50_ms": 0.829 + }, + "swift": { + "repos": 1, + "ndcg10": 0.9316, + "p50_ms": 0.496 + }, + "typescript": { + "repos": 1, + "ndcg10": 0.7431, + "p50_ms": 0.808 + } + }, + "repos": [ + { + "repo": "aiohttp", + "language": "python", + "chunks": 756, + "ndcg10": 0.7821229638714016, + "p50_ms": 0.5519580008694902, + "ndcg5": 0.7132626857513019, + "cold_ms": null, + "warm_ms": null + }, + { + "repo": "alamofire", + "language": "swift", + "chunks": 649, + "ndcg10": 0.9315768229529695, + "p50_ms": 0.4956250049872324, + "ndcg5": 0.900172569211564, + "cold_ms": null, + "warm_ms": null + }, + { + "repo": "axios", + "language": "javascript", + "chunks": 166, + "ndcg10": 0.9671522420975631, + "p50_ms": 0.5629590013995767, + "ndcg5": 0.9671522420975631, + "cold_ms": null, + "warm_ms": null + }, + { + "repo": "cats", + "language": "scala", + "chunks": 1254, + "ndcg10": 0.8414671964692401, + "p50_ms": 0.8292909988085739, + "ndcg5": 0.8157722039023972, + "cold_ms": null, + "warm_ms": null + }, + { + "repo": "chi", + "language": "go", + "chunks": 262, + "ndcg10": 0.9455120441745608, + "p50_ms": 0.6020420041750185, + "ndcg5": 0.9455120441745608, + "cold_ms": null, + "warm_ms": null + }, + { + "repo": "cobra", + "language": "go", + "chunks": 394, + "ndcg10": 0.970068981106951, + "p50_ms": 0.3933749976567924, + "ndcg5": 0.970068981106951, + "cold_ms": null, + "warm_ms": null + }, + { + "repo": "commons-lang", + "language": "java", + "chunks": 3152, + "ndcg10": 0.8052591049306037, + "p50_ms": 0.8624999973108061, + "ndcg5": 0.7688578654609097, + "cold_ms": null, + "warm_ms": null + }, + { + "repo": "express", + "language": "javascript", + "chunks": 52, + "ndcg10": 0.9593872208972474, + "p50_ms": 0.2536250030971132, + "ndcg5": 0.9593872208972474, + "cold_ms": null, + "warm_ms": null + }, + { + "repo": "fastapi", + "language": "python", + "chunks": 597, + "ndcg10": 0.7693095302894921, + "p50_ms": 0.4379579986562021, + "ndcg5": 0.7314364449312006, + "cold_ms": null, + "warm_ms": null + }, + { + "repo": "flask", + "language": "python", + "chunks": 
291, + "ndcg10": 0.8767012186349079, + "p50_ms": 0.42783399840118363, + "ndcg5": 0.8570900833760776, + "cold_ms": null, + "warm_ms": null + }, + { + "repo": "gin", + "language": "go", + "chunks": 576, + "ndcg10": 0.939064318485603, + "p50_ms": 0.45987500197952613, + "ndcg5": 0.8807555442147937, + "cold_ms": null, + "warm_ms": null + }, + { + "repo": "gson", + "language": "java", + "chunks": 1460, + "ndcg10": 0.9261859507142916, + "p50_ms": 1.0446250016684644, + "ndcg5": 0.9261859507142916, + "cold_ms": null, + "warm_ms": null + }, + { + "repo": "guzzle", + "language": "php", + "chunks": 206, + "ndcg10": 0.844506786325837, + "p50_ms": 0.5477500017150305, + "ndcg5": 0.8326908338735671, + "cold_ms": null, + "warm_ms": null + }, + { + "repo": "httpx", + "language": "python", + "chunks": 248, + "ndcg10": 0.871159099521697, + "p50_ms": 0.44858400360681117, + "ndcg5": 0.8519694264932337, + "cold_ms": null, + "warm_ms": null + }, + { + "repo": "jackson-databind", + "language": "java", + "chunks": 4570, + "ndcg10": 0.7944291752941182, + "p50_ms": 1.810874993680045, + "ndcg5": 0.7667968319202225, + "cold_ms": null, + "warm_ms": null + }, + { + "repo": "ktor", + "language": "kotlin", + "chunks": 425, + "ndcg10": 0.7630927329648237, + "p50_ms": 0.8315000013681129, + "ndcg5": 0.726275662513606, + "cold_ms": null, + "warm_ms": null + }, + { + "repo": "laravel-framework", + "language": "php", + "chunks": 6197, + "ndcg10": 0.967888315659275, + "p50_ms": 1.207750006869901, + "ndcg5": 0.967888315659275, + "cold_ms": null, + "warm_ms": null + }, + { + "repo": "messagepack-csharp", + "language": "csharp", + "chunks": 1125, + "ndcg10": 0.8262866007393468, + "p50_ms": 1.882749995274935, + "ndcg5": 0.8164536328001585, + "cold_ms": null, + "warm_ms": null + }, + { + "repo": "model2vec", + "language": "python", + "chunks": 107, + "ndcg10": 0.695271294655741, + "p50_ms": 0.4195000001345761, + "ndcg5": 0.6593701861221591, + "cold_ms": null, + "warm_ms": null + }, + { + "repo": "monolog", + "language": "php", + "chunks": 417, + "ndcg10": 0.9055096182921145, + "p50_ms": 0.8463749982183799, + "ndcg5": 0.9055096182921145, + "cold_ms": null, + "warm_ms": null + }, + { + "repo": "pydantic", + "language": "python", + "chunks": 1518, + "ndcg10": 0.7070408064407742, + "p50_ms": 0.6517500005429611, + "ndcg5": 0.6795591269045096, + "cold_ms": null, + "warm_ms": null + }, + { + "repo": "rack", + "language": "ruby", + "chunks": 249, + "ndcg10": 1.0, + "p50_ms": 0.4828749952139333, + "ndcg5": 1.0, + "cold_ms": null, + "warm_ms": null + }, + { + "repo": "rails", + "language": "ruby", + "chunks": 465, + "ndcg10": 0.8346443747935481, + "p50_ms": 0.9173330035991967, + "ndcg5": 0.7466134836472739, + "cold_ms": null, + "warm_ms": null + }, + { + "repo": "redux", + "language": "javascript", + "chunks": 53, + "ndcg10": 0.8580772959099011, + "p50_ms": 0.3419580025365576, + "ndcg5": 0.8226294385530917, + "cold_ms": null, + "warm_ms": null + }, + { + "repo": "requests", + "language": "python", + "chunks": 169, + "ndcg10": 0.9550842629661954, + "p50_ms": 0.40808300400385633, + "ndcg5": 0.9550842629661954, + "cold_ms": null, + "warm_ms": null + }, + { + "repo": "sinatra", + "language": "ruby", + "chunks": 68, + "ndcg10": 0.8387325493217617, + "p50_ms": 0.3345000004628673, + "ndcg5": 0.8387325493217617, + "cold_ms": null, + "warm_ms": null + }, + { + "repo": "starlette", + "language": "python", + "chunks": 213, + "ndcg10": 0.9294136613951622, + "p50_ms": 0.4845000003115274, + "ndcg5": 0.9058681185722455, + "cold_ms": null, + "warm_ms": null + 
}, + { + "repo": "tokio", + "language": "rust", + "chunks": 2730, + "ndcg10": 0.8878478903956787, + "p50_ms": 0.8677080040797591, + "ndcg5": 0.8750003941122573, + "cold_ms": null, + "warm_ms": null + }, + { + "repo": "trpc", + "language": "typescript", + "chunks": 362, + "ndcg10": 0.7431267778412411, + "p50_ms": 0.8075000005192123, + "ndcg5": 0.6949834508995433, + "cold_ms": null, + "warm_ms": null + } + ] +} diff --git a/benchmarks/run_benchmark.py b/benchmarks/run_benchmark.py index 5147f02..5269479 100644 --- a/benchmarks/run_benchmark.py +++ b/benchmarks/run_benchmark.py @@ -1,9 +1,11 @@ import argparse +import json import math import shutil +import subprocess import sys import time -from dataclasses import dataclass +from dataclasses import asdict, dataclass from pathlib import Path from model2vec import StaticModel @@ -216,6 +218,43 @@ def _bench_cache(repo_tasks: dict[str, list[Task]], model: StaticModel, specs: d return results +def _save_results(results: list[RepoResult], *, cache_mode: bool) -> None: + """Write results to benchmarks/results/[-cache].json.""" + try: + sha = subprocess.check_output(["git", "rev-parse", "HEAD"], text=True).strip() + except subprocess.CalledProcessError: + sha = "unknown" + + languages = sorted({r.language for r in results}) + by_language = {lang: [r for r in results if r.language == lang] for lang in languages} + + output = { + "sha": sha, + "model": _MODEL_NAME, + "cache_mode": cache_mode, + "summary": { + "ndcg10": round(sum(r.ndcg10 for r in results) / len(results), 4), + "p50_ms": round(sum(r.p50_ms for r in results) / len(results), 3), + }, + "by_language": { + lang: { + "repos": len(grouped), + "ndcg10": round(sum(r.ndcg10 for r in grouped) / len(grouped), 4), + "p50_ms": round(sum(r.p50_ms for r in grouped) / len(grouped), 3), + } + for lang, grouped in by_language.items() + }, + "repos": [asdict(r) for r in results], + } + + results_dir = Path(__file__).parent / "results" + results_dir.mkdir(exist_ok=True) + suffix = "-cache" if cache_mode else "" + out_path = results_dir / f"{sha[:12]}{suffix}.json" + out_path.write_text(json.dumps(output, indent=2), encoding="utf-8") + print(f"\nResults saved to {out_path}", file=sys.stderr) + + def main() -> None: """Parse arguments and run the selected benchmark mode.""" parser = argparse.ArgumentParser(description="Benchmark hybrid semble search across the pinned benchmark repos.") @@ -244,6 +283,8 @@ def main() -> None: else _bench_quality(repo_tasks, model, repo_specs, verbose=args.verbose) ) _print_summary(results) + if not args.repo and not args.language: + _save_results(results, cache_mode=args.cache) if __name__ == "__main__": From 92176df212afb93dd4691b51622c4982abc23f92 Mon Sep 17 00:00:00 2001 From: Pringled Date: Wed, 15 Apr 2026 09:13:48 +0200 Subject: [PATCH 11/15] Replace cache benchmark with index_ms tracking per repo MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Drop the --cache mode (cold vs warm build timing) — it was noisy and not actionable. Instead, add index_ms to RepoResult so every full run records index build time per repo alongside NDCG and query latency. index_ms is included in the saved JSON and printed in the summary table. 
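For context on the quality numbers: NDCG is computed per query with binary relevance. The sketch below is a self-contained restatement of the _dcg/_ndcg_at_k helpers in run_benchmark.py, with one worked example:

```python
import math

def ndcg_at_k(relevant_ranks: list[int], n_relevant: int, k: int) -> float:
    # Binary gain: 1 at each 1-based rank where a relevant target surfaced.
    gains = [0] * k
    for rank in relevant_ranks:
        if rank <= k:
            gains[rank - 1] = 1
    dcg = sum(g / math.log2(i + 2) for i, g in enumerate(gains))
    # Ideal ordering puts every indexed relevant target at the top.
    n_ideal = min(n_relevant, k)
    idcg = sum(1 / math.log2(i + 2) for i in range(n_ideal))
    return dcg / idcg if idcg else 0.0

# Two relevant targets, ranked 1st and 3rd in the result list:
print(round(ndcg_at_k([1, 3], n_relevant=2, k=10), 3))  # 0.92
```

NDCG should be stable for a fixed index and model; p50 and index_ms are wall-clock, so some run-to-run noise is expected there.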
--- benchmarks/run_benchmark.py | 84 ++++++++++--------------------------- 1 file changed, 21 insertions(+), 63 deletions(-) diff --git a/benchmarks/run_benchmark.py b/benchmarks/run_benchmark.py index 5269479..8267f74 100644 --- a/benchmarks/run_benchmark.py +++ b/benchmarks/run_benchmark.py @@ -1,7 +1,6 @@ import argparse import json import math -import shutil import subprocess import sys import time @@ -22,7 +21,6 @@ from semble import SembleIndex from semble.types import SearchResult -_CACHE_DIR = Path("/tmp/semble-bench-cache") _MODEL_NAME = "Pringled/potion-code-16M" _LATENCY_RUNS = 5 _DIRECT_TOP_K = 10 @@ -42,11 +40,10 @@ class RepoResult: repo: str language: str chunks: int + ndcg5: float ndcg10: float p50_ms: float - ndcg5: float | None = None - cold_ms: float | None = None - warm_ms: float | None = None + index_ms: float def _dcg(relevances: list[int]) -> float: @@ -117,16 +114,17 @@ def _print_summary(results: list[RepoResult]) -> None: avg_ndcg10 = sum(r.ndcg10 for r in results) / len(results) avg_p50 = sum(r.p50_ms for r in results) / len(results) + avg_index = sum(r.index_ms for r in results) / len(results) print(file=sys.stderr) print("By language", file=sys.stderr) for language, grouped in by_language.items(): - ndcg5_values = [r.ndcg5 for r in grouped if r.ndcg5 is not None] - ndcg5_str = f" ndcg@5={sum(ndcg5_values) / len(ndcg5_values):.3f}" if ndcg5_values else "" print( - f" {language}: repos={len(grouped)}{ndcg5_str}" + f" {language}: repos={len(grouped)}" + + f" ndcg@5={sum(r.ndcg5 for r in grouped) / len(grouped):.3f}" + f" ndcg@10={sum(r.ndcg10 for r in grouped) / len(grouped):.3f}" - + f" p50={sum(r.p50_ms for r in grouped) / len(grouped):.2f}ms", + + f" p50={sum(r.p50_ms for r in grouped) / len(grouped):.2f}ms" + + f" index={sum(r.index_ms for r in grouped) / len(grouped):.0f}ms", file=sys.stderr, ) @@ -139,12 +137,15 @@ def _print_summary(results: list[RepoResult]) -> None: ndcg_row = [f"{avg_ndcg10:>9.3f}"] p50_row = [f"{avg_p50:>8.2f}ms"] + index_row = [f"{avg_index:>7.0f}ms"] for language, language_results in by_language.items(): ndcg_row.append(f"{sum(r.ndcg10 for r in language_results) / len(language_results):>9.3f}") p50_row.append(f"{sum(r.p50_ms for r in language_results) / len(language_results):>8.2f}ms") + index_row.append(f"{sum(r.index_ms for r in language_results) / len(language_results):>7.0f}ms") print(f" {'NDCG@10':<28} " + " ".join(ndcg_row), file=sys.stderr) print(f" {'q-p50':<28} " + " ".join(p50_row), file=sys.stderr) + print(f" {'index':<28} " + " ".join(index_row), file=sys.stderr) def _bench_quality( @@ -163,63 +164,25 @@ def _bench_quality( index = SembleIndex.from_path(spec.benchmark_dir, model=model) index_ms = (time.perf_counter() - started) * 1000 ndcg5, ndcg10, p50_ms = _evaluate(index, tasks, verbose=verbose) - result = RepoResult( - repo=repo, language=spec.language, chunks=len(index.chunks), ndcg5=ndcg5, ndcg10=ndcg10, p50_ms=p50_ms - ) - results.append(result) - print( - f"{repo:<12} {spec.language:<12} {len(index.chunks):>6} {index_ms:>8.0f}ms {ndcg5:>8.3f} {ndcg10:>8.3f} {p50_ms:>7.2f}ms", - file=sys.stderr, - ) - return results - - -def _bench_cache(repo_tasks: dict[str, list[Task]], model: StaticModel, specs: dict[str, RepoSpec]) -> list[RepoResult]: - """Run cold vs warm index timing benchmarks using the disk embedding cache.""" - _CACHE_DIR.mkdir(parents=True, exist_ok=True) - print(f"Cache dir: {_CACHE_DIR}", file=sys.stderr) - print(file=sys.stderr) - print( - f"{'Repo':<12} {'language':<12} {'chunks':>6} {'cold':>9} 
{'warm':>9} {'speedup':>8} {'NDCG@10':>8}", - file=sys.stderr, - ) - print(f"{'-' * 12} {'-' * 12} {'-' * 6} {'-' * 9} {'-' * 9} {'-' * 8} {'-' * 8}", file=sys.stderr) - results: list[RepoResult] = [] - model_ns = _MODEL_NAME.replace("/", "--") - for repo, tasks in sorted(repo_tasks.items()): - spec = specs[repo] - namespace_dir = _CACHE_DIR / model_ns - if namespace_dir.exists(): - shutil.rmtree(namespace_dir) - started = time.perf_counter() - cold = SembleIndex.from_path(spec.benchmark_dir, model=model, cache_dir=_CACHE_DIR, model_name=_MODEL_NAME) - cold_ms = (time.perf_counter() - started) * 1000 - started = time.perf_counter() - warm = SembleIndex.from_path(spec.benchmark_dir, model=model, cache_dir=_CACHE_DIR, model_name=_MODEL_NAME) - warm_ms = (time.perf_counter() - started) * 1000 - _, ndcg10, p50_ms = _evaluate(warm, tasks) result = RepoResult( repo=repo, language=spec.language, - chunks=len(cold.chunks), + chunks=len(index.chunks), + ndcg5=ndcg5, ndcg10=ndcg10, p50_ms=p50_ms, - cold_ms=cold_ms, - warm_ms=warm_ms, + index_ms=index_ms, ) results.append(result) - speedup = cold_ms / warm_ms if warm_ms > 0 else float("inf") print( - f"{repo:<12} {spec.language:<12} {len(cold.chunks):>6} {cold_ms:>8.0f}ms {warm_ms:>8.0f}ms {speedup:>7.1f}x {ndcg10:>8.3f}", + f"{repo:<12} {spec.language:<12} {len(index.chunks):>6} {index_ms:>8.0f}ms {ndcg5:>8.3f} {ndcg10:>8.3f} {p50_ms:>7.2f}ms", file=sys.stderr, ) - print(file=sys.stderr) - print("Warm time still includes file walk plus BM25/Vicinity rebuild; only embedding is skipped.", file=sys.stderr) return results -def _save_results(results: list[RepoResult], *, cache_mode: bool) -> None: - """Write results to benchmarks/results/[-cache].json.""" +def _save_results(results: list[RepoResult]) -> None: + """Write results to benchmarks/results/.json.""" try: sha = subprocess.check_output(["git", "rev-parse", "HEAD"], text=True).strip() except subprocess.CalledProcessError: @@ -231,16 +194,17 @@ def _save_results(results: list[RepoResult], *, cache_mode: bool) -> None: output = { "sha": sha, "model": _MODEL_NAME, - "cache_mode": cache_mode, "summary": { "ndcg10": round(sum(r.ndcg10 for r in results) / len(results), 4), "p50_ms": round(sum(r.p50_ms for r in results) / len(results), 3), + "index_ms": round(sum(r.index_ms for r in results) / len(results), 1), }, "by_language": { lang: { "repos": len(grouped), "ndcg10": round(sum(r.ndcg10 for r in grouped) / len(grouped), 4), "p50_ms": round(sum(r.p50_ms for r in grouped) / len(grouped), 3), + "index_ms": round(sum(r.index_ms for r in grouped) / len(grouped), 1), } for lang, grouped in by_language.items() }, @@ -249,8 +213,7 @@ def _save_results(results: list[RepoResult], *, cache_mode: bool) -> None: results_dir = Path(__file__).parent / "results" results_dir.mkdir(exist_ok=True) - suffix = "-cache" if cache_mode else "" - out_path = results_dir / f"{sha[:12]}{suffix}.json" + out_path = results_dir / f"{sha[:12]}.json" out_path.write_text(json.dumps(output, indent=2), encoding="utf-8") print(f"\nResults saved to {out_path}", file=sys.stderr) @@ -258,7 +221,6 @@ def _save_results(results: list[RepoResult], *, cache_mode: bool) -> None: def main() -> None: """Parse arguments and run the selected benchmark mode.""" parser = argparse.ArgumentParser(description="Benchmark hybrid semble search across the pinned benchmark repos.") - parser.add_argument("--cache", action="store_true", help="Show cold vs warm index time using the disk cache.") parser.add_argument("--repo", action="append", default=[], 
help="Limit to one or more repo names.") parser.add_argument("--language", action="append", default=[], help="Limit to one or more languages.") parser.add_argument("--verbose", action="store_true", help="Print per-query results.") @@ -277,14 +239,10 @@ def main() -> None: repo_tasks: dict[str, list[Task]] = {} for task in tasks: repo_tasks.setdefault(task.repo, []).append(task) - results = ( - _bench_cache(repo_tasks, model, repo_specs) - if args.cache - else _bench_quality(repo_tasks, model, repo_specs, verbose=args.verbose) - ) + results = _bench_quality(repo_tasks, model, repo_specs, verbose=args.verbose) _print_summary(results) if not args.repo and not args.language: - _save_results(results, cache_mode=args.cache) + _save_results(results) if __name__ == "__main__": From c1d788d386f6ddeafa27f04fa6f7b29e5c7dc187 Mon Sep 17 00:00:00 2001 From: Pringled Date: Wed, 15 Apr 2026 09:16:56 +0200 Subject: [PATCH 12/15] Add benchmark results for 92176df --- .../{2bd010b154d4.json => 92176df212af.json} | 245 ++++++++---------- 1 file changed, 114 insertions(+), 131 deletions(-) rename benchmarks/results/{2bd010b154d4.json => 92176df212af.json} (66%) diff --git a/benchmarks/results/2bd010b154d4.json b/benchmarks/results/92176df212af.json similarity index 66% rename from benchmarks/results/2bd010b154d4.json rename to benchmarks/results/92176df212af.json index b60d885..9d1879e 100644 --- a/benchmarks/results/2bd010b154d4.json +++ b/benchmarks/results/92176df212af.json @@ -1,71 +1,83 @@ { - "sha": "2bd010b154d43e317e6de81e0ad00905eb3d5e9b", + "sha": "92176df212afb93dd4691b51622c4982abc23f92", "model": "Pringled/potion-code-16M", - "cache_mode": false, "summary": { "ndcg10": 0.8668, - "p50_ms": 0.697 + "p50_ms": 0.624, + "index_ms": 317.5 }, "by_language": { "csharp": { "repos": 1, "ndcg10": 0.8263, - "p50_ms": 1.883 + "p50_ms": 0.837, + "index_ms": 413.9 }, "go": { "repos": 3, "ndcg10": 0.9515, - "p50_ms": 0.485 + "p50_ms": 0.473, + "index_ms": 148.8 }, "java": { "repos": 3, "ndcg10": 0.842, - "p50_ms": 1.239 + "p50_ms": 1.112, + "index_ms": 923.8 }, "javascript": { "repos": 3, "ndcg10": 0.9282, - "p50_ms": 0.386 + "p50_ms": 0.378, + "index_ms": 33.1 }, "kotlin": { "repos": 1, "ndcg10": 0.7631, - "p50_ms": 0.832 + "p50_ms": 0.8, + "index_ms": 140.3 }, "php": { "repos": 3, "ndcg10": 0.906, - "p50_ms": 0.867 + "p50_ms": 0.852, + "index_ms": 672.2 }, "python": { "repos": 8, "ndcg10": 0.8233, - "p50_ms": 0.479 + "p50_ms": 0.446, + "index_ms": 153.2 }, "ruby": { "repos": 3, "ndcg10": 0.8911, - "p50_ms": 0.578 + "p50_ms": 0.526, + "index_ms": 91.0 }, "rust": { "repos": 1, "ndcg10": 0.8878, - "p50_ms": 0.868 + "p50_ms": 0.753, + "index_ms": 886.6 }, "scala": { "repos": 1, "ndcg10": 0.8415, - "p50_ms": 0.829 + "p50_ms": 0.82, + "index_ms": 612.4 }, "swift": { "repos": 1, "ndcg10": 0.9316, - "p50_ms": 0.496 + "p50_ms": 0.525, + "index_ms": 206.5 }, "typescript": { "repos": 1, "ndcg10": 0.7431, - "p50_ms": 0.808 + "p50_ms": 0.77, + "index_ms": 114.2 } }, "repos": [ @@ -73,291 +85,262 @@ "repo": "aiohttp", "language": "python", "chunks": 756, - "ndcg10": 0.7821229638714016, - "p50_ms": 0.5519580008694902, "ndcg5": 0.7132626857513019, - "cold_ms": null, - "warm_ms": null + "ndcg10": 0.7821229638714016, + "p50_ms": 0.5139580025570467, + "index_ms": 245.69045799580636 }, { "repo": "alamofire", "language": "swift", "chunks": 649, - "ndcg10": 0.9315768229529695, - "p50_ms": 0.4956250049872324, "ndcg5": 0.900172569211564, - "cold_ms": null, - "warm_ms": null + "ndcg10": 0.9315768229529695, + "p50_ms": 
0.5246670043561608, + "index_ms": 206.53395800036378 }, { "repo": "axios", "language": "javascript", "chunks": 166, - "ndcg10": 0.9671522420975631, - "p50_ms": 0.5629590013995767, "ndcg5": 0.9671522420975631, - "cold_ms": null, - "warm_ms": null + "ndcg10": 0.9671522420975631, + "p50_ms": 0.5355000030249357, + "index_ms": 58.22970899316715 }, { "repo": "cats", "language": "scala", "chunks": 1254, - "ndcg10": 0.8414671964692401, - "p50_ms": 0.8292909988085739, "ndcg5": 0.8157722039023972, - "cold_ms": null, - "warm_ms": null + "ndcg10": 0.8414671964692401, + "p50_ms": 0.8197499992093071, + "index_ms": 612.4209999979939 }, { "repo": "chi", "language": "go", "chunks": 262, - "ndcg10": 0.9455120441745608, - "p50_ms": 0.6020420041750185, "ndcg5": 0.9455120441745608, - "cold_ms": null, - "warm_ms": null + "ndcg10": 0.9455120441745608, + "p50_ms": 0.5562090009334497, + "index_ms": 96.6573750047246 }, { "repo": "cobra", "language": "go", "chunks": 394, - "ndcg10": 0.970068981106951, - "p50_ms": 0.3933749976567924, "ndcg5": 0.970068981106951, - "cold_ms": null, - "warm_ms": null + "ndcg10": 0.970068981106951, + "p50_ms": 0.4118329961784184, + "index_ms": 140.34312500007218 }, { "repo": "commons-lang", "language": "java", "chunks": 3152, - "ndcg10": 0.8052591049306037, - "p50_ms": 0.8624999973108061, "ndcg5": 0.7688578654609097, - "cold_ms": null, - "warm_ms": null + "ndcg10": 0.8052591049306037, + "p50_ms": 0.8248750018537976, + "index_ms": 990.0399159960216 }, { "repo": "express", "language": "javascript", "chunks": 52, - "ndcg10": 0.9593872208972474, - "p50_ms": 0.2536250030971132, "ndcg5": 0.9593872208972474, - "cold_ms": null, - "warm_ms": null + "ndcg10": 0.9593872208972474, + "p50_ms": 0.24083400057861581, + "index_ms": 20.915667002554983 }, { "repo": "fastapi", "language": "python", "chunks": 597, - "ndcg10": 0.7693095302894921, - "p50_ms": 0.4379579986562021, "ndcg5": 0.7314364449312006, - "cold_ms": null, - "warm_ms": null + "ndcg10": 0.7693095302894921, + "p50_ms": 0.42249999387422577, + "index_ms": 181.48929099697853 }, { "repo": "flask", "language": "python", "chunks": 291, - "ndcg10": 0.8767012186349079, - "p50_ms": 0.42783399840118363, "ndcg5": 0.8570900833760776, - "cold_ms": null, - "warm_ms": null + "ndcg10": 0.8767012186349079, + "p50_ms": 0.3891669985023327, + "index_ms": 91.24833300302271 }, { "repo": "gin", "language": "go", "chunks": 576, - "ndcg10": 0.939064318485603, - "p50_ms": 0.45987500197952613, "ndcg5": 0.8807555442147937, - "cold_ms": null, - "warm_ms": null + "ndcg10": 0.939064318485603, + "p50_ms": 0.4502500014496036, + "index_ms": 209.39629200438503 }, { "repo": "gson", "language": "java", "chunks": 1460, - "ndcg10": 0.9261859507142916, - "p50_ms": 1.0446250016684644, "ndcg5": 0.9261859507142916, - "cold_ms": null, - "warm_ms": null + "ndcg10": 0.9261859507142916, + "p50_ms": 1.0612499972921796, + "index_ms": 455.70120800402947 }, { "repo": "guzzle", "language": "php", "chunks": 206, - "ndcg10": 0.844506786325837, - "p50_ms": 0.5477500017150305, "ndcg5": 0.8326908338735671, - "cold_ms": null, - "warm_ms": null + "ndcg10": 0.844506786325837, + "p50_ms": 0.516042004164774, + "index_ms": 61.87491699529346 }, { "repo": "httpx", "language": "python", "chunks": 248, - "ndcg10": 0.871159099521697, - "p50_ms": 0.44858400360681117, "ndcg5": 0.8519694264932337, - "cold_ms": null, - "warm_ms": null + "ndcg10": 0.871159099521697, + "p50_ms": 0.3951659964513965, + "index_ms": 79.40462499391288 }, { "repo": "jackson-databind", "language": "java", "chunks": 4570, - "ndcg10": 
0.7944291752941182, - "p50_ms": 1.810874993680045, "ndcg5": 0.7667968319202225, - "cold_ms": null, - "warm_ms": null + "ndcg10": 0.7944291752941182, + "p50_ms": 1.4493329945253208, + "index_ms": 1325.5783330023405 }, { "repo": "ktor", "language": "kotlin", "chunks": 425, - "ndcg10": 0.7630927329648237, - "p50_ms": 0.8315000013681129, "ndcg5": 0.726275662513606, - "cold_ms": null, - "warm_ms": null + "ndcg10": 0.7630927329648237, + "p50_ms": 0.7996659987838939, + "index_ms": 140.25354200566653 }, { "repo": "laravel-framework", "language": "php", "chunks": 6197, - "ndcg10": 0.967888315659275, - "p50_ms": 1.207750006869901, "ndcg5": 0.967888315659275, - "cold_ms": null, - "warm_ms": null + "ndcg10": 0.967888315659275, + "p50_ms": 1.2009579950245097, + "index_ms": 1831.7263749995618 }, { "repo": "messagepack-csharp", "language": "csharp", "chunks": 1125, - "ndcg10": 0.8262866007393468, - "p50_ms": 1.882749995274935, "ndcg5": 0.8164536328001585, - "cold_ms": null, - "warm_ms": null + "ndcg10": 0.8262866007393468, + "p50_ms": 0.8374999961233698, + "index_ms": 413.8517920000595 }, { "repo": "model2vec", "language": "python", "chunks": 107, - "ndcg10": 0.695271294655741, - "p50_ms": 0.4195000001345761, "ndcg5": 0.6593701861221591, - "cold_ms": null, - "warm_ms": null + "ndcg10": 0.695271294655741, + "p50_ms": 0.39674999425187707, + "index_ms": 38.985375002084766 }, { "repo": "monolog", "language": "php", "chunks": 417, - "ndcg10": 0.9055096182921145, - "p50_ms": 0.8463749982183799, "ndcg5": 0.9055096182921145, - "cold_ms": null, - "warm_ms": null + "ndcg10": 0.9055096182921145, + "p50_ms": 0.8403329993598163, + "index_ms": 123.07549999968614 }, { "repo": "pydantic", "language": "python", "chunks": 1518, - "ndcg10": 0.7070408064407742, - "p50_ms": 0.6517500005429611, "ndcg5": 0.6795591269045096, - "cold_ms": null, - "warm_ms": null + "ndcg10": 0.7070408064407742, + "p50_ms": 0.6055419944459572, + "index_ms": 466.1173749991576 }, { "repo": "rack", "language": "ruby", "chunks": 249, - "ndcg10": 1.0, - "p50_ms": 0.4828749952139333, "ndcg5": 1.0, - "cold_ms": null, - "warm_ms": null + "ndcg10": 1.0, + "p50_ms": 0.4677079996326938, + "index_ms": 87.64591699582525 }, { "repo": "rails", "language": "ruby", "chunks": 465, - "ndcg10": 0.8346443747935481, - "p50_ms": 0.9173330035991967, "ndcg5": 0.7466134836472739, - "cold_ms": null, - "warm_ms": null + "ndcg10": 0.8346443747935481, + "p50_ms": 0.8043340058065951, + "index_ms": 155.62629100168124 }, { "repo": "redux", "language": "javascript", "chunks": 53, - "ndcg10": 0.8580772959099011, - "p50_ms": 0.3419580025365576, "ndcg5": 0.8226294385530917, - "cold_ms": null, - "warm_ms": null + "ndcg10": 0.8580772959099011, + "p50_ms": 0.3574169968487695, + "index_ms": 20.11674999812385 }, { "repo": "requests", "language": "python", "chunks": 169, - "ndcg10": 0.9550842629661954, - "p50_ms": 0.40808300400385633, "ndcg5": 0.9550842629661954, - "cold_ms": null, - "warm_ms": null + "ndcg10": 0.9550842629661954, + "p50_ms": 0.38050000148359686, + "index_ms": 51.37912499776576 }, { "repo": "sinatra", "language": "ruby", "chunks": 68, - "ndcg10": 0.8387325493217617, - "p50_ms": 0.3345000004628673, "ndcg5": 0.8387325493217617, - "cold_ms": null, - "warm_ms": null + "ndcg10": 0.8387325493217617, + "p50_ms": 0.30649999825982377, + "index_ms": 29.69916599977296 }, { "repo": "starlette", "language": "python", "chunks": 213, - "ndcg10": 0.9294136613951622, - "p50_ms": 0.4845000003115274, "ndcg5": 0.9058681185722455, - "cold_ms": null, - "warm_ms": null + "ndcg10": 
0.9294136613951622, + "p50_ms": 0.4645410008379258, + "index_ms": 71.30404099734733 }, { "repo": "tokio", "language": "rust", "chunks": 2730, - "ndcg10": 0.8878478903956787, - "p50_ms": 0.8677080040797591, "ndcg5": 0.8750003941122573, - "cold_ms": null, - "warm_ms": null + "ndcg10": 0.8878478903956787, + "p50_ms": 0.7529159993282519, + "index_ms": 886.5858749995823 }, { "repo": "trpc", "language": "typescript", "chunks": 362, - "ndcg10": 0.7431267778412411, - "p50_ms": 0.8075000005192123, "ndcg5": 0.6949834508995433, - "cold_ms": null, - "warm_ms": null + "ndcg10": 0.7431267778412411, + "p50_ms": 0.7703330047661439, + "index_ms": 114.18266699911328 } ] } From f1a50f192e857ccc241c1a64a8548d82bd697862 Mon Sep 17 00:00:00 2001 From: Pringled Date: Wed, 15 Apr 2026 09:17:40 +0200 Subject: [PATCH 13/15] Add trailing newline to saved results JSON --- benchmarks/run_benchmark.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarks/run_benchmark.py b/benchmarks/run_benchmark.py index 8267f74..343e551 100644 --- a/benchmarks/run_benchmark.py +++ b/benchmarks/run_benchmark.py @@ -214,7 +214,7 @@ def _save_results(results: list[RepoResult]) -> None: results_dir = Path(__file__).parent / "results" results_dir.mkdir(exist_ok=True) out_path = results_dir / f"{sha[:12]}.json" - out_path.write_text(json.dumps(output, indent=2), encoding="utf-8") + out_path.write_text(json.dumps(output, indent=2) + "\n", encoding="utf-8") print(f"\nResults saved to {out_path}", file=sys.stderr) From 40a76927ded9482f62ebd5f63930ff59605fe9f8 Mon Sep 17 00:00:00 2001 From: Pringled Date: Wed, 15 Apr 2026 09:23:48 +0200 Subject: [PATCH 14/15] Rename common.py to data.py, move BENCH_ROOT to ~/.cache/semble-bench, inline _output - benchmarks/common.py -> benchmarks/data.py (more descriptive name) - BENCH_ROOT: /tmp/bench -> ~/.cache/semble-bench (survives reboots) - Inline _output into _check_repo (single call site) - Update README to drop --cache docs and reflect new paths --- benchmarks/README.md | 7 +++---- benchmarks/__init__.py | 1 - benchmarks/{common.py => data.py} | 2 +- benchmarks/run_benchmark.py | 2 +- benchmarks/sync_repos.py | 9 ++------- 5 files changed, 7 insertions(+), 14 deletions(-) rename benchmarks/{common.py => data.py} (99%) diff --git a/benchmarks/README.md b/benchmarks/README.md index 73194fc..2296162 100644 --- a/benchmarks/README.md +++ b/benchmarks/README.md @@ -2,7 +2,7 @@ Reproducible local benchmarks for `semble`. -Pinned repositories live in `repos.json` and are checked out into `/tmp/bench`. +Pinned repositories live in `repos.json` and are checked out into `~/.cache/semble-bench`. ## Setup @@ -15,10 +15,9 @@ uv run python -m benchmarks.sync_repos --check ```bash uv run python -m benchmarks.run_benchmark -uv run python -m benchmarks.run_benchmark --cache uv run python -m benchmarks.run_benchmark --repo fastapi --repo axios uv run python -m benchmarks.run_benchmark --language python ``` -`--cache` measures cold vs warm index time. Warm time still includes the file walk and -BM25/Vicinity rebuild; only embedding is skipped. +Full runs (no `--repo`/`--language` filters) automatically save results to +`benchmarks/results/.json`. 
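With checkouts under the home cache they survive reboots, but they can also go stale silently; `sync_repos --check` covers that. For a quick manual look, a minimal sketch that assumes only the `name` field of each entry in `repos.json` and is run from the repo root:

```python
import json
from pathlib import Path

# BENCH_ROOT as defined in benchmarks/data.py after this patch.
BENCH_ROOT = Path.home() / ".cache" / "semble-bench"

for item in json.loads(Path("benchmarks/repos.json").read_text(encoding="utf-8")):
    checkout = BENCH_ROOT / item["name"]
    status = "ok" if (checkout / ".git").exists() else "missing (run benchmarks.sync_repos)"
    print(f"{item['name']:<20} {status}")
```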
diff --git a/benchmarks/__init__.py b/benchmarks/__init__.py index 793aef7..e69de29 100644 --- a/benchmarks/__init__.py +++ b/benchmarks/__init__.py @@ -1 +0,0 @@ -"""Benchmark and eval tooling for semble.""" diff --git a/benchmarks/common.py b/benchmarks/data.py similarity index 99% rename from benchmarks/common.py rename to benchmarks/data.py index 0183254..dac954a 100644 --- a/benchmarks/common.py +++ b/benchmarks/data.py @@ -2,7 +2,7 @@ from dataclasses import dataclass from pathlib import Path -BENCH_ROOT = Path("/tmp/bench") +BENCH_ROOT = Path.home() / ".cache" / "semble-bench" BENCHMARKS_DIR = Path(__file__).parent ANNOTATIONS_DIR = BENCHMARKS_DIR / "annotations" REPOS_PATH = BENCHMARKS_DIR / "repos.json" diff --git a/benchmarks/run_benchmark.py b/benchmarks/run_benchmark.py index 343e551..c5beaa9 100644 --- a/benchmarks/run_benchmark.py +++ b/benchmarks/run_benchmark.py @@ -9,7 +9,7 @@ from model2vec import StaticModel -from benchmarks.common import ( +from benchmarks.data import ( RepoSpec, Target, Task, diff --git a/benchmarks/sync_repos.py b/benchmarks/sync_repos.py index 0deb767..9cf1ebd 100644 --- a/benchmarks/sync_repos.py +++ b/benchmarks/sync_repos.py @@ -2,7 +2,7 @@ import subprocess import sys -from benchmarks.common import BENCH_ROOT, load_repo_specs +from benchmarks.data import BENCH_ROOT, load_repo_specs def _run(*args: str) -> None: @@ -10,11 +10,6 @@ def _run(*args: str) -> None: subprocess.run(args, check=True) -def _output(*args: str) -> str: - """Run a subprocess command and return its stripped stdout.""" - return subprocess.check_output(args, text=True).strip() - - def _sync_repo(name: str, url: str, revision: str) -> None: """Clone the repo if absent, then fetch and checkout the pinned revision.""" repo_dir = BENCH_ROOT / name @@ -31,7 +26,7 @@ def _check_repo(name: str, revision: str) -> str | None: repo_dir = BENCH_ROOT / name if not (repo_dir / ".git").exists(): return f"{name}: missing checkout at {repo_dir}" - head = _output("git", "-C", str(repo_dir), "rev-parse", "HEAD") + head = subprocess.check_output(("git", "-C", str(repo_dir), "rev-parse", "HEAD"), text=True).strip() if head != revision: return f"{name}: expected {revision}, found {head}" return None From 5aa0dda8d9e0d51413ad0ceaa1acc1e6612fb214 Mon Sep 17 00:00:00 2001 From: Pringled Date: Wed, 15 Apr 2026 09:27:13 +0200 Subject: [PATCH 15/15] Add benchmark results for 40a7692 --- .../{92176df212af.json => 40a76927ded9.json} | 170 +++++++++--------- 1 file changed, 85 insertions(+), 85 deletions(-) rename benchmarks/results/{92176df212af.json => 40a76927ded9.json} (66%) diff --git a/benchmarks/results/92176df212af.json b/benchmarks/results/40a76927ded9.json similarity index 66% rename from benchmarks/results/92176df212af.json rename to benchmarks/results/40a76927ded9.json index 9d1879e..d20be46 100644 --- a/benchmarks/results/92176df212af.json +++ b/benchmarks/results/40a76927ded9.json @@ -1,83 +1,83 @@ { - "sha": "92176df212afb93dd4691b51622c4982abc23f92", + "sha": "40a76927ded9482f62ebd5f63930ff59605fe9f8", "model": "Pringled/potion-code-16M", "summary": { "ndcg10": 0.8668, - "p50_ms": 0.624, - "index_ms": 317.5 + "p50_ms": 0.698, + "index_ms": 340.0 }, "by_language": { "csharp": { "repos": 1, "ndcg10": 0.8263, - "p50_ms": 0.837, - "index_ms": 413.9 + "p50_ms": 0.966, + "index_ms": 459.2 }, "go": { "repos": 3, "ndcg10": 0.9515, - "p50_ms": 0.473, - "index_ms": 148.8 + "p50_ms": 0.537, + "index_ms": 164.0 }, "java": { "repos": 3, "ndcg10": 0.842, - "p50_ms": 1.112, - "index_ms": 923.8 + "p50_ms": 
1.149, + "index_ms": 965.2 }, "javascript": { "repos": 3, "ndcg10": 0.9282, - "p50_ms": 0.378, - "index_ms": 33.1 + "p50_ms": 0.432, + "index_ms": 36.0 }, "kotlin": { "repos": 1, "ndcg10": 0.7631, - "p50_ms": 0.8, - "index_ms": 140.3 + "p50_ms": 0.856, + "index_ms": 160.8 }, "php": { "repos": 3, "ndcg10": 0.906, - "p50_ms": 0.852, - "index_ms": 672.2 + "p50_ms": 0.949, + "index_ms": 738.0 }, "python": { "repos": 8, "ndcg10": 0.8233, - "p50_ms": 0.446, - "index_ms": 153.2 + "p50_ms": 0.498, + "index_ms": 163.3 }, "ruby": { "repos": 3, "ndcg10": 0.8911, - "p50_ms": 0.526, - "index_ms": 91.0 + "p50_ms": 0.623, + "index_ms": 97.4 }, "rust": { "repos": 1, "ndcg10": 0.8878, - "p50_ms": 0.753, - "index_ms": 886.6 + "p50_ms": 0.996, + "index_ms": 930.6 }, "scala": { "repos": 1, "ndcg10": 0.8415, - "p50_ms": 0.82, - "index_ms": 612.4 + "p50_ms": 0.942, + "index_ms": 648.2 }, "swift": { "repos": 1, "ndcg10": 0.9316, - "p50_ms": 0.525, - "index_ms": 206.5 + "p50_ms": 0.543, + "index_ms": 229.5 }, "typescript": { "repos": 1, "ndcg10": 0.7431, - "p50_ms": 0.77, - "index_ms": 114.2 + "p50_ms": 0.882, + "index_ms": 121.8 } }, "repos": [ @@ -87,8 +87,8 @@ "chunks": 756, "ndcg5": 0.7132626857513019, "ndcg10": 0.7821229638714016, - "p50_ms": 0.5139580025570467, - "index_ms": 245.69045799580636 + "p50_ms": 0.5919579998590052, + "index_ms": 267.3160420017666 }, { "repo": "alamofire", @@ -96,8 +96,8 @@ "chunks": 649, "ndcg5": 0.900172569211564, "ndcg10": 0.9315768229529695, - "p50_ms": 0.5246670043561608, - "index_ms": 206.53395800036378 + "p50_ms": 0.5432909965747967, + "index_ms": 229.49254100240069 }, { "repo": "axios", @@ -105,8 +105,8 @@ "chunks": 166, "ndcg5": 0.9671522420975631, "ndcg10": 0.9671522420975631, - "p50_ms": 0.5355000030249357, - "index_ms": 58.22970899316715 + "p50_ms": 0.5915000001550652, + "index_ms": 58.03862500033574 }, { "repo": "cats", @@ -114,8 +114,8 @@ "chunks": 1254, "ndcg5": 0.8157722039023972, "ndcg10": 0.8414671964692401, - "p50_ms": 0.8197499992093071, - "index_ms": 612.4209999979939 + "p50_ms": 0.9416660032002255, + "index_ms": 648.2289169944124 }, { "repo": "chi", @@ -123,8 +123,8 @@ "chunks": 262, "ndcg5": 0.9455120441745608, "ndcg10": 0.9455120441745608, - "p50_ms": 0.5562090009334497, - "index_ms": 96.6573750047246 + "p50_ms": 0.6279579974943772, + "index_ms": 103.15404200082412 }, { "repo": "cobra", @@ -132,8 +132,8 @@ "chunks": 394, "ndcg5": 0.970068981106951, "ndcg10": 0.970068981106951, - "p50_ms": 0.4118329961784184, - "index_ms": 140.34312500007218 + "p50_ms": 0.41579100070521235, + "index_ms": 149.65433299948927 }, { "repo": "commons-lang", @@ -141,8 +141,8 @@ "chunks": 3152, "ndcg5": 0.7688578654609097, "ndcg10": 0.8052591049306037, - "p50_ms": 0.8248750018537976, - "index_ms": 990.0399159960216 + "p50_ms": 0.9200830027111806, + "index_ms": 1038.753667002311 }, { "repo": "express", @@ -150,8 +150,8 @@ "chunks": 52, "ndcg5": 0.9593872208972474, "ndcg10": 0.9593872208972474, - "p50_ms": 0.24083400057861581, - "index_ms": 20.915667002554983 + "p50_ms": 0.25366600311826915, + "index_ms": 22.826792002888396 }, { "repo": "fastapi", @@ -159,8 +159,8 @@ "chunks": 597, "ndcg5": 0.7314364449312006, "ndcg10": 0.7693095302894921, - "p50_ms": 0.42249999387422577, - "index_ms": 181.48929099697853 + "p50_ms": 0.47783299669390544, + "index_ms": 189.871916998527 }, { "repo": "flask", @@ -168,8 +168,8 @@ "chunks": 291, "ndcg5": 0.8570900833760776, "ndcg10": 0.8767012186349079, - "p50_ms": 0.3891669985023327, - "index_ms": 91.24833300302271 + "p50_ms": 0.4285830000299029, + 
"index_ms": 92.66295799898217 }, { "repo": "gin", @@ -177,8 +177,8 @@ "chunks": 576, "ndcg5": 0.8807555442147937, "ndcg10": 0.939064318485603, - "p50_ms": 0.4502500014496036, - "index_ms": 209.39629200438503 + "p50_ms": 0.5680000031134114, + "index_ms": 239.0974170048139 }, { "repo": "gson", @@ -186,8 +186,8 @@ "chunks": 1460, "ndcg5": 0.9261859507142916, "ndcg10": 0.9261859507142916, - "p50_ms": 1.0612499972921796, - "index_ms": 455.70120800402947 + "p50_ms": 1.0848340025404468, + "index_ms": 482.5546249994659 }, { "repo": "guzzle", @@ -195,8 +195,8 @@ "chunks": 206, "ndcg5": 0.8326908338735671, "ndcg10": 0.844506786325837, - "p50_ms": 0.516042004164774, - "index_ms": 61.87491699529346 + "p50_ms": 0.5839169971295632, + "index_ms": 72.53133400081424 }, { "repo": "httpx", @@ -204,8 +204,8 @@ "chunks": 248, "ndcg5": 0.8519694264932337, "ndcg10": 0.871159099521697, - "p50_ms": 0.3951659964513965, - "index_ms": 79.40462499391288 + "p50_ms": 0.4406670050229877, + "index_ms": 84.04612500453368 }, { "repo": "jackson-databind", @@ -213,8 +213,8 @@ "chunks": 4570, "ndcg5": 0.7667968319202225, "ndcg10": 0.7944291752941182, - "p50_ms": 1.4493329945253208, - "index_ms": 1325.5783330023405 + "p50_ms": 1.442957996914629, + "index_ms": 1374.4051670000772 }, { "repo": "ktor", @@ -222,8 +222,8 @@ "chunks": 425, "ndcg5": 0.726275662513606, "ndcg10": 0.7630927329648237, - "p50_ms": 0.7996659987838939, - "index_ms": 140.25354200566653 + "p50_ms": 0.8556669999961741, + "index_ms": 160.8068749992526 }, { "repo": "laravel-framework", @@ -231,8 +231,8 @@ "chunks": 6197, "ndcg5": 0.967888315659275, "ndcg10": 0.967888315659275, - "p50_ms": 1.2009579950245097, - "index_ms": 1831.7263749995618 + "p50_ms": 1.3275840028654784, + "index_ms": 1987.1202089998405 }, { "repo": "messagepack-csharp", @@ -240,8 +240,8 @@ "chunks": 1125, "ndcg5": 0.8164536328001585, "ndcg10": 0.8262866007393468, - "p50_ms": 0.8374999961233698, - "index_ms": 413.8517920000595 + "p50_ms": 0.9664999961387366, + "index_ms": 459.23387500079116 }, { "repo": "model2vec", @@ -249,8 +249,8 @@ "chunks": 107, "ndcg5": 0.6593701861221591, "ndcg10": 0.695271294655741, - "p50_ms": 0.39674999425187707, - "index_ms": 38.985375002084766 + "p50_ms": 0.46462499449262396, + "index_ms": 44.985666005231906 }, { "repo": "monolog", @@ -258,8 +258,8 @@ "chunks": 417, "ndcg5": 0.9055096182921145, "ndcg10": 0.9055096182921145, - "p50_ms": 0.8403329993598163, - "index_ms": 123.07549999968614 + "p50_ms": 0.9362909986521117, + "index_ms": 154.34570900106337 }, { "repo": "pydantic", @@ -267,8 +267,8 @@ "chunks": 1518, "ndcg5": 0.6795591269045096, "ndcg10": 0.7070408064407742, - "p50_ms": 0.6055419944459572, - "index_ms": 466.1173749991576 + "p50_ms": 0.6636250036535785, + "index_ms": 490.62920799769927 }, { "repo": "rack", @@ -276,8 +276,8 @@ "chunks": 249, "ndcg5": 1.0, "ndcg10": 1.0, - "p50_ms": 0.4677079996326938, - "index_ms": 87.64591699582525 + "p50_ms": 0.5600000004051253, + "index_ms": 96.56141699815635 }, { "repo": "rails", @@ -285,8 +285,8 @@ "chunks": 465, "ndcg5": 0.7466134836472739, "ndcg10": 0.8346443747935481, - "p50_ms": 0.8043340058065951, - "index_ms": 155.62629100168124 + "p50_ms": 0.9815000012167729, + "index_ms": 168.55954200582346 }, { "repo": "redux", @@ -294,8 +294,8 @@ "chunks": 53, "ndcg5": 0.8226294385530917, "ndcg10": 0.8580772959099011, - "p50_ms": 0.3574169968487695, - "index_ms": 20.11674999812385 + "p50_ms": 0.450166997325141, + "index_ms": 27.171499998075888 }, { "repo": "requests", @@ -303,8 +303,8 @@ "chunks": 169, "ndcg5": 
0.9550842629661954, "ndcg10": 0.9550842629661954, - "p50_ms": 0.38050000148359686, - "index_ms": 51.37912499776576 + "p50_ms": 0.40475000423612073, + "index_ms": 56.275709001056384 }, { "repo": "sinatra", @@ -312,8 +312,8 @@ "chunks": 68, "ndcg5": 0.8387325493217617, "ndcg10": 0.8387325493217617, - "p50_ms": 0.30649999825982377, - "index_ms": 29.69916599977296 + "p50_ms": 0.3260829980717972, + "index_ms": 27.20166600192897 }, { "repo": "starlette", @@ -321,8 +321,8 @@ "chunks": 213, "ndcg5": 0.9058681185722455, "ndcg10": 0.9294136613951622, - "p50_ms": 0.4645410008379258, - "index_ms": 71.30404099734733 + "p50_ms": 0.5122919974382967, + "index_ms": 80.70125000085682 }, { "repo": "tokio", @@ -330,8 +330,8 @@ "chunks": 2730, "ndcg5": 0.8750003941122573, "ndcg10": 0.8878478903956787, - "p50_ms": 0.7529159993282519, - "index_ms": 886.5858749995823 + "p50_ms": 0.9958329974324442, + "index_ms": 930.5787499979488 }, { "repo": "trpc", @@ -339,8 +339,8 @@ "chunks": 362, "ndcg5": 0.6949834508995433, "ndcg10": 0.7431267778412411, - "p50_ms": 0.7703330047661439, - "index_ms": 114.18266699911328 + "p50_ms": 0.8817499983706512, + "index_ms": 121.80820800131187 } ] }
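Since full runs land in benchmarks/results/, drift between commits can be checked directly from the saved files. A minimal sketch of a comparison helper; the script name and CLI are hypothetical (not in the repo), and since this series keeps only the latest file, the older run would first be restored from git history:

```python
import json
import sys
from pathlib import Path

# Hypothetical usage: python compare_results.py old.json new.json
old_path, new_path = map(Path, sys.argv[1:3])
old = json.loads(old_path.read_text(encoding="utf-8"))
new = json.loads(new_path.read_text(encoding="utf-8"))

# Overall summary deltas between the two runs.
for key in ("ndcg10", "p50_ms", "index_ms"):
    before, after = old["summary"][key], new["summary"][key]
    print(f"{key:<9} {before:>9} -> {after:>9}  ({after - before:+.3f})")

# Per-repo NDCG@10 drift, largest movers first.
old_repos = {r["repo"]: r for r in old["repos"]}
deltas = sorted(
    ((r["ndcg10"] - old_repos[r["repo"]]["ndcg10"], r["repo"]) for r in new["repos"] if r["repo"] in old_repos),
    key=lambda pair: abs(pair[0]),
    reverse=True,
)
for delta, repo in deltas[:5]:
    print(f"{repo:<20} ndcg@10 {delta:+.4f}")
```

Between the last two runs in this series, ndcg@10 is identical (0.8668) while p50 moves from 0.624 ms to 0.698 ms, which reads as wall-clock noise rather than an effect of the BENCH_ROOT move.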