From f5f3d7312bcf9244498e7ba8c223dfac38c1eaf0 Mon Sep 17 00:00:00 2001
From: Pringled
Date: Tue, 14 Apr 2026 19:38:04 +0200
Subject: [PATCH 01/15] Add benchmarks directory with cleaned-up bench_hybrid

---
 benchmarks/README.md                          |  21 ++
 benchmarks/__init__.py                        |   1 +
 benchmarks/annotations/aiohttp.json           |  90 +++++++
 benchmarks/annotations/alamofire.json         |  99 +++++++
 benchmarks/annotations/axios.json             |  46 ++++
 benchmarks/annotations/cats.json              |  99 +++++++
 benchmarks/annotations/chi.json               |  32 +++
 benchmarks/annotations/cobra.json             |  32 +++
 benchmarks/annotations/commons-lang.json      |  43 +++
 benchmarks/annotations/express.json           |  32 +++
 benchmarks/annotations/fastapi.json           | 112 ++++++++
 benchmarks/annotations/flask.json             |  90 +++++++
 benchmarks/annotations/gin.json               |  40 +++
 benchmarks/annotations/gson.json              |  32 +++
 benchmarks/annotations/guzzle.json            |  92 +++++++
 benchmarks/annotations/httpx.json             |  90 +++++++
 benchmarks/annotations/jackson-databind.json  |  49 ++++
 benchmarks/annotations/ktor.json              |  93 +++++++
 benchmarks/annotations/laravel-framework.json |  32 +++
 .../annotations/messagepack-csharp.json       |  98 +++++++
 benchmarks/annotations/model2vec.json         |  82 ++++++
 benchmarks/annotations/monolog.json           |  52 ++++
 benchmarks/annotations/pydantic.json          |  82 ++++++
 benchmarks/annotations/rack.json              |  44 ++++
 benchmarks/annotations/rails.json             |  49 ++++
 benchmarks/annotations/redux.json             |  32 +++
 benchmarks/annotations/requests.json          | 109 ++++++++
 benchmarks/annotations/sinatra.json           | 100 +++++++
 benchmarks/annotations/starlette.json         |  90 +++++++
 benchmarks/annotations/tokio.json             |  99 +++++++
 benchmarks/annotations/trpc.json              |  99 +++++++
 benchmarks/bench_hybrid.py                    | 248 ++++++++++++++++++
 benchmarks/common.py                          | 194 ++++++++++++++
 benchmarks/repos.json                         | 202 ++++++++++++++
 benchmarks/sync_repos.py                      |  69 +++++
 pyproject.toml                                |   2 +-
 36 files changed, 2775 insertions(+), 1 deletion(-)
 create mode 100644 benchmarks/README.md
 create mode 100644 benchmarks/__init__.py
 create mode 100644 benchmarks/annotations/aiohttp.json
 create mode 100644 benchmarks/annotations/alamofire.json
 create mode 100644 benchmarks/annotations/axios.json
 create mode 100644 benchmarks/annotations/cats.json
 create mode 100644 benchmarks/annotations/chi.json
 create mode 100644 benchmarks/annotations/cobra.json
 create mode 100644 benchmarks/annotations/commons-lang.json
 create mode 100644 benchmarks/annotations/express.json
 create mode 100644 benchmarks/annotations/fastapi.json
 create mode 100644 benchmarks/annotations/flask.json
 create mode 100644 benchmarks/annotations/gin.json
 create mode 100644 benchmarks/annotations/gson.json
 create mode 100644 benchmarks/annotations/guzzle.json
 create mode 100644 benchmarks/annotations/httpx.json
 create mode 100644 benchmarks/annotations/jackson-databind.json
 create mode 100644 benchmarks/annotations/ktor.json
 create mode 100644 benchmarks/annotations/laravel-framework.json
 create mode 100644 benchmarks/annotations/messagepack-csharp.json
 create mode 100644 benchmarks/annotations/model2vec.json
 create mode 100644 benchmarks/annotations/monolog.json
 create mode 100644 benchmarks/annotations/pydantic.json
 create mode 100644 benchmarks/annotations/rack.json
 create mode 100644 benchmarks/annotations/rails.json
 create mode 100644 benchmarks/annotations/redux.json
 create mode 100644 benchmarks/annotations/requests.json
 create mode 100644 benchmarks/annotations/sinatra.json
 create mode 100644 benchmarks/annotations/starlette.json
 create mode 100644 benchmarks/annotations/tokio.json
 create mode 100644 benchmarks/annotations/trpc.json
 create mode 100644 benchmarks/bench_hybrid.py
 create mode 100644 benchmarks/common.py
 create mode 100644 benchmarks/repos.json
 create mode 100644 benchmarks/sync_repos.py

diff --git a/benchmarks/README.md b/benchmarks/README.md
new file mode 100644
index 0000000..8d6d906
--- /dev/null
+++ b/benchmarks/README.md
@@ -0,0 +1,21 @@
+# Benchmarks
+
+Reproducible local benchmarks for `semble`.
+
+Pinned repositories live in `repos.json` and are checked out into `/tmp/bench`.
+
+## Setup
+
+```bash
+uv run python -m benchmarks.sync_repos
+uv run python -m benchmarks.sync_repos --check
+```
+
+## Run
+
+```bash
+uv run python -m benchmarks.bench_hybrid
+uv run python -m benchmarks.bench_hybrid --cache
+uv run python -m benchmarks.bench_hybrid --repo fastapi --repo axios
+uv run python -m benchmarks.bench_hybrid --language python
+```
diff --git a/benchmarks/__init__.py b/benchmarks/__init__.py
new file mode 100644
index 0000000..793aef7
--- /dev/null
+++ b/benchmarks/__init__.py
@@ -0,0 +1 @@
+"""Benchmark and eval tooling for semble."""
diff --git a/benchmarks/annotations/aiohttp.json b/benchmarks/annotations/aiohttp.json
new file mode 100644
index 0000000..ea00500
--- /dev/null
+++ b/benchmarks/annotations/aiohttp.json
@@ -0,0 +1,90 @@
+[
+  {
+    "query": "how the async HTTP client session works",
+    "relevant": ["aiohttp/client.py"],
+    "secondary": []
+  },
+  {
+    "query": "connection pooling and TCP connector",
+    "relevant": ["aiohttp/connector.py"],
+    "secondary": []
+  },
+  {
+    "query": "WebSocket client implementation",
+    "relevant": ["aiohttp/client_ws.py"],
+    "secondary": ["aiohttp/_websocket/reader.py"]
+  },
+  {
+    "query": "request and response object internals",
+    "relevant": ["aiohttp/client_reqrep.py"],
+    "secondary": []
+  },
+  {
+    "query": "URL routing and resource dispatching",
+    "relevant": ["aiohttp/web_urldispatcher.py"],
+    "secondary": []
+  },
+  {
+    "query": "server-side middleware execution",
+    "relevant": ["aiohttp/web_middlewares.py"],
+    "secondary": ["aiohttp/web_app.py"]
+  },
+  {
+    "query": "multipart and form data handling",
+    "relevant": ["aiohttp/multipart.py"],
+    "secondary": ["aiohttp/formdata.py"]
+  },
+  {
+    "query": "response streaming and payload",
+    "relevant": ["aiohttp/streams.py"],
+    "secondary": ["aiohttp/payload.py"]
+  },
+  {
+    "query": "HTTP exception types and error responses",
+    "relevant": ["aiohttp/web_exceptions.py"],
+    "secondary": ["aiohttp/client_exceptions.py"]
+  },
+  {
+    "query": "request tracing and observability hooks",
+    "relevant": ["aiohttp/tracing.py"],
+    "secondary": []
+  },
+  {
+    "query": "how chunked transfer encoding is parsed",
+    "relevant": ["aiohttp/http_parser.py"],
+    "secondary": []
+  },
+  {
+    "query": "how DNS resolution is handled asynchronously",
+    "relevant": ["aiohttp/resolver.py"],
+    "secondary": ["aiohttp/connector.py"]
+  },
+  {
+    "query": "how backpressure and flow control work in streaming",
+    "relevant": ["aiohttp/streams.py"],
+    "secondary": []
+  },
+  {
+    "query": "how connection draining and cleanup happen on close",
+    "relevant": ["aiohttp/connector.py"],
+    "secondary": ["aiohttp/client_proto.py"]
+  },
+  {
+    "query": "how the web application sets up and tears down on startup",
+    "relevant": ["aiohttp/web_app.py"],
+    "secondary": ["aiohttp/web_runner.py"]
+  },
+  {"query": "ClientSession", "relevant": ["aiohttp/client.py"], "secondary": []},
+  {"query": "TCPConnector", "relevant": ["aiohttp/connector.py"], "secondary": []},
+  {"query": "UrlDispatcher", "relevant": ["aiohttp/web_urldispatcher.py"], "secondary": []},
+  {"query": "ClientResponse", "relevant": ["aiohttp/client_reqrep.py"], "secondary": []},
+  {"query": "TraceConfig", "relevant": ["aiohttp/tracing.py"], "secondary": []},
+  {
+    "query": "how ClientSession acquires and releases connections from the connector",
+    "relevant": ["aiohttp/client.py"],
+    "secondary": ["aiohttp/connector.py"],
+    "category": "architecture",
+    "seed": {"path": "aiohttp/client.py", "line": 374},
+    "related": ["aiohttp/connector.py"]
+  }
+]
diff --git a/benchmarks/annotations/alamofire.json b/benchmarks/annotations/alamofire.json
new file mode 100644
index 0000000..df5ee2b
--- /dev/null
+++ b/benchmarks/annotations/alamofire.json
@@ -0,0 +1,99 @@
+[
+  {
+    "query": "how the Session manages the underlying URLSession and dispatches requests",
+    "relevant": ["Source/Core/Session.swift"],
+    "secondary": ["Source/Core/SessionDelegate.swift"],
+    "category": "architecture"
+  },
+  {
+    "query": "how request retrying is implemented with backoff and retry conditions",
+    "relevant": ["Source/Features/RetryPolicy.swift"],
+    "secondary": ["Source/Features/RequestInterceptor.swift"],
+    "category": "architecture"
+  },
+  {
+    "query": "how response validation checks status codes and content types",
+    "relevant": ["Source/Features/Validation.swift"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "how response serialization decodes JSON, Decodable, and strings",
+    "relevant": ["Source/Features/ResponseSerialization.swift"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "how multipart form data encodes fields and file attachments",
+    "relevant": ["Source/Features/MultipartFormData.swift"],
+    "secondary": ["Source/Features/MultipartUpload.swift"],
+    "category": "semantic"
+  },
+  {
+    "query": "how authentication interceptors handle credential challenges",
+    "relevant": ["Source/Features/AuthenticationInterceptor.swift"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "how server trust evaluation handles SSL certificate pinning",
+    "relevant": ["Source/Features/ServerTrustEvaluation.swift"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "how URL query parameters are encoded from Encodable values",
+    "relevant": ["Source/Features/URLEncodedFormEncoder.swift"],
+    "secondary": ["Source/Core/ParameterEncoder.swift"],
+    "category": "semantic"
+  },
+  {
+    "query": "how network reachability is monitored to detect connectivity changes",
+    "relevant": ["Source/Features/NetworkReachabilityManager.swift"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "how download requests save responses to disk",
+    "relevant": ["Source/Core/DownloadRequest.swift"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "how request and response events are logged via EventMonitor",
+    "relevant": ["Source/Features/EventMonitor.swift"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "how the Request class tracks lifecycle state transitions",
+    "relevant": ["Source/Core/Request.swift"],
+    "secondary": [],
+    "category": "architecture"
+  },
+  {
+    "query": "Session",
+    "relevant": ["Source/Core/Session.swift"],
+    "secondary": []
+  },
+  {
+    "query": "AFError",
+    "relevant": ["Source/Core/AFError.swift"],
+    "secondary": []
+  },
+  {
+    "query": "RetryPolicy",
+    "relevant": ["Source/Features/RetryPolicy.swift"],
+    "secondary": []
+  },
+  {
+    "query": "ServerTrustEvaluating",
+    "relevant": ["Source/Features/ServerTrustEvaluation.swift"],
+    "secondary": []
+  },
+  {
+    "query": "HTTPHeaders",
"relevant": ["Source/Core/HTTPHeaders.swift"], + "secondary": [] + } +] diff --git a/benchmarks/annotations/axios.json b/benchmarks/annotations/axios.json new file mode 100644 index 0000000..3466c21 --- /dev/null +++ b/benchmarks/annotations/axios.json @@ -0,0 +1,46 @@ +[ + { + "query": "how HTTP requests are dispatched through the configured adapter", + "relevant": ["lib/core/dispatchRequest.js"], + "secondary": ["lib/adapters/adapters.js"], + "category": "architecture" + }, + { + "query": "request and response interceptors", + "relevant": ["lib/core/InterceptorManager.js"], + "secondary": ["lib/core/Axios.js"], + "category": "semantic" + }, + { + "query": "node HTTP adapter implementation", + "relevant": ["lib/adapters/http.js"], + "secondary": [], + "category": "semantic" + }, + { + "query": "how config defaults are merged before a request is sent", + "relevant": ["lib/core/mergeConfig.js"], + "secondary": ["lib/core/Axios.js"], + "category": "architecture" + }, + { + "query": "Axios", + "relevant": [{"path": "lib/core/Axios.js", "start_line": 22, "end_line": 61}], + "secondary": [], + "category": "symbol", + "seed": {"path": "lib/core/Axios.js", "line": 46}, + "related": [{"path": "lib/core/Axios.js", "start_line": 179, "end_line": 239}] + }, + { + "query": "InterceptorManager", + "relevant": [{"path": "lib/core/InterceptorManager.js", "start_line": 5, "end_line": 33}], + "secondary": [], + "category": "symbol" + }, + { + "query": "mergeConfig", + "relevant": [{"path": "lib/core/mergeConfig.js", "start_line": 17, "end_line": 106}], + "secondary": [], + "category": "symbol" + } +] diff --git a/benchmarks/annotations/cats.json b/benchmarks/annotations/cats.json new file mode 100644 index 0000000..4140db6 --- /dev/null +++ b/benchmarks/annotations/cats.json @@ -0,0 +1,99 @@ +[ + { + "query": "how the Functor type class defines mapping over a context", + "relevant": ["core/src/main/scala/cats/Functor.scala"], + "secondary": [], + "category": "architecture" + }, + { + "query": "how Monad composes dependent effectful computations with flatMap", + "relevant": ["core/src/main/scala/cats/Monad.scala"], + "secondary": ["core/src/main/scala/cats/FlatMap.scala"], + "category": "architecture" + }, + { + "query": "how Applicative combines independent effects", + "relevant": ["core/src/main/scala/cats/Applicative.scala"], + "secondary": ["core/src/main/scala/cats/Apply.scala"], + "category": "architecture" + }, + { + "query": "how errors are handled and recovered in ApplicativeError and MonadError", + "relevant": ["core/src/main/scala/cats/ApplicativeError.scala"], + "secondary": ["core/src/main/scala/cats/MonadError.scala"], + "category": "semantic" + }, + { + "query": "how Validated accumulates errors across independent computations", + "relevant": ["core/src/main/scala/cats/data/Validated.scala"], + "secondary": [], + "category": "semantic" + }, + { + "query": "how EitherT combines the Either monad with another effect", + "relevant": ["core/src/main/scala/cats/data/EitherT.scala"], + "secondary": [], + "category": "semantic" + }, + { + "query": "how Kleisli composes functions that return monadic values", + "relevant": ["core/src/main/scala/cats/data/Kleisli.scala"], + "secondary": [], + "category": "semantic" + }, + { + "query": "how Eval provides lazy and memoized evaluation", + "relevant": ["core/src/main/scala/cats/Eval.scala"], + "secondary": [], + "category": "semantic" + }, + { + "query": "how Foldable traverses and reduces elements in a container", + "relevant": 
["core/src/main/scala/cats/Foldable.scala"], + "secondary": [], + "category": "semantic" + }, + { + "query": "how Chain provides O(1) concatenation as an alternative to List", + "relevant": ["core/src/main/scala/cats/data/Chain.scala"], + "secondary": [], + "category": "semantic" + }, + { + "query": "how natural transformations map between type constructors", + "relevant": ["core/src/main/scala/cats/arrow/FunctionK.scala"], + "secondary": [], + "category": "semantic" + }, + { + "query": "how the Contravariant functor reverses the mapping direction", + "relevant": ["core/src/main/scala/cats/Contravariant.scala"], + "secondary": [], + "category": "semantic" + }, + { + "query": "Monad", + "relevant": ["core/src/main/scala/cats/Monad.scala"], + "secondary": [] + }, + { + "query": "Functor", + "relevant": ["core/src/main/scala/cats/Functor.scala"], + "secondary": [] + }, + { + "query": "EitherT", + "relevant": ["core/src/main/scala/cats/data/EitherT.scala"], + "secondary": [] + }, + { + "query": "Validated", + "relevant": ["core/src/main/scala/cats/data/Validated.scala"], + "secondary": [] + }, + { + "query": "Kleisli", + "relevant": ["core/src/main/scala/cats/data/Kleisli.scala"], + "secondary": [] + } +] diff --git a/benchmarks/annotations/chi.json b/benchmarks/annotations/chi.json new file mode 100644 index 0000000..38223b5 --- /dev/null +++ b/benchmarks/annotations/chi.json @@ -0,0 +1,32 @@ +[ + { + "query": "HTTP router and middleware composition", + "relevant": ["mux.go"], + "secondary": ["chain.go"], + "category": "architecture" + }, + { + "query": "radix tree path matching", + "relevant": ["tree.go"], + "secondary": [], + "category": "semantic" + }, + { + "query": "request routing context storage", + "relevant": ["context.go"], + "secondary": ["mux.go"], + "category": "architecture" + }, + { + "query": "request logging middleware", + "relevant": ["middleware/logger.go"], + "secondary": [], + "category": "semantic" + }, + { + "query": "Mux", + "relevant": ["mux.go"], + "secondary": [], + "category": "symbol" + } +] diff --git a/benchmarks/annotations/cobra.json b/benchmarks/annotations/cobra.json new file mode 100644 index 0000000..c8d97a2 --- /dev/null +++ b/benchmarks/annotations/cobra.json @@ -0,0 +1,32 @@ +[ + { + "query": "core command execution and command tree", + "relevant": ["command.go"], + "secondary": [], + "category": "architecture" + }, + { + "query": "shell completion request handling", + "relevant": ["completions.go"], + "secondary": [], + "category": "semantic" + }, + { + "query": "bash completion generation", + "relevant": ["bash_completions.go"], + "secondary": ["completions.go"], + "category": "semantic" + }, + { + "query": "positional argument validators", + "relevant": ["args.go"], + "secondary": [], + "category": "semantic" + }, + { + "query": "Command", + "relevant": ["command.go"], + "secondary": [], + "category": "symbol" + } +] diff --git a/benchmarks/annotations/commons-lang.json b/benchmarks/annotations/commons-lang.json new file mode 100644 index 0000000..9d51c0c --- /dev/null +++ b/benchmarks/annotations/commons-lang.json @@ -0,0 +1,43 @@ +[ + { + "query": "null-safe string operations and text helpers", + "relevant": ["src/main/java/org/apache/commons/lang3/StringUtils.java"], + "secondary": [], + "category": "semantic" + }, + { + "query": "argument validation helpers and exception rules", + "relevant": ["src/main/java/org/apache/commons/lang3/Validate.java"], + "secondary": [], + "category": "semantic" + }, + { + "query": "reflection-based equals 
implementation", + "relevant": ["src/main/java/org/apache/commons/lang3/builder/EqualsBuilder.java"], + "secondary": ["src/main/java/org/apache/commons/lang3/ClassUtils.java"], + "category": "architecture" + }, + { + "query": "range object with inclusive bounds and comparator support", + "relevant": ["src/main/java/org/apache/commons/lang3/Range.java"], + "secondary": [], + "category": "semantic" + }, + { + "query": "StringUtils", + "relevant": ["src/main/java/org/apache/commons/lang3/StringUtils.java"], + "secondary": [], + "category": "symbol" + }, + { + "query": "reflection equals builder internals", + "relevant": [{"path": "src/main/java/org/apache/commons/lang3/builder/EqualsBuilder.java", "start_line": 89, "end_line": 99}], + "secondary": [], + "category": "architecture", + "seed": {"path": "src/main/java/org/apache/commons/lang3/builder/EqualsBuilder.java", "line": 89}, + "related": [ + {"path": "src/main/java/org/apache/commons/lang3/builder/HashCodeBuilder.java", "start_line": 105, "end_line": 141}, + {"path": "src/main/java/org/apache/commons/lang3/builder/IDKey.java", "start_line": 27, "end_line": 74} + ] + } +] diff --git a/benchmarks/annotations/express.json b/benchmarks/annotations/express.json new file mode 100644 index 0000000..dd82ccf --- /dev/null +++ b/benchmarks/annotations/express.json @@ -0,0 +1,32 @@ +[ + { + "query": "application initialization and default configuration", + "relevant": ["lib/application.js"], + "secondary": ["lib/express.js"], + "category": "architecture" + }, + { + "query": "request API helpers and header access", + "relevant": ["lib/request.js"], + "secondary": [], + "category": "semantic" + }, + { + "query": "response sending and content negotiation", + "relevant": ["lib/response.js"], + "secondary": [], + "category": "semantic" + }, + { + "query": "template view lookup and rendering", + "relevant": ["lib/view.js"], + "secondary": ["lib/application.js"], + "category": "architecture" + }, + { + "query": "response", + "relevant": ["lib/response.js"], + "secondary": [], + "category": "symbol" + } +] diff --git a/benchmarks/annotations/fastapi.json b/benchmarks/annotations/fastapi.json new file mode 100644 index 0000000..23a990b --- /dev/null +++ b/benchmarks/annotations/fastapi.json @@ -0,0 +1,112 @@ +[ + { + "query": "how does dependency injection work", + "relevant": ["fastapi/dependencies/utils.py"], + "secondary": ["fastapi/dependencies/models.py", "fastapi/params.py"] + }, + { + "query": "request validation and error handling", + "relevant": ["fastapi/exceptions.py"], + "secondary": ["fastapi/exception_handlers.py"] + }, + { + "query": "how are routes registered", + "relevant": ["fastapi/routing.py"], + "secondary": ["fastapi/applications.py"] + }, + { + "query": "websocket endpoint implementation", + "relevant": ["fastapi/websockets.py"], + "secondary": ["fastapi/routing.py"] + }, + { + "query": "OpenAPI schema generation", + "relevant": ["fastapi/openapi/utils.py"], + "secondary": ["fastapi/openapi/models.py"] + }, + { + "query": "middleware stack and CORS", + "relevant": ["fastapi/middleware/cors.py"], + "secondary": ["fastapi/applications.py"] + }, + { + "query": "file upload handling", + "relevant": ["fastapi/datastructures.py"], + "secondary": [] + }, + { + "query": "response model serialization", + "relevant": ["fastapi/encoders.py"], + "secondary": ["fastapi/routing.py"] + }, + { + "query": "background tasks", + "relevant": ["fastapi/background.py"], + "secondary": [] + }, + { + "query": "security and OAuth2 authentication", + 
"relevant": ["fastapi/security/oauth2.py"], + "secondary": ["fastapi/security/http.py"] + }, + { + "query": "how is response validation and serialization applied before sending", + "relevant": ["fastapi/routing.py"], + "secondary": ["fastapi/encoders.py"] + }, + { + "query": "how are nested and sub-dependencies resolved", + "relevant": ["fastapi/dependencies/utils.py"], + "secondary": [] + }, + { + "query": "how does FastAPI run sync route functions without blocking", + "relevant": ["fastapi/concurrency.py"], + "secondary": ["fastapi/routing.py"] + }, + { + "query": "how are path parameters extracted and type-converted", + "relevant": ["fastapi/routing.py"], + "secondary": ["fastapi/dependencies/utils.py"] + }, + { + "query": "how does exception propagation work through dependency injection", + "relevant": ["fastapi/dependencies/utils.py"], + "secondary": ["fastapi/routing.py"] + }, + { + "query": "Depends", + "relevant": [{"path": "fastapi/params.py", "start_line": 746, "end_line": 749}], + "secondary": ["fastapi/param_functions.py"], + "category": "symbol", + "seed": {"path": "fastapi/params.py", "line": 746}, + "related": [ + {"path": "fastapi/params.py", "start_line": 434, "end_line": 468}, + {"path": "fastapi/params.py", "start_line": 627, "end_line": 664} + ] + }, + { + "query": "HTTPException", + "relevant": [{"path": "fastapi/exceptions.py", "start_line": 17, "end_line": 66}], + "secondary": [], + "category": "symbol" + }, + { + "query": "APIRouter", + "relevant": [{"path": "fastapi/routing.py", "start_line": 1005, "end_line": 1030}], + "secondary": [], + "category": "symbol" + }, + { + "query": "BackgroundTasks", + "relevant": [{"path": "fastapi/background.py", "start_line": 11, "end_line": 39}], + "secondary": [], + "category": "symbol" + }, + { + "query": "OAuth2PasswordBearer", + "relevant": [{"path": "fastapi/security/oauth2.py", "start_line": 433, "end_line": 471}], + "secondary": [], + "category": "symbol" + } +] diff --git a/benchmarks/annotations/flask.json b/benchmarks/annotations/flask.json new file mode 100644 index 0000000..de9fd41 --- /dev/null +++ b/benchmarks/annotations/flask.json @@ -0,0 +1,90 @@ +[ + { + "query": "how are routes registered", + "relevant": ["src/flask/sansio/scaffold.py"], + "secondary": ["src/flask/sansio/app.py"] + }, + { + "query": "request context handling", + "relevant": ["src/flask/ctx.py"], + "secondary": [] + }, + { + "query": "session management", + "relevant": ["src/flask/sessions.py"], + "secondary": [] + }, + { + "query": "blueprint registration", + "relevant": ["src/flask/sansio/blueprints.py"], + "secondary": ["src/flask/blueprints.py"] + }, + { + "query": "configuration loading", + "relevant": ["src/flask/config.py"], + "secondary": [] + }, + { + "query": "template rendering", + "relevant": ["src/flask/templating.py"], + "secondary": [] + }, + { + "query": "error handlers", + "relevant": ["src/flask/sansio/scaffold.py"], + "secondary": ["src/flask/app.py"] + }, + { + "query": "CLI commands", + "relevant": ["src/flask/cli.py"], + "secondary": [] + }, + { + "query": "testing client", + "relevant": ["src/flask/testing.py"], + "secondary": [] + }, + { + "query": "JSON response helpers", + "relevant": ["src/flask/json/provider.py"], + "secondary": ["src/flask/helpers.py"] + }, + { + "query": "how does the application context push and pop around requests", + "relevant": ["src/flask/ctx.py"], + "secondary": ["src/flask/app.py"] + }, + { + "query": "how does Flask select which error handler to invoke for an exception", + "relevant": 
["src/flask/app.py"], + "secondary": ["src/flask/sansio/app.py"] + }, + { + "query": "how is g used to store data scoped to the current request", + "relevant": ["src/flask/ctx.py"], + "secondary": ["src/flask/globals.py"] + }, + { + "query": "how are request lifecycle signals emitted", + "relevant": ["src/flask/signals.py"], + "secondary": ["src/flask/app.py"] + }, + { + "query": "how does Flask convert a view return value into a response object", + "relevant": ["src/flask/app.py"], + "secondary": ["src/flask/wrappers.py"] + }, + {"query": "Blueprint", "relevant": ["src/flask/sansio/blueprints.py"], "secondary": ["src/flask/blueprints.py"]}, + {"query": "render_template", "relevant": ["src/flask/templating.py"], "secondary": []}, + {"query": "Flask", "relevant": ["src/flask/app.py"], "secondary": []}, + {"query": "session", "relevant": ["src/flask/sessions.py"], "secondary": ["src/flask/globals.py"]}, + {"query": "g", "relevant": ["src/flask/globals.py"], "secondary": ["src/flask/ctx.py"]}, + { + "query": "how Blueprint inherits routing behaviour from its sansio base class", + "relevant": ["src/flask/sansio/blueprints.py"], + "secondary": ["src/flask/blueprints.py"], + "category": "architecture", + "seed": {"path": "src/flask/blueprints.py", "line": 18}, + "related": ["src/flask/sansio/blueprints.py"] + } +] diff --git a/benchmarks/annotations/gin.json b/benchmarks/annotations/gin.json new file mode 100644 index 0000000..2ba38bc --- /dev/null +++ b/benchmarks/annotations/gin.json @@ -0,0 +1,40 @@ +[ + { + "query": "how routes are grouped and registered", + "relevant": ["routergroup.go"], + "secondary": ["gin.go"], + "category": "architecture" + }, + { + "query": "radix tree path matching", + "relevant": ["tree.go"], + "secondary": [], + "category": "semantic" + }, + { + "query": "request context lifecycle and helpers", + "relevant": ["context.go"], + "secondary": [], + "category": "semantic" + }, + { + "query": "panic recovery middleware", + "relevant": ["recovery.go"], + "secondary": [], + "category": "semantic" + }, + { + "query": "JSON request binding and validation", + "relevant": ["binding/json.go"], + "secondary": ["binding/default_validator.go"], + "category": "architecture" + }, + { + "query": "how the Gin Engine embeds RouterGroup and uses Context per request", + "relevant": ["gin.go"], + "secondary": ["routergroup.go", "context.go"], + "category": "architecture", + "seed": {"path": "gin.go", "line": 92}, + "related": ["context.go", "routergroup.go"] + } +] diff --git a/benchmarks/annotations/gson.json b/benchmarks/annotations/gson.json new file mode 100644 index 0000000..46c2f5e --- /dev/null +++ b/benchmarks/annotations/gson.json @@ -0,0 +1,32 @@ +[ + { + "query": "main Gson API for toJson and fromJson", + "relevant": ["gson/src/main/java/com/google/gson/Gson.java"], + "secondary": [], + "category": "semantic" + }, + { + "query": "GsonBuilder configuration and create", + "relevant": ["gson/src/main/java/com/google/gson/GsonBuilder.java"], + "secondary": [], + "category": "semantic" + }, + { + "query": "reflection-based field serialization and deserialization", + "relevant": ["gson/src/main/java/com/google/gson/internal/bind/ReflectiveTypeAdapterFactory.java"], + "secondary": [], + "category": "architecture" + }, + { + "query": "TypeAdapter", + "relevant": ["gson/src/main/java/com/google/gson/TypeAdapter.java"], + "secondary": [], + "category": "symbol" + }, + { + "query": "JsonParser", + "relevant": ["gson/src/main/java/com/google/gson/JsonParser.java"], + "secondary": [], + 
"category": "symbol" + } +] diff --git a/benchmarks/annotations/guzzle.json b/benchmarks/annotations/guzzle.json new file mode 100644 index 0000000..2bbbf02 --- /dev/null +++ b/benchmarks/annotations/guzzle.json @@ -0,0 +1,92 @@ +[ + { + "query": "HTTP client request sending and defaults", + "relevant": ["src/Client.php"], + "secondary": ["src/ClientTrait.php"], + "category": "semantic" + }, + { + "query": "middleware handler stack composition", + "relevant": ["src/HandlerStack.php"], + "secondary": ["src/Middleware.php"], + "category": "architecture" + }, + { + "query": "retry middleware and exponential backoff", + "relevant": ["src/RetryMiddleware.php"], + "secondary": [], + "category": "semantic" + }, + { + "query": "cookie jar implementation", + "relevant": ["src/Cookie/CookieJar.php"], + "secondary": ["src/Cookie/SetCookie.php"], + "category": "semantic" + }, + { + "query": "redirect following and location header handling", + "relevant": ["src/RedirectMiddleware.php"], + "secondary": [], + "category": "semantic" + }, + { + "query": "curl-based HTTP handler and connection management", + "relevant": ["src/Handler/CurlFactory.php"], + "secondary": ["src/Handler/CurlHandler.php"], + "category": "semantic" + }, + { + "query": "mock handler for simulating HTTP responses in tests", + "relevant": ["src/Handler/MockHandler.php"], + "secondary": [], + "category": "semantic" + }, + { + "query": "concurrent request pool with limited parallelism", + "relevant": ["src/Pool.php"], + "secondary": [], + "category": "semantic" + }, + { + "query": "available request options and their configuration", + "relevant": ["src/RequestOptions.php"], + "secondary": [], + "category": "semantic" + }, + { + "query": "HTTP message formatting and request logging", + "relevant": ["src/MessageFormatter.php"], + "secondary": [], + "category": "semantic" + }, + { + "query": "how request body and content-type are prepared before sending", + "relevant": ["src/PrepareBodyMiddleware.php"], + "secondary": [], + "category": "semantic" + }, + { + "query": "exception hierarchy for HTTP and transfer errors", + "relevant": ["src/Exception/RequestException.php"], + "secondary": ["src/Exception/GuzzleException.php"], + "category": "semantic" + }, + { + "query": "Client", + "relevant": ["src/Client.php"], + "secondary": [], + "category": "symbol" + }, + { + "query": "HandlerStack", + "relevant": ["src/HandlerStack.php"], + "secondary": [], + "category": "symbol" + }, + { + "query": "RedirectMiddleware", + "relevant": ["src/RedirectMiddleware.php"], + "secondary": [], + "category": "symbol" + } +] diff --git a/benchmarks/annotations/httpx.json b/benchmarks/annotations/httpx.json new file mode 100644 index 0000000..08edd0c --- /dev/null +++ b/benchmarks/annotations/httpx.json @@ -0,0 +1,90 @@ +[ + { + "query": "how are HTTP requests sent", + "relevant": ["httpx/_client.py"], + "secondary": [] + }, + { + "query": "authentication and credentials", + "relevant": ["httpx/_auth.py"], + "secondary": [] + }, + { + "query": "connection pooling and transport", + "relevant": ["httpx/_transports/default.py"], + "secondary": ["httpx/_transports/base.py"] + }, + { + "query": "URL parsing and construction", + "relevant": ["httpx/_urlparse.py"], + "secondary": ["httpx/_urls.py"] + }, + { + "query": "response decoding and content", + "relevant": ["httpx/_decoders.py"], + "secondary": ["httpx/_models.py"] + }, + { + "query": "timeout configuration", + "relevant": ["httpx/_config.py"], + "secondary": [] + }, + { + "query": "cookie handling", + 
"relevant": ["httpx/_models.py"], + "secondary": ["httpx/_client.py"] + }, + { + "query": "multipart file upload", + "relevant": ["httpx/_multipart.py"], + "secondary": ["httpx/_content.py"] + }, + { + "query": "redirect following", + "relevant": ["httpx/_client.py"], + "secondary": [] + }, + { + "query": "error and exception types", + "relevant": ["httpx/_exceptions.py"], + "secondary": [] + }, + { + "query": "how does digest authentication handle the challenge-response flow", + "relevant": ["httpx/_auth.py"], + "secondary": [] + }, + { + "query": "how are keep-alive connections managed and reused", + "relevant": ["httpx/_transports/default.py"], + "secondary": ["httpx/_config.py"] + }, + { + "query": "how does streaming response body iteration work", + "relevant": ["httpx/_models.py"], + "secondary": ["httpx/_decoders.py"] + }, + { + "query": "how are query parameters encoded into the URL", + "relevant": ["httpx/_urls.py"], + "secondary": ["httpx/_urlparse.py"] + }, + { + "query": "how are retries and transport errors surfaced to the caller", + "relevant": ["httpx/_exceptions.py"], + "secondary": ["httpx/_transports/default.py"] + }, + {"query": "DigestAuth", "relevant": ["httpx/_auth.py"], "secondary": []}, + {"query": "AsyncClient", "relevant": ["httpx/_client.py"], "secondary": []}, + {"query": "Timeout", "relevant": ["httpx/_config.py"], "secondary": []}, + {"query": "HTTPStatusError", "relevant": ["httpx/_exceptions.py"], "secondary": []}, + {"query": "URL", "relevant": ["httpx/_urls.py"], "secondary": []}, + { + "query": "how the HTTP transport backend sends requests over the wire", + "relevant": ["httpx/_transports/default.py"], + "secondary": ["httpx/_transports/base.py"], + "category": "architecture", + "seed": {"path": "httpx/_client.py", "line": 731}, + "related": ["httpx/_transports/default.py"] + } +] diff --git a/benchmarks/annotations/jackson-databind.json b/benchmarks/annotations/jackson-databind.json new file mode 100644 index 0000000..079d2b6 --- /dev/null +++ b/benchmarks/annotations/jackson-databind.json @@ -0,0 +1,49 @@ +[ + { + "query": "ObjectMapper entry point for reading and writing JSON", + "relevant": ["src/main/java/tools/jackson/databind/ObjectMapper.java"], + "secondary": [], + "category": "semantic" + }, + { + "query": "JSON-specific mapper builder", + "relevant": ["src/main/java/tools/jackson/databind/json/JsonMapper.java"], + "secondary": ["src/main/java/tools/jackson/databind/ObjectMapper.java"], + "category": "architecture" + }, + { + "query": "mutable JSON object node with named field operations", + "relevant": ["src/main/java/tools/jackson/databind/node/ObjectNode.java"], + "secondary": ["src/main/java/tools/jackson/databind/node/JsonNodeFactory.java"], + "category": "semantic" + }, + { + "query": "polymorphic type resolution", + "relevant": ["src/main/java/tools/jackson/databind/jsontype/impl/StdTypeResolverBuilder.java"], + "secondary": ["src/main/java/tools/jackson/databind/jsontype/impl/TypeDeserializerBase.java"], + "category": "architecture" + }, + { + "query": "ObjectMapper", + "relevant": [{"path": "src/main/java/tools/jackson/databind/ObjectMapper.java", "start_line": 93, "end_line": 132}], + "secondary": [], + "category": "symbol", + "seed": {"path": "src/main/java/tools/jackson/databind/ObjectMapper.java", "line": 109}, + "related": [ + {"path": "src/main/java/tools/jackson/databind/ObjectMapper.java", "start_line": 356, "end_line": 387}, + {"path": "src/main/java/tools/jackson/databind/cfg/MapperBuilder.java", "start_line": 338, 
"end_line": 344} + ] + }, + { + "query": "JsonMapper", + "relevant": [{"path": "src/main/java/tools/jackson/databind/json/JsonMapper.java", "start_line": 16, "end_line": 44}], + "secondary": ["src/main/java/tools/jackson/databind/ObjectMapper.java"], + "category": "symbol" + }, + { + "query": "ObjectNode", + "relevant": [{"path": "src/main/java/tools/jackson/databind/node/ObjectNode.java", "start_line": 21, "end_line": 60}], + "secondary": [], + "category": "symbol" + } +] diff --git a/benchmarks/annotations/ktor.json b/benchmarks/annotations/ktor.json new file mode 100644 index 0000000..b2aa38b --- /dev/null +++ b/benchmarks/annotations/ktor.json @@ -0,0 +1,93 @@ +[ + { + "query": "how the HttpClient is configured with plugins and an engine", + "relevant": ["ktor-client/ktor-client-core/common/src/io/ktor/client/HttpClient.kt"], + "secondary": ["ktor-client/ktor-client-core/common/src/io/ktor/client/HttpClientConfig.kt"], + "category": "architecture" + }, + { + "query": "how request and response pipelines process interceptors", + "relevant": ["ktor-client/ktor-client-core/common/src/io/ktor/client/request/HttpRequestPipeline.kt"], + "secondary": ["ktor-client/ktor-client-core/common/src/io/ktor/client/statement/HttpResponsePipeline.kt"], + "category": "architecture" + }, + { + "query": "how HTTP caching stores and validates cached responses", + "relevant": ["ktor-client/ktor-client-core/common/src/io/ktor/client/plugins/cache/HttpCache.kt"], + "secondary": ["ktor-client/ktor-client-core/common/src/io/ktor/client/plugins/cache/HttpCacheEntry.kt"], + "category": "architecture" + }, + { + "query": "how HTTP status codes are checked and exceptions raised on failure", + "relevant": ["ktor-client/ktor-client-core/common/src/io/ktor/client/plugins/HttpCallValidator.kt"], + "secondary": [], + "category": "semantic" + }, + { + "query": "how request and response timeouts are enforced", + "relevant": ["ktor-client/ktor-client-core/common/src/io/ktor/client/plugins/HttpTimeout.kt"], + "secondary": [], + "category": "semantic" + }, + { + "query": "how HTTP redirects are followed automatically", + "relevant": ["ktor-client/ktor-client-core/common/src/io/ktor/client/plugins/HttpRedirect.kt"], + "secondary": [], + "category": "semantic" + }, + { + "query": "how cookies are stored and sent with requests", + "relevant": ["ktor-client/ktor-client-core/common/src/io/ktor/client/plugins/cookies/HttpCookies.kt"], + "secondary": ["ktor-client/ktor-client-core/common/src/io/ktor/client/plugins/cookies/AcceptAllCookiesStorage.kt"], + "category": "semantic" + }, + { + "query": "how multipart form data uploads are constructed", + "relevant": ["ktor-client/ktor-client-core/common/src/io/ktor/client/request/forms/FormDataContent.kt"], + "secondary": ["ktor-client/ktor-client-core/common/src/io/ktor/client/request/forms/formDsl.kt"], + "category": "semantic" + }, + { + "query": "how WebSocket connections are established and messages exchanged", + "relevant": ["ktor-client/ktor-client-core/common/src/io/ktor/client/plugins/websocket/WebSockets.kt"], + "secondary": ["ktor-client/ktor-client-core/common/src/io/ktor/client/plugins/websocket/ClientSessions.kt"], + "category": "architecture" + }, + { + "query": "how server-sent events are received and parsed from a streaming response", + "relevant": ["ktor-client/ktor-client-core/common/src/io/ktor/client/plugins/sse/SSE.kt"], + "secondary": ["ktor-client/ktor-client-core/common/src/io/ktor/client/plugins/sse/DefaultClientSSESession.kt"], + "category": "semantic" + }, + { + 
"query": "how the engine abstraction separates the client API from the transport", + "relevant": ["ktor-client/ktor-client-core/common/src/io/ktor/client/engine/HttpClientEngine.kt"], + "secondary": ["ktor-client/ktor-client-core/common/src/io/ktor/client/engine/HttpClientEngineBase.kt"], + "category": "architecture" + }, + { + "query": "HttpClient", + "relevant": ["ktor-client/ktor-client-core/common/src/io/ktor/client/HttpClient.kt"], + "secondary": [] + }, + { + "query": "HttpTimeout", + "relevant": ["ktor-client/ktor-client-core/common/src/io/ktor/client/plugins/HttpTimeout.kt"], + "secondary": [] + }, + { + "query": "HttpRedirect", + "relevant": ["ktor-client/ktor-client-core/common/src/io/ktor/client/plugins/HttpRedirect.kt"], + "secondary": [] + }, + { + "query": "HttpCookies", + "relevant": ["ktor-client/ktor-client-core/common/src/io/ktor/client/plugins/cookies/HttpCookies.kt"], + "secondary": [] + }, + { + "query": "HttpClientEngine", + "relevant": ["ktor-client/ktor-client-core/common/src/io/ktor/client/engine/HttpClientEngine.kt"], + "secondary": [] + } +] diff --git a/benchmarks/annotations/laravel-framework.json b/benchmarks/annotations/laravel-framework.json new file mode 100644 index 0000000..376c82e --- /dev/null +++ b/benchmarks/annotations/laravel-framework.json @@ -0,0 +1,32 @@ +[ + { + "query": "queue connection resolution and connectors", + "relevant": ["src/Illuminate/Queue/QueueManager.php"], + "secondary": [], + "category": "architecture" + }, + { + "query": "database queue implementation", + "relevant": ["src/Illuminate/Queue/DatabaseQueue.php"], + "secondary": [], + "category": "semantic" + }, + { + "query": "queue worker command execution", + "relevant": ["src/Illuminate/Queue/Console/WorkCommand.php"], + "secondary": ["src/Illuminate/Queue/Worker.php"], + "category": "architecture" + }, + { + "query": "session store behavior", + "relevant": ["src/Illuminate/Session/Store.php"], + "secondary": ["src/Illuminate/Session/SessionManager.php"], + "category": "semantic" + }, + { + "query": "QueueManager", + "relevant": ["src/Illuminate/Queue/QueueManager.php"], + "secondary": [], + "category": "symbol" + } +] diff --git a/benchmarks/annotations/messagepack-csharp.json b/benchmarks/annotations/messagepack-csharp.json new file mode 100644 index 0000000..df6a60c --- /dev/null +++ b/benchmarks/annotations/messagepack-csharp.json @@ -0,0 +1,98 @@ +[ + { + "query": "how objects are serialized to MessagePack binary format", + "relevant": ["src/MessagePack/MessagePackSerializer.cs"], + "secondary": ["src/MessagePack/MessagePackWriter.cs"], + "category": "architecture" + }, + { + "query": "how binary data is deserialized back into typed C# objects", + "relevant": ["src/MessagePack/MessagePackSerializer.cs"], + "secondary": ["src/MessagePack/MessagePackReader.cs"], + "category": "architecture" + }, + { + "query": "how custom formatters are registered and resolved for types", + "relevant": ["src/MessagePack/IFormatterResolver.cs"], + "secondary": ["src/MessagePack/Resolvers/CompositeResolver.cs"], + "category": "architecture" + }, + { + "query": "how the dynamic object resolver generates serialization code at runtime", + "relevant": ["src/MessagePack/Resolvers/DynamicObjectResolver.cs"], + "secondary": [], + "category": "semantic" + }, + { + "query": "how collections and arrays are serialized", + "relevant": ["src/MessagePack/Formatters/CollectionFormatter.cs"], + "secondary": [], + "category": "semantic" + }, + { + "query": "how union types with subtypes are handled in 
serialization", + "relevant": ["src/MessagePack/Resolvers/DynamicUnionResolver.cs"], + "secondary": [], + "category": "semantic" + }, + { + "query": "how serializer options control compression and resolver configuration", + "relevant": ["src/MessagePack/MessagePackSerializerOptions.cs"], + "secondary": [], + "category": "semantic" + }, + { + "query": "how enums are serialized as integers or strings", + "relevant": ["src/MessagePack/Resolvers/DynamicEnumResolver.cs"], + "secondary": ["src/MessagePack/Formatters/GenericEnumFormatter`1.cs", "src/MessagePack/Formatters/EnumAsStringFormatter`1.cs"], + "category": "semantic" + }, + { + "query": "reading MessagePack data from a stream incrementally", + "relevant": ["src/MessagePack/MessagePackStreamReader.cs"], + "secondary": [], + "category": "semantic" + }, + { + "query": "how the sequence pool manages buffer reuse during serialization", + "relevant": ["src/MessagePack/SequencePool.cs"], + "secondary": ["src/MessagePack/BufferWriter.cs"], + "category": "architecture" + }, + { + "query": "how LZ4 compression is applied to MessagePack payloads", + "relevant": ["src/MessagePack/MessagePackSerializer.cs"], + "secondary": [], + "category": "semantic" + }, + { + "query": "MessagePackSerializer", + "relevant": ["src/MessagePack/MessagePackSerializer.cs"], + "secondary": [] + }, + { + "query": "MessagePackReader", + "relevant": ["src/MessagePack/MessagePackReader.cs"], + "secondary": [] + }, + { + "query": "IMessagePackFormatter", + "relevant": ["src/MessagePack/Formatters/IMessagePackFormatter`1.cs"], + "secondary": [] + }, + { + "query": "CompositeResolver", + "relevant": ["src/MessagePack/Resolvers/CompositeResolver.cs"], + "secondary": [] + }, + { + "query": "MessagePackSerializerOptions", + "relevant": ["src/MessagePack/MessagePackSerializerOptions.cs"], + "secondary": [] + }, + { + "query": "StandardResolver", + "relevant": ["src/MessagePack/Resolvers/StandardResolver.cs"], + "secondary": [] + } +] diff --git a/benchmarks/annotations/model2vec.json b/benchmarks/annotations/model2vec.json new file mode 100644 index 0000000..ad58320 --- /dev/null +++ b/benchmarks/annotations/model2vec.json @@ -0,0 +1,82 @@ +[ + { + "query": "how the StaticModel encodes text to embeddings", + "relevant": ["model2vec/model.py"], + "secondary": ["model2vec/inference/model.py"] + }, + { + "query": "how a model is distilled from a sentence transformer", + "relevant": ["model2vec/distill/distillation.py"], + "secondary": [] + }, + { + "query": "tokenizer construction and vocabulary building", + "relevant": ["model2vec/tokenizer/tokenizer.py"], + "secondary": ["model2vec/distill/utils.py"] + }, + { + "query": "saving and loading models from disk", + "relevant": ["model2vec/persistence/persistence.py"], + "secondary": ["model2vec/persistence/datamodels.py"] + }, + { + "query": "quantization of model weights", + "relevant": ["model2vec/quantization.py"], + "secondary": ["model2vec/vocabulary_quantization.py"] + }, + { + "query": "pushing and loading models from HuggingFace Hub", + "relevant": ["model2vec/persistence/hf.py"], + "secondary": [] + }, + { + "query": "distillation inference and embedding extraction", + "relevant": ["model2vec/distill/inference.py"], + "secondary": [] + }, + { + "query": "training a classifier on top of embeddings", + "relevant": ["model2vec/train/classifier.py"], + "secondary": ["model2vec/train/base.py"] + }, + { + "query": "generating model cards for publication", + "relevant": ["model2vec/modelcards/modelcards.py"], + "secondary": [] + }, + { 
+    "query": "utility functions used across the package",
+    "relevant": ["model2vec/utils.py"],
+    "secondary": ["model2vec/distill/utils.py"]
+  },
+  {
+    "query": "how mean pooling is applied over token embeddings during distillation",
+    "relevant": ["model2vec/distill/inference.py"],
+    "secondary": []
+  },
+  {
+    "query": "how PCA reduces embedding dimensionality",
+    "relevant": ["model2vec/distill/distillation.py"],
+    "secondary": ["model2vec/distill/inference.py"]
+  },
+  {
+    "query": "how out-of-vocabulary tokens are handled at inference time",
+    "relevant": ["model2vec/model.py"],
+    "secondary": ["model2vec/tokenizer/tokenizer.py"]
+  },
+  {
+    "query": "how vocabulary is pruned during distillation",
+    "relevant": ["model2vec/distill/utils.py"],
+    "secondary": ["model2vec/distill/distillation.py"]
+  },
+  {
+    "query": "how subword token weights are aggregated for whole-word embeddings",
+    "relevant": ["model2vec/distill/inference.py"],
+    "secondary": ["model2vec/distill/distillation.py"]
+  },
+  {"query": "StaticModel", "relevant": ["model2vec/model.py"], "secondary": []},
+  {"query": "distill", "relevant": ["model2vec/distill/distillation.py"], "secondary": []},
+  {"query": "PoolingMode", "relevant": ["model2vec/distill/inference.py"], "secondary": []},
+  {"query": "quantize", "relevant": ["model2vec/quantization.py"], "secondary": []},
+  {"query": "Tokenizer", "relevant": ["model2vec/tokenizer/tokenizer.py"], "secondary": []}
+]
diff --git a/benchmarks/annotations/monolog.json b/benchmarks/annotations/monolog.json
new file mode 100644
index 0000000..5b53451
--- /dev/null
+++ b/benchmarks/annotations/monolog.json
@@ -0,0 +1,52 @@
+[
+  {
+    "query": "logger handler stack and processors",
+    "relevant": ["src/Monolog/Logger.php"],
+    "secondary": [],
+    "category": "architecture"
+  },
+  {
+    "query": "stream log handler writes to files and streams",
+    "relevant": ["src/Monolog/Handler/StreamHandler.php"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "line formatter output formatting",
+    "relevant": ["src/Monolog/Formatter/LineFormatter.php"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "json log formatting",
+    "relevant": ["src/Monolog/Formatter/JsonFormatter.php"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "Logger",
+    "relevant": ["src/Monolog/Logger.php"],
+    "secondary": [],
+    "category": "symbol"
+  },
+  {
+    "query": "StreamHandler",
+    "relevant": ["src/Monolog/Handler/StreamHandler.php"],
+    "secondary": [],
+    "category": "symbol"
+  },
+  {
+    "query": "JsonFormatter",
+    "relevant": ["src/Monolog/Formatter/JsonFormatter.php"],
+    "secondary": [],
+    "category": "symbol"
+  },
+  {
+    "query": "how log handlers are registered and invoked by Logger",
+    "relevant": ["src/Monolog/Logger.php"],
+    "secondary": ["src/Monolog/Handler/AbstractHandler.php"],
+    "category": "architecture",
+    "seed": {"path": "src/Monolog/Logger.php", "line": 207},
+    "related": ["src/Monolog/Handler/AbstractHandler.php", "src/Monolog/Handler/HandlerInterface.php"]
+  }
+]
diff --git a/benchmarks/annotations/pydantic.json b/benchmarks/annotations/pydantic.json
new file mode 100644
index 0000000..1484adc
--- /dev/null
+++ b/benchmarks/annotations/pydantic.json
@@ -0,0 +1,82 @@
+[
+  {
+    "query": "how is BaseModel defined and instantiated",
+    "relevant": ["pydantic/main.py"],
+    "secondary": []
+  },
+  {
+    "query": "how are model fields declared and constrained",
+    "relevant": ["pydantic/fields.py"],
+    "secondary": ["pydantic/types.py"]
+  },
+  {
+    "query": "JSON schema generation from models",
+    "relevant": ["pydantic/json_schema.py"],
+    "secondary": []
+  },
+  {
+    "query": "custom field and model validators",
+    "relevant": ["pydantic/functional_validators.py"],
+    "secondary": ["pydantic/class_validators.py"]
+  },
+  {
+    "query": "how to serialize models to JSON",
+    "relevant": ["pydantic/functional_serializers.py"],
+    "secondary": ["pydantic/main.py"]
+  },
+  {
+    "query": "network types like URLs and email addresses",
+    "relevant": ["pydantic/networks.py"],
+    "secondary": []
+  },
+  {
+    "query": "model configuration and settings",
+    "relevant": ["pydantic/config.py"],
+    "secondary": []
+  },
+  {
+    "query": "validating data without a model using TypeAdapter",
+    "relevant": ["pydantic/type_adapter.py"],
+    "secondary": []
+  },
+  {
+    "query": "alias handling for field names",
+    "relevant": ["pydantic/aliases.py"],
+    "secondary": ["pydantic/alias_generators.py"]
+  },
+  {
+    "query": "root model for wrapping a single value",
+    "relevant": ["pydantic/root_model.py"],
+    "secondary": []
+  },
+  {
+    "query": "how discriminated unions select the right model variant",
+    "relevant": ["pydantic/types.py"],
+    "secondary": ["pydantic/main.py"]
+  },
+  {
+    "query": "how computed fields are defined on a model",
+    "relevant": ["pydantic/fields.py"],
+    "secondary": ["pydantic/functional_serializers.py"]
+  },
+  {
+    "query": "what runs after model initialisation in model_post_init",
+    "relevant": ["pydantic/main.py"],
+    "secondary": []
+  },
+  {
+    "query": "how model inheritance and field overriding works",
+    "relevant": ["pydantic/main.py"],
+    "secondary": ["pydantic/fields.py"]
+  },
+  {
+    "query": "how to validate a function's arguments with pydantic",
+    "relevant": ["pydantic/validate_call_decorator.py"],
+    "secondary": ["pydantic/decorator.py"]
+  },
+  {"query": "BaseModel", "relevant": ["pydantic/main.py"], "secondary": []},
+  {"query": "field_validator", "relevant": ["pydantic/functional_validators.py"], "secondary": []},
+  {"query": "model_validator", "relevant": ["pydantic/functional_validators.py"], "secondary": []},
+  {"query": "ConfigDict", "relevant": ["pydantic/config.py"], "secondary": []},
+  {"query": "Field", "relevant": ["pydantic/fields.py"], "secondary": []}
+]
diff --git a/benchmarks/annotations/rack.json b/benchmarks/annotations/rack.json
new file mode 100644
index 0000000..037b6b1
--- /dev/null
+++ b/benchmarks/annotations/rack.json
@@ -0,0 +1,44 @@
+[
+  {
+    "query": "HTTP request wrapper and forwarded headers",
+    "relevant": ["lib/rack/request.rb"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "HTTP response construction",
+    "relevant": ["lib/rack/response.rb"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "middleware builder DSL",
+    "relevant": ["lib/rack/builder.rb"],
+    "secondary": [],
+    "category": "architecture"
+  },
+  {
+    "query": "URL path mapping across mounted apps",
+    "relevant": ["lib/rack/urlmap.rb"],
+    "secondary": [],
+    "category": "architecture"
+  },
+  {
+    "query": "Request",
+    "relevant": ["lib/rack/request.rb"],
+    "secondary": [],
+    "category": "symbol"
+  },
+  {
+    "query": "Rack::Response",
+    "relevant": [{"path": "lib/rack/response.rb", "start_line": 23, "end_line": 62}],
+    "secondary": [],
+    "category": "symbol"
+  },
+  {
+    "query": "Rack::Builder",
+    "relevant": [{"path": "lib/rack/builder.rb", "start_line": 36, "end_line": 80}],
+    "secondary": [],
+    "category": "symbol"
+  }
+]
diff --git a/benchmarks/annotations/rails.json b/benchmarks/annotations/rails.json
new file mode 100644
index 0000000..9b5d376
--- /dev/null
+++ b/benchmarks/annotations/rails.json
@@ -0,0 +1,49 @@
+[
+  {
+    "query": "application boot process and initialization",
+    "relevant": ["railties/lib/rails/application.rb"],
+    "secondary": ["railties/lib/rails/configuration.rb"],
+    "category": "architecture"
+  },
+  {
+    "query": "engine configuration and load paths",
+    "relevant": ["railties/lib/rails/engine/configuration.rb"],
+    "secondary": ["railties/lib/rails/application.rb"],
+    "category": "architecture"
+  },
+  {
+    "query": "rack integration for rails applications",
+    "relevant": ["railties/lib/rails/rack.rb"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "path management for rails apps",
+    "relevant": ["railties/lib/rails/paths.rb"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "Application",
+    "relevant": ["railties/lib/rails/application.rb"],
+    "secondary": [],
+    "category": "symbol"
+  },
+  {
+    "query": "Rails::Engine",
+    "relevant": [{"path": "railties/lib/rails/engine.rb", "start_line": 348, "end_line": 387}],
+    "secondary": ["railties/lib/rails/engine/configuration.rb"],
+    "category": "symbol",
+    "seed": {"path": "railties/lib/rails/engine/configuration.rb", "line": 73},
+    "related": [
+      {"path": "railties/lib/rails/engine/updater.rb", "start_line": 1, "end_line": 21},
+      {"path": "railties/lib/rails/railtie/configuration.rb", "start_line": 1, "end_line": 54}
+    ]
+  },
+  {
+    "query": "Rails::Paths",
+    "relevant": ["railties/lib/rails/paths.rb"],
+    "secondary": [],
+    "category": "symbol"
+  }
+]
diff --git a/benchmarks/annotations/redux.json b/benchmarks/annotations/redux.json
new file mode 100644
index 0000000..b551227
--- /dev/null
+++ b/benchmarks/annotations/redux.json
@@ -0,0 +1,32 @@
+[
+  {
+    "query": "store creation and dispatch lifecycle",
+    "relevant": ["src/createStore.ts"],
+    "secondary": [],
+    "category": "architecture"
+  },
+  {
+    "query": "middleware pipeline composition",
+    "relevant": ["src/applyMiddleware.ts"],
+    "secondary": ["src/compose.ts"],
+    "category": "architecture"
+  },
+  {
+    "query": "combining reducers and validating reducer shape",
+    "relevant": ["src/combineReducers.ts"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "function composition utility",
+    "relevant": ["src/compose.ts"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "createStore",
+    "relevant": ["src/createStore.ts"],
+    "secondary": [],
+    "category": "symbol"
+  }
+]
diff --git a/benchmarks/annotations/requests.json b/benchmarks/annotations/requests.json
new file mode 100644
index 0000000..067c6c2
--- /dev/null
+++ b/benchmarks/annotations/requests.json
@@ -0,0 +1,109 @@
+[
+  {
+    "query": "how HTTP sessions manage state and connections",
+    "relevant": ["src/requests/sessions.py"],
+    "secondary": []
+  },
+  {
+    "query": "authentication mechanisms and credential handling",
+    "relevant": ["src/requests/auth.py"],
+    "secondary": []
+  },
+  {
+    "query": "transport adapters and connection pooling",
+    "relevant": ["src/requests/adapters.py"],
+    "secondary": []
+  },
+  {
+    "query": "cookie storage and persistence",
+    "relevant": ["src/requests/cookies.py"],
+    "secondary": []
+  },
+  {
+    "query": "how redirects are followed",
+    "relevant": ["src/requests/sessions.py"],
+    "secondary": ["src/requests/models.py"]
+  },
+  {
+    "query": "error and exception types",
+    "relevant": ["src/requests/exceptions.py"],
+    "secondary": []
+  },
+  {
+    "query": "request and response model internals",
+    "relevant": ["src/requests/models.py"],
+    "secondary": []
+  },
+  {
+    "query": "SSL certificate verification",
+    "relevant": ["src/requests/adapters.py"],
+    "secondary": ["src/requests/certs.py"]
+  },
+  {
+    "query": "event hooks system",
+    "relevant": ["src/requests/hooks.py"],
+    "secondary": []
+  },
+  {
+    "query": "utility functions for encoding and headers",
+    "relevant": ["src/requests/utils.py"],
+    "secondary": []
+  },
+  {
+    "query": "how a PreparedRequest is built from user-supplied arguments",
+    "relevant": ["src/requests/models.py"],
+    "secondary": ["src/requests/sessions.py"]
+  },
+  {
+    "query": "how digest authentication implements the challenge-response handshake",
+    "relevant": ["src/requests/auth.py"],
+    "secondary": []
+  },
+  {
+    "query": "how response encoding is detected from headers and content",
+    "relevant": ["src/requests/utils.py"],
+    "secondary": ["src/requests/models.py"]
+  },
+  {
+    "query": "how proxy settings are read from environment variables",
+    "relevant": ["src/requests/utils.py"],
+    "secondary": ["src/requests/sessions.py"]
+  },
+  {
+    "query": "how connection keep-alive and pooling limits are configured",
+    "relevant": ["src/requests/adapters.py"],
+    "secondary": []
+  },
+  {
+    "query": "Session",
+    "relevant": [{"path": "src/requests/sessions.py", "start_line": 356, "end_line": 394}],
+    "secondary": [],
+    "category": "symbol"
+  },
+  {
+    "query": "HTTPDigestAuth",
+    "relevant": [{"path": "src/requests/auth.py", "start_line": 107, "end_line": 136}],
+    "secondary": [],
+    "category": "symbol"
+  },
+  {
+    "query": "PreparedRequest",
+    "relevant": [{"path": "src/requests/models.py", "start_line": 315, "end_line": 364}],
+    "secondary": [],
+    "category": "symbol",
+    "seed": {"path": "src/requests/sessions.py", "line": 485},
+    "related": [{"path": "src/requests/models.py", "start_line": 315, "end_line": 353}]
+  },
+  {
+    "query": "HTTPAdapter",
+    "relevant": [{"path": "src/requests/adapters.py", "start_line": 144, "end_line": 192}],
+    "secondary": [],
+    "category": "symbol"
+  },
+  {
+    "query": "Response",
+    "relevant": [{"path": "src/requests/models.py", "start_line": 642, "end_line": 691}],
+    "secondary": [],
+    "category": "symbol"
+  }
+]
diff --git a/benchmarks/annotations/sinatra.json b/benchmarks/annotations/sinatra.json
new file mode 100644
index 0000000..ddec420
--- /dev/null
+++ b/benchmarks/annotations/sinatra.json
@@ -0,0 +1,100 @@
+[
+  {
+    "query": "core Sinatra DSL and routing behavior",
+    "relevant": ["lib/sinatra/base.rb"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "application startup and command line entrypoint",
+    "relevant": ["lib/sinatra/main.rb"],
+    "secondary": [],
+    "category": "architecture"
+  },
+  {
+    "query": "exception rendering and stack traces",
+    "relevant": ["lib/sinatra/show_exceptions.rb"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "request logging middleware",
+    "relevant": ["lib/sinatra/middleware/logger.rb"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "how route handlers are compiled and URL patterns matched",
+    "relevant": [{"path": "lib/sinatra/base.rb", "start_line": 1795, "end_line": 1817}],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "before and after filter hooks for the request lifecycle",
+    "relevant": [{"path": "lib/sinatra/base.rb", "start_line": 1486, "end_line": 1505}],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "response helper methods: halt, pass and redirect",
+    "relevant": [{"path": "lib/sinatra/base.rb", "start_line": 286, "end_line": 741}],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "how templates are rendered with erb haml and other engines",
+    "relevant": [{"path": "lib/sinatra/base.rb", "start_line": 742, "end_line": 948}],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "how the Rack middleware stack is assembled before requests are handled",
+    "relevant": [{"path": "lib/sinatra/base.rb", "start_line": 1819, "end_line": 1888}],
+    "secondary": [],
+    "category": "architecture"
+  },
+  {
+    "query": "case-insensitive hash for request params",
+    "relevant": ["lib/sinatra/indifferent_hash.rb"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "Sinatra::Base",
+    "relevant": ["lib/sinatra/base.rb"],
+    "secondary": [],
+    "category": "symbol"
+  },
+  {
+    "query": "Sinatra::Application",
+    "relevant": ["lib/sinatra/main.rb"],
+    "secondary": ["lib/sinatra/base.rb"],
+    "category": "symbol"
+  },
+  {
+    "query": "Sinatra::ShowExceptions",
+    "relevant": ["lib/sinatra/show_exceptions.rb"],
+    "secondary": [],
+    "category": "symbol"
+  },
+  {
+    "query": "Sinatra::Helpers",
+    "relevant": [{"path": "lib/sinatra/base.rb", "start_line": 286, "end_line": 741}],
+    "secondary": [],
+    "category": "symbol"
+  },
+  {
+    "query": "IndifferentHash",
+    "relevant": ["lib/sinatra/indifferent_hash.rb"],
+    "secondary": [],
+    "category": "symbol"
+  },
+  {
+    "query": "how Sinatra::Application inherits and configures Base for standalone use",
+    "relevant": ["lib/sinatra/main.rb"],
+    "secondary": ["lib/sinatra/base.rb"],
+    "category": "architecture",
+    "seed": {"path": "lib/sinatra/main.rb", "line": 30},
+    "related": ["lib/sinatra/base.rb"]
+  }
+]
diff --git a/benchmarks/annotations/starlette.json b/benchmarks/annotations/starlette.json
new file mode 100644
index 0000000..f588020
--- /dev/null
+++ b/benchmarks/annotations/starlette.json
@@ -0,0 +1,90 @@
+[
+  {
+    "query": "how are URL routes registered and matched",
+    "relevant": ["starlette/routing.py"],
+    "secondary": ["starlette/convertors.py"]
+  },
+  {
+    "query": "how does authentication middleware work",
+    "relevant": ["starlette/middleware/authentication.py"],
+    "secondary": ["starlette/authentication.py"]
+  },
+  {
+    "query": "websocket connection and message handling",
+    "relevant": ["starlette/websockets.py"],
+    "secondary": []
+  },
+  {
+    "query": "how are static files served",
+    "relevant": ["starlette/staticfiles.py"],
+    "secondary": []
+  },
+  {
+    "query": "parsing form data and file uploads",
+    "relevant": ["starlette/formparsers.py"],
+    "secondary": ["starlette/datastructures.py"]
+  },
+  {
+    "query": "how does the test client simulate requests",
+    "relevant": ["starlette/testclient.py"],
+    "secondary": []
+  },
+  {
+    "query": "background task scheduling and execution",
+    "relevant": ["starlette/background.py"],
+    "secondary": []
+  },
+  {
+    "query": "application lifespan startup and shutdown events",
+    "relevant": ["starlette/routing.py"],
+    "secondary": ["starlette/applications.py"]
+  },
+  {
+    "query": "streaming response implementation",
+    "relevant": ["starlette/responses.py"],
+    "secondary": []
+  },
+  {
+    "query": "how base middleware wraps request handling",
+    "relevant": ["starlette/middleware/base.py"],
+    "secondary": []
+  },
+  {
+    "query": "how request state persists arbitrary data across middleware",
+    "relevant": ["starlette/requests.py"],
+    "secondary": []
+  },
+  {
+    "query": "how path convertor types work for route parameters",
+    "relevant": ["starlette/convertors.py"],
+    "secondary": ["starlette/routing.py"]
+  },
+  {
+    "query": "how session data is signed and stored in cookies",
+    "relevant": ["starlette/middleware/sessions.py"],
+    "secondary": []
+  },
+  {
+    "query": "how CORS preflight requests are handled",
+    "relevant": ["starlette/middleware/cors.py"],
+    "secondary": []
+  },
+  {
+    "query": "how errors in ASGI apps are caught and turned into responses",
+    "relevant": ["starlette/middleware/errors.py"],
+    "secondary": ["starlette/middleware/exceptions.py"]
+  },
+  {"query": "BaseHTTPMiddleware", "relevant": ["starlette/middleware/base.py"], "secondary": []},
+  {"query": "Request", "relevant": ["starlette/requests.py"], "secondary": []},
+  {"query": "WebSocket", "relevant": ["starlette/websockets.py"], "secondary": []},
+  {"query": "BackgroundTask", "relevant": ["starlette/background.py"], "secondary": []},
+  {"query": "Router", "relevant": ["starlette/routing.py"], "secondary": []},
+  {
+    "query": "how the Starlette application delegates routing and lifespan to Router",
+    "relevant": ["starlette/applications.py"],
+    "secondary": ["starlette/routing.py"],
+    "category": "architecture",
+    "seed": {"path": "starlette/applications.py", "line": 19},
+    "related": ["starlette/routing.py"]
+  }
+]
diff --git a/benchmarks/annotations/tokio.json b/benchmarks/annotations/tokio.json
new file mode 100644
index 0000000..a902e16
--- /dev/null
+++ b/benchmarks/annotations/tokio.json
@@ -0,0 +1,99 @@
+[
+  {
+    "query": "how spawned tasks are scheduled onto threads",
+    "relevant": ["tokio/src/runtime/scheduler/multi_thread/worker.rs"],
+    "secondary": ["tokio/src/task/spawn.rs"],
+    "category": "architecture"
+  },
+  {
+    "query": "how the async mutex prevents concurrent access",
+    "relevant": ["tokio/src/sync/mutex.rs"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "how a broadcast channel delivers messages to multiple receivers",
+    "relevant": ["tokio/src/sync/broadcast.rs"],
+    "secondary": [],
+    "category": "architecture"
+  },
+  {
+    "query": "how the timer wheel tracks sleep deadlines",
+    "relevant": ["tokio/src/runtime/time/wheel/level.rs"],
+    "secondary": ["tokio/src/time/sleep.rs"],
+    "category": "architecture"
+  },
+  {
+    "query": "running non-async blocking code inside the async runtime",
+    "relevant": ["tokio/src/task/blocking.rs"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "how tasks that are not Send can run on a single thread",
+    "relevant": ["tokio/src/task/local.rs"],
+    "secondary": [],
+    "category": "architecture"
+  },
+  {
+    "query": "waiting for any of several futures to complete",
+    "relevant": ["tokio/src/macros/select.rs"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "how the runtime builder configures thread pool size and flavour",
+    "relevant": ["tokio/src/runtime/builder.rs"],
+    "secondary": [],
+    "category": "architecture"
+  },
+  {
+    "query": "rate-limiting concurrent operations with a semaphore",
+    "relevant": ["tokio/src/sync/semaphore.rs"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "watching a value and being notified when it changes",
+    "relevant": ["tokio/src/sync/watch.rs"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "collecting results from a dynamic set of spawned tasks",
+    "relevant": ["tokio/src/task/join_set.rs"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "integrating a non-async file descriptor with the tokio reactor",
+    "relevant": ["tokio/src/io/async_fd.rs"],
+    "secondary": [],
+    "category": "semantic"
+  },
+  {
+    "query": "JoinSet",
+    "relevant": ["tokio/src/task/join_set.rs"],
+    "secondary": []
+  },
+  {
+    "query": "Semaphore",
+    "relevant":
["tokio/src/sync/semaphore.rs"], + "secondary": [] + }, + { + "query": "MissedTickBehavior", + "relevant": ["tokio/src/time/interval.rs"], + "secondary": [] + }, + { + "query": "LocalSet", + "relevant": ["tokio/src/task/local.rs"], + "secondary": [] + }, + { + "query": "Notify", + "relevant": ["tokio/src/sync/notify.rs"], + "secondary": [] + } +] diff --git a/benchmarks/annotations/trpc.json b/benchmarks/annotations/trpc.json new file mode 100644 index 0000000..f3f8d25 --- /dev/null +++ b/benchmarks/annotations/trpc.json @@ -0,0 +1,99 @@ +[ + { + "query": "how a tRPC router is created and procedures are registered", + "relevant": ["packages/server/src/unstable-core-do-not-import/router.ts"], + "secondary": ["packages/server/src/unstable-core-do-not-import/procedureBuilder.ts"], + "category": "architecture" + }, + { + "query": "how middleware chains context transformations between procedures", + "relevant": ["packages/server/src/unstable-core-do-not-import/middleware.ts"], + "secondary": [], + "category": "architecture" + }, + { + "query": "how input validation and parsing works for procedures", + "relevant": ["packages/server/src/unstable-core-do-not-import/parser.ts"], + "secondary": ["packages/server/src/unstable-core-do-not-import/procedureBuilder.ts"], + "category": "semantic" + }, + { + "query": "how HTTP requests are resolved to tRPC procedures", + "relevant": ["packages/server/src/unstable-core-do-not-import/http/resolveResponse.ts"], + "secondary": ["packages/server/src/http.ts"], + "category": "architecture" + }, + { + "query": "how error formatting and serialization works", + "relevant": ["packages/server/src/unstable-core-do-not-import/error/formatter.ts"], + "secondary": ["packages/server/src/unstable-core-do-not-import/error/TRPCError.ts"], + "category": "semantic" + }, + { + "query": "how server-sent events and subscriptions are streamed to the client", + "relevant": ["packages/server/src/unstable-core-do-not-import/stream/sse.ts"], + "secondary": ["packages/server/src/unstable-core-do-not-import/stream/jsonl.ts"], + "category": "architecture" + }, + { + "query": "how the observable pattern is used for subscriptions", + "relevant": ["packages/server/src/observable/observable.ts"], + "secondary": ["packages/server/src/observable/operators.ts"], + "category": "semantic" + }, + { + "query": "how type inference extracts input and output types from procedures", + "relevant": ["packages/server/src/unstable-core-do-not-import/clientish/inference.ts"], + "secondary": [], + "category": "semantic" + }, + { + "query": "adapting tRPC to run as a Node.js HTTP server handler", + "relevant": ["packages/server/src/adapters/node-http/nodeHTTPRequestHandler.ts"], + "secondary": ["packages/server/src/adapters/node-http/incomingMessageToRequest.ts"], + "category": "architecture" + }, + { + "query": "WebSocket adapter for real-time subscriptions", + "relevant": ["packages/server/src/adapters/ws.ts"], + "secondary": [], + "category": "semantic" + }, + { + "query": "how tRPC is initialized with root config and context factory", + "relevant": ["packages/server/src/unstable-core-do-not-import/initTRPC.ts"], + "secondary": ["packages/server/src/unstable-core-do-not-import/rootConfig.ts"], + "category": "architecture" + }, + { + "query": "how data transformer is applied to serialize and deserialize procedure payloads", + "relevant": ["packages/server/src/unstable-core-do-not-import/transformer.ts"], + "secondary": [], + "category": "semantic" + }, + { + "query": "TRPCError", + "relevant": 
["packages/server/src/unstable-core-do-not-import/error/TRPCError.ts"], + "secondary": [] + }, + { + "query": "AnyRouter", + "relevant": ["packages/server/src/unstable-core-do-not-import/router.ts"], + "secondary": [] + }, + { + "query": "MiddlewareBuilder", + "relevant": ["packages/server/src/unstable-core-do-not-import/middleware.ts"], + "secondary": [] + }, + { + "query": "inferProcedureInput", + "relevant": ["packages/server/src/unstable-core-do-not-import/procedure.ts"], + "secondary": [] + }, + { + "query": "fetchRequestHandler", + "relevant": ["packages/server/src/adapters/fetch/fetchRequestHandler.ts"], + "secondary": [] + } +] diff --git a/benchmarks/bench_hybrid.py b/benchmarks/bench_hybrid.py new file mode 100644 index 0000000..b0ad205 --- /dev/null +++ b/benchmarks/bench_hybrid.py @@ -0,0 +1,248 @@ +from __future__ import annotations + +import argparse +import math +import shutil +import sys +import time +from dataclasses import dataclass +from pathlib import Path + +from model2vec import StaticModel + +from benchmarks.common import ( + Task, + apply_task_filters, + available_repo_specs, + count_indexed_targets, + grouped_tasks, + load_tasks, + target_matches_location, + target_rank, +) +from semble import SembleIndex +from semble.types import SearchResult + +_CACHE_DIR = Path("/tmp/semble-bench-cache") +_MODEL_NAME = "Pringled/potion-code-16M" +_LATENCY_RUNS = 5 +_DIRECT_TOP_K = 10 + + +@dataclass(frozen=True) +class RepoResult: + repo: str + language: str + chunks: int + ndcg5: float + ndcg10: float + p50_ms: float + cold_ms: float | None = None + warm_ms: float | None = None + + +def _parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser(description="Benchmark hybrid semble search across the pinned benchmark repos.") + parser.add_argument("--cache", action="store_true", help="Show cold vs warm index time using the disk cache.") + parser.add_argument("--repo", action="append", default=[], help="Limit to one or more repo names.") + parser.add_argument("--language", action="append", default=[], help="Limit to one or more languages.") + parser.add_argument("--verbose", action="store_true", help="Print per-query results.") + return parser.parse_args() + + +def _is_relevant(result: SearchResult, task: Task) -> bool: + chunk = result.chunk + return any( + target_matches_location(chunk.file_path, chunk.start_line, chunk.end_line, target) + for target in task.all_relevant + ) + + +def _dcg(relevances: list[int]) -> float: + return sum(rel / math.log2(i + 2) for i, rel in enumerate(relevances)) + + +def _ndcg_at_k(relevant_ranks: list[int], n_relevant: int, k: int) -> float: + if n_relevant == 0: + return 0.0 + relevances = [0] * k + for rank in relevant_ranks: + if 1 <= rank <= k: + relevances[rank - 1] = 1 + ideal = _dcg([1] * min(k, n_relevant)) + return _dcg(relevances) / ideal if ideal > 0 else 0.0 + + +def _evaluate(index: SembleIndex, tasks: list[Task], *, verbose: bool = False) -> tuple[float, float, float]: + ndcg5_sum = 0.0 + ndcg10_sum = 0.0 + latencies: list[float] = [] + + for task in tasks: + query_latencies: list[float] = [] + for _ in range(_LATENCY_RUNS): + started = time.perf_counter() + results = index.search(task.query, top_k=_DIRECT_TOP_K) + query_latencies.append((time.perf_counter() - started) * 1000) + latencies.append(sorted(query_latencies)[_LATENCY_RUNS // 2]) + + chunk_results = results[:_DIRECT_TOP_K] + relevant_ranks = [ + rank for target in task.all_relevant if (rank := target_rank(chunk_results, target)) is not None + ] + n_relevant = 
count_indexed_targets(index.chunks, task.all_relevant) + q_ndcg5 = _ndcg_at_k(relevant_ranks, n_relevant, 5) + q_ndcg10 = _ndcg_at_k(relevant_ranks, n_relevant, 10) + ndcg5_sum += q_ndcg5 + ndcg10_sum += q_ndcg10 + + if verbose: + cat = task.category or "?" + targets_str = ", ".join( + t.path if not t.start_line else f"{t.path}:{t.start_line}-{t.end_line}" for t in task.all_relevant + ) + top_files = [r.chunk.file_path for r in chunk_results[:5]] + print( + f" [{cat:<12}] ndcg@10={q_ndcg10:.3f} ranks={relevant_ranks} n_rel={n_relevant} q={task.query!r}", + file=sys.stderr, + ) + print(f" targets: {targets_str}", file=sys.stderr) + print(f" top-5: {top_files}", file=sys.stderr) + + total = len(tasks) + latencies.sort() + return ndcg5_sum / total, ndcg10_sum / total, latencies[len(latencies) // 2] + + +def _print_group_summary(results: list[RepoResult], group_by: str) -> None: + print(file=sys.stderr) + print(f"By {group_by}", file=sys.stderr) + groups = sorted({getattr(result, group_by) for result in results}) + for value in groups: + grouped = [result for result in results if getattr(result, group_by) == value] + print( + " " + + f"{value}: repos={len(grouped)} ndcg@5={sum(r.ndcg5 for r in grouped) / len(grouped):.3f}" + + f" ndcg@10={sum(r.ndcg10 for r in grouped) / len(grouped):.3f}" + + f" p50={sum(r.p50_ms for r in grouped) / len(grouped):.2f}ms", + file=sys.stderr, + ) + + +def _print_language_table(results: list[RepoResult]) -> None: + languages = ["python", "javascript", "java", "go", "php", "ruby"] + present = [language for language in languages if any(result.language == language for result in results)] + columns = ["Avg", *[language.title() for language in present]] + + avg_ndcg10 = sum(result.ndcg10 for result in results) / len(results) + avg_p50 = sum(result.p50_ms for result in results) / len(results) + + print(file=sys.stderr) + print(f"{'=' * 104}", file=sys.stderr) + print("Hybrid benchmark by language", file=sys.stderr) + print(f"{'=' * 104}", file=sys.stderr) + print(f"\n {'Metric':<28} " + " ".join(f"{column:>9}" for column in columns), file=sys.stderr) + print(f" {'-' * 28} " + " ".join(f"{'-' * 9:>9}" for _ in columns), file=sys.stderr) + + ndcg_row = [f"{avg_ndcg10:>9.3f}"] + p50_row = [f"{avg_p50:>8.2f}ms"] + for language in present: + language_results = [result for result in results if result.language == language] + ndcg_row.append(f"{sum(result.ndcg10 for result in language_results) / len(language_results):>9.3f}") + p50_row.append(f"{sum(result.p50_ms for result in language_results) / len(language_results):>8.2f}ms") + + print(f" {'NDCG@10':<28} " + " ".join(ndcg_row), file=sys.stderr) + print(f" {'q-p50':<28} " + " ".join(p50_row), file=sys.stderr) + + +def _bench_quality(repo_tasks: dict[str, list[Task]], model: StaticModel, *, verbose: bool = False) -> list[RepoResult]: + print( + f"{'Repo':<12} {'language':<12} {'chunks':>6} {'index':>9} {'NDCG@5':>8} {'NDCG@10':>8} {'p50':>8}", + file=sys.stderr, + ) + print(f"{'-' * 12} {'-' * 12} {'-' * 6} {'-' * 9} {'-' * 8} {'-' * 8} {'-' * 8}", file=sys.stderr) + results: list[RepoResult] = [] + specs = available_repo_specs() + for repo, tasks in sorted(repo_tasks.items()): + spec = specs[repo] + started = time.perf_counter() + index = SembleIndex.from_path(spec.benchmark_dir, model=model) + index_ms = (time.perf_counter() - started) * 1000 + ndcg5, ndcg10, p50_ms = _evaluate(index, tasks, verbose=verbose) + result = RepoResult( + repo=repo, language=spec.language, chunks=len(index.chunks), ndcg5=ndcg5, ndcg10=ndcg10, 
p50_ms=p50_ms + ) + results.append(result) + print( + f"{repo:<12} {spec.language:<12} {len(index.chunks):>6} {index_ms:>8.0f}ms {ndcg5:>8.3f} {ndcg10:>8.3f} {p50_ms:>7.2f}ms", + file=sys.stderr, + ) + return results + + +def _bench_cache(repo_tasks: dict[str, list[Task]], model: StaticModel) -> list[RepoResult]: + _CACHE_DIR.mkdir(parents=True, exist_ok=True) + print(f"Cache dir: {_CACHE_DIR}", file=sys.stderr) + print(file=sys.stderr) + print( + f"{'Repo':<12} {'language':<12} {'chunks':>6} {'cold':>9} {'warm':>9} {'speedup':>8} {'NDCG@10':>8}", + file=sys.stderr, + ) + print(f"{'-' * 12} {'-' * 12} {'-' * 6} {'-' * 9} {'-' * 9} {'-' * 8} {'-' * 8}", file=sys.stderr) + results: list[RepoResult] = [] + specs = available_repo_specs() + model_ns = _MODEL_NAME.replace("/", "--") + for repo, tasks in sorted(repo_tasks.items()): + spec = specs[repo] + namespace_dir = _CACHE_DIR / model_ns + if namespace_dir.exists(): + shutil.rmtree(namespace_dir) + started = time.perf_counter() + cold = SembleIndex.from_path(spec.benchmark_dir, model=model, cache_dir=_CACHE_DIR, model_name=_MODEL_NAME) + cold_ms = (time.perf_counter() - started) * 1000 + started = time.perf_counter() + warm = SembleIndex.from_path(spec.benchmark_dir, model=model, cache_dir=_CACHE_DIR, model_name=_MODEL_NAME) + warm_ms = (time.perf_counter() - started) * 1000 + _, ndcg10, p50_ms = _evaluate(warm, tasks) + result = RepoResult( + repo=repo, + language=spec.language, + chunks=len(cold.chunks), + ndcg5=0.0, + ndcg10=ndcg10, + p50_ms=p50_ms, + cold_ms=cold_ms, + warm_ms=warm_ms, + ) + results.append(result) + speedup = cold_ms / warm_ms if warm_ms > 0 else float("inf") + print( + f"{repo:<12} {spec.language:<12} {len(cold.chunks):>6} {cold_ms:>8.0f}ms {warm_ms:>8.0f}ms {speedup:>7.1f}x {ndcg10:>8.3f}", + file=sys.stderr, + ) + print(file=sys.stderr) + print("Warm time still includes file walk plus BM25/Vicinity rebuild; only embedding is skipped.", file=sys.stderr) + return results + + +def main() -> None: + args = _parse_args() + repo_specs = available_repo_specs() + tasks = apply_task_filters( + load_tasks(repo_specs=repo_specs), repos=args.repo or None, languages=args.language or None + ) + if not tasks: + raise SystemExit("No benchmark tasks matched the requested filters.") + print("Loading model...", file=sys.stderr) + started = time.perf_counter() + model = StaticModel.from_pretrained(_MODEL_NAME) + print(f"Loaded in {(time.perf_counter() - started) * 1000:.0f} ms", file=sys.stderr) + print(file=sys.stderr) + repo_tasks = grouped_tasks(tasks) + results = _bench_cache(repo_tasks, model) if args.cache else _bench_quality(repo_tasks, model, verbose=args.verbose) + _print_group_summary(results, "language") + _print_language_table(results) + + +if __name__ == "__main__": + main() diff --git a/benchmarks/common.py b/benchmarks/common.py new file mode 100644 index 0000000..c755633 --- /dev/null +++ b/benchmarks/common.py @@ -0,0 +1,194 @@ +from __future__ import annotations + +import json +from dataclasses import dataclass +from pathlib import Path +from typing import Protocol, cast + +from semble import Chunk + +BENCH_ROOT = Path("/tmp/bench") +BENCHMARKS_DIR = Path(__file__).parent +ANNOTATIONS_DIR = BENCHMARKS_DIR / "annotations" +REPOS_PATH = BENCHMARKS_DIR / "repos.json" + + +@dataclass(frozen=True) +class Target: + path: str + start_line: int | None = None + end_line: int | None = None + + @property + def has_span(self) -> bool: + return self.start_line is not None and self.end_line is not None + + +class _ChunkLike(Protocol): 
+ file_path: str + start_line: int + end_line: int + + +class _ResultLike(Protocol): + chunk: Chunk + + +@dataclass(frozen=True) +class RepoSpec: + name: str + language: str + url: str + revision: str + benchmark_root: str | None = None + + @property + def checkout_dir(self) -> Path: + return BENCH_ROOT / self.name + + @property + def benchmark_dir(self) -> Path: + return self.checkout_dir if self.benchmark_root is None else self.checkout_dir / self.benchmark_root + + +@dataclass(frozen=True) +class Task: + repo: str + language: str + query: str + relevant: tuple[Target, ...] + secondary: tuple[Target, ...] + category: str + category_inferred: bool + + @property + def all_relevant(self) -> tuple[Target, ...]: + return self.relevant + self.secondary + + +def infer_category(query: str) -> str: + if " " not in query.strip(): + return "symbol" + lowered = query.lower() + if lowered.startswith("how ") or lowered.startswith("how does") or lowered.startswith("how are"): + return "architecture" + return "semantic" + + +def _coerce_int(value: object) -> int: + if not isinstance(value, int | str): + raise TypeError(f"expected int-compatible value, got {type(value).__name__}") + return int(value) + + +def _coerce_mapping(raw: object) -> dict[str, object]: + if not isinstance(raw, dict): + raise TypeError(f"expected mapping, got {type(raw).__name__}") + return cast(dict[str, object], raw) + + +def _parse_target(raw: str | dict[str, object]) -> Target: + if isinstance(raw, str): + return Target(path=raw) + raw = _coerce_mapping(raw) + start_line = raw.get("start_line") + end_line = raw.get("end_line") + return Target( + path=str(raw["path"]), + start_line=_coerce_int(start_line) if start_line is not None else None, + end_line=_coerce_int(end_line) if end_line is not None else None, + ) + + +def load_repo_specs(path: Path = REPOS_PATH) -> dict[str, RepoSpec]: + raw = json.loads(path.read_text(encoding="utf-8")) + return {item["name"]: RepoSpec(**item) for item in raw} + + +def available_repo_specs(repo_specs: dict[str, RepoSpec] | None = None) -> dict[str, RepoSpec]: + specs = load_repo_specs() if repo_specs is None else repo_specs + return { + name: spec + for name, spec in specs.items() + if spec.checkout_dir.exists() and (ANNOTATIONS_DIR / f"{name}.json").exists() + } + + +def load_tasks( + path: Path = ANNOTATIONS_DIR, + repo_specs: dict[str, RepoSpec] | None = None, +) -> list[Task]: + specs = load_repo_specs() if repo_specs is None else repo_specs + tasks: list[Task] = [] + annotation_files = sorted(path.glob("*.json")) if path.is_dir() else [path] + for annotation_file in annotation_files: + raw = json.loads(annotation_file.read_text(encoding="utf-8")) + default_repo = annotation_file.stem + for item in raw: + repo = item.get("repo", default_repo) + spec = specs[repo] + category = item.get("category") + tasks.append( + Task( + repo=repo, + language=spec.language, + query=item["query"], + relevant=tuple(_parse_target(raw) for raw in item.get("relevant", [])), + secondary=tuple(_parse_target(raw) for raw in item.get("secondary", [])), + category=category if isinstance(category, str) else infer_category(item["query"]), + category_inferred=category is None, + ) + ) + return tasks + + +def apply_task_filters( + tasks: list[Task], + repos: list[str] | None = None, + languages: list[str] | None = None, + limit: int | None = None, +) -> list[Task]: + filtered = [task for task in tasks if not repos or task.repo in repos] + filtered = [task for task in filtered if not languages or task.language in 
languages] + return filtered if limit is None else filtered[:limit] + + +def path_matches(file_path: str, relative_path: str) -> bool: + normalized_file = file_path.replace("\\", "/") + normalized_relative = relative_path.replace("\\", "/") + return normalized_file == normalized_relative or normalized_file.endswith(f"/{normalized_relative}") + + +def span_overlaps(start_line: int, end_line: int, target: Target) -> bool: + if not target.has_span: + return True + assert target.start_line is not None + assert target.end_line is not None + return not (end_line < target.start_line or start_line > target.end_line) + + +def target_matches_location(file_path: str, start_line: int, end_line: int, target: Target) -> bool: + return path_matches(file_path, target.path) and span_overlaps(start_line, end_line, target) + + +def target_rank(results: list[_ResultLike], target: Target) -> int | None: + for index, result in enumerate(results, 1): + chunk = result.chunk + if target_matches_location(chunk.file_path, chunk.start_line, chunk.end_line, target): + return index + return None + + +def count_indexed_targets(chunks: list[_ChunkLike], targets: tuple[Target, ...]) -> int: + return sum( + 1 + for target in targets + if any(target_matches_location(chunk.file_path, chunk.start_line, chunk.end_line, target) for chunk in chunks) + ) + + +def grouped_tasks(tasks: list[Task]) -> dict[str, list[Task]]: + grouped: dict[str, list[Task]] = {} + for task in tasks: + grouped.setdefault(task.repo, []).append(task) + return grouped diff --git a/benchmarks/repos.json b/benchmarks/repos.json new file mode 100644 index 0000000..56e36e2 --- /dev/null +++ b/benchmarks/repos.json @@ -0,0 +1,202 @@ +[ + { + "name": "aiohttp", + "language": "python", + "url": "https://github.com/aio-libs/aiohttp.git", + "revision": "fc67cfdfd7d4bbf53ef76515fae69726626fe256", + "benchmark_root": "aiohttp" + }, + { + "name": "fastapi", + "language": "python", + "url": "https://github.com/fastapi/fastapi.git", + "revision": "c3c9dd6b1a08bcda766e7b43eafe72c4c5e9e193", + "benchmark_root": "fastapi" + }, + { + "name": "flask", + "language": "python", + "url": "https://github.com/pallets/flask.git", + "revision": "258d68b6ff5e2244386540f48b48bab90d6ab827", + "benchmark_root": "src/flask" + }, + { + "name": "httpx", + "language": "python", + "url": "https://github.com/encode/httpx.git", + "revision": "b5addb64f0161ff6bfe94c124ef76f6a1fba5254", + "benchmark_root": "httpx" + }, + { + "name": "model2vec", + "language": "python", + "url": "https://github.com/MinishLab/model2vec.git", + "revision": "b3012ee04e41c634383a5d735cb3c7c51e806a18", + "benchmark_root": "model2vec" + }, + { + "name": "pydantic", + "language": "python", + "url": "https://github.com/pydantic/pydantic.git", + "revision": "82c15f0ba8a9f8d8d6ba595df73ad20e2ee2eccf", + "benchmark_root": "pydantic" + }, + { + "name": "requests", + "language": "python", + "url": "https://github.com/psf/requests.git", + "revision": "ef439eb779c1eba7cbdeeeb302b11e1e061b4b7d", + "benchmark_root": "src/requests" + }, + { + "name": "starlette", + "language": "python", + "url": "https://github.com/encode/starlette.git", + "revision": "1894d0d89badf43bc8bfe03ed221a8b2e100b2ab", + "benchmark_root": "starlette" + }, + { + "name": "axios", + "language": "javascript", + "url": "https://github.com/axios/axios.git", + "revision": "c7a76ddbf277db864ee6cfb4ef17b8a08ffbe3f5", + "benchmark_root": "lib" + }, + { + "name": "express", + "language": "javascript", + "url": "https://github.com/expressjs/express.git", + 
"revision": "8e022edc9185f540a3fcecaf5e56b850d919cdac", + "benchmark_root": "lib" + }, + { + "name": "redux", + "language": "javascript", + "url": "https://github.com/reduxjs/redux.git", + "revision": "aaa04ae8402ba2caba55a9c75bfa8d3df6c78f8c", + "benchmark_root": "src" + }, + { + "name": "gin", + "language": "go", + "url": "https://github.com/gin-gonic/gin.git", + "revision": "d3ffc9985281dcf4d3bef604cce4e662b1a327a6" + }, + { + "name": "cobra", + "language": "go", + "url": "https://github.com/spf13/cobra.git", + "revision": "61968e893eee2f27696c2fbc8e34fa5c4afaf7c4" + }, + { + "name": "chi", + "language": "go", + "url": "https://github.com/go-chi/chi.git", + "revision": "a54874f0e2f12647a19e82ee70dfa8185014100c" + }, + { + "name": "gson", + "language": "java", + "url": "https://github.com/google/gson.git", + "revision": "f4d371d29c04066dbe7fdb31f642831f9c7f40cd", + "benchmark_root": "gson" + }, + { + "name": "commons-lang", + "language": "java", + "url": "https://github.com/apache/commons-lang.git", + "revision": "0ba92dc402312a38252a3398931ffbfbb4a88f7d", + "benchmark_root": "src/main/java/org/apache/commons/lang3" + }, + { + "name": "jackson-databind", + "language": "java", + "url": "https://github.com/FasterXML/jackson-databind.git", + "revision": "e30139539416f69f1d7ae31c7e1d6da5b25bf362", + "benchmark_root": "src/main/java/tools/jackson/databind" + }, + { + "name": "guzzle", + "language": "php", + "url": "https://github.com/guzzle/guzzle.git", + "revision": "fb92d95f80a9da51bf8f2a5b26d8e8ea3b6d99ed", + "benchmark_root": "src" + }, + { + "name": "monolog", + "language": "php", + "url": "https://github.com/Seldaek/monolog.git", + "revision": "68b974809baff3f071893de61447212e9e688ee7", + "benchmark_root": "src/Monolog" + }, + { + "name": "laravel-framework", + "language": "php", + "url": "https://github.com/laravel/framework.git", + "revision": "0dcc8d2ba7f41bc8376a08e9ccd5d7b83e6a6d90", + "benchmark_root": "src/Illuminate" + }, + { + "name": "sinatra", + "language": "ruby", + "url": "https://github.com/sinatra/sinatra.git", + "revision": "f891dd2b6f4911e356600efe6c3b82af97d262c6", + "benchmark_root": "lib" + }, + { + "name": "rack", + "language": "ruby", + "url": "https://github.com/rack/rack.git", + "revision": "ca8a404704ed043797c4f9d482c97d722c0dc719", + "benchmark_root": "lib/rack" + }, + { + "name": "rails", + "language": "ruby", + "url": "https://github.com/rails/rails.git", + "revision": "75f9e28379ac7418b82fa950cfa81f6147275308", + "benchmark_root": "railties/lib/rails" + }, + { + "name": "tokio", + "language": "rust", + "url": "https://github.com/tokio-rs/tokio.git", + "revision": "5db10f538b683fe88d699dfd11be31d193db011c", + "benchmark_root": "tokio/src" + }, + { + "name": "trpc", + "language": "typescript", + "url": "https://github.com/trpc/trpc.git", + "revision": "c188dab0822caf3615199e4ac95147bc7560d26f", + "benchmark_root": "packages/server/src" + }, + { + "name": "messagepack-csharp", + "language": "csharp", + "url": "https://github.com/neuecc/MessagePack-CSharp.git", + "revision": "84db9f79e3ecc5f4e8b7c7f77cd15d7745f5f2a7", + "benchmark_root": "src/MessagePack" + }, + { + "name": "ktor", + "language": "kotlin", + "url": "https://github.com/ktorio/ktor.git", + "revision": "5913745a96101e8c78e47565e52d2baa8414441f", + "benchmark_root": "ktor-client/ktor-client-core/common/src" + }, + { + "name": "cats", + "language": "scala", + "url": "https://github.com/typelevel/cats.git", + "revision": "2102251a2f24a6ee14e087fc5da7768d267f2d6e", + "benchmark_root": 
"core/src/main/scala" + }, + { + "name": "alamofire", + "language": "swift", + "url": "https://github.com/Alamofire/Alamofire.git", + "revision": "e938f8c66708e7352fc7e3512647fa54255b267a", + "benchmark_root": "Source" + } +] diff --git a/benchmarks/sync_repos.py b/benchmarks/sync_repos.py new file mode 100644 index 0000000..5ba3b24 --- /dev/null +++ b/benchmarks/sync_repos.py @@ -0,0 +1,69 @@ +from __future__ import annotations + +import argparse +import subprocess +import sys + +from benchmarks.common import BENCH_ROOT, load_repo_specs + + +def _parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser(description="Clone or update pinned benchmark repositories.") + parser.add_argument("--repo", action="append", default=[], help="Restrict to one or more repo names.") + parser.add_argument("--check", action="store_true", help="Only verify local checkouts against pinned revisions.") + return parser.parse_args() + + +def _run(*args: str) -> None: + subprocess.run(args, check=True) + + +def _output(*args: str) -> str: + return subprocess.check_output(args, text=True).strip() + + +def _sync_repo(name: str, url: str, revision: str) -> None: + repo_dir = BENCH_ROOT / name + if not repo_dir.exists(): + print(f"cloning {name} -> {repo_dir}") + _run("git", "clone", url, str(repo_dir)) + print(f"syncing {name} @ {revision[:12]}") + _run("git", "-C", str(repo_dir), "fetch", "--depth", "1", "origin", revision) + _run("git", "-C", str(repo_dir), "checkout", "--detach", revision) + + +def _check_repo(name: str, revision: str) -> str | None: + repo_dir = BENCH_ROOT / name + if not (repo_dir / ".git").exists(): + return f"{name}: missing checkout at {repo_dir}" + head = _output("git", "-C", str(repo_dir), "rev-parse", "HEAD") + if head != revision: + return f"{name}: expected {revision}, found {head}" + return None + + +def main() -> None: + args = _parse_args() + specs = load_repo_specs() + selected = {name: spec for name, spec in specs.items() if not args.repo or name in args.repo} + BENCH_ROOT.mkdir(parents=True, exist_ok=True) + + if args.check: + problems = [ + problem for name, spec in selected.items() if (problem := _check_repo(name, spec.revision)) is not None + ] + if problems: + for problem in problems: + print(problem, file=sys.stderr) + raise SystemExit(1) + print(f"Verified {len(selected)} pinned repo(s).") + return + + for name, spec in selected.items(): + _sync_repo(name, spec.url, spec.revision) + + print(f"Synced {len(selected)} pinned repo(s).") + + +if __name__ == "__main__": + main() diff --git a/pyproject.toml b/pyproject.toml index 0ce4da0..8e5f165 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -70,7 +70,7 @@ target-version = "py310" [tool.ruff.lint.per-file-ignores] "tests/**" = ["ANN"] "src/semble/cli.py" = ["T20"] -"local/benchmarks/*.py" = ["T20", "D"] +"benchmarks/*.py" = ["T20", "D"] [tool.ruff.lint] select = [ From d2f98dbbe2051b68b42f3d3f0ed007fe7e27dcca Mon Sep 17 00:00:00 2001 From: Pringled Date: Wed, 15 Apr 2026 08:03:45 +0200 Subject: [PATCH 02/15] Fix benchmark review: decouple common from semble, skip missing repos in load_tasks, compute ndcg5 in cache mode --- benchmarks/bench_hybrid.py | 16 ++++++++++++---- benchmarks/common.py | 18 ++++-------------- 2 files changed, 16 insertions(+), 18 deletions(-) diff --git a/benchmarks/bench_hybrid.py b/benchmarks/bench_hybrid.py index b0ad205..e22817e 100644 --- a/benchmarks/bench_hybrid.py +++ b/benchmarks/bench_hybrid.py @@ -11,6 +11,7 @@ from model2vec import StaticModel from benchmarks.common import ( + 
Target, Task, apply_task_filters, available_repo_specs, @@ -18,7 +19,6 @@ grouped_tasks, load_tasks, target_matches_location, - target_rank, ) from semble import SembleIndex from semble.types import SearchResult @@ -29,6 +29,14 @@ _DIRECT_TOP_K = 10 +def _target_rank(results: list[SearchResult], target: Target) -> int | None: + for index, result in enumerate(results, 1): + chunk = result.chunk + if target_matches_location(chunk.file_path, chunk.start_line, chunk.end_line, target): + return index + return None + + @dataclass(frozen=True) class RepoResult: repo: str @@ -88,7 +96,7 @@ def _evaluate(index: SembleIndex, tasks: list[Task], *, verbose: bool = False) - chunk_results = results[:_DIRECT_TOP_K] relevant_ranks = [ - rank for target in task.all_relevant if (rank := target_rank(chunk_results, target)) is not None + rank for target in task.all_relevant if (rank := _target_rank(chunk_results, target)) is not None ] n_relevant = count_indexed_targets(index.chunks, task.all_relevant) q_ndcg5 = _ndcg_at_k(relevant_ranks, n_relevant, 5) @@ -203,12 +211,12 @@ def _bench_cache(repo_tasks: dict[str, list[Task]], model: StaticModel) -> list[ started = time.perf_counter() warm = SembleIndex.from_path(spec.benchmark_dir, model=model, cache_dir=_CACHE_DIR, model_name=_MODEL_NAME) warm_ms = (time.perf_counter() - started) * 1000 - _, ndcg10, p50_ms = _evaluate(warm, tasks) + ndcg5, ndcg10, p50_ms = _evaluate(warm, tasks) result = RepoResult( repo=repo, language=spec.language, chunks=len(cold.chunks), - ndcg5=0.0, + ndcg5=ndcg5, ndcg10=ndcg10, p50_ms=p50_ms, cold_ms=cold_ms, diff --git a/benchmarks/common.py b/benchmarks/common.py index c755633..379e632 100644 --- a/benchmarks/common.py +++ b/benchmarks/common.py @@ -5,8 +5,6 @@ from pathlib import Path from typing import Protocol, cast -from semble import Chunk - BENCH_ROOT = Path("/tmp/bench") BENCHMARKS_DIR = Path(__file__).parent ANNOTATIONS_DIR = BENCHMARKS_DIR / "annotations" @@ -30,10 +28,6 @@ class _ChunkLike(Protocol): end_line: int -class _ResultLike(Protocol): - chunk: Chunk - - @dataclass(frozen=True) class RepoSpec: name: str @@ -122,10 +116,14 @@ def load_tasks( tasks: list[Task] = [] annotation_files = sorted(path.glob("*.json")) if path.is_dir() else [path] for annotation_file in annotation_files: + if annotation_file.stem not in specs: + continue raw = json.loads(annotation_file.read_text(encoding="utf-8")) default_repo = annotation_file.stem for item in raw: repo = item.get("repo", default_repo) + if repo not in specs: + continue spec = specs[repo] category = item.get("category") tasks.append( @@ -171,14 +169,6 @@ def target_matches_location(file_path: str, start_line: int, end_line: int, targ return path_matches(file_path, target.path) and span_overlaps(start_line, end_line, target) -def target_rank(results: list[_ResultLike], target: Target) -> int | None: - for index, result in enumerate(results, 1): - chunk = result.chunk - if target_matches_location(chunk.file_path, chunk.start_line, chunk.end_line, target): - return index - return None - - def count_indexed_targets(chunks: list[_ChunkLike], targets: tuple[Target, ...]) -> int: return sum( 1 From 27175f4f109bac1bd1b1efdc8ce7041bd74e0b82 Mon Sep 17 00:00:00 2001 From: Pringled Date: Wed, 15 Apr 2026 08:08:49 +0200 Subject: [PATCH 03/15] Address code review: fix language table, ndcg5 in cache mode, raw shadowing, asserts, redundant slice --- benchmarks/README.md | 3 +++ benchmarks/bench_hybrid.py | 19 ++++++++----------- benchmarks/common.py | 10 +++++----- 3 files changed, 16 
insertions(+), 16 deletions(-) diff --git a/benchmarks/README.md b/benchmarks/README.md index 8d6d906..99c6c23 100644 --- a/benchmarks/README.md +++ b/benchmarks/README.md @@ -19,3 +19,6 @@ uv run python -m benchmarks.bench_hybrid --cache uv run python -m benchmarks.bench_hybrid --repo fastapi --repo axios uv run python -m benchmarks.bench_hybrid --language python ``` + +`--cache` measures cold vs warm index time. Warm time still includes the file walk and +BM25/Vicinity rebuild; only embedding is skipped. diff --git a/benchmarks/bench_hybrid.py b/benchmarks/bench_hybrid.py index e22817e..1acf182 100644 --- a/benchmarks/bench_hybrid.py +++ b/benchmarks/bench_hybrid.py @@ -42,9 +42,9 @@ class RepoResult: repo: str language: str chunks: int - ndcg5: float ndcg10: float p50_ms: float + ndcg5: float | None = None cold_ms: float | None = None warm_ms: float | None = None @@ -94,10 +94,7 @@ def _evaluate(index: SembleIndex, tasks: list[Task], *, verbose: bool = False) - query_latencies.append((time.perf_counter() - started) * 1000) latencies.append(sorted(query_latencies)[_LATENCY_RUNS // 2]) - chunk_results = results[:_DIRECT_TOP_K] - relevant_ranks = [ - rank for target in task.all_relevant if (rank := _target_rank(chunk_results, target)) is not None - ] + relevant_ranks = [rank for target in task.all_relevant if (rank := _target_rank(results, target)) is not None] n_relevant = count_indexed_targets(index.chunks, task.all_relevant) q_ndcg5 = _ndcg_at_k(relevant_ranks, n_relevant, 5) q_ndcg10 = _ndcg_at_k(relevant_ranks, n_relevant, 10) @@ -109,7 +106,7 @@ def _evaluate(index: SembleIndex, tasks: list[Task], *, verbose: bool = False) - targets_str = ", ".join( t.path if not t.start_line else f"{t.path}:{t.start_line}-{t.end_line}" for t in task.all_relevant ) - top_files = [r.chunk.file_path for r in chunk_results[:5]] + top_files = [r.chunk.file_path for r in results[:5]] print( f" [{cat:<12}] ndcg@10={q_ndcg10:.3f} ranks={relevant_ranks} n_rel={n_relevant} q={task.query!r}", file=sys.stderr, @@ -128,9 +125,11 @@ def _print_group_summary(results: list[RepoResult], group_by: str) -> None: groups = sorted({getattr(result, group_by) for result in results}) for value in groups: grouped = [result for result in results if getattr(result, group_by) == value] + ndcg5_values = [r.ndcg5 for r in grouped if r.ndcg5 is not None] + ndcg5_str = f" ndcg@5={sum(ndcg5_values) / len(ndcg5_values):.3f}" if ndcg5_values else "" print( " " - + f"{value}: repos={len(grouped)} ndcg@5={sum(r.ndcg5 for r in grouped) / len(grouped):.3f}" + + f"{value}: repos={len(grouped)}{ndcg5_str}" + f" ndcg@10={sum(r.ndcg10 for r in grouped) / len(grouped):.3f}" + f" p50={sum(r.p50_ms for r in grouped) / len(grouped):.2f}ms", file=sys.stderr, @@ -138,8 +137,7 @@ def _print_group_summary(results: list[RepoResult], group_by: str) -> None: def _print_language_table(results: list[RepoResult]) -> None: - languages = ["python", "javascript", "java", "go", "php", "ruby"] - present = [language for language in languages if any(result.language == language for result in results)] + present = sorted({result.language for result in results}) columns = ["Avg", *[language.title() for language in present]] avg_ndcg10 = sum(result.ndcg10 for result in results) / len(results) @@ -211,12 +209,11 @@ def _bench_cache(repo_tasks: dict[str, list[Task]], model: StaticModel) -> list[ started = time.perf_counter() warm = SembleIndex.from_path(spec.benchmark_dir, model=model, cache_dir=_CACHE_DIR, model_name=_MODEL_NAME) warm_ms = (time.perf_counter() - 
started) * 1000 - ndcg5, ndcg10, p50_ms = _evaluate(warm, tasks) + _, ndcg10, p50_ms = _evaluate(warm, tasks) result = RepoResult( repo=repo, language=spec.language, chunks=len(cold.chunks), - ndcg5=ndcg5, ndcg10=ndcg10, p50_ms=p50_ms, cold_ms=cold_ms, diff --git a/benchmarks/common.py b/benchmarks/common.py index 379e632..62c0b7c 100644 --- a/benchmarks/common.py +++ b/benchmarks/common.py @@ -131,8 +131,8 @@ def load_tasks( repo=repo, language=spec.language, query=item["query"], - relevant=tuple(_parse_target(raw) for raw in item.get("relevant", [])), - secondary=tuple(_parse_target(raw) for raw in item.get("secondary", [])), + relevant=tuple(_parse_target(t) for t in item.get("relevant", [])), + secondary=tuple(_parse_target(t) for t in item.get("secondary", [])), category=category if isinstance(category, str) else infer_category(item["query"]), category_inferred=category is None, ) @@ -160,9 +160,9 @@ def path_matches(file_path: str, relative_path: str) -> bool: def span_overlaps(start_line: int, end_line: int, target: Target) -> bool: if not target.has_span: return True - assert target.start_line is not None - assert target.end_line is not None - return not (end_line < target.start_line or start_line > target.end_line) + target_start: int = target.start_line # type: ignore[assignment] + target_end: int = target.end_line # type: ignore[assignment] + return not (end_line < target_start or start_line > target_end) def target_matches_location(file_path: str, start_line: int, end_line: int, target: Target) -> bool: From c0a0541b37e8b708783b84120fbd58bfcffabd47 Mon Sep 17 00:00:00 2001 From: Pringled Date: Wed, 15 Apr 2026 08:17:56 +0200 Subject: [PATCH 04/15] Architecture review: remove dead code, fold helpers, pass specs, clean up common.py --- benchmarks/bench_hybrid.py | 23 ++++++++---------- benchmarks/common.py | 50 ++++++++++++-------------------------- 2 files changed, 26 insertions(+), 47 deletions(-) diff --git a/benchmarks/bench_hybrid.py b/benchmarks/bench_hybrid.py index 1acf182..c197da5 100644 --- a/benchmarks/bench_hybrid.py +++ b/benchmarks/bench_hybrid.py @@ -11,6 +11,7 @@ from model2vec import StaticModel from benchmarks.common import ( + RepoSpec, Target, Task, apply_task_filters, @@ -58,14 +59,6 @@ def _parse_args() -> argparse.Namespace: return parser.parse_args() -def _is_relevant(result: SearchResult, task: Task) -> bool: - chunk = result.chunk - return any( - target_matches_location(chunk.file_path, chunk.start_line, chunk.end_line, target) - for target in task.all_relevant - ) - - def _dcg(relevances: list[int]) -> float: return sum(rel / math.log2(i + 2) for i, rel in enumerate(relevances)) @@ -161,14 +154,15 @@ def _print_language_table(results: list[RepoResult]) -> None: print(f" {'q-p50':<28} " + " ".join(p50_row), file=sys.stderr) -def _bench_quality(repo_tasks: dict[str, list[Task]], model: StaticModel, *, verbose: bool = False) -> list[RepoResult]: +def _bench_quality( + repo_tasks: dict[str, list[Task]], model: StaticModel, specs: dict[str, RepoSpec], *, verbose: bool = False +) -> list[RepoResult]: print( f"{'Repo':<12} {'language':<12} {'chunks':>6} {'index':>9} {'NDCG@5':>8} {'NDCG@10':>8} {'p50':>8}", file=sys.stderr, ) print(f"{'-' * 12} {'-' * 12} {'-' * 6} {'-' * 9} {'-' * 8} {'-' * 8} {'-' * 8}", file=sys.stderr) results: list[RepoResult] = [] - specs = available_repo_specs() for repo, tasks in sorted(repo_tasks.items()): spec = specs[repo] started = time.perf_counter() @@ -186,7 +180,7 @@ def _bench_quality(repo_tasks: dict[str, list[Task]], model: 
StaticModel, *, ver return results -def _bench_cache(repo_tasks: dict[str, list[Task]], model: StaticModel) -> list[RepoResult]: +def _bench_cache(repo_tasks: dict[str, list[Task]], model: StaticModel, specs: dict[str, RepoSpec]) -> list[RepoResult]: _CACHE_DIR.mkdir(parents=True, exist_ok=True) print(f"Cache dir: {_CACHE_DIR}", file=sys.stderr) print(file=sys.stderr) @@ -196,7 +190,6 @@ def _bench_cache(repo_tasks: dict[str, list[Task]], model: StaticModel) -> list[ ) print(f"{'-' * 12} {'-' * 12} {'-' * 6} {'-' * 9} {'-' * 9} {'-' * 8} {'-' * 8}", file=sys.stderr) results: list[RepoResult] = [] - specs = available_repo_specs() model_ns = _MODEL_NAME.replace("/", "--") for repo, tasks in sorted(repo_tasks.items()): spec = specs[repo] @@ -244,7 +237,11 @@ def main() -> None: print(f"Loaded in {(time.perf_counter() - started) * 1000:.0f} ms", file=sys.stderr) print(file=sys.stderr) repo_tasks = grouped_tasks(tasks) - results = _bench_cache(repo_tasks, model) if args.cache else _bench_quality(repo_tasks, model, verbose=args.verbose) + results = ( + _bench_cache(repo_tasks, model, repo_specs) + if args.cache + else _bench_quality(repo_tasks, model, repo_specs, verbose=args.verbose) + ) _print_group_summary(results, "language") _print_language_table(results) diff --git a/benchmarks/common.py b/benchmarks/common.py index 62c0b7c..3b64bf9 100644 --- a/benchmarks/common.py +++ b/benchmarks/common.py @@ -1,9 +1,10 @@ from __future__ import annotations import json +from collections import defaultdict from dataclasses import dataclass from pathlib import Path -from typing import Protocol, cast +from typing import Protocol BENCH_ROOT = Path("/tmp/bench") BENCHMARKS_DIR = Path(__file__).parent @@ -75,16 +76,11 @@ def _coerce_int(value: object) -> int: return int(value) -def _coerce_mapping(raw: object) -> dict[str, object]: - if not isinstance(raw, dict): - raise TypeError(f"expected mapping, got {type(raw).__name__}") - return cast(dict[str, object], raw) - - def _parse_target(raw: str | dict[str, object]) -> Target: if isinstance(raw, str): return Target(path=raw) - raw = _coerce_mapping(raw) + if not isinstance(raw, dict): + raise TypeError(f"expected mapping, got {type(raw).__name__}") start_line = raw.get("start_line") end_line = raw.get("end_line") return Target( @@ -108,14 +104,10 @@ def available_repo_specs(repo_specs: dict[str, RepoSpec] | None = None) -> dict[ } -def load_tasks( - path: Path = ANNOTATIONS_DIR, - repo_specs: dict[str, RepoSpec] | None = None, -) -> list[Task]: +def load_tasks(repo_specs: dict[str, RepoSpec] | None = None) -> list[Task]: specs = load_repo_specs() if repo_specs is None else repo_specs tasks: list[Task] = [] - annotation_files = sorted(path.glob("*.json")) if path.is_dir() else [path] - for annotation_file in annotation_files: + for annotation_file in sorted(ANNOTATIONS_DIR.glob("*.json")): if annotation_file.stem not in specs: continue raw = json.loads(annotation_file.read_text(encoding="utf-8")) @@ -144,29 +136,19 @@ def apply_task_filters( tasks: list[Task], repos: list[str] | None = None, languages: list[str] | None = None, - limit: int | None = None, ) -> list[Task]: filtered = [task for task in tasks if not repos or task.repo in repos] - filtered = [task for task in filtered if not languages or task.language in languages] - return filtered if limit is None else filtered[:limit] + return [task for task in filtered if not languages or task.language in languages] -def path_matches(file_path: str, relative_path: str) -> bool: - normalized_file = 
file_path.replace("\\", "/") - normalized_relative = relative_path.replace("\\", "/") - return normalized_file == normalized_relative or normalized_file.endswith(f"/{normalized_relative}") - - -def span_overlaps(start_line: int, end_line: int, target: Target) -> bool: +def target_matches_location(file_path: str, start_line: int, end_line: int, target: Target) -> bool: + norm_file = file_path.replace("\\", "/") + norm_target = target.path.replace("\\", "/") + if not (norm_file == norm_target or norm_file.endswith(f"/{norm_target}")): + return False if not target.has_span: return True - target_start: int = target.start_line # type: ignore[assignment] - target_end: int = target.end_line # type: ignore[assignment] - return not (end_line < target_start or start_line > target_end) - - -def target_matches_location(file_path: str, start_line: int, end_line: int, target: Target) -> bool: - return path_matches(file_path, target.path) and span_overlaps(start_line, end_line, target) + return not (end_line < target.start_line or start_line > target.end_line) # type: ignore[operator] def count_indexed_targets(chunks: list[_ChunkLike], targets: tuple[Target, ...]) -> int: @@ -178,7 +160,7 @@ def count_indexed_targets(chunks: list[_ChunkLike], targets: tuple[Target, ...]) def grouped_tasks(tasks: list[Task]) -> dict[str, list[Task]]: - grouped: dict[str, list[Task]] = {} + result: dict[str, list[Task]] = defaultdict(list) for task in tasks: - grouped.setdefault(task.repo, []).append(task) - return grouped + result[task.repo].append(task) + return dict(result) From 570a5baf6d2740ce066df49802d03485db1fdfe3 Mon Sep 17 00:00:00 2001 From: Pringled Date: Wed, 15 Apr 2026 08:24:51 +0200 Subject: [PATCH 05/15] Simplification pass: inline helpers, remove dead fields, merge summary functions --- benchmarks/bench_hybrid.py | 58 ++++++++++++++++---------------------- benchmarks/common.py | 15 ++-------- benchmarks/sync_repos.py | 12 +++----- 3 files changed, 31 insertions(+), 54 deletions(-) diff --git a/benchmarks/bench_hybrid.py b/benchmarks/bench_hybrid.py index c197da5..f7e0933 100644 --- a/benchmarks/bench_hybrid.py +++ b/benchmarks/bench_hybrid.py @@ -17,7 +17,6 @@ apply_task_filters, available_repo_specs, count_indexed_targets, - grouped_tasks, load_tasks, target_matches_location, ) @@ -50,15 +49,6 @@ class RepoResult: warm_ms: float | None = None -def _parse_args() -> argparse.Namespace: - parser = argparse.ArgumentParser(description="Benchmark hybrid semble search across the pinned benchmark repos.") - parser.add_argument("--cache", action="store_true", help="Show cold vs warm index time using the disk cache.") - parser.add_argument("--repo", action="append", default=[], help="Limit to one or more repo names.") - parser.add_argument("--language", action="append", default=[], help="Limit to one or more languages.") - parser.add_argument("--verbose", action="store_true", help="Print per-query results.") - return parser.parse_args() - - def _dcg(relevances: list[int]) -> float: return sum(rel / math.log2(i + 2) for i, rel in enumerate(relevances)) @@ -112,30 +102,26 @@ def _evaluate(index: SembleIndex, tasks: list[Task], *, verbose: bool = False) - return ndcg5_sum / total, ndcg10_sum / total, latencies[len(latencies) // 2] -def _print_group_summary(results: list[RepoResult], group_by: str) -> None: +def _print_summary(results: list[RepoResult]) -> None: + languages = sorted({result.language for result in results}) + columns = ["Avg", *[lang.title() for lang in languages]] + + avg_ndcg10 = sum(r.ndcg10 for r 
in results) / len(results) + avg_p50 = sum(r.p50_ms for r in results) / len(results) + print(file=sys.stderr) - print(f"By {group_by}", file=sys.stderr) - groups = sorted({getattr(result, group_by) for result in results}) - for value in groups: - grouped = [result for result in results if getattr(result, group_by) == value] + print("By language", file=sys.stderr) + for language in languages: + grouped = [r for r in results if r.language == language] ndcg5_values = [r.ndcg5 for r in grouped if r.ndcg5 is not None] ndcg5_str = f" ndcg@5={sum(ndcg5_values) / len(ndcg5_values):.3f}" if ndcg5_values else "" print( - " " - + f"{value}: repos={len(grouped)}{ndcg5_str}" + f" {language}: repos={len(grouped)}{ndcg5_str}" + f" ndcg@10={sum(r.ndcg10 for r in grouped) / len(grouped):.3f}" + f" p50={sum(r.p50_ms for r in grouped) / len(grouped):.2f}ms", file=sys.stderr, ) - -def _print_language_table(results: list[RepoResult]) -> None: - present = sorted({result.language for result in results}) - columns = ["Avg", *[language.title() for language in present]] - - avg_ndcg10 = sum(result.ndcg10 for result in results) / len(results) - avg_p50 = sum(result.p50_ms for result in results) / len(results) - print(file=sys.stderr) print(f"{'=' * 104}", file=sys.stderr) print("Hybrid benchmark by language", file=sys.stderr) @@ -145,10 +131,10 @@ def _print_language_table(results: list[RepoResult]) -> None: ndcg_row = [f"{avg_ndcg10:>9.3f}"] p50_row = [f"{avg_p50:>8.2f}ms"] - for language in present: - language_results = [result for result in results if result.language == language] - ndcg_row.append(f"{sum(result.ndcg10 for result in language_results) / len(language_results):>9.3f}") - p50_row.append(f"{sum(result.p50_ms for result in language_results) / len(language_results):>8.2f}ms") + for language in languages: + language_results = [r for r in results if r.language == language] + ndcg_row.append(f"{sum(r.ndcg10 for r in language_results) / len(language_results):>9.3f}") + p50_row.append(f"{sum(r.p50_ms for r in language_results) / len(language_results):>8.2f}ms") print(f" {'NDCG@10':<28} " + " ".join(ndcg_row), file=sys.stderr) print(f" {'q-p50':<28} " + " ".join(p50_row), file=sys.stderr) @@ -224,7 +210,12 @@ def _bench_cache(repo_tasks: dict[str, list[Task]], model: StaticModel, specs: d def main() -> None: - args = _parse_args() + parser = argparse.ArgumentParser(description="Benchmark hybrid semble search across the pinned benchmark repos.") + parser.add_argument("--cache", action="store_true", help="Show cold vs warm index time using the disk cache.") + parser.add_argument("--repo", action="append", default=[], help="Limit to one or more repo names.") + parser.add_argument("--language", action="append", default=[], help="Limit to one or more languages.") + parser.add_argument("--verbose", action="store_true", help="Print per-query results.") + args = parser.parse_args() repo_specs = available_repo_specs() tasks = apply_task_filters( load_tasks(repo_specs=repo_specs), repos=args.repo or None, languages=args.language or None @@ -236,14 +227,15 @@ def main() -> None: model = StaticModel.from_pretrained(_MODEL_NAME) print(f"Loaded in {(time.perf_counter() - started) * 1000:.0f} ms", file=sys.stderr) print(file=sys.stderr) - repo_tasks = grouped_tasks(tasks) + repo_tasks: dict[str, list[Task]] = {} + for task in tasks: + repo_tasks.setdefault(task.repo, []).append(task) results = ( _bench_cache(repo_tasks, model, repo_specs) if args.cache else _bench_quality(repo_tasks, model, repo_specs, verbose=args.verbose) 
) - _print_group_summary(results, "language") - _print_language_table(results) + _print_summary(results) if __name__ == "__main__": diff --git a/benchmarks/common.py b/benchmarks/common.py index 3b64bf9..fd6fa83 100644 --- a/benchmarks/common.py +++ b/benchmarks/common.py @@ -1,7 +1,6 @@ from __future__ import annotations import json -from collections import defaultdict from dataclasses import dataclass from pathlib import Path from typing import Protocol @@ -54,7 +53,6 @@ class Task: relevant: tuple[Target, ...] secondary: tuple[Target, ...] category: str - category_inferred: bool @property def all_relevant(self) -> tuple[Target, ...]: @@ -95,11 +93,10 @@ def load_repo_specs(path: Path = REPOS_PATH) -> dict[str, RepoSpec]: return {item["name"]: RepoSpec(**item) for item in raw} -def available_repo_specs(repo_specs: dict[str, RepoSpec] | None = None) -> dict[str, RepoSpec]: - specs = load_repo_specs() if repo_specs is None else repo_specs +def available_repo_specs() -> dict[str, RepoSpec]: return { name: spec - for name, spec in specs.items() + for name, spec in load_repo_specs().items() if spec.checkout_dir.exists() and (ANNOTATIONS_DIR / f"{name}.json").exists() } @@ -126,7 +123,6 @@ def load_tasks(repo_specs: dict[str, RepoSpec] | None = None) -> list[Task]: relevant=tuple(_parse_target(t) for t in item.get("relevant", [])), secondary=tuple(_parse_target(t) for t in item.get("secondary", [])), category=category if isinstance(category, str) else infer_category(item["query"]), - category_inferred=category is None, ) ) return tasks @@ -157,10 +153,3 @@ def count_indexed_targets(chunks: list[_ChunkLike], targets: tuple[Target, ...]) for target in targets if any(target_matches_location(chunk.file_path, chunk.start_line, chunk.end_line, target) for chunk in chunks) ) - - -def grouped_tasks(tasks: list[Task]) -> dict[str, list[Task]]: - result: dict[str, list[Task]] = defaultdict(list) - for task in tasks: - result[task.repo].append(task) - return dict(result) diff --git a/benchmarks/sync_repos.py b/benchmarks/sync_repos.py index 5ba3b24..8b008f4 100644 --- a/benchmarks/sync_repos.py +++ b/benchmarks/sync_repos.py @@ -7,13 +7,6 @@ from benchmarks.common import BENCH_ROOT, load_repo_specs -def _parse_args() -> argparse.Namespace: - parser = argparse.ArgumentParser(description="Clone or update pinned benchmark repositories.") - parser.add_argument("--repo", action="append", default=[], help="Restrict to one or more repo names.") - parser.add_argument("--check", action="store_true", help="Only verify local checkouts against pinned revisions.") - return parser.parse_args() - - def _run(*args: str) -> None: subprocess.run(args, check=True) @@ -43,7 +36,10 @@ def _check_repo(name: str, revision: str) -> str | None: def main() -> None: - args = _parse_args() + parser = argparse.ArgumentParser(description="Clone or update pinned benchmark repositories.") + parser.add_argument("--repo", action="append", default=[], help="Restrict to one or more repo names.") + parser.add_argument("--check", action="store_true", help="Only verify local checkouts against pinned revisions.") + args = parser.parse_args() specs = load_repo_specs() selected = {name: spec for name, spec in specs.items() if not args.repo or name in args.repo} BENCH_ROOT.mkdir(parents=True, exist_ok=True) From f4a87571e5e342f11a0332a13c87d3ba836755ab Mon Sep 17 00:00:00 2001 From: Pringled Date: Wed, 15 Apr 2026 08:27:06 +0200 Subject: [PATCH 06/15] Rename bench_hybrid.py to run_benchmark.py --- benchmarks/README.md | 8 ++++---- 
benchmarks/{bench_hybrid.py => run_benchmark.py} | 0 2 files changed, 4 insertions(+), 4 deletions(-) rename benchmarks/{bench_hybrid.py => run_benchmark.py} (100%) diff --git a/benchmarks/README.md b/benchmarks/README.md index 99c6c23..73194fc 100644 --- a/benchmarks/README.md +++ b/benchmarks/README.md @@ -14,10 +14,10 @@ uv run python -m benchmarks.sync_repos --check ## Run ```bash -uv run python -m benchmarks.bench_hybrid -uv run python -m benchmarks.bench_hybrid --cache -uv run python -m benchmarks.bench_hybrid --repo fastapi --repo axios -uv run python -m benchmarks.bench_hybrid --language python +uv run python -m benchmarks.run_benchmark +uv run python -m benchmarks.run_benchmark --cache +uv run python -m benchmarks.run_benchmark --repo fastapi --repo axios +uv run python -m benchmarks.run_benchmark --language python ``` `--cache` measures cold vs warm index time. Warm time still includes the file walk and diff --git a/benchmarks/bench_hybrid.py b/benchmarks/run_benchmark.py similarity index 100% rename from benchmarks/bench_hybrid.py rename to benchmarks/run_benchmark.py From 75f6823eaf17dfeb2689d241e987005fdb01def0 Mon Sep 17 00:00:00 2001 From: Pringled Date: Wed, 15 Apr 2026 08:39:34 +0200 Subject: [PATCH 07/15] Add docstrings to benchmarks and enforce with ruff Add one-liner docstrings to all functions and methods across benchmarks/common.py, run_benchmark.py, and sync_repos.py. Remove the D ruff ignore for benchmarks/*.py so docstrings are enforced going forward. Also moves count_indexed_targets into run_benchmark.py (where Chunk is imported) to fix a pre-existing mypy Protocol error in the pre-commit env. --- benchmarks/common.py | 27 ++++++++++++--------------- benchmarks/run_benchmark.py | 20 ++++++++++++++++++-- benchmarks/sync_repos.py | 5 +++++ pyproject.toml | 2 +- 4 files changed, 36 insertions(+), 18 deletions(-) diff --git a/benchmarks/common.py b/benchmarks/common.py index fd6fa83..5a03286 100644 --- a/benchmarks/common.py +++ b/benchmarks/common.py @@ -3,7 +3,6 @@ import json from dataclasses import dataclass from pathlib import Path -from typing import Protocol BENCH_ROOT = Path("/tmp/bench") BENCHMARKS_DIR = Path(__file__).parent @@ -19,15 +18,10 @@ class Target: @property def has_span(self) -> bool: + """Return True if both start_line and end_line are set.""" return self.start_line is not None and self.end_line is not None -class _ChunkLike(Protocol): - file_path: str - start_line: int - end_line: int - - @dataclass(frozen=True) class RepoSpec: name: str @@ -38,10 +32,12 @@ class RepoSpec: @property def checkout_dir(self) -> Path: + """Return the local checkout directory for this repo.""" return BENCH_ROOT / self.name @property def benchmark_dir(self) -> Path: + """Return the root directory to index for benchmarking.""" return self.checkout_dir if self.benchmark_root is None else self.checkout_dir / self.benchmark_root @@ -56,10 +52,12 @@ class Task: @property def all_relevant(self) -> tuple[Target, ...]: + """Return primary and secondary relevant targets combined.""" return self.relevant + self.secondary def infer_category(query: str) -> str: + """Infer a task category from the query text.""" if " " not in query.strip(): return "symbol" lowered = query.lower() @@ -69,12 +67,14 @@ def infer_category(query: str) -> str: def _coerce_int(value: object) -> int: + """Coerce a string or int value to int, raising TypeError otherwise.""" if not isinstance(value, int | str): raise TypeError(f"expected int-compatible value, got {type(value).__name__}") return int(value) 
def _parse_target(raw: str | dict[str, object]) -> Target: + """Parse a target from a string path or a mapping with optional line span.""" if isinstance(raw, str): return Target(path=raw) if not isinstance(raw, dict): @@ -89,11 +89,13 @@ def _parse_target(raw: str | dict[str, object]) -> Target: def load_repo_specs(path: Path = REPOS_PATH) -> dict[str, RepoSpec]: + """Load all repo specs from the JSON file at the given path.""" raw = json.loads(path.read_text(encoding="utf-8")) return {item["name"]: RepoSpec(**item) for item in raw} def available_repo_specs() -> dict[str, RepoSpec]: + """Return only the repo specs that have a local checkout and annotation file.""" return { name: spec for name, spec in load_repo_specs().items() @@ -102,6 +104,7 @@ def available_repo_specs() -> dict[str, RepoSpec]: def load_tasks(repo_specs: dict[str, RepoSpec] | None = None) -> list[Task]: + """Load all benchmark tasks from annotation files, filtered to available repo specs.""" specs = load_repo_specs() if repo_specs is None else repo_specs tasks: list[Task] = [] for annotation_file in sorted(ANNOTATIONS_DIR.glob("*.json")): @@ -133,11 +136,13 @@ def apply_task_filters( repos: list[str] | None = None, languages: list[str] | None = None, ) -> list[Task]: + """Filter tasks to the given repos and/or languages; None means no filter.""" filtered = [task for task in tasks if not repos or task.repo in repos] return [task for task in filtered if not languages or task.language in languages] def target_matches_location(file_path: str, start_line: int, end_line: int, target: Target) -> bool: + """Return True if the chunk at file_path:start_line-end_line covers the target.""" norm_file = file_path.replace("\\", "/") norm_target = target.path.replace("\\", "/") if not (norm_file == norm_target or norm_file.endswith(f"/{norm_target}")): @@ -145,11 +150,3 @@ def target_matches_location(file_path: str, start_line: int, end_line: int, targ if not target.has_span: return True return not (end_line < target.start_line or start_line > target.end_line) # type: ignore[operator] - - -def count_indexed_targets(chunks: list[_ChunkLike], targets: tuple[Target, ...]) -> int: - return sum( - 1 - for target in targets - if any(target_matches_location(chunk.file_path, chunk.start_line, chunk.end_line, target) for chunk in chunks) - ) diff --git a/benchmarks/run_benchmark.py b/benchmarks/run_benchmark.py index f7e0933..0f4cd4e 100644 --- a/benchmarks/run_benchmark.py +++ b/benchmarks/run_benchmark.py @@ -16,12 +16,11 @@ Task, apply_task_filters, available_repo_specs, - count_indexed_targets, load_tasks, target_matches_location, ) from semble import SembleIndex -from semble.types import SearchResult +from semble.types import Chunk, SearchResult _CACHE_DIR = Path("/tmp/semble-bench-cache") _MODEL_NAME = "Pringled/potion-code-16M" @@ -29,7 +28,17 @@ _DIRECT_TOP_K = 10 +def count_indexed_targets(chunks: list[Chunk], targets: tuple[Target, ...]) -> int: + """Count how many targets are covered by at least one chunk in the index.""" + return sum( + 1 + for target in targets + if any(target_matches_location(chunk.file_path, chunk.start_line, chunk.end_line, target) for chunk in chunks) + ) + + def _target_rank(results: list[SearchResult], target: Target) -> int | None: + """Return the 1-based rank of the first result covering target, or None.""" for index, result in enumerate(results, 1): chunk = result.chunk if target_matches_location(chunk.file_path, chunk.start_line, chunk.end_line, target): @@ -50,10 +59,12 @@ class RepoResult: def 
_dcg(relevances: list[int]) -> float: + """Compute Discounted Cumulative Gain for a ranked relevance list.""" return sum(rel / math.log2(i + 2) for i, rel in enumerate(relevances)) def _ndcg_at_k(relevant_ranks: list[int], n_relevant: int, k: int) -> float: + """Compute NDCG@k given the ranks of relevant results and the total relevant count.""" if n_relevant == 0: return 0.0 relevances = [0] * k @@ -65,6 +76,7 @@ def _ndcg_at_k(relevant_ranks: list[int], n_relevant: int, k: int) -> float: def _evaluate(index: SembleIndex, tasks: list[Task], *, verbose: bool = False) -> tuple[float, float, float]: + """Return mean NDCG@5, NDCG@10, and median query latency (ms) across all tasks.""" ndcg5_sum = 0.0 ndcg10_sum = 0.0 latencies: list[float] = [] @@ -103,6 +115,7 @@ def _evaluate(index: SembleIndex, tasks: list[Task], *, verbose: bool = False) - def _print_summary(results: list[RepoResult]) -> None: + """Print per-language and overall benchmark summary to stderr.""" languages = sorted({result.language for result in results}) columns = ["Avg", *[lang.title() for lang in languages]] @@ -143,6 +156,7 @@ def _print_summary(results: list[RepoResult]) -> None: def _bench_quality( repo_tasks: dict[str, list[Task]], model: StaticModel, specs: dict[str, RepoSpec], *, verbose: bool = False ) -> list[RepoResult]: + """Run quality benchmarks (NDCG@5, NDCG@10, latency) for each repo.""" print( f"{'Repo':<12} {'language':<12} {'chunks':>6} {'index':>9} {'NDCG@5':>8} {'NDCG@10':>8} {'p50':>8}", file=sys.stderr, @@ -167,6 +181,7 @@ def _bench_quality( def _bench_cache(repo_tasks: dict[str, list[Task]], model: StaticModel, specs: dict[str, RepoSpec]) -> list[RepoResult]: + """Run cold vs warm index timing benchmarks using the disk embedding cache.""" _CACHE_DIR.mkdir(parents=True, exist_ok=True) print(f"Cache dir: {_CACHE_DIR}", file=sys.stderr) print(file=sys.stderr) @@ -210,6 +225,7 @@ def _bench_cache(repo_tasks: dict[str, list[Task]], model: StaticModel, specs: d def main() -> None: + """Parse arguments and run the selected benchmark mode.""" parser = argparse.ArgumentParser(description="Benchmark hybrid semble search across the pinned benchmark repos.") parser.add_argument("--cache", action="store_true", help="Show cold vs warm index time using the disk cache.") parser.add_argument("--repo", action="append", default=[], help="Limit to one or more repo names.") diff --git a/benchmarks/sync_repos.py b/benchmarks/sync_repos.py index 8b008f4..8a1cb0d 100644 --- a/benchmarks/sync_repos.py +++ b/benchmarks/sync_repos.py @@ -8,14 +8,17 @@ def _run(*args: str) -> None: + """Run a subprocess command, raising on non-zero exit.""" subprocess.run(args, check=True) def _output(*args: str) -> str: + """Run a subprocess command and return its stripped stdout.""" return subprocess.check_output(args, text=True).strip() def _sync_repo(name: str, url: str, revision: str) -> None: + """Clone the repo if absent, then fetch and checkout the pinned revision.""" repo_dir = BENCH_ROOT / name if not repo_dir.exists(): print(f"cloning {name} -> {repo_dir}") @@ -26,6 +29,7 @@ def _sync_repo(name: str, url: str, revision: str) -> None: def _check_repo(name: str, revision: str) -> str | None: + """Return an error string if the local checkout is missing or at the wrong revision.""" repo_dir = BENCH_ROOT / name if not (repo_dir / ".git").exists(): return f"{name}: missing checkout at {repo_dir}" @@ -36,6 +40,7 @@ def _check_repo(name: str, revision: str) -> str | None: def main() -> None: + """Parse arguments and sync or verify the pinned 
benchmark repositories.""" parser = argparse.ArgumentParser(description="Clone or update pinned benchmark repositories.") parser.add_argument("--repo", action="append", default=[], help="Restrict to one or more repo names.") parser.add_argument("--check", action="store_true", help="Only verify local checkouts against pinned revisions.") diff --git a/pyproject.toml b/pyproject.toml index 8e5f165..111905d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -70,7 +70,7 @@ target-version = "py310" [tool.ruff.lint.per-file-ignores] "tests/**" = ["ANN"] "src/semble/cli.py" = ["T20"] -"benchmarks/*.py" = ["T20", "D"] +"benchmarks/*.py" = ["T20"] [tool.ruff.lint] select = [ From 269c11b26243ffe31f07fa876bf76fb9851b6501 Mon Sep 17 00:00:00 2001 From: Pringled Date: Wed, 15 Apr 2026 08:53:33 +0200 Subject: [PATCH 08/15] Inline count_indexed_targets, pre-group by_language in _print_summary --- benchmarks/run_benchmark.py | 24 +++++++++--------------- 1 file changed, 9 insertions(+), 15 deletions(-) diff --git a/benchmarks/run_benchmark.py b/benchmarks/run_benchmark.py index 0f4cd4e..56ca981 100644 --- a/benchmarks/run_benchmark.py +++ b/benchmarks/run_benchmark.py @@ -20,7 +20,7 @@ target_matches_location, ) from semble import SembleIndex -from semble.types import Chunk, SearchResult +from semble.types import SearchResult _CACHE_DIR = Path("/tmp/semble-bench-cache") _MODEL_NAME = "Pringled/potion-code-16M" @@ -28,15 +28,6 @@ _DIRECT_TOP_K = 10 -def count_indexed_targets(chunks: list[Chunk], targets: tuple[Target, ...]) -> int: - """Count how many targets are covered by at least one chunk in the index.""" - return sum( - 1 - for target in targets - if any(target_matches_location(chunk.file_path, chunk.start_line, chunk.end_line, target) for chunk in chunks) - ) - - def _target_rank(results: list[SearchResult], target: Target) -> int | None: """Return the 1-based rank of the first result covering target, or None.""" for index, result in enumerate(results, 1): @@ -90,7 +81,11 @@ def _evaluate(index: SembleIndex, tasks: list[Task], *, verbose: bool = False) - latencies.append(sorted(query_latencies)[_LATENCY_RUNS // 2]) relevant_ranks = [rank for target in task.all_relevant if (rank := _target_rank(results, target)) is not None] - n_relevant = count_indexed_targets(index.chunks, task.all_relevant) + n_relevant = sum( + 1 + for target in task.all_relevant + if any(target_matches_location(c.file_path, c.start_line, c.end_line, target) for c in index.chunks) + ) q_ndcg5 = _ndcg_at_k(relevant_ranks, n_relevant, 5) q_ndcg10 = _ndcg_at_k(relevant_ranks, n_relevant, 10) ndcg5_sum += q_ndcg5 @@ -117,6 +112,7 @@ def _evaluate(index: SembleIndex, tasks: list[Task], *, verbose: bool = False) - def _print_summary(results: list[RepoResult]) -> None: """Print per-language and overall benchmark summary to stderr.""" languages = sorted({result.language for result in results}) + by_language = {lang: [r for r in results if r.language == lang] for lang in languages} columns = ["Avg", *[lang.title() for lang in languages]] avg_ndcg10 = sum(r.ndcg10 for r in results) / len(results) @@ -124,8 +120,7 @@ def _print_summary(results: list[RepoResult]) -> None: print(file=sys.stderr) print("By language", file=sys.stderr) - for language in languages: - grouped = [r for r in results if r.language == language] + for language, grouped in by_language.items(): ndcg5_values = [r.ndcg5 for r in grouped if r.ndcg5 is not None] ndcg5_str = f" ndcg@5={sum(ndcg5_values) / len(ndcg5_values):.3f}" if ndcg5_values else "" print( @@ -144,8 +139,7 
@@ def _print_summary(results: list[RepoResult]) -> None: ndcg_row = [f"{avg_ndcg10:>9.3f}"] p50_row = [f"{avg_p50:>8.2f}ms"] - for language in languages: - language_results = [r for r in results if r.language == language] + for language, language_results in by_language.items(): ndcg_row.append(f"{sum(r.ndcg10 for r in language_results) / len(language_results):>9.3f}") p50_row.append(f"{sum(r.p50_ms for r in language_results) / len(language_results):>8.2f}ms") From 2bd010b154d43e317e6de81e0ad00905eb3d5e9b Mon Sep 17 00:00:00 2001 From: Pringled Date: Wed, 15 Apr 2026 08:56:29 +0200 Subject: [PATCH 09/15] Drop from __future__ import annotations where not needed --- benchmarks/common.py | 2 -- benchmarks/run_benchmark.py | 2 -- benchmarks/sync_repos.py | 2 -- 3 files changed, 6 deletions(-) diff --git a/benchmarks/common.py b/benchmarks/common.py index 5a03286..0183254 100644 --- a/benchmarks/common.py +++ b/benchmarks/common.py @@ -1,5 +1,3 @@ -from __future__ import annotations - import json from dataclasses import dataclass from pathlib import Path diff --git a/benchmarks/run_benchmark.py b/benchmarks/run_benchmark.py index 56ca981..5147f02 100644 --- a/benchmarks/run_benchmark.py +++ b/benchmarks/run_benchmark.py @@ -1,5 +1,3 @@ -from __future__ import annotations - import argparse import math import shutil diff --git a/benchmarks/sync_repos.py b/benchmarks/sync_repos.py index 8a1cb0d..0deb767 100644 --- a/benchmarks/sync_repos.py +++ b/benchmarks/sync_repos.py @@ -1,5 +1,3 @@ -from __future__ import annotations - import argparse import subprocess import sys From 1c875897212799469327a8500a9dd534a8f19694 Mon Sep 17 00:00:00 2001 From: Pringled Date: Wed, 15 Apr 2026 09:06:43 +0200 Subject: [PATCH 10/15] Save benchmark results to benchmarks/results/.json Full runs (no --repo/--language filters) automatically write results to benchmarks/results/.json, keyed by the 12-char git SHA. The file includes the full SHA, model name, per-repo rows, language aggregates, and overall summary. Cache mode writes -cache.json. Filtered runs are not saved. 
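The saved files are plain JSON, so they can be inspected without any tooling. A minimal sketch of reading one back, using the results file committed in this patch and run from the repo root; the keys match what _save_results writes below:

```python
import json
from pathlib import Path

# Results file committed in this patch, keyed by the 12-char git SHA.
path = Path("benchmarks/results/2bd010b154d4.json")
data = json.loads(path.read_text(encoding="utf-8"))

print(f"{data['sha'][:12]}  model={data['model']}  cache_mode={data['cache_mode']}")
print(f"overall: ndcg@10={data['summary']['ndcg10']:.4f}  p50={data['summary']['p50_ms']:.3f}ms")
for language, agg in sorted(data["by_language"].items()):
    print(f"  {language:<12} repos={agg['repos']}  ndcg@10={agg['ndcg10']:.4f}  p50={agg['p50_ms']:.3f}ms")
```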
--- benchmarks/results/.gitkeep | 0 benchmarks/results/2bd010b154d4.json | 363 +++++++++++++++++++++++++++ benchmarks/run_benchmark.py | 43 +++- 3 files changed, 405 insertions(+), 1 deletion(-) create mode 100644 benchmarks/results/.gitkeep create mode 100644 benchmarks/results/2bd010b154d4.json diff --git a/benchmarks/results/.gitkeep b/benchmarks/results/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/benchmarks/results/2bd010b154d4.json b/benchmarks/results/2bd010b154d4.json new file mode 100644 index 0000000..b60d885 --- /dev/null +++ b/benchmarks/results/2bd010b154d4.json @@ -0,0 +1,363 @@ +{ + "sha": "2bd010b154d43e317e6de81e0ad00905eb3d5e9b", + "model": "Pringled/potion-code-16M", + "cache_mode": false, + "summary": { + "ndcg10": 0.8668, + "p50_ms": 0.697 + }, + "by_language": { + "csharp": { + "repos": 1, + "ndcg10": 0.8263, + "p50_ms": 1.883 + }, + "go": { + "repos": 3, + "ndcg10": 0.9515, + "p50_ms": 0.485 + }, + "java": { + "repos": 3, + "ndcg10": 0.842, + "p50_ms": 1.239 + }, + "javascript": { + "repos": 3, + "ndcg10": 0.9282, + "p50_ms": 0.386 + }, + "kotlin": { + "repos": 1, + "ndcg10": 0.7631, + "p50_ms": 0.832 + }, + "php": { + "repos": 3, + "ndcg10": 0.906, + "p50_ms": 0.867 + }, + "python": { + "repos": 8, + "ndcg10": 0.8233, + "p50_ms": 0.479 + }, + "ruby": { + "repos": 3, + "ndcg10": 0.8911, + "p50_ms": 0.578 + }, + "rust": { + "repos": 1, + "ndcg10": 0.8878, + "p50_ms": 0.868 + }, + "scala": { + "repos": 1, + "ndcg10": 0.8415, + "p50_ms": 0.829 + }, + "swift": { + "repos": 1, + "ndcg10": 0.9316, + "p50_ms": 0.496 + }, + "typescript": { + "repos": 1, + "ndcg10": 0.7431, + "p50_ms": 0.808 + } + }, + "repos": [ + { + "repo": "aiohttp", + "language": "python", + "chunks": 756, + "ndcg10": 0.7821229638714016, + "p50_ms": 0.5519580008694902, + "ndcg5": 0.7132626857513019, + "cold_ms": null, + "warm_ms": null + }, + { + "repo": "alamofire", + "language": "swift", + "chunks": 649, + "ndcg10": 0.9315768229529695, + "p50_ms": 0.4956250049872324, + "ndcg5": 0.900172569211564, + "cold_ms": null, + "warm_ms": null + }, + { + "repo": "axios", + "language": "javascript", + "chunks": 166, + "ndcg10": 0.9671522420975631, + "p50_ms": 0.5629590013995767, + "ndcg5": 0.9671522420975631, + "cold_ms": null, + "warm_ms": null + }, + { + "repo": "cats", + "language": "scala", + "chunks": 1254, + "ndcg10": 0.8414671964692401, + "p50_ms": 0.8292909988085739, + "ndcg5": 0.8157722039023972, + "cold_ms": null, + "warm_ms": null + }, + { + "repo": "chi", + "language": "go", + "chunks": 262, + "ndcg10": 0.9455120441745608, + "p50_ms": 0.6020420041750185, + "ndcg5": 0.9455120441745608, + "cold_ms": null, + "warm_ms": null + }, + { + "repo": "cobra", + "language": "go", + "chunks": 394, + "ndcg10": 0.970068981106951, + "p50_ms": 0.3933749976567924, + "ndcg5": 0.970068981106951, + "cold_ms": null, + "warm_ms": null + }, + { + "repo": "commons-lang", + "language": "java", + "chunks": 3152, + "ndcg10": 0.8052591049306037, + "p50_ms": 0.8624999973108061, + "ndcg5": 0.7688578654609097, + "cold_ms": null, + "warm_ms": null + }, + { + "repo": "express", + "language": "javascript", + "chunks": 52, + "ndcg10": 0.9593872208972474, + "p50_ms": 0.2536250030971132, + "ndcg5": 0.9593872208972474, + "cold_ms": null, + "warm_ms": null + }, + { + "repo": "fastapi", + "language": "python", + "chunks": 597, + "ndcg10": 0.7693095302894921, + "p50_ms": 0.4379579986562021, + "ndcg5": 0.7314364449312006, + "cold_ms": null, + "warm_ms": null + }, + { + "repo": "flask", + "language": "python", + "chunks": 
291, + "ndcg10": 0.8767012186349079, + "p50_ms": 0.42783399840118363, + "ndcg5": 0.8570900833760776, + "cold_ms": null, + "warm_ms": null + }, + { + "repo": "gin", + "language": "go", + "chunks": 576, + "ndcg10": 0.939064318485603, + "p50_ms": 0.45987500197952613, + "ndcg5": 0.8807555442147937, + "cold_ms": null, + "warm_ms": null + }, + { + "repo": "gson", + "language": "java", + "chunks": 1460, + "ndcg10": 0.9261859507142916, + "p50_ms": 1.0446250016684644, + "ndcg5": 0.9261859507142916, + "cold_ms": null, + "warm_ms": null + }, + { + "repo": "guzzle", + "language": "php", + "chunks": 206, + "ndcg10": 0.844506786325837, + "p50_ms": 0.5477500017150305, + "ndcg5": 0.8326908338735671, + "cold_ms": null, + "warm_ms": null + }, + { + "repo": "httpx", + "language": "python", + "chunks": 248, + "ndcg10": 0.871159099521697, + "p50_ms": 0.44858400360681117, + "ndcg5": 0.8519694264932337, + "cold_ms": null, + "warm_ms": null + }, + { + "repo": "jackson-databind", + "language": "java", + "chunks": 4570, + "ndcg10": 0.7944291752941182, + "p50_ms": 1.810874993680045, + "ndcg5": 0.7667968319202225, + "cold_ms": null, + "warm_ms": null + }, + { + "repo": "ktor", + "language": "kotlin", + "chunks": 425, + "ndcg10": 0.7630927329648237, + "p50_ms": 0.8315000013681129, + "ndcg5": 0.726275662513606, + "cold_ms": null, + "warm_ms": null + }, + { + "repo": "laravel-framework", + "language": "php", + "chunks": 6197, + "ndcg10": 0.967888315659275, + "p50_ms": 1.207750006869901, + "ndcg5": 0.967888315659275, + "cold_ms": null, + "warm_ms": null + }, + { + "repo": "messagepack-csharp", + "language": "csharp", + "chunks": 1125, + "ndcg10": 0.8262866007393468, + "p50_ms": 1.882749995274935, + "ndcg5": 0.8164536328001585, + "cold_ms": null, + "warm_ms": null + }, + { + "repo": "model2vec", + "language": "python", + "chunks": 107, + "ndcg10": 0.695271294655741, + "p50_ms": 0.4195000001345761, + "ndcg5": 0.6593701861221591, + "cold_ms": null, + "warm_ms": null + }, + { + "repo": "monolog", + "language": "php", + "chunks": 417, + "ndcg10": 0.9055096182921145, + "p50_ms": 0.8463749982183799, + "ndcg5": 0.9055096182921145, + "cold_ms": null, + "warm_ms": null + }, + { + "repo": "pydantic", + "language": "python", + "chunks": 1518, + "ndcg10": 0.7070408064407742, + "p50_ms": 0.6517500005429611, + "ndcg5": 0.6795591269045096, + "cold_ms": null, + "warm_ms": null + }, + { + "repo": "rack", + "language": "ruby", + "chunks": 249, + "ndcg10": 1.0, + "p50_ms": 0.4828749952139333, + "ndcg5": 1.0, + "cold_ms": null, + "warm_ms": null + }, + { + "repo": "rails", + "language": "ruby", + "chunks": 465, + "ndcg10": 0.8346443747935481, + "p50_ms": 0.9173330035991967, + "ndcg5": 0.7466134836472739, + "cold_ms": null, + "warm_ms": null + }, + { + "repo": "redux", + "language": "javascript", + "chunks": 53, + "ndcg10": 0.8580772959099011, + "p50_ms": 0.3419580025365576, + "ndcg5": 0.8226294385530917, + "cold_ms": null, + "warm_ms": null + }, + { + "repo": "requests", + "language": "python", + "chunks": 169, + "ndcg10": 0.9550842629661954, + "p50_ms": 0.40808300400385633, + "ndcg5": 0.9550842629661954, + "cold_ms": null, + "warm_ms": null + }, + { + "repo": "sinatra", + "language": "ruby", + "chunks": 68, + "ndcg10": 0.8387325493217617, + "p50_ms": 0.3345000004628673, + "ndcg5": 0.8387325493217617, + "cold_ms": null, + "warm_ms": null + }, + { + "repo": "starlette", + "language": "python", + "chunks": 213, + "ndcg10": 0.9294136613951622, + "p50_ms": 0.4845000003115274, + "ndcg5": 0.9058681185722455, + "cold_ms": null, + "warm_ms": null + 
}, + { + "repo": "tokio", + "language": "rust", + "chunks": 2730, + "ndcg10": 0.8878478903956787, + "p50_ms": 0.8677080040797591, + "ndcg5": 0.8750003941122573, + "cold_ms": null, + "warm_ms": null + }, + { + "repo": "trpc", + "language": "typescript", + "chunks": 362, + "ndcg10": 0.7431267778412411, + "p50_ms": 0.8075000005192123, + "ndcg5": 0.6949834508995433, + "cold_ms": null, + "warm_ms": null + } + ] +} diff --git a/benchmarks/run_benchmark.py b/benchmarks/run_benchmark.py index 5147f02..5269479 100644 --- a/benchmarks/run_benchmark.py +++ b/benchmarks/run_benchmark.py @@ -1,9 +1,11 @@ import argparse +import json import math import shutil +import subprocess import sys import time -from dataclasses import dataclass +from dataclasses import asdict, dataclass from pathlib import Path from model2vec import StaticModel @@ -216,6 +218,43 @@ def _bench_cache(repo_tasks: dict[str, list[Task]], model: StaticModel, specs: d return results +def _save_results(results: list[RepoResult], *, cache_mode: bool) -> None: + """Write results to benchmarks/results/[-cache].json.""" + try: + sha = subprocess.check_output(["git", "rev-parse", "HEAD"], text=True).strip() + except subprocess.CalledProcessError: + sha = "unknown" + + languages = sorted({r.language for r in results}) + by_language = {lang: [r for r in results if r.language == lang] for lang in languages} + + output = { + "sha": sha, + "model": _MODEL_NAME, + "cache_mode": cache_mode, + "summary": { + "ndcg10": round(sum(r.ndcg10 for r in results) / len(results), 4), + "p50_ms": round(sum(r.p50_ms for r in results) / len(results), 3), + }, + "by_language": { + lang: { + "repos": len(grouped), + "ndcg10": round(sum(r.ndcg10 for r in grouped) / len(grouped), 4), + "p50_ms": round(sum(r.p50_ms for r in grouped) / len(grouped), 3), + } + for lang, grouped in by_language.items() + }, + "repos": [asdict(r) for r in results], + } + + results_dir = Path(__file__).parent / "results" + results_dir.mkdir(exist_ok=True) + suffix = "-cache" if cache_mode else "" + out_path = results_dir / f"{sha[:12]}{suffix}.json" + out_path.write_text(json.dumps(output, indent=2), encoding="utf-8") + print(f"\nResults saved to {out_path}", file=sys.stderr) + + def main() -> None: """Parse arguments and run the selected benchmark mode.""" parser = argparse.ArgumentParser(description="Benchmark hybrid semble search across the pinned benchmark repos.") @@ -244,6 +283,8 @@ def main() -> None: else _bench_quality(repo_tasks, model, repo_specs, verbose=args.verbose) ) _print_summary(results) + if not args.repo and not args.language: + _save_results(results, cache_mode=args.cache) if __name__ == "__main__": From 92176df212afb93dd4691b51622c4982abc23f92 Mon Sep 17 00:00:00 2001 From: Pringled Date: Wed, 15 Apr 2026 09:13:48 +0200 Subject: [PATCH 11/15] Replace cache benchmark with index_ms tracking per repo MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Drop the --cache mode (cold vs warm build timing) — it was noisy and not actionable. Instead, add index_ms to RepoResult so every full run records index build time per repo alongside NDCG and query latency. index_ms is included in the saved JSON and printed in the summary table. 
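For context on the quality numbers: NDCG is computed per query with binary relevance. The sketch below is a self-contained restatement of the _dcg/_ndcg_at_k helpers in run_benchmark.py, with one worked example:

```python
import math

def ndcg_at_k(relevant_ranks: list[int], n_relevant: int, k: int) -> float:
    # Binary gain: 1 at each 1-based rank where a relevant target surfaced.
    gains = [0] * k
    for rank in relevant_ranks:
        if rank <= k:
            gains[rank - 1] = 1
    dcg = sum(g / math.log2(i + 2) for i, g in enumerate(gains))
    # Ideal ordering puts every indexed relevant target at the top.
    n_ideal = min(n_relevant, k)
    idcg = sum(1 / math.log2(i + 2) for i in range(n_ideal))
    return dcg / idcg if idcg else 0.0

# Two relevant targets, ranked 1st and 3rd in the result list:
print(round(ndcg_at_k([1, 3], n_relevant=2, k=10), 3))  # 0.92
```

NDCG should be stable for a fixed index and model; p50 and index_ms are wall-clock, so some run-to-run noise is expected there.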
--- benchmarks/run_benchmark.py | 84 ++++++++++--------------------------- 1 file changed, 21 insertions(+), 63 deletions(-) diff --git a/benchmarks/run_benchmark.py b/benchmarks/run_benchmark.py index 5269479..8267f74 100644 --- a/benchmarks/run_benchmark.py +++ b/benchmarks/run_benchmark.py @@ -1,7 +1,6 @@ import argparse import json import math -import shutil import subprocess import sys import time @@ -22,7 +21,6 @@ from semble import SembleIndex from semble.types import SearchResult -_CACHE_DIR = Path("/tmp/semble-bench-cache") _MODEL_NAME = "Pringled/potion-code-16M" _LATENCY_RUNS = 5 _DIRECT_TOP_K = 10 @@ -42,11 +40,10 @@ class RepoResult: repo: str language: str chunks: int + ndcg5: float ndcg10: float p50_ms: float - ndcg5: float | None = None - cold_ms: float | None = None - warm_ms: float | None = None + index_ms: float def _dcg(relevances: list[int]) -> float: @@ -117,16 +114,17 @@ def _print_summary(results: list[RepoResult]) -> None: avg_ndcg10 = sum(r.ndcg10 for r in results) / len(results) avg_p50 = sum(r.p50_ms for r in results) / len(results) + avg_index = sum(r.index_ms for r in results) / len(results) print(file=sys.stderr) print("By language", file=sys.stderr) for language, grouped in by_language.items(): - ndcg5_values = [r.ndcg5 for r in grouped if r.ndcg5 is not None] - ndcg5_str = f" ndcg@5={sum(ndcg5_values) / len(ndcg5_values):.3f}" if ndcg5_values else "" print( - f" {language}: repos={len(grouped)}{ndcg5_str}" + f" {language}: repos={len(grouped)}" + + f" ndcg@5={sum(r.ndcg5 for r in grouped) / len(grouped):.3f}" + f" ndcg@10={sum(r.ndcg10 for r in grouped) / len(grouped):.3f}" - + f" p50={sum(r.p50_ms for r in grouped) / len(grouped):.2f}ms", + + f" p50={sum(r.p50_ms for r in grouped) / len(grouped):.2f}ms" + + f" index={sum(r.index_ms for r in grouped) / len(grouped):.0f}ms", file=sys.stderr, ) @@ -139,12 +137,15 @@ def _print_summary(results: list[RepoResult]) -> None: ndcg_row = [f"{avg_ndcg10:>9.3f}"] p50_row = [f"{avg_p50:>8.2f}ms"] + index_row = [f"{avg_index:>7.0f}ms"] for language, language_results in by_language.items(): ndcg_row.append(f"{sum(r.ndcg10 for r in language_results) / len(language_results):>9.3f}") p50_row.append(f"{sum(r.p50_ms for r in language_results) / len(language_results):>8.2f}ms") + index_row.append(f"{sum(r.index_ms for r in language_results) / len(language_results):>7.0f}ms") print(f" {'NDCG@10':<28} " + " ".join(ndcg_row), file=sys.stderr) print(f" {'q-p50':<28} " + " ".join(p50_row), file=sys.stderr) + print(f" {'index':<28} " + " ".join(index_row), file=sys.stderr) def _bench_quality( @@ -163,63 +164,25 @@ def _bench_quality( index = SembleIndex.from_path(spec.benchmark_dir, model=model) index_ms = (time.perf_counter() - started) * 1000 ndcg5, ndcg10, p50_ms = _evaluate(index, tasks, verbose=verbose) - result = RepoResult( - repo=repo, language=spec.language, chunks=len(index.chunks), ndcg5=ndcg5, ndcg10=ndcg10, p50_ms=p50_ms - ) - results.append(result) - print( - f"{repo:<12} {spec.language:<12} {len(index.chunks):>6} {index_ms:>8.0f}ms {ndcg5:>8.3f} {ndcg10:>8.3f} {p50_ms:>7.2f}ms", - file=sys.stderr, - ) - return results - - -def _bench_cache(repo_tasks: dict[str, list[Task]], model: StaticModel, specs: dict[str, RepoSpec]) -> list[RepoResult]: - """Run cold vs warm index timing benchmarks using the disk embedding cache.""" - _CACHE_DIR.mkdir(parents=True, exist_ok=True) - print(f"Cache dir: {_CACHE_DIR}", file=sys.stderr) - print(file=sys.stderr) - print( - f"{'Repo':<12} {'language':<12} {'chunks':>6} {'cold':>9} 
{'warm':>9} {'speedup':>8} {'NDCG@10':>8}", - file=sys.stderr, - ) - print(f"{'-' * 12} {'-' * 12} {'-' * 6} {'-' * 9} {'-' * 9} {'-' * 8} {'-' * 8}", file=sys.stderr) - results: list[RepoResult] = [] - model_ns = _MODEL_NAME.replace("/", "--") - for repo, tasks in sorted(repo_tasks.items()): - spec = specs[repo] - namespace_dir = _CACHE_DIR / model_ns - if namespace_dir.exists(): - shutil.rmtree(namespace_dir) - started = time.perf_counter() - cold = SembleIndex.from_path(spec.benchmark_dir, model=model, cache_dir=_CACHE_DIR, model_name=_MODEL_NAME) - cold_ms = (time.perf_counter() - started) * 1000 - started = time.perf_counter() - warm = SembleIndex.from_path(spec.benchmark_dir, model=model, cache_dir=_CACHE_DIR, model_name=_MODEL_NAME) - warm_ms = (time.perf_counter() - started) * 1000 - _, ndcg10, p50_ms = _evaluate(warm, tasks) result = RepoResult( repo=repo, language=spec.language, - chunks=len(cold.chunks), + chunks=len(index.chunks), + ndcg5=ndcg5, ndcg10=ndcg10, p50_ms=p50_ms, - cold_ms=cold_ms, - warm_ms=warm_ms, + index_ms=index_ms, ) results.append(result) - speedup = cold_ms / warm_ms if warm_ms > 0 else float("inf") print( - f"{repo:<12} {spec.language:<12} {len(cold.chunks):>6} {cold_ms:>8.0f}ms {warm_ms:>8.0f}ms {speedup:>7.1f}x {ndcg10:>8.3f}", + f"{repo:<12} {spec.language:<12} {len(index.chunks):>6} {index_ms:>8.0f}ms {ndcg5:>8.3f} {ndcg10:>8.3f} {p50_ms:>7.2f}ms", file=sys.stderr, ) - print(file=sys.stderr) - print("Warm time still includes file walk plus BM25/Vicinity rebuild; only embedding is skipped.", file=sys.stderr) return results -def _save_results(results: list[RepoResult], *, cache_mode: bool) -> None: - """Write results to benchmarks/results/[-cache].json.""" +def _save_results(results: list[RepoResult]) -> None: + """Write results to benchmarks/results/.json.""" try: sha = subprocess.check_output(["git", "rev-parse", "HEAD"], text=True).strip() except subprocess.CalledProcessError: @@ -231,16 +194,17 @@ def _save_results(results: list[RepoResult], *, cache_mode: bool) -> None: output = { "sha": sha, "model": _MODEL_NAME, - "cache_mode": cache_mode, "summary": { "ndcg10": round(sum(r.ndcg10 for r in results) / len(results), 4), "p50_ms": round(sum(r.p50_ms for r in results) / len(results), 3), + "index_ms": round(sum(r.index_ms for r in results) / len(results), 1), }, "by_language": { lang: { "repos": len(grouped), "ndcg10": round(sum(r.ndcg10 for r in grouped) / len(grouped), 4), "p50_ms": round(sum(r.p50_ms for r in grouped) / len(grouped), 3), + "index_ms": round(sum(r.index_ms for r in grouped) / len(grouped), 1), } for lang, grouped in by_language.items() }, @@ -249,8 +213,7 @@ def _save_results(results: list[RepoResult], *, cache_mode: bool) -> None: results_dir = Path(__file__).parent / "results" results_dir.mkdir(exist_ok=True) - suffix = "-cache" if cache_mode else "" - out_path = results_dir / f"{sha[:12]}{suffix}.json" + out_path = results_dir / f"{sha[:12]}.json" out_path.write_text(json.dumps(output, indent=2), encoding="utf-8") print(f"\nResults saved to {out_path}", file=sys.stderr) @@ -258,7 +221,6 @@ def _save_results(results: list[RepoResult], *, cache_mode: bool) -> None: def main() -> None: """Parse arguments and run the selected benchmark mode.""" parser = argparse.ArgumentParser(description="Benchmark hybrid semble search across the pinned benchmark repos.") - parser.add_argument("--cache", action="store_true", help="Show cold vs warm index time using the disk cache.") parser.add_argument("--repo", action="append", default=[], 
help="Limit to one or more repo names.") parser.add_argument("--language", action="append", default=[], help="Limit to one or more languages.") parser.add_argument("--verbose", action="store_true", help="Print per-query results.") @@ -277,14 +239,10 @@ def main() -> None: repo_tasks: dict[str, list[Task]] = {} for task in tasks: repo_tasks.setdefault(task.repo, []).append(task) - results = ( - _bench_cache(repo_tasks, model, repo_specs) - if args.cache - else _bench_quality(repo_tasks, model, repo_specs, verbose=args.verbose) - ) + results = _bench_quality(repo_tasks, model, repo_specs, verbose=args.verbose) _print_summary(results) if not args.repo and not args.language: - _save_results(results, cache_mode=args.cache) + _save_results(results) if __name__ == "__main__": From c1d788d386f6ddeafa27f04fa6f7b29e5c7dc187 Mon Sep 17 00:00:00 2001 From: Pringled Date: Wed, 15 Apr 2026 09:16:56 +0200 Subject: [PATCH 12/15] Add benchmark results for 92176df --- .../{2bd010b154d4.json => 92176df212af.json} | 245 ++++++++---------- 1 file changed, 114 insertions(+), 131 deletions(-) rename benchmarks/results/{2bd010b154d4.json => 92176df212af.json} (66%) diff --git a/benchmarks/results/2bd010b154d4.json b/benchmarks/results/92176df212af.json similarity index 66% rename from benchmarks/results/2bd010b154d4.json rename to benchmarks/results/92176df212af.json index b60d885..9d1879e 100644 --- a/benchmarks/results/2bd010b154d4.json +++ b/benchmarks/results/92176df212af.json @@ -1,71 +1,83 @@ { - "sha": "2bd010b154d43e317e6de81e0ad00905eb3d5e9b", + "sha": "92176df212afb93dd4691b51622c4982abc23f92", "model": "Pringled/potion-code-16M", - "cache_mode": false, "summary": { "ndcg10": 0.8668, - "p50_ms": 0.697 + "p50_ms": 0.624, + "index_ms": 317.5 }, "by_language": { "csharp": { "repos": 1, "ndcg10": 0.8263, - "p50_ms": 1.883 + "p50_ms": 0.837, + "index_ms": 413.9 }, "go": { "repos": 3, "ndcg10": 0.9515, - "p50_ms": 0.485 + "p50_ms": 0.473, + "index_ms": 148.8 }, "java": { "repos": 3, "ndcg10": 0.842, - "p50_ms": 1.239 + "p50_ms": 1.112, + "index_ms": 923.8 }, "javascript": { "repos": 3, "ndcg10": 0.9282, - "p50_ms": 0.386 + "p50_ms": 0.378, + "index_ms": 33.1 }, "kotlin": { "repos": 1, "ndcg10": 0.7631, - "p50_ms": 0.832 + "p50_ms": 0.8, + "index_ms": 140.3 }, "php": { "repos": 3, "ndcg10": 0.906, - "p50_ms": 0.867 + "p50_ms": 0.852, + "index_ms": 672.2 }, "python": { "repos": 8, "ndcg10": 0.8233, - "p50_ms": 0.479 + "p50_ms": 0.446, + "index_ms": 153.2 }, "ruby": { "repos": 3, "ndcg10": 0.8911, - "p50_ms": 0.578 + "p50_ms": 0.526, + "index_ms": 91.0 }, "rust": { "repos": 1, "ndcg10": 0.8878, - "p50_ms": 0.868 + "p50_ms": 0.753, + "index_ms": 886.6 }, "scala": { "repos": 1, "ndcg10": 0.8415, - "p50_ms": 0.829 + "p50_ms": 0.82, + "index_ms": 612.4 }, "swift": { "repos": 1, "ndcg10": 0.9316, - "p50_ms": 0.496 + "p50_ms": 0.525, + "index_ms": 206.5 }, "typescript": { "repos": 1, "ndcg10": 0.7431, - "p50_ms": 0.808 + "p50_ms": 0.77, + "index_ms": 114.2 } }, "repos": [ @@ -73,291 +85,262 @@ "repo": "aiohttp", "language": "python", "chunks": 756, - "ndcg10": 0.7821229638714016, - "p50_ms": 0.5519580008694902, "ndcg5": 0.7132626857513019, - "cold_ms": null, - "warm_ms": null + "ndcg10": 0.7821229638714016, + "p50_ms": 0.5139580025570467, + "index_ms": 245.69045799580636 }, { "repo": "alamofire", "language": "swift", "chunks": 649, - "ndcg10": 0.9315768229529695, - "p50_ms": 0.4956250049872324, "ndcg5": 0.900172569211564, - "cold_ms": null, - "warm_ms": null + "ndcg10": 0.9315768229529695, + "p50_ms": 
0.5246670043561608, + "index_ms": 206.53395800036378 }, { "repo": "axios", "language": "javascript", "chunks": 166, - "ndcg10": 0.9671522420975631, - "p50_ms": 0.5629590013995767, "ndcg5": 0.9671522420975631, - "cold_ms": null, - "warm_ms": null + "ndcg10": 0.9671522420975631, + "p50_ms": 0.5355000030249357, + "index_ms": 58.22970899316715 }, { "repo": "cats", "language": "scala", "chunks": 1254, - "ndcg10": 0.8414671964692401, - "p50_ms": 0.8292909988085739, "ndcg5": 0.8157722039023972, - "cold_ms": null, - "warm_ms": null + "ndcg10": 0.8414671964692401, + "p50_ms": 0.8197499992093071, + "index_ms": 612.4209999979939 }, { "repo": "chi", "language": "go", "chunks": 262, - "ndcg10": 0.9455120441745608, - "p50_ms": 0.6020420041750185, "ndcg5": 0.9455120441745608, - "cold_ms": null, - "warm_ms": null + "ndcg10": 0.9455120441745608, + "p50_ms": 0.5562090009334497, + "index_ms": 96.6573750047246 }, { "repo": "cobra", "language": "go", "chunks": 394, - "ndcg10": 0.970068981106951, - "p50_ms": 0.3933749976567924, "ndcg5": 0.970068981106951, - "cold_ms": null, - "warm_ms": null + "ndcg10": 0.970068981106951, + "p50_ms": 0.4118329961784184, + "index_ms": 140.34312500007218 }, { "repo": "commons-lang", "language": "java", "chunks": 3152, - "ndcg10": 0.8052591049306037, - "p50_ms": 0.8624999973108061, "ndcg5": 0.7688578654609097, - "cold_ms": null, - "warm_ms": null + "ndcg10": 0.8052591049306037, + "p50_ms": 0.8248750018537976, + "index_ms": 990.0399159960216 }, { "repo": "express", "language": "javascript", "chunks": 52, - "ndcg10": 0.9593872208972474, - "p50_ms": 0.2536250030971132, "ndcg5": 0.9593872208972474, - "cold_ms": null, - "warm_ms": null + "ndcg10": 0.9593872208972474, + "p50_ms": 0.24083400057861581, + "index_ms": 20.915667002554983 }, { "repo": "fastapi", "language": "python", "chunks": 597, - "ndcg10": 0.7693095302894921, - "p50_ms": 0.4379579986562021, "ndcg5": 0.7314364449312006, - "cold_ms": null, - "warm_ms": null + "ndcg10": 0.7693095302894921, + "p50_ms": 0.42249999387422577, + "index_ms": 181.48929099697853 }, { "repo": "flask", "language": "python", "chunks": 291, - "ndcg10": 0.8767012186349079, - "p50_ms": 0.42783399840118363, "ndcg5": 0.8570900833760776, - "cold_ms": null, - "warm_ms": null + "ndcg10": 0.8767012186349079, + "p50_ms": 0.3891669985023327, + "index_ms": 91.24833300302271 }, { "repo": "gin", "language": "go", "chunks": 576, - "ndcg10": 0.939064318485603, - "p50_ms": 0.45987500197952613, "ndcg5": 0.8807555442147937, - "cold_ms": null, - "warm_ms": null + "ndcg10": 0.939064318485603, + "p50_ms": 0.4502500014496036, + "index_ms": 209.39629200438503 }, { "repo": "gson", "language": "java", "chunks": 1460, - "ndcg10": 0.9261859507142916, - "p50_ms": 1.0446250016684644, "ndcg5": 0.9261859507142916, - "cold_ms": null, - "warm_ms": null + "ndcg10": 0.9261859507142916, + "p50_ms": 1.0612499972921796, + "index_ms": 455.70120800402947 }, { "repo": "guzzle", "language": "php", "chunks": 206, - "ndcg10": 0.844506786325837, - "p50_ms": 0.5477500017150305, "ndcg5": 0.8326908338735671, - "cold_ms": null, - "warm_ms": null + "ndcg10": 0.844506786325837, + "p50_ms": 0.516042004164774, + "index_ms": 61.87491699529346 }, { "repo": "httpx", "language": "python", "chunks": 248, - "ndcg10": 0.871159099521697, - "p50_ms": 0.44858400360681117, "ndcg5": 0.8519694264932337, - "cold_ms": null, - "warm_ms": null + "ndcg10": 0.871159099521697, + "p50_ms": 0.3951659964513965, + "index_ms": 79.40462499391288 }, { "repo": "jackson-databind", "language": "java", "chunks": 4570, - "ndcg10": 
0.7944291752941182, - "p50_ms": 1.810874993680045, "ndcg5": 0.7667968319202225, - "cold_ms": null, - "warm_ms": null + "ndcg10": 0.7944291752941182, + "p50_ms": 1.4493329945253208, + "index_ms": 1325.5783330023405 }, { "repo": "ktor", "language": "kotlin", "chunks": 425, - "ndcg10": 0.7630927329648237, - "p50_ms": 0.8315000013681129, "ndcg5": 0.726275662513606, - "cold_ms": null, - "warm_ms": null + "ndcg10": 0.7630927329648237, + "p50_ms": 0.7996659987838939, + "index_ms": 140.25354200566653 }, { "repo": "laravel-framework", "language": "php", "chunks": 6197, - "ndcg10": 0.967888315659275, - "p50_ms": 1.207750006869901, "ndcg5": 0.967888315659275, - "cold_ms": null, - "warm_ms": null + "ndcg10": 0.967888315659275, + "p50_ms": 1.2009579950245097, + "index_ms": 1831.7263749995618 }, { "repo": "messagepack-csharp", "language": "csharp", "chunks": 1125, - "ndcg10": 0.8262866007393468, - "p50_ms": 1.882749995274935, "ndcg5": 0.8164536328001585, - "cold_ms": null, - "warm_ms": null + "ndcg10": 0.8262866007393468, + "p50_ms": 0.8374999961233698, + "index_ms": 413.8517920000595 }, { "repo": "model2vec", "language": "python", "chunks": 107, - "ndcg10": 0.695271294655741, - "p50_ms": 0.4195000001345761, "ndcg5": 0.6593701861221591, - "cold_ms": null, - "warm_ms": null + "ndcg10": 0.695271294655741, + "p50_ms": 0.39674999425187707, + "index_ms": 38.985375002084766 }, { "repo": "monolog", "language": "php", "chunks": 417, - "ndcg10": 0.9055096182921145, - "p50_ms": 0.8463749982183799, "ndcg5": 0.9055096182921145, - "cold_ms": null, - "warm_ms": null + "ndcg10": 0.9055096182921145, + "p50_ms": 0.8403329993598163, + "index_ms": 123.07549999968614 }, { "repo": "pydantic", "language": "python", "chunks": 1518, - "ndcg10": 0.7070408064407742, - "p50_ms": 0.6517500005429611, "ndcg5": 0.6795591269045096, - "cold_ms": null, - "warm_ms": null + "ndcg10": 0.7070408064407742, + "p50_ms": 0.6055419944459572, + "index_ms": 466.1173749991576 }, { "repo": "rack", "language": "ruby", "chunks": 249, - "ndcg10": 1.0, - "p50_ms": 0.4828749952139333, "ndcg5": 1.0, - "cold_ms": null, - "warm_ms": null + "ndcg10": 1.0, + "p50_ms": 0.4677079996326938, + "index_ms": 87.64591699582525 }, { "repo": "rails", "language": "ruby", "chunks": 465, - "ndcg10": 0.8346443747935481, - "p50_ms": 0.9173330035991967, "ndcg5": 0.7466134836472739, - "cold_ms": null, - "warm_ms": null + "ndcg10": 0.8346443747935481, + "p50_ms": 0.8043340058065951, + "index_ms": 155.62629100168124 }, { "repo": "redux", "language": "javascript", "chunks": 53, - "ndcg10": 0.8580772959099011, - "p50_ms": 0.3419580025365576, "ndcg5": 0.8226294385530917, - "cold_ms": null, - "warm_ms": null + "ndcg10": 0.8580772959099011, + "p50_ms": 0.3574169968487695, + "index_ms": 20.11674999812385 }, { "repo": "requests", "language": "python", "chunks": 169, - "ndcg10": 0.9550842629661954, - "p50_ms": 0.40808300400385633, "ndcg5": 0.9550842629661954, - "cold_ms": null, - "warm_ms": null + "ndcg10": 0.9550842629661954, + "p50_ms": 0.38050000148359686, + "index_ms": 51.37912499776576 }, { "repo": "sinatra", "language": "ruby", "chunks": 68, - "ndcg10": 0.8387325493217617, - "p50_ms": 0.3345000004628673, "ndcg5": 0.8387325493217617, - "cold_ms": null, - "warm_ms": null + "ndcg10": 0.8387325493217617, + "p50_ms": 0.30649999825982377, + "index_ms": 29.69916599977296 }, { "repo": "starlette", "language": "python", "chunks": 213, - "ndcg10": 0.9294136613951622, - "p50_ms": 0.4845000003115274, "ndcg5": 0.9058681185722455, - "cold_ms": null, - "warm_ms": null + "ndcg10": 
0.9294136613951622, + "p50_ms": 0.4645410008379258, + "index_ms": 71.30404099734733 }, { "repo": "tokio", "language": "rust", "chunks": 2730, - "ndcg10": 0.8878478903956787, - "p50_ms": 0.8677080040797591, "ndcg5": 0.8750003941122573, - "cold_ms": null, - "warm_ms": null + "ndcg10": 0.8878478903956787, + "p50_ms": 0.7529159993282519, + "index_ms": 886.5858749995823 }, { "repo": "trpc", "language": "typescript", "chunks": 362, - "ndcg10": 0.7431267778412411, - "p50_ms": 0.8075000005192123, "ndcg5": 0.6949834508995433, - "cold_ms": null, - "warm_ms": null + "ndcg10": 0.7431267778412411, + "p50_ms": 0.7703330047661439, + "index_ms": 114.18266699911328 } ] } From f1a50f192e857ccc241c1a64a8548d82bd697862 Mon Sep 17 00:00:00 2001 From: Pringled Date: Wed, 15 Apr 2026 09:17:40 +0200 Subject: [PATCH 13/15] Add trailing newline to saved results JSON --- benchmarks/run_benchmark.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarks/run_benchmark.py b/benchmarks/run_benchmark.py index 8267f74..343e551 100644 --- a/benchmarks/run_benchmark.py +++ b/benchmarks/run_benchmark.py @@ -214,7 +214,7 @@ def _save_results(results: list[RepoResult]) -> None: results_dir = Path(__file__).parent / "results" results_dir.mkdir(exist_ok=True) out_path = results_dir / f"{sha[:12]}.json" - out_path.write_text(json.dumps(output, indent=2), encoding="utf-8") + out_path.write_text(json.dumps(output, indent=2) + "\n", encoding="utf-8") print(f"\nResults saved to {out_path}", file=sys.stderr) From 40a76927ded9482f62ebd5f63930ff59605fe9f8 Mon Sep 17 00:00:00 2001 From: Pringled Date: Wed, 15 Apr 2026 09:23:48 +0200 Subject: [PATCH 14/15] Rename common.py to data.py, move BENCH_ROOT to ~/.cache/semble-bench, inline _output - benchmarks/common.py -> benchmarks/data.py (more descriptive name) - BENCH_ROOT: /tmp/bench -> ~/.cache/semble-bench (survives reboots) - Inline _output into _check_repo (single call site) - Update README to drop --cache docs and reflect new paths --- benchmarks/README.md | 7 +++---- benchmarks/__init__.py | 1 - benchmarks/{common.py => data.py} | 2 +- benchmarks/run_benchmark.py | 2 +- benchmarks/sync_repos.py | 9 ++------- 5 files changed, 7 insertions(+), 14 deletions(-) rename benchmarks/{common.py => data.py} (99%) diff --git a/benchmarks/README.md b/benchmarks/README.md index 73194fc..2296162 100644 --- a/benchmarks/README.md +++ b/benchmarks/README.md @@ -2,7 +2,7 @@ Reproducible local benchmarks for `semble`. -Pinned repositories live in `repos.json` and are checked out into `/tmp/bench`. +Pinned repositories live in `repos.json` and are checked out into `~/.cache/semble-bench`. ## Setup @@ -15,10 +15,9 @@ uv run python -m benchmarks.sync_repos --check ```bash uv run python -m benchmarks.run_benchmark -uv run python -m benchmarks.run_benchmark --cache uv run python -m benchmarks.run_benchmark --repo fastapi --repo axios uv run python -m benchmarks.run_benchmark --language python ``` -`--cache` measures cold vs warm index time. Warm time still includes the file walk and -BM25/Vicinity rebuild; only embedding is skipped. +Full runs (no `--repo`/`--language` filters) automatically save results to +`benchmarks/results/.json`. 
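With checkouts under the home cache they survive reboots, but they can also go stale silently; `sync_repos --check` covers that. For a quick manual look, a minimal sketch that assumes only the `name` field of each entry in `repos.json` and is run from the repo root:

```python
import json
from pathlib import Path

# BENCH_ROOT as defined in benchmarks/data.py after this patch.
BENCH_ROOT = Path.home() / ".cache" / "semble-bench"

for item in json.loads(Path("benchmarks/repos.json").read_text(encoding="utf-8")):
    checkout = BENCH_ROOT / item["name"]
    status = "ok" if (checkout / ".git").exists() else "missing (run benchmarks.sync_repos)"
    print(f"{item['name']:<20} {status}")
```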
diff --git a/benchmarks/__init__.py b/benchmarks/__init__.py index 793aef7..e69de29 100644 --- a/benchmarks/__init__.py +++ b/benchmarks/__init__.py @@ -1 +0,0 @@ -"""Benchmark and eval tooling for semble.""" diff --git a/benchmarks/common.py b/benchmarks/data.py similarity index 99% rename from benchmarks/common.py rename to benchmarks/data.py index 0183254..dac954a 100644 --- a/benchmarks/common.py +++ b/benchmarks/data.py @@ -2,7 +2,7 @@ from dataclasses import dataclass from pathlib import Path -BENCH_ROOT = Path("/tmp/bench") +BENCH_ROOT = Path.home() / ".cache" / "semble-bench" BENCHMARKS_DIR = Path(__file__).parent ANNOTATIONS_DIR = BENCHMARKS_DIR / "annotations" REPOS_PATH = BENCHMARKS_DIR / "repos.json" diff --git a/benchmarks/run_benchmark.py b/benchmarks/run_benchmark.py index 343e551..c5beaa9 100644 --- a/benchmarks/run_benchmark.py +++ b/benchmarks/run_benchmark.py @@ -9,7 +9,7 @@ from model2vec import StaticModel -from benchmarks.common import ( +from benchmarks.data import ( RepoSpec, Target, Task, diff --git a/benchmarks/sync_repos.py b/benchmarks/sync_repos.py index 0deb767..9cf1ebd 100644 --- a/benchmarks/sync_repos.py +++ b/benchmarks/sync_repos.py @@ -2,7 +2,7 @@ import subprocess import sys -from benchmarks.common import BENCH_ROOT, load_repo_specs +from benchmarks.data import BENCH_ROOT, load_repo_specs def _run(*args: str) -> None: @@ -10,11 +10,6 @@ def _run(*args: str) -> None: subprocess.run(args, check=True) -def _output(*args: str) -> str: - """Run a subprocess command and return its stripped stdout.""" - return subprocess.check_output(args, text=True).strip() - - def _sync_repo(name: str, url: str, revision: str) -> None: """Clone the repo if absent, then fetch and checkout the pinned revision.""" repo_dir = BENCH_ROOT / name @@ -31,7 +26,7 @@ def _check_repo(name: str, revision: str) -> str | None: repo_dir = BENCH_ROOT / name if not (repo_dir / ".git").exists(): return f"{name}: missing checkout at {repo_dir}" - head = _output("git", "-C", str(repo_dir), "rev-parse", "HEAD") + head = subprocess.check_output(("git", "-C", str(repo_dir), "rev-parse", "HEAD"), text=True).strip() if head != revision: return f"{name}: expected {revision}, found {head}" return None From 5aa0dda8d9e0d51413ad0ceaa1acc1e6612fb214 Mon Sep 17 00:00:00 2001 From: Pringled Date: Wed, 15 Apr 2026 09:27:13 +0200 Subject: [PATCH 15/15] Add benchmark results for 40a7692 --- .../{92176df212af.json => 40a76927ded9.json} | 170 +++++++++--------- 1 file changed, 85 insertions(+), 85 deletions(-) rename benchmarks/results/{92176df212af.json => 40a76927ded9.json} (66%) diff --git a/benchmarks/results/92176df212af.json b/benchmarks/results/40a76927ded9.json similarity index 66% rename from benchmarks/results/92176df212af.json rename to benchmarks/results/40a76927ded9.json index 9d1879e..d20be46 100644 --- a/benchmarks/results/92176df212af.json +++ b/benchmarks/results/40a76927ded9.json @@ -1,83 +1,83 @@ { - "sha": "92176df212afb93dd4691b51622c4982abc23f92", + "sha": "40a76927ded9482f62ebd5f63930ff59605fe9f8", "model": "Pringled/potion-code-16M", "summary": { "ndcg10": 0.8668, - "p50_ms": 0.624, - "index_ms": 317.5 + "p50_ms": 0.698, + "index_ms": 340.0 }, "by_language": { "csharp": { "repos": 1, "ndcg10": 0.8263, - "p50_ms": 0.837, - "index_ms": 413.9 + "p50_ms": 0.966, + "index_ms": 459.2 }, "go": { "repos": 3, "ndcg10": 0.9515, - "p50_ms": 0.473, - "index_ms": 148.8 + "p50_ms": 0.537, + "index_ms": 164.0 }, "java": { "repos": 3, "ndcg10": 0.842, - "p50_ms": 1.112, - "index_ms": 923.8 + "p50_ms": 
1.149, + "index_ms": 965.2 }, "javascript": { "repos": 3, "ndcg10": 0.9282, - "p50_ms": 0.378, - "index_ms": 33.1 + "p50_ms": 0.432, + "index_ms": 36.0 }, "kotlin": { "repos": 1, "ndcg10": 0.7631, - "p50_ms": 0.8, - "index_ms": 140.3 + "p50_ms": 0.856, + "index_ms": 160.8 }, "php": { "repos": 3, "ndcg10": 0.906, - "p50_ms": 0.852, - "index_ms": 672.2 + "p50_ms": 0.949, + "index_ms": 738.0 }, "python": { "repos": 8, "ndcg10": 0.8233, - "p50_ms": 0.446, - "index_ms": 153.2 + "p50_ms": 0.498, + "index_ms": 163.3 }, "ruby": { "repos": 3, "ndcg10": 0.8911, - "p50_ms": 0.526, - "index_ms": 91.0 + "p50_ms": 0.623, + "index_ms": 97.4 }, "rust": { "repos": 1, "ndcg10": 0.8878, - "p50_ms": 0.753, - "index_ms": 886.6 + "p50_ms": 0.996, + "index_ms": 930.6 }, "scala": { "repos": 1, "ndcg10": 0.8415, - "p50_ms": 0.82, - "index_ms": 612.4 + "p50_ms": 0.942, + "index_ms": 648.2 }, "swift": { "repos": 1, "ndcg10": 0.9316, - "p50_ms": 0.525, - "index_ms": 206.5 + "p50_ms": 0.543, + "index_ms": 229.5 }, "typescript": { "repos": 1, "ndcg10": 0.7431, - "p50_ms": 0.77, - "index_ms": 114.2 + "p50_ms": 0.882, + "index_ms": 121.8 } }, "repos": [ @@ -87,8 +87,8 @@ "chunks": 756, "ndcg5": 0.7132626857513019, "ndcg10": 0.7821229638714016, - "p50_ms": 0.5139580025570467, - "index_ms": 245.69045799580636 + "p50_ms": 0.5919579998590052, + "index_ms": 267.3160420017666 }, { "repo": "alamofire", @@ -96,8 +96,8 @@ "chunks": 649, "ndcg5": 0.900172569211564, "ndcg10": 0.9315768229529695, - "p50_ms": 0.5246670043561608, - "index_ms": 206.53395800036378 + "p50_ms": 0.5432909965747967, + "index_ms": 229.49254100240069 }, { "repo": "axios", @@ -105,8 +105,8 @@ "chunks": 166, "ndcg5": 0.9671522420975631, "ndcg10": 0.9671522420975631, - "p50_ms": 0.5355000030249357, - "index_ms": 58.22970899316715 + "p50_ms": 0.5915000001550652, + "index_ms": 58.03862500033574 }, { "repo": "cats", @@ -114,8 +114,8 @@ "chunks": 1254, "ndcg5": 0.8157722039023972, "ndcg10": 0.8414671964692401, - "p50_ms": 0.8197499992093071, - "index_ms": 612.4209999979939 + "p50_ms": 0.9416660032002255, + "index_ms": 648.2289169944124 }, { "repo": "chi", @@ -123,8 +123,8 @@ "chunks": 262, "ndcg5": 0.9455120441745608, "ndcg10": 0.9455120441745608, - "p50_ms": 0.5562090009334497, - "index_ms": 96.6573750047246 + "p50_ms": 0.6279579974943772, + "index_ms": 103.15404200082412 }, { "repo": "cobra", @@ -132,8 +132,8 @@ "chunks": 394, "ndcg5": 0.970068981106951, "ndcg10": 0.970068981106951, - "p50_ms": 0.4118329961784184, - "index_ms": 140.34312500007218 + "p50_ms": 0.41579100070521235, + "index_ms": 149.65433299948927 }, { "repo": "commons-lang", @@ -141,8 +141,8 @@ "chunks": 3152, "ndcg5": 0.7688578654609097, "ndcg10": 0.8052591049306037, - "p50_ms": 0.8248750018537976, - "index_ms": 990.0399159960216 + "p50_ms": 0.9200830027111806, + "index_ms": 1038.753667002311 }, { "repo": "express", @@ -150,8 +150,8 @@ "chunks": 52, "ndcg5": 0.9593872208972474, "ndcg10": 0.9593872208972474, - "p50_ms": 0.24083400057861581, - "index_ms": 20.915667002554983 + "p50_ms": 0.25366600311826915, + "index_ms": 22.826792002888396 }, { "repo": "fastapi", @@ -159,8 +159,8 @@ "chunks": 597, "ndcg5": 0.7314364449312006, "ndcg10": 0.7693095302894921, - "p50_ms": 0.42249999387422577, - "index_ms": 181.48929099697853 + "p50_ms": 0.47783299669390544, + "index_ms": 189.871916998527 }, { "repo": "flask", @@ -168,8 +168,8 @@ "chunks": 291, "ndcg5": 0.8570900833760776, "ndcg10": 0.8767012186349079, - "p50_ms": 0.3891669985023327, - "index_ms": 91.24833300302271 + "p50_ms": 0.4285830000299029, + 
"index_ms": 92.66295799898217 }, { "repo": "gin", @@ -177,8 +177,8 @@ "chunks": 576, "ndcg5": 0.8807555442147937, "ndcg10": 0.939064318485603, - "p50_ms": 0.4502500014496036, - "index_ms": 209.39629200438503 + "p50_ms": 0.5680000031134114, + "index_ms": 239.0974170048139 }, { "repo": "gson", @@ -186,8 +186,8 @@ "chunks": 1460, "ndcg5": 0.9261859507142916, "ndcg10": 0.9261859507142916, - "p50_ms": 1.0612499972921796, - "index_ms": 455.70120800402947 + "p50_ms": 1.0848340025404468, + "index_ms": 482.5546249994659 }, { "repo": "guzzle", @@ -195,8 +195,8 @@ "chunks": 206, "ndcg5": 0.8326908338735671, "ndcg10": 0.844506786325837, - "p50_ms": 0.516042004164774, - "index_ms": 61.87491699529346 + "p50_ms": 0.5839169971295632, + "index_ms": 72.53133400081424 }, { "repo": "httpx", @@ -204,8 +204,8 @@ "chunks": 248, "ndcg5": 0.8519694264932337, "ndcg10": 0.871159099521697, - "p50_ms": 0.3951659964513965, - "index_ms": 79.40462499391288 + "p50_ms": 0.4406670050229877, + "index_ms": 84.04612500453368 }, { "repo": "jackson-databind", @@ -213,8 +213,8 @@ "chunks": 4570, "ndcg5": 0.7667968319202225, "ndcg10": 0.7944291752941182, - "p50_ms": 1.4493329945253208, - "index_ms": 1325.5783330023405 + "p50_ms": 1.442957996914629, + "index_ms": 1374.4051670000772 }, { "repo": "ktor", @@ -222,8 +222,8 @@ "chunks": 425, "ndcg5": 0.726275662513606, "ndcg10": 0.7630927329648237, - "p50_ms": 0.7996659987838939, - "index_ms": 140.25354200566653 + "p50_ms": 0.8556669999961741, + "index_ms": 160.8068749992526 }, { "repo": "laravel-framework", @@ -231,8 +231,8 @@ "chunks": 6197, "ndcg5": 0.967888315659275, "ndcg10": 0.967888315659275, - "p50_ms": 1.2009579950245097, - "index_ms": 1831.7263749995618 + "p50_ms": 1.3275840028654784, + "index_ms": 1987.1202089998405 }, { "repo": "messagepack-csharp", @@ -240,8 +240,8 @@ "chunks": 1125, "ndcg5": 0.8164536328001585, "ndcg10": 0.8262866007393468, - "p50_ms": 0.8374999961233698, - "index_ms": 413.8517920000595 + "p50_ms": 0.9664999961387366, + "index_ms": 459.23387500079116 }, { "repo": "model2vec", @@ -249,8 +249,8 @@ "chunks": 107, "ndcg5": 0.6593701861221591, "ndcg10": 0.695271294655741, - "p50_ms": 0.39674999425187707, - "index_ms": 38.985375002084766 + "p50_ms": 0.46462499449262396, + "index_ms": 44.985666005231906 }, { "repo": "monolog", @@ -258,8 +258,8 @@ "chunks": 417, "ndcg5": 0.9055096182921145, "ndcg10": 0.9055096182921145, - "p50_ms": 0.8403329993598163, - "index_ms": 123.07549999968614 + "p50_ms": 0.9362909986521117, + "index_ms": 154.34570900106337 }, { "repo": "pydantic", @@ -267,8 +267,8 @@ "chunks": 1518, "ndcg5": 0.6795591269045096, "ndcg10": 0.7070408064407742, - "p50_ms": 0.6055419944459572, - "index_ms": 466.1173749991576 + "p50_ms": 0.6636250036535785, + "index_ms": 490.62920799769927 }, { "repo": "rack", @@ -276,8 +276,8 @@ "chunks": 249, "ndcg5": 1.0, "ndcg10": 1.0, - "p50_ms": 0.4677079996326938, - "index_ms": 87.64591699582525 + "p50_ms": 0.5600000004051253, + "index_ms": 96.56141699815635 }, { "repo": "rails", @@ -285,8 +285,8 @@ "chunks": 465, "ndcg5": 0.7466134836472739, "ndcg10": 0.8346443747935481, - "p50_ms": 0.8043340058065951, - "index_ms": 155.62629100168124 + "p50_ms": 0.9815000012167729, + "index_ms": 168.55954200582346 }, { "repo": "redux", @@ -294,8 +294,8 @@ "chunks": 53, "ndcg5": 0.8226294385530917, "ndcg10": 0.8580772959099011, - "p50_ms": 0.3574169968487695, - "index_ms": 20.11674999812385 + "p50_ms": 0.450166997325141, + "index_ms": 27.171499998075888 }, { "repo": "requests", @@ -303,8 +303,8 @@ "chunks": 169, "ndcg5": 
0.9550842629661954, "ndcg10": 0.9550842629661954, - "p50_ms": 0.38050000148359686, - "index_ms": 51.37912499776576 + "p50_ms": 0.40475000423612073, + "index_ms": 56.275709001056384 }, { "repo": "sinatra", @@ -312,8 +312,8 @@ "chunks": 68, "ndcg5": 0.8387325493217617, "ndcg10": 0.8387325493217617, - "p50_ms": 0.30649999825982377, - "index_ms": 29.69916599977296 + "p50_ms": 0.3260829980717972, + "index_ms": 27.20166600192897 }, { "repo": "starlette", @@ -321,8 +321,8 @@ "chunks": 213, "ndcg5": 0.9058681185722455, "ndcg10": 0.9294136613951622, - "p50_ms": 0.4645410008379258, - "index_ms": 71.30404099734733 + "p50_ms": 0.5122919974382967, + "index_ms": 80.70125000085682 }, { "repo": "tokio", @@ -330,8 +330,8 @@ "chunks": 2730, "ndcg5": 0.8750003941122573, "ndcg10": 0.8878478903956787, - "p50_ms": 0.7529159993282519, - "index_ms": 886.5858749995823 + "p50_ms": 0.9958329974324442, + "index_ms": 930.5787499979488 }, { "repo": "trpc", @@ -339,8 +339,8 @@ "chunks": 362, "ndcg5": 0.6949834508995433, "ndcg10": 0.7431267778412411, - "p50_ms": 0.7703330047661439, - "index_ms": 114.18266699911328 + "p50_ms": 0.8817499983706512, + "index_ms": 121.80820800131187 } ] }
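Since full runs land in benchmarks/results/, drift between commits can be checked directly from the saved files. A minimal sketch of a comparison helper; the script name and CLI are hypothetical (not in the repo), and since this series keeps only the latest file, the older run would first be restored from git history:

```python
import json
import sys
from pathlib import Path

# Hypothetical usage: python compare_results.py old.json new.json
old_path, new_path = map(Path, sys.argv[1:3])
old = json.loads(old_path.read_text(encoding="utf-8"))
new = json.loads(new_path.read_text(encoding="utf-8"))

# Overall summary deltas between the two runs.
for key in ("ndcg10", "p50_ms", "index_ms"):
    before, after = old["summary"][key], new["summary"][key]
    print(f"{key:<9} {before:>9} -> {after:>9}  ({after - before:+.3f})")

# Per-repo NDCG@10 drift, largest movers first.
old_repos = {r["repo"]: r for r in old["repos"]}
deltas = sorted(
    ((r["ndcg10"] - old_repos[r["repo"]]["ndcg10"], r["repo"]) for r in new["repos"] if r["repo"] in old_repos),
    key=lambda pair: abs(pair[0]),
    reverse=True,
)
for delta, repo in deltas[:5]:
    print(f"{repo:<20} ndcg@10 {delta:+.4f}")
```

Between the last two runs in this series, ndcg@10 is identical (0.8668) while p50 moves from 0.624 ms to 0.698 ms, which reads as wall-clock noise rather than an effect of the BENCH_ROOT move.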