diff --git a/benchmarks/README.md b/benchmarks/README.md new file mode 100644 index 0000000..2296162 --- /dev/null +++ b/benchmarks/README.md @@ -0,0 +1,23 @@ +# Benchmarks + +Reproducible local benchmarks for `semble`. + +Pinned repositories live in `repos.json` and are checked out into `~/.cache/semble-bench`. + +## Setup + +```bash +uv run python -m benchmarks.sync_repos +uv run python -m benchmarks.sync_repos --check +``` + +## Run + +```bash +uv run python -m benchmarks.run_benchmark +uv run python -m benchmarks.run_benchmark --repo fastapi --repo axios +uv run python -m benchmarks.run_benchmark --language python +``` + +Full runs (no `--repo`/`--language` filters) automatically save results to +`benchmarks/results/.json`. diff --git a/benchmarks/__init__.py b/benchmarks/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/benchmarks/annotations/aiohttp.json b/benchmarks/annotations/aiohttp.json new file mode 100644 index 0000000..ea00500 --- /dev/null +++ b/benchmarks/annotations/aiohttp.json @@ -0,0 +1,90 @@ +[ + { + "query": "how the async HTTP client session works", + "relevant": ["aiohttp/client.py"], + "secondary": [] + }, + { + "query": "connection pooling and TCP connector", + "relevant": ["aiohttp/connector.py"], + "secondary": [] + }, + { + "query": "WebSocket client implementation", + "relevant": ["aiohttp/client_ws.py"], + "secondary": ["aiohttp/_websocket/reader.py"] + }, + { + "query": "request and response object internals", + "relevant": ["aiohttp/client_reqrep.py"], + "secondary": [] + }, + { + "query": "URL routing and resource dispatching", + "relevant": ["aiohttp/web_urldispatcher.py"], + "secondary": [] + }, + { + "query": "server-side middleware execution", + "relevant": ["aiohttp/web_middlewares.py"], + "secondary": ["aiohttp/web_app.py"] + }, + { + "query": "multipart and form data handling", + "relevant": ["aiohttp/multipart.py"], + "secondary": ["aiohttp/formdata.py"] + }, + { + "query": "response streaming and 
payload", + "relevant": ["aiohttp/streams.py"], + "secondary": ["aiohttp/payload.py"] + }, + { + "query": "HTTP exception types and error responses", + "relevant": ["aiohttp/web_exceptions.py"], + "secondary": ["aiohttp/client_exceptions.py"] + }, + { + "query": "request tracing and observability hooks", + "relevant": ["aiohttp/tracing.py"], + "secondary": [] + }, + { + "query": "how chunked transfer encoding is parsed", + "relevant": ["aiohttp/http_parser.py"], + "secondary": [] + }, + { + "query": "how DNS resolution is handled asynchronously", + "relevant": ["aiohttp/resolver.py"], + "secondary": ["aiohttp/connector.py"] + }, + { + "query": "how backpressure and flow control work in streaming", + "relevant": ["aiohttp/streams.py"], + "secondary": [] + }, + { + "query": "how connection draining and cleanup happen on close", + "relevant": ["aiohttp/connector.py"], + "secondary": ["aiohttp/client_proto.py"] + }, + { + "query": "how the web application sets up and tears down on startup", + "relevant": ["aiohttp/web_app.py"], + "secondary": ["aiohttp/web_runner.py"] + }, + {"query": "ClientSession", "relevant": ["aiohttp/client.py"], "secondary": []}, + {"query": "TCPConnector", "relevant": ["aiohttp/connector.py"], "secondary": []}, + {"query": "UrlDispatcher", "relevant": ["aiohttp/web_urldispatcher.py"], "secondary": []}, + {"query": "ClientResponse", "relevant": ["aiohttp/client_reqrep.py"], "secondary": []}, + {"query": "TraceConfig", "relevant": ["aiohttp/tracing.py"], "secondary": []}, + { + "query": "how ClientSession acquires and releases connections from the connector", + "relevant": ["aiohttp/client.py"], + "secondary": ["aiohttp/connector.py"], + "category": "architecture", + "seed": {"path": "aiohttp/client.py", "line": 374}, + "related": ["aiohttp/connector.py"] + } +] diff --git a/benchmarks/annotations/alamofire.json b/benchmarks/annotations/alamofire.json new file mode 100644 index 0000000..df5ee2b --- /dev/null +++ 
b/benchmarks/annotations/alamofire.json @@ -0,0 +1,99 @@ +[ + { + "query": "how the Session manages the underlying URLSession and dispatches requests", + "relevant": ["Source/Core/Session.swift"], + "secondary": ["Source/Core/SessionDelegate.swift"], + "category": "architecture" + }, + { + "query": "how request retrying is implemented with backoff and retry conditions", + "relevant": ["Source/Features/RetryPolicy.swift"], + "secondary": ["Source/Features/RequestInterceptor.swift"], + "category": "architecture" + }, + { + "query": "how response validation checks status codes and content types", + "relevant": ["Source/Features/Validation.swift"], + "secondary": [], + "category": "semantic" + }, + { + "query": "how response serialization decodes JSON, Decodable, and strings", + "relevant": ["Source/Features/ResponseSerialization.swift"], + "secondary": [], + "category": "semantic" + }, + { + "query": "how multipart form data encodes fields and file attachments", + "relevant": ["Source/Features/MultipartFormData.swift"], + "secondary": ["Source/Features/MultipartUpload.swift"], + "category": "semantic" + }, + { + "query": "how authentication interceptors handle credential challenges", + "relevant": ["Source/Features/AuthenticationInterceptor.swift"], + "secondary": [], + "category": "semantic" + }, + { + "query": "how server trust evaluation handles SSL certificate pinning", + "relevant": ["Source/Features/ServerTrustEvaluation.swift"], + "secondary": [], + "category": "semantic" + }, + { + "query": "how URL query parameters are encoded from Encodable values", + "relevant": ["Source/Features/URLEncodedFormEncoder.swift"], + "secondary": ["Source/Core/ParameterEncoder.swift"], + "category": "semantic" + }, + { + "query": "how network reachability is monitored to detect connectivity changes", + "relevant": ["Source/Features/NetworkReachabilityManager.swift"], + "secondary": [], + "category": "semantic" + }, + { + "query": "how download requests save responses to disk", + 
"relevant": ["Source/Core/DownloadRequest.swift"], + "secondary": [], + "category": "semantic" + }, + { + "query": "how request and response events are logged via EventMonitor", + "relevant": ["Source/Features/EventMonitor.swift"], + "secondary": [], + "category": "semantic" + }, + { + "query": "how the Request class tracks lifecycle state transitions", + "relevant": ["Source/Core/Request.swift"], + "secondary": [], + "category": "architecture" + }, + { + "query": "Session", + "relevant": ["Source/Core/Session.swift"], + "secondary": [] + }, + { + "query": "AFError", + "relevant": ["Source/Core/AFError.swift"], + "secondary": [] + }, + { + "query": "RetryPolicy", + "relevant": ["Source/Features/RetryPolicy.swift"], + "secondary": [] + }, + { + "query": "ServerTrustEvaluating", + "relevant": ["Source/Features/ServerTrustEvaluation.swift"], + "secondary": [] + }, + { + "query": "HTTPHeaders", + "relevant": ["Source/Core/HTTPHeaders.swift"], + "secondary": [] + } +] diff --git a/benchmarks/annotations/axios.json b/benchmarks/annotations/axios.json new file mode 100644 index 0000000..3466c21 --- /dev/null +++ b/benchmarks/annotations/axios.json @@ -0,0 +1,46 @@ +[ + { + "query": "how HTTP requests are dispatched through the configured adapter", + "relevant": ["lib/core/dispatchRequest.js"], + "secondary": ["lib/adapters/adapters.js"], + "category": "architecture" + }, + { + "query": "request and response interceptors", + "relevant": ["lib/core/InterceptorManager.js"], + "secondary": ["lib/core/Axios.js"], + "category": "semantic" + }, + { + "query": "node HTTP adapter implementation", + "relevant": ["lib/adapters/http.js"], + "secondary": [], + "category": "semantic" + }, + { + "query": "how config defaults are merged before a request is sent", + "relevant": ["lib/core/mergeConfig.js"], + "secondary": ["lib/core/Axios.js"], + "category": "architecture" + }, + { + "query": "Axios", + "relevant": [{"path": "lib/core/Axios.js", "start_line": 22, "end_line": 61}], + 
"secondary": [], + "category": "symbol", + "seed": {"path": "lib/core/Axios.js", "line": 46}, + "related": [{"path": "lib/core/Axios.js", "start_line": 179, "end_line": 239}] + }, + { + "query": "InterceptorManager", + "relevant": [{"path": "lib/core/InterceptorManager.js", "start_line": 5, "end_line": 33}], + "secondary": [], + "category": "symbol" + }, + { + "query": "mergeConfig", + "relevant": [{"path": "lib/core/mergeConfig.js", "start_line": 17, "end_line": 106}], + "secondary": [], + "category": "symbol" + } +] diff --git a/benchmarks/annotations/cats.json b/benchmarks/annotations/cats.json new file mode 100644 index 0000000..4140db6 --- /dev/null +++ b/benchmarks/annotations/cats.json @@ -0,0 +1,99 @@ +[ + { + "query": "how the Functor type class defines mapping over a context", + "relevant": ["core/src/main/scala/cats/Functor.scala"], + "secondary": [], + "category": "architecture" + }, + { + "query": "how Monad composes dependent effectful computations with flatMap", + "relevant": ["core/src/main/scala/cats/Monad.scala"], + "secondary": ["core/src/main/scala/cats/FlatMap.scala"], + "category": "architecture" + }, + { + "query": "how Applicative combines independent effects", + "relevant": ["core/src/main/scala/cats/Applicative.scala"], + "secondary": ["core/src/main/scala/cats/Apply.scala"], + "category": "architecture" + }, + { + "query": "how errors are handled and recovered in ApplicativeError and MonadError", + "relevant": ["core/src/main/scala/cats/ApplicativeError.scala"], + "secondary": ["core/src/main/scala/cats/MonadError.scala"], + "category": "semantic" + }, + { + "query": "how Validated accumulates errors across independent computations", + "relevant": ["core/src/main/scala/cats/data/Validated.scala"], + "secondary": [], + "category": "semantic" + }, + { + "query": "how EitherT combines the Either monad with another effect", + "relevant": ["core/src/main/scala/cats/data/EitherT.scala"], + "secondary": [], + "category": "semantic" + }, + { + 
"query": "how Kleisli composes functions that return monadic values", + "relevant": ["core/src/main/scala/cats/data/Kleisli.scala"], + "secondary": [], + "category": "semantic" + }, + { + "query": "how Eval provides lazy and memoized evaluation", + "relevant": ["core/src/main/scala/cats/Eval.scala"], + "secondary": [], + "category": "semantic" + }, + { + "query": "how Foldable traverses and reduces elements in a container", + "relevant": ["core/src/main/scala/cats/Foldable.scala"], + "secondary": [], + "category": "semantic" + }, + { + "query": "how Chain provides O(1) concatenation as an alternative to List", + "relevant": ["core/src/main/scala/cats/data/Chain.scala"], + "secondary": [], + "category": "semantic" + }, + { + "query": "how natural transformations map between type constructors", + "relevant": ["core/src/main/scala/cats/arrow/FunctionK.scala"], + "secondary": [], + "category": "semantic" + }, + { + "query": "how the Contravariant functor reverses the mapping direction", + "relevant": ["core/src/main/scala/cats/Contravariant.scala"], + "secondary": [], + "category": "semantic" + }, + { + "query": "Monad", + "relevant": ["core/src/main/scala/cats/Monad.scala"], + "secondary": [] + }, + { + "query": "Functor", + "relevant": ["core/src/main/scala/cats/Functor.scala"], + "secondary": [] + }, + { + "query": "EitherT", + "relevant": ["core/src/main/scala/cats/data/EitherT.scala"], + "secondary": [] + }, + { + "query": "Validated", + "relevant": ["core/src/main/scala/cats/data/Validated.scala"], + "secondary": [] + }, + { + "query": "Kleisli", + "relevant": ["core/src/main/scala/cats/data/Kleisli.scala"], + "secondary": [] + } +] diff --git a/benchmarks/annotations/chi.json b/benchmarks/annotations/chi.json new file mode 100644 index 0000000..38223b5 --- /dev/null +++ b/benchmarks/annotations/chi.json @@ -0,0 +1,32 @@ +[ + { + "query": "HTTP router and middleware composition", + "relevant": ["mux.go"], + "secondary": ["chain.go"], + "category": "architecture" 
+ }, + { + "query": "radix tree path matching", + "relevant": ["tree.go"], + "secondary": [], + "category": "semantic" + }, + { + "query": "request routing context storage", + "relevant": ["context.go"], + "secondary": ["mux.go"], + "category": "architecture" + }, + { + "query": "request logging middleware", + "relevant": ["middleware/logger.go"], + "secondary": [], + "category": "semantic" + }, + { + "query": "Mux", + "relevant": ["mux.go"], + "secondary": [], + "category": "symbol" + } +] diff --git a/benchmarks/annotations/cobra.json b/benchmarks/annotations/cobra.json new file mode 100644 index 0000000..c8d97a2 --- /dev/null +++ b/benchmarks/annotations/cobra.json @@ -0,0 +1,32 @@ +[ + { + "query": "core command execution and command tree", + "relevant": ["command.go"], + "secondary": [], + "category": "architecture" + }, + { + "query": "shell completion request handling", + "relevant": ["completions.go"], + "secondary": [], + "category": "semantic" + }, + { + "query": "bash completion generation", + "relevant": ["bash_completions.go"], + "secondary": ["completions.go"], + "category": "semantic" + }, + { + "query": "positional argument validators", + "relevant": ["args.go"], + "secondary": [], + "category": "semantic" + }, + { + "query": "Command", + "relevant": ["command.go"], + "secondary": [], + "category": "symbol" + } +] diff --git a/benchmarks/annotations/commons-lang.json b/benchmarks/annotations/commons-lang.json new file mode 100644 index 0000000..9d51c0c --- /dev/null +++ b/benchmarks/annotations/commons-lang.json @@ -0,0 +1,43 @@ +[ + { + "query": "null-safe string operations and text helpers", + "relevant": ["src/main/java/org/apache/commons/lang3/StringUtils.java"], + "secondary": [], + "category": "semantic" + }, + { + "query": "argument validation helpers and exception rules", + "relevant": ["src/main/java/org/apache/commons/lang3/Validate.java"], + "secondary": [], + "category": "semantic" + }, + { + "query": "reflection-based equals 
implementation", + "relevant": ["src/main/java/org/apache/commons/lang3/builder/EqualsBuilder.java"], + "secondary": ["src/main/java/org/apache/commons/lang3/ClassUtils.java"], + "category": "architecture" + }, + { + "query": "range object with inclusive bounds and comparator support", + "relevant": ["src/main/java/org/apache/commons/lang3/Range.java"], + "secondary": [], + "category": "semantic" + }, + { + "query": "StringUtils", + "relevant": ["src/main/java/org/apache/commons/lang3/StringUtils.java"], + "secondary": [], + "category": "symbol" + }, + { + "query": "reflection equals builder internals", + "relevant": [{"path": "src/main/java/org/apache/commons/lang3/builder/EqualsBuilder.java", "start_line": 89, "end_line": 99}], + "secondary": [], + "category": "architecture", + "seed": {"path": "src/main/java/org/apache/commons/lang3/builder/EqualsBuilder.java", "line": 89}, + "related": [ + {"path": "src/main/java/org/apache/commons/lang3/builder/HashCodeBuilder.java", "start_line": 105, "end_line": 141}, + {"path": "src/main/java/org/apache/commons/lang3/builder/IDKey.java", "start_line": 27, "end_line": 74} + ] + } +] diff --git a/benchmarks/annotations/express.json b/benchmarks/annotations/express.json new file mode 100644 index 0000000..dd82ccf --- /dev/null +++ b/benchmarks/annotations/express.json @@ -0,0 +1,32 @@ +[ + { + "query": "application initialization and default configuration", + "relevant": ["lib/application.js"], + "secondary": ["lib/express.js"], + "category": "architecture" + }, + { + "query": "request API helpers and header access", + "relevant": ["lib/request.js"], + "secondary": [], + "category": "semantic" + }, + { + "query": "response sending and content negotiation", + "relevant": ["lib/response.js"], + "secondary": [], + "category": "semantic" + }, + { + "query": "template view lookup and rendering", + "relevant": ["lib/view.js"], + "secondary": ["lib/application.js"], + "category": "architecture" + }, + { + "query": "response", + 
"relevant": ["lib/response.js"], + "secondary": [], + "category": "symbol" + } +] diff --git a/benchmarks/annotations/fastapi.json b/benchmarks/annotations/fastapi.json new file mode 100644 index 0000000..23a990b --- /dev/null +++ b/benchmarks/annotations/fastapi.json @@ -0,0 +1,112 @@ +[ + { + "query": "how does dependency injection work", + "relevant": ["fastapi/dependencies/utils.py"], + "secondary": ["fastapi/dependencies/models.py", "fastapi/params.py"] + }, + { + "query": "request validation and error handling", + "relevant": ["fastapi/exceptions.py"], + "secondary": ["fastapi/exception_handlers.py"] + }, + { + "query": "how are routes registered", + "relevant": ["fastapi/routing.py"], + "secondary": ["fastapi/applications.py"] + }, + { + "query": "websocket endpoint implementation", + "relevant": ["fastapi/websockets.py"], + "secondary": ["fastapi/routing.py"] + }, + { + "query": "OpenAPI schema generation", + "relevant": ["fastapi/openapi/utils.py"], + "secondary": ["fastapi/openapi/models.py"] + }, + { + "query": "middleware stack and CORS", + "relevant": ["fastapi/middleware/cors.py"], + "secondary": ["fastapi/applications.py"] + }, + { + "query": "file upload handling", + "relevant": ["fastapi/datastructures.py"], + "secondary": [] + }, + { + "query": "response model serialization", + "relevant": ["fastapi/encoders.py"], + "secondary": ["fastapi/routing.py"] + }, + { + "query": "background tasks", + "relevant": ["fastapi/background.py"], + "secondary": [] + }, + { + "query": "security and OAuth2 authentication", + "relevant": ["fastapi/security/oauth2.py"], + "secondary": ["fastapi/security/http.py"] + }, + { + "query": "how is response validation and serialization applied before sending", + "relevant": ["fastapi/routing.py"], + "secondary": ["fastapi/encoders.py"] + }, + { + "query": "how are nested and sub-dependencies resolved", + "relevant": ["fastapi/dependencies/utils.py"], + "secondary": [] + }, + { + "query": "how does FastAPI run sync route 
functions without blocking", + "relevant": ["fastapi/concurrency.py"], + "secondary": ["fastapi/routing.py"] + }, + { + "query": "how are path parameters extracted and type-converted", + "relevant": ["fastapi/routing.py"], + "secondary": ["fastapi/dependencies/utils.py"] + }, + { + "query": "how does exception propagation work through dependency injection", + "relevant": ["fastapi/dependencies/utils.py"], + "secondary": ["fastapi/routing.py"] + }, + { + "query": "Depends", + "relevant": [{"path": "fastapi/params.py", "start_line": 746, "end_line": 749}], + "secondary": ["fastapi/param_functions.py"], + "category": "symbol", + "seed": {"path": "fastapi/params.py", "line": 746}, + "related": [ + {"path": "fastapi/params.py", "start_line": 434, "end_line": 468}, + {"path": "fastapi/params.py", "start_line": 627, "end_line": 664} + ] + }, + { + "query": "HTTPException", + "relevant": [{"path": "fastapi/exceptions.py", "start_line": 17, "end_line": 66}], + "secondary": [], + "category": "symbol" + }, + { + "query": "APIRouter", + "relevant": [{"path": "fastapi/routing.py", "start_line": 1005, "end_line": 1030}], + "secondary": [], + "category": "symbol" + }, + { + "query": "BackgroundTasks", + "relevant": [{"path": "fastapi/background.py", "start_line": 11, "end_line": 39}], + "secondary": [], + "category": "symbol" + }, + { + "query": "OAuth2PasswordBearer", + "relevant": [{"path": "fastapi/security/oauth2.py", "start_line": 433, "end_line": 471}], + "secondary": [], + "category": "symbol" + } +] diff --git a/benchmarks/annotations/flask.json b/benchmarks/annotations/flask.json new file mode 100644 index 0000000..de9fd41 --- /dev/null +++ b/benchmarks/annotations/flask.json @@ -0,0 +1,90 @@ +[ + { + "query": "how are routes registered", + "relevant": ["src/flask/sansio/scaffold.py"], + "secondary": ["src/flask/sansio/app.py"] + }, + { + "query": "request context handling", + "relevant": ["src/flask/ctx.py"], + "secondary": [] + }, + { + "query": "session management", + 
"relevant": ["src/flask/sessions.py"], + "secondary": [] + }, + { + "query": "blueprint registration", + "relevant": ["src/flask/sansio/blueprints.py"], + "secondary": ["src/flask/blueprints.py"] + }, + { + "query": "configuration loading", + "relevant": ["src/flask/config.py"], + "secondary": [] + }, + { + "query": "template rendering", + "relevant": ["src/flask/templating.py"], + "secondary": [] + }, + { + "query": "error handlers", + "relevant": ["src/flask/sansio/scaffold.py"], + "secondary": ["src/flask/app.py"] + }, + { + "query": "CLI commands", + "relevant": ["src/flask/cli.py"], + "secondary": [] + }, + { + "query": "testing client", + "relevant": ["src/flask/testing.py"], + "secondary": [] + }, + { + "query": "JSON response helpers", + "relevant": ["src/flask/json/provider.py"], + "secondary": ["src/flask/helpers.py"] + }, + { + "query": "how does the application context push and pop around requests", + "relevant": ["src/flask/ctx.py"], + "secondary": ["src/flask/app.py"] + }, + { + "query": "how does Flask select which error handler to invoke for an exception", + "relevant": ["src/flask/app.py"], + "secondary": ["src/flask/sansio/app.py"] + }, + { + "query": "how is g used to store data scoped to the current request", + "relevant": ["src/flask/ctx.py"], + "secondary": ["src/flask/globals.py"] + }, + { + "query": "how are request lifecycle signals emitted", + "relevant": ["src/flask/signals.py"], + "secondary": ["src/flask/app.py"] + }, + { + "query": "how does Flask convert a view return value into a response object", + "relevant": ["src/flask/app.py"], + "secondary": ["src/flask/wrappers.py"] + }, + {"query": "Blueprint", "relevant": ["src/flask/sansio/blueprints.py"], "secondary": ["src/flask/blueprints.py"]}, + {"query": "render_template", "relevant": ["src/flask/templating.py"], "secondary": []}, + {"query": "Flask", "relevant": ["src/flask/app.py"], "secondary": []}, + {"query": "session", "relevant": ["src/flask/sessions.py"], "secondary": 
["src/flask/globals.py"]}, + {"query": "g", "relevant": ["src/flask/globals.py"], "secondary": ["src/flask/ctx.py"]}, + { + "query": "how Blueprint inherits routing behaviour from its sansio base class", + "relevant": ["src/flask/sansio/blueprints.py"], + "secondary": ["src/flask/blueprints.py"], + "category": "architecture", + "seed": {"path": "src/flask/blueprints.py", "line": 18}, + "related": ["src/flask/sansio/blueprints.py"] + } +] diff --git a/benchmarks/annotations/gin.json b/benchmarks/annotations/gin.json new file mode 100644 index 0000000..2ba38bc --- /dev/null +++ b/benchmarks/annotations/gin.json @@ -0,0 +1,40 @@ +[ + { + "query": "how routes are grouped and registered", + "relevant": ["routergroup.go"], + "secondary": ["gin.go"], + "category": "architecture" + }, + { + "query": "radix tree path matching", + "relevant": ["tree.go"], + "secondary": [], + "category": "semantic" + }, + { + "query": "request context lifecycle and helpers", + "relevant": ["context.go"], + "secondary": [], + "category": "semantic" + }, + { + "query": "panic recovery middleware", + "relevant": ["recovery.go"], + "secondary": [], + "category": "semantic" + }, + { + "query": "JSON request binding and validation", + "relevant": ["binding/json.go"], + "secondary": ["binding/default_validator.go"], + "category": "architecture" + }, + { + "query": "how the Gin Engine embeds RouterGroup and uses Context per request", + "relevant": ["gin.go"], + "secondary": ["routergroup.go", "context.go"], + "category": "architecture", + "seed": {"path": "gin.go", "line": 92}, + "related": ["context.go", "routergroup.go"] + } +] diff --git a/benchmarks/annotations/gson.json b/benchmarks/annotations/gson.json new file mode 100644 index 0000000..46c2f5e --- /dev/null +++ b/benchmarks/annotations/gson.json @@ -0,0 +1,32 @@ +[ + { + "query": "main Gson API for toJson and fromJson", + "relevant": ["gson/src/main/java/com/google/gson/Gson.java"], + "secondary": [], + "category": "semantic" + }, + { + 
"query": "GsonBuilder configuration and create", + "relevant": ["gson/src/main/java/com/google/gson/GsonBuilder.java"], + "secondary": [], + "category": "semantic" + }, + { + "query": "reflection-based field serialization and deserialization", + "relevant": ["gson/src/main/java/com/google/gson/internal/bind/ReflectiveTypeAdapterFactory.java"], + "secondary": [], + "category": "architecture" + }, + { + "query": "TypeAdapter", + "relevant": ["gson/src/main/java/com/google/gson/TypeAdapter.java"], + "secondary": [], + "category": "symbol" + }, + { + "query": "JsonParser", + "relevant": ["gson/src/main/java/com/google/gson/JsonParser.java"], + "secondary": [], + "category": "symbol" + } +] diff --git a/benchmarks/annotations/guzzle.json b/benchmarks/annotations/guzzle.json new file mode 100644 index 0000000..2bbbf02 --- /dev/null +++ b/benchmarks/annotations/guzzle.json @@ -0,0 +1,92 @@ +[ + { + "query": "HTTP client request sending and defaults", + "relevant": ["src/Client.php"], + "secondary": ["src/ClientTrait.php"], + "category": "semantic" + }, + { + "query": "middleware handler stack composition", + "relevant": ["src/HandlerStack.php"], + "secondary": ["src/Middleware.php"], + "category": "architecture" + }, + { + "query": "retry middleware and exponential backoff", + "relevant": ["src/RetryMiddleware.php"], + "secondary": [], + "category": "semantic" + }, + { + "query": "cookie jar implementation", + "relevant": ["src/Cookie/CookieJar.php"], + "secondary": ["src/Cookie/SetCookie.php"], + "category": "semantic" + }, + { + "query": "redirect following and location header handling", + "relevant": ["src/RedirectMiddleware.php"], + "secondary": [], + "category": "semantic" + }, + { + "query": "curl-based HTTP handler and connection management", + "relevant": ["src/Handler/CurlFactory.php"], + "secondary": ["src/Handler/CurlHandler.php"], + "category": "semantic" + }, + { + "query": "mock handler for simulating HTTP responses in tests", + "relevant": 
["src/Handler/MockHandler.php"], + "secondary": [], + "category": "semantic" + }, + { + "query": "concurrent request pool with limited parallelism", + "relevant": ["src/Pool.php"], + "secondary": [], + "category": "semantic" + }, + { + "query": "available request options and their configuration", + "relevant": ["src/RequestOptions.php"], + "secondary": [], + "category": "semantic" + }, + { + "query": "HTTP message formatting and request logging", + "relevant": ["src/MessageFormatter.php"], + "secondary": [], + "category": "semantic" + }, + { + "query": "how request body and content-type are prepared before sending", + "relevant": ["src/PrepareBodyMiddleware.php"], + "secondary": [], + "category": "semantic" + }, + { + "query": "exception hierarchy for HTTP and transfer errors", + "relevant": ["src/Exception/RequestException.php"], + "secondary": ["src/Exception/GuzzleException.php"], + "category": "semantic" + }, + { + "query": "Client", + "relevant": ["src/Client.php"], + "secondary": [], + "category": "symbol" + }, + { + "query": "HandlerStack", + "relevant": ["src/HandlerStack.php"], + "secondary": [], + "category": "symbol" + }, + { + "query": "RedirectMiddleware", + "relevant": ["src/RedirectMiddleware.php"], + "secondary": [], + "category": "symbol" + } +] diff --git a/benchmarks/annotations/httpx.json b/benchmarks/annotations/httpx.json new file mode 100644 index 0000000..08edd0c --- /dev/null +++ b/benchmarks/annotations/httpx.json @@ -0,0 +1,90 @@ +[ + { + "query": "how are HTTP requests sent", + "relevant": ["httpx/_client.py"], + "secondary": [] + }, + { + "query": "authentication and credentials", + "relevant": ["httpx/_auth.py"], + "secondary": [] + }, + { + "query": "connection pooling and transport", + "relevant": ["httpx/_transports/default.py"], + "secondary": ["httpx/_transports/base.py"] + }, + { + "query": "URL parsing and construction", + "relevant": ["httpx/_urlparse.py"], + "secondary": ["httpx/_urls.py"] + }, + { + "query": "response 
decoding and content", + "relevant": ["httpx/_decoders.py"], + "secondary": ["httpx/_models.py"] + }, + { + "query": "timeout configuration", + "relevant": ["httpx/_config.py"], + "secondary": [] + }, + { + "query": "cookie handling", + "relevant": ["httpx/_models.py"], + "secondary": ["httpx/_client.py"] + }, + { + "query": "multipart file upload", + "relevant": ["httpx/_multipart.py"], + "secondary": ["httpx/_content.py"] + }, + { + "query": "redirect following", + "relevant": ["httpx/_client.py"], + "secondary": [] + }, + { + "query": "error and exception types", + "relevant": ["httpx/_exceptions.py"], + "secondary": [] + }, + { + "query": "how does digest authentication handle the challenge-response flow", + "relevant": ["httpx/_auth.py"], + "secondary": [] + }, + { + "query": "how are keep-alive connections managed and reused", + "relevant": ["httpx/_transports/default.py"], + "secondary": ["httpx/_config.py"] + }, + { + "query": "how does streaming response body iteration work", + "relevant": ["httpx/_models.py"], + "secondary": ["httpx/_decoders.py"] + }, + { + "query": "how are query parameters encoded into the URL", + "relevant": ["httpx/_urls.py"], + "secondary": ["httpx/_urlparse.py"] + }, + { + "query": "how are retries and transport errors surfaced to the caller", + "relevant": ["httpx/_exceptions.py"], + "secondary": ["httpx/_transports/default.py"] + }, + {"query": "DigestAuth", "relevant": ["httpx/_auth.py"], "secondary": []}, + {"query": "AsyncClient", "relevant": ["httpx/_client.py"], "secondary": []}, + {"query": "Timeout", "relevant": ["httpx/_config.py"], "secondary": []}, + {"query": "HTTPStatusError", "relevant": ["httpx/_exceptions.py"], "secondary": []}, + {"query": "URL", "relevant": ["httpx/_urls.py"], "secondary": []}, + { + "query": "how the HTTP transport backend sends requests over the wire", + "relevant": ["httpx/_transports/default.py"], + "secondary": ["httpx/_transports/base.py"], + "category": "architecture", + "seed": {"path": 
"httpx/_client.py", "line": 731}, + "related": ["httpx/_transports/default.py"] + } +] diff --git a/benchmarks/annotations/jackson-databind.json b/benchmarks/annotations/jackson-databind.json new file mode 100644 index 0000000..079d2b6 --- /dev/null +++ b/benchmarks/annotations/jackson-databind.json @@ -0,0 +1,49 @@ +[ + { + "query": "ObjectMapper entry point for reading and writing JSON", + "relevant": ["src/main/java/tools/jackson/databind/ObjectMapper.java"], + "secondary": [], + "category": "semantic" + }, + { + "query": "JSON-specific mapper builder", + "relevant": ["src/main/java/tools/jackson/databind/json/JsonMapper.java"], + "secondary": ["src/main/java/tools/jackson/databind/ObjectMapper.java"], + "category": "architecture" + }, + { + "query": "mutable JSON object node with named field operations", + "relevant": ["src/main/java/tools/jackson/databind/node/ObjectNode.java"], + "secondary": ["src/main/java/tools/jackson/databind/node/JsonNodeFactory.java"], + "category": "semantic" + }, + { + "query": "polymorphic type resolution", + "relevant": ["src/main/java/tools/jackson/databind/jsontype/impl/StdTypeResolverBuilder.java"], + "secondary": ["src/main/java/tools/jackson/databind/jsontype/impl/TypeDeserializerBase.java"], + "category": "architecture" + }, + { + "query": "ObjectMapper", + "relevant": [{"path": "src/main/java/tools/jackson/databind/ObjectMapper.java", "start_line": 93, "end_line": 132}], + "secondary": [], + "category": "symbol", + "seed": {"path": "src/main/java/tools/jackson/databind/ObjectMapper.java", "line": 109}, + "related": [ + {"path": "src/main/java/tools/jackson/databind/ObjectMapper.java", "start_line": 356, "end_line": 387}, + {"path": "src/main/java/tools/jackson/databind/cfg/MapperBuilder.java", "start_line": 338, "end_line": 344} + ] + }, + { + "query": "JsonMapper", + "relevant": [{"path": "src/main/java/tools/jackson/databind/json/JsonMapper.java", "start_line": 16, "end_line": 44}], + "secondary": 
["src/main/java/tools/jackson/databind/ObjectMapper.java"], + "category": "symbol" + }, + { + "query": "ObjectNode", + "relevant": [{"path": "src/main/java/tools/jackson/databind/node/ObjectNode.java", "start_line": 21, "end_line": 60}], + "secondary": [], + "category": "symbol" + } +] diff --git a/benchmarks/annotations/ktor.json b/benchmarks/annotations/ktor.json new file mode 100644 index 0000000..b2aa38b --- /dev/null +++ b/benchmarks/annotations/ktor.json @@ -0,0 +1,93 @@ +[ + { + "query": "how the HttpClient is configured with plugins and an engine", + "relevant": ["ktor-client/ktor-client-core/common/src/io/ktor/client/HttpClient.kt"], + "secondary": ["ktor-client/ktor-client-core/common/src/io/ktor/client/HttpClientConfig.kt"], + "category": "architecture" + }, + { + "query": "how request and response pipelines process interceptors", + "relevant": ["ktor-client/ktor-client-core/common/src/io/ktor/client/request/HttpRequestPipeline.kt"], + "secondary": ["ktor-client/ktor-client-core/common/src/io/ktor/client/statement/HttpResponsePipeline.kt"], + "category": "architecture" + }, + { + "query": "how HTTP caching stores and validates cached responses", + "relevant": ["ktor-client/ktor-client-core/common/src/io/ktor/client/plugins/cache/HttpCache.kt"], + "secondary": ["ktor-client/ktor-client-core/common/src/io/ktor/client/plugins/cache/HttpCacheEntry.kt"], + "category": "architecture" + }, + { + "query": "how HTTP status codes are checked and exceptions raised on failure", + "relevant": ["ktor-client/ktor-client-core/common/src/io/ktor/client/plugins/HttpCallValidator.kt"], + "secondary": [], + "category": "semantic" + }, + { + "query": "how request and response timeouts are enforced", + "relevant": ["ktor-client/ktor-client-core/common/src/io/ktor/client/plugins/HttpTimeout.kt"], + "secondary": [], + "category": "semantic" + }, + { + "query": "how HTTP redirects are followed automatically", + "relevant": 
["ktor-client/ktor-client-core/common/src/io/ktor/client/plugins/HttpRedirect.kt"], + "secondary": [], + "category": "semantic" + }, + { + "query": "how cookies are stored and sent with requests", + "relevant": ["ktor-client/ktor-client-core/common/src/io/ktor/client/plugins/cookies/HttpCookies.kt"], + "secondary": ["ktor-client/ktor-client-core/common/src/io/ktor/client/plugins/cookies/AcceptAllCookiesStorage.kt"], + "category": "semantic" + }, + { + "query": "how multipart form data uploads are constructed", + "relevant": ["ktor-client/ktor-client-core/common/src/io/ktor/client/request/forms/FormDataContent.kt"], + "secondary": ["ktor-client/ktor-client-core/common/src/io/ktor/client/request/forms/formDsl.kt"], + "category": "semantic" + }, + { + "query": "how WebSocket connections are established and messages exchanged", + "relevant": ["ktor-client/ktor-client-core/common/src/io/ktor/client/plugins/websocket/WebSockets.kt"], + "secondary": ["ktor-client/ktor-client-core/common/src/io/ktor/client/plugins/websocket/ClientSessions.kt"], + "category": "architecture" + }, + { + "query": "how server-sent events are received and parsed from a streaming response", + "relevant": ["ktor-client/ktor-client-core/common/src/io/ktor/client/plugins/sse/SSE.kt"], + "secondary": ["ktor-client/ktor-client-core/common/src/io/ktor/client/plugins/sse/DefaultClientSSESession.kt"], + "category": "semantic" + }, + { + "query": "how the engine abstraction separates the client API from the transport", + "relevant": ["ktor-client/ktor-client-core/common/src/io/ktor/client/engine/HttpClientEngine.kt"], + "secondary": ["ktor-client/ktor-client-core/common/src/io/ktor/client/engine/HttpClientEngineBase.kt"], + "category": "architecture" + }, + { + "query": "HttpClient", + "relevant": ["ktor-client/ktor-client-core/common/src/io/ktor/client/HttpClient.kt"], + "secondary": [] + }, + { + "query": "HttpTimeout", + "relevant": 
["ktor-client/ktor-client-core/common/src/io/ktor/client/plugins/HttpTimeout.kt"], + "secondary": [] + }, + { + "query": "HttpRedirect", + "relevant": ["ktor-client/ktor-client-core/common/src/io/ktor/client/plugins/HttpRedirect.kt"], + "secondary": [] + }, + { + "query": "HttpCookies", + "relevant": ["ktor-client/ktor-client-core/common/src/io/ktor/client/plugins/cookies/HttpCookies.kt"], + "secondary": [] + }, + { + "query": "HttpClientEngine", + "relevant": ["ktor-client/ktor-client-core/common/src/io/ktor/client/engine/HttpClientEngine.kt"], + "secondary": [] + } +] diff --git a/benchmarks/annotations/laravel-framework.json b/benchmarks/annotations/laravel-framework.json new file mode 100644 index 0000000..376c82e --- /dev/null +++ b/benchmarks/annotations/laravel-framework.json @@ -0,0 +1,32 @@ +[ + { + "query": "queue connection resolution and connectors", + "relevant": ["src/Illuminate/Queue/QueueManager.php"], + "secondary": [], + "category": "architecture" + }, + { + "query": "database queue implementation", + "relevant": ["src/Illuminate/Queue/DatabaseQueue.php"], + "secondary": [], + "category": "semantic" + }, + { + "query": "queue worker command execution", + "relevant": ["src/Illuminate/Queue/Console/WorkCommand.php"], + "secondary": ["src/Illuminate/Queue/Worker.php"], + "category": "architecture" + }, + { + "query": "session store behavior", + "relevant": ["src/Illuminate/Session/Store.php"], + "secondary": ["src/Illuminate/Session/SessionManager.php"], + "category": "semantic" + }, + { + "query": "QueueManager", + "relevant": ["src/Illuminate/Queue/QueueManager.php"], + "secondary": [], + "category": "symbol" + } +] diff --git a/benchmarks/annotations/messagepack-csharp.json b/benchmarks/annotations/messagepack-csharp.json new file mode 100644 index 0000000..df6a60c --- /dev/null +++ b/benchmarks/annotations/messagepack-csharp.json @@ -0,0 +1,98 @@ +[ + { + "query": "how objects are serialized to MessagePack binary format", + "relevant": 
["src/MessagePack/MessagePackSerializer.cs"], + "secondary": ["src/MessagePack/MessagePackWriter.cs"], + "category": "architecture" + }, + { + "query": "how binary data is deserialized back into typed C# objects", + "relevant": ["src/MessagePack/MessagePackSerializer.cs"], + "secondary": ["src/MessagePack/MessagePackReader.cs"], + "category": "architecture" + }, + { + "query": "how custom formatters are registered and resolved for types", + "relevant": ["src/MessagePack/IFormatterResolver.cs"], + "secondary": ["src/MessagePack/Resolvers/CompositeResolver.cs"], + "category": "architecture" + }, + { + "query": "how the dynamic object resolver generates serialization code at runtime", + "relevant": ["src/MessagePack/Resolvers/DynamicObjectResolver.cs"], + "secondary": [], + "category": "semantic" + }, + { + "query": "how collections and arrays are serialized", + "relevant": ["src/MessagePack/Formatters/CollectionFormatter.cs"], + "secondary": [], + "category": "semantic" + }, + { + "query": "how union types with subtypes are handled in serialization", + "relevant": ["src/MessagePack/Resolvers/DynamicUnionResolver.cs"], + "secondary": [], + "category": "semantic" + }, + { + "query": "how serializer options control compression and resolver configuration", + "relevant": ["src/MessagePack/MessagePackSerializerOptions.cs"], + "secondary": [], + "category": "semantic" + }, + { + "query": "how enums are serialized as integers or strings", + "relevant": ["src/MessagePack/Resolvers/DynamicEnumResolver.cs"], + "secondary": ["src/MessagePack/Formatters/GenericEnumFormatter`1.cs", "src/MessagePack/Formatters/EnumAsStringFormatter`1.cs"], + "category": "semantic" + }, + { + "query": "reading MessagePack data from a stream incrementally", + "relevant": ["src/MessagePack/MessagePackStreamReader.cs"], + "secondary": [], + "category": "semantic" + }, + { + "query": "how the sequence pool manages buffer reuse during serialization", + "relevant": ["src/MessagePack/SequencePool.cs"], + 
"secondary": ["src/MessagePack/BufferWriter.cs"], + "category": "architecture" + }, + { + "query": "how LZ4 compression is applied to MessagePack payloads", + "relevant": ["src/MessagePack/MessagePackSerializer.cs"], + "secondary": [], + "category": "semantic" + }, + { + "query": "MessagePackSerializer", + "relevant": ["src/MessagePack/MessagePackSerializer.cs"], + "secondary": [] + }, + { + "query": "MessagePackReader", + "relevant": ["src/MessagePack/MessagePackReader.cs"], + "secondary": [] + }, + { + "query": "IMessagePackFormatter", + "relevant": ["src/MessagePack/Formatters/IMessagePackFormatter`1.cs"], + "secondary": [] + }, + { + "query": "CompositeResolver", + "relevant": ["src/MessagePack/Resolvers/CompositeResolver.cs"], + "secondary": [] + }, + { + "query": "MessagePackSerializerOptions", + "relevant": ["src/MessagePack/MessagePackSerializerOptions.cs"], + "secondary": [] + }, + { + "query": "StandardResolver", + "relevant": ["src/MessagePack/Resolvers/StandardResolver.cs"], + "secondary": [] + } +] diff --git a/benchmarks/annotations/model2vec.json b/benchmarks/annotations/model2vec.json new file mode 100644 index 0000000..ad58320 --- /dev/null +++ b/benchmarks/annotations/model2vec.json @@ -0,0 +1,82 @@ +[ + { + "query": "how the StaticModel encodes text to embeddings", + "relevant": ["model2vec/model.py"], + "secondary": ["model2vec/inference/model.py"] + }, + { + "query": "how a model is distilled from a sentence transformer", + "relevant": ["model2vec/distill/distillation.py"], + "secondary": [] + }, + { + "query": "tokenizer construction and vocabulary building", + "relevant": ["model2vec/tokenizer/tokenizer.py"], + "secondary": ["model2vec/distill/utils.py"] + }, + { + "query": "saving and loading models from disk", + "relevant": ["model2vec/persistence/persistence.py"], + "secondary": ["model2vec/persistence/datamodels.py"] + }, + { + "query": "quantization of model weights", + "relevant": ["model2vec/quantization.py"], + "secondary": 
["model2vec/vocabulary_quantization.py"] + }, + { + "query": "pushing and loading models from HuggingFace Hub", + "relevant": ["model2vec/persistence/hf.py"], + "secondary": [] + }, + { + "query": "distillation inference and embedding extraction", + "relevant": ["model2vec/distill/inference.py"], + "secondary": [] + }, + { + "query": "training a classifier on top of embeddings", + "relevant": ["model2vec/train/classifier.py"], + "secondary": ["model2vec/train/base.py"] + }, + { + "query": "generating model cards for publication", + "relevant": ["model2vec/modelcards/modelcards.py"], + "secondary": [] + }, + { + "query": "utility functions used across the package", + "relevant": ["model2vec/utils.py"], + "secondary": ["model2vec/distill/utils.py"] + }, + { + "query": "how mean pooling is applied over token embeddings during distillation", + "relevant": ["model2vec/distill/inference.py"], + "secondary": [] + }, + { + "query": "how PCA reduces embedding dimensionality", + "relevant": ["model2vec/distill/distillation.py"], + "secondary": ["model2vec/distill/inference.py"] + }, + { + "query": "how out-of-vocabulary tokens are handled at inference time", + "relevant": ["model2vec/model.py"], + "secondary": ["model2vec/tokenizer/tokenizer.py"] + }, + { + "query": "how vocabulary is pruned during distillation", + "relevant": ["model2vec/distill/utils.py"], + "secondary": ["model2vec/distill/distillation.py"] + }, + { + "query": "how subword token weights are aggregated for whole-word embeddings", + "relevant": ["model2vec/distill/inference.py"], + "secondary": ["model2vec/distill/distillation.py"] + }, + {"query": "StaticModel", "relevant": ["model2vec/model.py"], "secondary": []}, + {"query": "distill", "relevant": ["model2vec/distill/distillation.py"], "secondary": []}, + {"query": "PoolingMode", "relevant": ["model2vec/distill/inference.py"], "secondary": []}, + {"query": "quantize", "relevant": ["model2vec/quantization.py"], "secondary": []}, + {"query": "Tokenizer", 
"relevant": ["model2vec/tokenizer/tokenizer.py"], "secondary": []} +] diff --git a/benchmarks/annotations/monolog.json b/benchmarks/annotations/monolog.json new file mode 100644 index 0000000..5b53451 --- /dev/null +++ b/benchmarks/annotations/monolog.json @@ -0,0 +1,52 @@ +[ + { + "query": "logger handler stack and processors", + "relevant": ["src/Monolog/Logger.php"], + "secondary": [], + "category": "architecture" + }, + { + "query": "stream log handler writes to files and streams", + "relevant": ["src/Monolog/Handler/StreamHandler.php"], + "secondary": [], + "category": "semantic" + }, + { + "query": "line formatter output formatting", + "relevant": ["src/Monolog/Formatter/LineFormatter.php"], + "secondary": [], + "category": "semantic" + }, + { + "query": "json log formatting", + "relevant": ["src/Monolog/Formatter/JsonFormatter.php"], + "secondary": [], + "category": "semantic" + }, + { + "query": "Logger", + "relevant": ["src/Monolog/Logger.php"], + "secondary": [], + "category": "symbol" + }, + { + "query": "StreamHandler", + "relevant": ["src/Monolog/Handler/StreamHandler.php"], + "secondary": [], + "category": "symbol" + }, + { + "query": "JsonFormatter", + "relevant": ["src/Monolog/Formatter/JsonFormatter.php"], + "secondary": [], + "category": "symbol" + }, + { + "query": "how log handlers are registered and invoked by Logger", + "relevant": ["src/Monolog/Logger.php"], + "secondary": ["src/Monolog/Handler/AbstractHandler.php"], + "category": "architecture", + "seed": {"path": "src/Monolog/Logger.php", "line": 207}, + "related": ["src/Monolog/Handler/AbstractHandler.php", "src/Monolog/Handler/HandlerInterface.php"] + } +] diff --git a/benchmarks/annotations/pydantic.json b/benchmarks/annotations/pydantic.json new file mode 100644 index 0000000..1484adc --- /dev/null +++ b/benchmarks/annotations/pydantic.json @@ -0,0 +1,82 @@ +[ + { + "query": "how is BaseModel defined and instantiated", + "relevant": ["pydantic/main.py"], + "secondary": [] + }, + { + 
"query": "how are model fields declared and constrained", + "relevant": ["pydantic/fields.py"], + "secondary": ["pydantic/types.py"] + }, + { + "query": "JSON schema generation from models", + "relevant": ["pydantic/json_schema.py"], + "secondary": [] + }, + { + "query": "custom field and model validators", + "relevant": ["pydantic/functional_validators.py"], + "secondary": ["pydantic/class_validators.py"] + }, + { + "query": "how to serialize models to JSON", + "relevant": ["pydantic/functional_serializers.py"], + "secondary": ["pydantic/main.py"] + }, + { + "query": "network types like URLs and email addresses", + "relevant": ["pydantic/networks.py"], + "secondary": [] + }, + { + "query": "model configuration and settings", + "relevant": ["pydantic/config.py"], + "secondary": [] + }, + { + "query": "validating data without a model using TypeAdapter", + "relevant": ["pydantic/type_adapter.py"], + "secondary": [] + }, + { + "query": "alias handling for field names", + "relevant": ["pydantic/aliases.py"], + "secondary": ["pydantic/alias_generators.py"] + }, + { + "query": "root model for wrapping a single value", + "relevant": ["pydantic/root_model.py"], + "secondary": [] + }, + { + "query": "how discriminated unions select the right model variant", + "relevant": ["pydantic/types.py"], + "secondary": ["pydantic/main.py"] + }, + { + "query": "how computed fields are defined on a model", + "relevant": ["pydantic/fields.py"], + "secondary": ["pydantic/functional_serializers.py"] + }, + { + "query": "what runs after model initialisation in model_post_init", + "relevant": ["pydantic/main.py"], + "secondary": [] + }, + { + "query": "how model inheritance and field overriding works", + "relevant": ["pydantic/main.py"], + "secondary": ["pydantic/fields.py"] + }, + { + "query": "how to validate a function's arguments with pydantic", + "relevant": ["pydantic/validate_call_decorator.py"], + "secondary": ["pydantic/decorator.py"] + }, + {"query": "BaseModel", "relevant": 
["pydantic/main.py"], "secondary": []}, + {"query": "field_validator", "relevant": ["pydantic/functional_validators.py"], "secondary": []}, + {"query": "model_validator", "relevant": ["pydantic/functional_validators.py"], "secondary": []}, + {"query": "ConfigDict", "relevant": ["pydantic/config.py"], "secondary": []}, + {"query": "Field", "relevant": ["pydantic/fields.py"], "secondary": []} +] diff --git a/benchmarks/annotations/rack.json b/benchmarks/annotations/rack.json new file mode 100644 index 0000000..037b6b1 --- /dev/null +++ b/benchmarks/annotations/rack.json @@ -0,0 +1,44 @@ +[ + { + "query": "HTTP request wrapper and forwarded headers", + "relevant": ["lib/rack/request.rb"], + "secondary": [], + "category": "semantic" + }, + { + "query": "HTTP response construction", + "relevant": ["lib/rack/response.rb"], + "secondary": [], + "category": "semantic" + }, + { + "query": "middleware builder DSL", + "relevant": ["lib/rack/builder.rb"], + "secondary": [], + "category": "architecture" + }, + { + "query": "URL path mapping across mounted apps", + "relevant": ["lib/rack/urlmap.rb"], + "secondary": [], + "category": "architecture" + }, + { + "query": "Request", + "relevant": ["lib/rack/request.rb"], + "secondary": [], + "category": "symbol" + }, + { + "query": "Rack::Response", + "relevant": [{"path": "lib/rack/response.rb", "start_line": 23, "end_line": 62}], + "secondary": [], + "category": "symbol" + }, + { + "query": "Rack::Builder", + "relevant": [{"path": "lib/rack/builder.rb", "start_line": 36, "end_line": 80}], + "secondary": [], + "category": "symbol" + } +] diff --git a/benchmarks/annotations/rails.json b/benchmarks/annotations/rails.json new file mode 100644 index 0000000..9b5d376 --- /dev/null +++ b/benchmarks/annotations/rails.json @@ -0,0 +1,49 @@ +[ + { + "query": "application boot process and initialization", + "relevant": ["railties/lib/rails/application.rb"], + "secondary": ["railties/lib/rails/configuration.rb"], + "category": "architecture" + 
}, + { + "query": "engine configuration and load paths", + "relevant": ["railties/lib/rails/engine/configuration.rb"], + "secondary": ["railties/lib/rails/application.rb"], + "category": "architecture" + }, + { + "query": "rack integration for rails applications", + "relevant": ["railties/lib/rails/rack.rb"], + "secondary": [], + "category": "semantic" + }, + { + "query": "path management for rails apps", + "relevant": ["railties/lib/rails/paths.rb"], + "secondary": [], + "category": "semantic" + }, + { + "query": "Application", + "relevant": ["railties/lib/rails/application.rb"], + "secondary": [], + "category": "symbol" + }, + { + "query": "Rails::Engine", + "relevant": [{"path": "railties/lib/rails/engine.rb", "start_line": 348, "end_line": 387}], + "secondary": ["railties/lib/rails/engine/configuration.rb"], + "category": "symbol", + "seed": {"path": "railties/lib/rails/engine/configuration.rb", "line": 73}, + "related": [ + {"path": "railties/lib/rails/engine/updater.rb", "start_line": 1, "end_line": 21}, + {"path": "railties/lib/rails/railtie/configuration.rb", "start_line": 1, "end_line": 54} + ] + }, + { + "query": "Rails::Paths", + "relevant": ["railties/lib/rails/paths.rb"], + "secondary": [], + "category": "symbol" + } +] diff --git a/benchmarks/annotations/redux.json b/benchmarks/annotations/redux.json new file mode 100644 index 0000000..b551227 --- /dev/null +++ b/benchmarks/annotations/redux.json @@ -0,0 +1,32 @@ +[ + { + "query": "store creation and dispatch lifecycle", + "relevant": ["src/createStore.ts"], + "secondary": [], + "category": "architecture" + }, + { + "query": "middleware pipeline composition", + "relevant": ["src/applyMiddleware.ts"], + "secondary": ["src/compose.ts"], + "category": "architecture" + }, + { + "query": "combining reducers and validating reducer shape", + "relevant": ["src/combineReducers.ts"], + "secondary": [], + "category": "semantic" + }, + { + "query": "function composition utility", + "relevant": ["src/compose.ts"], 
+ "secondary": [], + "category": "semantic" + }, + { + "query": "createStore", + "relevant": ["src/createStore.ts"], + "secondary": [], + "category": "symbol" + } +] diff --git a/benchmarks/annotations/requests.json b/benchmarks/annotations/requests.json new file mode 100644 index 0000000..067c6c2 --- /dev/null +++ b/benchmarks/annotations/requests.json @@ -0,0 +1,109 @@ +[ + { + "query": "how HTTP sessions manage state and connections", + "relevant": ["src/requests/sessions.py"], + "secondary": [] + }, + { + "query": "authentication mechanisms and credential handling", + "relevant": ["src/requests/auth.py"], + "secondary": [] + }, + { + "query": "transport adapters and connection pooling", + "relevant": ["src/requests/adapters.py"], + "secondary": [] + }, + { + "query": "cookie storage and persistence", + "relevant": ["src/requests/cookies.py"], + "secondary": [] + }, + { + "query": "how redirects are followed", + "relevant": ["src/requests/sessions.py"], + "secondary": ["src/requests/models.py"] + }, + { + "query": "error and exception types", + "relevant": ["src/requests/exceptions.py"], + "secondary": [] + }, + { + "query": "request and response model internals", + "relevant": ["src/requests/models.py"], + "secondary": [] + }, + { + "query": "SSL certificate verification", + "relevant": ["src/requests/adapters.py"], + "secondary": ["src/requests/certs.py"] + }, + { + "query": "event hooks system", + "relevant": ["src/requests/hooks.py"], + "secondary": [] + }, + { + "query": "utility functions for encoding and headers", + "relevant": ["src/requests/utils.py"], + "secondary": [] + }, + { + "query": "how a PreparedRequest is built from user-supplied arguments", + "relevant": ["src/requests/models.py"], + "secondary": ["src/requests/sessions.py"] + }, + { + "query": "how digest authentication implements the challenge-response handshake", + "relevant": ["src/requests/auth.py"], + "secondary": [] + }, + { + "query": "how response encoding is detected from headers 
and content", + "relevant": ["src/requests/utils.py"], + "secondary": ["src/requests/models.py"] + }, + { + "query": "how proxy settings are read from environment variables", + "relevant": ["src/requests/utils.py"], + "secondary": ["src/requests/sessions.py"] + }, + { + "query": "how connection keep-alive and pooling limits are configured", + "relevant": ["src/requests/adapters.py"], + "secondary": [] + }, + { + "query": "Session", + "relevant": [{"path": "src/requests/sessions.py", "start_line": 356, "end_line": 394}], + "secondary": [], + "category": "symbol" + }, + { + "query": "HTTPDigestAuth", + "relevant": [{"path": "src/requests/auth.py", "start_line": 107, "end_line": 136}], + "secondary": [], + "category": "symbol" + }, + { + "query": "PreparedRequest", + "relevant": [{"path": "src/requests/models.py", "start_line": 315, "end_line": 364}], + "secondary": [], + "category": "symbol", + "seed": {"path": "src/requests/sessions.py", "line": 485}, + "related": [{"path": "src/requests/models.py", "start_line": 315, "end_line": 353}] + }, + { + "query": "HTTPAdapter", + "relevant": [{"path": "src/requests/adapters.py", "start_line": 144, "end_line": 192}], + "secondary": [], + "category": "symbol" + }, + { + "query": "Response", + "relevant": [{"path": "src/requests/models.py", "start_line": 642, "end_line": 691}], + "secondary": [], + "category": "symbol" + } +] diff --git a/benchmarks/annotations/sinatra.json b/benchmarks/annotations/sinatra.json new file mode 100644 index 0000000..ddec420 --- /dev/null +++ b/benchmarks/annotations/sinatra.json @@ -0,0 +1,100 @@ +[ + { + "query": "core Sinatra DSL and routing behavior", + "relevant": ["lib/sinatra/base.rb"], + "secondary": [], + "category": "semantic" + }, + { + "query": "application startup and command line entrypoint", + "relevant": ["lib/sinatra/main.rb"], + "secondary": [], + "category": "architecture" + }, + { + "query": "exception rendering and stack traces", + "relevant": 
["lib/sinatra/show_exceptions.rb"], + "secondary": [], + "category": "semantic" + }, + { + "query": "request logging middleware", + "relevant": ["lib/sinatra/middleware/logger.rb"], + "secondary": [], + "category": "semantic" + }, + { + "query": "how route handlers are compiled and URL patterns matched", + "relevant": [{"path": "lib/sinatra/base.rb", "start_line": 1795, "end_line": 1817}], + "secondary": [], + "category": "semantic" + }, + { + "query": "before and after filter hooks for the request lifecycle", + "relevant": [{"path": "lib/sinatra/base.rb", "start_line": 1486, "end_line": 1505}], + "secondary": [], + "category": "semantic" + }, + { + "query": "response helper methods: halt, pass and redirect", + "relevant": [{"path": "lib/sinatra/base.rb", "start_line": 286, "end_line": 741}], + "secondary": [], + "category": "semantic" + }, + { + "query": "how templates are rendered with erb haml and other engines", + "relevant": [{"path": "lib/sinatra/base.rb", "start_line": 742, "end_line": 948}], + "secondary": [], + "category": "semantic" + }, + { + "query": "how the Rack middleware stack is assembled before requests are handled", + "relevant": [{"path": "lib/sinatra/base.rb", "start_line": 1819, "end_line": 1888}], + "secondary": [], + "category": "architecture" + }, + { + "query": "case-insensitive hash for request params", + "relevant": ["lib/sinatra/indifferent_hash.rb"], + "secondary": [], + "category": "semantic" + }, + { + "query": "Sinatra::Base", + "relevant": ["lib/sinatra/base.rb"], + "secondary": [], + "category": "symbol" + }, + { + "query": "Sinatra::Application", + "relevant": ["lib/sinatra/main.rb"], + "secondary": ["lib/sinatra/base.rb"], + "category": "symbol" + }, + { + "query": "Sinatra::ShowExceptions", + "relevant": ["lib/sinatra/show_exceptions.rb"], + "secondary": [], + "category": "symbol" + }, + { + "query": "Sinatra::Helpers", + "relevant": [{"path": "lib/sinatra/base.rb", "start_line": 286, "end_line": 741}], + "secondary": [], + 
"category": "symbol" + }, + { + "query": "IndifferentHash", + "relevant": ["lib/sinatra/indifferent_hash.rb"], + "secondary": [], + "category": "symbol" + }, + { + "query": "how Sinatra::Application inherits and configures Base for standalone use", + "relevant": ["lib/sinatra/main.rb"], + "secondary": ["lib/sinatra/base.rb"], + "category": "architecture", + "seed": {"path": "lib/sinatra/main.rb", "line": 30}, + "related": ["lib/sinatra/base.rb"] + } +] diff --git a/benchmarks/annotations/starlette.json b/benchmarks/annotations/starlette.json new file mode 100644 index 0000000..f588020 --- /dev/null +++ b/benchmarks/annotations/starlette.json @@ -0,0 +1,90 @@ +[ + { + "query": "how are URL routes registered and matched", + "relevant": ["starlette/routing.py"], + "secondary": ["starlette/convertors.py"] + }, + { + "query": "how does authentication middleware work", + "relevant": ["starlette/middleware/authentication.py"], + "secondary": ["starlette/authentication.py"] + }, + { + "query": "websocket connection and message handling", + "relevant": ["starlette/websockets.py"], + "secondary": [] + }, + { + "query": "how are static files served", + "relevant": ["starlette/staticfiles.py"], + "secondary": [] + }, + { + "query": "parsing form data and file uploads", + "relevant": ["starlette/formparsers.py"], + "secondary": ["starlette/datastructures.py"] + }, + { + "query": "how does the test client simulate requests", + "relevant": ["starlette/testclient.py"], + "secondary": [] + }, + { + "query": "background task scheduling and execution", + "relevant": ["starlette/background.py"], + "secondary": [] + }, + { + "query": "application lifespan startup and shutdown events", + "relevant": ["starlette/routing.py"], + "secondary": ["starlette/applications.py"] + }, + { + "query": "streaming response implementation", + "relevant": ["starlette/responses.py"], + "secondary": [] + }, + { + "query": "how base middleware wraps request handling", + "relevant": 
["starlette/middleware/base.py"], + "secondary": [] + }, + { + "query": "how request state persists arbitrary data across middleware", + "relevant": ["starlette/requests.py"], + "secondary": [] + }, + { + "query": "how path convertor types work for route parameters", + "relevant": ["starlette/convertors.py"], + "secondary": ["starlette/routing.py"] + }, + { + "query": "how session data is signed and stored in cookies", + "relevant": ["starlette/middleware/sessions.py"], + "secondary": [] + }, + { + "query": "how CORS preflight requests are handled", + "relevant": ["starlette/middleware/cors.py"], + "secondary": [] + }, + { + "query": "how errors in ASGI apps are caught and turned into responses", + "relevant": ["starlette/middleware/errors.py"], + "secondary": ["starlette/middleware/exceptions.py"] + }, + {"query": "BaseHTTPMiddleware", "relevant": ["starlette/middleware/base.py"], "secondary": []}, + {"query": "Request", "relevant": ["starlette/requests.py"], "secondary": []}, + {"query": "WebSocket", "relevant": ["starlette/websockets.py"], "secondary": []}, + {"query": "BackgroundTask", "relevant": ["starlette/background.py"], "secondary": []}, + {"query": "Router", "relevant": ["starlette/routing.py"], "secondary": []}, + { + "query": "how the Starlette application delegates routing and lifespan to Router", + "relevant": ["starlette/applications.py"], + "secondary": ["starlette/routing.py"], + "category": "architecture", + "seed": {"path": "starlette/applications.py", "line": 19}, + "related": ["starlette/routing.py"] + } +] diff --git a/benchmarks/annotations/tokio.json b/benchmarks/annotations/tokio.json new file mode 100644 index 0000000..a902e16 --- /dev/null +++ b/benchmarks/annotations/tokio.json @@ -0,0 +1,99 @@ +[ + { + "query": "how spawned tasks are scheduled onto threads", + "relevant": ["tokio/src/runtime/scheduler/multi_thread/worker.rs"], + "secondary": ["tokio/src/task/spawn.rs"], + "category": "architecture" + }, + { + "query": "how the async 
mutex prevents concurrent access", + "relevant": ["tokio/src/sync/mutex.rs"], + "secondary": [], + "category": "semantic" + }, + { + "query": "how a broadcast channel delivers messages to multiple receivers", + "relevant": ["tokio/src/sync/broadcast.rs"], + "secondary": [], + "category": "architecture" + }, + { + "query": "how the timer wheel tracks sleep deadlines", + "relevant": ["tokio/src/runtime/time/wheel/level.rs"], + "secondary": ["tokio/src/time/sleep.rs"], + "category": "architecture" + }, + { + "query": "running non-async blocking code inside the async runtime", + "relevant": ["tokio/src/task/blocking.rs"], + "secondary": [], + "category": "semantic" + }, + { + "query": "how tasks that are not Send can run on a single thread", + "relevant": ["tokio/src/task/local.rs"], + "secondary": [], + "category": "architecture" + }, + { + "query": "waiting for any of several futures to complete", + "relevant": ["tokio/src/macros/select.rs"], + "secondary": [], + "category": "semantic" + }, + { + "query": "how the runtime builder configures thread pool size and flavour", + "relevant": ["tokio/src/runtime/builder.rs"], + "secondary": [], + "category": "architecture" + }, + { + "query": "rate-limiting concurrent operations with a semaphore", + "relevant": ["tokio/src/sync/semaphore.rs"], + "secondary": [], + "category": "semantic" + }, + { + "query": "watching a value and being notified when it changes", + "relevant": ["tokio/src/sync/watch.rs"], + "secondary": [], + "category": "semantic" + }, + { + "query": "collecting results from a dynamic set of spawned tasks", + "relevant": ["tokio/src/task/join_set.rs"], + "secondary": [], + "category": "semantic" + }, + { + "query": "integrating a non-async file descriptor with the tokio reactor", + "relevant": ["tokio/src/io/async_fd.rs"], + "secondary": [], + "category": "semantic" + }, + { + "query": "JoinSet", + "relevant": ["tokio/src/task/join_set.rs"], + "secondary": [] + }, + { + "query": "Semaphore", + "relevant": 
["tokio/src/sync/semaphore.rs"], + "secondary": [] + }, + { + "query": "MissedTickBehavior", + "relevant": ["tokio/src/time/interval.rs"], + "secondary": [] + }, + { + "query": "LocalSet", + "relevant": ["tokio/src/task/local.rs"], + "secondary": [] + }, + { + "query": "Notify", + "relevant": ["tokio/src/sync/notify.rs"], + "secondary": [] + } +] diff --git a/benchmarks/annotations/trpc.json b/benchmarks/annotations/trpc.json new file mode 100644 index 0000000..f3f8d25 --- /dev/null +++ b/benchmarks/annotations/trpc.json @@ -0,0 +1,99 @@ +[ + { + "query": "how a tRPC router is created and procedures are registered", + "relevant": ["packages/server/src/unstable-core-do-not-import/router.ts"], + "secondary": ["packages/server/src/unstable-core-do-not-import/procedureBuilder.ts"], + "category": "architecture" + }, + { + "query": "how middleware chains context transformations between procedures", + "relevant": ["packages/server/src/unstable-core-do-not-import/middleware.ts"], + "secondary": [], + "category": "architecture" + }, + { + "query": "how input validation and parsing works for procedures", + "relevant": ["packages/server/src/unstable-core-do-not-import/parser.ts"], + "secondary": ["packages/server/src/unstable-core-do-not-import/procedureBuilder.ts"], + "category": "semantic" + }, + { + "query": "how HTTP requests are resolved to tRPC procedures", + "relevant": ["packages/server/src/unstable-core-do-not-import/http/resolveResponse.ts"], + "secondary": ["packages/server/src/http.ts"], + "category": "architecture" + }, + { + "query": "how error formatting and serialization works", + "relevant": ["packages/server/src/unstable-core-do-not-import/error/formatter.ts"], + "secondary": ["packages/server/src/unstable-core-do-not-import/error/TRPCError.ts"], + "category": "semantic" + }, + { + "query": "how server-sent events and subscriptions are streamed to the client", + "relevant": ["packages/server/src/unstable-core-do-not-import/stream/sse.ts"], + "secondary": 
["packages/server/src/unstable-core-do-not-import/stream/jsonl.ts"], + "category": "architecture" + }, + { + "query": "how the observable pattern is used for subscriptions", + "relevant": ["packages/server/src/observable/observable.ts"], + "secondary": ["packages/server/src/observable/operators.ts"], + "category": "semantic" + }, + { + "query": "how type inference extracts input and output types from procedures", + "relevant": ["packages/server/src/unstable-core-do-not-import/clientish/inference.ts"], + "secondary": [], + "category": "semantic" + }, + { + "query": "adapting tRPC to run as a Node.js HTTP server handler", + "relevant": ["packages/server/src/adapters/node-http/nodeHTTPRequestHandler.ts"], + "secondary": ["packages/server/src/adapters/node-http/incomingMessageToRequest.ts"], + "category": "architecture" + }, + { + "query": "WebSocket adapter for real-time subscriptions", + "relevant": ["packages/server/src/adapters/ws.ts"], + "secondary": [], + "category": "semantic" + }, + { + "query": "how tRPC is initialized with root config and context factory", + "relevant": ["packages/server/src/unstable-core-do-not-import/initTRPC.ts"], + "secondary": ["packages/server/src/unstable-core-do-not-import/rootConfig.ts"], + "category": "architecture" + }, + { + "query": "how data transformer is applied to serialize and deserialize procedure payloads", + "relevant": ["packages/server/src/unstable-core-do-not-import/transformer.ts"], + "secondary": [], + "category": "semantic" + }, + { + "query": "TRPCError", + "relevant": ["packages/server/src/unstable-core-do-not-import/error/TRPCError.ts"], + "secondary": [] + }, + { + "query": "AnyRouter", + "relevant": ["packages/server/src/unstable-core-do-not-import/router.ts"], + "secondary": [] + }, + { + "query": "MiddlewareBuilder", + "relevant": ["packages/server/src/unstable-core-do-not-import/middleware.ts"], + "secondary": [] + }, + { + "query": "inferProcedureInput", + "relevant": 
["packages/server/src/unstable-core-do-not-import/procedure.ts"], + "secondary": [] + }, + { + "query": "fetchRequestHandler", + "relevant": ["packages/server/src/adapters/fetch/fetchRequestHandler.ts"], + "secondary": [] + } +] diff --git a/benchmarks/data.py b/benchmarks/data.py new file mode 100644 index 0000000..dac954a --- /dev/null +++ b/benchmarks/data.py @@ -0,0 +1,150 @@ +import json +from dataclasses import dataclass +from pathlib import Path + +BENCH_ROOT = Path.home() / ".cache" / "semble-bench" +BENCHMARKS_DIR = Path(__file__).parent +ANNOTATIONS_DIR = BENCHMARKS_DIR / "annotations" +REPOS_PATH = BENCHMARKS_DIR / "repos.json" + + +@dataclass(frozen=True) +class Target: + path: str + start_line: int | None = None + end_line: int | None = None + + @property + def has_span(self) -> bool: + """Return True if both start_line and end_line are set.""" + return self.start_line is not None and self.end_line is not None + + +@dataclass(frozen=True) +class RepoSpec: + name: str + language: str + url: str + revision: str + benchmark_root: str | None = None + + @property + def checkout_dir(self) -> Path: + """Return the local checkout directory for this repo.""" + return BENCH_ROOT / self.name + + @property + def benchmark_dir(self) -> Path: + """Return the root directory to index for benchmarking.""" + return self.checkout_dir if self.benchmark_root is None else self.checkout_dir / self.benchmark_root + + +@dataclass(frozen=True) +class Task: + repo: str + language: str + query: str + relevant: tuple[Target, ...] + secondary: tuple[Target, ...] 
+ category: str + + @property + def all_relevant(self) -> tuple[Target, ...]: + """Return primary and secondary relevant targets combined.""" + return self.relevant + self.secondary + + +def infer_category(query: str) -> str: + """Infer a task category from the query text.""" + if " " not in query.strip(): + return "symbol" + lowered = query.lower() + if lowered.startswith("how ") or lowered.startswith("how does") or lowered.startswith("how are"): + return "architecture" + return "semantic" + + +def _coerce_int(value: object) -> int: + """Coerce a string or int value to int, raising TypeError otherwise.""" + if not isinstance(value, int | str): + raise TypeError(f"expected int-compatible value, got {type(value).__name__}") + return int(value) + + +def _parse_target(raw: str | dict[str, object]) -> Target: + """Parse a target from a string path or a mapping with optional line span.""" + if isinstance(raw, str): + return Target(path=raw) + if not isinstance(raw, dict): + raise TypeError(f"expected mapping, got {type(raw).__name__}") + start_line = raw.get("start_line") + end_line = raw.get("end_line") + return Target( + path=str(raw["path"]), + start_line=_coerce_int(start_line) if start_line is not None else None, + end_line=_coerce_int(end_line) if end_line is not None else None, + ) + + +def load_repo_specs(path: Path = REPOS_PATH) -> dict[str, RepoSpec]: + """Load all repo specs from the JSON file at the given path.""" + raw = json.loads(path.read_text(encoding="utf-8")) + return {item["name"]: RepoSpec(**item) for item in raw} + + +def available_repo_specs() -> dict[str, RepoSpec]: + """Return only the repo specs that have a local checkout and annotation file.""" + return { + name: spec + for name, spec in load_repo_specs().items() + if spec.checkout_dir.exists() and (ANNOTATIONS_DIR / f"{name}.json").exists() + } + + +def load_tasks(repo_specs: dict[str, RepoSpec] | None = None) -> list[Task]: + """Load all benchmark tasks from annotation files, filtered to 
available repo specs.""" + specs = load_repo_specs() if repo_specs is None else repo_specs + tasks: list[Task] = [] + for annotation_file in sorted(ANNOTATIONS_DIR.glob("*.json")): + if annotation_file.stem not in specs: + continue + raw = json.loads(annotation_file.read_text(encoding="utf-8")) + default_repo = annotation_file.stem + for item in raw: + repo = item.get("repo", default_repo) + if repo not in specs: + continue + spec = specs[repo] + category = item.get("category") + tasks.append( + Task( + repo=repo, + language=spec.language, + query=item["query"], + relevant=tuple(_parse_target(t) for t in item.get("relevant", [])), + secondary=tuple(_parse_target(t) for t in item.get("secondary", [])), + category=category if isinstance(category, str) else infer_category(item["query"]), + ) + ) + return tasks + + +def apply_task_filters( + tasks: list[Task], + repos: list[str] | None = None, + languages: list[str] | None = None, +) -> list[Task]: + """Filter tasks to the given repos and/or languages; None means no filter.""" + filtered = [task for task in tasks if not repos or task.repo in repos] + return [task for task in filtered if not languages or task.language in languages] + + +def target_matches_location(file_path: str, start_line: int, end_line: int, target: Target) -> bool: + """Return True if the chunk at file_path:start_line-end_line covers the target.""" + norm_file = file_path.replace("\\", "/") + norm_target = target.path.replace("\\", "/") + if not (norm_file == norm_target or norm_file.endswith(f"/{norm_target}")): + return False + if not target.has_span: + return True + return not (end_line < target.start_line or start_line > target.end_line) # type: ignore[operator] diff --git a/benchmarks/repos.json b/benchmarks/repos.json new file mode 100644 index 0000000..56e36e2 --- /dev/null +++ b/benchmarks/repos.json @@ -0,0 +1,202 @@ +[ + { + "name": "aiohttp", + "language": "python", + "url": "https://github.com/aio-libs/aiohttp.git", + "revision": 
"fc67cfdfd7d4bbf53ef76515fae69726626fe256", + "benchmark_root": "aiohttp" + }, + { + "name": "fastapi", + "language": "python", + "url": "https://github.com/fastapi/fastapi.git", + "revision": "c3c9dd6b1a08bcda766e7b43eafe72c4c5e9e193", + "benchmark_root": "fastapi" + }, + { + "name": "flask", + "language": "python", + "url": "https://github.com/pallets/flask.git", + "revision": "258d68b6ff5e2244386540f48b48bab90d6ab827", + "benchmark_root": "src/flask" + }, + { + "name": "httpx", + "language": "python", + "url": "https://github.com/encode/httpx.git", + "revision": "b5addb64f0161ff6bfe94c124ef76f6a1fba5254", + "benchmark_root": "httpx" + }, + { + "name": "model2vec", + "language": "python", + "url": "https://github.com/MinishLab/model2vec.git", + "revision": "b3012ee04e41c634383a5d735cb3c7c51e806a18", + "benchmark_root": "model2vec" + }, + { + "name": "pydantic", + "language": "python", + "url": "https://github.com/pydantic/pydantic.git", + "revision": "82c15f0ba8a9f8d8d6ba595df73ad20e2ee2eccf", + "benchmark_root": "pydantic" + }, + { + "name": "requests", + "language": "python", + "url": "https://github.com/psf/requests.git", + "revision": "ef439eb779c1eba7cbdeeeb302b11e1e061b4b7d", + "benchmark_root": "src/requests" + }, + { + "name": "starlette", + "language": "python", + "url": "https://github.com/encode/starlette.git", + "revision": "1894d0d89badf43bc8bfe03ed221a8b2e100b2ab", + "benchmark_root": "starlette" + }, + { + "name": "axios", + "language": "javascript", + "url": "https://github.com/axios/axios.git", + "revision": "c7a76ddbf277db864ee6cfb4ef17b8a08ffbe3f5", + "benchmark_root": "lib" + }, + { + "name": "express", + "language": "javascript", + "url": "https://github.com/expressjs/express.git", + "revision": "8e022edc9185f540a3fcecaf5e56b850d919cdac", + "benchmark_root": "lib" + }, + { + "name": "redux", + "language": "javascript", + "url": "https://github.com/reduxjs/redux.git", + "revision": "aaa04ae8402ba2caba55a9c75bfa8d3df6c78f8c", + 
"benchmark_root": "src" + }, + { + "name": "gin", + "language": "go", + "url": "https://github.com/gin-gonic/gin.git", + "revision": "d3ffc9985281dcf4d3bef604cce4e662b1a327a6" + }, + { + "name": "cobra", + "language": "go", + "url": "https://github.com/spf13/cobra.git", + "revision": "61968e893eee2f27696c2fbc8e34fa5c4afaf7c4" + }, + { + "name": "chi", + "language": "go", + "url": "https://github.com/go-chi/chi.git", + "revision": "a54874f0e2f12647a19e82ee70dfa8185014100c" + }, + { + "name": "gson", + "language": "java", + "url": "https://github.com/google/gson.git", + "revision": "f4d371d29c04066dbe7fdb31f642831f9c7f40cd", + "benchmark_root": "gson" + }, + { + "name": "commons-lang", + "language": "java", + "url": "https://github.com/apache/commons-lang.git", + "revision": "0ba92dc402312a38252a3398931ffbfbb4a88f7d", + "benchmark_root": "src/main/java/org/apache/commons/lang3" + }, + { + "name": "jackson-databind", + "language": "java", + "url": "https://github.com/FasterXML/jackson-databind.git", + "revision": "e30139539416f69f1d7ae31c7e1d6da5b25bf362", + "benchmark_root": "src/main/java/tools/jackson/databind" + }, + { + "name": "guzzle", + "language": "php", + "url": "https://github.com/guzzle/guzzle.git", + "revision": "fb92d95f80a9da51bf8f2a5b26d8e8ea3b6d99ed", + "benchmark_root": "src" + }, + { + "name": "monolog", + "language": "php", + "url": "https://github.com/Seldaek/monolog.git", + "revision": "68b974809baff3f071893de61447212e9e688ee7", + "benchmark_root": "src/Monolog" + }, + { + "name": "laravel-framework", + "language": "php", + "url": "https://github.com/laravel/framework.git", + "revision": "0dcc8d2ba7f41bc8376a08e9ccd5d7b83e6a6d90", + "benchmark_root": "src/Illuminate" + }, + { + "name": "sinatra", + "language": "ruby", + "url": "https://github.com/sinatra/sinatra.git", + "revision": "f891dd2b6f4911e356600efe6c3b82af97d262c6", + "benchmark_root": "lib" + }, + { + "name": "rack", + "language": "ruby", + "url": "https://github.com/rack/rack.git", + 
"revision": "ca8a404704ed043797c4f9d482c97d722c0dc719", + "benchmark_root": "lib/rack" + }, + { + "name": "rails", + "language": "ruby", + "url": "https://github.com/rails/rails.git", + "revision": "75f9e28379ac7418b82fa950cfa81f6147275308", + "benchmark_root": "railties/lib/rails" + }, + { + "name": "tokio", + "language": "rust", + "url": "https://github.com/tokio-rs/tokio.git", + "revision": "5db10f538b683fe88d699dfd11be31d193db011c", + "benchmark_root": "tokio/src" + }, + { + "name": "trpc", + "language": "typescript", + "url": "https://github.com/trpc/trpc.git", + "revision": "c188dab0822caf3615199e4ac95147bc7560d26f", + "benchmark_root": "packages/server/src" + }, + { + "name": "messagepack-csharp", + "language": "csharp", + "url": "https://github.com/neuecc/MessagePack-CSharp.git", + "revision": "84db9f79e3ecc5f4e8b7c7f77cd15d7745f5f2a7", + "benchmark_root": "src/MessagePack" + }, + { + "name": "ktor", + "language": "kotlin", + "url": "https://github.com/ktorio/ktor.git", + "revision": "5913745a96101e8c78e47565e52d2baa8414441f", + "benchmark_root": "ktor-client/ktor-client-core/common/src" + }, + { + "name": "cats", + "language": "scala", + "url": "https://github.com/typelevel/cats.git", + "revision": "2102251a2f24a6ee14e087fc5da7768d267f2d6e", + "benchmark_root": "core/src/main/scala" + }, + { + "name": "alamofire", + "language": "swift", + "url": "https://github.com/Alamofire/Alamofire.git", + "revision": "e938f8c66708e7352fc7e3512647fa54255b267a", + "benchmark_root": "Source" + } +] diff --git a/benchmarks/results/.gitkeep b/benchmarks/results/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/benchmarks/results/40a76927ded9.json b/benchmarks/results/40a76927ded9.json new file mode 100644 index 0000000..d20be46 --- /dev/null +++ b/benchmarks/results/40a76927ded9.json @@ -0,0 +1,346 @@ +{ + "sha": "40a76927ded9482f62ebd5f63930ff59605fe9f8", + "model": "Pringled/potion-code-16M", + "summary": { + "ndcg10": 0.8668, + "p50_ms": 0.698, + 
"index_ms": 340.0 + }, + "by_language": { + "csharp": { + "repos": 1, + "ndcg10": 0.8263, + "p50_ms": 0.966, + "index_ms": 459.2 + }, + "go": { + "repos": 3, + "ndcg10": 0.9515, + "p50_ms": 0.537, + "index_ms": 164.0 + }, + "java": { + "repos": 3, + "ndcg10": 0.842, + "p50_ms": 1.149, + "index_ms": 965.2 + }, + "javascript": { + "repos": 3, + "ndcg10": 0.9282, + "p50_ms": 0.432, + "index_ms": 36.0 + }, + "kotlin": { + "repos": 1, + "ndcg10": 0.7631, + "p50_ms": 0.856, + "index_ms": 160.8 + }, + "php": { + "repos": 3, + "ndcg10": 0.906, + "p50_ms": 0.949, + "index_ms": 738.0 + }, + "python": { + "repos": 8, + "ndcg10": 0.8233, + "p50_ms": 0.498, + "index_ms": 163.3 + }, + "ruby": { + "repos": 3, + "ndcg10": 0.8911, + "p50_ms": 0.623, + "index_ms": 97.4 + }, + "rust": { + "repos": 1, + "ndcg10": 0.8878, + "p50_ms": 0.996, + "index_ms": 930.6 + }, + "scala": { + "repos": 1, + "ndcg10": 0.8415, + "p50_ms": 0.942, + "index_ms": 648.2 + }, + "swift": { + "repos": 1, + "ndcg10": 0.9316, + "p50_ms": 0.543, + "index_ms": 229.5 + }, + "typescript": { + "repos": 1, + "ndcg10": 0.7431, + "p50_ms": 0.882, + "index_ms": 121.8 + } + }, + "repos": [ + { + "repo": "aiohttp", + "language": "python", + "chunks": 756, + "ndcg5": 0.7132626857513019, + "ndcg10": 0.7821229638714016, + "p50_ms": 0.5919579998590052, + "index_ms": 267.3160420017666 + }, + { + "repo": "alamofire", + "language": "swift", + "chunks": 649, + "ndcg5": 0.900172569211564, + "ndcg10": 0.9315768229529695, + "p50_ms": 0.5432909965747967, + "index_ms": 229.49254100240069 + }, + { + "repo": "axios", + "language": "javascript", + "chunks": 166, + "ndcg5": 0.9671522420975631, + "ndcg10": 0.9671522420975631, + "p50_ms": 0.5915000001550652, + "index_ms": 58.03862500033574 + }, + { + "repo": "cats", + "language": "scala", + "chunks": 1254, + "ndcg5": 0.8157722039023972, + "ndcg10": 0.8414671964692401, + "p50_ms": 0.9416660032002255, + "index_ms": 648.2289169944124 + }, + { + "repo": "chi", + "language": "go", + "chunks": 
262, + "ndcg5": 0.9455120441745608, + "ndcg10": 0.9455120441745608, + "p50_ms": 0.6279579974943772, + "index_ms": 103.15404200082412 + }, + { + "repo": "cobra", + "language": "go", + "chunks": 394, + "ndcg5": 0.970068981106951, + "ndcg10": 0.970068981106951, + "p50_ms": 0.41579100070521235, + "index_ms": 149.65433299948927 + }, + { + "repo": "commons-lang", + "language": "java", + "chunks": 3152, + "ndcg5": 0.7688578654609097, + "ndcg10": 0.8052591049306037, + "p50_ms": 0.9200830027111806, + "index_ms": 1038.753667002311 + }, + { + "repo": "express", + "language": "javascript", + "chunks": 52, + "ndcg5": 0.9593872208972474, + "ndcg10": 0.9593872208972474, + "p50_ms": 0.25366600311826915, + "index_ms": 22.826792002888396 + }, + { + "repo": "fastapi", + "language": "python", + "chunks": 597, + "ndcg5": 0.7314364449312006, + "ndcg10": 0.7693095302894921, + "p50_ms": 0.47783299669390544, + "index_ms": 189.871916998527 + }, + { + "repo": "flask", + "language": "python", + "chunks": 291, + "ndcg5": 0.8570900833760776, + "ndcg10": 0.8767012186349079, + "p50_ms": 0.4285830000299029, + "index_ms": 92.66295799898217 + }, + { + "repo": "gin", + "language": "go", + "chunks": 576, + "ndcg5": 0.8807555442147937, + "ndcg10": 0.939064318485603, + "p50_ms": 0.5680000031134114, + "index_ms": 239.0974170048139 + }, + { + "repo": "gson", + "language": "java", + "chunks": 1460, + "ndcg5": 0.9261859507142916, + "ndcg10": 0.9261859507142916, + "p50_ms": 1.0848340025404468, + "index_ms": 482.5546249994659 + }, + { + "repo": "guzzle", + "language": "php", + "chunks": 206, + "ndcg5": 0.8326908338735671, + "ndcg10": 0.844506786325837, + "p50_ms": 0.5839169971295632, + "index_ms": 72.53133400081424 + }, + { + "repo": "httpx", + "language": "python", + "chunks": 248, + "ndcg5": 0.8519694264932337, + "ndcg10": 0.871159099521697, + "p50_ms": 0.4406670050229877, + "index_ms": 84.04612500453368 + }, + { + "repo": "jackson-databind", + "language": "java", + "chunks": 4570, + "ndcg5": 
0.7667968319202225, + "ndcg10": 0.7944291752941182, + "p50_ms": 1.442957996914629, + "index_ms": 1374.4051670000772 + }, + { + "repo": "ktor", + "language": "kotlin", + "chunks": 425, + "ndcg5": 0.726275662513606, + "ndcg10": 0.7630927329648237, + "p50_ms": 0.8556669999961741, + "index_ms": 160.8068749992526 + }, + { + "repo": "laravel-framework", + "language": "php", + "chunks": 6197, + "ndcg5": 0.967888315659275, + "ndcg10": 0.967888315659275, + "p50_ms": 1.3275840028654784, + "index_ms": 1987.1202089998405 + }, + { + "repo": "messagepack-csharp", + "language": "csharp", + "chunks": 1125, + "ndcg5": 0.8164536328001585, + "ndcg10": 0.8262866007393468, + "p50_ms": 0.9664999961387366, + "index_ms": 459.23387500079116 + }, + { + "repo": "model2vec", + "language": "python", + "chunks": 107, + "ndcg5": 0.6593701861221591, + "ndcg10": 0.695271294655741, + "p50_ms": 0.46462499449262396, + "index_ms": 44.985666005231906 + }, + { + "repo": "monolog", + "language": "php", + "chunks": 417, + "ndcg5": 0.9055096182921145, + "ndcg10": 0.9055096182921145, + "p50_ms": 0.9362909986521117, + "index_ms": 154.34570900106337 + }, + { + "repo": "pydantic", + "language": "python", + "chunks": 1518, + "ndcg5": 0.6795591269045096, + "ndcg10": 0.7070408064407742, + "p50_ms": 0.6636250036535785, + "index_ms": 490.62920799769927 + }, + { + "repo": "rack", + "language": "ruby", + "chunks": 249, + "ndcg5": 1.0, + "ndcg10": 1.0, + "p50_ms": 0.5600000004051253, + "index_ms": 96.56141699815635 + }, + { + "repo": "rails", + "language": "ruby", + "chunks": 465, + "ndcg5": 0.7466134836472739, + "ndcg10": 0.8346443747935481, + "p50_ms": 0.9815000012167729, + "index_ms": 168.55954200582346 + }, + { + "repo": "redux", + "language": "javascript", + "chunks": 53, + "ndcg5": 0.8226294385530917, + "ndcg10": 0.8580772959099011, + "p50_ms": 0.450166997325141, + "index_ms": 27.171499998075888 + }, + { + "repo": "requests", + "language": "python", + "chunks": 169, + "ndcg5": 0.9550842629661954, + "ndcg10": 
_MODEL_NAME = "Pringled/potion-code-16M"
_LATENCY_RUNS = 5  # timed searches per query; the median run is reported
_DIRECT_TOP_K = 10  # results requested per search


def _target_rank(results: list[SearchResult], target: Target) -> int | None:
    """Return the 1-based rank of the first result covering target, or None."""
    for rank, result in enumerate(results, 1):
        chunk = result.chunk
        if target_matches_location(chunk.file_path, chunk.start_line, chunk.end_line, target):
            return rank
    return None


@dataclass(frozen=True)
class RepoResult:
    """Benchmark metrics measured for a single repository."""

    repo: str
    language: str
    chunks: int
    ndcg5: float
    ndcg10: float
    p50_ms: float
    index_ms: float


def _mean(values: list[float]) -> float:
    """Return the arithmetic mean of values, or 0.0 for an empty list."""
    return sum(values) / len(values) if values else 0.0


def _group_by_language(results: list[RepoResult]) -> dict[str, list[RepoResult]]:
    """Group results by language; keys are in sorted order, values keep input order."""
    grouped: dict[str, list[RepoResult]] = {}
    for result in results:
        grouped.setdefault(result.language, []).append(result)
    return {language: grouped[language] for language in sorted(grouped)}


def _dcg(relevances: list[int]) -> float:
    """Compute Discounted Cumulative Gain for a ranked relevance list."""
    # Position i (0-based) is rank i + 1 and is discounted by log2(rank + 1).
    return sum(rel / math.log2(position + 2) for position, rel in enumerate(relevances))


def _ndcg_at_k(relevant_ranks: list[int], n_relevant: int, k: int) -> float:
    """Compute NDCG@k given the ranks of relevant results and the total relevant count.

    Relevance is binary: every 1-based rank in relevant_ranks that falls within the
    top k scores 1. The ideal ranking places min(k, n_relevant) hits first. Returns
    0.0 when n_relevant is 0.
    """
    if n_relevant == 0:
        return 0.0
    relevances = [0] * k
    for rank in relevant_ranks:
        if 1 <= rank <= k:
            relevances[rank - 1] = 1
    ideal = _dcg([1] * min(k, n_relevant))
    return _dcg(relevances) / ideal if ideal > 0 else 0.0


def _evaluate(index: SembleIndex, tasks: list[Task], *, verbose: bool = False) -> tuple[float, float, float]:
    """Return mean NDCG@5, NDCG@10, and median query latency (ms) across all tasks.

    Each query is searched _LATENCY_RUNS times; its latency is the median run and
    the ranking of the last run is scored. Targets that match no chunk of the index
    are excluded from the ideal ranking.

    Raises:
        ValueError: if tasks is empty (there is nothing to average).
    """
    if not tasks:
        raise ValueError("cannot evaluate an empty task list")

    ndcg5_sum = 0.0
    ndcg10_sum = 0.0
    latencies: list[float] = []
    chunks = index.chunks
    # Targets repeat across queries of one repo; remember whether each one is indexed.
    indexed: dict[Target, bool] = {}

    for task in tasks:
        query_latencies: list[float] = []
        for _ in range(_LATENCY_RUNS):
            started = time.perf_counter()
            results = index.search(task.query, top_k=_DIRECT_TOP_K)
            query_latencies.append((time.perf_counter() - started) * 1000)
        latencies.append(sorted(query_latencies)[_LATENCY_RUNS // 2])

        relevant_ranks = [rank for target in task.all_relevant if (rank := _target_rank(results, target)) is not None]
        n_relevant = 0
        for target in task.all_relevant:
            if target not in indexed:
                indexed[target] = any(
                    target_matches_location(c.file_path, c.start_line, c.end_line, target) for c in chunks
                )
            n_relevant += indexed[target]
        q_ndcg5 = _ndcg_at_k(relevant_ranks, n_relevant, 5)
        q_ndcg10 = _ndcg_at_k(relevant_ranks, n_relevant, 10)
        ndcg5_sum += q_ndcg5
        ndcg10_sum += q_ndcg10

        if verbose:
            cat = task.category or "?"
            # Use has_span so the printed span agrees with what matching actually uses.
            targets_str = ", ".join(
                f"{t.path}:{t.start_line}-{t.end_line}" if t.has_span else t.path for t in task.all_relevant
            )
            top_files = [r.chunk.file_path for r in results[:5]]
            print(
                f" [{cat:<12}] ndcg@10={q_ndcg10:.3f} ranks={relevant_ranks} n_rel={n_relevant} q={task.query!r}",
                file=sys.stderr,
            )
            print(f" targets: {targets_str}", file=sys.stderr)
            print(f" top-5: {top_files}", file=sys.stderr)

    total = len(tasks)
    latencies.sort()
    return ndcg5_sum / total, ndcg10_sum / total, latencies[len(latencies) // 2]


def _print_summary(results: list[RepoResult]) -> None:
    """Print per-language and overall benchmark summary to stderr."""
    if not results:
        print("No benchmark results to summarize.", file=sys.stderr)
        return

    by_language = _group_by_language(results)
    columns = ["Avg", *[language.title() for language in by_language]]

    print(file=sys.stderr)
    print("By language", file=sys.stderr)
    for language, grouped in by_language.items():
        print(
            f" {language}: repos={len(grouped)}"
            f" ndcg@5={_mean([r.ndcg5 for r in grouped]):.3f}"
            f" ndcg@10={_mean([r.ndcg10 for r in grouped]):.3f}"
            f" p50={_mean([r.p50_ms for r in grouped]):.2f}ms"
            f" index={_mean([r.index_ms for r in grouped]):.0f}ms",
            file=sys.stderr,
        )

    print(file=sys.stderr)
    print(f"{'=' * 104}", file=sys.stderr)
    print("Hybrid benchmark by language", file=sys.stderr)
    print(f"{'=' * 104}", file=sys.stderr)
    print(f"\n {'Metric':<28} " + " ".join(f"{column:>9}" for column in columns), file=sys.stderr)
    print(f" {'-' * 28} " + " ".join(f"{'-' * 9:>9}" for _ in columns), file=sys.stderr)

    # First column averages over every repo; the rest average within one language.
    groups = [results, *by_language.values()]
    ndcg_row = [f"{_mean([r.ndcg10 for r in group]):>9.3f}" for group in groups]
    p50_row = [f"{_mean([r.p50_ms for r in group]):>8.2f}ms" for group in groups]
    index_row = [f"{_mean([r.index_ms for r in group]):>7.0f}ms" for group in groups]

    print(f" {'NDCG@10':<28} " + " ".join(ndcg_row), file=sys.stderr)
    print(f" {'q-p50':<28} " + " ".join(p50_row), file=sys.stderr)
    print(f" {'index':<28} " + " ".join(index_row), file=sys.stderr)


def _bench_quality(
    repo_tasks: dict[str, list[Task]], model: StaticModel, specs: dict[str, RepoSpec], *, verbose: bool = False
) -> list[RepoResult]:
    """Run quality benchmarks (NDCG@5, NDCG@10, latency) for each repo.

    Repos are processed in sorted name order; each one is indexed from its
    benchmark_dir (timed) and then evaluated. A table row per repo goes to stderr.
    """
    # Widen the first column for long repo names so the table stays aligned.
    repo_width = max([12, *(len(repo) for repo in repo_tasks)])
    print(
        f"{'Repo':<{repo_width}} {'language':<12} {'chunks':>6} {'index':>9} {'NDCG@5':>8} {'NDCG@10':>8} {'p50':>8}",
        file=sys.stderr,
    )
    print(
        f"{'-' * repo_width} {'-' * 12} {'-' * 6} {'-' * 9} {'-' * 8} {'-' * 8} {'-' * 8}",
        file=sys.stderr,
    )
    results: list[RepoResult] = []
    for repo, tasks in sorted(repo_tasks.items()):
        spec = specs[repo]
        started = time.perf_counter()
        index = SembleIndex.from_path(spec.benchmark_dir, model=model)
        index_ms = (time.perf_counter() - started) * 1000
        ndcg5, ndcg10, p50_ms = _evaluate(index, tasks, verbose=verbose)
        n_chunks = len(index.chunks)
        results.append(
            RepoResult(
                repo=repo,
                language=spec.language,
                chunks=n_chunks,
                ndcg5=ndcg5,
                ndcg10=ndcg10,
                p50_ms=p50_ms,
                index_ms=index_ms,
            )
        )
        print(
            f"{repo:<{repo_width}} {spec.language:<12} {n_chunks:>6} {index_ms:>8.0f}ms"
            f" {ndcg5:>8.3f} {ndcg10:>8.3f} {p50_ms:>7.2f}ms",
            file=sys.stderr,
        )
    return results


def _save_results(results: list[RepoResult]) -> None:
    """Write results to benchmarks/results/<first 12 characters of the commit SHA>.json.

    The file is named "unknown.json" when the commit cannot be determined. Nothing
    is written for an empty result list.
    """
    if not results:
        print("No benchmark results to save.", file=sys.stderr)
        return

    benchmarks_dir = Path(__file__).parent
    try:
        # Resolve HEAD relative to this file, not the caller's working directory.
        sha = subprocess.check_output(["git", "rev-parse", "HEAD"], cwd=benchmarks_dir, text=True).strip()
    except (subprocess.CalledProcessError, OSError):
        # OSError covers a missing git executable; a finished run must still be saved.
        sha = "unknown"

    output = {
        "sha": sha,
        "model": _MODEL_NAME,
        "summary": {
            "ndcg10": round(_mean([r.ndcg10 for r in results]), 4),
            "p50_ms": round(_mean([r.p50_ms for r in results]), 3),
            "index_ms": round(_mean([r.index_ms for r in results]), 1),
        },
        "by_language": {
            language: {
                "repos": len(grouped),
                "ndcg10": round(_mean([r.ndcg10 for r in grouped]), 4),
                "p50_ms": round(_mean([r.p50_ms for r in grouped]), 3),
                "index_ms": round(_mean([r.index_ms for r in grouped]), 1),
            }
            for language, grouped in _group_by_language(results).items()
        },
        "repos": [asdict(r) for r in results],
    }

    results_dir = benchmarks_dir / "results"
    results_dir.mkdir(parents=True, exist_ok=True)
    out_path = results_dir / f"{sha[:12]}.json"
    out_path.write_text(json.dumps(output, indent=2) + "\n", encoding="utf-8")
    print(f"\nResults saved to {out_path}", file=sys.stderr)
def _run(*args: str) -> None:
    """Run a subprocess command, raising on non-zero exit."""
    subprocess.run(args, check=True)


def _sync_repo(name: str, url: str, revision: str) -> None:
    """Clone the repo if absent, then fetch and checkout the pinned revision."""
    repo_dir = BENCH_ROOT / name
    if not repo_dir.exists():
        print(f"cloning {name} -> {repo_dir}")
        # Only the pinned commit is ever used, so skip the full history and the
        # default-branch working tree that the checkout below would replace anyway.
        _run("git", "clone", "--depth", "1", "--no-checkout", url, str(repo_dir))
    print(f"syncing {name} @ {revision[:12]}")
    _run("git", "-C", str(repo_dir), "fetch", "--depth", "1", "origin", revision)
    _run("git", "-C", str(repo_dir), "checkout", "--detach", revision)


def _check_repo(name: str, revision: str) -> str | None:
    """Return an error string if the local checkout is missing or at the wrong revision.

    A checkout whose HEAD cannot be read (git failure or missing git executable)
    is reported as a problem as well. Returns None when the checkout matches.
    """
    repo_dir = BENCH_ROOT / name
    if not (repo_dir / ".git").exists():
        return f"{name}: missing checkout at {repo_dir}"
    try:
        head = subprocess.check_output(("git", "-C", str(repo_dir), "rev-parse", "HEAD"), text=True).strip()
    except (subprocess.CalledProcessError, OSError) as exc:
        # Report instead of aborting so the remaining repos are still verified.
        return f"{name}: could not read HEAD of {repo_dir} ({exc})"
    if head != revision:
        return f"{name}: expected {revision}, found {head}"
    return None


def main() -> None:
    """Parse arguments and sync or verify the pinned benchmark repositories.

    Exits with status 1 when --check finds problems, and with an argparse usage
    error when --repo names a repository that is not pinned.
    """
    parser = argparse.ArgumentParser(description="Clone or update pinned benchmark repositories.")
    parser.add_argument("--repo", action="append", default=[], help="Restrict to one or more repo names.")
    parser.add_argument("--check", action="store_true", help="Only verify local checkouts against pinned revisions.")
    args = parser.parse_args()
    specs = load_repo_specs()

    # A mistyped name used to be ignored silently ("Synced 0 pinned repo(s).").
    unknown = sorted(set(args.repo) - specs.keys())
    if unknown:
        parser.error(f"unknown repo name(s): {', '.join(unknown)}")

    requested = set(args.repo)
    selected = {name: spec for name, spec in specs.items() if not requested or name in requested}
    BENCH_ROOT.mkdir(parents=True, exist_ok=True)

    if args.check:
        problems = [
            problem for name, spec in selected.items() if (problem := _check_repo(name, spec.revision)) is not None
        ]
        if problems:
            for problem in problems:
                print(problem, file=sys.stderr)
            raise SystemExit(1)
        print(f"Verified {len(selected)} pinned repo(s).")
        return

    for name, spec in selected.items():
        _sync_repo(name, spec.url, spec.revision)

    print(f"Synced {len(selected)} pinned repo(s).")


if __name__ == "__main__":
    main()