GoodbyePlanet · GoodbyePlanet · May 2, 2026
diff --git a/docker-compose.yaml b/docker-compose.yaml
@@ -27,6 +27,22 @@ services:
       timeout: 5s
       retries: 20
 
+  # Reranker service: runs the text-embeddings-inference CPU image with a reranker model.
+  # NOTE(review): the service is named "jina-reranker" but loads BAAI/bge-reranker-v2-m3.
+  jina-reranker:
+    image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.6
+    platform: linux/amd64
+    container_name: jina-reranker
+    command: --model-id BAAI/bge-reranker-v2-m3 --max-client-batch-size 64 --max-batch-tokens 65536
+    volumes:
+      # Named volume mounted at /data — presumably the model cache; confirm.
+      - reranker_cache:/data
+    ports:
+      # Host port 8088 maps to container port 80.
+      - "8088:80"
+    healthcheck:
+      test: ["CMD-SHELL", "curl -f http://localhost:80/health || exit 1"]
+      interval: 15s
+      timeout: 5s
+      retries: 5
+      # Long start period — presumably to cover the initial model download; confirm.
+      start_period: 360s
+
   code-search:
     build: .
     platform: linux/amd64
@@ -37,14 +53,19 @@ services:
     environment:
       EMBEDDINGS_URL: http://jina-embeddings:80
       QDRANT_URL: http://qdrant:6333
+      RERANKER_URL: http://jina-reranker:80
+      RERANKER_ENABLED: "true"
     depends_on:
       qdrant:
         condition: service_healthy
       jina-embeddings:
         condition: service_healthy
+      jina-reranker:
+        condition: service_healthy
     volumes:
       - ./config.yaml:/app/config.yaml:ro
 
 volumes:
   qdrant_data:
   embeddings_cache:
+  reranker_cache:
diff --git a/eval/__init__.py b/eval/__init__.py
diff --git a/eval/__main__.py b/eval/__main__.py
@@ -0,0 +1,57 @@
+from __future__ import annotations
+
+import argparse
+import asyncio
+import logging
+from datetime import datetime
+from pathlib import Path
+
+# Only WARNING and above are emitted by default; records show level, logger name, message.
+logging.basicConfig(level=logging.WARNING, format="%(levelname)s %(name)s: %(message)s")
+
+
+def main() -> None:
+    """Parse the command-line options and run one retrieval evaluation.
+
+    When ``--output`` is not given, the output path passed to the runner
+    defaults to ``eval/results/<dataset stem>-<mode>-<timestamp>.json``.
+    """
+    cli = argparse.ArgumentParser(
+        description="Evaluate search_code retrieval quality against a golden dataset."
+    )
+    cli.add_argument("--dataset", required=True, help="Path to a YAML query dataset file")
+    cli.add_argument(
+        "--mode",
+        choices=["baseline", "rerank"],
+        default="baseline",
+        help="baseline = vector search only; rerank = vector + cross-encoder",
+    )
+    cli.add_argument(
+        "--output",
+        default=None,
+        help="Path to write JSON results (auto-generated if omitted)",
+    )
+    cli.add_argument(
+        "--top-k",
+        type=int,
+        nargs="+",
+        metavar="K",
+        default=[1, 3, 5, 10],
+        help="K values for Recall@K (default: 1 3 5 10)",
+    )
+    opts = cli.parse_args()
+
+    if opts.output is not None:
+        results_path = opts.output
+    else:
+        # Build a timestamped file name from the dataset's stem and the chosen mode.
+        stamp = datetime.now().strftime("%Y-%m-%d-%H%M%S")
+        results_path = f"eval/results/{Path(opts.dataset).stem}-{opts.mode}-{stamp}.json"
+
+    # The runner is imported only after argument parsing has succeeded.
+    from eval.runner import run_evaluation
+
+    evaluation = run_evaluation(
+        dataset_path=opts.dataset,
+        mode=opts.mode,
+        top_k_values=opts.top_k,
+        output_path=results_path,
+    )
+    asyncio.run(evaluation)
+
+
+# Entry point when the module is executed directly (e.g. ``python -m eval``).
+if __name__ == "__main__":
+    main()
diff --git a/eval/datasets/queries.yaml b/eval/datasets/queries.yaml
@@ -0,0 +1,124 @@
+# Evaluation dataset — primary golden query set
+#
+# Each case has:
+#   id:       unique string (used in reports)
+#   query:    natural language search query
+#   filters:  optional Qdrant filters (language, service, symbol_type)
+#   expected: list of acceptable correct answers — OR semantics
+#             symbol_name: substring match (case-insensitive)
+#             file_path_contains: substring match on the stored file path (case-insensitive)
+#   notes:    human-readable explanation of what the case tests
+#
+# A result is correct if ANY expected entry matches the hit's payload;
+# an entry matches only when ALL of the fields it specifies match.
+#
+# Run: uv run python -m eval --dataset eval/datasets/queries.yaml
+
+# ---------------------------------------------------------------------------
+# Java / Spring — auth-server, gateway (GoodbyePlanet/spring-cg-bff)
+# ---------------------------------------------------------------------------
+
+- id: java-webauthn-controller
+  query: "REST controller for WebAuthn passkey authentication"
+  filters: { language: java }
+  expected:
+    - { symbol_name: "WebAuthnController", file_path_contains: "WebAuthnController" }
+  notes: "Tests @RestController detection in auth-server"
+
+- id: java-entity-authorization
+  query: "JPA entity that stores OAuth2 authorization data"
+  filters: { language: java }
+  expected:
+    - { symbol_name: "Authorization", file_path_contains: "entity/authorization" }
+  notes: "Tests @Entity extraction — Authorization table"
+
+- id: java-entity-consent
+  query: "JPA entity for OAuth2 authorization consent"
+  filters: { language: java }
+  expected:
+    - { symbol_name: "AuthorizationConsent", file_path_contains: "authorizationconsent" }
+  notes: "Tests @Entity extraction — AuthorizationConsent table"
+
+- id: java-service-begin-auth
+  query: "service method that initiates the WebAuthn authentication flow"
+  filters: { language: java, symbol_type: method }
+  expected:
+    - { symbol_name: "beginAuthentication" }
+  notes: "Tests method-in-class chunking (parent WebAuthnService)"
+
+- id: java-check-leaked-password
+  query: "service method that checks whether a password has been leaked or compromised"
+  filters: { language: java, symbol_type: method }
+  expected:
+    - { symbol_name: "isPasswordLeaked" }
+  notes: "Tests semantic retrieval of a boolean check method"
+
+- id: java-webauthn-exception
+  query: "custom runtime exception thrown when WebAuthn authentication fails"
+  filters: { language: java }
+  expected:
+    - { symbol_name: "WebAuthnException", file_path_contains: "WebAuthnException" }
+  notes: "Tests exception class extraction"
+
+- id: java-login-controller
+  query: "controller that serves the login page view"
+  filters: { language: java }
+  expected:
+    - { symbol_name: "LoginPageController", file_path_contains: "LoginPageController" }
+  notes: "Tests @Controller (non-REST) detection"
+
+# ---------------------------------------------------------------------------
+# TypeScript / React — fe-client (GoodbyePlanet/spring-cg-bff)
+# ---------------------------------------------------------------------------
+
+- id: ts-react-app-component
+  query: "main React application component that manages authentication state"
+  filters: { language: typescript, symbol_type: react_component }
+  expected:
+    - { symbol_name: "App", file_path_contains: "App.tsx" }
+  notes: "Tests react_component detection for root component"
+
+- id: ts-interface-passkey
+  query: "TypeScript interface representing a registered passkey with a name and creation date"
+  filters: { language: typescript }
+  expected:
+    - { symbol_name: "RegisteredPasskey" }
+  notes: "Tests interface extraction from App.tsx"
+
+# ---------------------------------------------------------------------------
+# Go — leaked-passwords-api and passkey-service (GoodbyePlanet repos)
+# ---------------------------------------------------------------------------
+
+- id: go-struct-user
+  query: "Go struct representing a user with WebAuthn credentials"
+  filters: { language: go }
+  expected:
+    - { symbol_name: "User", file_path_contains: "user" }
+  notes: "Tests Go struct extraction — passkey-service models"
+
+- id: go-struct-credential
+  query: "Go struct storing WebAuthn credential and authenticator data"
+  filters: { language: go }
+  expected:
+    - { symbol_name: "Credential", file_path_contains: "user" }
+  notes: "Tests Go struct extraction — Credential model"
+
+- id: go-handler-check-password
+  query: "HTTP handler function that checks if a password has been leaked"
+  filters: { language: go }
+  expected:
+    - { symbol_name: "CheckPasswordHandler" }
+  notes: "Tests Go HTTP handler detection in leaked-passwords-api"
+
+- id: go-handler-get-by-hash
+  query: "HTTP handler that looks up a password hash and returns breach count"
+  filters: { language: go }
+  expected:
+    - { symbol_name: "GetByHashHandler" }
+  notes: "Tests Go function extraction — hash lookup handler"
+
+- id: go-service-password
+  query: "Go struct that wraps the password service with store and logger dependencies"
+  filters: { language: go, symbol_type: struct }
+  expected:
+    - { symbol_name: "PasswordService" }
+  notes: "Tests Go struct with dependency fields"
diff --git a/eval/datasets/queries_html_css.yaml b/eval/datasets/queries_html_css.yaml
@@ -0,0 +1,125 @@
+# Evaluation dataset — HTML and CSS specific cases
+#
+# These cases exist specifically to track retrieval quality for the HTML/CSS parsers,
+# where the embedding model (Jina Code V2) is trained primarily on programming languages
+# and its behaviour on CSS selectors / HTML structure is unverified.
+#
+# CSS symbol names are the selector string (e.g. ".btn-primary", "#header", "nav > ul").
+# HTML symbol names are the tag name, the heading text (for headings), or the id/class
+# value (for elements with id/class attributes).
+#
+# Run: uv run python -m eval --dataset eval/datasets/queries_html_css.yaml
+
+# ---------------------------------------------------------------------------
+# CSS examples — replace selectors with ones from your actual indexed files
+# ---------------------------------------------------------------------------
+
+- id: css-primary-button
+  query: "primary button styles"
+  filters: { language: css }
+  expected:
+    - { symbol_name: "btn-primary" }
+    - { symbol_name: ".btn-primary" }
+    - { symbol_name: ".button--primary" }
+  notes: "Button component CSS; tests selector-as-symbol-name pattern"
+
+- id: css-hover-state
+  query: "hover state styles for navigation links"
+  filters: { language: css }
+  expected:
+    - { symbol_name: "nav" }
+    - { symbol_name: "a:hover" }
+    - { file_path_contains: "nav" }
+  notes: "Tests pseudo-class selector embedding quality"
+
+- id: css-mobile-responsive
+  query: "mobile responsive breakpoint styles"
+  filters: { language: css }
+  expected:
+    - { file_path_contains: "responsive" }
+    - { file_path_contains: "mobile" }
+    - { symbol_name: "@media" }
+  notes: "Tests @media rule detection"
+
+- id: css-grid-layout
+  query: "CSS grid layout for the main content area"
+  filters: { language: css }
+  expected:
+    - { symbol_name: ".container" }
+    - { symbol_name: ".main" }
+    - { symbol_name: ".grid" }
+  notes: "Tests layout-related class naming"
+
+- id: css-form-input
+  query: "form input field styling"
+  filters: { language: css }
+  expected:
+    - { symbol_name: "input" }
+    - { symbol_name: ".form-control" }
+    - { symbol_name: ".input" }
+  notes: "Tests element + class selector disambiguation"
+
+- id: css-error-state
+  query: "error state styling for form validation"
+  filters: { language: css }
+  expected:
+    - { symbol_name: ".error" }
+    - { symbol_name: ".is-invalid" }
+    - { symbol_name: ".form-error" }
+  notes: "Tests semantic understanding of validation-related CSS"
+
+- id: css-card-component
+  query: "card component with shadow and border radius"
+  filters: { language: css }
+  expected:
+    - { symbol_name: ".card" }
+    - { symbol_name: ".card-container" }
+  notes: "Tests component-level CSS matching"
+
+- id: css-color-variables
+  query: "CSS custom properties for brand colors"
+  filters: { language: css }
+  expected:
+    - { symbol_name: ":root" }
+    - { symbol_name: "--primary" }
+  notes: "Tests CSS custom property / variable detection"
+
+# ---------------------------------------------------------------------------
+# HTML examples
+# ---------------------------------------------------------------------------
+
+- id: html-login-form
+  query: "login form with username and password fields"
+  filters: { language: html }
+  expected:
+    - { file_path_contains: "login" }
+    - { symbol_name: "login" }
+    - { symbol_name: "login-form" }
+  notes: "Tests HTML form element extraction"
+
+- id: html-nav-menu
+  query: "navigation menu with links"
+  filters: { language: html }
+  expected:
+    - { symbol_name: "nav" }
+    - { symbol_name: "navbar" }
+    - { symbol_name: "navigation" }
+  notes: "Tests nav element / id extraction"
+
+- id: html-page-header
+  query: "page header with site title"
+  filters: { language: html }
+  expected:
+    - { symbol_name: "header" }
+    - { symbol_name: "site-header" }
+    - { symbol_name: "page-header" }
+  notes: "Tests heading and landmark element detection"
+
+- id: html-product-card
+  query: "product card showing image, name and price"
+  filters: { language: html }
+  expected:
+    - { symbol_name: "product-card" }
+    - { symbol_name: "card" }
+    - { file_path_contains: "product" }
+  notes: "Tests class-named element extraction"
diff --git a/eval/matching.py b/eval/matching.py
@@ -0,0 +1,26 @@
+from __future__ import annotations
+
+from qdrant_client.models import ScoredPoint
+
+
+def find_first_correct_rank(
+    hits: list[ScoredPoint],
+    expected: list[dict],
+) -> int | None:
+    """Locate the earliest hit whose payload matches at least one expected entry.
+
+    Returns the hit's 1-based position in ``hits``, or ``None`` when nothing matches.
+    """
+    position = 0
+    for hit in hits:
+        position += 1
+        # OR semantics: the first expected entry that matches settles this hit.
+        for entry in expected:
+            if _matches(hit.payload, entry):
+                return position
+    return None
+
+
+def _matches(payload: dict | None, expected: dict) -> bool:
+    """Return True when ``payload`` satisfies every criterion present in ``expected``.
+
+    ``symbol_name`` is compared against the payload's ``symbol_name`` and
+    ``file_path_contains`` against its ``file_path``, both as case-insensitive
+    substring checks. A missing payload, or an entry that carries neither key
+    (e.g. a misspelled key in the dataset), never matches — otherwise such an
+    entry would mark every hit as correct and inflate the metrics.
+    """
+    field_for_key = {"symbol_name": "symbol_name", "file_path_contains": "file_path"}
+    criteria = [(expected[key], field) for key, field in field_for_key.items() if key in expected]
+    if payload is None or not criteria:
+        return False
+    return all(
+        # A payload field that is absent or None is treated as the empty string.
+        needle.lower() in (payload.get(field) or "").lower()
+        for needle, field in criteria
+    )
diff --git a/eval/metrics.py b/eval/metrics.py
@@ -0,0 +1,15 @@
+from __future__ import annotations
+
+
+def recall_at_k(ranks: list[int | None], k: int) -> float:
+    """Share of cases whose first correct hit has a rank of at most ``k``.
+
+    A ``None`` rank (no correct hit) counts as a miss; an empty list gives 0.0.
+    """
+    total = len(ranks)
+    within_k = [rank for rank in ranks if rank is not None and rank <= k]
+    return len(within_k) / total if total else 0.0
+
+
+def mrr(ranks: list[int | None]) -> float:
+    """Mean Reciprocal Rank: the average of 1/rank of the first correct hit.
+
+    A ``None`` rank contributes 0 but still counts in the denominator; an
+    empty list gives 0.0.
+    """
+    total = len(ranks)
+    reciprocals = [1 / rank for rank in ranks if rank is not None]
+    return sum(reciprocals) / total if total else 0.0
diff --git a/eval/results/.gitkeep b/eval/results/.gitkeep