From cc2425efca9f7e71571b89e4340be7e3483c25aa Mon Sep 17 00:00:00 2001 From: Milind Srivastava Date: Sat, 18 Apr 2026 17:02:50 -0400 Subject: [PATCH 1/4] feat: added some info logs and prometheus health check on startup --- .../src/drivers/query/servers/http.rs | 16 +++++++++ asap-query-engine/src/main.rs | 34 +++++++++++++++++++ 2 files changed, 50 insertions(+) diff --git a/asap-query-engine/src/drivers/query/servers/http.rs b/asap-query-engine/src/drivers/query/servers/http.rs index 3866bbdb..7eee5880 100644 --- a/asap-query-engine/src/drivers/query/servers/http.rs +++ b/asap-query-engine/src/drivers/query/servers/http.rs @@ -217,6 +217,12 @@ async fn process_query_request( total_duration.as_secs_f64() * 1000.0 ); debug!("=== RETURNING SUCCESS RESPONSE ==="); + info!( + "query='{}' destination=asap asap_latency_ms={:.2} total_latency_ms={:.2}", + parsed_request.query, + query_duration.as_secs_f64() * 1000.0, + total_duration.as_secs_f64() * 1000.0 + ); match state .adapter @@ -238,6 +244,11 @@ async fn process_query_request( // Step 4: Handle unsupported query using fallback client if let Some(fallback) = &state.fallback { debug!("Query not supported locally, forwarding to fallback"); + info!( + "query='{}' destination=prometheus total_latency_ms={:.2}", + parsed_request.query, + total_duration.as_secs_f64() * 1000.0 + ); // Fallback client handles the HTTP call and returns formatted response match fallback .execute_query_with_headers(parsed_request, headers) @@ -248,6 +259,11 @@ async fn process_query_request( } } else { debug!("Query not supported and forwarding disabled, returning error"); + info!( + "query='{}' destination=none_unsupported total_latency_ms={:.2}", + parsed_request.query, + total_duration.as_secs_f64() * 1000.0 + ); // Adapter formats the unsupported query error for its protocol match state.adapter.format_unsupported_query_response().await { Ok(json) => json.into_response(), diff --git a/asap-query-engine/src/main.rs 
b/asap-query-engine/src/main.rs index 92aade49..80fb52f2 100644 --- a/asap-query-engine/src/main.rs +++ b/asap-query-engine/src/main.rs @@ -403,6 +403,40 @@ async fn main() -> Result<()> { adapter_config, }; + // Verify Prometheus is reachable before starting + { + let client = reqwest::Client::new(); + let health_url = format!( + "{}/api/v1/status/runtimeinfo", + args.prometheus_server.trim_end_matches('/') + ); + match client + .get(&health_url) + .timeout(std::time::Duration::from_secs(5)) + .send() + .await + { + Ok(resp) if resp.status().is_success() => { + info!("Prometheus reachable at {}", args.prometheus_server); + } + Ok(resp) => { + error!( + "Prometheus at {} returned HTTP {} — cannot start", + args.prometheus_server, + resp.status() + ); + std::process::exit(1); + } + Err(e) => { + error!( + "Cannot reach Prometheus at {}: {}", + args.prometheus_server, e + ); + std::process::exit(1); + } + } + } + let query_tracker = if args.enable_query_tracker { use query_engine_rust::planner_client::{LocalPlannerClient, PlannerResult}; use query_engine_rust::QueryTrackerConfig; From 2d1fedfb89e581c04f3b14f4a8c33bbb9bdfc00e Mon Sep 17 00:00:00 2001 From: Milind Srivastava Date: Sat, 18 Apr 2026 17:16:20 -0400 Subject: [PATCH 2/4] Updated readme and added observation window to .env --- asap-dropin/.env | 7 ++ asap-dropin/README.md | 116 ++++++++++++++++++++++++++------- asap-dropin/docker-compose.yml | 4 +- 3 files changed, 103 insertions(+), 24 deletions(-) diff --git a/asap-dropin/.env b/asap-dropin/.env index 79e0f408..592c6a1d 100644 --- a/asap-dropin/.env +++ b/asap-dropin/.env @@ -19,3 +19,10 @@ REMOTE_WRITE_PORT=9091 # Point your Grafana Prometheus datasource here: # http://localhost:${QUERY_ENGINE_PORT} QUERY_ENGINE_PORT=8088 + +# ── Observation window ──────────────────────────────────────────────────────── +# How long ASAPQuery observes Grafana queries before generating an acceleration +# plan (in seconds). 
All queries are forwarded to Prometheus during this window. +# Rule of thumb: set this to at least 3× your Grafana dashboard refresh interval. +# Example: 30s refresh → 90s minimum; 1m refresh → 180s minimum. +TRACKER_OBSERVATION_WINDOW_SECS=180 diff --git a/asap-dropin/README.md b/asap-dropin/README.md index bb16ec99..e44d87c2 100644 --- a/asap-dropin/README.md +++ b/asap-dropin/README.md @@ -2,7 +2,7 @@ A self-contained single-container Docker Compose that adds ASAPQuery to an existing Prometheus and Grafana deployment. -On startup, all queries are forwarded transparently to your upstream Prometheus. After one observation window (default 10 min), the engine automatically plans and activates sketch-based acceleration based on the real queries it observed from Grafana. +On startup, all queries are forwarded transparently to your upstream Prometheus. After one observation window (default 180s), the engine automatically plans and activates sketch-based acceleration based on the real queries it observed from Grafana. ## Prerequisites @@ -10,33 +10,64 @@ On startup, all queries are forwarded transparently to your upstream Prometheus. - A running Prometheus instance - A running Grafana instance (with a Prometheus datasource) -## Quick Start +## Architecture + +``` +Your Prometheus ──remote_write──▸ ASAPQuery (:9091/receive) + │ + ▼ +Your Grafana ◂──query──── ASAPQuery Query Engine (:8088) + │ + ▼ (fallback / passthrough) + Your Prometheus +``` + +The query engine embeds the planner and runs it automatically after observing real Grafana queries for one observation window. No separate planner container, no Kafka, no Arroyo. -### 1. 
Configure environment +## Setup -Edit `.env`: +### Step 1 — Configure environment + +Edit `.env` to match your deployment: | Variable | Default | Description | |---|---|---| -| `PROMETHEUS_URL` | `http://host.docker.internal:9090` | URL of your Prometheus, reachable from inside Docker | +| `PROMETHEUS_URL` | `http://host.docker.internal:9090` | URL of your Prometheus, reachable from inside the ASAPQuery container | | `PROMETHEUS_SCRAPE_INTERVAL` | `15` | Your Prometheus scrape interval in seconds | | `REMOTE_WRITE_PORT` | `9091` | Host port for the remote-write receiver | | `QUERY_ENGINE_PORT` | `8088` | Host port for the ASAPQuery query engine | +| `TRACKER_OBSERVATION_WINDOW_SECS` | `180` | How long to observe queries before planning (see note below) | **Finding the right `PROMETHEUS_URL`:** - **Docker Desktop (Mac/Windows):** `http://host.docker.internal:9090` (default) - **Linux (Prometheus on host):** `http://172.17.0.1:9090` (default Docker bridge gateway) -- **Prometheus in another Docker Compose:** create a shared external network +- **Prometheus in another Docker Compose:** use a shared external Docker network and the Prometheus service name + +**Setting `TRACKER_OBSERVATION_WINDOW_SECS`:** +Set this to at least 3× your Grafana dashboard refresh interval so ASAPQuery sees enough query repetitions to build a useful plan. +- Grafana refresh 30s → set to 90 or higher +- Grafana refresh 1m → set to 180 or higher (default) +- Grafana refresh 5m → set to 900 or higher -### 2. Start ASAPQuery +### Step 2 — Start ASAPQuery ```bash docker compose up -d ``` -### 3. Add remote_write to your Prometheus +Verify it started: + +```bash +docker compose logs -f queryengine +``` + +You should see a line confirming Prometheus is reachable, then the engine waiting for the observation window. 
-Add this to your `prometheus.yml` and reload Prometheus: +### Step 3 — Configure Prometheus remote_write + +Prometheus needs to send all ingested samples to ASAPQuery so it can build sketches. + +**Add this block to your `prometheus.yml`:** ```yaml remote_write: @@ -46,29 +77,70 @@ remote_write: sample_age_limit: 5m ``` -### 4. Point Grafana at ASAPQuery +> **Finding the right `remote_write` URL:** The URL is from Prometheus's perspective, not your browser's. +> - **Prometheus on the same host as Docker:** `http://localhost:9091/receive` (default above) +> - **Prometheus in Docker on the same host:** `http://host.docker.internal:9091/receive` (Mac/Windows) or `http://172.17.0.1:9091/receive` (Linux) +> - Change `9091` if you set a different `REMOTE_WRITE_PORT` in `.env` -Change your Grafana Prometheus datasource URL from your Prometheus address to: +**Reload Prometheus to apply the change:** +If Prometheus was started with `--web.enable-lifecycle`: +```bash +curl -X POST http://localhost:9090/-/reload ``` -http://localhost:8088 + +Otherwise, restart your Prometheus process or container: +```bash +# systemd +sudo systemctl restart prometheus + +# Docker Compose +docker compose restart prometheus ``` -ASAPQuery speaks the Prometheus query API. Queries it can accelerate are answered from sketches; all others are transparently forwarded to your upstream Prometheus. +**Verify remote_write is active** by checking Prometheus logs for a line like: +``` +level=info msg="Remote storage started" +``` -## Architecture +### Step 4 — Point Grafana at ASAPQuery + +Grafana needs to send its queries to ASAPQuery instead of directly to Prometheus. + +1. Open Grafana in your browser +2. Go to **Connections → Data Sources** (or **Configuration → Data Sources** in older Grafana) +3. Click on your existing Prometheus datasource +4. 
Change the **URL** field from your current Prometheus address to: + ``` + http://localhost:8088 + ``` + (Change the port if you set a different `QUERY_ENGINE_PORT` in `.env`) +5. Click **Save & Test** — you should see "Data source is working" + +ASAPQuery speaks the Prometheus HTTP API. Grafana does not need any other changes. +### Step 5 — Verify end-to-end + +Open your Grafana dashboards and use them normally. During the observation window, all queries pass through to Prometheus transparently — your dashboards continue to work. + +After the observation window elapses, check the ASAPQuery logs: + +```bash +docker compose logs queryengine | grep query_tracker ``` -Your Prometheus ──remote_write──▸ ASAPQuery (:9091/receive) - │ - ▼ -Your Grafana ◂──query──── ASAPQuery Query Engine (:8088) - │ - ▼ (fallback / passthrough) - Your Prometheus + +You should see lines like: +``` +query_tracker: planner succeeded — streaming aggregations: N, inference queries: M ``` -The query engine embeds the planner and runs it automatically after observing real Grafana queries for one observation window. No separate planner container, no Kafka, no Arroyo. +From this point on, queries that ASAPQuery can accelerate are served from sketches. Check the routing in the logs: + +```bash +docker compose logs queryengine | grep "destination=" +``` + +Lines with `destination=asap` are served by ASAPQuery; lines with `destination=prometheus` are forwarded to your upstream Prometheus. ## Development diff --git a/asap-dropin/docker-compose.yml b/asap-dropin/docker-compose.yml index 8ce572b1..ec3fe5c4 100644 --- a/asap-dropin/docker-compose.yml +++ b/asap-dropin/docker-compose.yml @@ -9,7 +9,7 @@ name: asapquery-dropin # 4. Point your Grafana datasource URL -> http://localhost:${QUERY_ENGINE_PORT} # # The query engine starts with an empty plan and forwards all queries to Prometheus. 
-# After the observation window (default 10 min), it automatically generates a plan +# After the observation window (default 180s), it automatically generates a plan # based on real query patterns and begins precomputing sketches. networks: @@ -43,7 +43,7 @@ services: - "--lock-strategy=per-key" - "--forward-unsupported-queries" - "--enable-query-tracker" - - "--tracker-observation-window-secs=600" + - "--tracker-observation-window-secs=${TRACKER_OBSERVATION_WINDOW_SECS:-180}" healthcheck: test: ["CMD-SHELL", "bash -c 'echo > /dev/tcp/localhost/8088' 2>/dev/null || exit 1"] interval: 10s From 46cf7ff1641eb44e777f94ae8d5e734fd84fc2e5 Mon Sep 17 00:00:00 2001 From: Milind Srivastava Date: Sat, 18 Apr 2026 18:47:25 -0400 Subject: [PATCH 3/4] updated readme --- asap-dropin/README.md | 57 +++++++++++++++++-------------------------- 1 file changed, 23 insertions(+), 34 deletions(-) diff --git a/asap-dropin/README.md b/asap-dropin/README.md index e44d87c2..fdf206b4 100644 --- a/asap-dropin/README.md +++ b/asap-dropin/README.md @@ -13,17 +13,12 @@ On startup, all queries are forwarded transparently to your upstream Prometheus. ## Architecture ``` -Your Prometheus ──remote_write──▸ ASAPQuery (:9091/receive) - │ - ▼ -Your Grafana ◂──query──── ASAPQuery Query Engine (:8088) - │ - ▼ (fallback / passthrough) - Your Prometheus +Prometheus ──remote_write──▶ ASAPQuery (:9091) + ▲ │ + │ unsupported queries ▼ builds sketches + └──────────── ASAPQuery (:8088) ◀── Grafana ``` -The query engine embeds the planner and runs it automatically after observing real Grafana queries for one observation window. No separate planner container, no Kafka, no Arroyo. 
- ## Setup ### Step 1 — Configure environment @@ -34,13 +29,12 @@ Edit `.env` to match your deployment: |---|---|---| | `PROMETHEUS_URL` | `http://host.docker.internal:9090` | URL of your Prometheus, reachable from inside the ASAPQuery container | | `PROMETHEUS_SCRAPE_INTERVAL` | `15` | Your Prometheus scrape interval in seconds | -| `REMOTE_WRITE_PORT` | `9091` | Host port for the remote-write receiver | -| `QUERY_ENGINE_PORT` | `8088` | Host port for the ASAPQuery query engine | +| `REMOTE_WRITE_PORT` | `9091` | ASAPQuery data ingest port — must be free on the host | +| `QUERY_ENGINE_PORT` | `8088` | ASAPQuery query endpoint port — must be free on the host | | `TRACKER_OBSERVATION_WINDOW_SECS` | `180` | How long to observe queries before planning (see note below) | **Finding the right `PROMETHEUS_URL`:** -- **Docker Desktop (Mac/Windows):** `http://host.docker.internal:9090` (default) -- **Linux (Prometheus on host):** `http://172.17.0.1:9090` (default Docker bridge gateway) +- **Prometheus on the same host as Docker:** `http://172.17.0.1:9090` (default Docker bridge gateway on Linux) - **Prometheus in another Docker Compose:** use a shared external Docker network and the Prometheus service name **Setting `TRACKER_OBSERVATION_WINDOW_SECS`:** @@ -58,7 +52,7 @@ docker compose up -d Verify it started: ```bash -docker compose logs -f queryengine +docker compose logs queryengine ``` You should see a line confirming Prometheus is reachable, then the engine waiting for the observation window. @@ -77,7 +71,7 @@ remote_write: sample_age_limit: 5m ``` -> **Finding the right `remote_write` URL:** The URL is from Prometheus's perspective, not your browser's. +> **Finding the right `remote_write` URL:** The URL is from Prometheus's perspective. 
> - **Prometheus on the same host as Docker:** `http://localhost:9091/receive` (default above) > - **Prometheus in Docker on the same host:** `http://host.docker.internal:9091/receive` (Mac/Windows) or `http://172.17.0.1:9091/receive` (Linux) > - Change `9091` if you set a different `REMOTE_WRITE_PORT` in `.env` @@ -89,39 +83,34 @@ If Prometheus was started with `--web.enable-lifecycle`: curl -X POST http://localhost:9090/-/reload ``` -Otherwise, restart your Prometheus process or container: +Otherwise, send SIGHUP to the Prometheus process: ```bash -# systemd -sudo systemctl restart prometheus - -# Docker Compose -docker compose restart prometheus +kill -HUP $(pgrep prometheus) ``` -**Verify remote_write is active** by checking Prometheus logs for a line like: -``` -level=info msg="Remote storage started" -``` +See the [Prometheus configuration docs](https://prometheus.io/docs/prometheus/latest/configuration/configuration/) for more details on reloading. -### Step 4 — Point Grafana at ASAPQuery +### Step 4 — Add an ASAPQuery datasource in Grafana -Grafana needs to send its queries to ASAPQuery instead of directly to Prometheus. +Create a new datasource in Grafana pointing at ASAPQuery, then switch your dashboards to use it. 1. Open Grafana in your browser -2. Go to **Connections → Data Sources** (or **Configuration → Data Sources** in older Grafana) -3. Click on your existing Prometheus datasource -4. Change the **URL** field from your current Prometheus address to: +2. Go to **Connections → Data Sources** +3. Click **Add new data source** and select **Prometheus** +4. Set the **Name** to something like `ASAPQuery` +5. Set the **URL** to: ``` http://localhost:8088 ``` (Change the port if you set a different `QUERY_ENGINE_PORT` in `.env`) -5. Click **Save & Test** — you should see "Data source is working" +6. Click **Save & Test** — you should see "Data source is working" +7. 
Open your dashboards and switch their datasource to `ASAPQuery` -ASAPQuery speaks the Prometheus HTTP API. Grafana does not need any other changes. +ASAPQuery speaks the Prometheus query API. Queries it can accelerate are answered from sketches; all others are transparently forwarded to your upstream Prometheus, so your dashboards continue to work. ### Step 5 — Verify end-to-end -Open your Grafana dashboards and use them normally. During the observation window, all queries pass through to Prometheus transparently — your dashboards continue to work. +Use your Grafana dashboards normally. During the observation window, all queries pass through to Prometheus transparently. After the observation window elapses, check the ASAPQuery logs: @@ -134,7 +123,7 @@ You should see lines like: query_tracker: planner succeeded — streaming aggregations: N, inference queries: M ``` -From this point on, queries that ASAPQuery can accelerate are served from sketches. Check the routing in the logs: +From this point on, check the routing in the logs: ```bash docker compose logs queryengine | grep "destination=" From eaa83ee496dd885846c3dc43aedf0cd142c2de7d Mon Sep 17 00:00:00 2001 From: Milind Srivastava Date: Sat, 18 Apr 2026 18:57:18 -0400 Subject: [PATCH 4/4] updated docker restart to no --- asap-dropin/docker-compose.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/asap-dropin/docker-compose.yml b/asap-dropin/docker-compose.yml index ec3fe5c4..fb527d75 100644 --- a/asap-dropin/docker-compose.yml +++ b/asap-dropin/docker-compose.yml @@ -50,4 +50,4 @@ services: timeout: 5s retries: 10 start_period: 15s - restart: unless-stopped + restart: "no"