From 5de738e95f2425c5b09b4b81a98843600b6d1308 Mon Sep 17 00:00:00 2001 From: alhendrickson <159636032+alhendrickson@users.noreply.github.com.> Date: Wed, 4 Jun 2025 14:51:19 +0000 Subject: [PATCH 1/4] Run Grafana alloy to replace exporters --- observability/build-docker.sh | 8 +++++ .../examples/full/docker-compose.yml | 26 ++++++++++++++ .../examples/full/grafana-alloy/config.alloy | 36 +++++++++++++++++++ .../prometheus/Dockerfile.prometheus | 3 +- 4 files changed, 72 insertions(+), 1 deletion(-) create mode 100644 observability/build-docker.sh create mode 100644 observability/examples/full/grafana-alloy/config.alloy diff --git a/observability/build-docker.sh b/observability/build-docker.sh new file mode 100644 index 0000000..d3521cc --- /dev/null +++ b/observability/build-docker.sh @@ -0,0 +1,8 @@ + +docker build -t cogstacksystems/cogstack-observability-prometheus:latest -f prometheus/Dockerfile.prometheus ./prometheus + +docker build -t cogstacksystems/cogstack-observability-blackbox-exporter:latest -f prometheus/Dockerfile.blackbox ./prometheus + +docker build -t cogstacksystems/cogstack-observability-grafana:latest -f grafana/Dockerfile ./grafana + +docker build -t cogstacksystems/cogstack-observability-traefik:latest -f traefik/Dockerfile ./traefik \ No newline at end of file diff --git a/observability/examples/full/docker-compose.yml b/observability/examples/full/docker-compose.yml index 8fb9362..79322b0 100755 --- a/observability/examples/full/docker-compose.yml +++ b/observability/examples/full/docker-compose.yml @@ -5,6 +5,8 @@ services: prometheus: image: cogstacksystems/cogstack-observability-prometheus:latest restart: unless-stopped + ports: + - "9090:9090" volumes: - ./prometheus:/etc/prometheus/cogstack/site/ - prometheus-data:/prometheus @@ -29,6 +31,30 @@ services: - "80:80" volumes: - /var/run/docker.sock:/var/run/docker.sock:ro # So that Traefik can listen to the Docker events + alloy: + image: grafana/alloy:latest + command: + - run + - --server.http.listen-addr=0.0.0.0:12345 + - --storage.path=/var/lib/alloy/data + - --server.http.ui-path-prefix=/alloy + - /etc/alloy/config.alloy + ports: + - "12345:12345" + volumes: + - ./grafana-alloy/config.alloy:/etc/alloy/config.alloy + # CAdvisor + - /:/rootfs:ro + - /var/run:/var/run:rw + - /sys:/sys:ro + - /var/lib/docker/:/var/lib/docker:ro + labels: + - "traefik.enable=true" + - "traefik.http.routers.cadvisor.rule=PathPrefix(`/alloy`)" + environment: + - PROMETHEUS_URL=http://cogstack-observability-prometheus-1:9090/prometheus/api/v1/write + networks: + - observability networks: observability: driver: bridge diff --git a/observability/examples/full/grafana-alloy/config.alloy b/observability/examples/full/grafana-alloy/config.alloy new file mode 100644 index 0000000..d3af4a2 --- /dev/null +++ b/observability/examples/full/grafana-alloy/config.alloy @@ -0,0 +1,36 @@ +logging { + level = "debug" + format = "logfmt" +} + +prometheus.remote_write "default" { + endpoint { + url = sys.env("PROMETHEUS_URL") + } +} + +prometheus.scrape "exporter" { + + scrape_interval = "15s" + + targets = array.concat( + prometheus.exporter.self.alloy.targets, + prometheus.exporter.cadvisor.local_cadvisor.targets, + prometheus.exporter.unix.local_node_exporter.targets, + ) + forward_to = [prometheus.remote_write.default.receiver] +} + +// Alloys internal metrics +prometheus.exporter.self "alloy" { +} + +// CAdvisor +prometheus.exporter.cadvisor "local_cadvisor" { + docker_host = "unix:///var/run/docker.sock" + storage_duration = "5m" +} + +// Node exporter +prometheus.exporter.unix "local_node_exporter" { +} diff --git a/observability/prometheus/Dockerfile.prometheus b/observability/prometheus/Dockerfile.prometheus index 0db9a47..a743fcb 100644 --- a/observability/prometheus/Dockerfile.prometheus +++ b/observability/prometheus/Dockerfile.prometheus @@ -10,5 +10,6 @@ CMD [ \ "--storage.tsdb.path=/prometheus", \ "--storage.tsdb.retention.time=30d", \ "--web.external-url=/prometheus", \ - "--web.route-prefix=/prometheus" \ + "--web.route-prefix=/prometheus", \ + "--web.enable-remote-write-receiver" \ ] \ No newline at end of file From 4584769aff82252f97500502b9b488bf83322cdd Mon Sep 17 00:00:00 2001 From: alhendrickson <159636032+alhendrickson@users.noreply.github.com.> Date: Wed, 4 Jun 2025 15:11:44 +0000 Subject: [PATCH 2/4] Make sample folder using alloy --- .../examples/alloy/docker-compose.yml | 71 +++++++++++++++++++ .../grafana-alloy/config.alloy | 0 .../examples/full/docker-compose.yml | 26 ------- 3 files changed, 71 insertions(+), 26 deletions(-) create mode 100755 observability/examples/alloy/docker-compose.yml rename observability/examples/{full => alloy}/grafana-alloy/config.alloy (100%) diff --git a/observability/examples/alloy/docker-compose.yml b/observability/examples/alloy/docker-compose.yml new file mode 100755 index 0000000..c2088b1 --- /dev/null +++ b/observability/examples/alloy/docker-compose.yml @@ -0,0 +1,71 @@ +# Observability main stack. Prometheus and Grafana. +# Depends on docker-compose.exporters.yml for the network +name: "cogstack-observability" +services: + prometheus: + image: cogstacksystems/cogstack-observability-prometheus:latest + restart: unless-stopped + ports: + - "9090:9090" + volumes: + - ${BASE_PATH-.}/prometheus:/etc/prometheus/cogstack/site/ + - prometheus-data:/prometheus + networks: + - observability + command: + - "--config.file=/etc/prometheus/cogstack/defaults/prometheus.yml" + - "--storage.tsdb.path=/prometheus" + - "--storage.tsdb.retention.time=30d" + - "--web.external-url=/prometheus" + - "--web.route-prefix=/prometheus" + - "--web.enable-remote-write-receiver" + grafana: + image: cogstacksystems/cogstack-observability-grafana:latest + restart: unless-stopped + volumes: + - grafana-data:/var/lib/grafana + networks: + - observability + environment: + - GF_AUTH_ANONYMOUS_ENABLED=true # Allows use of grafana without sign in + - GF_AUTH_ANONYMOUS_ORG_ROLE=Viewer + traefik: + image: cogstacksystems/cogstack-observability-traefik:latest + networks: + - observability + restart: unless-stopped + ports: + - "80:80" + volumes: + - /var/run/docker.sock:/var/run/docker.sock:ro # So that Traefik can listen to the Docker events + alloy: + image: grafana/alloy:latest + command: + - run + - --server.http.listen-addr=0.0.0.0:12345 + - --storage.path=/var/lib/alloy/data + - --server.http.ui-path-prefix=/alloy + - /etc/alloy/config.alloy + ports: + - "12345:12345" + volumes: + - ${BASE_PATH-.}/grafana-alloy/config.alloy:/etc/alloy/config.alloy + # CAdvisor + - /:/rootfs:ro + - /var/run:/var/run:rw + - /sys:/sys:ro + - /var/lib/docker/:/var/lib/docker:ro + labels: + - "traefik.enable=true" + - "traefik.http.routers.cadvisor.rule=PathPrefix(`/alloy`)" + environment: + - PROMETHEUS_URL=http://cogstack-observability-prometheus-1:9090/prometheus/api/v1/write + networks: + - observability +networks: + observability: + driver: bridge + +volumes: + prometheus-data: + grafana-data: \ No newline at end of file diff --git a/observability/examples/full/grafana-alloy/config.alloy b/observability/examples/alloy/grafana-alloy/config.alloy similarity index 100% rename from observability/examples/full/grafana-alloy/config.alloy rename to observability/examples/alloy/grafana-alloy/config.alloy diff --git a/observability/examples/full/docker-compose.yml b/observability/examples/full/docker-compose.yml index 79322b0..8fb9362 100755 --- a/observability/examples/full/docker-compose.yml +++ b/observability/examples/full/docker-compose.yml @@ -5,8 +5,6 @@ services: prometheus: image: cogstacksystems/cogstack-observability-prometheus:latest restart: unless-stopped - ports: - - "9090:9090" volumes: - ./prometheus:/etc/prometheus/cogstack/site/ - prometheus-data:/prometheus @@ -31,30 +29,6 @@ services: - "80:80" volumes: - /var/run/docker.sock:/var/run/docker.sock:ro # So that Traefik can listen to the Docker events - alloy: - image: grafana/alloy:latest - command: - - run - - --server.http.listen-addr=0.0.0.0:12345 - - --storage.path=/var/lib/alloy/data - - --server.http.ui-path-prefix=/alloy - - /etc/alloy/config.alloy - ports: - - "12345:12345" - volumes: - - ./grafana-alloy/config.alloy:/etc/alloy/config.alloy - # CAdvisor - - /:/rootfs:ro - - /var/run:/var/run:rw - - /sys:/sys:ro - - /var/lib/docker/:/var/lib/docker:ro - labels: - - "traefik.enable=true" - - "traefik.http.routers.cadvisor.rule=PathPrefix(`/alloy`)" - environment: - - PROMETHEUS_URL=http://cogstack-observability-prometheus-1:9090/prometheus/api/v1/write - networks: - - observability networks: observability: driver: bridge From c352673b9352d3be8dfcb840bdfc199ffa4e48cc Mon Sep 17 00:00:00 2001 From: alhendrickson <159636032+alhendrickson@users.noreply.github.com.> Date: Wed, 4 Jun 2025 15:52:32 +0000 Subject: [PATCH 3/4] Add default labels to alloy scraped metrics --- observability/examples/alloy/docker-compose.yml | 13 ++++++++++--- .../examples/alloy/grafana-alloy/config.alloy | 6 ++++-- .../scrape-configs/probers/probe-internal.yml | 6 ++++++ .../scrape-configs/recording-rules/slo.yml | 8 ++++++++ observability/examples/full/docker-compose.yml | 5 +++++ 5 files changed, 33 insertions(+), 5 deletions(-) create mode 100644 observability/examples/alloy/prometheus/scrape-configs/probers/probe-internal.yml create mode 100644 observability/examples/alloy/prometheus/scrape-configs/recording-rules/slo.yml diff --git a/observability/examples/alloy/docker-compose.yml b/observability/examples/alloy/docker-compose.yml index c2088b1..a2f53c6 100755 --- a/observability/examples/alloy/docker-compose.yml +++ b/observability/examples/alloy/docker-compose.yml @@ -8,7 +8,7 @@ services: ports: - "9090:9090" volumes: - - ${BASE_PATH-.}/prometheus:/etc/prometheus/cogstack/site/ + - ${BASE_DIR-.}/prometheus:/etc/prometheus/cogstack/site/ - prometheus-data:/prometheus networks: - observability @@ -38,6 +38,11 @@ services: - "80:80" volumes: - /var/run/docker.sock:/var/run/docker.sock:ro # So that Traefik can listen to the Docker events + blackbox-exporter: + image: cogstacksystems/cogstack-observability-blackbox-exporter:latest + restart: unless-stopped + networks: + - observability alloy: image: grafana/alloy:latest command: @@ -49,7 +54,7 @@ services: ports: - "12345:12345" volumes: - - ${BASE_PATH-.}/grafana-alloy/config.alloy:/etc/alloy/config.alloy + - ${BASE_DIR-.}/grafana-alloy/config.alloy:/etc/alloy/config.alloy # CAdvisor - /:/rootfs:ro - /var/run:/var/run:rw @@ -59,7 +64,9 @@ services: - "traefik.enable=true" - "traefik.http.routers.cadvisor.rule=PathPrefix(`/alloy`)" environment: - - PROMETHEUS_URL=http://cogstack-observability-prometheus-1:9090/prometheus/api/v1/write + - PROMETHEUS_URL=${PROMETHEUS_URL-http://cogstack-observability-prometheus-1:9090/prometheus/api/v1/write} + - ALLOY_HOSTNAME=my-custom-host + - ALLOY_IP_ADDRESS=my-custom-ip networks: - observability networks: diff --git a/observability/examples/alloy/grafana-alloy/config.alloy b/observability/examples/alloy/grafana-alloy/config.alloy index d3af4a2..eaa2c5c 100644 --- a/observability/examples/alloy/grafana-alloy/config.alloy +++ b/observability/examples/alloy/grafana-alloy/config.alloy @@ -7,12 +7,14 @@ prometheus.remote_write "default" { endpoint { url = sys.env("PROMETHEUS_URL") } + external_labels = { + host = sys.env("ALLOY_HOSTNAME"), + ip_address = sys.env("ALLOY_IP_ADDRESS"), + } } prometheus.scrape "exporter" { - scrape_interval = "15s" - targets = array.concat( prometheus.exporter.self.alloy.targets, prometheus.exporter.cadvisor.local_cadvisor.targets, diff --git a/observability/examples/alloy/prometheus/scrape-configs/probers/probe-internal.yml b/observability/examples/alloy/prometheus/scrape-configs/probers/probe-internal.yml new file mode 100644 index 0000000..b3d7353 --- /dev/null +++ b/observability/examples/alloy/prometheus/scrape-configs/probers/probe-internal.yml @@ -0,0 +1,6 @@ +# Example of probe targets +- targets: + - https://cogstack.org + labels: + name: cogstack-homepage + job: probe-services \ No newline at end of file diff --git a/observability/examples/alloy/prometheus/scrape-configs/recording-rules/slo.yml b/observability/examples/alloy/prometheus/scrape-configs/recording-rules/slo.yml new file mode 100644 index 0000000..440913c --- /dev/null +++ b/observability/examples/alloy/prometheus/scrape-configs/recording-rules/slo.yml @@ -0,0 +1,8 @@ +groups: + - name: slo-target-rules + rules: + # What SLO am I targeting + - record: slo_target_over_30_days + expr: 0.95 # We target 95% uptime over 30 days + labels: + job: "probe-external-demo-apps" #Job here must match the job in the probe targets \ No newline at end of file diff --git a/observability/examples/full/docker-compose.yml b/observability/examples/full/docker-compose.yml index 8fb9362..ab994fa 100755 --- a/observability/examples/full/docker-compose.yml +++ b/observability/examples/full/docker-compose.yml @@ -29,6 +29,11 @@ services: - "80:80" volumes: - /var/run/docker.sock:/var/run/docker.sock:ro # So that Traefik can listen to the Docker events + blackbox-exporter: + image: cogstacksystems/cogstack-observability-blackbox-exporter:latest + restart: unless-stopped + networks: + - observability networks: observability: driver: bridge From bfe035cfc2bf39a9f8b6287fc8c802699c46aa6f Mon Sep 17 00:00:00 2001 From: alhendrickson <159636032+alhendrickson@users.noreply.github.com.> Date: Wed, 4 Jun 2025 16:34:51 +0000 Subject: [PATCH 4/4] Add stub for making alloy the default --- .../examples/alloy/docker-compose.yml | 8 ++-- observability/grafana/Dockerfile.alloy | 1 + .../grafana/alloy/default-config.alloy | 38 +++++++++++++++++++ 3 files changed, 43 insertions(+), 4 deletions(-) create mode 100644 observability/grafana/Dockerfile.alloy create mode 100644 observability/grafana/alloy/default-config.alloy diff --git a/observability/examples/alloy/docker-compose.yml b/observability/examples/alloy/docker-compose.yml index a2f53c6..4795443 100755 --- a/observability/examples/alloy/docker-compose.yml +++ b/observability/examples/alloy/docker-compose.yml @@ -50,7 +50,7 @@ services: - --server.http.listen-addr=0.0.0.0:12345 - --storage.path=/var/lib/alloy/data - --server.http.ui-path-prefix=/alloy - - /etc/alloy/config.alloy + - /etc/alloy ports: - "12345:12345" volumes: @@ -62,11 +62,11 @@ services: - /var/lib/docker/:/var/lib/docker:ro labels: - "traefik.enable=true" - - "traefik.http.routers.cadvisor.rule=PathPrefix(`/alloy`)" + - "traefik.http.routers.alloy.rule=PathPrefix(`/alloy`)" environment: - PROMETHEUS_URL=${PROMETHEUS_URL-http://cogstack-observability-prometheus-1:9090/prometheus/api/v1/write} - - ALLOY_HOSTNAME=my-custom-host - - ALLOY_IP_ADDRESS=my-custom-ip + - ALLOY_HOSTNAME=${ALLOY_HOSTNAME} # Used to add a label to metrics + - ALLOY_IP_ADDRESS=${ALLOY_IP_ADDRESS} # Used to add a label to metrics networks: - observability networks: diff --git a/observability/grafana/Dockerfile.alloy b/observability/grafana/Dockerfile.alloy new file mode 100644 index 0000000..f87f5c1 --- /dev/null +++ b/observability/grafana/Dockerfile.alloy @@ -0,0 +1 @@ +# TODO \ No newline at end of file diff --git a/observability/grafana/alloy/default-config.alloy b/observability/grafana/alloy/default-config.alloy new file mode 100644 index 0000000..eaa2c5c --- /dev/null +++ b/observability/grafana/alloy/default-config.alloy @@ -0,0 +1,38 @@ +logging { + level = "debug" + format = "logfmt" +} + +prometheus.remote_write "default" { + endpoint { + url = sys.env("PROMETHEUS_URL") + } + external_labels = { + host = sys.env("ALLOY_HOSTNAME"), + ip_address = sys.env("ALLOY_IP_ADDRESS"), + } +} + +prometheus.scrape "exporter" { + scrape_interval = "15s" + targets = array.concat( + prometheus.exporter.self.alloy.targets, + prometheus.exporter.cadvisor.local_cadvisor.targets, + prometheus.exporter.unix.local_node_exporter.targets, + ) + forward_to = [prometheus.remote_write.default.receiver] +} + +// Alloys internal metrics +prometheus.exporter.self "alloy" { +} + +// CAdvisor +prometheus.exporter.cadvisor "local_cadvisor" { + docker_host = "unix:///var/run/docker.sock" + storage_duration = "5m" +} + +// Node exporter +prometheus.exporter.unix "local_node_exporter" { +}