Skip to content

Commit

Permalink
testutil/compose: add basic prometheus alerts (#719)
Browse files Browse the repository at this point in the history
Adds a few basic alerts to the compose Prometheus instance. Note that these are pure Prometheus alerts; `alertmanager` hasn't been configured, since grouping, silencing, or managing alerts over time isn't required here.

category: test
ticket: #631
  • Loading branch information
corverroos committed Jun 15, 2022
1 parent 62b3975 commit 78f8075
Show file tree
Hide file tree
Showing 4 changed files with 49 additions and 0 deletions.
1 change: 1 addition & 0 deletions testutil/compose/docker-compose.template
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ services:
networks: [compose]
volumes:
- ./prometheus/prometheus.yml:/etc/prometheus/prometheus.yml
- ./prometheus/rules.yml:/etc/prometheus/rules.yml

grafana:
image: grafana/grafana:latest
Expand Down
3 changes: 3 additions & 0 deletions testutil/compose/static/prometheus/prometheus.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,6 @@ scrape_configs:
- job_name: 'node3'
static_configs:
- targets: ['node3:16001']

rule_files:
- /etc/prometheus/rules.yml
44 changes: 44 additions & 0 deletions testutil/compose/static/prometheus/rules.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# Prometheus alerting rules for the charon compose test cluster.
# Every rule uses `for: 15s`: the expression must keep returning results
# for 15 seconds before the alert moves from pending to firing.
groups:
  - name: charon
    rules:
      # `up` is 0 for any scrape target whose most recent scrape failed.
      - alert: Charon Down
        expr: up == 0
        for: 15s
        annotations:
          description: "Ensures charon node(s) are available"

      # Compares the raw counter value rather than a windowed increase, so
      # the alert stays active for as long as the counter is non-zero.
      # NOTE(review): presumably intentional ("no error logs" at all) - confirm.
      - alert: Error Log Rate
        expr: app_log_error_total > 0
        for: 15s
        annotations:
          description: "Ensures no error logs"

      # More than 2 warnings within the trailing 30s window.
      - alert: Warn Log Rate
        expr: increase(app_log_warn_total[30s]) > 2
        for: 15s
        annotations:
          description: "Ensures warning log rate is low"

      # Errors on every validator API endpoint except "proxy", which has its
      # own, more lenient rule below.
      - alert: Validator API Error Rate
        expr: increase(core_validatorapi_request_error_total{endpoint!="proxy"}[30s]) > 1
        for: 15s
        annotations:
          description: "Ensures validator api error rate is very low"

      # Errors on the "proxy" endpoint only; tolerates up to 5 per 30s window.
      - alert: Proxy API Error Rate
        expr: increase(core_validatorapi_request_error_total{endpoint="proxy"}[30s]) > 5
        for: 15s
        annotations:
          description: "Ensures proxy api error rate is low"

      # Fires when the broadcast counter grew by less than 0.5 over the
      # trailing 30s window, i.e. broadcasting has effectively stalled.
      - alert: Broadcast Duty Rate
        expr: increase(core_bcast_broadcast_total[30s]) < 0.5
        for: 15s
        annotations:
          description: "Ensures broadcast duty rate is not low / is high"

      # Outstanding duties are those scheduled but not yet broadcast, so the
      # broadcast total is subtracted from the scheduled total.
      # NOTE(review): assumes both metrics carry identical label sets so the
      # one-to-one vector match pairs the series - confirm.
      - alert: Outstanding Duty Rate
        expr: core_scheduler_duty_total - core_bcast_broadcast_total > 50
        for: 15s
        annotations:
          description: "Ensures outstanding duties remain low"
1 change: 1 addition & 0 deletions testutil/compose/testdata/TestDockerCompose_run_yml.golden
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,7 @@ services:
networks: [compose]
volumes:
- ./prometheus/prometheus.yml:/etc/prometheus/prometheus.yml
- ./prometheus/rules.yml:/etc/prometheus/rules.yml

grafana:
image: grafana/grafana:latest
Expand Down

0 comments on commit 78f8075

Please sign in to comment.