-
Notifications
You must be signed in to change notification settings - Fork 94
/
zync-que.yaml
50 lines (50 loc) · 2.44 KB
/
zync-que.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
# PrometheusRule with the alerting rules for the 3scale zync-que component.
# NOTE(review): `__NAMESPACE__` looks like a placeholder that is replaced with
# the target namespace before this manifest is applied - confirm with the
# tooling that consumes this file.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  creationTimestamp: null
  labels:
    app: 3scale-api-management
    # NOTE(review): `prometheus` / `role` are presumably the labels matched by
    # the Prometheus rule selector - verify against the Prometheus resource.
    prometheus: application-monitoring
    role: alert-rules
    threescale_component: zync
  name: zync-que
spec:
  groups:
    - name: __NAMESPACE__/zync-que.rules
      rules:
        # Fires when a scrape target whose job name ends in "/zync-que"
        # reports `up == 0` in the namespace for one minute.
        - alert: ThreescaleZyncQueJobDown
          annotations:
            description: Job {{ $labels.job }} on {{ $labels.namespace }} is DOWN
            sop_url: https://github.com/3scale/3scale-Operations/blob/master/sops/alerts/prometheus_job_down.adoc
            summary: Job {{ $labels.job }} on {{ $labels.namespace }} is DOWN
          expr: up{job=~".*/zync-que",namespace="__NAMESPACE__"} == 0
          for: 1m
          labels:
            severity: critical
        # Fires when the highest `type='scheduled'` job count across zync-que
        # pods stays above 250 for one minute. The annotation text states the
        # same threshold as the expression.
        - alert: ThreescaleZyncQueScheduledJobCountHigh
          annotations:
            description: Job {{ $labels.job }} on {{ $labels.namespace }} has scheduled job count over 250
            sop_url: https://github.com/3scale/3scale-Operations/blob/master/sops/alerts/zync_que_scheduled_job_count_high.adoc
            summary: Job {{ $labels.job }} on {{ $labels.namespace }} has scheduled job count over 250
          expr: max(que_jobs_scheduled_total{pod=~'zync-que.*',type='scheduled',namespace="__NAMESPACE__"}) by (namespace,job,exported_job) > 250
          for: 1m
          labels:
            severity: warning
        # Fires when the highest `type='failed'` job count across zync-que
        # pods stays above 250 for one minute.
        - alert: ThreescaleZyncQueFailedJobCountHigh
          annotations:
            description: Job {{ $labels.job }} on {{ $labels.namespace }} has failed job count over 250
            sop_url: https://github.com/3scale/3scale-Operations/blob/master/sops/alerts/zync_que_failed_job_count_high.adoc
            summary: Job {{ $labels.job }} on {{ $labels.namespace }} has failed job count over 250
          expr: max(que_jobs_scheduled_total{pod=~'zync-que.*',type='failed',namespace="__NAMESPACE__"}) by (namespace,job,exported_job) > 250
          for: 1m
          labels:
            severity: warning
        # Fires when the highest `type='ready'` job count across zync-que
        # pods stays above 250 for one minute.
        - alert: ThreescaleZyncQueReadyJobCountHigh
          annotations:
            description: Job {{ $labels.job }} on {{ $labels.namespace }} has ready job count over 250
            sop_url: https://github.com/3scale/3scale-Operations/blob/master/sops/alerts/zync_que_ready_job_count_high.adoc
            summary: Job {{ $labels.job }} on {{ $labels.namespace }} has ready job count over 250
          expr: max(que_jobs_scheduled_total{pod=~'zync-que.*',type='ready',namespace="__NAMESPACE__"}) by (namespace,job,exported_job) > 250
          for: 1m
          labels:
            severity: warning