Skip to content

Commit

Permalink
Do not pass histogram metrics to the fluentd (#595)
Browse files Browse the repository at this point in the history
* Rewrite kubelet and apiserver prometheus regexes

* Update upgrade script
  • Loading branch information
sumo-drosiek committed May 4, 2020
1 parent 4d21605 commit f513d77
Show file tree
Hide file tree
Showing 4 changed files with 82 additions and 12 deletions.
43 changes: 39 additions & 4 deletions deploy/helm/prometheus-overrides.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -138,17 +138,52 @@ prometheus:
- action: keep
regex: kube-scheduler;scheduler_(?:e2e_scheduling|binding|scheduling_algorithm)_latency_microseconds.*
sourceLabels: [job, __name__]
- # api server metrics
- # api server metrics:
# apiserver_request_count
# apiserver_request_total
# apiserver_request_duration_seconds_count
# apiserver_request_duration_seconds_sum
# apiserver_request_latencies_count
# apiserver_request_latencies_sum
# apiserver_request_latencies_summary
# apiserver_request_latencies_summary_count
# apiserver_request_latencies_summary_sum
# etcd_request_cache_get_duration_seconds_count
# etcd_request_cache_get_duration_seconds_sum
# etcd_request_cache_add_duration_seconds_count
# etcd_request_cache_add_duration_seconds_sum
# etcd_request_cache_add_latencies_summary_count
# etcd_request_cache_add_latencies_summary_sum
# etcd_request_cache_get_latencies_summary_count
# etcd_request_cache_get_latencies_summary_sum
# etcd_helper_cache_hit_count
# etcd_helper_cache_hit_total
# etcd_helper_cache_miss_count
# etcd_helper_cache_miss_total
url: http://$(CHART).$(NAMESPACE).svc.cluster.local:9888/prometheus.metrics.apiserver
writeRelabelConfigs:
- action: keep
regex: apiserver;(?:apiserver_request_(?:count|total)|apiserver_request_(?:latencies|duration_seconds).*|etcd_request_cache_get_(?:latencies_summary|duration_seconds).*|etcd_request_cache_add_(?:latencies_summary|duration_seconds).*|etcd_helper_cache_hit_(?:count|total)|etcd_helper_cache_miss_(?:count|total))
regex: apiserver;(?:apiserver_request_(?:count|total)|apiserver_request_(?:duration_seconds|latencies)_(?:count|sum)|apiserver_request_latencies_summary(?:|_count|_sum)|etcd_request_cache_(?:add|get)_(?:duration_seconds|latencies_summary)_(?:count|sum)|etcd_helper_cache_(?:hit|miss)_(?:count|total))
sourceLabels: [job, __name__]
- # kubelet metrics
- # kubelet metrics:
# kubelet_docker_operations_errors
# kubelet_docker_operations_errors_total
# kubelet_docker_operations_duration_seconds_count
# kubelet_docker_operations_duration_seconds_sum
# kubelet_runtime_operations_duration_seconds_count
# kubelet_runtime_operations_duration_seconds_sum
# kubelet_running_container_count
# kubelet_running_pod_count
# kubelet_docker_operations_latency_microseconds
# kubelet_docker_operations_latency_microseconds_count
# kubelet_docker_operations_latency_microseconds_sum
# kubelet_runtime_operations_latency_microseconds
# kubelet_runtime_operations_latency_microseconds_count
# kubelet_runtime_operations_latency_microseconds_sum
url: http://$(CHART).$(NAMESPACE).svc.cluster.local:9888/prometheus.metrics.kubelet
writeRelabelConfigs:
- action: keep
regex: kubelet;(?:kubelet_docker_operations_errors.*|kubelet_docker_operations_(?:latency_micro|duration_)seconds.*|kubelet_running_container_count|kubelet_running_pod_count|kubelet_runtime_operations_(?:latency_micro|duration_)seconds.*)
# Allow-list for the kubelet job, matched against the `job;__name__` pair named
# in sourceLabels. Every group is non-capturing `(?:...)`; the former
# `(:?docker|runtime)` was a typo that also accepted a stray ':' before "docker".
regex: kubelet;(?:kubelet_docker_operations_errors(?:|_total)|kubelet_(?:docker|runtime)_operations_duration_seconds_(?:count|sum)|kubelet_running_(?:container|pod)_count|kubelet_(?:docker|runtime)_operations_latency_microseconds(?:|_count|_sum))
sourceLabels: [job, __name__]
- # cadvisor container metrics
url: http://$(CHART).$(NAMESPACE).svc.cluster.local:9888/prometheus.metrics.container
Expand Down
4 changes: 2 additions & 2 deletions deploy/helm/sumologic/upgrade-1.0.0.sh
Original file line number Diff line number Diff line change
Expand Up @@ -232,10 +232,10 @@ expected_metrics="/prometheus.metrics.state\n
/prometheus.metrics.controller-manager\n
/prometheus.metrics.scheduler\n
/prometheus.metrics.apiserver\n
- regex: apiserver;(?:apiserver_request_(?:count|total)|apiserver_request_(?:latencies|duration_seconds).*|etcd_request_cache_get_(?:latencies_summary|duration_seconds).*|etcd_request_cache_add_(?:latencies_summary|duration_seconds).*|etcd_helper_cache_hit_(?:count|total)|etcd_helper_cache_miss_(?:count|total))\n
- regex: apiserver;(?:apiserver_request_(?:count|total)|apiserver_request_(?:duration_seconds|latencies)_(?:count|sum)|apiserver_request_latencies_summary(?:|_count|_sum)|etcd_request_cache_(?:add|get)_(?:duration_seconds|latencies_summary)_(?:count|sum)|etcd_helper_cache_(?:hit|miss)_(?:count|total))\n
+ regex: apiserver;(?:apiserver_request_(?:count|total)|apiserver_request_latenc(?:ies|y_seconds).*|etcd_request_cache_get_latenc(?:ies_summary|y_seconds).*|etcd_request_cache_add_latenc(?:ies_summary|y_seconds).*|etcd_helper_cache_hit_(?:count|total)|etcd_helper_cache_miss_(?:count|total))\n
/prometheus.metrics.kubelet\n
- regex: kubelet;(?:kubelet_docker_operations_errors.*|kubelet_docker_operations_(?:latency_micro|duration_)seconds.*|kubelet_running_container_count|kubelet_running_pod_count|kubelet_runtime_operations_(?:latency_micro|duration_)seconds.*)\n
- regex: kubelet;(?:kubelet_docker_operations_errors(?:|_total)|kubelet_(?:docker|runtime)_operations_duration_seconds_(?:count|sum)|kubelet_running_(?:container|pod)_count|kubelet_(?:docker|runtime)_operations_latency_microseconds(?:|_count|_sum))\n
+ regex: kubelet;(?:kubelet_docker_operations_errors|kubelet_docker_operations_latency_microseconds|kubelet_running_container_count|kubelet_running_pod_count|kubelet_runtime_operations_latency_microseconds.*)\n
/prometheus.metrics.container\n
- regex: kubelet;.+;(?:container_cpu_usage_seconds_total|container_memory_working_set_bytes|container_fs_usage_bytes|container_fs_limit_bytes)\n
Expand Down
43 changes: 39 additions & 4 deletions deploy/helm/sumologic/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -669,17 +669,52 @@ prometheus-operator:
- action: keep
regex: kube-scheduler;scheduler_(?:e2e_scheduling|binding|scheduling_algorithm)_latency_microseconds.*
sourceLabels: [job, __name__]
# api server metrics
# api server metrics:
# apiserver_request_count
# apiserver_request_total
# apiserver_request_duration_seconds_count
# apiserver_request_duration_seconds_sum
# apiserver_request_latencies_count
# apiserver_request_latencies_sum
# apiserver_request_latencies_summary
# apiserver_request_latencies_summary_count
# apiserver_request_latencies_summary_sum
# etcd_request_cache_get_duration_seconds_count
# etcd_request_cache_get_duration_seconds_sum
# etcd_request_cache_add_duration_seconds_count
# etcd_request_cache_add_duration_seconds_sum
# etcd_request_cache_add_latencies_summary_count
# etcd_request_cache_add_latencies_summary_sum
# etcd_request_cache_get_latencies_summary_count
# etcd_request_cache_get_latencies_summary_sum
# etcd_helper_cache_hit_count
# etcd_helper_cache_hit_total
# etcd_helper_cache_miss_count
# etcd_helper_cache_miss_total
- url: http://$(CHART).$(NAMESPACE).svc.cluster.local:9888/prometheus.metrics.apiserver
writeRelabelConfigs:
- action: keep
regex: apiserver;(?:apiserver_request_(?:count|total)|apiserver_request_(?:latencies|duration_seconds).*|etcd_request_cache_get_(?:latencies_summary|duration_seconds).*|etcd_request_cache_add_(?:latencies_summary|duration_seconds).*|etcd_helper_cache_hit_(?:count|total)|etcd_helper_cache_miss_(?:count|total))
regex: apiserver;(?:apiserver_request_(?:count|total)|apiserver_request_(?:duration_seconds|latencies)_(?:count|sum)|apiserver_request_latencies_summary(?:|_count|_sum)|etcd_request_cache_(?:add|get)_(?:duration_seconds|latencies_summary)_(?:count|sum)|etcd_helper_cache_(?:hit|miss)_(?:count|total))
sourceLabels: [job, __name__]
# kubelet metrics
# kubelet metrics:
# kubelet_docker_operations_errors
# kubelet_docker_operations_errors_total
# kubelet_docker_operations_duration_seconds_count
# kubelet_docker_operations_duration_seconds_sum
# kubelet_runtime_operations_duration_seconds_count
# kubelet_runtime_operations_duration_seconds_sum
# kubelet_running_container_count
# kubelet_running_pod_count
# kubelet_docker_operations_latency_microseconds
# kubelet_docker_operations_latency_microseconds_count
# kubelet_docker_operations_latency_microseconds_sum
# kubelet_runtime_operations_latency_microseconds
# kubelet_runtime_operations_latency_microseconds_count
# kubelet_runtime_operations_latency_microseconds_sum
- url: http://$(CHART).$(NAMESPACE).svc.cluster.local:9888/prometheus.metrics.kubelet
writeRelabelConfigs:
- action: keep
regex: kubelet;(?:kubelet_docker_operations_errors.*|kubelet_docker_operations_(?:latency_micro|duration_)seconds.*|kubelet_running_container_count|kubelet_running_pod_count|kubelet_runtime_operations_(?:latency_micro|duration_)seconds.*)
# Allow-list for the kubelet job, matched against the `job;__name__` pair named
# in sourceLabels. Every group is non-capturing `(?:...)`; the former
# `(:?docker|runtime)` was a typo that also accepted a stray ':' before "docker".
regex: kubelet;(?:kubelet_docker_operations_errors(?:|_total)|kubelet_(?:docker|runtime)_operations_duration_seconds_(?:count|sum)|kubelet_running_(?:container|pod)_count|kubelet_(?:docker|runtime)_operations_latency_microseconds(?:|_count|_sum))
sourceLabels: [job, __name__]
# cadvisor container metrics
- url: http://$(CHART).$(NAMESPACE).svc.cluster.local:9888/prometheus.metrics.container
Expand Down
4 changes: 2 additions & 2 deletions deploy/kubernetes/kube-prometheus-sumo-logic-mixin.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@
writeRelabelConfigs: [
{
action: "keep",
regex: "apiserver;(?:apiserver_request_(?:count|total)|apiserver_request_(?:latencies|duration_seconds).*|etcd_request_cache_get_(?:latencies_summary|duration_seconds).*|etcd_request_cache_add_(?:latencies_summary|duration_seconds).*|etcd_helper_cache_hit_(?:count|total)|etcd_helper_cache_miss_(?:count|total))",
regex: "apiserver;(?:apiserver_request_(?:count|total)|apiserver_request_(?:duration_seconds|latencies)_(?:count|sum)|apiserver_request_latencies_summary(?:|_count|_sum)|etcd_request_cache_(?:add|get)_(?:duration_seconds|latencies_summary)_(?:count|sum)|etcd_helper_cache_(?:hit|miss)_(?:count|total))",
sourceLabels: [
"job",
"__name__"
Expand All @@ -62,7 +62,7 @@
writeRelabelConfigs: [
{
action: "keep",
regex: "kubelet;(?:kubelet_docker_operations_errors.*|kubelet_docker_operations_(?:latency_micro|duration_)seconds.*|kubelet_running_container_count|kubelet_running_pod_count|kubelet_runtime_operations_(?:latency_micro|duration_)seconds.*)",
// Allow-list for the kubelet job, matched against the "job;__name__" pair named
// in sourceLabels. Every group is non-capturing (?:...); the former
// (:?docker|runtime) was a typo that also accepted a stray ':' before "docker".
regex: "kubelet;(?:kubelet_docker_operations_errors(?:|_total)|kubelet_(?:docker|runtime)_operations_duration_seconds_(?:count|sum)|kubelet_running_(?:container|pod)_count|kubelet_(?:docker|runtime)_operations_latency_microseconds(?:|_count|_sum))",
sourceLabels: [
"job",
"__name__"
Expand Down

0 comments on commit f513d77

Please sign in to comment.