From cafd0f1705aace6678d25c09277bfb012017d46c Mon Sep 17 00:00:00 2001 From: Bin Yi Date: Tue, 30 Apr 2019 09:53:19 -0700 Subject: [PATCH 1/2] remove go_* metrics --- deploy/helm/overrides.yaml | 6 ------ 1 file changed, 6 deletions(-) diff --git a/deploy/helm/overrides.yaml b/deploy/helm/overrides.yaml index 89f0331cef..f4c24c9c18 100644 --- a/deploy/helm/overrides.yaml +++ b/deploy/helm/overrides.yaml @@ -131,9 +131,3 @@ prometheus: - action: keep regex: node-exporter;node_filesystem_(?:avail|free|size)_bytes sourceLabels: [job, __name__] - # golang related metrics - - url: http://fluentd:9888/prometheus.metrics.go - writeRelabelConfigs: - - action: keep - regex: go_gc_duration_seconds|go_threads|go_goroutines - sourceLabels: [__name__] From d183dffdb75d062b65618cad0fb0fb8809ae7ad2 Mon Sep 17 00:00:00 2001 From: Bin Yi Date: Wed, 1 May 2019 15:48:24 -0700 Subject: [PATCH 2/2] add rule metrics and up --- deploy/helm/overrides.yaml | 62 ++++++++++++++++++++++++ deploy/kubernetes/fluentd-sumologic.yaml | 11 +++-- 2 files changed, 69 insertions(+), 4 deletions(-) diff --git a/deploy/helm/overrides.yaml b/deploy/helm/overrides.yaml index f4c24c9c18..de7818d14c 100644 --- a/deploy/helm/overrides.yaml +++ b/deploy/helm/overrides.yaml @@ -131,3 +131,65 @@ prometheus: - action: keep regex: node-exporter;node_filesystem_(?:avail|free|size)_bytes sourceLabels: [job, __name__] + # prometheus operator rules + - url: http://fluentd:9888/prometheus.metrics.operator.rule + writeRelabelConfigs: + - action: keep + regex: apiserver;cluster_quantile:apiserver_request_latencies:histogram_quantile + sourceLabels: [job, __name__] + - url: http://fluentd:9888/prometheus.metrics.operator.rule + writeRelabelConfigs: + - action: keep + regex: instance:node_(?:cpu:rate|filesystem_usage(?::rate)?|network_receive_bytes:rate|network_transmit_bytes:rate):sum + sourceLabels: [__name__] + - url: http://fluentd:9888/prometheus.metrics.operator.rule + writeRelabelConfigs: + - action: keep + regex: 
instance:node_cpu:ratio|cluster:node_cpu:sum_rate5m|cluster:node_cpu:ratio + sourceLabels: [__name__] + - url: http://fluentd:9888/prometheus.metrics.operator.rule + writeRelabelConfigs: + - action: keep + regex: cluster_quantile:scheduler_(?:e2e_scheduling|scheduling_algorithm|binding)_latency:histogram_quantile + sourceLabels: [__name__] + - url: http://fluentd:9888/prometheus.metrics.operator.rule + writeRelabelConfigs: + - action: keep + regex: 'node_namespace_pod:kube_pod_info:|:kube_pod_info_node_count:' + sourceLabels: [__name__] + - url: http://fluentd:9888/prometheus.metrics.operator.rule + writeRelabelConfigs: + - action: keep + regex: 'node:node_num_cpu:sum|:node_cpu_utilisation:avg1m|node:node_cpu_utilisation:avg1m|node:cluster_cpu_utilisation:ratio|:node_cpu_saturation_load1:|node:node_cpu_saturation_load1:' + sourceLabels: [__name__] + - url: http://fluentd:9888/prometheus.metrics.operator.rule + writeRelabelConfigs: + - action: keep + regex: ':node_memory_utilisation:|:node_memory_MemFreeCachedBuffers_bytes:sum|:node_memory_MemTotal_bytes:sum|node:node_memory_bytes_available:sum|node:node_memory_bytes_total:sum|node:node_memory_utilisation:ratio|node:cluster_memory_utilisation:ratio|:node_memory_swap_io_bytes:sum_rate|node:node_memory_utilisation:|node:node_memory_utilisation_2:|node:node_memory_swap_io_bytes:sum_rate' + sourceLabels: [__name__] + - url: http://fluentd:9888/prometheus.metrics.operator.rule + writeRelabelConfigs: + - action: keep + regex: ':node_disk_utilisation:avg_irate|node:node_disk_utilisation:avg_irate|:node_disk_saturation:avg_irate|node:node_disk_saturation:avg_irate' + sourceLabels: [__name__] + - url: http://fluentd:9888/prometheus.metrics.operator.rule + writeRelabelConfigs: + - action: keep + regex: 'node:node_filesystem_usage:|node:node_filesystem_avail:' + sourceLabels: [__name__] + - url: http://fluentd:9888/prometheus.metrics.operator.rule + writeRelabelConfigs: + - action: keep + regex: 
':node_net_utilisation:sum_irate|node:node_net_utilisation:sum_irate|:node_net_saturation:sum_irate|node:node_net_saturation:sum_irate' + sourceLabels: [__name__] + - url: http://fluentd:9888/prometheus.metrics.operator.rule + writeRelabelConfigs: + - action: keep + regex: 'node:node_inodes_total:|node:node_inodes_free:' + sourceLabels: [__name__] + # up metrics + - url: http://fluentd:9888/prometheus.metrics + writeRelabelConfigs: + - action: keep + regex: up + sourceLabels: [__name__] diff --git a/deploy/kubernetes/fluentd-sumologic.yaml b/deploy/kubernetes/fluentd-sumologic.yaml index a3d88f0305..6857a67556 100644 --- a/deploy/kubernetes/fluentd-sumologic.yaml +++ b/deploy/kubernetes/fluentd-sumologic.yaml @@ -79,7 +79,11 @@ data: @type protobuf - + + @type datapoint + tag prometheus.datapoint.operator.rule + + @type datapoint tag prometheus.datapoint @@ -91,10 +95,9 @@ data: tag ${tag}.$1 - + @type carbon_v2 - @type sumologic endpoint "#{ENV['SUMO_ENDPOINT_METRICS_APISERVER']}" @@ -137,7 +140,7 @@ data: data_type metrics metric_data_format carbon2 - + @type sumologic endpoint "#{ENV['SUMO_ENDPOINT_METRICS']}" data_type metrics