Skip to content

Commit

Permalink
fix(metrics): don't forward duplicate metrics from annotated Pods
Browse files Browse the repository at this point in the history
  • Loading branch information
swiatekm-sumo committed Jul 27, 2023
1 parent 7c1008b commit bbf017a
Show file tree
Hide file tree
Showing 20 changed files with 367 additions and 82 deletions.
1 change: 1 addition & 0 deletions .changelog/3171.fixed.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
fix(metrics): don't forward duplicate metrics from annotated Pods
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ processors:
metrics:
metric:
# we let the metrics from annotations ("kubernetes-pods") through as they are
- resource.attributes["service.name"] != "kubernetes-pods" and IsMatch(name, "scrape_.*")
- resource.attributes["service.name"] != "pod-annotations" and IsMatch(name, "scrape_.*")

receivers:
prometheus:
Expand All @@ -51,7 +51,7 @@ receivers:
## - prometheus.io/path: /metrics - path which the metric should be scrape from
## - prometheus.io/port: 9113 - port which the metric should be scrape from
## rel: https://github.com/prometheus-operator/kube-prometheus/pull/16#issuecomment-424318647
- job_name: "kubernetes-pods"
- job_name: "pod-annotations"
kubernetes_sd_configs:
- role: pod
relabel_configs:
Expand Down
7 changes: 7 additions & 0 deletions deploy/helm/sumologic/conf/metrics/otelcol/processors.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,13 @@ routing:
- exporters:
- sumologic/state
value: /prometheus.metrics.state
## custom metrics
## This entry is necessary due to a bug in routing processor that prevents the routing key from being deleted
## if the default exporter is chosen
## See: https://github.com/open-telemetry/opentelemetry-collector-contrib/issues/24644
- exporters:
- sumologic/default
value: /prometheus.metrics.applications.custom

## Configuration for Source Processor
## Source processor adds Sumo Logic related metadata
Expand Down
65 changes: 64 additions & 1 deletion deploy/helm/sumologic/values.yaml

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,9 @@ data:
- exporters:
- sumologic/state
value: /prometheus.metrics.state
- exporters:
- sumologic/default
value: /prometheus.metrics.applications.custom
source:
collector: kubernetes
sumologic_schema:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,9 @@ data:
- exporters:
- sumologic/state
value: /prometheus.metrics.state
- exporters:
- sumologic/default
value: /prometheus.metrics.applications.custom
source:
collector: kubernetes
sumologic_schema:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ spec:
metrics:
metric:
# we let the metrics from annotations ("kubernetes-pods") through as they are
- resource.attributes["service.name"] != "kubernetes-pods" and IsMatch(name, "scrape_.*")
- resource.attributes["service.name"] != "pod-annotations" and IsMatch(name, "scrape_.*")
receivers:
prometheus:
Expand All @@ -106,7 +106,7 @@ spec:
## - prometheus.io/path: /metrics - path which the metric should be scrape from
## - prometheus.io/port: 9113 - port which the metric should be scrape from
## rel: https://github.com/prometheus-operator/kube-prometheus/pull/16#issuecomment-424318647
- job_name: "kubernetes-pods"
- job_name: "pod-annotations"
kubernetes_sd_configs:
- role: pod
relabel_configs:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ spec:
metrics:
metric:
# we let the metrics from annotations ("kubernetes-pods") through as they are
- resource.attributes["service.name"] != "kubernetes-pods" and IsMatch(name, "scrape_.*")
- resource.attributes["service.name"] != "pod-annotations" and IsMatch(name, "scrape_.*")
receivers:
prometheus:
Expand All @@ -128,7 +128,7 @@ spec:
## - prometheus.io/path: /metrics - path which the metric should be scrape from
## - prometheus.io/port: 9113 - port which the metric should be scrape from
## rel: https://github.com/prometheus-operator/kube-prometheus/pull/16#issuecomment-424318647
- job_name: "kubernetes-pods"
- job_name: "pod-annotations"
kubernetes_sd_configs:
- role: pod
relabel_configs:
Expand Down
98 changes: 53 additions & 45 deletions tests/integration/features.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,6 @@ func GetMetricsFeature(expectedMetrics []string, metricsCollector MetricsCollect
Assess("expected metrics are present",
stepfuncs.WaitUntilExpectedMetricsPresent(
expectedMetrics,
internal.ReceiverMockNamespace,
internal.ReceiverMockServiceName,
internal.ReceiverMockServicePort,
2*time.Minute, // take longer to account for recording rule metrics
tickDuration,
),
Expand Down Expand Up @@ -131,6 +128,59 @@ func GetMetricsFeature(expectedMetrics []string, metricsCollector MetricsCollect
Feature()
}

func GetTelegrafMetricsFeature(expectedMetrics []string, metricsCollector MetricsCollector, errOnExtra bool) features.Feature {
return features.New("telegraf_metrics").
Setup(stepfuncs.KubectlApplyFOpt(internal.NginxTelegrafMetricsTest, internal.NginxTelegrafNamespace)).
Assess("expected metrics are present",
stepfuncs.WaitUntilExpectedMetricsPresentWithFilters(
expectedMetrics,
receivermock.MetadataFilters{"deployment": "nginx", "endpoint": "/metrics"},
errOnExtra,
waitDuration,
tickDuration,
),
).
Assess("expected labels are present for annotation metrics",
func(ctx context.Context, t *testing.T, envConf *envconf.Config) context.Context {
metricFilters := receivermock.MetadataFilters{"__name__": "nginx_accepts", "endpoint": "/metrics"}
releaseName := ctxopts.HelmRelease(ctx)
namespace := ctxopts.Namespace(ctx)
expectedLabels := receivermock.Labels{
"cluster": "kubernetes",
"_origin": "kubernetes",
"deployment": "nginx",
"endpoint": "/metrics",
"namespace": internal.NginxTelegrafNamespace,
"node": internal.NodeNameRegex,
"pod_labels_app": "nginx",
"pod_labels_pod-template-hash": ".+",
"pod": "nginx-.+",
"replicaset": "nginx-.*",
"service": "nginx",
"app": "nginx",
"host": "nginx-.+",
"port": "80",
"server": "localhost",
"pod_template_hash": ".+",
}
expectedLabels = addCollectorSpecificMetricLabels(expectedLabels, releaseName, namespace, metricsCollector)

// drop some unnecessary labels
delete(expectedLabels, "k8s.node.name")
delete(expectedLabels, "prometheus_service")

// Prometheus currently suffers from a bug where it removes the job label, but Otel keeps it
if metricsCollector == Otelcol {
expectedLabels["job"] = "pod-annotations"
}

return stepfuncs.WaitUntilExpectedMetricLabelsPresent(metricFilters, expectedLabels, waitDuration, tickDuration)(ctx, t, envConf)
},
).
Teardown(stepfuncs.KubectlDeleteFOpt(internal.NginxTelegrafMetricsTest, internal.NginxTelegrafNamespace)).
Feature()
}

// addCollectorSpecificMetricLabels adds labels which are present only for the specific metric collector or metadata Service
func addCollectorSpecificMetricLabels(labels receivermock.Labels, releaseName string, serviceMonitorNamespace string, collector MetricsCollector) receivermock.Labels {
outputLabels := make(receivermock.Labels, len(labels))
Expand Down Expand Up @@ -194,9 +244,6 @@ func GetLogsFeature() features.Feature {
"pod_labels_app": internal.LogsGeneratorName,
"deployment": internal.LogsGeneratorName,
},
internal.ReceiverMockNamespace,
internal.ReceiverMockServiceName,
internal.ReceiverMockServicePort,
waitDuration,
tickDuration,
)).
Expand All @@ -207,9 +254,6 @@ func GetLogsFeature() features.Feature {
"pod_labels_app": internal.LogsGeneratorName,
"daemonset": internal.LogsGeneratorName,
},
internal.ReceiverMockNamespace,
internal.ReceiverMockServiceName,
internal.ReceiverMockServicePort,
waitDuration,
tickDuration,
)).
Expand Down Expand Up @@ -242,9 +286,6 @@ func GetLogsFeature() features.Feature {
),
"_sourceHost": internal.EmptyRegex,
},
internal.ReceiverMockNamespace,
internal.ReceiverMockServiceName,
internal.ReceiverMockServicePort,
waitDuration,
tickDuration,
)).
Expand Down Expand Up @@ -276,9 +317,6 @@ func GetLogsFeature() features.Feature {
),
"_sourceHost": internal.EmptyRegex,
},
internal.ReceiverMockNamespace,
internal.ReceiverMockServiceName,
internal.ReceiverMockServicePort,
waitDuration,
tickDuration,
)).
Expand All @@ -290,9 +328,6 @@ func GetLogsFeature() features.Feature {
"_sourceCategory": "kubernetes/system",
"_sourceHost": internal.NodeNameRegex,
},
internal.ReceiverMockNamespace,
internal.ReceiverMockServiceName,
internal.ReceiverMockServicePort,
waitDuration,
tickDuration,
)).
Expand All @@ -304,9 +339,6 @@ func GetLogsFeature() features.Feature {
"_sourceCategory": "kubernetes/kubelet",
"_sourceHost": internal.NodeNameRegex,
},
internal.ReceiverMockNamespace,
internal.ReceiverMockServiceName,
internal.ReceiverMockServicePort,
waitDuration,
tickDuration,
)).
Expand Down Expand Up @@ -338,9 +370,6 @@ func GetMultilineLogsFeature() features.Feature {
"namespace": internal.MultilineLogsNamespace,
"pod_labels_example": internal.MultilineLogsPodName,
},
internal.ReceiverMockNamespace,
internal.ReceiverMockServiceName,
internal.ReceiverMockServicePort,
waitDuration,
tickDuration,
)).
Expand All @@ -357,9 +386,6 @@ func GetEventsFeature() features.Feature {
"_sourceCategory": fmt.Sprintf("%s/events", internal.ClusterName),
"cluster": "kubernetes",
},
internal.ReceiverMockNamespace,
internal.ReceiverMockServiceName,
internal.ReceiverMockServicePort,
waitDuration,
tickDuration,
)).
Expand Down Expand Up @@ -391,9 +417,6 @@ func GetTracesFeature() features.Feature {
// "_sourceCategory": "kubernetes/customer/trace/tester/customer/trace/tester",
"_sourceName": fmt.Sprintf("%s.%s.%s", internal.TracesGeneratorNamespace, internal.TracesGeneratorName, internal.TracesGeneratorName),
},
internal.ReceiverMockNamespace,
internal.ReceiverMockServiceName,
internal.ReceiverMockServicePort,
waitDuration,
tickDuration,
)).
Expand All @@ -413,9 +436,6 @@ func GetTracesFeature() features.Feature {
// "_sourceCategory": "kubernetes/customer/trace/tester/customer/trace/tester",
"_sourceName": fmt.Sprintf("%s.%s.%s", internal.TracesGeneratorNamespace, internal.TracesGeneratorName, internal.TracesGeneratorName),
},
internal.ReceiverMockNamespace,
internal.ReceiverMockServiceName,
internal.ReceiverMockServicePort,
waitDuration,
tickDuration,
)).
Expand All @@ -435,9 +455,6 @@ func GetTracesFeature() features.Feature {
// "_sourceCategory": "kubernetes/customer/trace/tester/customer/trace/tester",
"_sourceName": fmt.Sprintf("%s.%s.%s", internal.TracesGeneratorNamespace, internal.TracesGeneratorName, internal.TracesGeneratorName),
},
internal.ReceiverMockNamespace,
internal.ReceiverMockServiceName,
internal.ReceiverMockServicePort,
waitDuration,
tickDuration,
)).
Expand All @@ -458,18 +475,12 @@ func GetTracesFeature() features.Feature {
"_sourceName": fmt.Sprintf("%s.%s.%s", internal.TracesGeneratorNamespace, internal.TracesGeneratorName, internal.TracesGeneratorName),
"otel.library.name": "jaegerThriftHttp",
},
internal.ReceiverMockNamespace,
internal.ReceiverMockServiceName,
internal.ReceiverMockServicePort,
waitDuration,
tickDuration,
)).
Assess("wait for all spans", stepfuncs.WaitUntilExpectedSpansPresent(
4*tracesPerExporter*spansPerTrace, // there are 4 exporters
map[string]string{},
internal.ReceiverMockNamespace,
internal.ReceiverMockServiceName,
internal.ReceiverMockServicePort,
waitDuration,
tickDuration,
)).
Expand All @@ -492,9 +503,6 @@ func GetTailingSidecarFeature() features.Feature {
"namespace": internal.TailingSidecarTestNamespace,
"deployment": internal.TailingSidecarTestDeploymentName,
},
internal.ReceiverMockNamespace,
internal.ReceiverMockServiceName,
internal.ReceiverMockServicePort,
waitDuration,
tickDuration,
)).
Expand Down
4 changes: 3 additions & 1 deletion tests/integration/helm_fluentbit_fluentd_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,11 @@ func Test_Helm_FluentBit_Fluentd(t *testing.T) {

featMetrics := GetMetricsFeature(expectedMetrics, Fluentd)

featTelegrafMetrics := GetTelegrafMetricsFeature(internal.NginxMetrics, Fluentd, false)

featLogs := GetLogsFeature()

featEvents := GetEventsFeature()

testenv.Test(t, featInstall, featMetrics, featLogs, featEvents)
testenv.Test(t, featInstall, featMetrics, featTelegrafMetrics, featLogs, featEvents)
}
4 changes: 3 additions & 1 deletion tests/integration/helm_ot_default_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ func Test_Helm_Default_OT(t *testing.T) {

featMetrics := GetMetricsFeature(expectedMetrics, Prometheus)

featTelegrafMetrics := GetTelegrafMetricsFeature(internal.NginxMetrics, Prometheus, false)

featLogs := GetLogsFeature()

featMultilineLogs := GetMultilineLogsFeature()
Expand All @@ -37,5 +39,5 @@ func Test_Helm_Default_OT(t *testing.T) {

featTraces := GetTracesFeature()

testenv.Test(t, featInstall, featMetrics, featLogs, featMultilineLogs, featEvents, featTraces)
testenv.Test(t, featInstall, featMetrics, featTelegrafMetrics, featLogs, featMultilineLogs, featEvents, featTraces)
}
4 changes: 3 additions & 1 deletion tests/integration/helm_ot_metrics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,5 +58,7 @@ func Test_Helm_OT_Metrics(t *testing.T) {

featMetrics := GetMetricsFeature(expectedMetrics, Otelcol)

testenv.Test(t, featInstall, featMetrics)
featTelegrafMetrics := GetTelegrafMetricsFeature(internal.NginxMetrics, Otelcol, false)

testenv.Test(t, featInstall, featMetrics, featTelegrafMetrics)
}
13 changes: 13 additions & 0 deletions tests/integration/internal/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,9 @@ const (
TailingSidecarTest = "yamls/tailing-sidecar-test.yaml"
TailingSidecarTestDeploymentName = "test-tailing-sidecar-operator"

NginxTelegrafMetricsTest = "yamls/nginx.yaml"
NginxTelegrafNamespace = "nginx"

// useful regular expressions for matching metadata
PodDeploymentSuffixRegex = "-[a-z0-9]{9,10}-[a-z0-9]{4,5}" // the Pod suffix for Deployments
PodDaemonSetSuffixRegex = "-[a-z0-9]{4,5}"
Expand Down Expand Up @@ -387,6 +390,16 @@ var (
"receiver_mock_logs_bytes_ip_count",
"receiver_mock_metrics_ip_count",
}

NginxMetrics = []string{
"nginx_accepts",
"nginx_active",
"nginx_handled",
"nginx_reading",
"nginx_requests",
"nginx_waiting",
"nginx_writing",
}
)

var (
Expand Down
14 changes: 14 additions & 0 deletions tests/integration/internal/k8s/pods.go
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
package k8s

import (
"context"
"fmt"
"testing"
"time"

"github.com/SumoLogic/sumologic-kubernetes-collection/tests/integration/internal"
"github.com/SumoLogic/sumologic-kubernetes-collection/tests/integration/internal/ctxopts"
"github.com/gruntwork-io/terratest/modules/k8s"
"github.com/gruntwork-io/terratest/modules/retry"
"github.com/stretchr/testify/require"
Expand Down Expand Up @@ -72,6 +75,17 @@ func WaitUntilPodsAvailableE(
return nil
}

func WaitUntilReceiverMockAvailable(
ctx context.Context,
t *testing.T,
waitDuration time.Duration,
tickDuration time.Duration,
) {
kubectlOpts := *ctxopts.KubectlOptions(ctx)
kubectlOpts.Namespace = internal.ReceiverMockNamespace
k8s.WaitUntilServiceAvailable(t, &kubectlOpts, internal.ReceiverMockServiceName, int(waitDuration), tickDuration)
}

func formatSelectors(listOptions v1.ListOptions) string {
return fmt.Sprintf("LabelSelector: %q, FieldSelector: %q",
listOptions.LabelSelector, listOptions.FieldSelector,
Expand Down
Loading

0 comments on commit bbf017a

Please sign in to comment.