From 66798b4bf5b4ee2dd323490c130a44804a1b8db0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Miko=C5=82aj=20=C5=9Awi=C4=85tek?=
Date: Thu, 23 Mar 2023 17:35:05 +0100
Subject: [PATCH] feat(metrics): add experimental otel metrics collector

---
 deploy/helm/sumologic/README.md               |   3 +
 .../metrics/collector/otelcol/config.yaml     | 123 ++++++++++++++++++
 .../conf/metrics/otelcol/pipeline.yaml        |   5 +
 .../conf/metrics/otelcol/processors.yaml      |  13 ++
 .../sumologic/templates/_helpers/_metrics.tpl |  57 ++++++++
 .../collector/otelcol/clusterrole.yaml        |  27 ++++
 .../collector/otelcol/clusterrolebinding.yaml |  30 +++++
 .../otelcol/opentelemetrycollector.yaml       |  56 ++++++++
 .../collector/otelcol/serviceaccount.yaml     |   6 +
 .../otelcol/targetallocator-clusterrole.yaml  |  51 ++++++++
 .../targetallocator-clusterrolebinding.yaml   |  14 ++
 .../targetallocator-serviceaccount.yaml       |   6 +
 .../templates/metrics/otelcol/configmap.yaml  |   2 +-
 .../metrics/otelcol/statefulset.yaml          |   2 +-
 deploy/helm/sumologic/values.yaml             |  11 ++
 tests/helm/metrics_test.go                    |   1 +
 .../additional_endpoints.output.yaml          |  12 ++
 .../metadata_metrics_otc/basic.output.yaml    |  12 ++
 tests/integration/features.go                 |  96 ++++++++++++--
 .../helm_fluentbit_fluentd_test.go            |   2 +-
 tests/integration/helm_ot_default_test.go     |   2 +-
 tests/integration/helm_ot_metrics_test.go     |  36 +++++
 ...elm_otc_fips_metadata_installation_test.go |   2 +-
 tests/integration/internal/constants.go       |   2 +
 .../values/values_helm_ot_metrics.yaml        |  32 +++++
 25 files changed, 584 insertions(+), 19 deletions(-)
 create mode 100644 deploy/helm/sumologic/conf/metrics/collector/otelcol/config.yaml
 create mode 100644 deploy/helm/sumologic/templates/metrics/collector/otelcol/clusterrole.yaml
 create mode 100644 deploy/helm/sumologic/templates/metrics/collector/otelcol/clusterrolebinding.yaml
 create mode 100644 deploy/helm/sumologic/templates/metrics/collector/otelcol/opentelemetrycollector.yaml
 create mode 100644 deploy/helm/sumologic/templates/metrics/collector/otelcol/serviceaccount.yaml
 create mode 100644 deploy/helm/sumologic/templates/metrics/collector/otelcol/targetallocator-clusterrole.yaml
 create mode 100644 deploy/helm/sumologic/templates/metrics/collector/otelcol/targetallocator-clusterrolebinding.yaml
 create mode 100644 deploy/helm/sumologic/templates/metrics/collector/otelcol/targetallocator-serviceaccount.yaml
 create mode 100644 tests/integration/helm_ot_metrics_test.go
 create mode 100644 tests/integration/values/values_helm_ot_metrics.yaml

diff --git a/deploy/helm/sumologic/README.md b/deploy/helm/sumologic/README.md
index f6c4509875..b97fd0f154 100644
--- a/deploy/helm/sumologic/README.md
+++ b/deploy/helm/sumologic/README.md
@@ -103,6 +103,7 @@ The following table lists the configurable parameters of the Sumo Logic chart an
 | `sumologic.metrics.remoteWriteProxy.podAnnotations` | Additional annotations for the remote write proxy container. | `{}` |
 | `sumologic.metrics.remoteWriteProxy.config.port` | Port on which the remote write proxy is going to be exposed | `8080` |
 | `sumologic.metrics.serviceMonitors` | Configuration of Sumo Logic Kubernetes Collection components serviceMonitors | See [values.yaml] |
+| `sumologic.metrics.collector.otelcol.enabled` | Enable the experimental otelcol metrics collector | `false` |
 | `sumologic.traces.enabled` | Set the enabled flag to true to enable tracing ingestion. _Tracing must be enabled for the account first. Please contact your Sumo representative for activation details_ | `true` |
 | `sumologic.traces.spans_per_request` | Maximum number of spans sent in a single batch | `100` |
 | `sumologic.envFromSecret` | If enabled, accessId and accessKey will be sourced from the Secret name given. Be sure to include at least the following env variables in your secret: (1) SUMOLOGIC_ACCESSID, (2) SUMOLOGIC_ACCESSKEY | `sumo-api-secret` |
@@ -396,6 +397,8 @@ The following table lists the configurable parameters of the Sumo Logic chart an
 | `opentelemetry-operator.instrumentation.java.traces.enabled` | Flag to control traces export from Java instrumentation in `Instrumentation` resource. | `true` |
 | `opentelemetry-operator.instrumentation.python.metrics.enabled` | Flag to control metrics export from Python instrumentation in `Instrumentation` resource. | `true` |
 | `opentelemetry-operator.instrumentation.python.traces.enabled` | Flag to control traces export from Python instrumentation in `Instrumentation` resource. | `true` |
+| `opentelemetry-operator.manager.collectorImage.repository` | The default collector image repository for OpenTelemetryCollector CRDs. | `public.ecr.aws/sumologic/sumologic-otel-collector` |
+| `opentelemetry-operator.manager.collectorImage.tag` | The default collector image tag for OpenTelemetryCollector CRDs. | `0.73.0-sumo-1` |
 | `opentelemetry-operator.manager.resources.limits.cpu` | Used to set limit CPU for OpenTelemetry-Operator Manager. | `250m` |
 | `opentelemetry-operator.manager.resources.limits.memory` | Used to set limit Memory for OpenTelemetry-Operator Manager. | `512Mi` |
 | `opentelemetry-operator.manager.resources.requests.cpu` | Used to set requested CPU for OpenTelemetry-Operator Manager. | `150m` |
diff --git a/deploy/helm/sumologic/conf/metrics/collector/otelcol/config.yaml b/deploy/helm/sumologic/conf/metrics/collector/otelcol/config.yaml
new file mode 100644
index 0000000000..4c98ae2d92
--- /dev/null
+++ b/deploy/helm/sumologic/conf/metrics/collector/otelcol/config.yaml
@@ -0,0 +1,123 @@
+receivers:
+  prometheus:
+    config:
+      global:
+        scrape_interval: 30s
+      scrape_configs:
+        ## These scrape configs are for kubelet metrics
+        ## The Prometheus operator does this by manually maintaining a Service with Endpoints for all Nodes
+        ## We don't have that capability, so we define these scrape configs statically here
+        - job_name: kubelet
+          scheme: https
+          authorization:
+            credentials_file: /var/run/secrets/kubernetes.io/serviceaccount/token
+          tls_config:
+            ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
+            insecure_skip_verify: true
+          honor_labels: true
+          kubernetes_sd_configs:
+            - role: node
+          metric_relabel_configs:
+            - action: keep
+              regex: (?:kubelet_docker_operations_errors(?:|_total)|kubelet_(?:docker|runtime)_operations_duration_seconds_(?:count|sum)|kubelet_running_(?:container|pod)(?:_count|s)|kubelet_(?:docker|runtime)_operations_latency_microseconds(?:|_count|_sum))
+              source_labels: [__name__]
+            # TODO: The below can't be used due to a bug in target allocator
+            # - action: labeldrop
+            #   regex: id
+          relabel_configs: &relabel_configs # partially copied from what the operator generates
+            - source_labels:
+                - __meta_kubernetes_node_name
+              target_label: node
+            - source_labels:
+                - __meta_kubernetes_namespace
+              target_label: namespace
+            - source_labels:
+                - __meta_kubernetes_pod_name
+              target_label: pod
+            - source_labels:
+                - __meta_kubernetes_pod_container_name
+              target_label: container
+            - target_label: endpoint
+              replacement: https-metrics
+            - source_labels:
+                - __metrics_path__
+              target_label: metrics_path
+              action: replace
+            - source_labels:
+                - __address__
+              target_label: instance
+              action: replace
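+        ## Taken together, the keep rule above retains only the kubelet operation
+        ## and running-container/pod series, i.e. kubelet_docker_operations_errors(_total),
+        ## kubelet_(docker|runtime)_operations_duration_seconds_(count|sum),
+        ## kubelet_running_(containers|pods|container_count|pod_count), and
+        ## kubelet_(docker|runtime)_operations_latency_microseconds(|_count|_sum).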
+        - job_name: cadvisor
+          scheme: https
+          authorization:
+            credentials_file: /var/run/secrets/kubernetes.io/serviceaccount/token
+          tls_config:
+            ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
+            insecure_skip_verify: true
+          honor_labels: true
+          metrics_path: /metrics/cadvisor
+          kubernetes_sd_configs:
+            - role: node
+          metric_relabel_configs:
+            - action: replace
+              regex: .*
+              replacement: kubelet
+              source_labels: [__name__]
+              target_label: job
+            - action: keep
+              regex: (?:container_cpu_usage_seconds_total|container_memory_working_set_bytes|container_fs_usage_bytes|container_fs_limit_bytes|container_cpu_cfs_throttled_seconds_total|container_network_receive_bytes_total|container_network_transmit_bytes_total)
+              source_labels: [__name__]
+            ## Drop container metrics with the container tag set to an empty string:
+            ## these are the pod-aggregated container metrics, which can be aggregated
+            ## in Sumo anyway. There are also some cgroup-specific time series that we
+            ## do not need.
+            - action: drop
+              source_labels: [__name__, container]
+              regex: (?:container_cpu_usage_seconds_total|container_memory_working_set_bytes|container_fs_usage_bytes|container_fs_limit_bytes);$
+            - action: labelmap
+              regex: container_name
+              replacement: container
+            - action: drop
+              source_labels: [container]
+              regex: POD
+            # TODO: The below can't be used due to a bug in target allocator
+            # - action: labeldrop
+            #   regex: (id|name)
+          relabel_configs: *relabel_configs # partially copied from what the operator generates
+    target_allocator:
+      endpoint: http://{{ template "sumologic.metadata.name.metrics.targetallocator.name" . }}
+      interval: 30s
+      collector_id: ${POD_NAME}
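+    ## The target allocator hands each collector pod its share of the scrape
+    ## targets over HTTP, keyed by the pod name passed as collector_id above.
+    ## A rough sketch of the exchange (paths follow the upstream target
+    ## allocator API and are shown for illustration only):
+    ##   GET <endpoint>/jobs                                      -> list of scrape jobs
+    ##   GET <endpoint>/jobs/<job>/targets?collector_id=$POD_NAME -> this pod's targets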
+extensions:
+  health_check: {}
+{{ if .Values.metadata.persistence.enabled }}
+  ## Configuration for the File Storage extension
+  ## ref: https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/release/v0.37.x/extension/storage/filestorage
+  file_storage:
+    directory: /var/lib/storage/otc
+    timeout: 10s
+    compaction:
+      on_rebound: true
+      directory: /tmp
+{{ end }}
+  pprof: {}
+exporters:
+{{ tpl (.Files.Get "conf/metrics/otelcol/exporters.yaml") . | indent 2 }}
+processors:
+{{ tpl (.Files.Get "conf/metrics/otelcol/processors.yaml") . | indent 2 }}
+service:
+  telemetry:
+    logs:
+      level: {{ .Values.metadata.metrics.logLevel }}
+    metrics:
+      address: 0.0.0.0:8888 # this is the default, but setting it explicitly lets the operator add the metrics port automatically
+  extensions:
+    - health_check
+{{ if .Values.metadata.persistence.enabled }}
+    - file_storage
+{{ end }}
+    - pprof
+  pipelines:
+    metrics:
+{{ tpl (.Files.Get "conf/metrics/otelcol/pipeline.yaml") . | indent 6 }}
+
diff --git a/deploy/helm/sumologic/conf/metrics/otelcol/pipeline.yaml b/deploy/helm/sumologic/conf/metrics/otelcol/pipeline.yaml
index d95ae844d1..7fbb43a827 100644
--- a/deploy/helm/sumologic/conf/metrics/otelcol/pipeline.yaml
+++ b/deploy/helm/sumologic/conf/metrics/otelcol/pipeline.yaml
@@ -10,6 +10,7 @@ exporters:
 processors:
   - memory_limiter
   - metricstransform
+  - groupbyattrs
   - resource
   - k8s_tagger
   - source
@@ -24,4 +25,8 @@ processors:
   - batch
   - routing
 receivers:
+{{- if not .Values.sumologic.metrics.collector.otelcol.enabled }}
   - telegraf
+{{- else }}
+  - prometheus
+{{- end }}
diff --git a/deploy/helm/sumologic/conf/metrics/otelcol/processors.yaml b/deploy/helm/sumologic/conf/metrics/otelcol/processors.yaml
index 93d5405a30..e61748800c 100644
--- a/deploy/helm/sumologic/conf/metrics/otelcol/processors.yaml
+++ b/deploy/helm/sumologic/conf/metrics/otelcol/processors.yaml
@@ -9,6 +9,14 @@ batch:
   ## Time duration after which a batch will be sent regardless of size
   timeout: 1s
 
+## The Prometheus receiver puts all labels into record-level attributes; move the ones that identify a resource into resource attributes
+groupbyattrs:
+  keys:
+    - container
+    - namespace
+    - pod
+    - service
+
 ## The Kubernetes processor automatically tags logs, metrics and traces with Kubernetes metadata like pod name, namespace name etc.
 ## ref: https://github.com/SumoLogic/sumologic-otel-collector/tree/main/pkg/processor/k8sprocessor
 k8s_tagger:
@@ -79,6 +87,11 @@ resource:
     key: prometheus_service
   - action: delete
     key: service
+  - action: upsert
+    from_attribute: service.name
+    key: job
+  - action: delete # we don't want service.name to be set, as the schema processor translates it to "service"
+    key: service.name
   - action: upsert
     key: _origin # add "_origin" metadata to metrics to keep the same format as for metrics from Fluentd
     value: kubernetes
diff --git a/deploy/helm/sumologic/templates/_helpers/_metrics.tpl b/deploy/helm/sumologic/templates/_helpers/_metrics.tpl
index 222c3f6809..67a94cd4ea 100644
--- a/deploy/helm/sumologic/templates/_helpers/_metrics.tpl
+++ b/deploy/helm/sumologic/templates/_helpers/_metrics.tpl
@@ -216,6 +216,63 @@ sumologic.com/scrape: "true"
 {{- template "sumologic.labels.app.pvcCleaner" . }}-metrics
 {{- end -}}
 
+{{/*
+Definitions for the metrics collector
+*/}}
+
+{{- define "sumologic.labels.app.metrics.collector" -}}
+{{- template "sumologic.fullname" . }}-metrics
+{{- end -}}
+
+{{- define "sumologic.labels.app.metrics.collector.pod" -}}
+{{- template "sumologic.labels.app.metrics.collector" . }}
+{{- end -}}
+
+{{- define "sumologic.labels.app.metrics.collector.opentelemetrycollector" -}}
+{{- template "sumologic.labels.app.metrics.collector" . }}
+{{- end -}}
+
+{{- define "sumologic.metadata.name.metrics.collector" -}}
+{{- template "sumologic.fullname" . }}-metrics
+{{- end -}}
+
+{{- define "sumologic.metadata.name.metrics.collector.opentelemetrycollector" -}}
+{{ template "sumologic.metadata.name.metrics.collector" . }}
+{{- end -}}
+
+{{- define "sumologic.metadata.name.metrics.collector.serviceaccount" -}}
+{{ template "sumologic.metadata.name.metrics.collector" . }}
+{{- end -}}
+
+{{- define "sumologic.metadata.name.metrics.collector.clusterrole" -}}
+{{ template "sumologic.metadata.name.metrics.collector" . }}
+{{- end -}}
+
+{{- define "sumologic.metadata.name.metrics.collector.clusterrolebinding.prometheus" -}}
+{{ template "sumologic.metadata.name.metrics.collector" . 
}}-prometheus +{{- end -}} + +{{- define "sumologic.metadata.name.metrics.collector.clusterrolebinding.metadata" -}} +{{ template "sumologic.metadata.name.metrics.collector" . }}-metadata +{{- end -}} + +{{- define "sumologic.metadata.name.metrics.targetallocator.name" -}} +{{ template "sumologic.metadata.name.metrics.collector.opentelemetrycollector" . }}-targetallocator +{{- end -}} + +{{- define "sumologic.metadata.name.metrics.targetallocator.serviceaccount" -}} +{{ template "sumologic.metadata.name.metrics.targetallocator.name" . }} +{{- end -}} + +{{- define "sumologic.metadata.name.metrics.targetallocator.clusterrole" -}} +{{ template "sumologic.metadata.name.metrics.targetallocator.name" . }} +{{- end -}} + +{{- define "sumologic.metadata.name.metrics.targetallocator.clusterrolebinding" -}} +{{ template "sumologic.metadata.name.metrics.targetallocator.name" . }} +{{- end -}} + + {{/* Generate metrics match configuration diff --git a/deploy/helm/sumologic/templates/metrics/collector/otelcol/clusterrole.yaml b/deploy/helm/sumologic/templates/metrics/collector/otelcol/clusterrole.yaml new file mode 100644 index 0000000000..ddcea9c8ab --- /dev/null +++ b/deploy/helm/sumologic/templates/metrics/collector/otelcol/clusterrole.yaml @@ -0,0 +1,27 @@ +{{ if and (eq (include "metrics.otelcol.enabled" .) "true") .Values.sumologic.metrics.collector.otelcol.enabled }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ template "sumologic.metadata.name.metrics.collector.clusterrole" . }} +rules: +- apiGroups: [""] + resources: + - pods + - nodes + - nodes/metrics + - services + - endpoints + verbs: + - get + - watch + - list +- apiGroups: ["networking.k8s.io"] + resources: + - ingresses + verbs: + - get + - watch + - list +- nonResourceURLs: ["/metrics", "/metrics/cadvisor"] + verbs: ["get"] +{{- end }} diff --git a/deploy/helm/sumologic/templates/metrics/collector/otelcol/clusterrolebinding.yaml b/deploy/helm/sumologic/templates/metrics/collector/otelcol/clusterrolebinding.yaml new file mode 100644 index 0000000000..f7c9fcf466 --- /dev/null +++ b/deploy/helm/sumologic/templates/metrics/collector/otelcol/clusterrolebinding.yaml @@ -0,0 +1,30 @@ +{{ if and (eq (include "metrics.otelcol.enabled" .) "true") .Values.sumologic.metrics.collector.otelcol.enabled }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: {{ template "sumologic.metadata.name.metrics.collector.clusterrolebinding.prometheus" . }} +subjects: +- kind: ServiceAccount + name: {{ template "sumologic.metadata.name.metrics.collector.serviceaccount" . }} + namespace: {{ .Release.Namespace }} +roleRef: + kind: ClusterRole + name: {{ template "sumologic.metadata.name.metrics.collector.clusterrole" . }} + apiGroup: rbac.authorization.k8s.io +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: {{ template "sumologic.metadata.name.metrics.collector.clusterrolebinding.metadata" . }} + labels: + app: {{ template "sumologic.labels.app.roles.clusterrolebinding" . }} + {{- include "sumologic.labels.common" . | nindent 4 }} +subjects: +- kind: ServiceAccount + namespace: {{ .Release.Namespace }} + name: {{ template "sumologic.metadata.name.metrics.collector.serviceaccount" . }} +roleRef: + kind: ClusterRole + name: {{ template "sumologic.metadata.name.roles.clusterrole" . 
}} + apiGroup: rbac.authorization.k8s.io +{{- end }} diff --git a/deploy/helm/sumologic/templates/metrics/collector/otelcol/opentelemetrycollector.yaml b/deploy/helm/sumologic/templates/metrics/collector/otelcol/opentelemetrycollector.yaml new file mode 100644 index 0000000000..6045123d83 --- /dev/null +++ b/deploy/helm/sumologic/templates/metrics/collector/otelcol/opentelemetrycollector.yaml @@ -0,0 +1,56 @@ +{{- if and (eq (include "metrics.otelcol.enabled" .) "true") .Values.sumologic.metrics.collector.otelcol.enabled }} +apiVersion: opentelemetry.io/v1alpha1 +kind: OpenTelemetryCollector +metadata: + name: {{ template "sumologic.metadata.name.metrics.collector.opentelemetrycollector" . }} + labels: + app: {{ template "sumologic.labels.app.metrics.collector" . }} +spec: + mode: statefulset + replicas: 3 + serviceAccount: {{ template "sumologic.metadata.name.metrics.collector.serviceaccount" . }} + targetAllocator: + serviceAccount: {{ template "sumologic.metadata.name.metrics.targetallocator.serviceaccount" . }} + enabled: true + prometheusCR: + enabled: true + env: +{{- $ctx := .Values -}} +{{- include "kubernetes.sources.envs" (dict "Context" $ctx "Type" "metrics") | nindent 4 -}} +{{- include "proxy-env-variables" . | nindent 4 -}} +{{- if .Values.metadata.metrics.statefulset.extraEnvVars }} +{{ toYaml .Values.metadata.metrics.statefulset.extraEnvVars | nindent 4 }} +{{- end }} + ports: + - name: pprof + port: 1777 + resources: + {{ .Values.sumologic.metrics.collector.otelcol.resources | toYaml | nindent 4 }} + volumes: + - name: tmp + emptyDir: {} + volumeMounts: + - name: tmp + mountPath: /tmp +{{- if .Values.metadata.persistence.enabled }} + - name: file-storage + mountPath: /var/lib/storage/otc +{{- end }} +{{- if .Values.metadata.persistence.enabled }} + volumeClaimTemplates: + - metadata: + name: file-storage +{{- if .Values.metadata.persistence.pvcLabels }} + labels: +{{ toYaml .Values.metadata.persistence.pvcLabels | indent 8 }} +{{- end }} + spec: + accessModes: [{{ .Values.metadata.persistence.accessMode }}] + storageClassName: {{ .Values.metadata.persistence.storageClass }} + resources: + requests: + storage: {{ .Values.metadata.persistence.size }} +{{- end }} + config: | +{{- (tpl (.Files.Get "conf/metrics/collector/otelcol/config.yaml") .) | nindent 4 }} +{{- end }} diff --git a/deploy/helm/sumologic/templates/metrics/collector/otelcol/serviceaccount.yaml b/deploy/helm/sumologic/templates/metrics/collector/otelcol/serviceaccount.yaml new file mode 100644 index 0000000000..d56a0e1dcd --- /dev/null +++ b/deploy/helm/sumologic/templates/metrics/collector/otelcol/serviceaccount.yaml @@ -0,0 +1,6 @@ +{{ if and (eq (include "metrics.otelcol.enabled" .) "true") .Values.sumologic.metrics.collector.otelcol.enabled }} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ template "sumologic.metadata.name.metrics.collector.serviceaccount" . }} +{{- end }} diff --git a/deploy/helm/sumologic/templates/metrics/collector/otelcol/targetallocator-clusterrole.yaml b/deploy/helm/sumologic/templates/metrics/collector/otelcol/targetallocator-clusterrole.yaml new file mode 100644 index 0000000000..8ebe1991e0 --- /dev/null +++ b/deploy/helm/sumologic/templates/metrics/collector/otelcol/targetallocator-clusterrole.yaml @@ -0,0 +1,51 @@ +{{ if and (eq (include "metrics.otelcol.enabled" .) 
"true") .Values.sumologic.metrics.collector.otelcol.enabled }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ template "sumologic.metadata.name.metrics.targetallocator.clusterrole" . }} +rules: +- apiGroups: [""] + resources: + - pods + - nodes + - services + - endpoints + - configmaps + - secrets + - namespaces + verbs: + - get + - watch + - list +- apiGroups: ["apps"] + resources: + - statefulsets + - services + - endpoints + verbs: + - get + - watch + - list +- apiGroups: ["discovery.k8s.io"] + resources: + - endpointslices + verbs: + - get + - watch + - list +- apiGroups: ["networking.k8s.io"] + resources: + - ingresses + verbs: + - get + - watch + - list +- apiGroups: ["monitoring.coreos.com"] + resources: + - servicemonitors + - podmonitors + verbs: + - get + - watch + - list +{{- end }} diff --git a/deploy/helm/sumologic/templates/metrics/collector/otelcol/targetallocator-clusterrolebinding.yaml b/deploy/helm/sumologic/templates/metrics/collector/otelcol/targetallocator-clusterrolebinding.yaml new file mode 100644 index 0000000000..c36337d791 --- /dev/null +++ b/deploy/helm/sumologic/templates/metrics/collector/otelcol/targetallocator-clusterrolebinding.yaml @@ -0,0 +1,14 @@ +{{ if and (eq (include "metrics.otelcol.enabled" .) "true") .Values.sumologic.metrics.collector.otelcol.enabled }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: {{ template "sumologic.metadata.name.metrics.targetallocator.clusterrolebinding" . }} +subjects: +- kind: ServiceAccount + name: {{ template "sumologic.metadata.name.metrics.targetallocator.serviceaccount" . }} + namespace: {{ .Release.Namespace }} +roleRef: + kind: ClusterRole + name: {{ template "sumologic.metadata.name.metrics.targetallocator.clusterrole" . }} + apiGroup: rbac.authorization.k8s.io +{{- end }} diff --git a/deploy/helm/sumologic/templates/metrics/collector/otelcol/targetallocator-serviceaccount.yaml b/deploy/helm/sumologic/templates/metrics/collector/otelcol/targetallocator-serviceaccount.yaml new file mode 100644 index 0000000000..2226f444cd --- /dev/null +++ b/deploy/helm/sumologic/templates/metrics/collector/otelcol/targetallocator-serviceaccount.yaml @@ -0,0 +1,6 @@ +{{ if and (eq (include "metrics.otelcol.enabled" .) "true") .Values.sumologic.metrics.collector.otelcol.enabled }} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ template "sumologic.metadata.name.metrics.targetallocator.serviceaccount" . }} +{{- end }} diff --git a/deploy/helm/sumologic/templates/metrics/otelcol/configmap.yaml b/deploy/helm/sumologic/templates/metrics/otelcol/configmap.yaml index f9741f4672..93e51d3fd0 100644 --- a/deploy/helm/sumologic/templates/metrics/otelcol/configmap.yaml +++ b/deploy/helm/sumologic/templates/metrics/otelcol/configmap.yaml @@ -1,4 +1,4 @@ -{{ if eq (include "metrics.otelcol.enabled" .) "true" }} +{{ if and (eq (include "metrics.otelcol.enabled" .) "true") (not .Values.sumologic.metrics.collector.otelcol.enabled) }} {{ $baseConfig := (tpl (.Files.Get "conf/metrics/otelcol/config.yaml") .) 
| fromYaml }} {{ $mergeConfig := .Values.metadata.metrics.config.merge }} {{ $overrideConfig := .Values.metadata.metrics.config.override }} diff --git a/deploy/helm/sumologic/templates/metrics/otelcol/statefulset.yaml b/deploy/helm/sumologic/templates/metrics/otelcol/statefulset.yaml index ad56bd2ead..e62bbf0059 100644 --- a/deploy/helm/sumologic/templates/metrics/otelcol/statefulset.yaml +++ b/deploy/helm/sumologic/templates/metrics/otelcol/statefulset.yaml @@ -1,4 +1,4 @@ -{{- if eq (include "metrics.otelcol.enabled" .) "true" }} +{{ if and (eq (include "metrics.otelcol.enabled" .) "true") (not .Values.sumologic.metrics.collector.otelcol.enabled) }} apiVersion: apps/v1 kind: StatefulSet metadata: diff --git a/deploy/helm/sumologic/values.yaml b/deploy/helm/sumologic/values.yaml index 558efe2567..2cc276bf55 100644 --- a/deploy/helm/sumologic/values.yaml +++ b/deploy/helm/sumologic/values.yaml @@ -390,6 +390,17 @@ sumologic: ## Defines metrics metadata enrichment provider - `otelcol` or `fluentd`. `otelcol` is the default and is recommended. `fluentd` is deprecated. provider: otelcol + collector: + otelcol: + enabled: false + resources: + limits: + memory: 1Gi + cpu: 1000m + requests: + memory: 768Mi + cpu: 500m + otelcol: ## Includes additional processors into pipelines. ## It can be used for filtering metrics, renaming, changing metadata and so on. diff --git a/tests/helm/metrics_test.go b/tests/helm/metrics_test.go index a6bde950cd..efab8305f6 100644 --- a/tests/helm/metrics_test.go +++ b/tests/helm/metrics_test.go @@ -121,6 +121,7 @@ func TestMetadataMetricsOtelConfigExtraProcessors(t *testing.T) { expectedPipelineValue := []string{ "memory_limiter", "metricstransform", + "groupbyattrs", "resource", "k8s_tagger", "source", diff --git a/tests/helm/testdata/goldenfile/metadata_metrics_otc/additional_endpoints.output.yaml b/tests/helm/testdata/goldenfile/metadata_metrics_otc/additional_endpoints.output.yaml index f285e22848..a58fc13ced 100644 --- a/tests/helm/testdata/goldenfile/metadata_metrics_otc/additional_endpoints.output.yaml +++ b/tests/helm/testdata/goldenfile/metadata_metrics_otc/additional_endpoints.output.yaml @@ -106,6 +106,12 @@ data: send_batch_max_size: 2048 send_batch_size: 1024 timeout: 1s + groupbyattrs: + keys: + - container + - namespace + - pod + - service k8s_tagger: extract: delimiter: _ @@ -155,6 +161,11 @@ data: key: prometheus_service - action: delete key: service + - action: upsert + from_attribute: service.name + key: job + - action: delete + key: service.name - action: upsert key: _origin value: kubernetes @@ -280,6 +291,7 @@ data: processors: - memory_limiter - metricstransform + - groupbyattrs - resource - k8s_tagger - source diff --git a/tests/helm/testdata/goldenfile/metadata_metrics_otc/basic.output.yaml b/tests/helm/testdata/goldenfile/metadata_metrics_otc/basic.output.yaml index 2b67eed822..60716b72fc 100644 --- a/tests/helm/testdata/goldenfile/metadata_metrics_otc/basic.output.yaml +++ b/tests/helm/testdata/goldenfile/metadata_metrics_otc/basic.output.yaml @@ -106,6 +106,12 @@ data: send_batch_max_size: 2048 send_batch_size: 1024 timeout: 1s + groupbyattrs: + keys: + - container + - namespace + - pod + - service k8s_tagger: extract: delimiter: _ @@ -155,6 +161,11 @@ data: key: prometheus_service - action: delete key: service + - action: upsert + from_attribute: service.name + key: job + - action: delete + key: service.name - action: upsert key: _origin value: kubernetes @@ -278,6 +289,7 @@ data: processors: - memory_limiter - metricstransform + - 
groupbyattrs - resource - k8s_tagger - source diff --git a/tests/integration/features.go b/tests/integration/features.go index f6007a43bd..5ab408dc83 100644 --- a/tests/integration/features.go +++ b/tests/integration/features.go @@ -29,19 +29,23 @@ import ( "github.com/SumoLogic/sumologic-kubernetes-collection/tests/integration/internal/stepfuncs" ) +type MetricsCollector string + const ( - tickDuration = 3 * time.Second - waitDuration = 5 * time.Minute - expectedEventCount uint = 50 // number determined experimentally - logsGeneratorCount uint = 1000 - logRecords = 4 // number of log records in single loop, see: tests/integration/yamls/pod_multiline_long_lines.yaml - logLoops = 500 // number of loops in which logs are generated, see: tests/integration/yamls/pod_multiline_long_lines.yaml - multilineLogCount uint = logRecords * logLoops - tracesPerExporter uint = 5 // number of traces generated per exporter - spansPerTrace uint = 2 + tickDuration = 3 * time.Second + waitDuration = 1 * time.Minute + expectedEventCount uint = 50 // number determined experimentally + logsGeneratorCount uint = 1000 + logRecords = 4 // number of log records in single loop, see: tests/integration/yamls/pod_multiline_long_lines.yaml + logLoops = 500 // number of loops in which logs are generated, see: tests/integration/yamls/pod_multiline_long_lines.yaml + multilineLogCount uint = logRecords * logLoops + tracesPerExporter uint = 5 // number of traces generated per exporter + spansPerTrace uint = 2 + Prometheus MetricsCollector = "prometheus" + Otelcol MetricsCollector = "otelcol" ) -func GetMetricsFeature(expectedMetrics []string) features.Feature { +func GetMetricsFeature(expectedMetrics []string, metricsCollector MetricsCollector) features.Feature { return features.New("metrics"). 
Assess("expected metrics are present", stepfuncs.WaitUntilExpectedMetricsPresent( @@ -105,7 +109,6 @@ func GetMetricsFeature(expectedMetrics []string) features.Feature { "deployment": "receiver-mock", "endpoint": "https-metrics", "image": "sumologic/kubernetes-tools:.*", - "instance": internal.IpWithPortRegex, "job": "kubelet", "metrics_path": "/metrics/cadvisor", "namespace": "receiver-mock", @@ -114,12 +117,31 @@ func GetMetricsFeature(expectedMetrics []string) features.Feature { "pod_labels_pod-template-hash": ".+", "pod_labels_service": "receiver-mock", "pod": podList.Items[0].Name, - "prometheus_replica": fmt.Sprintf("prometheus-%s-.*-0", releaseName), - "prometheus_service": fmt.Sprintf("%s-.*-kubelet", releaseName), - "prometheus": fmt.Sprintf("%s/%s-.*-prometheus", namespace, releaseName), "replicaset": "receiver-mock-.*", "service": "receiver-mock", } + prometheusLabels := receivermock.Labels{ + "instance": internal.IpWithPortRegex, + "prometheus_replica": fmt.Sprintf("prometheus-%s-.*-0", releaseName), + "prometheus": fmt.Sprintf("%s/%s-.*-prometheus", namespace, releaseName), + "prometheus_service": fmt.Sprintf("%s-.*-kubelet", releaseName), + } + otelcolLabels := receivermock.Labels{ + "http.scheme": "http.", + "net.host.name": internal.IpRegex, + "net.host.port": internal.NetworkPortRegex, + "service.instance.id": internal.IpWithPortRegex, + } + + if metricsCollector == Prometheus { + for key, value := range prometheusLabels { + expectedLabels[key] = value + } + } else if metricsCollector == Otelcol { + for key, value := range otelcolLabels { + expectedLabels[key] = value + } + } log.V(0).InfoS("sample's labels", "labels", labels) return labels.MatchAll(expectedLabels) @@ -511,6 +533,52 @@ func CheckOtelcolMetadataMetricsInstall(builder *features.FeatureBuilder) *featu }) } +func CheckOtelcolMetricsCollectorInstall(builder *features.FeatureBuilder) *features.FeatureBuilder { + return builder. + Assess("otelcol metrics collector statefulset is ready", + stepfuncs.WaitUntilStatefulSetIsReady( + waitDuration, + tickDuration, + stepfuncs.WithNameF( + stepfuncs.ReleaseFormatter("%s-sumologic-metrics-collector"), + ), + stepfuncs.WithLabelsF( + stepfuncs.LabelFormatterKV{ + K: "app", + V: stepfuncs.ReleaseFormatter("%s-sumologic-metrics"), + }, + ), + ), + ). + Assess("otelcol metrics collector buffers PVCs are created and bound", + func(ctx context.Context, t *testing.T, envConf *envconf.Config) context.Context { + res := envConf.Client().Resources(ctxopts.Namespace(ctx)) + pvcs := corev1.PersistentVolumeClaimList{} + cond := conditions. + New(res). + ResourceListMatchN(&pvcs, 1, + func(object k8s.Object) bool { + pvc := object.(*corev1.PersistentVolumeClaim) + if pvc.Status.Phase != corev1.ClaimBound { + log.V(0).Infof("PVC %q not bound yet", pvc.Name) + return false + } + return true + }, + resources.WithLabelSelector( + fmt.Sprintf("app.kubernetes.io/instance=%s.%s-sumologic-metrics", ctxopts.Namespace(ctx), ctxopts.HelmRelease(ctx)), + ), + ) + require.NoError(t, + wait.For(cond, + wait.WithTimeout(waitDuration), + wait.WithInterval(tickDuration), + ), + ) + return ctx + }) +} + func CheckOtelcolMetadataLogsInstall(builder *features.FeatureBuilder) *features.FeatureBuilder { return builder. 
Assess("otelcol logs statefulset is ready", diff --git a/tests/integration/helm_fluentbit_fluentd_test.go b/tests/integration/helm_fluentbit_fluentd_test.go index 65eeb320dc..932f68a022 100644 --- a/tests/integration/helm_fluentbit_fluentd_test.go +++ b/tests/integration/helm_fluentbit_fluentd_test.go @@ -20,7 +20,7 @@ func Test_Helm_FluentBit_Fluentd(t *testing.T) { featInstall := GetInstallFeature(installChecks) - featMetrics := GetMetricsFeature(expectedMetrics) + featMetrics := GetMetricsFeature(expectedMetrics, Prometheus) featLogs := GetLogsFeature() diff --git a/tests/integration/helm_ot_default_test.go b/tests/integration/helm_ot_default_test.go index 83db71a9ad..bf264b0d16 100644 --- a/tests/integration/helm_ot_default_test.go +++ b/tests/integration/helm_ot_default_test.go @@ -24,7 +24,7 @@ func Test_Helm_Default_OT(t *testing.T) { featInstall := GetInstallFeature(installChecks) - featMetrics := GetMetricsFeature(expectedMetrics) + featMetrics := GetMetricsFeature(expectedMetrics, Prometheus) featLogs := GetLogsFeature() diff --git a/tests/integration/helm_ot_metrics_test.go b/tests/integration/helm_ot_metrics_test.go new file mode 100644 index 0000000000..3af55fccb2 --- /dev/null +++ b/tests/integration/helm_ot_metrics_test.go @@ -0,0 +1,36 @@ +package integration + +import ( + "strings" + "testing" + + "github.com/SumoLogic/sumologic-kubernetes-collection/tests/integration/internal" +) + +func Test_Helm_OT_Metrics(t *testing.T) { + expectedMetrics := []string{} + + // drop histogram metrics for now, there's a couple problems with them + // also don't check otelcol metrics for now, we don't have a ServiceMonitor + for _, metrics := range internal.DefaultExpectedMetricsGroups { + for _, metric := range metrics { + if strings.HasSuffix(metric, "_count") || + strings.HasSuffix(metric, "_sum") || + strings.HasSuffix(metric, "_bucket") { + continue + } + expectedMetrics = append(expectedMetrics, metric) + } + } + + installChecks := []featureCheck{ + CheckSumologicSecret(8), + CheckOtelcolMetricsCollectorInstall, + } + + featInstall := GetInstallFeature(installChecks) + + featMetrics := GetMetricsFeature(expectedMetrics, Otelcol) + + testenv.Test(t, featInstall, featMetrics) +} diff --git a/tests/integration/helm_otc_fips_metadata_installation_test.go b/tests/integration/helm_otc_fips_metadata_installation_test.go index be4db0f48f..61ad65587b 100644 --- a/tests/integration/helm_otc_fips_metadata_installation_test.go +++ b/tests/integration/helm_otc_fips_metadata_installation_test.go @@ -24,7 +24,7 @@ func Test_Helm_Default_OT_FIPS_Metadata(t *testing.T) { featInstall := GetInstallFeature(installChecks) - featMetrics := GetMetricsFeature(expectedMetrics) + featMetrics := GetMetricsFeature(expectedMetrics, Prometheus) featLogs := GetLogsFeature() diff --git a/tests/integration/internal/constants.go b/tests/integration/internal/constants.go index dd32bd6e2c..c6c62c68ab 100644 --- a/tests/integration/internal/constants.go +++ b/tests/integration/internal/constants.go @@ -38,6 +38,8 @@ const ( // useful regular expressions for matching metadata PodDeploymentSuffixRegex = "-[a-z0-9]{9,10}-[a-z0-9]{4,5}" // the Pod suffix for Deployments PodDaemonSetSuffixRegex = "-[a-z0-9]{4,5}" + NetworkPortRegex = "\\d{1,5}" + IpRegex = "\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}" IpWithPortRegex = "\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}:\\d{1,5}" NodeNameRegex = ".*-control-plane" // node name for KinD TODO: get this from the cluster directly instead NotUndefinedRegex = "(?!undefined$).*" diff --git 
a/tests/integration/values/values_helm_ot_metrics.yaml b/tests/integration/values/values_helm_ot_metrics.yaml new file mode 100644 index 0000000000..b9d264fed0 --- /dev/null +++ b/tests/integration/values/values_helm_ot_metrics.yaml @@ -0,0 +1,32 @@ +sumologic: + logs: + enabled: false + traces: + enabled: false + events: + enabled: false + metrics: + enabled: true + remoteWriteProxy: + enabled: false + collector: + otelcol: + enabled: true + resources: + requests: + memory: 128Mi + cpu: 50m + +kube-prometheus-stack: + prometheus: + enabled: false + prometheusOperator: + enabled: false + +opentelemetry-operator: + enabled: true + admissionWebhooks: + create: false + manager: + env: + ENABLE_WEBHOOKS: "false"
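+
+## Usage sketch (release and namespace names are illustrative): deploy these
+## values to exercise the experimental metrics collector end to end, e.g.:
+##   helm upgrade --install collection sumologic/sumologic \
+##     --namespace sumologic --create-namespace \
+##     -f tests/integration/values/values_helm_ot_metrics.yaml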