diff --git a/.changelog/3122.changed.txt b/.changelog/3122.changed.txt new file mode 100644 index 0000000000..1a2cec4c19 --- /dev/null +++ b/.changelog/3122.changed.txt @@ -0,0 +1 @@ +feat(metrics): split metadata extraction from otel collector \ No newline at end of file diff --git a/deploy/helm/sumologic/conf/metrics/collector/otelcol/config.yaml b/deploy/helm/sumologic/conf/metrics/collector/otelcol/config.yaml index af7c809cfc..6aa887f94d 100644 --- a/deploy/helm/sumologic/conf/metrics/collector/otelcol/config.yaml +++ b/deploy/helm/sumologic/conf/metrics/collector/otelcol/config.yaml @@ -1,21 +1,22 @@ exporters: -{{ tpl (.Files.Get "conf/metrics/otelcol/exporters.yaml") . | indent 2 }} + otlphttp: + endpoint: http://${METADATA_METRICS_SVC}.${NAMESPACE}.svc.cluster.local.:4318 + sending_queue: + queue_size: 10000 + num_consumers: 10 + storage: file_storage extensions: health_check: {} -{{ if .Values.metadata.persistence.enabled }} - ## Configuration for File Storage extension + pprof: {} file_storage: directory: /var/lib/storage/otc timeout: 10s compaction: on_rebound: true directory: /tmp -{{ end }} - pprof: {} -processors: -{{ tpl (.Files.Get "conf/metrics/otelcol/processors.yaml") . | indent 2 }} +processors: {} receivers: prometheus: @@ -116,11 +117,10 @@ service: address: 0.0.0.0:8888 # this is the default, but setting it explicitly lets the operator add it automatically extensions: - health_check -{{- if .Values.metadata.persistence.enabled }} - - file_storage -{{- end }} - pprof + - file_storage pipelines: metrics: -{{ tpl (.Files.Get "conf/metrics/otelcol/pipeline.yaml") . | indent 6 }} + exporters: [otlphttp] + receivers: [prometheus] diff --git a/deploy/helm/sumologic/conf/metrics/otelcol/config.yaml b/deploy/helm/sumologic/conf/metrics/otelcol/config.yaml index f0cdbcd492..8d3c7335e3 100644 --- a/deploy/helm/sumologic/conf/metrics/otelcol/config.yaml +++ b/deploy/helm/sumologic/conf/metrics/otelcol/config.yaml @@ -35,6 +35,10 @@ receivers: paths = [ {{ include "metric.endpoints" . 
| indent 10 }} ] + otlp: + protocols: + http: + endpoint: 0.0.0.0:4318 service: extensions: diff --git a/deploy/helm/sumologic/conf/metrics/otelcol/pipeline.yaml b/deploy/helm/sumologic/conf/metrics/otelcol/pipeline.yaml index 7fbb43a827..94f6244be2 100644 --- a/deploy/helm/sumologic/conf/metrics/otelcol/pipeline.yaml +++ b/deploy/helm/sumologic/conf/metrics/otelcol/pipeline.yaml @@ -25,8 +25,5 @@ processors: - batch - routing receivers: -{{- if not .Values.sumologic.metrics.collector.otelcol.enabled }} - telegraf -{{- else }} - - prometheus -{{- end }} + - otlp diff --git a/deploy/helm/sumologic/templates/metrics/collector/otelcol/opentelemetrycollector.yaml b/deploy/helm/sumologic/templates/metrics/collector/otelcol/opentelemetrycollector.yaml index bc7d6e2f5b..5d861f69d3 100644 --- a/deploy/helm/sumologic/templates/metrics/collector/otelcol/opentelemetrycollector.yaml +++ b/deploy/helm/sumologic/templates/metrics/collector/otelcol/opentelemetrycollector.yaml @@ -32,9 +32,9 @@ spec: enabled: true prometheusCR: enabled: true - serviceMonitorSelector: + serviceMonitorSelector: {{ .Values.sumologic.metrics.collector.otelcol.serviceMonitorSelector | toYaml | nindent 8 }} - podMonitorSelector: + podMonitorSelector: {{ .Values.sumologic.metrics.collector.otelcol.podMonitorSelector | toYaml | nindent 8 }} {{- if .Values.sumologic.metrics.collector.otelcol.nodeSelector }} nodeSelector: @@ -57,12 +57,12 @@ spec: {{- end }} {{- end }} env: -{{- $ctx := .Values -}} -{{- include "kubernetes.sources.envs" (dict "Context" $ctx "Type" "metrics") | nindent 4 -}} -{{- include "proxy-env-variables" . | nindent 4 -}} -{{- if .Values.metadata.metrics.statefulset.extraEnvVars }} -{{ toYaml .Values.metadata.metrics.statefulset.extraEnvVars | nindent 4 }} -{{- end }} + - name: METADATA_METRICS_SVC + value: {{ template "sumologic.metadata.name.metrics.service" . }} # no need for remote write proxy here + - name: NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace podSecurityContext: {{ .Values.sumologic.metrics.collector.otelcol.securityContext | toYaml | nindent 4 }} ports: @@ -73,28 +73,13 @@ spec: volumes: - name: tmp emptyDir: {} + - name: file-storage + emptyDir: {} volumeMounts: - name: tmp mountPath: /tmp -{{- if .Values.metadata.persistence.enabled }} - name: file-storage mountPath: /var/lib/storage/otc -{{- end }} -{{- if .Values.metadata.persistence.enabled }} - volumeClaimTemplates: - - metadata: - name: file-storage -{{- if .Values.metadata.persistence.pvcLabels }} - labels: -{{ toYaml .Values.metadata.persistence.pvcLabels | indent 8 }} -{{- end }} - spec: - accessModes: [{{ .Values.metadata.persistence.accessMode }}] - storageClassName: {{ .Values.metadata.persistence.storageClass }} - resources: - requests: - storage: {{ .Values.metadata.persistence.size }} -{{- end }} config: | {{- (tpl (.Files.Get "conf/metrics/collector/otelcol/config.yaml") .) | nindent 4 }} {{- end }} diff --git a/deploy/helm/sumologic/templates/metrics/common/service-headless.yaml b/deploy/helm/sumologic/templates/metrics/common/service-headless.yaml index db96583474..afa775b93f 100644 --- a/deploy/helm/sumologic/templates/metrics/common/service-headless.yaml +++ b/deploy/helm/sumologic/templates/metrics/common/service-headless.yaml @@ -16,6 +16,9 @@ spec: app: {{ template "sumologic.labels.app.metrics.pod" . 
}} clusterIP: None ports: + - name: otlphttp + port: 4318 + protocol: TCP - name: prom-write port: 9888 targetPort: 9888 diff --git a/deploy/helm/sumologic/templates/metrics/common/service.yaml b/deploy/helm/sumologic/templates/metrics/common/service.yaml index 89a2f2825a..27a065f16a 100644 --- a/deploy/helm/sumologic/templates/metrics/common/service.yaml +++ b/deploy/helm/sumologic/templates/metrics/common/service.yaml @@ -15,6 +15,9 @@ spec: selector: app: {{ template "sumologic.labels.app.metrics.pod" . }} ports: + - name: otlphttp + port: 4318 + protocol: TCP - name: prom-write port: 9888 targetPort: 9888 diff --git a/deploy/helm/sumologic/templates/metrics/otelcol/configmap.yaml b/deploy/helm/sumologic/templates/metrics/otelcol/configmap.yaml index 4e08ac053f..9d34917ac4 100644 --- a/deploy/helm/sumologic/templates/metrics/otelcol/configmap.yaml +++ b/deploy/helm/sumologic/templates/metrics/otelcol/configmap.yaml @@ -1,4 +1,4 @@ -{{ if and (eq (include "metrics.otelcol.enabled" .) "true") (not .Values.sumologic.metrics.collector.otelcol.enabled) }} +{{ if eq (include "metrics.otelcol.enabled" .) "true" }} {{ $baseConfig := (tpl (.Files.Get "conf/metrics/otelcol/config.yaml") .) | fromYaml }} {{ $mergeConfig := .Values.metadata.metrics.config.merge }} {{ $overrideConfig := .Values.metadata.metrics.config.override }} diff --git a/deploy/helm/sumologic/templates/metrics/otelcol/statefulset.yaml b/deploy/helm/sumologic/templates/metrics/otelcol/statefulset.yaml index 6b9bc34b51..5ce3432812 100644 --- a/deploy/helm/sumologic/templates/metrics/otelcol/statefulset.yaml +++ b/deploy/helm/sumologic/templates/metrics/otelcol/statefulset.yaml @@ -1,4 +1,4 @@ -{{ if and (eq (include "metrics.otelcol.enabled" .) "true") (not .Values.sumologic.metrics.collector.otelcol.enabled) }} +{{ if eq (include "metrics.otelcol.enabled" .) 
"true" }} apiVersion: apps/v1 kind: StatefulSet metadata: @@ -93,6 +93,9 @@ spec: {{- toYaml .Values.metadata.metrics.statefulset.containers.otelcol.securityContext | nindent 10 }} {{- end }} ports: + - name: otlphttp + containerPort: 4318 + protocol: TCP - name: prom-write containerPort: 9888 protocol: TCP diff --git a/tests/helm/testdata/goldenfile/metadata_metrics_otc/additional_endpoints.output.yaml b/tests/helm/testdata/goldenfile/metadata_metrics_otc/additional_endpoints.output.yaml index b3c761b806..bdbda8fbd3 100644 --- a/tests/helm/testdata/goldenfile/metadata_metrics_otc/additional_endpoints.output.yaml +++ b/tests/helm/testdata/goldenfile/metadata_metrics_otc/additional_endpoints.output.yaml @@ -224,6 +224,10 @@ data: sumologic_schema: add_cloud_namespace: false receivers: + otlp: + protocols: + http: + endpoint: 0.0.0.0:4318 telegraf: agent_config: | [agent] @@ -303,6 +307,7 @@ data: - routing receivers: - telegraf + - otlp telemetry: logs: level: info diff --git a/tests/helm/testdata/goldenfile/metadata_metrics_otc/basic.output.yaml b/tests/helm/testdata/goldenfile/metadata_metrics_otc/basic.output.yaml index 839944024c..5341aa7d66 100644 --- a/tests/helm/testdata/goldenfile/metadata_metrics_otc/basic.output.yaml +++ b/tests/helm/testdata/goldenfile/metadata_metrics_otc/basic.output.yaml @@ -224,6 +224,10 @@ data: sumologic_schema: add_cloud_namespace: false receivers: + otlp: + protocols: + http: + endpoint: 0.0.0.0:4318 telegraf: agent_config: | [agent] @@ -301,6 +305,7 @@ data: - routing receivers: - telegraf + - otlp telemetry: logs: level: info diff --git a/tests/helm/testdata/goldenfile/metadata_metrics_otc_statefulset/basic.output.yaml b/tests/helm/testdata/goldenfile/metadata_metrics_otc_statefulset/basic.output.yaml index 4faf2d5bdf..364481cb63 100644 --- a/tests/helm/testdata/goldenfile/metadata_metrics_otc_statefulset/basic.output.yaml +++ b/tests/helm/testdata/goldenfile/metadata_metrics_otc_statefulset/basic.output.yaml @@ -69,6 +69,9 @@ spec: cpu: 500m memory: 768Mi ports: + - name: otlphttp + containerPort: 4318 + protocol: TCP - name: prom-write containerPort: 9888 protocol: TCP diff --git a/tests/helm/testdata/goldenfile/metadata_metrics_otc_statefulset/custom.output.yaml b/tests/helm/testdata/goldenfile/metadata_metrics_otc_statefulset/custom.output.yaml index 3eaf6bafbb..d0a6cb25d0 100644 --- a/tests/helm/testdata/goldenfile/metadata_metrics_otc_statefulset/custom.output.yaml +++ b/tests/helm/testdata/goldenfile/metadata_metrics_otc_statefulset/custom.output.yaml @@ -82,6 +82,9 @@ spec: cpu: 100m memory: 777Mi ports: + - name: otlphttp + containerPort: 4318 + protocol: TCP - name: prom-write containerPort: 9888 protocol: TCP diff --git a/tests/helm/testdata/goldenfile/metrics_collector_otc/basic.output.yaml b/tests/helm/testdata/goldenfile/metrics_collector_otc/basic.output.yaml index f6bf25a593..7493d98255 100644 --- a/tests/helm/testdata/goldenfile/metrics_collector_otc/basic.output.yaml +++ b/tests/helm/testdata/goldenfile/metrics_collector_otc/basic.output.yaml @@ -24,49 +24,12 @@ spec: serviceMonitorSelector: {} podMonitorSelector: {} env: - - name: SUMO_ENDPOINT_APISERVER_METRICS_SOURCE + - name: METADATA_METRICS_SVC + value: RELEASE-NAME-sumologic-metadata-metrics # no need for remote write proxy here + - name: NAMESPACE valueFrom: - secretKeyRef: - name: sumologic - key: endpoint-metrics-apiserver - - name: SUMO_ENDPOINT_CONTROL_PLANE_METRICS_SOURCE - valueFrom: - secretKeyRef: - name: sumologic - key: endpoint-control_plane_metrics_source - - name: 
SUMO_ENDPOINT_CONTROLLER_METRICS_SOURCE - valueFrom: - secretKeyRef: - name: sumologic - key: endpoint-metrics-kube-controller-manager - - name: SUMO_ENDPOINT_DEFAULT_METRICS_SOURCE - valueFrom: - secretKeyRef: - name: sumologic - key: endpoint-metrics - - name: SUMO_ENDPOINT_KUBELET_METRICS_SOURCE - valueFrom: - secretKeyRef: - name: sumologic - key: endpoint-metrics-kubelet - - name: SUMO_ENDPOINT_NODE_METRICS_SOURCE - valueFrom: - secretKeyRef: - name: sumologic - key: endpoint-metrics-node-exporter - - name: SUMO_ENDPOINT_SCHEDULER_METRICS_SOURCE - valueFrom: - secretKeyRef: - name: sumologic - key: endpoint-metrics-kube-scheduler - - name: SUMO_ENDPOINT_STATE_METRICS_SOURCE - valueFrom: - secretKeyRef: - name: sumologic - key: endpoint-metrics-kube-state - - - name: NO_PROXY - value: kubernetes.default.svc + fieldRef: + fieldPath: metadata.namespace podSecurityContext: fsGroup: 999 ports: @@ -82,118 +45,25 @@ spec: volumes: - name: tmp emptyDir: {} + - name: file-storage + emptyDir: {} volumeMounts: - name: tmp mountPath: /tmp - name: file-storage mountPath: /var/lib/storage/otc - volumeClaimTemplates: - - metadata: - name: file-storage - spec: - accessModes: [ReadWriteOnce] - storageClassName: - resources: - requests: - storage: 10Gi config: | exporters: - sumologic/apiserver: - endpoint: ${SUMO_ENDPOINT_APISERVER_METRICS_SOURCE} - max_request_body_size: 16_777_216 # 16 MB before compression - metric_format: prometheus - sending_queue: - enabled: true - num_consumers: 10 - queue_size: 10_000 - storage: file_storage - ## set timeout to 30s due to big requests - timeout: 30s - sumologic/control_plane: - endpoint: ${SUMO_ENDPOINT_CONTROL_PLANE_METRICS_SOURCE} - max_request_body_size: 16_777_216 # 16 MB before compression - metric_format: prometheus - sending_queue: - enabled: true - num_consumers: 10 - queue_size: 10_000 - storage: file_storage - ## set timeout to 30s due to big requests - timeout: 30s - sumologic/controller: - endpoint: ${SUMO_ENDPOINT_CONTROLLER_METRICS_SOURCE} - max_request_body_size: 16_777_216 # 16 MB before compression - metric_format: prometheus - sending_queue: - enabled: true - num_consumers: 10 - queue_size: 10_000 - storage: file_storage - ## set timeout to 30s due to big requests - timeout: 30s - sumologic/default: - endpoint: ${SUMO_ENDPOINT_DEFAULT_METRICS_SOURCE} - max_request_body_size: 16_777_216 # 16 MB before compression - metric_format: prometheus + otlphttp: + endpoint: http://${METADATA_METRICS_SVC}.${NAMESPACE}.svc.cluster.local.:4318 sending_queue: - enabled: true + queue_size: 10000 num_consumers: 10 - ## setting queue_size a high number, so we always use maximum space of the storage - ## minimal alert non-triggering queue size (if only one exporter is being used): 10GB/16MB = 640 - queue_size: 10_000 storage: file_storage - ## set timeout to 30s due to big requests - timeout: 30s - sumologic/kubelet: - endpoint: ${SUMO_ENDPOINT_KUBELET_METRICS_SOURCE} - max_request_body_size: 16_777_216 # 16 MB before compression - metric_format: prometheus - sending_queue: - enabled: true - num_consumers: 10 - queue_size: 10_000 - storage: file_storage - ## set timeout to 30s due to big requests - timeout: 30s - sumologic/node: - endpoint: ${SUMO_ENDPOINT_NODE_METRICS_SOURCE} - max_request_body_size: 16_777_216 # 16 MB before compression - metric_format: prometheus - sending_queue: - enabled: true - num_consumers: 10 - queue_size: 10_000 - storage: file_storage - ## set timeout to 30s due to big requests - timeout: 30s - sumologic/scheduler: - endpoint: 
${SUMO_ENDPOINT_SCHEDULER_METRICS_SOURCE} - max_request_body_size: 16_777_216 # 16 MB before compression - metric_format: prometheus - sending_queue: - enabled: true - num_consumers: 10 - queue_size: 10_000 - storage: file_storage - ## set timeout to 30s due to big requests - timeout: 30s - sumologic/state: - endpoint: ${SUMO_ENDPOINT_STATE_METRICS_SOURCE} - max_request_body_size: 16_777_216 # 16 MB before compression - metric_format: prometheus - sending_queue: - enabled: true - num_consumers: 10 - queue_size: 10_000 - storage: file_storage - ## set timeout to 30s due to big requests - timeout: 30s - extensions: health_check: {} - - ## Configuration for File Storage extension + pprof: {} file_storage: directory: /var/lib/storage/otc timeout: 10s @@ -201,177 +71,7 @@ spec: on_rebound: true directory: /tmp - pprof: {} - - processors: - ## Configuration for Batch Processor - ## The batch processor accepts spans and places them into batches grouped by node and resource - batch: - ## Maximum number of spans sent at once - send_batch_max_size: 2_048 - ## Number of spans after which a batch will be sent regardless of time - send_batch_size: 1_024 - ## Time duration after which a batch will be sent regardless of size - timeout: 1s - - # Prometheus receiver puts all labels in record-level attributes, and we need them in resource - groupbyattrs: - keys: - - container - - namespace - - pod - - service - - ## The Kubernetes sprocessor automatically tags logs, metrics and traces with Kubernetes metadata like pod name, namespace name etc. - k8s_tagger: - extract: - delimiter: "_" - labels: - - key: "*" - tag_name: "pod_labels_%s" - metadata: - ## extract the following well-known metadata fields - - daemonSetName - - deploymentName - - nodeName - - replicaSetName - - serviceName - - statefulSetName - owner_lookup_enabled: true # To enable fetching additional metadata using `owner` relationship - ## Has to be false to enrich metadata - passthrough: false - pod_association: - - from: build_hostname # Pods are identified by Pod name and namespace - - ## Configuration for Memory Limiter Processor - ## The memory_limiter processor is used to prevent out of memory situations on the collector. - memory_limiter: - ## check_interval is the time between measurements of memory usage for the - ## purposes of avoiding going over the limits. Defaults to zero, so no - ## checks will be performed. Values below 1 second are not recommended since - ## it can result in unnecessary CPU consumption. - check_interval: 5s - ## Maximum amount of memory, in %, targeted to be allocated by the process heap. - limit_percentage: 75 - ## Spike limit (calculated from available memory). Must be less than limit_percentage. 
- spike_limit_percentage: 20 - - ## Configuration for Metrics Transform Processor - metricstransform: - transforms: - ## rename all prometheus_remote_write_$name metrics to $name - action: update - include: ^prometheus_remote_write_(.*)$$ - match_type: regexp - new_name: $$1 - - ## Configuration for Resource Processor - resource: - attributes: - - action: upsert - from_attribute: namespace - key: k8s.namespace.name - - action: delete - key: namespace # remove namespace to avoid duplication when attribute translation is enabled - - action: upsert - from_attribute: pod - key: k8s.pod.name - - action: delete - key: pod # remove pod to avoid duplication when attribute translation is enabled - - action: upsert - from_attribute: container - key: k8s.container.name # add container in OpenTelemetry convention to unify configuration for Source processor - - action: delete - key: container # remove container to avoid duplication when attribute translation is enabled - - action: upsert - from_attribute: service - key: prometheus_service - - action: delete - key: service - - action: upsert - from_attribute: service.name - key: job - - action: delete # we don't want service.name to be set, as the schema processor translates it to "service" - key: service.name - - action: upsert - key: _origin # add "_origin" metadata to metrics to keep the same format as for metrics from Fluentd - value: kubernetes - - action: upsert - key: cluster - value: "kubernetes" - - ## NOTE: Drop these for now and and when proper configuration options - ## are exposed and source processor is configured then send them - ## as headers. - resource/delete_source_metadata: - attributes: - - action: delete - key: _sourceCategory - - action: delete - key: _sourceHost - - action: delete - key: _sourceName - resource/remove_k8s_pod_pod_name: - attributes: - - action: delete - key: k8s.pod.pod_name - - ## NOTE: below listed rules could be simplified if routingprocessor - ## supports regex matching. At this point we could group route entries - ## going to the same set of exporters. - routing: - attribute_source: resource - default_exporters: - - sumologic/default - drop_resource_routing_attribute: true - from_attribute: http_listener_v2_path - table: - ## apiserver metrics - - exporters: - - sumologic/apiserver - value: /prometheus.metrics.apiserver - ## container metrics - - exporters: - - sumologic/kubelet - value: /prometheus.metrics.container - ## control-plane metrics - - exporters: - - sumologic/control_plane - value: /prometheus.metrics.control-plane.coredns - - exporters: - - sumologic/control_plane - value: /prometheus.metrics.control-plane.kube-etcd - ## controller metrics - - exporters: - - sumologic/controller - value: /prometheus.metrics.controller-manager - ## kubelet metrics - - exporters: - - sumologic/kubelet - value: /prometheus.metrics.kubelet - ## node metrics - - exporters: - - sumologic/node - value: /prometheus.metrics.node - ## scheduler metrics - - exporters: - - sumologic/scheduler - value: /prometheus.metrics.scheduler - ## state metrics - - exporters: - - sumologic/state - value: /prometheus.metrics.state - - ## Configuration for Source Processor - ## Source processor adds Sumo Logic related metadata - source: - collector: "kubernetes" - - ## The Sumo Logic Schema processor modifies the metadata on logs, metrics and traces sent to Sumo Logic - ## so that the Sumo Logic apps can make full use of the ingested data. 
- sumologic_schema: - add_cloud_namespace: false - + processors: {} receivers: prometheus: @@ -472,30 +172,9 @@ spec: address: 0.0.0.0:8888 # this is the default, but setting it explicitly lets the operator add it automatically extensions: - health_check - - file_storage - pprof + - file_storage pipelines: metrics: - exporters: - - sumologic/default - - sumologic/apiserver - - sumologic/control_plane - - sumologic/controller - - sumologic/kubelet - - sumologic/node - - sumologic/scheduler - - sumologic/state - processors: - - memory_limiter - - metricstransform - - groupbyattrs - - resource - - k8s_tagger - - source - - resource/remove_k8s_pod_pod_name - - resource/delete_source_metadata - - sumologic_schema - - batch - - routing - receivers: - - prometheus + exporters: [otlphttp] + receivers: [prometheus] diff --git a/tests/helm/testdata/goldenfile/metrics_collector_otc/custom.output.yaml b/tests/helm/testdata/goldenfile/metrics_collector_otc/custom.output.yaml index 26ebffb935..c5cf317e9c 100644 --- a/tests/helm/testdata/goldenfile/metrics_collector_otc/custom.output.yaml +++ b/tests/helm/testdata/goldenfile/metrics_collector_otc/custom.output.yaml @@ -46,55 +46,12 @@ spec: targetCPUUtilization: 95 targetMemoryUtilization: 90 env: - - name: SUMO_ENDPOINT_APISERVER_METRICS_SOURCE + - name: METADATA_METRICS_SVC + value: RELEASE-NAME-sumologic-metadata-metrics # no need for remote write proxy here + - name: NAMESPACE valueFrom: - secretKeyRef: - name: sumologic - key: endpoint-metrics-apiserver - - name: SUMO_ENDPOINT_CONTROL_PLANE_METRICS_SOURCE - valueFrom: - secretKeyRef: - name: sumologic - key: endpoint-control_plane_metrics_source - - name: SUMO_ENDPOINT_CONTROLLER_METRICS_SOURCE - valueFrom: - secretKeyRef: - name: sumologic - key: endpoint-metrics-kube-controller-manager - - name: SUMO_ENDPOINT_DEFAULT_METRICS_SOURCE - valueFrom: - secretKeyRef: - name: sumologic - key: endpoint-metrics - - name: SUMO_ENDPOINT_KUBELET_METRICS_SOURCE - valueFrom: - secretKeyRef: - name: sumologic - key: endpoint-metrics-kubelet - - name: SUMO_ENDPOINT_NODE_METRICS_SOURCE - valueFrom: - secretKeyRef: - name: sumologic - key: endpoint-metrics-node-exporter - - name: SUMO_ENDPOINT_SCHEDULER_METRICS_SOURCE - valueFrom: - secretKeyRef: - name: sumologic - key: endpoint-metrics-kube-scheduler - - name: SUMO_ENDPOINT_STATE_METRICS_SOURCE - valueFrom: - secretKeyRef: - name: sumologic - key: endpoint-metrics-kube-state - - - name: NO_PROXY - value: kubernetes.default.svc - - - name: VALUE_FROM_SECRET - valueFrom: - secretKeyRef: - key: secret_key - name: secret_name + fieldRef: + fieldPath: metadata.namespace podSecurityContext: fsGroup: 999 ports: @@ -110,120 +67,25 @@ spec: volumes: - name: tmp emptyDir: {} + - name: file-storage + emptyDir: {} volumeMounts: - name: tmp mountPath: /tmp - name: file-storage mountPath: /var/lib/storage/otc - volumeClaimTemplates: - - metadata: - name: file-storage - labels: - pvcLabel: pvcValue - spec: - accessModes: [mode] - storageClassName: my-class - resources: - requests: - storage: 123Gi config: | exporters: - sumologic/apiserver: - endpoint: ${SUMO_ENDPOINT_APISERVER_METRICS_SOURCE} - max_request_body_size: 16_777_216 # 16 MB before compression - metric_format: prometheus - sending_queue: - enabled: true - num_consumers: 10 - queue_size: 10_000 - storage: file_storage - ## set timeout to 30s due to big requests - timeout: 30s - sumologic/control_plane: - endpoint: ${SUMO_ENDPOINT_CONTROL_PLANE_METRICS_SOURCE} - max_request_body_size: 16_777_216 # 16 MB before 
compression - metric_format: prometheus + otlphttp: + endpoint: http://${METADATA_METRICS_SVC}.${NAMESPACE}.svc.cluster.local.:4318 sending_queue: - enabled: true + queue_size: 10000 num_consumers: 10 - queue_size: 10_000 storage: file_storage - ## set timeout to 30s due to big requests - timeout: 30s - sumologic/controller: - endpoint: ${SUMO_ENDPOINT_CONTROLLER_METRICS_SOURCE} - max_request_body_size: 16_777_216 # 16 MB before compression - metric_format: prometheus - sending_queue: - enabled: true - num_consumers: 10 - queue_size: 10_000 - storage: file_storage - ## set timeout to 30s due to big requests - timeout: 30s - sumologic/default: - endpoint: ${SUMO_ENDPOINT_DEFAULT_METRICS_SOURCE} - max_request_body_size: 16_777_216 # 16 MB before compression - metric_format: prometheus - sending_queue: - enabled: true - num_consumers: 10 - ## setting queue_size a high number, so we always use maximum space of the storage - ## minimal alert non-triggering queue size (if only one exporter is being used): 10GB/16MB = 640 - queue_size: 10_000 - storage: file_storage - ## set timeout to 30s due to big requests - timeout: 30s - sumologic/kubelet: - endpoint: ${SUMO_ENDPOINT_KUBELET_METRICS_SOURCE} - max_request_body_size: 16_777_216 # 16 MB before compression - metric_format: prometheus - sending_queue: - enabled: true - num_consumers: 10 - queue_size: 10_000 - storage: file_storage - ## set timeout to 30s due to big requests - timeout: 30s - sumologic/node: - endpoint: ${SUMO_ENDPOINT_NODE_METRICS_SOURCE} - max_request_body_size: 16_777_216 # 16 MB before compression - metric_format: prometheus - sending_queue: - enabled: true - num_consumers: 10 - queue_size: 10_000 - storage: file_storage - ## set timeout to 30s due to big requests - timeout: 30s - sumologic/scheduler: - endpoint: ${SUMO_ENDPOINT_SCHEDULER_METRICS_SOURCE} - max_request_body_size: 16_777_216 # 16 MB before compression - metric_format: prometheus - sending_queue: - enabled: true - num_consumers: 10 - queue_size: 10_000 - storage: file_storage - ## set timeout to 30s due to big requests - timeout: 30s - sumologic/state: - endpoint: ${SUMO_ENDPOINT_STATE_METRICS_SOURCE} - max_request_body_size: 16_777_216 # 16 MB before compression - metric_format: prometheus - sending_queue: - enabled: true - num_consumers: 10 - queue_size: 10_000 - storage: file_storage - ## set timeout to 30s due to big requests - timeout: 30s - extensions: health_check: {} - - ## Configuration for File Storage extension + pprof: {} file_storage: directory: /var/lib/storage/otc timeout: 10s @@ -231,177 +93,7 @@ spec: on_rebound: true directory: /tmp - pprof: {} - - processors: - ## Configuration for Batch Processor - ## The batch processor accepts spans and places them into batches grouped by node and resource - batch: - ## Maximum number of spans sent at once - send_batch_max_size: 2_048 - ## Number of spans after which a batch will be sent regardless of time - send_batch_size: 1_024 - ## Time duration after which a batch will be sent regardless of size - timeout: 1s - - # Prometheus receiver puts all labels in record-level attributes, and we need them in resource - groupbyattrs: - keys: - - container - - namespace - - pod - - service - - ## The Kubernetes sprocessor automatically tags logs, metrics and traces with Kubernetes metadata like pod name, namespace name etc. 
- k8s_tagger: - extract: - delimiter: "_" - labels: - - key: "*" - tag_name: "pod_labels_%s" - metadata: - ## extract the following well-known metadata fields - - daemonSetName - - deploymentName - - nodeName - - replicaSetName - - serviceName - - statefulSetName - owner_lookup_enabled: true # To enable fetching additional metadata using `owner` relationship - ## Has to be false to enrich metadata - passthrough: false - pod_association: - - from: build_hostname # Pods are identified by Pod name and namespace - - ## Configuration for Memory Limiter Processor - ## The memory_limiter processor is used to prevent out of memory situations on the collector. - memory_limiter: - ## check_interval is the time between measurements of memory usage for the - ## purposes of avoiding going over the limits. Defaults to zero, so no - ## checks will be performed. Values below 1 second are not recommended since - ## it can result in unnecessary CPU consumption. - check_interval: 5s - ## Maximum amount of memory, in %, targeted to be allocated by the process heap. - limit_percentage: 75 - ## Spike limit (calculated from available memory). Must be less than limit_percentage. - spike_limit_percentage: 20 - - ## Configuration for Metrics Transform Processor - metricstransform: - transforms: - ## rename all prometheus_remote_write_$name metrics to $name - action: update - include: ^prometheus_remote_write_(.*)$$ - match_type: regexp - new_name: $$1 - - ## Configuration for Resource Processor - resource: - attributes: - - action: upsert - from_attribute: namespace - key: k8s.namespace.name - - action: delete - key: namespace # remove namespace to avoid duplication when attribute translation is enabled - - action: upsert - from_attribute: pod - key: k8s.pod.name - - action: delete - key: pod # remove pod to avoid duplication when attribute translation is enabled - - action: upsert - from_attribute: container - key: k8s.container.name # add container in OpenTelemetry convention to unify configuration for Source processor - - action: delete - key: container # remove container to avoid duplication when attribute translation is enabled - - action: upsert - from_attribute: service - key: prometheus_service - - action: delete - key: service - - action: upsert - from_attribute: service.name - key: job - - action: delete # we don't want service.name to be set, as the schema processor translates it to "service" - key: service.name - - action: upsert - key: _origin # add "_origin" metadata to metrics to keep the same format as for metrics from Fluentd - value: kubernetes - - action: upsert - key: cluster - value: "kubernetes" - - ## NOTE: Drop these for now and and when proper configuration options - ## are exposed and source processor is configured then send them - ## as headers. - resource/delete_source_metadata: - attributes: - - action: delete - key: _sourceCategory - - action: delete - key: _sourceHost - - action: delete - key: _sourceName - resource/remove_k8s_pod_pod_name: - attributes: - - action: delete - key: k8s.pod.pod_name - - ## NOTE: below listed rules could be simplified if routingprocessor - ## supports regex matching. At this point we could group route entries - ## going to the same set of exporters. 
- routing: - attribute_source: resource - default_exporters: - - sumologic/default - drop_resource_routing_attribute: true - from_attribute: http_listener_v2_path - table: - ## apiserver metrics - - exporters: - - sumologic/apiserver - value: /prometheus.metrics.apiserver - ## container metrics - - exporters: - - sumologic/kubelet - value: /prometheus.metrics.container - ## control-plane metrics - - exporters: - - sumologic/control_plane - value: /prometheus.metrics.control-plane.coredns - - exporters: - - sumologic/control_plane - value: /prometheus.metrics.control-plane.kube-etcd - ## controller metrics - - exporters: - - sumologic/controller - value: /prometheus.metrics.controller-manager - ## kubelet metrics - - exporters: - - sumologic/kubelet - value: /prometheus.metrics.kubelet - ## node metrics - - exporters: - - sumologic/node - value: /prometheus.metrics.node - ## scheduler metrics - - exporters: - - sumologic/scheduler - value: /prometheus.metrics.scheduler - ## state metrics - - exporters: - - sumologic/state - value: /prometheus.metrics.state - - ## Configuration for Source Processor - ## Source processor adds Sumo Logic related metadata - source: - collector: "kubernetes" - - ## The Sumo Logic Schema processor modifies the metadata on logs, metrics and traces sent to Sumo Logic - ## so that the Sumo Logic apps can make full use of the ingested data. - sumologic_schema: - add_cloud_namespace: false - + processors: {} receivers: prometheus: @@ -502,30 +194,9 @@ spec: address: 0.0.0.0:8888 # this is the default, but setting it explicitly lets the operator add it automatically extensions: - health_check - - file_storage - pprof + - file_storage pipelines: metrics: - exporters: - - sumologic/default - - sumologic/apiserver - - sumologic/control_plane - - sumologic/controller - - sumologic/kubelet - - sumologic/node - - sumologic/scheduler - - sumologic/state - processors: - - memory_limiter - - metricstransform - - groupbyattrs - - resource - - k8s_tagger - - source - - resource/remove_k8s_pod_pod_name - - resource/delete_source_metadata - - sumologic_schema - - batch - - routing - receivers: - - prometheus + exporters: [otlphttp] + receivers: [prometheus] diff --git a/tests/helm/testdata/goldenfile/services_with_service_monitor_labels/services_with_service_monitor_labels/default.output.yaml b/tests/helm/testdata/goldenfile/services_with_service_monitor_labels/services_with_service_monitor_labels/default.output.yaml index 3fb1bedfdc..586ed4c0b5 100644 --- a/tests/helm/testdata/goldenfile/services_with_service_monitor_labels/services_with_service_monitor_labels/default.output.yaml +++ b/tests/helm/testdata/goldenfile/services_with_service_monitor_labels/services_with_service_monitor_labels/default.output.yaml @@ -193,6 +193,7 @@ spec: targetPort: 4320 protocol: TCP --- +--- # Source: sumologic/templates/metrics/common/service.yaml apiVersion: v1 kind: Service @@ -211,6 +212,9 @@ spec: selector: app: RELEASE-NAME-sumologic-otelcol-metrics ports: + - name: otlphttp + port: 4318 + protocol: TCP - name: prom-write port: 9888 targetPort: 9888 diff --git a/tests/integration/features.go b/tests/integration/features.go index 11e0d1a313..b67b6820a0 100644 --- a/tests/integration/features.go +++ b/tests/integration/features.go @@ -543,34 +543,7 @@ func CheckOtelcolMetricsCollectorInstall(builder *features.FeatureBuilder) *feat stepfuncs.ReleaseFormatter("%s-sumologic-metrics-collector"), ), ), - ). 
- Assess("otelcol metrics collector buffers PVCs are created and bound", - func(ctx context.Context, t *testing.T, envConf *envconf.Config) context.Context { - res := envConf.Client().Resources(ctxopts.Namespace(ctx)) - pvcs := corev1.PersistentVolumeClaimList{} - cond := conditions. - New(res). - ResourceListMatchN(&pvcs, 1, - func(object k8s.Object) bool { - pvc := object.(*corev1.PersistentVolumeClaim) - if pvc.Status.Phase != corev1.ClaimBound { - log.V(0).Infof("PVC %q not bound yet", pvc.Name) - return false - } - return true - }, - resources.WithLabelSelector( - fmt.Sprintf("app.kubernetes.io/instance=%s.%s-sumologic-metrics", ctxopts.Namespace(ctx), ctxopts.HelmRelease(ctx)), - ), - ) - require.NoError(t, - wait.For(cond, - wait.WithTimeout(waitDuration), - wait.WithInterval(tickDuration), - ), - ) - return ctx - }) + ) } func CheckOtelcolMetadataLogsInstall(builder *features.FeatureBuilder) *features.FeatureBuilder { diff --git a/tests/integration/helm_ot_metrics_test.go b/tests/integration/helm_ot_metrics_test.go index 9a0cbb6ed5..14a76e1078 100644 --- a/tests/integration/helm_ot_metrics_test.go +++ b/tests/integration/helm_ot_metrics_test.go @@ -44,6 +44,7 @@ func Test_Helm_OT_Metrics(t *testing.T) { installChecks := []featureCheck{ CheckSumologicSecret(8), + CheckOtelcolMetadataMetricsInstall, CheckOtelcolMetricsCollectorInstall, }
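In short, the hand-off introduced by this change reduces to the sketch below, condensed from the two config files modified above; it is a summary of configuration already present in the diff, not additional chart content. The metrics collector drops its per-source sumologic exporters and instead forwards everything over OTLP/HTTP to the metrics metadata StatefulSet, whose address is resolved from the METADATA_METRICS_SVC and NAMESPACE environment variables set on the OpenTelemetryCollector resource. On the metadata side, an otlp receiver is added on port 4318 (exposed as the "otlphttp" port on the metadata Service, headless Service, and StatefulSet container) and appended to the metrics pipeline's receivers.

    # conf/metrics/collector/otelcol/config.yaml (metrics collector side)
    exporters:
      otlphttp:
        endpoint: http://${METADATA_METRICS_SVC}.${NAMESPACE}.svc.cluster.local.:4318
        sending_queue:
          queue_size: 10000
          num_consumers: 10
          storage: file_storage
    service:
      pipelines:
        metrics:
          receivers: [prometheus]
          exporters: [otlphttp]

    # conf/metrics/otelcol/config.yaml (metrics metadata side)
    receivers:
      otlp:
        protocols:
          http:
            endpoint: 0.0.0.0:4318   # reached via the new "otlphttp" port on the metadata Services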