Skip to content

Commit

Permalink
feat(otel): 🏗️ otel architecture changes for logs
Browse files Browse the repository at this point in the history
- Switched OtelCollector from Deployment to DaemonSet
- RBAC addition and changes to support logs
- Add K8s envs for OtelCollector and Metrics
- For internal metrics switched from internal.signoz.io to static scrape config
- Integrate Kubelet Metrics in OtelCollector
- Integrate K8s Cluster Metrics in OtelCollector Metrics

Signed-off-by: Prashant Shahi <prashant@signoz.io>
  • Loading branch information
prashant-shahi committed Aug 26, 2022
1 parent 655f44c commit e6e32e5
Show file tree
Hide file tree
Showing 7 changed files with 167 additions and 60 deletions.
75 changes: 60 additions & 15 deletions charts/signoz/templates/otel-collector-metrics/clusterrole.yaml
Expand Up @@ -5,18 +5,63 @@ metadata:
labels:
{{- include "otelCollectorMetrics.labels" . | nindent 4 }}
rules:
- apiGroups: [""]
resources:
- nodes
- nodes/proxy
- services
- endpoints
- pods
verbs: ["get", "list", "watch"]
- apiGroups:
- extensions
resources:
- ingresses
verbs: ["get", "list", "watch"]
- nonResourceURLs: ["/metrics"]
verbs: ["get"]
- apiGroups: [""]
resources:
- nodes
- nodes/proxy
- services
- endpoints
- events
- namespaces
- namespaces/status
- nodes/spec
- pods
- pods/status
- replicationcontrollers
- replicationcontrollers/status
- resourcequotas
verbs:
- get
- list
- watch
- apiGroups:
- apps
resources:
- daemonsets
- deployments
- replicasets
- statefulsets
verbs:
- get
- list
- watch
- apiGroups:
- extensions
resources:
- daemonsets
- deployments
- ingresses
- replicasets
verbs:
- get
- list
- watch
- apiGroups:
- batch
resources:
- jobs
- cronjobs
verbs:
- get
- list
- watch
- apiGroups:
- autoscaling
resources:
- horizontalpodautoscalers
verbs:
- get
- list
- watch
- nonResourceURLs: ["/metrics"]
verbs: ["get"]
Expand Up @@ -9,6 +9,6 @@ roleRef:
kind: ClusterRole
name: {{ include "otelCollectorMetrics.fullname" . }}-{{ include "signoz.namespace" . }}
subjects:
- kind: ServiceAccount
name: {{ include "otelCollectorMetrics.serviceAccountName" . }}
namespace: {{ .Release.Namespace }}
- name: {{ include "otelCollectorMetrics.serviceAccountName" . }}
kind: ServiceAccount
namespace: {{ include "signoz.namespace" . }}
Expand Up @@ -55,6 +55,11 @@ spec:
- "--config=/conf/otel-collector-metrics-config.yaml"
env:
{{- include "snippet.clickhouse-env" . | nindent 10 }}
- name: MY_POD_IP
valueFrom:
fieldRef:
apiVersion: v1
fieldPath: status.podIP
volumeMounts:
- name: otel-collector-metrics-config-vol
mountPath: /conf
Expand Down
25 changes: 25 additions & 0 deletions charts/signoz/templates/otel-collector/clusterrole.yaml
@@ -0,0 +1,25 @@
# Cluster-wide read permissions for the OtelCollector service account.
# ClusterRole is a cluster-scoped kind, so no metadata.namespace is set;
# the namespace is already part of the name to keep it unique per install.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: {{ include "otelCollector.fullname" . }}-{{ include "signoz.namespace" . }}
rules:
  # Discovery of core workload objects (list/watch only).
  - apiGroups: [""]
    resources: ["pods", "nodes", "endpoints"]
    verbs: ["list", "watch"]
  # Owner lookups for pods managed by ReplicaSets.
  - apiGroups: ["apps"]
    resources: ["replicasets"]
    verbs: ["list", "watch"]
  # Owner lookups for pods managed by Jobs.
  - apiGroups: ["batch"]
    resources: ["jobs"]
    verbs: ["list", "watch"]
  # Read access to the node proxy subresource
  # (presumably used to reach the kubelet API; confirm against the receiver config).
  - apiGroups: [""]
    resources: ["nodes/proxy"]
    verbs: ["get"]
  # NOTE(review): `create` is granted together with `get` on all three
  # resources below; confirm that each of them actually needs it.
  - apiGroups: [""]
    resources: ["nodes/stats", "configmaps", "events"]
    verbs: ["create", "get"]
  # Get/update is limited to the single ConfigMap named after the collector.
  - apiGroups: [""]
    resources: ["configmaps"]
    resourceNames: ["{{ include "otelCollector.fullname" . }}"]
    verbs: ["get","update"]
13 changes: 13 additions & 0 deletions charts/signoz/templates/otel-collector/clusterrolebinding.yaml
@@ -0,0 +1,13 @@
# Binds the OtelCollector ClusterRole to the collector's ServiceAccount.
# ClusterRoleBinding is a cluster-scoped kind, so no metadata.namespace is set;
# only the ServiceAccount subject below is namespaced.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: {{ include "otelCollector.fullname" . }}-{{ include "signoz.namespace" . }}
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  # Same name expression as metadata.name above.
  name: {{ include "otelCollector.fullname" . }}-{{ include "signoz.namespace" . }}
subjects:
  - name: {{ include "otelCollector.serviceAccountName" . }}
    kind: ServiceAccount
    namespace: {{ include "signoz.namespace" . }}
19 changes: 19 additions & 0 deletions charts/signoz/templates/otel-collector/daemonset.yaml
Expand Up @@ -57,6 +57,25 @@ spec:
- "--config=/conf/otel-collector-config.yaml"
env:
{{- include "snippet.clickhouse-env" . | nindent 10 }}
- name: K8S_NODE_NAME
valueFrom:
fieldRef:
fieldPath: spec.nodeName
- name: HOST_IP
valueFrom:
fieldRef:
fieldPath: status.hostIP
- name: K8S_NAMESPACE
valueFrom:
fieldRef:
fieldPath: metadata.namespace
- name: MY_POD_IP
valueFrom:
fieldRef:
apiVersion: v1
fieldPath: status.podIP
- name: OTEL_RESOURCE_ATTRIBUTES
value: host.name=$(K8S_NODE_NAME)
volumeMounts:
- name: otel-collector-config-vol
mountPath: /conf
Expand Down
84 changes: 42 additions & 42 deletions charts/signoz/values.yaml
Expand Up @@ -796,11 +796,8 @@ otelCollector:
# -- OtelCollector pod(s) annotations.
podAnnotations:
signoz.io/scrape: 'true'
signoz.io/port: '8888'
signoz.io/port: '8889'
signoz.io/path: /metrics
internal.signoz.io/scrape: 'true'
internal.signoz.io/port: '8889'
internal.signoz.io/path: /metrics

minReadySeconds: 5
ballastSizeMib: 683
Expand Down Expand Up @@ -1056,6 +1053,16 @@ otelCollector:
disk: {}
filesystem: {}
network: {}
kubeletstats:
collection_interval: 60s
auth_type: serviceAccount
endpoint: ${K8S_NODE_NAME}:10250
insecure_skip_verify: true
metric_groups:
- container
- node
- pod
- volume
# Uncomment to enable zipkin receiver.
# You will also have to enable it in `otelCollector.ports`.
# zipkin:
Expand Down Expand Up @@ -1131,10 +1138,27 @@ otelCollector:
- type: move
from: attributes.log
to: body
prometheus:
config:
global:
scrape_interval: 30s
scrape_configs:
- job_name: otel-collector
static_configs:
- targets:
- ${HOST_IP}:8888
processors:
batch:
send_batch_size: 1000
timeout: 10s
# Ref: https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/main/processor/resourcedetectionprocessor/README.md
resourcedetection:
detectors: [env, system] # include ec2/eks for AWS, gce/gke for GCP and azure/aks for Azure
# Using OTEL_RESOURCE_ATTRIBUTES envvar, env detector adds custom labels
timeout: 2s
override: false
system:
hostname_sources: [os] # alternatively, use [dns,os] for setting FQDN as host.name and os as fallback
signozspanmetrics/prometheus:
metrics_exporter: prometheus
latency_histogram_buckets:
Expand Down Expand Up @@ -1211,9 +1235,13 @@ otelCollector:
processors: [signozspanmetrics/prometheus, batch]
exporters: [clickhousetraces]
metrics:
receivers: [otlp, hostmetrics]
receivers: [otlp]
processors: [batch]
exporters: [clickhousemetricswrite]
metrics/generic:
receivers: [hostmetrics, kubeletstats, prometheus]
processors: [resourcedetection, batch]
exporters: [clickhousemetricswrite]
metrics/spanmetrics:
receivers: [otlp/spanmetrics]
exporters: [prometheus]
Expand Down Expand Up @@ -1384,16 +1412,19 @@ otelCollectorMetrics:
# @default -- See `values.yaml` for defaults
config:
receivers:
k8s_cluster:
collection_interval: 60s
node_conditions_to_report: [Ready, MemoryPressure]
# Data sources: metrics
prometheus:
config:
scrape_configs:
# otel-collector-metrics internal metrics
# - job_name: "otel-collector-metrics"
# scrape_interval: 60s
# static_configs:
# - targets:
# - localhost:8888
- job_name: "otel-collector-metrics"
scrape_interval: 60s
static_configs:
- targets:
- ${MY_POD_IP}:8888
# generic prometheus metrics scraper (scraped when pod annotations are set)
- job_name: "generic-collector"
scrape_interval: 60s
Expand Down Expand Up @@ -1425,37 +1456,6 @@ otelCollectorMetrics:
- source_labels: [__meta_kubernetes_pod_name]
action: replace
target_label: k8s_pod
# SigNoz span metrics
- job_name: "signozspanmetrics-collector"
scrape_interval: 60s
kubernetes_sd_configs:
- role: pod
relabel_configs:
- source_labels:
[__meta_kubernetes_pod_annotation_internal_signoz_io_scrape]
action: keep
regex: true
- source_labels:
[__meta_kubernetes_pod_annotation_internal_signoz_io_path]
action: replace
target_label: __metrics_path__
regex: (.+)
- source_labels:
[
__meta_kubernetes_pod_ip,
__meta_kubernetes_pod_annotation_internal_signoz_io_port,
]
action: replace
separator: ":"
target_label: __address__
- action: labelmap
regex: __meta_kubernetes_pod_label_(.+)
- source_labels: [__meta_kubernetes_namespace]
action: replace
target_label: k8s_namespace
- source_labels: [__meta_kubernetes_pod_name]
action: replace
target_label: k8s_pod
processors:
batch:
send_batch_size: 1000
Expand Down Expand Up @@ -1491,6 +1491,6 @@ otelCollectorMetrics:
extensions: [health_check, zpages, pprof]
pipelines:
metrics:
receivers: [prometheus]
receivers: [k8s_cluster, prometheus]
processors: [batch]
exporters: [clickhousemetricswrite]

0 comments on commit e6e32e5

Please sign in to comment.