Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions deployment/common/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
## Running telemetry stack

To run the telemetry stack, run:
```
cd deployment/common/telemetry
docker compose -f docker-compose.otel.yml up -d
```

If you want to use your own OpenTelemetry Collector, modify the variables in `.otel.env`, which are used by the Mergin Maps server and the Celery workers.

The Grafana UI is accessible on port 3000. It can also be exposed via the Mergin nginx proxy (uncomment the Grafana server block in `nginx.conf`).
24 changes: 24 additions & 0 deletions deployment/common/nginx.conf
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ server {
# redirects, we set the Host: header above already.
proxy_redirect off;
proxy_pass http://merginmaps-server:5000;
proxy_hide_header X-Trace-Id;

# disable buffering
client_max_body_size 0; # No maximum client body size
Expand All @@ -49,6 +50,7 @@ server {
proxy_set_header X-Forwarded-Proto $scheme;
proxy_set_header Host $http_host;
proxy_pass http://merginmaps-server:5000;
proxy_hide_header X-Trace-Id;
}

location /download/ {
Expand All @@ -59,3 +61,25 @@ server {
}
}

# Uncomment the server block below if Grafana needs to be exposed through this proxy
# server {
# listen 8082;
# listen [::]:8082;
# server_name _;

# client_max_body_size 4G;

# # Don't show version information
# server_tokens off;

# location / {
# proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
# proxy_set_header X-Forwarded-Proto $scheme;
# proxy_set_header Host $http_host;
# # we don't want nginx trying to do something clever with
# # redirects, we set the Host: header above already.
# proxy_redirect off;
# proxy_pass http://merginmaps-grafana:3000;
# }
# }

3 changes: 3 additions & 0 deletions deployment/common/telemetry/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
loki-data
dashboards
alloy-data
6 changes: 6 additions & 0 deletions deployment/common/telemetry/.otel.env
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# OpenTelemetry settings: metrics, traces and logs are all exported over OTLP.
OTEL_METRICS_EXPORTER=otlp
OTEL_TRACES_EXPORTER=otlp
OTEL_LOGS_EXPORTER=otlp
# OTLP target. Point this at your own collector if you do not use the bundled one.
OTEL_EXPORTER_OTLP_ENDPOINT=http://merginmaps-otel-collector:4317
# Logs-specific endpoint; currently the same value as the general endpoint above.
# NOTE(review): the bundled otel-config.yaml defines only metrics and traces
# pipelines - confirm the collector actually accepts OTLP logs.
OTEL_EXPORTER_OTLP_LOGS_ENDPOINT=http://merginmaps-otel-collector:4317
# Metric export interval - presumably milliseconds (i.e. 10 s); confirm against the SDK docs.
OTEL_METRIC_EXPORT_INTERVAL=10000
35 changes: 35 additions & 0 deletions deployment/common/telemetry/alloy-config.alloy
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
// Log shipping pipeline: discover Docker containers, label their log
// streams and push the logs to Loki.

// Container discovery through the local Docker socket.
discovery.docker "containers" {
  host = "unix:///var/run/docker.sock"
}

// Copy selected Docker discovery metadata into regular labels.
discovery.relabel "containers" {
  targets = discovery.docker.containers.targets

  // container_name: the regex captures everything after the leading "/".
  rule {
    source_labels = ["__meta_docker_container_name"]
    regex         = "/(.*)"
    target_label  = "container_name"
  }

  // stream: taken from the container log stream metadata.
  rule {
    source_labels = ["__meta_docker_container_log_stream"]
    target_label  = "stream"
  }

  // image: taken from the container image name metadata.
  rule {
    source_labels = ["__meta_docker_container_image_name"]
    target_label  = "image"
  }
}

// Read logs of the relabelled targets and hand them to the Loki writer.
loki.source.docker "containers" {
  host       = "unix:///var/run/docker.sock"
  targets    = discovery.relabel.containers.output
  forward_to = [loki.write.default.receiver]
}

// Push endpoint of the Loki container.
loki.write "default" {
  endpoint {
    url = "http://merginmaps-loki:3100/loki/api/v1/push"
  }
}
88 changes: 88 additions & 0 deletions deployment/common/telemetry/docker-compose.otel.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
# Telemetry stack: OTel Collector, Alloy, Tempo, Prometheus, Loki and Grafana.
networks:
  merginmaps:
    # Pre-existing network named "mergin"; marked external, so it is not created here.
    name: mergin
    external: true

services:
  otel-collector:
    # Contrib version is required for StatsD
    image: otel/opentelemetry-collector-contrib:0.90.0
    container_name: merginmaps-otel-collector
    networks:
      - merginmaps
    volumes:
      - ./otel-config.yaml:/etc/otelcol-contrib/config.yaml
    ports:
      - "8125:8125/udp"  # StatsD (Metrics)
      - "4317:4317"  # OTLP (Traces)
      - "8889:8889"  # Prometheus Scrape Port
      # NOTE(review): otel-config.yaml enables only the health_check extension;
      # confirm something actually listens on 55679.
      - "55679:55679"
    # Short form: wait until tempo has been started.
    depends_on:
      - tempo

  alloy:
    image: grafana/alloy:v1.8.3
    container_name: merginmaps-alloy
    networks:
      - merginmaps
    volumes:
      - ./alloy-config.alloy:/etc/alloy/config.alloy
      # The Alloy config reads containers and their logs through the Docker socket.
      - /var/run/docker.sock:/var/run/docker.sock
      - ./alloy-data:/var/lib/alloy/data
    depends_on:
      - loki
    command:
      - run
      - --server.http.listen-addr=0.0.0.0:12345
      - --storage.path=/var/lib/alloy/data
      - /etc/alloy/config.alloy

  tempo:
    image: grafana/tempo:2.8.3
    container_name: merginmaps-tempo
    networks:
      - merginmaps
    volumes:
      - ./tempo.yaml:/etc/tempo.yaml
      # - ./tempo-data:/tmp/tempo
    ports:
      - "3200:3200"  # Tempo UI/API
    command:
      - "-config.file=/etc/tempo.yaml"
      - "-target=all"

  prometheus:
    image: prom/prometheus:v3.9.0
    container_name: merginmaps-prometheus
    networks:
      - merginmaps
    volumes:
      - ./prometheus.yml:/etc/prometheus/prometheus.yml
    ports:
      - "9090:9090"

  loki:
    image: grafana/loki:3.6.4
    container_name: merginmaps-loki
    user: "root"
    networks:
      - merginmaps
    volumes:
      - ./loki-config.yaml:/etc/loki/local-config.yaml
      - ./loki-data:/loki  # Persistent storage
    ports:
      - "3100:3100"
    command: -config.file=/etc/loki/local-config.yaml

  grafana:
    image: grafana/grafana:12.3.2
    container_name: merginmaps-grafana
    networks:
      - merginmaps
    ports:
      - "3000:3000"
    environment:
      # Anonymous visitors are given the Admin role without logging in.
      # NOTE(review): reconsider before publishing port 3000 or proxying
      # Grafana through nginx on a reachable host.
      GF_AUTH_ANONYMOUS_ENABLED: "true"
      GF_AUTH_ANONYMOUS_ORG_ROLE: "Admin"
    volumes:
      - ./grafana-datasource.yaml:/etc/grafana/provisioning/datasources/datasource.yaml
      - ./dashboards:/var/lib/grafana/dashboards
    depends_on:
      - loki
      - prometheus
      - tempo
18 changes: 18 additions & 0 deletions deployment/common/telemetry/grafana-datasource.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Grafana data source provisioning: Prometheus (default), Tempo and Loki.
apiVersion: 1
datasources:
  - name: Prometheus
    type: prometheus
    url: http://merginmaps-prometheus:9090
    isDefault: true
  - name: Tempo
    type: tempo
    # Fixed uid so the Loki derived field can reference this data source;
    # when no uid is given Grafana generates one, and the link cannot resolve.
    uid: tempo
    url: http://merginmaps-tempo:3200
  - name: Loki
    type: loki
    url: http://merginmaps-loki:3100
    jsonData:
      derivedFields:
        # Turns a trace id found in a log line into a link to the Tempo data source.
        - datasourceUid: tempo
          # Captures the 32-character id that follows an [ACCESS], [INFO] or [ERROR] tag.
          matcherRegex: '\[(?:ACCESS|INFO|ERROR)\]\s\[(\w{32})\]'
          name: TraceID
          url: '$${__value.raw}'  # The raw ID value used to query Tempo
39 changes: 39 additions & 0 deletions deployment/common/telemetry/loki-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# Loki configuration: filesystem storage under /loki, in-memory ring,
# authentication disabled.
auth_enabled: false

server:
  http_listen_port: 3100
  grpc_listen_port: 9096

common:
  instance_addr: 0.0.0.0
  # Chunks and rules are kept under this prefix (the /loki volume of the container).
  path_prefix: /loki
  storage:
    filesystem:
      chunks_directory: /loki/chunks
      rules_directory: /loki/rules
  replication_factor: 1
  ring:
    kvstore:
      store: inmemory

schema_config:
  configs:
    # TSDB index on the local filesystem, one index period per 24h.
    - from: "2024-01-01"
      store: tsdb
      object_store: filesystem
      schema: v13
      index:
        prefix: index_
        period: 24h

limits_config:
  reject_old_samples: false
  reject_old_samples_max_age: 336h
  allow_structured_metadata: true
  otlp_config:
    resource_attributes:
      attributes_config:
        # Resource attributes stored as index labels.
        - action: index_label
          attributes:
            - container_name
            - image
64 changes: 64 additions & 0 deletions deployment/common/telemetry/otel-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# OpenTelemetry Collector configuration.
# Metrics: OTLP + StatsD + Redis -> Prometheus exporter.
# Traces:  OTLP -> Tempo over OTLP.
extensions:
  health_check:

receivers:
  otlp:
    protocols:
      grpc:
        endpoint: "0.0.0.0:4317"
      http:
        endpoint: "0.0.0.0:4318"
  statsd:
    endpoint: "0.0.0.0:8125"
    aggregation_interval: 10s
    enable_metric_type: true
  redis:
    endpoint: "merginmaps-redis:6379"
    collection_interval: 10s
    # password: "${REDIS_PASSWORD}"

processors:
  batch:
  transform:
    metric_statements:
      # Rename the matching metrics to mergin_gunicorn_* names.
      - context: metric
        statements:
          - 'set(name, "mergin_gunicorn_workers") where name == "app.gunicorn.workers"'
          - 'set(name, "mergin_gunicorn_request_duration") where name == "app.gunicorn.request.duration"'
          # these metrics are not working
          - 'set(name, "mergin_gunicorn_request_rate") where name == "app.gunicorn.requests"'
          - 'set(name, "mergin_gunicorn_log_critical") where name == "gunicorn.log.critical"'
          - 'set(name, "mergin_gunicorn_log_error") where name == "gunicorn.log.error"'
          - 'set(name, "mergin_gunicorn_log_warning") where name == "gunicorn.log.warning"'
          - 'set(name, "mergin_gunicorn_log_exception") where name == "gunicorn.log.exception"'
          - 'set(name, "mergin_gunicorn_response_code_200") where name == "app.gunicorn.request.status.200"'
    # log_statements:
    #   - context: log
    #     statements:
    #       - set(attributes["service_name"], attributes["docker_id"])
    #       - set(resource.attributes["service.name"], attributes["docker_id"])

exporters:
  prometheus:
    endpoint: "0.0.0.0:8889"  # The Collector will "host" metrics here
    resource_to_telemetry_conversion:
      enabled: true  # Converts OTel resource attributes to Prometheus labels
    add_metric_suffixes: true
  otlp:
    endpoint: "merginmaps-tempo:4317"
    tls:
      insecure: true

service:
  extensions:
    - health_check
  telemetry:
    metrics:
      address: 0.0.0.0:8888  # This enables the /metrics port
    logs:
      level: "warn"
  pipelines:
    metrics:
      receivers:
        - otlp
        - statsd
        - redis
      processors:
        - transform
        - batch
      exporters:
        - prometheus
    traces:
      receivers:
        - otlp
      processors:
        - batch
      exporters:
        - otlp
34 changes: 34 additions & 0 deletions deployment/common/telemetry/prometheus.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Prometheus configuration: scrapes the OTel Collector, Prometheus itself and Tempo.
global:
  scrape_interval: 15s  # How often to scrape targets
  evaluation_interval: 15s

# Enable the Exemplars feature in the storage engine
# storage:
#   tsdb:
#     out_of_order_time_window: 0s  # Standard for local dev
#   exemplars:
#     max_exemplars: 100000

scrape_configs:
  # 1. Scrape the OTel Collector (where Gunicorn metrics are hosted)
  - job_name: otel-collector
    static_configs:
      - targets:
          - merginmaps-otel-collector:8889
    # Disabled relabelling that would keep only the mergin_gunicorn* metrics.
    # metric_relabel_configs:
    #   - source_labels: [__name__]
    #     separator: ;
    #     regex: 'mergin_gunicorn.*'
    #     replacement: $1
    #     action: keep

  # 2. Optional: Scrape Prometheus itself
  - job_name: prometheus
    static_configs:
      - targets:
          - localhost:9090

  # 3. Optional: Scrape Tempo (to monitor your tracing backend)
  - job_name: tempo
    static_configs:
      - targets:
          - merginmaps-tempo:3200
Loading
Loading