From 1e6a1cde10e8926222defec66065ce46d0bf9e1c Mon Sep 17 00:00:00 2001 From: 5000user5000 Date: Sat, 30 May 2026 12:23:31 +0800 Subject: [PATCH] feat: add Prometheus and Grafana monitoring stack --- .gitignore | 3 + backend/pom.xml | 8 + .../identity/security/SecurityConfig.java | 1 + .../src/main/resources/application.properties | 8 + .../identity/security/SecurityConfigTest.java | 13 + deploy/production-like/README.md | 60 +- deploy/production-like/compose.yml | 80 +- .../dashboards/cloud-native-overview.json | 1036 +++++++++++++++++ .../provisioning/dashboards/cloud-native.yml | 11 + .../provisioning/datasources/prometheus.yml | 10 + deploy/production-like/nginx.conf | 9 + deploy/production-like/prometheus.yml | 26 + 12 files changed, 1262 insertions(+), 3 deletions(-) create mode 100644 deploy/production-like/grafana/dashboards/cloud-native-overview.json create mode 100644 deploy/production-like/grafana/provisioning/dashboards/cloud-native.yml create mode 100644 deploy/production-like/grafana/provisioning/datasources/prometheus.yml create mode 100644 deploy/production-like/prometheus.yml diff --git a/.gitignore b/.gitignore index 0b70d75..7165943 100644 --- a/.gitignore +++ b/.gitignore @@ -11,3 +11,6 @@ *.DS_Store local_docs/ .scannerwork/ +.env +.env.* +!.env.example diff --git a/backend/pom.xml b/backend/pom.xml index 499007f..821ef44 100644 --- a/backend/pom.xml +++ b/backend/pom.xml @@ -39,6 +39,14 @@ org.springframework.boot spring-boot-starter-security + + org.springframework.boot + spring-boot-starter-actuator + + + io.micrometer + micrometer-registry-prometheus + com.fasterxml.jackson.dataformat jackson-dataformat-yaml diff --git a/backend/src/main/java/com/cloudnative/identity/security/SecurityConfig.java b/backend/src/main/java/com/cloudnative/identity/security/SecurityConfig.java index 5e2e757..b103f2c 100644 --- a/backend/src/main/java/com/cloudnative/identity/security/SecurityConfig.java +++ 
b/backend/src/main/java/com/cloudnative/identity/security/SecurityConfig.java @@ -47,6 +47,7 @@ public SecurityFilterChain securityFilterChain(HttpSecurity http, JwtAuthenticat .requestMatchers("/error").permitAll() .requestMatchers("/api/auth/register", "/api/auth/login").permitAll() .requestMatchers("/api/health").permitAll() + .requestMatchers("/actuator/health", "/actuator/health/**", "/actuator/prometheus").permitAll() .requestMatchers("/api/system/instance").permitAll() .anyRequest().authenticated() ) diff --git a/backend/src/main/resources/application.properties b/backend/src/main/resources/application.properties index 49b56c5..fb4ca71 100644 --- a/backend/src/main/resources/application.properties +++ b/backend/src/main/resources/application.properties @@ -20,3 +20,11 @@ server.error.include-message=always logging.level.root=${LOGGING_LEVEL_ROOT:INFO} logging.level.org.springframework=${LOGGING_LEVEL_SPRING:INFO} logging.level.org.hibernate=${LOGGING_LEVEL_HIBERNATE:WARN} + +management.endpoints.web.exposure.include=health,metrics,prometheus +management.endpoint.health.probes.enabled=true +management.endpoint.health.show-details=when_authorized +management.health.livenessstate.enabled=true +management.health.readinessstate.enabled=true +management.endpoint.health.group.liveness.include=livenessState +management.endpoint.health.group.readiness.include=readinessState,db diff --git a/backend/src/test/java/com/cloudnative/identity/security/SecurityConfigTest.java b/backend/src/test/java/com/cloudnative/identity/security/SecurityConfigTest.java index 0a8ac64..aa94d75 100644 --- a/backend/src/test/java/com/cloudnative/identity/security/SecurityConfigTest.java +++ b/backend/src/test/java/com/cloudnative/identity/security/SecurityConfigTest.java @@ -18,6 +18,7 @@ import java.util.List; import java.util.UUID; +import static org.assertj.core.api.Assertions.assertThat; import static org.mockito.ArgumentMatchers.any; import static org.mockito.Mockito.when; import static 
org.springframework.http.MediaType.APPLICATION_JSON; @@ -117,4 +118,16 @@ void requiresAuthenticationForConfigurationEndpoints() throws Exception { mockMvc.perform(get("/api/configurations")) .andExpect(status().isUnauthorized()); } + + @Test + void allowsAnonymousActuatorEndpointsThroughSecurityFilter() throws Exception { + mockMvc.perform(get("/actuator/health")) + .andExpect(result -> assertThat(result.getResponse().getStatus()).isNotEqualTo(401)); + + mockMvc.perform(get("/actuator/health/readiness")) + .andExpect(result -> assertThat(result.getResponse().getStatus()).isNotEqualTo(401)); + + mockMvc.perform(get("/actuator/prometheus")) + .andExpect(result -> assertThat(result.getResponse().getStatus()).isNotEqualTo(401)); + } } diff --git a/deploy/production-like/README.md b/deploy/production-like/README.md index 7d0d6fe..d2c2be4 100644 --- a/deploy/production-like/README.md +++ b/deploy/production-like/README.md @@ -17,6 +17,12 @@ It uses common production building blocks without requiring Kubernetes: - Spring Boot backend containers - PostgreSQL as the source-of-truth database - Docker Compose health checks +- Spring Boot Actuator readiness and Prometheus metrics +- Prometheus scraping backend, PostgreSQL, container, and Nginx metrics +- Grafana dashboard provisioning for local monitoring evidence +- PostgreSQL exporter for database-level metrics +- cAdvisor for container CPU/memory metrics +- Nginx Prometheus exporter for reverse proxy metrics - same-origin frontend API calls through `/api/*` ## Start @@ -25,18 +31,50 @@ It uses common production building blocks without requiring Kubernetes: docker compose -f deploy/production-like/compose.yml up --build --scale backend=2 ``` +If the host still uses the legacy Compose binary, run the same command as: + +```bash +docker-compose -f deploy/production-like/compose.yml up --build --scale backend=2 +``` + Open: ```text -http://127.0.0.1:8090 +Application: http://127.0.0.1:8090 +Prometheus: http://127.0.0.1:9090 
+Grafana: http://127.0.0.1:3001 ``` -Check the backend health endpoint through the reverse proxy: +Grafana local credentials: + +```text +username: admin +password: admin +``` + +Check the backend health endpoints: ```bash curl http://127.0.0.1:8090/api/health +docker compose -f deploy/production-like/compose.yml exec backend \ + wget -qO- http://127.0.0.1:8080/actuator/health/readiness +``` + +Check Prometheus metrics from inside the backend container: + +```bash +docker compose -f deploy/production-like/compose.yml exec backend \ + wget -qO- http://127.0.0.1:8080/actuator/prometheus | head ``` +Prometheus target verification: + +```text +http://127.0.0.1:9090/targets +``` + +The `cloud-native-backend`, `cloud-native-postgres`, `cloud-native-containers`, and `cloud-native-nginx` targets should be `UP`. + ## Stop Keep database data: @@ -45,6 +83,12 @@ Keep database data: docker compose -f deploy/production-like/compose.yml down ``` +Legacy Compose fallback: + +```bash +docker-compose -f deploy/production-like/compose.yml down +``` + Reset database data: ```bash @@ -100,3 +144,15 @@ Before using this pattern outside local/demo environments: - restrict `CORS_ALLOWED_ORIGINS` to the real frontend domain - add centralized logs and uptime monitoring + +## Monitoring Story + +This stack provides the minimum concrete monitoring evidence for the final report: + +- Actuator readiness checks include the database dependency through the `db` health contributor. +- Prometheus scrapes `/actuator/prometheus` from the backend service. +- Prometheus also scrapes PostgreSQL exporter, cAdvisor, and Nginx exporter. +- Grafana auto-loads a `Cloud Native Production-Like Overview` dashboard. +- The dashboard shows backend availability, request rate, error rate, average latency, JVM memory, process CPU, HikariCP connection pool, PostgreSQL connections/transactions, container CPU/memory, and Nginx connection/request metrics. + +This is intentionally a local production-like monitoring setup.
It demonstrates observability and dependency-aware readiness without claiming full cloud failover, alert routing, or multi-region disaster recovery. diff --git a/deploy/production-like/compose.yml b/deploy/production-like/compose.yml index da90b5c..2e293a0 100644 --- a/deploy/production-like/compose.yml +++ b/deploy/production-like/compose.yml @@ -31,7 +31,7 @@ services: postgres: condition: service_healthy healthcheck: - test: ["CMD-SHELL", "wget -qO- http://127.0.0.1:8080/api/health | grep -q UP"] + test: ["CMD-SHELL", "wget -qO- http://127.0.0.1:8080/actuator/health/readiness | grep -q UP"] interval: 10s timeout: 3s retries: 12 @@ -53,6 +53,84 @@ services: networks: - app-network + postgres-exporter: + image: prometheuscommunity/postgres-exporter:v0.15.0 + environment: + DATA_SOURCE_NAME: postgresql://cloudnative:cloudnative@postgres:5432/cloud_native_db?sslmode=disable + depends_on: + postgres: + condition: service_healthy + networks: + - app-network + + cadvisor: + image: gcr.io/cadvisor/cadvisor:v0.49.1 + privileged: true + volumes: + - /:/rootfs:ro + - /var/run:/var/run:ro + - /sys:/sys:ro + - /var/lib/docker:/var/lib/docker:ro + - /dev/disk:/dev/disk:ro + networks: + - app-network + + nginx-exporter: + image: nginx/nginx-prometheus-exporter:1.3.0 + command: + - --nginx.scrape-uri=http://reverse-proxy:8080/nginx_status + depends_on: + reverse-proxy: + condition: service_healthy + networks: + - app-network + + prometheus: + image: prom/prometheus:v2.54.1 + ports: + - "9090:9090" + volumes: + - ./prometheus.yml:/etc/prometheus/prometheus.yml:ro + depends_on: + backend: + condition: service_healthy + postgres-exporter: + condition: service_started + cadvisor: + condition: service_started + nginx-exporter: + condition: service_started + healthcheck: + test: ["CMD-SHELL", "wget -qO- http://127.0.0.1:9090/-/ready | grep -q Ready"] + interval: 10s + timeout: 3s + retries: 12 + networks: + - app-network + + grafana: + image: grafana/grafana:11.1.4 + ports: + - 
"3001:3000" + environment: + GF_SECURITY_ADMIN_USER: admin + GF_SECURITY_ADMIN_PASSWORD: admin + GF_USERS_ALLOW_SIGN_UP: "false" + volumes: + - ./grafana/provisioning:/etc/grafana/provisioning:ro + - ./grafana/dashboards:/var/lib/grafana/dashboards:ro + depends_on: + prometheus: + condition: service_healthy + healthcheck: + test: ["CMD-SHELL", "wget -qO- http://127.0.0.1:3000/api/health | grep -q ok"] + interval: 10s + timeout: 3s + retries: 12 + start_period: 20s + networks: + - app-network + reverse-proxy: image: nginx:1.27-alpine ports: diff --git a/deploy/production-like/grafana/dashboards/cloud-native-overview.json b/deploy/production-like/grafana/dashboards/cloud-native-overview.json new file mode 100644 index 0000000..940ffd4 --- /dev/null +++ b/deploy/production-like/grafana/dashboards/cloud-native-overview.json @@ -0,0 +1,1036 @@ +{ + "id": null, + "uid": "cloud-native-overview", + "title": "Cloud Native Production-Like Overview", + "tags": [ + "cloud-native", + "spring-boot", + "prometheus", + "production-like" + ], + "timezone": "browser", + "schemaVersion": 39, + "version": 4, + "refresh": "10s", + "time": { + "from": "now-15m", + "to": "now" + }, + "templating": { + "list": [] + }, + "annotations": { + "list": [] + }, + "panels": [ + { + "type": "row", + "title": "Service Health", + "id": 100, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "collapsed": false, + "panels": [] + }, + { + "type": "stat", + "title": "Backend", + "id": 1, + "gridPos": { + "h": 4, + "w": 6, + "x": 0, + "y": 1 + }, + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "targets": [ + { + "expr": "up{job=\"cloud-native-backend\"}", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "green", + "value": 1 + } + ] + }, + "mappings": [ + { + "type": "value", + "options": { + "0": { + "text": "DOWN", + "color": "red" + }, + "1": { + 
"text": "UP", + "color": "green" + } + } + } + ] + }, + "overrides": [] + }, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value_and_name" + } + }, + { + "type": "stat", + "title": "PostgreSQL", + "id": 8, + "gridPos": { + "h": 4, + "w": 6, + "x": 6, + "y": 1 + }, + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "targets": [ + { + "expr": "up{job=\"cloud-native-postgres\"}", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "green", + "value": 1 + } + ] + }, + "mappings": [ + { + "type": "value", + "options": { + "0": { + "text": "DOWN", + "color": "red" + }, + "1": { + "text": "UP", + "color": "green" + } + } + } + ] + }, + "overrides": [] + }, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value_and_name" + } + }, + { + "type": "stat", + "title": "Nginx Exporter", + "id": 15, + "gridPos": { + "h": 4, + "w": 6, + "x": 12, + "y": 1 + }, + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "targets": [ + { + "expr": "up{job=\"cloud-native-nginx\"}", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "green", + "value": 1 + } + ] + }, + "mappings": [ + { + "type": "value", + "options": { + "0": { + "text": "DOWN", + "color": "red" + }, + "1": { + "text": "UP", + "color": "green" + } + } + } + ] + }, + "overrides": [] + }, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "center", + "orientation": 
"auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value_and_name" + } + }, + { + "type": "stat", + "title": "Container Metrics", + "id": 16, + "gridPos": { + "h": 4, + "w": 6, + "x": 18, + "y": 1 + }, + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "targets": [ + { + "expr": "up{job=\"cloud-native-containers\"}", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "green", + "value": 1 + } + ] + }, + "mappings": [ + { + "type": "value", + "options": { + "0": { + "text": "DOWN", + "color": "red" + }, + "1": { + "text": "UP", + "color": "green" + } + } + } + ] + }, + "overrides": [] + }, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value_and_name" + } + }, + { + "type": "row", + "title": "API Traffic", + "id": 101, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 5 + }, + "collapsed": false, + "panels": [] + }, + { + "type": "stat", + "title": "Requests / sec", + "id": 17, + "gridPos": { + "h": 4, + "w": 6, + "x": 0, + "y": 6 + }, + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "targets": [ + { + "expr": "sum(rate(http_server_requests_seconds_count[1m]))", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "unit": "reqps", + "decimals": 2 + }, + "overrides": [] + }, + "options": { + "colorMode": "background", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + } + }, + { + "type": "stat", + "title": "5xx / sec", + "id": 18, + "gridPos": { + "h": 4, + "w": 6, + "x": 6, + "y": 6 + }, + "datasource": { + "type": 
"prometheus", + "uid": "Prometheus" + }, + "targets": [ + { + "expr": "sum(rate(http_server_requests_seconds_count{status=~\"5..\"}[1m]))", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "unit": "reqps", + "decimals": 3 + }, + "overrides": [] + }, + "options": { + "colorMode": "background", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + } + }, + { + "type": "stat", + "title": "Avg Latency", + "id": 19, + "gridPos": { + "h": 4, + "w": 6, + "x": 12, + "y": 6 + }, + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "targets": [ + { + "expr": "sum(rate(http_server_requests_seconds_sum[1m])) / clamp_min(sum(rate(http_server_requests_seconds_count[1m])), 0.001)", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "unit": "s", + "decimals": 3 + }, + "overrides": [] + }, + "options": { + "colorMode": "background", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + } + }, + { + "type": "stat", + "title": "Active DB Connections", + "id": 20, + "gridPos": { + "h": 4, + "w": 6, + "x": 18, + "y": 6 + }, + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "targets": [ + { + "expr": "sum(hikaricp_connections_active)", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "decimals": 0 + }, + "overrides": [] + }, + "options": { + "colorMode": "background", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + } + }, + { + "type": "timeseries", + "title": "HTTP Request Rate", + "id": 2, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 10 + }, + "datasource": { + "type": "prometheus", + "uid": 
"Prometheus" + }, + "targets": [ + { + "expr": "sum(rate(http_server_requests_seconds_count[1m]))", + "legendFormat": "requests/sec", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "unit": "reqps", + "decimals": 2 + }, + "overrides": [] + }, + "options": { + "legend": { + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + } + }, + { + "type": "timeseries", + "title": "HTTP Error Rate", + "id": 3, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 10 + }, + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "targets": [ + { + "expr": "sum(rate(http_server_requests_seconds_count{status=~\"5..\"}[1m]))", + "legendFormat": "5xx/sec", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "unit": "reqps", + "decimals": 3 + }, + "overrides": [] + }, + "options": { + "legend": { + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + } + }, + { + "type": "timeseries", + "title": "Average HTTP Latency", + "id": 4, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 18 + }, + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "targets": [ + { + "expr": "sum(rate(http_server_requests_seconds_sum[1m])) / clamp_min(sum(rate(http_server_requests_seconds_count[1m])), 0.001)", + "legendFormat": "avg seconds", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "unit": "s", + "decimals": 3 + }, + "overrides": [] + }, + "options": { + "legend": { + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + } + }, + { + "type": "timeseries", + "title": "Nginx Requests", + "id": 13, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 18 + }, + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "targets": [ + { + "expr": "rate(nginx_http_requests_total[1m])", + 
"legendFormat": "requests/sec", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "unit": "reqps", + "decimals": 2 + }, + "overrides": [] + }, + "options": { + "legend": { + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + } + }, + { + "type": "row", + "title": "Backend Runtime", + "id": 102, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 26 + }, + "collapsed": false, + "panels": [] + }, + { + "type": "timeseries", + "title": "JVM Memory Used", + "id": 5, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 27 + }, + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "targets": [ + { + "expr": "sum(jvm_memory_used_bytes)", + "legendFormat": "bytes used", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "unit": "bytes", + "decimals": 1 + }, + "overrides": [] + }, + "options": { + "legend": { + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + } + }, + { + "type": "timeseries", + "title": "Process CPU Usage", + "id": 6, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 27 + }, + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "targets": [ + { + "expr": "process_cpu_usage", + "legendFormat": "backend process", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "unit": "percentunit", + "decimals": 3 + }, + "overrides": [] + }, + "options": { + "legend": { + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + } + }, + { + "type": "timeseries", + "title": "HikariCP Connections", + "id": 7, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 35 + }, + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "targets": [ + { + "expr": "hikaricp_connections_active", + "legendFormat": "active", + "refId": "A" + }, + { + "expr": 
"hikaricp_connections_idle", + "legendFormat": "idle", + "refId": "B" + } + ], + "fieldConfig": { + "defaults": { + "unit": "short", + "decimals": 0 + }, + "overrides": [] + }, + "options": { + "legend": { + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + } + }, + { + "type": "row", + "title": "Database", + "id": 103, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 43 + }, + "collapsed": false, + "panels": [] + }, + { + "type": "timeseries", + "title": "PostgreSQL Connections", + "id": 9, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 44 + }, + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "targets": [ + { + "expr": "sum(pg_stat_activity_count)", + "legendFormat": "connections", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "unit": "short", + "decimals": 0 + }, + "overrides": [] + }, + "options": { + "legend": { + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + } + }, + { + "type": "timeseries", + "title": "PostgreSQL Transactions", + "id": 14, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 44 + }, + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "targets": [ + { + "expr": "sum(rate(pg_stat_database_xact_commit[1m]))", + "legendFormat": "commits/sec", + "refId": "A" + }, + { + "expr": "sum(rate(pg_stat_database_xact_rollback[1m]))", + "legendFormat": "rollbacks/sec", + "refId": "B" + } + ], + "fieldConfig": { + "defaults": { + "unit": "ops", + "decimals": 2 + }, + "overrides": [] + }, + "options": { + "legend": { + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + } + }, + { + "type": "row", + "title": "Container And Proxy", + "id": 104, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 52 + }, + "collapsed": false, + "panels": [] + }, + { 
+ "type": "timeseries", + "title": "Container CPU Usage", + "id": 10, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 53 + }, + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "targets": [ + { + "expr": "sum by (id) (rate(container_cpu_usage_seconds_total{id=~\"/system.slice/docker-.+\\\\.scope\"}[1m]))", + "legendFormat": "{{id}}", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "unit": "cores", + "decimals": 3 + }, + "overrides": [] + }, + "options": { + "legend": { + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + } + }, + { + "type": "timeseries", + "title": "Container Memory Usage", + "id": 11, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 53 + }, + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "targets": [ + { + "expr": "sum by (id) (container_memory_usage_bytes{id=~\"/system.slice/docker-.+\\\\.scope\"})", + "legendFormat": "{{id}}", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "unit": "bytes", + "decimals": 1 + }, + "overrides": [] + }, + "options": { + "legend": { + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + } + }, + { + "type": "timeseries", + "title": "Nginx Connections", + "id": 12, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 61 + }, + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "targets": [ + { + "expr": "nginx_connections_active", + "legendFormat": "active", + "refId": "A" + }, + { + "expr": "nginx_connections_reading", + "legendFormat": "reading", + "refId": "B" + }, + { + "expr": "nginx_connections_writing", + "legendFormat": "writing", + "refId": "C" + }, + { + "expr": "nginx_connections_waiting", + "legendFormat": "waiting", + "refId": "D" + } + ], + "fieldConfig": { + "defaults": { + "unit": "short", + "decimals": 0 + }, + "overrides": [] + }, + "options": { 
+ "legend": { + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + } + } + ] +} diff --git a/deploy/production-like/grafana/provisioning/dashboards/cloud-native.yml b/deploy/production-like/grafana/provisioning/dashboards/cloud-native.yml new file mode 100644 index 0000000..3cda21f --- /dev/null +++ b/deploy/production-like/grafana/provisioning/dashboards/cloud-native.yml @@ -0,0 +1,11 @@ +apiVersion: 1 + +providers: + - name: Cloud Native + orgId: 1 + folder: Cloud Native + type: file + disableDeletion: false + updateIntervalSeconds: 30 + options: + path: /var/lib/grafana/dashboards diff --git a/deploy/production-like/grafana/provisioning/datasources/prometheus.yml b/deploy/production-like/grafana/provisioning/datasources/prometheus.yml new file mode 100644 index 0000000..a90b5d4 --- /dev/null +++ b/deploy/production-like/grafana/provisioning/datasources/prometheus.yml @@ -0,0 +1,10 @@ +apiVersion: 1 + +datasources: + - name: Prometheus + type: prometheus + uid: Prometheus + access: proxy + url: http://prometheus:9090 + isDefault: true + editable: true diff --git a/deploy/production-like/nginx.conf b/deploy/production-like/nginx.conf index 4d25a43..50b7b44 100644 --- a/deploy/production-like/nginx.conf +++ b/deploy/production-like/nginx.conf @@ -16,6 +16,15 @@ http { keepalive 32; } + server { + listen 8080; + + location = /nginx_status { + stub_status; + access_log off; + } + } + server { listen 80; diff --git a/deploy/production-like/prometheus.yml b/deploy/production-like/prometheus.yml new file mode 100644 index 0000000..d76aeb4 --- /dev/null +++ b/deploy/production-like/prometheus.yml @@ -0,0 +1,26 @@ +global: + scrape_interval: 15s + evaluation_interval: 15s + +scrape_configs: + - job_name: cloud-native-backend + metrics_path: /actuator/prometheus + static_configs: + - targets: + - backend:8080 + + + - job_name: cloud-native-postgres + static_configs: + - targets: + - 
postgres-exporter:9187 + + - job_name: cloud-native-containers + static_configs: + - targets: + - cadvisor:8080 + + - job_name: cloud-native-nginx + static_configs: + - targets: + - nginx-exporter:9113