From 02fd5b97417e1b07a7f85a90569ea1d8802db0bc Mon Sep 17 00:00:00 2001 From: alhendrickson <159636032+alhendrickson@users.noreply.github.com.> Date: Tue, 3 Jun 2025 17:24:12 +0000 Subject: [PATCH 1/2] Refactor files --- .gitignore | 1 + docs/observability/setup/alerting.md | 40 +--------- docs/observability/setup/probing.md | 81 +------------------- docs/observability/setup/production-setup.md | 42 ++++------ 4 files changed, 18 insertions(+), 146 deletions(-) diff --git a/.gitignore b/.gitignore index b33ab23..4ac0a20 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ # Custom ignores observability/examples/simple/observability-simple +observability/examples/full/cogstack-observability _build # Python ignores diff --git a/docs/observability/setup/alerting.md b/docs/observability/setup/alerting.md index c697557..5de9c13 100644 --- a/docs/observability/setup/alerting.md +++ b/docs/observability/setup/alerting.md @@ -57,48 +57,10 @@ SLACK_WEBHOOK_URL=https://hooks.slack.com/services/your-webhook --- ## Advanced Customization -### Customize Alert Contact points +See [Alerts Customization](../customization/alerts-customization.md) to further customize alerts, for example setting up a different contact, or a new rule. -You can customize where alerts are sent by defining a new contact point in Grafana: - -``` -notifiers: - - name: "custom-contact" - type: "slack" - settings: - url: "https://hooks.slack.com/services/..." -``` - -Mount this file into: - -``` -/etc/grafana/provisioning/alerting/custom-contact.yml -``` - -Then update the environment variable: - -``` -ALERTING_DEFAULT_CONTACT=custom-contact -``` - -**Note** to be only mount the exact file, and not override the whole provisioning folder in the image, as this is already used to contain the defaults. - ---- - -### Add Custom Alerts -To define additional alert rules, create files in: - -``` -/etc/grafana/provisioning/alerting/ -``` - -Grafana will automatically load these at startup. 
- ---- ## Further Reading * [Grafana Alerting Provisioning](https://grafana.com/docs/grafana/latest/alerting/set-up/provision-alerting-resources/) * [Google SRE – Burn Rate Alerting](https://sre.google/workbook/alerting-on-slos/#4-alert-on-burn-rate) - -Let me know if you'd like to split this into multiple focused guides, e.g., one for basic uptime, one for SLO-based alerts. diff --git a/docs/observability/setup/probing.md b/docs/observability/setup/probing.md index bb60714..5555986 100644 --- a/docs/observability/setup/probing.md +++ b/docs/observability/setup/probing.md @@ -35,86 +35,7 @@ To add a new prober target: --- ## Advanced Setup - -### How to add Auth to the prober or further configurations - -To define how a probe behaves (e.g., add basic auth, headers, timeout, method), we will configure a module in the Blackbox Exporter config. - -#### Create a Blackbox Exporter Config file -You will need to create a new file, and then mount it over the existing provided vconfig - - -1. Create a new file: - - ``` - prometheus/blackbox-exporter/custom-blackbox-config.yml - ``` - -2. Add the existing defaults - -``` -modules: - http_get_200: - prober: http - timeout: 5s - http: - valid_http_versions: ["HTTP/1.1", "HTTP/2.0"] - valid_status_codes: [200] # Defaults to 2xx - method: GET - preferred_ip_protocol: "ip4" # defaults to "ip6" - tls_config: - insecure_skip_verify: true -``` - -3. 
Add your own module to the modules in that file -``` - http_2xx_custom: - prober: http - timeout: 5s - http: - valid_http_versions: ["HTTP/1.1", "HTTP/2.0"] - valid_status_codes: [200] # Defaults to 2xx - method: GET - preferred_ip_protocol: "ip4" # defaults to "ip6" - tls_config: - insecure_skip_verify: true - basic_auth: - username: my-user - password: example-pass -``` - -This example adds a module named `http_2xx_custom` that adds some basic auth credentials - ---- - -#### Reference the new module in your prober config - -In your probe YAML file, reference the module in the `module` field of the `labels` section: - -``` - - targets: - - https://myservice.example.com/health - labels: - name: my-service - module: http_2xx_custom # Optional - overrides the default Blackbox module -``` - -#### Mount the config file -You lastly need to mount the new config file and refer to it in docker compose - -``` - blackbox-exporter: - image: cogstacksystems/cogstack-observability-blackbox-exporter:latest - restart: unless-stopped - networks: - - observability - volumes: - - ./prometheus/blackbox-exporter:/config - command: - - "--config.file=/config/custom-blackbox-config.yml" -``` - ---- +See [Prober Customization](../customization/blackbox-exporter-config.md) to do any advanced setup, for example adding Basic Auth to allow the prober to call endpoints that need a username and password ## Notes diff --git a/docs/observability/setup/production-setup.md b/docs/observability/setup/production-setup.md index 55f2ad6..ec8f896 100644 --- a/docs/observability/setup/production-setup.md +++ b/docs/observability/setup/production-setup.md @@ -1,5 +1,4 @@ # Production Setup Tutorial -//In Progress This tutorial guides you through setting up the **CogStack Observability Stack** for production use. If you're new, we recommend completing the [Quickstart Tutorial](../quickstart.md) first to get a simplified setup running. 
@@ -65,39 +64,28 @@ This will launch Prometheus, Grafana, and all required services with ## Step 4: Create Site-Specific Config Files You must provide your own scrape and recording rules to tell Prometheus what to monitor. +- Probers: HTTP endpoints you want to monitor for availability + - Add files in `scrape-configs/probers/*.yml` + - [Configure Probers](./probing.md) + +- Exporters: Targets like Elasticsearch or Docker + - Add files in `scrape-configs/exporters/*.yml` + - [Add Exporters](./telemetry.md) -* Exporters: Targets like Elasticsearch or Docker - → Add files in `scrape-configs/exporters/*.yml` - -* Probers: HTTP endpoints you want to monitor for availability - → Add files in `scrape-configs/probers/*.yml` - -* Recording Rules: Define uptime goals or custom aggregations - → Add files in `recording-rules/*.yml` - -Refer to the following How-To guides for creating each config: - -* [Configure Probers](./probing.md) -* [Add Exporters](./telemetry.md) -* [Enable Alerting](./alerting.md) -* [Customise Setup](../customization/_index.md) - ---- - +- Recording Rules: Define uptime goals or custom aggregations + - Add files in `recording-rules/*.yml` + - [Enable Alerting](./alerting.md) --- ## What’s Next? -Your observability stack is now monitoring your own services. +Your observability stack is now monitoring your services, and you have a production ready project setup -Continue with: +You can now setup prometheus with any telemetry or probers required following the remaining steps in [Setup](./_index.md) -* [Grafana Dashboards](./dashboards.md) -* [Set up Alerts](./alerting.md) -* [Create custom views](../customization/_index.md) - ---- +For the last steps, you can -Let me know if you'd like to add code snippets for `.yml` examples in each folder. 
+- Fully customise with [Customization](../customization/_index.md) +- Look further into understanding the concepts and details in [Reference](../reference/_index.md) From efc342e1fca9419cf2533fbb928949f2c10f760a Mon Sep 17 00:00:00 2001 From: alhendrickson <159636032+alhendrickson@users.noreply.github.com.> Date: Tue, 3 Jun 2025 17:28:16 +0000 Subject: [PATCH 2/2] Refactor advanced notes2 --- .../customization/alerts-customization.md | 44 ++++++++++ .../customization/blackbox-exporter-config.md | 81 +++++++++++++++++++ docs/observability/setup/production-setup.md | 9 ++- 3 files changed, 133 insertions(+), 1 deletion(-) create mode 100644 docs/observability/customization/alerts-customization.md create mode 100644 docs/observability/customization/blackbox-exporter-config.md diff --git a/docs/observability/customization/alerts-customization.md b/docs/observability/customization/alerts-customization.md new file mode 100644 index 0000000..4915913 --- /dev/null +++ b/docs/observability/customization/alerts-customization.md @@ -0,0 +1,44 @@ +# Alerts Customization + +You can further set up and customize alerts in the stack. + +## Customize Alert Contact points +If you want alerts to go to a different contact, for example an email address instead of Slack, you can customize where alerts are sent by defining a new contact point in Grafana: + +``` +notifiers: + - name: "custom-contact" + type: "slack" + settings: + url: "https://hooks.slack.com/services/..." +``` + +Mount this file into: + +``` +/etc/grafana/provisioning/alerting/custom-contact.yml +``` + +Then update the environment variable: + +``` +ALERTING_DEFAULT_CONTACT=custom-contact +``` + +**Note:** be sure to mount only the exact file, and do not override the whole provisioning folder in the image, as this is already used to contain the defaults. + +--- + +## Add Custom Alerts +You can make custom alerts. 
+ +To define additional alert rules, create files in: + +``` +/etc/grafana/provisioning/alerting/ +``` + +Grafana will automatically load these at startup. + +--- + diff --git a/docs/observability/customization/blackbox-exporter-config.md b/docs/observability/customization/blackbox-exporter-config.md new file mode 100644 index 0000000..f4e542d --- /dev/null +++ b/docs/observability/customization/blackbox-exporter-config.md @@ -0,0 +1,81 @@ +# Prober Customizations + +## How to add Auth to the prober or further configurations? + +To define how a probe behaves (e.g., add basic auth, headers, timeout, method), we will configure a module in the Blackbox Exporter config. + +#### Create a Blackbox Exporter Config file +You will need to create a new file, and then mount it over the existing provided config + + +1. Create a new file: + + ``` + prometheus/blackbox-exporter/custom-blackbox-config.yml + ``` + +2. Add the existing defaults + +``` +modules: + http_get_200: + prober: http + timeout: 5s + http: + valid_http_versions: ["HTTP/1.1", "HTTP/2.0"] + valid_status_codes: [200] # Defaults to 2xx + method: GET + preferred_ip_protocol: "ip4" # defaults to "ip6" + tls_config: + insecure_skip_verify: true +``` + +3. 
Add your own module to the modules in that file +``` + http_2xx_custom: + prober: http + timeout: 5s + http: + valid_http_versions: ["HTTP/1.1", "HTTP/2.0"] + valid_status_codes: [200] # Defaults to 2xx + method: GET + preferred_ip_protocol: "ip4" # defaults to "ip6" + tls_config: + insecure_skip_verify: true + basic_auth: + username: my-user + password: example-pass +``` + +This example adds a module named `http_2xx_custom` that adds some basic auth credentials + +--- + +#### Reference the new module in your prober config + +In your probe YAML file, reference the module in the `module` field of the `labels` section: + +``` + - targets: + - https://myservice.example.com/health + labels: + name: my-service + module: http_2xx_custom # Optional - overrides the default Blackbox module +``` + +#### Mount the config file +You lastly need to mount the new config file and refer to it in docker compose + +``` + blackbox-exporter: + image: cogstacksystems/cogstack-observability-blackbox-exporter:latest + restart: unless-stopped + networks: + - observability + volumes: + - ./prometheus/blackbox-exporter:/config + command: + - "--config.file=/config/custom-blackbox-config.yml" +``` + +--- \ No newline at end of file diff --git a/docs/observability/setup/production-setup.md b/docs/observability/setup/production-setup.md index ec8f896..250ea7f 100644 --- a/docs/observability/setup/production-setup.md +++ b/docs/observability/setup/production-setup.md @@ -62,8 +62,10 @@ This will launch Prometheus, Grafana, and all required services with ## Step 4: Create Site-Specific Config Files - You must provide your own scrape and recording rules to tell Prometheus what to monitor. + +This is probably the hardest step: You will actually need to know what is running, and where it is! Building out these config files will give you that inventory, and give a real definition of what is running where. 
+ - Probers: HTTP endpoints you want to monitor for availability - Add files in `scrape-configs/probers/*.yml` - [Configure Probers](./probing.md) @@ -76,6 +78,9 @@ You must provide your own scrape and recording rules to tell Prometheus what to - Add files in `recording-rules/*.yml` - [Enable Alerting](./alerting.md) +## Step 5: Run Exporters Everywhere +The exporters need to be run on each VM that you want information from. It's a pull model, not push. + --- @@ -87,5 +92,7 @@ You can now setup prometheus with any telemetry or probers required following th For the last steps, you can +- Run the exporters on all the VMs that you want access to +- Deploy the stack in production - Fully customise with [Customization](../customization/_index.md) - Look further into understanding the concepts and details in [Reference](../reference/_index.md)