diff --git a/charts/posthog/Chart.yaml b/charts/posthog/Chart.yaml index 3b689b79..6f60fb23 100644 --- a/charts/posthog/Chart.yaml +++ b/charts/posthog/Chart.yaml @@ -11,7 +11,7 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. -version: 30.22.0 +version: 30.22.1 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. diff --git a/charts/posthog/values.yaml b/charts/posthog/values.yaml index f5fe395a..f9cd686b 100644 --- a/charts/posthog/values.yaml +++ b/charts/posthog/values.yaml @@ -2328,15 +2328,15 @@ prometheus: needs to be rolled-back. - alert: End2EndIngestionLag - expr: (max by(scenario) (posthog_celery_observed_ingestion_lag_seconds{scenario=~"ingestion_api|ingestion"})) > 600 + expr: (max by(scenario) (posthog_celery_observed_ingestion_lag_seconds{scenario=~"ingestion_api|ingestion"})) > 1200 for: 5m labels: rotation: common severity: critical annotations: - summary: End-to-end analytics event ingestion lag exceeds 10 minutes for more than 5 minutes. + summary: End-to-end analytics event ingestion lag exceeds 20 minutes for more than 5 minutes. description: | - Our end-to-end probe measured an ingestion lag higher than 10 minutes for scenario {{ $labels.scenario }}. + Our end-to-end probe measured an ingestion lag higher than 20 minutes for scenario {{ $labels.scenario }}. Check the "Kafka (cluster overview)" dashboard to identify what topics and partitions are lagging and follow the https://posthog.com/docs/runbook/services/plugin-server/ingestion-lag runbook for recovery steps.