-
Notifications
You must be signed in to change notification settings - Fork 1
CCM-14044 Adding anomaly alarms #103
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1 +1 @@ | ||
| terraform 1.12.0 | ||
| terraform 1.14.3 | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,40 @@ | ||
| resource "aws_cloudwatch_metric_alarm" "control_plane_ingestion_anomaly" { | ||
| count = var.enable_event_anomaly_detection ? 1 : 0 | ||
|
|
||
| alarm_name = "${local.csi}-control-plane-ingestion-anomaly" | ||
| alarm_description = "RELIABILITY: Detects anomalous patterns in events ingested to the control plane event bus" | ||
| comparison_operator = "LessThanLowerOrGreaterThanUpperThreshold" | ||
| evaluation_periods = var.event_anomaly_evaluation_periods | ||
| threshold_metric_id = "ad1" | ||
| treat_missing_data = "notBreaching" | ||
|
|
||
| metric_query { | ||
| id = "m1" | ||
| return_data = true | ||
|
|
||
| metric { | ||
| metric_name = "Ingestion" | ||
| namespace = "AWS/Events" | ||
| period = var.event_anomaly_period | ||
| stat = "Sum" | ||
|
|
||
| dimensions = { | ||
| EventBusName = aws_cloudwatch_event_bus.control_plane.name | ||
| } | ||
| } | ||
| } | ||
|
|
||
| metric_query { | ||
| id = "ad1" | ||
| expression = "ANOMALY_DETECTION_BAND(m1, ${var.event_anomaly_band_width})" | ||
| label = "Ingestion (expected)" | ||
| return_data = true | ||
| } | ||
|
|
||
| tags = merge( | ||
| local.default_tags, | ||
| { | ||
| Name = "${local.csi}-control-plane-ingestion-anomaly" | ||
| } | ||
| ) | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,40 @@ | ||
| resource "aws_cloudwatch_metric_alarm" "control_plane_invocations_anomaly" { | ||
| count = var.enable_event_anomaly_detection ? 1 : 0 | ||
|
|
||
| alarm_name = "${local.csi}-control-plane-invocations-anomaly" | ||
| alarm_description = "RELIABILITY: Detects anomalous patterns in events delivered from the control plane event bus to targets" | ||
| comparison_operator = "LessThanLowerOrGreaterThanUpperThreshold" | ||
| evaluation_periods = var.event_anomaly_evaluation_periods | ||
| threshold_metric_id = "ad1" | ||
| treat_missing_data = "notBreaching" | ||
|
|
||
| metric_query { | ||
| id = "m1" | ||
| return_data = true | ||
|
|
||
| metric { | ||
| metric_name = "Invocations" | ||
| namespace = "AWS/Events" | ||
| period = var.event_anomaly_period | ||
| stat = "Sum" | ||
|
|
||
| dimensions = { | ||
| EventBusName = aws_cloudwatch_event_bus.control_plane.name | ||
| } | ||
| } | ||
| } | ||
|
|
||
| metric_query { | ||
| id = "ad1" | ||
| expression = "ANOMALY_DETECTION_BAND(m1, ${var.event_anomaly_band_width})" | ||
| label = "Invocations (expected)" | ||
| return_data = true | ||
| } | ||
|
|
||
| tags = merge( | ||
| local.default_tags, | ||
| { | ||
| Name = "${local.csi}-control-plane-invocations-anomaly" | ||
| } | ||
| ) | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,40 @@ | ||
| resource "aws_cloudwatch_metric_alarm" "data_plane_ingestion_anomaly" { | ||
| count = var.enable_event_anomaly_detection ? 1 : 0 | ||
|
|
||
| alarm_name = "${local.csi}-data-plane-ingestion-anomaly" | ||
| alarm_description = "RELIABILITY: Detects anomalous patterns in events ingested to the data plane event bus" | ||
| comparison_operator = "LessThanLowerOrGreaterThanUpperThreshold" | ||
| evaluation_periods = var.event_anomaly_evaluation_periods | ||
| threshold_metric_id = "ad1" | ||
| treat_missing_data = "notBreaching" | ||
|
|
||
| metric_query { | ||
| id = "m1" | ||
| return_data = true | ||
|
|
||
| metric { | ||
| metric_name = "Ingestion" | ||
| namespace = "AWS/Events" | ||
| period = var.event_anomaly_period | ||
| stat = "Sum" | ||
|
|
||
| dimensions = { | ||
| EventBusName = aws_cloudwatch_event_bus.data_plane.name | ||
| } | ||
| } | ||
| } | ||
|
|
||
| metric_query { | ||
| id = "ad1" | ||
| expression = "ANOMALY_DETECTION_BAND(m1, ${var.event_anomaly_band_width})" | ||
| label = "Ingestion (expected)" | ||
| return_data = true | ||
| } | ||
|
|
||
| tags = merge( | ||
| local.default_tags, | ||
| { | ||
| Name = "${local.csi}-data-plane-ingestion-anomaly" | ||
| } | ||
| ) | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,40 @@ | ||
| resource "aws_cloudwatch_metric_alarm" "data_plane_invocations_anomaly" { | ||
| count = var.enable_event_anomaly_detection ? 1 : 0 | ||
|
|
||
| alarm_name = "${local.csi}-data-plane-invocations-anomaly" | ||
| alarm_description = "RELIABILITY: Detects anomalous patterns in events delivered from the data plane event bus to targets" | ||
| comparison_operator = "LessThanLowerOrGreaterThanUpperThreshold" | ||
| evaluation_periods = var.event_anomaly_evaluation_periods | ||
| threshold_metric_id = "ad1" | ||
| treat_missing_data = "notBreaching" | ||
|
|
||
| metric_query { | ||
| id = "m1" | ||
| return_data = true | ||
|
|
||
| metric { | ||
| metric_name = "Invocations" | ||
| namespace = "AWS/Events" | ||
| period = var.event_anomaly_period | ||
| stat = "Sum" | ||
|
|
||
| dimensions = { | ||
| EventBusName = aws_cloudwatch_event_bus.data_plane.name | ||
| } | ||
| } | ||
| } | ||
|
|
||
| metric_query { | ||
| id = "ad1" | ||
| expression = "ANOMALY_DETECTION_BAND(m1, ${var.event_anomaly_band_width})" | ||
| label = "Invocations (expected)" | ||
| return_data = true | ||
| } | ||
|
|
||
| tags = merge( | ||
| local.default_tags, | ||
| { | ||
| Name = "${local.csi}-data-plane-invocations-anomaly" | ||
| } | ||
| ) | ||
| } |
| Original file line number | Diff line number | Diff line change | ||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -130,3 +130,32 @@ variable "notify_core_sns_kms_arn" { | |||||||||||||||
| type = string | ||||||||||||||||
| default = null | ||||||||||||||||
| } | ||||||||||||||||
|
|
||||||||||||||||
| variable "enable_event_anomaly_detection" { | ||||||||||||||||
| type = bool | ||||||||||||||||
| description = "Enable CloudWatch anomaly detection alarms for event bus traffic. Applies to both data and control plane ingestion and invocations." | ||||||||||||||||
| default = true | ||||||||||||||||
| } | ||||||||||||||||
|
|
||||||||||||||||
| variable "event_anomaly_evaluation_periods" { | ||||||||||||||||
| type = number | ||||||||||||||||
| description = "Number of evaluation periods for the anomaly alarm. Each period is defined by event_anomaly_period." | ||||||||||||||||
| default = 2 | ||||||||||||||||
|
||||||||||||||||
| default = 2 | |
| default = 2 | |
| validation { | |
| condition = var.event_anomaly_evaluation_periods >= 1 && floor(var.event_anomaly_evaluation_periods) == var.event_anomaly_evaluation_periods | |
| error_message = "event_anomaly_evaluation_periods must be a positive whole number (integer >= 1)." | |
| } |
Copilot
AI
Mar 3, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The description for `event_anomaly_period` states a minimum of 300 seconds, but there’s no validation enforcing this (or that the value is an integer). Adding a validation block (e.g., >= 300 and whole number) would prevent configuration values that will be rejected by CloudWatch at apply time.
| default = 300 | |
| default = 300 | |
| validation { | |
| condition = var.event_anomaly_period >= 300 && floor(var.event_anomaly_period) == var.event_anomaly_period | |
| error_message = "Event anomaly period must be an integer number of seconds and at least 300 seconds." | |
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This bumps the asdf Terraform version to 1.14.3, but the module’s Terraform constraint still allows older versions (
`required_version = ">= 1.12.0"` in `infrastructure/terraform/components/events/versions.tf`, also reflected in the generated README). If 1.14.3 is now the minimum supported version, consider updating `required_version` (and regenerating docs) to avoid contributors/CI running with an older Terraform than intended.