Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions infrastructure/terraform/modules/eventpub/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,15 @@
| <a name="input_data_plane_bus_arn"></a> [data\_plane\_bus\_arn](#input\_data\_plane\_bus\_arn) | Data plane event bus arn | `string` | n/a | yes |
| <a name="input_default_tags"></a> [default\_tags](#input\_default\_tags) | Default tag map for application to all taggable resources in the module | `map(string)` | `{}` | no |
| <a name="input_enable_event_cache"></a> [enable\_event\_cache](#input\_enable\_event\_cache) | Enable caching of events to an S3 bucket | `bool` | `false` | no |
| <a name="input_enable_event_publishing_anomaly_detection"></a> [enable\_event\_publishing\_anomaly\_detection](#input\_enable\_event\_publishing\_anomaly\_detection) | Enable CloudWatch anomaly detection alarm for SNS message publishing. Detects abnormal drops or spikes in event publishing volume. | `bool` | `true` | no |
| <a name="input_enable_firehose_raw_message_delivery"></a> [enable\_firehose\_raw\_message\_delivery](#input\_enable\_firehose\_raw\_message\_delivery) | Enables raw message delivery on firehose subscription | `bool` | `false` | no |
| <a name="input_enable_sns_delivery_logging"></a> [enable\_sns\_delivery\_logging](#input\_enable\_sns\_delivery\_logging) | Enable SNS Delivery Failure Notifications | `bool` | `false` | no |
| <a name="input_environment"></a> [environment](#input\_environment) | The name of the terraformscaffold environment the module is called for | `string` | n/a | yes |
| <a name="input_event_cache_buffer_interval"></a> [event\_cache\_buffer\_interval](#input\_event\_cache\_buffer\_interval) | The buffer interval for data firehose | `number` | `500` | no |
| <a name="input_event_cache_expiry_days"></a> [event\_cache\_expiry\_days](#input\_event\_cache\_expiry\_days) | s3 archiving expiry in days | `number` | `30` | no |
| <a name="input_event_publishing_anomaly_band_width"></a> [event\_publishing\_anomaly\_band\_width](#input\_event\_publishing\_anomaly\_band\_width) | The width of the anomaly detection band. Higher values (e.g. 4-6) reduce sensitivity and noise, lower values (e.g. 2-3) increase sensitivity. Recommended: 2-4. | `number` | `5` | no |
| <a name="input_event_publishing_anomaly_evaluation_periods"></a> [event\_publishing\_anomaly\_evaluation\_periods](#input\_event\_publishing\_anomaly\_evaluation\_periods) | Number of evaluation periods for the publishing anomaly alarm. Each period is defined by event\_publishing\_anomaly\_period. | `number` | `3` | no |
| <a name="input_event_publishing_anomaly_period"></a> [event\_publishing\_anomaly\_period](#input\_event\_publishing\_anomaly\_period) | The period in seconds over which the specified statistic is applied for anomaly detection. Minimum 300 seconds (5 minutes). Recommended: 300-600. | `number` | `300` | no |
| <a name="input_force_destroy"></a> [force\_destroy](#input\_force\_destroy) | When enabled will force destroy event-cache S3 bucket | `bool` | `false` | no |
| <a name="input_group"></a> [group](#input\_group) | The name of the tfscaffold group | `string` | `null` | no |
| <a name="input_iam_permissions_boundary_arn"></a> [iam\_permissions\_boundary\_arn](#input\_iam\_permissions\_boundary\_arn) | The ARN of the permissions boundary to use for the IAM role | `string` | `null` | no |
Expand All @@ -42,6 +46,7 @@

| Name | Description |
|------|-------------|
| <a name="output_publishing_anomaly_alarm"></a> [publishing\_anomaly\_alarm](#output\_publishing\_anomaly\_alarm) | CloudWatch anomaly detection alarm details for SNS publishing |
| <a name="output_s3_bucket_event_cache"></a> [s3\_bucket\_event\_cache](#output\_s3\_bucket\_event\_cache) | S3 Bucket ARN and Name for event cache |
| <a name="output_sns_topic"></a> [sns\_topic](#output\_sns\_topic) | SNS Topic ARN and Name |
<!-- vale on -->
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# CloudWatch anomaly-detection alarm on the number of messages published to
# the module's SNS topic. The alarm is only created when
# var.enable_event_publishing_anomaly_detection is true.
resource "aws_cloudwatch_metric_alarm" "publishing_anomaly" {
  count = var.enable_event_publishing_anomaly_detection ? 1 : 0

  alarm_name        = "${local.csi}-sns-publishing-anomaly"
  alarm_description = "RELIABILITY: Anomaly detection alarm for abnormal SNS message publishing patterns. Detects unexpected drops or spikes in event publishing volume that may indicate service degradation or misconfiguration."

  # NOTE(review): actions are enabled, but this block sets no alarm_actions /
  # ok_actions / insufficient_data_actions - confirm notifications are wired
  # up elsewhere.
  actions_enabled = true

  # Breach when the observed metric (m1) leaves the band returned by the
  # "ad1" expression in either direction.
  comparison_operator = "LessThanLowerOrGreaterThanUpperThreshold"
  threshold_metric_id = "ad1"

  # Number of evaluation periods for the publishing anomaly alarm.
  evaluation_periods = var.event_publishing_anomaly_evaluation_periods

  # NOTE(review): periods without datapoints are treated as healthy. If SNS
  # emits no NumberOfMessagesPublished datapoints while nothing is published,
  # a complete stop in publishing would not trip this alarm - confirm.
  treat_missing_data = "notBreaching"

  tags = merge(
    local.default_tags,
    {
      AlarmType    = "AnomalyDetection"
      AlarmPurpose = "EventPublishingAbnormality"
    }
  )

  # Observed series: sum of messages published to the topic per period.
  metric_query {
    id          = "m1"
    return_data = true

    metric {
      namespace   = "AWS/SNS"
      metric_name = "NumberOfMessagesPublished"
      stat        = "Sum"

      # The period in seconds over which the specified statistic is applied
      # for anomaly detection.
      period = var.event_publishing_anomaly_period

      dimensions = {
        TopicName = aws_sns_topic.main.name
      }
    }
  }

  # Expected band around m1. The second argument is the width of the anomaly
  # detection band: higher values (e.g. 4-6) reduce sensitivity and noise,
  # lower values (e.g. 2-3) increase sensitivity.
  metric_query {
    id          = "ad1"
    label       = "NumberOfMessagesPublished (expected)"
    expression  = "ANOMALY_DETECTION_BAND(m1, ${var.event_publishing_anomaly_band_width})"
    return_data = true
  }
}
8 changes: 8 additions & 0 deletions infrastructure/terraform/modules/eventpub/outputs.tf
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,11 @@ output "s3_bucket_event_cache" {
bucket = module.s3bucket_event_cache[0].bucket
} : {}
}

# ARN and name of the publishing anomaly alarm. Evaluates to null when the
# alarm resource has no instances (i.e. anomaly detection is disabled).
output "publishing_anomaly_alarm" {
  description = "CloudWatch anomaly detection alarm details for SNS publishing"

  value = length(aws_cloudwatch_metric_alarm.publishing_anomaly) > 0 ? {
    name = aws_cloudwatch_metric_alarm.publishing_anomaly[0].alarm_name
    arn  = aws_cloudwatch_metric_alarm.publishing_anomaly[0].arn
  } : null
}
24 changes: 24 additions & 0 deletions infrastructure/terraform/modules/eventpub/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -129,3 +129,27 @@ variable "additional_policies_for_event_cache_bucket" {
description = "A list of JSON policies to use to build the bucket policy"
default = []
}

# Feature toggle for the SNS publishing anomaly alarm; on by default.
variable "enable_event_publishing_anomaly_detection" {
  description = "Enable CloudWatch anomaly detection alarm for SNS message publishing. Detects abnormal drops or spikes in event publishing volume."
  type        = bool
  default     = true
}

# How many consecutive periods the publishing anomaly alarm evaluates.
variable "event_publishing_anomaly_evaluation_periods" {
  type        = number
  description = "Number of evaluation periods for the publishing anomaly alarm. Each period is defined by event_publishing_anomaly_period."
  default     = 3

  # Reject zero, negative and fractional values at plan time instead of
  # letting them fail later at apply time.
  validation {
    condition = (
      var.event_publishing_anomaly_evaluation_periods >= 1 &&
      floor(var.event_publishing_anomaly_evaluation_periods) == var.event_publishing_anomaly_evaluation_periods
    )
    error_message = "The event_publishing_anomaly_evaluation_periods value must be a whole number greater than or equal to 1."
  }
}

# Length, in seconds, of each period the alarm metric is aggregated over.
variable "event_publishing_anomaly_period" {
  type        = number
  description = "The period in seconds over which the specified statistic is applied for anomaly detection. Minimum 300 seconds (5 minutes). Recommended: 300-600."
  default     = 300

  # Enforce the minimum stated in the description, and require a whole
  # multiple of 60 seconds as CloudWatch expects for standard-resolution
  # metric periods.
  validation {
    condition = (
      var.event_publishing_anomaly_period >= 300 &&
      var.event_publishing_anomaly_period % 60 == 0
    )
    error_message = "The event_publishing_anomaly_period value must be at least 300 seconds and a multiple of 60."
  }
}

# Width of the anomaly detection band; it is interpolated as the second
# argument of the alarm's ANOMALY_DETECTION_BAND expression.
# NOTE(review): the default (5) sits outside the "Recommended: 2-4" range
# given in the description - confirm which of the two is intended.
variable "event_publishing_anomaly_band_width" {
  type        = number
  description = "The width of the anomaly detection band. Higher values (e.g. 4-6) reduce sensitivity and noise, lower values (e.g. 2-3) increase sensitivity. Recommended: 2-4."
  default     = 5

  # A zero or negative width cannot form a usable band; fail at plan time
  # rather than when the alarm is created.
  validation {
    condition     = var.event_publishing_anomaly_band_width > 0
    error_message = "The event_publishing_anomaly_band_width value must be greater than 0."
  }
}