From 4df1098569206c7f9f36685525eae1ebe6804edc Mon Sep 17 00:00:00 2001 From: Vincent Boutour Date: Tue, 30 Sep 2025 14:01:13 +0200 Subject: [PATCH 01/18] refactor(aws)!: AWSX-1703 Change matching of regex. Previously done against JSON, now only log message (#996) * refactor(aws)!: AWSX-1703 Change matching of regex. Previously done against JSON, now only on log Signed-off-by: Vincent Boutour * fixup! refactor(aws)!: AWSX-1703 Change matching of regex. Previously done against JSON, now only on log Signed-off-by: Vincent Boutour --------- Signed-off-by: Vincent Boutour --- aws/logs_monitoring/forwarder.py | 45 ++++++++++--------- aws/logs_monitoring/logs/datadog_matcher.py | 48 +++++++++++++++++++++ aws/logs_monitoring/logs/helpers.py | 37 ---------------- aws/logs_monitoring/tests/test_logs.py | 33 ++++++++++---- 4 files changed, 98 insertions(+), 65 deletions(-) create mode 100644 aws/logs_monitoring/logs/datadog_matcher.py diff --git a/aws/logs_monitoring/forwarder.py b/aws/logs_monitoring/forwarder.py index 60fe8a70b..11108c2d5 100644 --- a/aws/logs_monitoring/forwarder.py +++ b/aws/logs_monitoring/forwarder.py @@ -4,34 +4,35 @@ # Copyright 2021 Datadog, Inc. -import logging import json +import logging import os -from telemetry import send_event_metric, send_log_metric -from trace_forwarder.connection import TraceConnection -from logs.datadog_http_client import DatadogHTTPClient from logs.datadog_batcher import DatadogBatcher from logs.datadog_client import DatadogClient -from logs.datadog_tcp_client import DatadogTCPClient +from logs.datadog_http_client import DatadogHTTPClient +from logs.datadog_matcher import DatadogMatcher from logs.datadog_scrubber import DatadogScrubber -from logs.helpers import filter_logs, add_retry_tag -from retry.storage import Storage +from logs.datadog_tcp_client import DatadogTCPClient +from logs.helpers import add_retry_tag from retry.enums import RetryPrefix +from retry.storage import Storage from settings import ( DD_API_KEY, - DD_USE_TCP, + DD_FORWARD_LOG, DD_NO_SSL, - DD_SKIP_SSL_VALIDATION, - DD_URL, DD_PORT, - DD_TRACE_INTAKE_URL, - DD_FORWARD_LOG, + DD_SKIP_SSL_VALIDATION, DD_STORE_FAILED_EVENTS, - SCRUBBING_RULE_CONFIGS, - INCLUDE_AT_MATCH, + DD_TRACE_INTAKE_URL, + DD_URL, + DD_USE_TCP, EXCLUDE_AT_MATCH, + INCLUDE_AT_MATCH, + SCRUBBING_RULE_CONFIGS, ) +from telemetry import send_event_metric, send_log_metric +from trace_forwarder.connection import TraceConnection logger = logging.getLogger() logger.setLevel(logging.getLevelName(os.environ.get("DD_LOG_LEVEL", "INFO").upper())) @@ -83,25 +84,29 @@ def _forward_logs(self, logs, key=None): logger.debug(f"Forwarding {len(logs)} logs") scrubber = DatadogScrubber(SCRUBBING_RULE_CONFIGS) + matcher = DatadogMatcher( + include_pattern=INCLUDE_AT_MATCH, exclude_pattern=EXCLUDE_AT_MATCH + ) + logs_to_forward = [] for log in logs: if key: log = add_retry_tag(log) - # apply scrubbing rules to inner log message if exists + evaluated_log = log + + # apply scrubbing rules to inner log message if isinstance(log, dict) and log.get("message"): try: log["message"] = scrubber.scrub(log["message"]) + evaluated_log = log["message"] except Exception as e: logger.exception( f"Exception while scrubbing log message {log['message']}: {e}" ) - logs_to_forward.append(json.dumps(log, ensure_ascii=False)) - - logs_to_forward = filter_logs( - logs_to_forward, INCLUDE_AT_MATCH, EXCLUDE_AT_MATCH - ) + if matcher.match(evaluated_log): + logs_to_forward.append(json.dumps(log, ensure_ascii=False)) if DD_USE_TCP: batcher = 
DatadogBatcher(256 * 1000, 256 * 1000, 1) diff --git a/aws/logs_monitoring/logs/datadog_matcher.py b/aws/logs_monitoring/logs/datadog_matcher.py new file mode 100644 index 000000000..fd448cbfd --- /dev/null +++ b/aws/logs_monitoring/logs/datadog_matcher.py @@ -0,0 +1,48 @@ +# Unless explicitly stated otherwise all files in this repository are licensed +# under the Apache License Version 2.0. +# This product includes software developed at Datadog (https://www.datadoghq.com/). +# Copyright 2021 Datadog, Inc. + + +import logging +import os +import re + +from logs.exceptions import ScrubbingException +from logs.helpers import compileRegex + +logger = logging.getLogger() +logger.setLevel(logging.getLevelName(os.environ.get("DD_LOG_LEVEL", "INFO").upper())) + + +class DatadogMatcher(object): + def __init__(self, include_pattern=None, exclude_pattern=None): + self._include_regex = None + self._exclude_regex = None + + if include_pattern is not None: + logger.debug(f"Applying include pattern: {include_pattern}") + self._include_regex = compileRegex("INCLUDE_AT_MATCH", include_pattern) + + if exclude_pattern is not None: + logger.debug(f"Applying exclude pattern: {exclude_pattern}") + self._exclude_regex = compileRegex("EXCLUDE_AT_MATCH", exclude_pattern) + + def match(self, log): + try: + if self._exclude_regex is not None and re.search( + self._exclude_regex, str(log) + ): + logger.debug("Exclude pattern matched, excluding log event") + return False + + if self._include_regex is not None and not re.search( + self._include_regex, str(log) + ): + logger.debug("Include pattern did not match, excluding log event") + return False + + return True + + except ScrubbingException: + raise Exception("could not filter the payload") diff --git a/aws/logs_monitoring/logs/helpers.py b/aws/logs_monitoring/logs/helpers.py index 2dee129f6..a6ed14ede 100644 --- a/aws/logs_monitoring/logs/helpers.py +++ b/aws/logs_monitoring/logs/helpers.py @@ -12,47 +12,10 @@ from settings import DD_CUSTOM_TAGS, DD_RETRY_KEYWORD -from logs.exceptions import ScrubbingException - logger = logging.getLogger() logger.setLevel(logging.getLevelName(os.environ.get("DD_LOG_LEVEL", "INFO").upper())) -def filter_logs(logs, include_pattern=None, exclude_pattern=None): - """ - Applies log filtering rules. - If no filtering rules exist, return all the logs. 
- """ - if include_pattern is None and exclude_pattern is None: - return logs - - logger.debug(f"Applying exclude pattern: {exclude_pattern}") - exclude_regex = compileRegex("EXCLUDE_AT_MATCH", exclude_pattern) - - logger.debug(f"Applying include pattern: {include_pattern}") - include_regex = compileRegex("INCLUDE_AT_MATCH", include_pattern) - - # Add logs that should be sent to logs_to_send - logs_to_send = [] - - for log in logs: - try: - if exclude_regex is not None and re.search(exclude_regex, log): - logger.debug("Exclude pattern matched, excluding log event") - continue - - if include_regex is not None and not re.search(include_regex, log): - logger.debug("Include pattern did not match, excluding log event") - continue - - logs_to_send.append(log) - - except ScrubbingException: - raise Exception("could not filter the payload") - - return logs_to_send - - def compress_logs(batch, level): if level < 0: compression_level = 0 diff --git a/aws/logs_monitoring/tests/test_logs.py b/aws/logs_monitoring/tests/test_logs.py index d245b4af7..09487b8e8 100644 --- a/aws/logs_monitoring/tests/test_logs.py +++ b/aws/logs_monitoring/tests/test_logs.py @@ -1,12 +1,12 @@ -import unittest import os import sys -from importlib import reload +import unittest import unittest.mock +from importlib import reload -from logs.datadog_scrubber import DatadogScrubber from logs.datadog_batcher import DatadogBatcher -from logs.helpers import filter_logs +from logs.datadog_matcher import DatadogMatcher +from logs.datadog_scrubber import DatadogScrubber class TestScrubLogs(unittest.TestCase): @@ -65,10 +65,13 @@ class TestFilterLogs(unittest.TestCase): "This is not a REPORT log", "END RequestId: ...", "REPORT RequestId: ...", + {"message": "It should work"}, ] def test_include_at_match(self): - filtered_logs = filter_logs(self.example_logs, include_pattern=r"^(START|END)") + filtered_logs = filter_logs( + DatadogMatcher(include_pattern="^(START|END)"), self.example_logs + ) self.assertEqual( filtered_logs, @@ -79,19 +82,23 @@ def test_include_at_match(self): ) def test_exclude_at_match(self): - filtered_logs = filter_logs(self.example_logs, exclude_pattern=r"^(START|END)") + filtered_logs = filter_logs( + DatadogMatcher(exclude_pattern="^(START|END)"), self.example_logs + ) self.assertEqual( filtered_logs, [ "This is not a REPORT log", "REPORT RequestId: ...", + {"message": "It should work"}, ], ) def test_exclude_overrides_include(self): filtered_logs = filter_logs( - self.example_logs, include_pattern=r"^(START|END)", exclude_pattern=r"^END" + DatadogMatcher(include_pattern="^(START|END)", exclude_pattern="^END"), + self.example_logs, ) self.assertEqual( @@ -102,9 +109,19 @@ def test_exclude_overrides_include(self): ) def test_no_filtering_rules(self): - filtered_logs = filter_logs(self.example_logs) + filtered_logs = filter_logs(DatadogMatcher(), self.example_logs) self.assertEqual(filtered_logs, self.example_logs) +def filter_logs(matcher, logs): + filtered = [] + + for log in logs: + if matcher.match(log): + filtered.append(log) + + return filtered + + if __name__ == "__main__": unittest.main() From a9301a9b18bf69796d94445dcead606e07295a7d Mon Sep 17 00:00:00 2001 From: Vincent Boutour Date: Thu, 2 Oct 2025 11:43:06 +0200 Subject: [PATCH 02/18] feat(aws)!: Drop deprecated TCP support (#1000) Signed-off-by: Vincent Boutour --- aws/logs_monitoring/README.md | 217 +++++++++--------- aws/logs_monitoring/forwarder.py | 14 +- .../logs/datadog_tcp_client.py | 72 ------ aws/logs_monitoring/settings.py | 24 +- 
aws/logs_monitoring/template.yaml | 13 -- .../integration_tests/docker-compose.yml | 1 - 6 files changed, 112 insertions(+), 229 deletions(-) delete mode 100644 aws/logs_monitoring/logs/datadog_tcp_client.py diff --git a/aws/logs_monitoring/README.md b/aws/logs_monitoring/README.md index 40a2ddc08..0bc8d5f83 100644 --- a/aws/logs_monitoring/README.md +++ b/aws/logs_monitoring/README.md @@ -1,11 +1,11 @@ --- title: Datadog Forwarder dependencies: - - "https://github.com/DataDog/datadog-serverless-functions/blob/master/aws/logs_monitoring/README.md" + - "https://github.com/DataDog/datadog-serverless-functions/blob/master/aws/logs_monitoring/README.md" aliases: - - /serverless/troubleshooting/installing_the_forwarder/ - - /serverless/forwarder/ - - /serverless/libraries_integrations/forwarder/ + - /serverless/troubleshooting/installing_the_forwarder/ + - /serverless/forwarder/ + - /serverless/libraries_integrations/forwarder/ --- ## Overview @@ -119,12 +119,15 @@ The environment variables provided on this If you encounter issues upgrading to the latest version, check the Troubleshooting section. ### Upgrade an older version to 4.13.0+ + Starting version 4.13.0+ Lambda function has been updated to require **Python 3.13**. If upgrading an older forwarder installation to 4.13.0+, ensure AWS Lambda function is configured to use Python 3.13 ### Upgrade an older version to 4.3.0+ + Starting version 4.3.0 Lambda forwarder will support a single python version only. The supported Python version of this release is 3.12. ### Upgrade an older version to +4.0.0 + Starting version 4.0.0 `source`, `service` and `host` identification logic will be pulled out from the Lambda forwarder's code and set in directly in Datadog's backend. The first migrated log source is `RDS`. This is not a breaking change on how the `source`, `service` and `host` are set on the `Log explorer` side. Users should continue to have the same behavior as before. @@ -163,8 +166,8 @@ Since version 3.0.0, the forwarder Lambda function is managed by CloudFormation. 4. Ensure the new forwarder is working as expected, for example, being invoked regularly without errors. 5. Ensure the logs from the migrated triggers (sources) are showing up in Datadog log explorer and look right to you. 6. Migrate all triggers to the new forwarder. - - If you have been letting Datadog manage triggers [automatically][6] for you, update the forwarder Lambda ARN in AWS integration page **Log Collection** tab. - - If you have been manage the triggers [manually][7], then you have to migrate them manually (or using a script). + - If you have been letting Datadog manage triggers [automatically][6] for you, update the forwarder Lambda ARN in AWS integration page **Log Collection** tab. + - If you have been manage the triggers [manually][7], then you have to migrate them manually (or using a script). 7. Ensure the old forwarder Lambda function's invocations count drops to zero. 8. Delete the old forwarder Lambda function when you feel comfortable. 9. If you have old forwarder Lambda functions installed in multiple AWS accounts and regions, repeat the steps above in every account and region combination. @@ -218,42 +221,42 @@ We love pull requests. Here's a quick guide. 1. If you would like to discuss a feature or bug fix before implementing, find us in the `#serverless` channel of the [Datadog Slack community][11]. 1. 
Fork, clone, and create a branch: - ```bash - git clone git@github.com:/datadog-serverless-functions.git - git checkout -b - ``` + ```bash + git clone git@github.com:/datadog-serverless-functions.git + git checkout -b + ``` 1. Make code changes. 1. Build with your local changes. - ```bash - cd aws/logs_monitoring - ./tools/build_bundle.sh # any unique version is fine - ``` + ```bash + cd aws/logs_monitoring + ./tools/build_bundle.sh # any unique version is fine + ``` 1. Update your testing Forwarder with the modified code and test: - ```bash - # Upload in the AWS Lambda console if you don't have AWS CLI - aws lambda update-function-code \ - --region \ - --function-name \ - --zip-file fileb://.forwarder/aws-dd-forwarder-.zip - ``` + ```bash + # Upload in the AWS Lambda console if you don't have AWS CLI + aws lambda update-function-code \ + --region \ + --function-name \ + --zip-file fileb://.forwarder/aws-dd-forwarder-.zip + ``` 1. Run unit tests. - ``` - python -m unittest discover . # for code in Python - ./trace_forwarder/scripts/run_tests.sh # for code in Go - ``` + ``` + python -m unittest discover . # for code in Python + ./trace_forwarder/scripts/run_tests.sh # for code in Go + ``` 1. Run the integration tests. - ```bash - ./tools/integration_tests/integration_tests.sh + ```bash + ./tools/integration_tests/integration_tests.sh - # to update the snapshots if changes are expected - ./tools/integration_tests/integration_tests.sh --update - ``` + # to update the snapshots if changes are expected + ./tools/integration_tests/integration_tests.sh --update + ``` 1. If you changes affect the CloudFormation template, run the installation test against your own AWS account. - ```bash - ./tools/installation_test.sh - ``` + ```bash + ./tools/installation_test.sh + ``` 1. Push to your fork and [submit a pull request][12]. ## Advanced @@ -273,9 +276,9 @@ You can run the Forwarder in a VPC private subnet and send data to Datadog over 1. Follow [these instructions][14] to add the Datadog `api`, `http-logs.intake`, and `trace.agent` endpoints to your VPC. 2. Follow the [instructions][15] to add the AWS Secrets Manager and S3 endpoints to your VPC. 3. When installing the Forwarder with the CloudFormation template: - 1. Set `DdUseVPC` to `true`. - 2. Set `VPCSecurityGroupIds` and `VPCSubnetIds` based on your VPC settings. - 3. Set `DdFetchLambdaTags`, `DdFetchStepFunctionsTags` and `DdFetchS3Tags` to `false`, because AWS Resource Groups Tagging API doesn't support PrivateLink. + 1. Set `DdUseVPC` to `true`. + 2. Set `VPCSecurityGroupIds` and `VPCSubnetIds` based on your VPC settings. + 3. Set `DdFetchLambdaTags`, `DdFetchStepFunctionsTags` and `DdFetchS3Tags` to `false`, because AWS Resource Groups Tagging API doesn't support PrivateLink. #### DdUsePrivateLink is deprecated @@ -356,10 +359,6 @@ Datadog recommends using at least 10 reserved concurrency, but this defaults to `DdMultilineLogRegexPattern` : Use the supplied regular expression to detect for a new log line for multiline logs from S3, such as `\d{2}\/\d{2}\/\d{4}` for multiline logs beginning with pattern "11/10/2014". -`DdUseTcp` -: By default, the forwarder sends logs using HTTPS through the port 443. To send logs over an -SSL encrypted TCP connection, set this parameter to true. - `DdNoSsl` : Disable SSL when forwarding logs, set to true when forwarding logs through a proxy. @@ -411,7 +410,7 @@ Some examples of regular expressions that can be used for log filtering: - Include CloudTrail error messages only: `errorMessage`. 
- Include only logs containing an HTTP 4XX or 5XX error code: `\b[4|5][0-9][0-9]\b`. - Include only CloudWatch logs where the `message` field contains a specific JSON key/value pair: `\"awsRegion\":\"us-east-1\"`. - - The message field of a CloudWatch log event is encoded as a string. For example,`{"awsRegion": "us-east-1"}` is encoded as `{\"awsRegion\":\"us-east-1\"}`. Therefore, the pattern you provide must include `\` escape characters, like this: `\"awsRegion\":\"us-east-1\"`. + - The message field of a CloudWatch log event is encoded as a string. For example,`{"awsRegion": "us-east-1"}` is encoded as `{\"awsRegion\":\"us-east-1\"}`. Therefore, the pattern you provide must include `\` escape characters, like this: `\"awsRegion\":\"us-east-1\"`. To test different patterns against your logs, turn on [debug logs](#troubleshooting). @@ -462,11 +461,11 @@ To test different patterns against your logs, turn on [debug logs](#troubleshoot `LayerARN` : ARN for the layer containing the forwarder code. If empty, the script will use the version of the layer the forwarder was published with. Defaults to empty. - [20]: https://app.datadoghq.com/organization-settings/api-keys [13]: https://docs.datadoghq.com/getting_started/site/ [21]: https://docs.datadoghq.com/logs/processing/pipelines/ [2]: https://docs.datadoghq.com/logs/guide/send-aws-services-logs-with-the-datadog-lambda-function/ + {{% /tab %}} {{% tab "Terraform" %}} @@ -526,10 +525,6 @@ Datadog recommends using at least 10 reserved concurrency, but this defaults to `DD_MULTILINE_LOG_REGEX_PATTERN` : Use the supplied regular expression to detect for a new log line for multiline logs from S3, such as `\d{2}\/\d{2}\/\d{4}` for multiline logs beginning with pattern "11/10/2014". -`DD_USE_TCP` -: By default, the forwarder sends logs using HTTPS through the port 443. To send logs over an -SSL encrypted TCP connection, set this parameter to true. - `DD_NO_SSL` : Disable SSL when forwarding logs, set to true when forwarding logs through a proxy. @@ -581,7 +576,7 @@ Some examples of regular expressions that can be used for log filtering: - Include CloudTrail error messages only: `errorMessage`. - Include only logs containing an HTTP 4XX or 5XX error code: `\b[4|5][0-9][0-9]\b`. - Include only CloudWatch logs where the `message` field contains a specific JSON key/value pair: `\"awsRegion\":\"us-east-1\"`. - - The message field of a CloudWatch log event is encoded as a string. For example,`{"awsRegion": "us-east-1"}` is encoded as `{\"awsRegion\":\"us-east-1\"}`. Therefore, the pattern you provide must include `\` escape characters, like this: `\"awsRegion\":\"us-east-1\"`. + - The message field of a CloudWatch log event is encoded as a string. For example,`{"awsRegion": "us-east-1"}` is encoded as `{\"awsRegion\":\"us-east-1\"}`. Therefore, the pattern you provide must include `\` escape characters, like this: `\"awsRegion\":\"us-east-1\"`. To test different patterns against your logs, turn on [debug logs](#troubleshooting). 
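A minimal way to try candidate patterns before deploying, using the `DatadogMatcher` introduced in PATCH 01 (a sketch; assumes the `aws/logs_monitoring` package imports locally with default settings):

```python
# Local harness for INCLUDE_AT_MATCH / EXCLUDE_AT_MATCH patterns.
from logs.datadog_matcher import DatadogMatcher

matcher = DatadogMatcher(include_pattern=r"^(START|END)", exclude_pattern=r"^END")

for log in ["START RequestId: ...", "END RequestId: ...", "REPORT RequestId: ..."]:
    # Exclude wins over include, mirroring the forwarder's behavior.
    print(log, "->", "forwarded" if matcher.match(log) else "dropped")
# Only the START line survives: END is excluded, REPORT never matches.
```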
@@ -633,6 +628,7 @@ To test different patterns against your logs, turn on [debug logs](#troubleshoot [13]: https://docs.datadoghq.com/getting_started/site/ [21]: https://docs.datadoghq.com/logs/processing/pipelines/ [2]: https://docs.datadoghq.com/logs/guide/send-aws-services-logs-with-the-datadog-lambda-function/ + {{% /tab %}} {{< /tabs >}} @@ -644,33 +640,33 @@ To deploy the CloudFormation Stack with the default options, you need to have th ```json { - "Effect": "Allow", - "Action": [ - "cloudformation:*", - "secretsmanager:CreateSecret", - "secretsmanager:TagResource", - "s3:CreateBucket", - "s3:GetObject", - "s3:PutEncryptionConfiguration", - "s3:PutBucketPublicAccessBlock", - "iam:CreateRole", - "iam:GetRole", - "iam:PassRole", - "iam:PutRolePolicy", - "iam:AttachRolePolicy", - "lambda:CreateFunction", - "lambda:GetFunction", - "lambda:GetFunctionConfiguration", - "lambda:GetLayerVersion", - "lambda:InvokeFunction", - "lambda:PutFunctionConcurrency", - "lambda:AddPermission", - "lambda:TagResource", - "logs:CreateLogGroup", - "logs:DescribeLogGroups", - "logs:PutRetentionPolicy" - ], - "Resource": "*" + "Effect": "Allow", + "Action": [ + "cloudformation:*", + "secretsmanager:CreateSecret", + "secretsmanager:TagResource", + "s3:CreateBucket", + "s3:GetObject", + "s3:PutEncryptionConfiguration", + "s3:PutBucketPublicAccessBlock", + "iam:CreateRole", + "iam:GetRole", + "iam:PassRole", + "iam:PutRolePolicy", + "iam:AttachRolePolicy", + "lambda:CreateFunction", + "lambda:GetFunction", + "lambda:GetFunctionConfiguration", + "lambda:GetLayerVersion", + "lambda:InvokeFunction", + "lambda:PutFunctionConcurrency", + "lambda:AddPermission", + "lambda:TagResource", + "logs:CreateLogGroup", + "logs:DescribeLogGroups", + "logs:PutRetentionPolicy" + ], + "Resource": "*" } ``` @@ -687,25 +683,25 @@ The CloudFormation Stack creates following IAM roles: ```json [ - { - "Effect": "Allow", - "Action": [ - "logs:CreateLogGroup", - "logs:CreateLogStream", - "logs:PutLogEvents" - ], - "Resource": "*" - }, - { - "Action": ["s3:GetObject"], - "Resource": "arn:aws:s3:::*", - "Effect": "Allow" - }, - { - "Action": ["secretsmanager:GetSecretValue"], - "Resource": "", - "Effect": "Allow" - } + { + "Effect": "Allow", + "Action": [ + "logs:CreateLogGroup", + "logs:CreateLogStream", + "logs:PutLogEvents" + ], + "Resource": "*" + }, + { + "Action": ["s3:GetObject"], + "Resource": "arn:aws:s3:::*", + "Effect": "Allow" + }, + { + "Action": ["secretsmanager:GetSecretValue"], + "Resource": "", + "Effect": "Allow" + } ] ``` @@ -715,36 +711,33 @@ The CloudFormation Stack creates following IAM roles: ```json [ - { - "Effect": "Allow", - "Action": [ - "logs:CreateLogGroup", - "logs:CreateLogStream", - "logs:PutLogEvents" - ], - "Resource": "*" - }, - { - "Action": [ - "s3:ListBucket", - "s3:PutObject", - "s3:DeleteObject" - ], - "Resource": "", - "Effect": "Allow" - } + { + "Effect": "Allow", + "Action": [ + "logs:CreateLogGroup", + "logs:CreateLogStream", + "logs:PutLogEvents" + ], + "Resource": "*" + }, + { + "Action": ["s3:ListBucket", "s3:PutObject", "s3:DeleteObject"], + "Resource": "", + "Effect": "Allow" + } ] ``` ## Service Tag Setting + The value of the `service` tag is determined based on multiple inputs. These inputs are ranked by priority from highest to lowest + 1. Log message custom tags: If the log message has a `ddtags` key which contains a `service` tag value, it will be used to override the `service` tag in the log event. 2. 
Lambda tags cache (applicable for Lambda logs only): Activating `DdFetchLambdaTags` will fetch and store all Lambda functions tags and will override the `service` tag if it wasn't set previously or was set to a default value i.e. `source` value. 3. Cloudwatch log group tags cache (applicable for Cloudwatch logs only): Activating `DdFetchLogGroupTags` will fetch and store all Cloudwatch log groups tags which are added to the `ddtags` entry in the log event. If `service` tag value was set in the tags cache it will be used to set the `service` tag for the log event. 4. Directly setting a `service` tag value in the forwarder's `ddtags` ENV var. 5. Default value equal to the `source` tag. - ## Further Reading Additional helpful documentation, links, and articles: diff --git a/aws/logs_monitoring/forwarder.py b/aws/logs_monitoring/forwarder.py index 11108c2d5..735f62573 100644 --- a/aws/logs_monitoring/forwarder.py +++ b/aws/logs_monitoring/forwarder.py @@ -13,7 +13,6 @@ from logs.datadog_http_client import DatadogHTTPClient from logs.datadog_matcher import DatadogMatcher from logs.datadog_scrubber import DatadogScrubber -from logs.datadog_tcp_client import DatadogTCPClient from logs.helpers import add_retry_tag from retry.enums import RetryPrefix from retry.storage import Storage @@ -26,7 +25,6 @@ DD_STORE_FAILED_EVENTS, DD_TRACE_INTAKE_URL, DD_URL, - DD_USE_TCP, EXCLUDE_AT_MATCH, INCLUDE_AT_MATCH, SCRUBBING_RULE_CONFIGS, @@ -108,14 +106,10 @@ def _forward_logs(self, logs, key=None): if matcher.match(evaluated_log): logs_to_forward.append(json.dumps(log, ensure_ascii=False)) - if DD_USE_TCP: - batcher = DatadogBatcher(256 * 1000, 256 * 1000, 1) - cli = DatadogTCPClient(DD_URL, DD_PORT, DD_NO_SSL, DD_API_KEY, scrubber) - else: - batcher = DatadogBatcher(512 * 1000, 4 * 1000 * 1000, 400) - cli = DatadogHTTPClient( - DD_URL, DD_PORT, DD_NO_SSL, DD_SKIP_SSL_VALIDATION, DD_API_KEY, scrubber - ) + batcher = DatadogBatcher(512 * 1000, 4 * 1000 * 1000, 400) + cli = DatadogHTTPClient( + DD_URL, DD_PORT, DD_NO_SSL, DD_SKIP_SSL_VALIDATION, DD_API_KEY, scrubber + ) failed_logs = [] with DatadogClient(cli) as client: diff --git a/aws/logs_monitoring/logs/datadog_tcp_client.py b/aws/logs_monitoring/logs/datadog_tcp_client.py deleted file mode 100644 index 980f8ad64..000000000 --- a/aws/logs_monitoring/logs/datadog_tcp_client.py +++ /dev/null @@ -1,72 +0,0 @@ -# Unless explicitly stated otherwise all files in this repository are licensed -# under the Apache License Version 2.0. -# This product includes software developed at Datadog (https://www.datadoghq.com/). -# Copyright 2021 Datadog, Inc. - - -import os -import socket -import ssl -import logging - -from logs.exceptions import RetriableException, ScrubbingException - -logger = logging.getLogger() -logger.setLevel(logging.getLevelName(os.environ.get("DD_LOG_LEVEL", "INFO").upper())) - - -class DatadogTCPClient(object): - """ - Client that sends a batch of logs over TCP. 
- """ - - def __init__(self, host, port, no_ssl, api_key, scrubber): - self.host = host - self.port = port - self._use_ssl = not no_ssl - self._api_key = api_key - self._scrubber = scrubber - self._sock = None - if logger.isEnabledFor(logging.DEBUG): - logger.debug( - f"Initialized tcp client for logs intake: " - f"" - ) - - def _connect(self): - sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - if self._use_ssl: - context = ssl.create_default_context() - context.options |= ssl.OP_NO_TLSv1 | ssl.OP_NO_TLSv1_1 - context.minimum_version = ssl.TLSVersion.TLSv1_2 - sock = context.wrap_socket(sock, server_hostname=self.host) - sock.connect((self.host, self.port)) - self._sock = sock - - def _close(self): - if self._sock: - self._sock.close() - - def _reset(self): - self._close() - self._connect() - - def send(self, logs): - try: - frame = self._scrubber.scrub( - "".join(["{} {}\n".format(self._api_key, log) for log in logs]) - ) - self._sock.sendall(frame.encode("UTF-8")) - except ScrubbingException: - raise Exception("could not scrub the payload") - except Exception: - # most likely a network error, reset the connection - self._reset() - raise RetriableException() - - def __enter__(self): - self._connect() - return self - - def __exit__(self, ex_type, ex_value, traceback): - self._close() diff --git a/aws/logs_monitoring/settings.py b/aws/logs_monitoring/settings.py index e2d6af47f..5b5d64e91 100644 --- a/aws/logs_monitoring/settings.py +++ b/aws/logs_monitoring/settings.py @@ -59,12 +59,6 @@ def get_env_var(envvar, default, boolean=False): # DD_FORWARD_LOG = get_env_var("DD_FORWARD_LOG", "true", boolean=True) -## @param DD_USE_TCP - boolean - optional -default: false -## Change this value to `true` to send your logs and metrics using the TCP network client -## By default, it uses the HTTP client. -# -DD_USE_TCP = get_env_var("DD_USE_TCP", "false", boolean=True) - ## @param DD_USE_COMPRESSION - boolean - optional -default: true ## Only valid when sending logs over HTTP ## Change this value to `false` to send your logs without any compression applied @@ -117,19 +111,9 @@ def get_env_var(envvar, default, boolean=False): default="{}://trace.agent.{}".format("http" if DD_NO_SSL else "https", DD_SITE), ) -# The TCP transport has been deprecated, migrate to the HTTP intake. -if DD_USE_TCP: - DD_URL = get_env_var("DD_URL", default="lambda-intake.logs." + DD_SITE) - try: - if "DD_SITE" in os.environ and DD_SITE == "datadoghq.eu": - DD_PORT = int(get_env_var("DD_PORT", default="443")) - else: - DD_PORT = int(get_env_var("DD_PORT", default="10516")) - except Exception: - DD_PORT = 10516 -else: - DD_URL = get_env_var("DD_URL", default="http-intake.logs." + DD_SITE) - DD_PORT = int(get_env_var("DD_PORT", default="443")) + +DD_URL = get_env_var("DD_URL", default="http-intake.logs." 
+ DD_SITE) +DD_PORT = int(get_env_var("DD_PORT", default="443")) ## @param DD_USE_VPC DD_USE_VPC = get_env_var("DD_USE_VPC", "false", boolean=True) @@ -148,8 +132,6 @@ def get_env_var(envvar, default, boolean=False): logger.debug("Private link enabled, overriding configuration settings") # Only the US Datadog site is supported when PrivateLink is enabled DD_SITE = "datadoghq.com" - # TCP isn't supported when PrivateLink is enabled - DD_USE_TCP = False DD_NO_SSL = False DD_PORT = 443 # Override URLs diff --git a/aws/logs_monitoring/template.yaml b/aws/logs_monitoring/template.yaml index 900df1c3e..61115d47b 100644 --- a/aws/logs_monitoring/template.yaml +++ b/aws/logs_monitoring/template.yaml @@ -103,13 +103,6 @@ Parameters: - true - false Description: Let the forwarder fetch S3 buckets tags using GetResources API calls and apply them to S3 based logs. If set to true, permission tag:GetResources will be automatically added to the Lambda execution IAM role. The tags are cached in memory and S3 so that they'll only be fetched when the function cold starts or when the TTL (1 hour) expires. The forwarder increments the aws.lambda.enhanced.get_resources_api_calls metric for each API call made. - DdUseTcp: - Type: String - Default: false - AllowedValues: - - true - - false - Description: By default, the forwarder sends logs using HTTPS through the port 443. To send logs over an SSL encrypted TCP connection, set this parameter to true. DdNoSsl: Type: String Default: false @@ -310,7 +303,6 @@ Conditions: SetS3SourceZip: !Equals [!Select [0, !Split [/, !Ref SourceZipUrl]], "s3:"] SetDdTags: !Not - !Equals [!Ref DdTags, ""] - SetDdUseTcp: !Equals [!Ref DdUseTcp, true] SetDdNoSsl: !Equals [!Ref DdNoSsl, true] SetDdUrl: !Not - !Equals [!Ref DdUrl, ""] @@ -476,10 +468,6 @@ Resources: - SetDdFetchStepFunctionsTags - !Ref DdFetchStepFunctionsTags - !Ref AWS::NoValue - DD_USE_TCP: !If - - SetDdUseTcp - - !Ref DdUseTcp - - !Ref AWS::NoValue DD_NO_SSL: !If - SetDdNoSsl - !Ref DdNoSsl @@ -1018,7 +1006,6 @@ Metadata: Parameters: - DdTags - DdMultilineLogRegexPattern - - DdUseTcp - DdNoSsl - DdUrl - DdPort diff --git a/aws/logs_monitoring/tools/integration_tests/docker-compose.yml b/aws/logs_monitoring/tools/integration_tests/docker-compose.yml index fd9dd71d6..90e48ac5f 100644 --- a/aws/logs_monitoring/tools/integration_tests/docker-compose.yml +++ b/aws/logs_monitoring/tools/integration_tests/docker-compose.yml @@ -37,7 +37,6 @@ services: DD_TRACE_INTAKE_URL: http://recorder:8080 DD_NO_SSL: "true" DD_SKIP_SSL_VALIDATION: "true" - DD_USE_TCP: "false" DD_USE_COMPRESSION: "false" DD_ADDITIONAL_TARGET_LAMBDAS: "${EXTERNAL_LAMBDAS}" DD_S3_BUCKET_NAME: "${DD_S3_BUCKET_NAME}" From b3342fc94ad40cb6c03ce1f160ab326c06b39448 Mon Sep 17 00:00:00 2001 From: Vincent Boutour Date: Fri, 3 Oct 2025 09:46:48 +0200 Subject: [PATCH 03/18] feat(aws)!: Remove deprecated PrivateLink env variable (#1002) Signed-off-by: Vincent Boutour --- aws/logs_monitoring/README.md | 18 ---------------- aws/logs_monitoring/settings.py | 18 ---------------- aws/logs_monitoring/template.yaml | 36 +++++++++++-------------------- 3 files changed, 12 insertions(+), 60 deletions(-) diff --git a/aws/logs_monitoring/README.md b/aws/logs_monitoring/README.md index 0bc8d5f83..4376ba7f8 100644 --- a/aws/logs_monitoring/README.md +++ b/aws/logs_monitoring/README.md @@ -280,18 +280,6 @@ You can run the Forwarder in a VPC private subnet and send data to Datadog over 2. Set `VPCSecurityGroupIds` and `VPCSubnetIds` based on your VPC settings. 3. 
Set `DdFetchLambdaTags`, `DdFetchStepFunctionsTags` and `DdFetchS3Tags` to `false`, because AWS Resource Groups Tagging API doesn't support PrivateLink. -#### DdUsePrivateLink is deprecated - -The `DdUsePrivateLink` option has been deprecated since [v3.41.0][16]. This option was previously used to instruct the Forwarder to use a special set of PrivateLink endpoints for data intake: `pvtlink.api.{{< region-param key="dd_site" code="true" >}}`, `api-pvtlink.logs.{{< region-param key="dd_site" code="true" >}}`, and `trace-pvtlink.agent.{{< region-param key="dd_site" code="true" >}}`. Since v3.41.0, the Forwarder can send data over PrivateLink to Datadog using the regular DNS names of intake endpoints: `api.{{< region-param key="dd_site" code="true" >}}`, `http-intake.logs.{{< region-param key="dd_site" code="true" >}}`, and `trace.agent.{{< region-param key="dd_site" code="true" >}}`. Therefore, the `DdUsePrivateLink` option is no longer needed. - -If you have an older deployment of the Forwarder with `DdUsePrivateLink` set to `true`, then you may find mismatches between your configured PrivateLink endpoints and the [ones documented in Datadog][14], which is expected. Although the older PrivateLink endpoints were removed from that doc, they remain to function. When upgrading the Forwarder, there is no change required, that is, you can keep `DdUsePrivateLink` enabled and continue to use the older endpoints. - -However, if you are interested in switching to the new endpoints, you need to follow the updated instructions above to: - -1. Set up the new endpoints to `api.{{< region-param key="dd_site" code="true" >}}`, `http-intake.logs.{{< region-param key="dd_site" code="true" >}}`, and `trace.agent.{{< region-param key="dd_site" code="true" >}}`. -2. Set `DdUseVPC` to `true`. -3. Set `DdUsePrivateLink` to `false`. - ### AWS VPC and proxy support If you must deploy the Forwarder to a VPC without direct public internet access, and you cannot use AWS PrivateLink to connect to Datadog (for example, if your organization is hosted on the Datadog EU site: `datadoghq.eu`), then you can send data through a proxy. @@ -437,9 +425,6 @@ To test different patterns against your logs, turn on [debug logs](#troubleshoot `PermissionsBoundaryArn` : ARN for the Permissions Boundary Policy. -`DdUsePrivateLink` (DEPRECATED) -: Set to true to enable sending logs and metrics through AWS PrivateLink. See [Connect to Datadog over AWS PrivateLink][2]. - `DdHttpProxyURL` : Sets the standard web proxy environment variables HTTP_PROXY and HTTPS_PROXY. These are the URL endpoints your proxy server exposes. Do not use this in combination with AWS Private Link. Make sure to also set `DdSkipSslValidation` to true. @@ -600,9 +585,6 @@ To test different patterns against your logs, turn on [debug logs](#troubleshoot `PERMISSIONS_BOUNDARY_ARN` : ARN for the Permissions Boundary Policy. -`DD_USE_PRIVATE_LINK` (DEPRECATED) -: Set to true to enable sending logs and metrics through AWS PrivateLink. See [Connect to Datadog over AWS PrivateLink][2]. - `DD_HTTP_PROXY_URL` : Sets the standard web proxy environment variables HTTP_PROXY and HTTPS_PROXY. These are the URL endpoints your proxy server exposes. Do not use this in combination with AWS Private Link. Make sure to also set `DD_SKIP_SSL_VALIDATION` to true. 
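The `DD_*` options documented above are read through the repository's `get_env_var` helper (see the settings.py hunks that follow); only its signature is visible in the hunk headers, so the following is an inferred sketch of its behavior based on the call sites, not the actual implementation:

```python
import os

# Inferred sketch: signature taken from the hunk header
# `def get_env_var(envvar, default, boolean=False)`; the body is assumed.
def get_env_var(envvar, default, boolean=False):
    value = os.environ.get(envvar, default)
    if boolean:
        # Call sites pass string defaults like "false", so a
        # case-insensitive comparison against "true" is assumed here.
        return str(value).strip().lower() == "true"
    return value

# With no environment overrides:
#   get_env_var("DD_USE_VPC", "false", boolean=True)  -> False
#   get_env_var("DD_PORT", default="443")             -> "443"
```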
diff --git a/aws/logs_monitoring/settings.py b/aws/logs_monitoring/settings.py index 5b5d64e91..5385e56d1 100644 --- a/aws/logs_monitoring/settings.py +++ b/aws/logs_monitoring/settings.py @@ -121,24 +121,6 @@ def get_env_var(envvar, default, boolean=False): ## @param DD_CUSTOM_SOURCE DD_CUSTOM_SOURCE = get_env_var("DD_SOURCE", "") -# DEPRECATED. No longer need to use special endpoints, as you can now expose -# regular Datadog API endpoints `api`, `http-intake.logs` and `trace.agent` -# via PrivateLink. See https://docs.datadoghq.com/agent/guide/private-link/. -# @param DD_USE_PRIVATE_LINK - whether to forward logs via PrivateLink -# Overrides incompatible settings -# -DD_USE_PRIVATE_LINK = get_env_var("DD_USE_PRIVATE_LINK", "false", boolean=True) -if DD_USE_PRIVATE_LINK: - logger.debug("Private link enabled, overriding configuration settings") - # Only the US Datadog site is supported when PrivateLink is enabled - DD_SITE = "datadoghq.com" - DD_NO_SSL = False - DD_PORT = 443 - # Override URLs - DD_URL = "api-pvtlink.logs.datadoghq.com" - DD_API_URL = "https://pvtlink.api.datadoghq.com" - DD_TRACE_INTAKE_URL = "https://trace-pvtlink.agent.datadoghq.com" - class ScrubbingRuleConfig(object): def __init__(self, name, pattern, placeholder, enabled=True): diff --git a/aws/logs_monitoring/template.yaml b/aws/logs_monitoring/template.yaml index 61115d47b..963bd2de4 100644 --- a/aws/logs_monitoring/template.yaml +++ b/aws/logs_monitoring/template.yaml @@ -180,13 +180,6 @@ Parameters: - true - false Description: Set to false to disable log compression. Only valid when sending logs over HTTP. - DdUsePrivateLink: - Type: String - Default: false - AllowedValues: - - true - - false - Description: DEPRECATED, DO NOT CHANGE. See README.md for details. Set to true to deploy the Forwarder to a VPC and send logs, metrics, and traces via AWS PrivateLink. When set to true, must also set VPCSecurityGroupIds and VPCSubnetIds. DdUseVPC: Type: String Default: false @@ -205,11 +198,11 @@ Parameters: VPCSecurityGroupIds: Type: CommaDelimitedList Default: "" - Description: Comma separated list of VPC Security Group Ids. Used when DdUsePrivateLink or DdUseVPC is enabled. + Description: Comma separated list of VPC Security Group Ids. Used when DdUseVPC is enabled. VPCSubnetIds: Type: CommaDelimitedList Default: "" - Description: Comma separated list of VPC Subnet Ids. Used when DdUsePrivateLink or DdUseVPC is enabled. + Description: Comma separated list of VPC Subnet Ids. Used when DdUseVPC is enabled. 
DdCompressionLevel: Type: Number Default: 6 @@ -329,7 +322,6 @@ Conditions: - !Equals [!Ref DdFetchLogGroupTags, true] - !Equals [!Ref DdFetchLambdaTags, true] - !Equals [!Ref DdForwarderExistingBucketName, ""] - SetDdUsePrivateLink: !Equals [!Ref DdUsePrivateLink, true] SetDdUseVPC: !Equals [!Ref DdUseVPC, true] SetDdHttpProxyURL: !Not - !Equals [!Ref DdHttpProxyURL, ""] @@ -337,11 +329,9 @@ Conditions: - !Equals [!Ref DdNoProxy, ""] SetLayerARN: !Not - !Equals [!Ref LayerARN, ""] - UseVPC: !Or - - !Condition SetDdUsePrivateLink - - !Condition SetDdUseVPC SetDdForwardLog: !Equals [!Ref DdForwardLog, false] - SetDdStepFunctionsTraceEnabled: !Equals [!Ref DdStepFunctionsTraceEnabled, true] + SetDdStepFunctionsTraceEnabled: + !Equals [!Ref DdStepFunctionsTraceEnabled, true] SetDdUseCompression: !Equals [!Ref DdUseCompression, false] SetDdCompressionLevel: !Not - !Equals [!Ref DdCompressionLevel, 6] @@ -422,7 +412,10 @@ Resources: - !Ref DdForwarderExistingBucketName S3Key: !Sub - "aws-dd-forwarder-${DdForwarderVersion}.zip" - - {DdForwarderVersion: !FindInMap [Constants, DdForwarder, Version]} + - { + DdForwarderVersion: + !FindInMap [Constants, DdForwarder, Version], + } - ZipFile: " " MemorySize: !Ref MemorySize Runtime: python3.13 @@ -536,12 +529,8 @@ Resources: - SetDdMaxWorkers - !Ref DdMaxWorkers - !Ref AWS::NoValue - DD_USE_PRIVATE_LINK: !If - - SetDdUsePrivateLink - - true - - false DD_USE_VPC: !If - - UseVPC + - SetDdUseVPC - true - false HTTP_PROXY: !If @@ -581,7 +570,7 @@ Resources: - !Ref ReservedConcurrency - !Ref AWS::NoValue VpcConfig: !If - - UseVPC + - SetDdUseVPC - SecurityGroupIds: !If - SetVpcSecurityGroupIds - !Ref VPCSecurityGroupIds @@ -684,7 +673,7 @@ Resources: Effect: Allow - !Ref AWS::NoValue - !If - - UseVPC # Required for Lambda deployed in VPC + - SetDdUseVPC # Required for Lambda deployed in VPC - Action: - ec2:CreateNetworkInterface - ec2:DescribeNetworkInterfaces @@ -821,7 +810,7 @@ Resources: - !Ref SourceZipUrl - !Sub - "https://github.com/DataDog/datadog-serverless-functions/releases/download/aws-dd-forwarder-${DdForwarderVersion}/aws-dd-forwarder-${DdForwarderVersion}.zip" - - {DdForwarderVersion: !FindInMap [Constants, DdForwarder, Version]} + - { DdForwarderVersion: !FindInMap [Constants, DdForwarder, Version] } # The Forwarder's source code is too big to fit the inline code size limit for CloudFormation. In most of AWS # partitions and regions, the Forwarder is able to load its source code from a Lambda layer attached to it. 
# In places where Datadog can't/doesn't yet publish Lambda layers, use another Lambda to copy the source code @@ -1040,7 +1029,6 @@ Metadata: - InstallAsLayer - LayerARN - PermissionsBoundaryArn - - DdUsePrivateLink - DdUseVPC - DdHttpProxyURL - DdNoProxy From f7fb1e168399967bfa2406364388f34a8ca72c3d Mon Sep 17 00:00:00 2001 From: Vincent Boutour Date: Wed, 8 Oct 2025 16:30:02 +0200 Subject: [PATCH 04/18] docs(aws): Removing JSON explanation in the include at match documentation (#1007) Signed-off-by: Vincent Boutour --- aws/logs_monitoring/README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/aws/logs_monitoring/README.md b/aws/logs_monitoring/README.md index 4376ba7f8..c159cf29c 100644 --- a/aws/logs_monitoring/README.md +++ b/aws/logs_monitoring/README.md @@ -468,6 +468,7 @@ For all configuration options and details, including [Multi-Region deployment][2 [203]: https://docs.datadoghq.com/getting_started/site/#access-the-datadog-site [204]: https://app.datadoghq.com/organization-settings/api-keys [205]: https://registry.terraform.io/modules/DataDog/log-lambda-forwarder-datadog/aws/latest#multi-region-deployments + {{% /tab %}} {{% tab "Manual" %}} @@ -553,15 +554,14 @@ Datadog recommends using at least 10 reserved concurrency, but this defaults to `INCLUDE_AT_MATCH` : Only send logs matching the supplied regular expression, and not excluded by `EXCLUDE_AT_MATCH`. -Filtering rules are applied to the full JSON-formatted log, including any metadata that is automatically added by the Forwarder. However, transformations applied by [log pipelines][21], which occur after logs are sent to Datadog, cannot be used to filter logs in the Forwarder. Using an inefficient regular expression, such as `.*`, may slow down the Forwarder. +Filtering rules are applied to the log message as read by the forwarder. Using an inefficient regular expression, such as `.*`, may slow down the Forwarder. Some examples of regular expressions that can be used for log filtering: -- Include (or exclude) Lambda platform logs: `"(START|END) RequestId:\s`. The preceding `"` is needed to match the start of the log message, which is in a JSON blob (`{"message": "START RequestId...."}`). Datadog recommends keeping the `REPORT` logs, as they are used to populate the invocations list in the serverless function views. +- Include (or exclude) Lambda platform logs: `(START|END) RequestId:\s`. Datadog recommends keeping the `REPORT` logs, as they are used to populate the invocations list in the serverless function views. - Include CloudTrail error messages only: `errorMessage`. - Include only logs containing an HTTP 4XX or 5XX error code: `\b[4|5][0-9][0-9]\b`. -- Include only CloudWatch logs where the `message` field contains a specific JSON key/value pair: `\"awsRegion\":\"us-east-1\"`. - - The message field of a CloudWatch log event is encoded as a string. For example,`{"awsRegion": "us-east-1"}` is encoded as `{\"awsRegion\":\"us-east-1\"}`. Therefore, the pattern you provide must include `\` escape characters, like this: `\"awsRegion\":\"us-east-1\"`. +- Include only CloudWatch logs where the `message` field contains a specific JSON key/value pair: `"awsRegion":"us-east-1"`. To test different patterns against your logs, turn on [debug logs](#troubleshooting). 
From 7a3914161a6ba3b66cf7d5bdcb380867929a9c8a Mon Sep 17 00:00:00 2001 From: Vincent Boutour Date: Thu, 16 Oct 2025 14:37:48 +0200 Subject: [PATCH 05/18] Add environment variable to support backend storage tag enrichment (#1006) * feat(aws): AWSX-1566 Adding storage tag HTTP header Signed-off-by: Vincent Boutour * feat(aws): AWSX-1566 Adding new variable in the cloudformation template Signed-off-by: Vincent Boutour * feat: AWSX-1566 Restoring fetch in the integration test to see metrics Signed-off-by: Vincent Boutour * fixup! feat: AWSX-1566 Restoring fetch in the integration test to see metrics Signed-off-by: Vincent Boutour * docs(aws): AWSX-1566 Refine documentation around the new variable Signed-off-by: Vincent Boutour --------- Signed-off-by: Vincent Boutour --- aws/logs_monitoring/README.md | 22 ++++++++++--- .../logs/datadog_http_client.py | 32 +++++++++++++++---- aws/logs_monitoring/settings.py | 25 +++++++++++++++ aws/logs_monitoring/template.yaml | 26 ++++++++++++--- aws/logs_monitoring/tools/build_bundle.sh | 2 +- .../integration_tests/docker-compose.yml | 6 ++-- .../integration_tests/integration_tests.sh | 5 +-- ..._log_group_lambda_invocation.json~snapshot | 1 + .../snapshots/cloudwatch_log.json~snapshot | 1 + .../cloudwatch_log_cloudtrail.json~snapshot | 1 + .../cloudwatch_log_custom_tags.json~snapshot | 1 + ...dwatch_log_lambda_invocation.json~snapshot | 1 + .../cloudwatch_log_service_tag.json~snapshot | 1 + .../cloudwatch_log_timeout.json~snapshot | 1 + .../step_functions_log.json~snapshot | 1 + 15 files changed, 105 insertions(+), 21 deletions(-) diff --git a/aws/logs_monitoring/README.md b/aws/logs_monitoring/README.md index c159cf29c..4dd97323f 100644 --- a/aws/logs_monitoring/README.md +++ b/aws/logs_monitoring/README.md @@ -278,7 +278,7 @@ You can run the Forwarder in a VPC private subnet and send data to Datadog over 3. When installing the Forwarder with the CloudFormation template: 1. Set `DdUseVPC` to `true`. 2. Set `VPCSecurityGroupIds` and `VPCSubnetIds` based on your VPC settings. - 3. Set `DdFetchLambdaTags`, `DdFetchStepFunctionsTags` and `DdFetchS3Tags` to `false`, because AWS Resource Groups Tagging API doesn't support PrivateLink. + 3. Set `DdFetchLambdaTags`, `DdFetchStepFunctionsTags`, and `DdFetchS3Tags` to `false`, because AWS Resource Groups Tagging API doesn't support PrivateLink. ### AWS VPC and proxy support @@ -287,7 +287,7 @@ If you must deploy the Forwarder to a VPC without direct public internet access, 1. Unless the Forwarder is deployed to a public subnet, follow the [instructions][15] to add endpoints for Secrets Manager and S3 to the VPC, so that the Forwarder can access those services. 2. Update your proxy with following configurations ([HAProxy][17] or [NGINX][18]). If you are using another proxy, or Web Proxy, allowlist the Datadog domain, for example: `.{{< region-param key="dd_site" code="true" >}}`. 3. When installing the Forwarder with the CloudFormation template, set `DdUseVPC`, `VPCSecurityGroupIds`, and `VPCSubnetIds`. -4. Ensure the `DdFetchLambdaTags`, `DdFetchStepFunctionsTags` and `DdFetchS3Tags` options are disabled, because AWS VPC does not yet offer an endpoint for the Resource Groups Tagging API. +4. Ensure the `DdFetchLambdaTags`, `DdFetchStepFunctionsTags`, and `DdFetchS3Tags` options are disabled, because AWS VPC does not yet offer an endpoint for the Resource Groups Tagging API. 5. If you are using HAProxy or NGINX: - Set `DdApiUrl` to `http://:3834` or `https://:3834`. 
@@ -404,17 +404,23 @@ To test different patterns against your logs, turn on [debug logs](#troubleshoot ### Advanced (optional) +`DdEnrichS3Tags` +: Enabled by default. When enabled, instructs the Datadog backend to automatically enrich logs originating from S3 buckets with the tags associated with those buckets. This approach offers the same tag enrichment as `DdFetchS3Tags` but defers the operation after log ingestion, reducing Forwarder overhead. Requires [Resource Collection](https://docs.datadoghq.com/integrations/amazon-web-services/#resource-collection) to be enabled in your AWS integration. + +`DdEnrichCloudwatchTags` +: Enabled by default. When enabled, instructs the Datadog backend to automatically enrich logs originating from CloudWatch LogGroups with the tags associated with those log groups. This approach offers the same tag enrichment as `DdFetchLogGroupTags` but defers the operation after log ingestion, reducing Forwarder overhead. Requires [Resource Collection](https://docs.datadoghq.com/integrations/amazon-web-services/#resource-collection) to be enabled in your AWS integration. + `DdFetchLambdaTags` : Let the Forwarder fetch Lambda tags using GetResources API calls and apply them to logs, metrics, and traces. If set to true, permission `tag:GetResources` will be automatically added to the Lambda execution IAM role. `DdFetchLogGroupTags` -: Let the forwarder fetch Log Group tags using ListTagsLogGroup and apply them to logs, metrics, and traces. If set to true, permission `logs:ListTagsForResource` will be automatically added to the Lambda execution IAM role. +: **[DEPRECATED, use DdEnrichCloudwatchTags]** Let the forwarder fetch Log Group tags using ListTagsLogGroup and apply them to logs, metrics, and traces. If set to true, permission `logs:ListTagsForResource` will be automatically added to the Lambda execution IAM role. `DdFetchStepFunctionsTags` : Let the Forwarder fetch Step Functions tags using GetResources API calls and apply them to logs and traces (if Step Functions tracing is enabled). If set to true, permission `tag:GetResources` will be automatically added to the Lambda execution IAM role. `DdFetchS3Tags` -: Let the Forwarder fetch S3 tags using GetResources API calls and apply them to logs and traces. If set to true, permission `tag:GetResources` will be automatically added to the Lambda execution IAM role. +: **[DEPRECATED, use DdEnrichS3Tags]** Let the Forwarder fetch S3 tags using GetResources API calls and apply them to logs and traces. If set to true, permission `tag:GetResources` will be automatically added to the Lambda execution IAM role. `DdStepFunctionsTraceEnabled` : Set to true to enable tracing for all Step Functions. @@ -567,11 +573,17 @@ To test different patterns against your logs, turn on [debug logs](#troubleshoot ### Advanced (optional) +`DD_ENRICH_S3_TAGS` +: Enabled by default. When enabled, instructs the Datadog backend to automatically enrich logs originating from S3 buckets with the tags associated with those buckets. This approach offers the same tag enrichment as `DD_FETCH_S3_TAGS` but defers the operation after log ingestion, reducing Forwarder overhead. Requires https://docs.datadoghq.com/integrations/amazon-web-services/#resource-collection to be enabled in your AWS integration. + +`DD_ENRICH_CLOUDWATCH_TAGS` +: Enabled by default. When enabled, instructs the Datadog backend to automatically enrich logs originating from Cloudwatch LogGroup with the tags associated with those log groups. 
This approach offers the same tag enrichment as `DD_FETCH_LOG_GROUP_TAGS` but defers the operation after log ingestion, reducing Forwarder overhead. Requires https://docs.datadoghq.com/integrations/amazon-web-services/#resource-collection to be enabled in your AWS integration. + `DD_FETCH_LAMBDA_TAGS` : Let the Forwarder fetch Lambda tags using GetResources API calls and apply them to logs, metrics, and traces. If set to true, permission `tag:GetResources` will be automatically added to the Lambda execution IAM role. `DD_FETCH_LOG_GROUP_TAGS` -: Let the forwarder fetch Log Group tags using ListTagsLogGroup and apply them to logs, metrics, and traces. If set to true, permission `logs:ListTagsForResource` will be automatically added to the Lambda execution IAM role. +: [DEPRECATED, use DD_ENRICH_CLOUDWATCH_TAGS] Let the forwarder fetch Log Group tags using ListTagsLogGroup and apply them to logs, metrics, and traces. If set to true, permission `logs:ListTagsForResource` will be automatically added to the Lambda execution IAM role. `DD_FETCH_STEP_FUNCTIONS_TAGS` : Let the Forwarder fetch Step Functions tags using GetResources API calls and apply them to logs and traces (if Step Functions tracing is enabled). If set to true, permission `tag:GetResources` will be automatically added to the Lambda execution IAM role. diff --git a/aws/logs_monitoring/logs/datadog_http_client.py b/aws/logs_monitoring/logs/datadog_http_client.py index 67e5e2a03..79fa740bc 100644 --- a/aws/logs_monitoring/logs/datadog_http_client.py +++ b/aws/logs_monitoring/logs/datadog_http_client.py @@ -4,25 +4,41 @@ # Copyright 2021 Datadog, Inc. -import os import logging - +import os from concurrent.futures import as_completed + from requests_futures.sessions import FuturesSession -from logs.helpers import compress_logs -from logs.exceptions import ScrubbingException +from logs.exceptions import ScrubbingException +from logs.helpers import compress_logs from settings import ( - DD_USE_COMPRESSION, DD_COMPRESSION_LEVEL, - DD_MAX_WORKERS, DD_FORWARDER_VERSION, + DD_MAX_WORKERS, + DD_USE_COMPRESSION, + get_enrich_cloudwatch_tags, + get_enrich_s3_tags, ) logger = logging.getLogger() logger.setLevel(logging.getLevelName(os.environ.get("DD_LOG_LEVEL", "INFO").upper())) +def get_dd_storage_tag_header(): + storage_tag = "" + + if get_enrich_s3_tags(): + storage_tag += "s3" + + if get_enrich_cloudwatch_tags(): + if storage_tag != "": + storage_tag += "," + storage_tag += "cloudwatch" + + return storage_tag + + class DatadogHTTPClient(object): """ Client that sends a batch of logs over HTTP. 
@@ -37,6 +53,10 @@ class DatadogHTTPClient(object): _HEADERS["DD-EVP-ORIGIN"] = "aws_forwarder" _HEADERS["DD-EVP-ORIGIN-VERSION"] = DD_FORWARDER_VERSION + storage_tag = get_dd_storage_tag_header() + if storage_tag != "": + _HEADERS["DD-STORAGE-TAG"] = storage_tag + def __init__( self, host, port, no_ssl, skip_ssl_validation, api_key, scrubber, timeout=10 ): diff --git a/aws/logs_monitoring/settings.py b/aws/logs_monitoring/settings.py index 5385e56d1..5db1f4f58 100644 --- a/aws/logs_monitoring/settings.py +++ b/aws/logs_monitoring/settings.py @@ -250,6 +250,23 @@ def __init__(self, name, pattern, placeholder, enabled=True): ) +DD_ENRICH_S3_TAGS = get_env_var("DD_ENRICH_S3_TAGS", default="true", boolean=True) + +DD_ENRICH_CLOUDWATCH_TAGS = get_env_var( + "DD_ENRICH_CLOUDWATCH_TAGS", default="true", boolean=True +) + +if DD_FETCH_S3_TAGS and DD_ENRICH_S3_TAGS: + logger.warn( + "Enabling both DD_FETCH_S3_TAGS and DD_ENRICH_S3_TAGS might be unwanted" + ) + +if DD_FETCH_LOG_GROUP_TAGS and DD_ENRICH_CLOUDWATCH_TAGS: + logger.warn( + "Enabling both DD_FETCH_LOG_GROUP_TAGS and DD_ENRICH_CLOUDWATCH_TAGS might be unwanted" + ) + + def get_fetch_s3_tags(): return DD_FETCH_S3_TAGS @@ -266,6 +283,14 @@ def get_fetch_step_functions_tags(): return DD_FETCH_STEP_FUNCTIONS_TAGS +def get_enrich_s3_tags(): + return DD_ENRICH_S3_TAGS + + +def get_enrich_cloudwatch_tags(): + return DD_ENRICH_CLOUDWATCH_TAGS + + DD_SOURCE = "ddsource" DD_CUSTOM_TAGS = "ddtags" DD_SERVICE = "service" diff --git a/aws/logs_monitoring/template.yaml b/aws/logs_monitoring/template.yaml index 963bd2de4..b3ef6c873 100644 --- a/aws/logs_monitoring/template.yaml +++ b/aws/logs_monitoring/template.yaml @@ -75,6 +75,20 @@ Parameters: Type: String Default: "" Description: Add custom tags to forwarded logs, comma-delimited string, no trailing comma, e.g., env:prod,stack:classic + DdEnrichS3Tags: + Type: String + Default: true + AllowedValues: + - true + - false + Description: Instruct Datadog backend to enrich a log coming from a S3 bucket with the tag attached to this bucket. Datadog AWS Resource Collection needs to be enabled. + DdEnrichCloudwatchTags: + Type: String + Default: true + AllowedValues: + - true + - false + Description: Instruct Datadog backend to enrich a log coming from a Cloudwatch logGroup with the tag attached to this logGroup. Datadog AWS Resource Collection needs to be enabled. DdFetchLambdaTags: Type: String Default: true @@ -88,7 +102,7 @@ Parameters: AllowedValues: - true - false - Description: Let the forwarder fetch Log Group tags using ListTagsLogGroup and apply them to logs, metrics and traces. If set to true, permission logs:ListTagsLogGroup will be automatically added to the Lambda execution IAM role. The tags are cached in memory and S3 so that they'll only be fetched when the function cold starts or when the TTL (1 hour) expires. The forwarder increments the aws.lambda.enhanced.list_tags_log_group_api_call metric for each API call made. + Description: (DEPRECATED in favor of DdEnrichCloudwatchTags) Let the forwarder fetch Log Group tags using ListTagsLogGroup and apply them to logs, metrics and traces. If set to true, permission logs:ListTagsLogGroup will be automatically added to the Lambda execution IAM role. The tags are cached in memory and S3 so that they'll only be fetched when the function cold starts or when the TTL (1 hour) expires. The forwarder increments the aws.lambda.enhanced.list_tags_log_group_api_call metric for each API call made. 
DdFetchStepFunctionsTags: Type: String Default: true AllowedValues: - true - false Description: Let the forwarder fetch Step Functions tags using GetResources API calls and apply them to logs, metrics and traces. If set to true, permission tag:GetResources will be automatically added to the Lambda execution IAM role. The tags are cached in memory and S3 so that they'll only be fetched when the function cold starts or when the TTL (1 hour) expires. The forwarder increments the aws.lambda.enhanced.get_resources_api_calls metric for each API call made. DdFetchS3Tags: Type: String - Default: true + Default: false AllowedValues: - true - false - Description: Let the forwarder fetch S3 buckets tags using GetResources API calls and apply them to S3 based logs. If set to true, permission tag:GetResources will be automatically added to the Lambda execution IAM role. The tags are cached in memory and S3 so that they'll only be fetched when the function cold starts or when the TTL (1 hour) expires. The forwarder increments the aws.lambda.enhanced.get_resources_api_calls metric for each API call made. + Description: (DEPRECATED in favor of DdEnrichS3Tags) Let the forwarder fetch S3 buckets tags using GetResources API calls and apply them to S3 based logs. If set to true, permission tag:GetResources will be automatically added to the Lambda execution IAM role. The tags are cached in memory and S3 so that they'll only be fetched when the function cold starts or when the TTL (1 hour) expires. The forwarder increments the aws.lambda.enhanced.get_resources_api_calls metric for each API call made. DdNoSsl: Type: String Default: false @@ -448,11 +462,13 @@ Resources: - !Ref DdTags - !Ref AWS::NoValue DD_TAGS_CACHE_TTL_SECONDS: !Ref TagsCacheTTLSeconds + DD_ENRICH_S3_TAGS: !Ref DdEnrichS3Tags + DD_ENRICH_CLOUDWATCH_TAGS: !Ref DdEnrichCloudwatchTags + DD_FETCH_S3_TAGS: !Ref DdFetchS3Tags DD_FETCH_LAMBDA_TAGS: !If - SetDdFetchLambdaTags - !Ref DdFetchLambdaTags - !Ref AWS::NoValue - DD_FETCH_S3_TAGS: !Ref DdFetchS3Tags DD_FETCH_LOG_GROUP_TAGS: !If - SetDdFetchLogGroupTags - !Ref DdFetchLogGroupTags @@ -1018,6 +1034,8 @@ Metadata: - Label: default: Advanced (Optional) Parameters: + - DdEnrichS3Tags + - DdEnrichCloudwatchTags - DdFetchLambdaTags - DdFetchLogGroupTags - DdFetchStepFunctionsTags diff --git a/aws/logs_monitoring/tools/build_bundle.sh b/aws/logs_monitoring/tools/build_bundle.sh index 1f0669c4a..df450b3c2 100755 --- a/aws/logs_monitoring/tools/build_bundle.sh +++ b/aws/logs_monitoring/tools/build_bundle.sh @@ -64,7 +64,7 @@ docker_build_zip() { # between different python runtimes. temp_dir=$(mktemp -d) - docker buildx build --platform linux/arm64 --file "${DIR}/Dockerfile_bundle" -t "datadog-bundle:$1" .. --no-cache --build-arg "runtime=${PYTHON_VERSION}" + docker buildx build --platform linux/arm64 --file "${DIR}/Dockerfile_bundle" -t "datadog-bundle:$1" .. --no-cache --build-arg "runtime=${PYTHON_VERSION}" # Run the image by runtime tag, tar its generated `python` directory to stdout, # then extract it to a temp directory.
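The settings.py guard above exists because each legacy fetch flag now has a backend-enrichment counterpart: enabling both tags the same logs twice, once in the forwarder and once at ingestion. A minimal sketch of that check, where `env_flag` is a simplified stand-in for the repository's `get_env_var(..., boolean=True)`:

```python
import logging
import os

logger = logging.getLogger()


def env_flag(name: str, default: str) -> bool:
    # Simplified stand-in for settings.get_env_var(name, default, boolean=True).
    return os.environ.get(name, default).strip().lower() == "true"


DD_FETCH_S3_TAGS = env_flag("DD_FETCH_S3_TAGS", "false")
DD_ENRICH_S3_TAGS = env_flag("DD_ENRICH_S3_TAGS", "true")

if DD_FETCH_S3_TAGS and DD_ENRICH_S3_TAGS:
    # Same duplicate-tagging warning that settings.py emits.
    logger.warning(
        "Enabling both DD_FETCH_S3_TAGS and DD_ENRICH_S3_TAGS might be unwanted"
    )
```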
diff --git a/aws/logs_monitoring/tools/integration_tests/docker-compose.yml b/aws/logs_monitoring/tools/integration_tests/docker-compose.yml index 90e48ac5f..acf78f489 100644 --- a/aws/logs_monitoring/tools/integration_tests/docker-compose.yml +++ b/aws/logs_monitoring/tools/integration_tests/docker-compose.yml @@ -40,9 +40,9 @@ services: DD_USE_COMPRESSION: "false" DD_ADDITIONAL_TARGET_LAMBDAS: "${EXTERNAL_LAMBDAS}" DD_S3_BUCKET_NAME: "${DD_S3_BUCKET_NAME}" - DD_FETCH_LAMBDA_TAGS: "true" - DD_FETCH_LOG_GROUP_TAGS: "true" - DD_FETCH_STEP_FUNCTIONS_TAGS: "false" # intentionally set false to allow integration test for step function logs to run without hitting aws + DD_FETCH_LAMBDA_TAGS: "${DD_FETCH_LAMBDA_TAGS:-false}" + DD_FETCH_LOG_GROUP_TAGS: "${DD_FETCH_LOG_GROUP_TAGS:-false}" + DD_FETCH_STEP_FUNCTIONS_TAGS: "${DD_FETCH_STEP_FUNCTIONS_TAGS:-false}" DD_STORE_FAILED_EVENTS: "false" DD_TRACE_ENABLED: "true" expose: diff --git a/aws/logs_monitoring/tools/integration_tests/integration_tests.sh b/aws/logs_monitoring/tools/integration_tests/integration_tests.sh index 70e884212..8848b5599 100755 --- a/aws/logs_monitoring/tools/integration_tests/integration_tests.sh +++ b/aws/logs_monitoring/tools/integration_tests/integration_tests.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # Unless explicitly stated otherwise all files in this repository are licensed # under the Apache License Version 2.0. @@ -21,6 +21,7 @@ SNAPS=($SNAPSHOT_DIR) ADDITIONAL_LAMBDA=false CACHE_TEST=false DD_FETCH_LAMBDA_TAGS="true" +DD_FETCH_LOG_GROUP_TAGS="true" DD_FETCH_STEP_FUNCTIONS_TAGS="true" script_start_time=$(date -u +"%Y-%m-%dT%H:%M:%SZ") @@ -37,7 +38,6 @@ for arg in "$@"; do shift ;; - # -u or --update # Update the snapshots to reflect this test run -u | --update) @@ -152,6 +152,7 @@ LOG_LEVEL=${LOG_LEVEL} \ AWS_ACCOUNT_ID=${AWS_ACCOUNT_ID} \ SNAPSHOTS_DIR_NAME="./${SNAPSHOTS_DIR_NAME}" \ DD_FETCH_LAMBDA_TAGS=${DD_FETCH_LAMBDA_TAGS} \ + DD_FETCH_LOG_GROUP_TAGS=${DD_FETCH_LOG_GROUP_TAGS} \ DD_FETCH_STEP_FUNCTIONS_TAGS=${DD_FETCH_STEP_FUNCTIONS_TAGS} \ docker compose up --build --abort-on-container-exit diff --git a/aws/logs_monitoring/tools/integration_tests/snapshots/cloudwatch_customized_log_group_lambda_invocation.json~snapshot b/aws/logs_monitoring/tools/integration_tests/snapshots/cloudwatch_customized_log_group_lambda_invocation.json~snapshot index 23d7609f5..fd481ba52 100644 --- a/aws/logs_monitoring/tools/integration_tests/snapshots/cloudwatch_customized_log_group_lambda_invocation.json~snapshot +++ b/aws/logs_monitoring/tools/integration_tests/snapshots/cloudwatch_customized_log_group_lambda_invocation.json~snapshot @@ -75,6 +75,7 @@ "DD-API-KEY": "abcdefghijklmnopqrstuvwxyz012345", "DD-EVP-ORIGIN": "aws_forwarder", "DD-EVP-ORIGIN-VERSION": "", + "DD-STORAGE-TAG": "s3,cloudwatch", "Host": "recorder:8080", "User-Agent": "", "traceparent": "", diff --git a/aws/logs_monitoring/tools/integration_tests/snapshots/cloudwatch_log.json~snapshot b/aws/logs_monitoring/tools/integration_tests/snapshots/cloudwatch_log.json~snapshot index ed83e290e..3ea06e7e9 100644 --- a/aws/logs_monitoring/tools/integration_tests/snapshots/cloudwatch_log.json~snapshot +++ b/aws/logs_monitoring/tools/integration_tests/snapshots/cloudwatch_log.json~snapshot @@ -48,6 +48,7 @@ "DD-API-KEY": "abcdefghijklmnopqrstuvwxyz012345", "DD-EVP-ORIGIN": "aws_forwarder", "DD-EVP-ORIGIN-VERSION": "", + "DD-STORAGE-TAG": "s3,cloudwatch", "Host": "recorder:8080", "User-Agent": "", "traceparent": "", diff --git 
a/aws/logs_monitoring/tools/integration_tests/snapshots/cloudwatch_log_cloudtrail.json~snapshot b/aws/logs_monitoring/tools/integration_tests/snapshots/cloudwatch_log_cloudtrail.json~snapshot index d1cc0ee2f..3fc79dadd 100644 --- a/aws/logs_monitoring/tools/integration_tests/snapshots/cloudwatch_log_cloudtrail.json~snapshot +++ b/aws/logs_monitoring/tools/integration_tests/snapshots/cloudwatch_log_cloudtrail.json~snapshot @@ -84,6 +84,7 @@ "DD-API-KEY": "abcdefghijklmnopqrstuvwxyz012345", "DD-EVP-ORIGIN": "aws_forwarder", "DD-EVP-ORIGIN-VERSION": "", + "DD-STORAGE-TAG": "s3,cloudwatch", "Host": "recorder:8080", "User-Agent": "", "traceparent": "", diff --git a/aws/logs_monitoring/tools/integration_tests/snapshots/cloudwatch_log_custom_tags.json~snapshot b/aws/logs_monitoring/tools/integration_tests/snapshots/cloudwatch_log_custom_tags.json~snapshot index d68d0a08c..0fa74be37 100644 --- a/aws/logs_monitoring/tools/integration_tests/snapshots/cloudwatch_log_custom_tags.json~snapshot +++ b/aws/logs_monitoring/tools/integration_tests/snapshots/cloudwatch_log_custom_tags.json~snapshot @@ -30,6 +30,7 @@ "DD-API-KEY": "abcdefghijklmnopqrstuvwxyz012345", "DD-EVP-ORIGIN": "aws_forwarder", "DD-EVP-ORIGIN-VERSION": "", + "DD-STORAGE-TAG": "s3,cloudwatch", "Host": "recorder:8080", "User-Agent": "", "traceparent": "", diff --git a/aws/logs_monitoring/tools/integration_tests/snapshots/cloudwatch_log_lambda_invocation.json~snapshot b/aws/logs_monitoring/tools/integration_tests/snapshots/cloudwatch_log_lambda_invocation.json~snapshot index 328e4384b..85a2beba8 100644 --- a/aws/logs_monitoring/tools/integration_tests/snapshots/cloudwatch_log_lambda_invocation.json~snapshot +++ b/aws/logs_monitoring/tools/integration_tests/snapshots/cloudwatch_log_lambda_invocation.json~snapshot @@ -348,6 +348,7 @@ "DD-API-KEY": "abcdefghijklmnopqrstuvwxyz012345", "DD-EVP-ORIGIN": "aws_forwarder", "DD-EVP-ORIGIN-VERSION": "", + "DD-STORAGE-TAG": "s3,cloudwatch", "Host": "recorder:8080", "User-Agent": "", "traceparent": "", diff --git a/aws/logs_monitoring/tools/integration_tests/snapshots/cloudwatch_log_service_tag.json~snapshot b/aws/logs_monitoring/tools/integration_tests/snapshots/cloudwatch_log_service_tag.json~snapshot index ace4aa562..d94a7f450 100644 --- a/aws/logs_monitoring/tools/integration_tests/snapshots/cloudwatch_log_service_tag.json~snapshot +++ b/aws/logs_monitoring/tools/integration_tests/snapshots/cloudwatch_log_service_tag.json~snapshot @@ -30,6 +30,7 @@ "DD-API-KEY": "abcdefghijklmnopqrstuvwxyz012345", "DD-EVP-ORIGIN": "aws_forwarder", "DD-EVP-ORIGIN-VERSION": "", + "DD-STORAGE-TAG": "s3,cloudwatch", "Host": "recorder:8080", "User-Agent": "", "traceparent": "", diff --git a/aws/logs_monitoring/tools/integration_tests/snapshots/cloudwatch_log_timeout.json~snapshot b/aws/logs_monitoring/tools/integration_tests/snapshots/cloudwatch_log_timeout.json~snapshot index d65c8dec2..cc54f2a3b 100644 --- a/aws/logs_monitoring/tools/integration_tests/snapshots/cloudwatch_log_timeout.json~snapshot +++ b/aws/logs_monitoring/tools/integration_tests/snapshots/cloudwatch_log_timeout.json~snapshot @@ -96,6 +96,7 @@ "DD-API-KEY": "abcdefghijklmnopqrstuvwxyz012345", "DD-EVP-ORIGIN": "aws_forwarder", "DD-EVP-ORIGIN-VERSION": "", + "DD-STORAGE-TAG": "s3,cloudwatch", "Host": "recorder:8080", "User-Agent": "", "traceparent": "", diff --git a/aws/logs_monitoring/tools/integration_tests/snapshots/step_functions_log.json~snapshot b/aws/logs_monitoring/tools/integration_tests/snapshots/step_functions_log.json~snapshot index 
38015a018..09a539383 100644 --- a/aws/logs_monitoring/tools/integration_tests/snapshots/step_functions_log.json~snapshot +++ b/aws/logs_monitoring/tools/integration_tests/snapshots/step_functions_log.json~snapshot @@ -30,6 +30,7 @@ "DD-API-KEY": "abcdefghijklmnopqrstuvwxyz012345", "DD-EVP-ORIGIN": "aws_forwarder", "DD-EVP-ORIGIN-VERSION": "", + "DD-STORAGE-TAG": "s3,cloudwatch", "Host": "recorder:8080", "User-Agent": "", "traceparent": "", From 14e7d114028596b440b71cb9f3ed1c109f91a982 Mon Sep 17 00:00:00 2001 From: Vincent Boutour Date: Thu, 16 Oct 2025 14:46:42 +0200 Subject: [PATCH 06/18] fix(aws): Fixing discrepancies of the fetch configuration between cloudformation and code Signed-off-by: Vincent Boutour --- aws/logs_monitoring/settings.py | 2 +- aws/logs_monitoring/template.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/aws/logs_monitoring/settings.py b/aws/logs_monitoring/settings.py index 5db1f4f58..6bcacade8 100644 --- a/aws/logs_monitoring/settings.py +++ b/aws/logs_monitoring/settings.py @@ -235,7 +235,7 @@ def __init__(self, name, pattern, placeholder, enabled=True): "DD_MULTILINE_LOG_REGEX_PATTERN", default=None ) -DD_FETCH_S3_TAGS = get_env_var("DD_FETCH_S3_TAGS", default="true", boolean=True) +DD_FETCH_S3_TAGS = get_env_var("DD_FETCH_S3_TAGS", default="false", boolean=True) DD_FETCH_LOG_GROUP_TAGS = get_env_var( "DD_FETCH_LOG_GROUP_TAGS", default="false", boolean=True diff --git a/aws/logs_monitoring/template.yaml b/aws/logs_monitoring/template.yaml index b3ef6c873..8848b3ef7 100644 --- a/aws/logs_monitoring/template.yaml +++ b/aws/logs_monitoring/template.yaml @@ -98,7 +98,7 @@ Parameters: Description: Let the forwarder fetch Lambda tags using GetResources API calls and apply them to logs, metrics and traces. If set to true, permission tag:GetResources will be automatically added to the Lambda execution IAM role. The tags are cached in memory and S3 so that they'll only be fetched when the function cold starts or when the TTL (1 hour) expires. The forwarder increments the aws.lambda.enhanced.get_resources_api_calls metric for each API call made. DdFetchLogGroupTags: Type: String - Default: true + Default: false AllowedValues: - true - false From b50198de58ba5f356f232ebb64c2a4f063780f1f Mon Sep 17 00:00:00 2001 From: Vincent Boutour Date: Fri, 17 Oct 2025 11:47:10 +0200 Subject: [PATCH 07/18] feat: Add a changelog for the v5 (#1009) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat: Add a changelog for the v5 Signed-off-by: Vincent Boutour * fixup! feat: Add a changelog for the v5 Signed-off-by: Vincent Boutour * Update aws/logs_monitoring/CHANGELOG.md Co-authored-by: Raphaël Allier <118757729+RaphaelAllier@users.noreply.github.com> * fixup! 
Update aws/logs_monitoring/CHANGELOG.md Signed-off-by: Vincent Boutour --------- Signed-off-by: Vincent Boutour Co-authored-by: Raphaël Allier <118757729+RaphaelAllier@users.noreply.github.com> --- aws/logs_monitoring/CHANGELOG.md | 120 +++++++++++++++++++++++++++++++ 1 file changed, 120 insertions(+) create mode 100644 aws/logs_monitoring/CHANGELOG.md diff --git a/aws/logs_monitoring/CHANGELOG.md b/aws/logs_monitoring/CHANGELOG.md new file mode 100644 index 000000000..f6bd55bbb --- /dev/null +++ b/aws/logs_monitoring/CHANGELOG.md @@ -0,0 +1,120 @@ +# Datadog Lambda Forwarder Changelog + +## v5.0.0 - BREAKING CHANGES + +### Overview + +Version 5.0.0 of the Datadog Lambda Forwarder introduces several breaking changes that remove deprecated features and improve log filtering behavior. This release introduces a new way to enrich your logs with tags that will reduce AWS Lambda related cost (S3, KMS and Lambda). + +### New Features + +#### 1. Backend Storage Tag Enrichment + +**Added:** + +- New `DD_ENRICH_S3_TAGS` / `DdEnrichS3Tags` parameter (default: `true`) +- New `DD_ENRICH_CLOUDWATCH_TAGS` / `DdEnrichCloudwatchTags` parameter (default: `true`) +- These instruct the Datadog backend to automatically enrich logs with resource tags **after ingestion** +- New CloudWatch tags can appear on logs; check your Datadog log index configuration to ensure a smooth transition. + +**Benefits:** + +- **Reduces forwarder cost** and execution time +- Provides the same tag enrichment as `DdFetchS3Tags` and `DdFetchLogGroupTags` +- Requires [Resource Collection](https://docs.datadoghq.com/integrations/amazon-web-services/#resource-collection) enabled in your AWS integration + +**Deprecation Notice:** + +- `DdFetchS3Tags` is now marked as **DEPRECATED** in favor of `DdEnrichS3Tags` +- `DdFetchLogGroupTags` is now marked as **DEPRECATED** in favor of `DdEnrichCloudwatchTags` +- `DD_FETCH_S3_TAGS` now defaults to `false` (previously `true`) + +--- + +### Breaking Changes + +#### 1. Changed Regex Matching Behavior for Log Filtering + +**What Changed:** + +- `IncludeAtMatch` / `INCLUDE_AT_MATCH` and `ExcludeAtMatch` / `EXCLUDE_AT_MATCH` regex patterns now match **only against the log message** itself - Previously, these patterns matched against the **entire JSON-formatted log** + +**Migration Required:** + +- **Review and update your filtering regex patterns** - If your patterns relied on matching against JSON structure or metadata fields, they will need to be rewritten - Example changes needed: - **Before (v4)**: `\"awsRegion\":\"us-east-1\"` (matched JSON with escaped quotes) - **After (v5)**: `"awsRegion":"us-east-1"` (matches the message content directly) - Patterns that matched the `message` field content should continue to work with minimal changes + +--- + +#### 2. Removed TCP Transport Support + +**What Changed:** + +- Removed the `DD_USE_TCP` / `DdUseTcp` environment variable and parameter - Deleted the TCP client implementation - All logs now **must** be sent via HTTP/HTTPS + +**Migration Required:** + +- Remove any configuration setting `DD_USE_TCP=true` or `DdUseTcp=true` - The forwarder will now exclusively use HTTP transport - If you were using TCP with custom ports (10516), these configurations will be ignored - The default HTTP endpoint is now `http-intake.logs.` on port 443 + +--- +
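To make breaking change 1 concrete, here is a small sketch (illustrative only, not part of the release) of why v4-era patterns needed escaped quotes: v4 ran the regex against the JSON-serialized event, while v5 runs it against the inner log message.

```python
import json
import re

event = {"message": '{"awsRegion":"us-east-1"}', "ddtags": "env:prod"}

v4_target = json.dumps(event)  # inner quotes arrive escaped: {\"awsRegion\" ...
v5_target = event["message"]   # the raw message, no extra escaping

assert re.search(r'\\"awsRegion\\":\\"us-east-1\\"', v4_target)  # v4-era pattern
assert re.search(r'"awsRegion":"us-east-1"', v5_target)          # v5 pattern
```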
#### 3. Removed Deprecated PrivateLink Environment Variable + +**What Changed:** + +- Removed the `DD_USE_PRIVATE_LINK` / `DdUsePrivateLink` environment variable and parameter + +**Migration Required:** + +- Remove any configuration setting `DD_USE_PRIVATE_LINK=true` +- **AWS PrivateLink is still fully supported**, but you must follow [PrivateLink documentation](https://docs.datadoghq.com/agent/guide/private-link/): + 1. Set up VPC endpoints for `api`, `http-logs.intake`, and `trace.agent` as documented + 2. Configure the forwarder with `DdUseVPC=true` + 3. Set `VPCSecurityGroupIds` and `VPCSubnetIds` + +**Why This Changed:** + +- The variable was previously deprecated but never removed. + +--- + +### Upgrade Instructions + +Follow the usual [documentation](https://docs.datadoghq.com/logs/guide/forwarder/?tab=cloudformation#upgrade-to-a-new-version) about upgrading your Lambda Forwarder. + +#### Pre-Upgrade Checklist + +1. **Verify you're not using TCP transport:** + + ```bash + aws lambda get-function-configuration --function-name "" --query 'Environment.Variables.DD_USE_TCP' + ``` + +2. **Verify you're not using the deprecated PrivateLink variable:** + + ```bash + aws lambda get-function-configuration --function-name "" --query 'Environment.Variables.DD_USE_PRIVATE_LINK' + ``` + +3. **Review your log filtering patterns:** - If using `IncludeAtMatch` or `ExcludeAtMatch`, test your patterns against log messages only - Remove any JSON escaping (e.g., `\"` → `"`) + +#### Testing + +After upgrading: + +1. Verify logs are being forwarded to Datadog 2. Check that filtering rules still work as expected 3. Confirm tag enrichment is working (check logs in Datadog Explorer) 4. Monitor forwarder execution duration and errors in CloudWatch From 9e5b3f4b7c2e23b047c91fcc1856597da8f6ae41 Mon Sep 17 00:00:00 2001 From: Vincent Boutour Date: Fri, 17 Oct 2025 14:16:19 +0200 Subject: [PATCH 08/18] Update aws/logs_monitoring/CHANGELOG.md Co-authored-by: Georgi --- aws/logs_monitoring/CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aws/logs_monitoring/CHANGELOG.md b/aws/logs_monitoring/CHANGELOG.md index f6bd55bbb..19506e4e8 100644 --- a/aws/logs_monitoring/CHANGELOG.md +++ b/aws/logs_monitoring/CHANGELOG.md @@ -77,7 +77,7 @@ Version 5.0.0 of the Datadog Lambda Forwarder introduces several breaking change **Migration Required:** - Remove any configuration setting `DD_USE_PRIVATE_LINK=true` -- **AWS PrivateLink is still fully supported**, but you must follow [PrivateLink documentation](https://docs.datadoghq.com/agent/guide/private-link/): +- **AWS PrivateLink is still fully supported**, follow [PrivateLink documentation](https://docs.datadoghq.com/agent/guide/private-link/) for more information about the setup: 1. Set up VPC endpoints for `api`, `http-logs.intake`, and `trace.agent` as documented 2. Configure the forwarder with `DdUseVPC=true` 3.
Set `VPCSecurityGroupIds` and `VPCSubnetIds` From 84a369c7f465c65baa469673f0bd70e69d09581e Mon Sep 17 00:00:00 2001 From: Vincent Boutour Date: Fri, 17 Oct 2025 14:16:31 +0200 Subject: [PATCH 09/18] Update aws/logs_monitoring/CHANGELOG.md Co-authored-by: Georgi --- aws/logs_monitoring/CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aws/logs_monitoring/CHANGELOG.md b/aws/logs_monitoring/CHANGELOG.md index 19506e4e8..3349d7a57 100644 --- a/aws/logs_monitoring/CHANGELOG.md +++ b/aws/logs_monitoring/CHANGELOG.md @@ -94,7 +94,7 @@ Follow the usual [documentation](https://docs.datadoghq.com/logs/guide/forwarder #### Pre-Upgrade Checklist -1. **Verify you're not using TCP transport:** +1. **Verify that TCP transport is not used:** ```bash aws lambda get-function-configuration --function-name "" --query 'Environment.Variables.DD_USE_TCP' From 4899a4cd38f1a0d0e1314ed695a777bf34ddf59c Mon Sep 17 00:00:00 2001 From: Vincent Boutour Date: Fri, 17 Oct 2025 14:16:40 +0200 Subject: [PATCH 10/18] Update aws/logs_monitoring/CHANGELOG.md Co-authored-by: Georgi --- aws/logs_monitoring/CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aws/logs_monitoring/CHANGELOG.md b/aws/logs_monitoring/CHANGELOG.md index 3349d7a57..a83153bd1 100644 --- a/aws/logs_monitoring/CHANGELOG.md +++ b/aws/logs_monitoring/CHANGELOG.md @@ -90,7 +90,7 @@ Version 5.0.0 of the Datadog Lambda Forwarder introduces several breaking change ### Upgrade Instructions -Follow the usual [documentation](https://docs.datadoghq.com/logs/guide/forwarder/?tab=cloudformation#upgrade-to-a-new-version) about upgrading your Lambda Forwarder. +Follow Datadog's official [documentation](https://docs.datadoghq.com/logs/guide/forwarder/?tab=cloudformation#upgrade-to-a-new-version) for upgrading the Lambda Forwarder. From f2d2298e7244794c0dac6faaa83ae3725fb9062c Mon Sep 17 00:00:00 2001 From: Vincent Boutour Date: Fri, 17 Oct 2025 14:17:05 +0200 Subject: [PATCH 11/18] Update aws/logs_monitoring/CHANGELOG.md Co-authored-by: Georgi --- aws/logs_monitoring/CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aws/logs_monitoring/CHANGELOG.md b/aws/logs_monitoring/CHANGELOG.md index a83153bd1..b56b408ba 100644 --- a/aws/logs_monitoring/CHANGELOG.md +++ b/aws/logs_monitoring/CHANGELOG.md @@ -106,7 +106,7 @@ Follow Datadog's official [documentation](https://docs.datadoghq.com/logs/guide/ aws lambda get-function-configuration --function-name "" --query 'Environment.Variables.DD_USE_PRIVATE_LINK' ``` -3. **Review your log filtering patterns:** +3. **Review log filtering patterns:** - If using `IncludeAtMatch` or `ExcludeAtMatch`, test your patterns against log messages only - Remove any JSON escaping (e.g., `\"` → `"`) From 6121a280423ef37689403287fdbf9a4dc1649e5d Mon Sep 17 00:00:00 2001 From: Vincent Boutour Date: Fri, 17 Oct 2025 14:17:15 +0200 Subject: [PATCH 12/18] Update aws/logs_monitoring/CHANGELOG.md Co-authored-by: Georgi --- aws/logs_monitoring/CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aws/logs_monitoring/CHANGELOG.md b/aws/logs_monitoring/CHANGELOG.md index b56b408ba..3d390c97f 100644 --- a/aws/logs_monitoring/CHANGELOG.md +++ b/aws/logs_monitoring/CHANGELOG.md @@ -107,7 +107,7 @@ Follow Datadog's official [documentation](https://docs.datadoghq.com/logs/guide/ ``` 3.
**Review log filtering patterns:** - - If using `IncludeAtMatch` or `ExcludeAtMatch`, test your patterns against log messages only + - When using `IncludeAtMatch` or `ExcludeAtMatch`, test the patterns against log messages only - Remove any JSON escaping (e.g., `\"` → `"`) #### Testing From df4d09c2ae185fb675526a1122c031f165631cd5 Mon Sep 17 00:00:00 2001 From: Vincent Boutour Date: Fri, 17 Oct 2025 14:49:19 +0200 Subject: [PATCH 13/18] docs: Updating changelog from comments Signed-off-by: Vincent Boutour --- aws/logs_monitoring/CHANGELOG.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/aws/logs_monitoring/CHANGELOG.md b/aws/logs_monitoring/CHANGELOG.md index 3d390c97f..cf5812e19 100644 --- a/aws/logs_monitoring/CHANGELOG.md +++ b/aws/logs_monitoring/CHANGELOG.md @@ -4,7 +4,7 @@ ### Overview -Version 5.0.0 of the Datadog Lambda Forwarder introduces several breaking changes that remove deprecated features and improve log filtering behavior. This release introduces a new way to enrich your logs with tags that will reduce AWS Lambda related cost (S3, KMS and Lambda). +Version 5.0.0 of the Datadog Lambda Forwarder introduces several breaking changes that remove deprecated features and improve log filtering behavior. This release introduces a new way to enrich logs with tags that will reduce the forwarder's AWS costs (S3, KMS, and Lambda). ### New Features @@ -42,7 +42,7 @@ Version 5.0.0 of the Datadog Lambda Forwarder introduces several breaking change **Migration Required:** -- **Review and update your filtering regex patterns** +- **Review and update filtering regex patterns** - If your patterns relied on matching against JSON structure or metadata fields, they will need to be rewritten - Example changes needed: - **Before (v4)**: `\"awsRegion\":\"us-east-1\"` (matched JSON with escaped quotes) @@ -62,7 +62,7 @@ Version 5.0.0 of the Datadog Lambda Forwarder introduces several breaking change **Migration Required:** - Remove any configuration setting `DD_USE_TCP=true` or `DdUseTcp=true` -- The forwarder will now exclusively use HTTP transport +- The forwarder will now exclusively use the HTTP protocol - If you were using TCP with custom ports (10516), these configurations will be ignored - The default HTTP endpoint is now `http-intake.logs.` on port 443 From 8edda81fa083961d54abdc7e5ffb862a4b7215b7 Mon Sep 17 00:00:00 2001 From: Vincent Boutour Date: Fri, 17 Oct 2025 14:49:34 +0200 Subject: [PATCH 14/18] fixup! docs: Updating changelog from comments Signed-off-by: Vincent Boutour --- aws/logs_monitoring/CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aws/logs_monitoring/CHANGELOG.md b/aws/logs_monitoring/CHANGELOG.md index cf5812e19..5f4ff5e5e 100644 --- a/aws/logs_monitoring/CHANGELOG.md +++ b/aws/logs_monitoring/CHANGELOG.md @@ -100,7 +100,7 @@ Follow Datadog's official [documentation](https://docs.datadoghq.com/logs/guide/ aws lambda get-function-configuration --function-name "" --query 'Environment.Variables.DD_USE_TCP' ``` -2. **Verify you're not using the deprecated PrivateLink variable:** +2.
**Verify that the deprecated PrivateLink variable is not used:** ```bash aws lambda get-function-configuration --function-name "" --query 'Environment.Variables.DD_USE_PRIVATE_LINK' From 2356c4ead446aeff1e46f91d35907ea80ec3fb09 Mon Sep 17 00:00:00 2001 From: Vincent Boutour Date: Tue, 21 Oct 2025 17:11:11 +0200 Subject: [PATCH 15/18] Add reference to the changelog in README (for public doc) (#1011) * feat: Add link to the changelog in the readme to reflect the documentation The readme is feeding this page https://docs.datadoghq.com/logs/guide/forwarder/?tab=cloudformation Signed-off-by: Vincent Boutour * fixup! feat: Add link to the changelog in the readme to reflect the documentation Signed-off-by: Vincent Boutour --------- Signed-off-by: Vincent Boutour --- aws/logs_monitoring/README.md | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/aws/logs_monitoring/README.md b/aws/logs_monitoring/README.md index 4dd97323f..ac0fd3332 100644 --- a/aws/logs_monitoring/README.md +++ b/aws/logs_monitoring/README.md @@ -118,6 +118,17 @@ The environment variables provided on this If you encounter issues upgrading to the latest version, check the Troubleshooting section. +### Upgrade to v5.0.0+ + +**Version 5.0.0 is a major release with breaking changes.** Before upgrading, review the [CHANGELOG.md][25] for detailed information about breaking changes and migration requirements. + +Key breaking changes in v5.0.0: + +- **Log filtering behavior changed**: `IncludeAtMatch` and `ExcludeAtMatch` now match against the log message only, not the entire JSON structure - **TCP transport removed**: `DD_USE_TCP` parameter removed, all logs must use HTTP/HTTPS - **PrivateLink variable removed**: `DD_USE_PRIVATE_LINK` removed (PrivateLink is still supported via `DdUseVPC`) - **New tag enrichment**: Backend-based tag enrichment enabled by default via `DdEnrichS3Tags` and `DdEnrichCloudwatchTags`, reducing forwarder costs + ### Upgrade an older version to 4.13.0+ Starting version 4.13.0+ Lambda function has been updated to require **Python 3.13**. If upgrading an older forwarder installation to 4.13.0+, ensure AWS Lambda function is configured to use Python 3.13 @@ -762,3 +773,4 @@ Additional helpful documentation, links, and articles: [22]: https://docs.datadoghq.com/agent/guide/private-link/ [23]: https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/transform-aws-serverless.html [24]: https://docs.datadoghq.com/logs/log_configuration/processors/?tab=ui#log-date-remapper +[25]: https://github.com/DataDog/datadog-serverless-functions/blob/master/aws/logs_monitoring/CHANGELOG.md From 2712e515b6b78d37fcc07d20940e0e744fe11106 Mon Sep 17 00:00:00 2001 From: Vincent Boutour Date: Wed, 22 Oct 2025 13:57:31 +0200 Subject: [PATCH 16/18] refactor: Improve error handling for exception (#1012) * refactor: Improve error handling for exception Signed-off-by: Vincent Boutour * fixup! refactor: Improve error handling for exception Signed-off-by: Vincent Boutour * fixup!
refactor: Improve error handling for exception Signed-off-by: Vincent Boutour --------- Signed-off-by: Vincent Boutour --- .../caching/base_tags_cache.py | 20 ++++----- .../caching/cloudwatch_log_group_cache.py | 16 +++---- aws/logs_monitoring/caching/lambda_cache.py | 6 +-- aws/logs_monitoring/caching/s3_tags_cache.py | 4 +- .../caching/step_functions_cache.py | 6 +-- .../enhanced_lambda_metrics.py | 17 ++++---- aws/logs_monitoring/forwarder.py | 18 ++++---- aws/logs_monitoring/lambda_function.py | 43 +++++++++++-------- .../logs/datadog_http_client.py | 8 ++-- aws/logs_monitoring/logs/datadog_matcher.py | 7 ++- aws/logs_monitoring/logs/helpers.py | 10 ++--- aws/logs_monitoring/retry/storage.py | 12 +++--- aws/logs_monitoring/steps/enrichment.py | 8 ++-- .../steps/handlers/aws_attributes.py | 4 +- 14 files changed, 96 insertions(+), 83 deletions(-) diff --git a/aws/logs_monitoring/caching/base_tags_cache.py b/aws/logs_monitoring/caching/base_tags_cache.py index c38aa00a2..79ac21f88 100644 --- a/aws/logs_monitoring/caching/base_tags_cache.py +++ b/aws/logs_monitoring/caching/base_tags_cache.py @@ -60,9 +60,9 @@ def write_cache_to_s3(self, data): DD_S3_BUCKET_NAME, self.get_cache_name_with_prefix() ) s3_object.put(Body=(bytes(json.dumps(data).encode("UTF-8")))) - except ClientError: + except ClientError as e: send_forwarder_internal_metrics("s3_cache_write_failure") - self.logger.debug("Unable to write new cache to S3", exc_info=True) + self.logger.debug(f"Unable to write new cache to S3: {e}", exc_info=True) def acquire_s3_cache_lock(self): """Acquire cache lock""" @@ -76,16 +76,16 @@ def acquire_s3_cache_lock(self): last_modified_unix_time = get_last_modified_time(file_content) if last_modified_unix_time + DD_S3_CACHE_LOCK_TTL_SECONDS >= time(): return False - except Exception: - self.logger.debug("Unable to get cache lock file") + except Exception as e: + self.logger.debug(f"Unable to get cache lock file: {e}") # lock file doesn't exist, create file to acquire lock try: cache_lock_object.put(Body=(bytes("lock".encode("UTF-8")))) send_forwarder_internal_metrics("s3_cache_lock_acquired") self.logger.debug("S3 cache lock acquired") - except ClientError: - self.logger.debug("Unable to write S3 cache lock file", exc_info=True) + except ClientError as e: + self.logger.debug(f"Unable to write S3 cache lock file: {e}", exc_info=True) return False return True @@ -99,9 +99,9 @@ def release_s3_cache_lock(self): cache_lock_object.delete() send_forwarder_internal_metrics("s3_cache_lock_released") self.logger.debug("S3 cache lock released") - except ClientError: + except ClientError as e: send_forwarder_internal_metrics("s3_cache_lock_release_failure") - self.logger.debug("Unable to release S3 cache lock", exc_info=True) + self.logger.debug(f"Unable to release S3 cache lock: {e}", exc_info=True) def get_cache_from_s3(self): """Retrieves tags cache from s3 and returns the body along with @@ -113,9 +113,9 @@ def get_cache_from_s3(self): file_content = cache_object.get() tags_cache = json.loads(file_content["Body"].read().decode("utf-8")) last_modified_unix_time = get_last_modified_time(file_content) - except: + except Exception as e: send_forwarder_internal_metrics("s3_cache_fetch_failure") - self.logger.debug("Unable to fetch cache from S3", exc_info=True) + self.logger.debug(f"Unable to fetch cache from S3: {e}", exc_info=True) return {}, -1 return tags_cache, last_modified_unix_time diff --git a/aws/logs_monitoring/caching/cloudwatch_log_group_cache.py 
b/aws/logs_monitoring/caching/cloudwatch_log_group_cache.py index 232281991..c22b201bb 100644 --- a/aws/logs_monitoring/caching/cloudwatch_log_group_cache.py +++ b/aws/logs_monitoring/caching/cloudwatch_log_group_cache.py @@ -103,10 +103,10 @@ def _get_log_group_tags_from_cache(self, cache_file_name): ) tags_cache = json.loads(response.get("Body").read().decode("utf-8")) last_modified_unix_time = int(response.get("LastModified").timestamp()) - except Exception: + except Exception as e: send_forwarder_internal_metrics("loggroup_cache_fetch_failure") - self.logger.exception( - "Failed to get log group tags from cache", exc_info=True + self.logger.error( + f"Failed to get log group tags from cache: {e}", exc_info=True ) return None, -1 @@ -120,10 +120,10 @@ def _update_log_group_tags_cache(self, log_group, tags): Key=cache_file_name, Body=(bytes(json.dumps(tags).encode("UTF-8"))), ) - except Exception: + except Exception as e: send_forwarder_internal_metrics("loggroup_cache_write_failure") - self.logger.exception( - "Failed to update log group tags cache", exc_info=True + self.logger.error( + f"Failed to update log group tags cache: {e}", exc_info=True ) def _is_expired(self, last_modified): @@ -150,8 +150,8 @@ def _get_log_group_tags(self, log_group_arn): response = self.cloudwatch_logs_client.list_tags_for_resource( resourceArn=log_group_arn ) - except Exception: - self.logger.exception("Failed to get log group tags", exc_info=True) + except Exception as e: + self.logger.error(f"Failed to get log group tags: {e}", exc_info=True) formatted_tags = None if response is not None: formatted_tags = [ diff --git a/aws/logs_monitoring/caching/lambda_cache.py b/aws/logs_monitoring/caching/lambda_cache.py index 460522d69..034827299 100644 --- a/aws/logs_monitoring/caching/lambda_cache.py +++ b/aws/logs_monitoring/caching/lambda_cache.py @@ -42,9 +42,9 @@ def build_tags_cache(self): tags_fetch_success = True except ClientError as e: - self.logger.exception( - "Encountered a ClientError when trying to fetch tags. You may need to give " - "this Lambda's role the 'tag:GetResources' permission" + self.logger.error( + f"Failed to fetch Lambda tags: {e}. " + "Add 'tag:GetResources' permission to the Forwarder's IAM role." ) additional_tags = [ f"http_status_code:{e.response['ResponseMetadata']['HTTPStatusCode']}" diff --git a/aws/logs_monitoring/caching/s3_tags_cache.py b/aws/logs_monitoring/caching/s3_tags_cache.py index 107da5132..34a559064 100644 --- a/aws/logs_monitoring/caching/s3_tags_cache.py +++ b/aws/logs_monitoring/caching/s3_tags_cache.py @@ -40,9 +40,9 @@ def build_tags_cache(self): tags_by_arn_cache.update(page_tags_by_arn) tags_fetch_success = True except ClientError as e: - self.logger.exception( + self.logger.error( "Encountered a ClientError when trying to fetch tags. You may need to give " - "this Lambda's role the 'tag:GetResources' permission" + f"this Lambda's role the 'tag:GetResources' permission: {e}" ) additional_tags = [ f"http_status_code:{e.response['ResponseMetadata']['HTTPStatusCode']}" diff --git a/aws/logs_monitoring/caching/step_functions_cache.py b/aws/logs_monitoring/caching/step_functions_cache.py index 811071965..0a240ed7a 100644 --- a/aws/logs_monitoring/caching/step_functions_cache.py +++ b/aws/logs_monitoring/caching/step_functions_cache.py @@ -49,9 +49,9 @@ def build_tags_cache(self): tags_fetch_success = True except ClientError as e: - self.logger.exception( + self.logger.error( "Encountered a ClientError when trying to fetch tags. 
You may need to give " - "this Lambda's role the 'tag:GetResources' permission" + f"this Lambda's role the 'tag:GetResources' permission: {e}" ) additional_tags = [ f"http_status_code:{e.response['ResponseMetadata']['HTTPStatusCode']}" @@ -131,7 +131,7 @@ def _get_state_machine_tags(self, state_machine_arn: str): ResourceARNList=[state_machine_arn] ) except Exception as e: - self.logger.exception(f"Failed to get Step Functions tags due to {e}") + self.logger.error(f"Failed to get Step Functions tags due to {e}") if response and len(response.get("ResourceTagMappingList", {})) > 0: resource_dict = response.get("ResourceTagMappingList")[0] diff --git a/aws/logs_monitoring/enhanced_lambda_metrics.py b/aws/logs_monitoring/enhanced_lambda_metrics.py index e8c85206c..4de8f4607 100644 --- a/aws/logs_monitoring/enhanced_lambda_metrics.py +++ b/aws/logs_monitoring/enhanced_lambda_metrics.py @@ -2,11 +2,11 @@ # under the Apache License Version 2.0. # This product includes software developed at Datadog (https://www.datadoghq.com/). # Copyright 2021 Datadog, Inc. +import datetime import json -import os import logging +import os import re -import datetime from time import time ENHANCED_METRICS_NAMESPACE_PREFIX = "aws.lambda.enhanced" @@ -168,10 +168,11 @@ def parse_and_submit_enhanced_metrics(logs, cache_layer): ) for enhanced_metric in enhanced_metrics: enhanced_metric.submit_to_dd() - except Exception: - logger.exception( - "Encountered an error while trying to parse and submit enhanced metrics for log %s", + except Exception as e: + logger.error( + "Encountered an error while trying to parse and submit enhanced metrics for log %s: %s", log, + str(e), ) @@ -347,9 +348,9 @@ def parse_metrics_from_report_log(report_log_line): Args: report_log_line (str): The REPORT log generated by Lambda - EX: "REPORT RequestId: 814ba7cb-071e-4181-9a09-fa41db5bccad Duration: 1711.87 ms \ - Billed Duration: 1800 ms Memory Size: 128 MB Max Memory Used: 98 MB \ - XRAY TraceId: 1-5d83c0ad-b8eb33a0b1de97d804fac890 SegmentId: 31255c3b19bd3637 Sampled: true" + EX: "REPORT RequestId: 814ba7cb-071e-4181-9a09-fa41db5bccad Duration: 1711.87 ms \ + Billed Duration: 1800 ms Memory Size: 128 MB Max Memory Used: 98 MB \ + XRAY TraceId: 1-5d83c0ad-b8eb33a0b1de97d804fac890 SegmentId: 31255c3b19bd3637 Sampled: true" Returns: metrics - DatadogMetricPoint[] diff --git a/aws/logs_monitoring/forwarder.py b/aws/logs_monitoring/forwarder.py index 735f62573..10c8033c0 100644 --- a/aws/logs_monitoring/forwarder.py +++ b/aws/logs_monitoring/forwarder.py @@ -99,7 +99,7 @@ def _forward_logs(self, logs, key=None): log["message"] = scrubber.scrub(log["message"]) evaluated_log = log["message"] except Exception as e: - logger.exception( + logger.error( f"Exception while scrubbing log message {log['message']}: {e}" ) @@ -116,8 +116,8 @@ def _forward_logs(self, logs, key=None): for batch in batcher.batch(logs_to_forward): try: client.send(batch) - except Exception: - logger.exception(f"Exception while forwarding log batch {batch}") + except Exception as e: + logger.error(f"Exception while forwarding log batch {batch}: {e}") failed_logs.extend(batch) else: if logger.isEnabledFor(logging.DEBUG): @@ -142,9 +142,9 @@ def _forward_metrics(self, metrics, key=None): for metric in metrics: try: send_log_metric(metric) - except Exception: - logger.exception( - f"Exception while forwarding metric {json.dumps(metric)}" + except Exception as e: + logger.error( + f"Exception while forwarding metric {json.dumps(metric)}: {e}" ) failed_metrics.append(metric) else: @@ 
-168,9 +168,9 @@ def _forward_traces(self, traces, key=None): try: serialized_trace_paylods = json.dumps(traces) self.trace_connection.send_traces(serialized_trace_paylods) - except Exception: - logger.exception( - f"Exception while forwarding traces {serialized_trace_paylods}" + except Exception as e: + logger.error( + f"Exception while forwarding traces {serialized_trace_paylods}: {e}" ) if DD_STORE_FAILED_EVENTS and not key: self.storage.store_data(RetryPrefix.TRACES, traces) diff --git a/aws/logs_monitoring/lambda_function.py b/aws/logs_monitoring/lambda_function.py index ccb8f3c9e..f81b4619a 100644 --- a/aws/logs_monitoring/lambda_function.py +++ b/aws/logs_monitoring/lambda_function.py @@ -4,41 +4,46 @@ # Copyright 2021 Datadog, Inc. import json -import os -import boto3 import logging -import requests +import os from hashlib import sha1 -from datadog_lambda.wrapper import datadog_lambda_wrapper +import boto3 +import requests from datadog import api -from enhanced_lambda_metrics import parse_and_submit_enhanced_metrics -from steps.parsing import parse -from steps.enrichment import enrich -from steps.transformation import transform -from steps.splitting import split +from datadog_lambda.wrapper import datadog_lambda_wrapper + from caching.cache_layer import CacheLayer +from enhanced_lambda_metrics import parse_and_submit_enhanced_metrics from forwarder import Forwarder from settings import ( + DD_ADDITIONAL_TARGET_LAMBDAS, DD_API_KEY, - DD_SKIP_SSL_VALIDATION, DD_API_URL, DD_FORWARDER_VERSION, - DD_ADDITIONAL_TARGET_LAMBDAS, DD_RETRY_KEYWORD, + DD_SITE, + DD_SKIP_SSL_VALIDATION, ) +from steps.enrichment import enrich +from steps.parsing import parse +from steps.splitting import split +from steps.transformation import transform logger = logging.getLogger() logger.setLevel(logging.getLevelName(os.environ.get("DD_LOG_LEVEL", "INFO").upper())) # DD_API_KEY must be set if DD_API_KEY == "" or DD_API_KEY == "": - raise Exception("Missing Datadog API key") + raise Exception( + "Missing Datadog API key. Set DD_API_KEY environment variable. " + "See: https://docs.datadoghq.com/serverless/forwarder/" + ) # Check if the API key is the correct number of characters if len(DD_API_KEY) != 32: raise Exception( - "The API key is not the expected length. " - "Please confirm that your API key is correct" + f"Invalid Datadog API key format. Expected 32 characters, received {len(DD_API_KEY)}. " + f"Verify your API key at https://app.{DD_SITE}/organization-settings/api-keys" ) # Validate the API key logger.debug("Validating the Datadog API key") @@ -57,7 +62,11 @@ timeout=10, ) if not validation_res.ok: - raise Exception("The API key is not valid.") + raise Exception( + f"Datadog API key validation failed (HTTP {validation_res.status_code}). " + f"Verify your API key is correct and DD_SITE matches your Datadog account region (current: {DD_SITE}). 
" + "See: https://docs.datadoghq.com/getting_started/site/" + ) # Force the layer to use the exact same API key and host as the forwarder api._api_key = DD_API_KEY @@ -106,7 +115,7 @@ def init_cache_layer(function_prefix): if cache_layer is None: cache_layer = CacheLayer(function_prefix) except Exception as e: - logger.exception(f"Failed to create cache layer due to {e}") + logger.error(f"Failed to create cache layer due to {e}") raise @@ -135,7 +144,7 @@ def invoke_additional_target_lambdas(event): Payload=lambda_payload, ) except Exception as e: - logger.exception( + logger.error( f"Failed to invoke additional target lambda {lambda_arn} due to {e}" ) diff --git a/aws/logs_monitoring/logs/datadog_http_client.py b/aws/logs_monitoring/logs/datadog_http_client.py index 79fa740bc..0d1c8a275 100644 --- a/aws/logs_monitoring/logs/datadog_http_client.py +++ b/aws/logs_monitoring/logs/datadog_http_client.py @@ -84,8 +84,8 @@ def _close(self): for future in as_completed(self._futures): try: future.result() - except Exception: - logger.exception("Exception while forwarding logs") + except Exception as e: + logger.error(f"Exception while forwarding logs: {e}") self._session.close() @@ -95,8 +95,8 @@ def send(self, logs): """ try: data = self._scrubber.scrub("[{}]".format(",".join(logs))) - except ScrubbingException: - raise Exception("could not scrub the payload") + except ScrubbingException as e: + raise Exception(f"could not scrub the payload: {e}") if DD_USE_COMPRESSION: data = compress_logs(data, DD_COMPRESSION_LEVEL) diff --git a/aws/logs_monitoring/logs/datadog_matcher.py b/aws/logs_monitoring/logs/datadog_matcher.py index fd448cbfd..13864b156 100644 --- a/aws/logs_monitoring/logs/datadog_matcher.py +++ b/aws/logs_monitoring/logs/datadog_matcher.py @@ -44,5 +44,8 @@ def match(self, log): return True - except ScrubbingException: - raise Exception("could not filter the payload") + except ScrubbingException as e: + raise Exception(f"Failed to filter log: {e}") + + except Exception as e: + raise Exception(f"Failed to filter log: {e}") diff --git a/aws/logs_monitoring/logs/helpers.py b/aws/logs_monitoring/logs/helpers.py index a6ed14ede..0388bac21 100644 --- a/aws/logs_monitoring/logs/helpers.py +++ b/aws/logs_monitoring/logs/helpers.py @@ -34,16 +34,16 @@ def compileRegex(rule, pattern): if pattern == "": # If pattern is an empty string, raise exception raise Exception( - "No pattern provided:\nAdd pattern or remove {} environment variable".format( - rule - ) + f"Empty pattern for {rule}. Set a valid regex pattern or remove the {rule} environment variable." ) try: return re.compile(pattern) - except Exception: + except re.error as e: raise Exception( - "could not compile {} regex with pattern: {}".format(rule, pattern) + f"Invalid regex pattern for {rule}: '{pattern}'. 
Regex error: {e}" ) + except Exception as e: + raise Exception(f"Failed to compile {rule} regex pattern '{pattern}': {e}") def add_retry_tag(log): diff --git a/aws/logs_monitoring/retry/storage.py b/aws/logs_monitoring/retry/storage.py index 89f1c9afd..527ed5c7f 100644 --- a/aws/logs_monitoring/retry/storage.py +++ b/aws/logs_monitoring/retry/storage.py @@ -40,14 +40,14 @@ def store_data(self, prefix, data): self.s3_client.put_object( Bucket=self.bucket_name, Key=key, Body=serialized_data ) - except ClientError: - logger.error(f"Failed to store retry data for prefix {prefix}") + except ClientError as e: + logger.error(f"Failed to store retry data for prefix {prefix}: {e}") def delete_data(self, key): try: self.s3_client.delete_object(Bucket=self.bucket_name, Key=key) - except ClientError: - logger.error(f"Failed to delete retry data for key {key}") + except ClientError as e: + logger.error(f"Failed to delete retry data for key {key}: {e}") def _list_keys(self, prefix): key_prefix = self._get_key_prefix(prefix) @@ -68,8 +68,8 @@ def _fetch_data_for_key(self, key): body = response.get("Body") data = body.read() return self._deserialize(data) - except ClientError: - logger.error(f"Failed to fetch retry data for key {key}") + except ClientError as e: + logger.error(f"Failed to fetch retry data for key {key}: {e}") return None except Exception as e: logger.error( diff --git a/aws/logs_monitoring/steps/enrichment.py b/aws/logs_monitoring/steps/enrichment.py index a3ff8e20c..f119fd013 100644 --- a/aws/logs_monitoring/steps/enrichment.py +++ b/aws/logs_monitoring/steps/enrichment.py @@ -152,9 +152,9 @@ def extract_ddtags_from_message(event): message_dict = json.loads(event["message"]) extracted_ddtags = message_dict.pop(DD_CUSTOM_TAGS) event["message"] = json.dumps(message_dict) - except Exception: + except Exception as e: if logger.isEnabledFor(logging.DEBUG): - logger.debug(f"Failed to extract ddtags from: {event}") + logger.debug(f"Failed to extract ddtags from: {event}: {e}") return # strip and cleanup spaces from extracted tags: @@ -191,8 +191,8 @@ def extract_host_from_cloudtrails(event): if isinstance(message, str): try: message = json.loads(message) - except json.JSONDecodeError: - logger.debug("Failed to decode cloudtrail message") + except json.JSONDecodeError as e: + logger.debug(f"Failed to decode cloudtrail message: {e}") return # deal with s3 input type events diff --git a/aws/logs_monitoring/steps/handlers/aws_attributes.py b/aws/logs_monitoring/steps/handlers/aws_attributes.py index 19912854a..d98a7b1ab 100644 --- a/aws/logs_monitoring/steps/handlers/aws_attributes.py +++ b/aws/logs_monitoring/steps/handlers/aws_attributes.py @@ -60,5 +60,5 @@ def set_account_region(self, arn): parts = arn.split(":") self.account = parts[4] self.region = parts[3] - except Exception: - raise Exception("Failed to parse account and region from ARN") + except Exception as e: + raise Exception(f"Failed to parse account and region from ARN: {e}") From 58ce3b7ef80244b4b979dcaf132210700a8e7695 Mon Sep 17 00:00:00 2001 From: Vincent Boutour Date: Wed, 22 Oct 2025 14:11:07 +0200 Subject: [PATCH 17/18] Update aws/logs_monitoring/lambda_function.py Co-authored-by: Georgi --- aws/logs_monitoring/lambda_function.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/aws/logs_monitoring/lambda_function.py b/aws/logs_monitoring/lambda_function.py index f81b4619a..b7d13ac81 100644 --- a/aws/logs_monitoring/lambda_function.py +++ b/aws/logs_monitoring/lambda_function.py @@ -43,7 +43,10 @@ if 
len(DD_API_KEY) != 32: raise Exception( f"Invalid Datadog API key format. Expected 32 characters, received {len(DD_API_KEY)}. " - f"Verify your API key at https://app.{DD_SITE}/organization-settings/api-keys" + f''' + Invalid Datadog API key format. Expected 32 characters, received {len(DD_API_KEY)}. + Verify your API key at https://app.{DD_SITE}/organization-settings/api-keys + ''' ) # Validate the API key logger.debug("Validating the Datadog API key") From 12206699b491ab39ba1f1d2261382239f6ee9dc3 Mon Sep 17 00:00:00 2001 From: Vincent Boutour Date: Wed, 22 Oct 2025 14:14:42 +0200 Subject: [PATCH 18/18] fixup! Update aws/logs_monitoring/lambda_function.py Signed-off-by: Vincent Boutour --- aws/logs_monitoring/enhanced_lambda_metrics.py | 4 +--- aws/logs_monitoring/lambda_function.py | 5 ++--- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/aws/logs_monitoring/enhanced_lambda_metrics.py b/aws/logs_monitoring/enhanced_lambda_metrics.py index 4de8f4607..2c4ef9750 100644 --- a/aws/logs_monitoring/enhanced_lambda_metrics.py +++ b/aws/logs_monitoring/enhanced_lambda_metrics.py @@ -170,9 +170,7 @@ def parse_and_submit_enhanced_metrics(logs, cache_layer): enhanced_metric.submit_to_dd() except Exception as e: logger.error( - "Encountered an error while trying to parse and submit enhanced metrics for log %s: %s", - log, - str(e), + f"Encountered an error while trying to parse and submit enhanced metrics for log {log}: {e}", ) diff --git a/aws/logs_monitoring/lambda_function.py b/aws/logs_monitoring/lambda_function.py index b7d13ac81..8f6926b92 100644 --- a/aws/logs_monitoring/lambda_function.py +++ b/aws/logs_monitoring/lambda_function.py @@ -42,11 +42,10 @@ # Check if the API key is the correct number of characters if len(DD_API_KEY) != 32: raise Exception( - f"Invalid Datadog API key format. Expected 32 characters, received {len(DD_API_KEY)}. " - f''' + f""" Invalid Datadog API key format. Expected 32 characters, received {len(DD_API_KEY)}. Verify your API key at https://app.{DD_SITE}/organization-settings/api-keys - ''' + """ ) # Validate the API key logger.debug("Validating the Datadog API key")
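A closing note on the `logger.exception` → `logger.error` conversions in this series: `logger.exception` always attaches the active traceback, while a bare `logger.error(f"...: {e}")` keeps only the message, which is why the cache-related call sites above pass `exc_info=True` explicitly. A self-contained illustration of the three variants (example code, not repository code):

```python
import json
import logging

logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger("demo")

try:
    json.loads("not json")
except Exception as e:
    logger.exception("parse failed: %s", e)            # message + traceback
    logger.error(f"parse failed: {e}")                 # message only
    logger.error(f"parse failed: {e}", exc_info=True)  # message + traceback
```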