diff --git a/aws/logs_monitoring/enhanced_lambda_metrics.py b/aws/logs_monitoring/enhanced_lambda_metrics.py index a87c655a9..c6c509664 100644 --- a/aws/logs_monitoring/enhanced_lambda_metrics.py +++ b/aws/logs_monitoring/enhanced_lambda_metrics.py @@ -28,6 +28,7 @@ MEMORY_ALLOCATED_FIELD_NAME = "memorysize" MAX_MEMORY_USED_METRIC_NAME = "max_memory_used" INIT_DURATION_METRIC_NAME = "init_duration" +TIMED_OUT_DURATION_METRIC_NAME = "timed_out" # Create named groups for each metric and tag so that we can # access the values from the search result by name @@ -43,6 +44,12 @@ + r"(\s+Init\s+Duration:\s+(?P<{}>[\d\.]+)\s+ms)?".format(INIT_DURATION_METRIC_NAME) ) +TIMED_OUT_REGEX = re.compile( + r"Task\stimed\sout\safter\s+(?P<{}>[\d\.]+)\s+seconds".format( + TIMED_OUT_DURATION_METRIC_NAME + ) +) + METRICS_TO_PARSE_FROM_REPORT = [ DURATION_METRIC_NAME, BILLED_DURATION_METRIC_NAME, @@ -380,14 +387,16 @@ def generate_enhanced_lambda_metrics(log, tags_cache): timestamp = log.get("timestamp") # If the log dict is missing any of this data it's not a Lambda REPORT log and we move on - if not all( - (log_function_arn, log_message, timestamp, log_message.startswith("REPORT")) - ): + if not all((log_function_arn, log_message, timestamp)): return [] + # Check if this is a report log, if not it will be returned empty parsed_metrics = parse_metrics_from_report_log(log_message) if not parsed_metrics: - return [] + # if empty, check if this is a timed out task + parsed_metrics = create_timeout_enhanced_metric(log_message) + if not parsed_metrics: + return [] # Add the tags from ARN, custom tags cache, and env var tags_from_arn = parse_lambda_tags_from_arn(log_function_arn) @@ -518,3 +527,28 @@ def get_enriched_lambda_log_tags(log_event): # Combine and dedup tags tags = list(set(tags_from_arn + lambda_custom_tags)) return tags + + +def create_timeout_enhanced_metric(report_log_line): + """Parses and returns timed out metric from lambda log + + Args: + report_log_line (str): The timed out task log + EX: "2019-07-18T18:58:22.286Z b5264ab7-2056-4f5b-bb0f-a06a70f6205d \ + Task timed out after 30.03 seconds" + + Returns: + DatadogMetricPoint[] + """ + + regex_match = TIMED_OUT_REGEX.search(report_log_line) + if not regex_match: + return [] + + dd_metric = DatadogMetricPoint( + "{}.{}".format( + ENHANCED_METRICS_NAMESPACE_PREFIX, TIMED_OUT_DURATION_METRIC_NAME + ), + float(regex_match.group(TIMED_OUT_DURATION_METRIC_NAME)), + ) + return [dd_metric] diff --git a/aws/logs_monitoring/lambda_function.py b/aws/logs_monitoring/lambda_function.py index 46fee7058..6dd6ed41d 100644 --- a/aws/logs_monitoring/lambda_function.py +++ b/aws/logs_monitoring/lambda_function.py @@ -553,7 +553,7 @@ def scrub(self, payload): def log_has_report_msg(log): msg = log.get("message", "") - if isinstance(msg, str) and msg.startswith("REPORT"): + if isinstance(msg, str): return True return False diff --git a/aws/logs_monitoring/tests/snapshots/cloudwatch_log_coldstart.json b/aws/logs_monitoring/tests/snapshots/cloudwatch_log_coldstart.json new file mode 100644 index 000000000..ec0c2d2b5 --- /dev/null +++ b/aws/logs_monitoring/tests/snapshots/cloudwatch_log_coldstart.json @@ -0,0 +1,5 @@ +{ + "awslogs": { + "data": "H4sIAAAAAAAAAK2QT2/aQBDFv4pl9Yjj2dnZf9yIcKNIpa2w20tA1RovqSUbU3sppVG+exZIJVQlh0i5rd7svPeb9xC3bhjsvSsOWxeP4+mkmPyYZXk+ucniUdztN64PsgRGqFAZYyDITXd/03e7bZikdj+kjW3LyqaD7/p2SFZNt6v21q9+Ju632/jzQu57Z9uwgYCQgkyB0rsPnyZFlhdLu8aSOaaJl5osVhpWUmkC6/h6bRUGi2FXDqu+3vq623ysG+/6IR7fxe3hnLE8hWTH91F/iOsqZHFBWmrOyIAGkMQZCia4BMkADXKmGBE3EkkoprRQEoUCpJDn69CMt204kgnDUJMQBowe/Wss2OfFZF5Ec/drF77eVuOoKlGJtVaJNZwSUswm2mhISsnLNVUrVroy+h7QwxHj6Pn6xSZ+HL0ALJWgI6siw4wyCjkpFSCNVhoIECUHQ8xoVPw1YAbsEjj7PH0r7jvQvVrnf3Tz7OuXt/e58NNdb/2pUXalZNQOC39dN42roosJwGkwc23XH6K8/uuCiDqaXQfR/omeB98GF3LprN9uan/pgXBlzv7HVpaPT8e9Sb4+AwAA" + } +} \ No newline at end of file diff --git a/aws/logs_monitoring/tests/snapshots/cloudwatch_log_coldstart.json~snapshot b/aws/logs_monitoring/tests/snapshots/cloudwatch_log_coldstart.json~snapshot new file mode 100644 index 000000000..43c50eef2 --- /dev/null +++ b/aws/logs_monitoring/tests/snapshots/cloudwatch_log_coldstart.json~snapshot @@ -0,0 +1,77 @@ +{ + "events": [ + { + "path": "/v1/input/abcdefghijklmnopqrstuvwxyz012345", + "verb": "POST", + "content-type": "application/json", + "data": [ + { + "id": "35486831490800643125153606102923171443962457178576257024", + "timestamp": 1591284559098, + "message": "START RequestId: db275f87-a934-471a-8980-b63bf4dc1beb Version: $LATEST\n", + "aws": { + "awslogs": { + "logGroup": "/aws/lambda/storms-cloudwatch-event", + "logStream": "2020/06/04/[$LATEST]af2b1e1843b84a2d80c67840ae3ffa72", + "owner": "601427279990" + }, + "function_version": "$LATEST", + "invoked_function_arn": "arn:aws:lambda:us-east-1:0:function:test" + }, + "lambda": { + "arn": "arn:aws:lambda:us-east-1:0:function:storms-cloudwatch-event" + }, + "ddsourcecategory": "aws", + "ddtags": "forwardername:test,forwarder_memorysize:1536,forwarder_version:x.x.x,env:none,account_id:0,aws_account:0,functionname:storms-cloudwatch-event,region:us-east-1", + "ddsource": "lambda", + "service": "storms-cloudwatch-event", + "host": "arn:aws:lambda:us-east-1:0:function:storms-cloudwatch-event" + }, + { + "id": "35486831490867545360749197972347778598780402263094198273", + "timestamp": 1591284559101, + "message": "END RequestId: db275f87-a934-471a-8980-b63bf4dc1beb\n", + "aws": { + "awslogs": { + "logGroup": "/aws/lambda/storms-cloudwatch-event", + "logStream": "2020/06/04/[$LATEST]af2b1e1843b84a2d80c67840ae3ffa72", + "owner": "601427279990" + }, + "function_version": "$LATEST", + "invoked_function_arn": "arn:aws:lambda:us-east-1:0:function:test" + }, + "lambda": { + "arn": "arn:aws:lambda:us-east-1:0:function:storms-cloudwatch-event" + }, + "ddsourcecategory": "aws", + "ddtags": "forwardername:test,forwarder_memorysize:1536,forwarder_version:x.x.x,env:none,account_id:0,aws_account:0,functionname:storms-cloudwatch-event,region:us-east-1", + "ddsource": "lambda", + "service": "storms-cloudwatch-event", + "host": "arn:aws:lambda:us-east-1:0:function:storms-cloudwatch-event" + }, + { + "id": "35486831490867545360749197972347778598780402263094198274", + "timestamp": 1591284559101, + "message": "REPORT RequestId: db275f87-a934-471a-8980-b63bf4dc1beb\tDuration: 1.76 ms\tBilled Duration: 100 ms\tMemory Size: 128 MB\tMax Memory Used: 48 MB\tInit Duration: 120.96 ms\t\n", + "aws": { + "awslogs": { + "logGroup": "/aws/lambda/storms-cloudwatch-event", + "logStream": "2020/06/04/[$LATEST]af2b1e1843b84a2d80c67840ae3ffa72", + "owner": "601427279990" + }, + "function_version": "$LATEST", + "invoked_function_arn": "arn:aws:lambda:us-east-1:0:function:test" + }, + "lambda": { + "arn": "arn:aws:lambda:us-east-1:0:function:storms-cloudwatch-event" + }, + "ddsourcecategory": "aws", + "ddtags": "forwardername:test,forwarder_memorysize:1536,forwarder_version:x.x.x,env:none,account_id:0,aws_account:0,functionname:storms-cloudwatch-event,region:us-east-1", + "ddsource": "lambda", + "service": "storms-cloudwatch-event", + "host": "arn:aws:lambda:us-east-1:0:function:storms-cloudwatch-event" + } + ] + } + ] +} \ No newline at end of file diff --git a/aws/logs_monitoring/tests/snapshots/cloudwatch_log_timeout.json b/aws/logs_monitoring/tests/snapshots/cloudwatch_log_timeout.json new file mode 100644 index 000000000..28a621169 --- /dev/null +++ b/aws/logs_monitoring/tests/snapshots/cloudwatch_log_timeout.json @@ -0,0 +1,5 @@ +{ + "awslogs": { + "data": "H4sIAAAAAAAAAK1T247TMBD9FSvisWnGdztvXbWsVqKAmsAD2wo5ibtE5FJih9Jd7b/jtou0ICF1BW+eM+M5Z45mHqLWOmfubH7Y2SiN5rN89nm5yLLZ9SKaRP2+s0OABWBGJJFaawhw099dD/24C5nE7F3SmLaoTOJ8P7QuLpt+rPbGl19i+912/vwh84M1bfhBgEACIgGd3L56M8sXWb4pCNNKcFMZs2VbUynYar5VpgItCS9UaOHGwpVDvfN1372uG28HF6W3UXs4c2xOJIvj+4g/RHUVuChnWjCiqeRSEwGUaEVDxChgSbkgXGImtCJCciWkwkBDAEc+XwdnvGnDkJhrHOo0o6F48sux0D7LZ6screy3MZTeVCmSpeZCFjzGIIuYGVHGSmoVA+bSlARXBSfoY5AehkjR0/TrLnqc/CGYMaJAUEYE1loGfZxzKSQGDTzI1hiAYE2JwpwpovRfBAvM8XPBi7fzl8r9d3WnlblE3Wrx/t3L/Vz7+TgYf3KUAtApFqh1a39VN42t0G9JOGWWtu2HA8rqe5siTBRaXgXQ/EBPiQ/OBm52xm+62j9rgomcAjm1+Q/O4AudOd5MDCIGnWOeAkmJmGIOny5buNy4r+hIU6F+9Mhsw/EgOg1uOFv2XeXW3XGWzeNPhE+9yQ4EAAA=" + } +} \ No newline at end of file diff --git a/aws/logs_monitoring/tests/snapshots/cloudwatch_log_timeout.json~snapshot b/aws/logs_monitoring/tests/snapshots/cloudwatch_log_timeout.json~snapshot new file mode 100644 index 000000000..040fc59c0 --- /dev/null +++ b/aws/logs_monitoring/tests/snapshots/cloudwatch_log_timeout.json~snapshot @@ -0,0 +1,99 @@ +{ + "events": [ + { + "path": "/v1/input/abcdefghijklmnopqrstuvwxyz012345", + "verb": "POST", + "content-type": "application/json", + "data": [ + { + "id": "35496429375792603298393743017356257146982675867810398208", + "timestamp": 1591714943146, + "message": "START RequestId: 7c9567b5-107b-4a6c-8798-0157ac21db52 Version: $LATEST\n", + "aws": { + "awslogs": { + "logGroup": "/aws/lambda/storms-cloudwatch-event", + "logStream": "2020/06/09/[$LATEST]b249865adaaf4fad80f95f8ad09725b8", + "owner": "601427279990" + }, + "function_version": "$LATEST", + "invoked_function_arn": "arn:aws:lambda:us-east-1:0:function:test" + }, + "lambda": { + "arn": "arn:aws:lambda:us-east-1:0:function:storms-cloudwatch-event" + }, + "ddsourcecategory": "aws", + "ddtags": "forwardername:test,forwarder_memorysize:1536,forwarder_version:x.x.x,env:none,account_id:0,aws_account:0,functionname:storms-cloudwatch-event,region:us-east-1", + "ddsource": "lambda", + "service": "storms-cloudwatch-event", + "host": "arn:aws:lambda:us-east-1:0:function:storms-cloudwatch-event" + }, + { + "id": "35496429442806342619978265557671090556291002193281548289", + "timestamp": 1591714946151, + "message": "END RequestId: 7c9567b5-107b-4a6c-8798-0157ac21db52\n", + "aws": { + "awslogs": { + "logGroup": "/aws/lambda/storms-cloudwatch-event", + "logStream": "2020/06/09/[$LATEST]b249865adaaf4fad80f95f8ad09725b8", + "owner": "601427279990" + }, + "function_version": "$LATEST", + "invoked_function_arn": "arn:aws:lambda:us-east-1:0:function:test" + }, + "lambda": { + "arn": "arn:aws:lambda:us-east-1:0:function:storms-cloudwatch-event" + }, + "ddsourcecategory": "aws", + "ddtags": "forwardername:test,forwarder_memorysize:1536,forwarder_version:x.x.x,env:none,account_id:0,aws_account:0,functionname:storms-cloudwatch-event,region:us-east-1", + "ddsource": "lambda", + "service": "storms-cloudwatch-event", + "host": "arn:aws:lambda:us-east-1:0:function:storms-cloudwatch-event" + }, + { + "id": "35496429442806342619978265557671090556291002193281548290", + "timestamp": 1591714946151, + "message": "REPORT RequestId: 7c9567b5-107b-4a6c-8798-0157ac21db52\tDuration: 3003.16 ms\tBilled Duration: 3000 ms\tMemory Size: 128 MB\tMax Memory Used: 48 MB\tInit Duration: 127.02 ms\t\n", + "aws": { + "awslogs": { + "logGroup": "/aws/lambda/storms-cloudwatch-event", + "logStream": "2020/06/09/[$LATEST]b249865adaaf4fad80f95f8ad09725b8", + "owner": "601427279990" + }, + "function_version": "$LATEST", + "invoked_function_arn": "arn:aws:lambda:us-east-1:0:function:test" + }, + "lambda": { + "arn": "arn:aws:lambda:us-east-1:0:function:storms-cloudwatch-event" + }, + "ddsourcecategory": "aws", + "ddtags": "forwardername:test,forwarder_memorysize:1536,forwarder_version:x.x.x,env:none,account_id:0,aws_account:0,functionname:storms-cloudwatch-event,region:us-east-1", + "ddsource": "lambda", + "service": "storms-cloudwatch-event", + "host": "arn:aws:lambda:us-east-1:0:function:storms-cloudwatch-event" + }, + { + "id": "35496429442806342619978265557671090556291002193281548291", + "timestamp": 1591714946151, + "message": "2020-06-09T15:02:26.150Z 7c9567b5-107b-4a6c-8798-0157ac21db52 Task timed out after 3.00 seconds\n\n", + "aws": { + "awslogs": { + "logGroup": "/aws/lambda/storms-cloudwatch-event", + "logStream": "2020/06/09/[$LATEST]b249865adaaf4fad80f95f8ad09725b8", + "owner": "601427279990" + }, + "function_version": "$LATEST", + "invoked_function_arn": "arn:aws:lambda:us-east-1:0:function:test" + }, + "lambda": { + "arn": "arn:aws:lambda:us-east-1:0:function:storms-cloudwatch-event" + }, + "ddsourcecategory": "aws", + "ddtags": "forwardername:test,forwarder_memorysize:1536,forwarder_version:x.x.x,env:none,account_id:0,aws_account:0,functionname:storms-cloudwatch-event,region:us-east-1", + "ddsource": "lambda", + "service": "storms-cloudwatch-event", + "host": "arn:aws:lambda:us-east-1:0:function:storms-cloudwatch-event" + } + ] + } + ] +} \ No newline at end of file diff --git a/aws/logs_monitoring/tests/test_enhanced_lambda_metrics.py b/aws/logs_monitoring/tests/test_enhanced_lambda_metrics.py index 89727590d..589d6b3df 100644 --- a/aws/logs_monitoring/tests/test_enhanced_lambda_metrics.py +++ b/aws/logs_monitoring/tests/test_enhanced_lambda_metrics.py @@ -10,6 +10,7 @@ generate_enhanced_lambda_metrics, LambdaTagsCache, parse_get_resources_response_for_tags_by_arn, + create_timeout_enhanced_metric, ) @@ -314,7 +315,6 @@ def test_generate_enhanced_lambda_metrics_with_tags(self, mock_build_cache): ] } tags_cache = LambdaTagsCache() - logs_input = { "message": "REPORT RequestId: fe1467d6-1458-4e20-8e40-9aaa4be7a0f4\tDuration: 3470.65 ms\tBilled Duration: 3500 ms\tMemory Size: 128 MB\tMax Memory Used: 89 MB\t\nXRAY TraceId: 1-5d8bba5a-dc2932496a65bab91d2d42d4\tSegmentId: 5ff79d2a06b82ad6\tSampled: true\t\n", "aws": { @@ -492,6 +492,59 @@ def test_generate_enhanced_lambda_metrics_refresh_on_new_arn( del os.environ["DD_FETCH_LAMBDA_TAGS"] + @patch("enhanced_lambda_metrics.build_tags_by_arn_cache") + def test_generate_enhanced_lambda_metrics_timeout(self, mock_build_cache): + + mock_build_cache.return_value = { + "arn:aws:lambda:us-east-1:0:function:cloudwatch-event": [ + "team:metrics", + "monitor:datadog", + "env:prod", + "creator:swf", + ] + } + tags_cache = LambdaTagsCache() + + logs_input = { + "message": "2020-06-09T15:02:26.150Z 7c9567b5-107b-4a6c-8798-0157ac21db52 Task timed out after 3.00 seconds\n\n", + "aws": { + "awslogs": { + "logGroup": "/aws/lambda/cloudwatch-event", + "logStream": "2020/06/09/[$LATEST]b249865adaaf4fad80f95f8ad09725b8", + "owner": "601427279990", + }, + "function_version": "$LATEST", + "invoked_function_arn": "arn:aws:lambda:us-east-1:0:function:test", + }, + "lambda": {"arn": "arn:aws:lambda:us-east-1:0:function:cloudwatch-event"}, + "timestamp": 1591714946151, + } + + os.environ["DD_FETCH_LAMBDA_TAGS"] = "True" + + generated_metrics = generate_enhanced_lambda_metrics(logs_input, tags_cache) + self.assertEqual( + [metric.__dict__ for metric in generated_metrics], + [ + { + "name": "aws.lambda.enhanced.timed_out", + "tags": [ + "region:us-east-1", + "account_id:0", + "aws_account:0", + "functionname:cloudwatch-event", + "team:metrics", + "monitor:datadog", + "env:prod", + "creator:swf", + ], + "timestamp": 1591714946151, + "value": 3.0, + } + ], + ) + del os.environ["DD_FETCH_LAMBDA_TAGS"] + if __name__ == "__main__": unittest.main()