Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 38 additions & 4 deletions aws/logs_monitoring/enhanced_lambda_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
MEMORY_ALLOCATED_FIELD_NAME = "memorysize"
MAX_MEMORY_USED_METRIC_NAME = "max_memory_used"
INIT_DURATION_METRIC_NAME = "init_duration"
TIMED_OUT_DURATION_METRIC_NAME = "timed_out"

# Create named groups for each metric and tag so that we can
# access the values from the search result by name
Expand All @@ -43,6 +44,12 @@
+ r"(\s+Init\s+Duration:\s+(?P<{}>[\d\.]+)\s+ms)?".format(INIT_DURATION_METRIC_NAME)
)

TIMED_OUT_REGEX = re.compile(
r"Task\stimed\sout\safter\s+(?P<{}>[\d\.]+)\s+seconds".format(
TIMED_OUT_DURATION_METRIC_NAME
)
)

METRICS_TO_PARSE_FROM_REPORT = [
DURATION_METRIC_NAME,
BILLED_DURATION_METRIC_NAME,
Expand Down Expand Up @@ -380,14 +387,16 @@ def generate_enhanced_lambda_metrics(log, tags_cache):
timestamp = log.get("timestamp")

# If the log dict is missing any of this data it's not a Lambda REPORT log and we move on
if not all(
(log_function_arn, log_message, timestamp, log_message.startswith("REPORT"))
):
if not all((log_function_arn, log_message, timestamp)):
return []

# Check if this is a report log, if not it will be returned empty
parsed_metrics = parse_metrics_from_report_log(log_message)
if not parsed_metrics:
return []
# if empty, check if this is a timed out task
parsed_metrics = create_timeout_enhanced_metric(log_message)
if not parsed_metrics:
return []

# Add the tags from ARN, custom tags cache, and env var
tags_from_arn = parse_lambda_tags_from_arn(log_function_arn)
Expand Down Expand Up @@ -518,3 +527,28 @@ def get_enriched_lambda_log_tags(log_event):
# Combine and dedup tags
tags = list(set(tags_from_arn + lambda_custom_tags))
return tags


def create_timeout_enhanced_metric(report_log_line):
"""Parses and returns timed out metric from lambda log

Args:
report_log_line (str): The timed out task log
EX: "2019-07-18T18:58:22.286Z b5264ab7-2056-4f5b-bb0f-a06a70f6205d \
Task timed out after 30.03 seconds"

Returns:
DatadogMetricPoint[]
"""

regex_match = TIMED_OUT_REGEX.search(report_log_line)
if not regex_match:
return []

dd_metric = DatadogMetricPoint(
"{}.{}".format(
ENHANCED_METRICS_NAMESPACE_PREFIX, TIMED_OUT_DURATION_METRIC_NAME
),
float(regex_match.group(TIMED_OUT_DURATION_METRIC_NAME)),
)
return [dd_metric]
2 changes: 1 addition & 1 deletion aws/logs_monitoring/lambda_function.py
Original file line number Diff line number Diff line change
Expand Up @@ -553,7 +553,7 @@ def scrub(self, payload):

def log_has_report_msg(log):
msg = log.get("message", "")
if isinstance(msg, str) and msg.startswith("REPORT"):
if isinstance(msg, str):
return True
return False

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{
"awslogs": {
"data": "H4sIAAAAAAAAAK2QT2/aQBDFv4pl9Yjj2dnZf9yIcKNIpa2w20tA1RovqSUbU3sppVG+exZIJVQlh0i5rd7svPeb9xC3bhjsvSsOWxeP4+mkmPyYZXk+ucniUdztN64PsgRGqFAZYyDITXd/03e7bZikdj+kjW3LyqaD7/p2SFZNt6v21q9+Ju632/jzQu57Z9uwgYCQgkyB0rsPnyZFlhdLu8aSOaaJl5osVhpWUmkC6/h6bRUGi2FXDqu+3vq623ysG+/6IR7fxe3hnLE8hWTH91F/iOsqZHFBWmrOyIAGkMQZCia4BMkADXKmGBE3EkkoprRQEoUCpJDn69CMt204kgnDUJMQBowe/Wss2OfFZF5Ec/drF77eVuOoKlGJtVaJNZwSUswm2mhISsnLNVUrVroy+h7QwxHj6Pn6xSZ+HL0ALJWgI6siw4wyCjkpFSCNVhoIECUHQ8xoVPw1YAbsEjj7PH0r7jvQvVrnf3Tz7OuXt/e58NNdb/2pUXalZNQOC39dN42roosJwGkwc23XH6K8/uuCiDqaXQfR/omeB98GF3LprN9uan/pgXBlzv7HVpaPT8e9Sb4+AwAA"
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
{
"events": [
{
"path": "/v1/input/abcdefghijklmnopqrstuvwxyz012345",
"verb": "POST",
"content-type": "application/json",
"data": [
{
"id": "35486831490800643125153606102923171443962457178576257024",
"timestamp": 1591284559098,
"message": "START RequestId: db275f87-a934-471a-8980-b63bf4dc1beb Version: $LATEST\n",
"aws": {
"awslogs": {
"logGroup": "/aws/lambda/storms-cloudwatch-event",
"logStream": "2020/06/04/[$LATEST]af2b1e1843b84a2d80c67840ae3ffa72",
"owner": "601427279990"
},
"function_version": "$LATEST",
"invoked_function_arn": "arn:aws:lambda:us-east-1:0:function:test"
},
"lambda": {
"arn": "arn:aws:lambda:us-east-1:0:function:storms-cloudwatch-event"
},
"ddsourcecategory": "aws",
"ddtags": "forwardername:test,forwarder_memorysize:1536,forwarder_version:x.x.x,env:none,account_id:0,aws_account:0,functionname:storms-cloudwatch-event,region:us-east-1",
"ddsource": "lambda",
"service": "storms-cloudwatch-event",
"host": "arn:aws:lambda:us-east-1:0:function:storms-cloudwatch-event"
},
{
"id": "35486831490867545360749197972347778598780402263094198273",
"timestamp": 1591284559101,
"message": "END RequestId: db275f87-a934-471a-8980-b63bf4dc1beb\n",
"aws": {
"awslogs": {
"logGroup": "/aws/lambda/storms-cloudwatch-event",
"logStream": "2020/06/04/[$LATEST]af2b1e1843b84a2d80c67840ae3ffa72",
"owner": "601427279990"
},
"function_version": "$LATEST",
"invoked_function_arn": "arn:aws:lambda:us-east-1:0:function:test"
},
"lambda": {
"arn": "arn:aws:lambda:us-east-1:0:function:storms-cloudwatch-event"
},
"ddsourcecategory": "aws",
"ddtags": "forwardername:test,forwarder_memorysize:1536,forwarder_version:x.x.x,env:none,account_id:0,aws_account:0,functionname:storms-cloudwatch-event,region:us-east-1",
"ddsource": "lambda",
"service": "storms-cloudwatch-event",
"host": "arn:aws:lambda:us-east-1:0:function:storms-cloudwatch-event"
},
{
"id": "35486831490867545360749197972347778598780402263094198274",
"timestamp": 1591284559101,
"message": "REPORT RequestId: db275f87-a934-471a-8980-b63bf4dc1beb\tDuration: 1.76 ms\tBilled Duration: 100 ms\tMemory Size: 128 MB\tMax Memory Used: 48 MB\tInit Duration: 120.96 ms\t\n",
"aws": {
"awslogs": {
"logGroup": "/aws/lambda/storms-cloudwatch-event",
"logStream": "2020/06/04/[$LATEST]af2b1e1843b84a2d80c67840ae3ffa72",
"owner": "601427279990"
},
"function_version": "$LATEST",
"invoked_function_arn": "arn:aws:lambda:us-east-1:0:function:test"
},
"lambda": {
"arn": "arn:aws:lambda:us-east-1:0:function:storms-cloudwatch-event"
},
"ddsourcecategory": "aws",
"ddtags": "forwardername:test,forwarder_memorysize:1536,forwarder_version:x.x.x,env:none,account_id:0,aws_account:0,functionname:storms-cloudwatch-event,region:us-east-1",
"ddsource": "lambda",
"service": "storms-cloudwatch-event",
"host": "arn:aws:lambda:us-east-1:0:function:storms-cloudwatch-event"
}
]
}
]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{
"awslogs": {
"data": "H4sIAAAAAAAAAK1T247TMBD9FSvisWnGdztvXbWsVqKAmsAD2wo5ibtE5FJih9Jd7b/jtou0ICF1BW+eM+M5Z45mHqLWOmfubH7Y2SiN5rN89nm5yLLZ9SKaRP2+s0OABWBGJJFaawhw099dD/24C5nE7F3SmLaoTOJ8P7QuLpt+rPbGl19i+912/vwh84M1bfhBgEACIgGd3L56M8sXWb4pCNNKcFMZs2VbUynYar5VpgItCS9UaOHGwpVDvfN1372uG28HF6W3UXs4c2xOJIvj+4g/RHUVuChnWjCiqeRSEwGUaEVDxChgSbkgXGImtCJCciWkwkBDAEc+XwdnvGnDkJhrHOo0o6F48sux0D7LZ6screy3MZTeVCmSpeZCFjzGIIuYGVHGSmoVA+bSlARXBSfoY5AehkjR0/TrLnqc/CGYMaJAUEYE1loGfZxzKSQGDTzI1hiAYE2JwpwpovRfBAvM8XPBi7fzl8r9d3WnlblE3Wrx/t3L/Vz7+TgYf3KUAtApFqh1a39VN42t0G9JOGWWtu2HA8rqe5siTBRaXgXQ/EBPiQ/OBm52xm+62j9rgomcAjm1+Q/O4AudOd5MDCIGnWOeAkmJmGIOny5buNy4r+hIU6F+9Mhsw/EgOg1uOFv2XeXW3XGWzeNPhE+9yQ4EAAA="
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
{
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's a pity we aren't capturing metrics with the integration test right now, it would make this test much more useful

"events": [
{
"path": "/v1/input/abcdefghijklmnopqrstuvwxyz012345",
"verb": "POST",
"content-type": "application/json",
"data": [
{
"id": "35496429375792603298393743017356257146982675867810398208",
"timestamp": 1591714943146,
"message": "START RequestId: 7c9567b5-107b-4a6c-8798-0157ac21db52 Version: $LATEST\n",
"aws": {
"awslogs": {
"logGroup": "/aws/lambda/storms-cloudwatch-event",
"logStream": "2020/06/09/[$LATEST]b249865adaaf4fad80f95f8ad09725b8",
"owner": "601427279990"
},
"function_version": "$LATEST",
"invoked_function_arn": "arn:aws:lambda:us-east-1:0:function:test"
},
"lambda": {
"arn": "arn:aws:lambda:us-east-1:0:function:storms-cloudwatch-event"
},
"ddsourcecategory": "aws",
"ddtags": "forwardername:test,forwarder_memorysize:1536,forwarder_version:x.x.x,env:none,account_id:0,aws_account:0,functionname:storms-cloudwatch-event,region:us-east-1",
"ddsource": "lambda",
"service": "storms-cloudwatch-event",
"host": "arn:aws:lambda:us-east-1:0:function:storms-cloudwatch-event"
},
{
"id": "35496429442806342619978265557671090556291002193281548289",
"timestamp": 1591714946151,
"message": "END RequestId: 7c9567b5-107b-4a6c-8798-0157ac21db52\n",
"aws": {
"awslogs": {
"logGroup": "/aws/lambda/storms-cloudwatch-event",
"logStream": "2020/06/09/[$LATEST]b249865adaaf4fad80f95f8ad09725b8",
"owner": "601427279990"
},
"function_version": "$LATEST",
"invoked_function_arn": "arn:aws:lambda:us-east-1:0:function:test"
},
"lambda": {
"arn": "arn:aws:lambda:us-east-1:0:function:storms-cloudwatch-event"
},
"ddsourcecategory": "aws",
"ddtags": "forwardername:test,forwarder_memorysize:1536,forwarder_version:x.x.x,env:none,account_id:0,aws_account:0,functionname:storms-cloudwatch-event,region:us-east-1",
"ddsource": "lambda",
"service": "storms-cloudwatch-event",
"host": "arn:aws:lambda:us-east-1:0:function:storms-cloudwatch-event"
},
{
"id": "35496429442806342619978265557671090556291002193281548290",
"timestamp": 1591714946151,
"message": "REPORT RequestId: 7c9567b5-107b-4a6c-8798-0157ac21db52\tDuration: 3003.16 ms\tBilled Duration: 3000 ms\tMemory Size: 128 MB\tMax Memory Used: 48 MB\tInit Duration: 127.02 ms\t\n",
"aws": {
"awslogs": {
"logGroup": "/aws/lambda/storms-cloudwatch-event",
"logStream": "2020/06/09/[$LATEST]b249865adaaf4fad80f95f8ad09725b8",
"owner": "601427279990"
},
"function_version": "$LATEST",
"invoked_function_arn": "arn:aws:lambda:us-east-1:0:function:test"
},
"lambda": {
"arn": "arn:aws:lambda:us-east-1:0:function:storms-cloudwatch-event"
},
"ddsourcecategory": "aws",
"ddtags": "forwardername:test,forwarder_memorysize:1536,forwarder_version:x.x.x,env:none,account_id:0,aws_account:0,functionname:storms-cloudwatch-event,region:us-east-1",
"ddsource": "lambda",
"service": "storms-cloudwatch-event",
"host": "arn:aws:lambda:us-east-1:0:function:storms-cloudwatch-event"
},
{
"id": "35496429442806342619978265557671090556291002193281548291",
"timestamp": 1591714946151,
"message": "2020-06-09T15:02:26.150Z 7c9567b5-107b-4a6c-8798-0157ac21db52 Task timed out after 3.00 seconds\n\n",
"aws": {
"awslogs": {
"logGroup": "/aws/lambda/storms-cloudwatch-event",
"logStream": "2020/06/09/[$LATEST]b249865adaaf4fad80f95f8ad09725b8",
"owner": "601427279990"
},
"function_version": "$LATEST",
"invoked_function_arn": "arn:aws:lambda:us-east-1:0:function:test"
},
"lambda": {
"arn": "arn:aws:lambda:us-east-1:0:function:storms-cloudwatch-event"
},
"ddsourcecategory": "aws",
"ddtags": "forwardername:test,forwarder_memorysize:1536,forwarder_version:x.x.x,env:none,account_id:0,aws_account:0,functionname:storms-cloudwatch-event,region:us-east-1",
"ddsource": "lambda",
"service": "storms-cloudwatch-event",
"host": "arn:aws:lambda:us-east-1:0:function:storms-cloudwatch-event"
}
]
}
]
}
55 changes: 54 additions & 1 deletion aws/logs_monitoring/tests/test_enhanced_lambda_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
generate_enhanced_lambda_metrics,
LambdaTagsCache,
parse_get_resources_response_for_tags_by_arn,
create_timeout_enhanced_metric,
)


Expand Down Expand Up @@ -314,7 +315,6 @@ def test_generate_enhanced_lambda_metrics_with_tags(self, mock_build_cache):
]
}
tags_cache = LambdaTagsCache()

logs_input = {
"message": "REPORT RequestId: fe1467d6-1458-4e20-8e40-9aaa4be7a0f4\tDuration: 3470.65 ms\tBilled Duration: 3500 ms\tMemory Size: 128 MB\tMax Memory Used: 89 MB\t\nXRAY TraceId: 1-5d8bba5a-dc2932496a65bab91d2d42d4\tSegmentId: 5ff79d2a06b82ad6\tSampled: true\t\n",
"aws": {
Expand Down Expand Up @@ -492,6 +492,59 @@ def test_generate_enhanced_lambda_metrics_refresh_on_new_arn(

del os.environ["DD_FETCH_LAMBDA_TAGS"]

@patch("enhanced_lambda_metrics.build_tags_by_arn_cache")
def test_generate_enhanced_lambda_metrics_timeout(self, mock_build_cache):

mock_build_cache.return_value = {
"arn:aws:lambda:us-east-1:0:function:cloudwatch-event": [
"team:metrics",
"monitor:datadog",
"env:prod",
"creator:swf",
]
}
tags_cache = LambdaTagsCache()

logs_input = {
"message": "2020-06-09T15:02:26.150Z 7c9567b5-107b-4a6c-8798-0157ac21db52 Task timed out after 3.00 seconds\n\n",
"aws": {
"awslogs": {
"logGroup": "/aws/lambda/cloudwatch-event",
"logStream": "2020/06/09/[$LATEST]b249865adaaf4fad80f95f8ad09725b8",
"owner": "601427279990",
},
"function_version": "$LATEST",
"invoked_function_arn": "arn:aws:lambda:us-east-1:0:function:test",
},
"lambda": {"arn": "arn:aws:lambda:us-east-1:0:function:cloudwatch-event"},
"timestamp": 1591714946151,
}

os.environ["DD_FETCH_LAMBDA_TAGS"] = "True"

generated_metrics = generate_enhanced_lambda_metrics(logs_input, tags_cache)
self.assertEqual(
[metric.__dict__ for metric in generated_metrics],
[
{
"name": "aws.lambda.enhanced.timed_out",
"tags": [
"region:us-east-1",
"account_id:0",
"aws_account:0",
"functionname:cloudwatch-event",
"team:metrics",
"monitor:datadog",
"env:prod",
"creator:swf",
],
"timestamp": 1591714946151,
"value": 3.0,
}
],
)
del os.environ["DD_FETCH_LAMBDA_TAGS"]


if __name__ == "__main__":
unittest.main()