1 change: 1 addition & 0 deletions .github/workflows/aws_unit_test.yml
@@ -18,6 +18,7 @@ jobs:
AWS_DEFAULT_REGION: us-east-1
DD_API_KEY: "11111111111111111111111111111111"
DD_ADDITIONAL_TARGET_LAMBDAS: "ironmaiden,megadeth"
DD_STORE_FAILED_EVENTS: "true"
run: |
pip install boto3 mock approvaltests
python -m unittest discover ./aws/logs_monitoring/
2 changes: 1 addition & 1 deletion aws/logs_monitoring/README.md
@@ -95,7 +95,7 @@ If you can't install the Forwarder using the provided CloudFormation template, y
8. Set the environment variable `DD_STORE_FAILED_EVENTS` to `true` to enable the forwarder to also store event data in the S3 bucket. If an exception is raised while sending logs, metrics, or traces to intake, the forwarder stores the relevant data in the S3 bucket. On a custom invocation, i.e. on receiving an event with the `retry` keyword set to a non-empty string (which can be triggered manually, see below), the forwarder retries sending the stored events and clears them from the bucket once they are sent successfully.

```bash
-aws lambda invoke --function-name <function-name> --payload '{"retry":"true"}' out
+aws lambda invoke --function-name <function-name> --payload '{"retry":"true"}' --cli-binary-format raw-in-base64-out --log-type Tail /dev/stdout
```
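
For reference, the same retry invocation can be issued from Python; this is a sketch using boto3 (the function name is a placeholder for your deployed forwarder):

```python
import json

import boto3

# Sketch: trigger the forwarder's retry path, mirroring the CLI example above.
client = boto3.client("lambda")
response = client.invoke(
    FunctionName="<function-name>",  # placeholder: your forwarder's name
    Payload=json.dumps({"retry": "true"}).encode("utf-8"),
    LogType="Tail",  # return the tail of the invocation log, like --log-type Tail
)
print(response["StatusCode"])
```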

<div class="alert alert-warning">
6 changes: 6 additions & 0 deletions aws/logs_monitoring/forwarder.py
@@ -128,6 +128,9 @@ def _forward_logs(self, logs, key=None):
if DD_STORE_FAILED_EVENTS and len(failed_logs) > 0 and not key:
self.storage.store_data(RetryPrefix.LOGS, failed_logs)

if len(failed_logs) > 0:
send_event_metric("logs_failed", len(failed_logs))

send_event_metric("logs_forwarded", len(logs_to_forward) - len(failed_logs))

def _forward_metrics(self, metrics, key=None):
@@ -156,6 +159,9 @@ def _forward_metrics(self, metrics, key=None):
if DD_STORE_FAILED_EVENTS and len(failed_metrics) > 0 and not key:
self.storage.store_data(RetryPrefix.METRICS, failed_metrics)

if len(failed_metrics) > 0:
send_event_metric("metrics_failed", len(failed_metrics))

send_event_metric("metrics_forwarded", len(metrics) - len(failed_metrics))

def _forward_traces(self, traces, key=None):
40 changes: 5 additions & 35 deletions aws/logs_monitoring/lambda_function.py
@@ -9,7 +9,6 @@
from hashlib import sha1

import boto3
-import requests
from datadog import api
from datadog_lambda.wrapper import datadog_lambda_wrapper

@@ -22,8 +21,9 @@
DD_API_URL,
DD_FORWARDER_VERSION,
DD_RETRY_KEYWORD,
DD_SITE,
DD_SKIP_SSL_VALIDATION,
DD_STORE_FAILED_EVENTS,
is_api_key_valid,
)
from steps.enrichment import enrich
from steps.parsing import parse
@@ -33,42 +33,12 @@
logger = logging.getLogger()
logger.setLevel(logging.getLevelName(os.environ.get("DD_LOG_LEVEL", "INFO").upper()))

-# DD_API_KEY must be set
-if DD_API_KEY == "<YOUR_DATADOG_API_KEY>" or DD_API_KEY == "":
-    raise Exception(
-        "Missing Datadog API key. Set DD_API_KEY environment variable. "
-        "See: https://docs.datadoghq.com/serverless/forwarder/"
-    )
-# Check if the API key is the correct number of characters
-if len(DD_API_KEY) != 32:
-    raise Exception(
-        f"""
-        Invalid Datadog API key format. Expected 32 characters, received {len(DD_API_KEY)}.
-        Verify your API key at https://app.{DD_SITE}/organization-settings/api-keys
-        """
-    )
-# Validate the API key
-logger.debug("Validating the Datadog API key")
-
-with requests.Session() as s:
-    retries = requests.adapters.Retry(
-        total=5, backoff_factor=1, status_forcelist=[429, 500, 502, 503, 504]
-    )
-
-    s.mount("http://", requests.adapters.HTTPAdapter(max_retries=retries))
-    s.mount("https://", requests.adapters.HTTPAdapter(max_retries=retries))
-
-    validation_res = s.get(
-        "{}/api/v1/validate?api_key={}".format(DD_API_URL, DD_API_KEY),
-        verify=(not DD_SKIP_SSL_VALIDATION),
-        timeout=10,
-    )
-    if not validation_res.ok:
-        raise Exception(
-            f"Datadog API key validation failed (HTTP {validation_res.status_code}). "
-            f"Verify your API key is correct and DD_SITE matches your Datadog account region (current: {DD_SITE}). "
-            "See: https://docs.datadoghq.com/getting_started/site/"
-        )
+if not is_api_key_valid() and not DD_STORE_FAILED_EVENTS:
+    raise Exception(
+        "Failed to check if API Key is valid and no storage of failed events, aborting."
+    )

# Force the layer to use the exact same API key and host as the forwarder
api._api_key = DD_API_KEY
1 change: 1 addition & 0 deletions aws/logs_monitoring/logs/datadog_http_client.py
@@ -68,6 +68,7 @@ def __init__(
self._session = None
self._ssl_validation = not skip_ssl_validation
self._futures = []

if logger.isEnabledFor(logging.DEBUG):
logger.debug(
f"Initialized http client for logs intake: "
44 changes: 44 additions & 0 deletions aws/logs_monitoring/settings.py
@@ -10,6 +10,7 @@

import boto3
import botocore.config
import requests

logger = logging.getLogger()
logger.setLevel(logging.getLevelName(os.environ.get("DD_LOG_LEVEL", "INFO").upper()))
@@ -230,6 +231,49 @@ def __init__(self, name, pattern, placeholder, enabled=True):
DD_API_KEY = DD_API_KEY.strip()
os.environ["DD_API_KEY"] = DD_API_KEY


def is_api_key_valid():
    # DD_API_KEY must be set
    if DD_API_KEY == "<YOUR_DATADOG_API_KEY>" or DD_API_KEY == "":
        raise Exception(
            "Missing Datadog API key. Set DD_API_KEY environment variable. "
            "See: https://docs.datadoghq.com/serverless/forwarder/"
        )

    # Check if the API key is the correct number of characters
    if len(DD_API_KEY) != 32:
        raise Exception(
            f"""
            Invalid Datadog API key format. Expected 32 characters, received {len(DD_API_KEY)}.
            Verify your API key at https://app.{DD_SITE}/organization-settings/api-keys
            """
        )

    # Validate the API key
    logger.debug("Validating the Datadog API key")

    with requests.Session() as s:
        retries = requests.adapters.Retry(
            total=5, backoff_factor=1, status_forcelist=[429, 500, 502, 503, 504]
        )

        s.mount("http://", requests.adapters.HTTPAdapter(max_retries=retries))
        s.mount("https://", requests.adapters.HTTPAdapter(max_retries=retries))

        validation_res = s.get(
            "{}/api/v1/validate?api_key={}".format(DD_API_URL, DD_API_KEY),
            verify=(not DD_SKIP_SSL_VALIDATION),
            timeout=10,
        )
        if not validation_res.ok:
            logger.error(
                f"Datadog API key validation failed (HTTP {validation_res.status_code}). "
                f"Verify your API key is correct and DD_SITE matches your Datadog account region (current: {DD_SITE}). "
                "See: https://docs.datadoghq.com/getting_started/site/"
            )
            return False

    return True
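
Taken together with the `lambda_function.py` change above, the failure modes now split in two: a missing or malformed key still raises inside `is_api_key_valid()`, while a failed intake validation call logs an error and returns `False`, so startup aborts only when `DD_STORE_FAILED_EVENTS` offers no fallback. A minimal sketch of the resulting gate, using only names from this diff:

```python
# Sketch only: mirrors the startup gate added in lambda_function.py above.
from settings import DD_STORE_FAILED_EVENTS, is_api_key_valid

# Missing/malformed keys raise inside is_api_key_valid(); an intake
# validation failure returns False. Abort startup only when there is no
# S3 storage of failed events to fall back on.
if not is_api_key_valid() and not DD_STORE_FAILED_EVENTS:
    raise Exception(
        "Failed to check if API Key is valid and no storage of failed events, aborting."
    )
```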


# DD_MULTILINE_LOG_REGEX_PATTERN: Multiline Log Regular Expression Pattern
DD_MULTILINE_LOG_REGEX_PATTERN = get_env_var(
"DD_MULTILINE_LOG_REGEX_PATTERN", default=None
1 change: 1 addition & 0 deletions aws/logs_monitoring/tests/run_unit_tests.sh
@@ -2,5 +2,6 @@

export DD_API_KEY=11111111111111111111111111111111
export DD_ADDITIONAL_TARGET_LAMBDAS=ironmaiden,megadeth
export DD_STORE_FAILED_EVENTS="true"
export DD_S3_BUCKET_NAME=dd-s3-bucket
python3 -m unittest discover .
54 changes: 29 additions & 25 deletions aws/logs_monitoring/tools/integration_tests/docker-compose.yml
@@ -27,50 +27,54 @@ services:
AWS_SECURITY_TOKEN: "${AWS_SECURITY_TOKEN}"
AWS_SESSION_TOKEN: "${AWS_SESSION_TOKEN}"
AWS_DEFAULT_REGION: us-east-1
DD_LOG_LEVEL: ${LOG_LEVEL:-info}
Review comment (author): alpha sort while passing by

DD_ADDITIONAL_TARGET_LAMBDAS: "${EXTERNAL_LAMBDAS}"
DD_API_KEY: abcdefghijklmnopqrstuvwxyz012345 # Must be 32 characters exactly
DD_URL: recorder # Used for logs intake
DD_PORT: 8080 # API port to use
DD_SITE: datadog.com
DD_API_URL: http://recorder:8080
DD_LOGS_INTAKE_URL: recorder:8080
DD_TRACE_INTAKE_URL: http://recorder:8080
DD_NO_SSL: "true"
DD_SKIP_SSL_VALIDATION: "true"
DD_USE_COMPRESSION: "false"
DD_ADDITIONAL_TARGET_LAMBDAS: "${EXTERNAL_LAMBDAS}"
DD_S3_BUCKET_NAME: "${DD_S3_BUCKET_NAME}"
DD_FETCH_LAMBDA_TAGS: "${DD_FETCH_LAMBDA_TAGS:-false}"
DD_FETCH_LOG_GROUP_TAGS: "${DD_FETCH_LOG_GROUP_TAGS:-false}"
DD_FETCH_STEP_FUNCTIONS_TAGS: "${DD_FETCH_STEP_FUNCTIONS_TAGS:-false}"
DD_STORE_FAILED_EVENTS: "false"
DD_LOG_LEVEL: ${LOG_LEVEL:-info}
DD_LOGS_INTAKE_URL: recorder:8080
DD_NO_SSL: "true"
DD_PORT: 8080 # API port to use
DD_S3_BUCKET_NAME: "${DD_S3_BUCKET_NAME}"
DD_SITE: datadog.com
DD_SKIP_SSL_VALIDATION: "true"
DD_STORE_FAILED_EVENTS: "${DD_STORE_FAILED_EVENTS:-true}"
DD_TRACE_ENABLED: "true"
DD_TRACE_INTAKE_URL: http://recorder:8080
DD_URL: recorder # Used for logs intake
DD_USE_COMPRESSION: "false"
expose:
- 8080
depends_on:
recorder:
condition: service_healthy
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:8080/2015-03-31/functions/function/invocations"]
interval: 10s
timeout: 5s
retries: 3

test:
[
"CMD",
"curl",
"-f",
"http://localhost:8080/2015-03-31/functions/function/invocations",
]
interval: 10s
timeout: 5s

tester:
image: ${PYTHON_BASE}
command: /bin/sh -c 'pip install "deepdiff<6" && python -m unittest discover'
volumes:
- ./tester:/tester
- ${SNAPSHOTS_DIR_NAME}:/snapshots
- "${SNAPSHOTS_DIR_NAME}:/snapshots"
working_dir: /tester
environment:
RECORDER_URL: http://recorder:8080/recording
FORWARDER_URL: http://forwarder:8080/2015-03-31/functions/function/invocations
UPDATE_SNAPSHOTS: ${UPDATE_SNAPSHOTS:-false}
SNAPSHOTS_DIR_NAME: ${SNAPSHOTS_DIR_NAME}
RECORDER_URL: http://recorder:8080/recording
SNAPSHOTS_DIR_NAME: "${SNAPSHOTS_DIR_NAME}"
UPDATE_SNAPSHOTS: "${UPDATE_SNAPSHOTS:-false}"
depends_on:
forwarder:
condition: service_healthy
recorder:
condition: service_healthy
forwarder:
condition: service_healthy
recorder:
condition: service_healthy
@@ -23,6 +23,7 @@ CACHE_TEST=false
DD_FETCH_LAMBDA_TAGS="true"
DD_FETCH_LOG_GROUP_TAGS="true"
DD_FETCH_STEP_FUNCTIONS_TAGS="true"
DD_STORE_FAILED_EVENTS="true"

script_start_time=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
echo "Starting script time: $script_start_time"
@@ -154,6 +155,7 @@ LOG_LEVEL=${LOG_LEVEL} \
DD_FETCH_LAMBDA_TAGS=${DD_FETCH_LAMBDA_TAGS} \
DD_FETCH_LOG_GROUP_TAGS=${DD_FETCH_LOG_GROUP_TAGS} \
DD_FETCH_STEP_FUNCTIONS_TAGS=${DD_FETCH_STEP_FUNCTIONS_TAGS} \
DD_STORE_FAILED_EVENTS=${DD_STORE_FAILED_EVENTS} \
docker compose up --build --abort-on-container-exit

if [ $ADDITIONAL_LAMBDA == true ]; then