Merged
@@ -726,19 +726,19 @@ experiments:
   - name: iastpropagation-no-propagation
     thresholds:
       - execution_time < 0.06 ms
-      - max_rss_usage < 40.50 MB
+      - max_rss_usage < 42.00 MB
   - name: iastpropagation-propagation_enabled
     thresholds:
       - execution_time < 0.19 ms
-      - max_rss_usage < 40.00 MB
+      - max_rss_usage < 42.00 MB
   - name: iastpropagation-propagation_enabled_100
     thresholds:
       - execution_time < 2.30 ms
-      - max_rss_usage < 40.00 MB
+      - max_rss_usage < 42.00 MB
   - name: iastpropagation-propagation_enabled_1000
     thresholds:
       - execution_time < 34.55 ms
-      - max_rss_usage < 40.00 MB
+      - max_rss_usage < 42.00 MB

   # otelsdkspan
   - name: otelsdkspan-add-event
68 changes: 30 additions & 38 deletions .gitlab/benchmarks/microbenchmarks.yml
@@ -10,7 +10,7 @@ variables:
   PACKAGE_IMAGE: registry.ddbuild.io/images/mirror/pypa/manylinux2014_x86_64:2025-04-12-5990e2d
   GITHUB_CLI_IMAGE: registry.ddbuild.io/images/dd-octo-sts-ci-base:2025.06-1
   BENCHMARKING_BRANCH: dd-trace-py
-  BENCHMARKING_COMMIT_SHA: e7bbac96e1ae9bfb5f8906dcdf103b08f5ca0805
+  BENCHMARKING_COMMIT_SHA: 32681a9f805f4d62cf6bd7d205ddeb83ab72288d

 .benchmarks:
   stage: test
@@ -24,8 +24,6 @@ variables:
   timeout: 30m
   dependencies: [ "baseline:build", "candidate" ]
   script: |
-    export REPORTS_DIR="$(pwd)/reports/" && (mkdir "${REPORTS_DIR}" || :)
-
     if [[ -n "$CI_JOB_TOKEN" ]];
     then
       git config --global url."https://gitlab-ci-token:${CI_JOB_TOKEN}@gitlab.ddbuild.io/DataDog/".insteadOf "https://github.com/DataDog/"
@@ -34,18 +32,28 @@ variables:
     (cd /platform && git reset --hard "${BENCHMARKING_COMMIT_SHA}")
     export PATH="$PATH:/platform/steps"

-    capture-hardware-software-info.sh
+    for SCENARIO in $(echo "$SCENARIOS" | tr -s '[:space:]' ' ');
+    do
+      export REPORTS_DIR="$(pwd)/reports/${SCENARIO}/" && (mkdir -p "${REPORTS_DIR}" || :)

-    if [[ $SCENARIO =~ ^flask_* || $SCENARIO =~ ^django_* ]];
-    then
-      BP_SCENARIO=$SCENARIO bp-runner "${CI_PROJECT_DIR:-.}/.gitlab/benchmarks/bp-runner.yml" --debug -t
-    else
-      run-benchmarks.sh
-    fi
+      capture-hardware-software-info.sh

-    analyze-results.sh
+      if [[ $SCENARIO =~ ^flask_* || $SCENARIO =~ ^django_* ]];
+      then
+        BP_SCENARIO=$SCENARIO bp-runner "${CI_PROJECT_DIR:-.}/.gitlab/benchmarks/bp-runner.yml" --debug -t
+      else
+        run-benchmarks.sh
+      fi

-    upload-results-to-s3.sh || :
+      analyze-results.sh
+      # Join all config results into a single results.json
+      .gitlab/benchmarks/steps/combine-results.sh "/artifacts/${CI_JOB_ID}-${SCENARIO}/candidate/"
+      .gitlab/benchmarks/steps/combine-results.sh "/artifacts/${CI_JOB_ID}-${SCENARIO}/baseline/"
+
+      upload-results-to-s3.sh || :
+      # Copy converted JSON reports to common location
+      cp $REPORTS_DIR/*.converted.json $(pwd)/reports/
+    done

     # We have to move artifacts to ${CI_PROJECT_DIR} if we want to attach as GitLab artifact
     cp -R /artifacts ${CI_PROJECT_DIR}/
@@ -146,40 +154,24 @@ candidate:
 microbenchmarks:
   extends: .benchmarks
   parallel:
+    # DEV: The organization into these groups is mostly arbitrary, based on observed runtimes and
+    # trying to keep total runtime per job <10 minutes
     matrix:
-      - SCENARIO:
-          - "span"
-          - "tracer"
-          - "sampling_rule_matches"
-          - "set_http_meta"
-          - "django_simple"
-          - "flask_simple"
-          - "flask_sqli"
-          - "core_api"
-          - "otel_span"
-          - "otel_sdk_span"
-          - "appsec_iast_aspects"
-          - "appsec_iast_aspects_ospath"
-          - "appsec_iast_aspects_re_module"
-          - "appsec_iast_aspects_split"
+      - CPUS_PER_RUN: "1"
+        SCENARIOS:
+          - "span tracer core_api set_http_meta telemetry_add_metric otel_span otel_sdk_span recursive_computation sampling_rule_matches"
+          - "http_propagation_extract http_propagation_inject rate_limiter appsec_iast_aspects appsec_iast_aspects_ospath appsec_iast_aspects_re_module appsec_iast_aspects_split appsec_iast_propagation"
+          - "packages_package_for_root_module_mapping packages_update_imported_dependencies"
+      - CPUS_PER_RUN: "2"
+        SCENARIOS:
+          - "django_simple flask_simple flask_sqli errortracking_django_simple errortracking_flask_sqli"
       # Flaky timeouts on starting up
       # TODO: Re-enable when this issue is resolved:
       # - "appsec_iast_django_startup"
-          - "appsec_iast_propagation"
-          - "errortracking_django_simple"
       # They take a long time to run and frequently time out
       # TODO: Make benchmarks faster, or run less frequently, or as macrobenchmarks
       # - "appsec_iast_django_startup"
-          - "errortracking_flask_sqli"
       # Flaky. Timeout errors
       # - "encoder"
-          - "http_propagation_extract"
-          - "http_propagation_inject"
-          - "rate_limiter"
-          - "packages_package_for_root_module_mapping"
-          - "packages_update_imported_dependencies"
-          - "recursive_computation"
-          - "telemetry_add_metric"
       # They take a long time to run, and now need the agent running
       # TODO: Make benchmarks faster, or run less frequently, or as macrobenchmarks
       # - "startup"
27 changes: 27 additions & 0 deletions .gitlab/benchmarks/steps/combine-results.sh
@@ -0,0 +1,27 @@
#!/usr/bin/env bash
set -exo pipefail

ARTIFACTS_DIR="${1}"

# Combine all the individual results into a single results file.
# We need:
# - to merge all the benchmarks into a single list
# - to keep only one copy of the metadata, removing fields that are per-benchmark specific
# - add benchmark specific metadata into each benchmark entry
jq -s '
map(
. as $file
| .benchmarks |= map(
.metadata = ($file.metadata | { name, loops, cpu_affinity, cpu_config, cpu_freq } )
)
| {
benchmarks: .benchmarks,
leftover_meta: (.metadata | del(.name, .loops, .cpu_affinity, .cpu_config, .cpu_freq))
}
)
|
{
benchmarks: (map(.benchmarks) | add),
metadata: (first | .leftover_meta)
}
' $ARTIFACTS_DIR/results.*.json > "${ARTIFACTS_DIR}/results.json"
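For readers who don't speak jq, here is a rough Python equivalent of the pipeline above — a sketch for illustration only (CI runs the jq version); the per-benchmark field list mirrors the `{ name, loops, cpu_affinity, cpu_config, cpu_freq }` selection in the script:

```python
import glob
import json
import os
import sys

# Fields that vary per benchmark and therefore move into each benchmark entry
PER_BENCHMARK_FIELDS = ("name", "loops", "cpu_affinity", "cpu_config", "cpu_freq")


def combine_results(artifacts_dir: str) -> None:
    benchmarks = []
    shared_metadata = {}
    for path in sorted(glob.glob(os.path.join(artifacts_dir, "results.*.json"))):
        with open(path) as fp:
            result = json.load(fp)
        metadata = result.get("metadata", {})
        # Per-benchmark fields move into each benchmark entry...
        per_bench = {k: metadata.get(k) for k in PER_BENCHMARK_FIELDS}
        for bench in result.get("benchmarks", []):
            bench["metadata"] = per_bench
            benchmarks.append(bench)
        # ...while one copy of the remaining metadata is kept, from the first file
        if not shared_metadata:
            shared_metadata = {k: v for k, v in metadata.items() if k not in PER_BENCHMARK_FIELDS}
    with open(os.path.join(artifacts_dir, "results.json"), "w") as fp:
        json.dump({"benchmarks": benchmarks, "metadata": shared_metadata}, fp)


if __name__ == "__main__":
    combine_results(sys.argv[1])
```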
82 changes: 76 additions & 6 deletions benchmarks/base/run.py
@@ -2,8 +2,12 @@

 import json
 import os
+import queue
 import subprocess
 import sys
+import threading
+from typing import Any
+from typing import Optional

 import yaml

@@ -16,13 +20,39 @@ def read_config(path):
         return yaml.load(fp, Loader=yaml.FullLoader)


-def run(scenario_py, cname, cvars, output_dir):
+def cpu_affinity_to_cpu_groups(cpu_affinity: str, cpus_per_run: int) -> list[list[int]]:
+    # CPU_AFFINITY is a comma-separated list of CPU IDs and ranges
+    #   6-11
+    #   6-11,14,15
+    #   6-11,13-15,16,18,20-21
+    cpu_ids: list[int] = []
+    for part in cpu_affinity.split(","):
+        if "-" in part:
+            start, end = part.split("-")
+            cpu_ids.extend(range(int(start), int(end) + 1))
+        else:
+            cpu_ids.append(int(part))
+
+    if len(cpu_ids) % cpus_per_run != 0:
+        raise ValueError(f"CPU count {len(cpu_ids)} not divisible by CPUS_PER_RUN={cpus_per_run}")
+    cpu_groups = [cpu_ids[i : i + cpus_per_run] for i in range(0, len(cpu_ids), cpus_per_run)]
+    return cpu_groups
+
+
+def run(scenario_py: str, cname: str, cvars: dict[str, Any], output_dir: str, cpus: Optional[list[int]] = None):
+    cmd: list[str] = []
+
+    if cpus:
+        # Use taskset to set CPU affinity
+        cpu_list_str = ",".join(str(cpu) for cpu in cpus)
+        cmd += ["taskset", "-c", cpu_list_str]
+
     if SHOULD_PROFILE:
         # viztracer won't create the missing directory itself
         viztracer_output_dir = os.path.join(output_dir, "viztracer")
         os.makedirs(viztracer_output_dir, exist_ok=True)

-        cmd = [
+        cmd += [
             "viztracer",
             "--minimize_memory",
             "--min_duration",
@@ -33,14 +63,14 @@ def run(scenario_py, cname, cvars, output_dir):
             os.path.join(output_dir, "viztracer", "{}.json".format(cname)),
         ]
     else:
-        cmd = ["python"]
+        cmd += ["python"]

     cmd += [
         scenario_py,
         # necessary to copy PYTHONPATH for venvs
         "--copy-env",
-        "--append",
-        os.path.join(output_dir, "results.json"),
+        "--output",
+        os.path.join(output_dir, f"results.{cname}.json"),
         "--name",
         cname,
     ]
@@ -72,5 +102,45 @@
 config = {k: v for k, v in config.items() if k in allowed_configs}
 print("Filtering to configs: {}".format(", ".join(sorted(config.keys()))))

+CPU_AFFINITY = os.environ.get("CPU_AFFINITY")
+
+# No CPU affinity specified, run sequentially
+if not CPU_AFFINITY:
+    for cname, cvars in config.items():
+        run("scenario.py", cname, cvars, output_dir)
+    sys.exit(0)
+
+CPUS_PER_RUN = int(os.environ.get("CPUS_PER_RUN", "1"))
+cpu_groups = cpu_affinity_to_cpu_groups(CPU_AFFINITY, CPUS_PER_RUN)
+
+print(f"Running with CPU affinity: {CPU_AFFINITY}")
+print(f"CPUs per run: {CPUS_PER_RUN}")
+print(f"CPU groups: {list(cpu_groups)}")
+
+job_queue = queue.Queue()
+cpu_queue = queue.Queue()
+
+def worker(cpu_queue: queue.Queue, job_queue: queue.Queue):
+    while job_queue.qsize() > 0:
+        cname, cvars = job_queue.get(timeout=1)
+
+        cpus = cpu_queue.get()
+        print(f"Starting run {cname} on CPUs {cpus}")
+        run("scenario.py", cname, cvars, output_dir, cpus=cpus)
+        print(f"Finished run {cname}")
+        cpu_queue.put(cpus)
+
 for cname, cvars in config.items():
-    run("scenario.py", cname, cvars, output_dir)
+    job_queue.put((cname, cvars))
+
+workers = []
+print(f"Starting {len(cpu_groups)} worker threads")
+for cpus in cpu_groups:
+    cpu_queue.put(cpus)
+    t = threading.Thread(target=worker, args=(cpu_queue, job_queue))
+    t.start()
+    workers.append(t)
+
+for t in workers:
+    t.join()
+print("All runs completed.")
5 changes: 4 additions & 1 deletion ddtrace/contrib/internal/trace_utils_base.py
@@ -150,7 +150,10 @@ def _set_url_tag(integration_config: IntegrationConfig, span: Span, url: str, qu
         # users should set ``DD_TRACE_HTTP_CLIENT_TAG_QUERY_STRING=False``. This case should be
         # removed when config.global_query_string_obfuscation_disabled is removed (v3.0).
         span._set_tag_str(http.URL, url)
-    elif getattr(config._obfuscation_query_string_pattern, "pattern", None) == b"":
+    elif (
+        config._obfuscation_query_string_pattern is None
+        or getattr(config._obfuscation_query_string_pattern, "pattern", None) == b""
+    ):
         # obfuscation is disabled when DD_TRACE_OBFUSCATION_QUERY_STRING_REGEXP=""
         span._set_tag_str(http.URL, strip_query_string(url))
     else:
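The extra `is None` branch matters because `getattr` can never find a `pattern` attribute on `None`: the old condition only caught a compiled empty regex, so an unset pattern fell through to the `else` branch. A quick illustrative check (the two values are hypothetical stand-ins for `config._obfuscation_query_string_pattern`):

```python
import re

no_pattern = None                # pattern was never set
empty_pattern = re.compile(b"")  # DD_TRACE_OBFUSCATION_QUERY_STRING_REGEXP=""

# The old condition only matched the compiled-empty-regex case:
assert getattr(empty_pattern, "pattern", None) == b""  # disabled, as intended
assert getattr(no_pattern, "pattern", None) is None    # != b"", so it fell through
```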