diff --git a/.gitlab/benchmarks/bp-runner.microbenchmarks.fail-on-breach.yml b/.gitlab/benchmarks/bp-runner.microbenchmarks.fail-on-breach.yml
index 850cdc18d1e..824b9f6b9ff 100644
--- a/.gitlab/benchmarks/bp-runner.microbenchmarks.fail-on-breach.yml
+++ b/.gitlab/benchmarks/bp-runner.microbenchmarks.fail-on-breach.yml
@@ -726,19 +726,19 @@ experiments:
   - name: iastpropagation-no-propagation
     thresholds:
       - execution_time < 0.06 ms
-      - max_rss_usage < 40.50 MB
+      - max_rss_usage < 42.00 MB
   - name: iastpropagation-propagation_enabled
     thresholds:
       - execution_time < 0.19 ms
-      - max_rss_usage < 40.00 MB
+      - max_rss_usage < 42.00 MB
   - name: iastpropagation-propagation_enabled_100
     thresholds:
       - execution_time < 2.30 ms
-      - max_rss_usage < 40.00 MB
+      - max_rss_usage < 42.00 MB
   - name: iastpropagation-propagation_enabled_1000
     thresholds:
       - execution_time < 34.55 ms
-      - max_rss_usage < 40.00 MB
+      - max_rss_usage < 42.00 MB

   # otelsdkspan
   - name: otelsdkspan-add-event
diff --git a/.gitlab/benchmarks/microbenchmarks.yml b/.gitlab/benchmarks/microbenchmarks.yml
index 0630adf81b4..c8c966e3564 100644
--- a/.gitlab/benchmarks/microbenchmarks.yml
+++ b/.gitlab/benchmarks/microbenchmarks.yml
@@ -10,7 +10,7 @@ variables:
   PACKAGE_IMAGE: registry.ddbuild.io/images/mirror/pypa/manylinux2014_x86_64:2025-04-12-5990e2d
   GITHUB_CLI_IMAGE: registry.ddbuild.io/images/dd-octo-sts-ci-base:2025.06-1
   BENCHMARKING_BRANCH: dd-trace-py
-  BENCHMARKING_COMMIT_SHA: e7bbac96e1ae9bfb5f8906dcdf103b08f5ca0805
+  BENCHMARKING_COMMIT_SHA: 32681a9f805f4d62cf6bd7d205ddeb83ab72288d

 .benchmarks:
   stage: test
@@ -24,8 +24,6 @@ variables:
   timeout: 30m
   dependencies: [ "baseline:build", "candidate" ]
   script: |
-    export REPORTS_DIR="$(pwd)/reports/" && (mkdir "${REPORTS_DIR}" || :)
-
     if [[ -n "$CI_JOB_TOKEN" ]];
     then
       git config --global url."https://gitlab-ci-token:${CI_JOB_TOKEN}@gitlab.ddbuild.io/DataDog/".insteadOf "https://github.com/DataDog/"
@@ -34,18 +32,28 @@ variables:
     (cd /platform && git reset --hard "${BENCHMARKING_COMMIT_SHA}")
     export PATH="$PATH:/platform/steps"

-    capture-hardware-software-info.sh
+    for SCENARIO in $(echo "$SCENARIOS" | tr -s '[:space:]' ' ');
+    do
+      export REPORTS_DIR="$(pwd)/reports/${SCENARIO}/" && (mkdir -p "${REPORTS_DIR}" || :)

-    if [[ $SCENARIO =~ ^flask_* || $SCENARIO =~ ^django_* ]];
-    then
-      BP_SCENARIO=$SCENARIO bp-runner "${CI_PROJECT_DIR:-.}/.gitlab/benchmarks/bp-runner.yml" --debug -t
-    else
-      run-benchmarks.sh
-    fi
+      capture-hardware-software-info.sh
+
+      if [[ $SCENARIO =~ ^flask_* || $SCENARIO =~ ^django_* ]];
+      then
+        BP_SCENARIO=$SCENARIO bp-runner "${CI_PROJECT_DIR:-.}/.gitlab/benchmarks/bp-runner.yml" --debug -t
+      else
+        run-benchmarks.sh
+      fi

-    analyze-results.sh
+      # Join all config results into a single results.json
+      .gitlab/benchmarks/steps/combine-results.sh "/artifacts/${CI_JOB_ID}-${SCENARIO}/candidate/"
+      .gitlab/benchmarks/steps/combine-results.sh "/artifacts/${CI_JOB_ID}-${SCENARIO}/baseline/"

-    upload-results-to-s3.sh || :
+      analyze-results.sh
+      upload-results-to-s3.sh || :

+      # Copy converted JSON reports to common location
+      cp $REPORTS_DIR/*.converted.json $(pwd)/reports/
+    done

     # We have to move artifacts to ${CI_PROJECT_DIR} if we want to attach as GitLab artifact
     cp -R /artifacts ${CI_PROJECT_DIR}/
@@ -146,40 +154,24 @@ candidate:
 microbenchmarks:
   extends: .benchmarks
   parallel:
+    # DEV: The organization into these groups is mostly arbitrary, based on observed runtimes and
+    # trying to keep total runtime per job <10 minutes
     matrix:
-      - SCENARIO:
-          - "span"
-          - "tracer"
-          - "sampling_rule_matches"
-          - "set_http_meta"
-          - "django_simple"
-          - "flask_simple"
-          - "flask_sqli"
-          - "core_api"
-          - "otel_span"
-          - "otel_sdk_span"
-          - "appsec_iast_aspects"
-          - "appsec_iast_aspects_ospath"
-          - "appsec_iast_aspects_re_module"
-          - "appsec_iast_aspects_split"
+      - CPUS_PER_RUN: "1"
+        SCENARIOS:
+          - "span tracer core_api set_http_meta telemetry_add_metric otel_span otel_sdk_span recursive_computation sampling_rule_matches"
+          - "http_propagation_extract http_propagation_inject rate_limiter appsec_iast_aspects appsec_iast_aspects_ospath appsec_iast_aspects_re_module appsec_iast_aspects_split appsec_iast_propagation"
+          - "packages_package_for_root_module_mapping packages_update_imported_dependencies"
+      - CPUS_PER_RUN: "2"
+        SCENARIOS:
+          - "django_simple flask_simple flask_sqli errortracking_django_simple errortracking_flask_sqli"
           # Flaky timeouts on starting up
           # - "appsec_iast_django_startup"
-          # TOOD: Re-enable when this issue is resolved:
-          - "appsec_iast_propagation"
-          - "errortracking_django_simple"
           # They take a long time to run and frequently time out
           # TODO: Make benchmarks faster, or run less frequently, or as macrobenchmarks
           # - "appsec_iast_django_startup"
-          - "errortracking_flask_sqli"
           # Flaky. Timeout errors
           # - "encoder"
-          - "http_propagation_extract"
-          - "http_propagation_inject"
-          - "rate_limiter"
-          - "packages_package_for_root_module_mapping"
-          - "packages_update_imported_dependencies"
-          - "recursive_computation"
-          - "telemetry_add_metric"
           # They take a long time to run, and now need the agent running
           # TODO: Make benchmarks faster, or run less frequently, or as macrobenchmarks
          # - "startup"
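For reviewers less familiar with GitLab's `parallel:matrix`: each (`CPUS_PER_RUN`, `SCENARIOS`-element) pair expands into its own job, so the two groups above produce four parallel jobs, and the `for SCENARIO in $(echo "$SCENARIOS" | tr -s '[:space:]' ' ')` loop then iterates the scenario names inside each job. A minimal Python sketch of that expansion, with abbreviated placeholder values (the real logic stays in YAML and bash):

```python
# Sketch of how parallel:matrix fans out into jobs. Values are abbreviated
# stand-ins for the scenario groups above, not the full lists.
matrix = [
    {"CPUS_PER_RUN": "1", "SCENARIOS": [
        "span tracer core_api set_http_meta",
        "http_propagation_extract http_propagation_inject rate_limiter",
    ]},
    {"CPUS_PER_RUN": "2", "SCENARIOS": [
        "django_simple flask_simple flask_sqli",
    ]},
]

# GitLab creates one job per (CPUS_PER_RUN, SCENARIOS-element) combination...
jobs = [
    {"CPUS_PER_RUN": entry["CPUS_PER_RUN"], "SCENARIOS": scenarios}
    for entry in matrix
    for scenarios in entry["SCENARIOS"]
]

for job in jobs:
    # ...and the job script splits the space-separated SCENARIOS string,
    # mirroring the `tr -s '[:space:]' ' '` loop in the script above.
    for scenario in job["SCENARIOS"].split():
        print(job["CPUS_PER_RUN"], scenario)
```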
"sampling_rule_matches" - - "set_http_meta" - - "django_simple" - - "flask_simple" - - "flask_sqli" - - "core_api" - - "otel_span" - - "otel_sdk_span" - - "appsec_iast_aspects" - - "appsec_iast_aspects_ospath" - - "appsec_iast_aspects_re_module" - - "appsec_iast_aspects_split" + - CPUS_PER_RUN: "1" + SCENARIOS: + - "span tracer core_api set_http_meta telemetry_add_metric otel_span otel_sdk_span recursive_computation sampling_rule_matches" + - "http_propagation_extract http_propagation_inject rate_limiter appsec_iast_aspects appsec_iast_aspects_ospath appsec_iast_aspects_re_module appsec_iast_aspects_split appsec_iast_propagation" + - "packages_package_for_root_module_mapping packages_update_imported_dependencies" + - CPUS_PER_RUN: "2" + SCENARIOS: + - "django_simple flask_simple flask_sqli errortracking_django_simple errortracking_flask_sqli" # Flaky timeouts on starting up # - "appsec_iast_django_startup" - # TOOD: Re-enable when this issue is resolved: - - "appsec_iast_propagation" - - "errortracking_django_simple" # They take a long time to run and frequently time out # TODO: Make benchmarks faster, or run less frequently, or as macrobenchmarks # - "appsec_iast_django_startup" - - "errortracking_flask_sqli" # Flaky. Timeout errors # - "encoder" - - "http_propagation_extract" - - "http_propagation_inject" - - "rate_limiter" - - "packages_package_for_root_module_mapping" - - "packages_update_imported_dependencies" - - "recursive_computation" - - "telemetry_add_metric" # They take a long time to run, and now need the agent running # TODO: Make benchmarks faster, or run less frequently, or as macrobenchmarks # - "startup" diff --git a/.gitlab/benchmarks/steps/combine-results.sh b/.gitlab/benchmarks/steps/combine-results.sh new file mode 100755 index 00000000000..0d2815fc00e --- /dev/null +++ b/.gitlab/benchmarks/steps/combine-results.sh @@ -0,0 +1,27 @@ +#!/usr/bin/env bash +set -exo pipefail + +ARTIFACTS_DIR="${1}" + +# Combine all the individual results into a single results fule. +# We need: +# - to merge all the benchmarks into a single list +# - to keep only one copy of the metadata, removing fields that are per-benchmark specific +# - add benchmark specific metadata into each benchmark entry +jq -s ' + map( + . 
diff --git a/benchmarks/base/run.py b/benchmarks/base/run.py
index 41a3a4a6f17..14d29512bd8 100755
--- a/benchmarks/base/run.py
+++ b/benchmarks/base/run.py
@@ -2,8 +2,12 @@

 import json
 import os
+import queue
 import subprocess
 import sys
+import threading
+from typing import Any
+from typing import Optional

 import yaml

@@ -16,13 +20,39 @@ def read_config(path):
         return yaml.load(fp, Loader=yaml.FullLoader)


-def run(scenario_py, cname, cvars, output_dir):
+def cpu_affinity_to_cpu_groups(cpu_affinity: str, cpus_per_run: int) -> list[list[int]]:
+    # CPU_AFFINITY is a comma-separated list of CPU IDs and ranges, e.g.:
+    #   6-11
+    #   6-11,14,15
+    #   6-11,13-15,16,18,20-21
+    cpu_ids: list[int] = []
+    for part in cpu_affinity.split(","):
+        if "-" in part:
+            start, end = part.split("-")
+            cpu_ids.extend(range(int(start), int(end) + 1))
+        else:
+            cpu_ids.append(int(part))
+
+    if len(cpu_ids) % cpus_per_run != 0:
+        raise ValueError(f"CPU count {len(cpu_ids)} not divisible by CPUS_PER_RUN={cpus_per_run}")
+    cpu_groups = [cpu_ids[i : i + cpus_per_run] for i in range(0, len(cpu_ids), cpus_per_run)]
+    return cpu_groups
+
+
+def run(scenario_py: str, cname: str, cvars: dict[str, Any], output_dir: str, cpus: Optional[list[int]] = None):
+    cmd: list[str] = []
+
+    if cpus:
+        # Use taskset to set CPU affinity
+        cpu_list_str = ",".join(str(cpu) for cpu in cpus)
+        cmd += ["taskset", "-c", cpu_list_str]
+
     if SHOULD_PROFILE:
         # viztracer won't create the missing directory itself
         viztracer_output_dir = os.path.join(output_dir, "viztracer")
         os.makedirs(viztracer_output_dir, exist_ok=True)
-        cmd = [
+        cmd += [
             "viztracer",
             "--minimize_memory",
             "--min_duration",
@@ -33,14 +63,14 @@ def run(scenario_py, cname, cvars, output_dir):
             os.path.join(output_dir, "viztracer", "{}.json".format(cname)),
         ]
     else:
-        cmd = ["python"]
+        cmd += ["python"]

     cmd += [
         scenario_py,
         # necessary to copy PYTHONPATH for venvs
         "--copy-env",
-        "--append",
-        os.path.join(output_dir, "results.json"),
+        "--output",
+        os.path.join(output_dir, f"results.{cname}.json"),
         "--name",
         cname,
     ]
@@ -72,5 +102,50 @@
         config = {k: v for k, v in config.items() if k in allowed_configs}
         print("Filtering to configs: {}".format(", ".join(sorted(config.keys()))))

+    CPU_AFFINITY = os.environ.get("CPU_AFFINITY")
+
+    # No CPU affinity specified, run sequentially
+    if not CPU_AFFINITY:
+        for cname, cvars in config.items():
+            run("scenario.py", cname, cvars, output_dir)
+        sys.exit(0)
+
+    CPUS_PER_RUN = int(os.environ.get("CPUS_PER_RUN", "1"))
+    cpu_groups = cpu_affinity_to_cpu_groups(CPU_AFFINITY, CPUS_PER_RUN)
+
+    print(f"Running with CPU affinity: {CPU_AFFINITY}")
+    print(f"CPUs per run: {CPUS_PER_RUN}")
+    print(f"CPU groups: {list(cpu_groups)}")
+
+    job_queue = queue.Queue()
+    cpu_queue = queue.Queue()
+
+    def worker(cpu_queue: queue.Queue, job_queue: queue.Queue):
+        while True:
+            try:
+                cname, cvars = job_queue.get(timeout=1)
+            except queue.Empty:
+                # Queue drained; exit this worker. Catching Empty here avoids
+                # racing other workers on a separate qsize() check.
+                break
+
+            cpus = cpu_queue.get()
+            print(f"Starting run {cname} on CPUs {cpus}")
+            run("scenario.py", cname, cvars, output_dir, cpus=cpus)
+            print(f"Finished run {cname}")
+            cpu_queue.put(cpus)
+
     for cname, cvars in config.items():
-        run("scenario.py", cname, cvars, output_dir)
+        job_queue.put((cname, cvars))
+
+    workers = []
+    print(f"Starting {len(cpu_groups)} worker threads")
+    for cpus in cpu_groups:
+        cpu_queue.put(cpus)
+        t = threading.Thread(target=worker, args=(cpu_queue, job_queue))
+        t.start()
+        workers.append(t)
+
+    for t in workers:
+        t.join()
+
+    print("All runs completed.")
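To make the grouping concrete, this is what `cpu_affinity_to_cpu_groups` returns for affinity strings in the formats noted in its comment, and how a group becomes a `taskset` prefix. The import path is hypothetical; copy the function out of `benchmarks/base/run.py` if it is not importable as a module:

```python
from run import cpu_affinity_to_cpu_groups  # hypothetical import path

print(cpu_affinity_to_cpu_groups("6-11", 1))
# [[6], [7], [8], [9], [10], [11]]  -> six single-CPU runs in parallel

print(cpu_affinity_to_cpu_groups("6-11,14,15", 2))
# [[6, 7], [8, 9], [10, 11], [14, 15]]  -> four two-CPU runs in parallel

# A group such as [6, 7] becomes the command prefix `taskset -c 6,7`,
# pinning that scenario's process to those CPUs.

cpu_affinity_to_cpu_groups("0-3", 3)
# raises ValueError: CPU count 4 not divisible by CPUS_PER_RUN=3
```

Pinning each run to its own CPU group is what lets several scenarios execute concurrently in one job without stealing cycles from each other.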
diff --git a/ddtrace/contrib/internal/trace_utils_base.py b/ddtrace/contrib/internal/trace_utils_base.py
index af222425693..12ba484f1ee 100644
--- a/ddtrace/contrib/internal/trace_utils_base.py
+++ b/ddtrace/contrib/internal/trace_utils_base.py
@@ -150,7 +150,10 @@ def _set_url_tag(integration_config: IntegrationConfig, span: Span, url: str, qu
         # users should set ``DD_TRACE_HTTP_CLIENT_TAG_QUERY_STRING=False``. This case should be
         # removed when config.global_query_string_obfuscation_disabled is removed (v3.0).
         span._set_tag_str(http.URL, url)
-    elif getattr(config._obfuscation_query_string_pattern, "pattern", None) == b"":
+    elif (
+        config._obfuscation_query_string_pattern is None
+        or getattr(config._obfuscation_query_string_pattern, "pattern", None) == b""
+    ):
         # obfuscation is disabled when DD_TRACE_OBFUSCATION_QUERY_STRING_REGEXP=""
         span._set_tag_str(http.URL, strip_query_string(url))
     else:
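The added `is None` check matters because an unset pattern previously fell through to the obfuscation branch: `getattr(None, "pattern", None)` is `None`, not `b""`. A minimal standalone sketch of just this dispatch (the function and the simplified `strip_query_string` helper are illustrative stand-ins, and the preceding branch for `DD_TRACE_HTTP_CLIENT_TAG_QUERY_STRING` is omitted):

```python
import re
from typing import Optional


def strip_query_string(url: str) -> str:
    # Simplified stand-in for ddtrace's strip_query_string helper.
    return url.split("?", 1)[0]


def effective_url_tag(url: str, pattern: Optional[re.Pattern]) -> str:
    # Mirrors the fixed elif: both "pattern never configured" (None) and
    # "pattern compiled from an empty string" now count as obfuscation
    # disabled, so only the URL without its query string gets tagged.
    if pattern is None or getattr(pattern, "pattern", None) == b"":
        return strip_query_string(url)
    return url  # the real code obfuscates matching query parameters here


print(effective_url_tag("https://example.com/a?token=hunter2", None))
# https://example.com/a
print(effective_url_tag("https://example.com/a?token=hunter2", re.compile(b"")))
# https://example.com/a
```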