Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions ddtrace/_trace/processor/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,13 @@
from ddtrace.constants import _APM_ENABLED_METRIC_KEY as MK_APM_ENABLED
from ddtrace.constants import _SINGLE_SPAN_SAMPLING_MECHANISM
from ddtrace.internal import gitmetadata
from ddtrace.internal import process_tags
from ddtrace.internal import telemetry
from ddtrace.internal.constants import COMPONENT
from ddtrace.internal.constants import HIGHER_ORDER_TRACE_ID_BITS
from ddtrace.internal.constants import LAST_DD_PARENT_ID_KEY
from ddtrace.internal.constants import MAX_UINT_64BITS
from ddtrace.internal.constants import PROCESS_TAGS
from ddtrace.internal.constants import SAMPLING_DECISION_TRACE_TAG_KEY
from ddtrace.internal.constants import SamplingMechanism
from ddtrace.internal.logger import get_logger
Expand Down Expand Up @@ -250,6 +252,8 @@ def process_trace(self, trace: List[Span]) -> Optional[List[Span]]:
span._update_tags_from_context()
self._set_git_metadata(span)
span._set_tag_str("language", "python")
if p_tags := process_tags.process_tags:
span._set_tag_str(PROCESS_TAGS, p_tags)
# for 128 bit trace ids
if span.trace_id > MAX_UINT_64BITS:
trace_id_hob = _get_64_highest_order_bits_as_hex(span.trace_id)
Expand Down
1 change: 1 addition & 0 deletions ddtrace/internal/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@
HTTP_REQUEST_PATH_PARAMETER = "http.request.path.parameter"
REQUEST_PATH_PARAMS = "http.request.path_params"
STATUS_403_TYPE_AUTO = {"status_code": 403, "type": "auto"}
PROCESS_TAGS = "_dd.tags.process"

CONTAINER_ID_HEADER_NAME = "Datadog-Container-Id"

Expand Down
56 changes: 56 additions & 0 deletions ddtrace/internal/process_tags/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
import os
from pathlib import Path
import re
import sys
from typing import Optional

from ddtrace.internal.logger import get_logger
from ddtrace.settings._config import config

from .constants import ENTRYPOINT_BASEDIR_TAG
from .constants import ENTRYPOINT_NAME_TAG
from .constants import ENTRYPOINT_TYPE_SCRIPT
from .constants import ENTRYPOINT_TYPE_TAG
from .constants import ENTRYPOINT_WORKDIR_TAG


log = get_logger(__name__)

_INVALID_CHARS_PATTERN = re.compile(r"[^a-z0-9/._-]")
_CONSECUTIVE_UNDERSCORES_PATTERN = re.compile(r"_{2,}")


def normalize_tag(value: str) -> str:
normalized = _INVALID_CHARS_PATTERN.sub("_", value.lower())
normalized = _CONSECUTIVE_UNDERSCORES_PATTERN.sub("_", normalized)
return normalized.strip("_")


def generate_process_tags() -> Optional[str]:
if not config._process_tags_enabled:
return None

try:
return ",".join(
f"{key}:{normalize_tag(value)}"
for key, value in sorted(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is the sorting needed?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In this branch/for APM, no, but it will be needed for dsm in a follow-up PR

[
(ENTRYPOINT_WORKDIR_TAG, os.path.basename(os.getcwd())),
(ENTRYPOINT_BASEDIR_TAG, Path(sys.argv[0]).resolve().parent.name),
(ENTRYPOINT_NAME_TAG, os.path.splitext(os.path.basename(sys.argv[0]))[0]),
(ENTRYPOINT_TYPE_TAG, ENTRYPOINT_TYPE_SCRIPT),
]
)
)
except Exception as e:
log.debug("failed to get process_tags: %s", e)
return None


# For test purpose
def _process_tag_reload():
global process_tags
process_tags = generate_process_tags()
Comment on lines +51 to +53
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can we move this to e.g. a pytest fixture?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Process Tags will be used across all product therefore this function will be used in bunch of tests. Does it still make sense to add this to a pytest fixture ?



process_tags = generate_process_tags()
7 changes: 7 additions & 0 deletions ddtrace/internal/process_tags/constants.py
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Perhaps we can merge this with the init script and have a single process_tags.py file.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems having a constants.py file is a common pattern in the repo and I think it is more maintainable if we add more in the future (it will probably happen)

Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
ENTRYPOINT_NAME_TAG = "entrypoint.name"
ENTRYPOINT_WORKDIR_TAG = "entrypoint.workdir"

ENTRYPOINT_TYPE_TAG = "entrypoint.type"
ENTRYPOINT_TYPE_SCRIPT = "script"

ENTRYPOINT_BASEDIR_TAG = "entrypoint.basedir"
3 changes: 3 additions & 0 deletions ddtrace/internal/settings/_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -665,6 +665,9 @@ def __init__(self):
self._trace_resource_renaming_always_simplified_endpoint = _get_config(
"DD_TRACE_RESOURCE_RENAMING_ALWAYS_SIMPLIFIED_ENDPOINT", default=False, modifier=asbool
)
self._process_tags_enabled = _get_config(
"DD_EXPERIMENTAL_PROPAGATE_PROCESS_TAGS_ENABLED", default=False, modifier=asbool
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There's a comment in the RFC that this should be enabled by default if the user also adds DEFAULT_TRACE_EXPERIMENTAL_FEATURES_ENABLED. I'm not sure what that means for Python since we have something called DD_TRACE_EXPERIMENTAL_FEATURES_ENABLED:
https://ddtrace.readthedocs.io/en/stable/configuration.html#DD_TRACE_EXPERIMENTAL_FEATURES_ENABLED, which currently lists runtime metrics as an experimental feature:

class EXPERIMENTAL_FEATURES:

I'm not sure if that's also needed in this PR on top of this new environment variable.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This env variable is meant to be used like:
DD_TRACE_EXPERIMENTAL_FEATURES_ENABLED=DD_RUNTIME_METRICS_ENABLED.

So I don't think that covers our use case.

)

# Long-running span interval configurations
# Only supported for Ray spans for now
Expand Down
106 changes: 106 additions & 0 deletions tests/internal/test_process_tags.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
from unittest.mock import patch

import pytest

from ddtrace.internal import process_tags
from ddtrace.internal.process_tags import _process_tag_reload
from ddtrace.internal.process_tags import normalize_tag
from ddtrace.settings._config import config
from tests.subprocesstest import run_in_subprocess
from tests.utils import TracerTestCase


@pytest.mark.parametrize(
"input_tag,expected",
[
("HelloWorld", "helloworld"),
("Hello@World!", "hello_world"),
("HeLLo123", "hello123"),
("hello world", "hello_world"),
("a/b.c_d-e", "a/b.c_d-e"),
("héllø", "h_ll"),
("", ""),
("💡⚡️", ""),
("!foo@", "foo"),
("123_abc.DEF-ghi/jkl", "123_abc.def-ghi/jkl"),
("Env:Prod-Server#1", "env_prod-server_1"),
("__hello__world__", "hello_world"),
("___test___", "test"),
("_leading", "leading"),
("trailing_", "trailing"),
("double__underscore", "double_underscore"),
("test\x99\x8faaa", "test_aaa"),
],
)
def test_normalize_tag(input_tag, expected):
assert normalize_tag(input_tag) == expected


class TestProcessTags(TracerTestCase):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we probably want a tearDown to reset process tags to their original configuration value and generated value?

since all of these tests are polluting the global state/configuration and the state stored at the module level of process_tags.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

addressed in 0428dcd

def setUp(self):
super(TestProcessTags, self).setUp()
self._original_process_tags_enabled = config._process_tags_enabled
self._original_process_tags = process_tags.process_tags

def tearDown(self):
config._process_tags_enabled = self._original_process_tags_enabled
process_tags.process_tags = self._original_process_tags
super().tearDown()

@pytest.mark.snapshot
def test_process_tags_deactivated(self):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this could be a snapshot test too.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

addressed in 0428dcd

config._process_tags_enabled = False
_process_tag_reload()

with self.tracer.trace("test"):
pass

@pytest.mark.snapshot
def test_process_tags_activated(self):
with patch("sys.argv", ["/path/to/test_script.py"]), patch("os.getcwd", return_value="/path/to/workdir"):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

are there any edge cases/failure scenarios we should be testing?

e.g. what if sys.argv[0] is outside of os.getcwd?

what if the basename of sys.argv[0] doesn't have an extension?

is it possible for Path(sys.argv[0]).resolve() to not have a parent?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

addressed in 0428dcd

I'm not sure if you said:

sys.argv[0] is outside of os.getcwd?

as a real example. Because in terms of code, everything would work. Maybe it would not make sense in terms of tag values but this is not something we can control

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it possible to do a windows version of this in the test suite? Windows path do \ instead of /: https://learn.microsoft.com/en-us/windows-server/administration/windows-commands/path#examples .

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

os.path.basename() uses the path separator of the operating system it’s running on. Therefore, if we want to add a test for Windows, we'll have to run the test on a Windows machine which might be overkill.

config._process_tags_enabled = True
_process_tag_reload()

with self.tracer.trace("parent"):
with self.tracer.trace("child"):
pass

@pytest.mark.snapshot
def test_process_tags_edge_case(self):
with patch("sys.argv", ["/test_script"]), patch("os.getcwd", return_value="/path/to/workdir"):
config._process_tags_enabled = True
_process_tag_reload()

with self.tracer.trace("span"):
pass

@pytest.mark.snapshot
def test_process_tags_error(self):
with patch("sys.argv", []), patch("os.getcwd", return_value="/path/to/workdir"):
config._process_tags_enabled = True

with self.override_global_config(dict(_telemetry_enabled=False)):
with patch("ddtrace.internal.process_tags.log") as mock_log:
_process_tag_reload()

with self.tracer.trace("span"):
pass

# Check if debug log was called
mock_log.debug.assert_called_once()
call_args = mock_log.debug.call_args[0]
assert "failed to get process_tags" in call_args[0]

@pytest.mark.snapshot
@run_in_subprocess(env_overrides=dict(DD_TRACE_PARTIAL_FLUSH_ENABLED="true", DD_TRACE_PARTIAL_FLUSH_MIN_SPANS="2"))
def test_process_tags_partial_flush(self):
with patch("sys.argv", ["/path/to/test_script.py"]), patch("os.getcwd", return_value="/path/to/workdir"):
config._process_tags_enabled = True
_process_tag_reload()

with self.override_global_config(dict(_partial_flush_enabled=True, _partial_flush_min_spans=2)):
with self.tracer.trace("parent"):
with self.tracer.trace("child1"):
pass
with self.tracer.trace("child2"):
pass
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
[[
{
"name": "span",
"service": "tests.internal",
"resource": "span",
"trace_id": 0,
"span_id": 1,
"parent_id": 0,
"type": "",
"error": 0,
"meta": {
"_dd.p.dm": "-0",
"_dd.p.tid": "6911da3a00000000",
"_dd.tags.process": "entrypoint.basedir:,entrypoint.name:test_script,entrypoint.type:script,entrypoint.workdir:workdir",
"language": "python",
"runtime-id": "c9342b8003de45feb0bf56d32ece46a1"
},
"metrics": {
"_dd.top_level": 1,
"_dd.tracer_kr": 1.0,
"_sampling_priority_v1": 1,
"process_id": 605
},
"duration": 105292,
"start": 1762777658431833668
}]]
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
[[
{
"name": "parent",
"service": "tests.internal",
"resource": "parent",
"trace_id": 0,
"span_id": 1,
"parent_id": 0,
"type": "",
"error": 0,
"meta": {
"_dd.p.dm": "-0",
"_dd.p.tid": "6911dc5a00000000",
"_dd.tags.process": "entrypoint.basedir:to,entrypoint.name:test_script,entrypoint.type:script,entrypoint.workdir:workdir",
"language": "python",
"runtime-id": "2d5de91f8dd9442cad7faca5554a09f1"
},
"metrics": {
"_dd.top_level": 1,
"_dd.tracer_kr": 1.0,
"_sampling_priority_v1": 1,
"process_id": 605
},
"duration": 231542,
"start": 1762778202287875128
},
{
"name": "child",
"service": "tests.internal",
"resource": "child",
"trace_id": 0,
"span_id": 2,
"parent_id": 1,
"type": "",
"error": 0,
"duration": 55500,
"start": 1762778202287999128
}]]
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
[[
{
"name": "test",
"service": "tests.internal",
"resource": "test",
"trace_id": 0,
"span_id": 1,
"parent_id": 0,
"type": "",
"error": 0,
"meta": {
"_dd.p.dm": "-0",
"_dd.p.tid": "6911dc5a00000000",
"language": "python",
"runtime-id": "2d5de91f8dd9442cad7faca5554a09f1"
},
"metrics": {
"_dd.top_level": 1,
"_dd.tracer_kr": 1.0,
"_sampling_priority_v1": 1,
"process_id": 605
},
"duration": 22292,
"start": 1762778202327669586
}]]
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
[[
{
"name": "span",
"service": "tests.internal",
"resource": "span",
"trace_id": 0,
"span_id": 1,
"parent_id": 0,
"type": "",
"error": 0,
"meta": {
"_dd.p.dm": "-0",
"_dd.p.tid": "6911dc5a00000000",
"_dd.tags.process": "entrypoint.basedir:,entrypoint.name:test_script,entrypoint.type:script,entrypoint.workdir:workdir",
"language": "python",
"runtime-id": "2d5de91f8dd9442cad7faca5554a09f1"
},
"metrics": {
"_dd.top_level": 1,
"_dd.tracer_kr": 1.0,
"_sampling_priority_v1": 1,
"process_id": 605
},
"duration": 35458,
"start": 1762778202321224878
}]]
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
[[
{
"name": "span",
"service": "tests.internal",
"resource": "span",
"trace_id": 0,
"span_id": 1,
"parent_id": 0,
"type": "",
"error": 0,
"meta": {
"_dd.p.dm": "-0",
"_dd.p.tid": "6911db6e00000000",
"language": "python",
"runtime-id": "c59cb90aad3246579bc4421d1cca07c8"
},
"metrics": {
"_dd.top_level": 1,
"_dd.tracer_kr": 1.0,
"_sampling_priority_v1": 1,
"process_id": 605
},
"duration": 102833,
"start": 1762777966446950922
}]]
Loading
Loading