From c5708ff9b479432b577b977ddbdb063be6793564 Mon Sep 17 00:00:00 2001 From: Kyle Verhoog Date: Tue, 16 Aug 2022 17:23:02 -0400 Subject: [PATCH] fix(profiling): fix possible deadlock It is possible that while the profiling StackCollector is handling the on_span_activate event that a gc occurs which could result in a weakref finalize callback being called which then could activate a new span. This sequence of events results in a deadlock as the lock the StackCollector uses is not reentrant. A regression test would be great but we cannot think of a tractable test case which wouldn't be flaky. --- ddtrace/internal/nogevent.py | 1 + ddtrace/profiling/_threading.pyx | 4 +++- .../notes/profiling-threadlink-lock-27df798c36ed64e6.yaml | 4 ++++ 3 files changed, 8 insertions(+), 1 deletion(-) create mode 100644 releasenotes/notes/profiling-threadlink-lock-27df798c36ed64e6.yaml diff --git a/ddtrace/internal/nogevent.py b/ddtrace/internal/nogevent.py index 4c91c0f90ff..b63cda60ca3 100644 --- a/ddtrace/internal/nogevent.py +++ b/ddtrace/internal/nogevent.py @@ -37,6 +37,7 @@ def is_module_patched(module): thread_get_ident = get_original(six.moves._thread.__name__, "get_ident") Thread = get_original("threading", "Thread") Lock = get_original("threading", "Lock") +RLock = get_original("threading", "RLock") is_threading_patched = is_module_patched("threading") diff --git a/ddtrace/profiling/_threading.pyx b/ddtrace/profiling/_threading.pyx index 7832e246f29..b3b9279d4f8 100644 --- a/ddtrace/profiling/_threading.pyx +++ b/ddtrace/profiling/_threading.pyx @@ -69,7 +69,9 @@ class _ThreadLink(_thread_link_base): # Key is a thread_id # Value is a weakref to an object _thread_id_to_object = attr.ib(factory=dict, repr=False, init=False, type=typing.Dict[int, _weakref_type]) - _lock = attr.ib(factory=nogevent.Lock, repr=False, init=False, type=nogevent.Lock) + # Note that this lock has to be reentrant as spans can be activated unexpectedly in the same thread + # ex. during a gc weakref finalize callback + _lock = attr.ib(factory=nogevent.RLock, repr=False, init=False, type=nogevent.RLock) def link_object( self, diff --git a/releasenotes/notes/profiling-threadlink-lock-27df798c36ed64e6.yaml b/releasenotes/notes/profiling-threadlink-lock-27df798c36ed64e6.yaml new file mode 100644 index 00000000000..54af811a28e --- /dev/null +++ b/releasenotes/notes/profiling-threadlink-lock-27df798c36ed64e6.yaml @@ -0,0 +1,4 @@ +--- +fixes: + - | + profiling: fix a possible deadlock due to spans being activated unexpectedly.