Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
147 changes: 147 additions & 0 deletions ddtrace/profiling/collector/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
cmake_minimum_required(VERSION 3.19)
include(FetchContent)

project(_memalloc)

# Build everything in this directory as C++20 and refuse to fall back to an older standard.
set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# Directory-wide compiler flags: position-independent code, hidden symbol visibility, pthread support and the
# usual warning sets. These are set at directory scope, so they apply to every target created after this point.
add_compile_options(
  -fPIC
  -fvisibility=hidden
  -pthread
  -Wall
  -Wextra)

# Feature-test macros.
if(APPLE)
  # Newer macOS SDKs do not expose the BSD-style types unless _DARWIN_C_SOURCE is defined; Abseil needs them on
  # macOS.
  add_compile_definitions(_DARWIN_C_SOURCE)
endif()

add_compile_definitions(_POSIX_C_SOURCE=200809L)

# Decide whether the extension is built against Abseil.
#
# * DD_COMPILE_ABSEIL=0 or DD_COMPILE_ABSEIL=false in the environment (read at configure time) disables Abseil.
# * A Debug build type disables Abseil.
# * Anything else, including an unset CMAKE_BUILD_TYPE, downloads Abseil and makes its targets available.
#
# When Abseil is disabled, DONT_COMPILE_ABSEIL is defined for every target in this directory. The condition below
# must stay in sync with the one guarding the absl::flat_hash_map link further down in this file.
if(DEFINED ENV{DD_COMPILE_ABSEIL} AND ("$ENV{DD_COMPILE_ABSEIL}" STREQUAL "0" OR "$ENV{DD_COMPILE_ABSEIL}" STREQUAL
                                                                                 "false"))
  message("==============================================================")
  message("WARNING: DD_COMPILE_ABSEIL set to 0 or false: not using abseil")
  message("==============================================================")
  # add_compile_definitions is used (rather than the legacy add_definitions) to match the rest of this file.
  add_compile_definitions(DONT_COMPILE_ABSEIL)
elseif(CMAKE_BUILD_TYPE STREQUAL "Debug")
  message("=====================================")
  message("WARNING: Debug mode: not using abseil")
  message("=====================================")
  add_compile_definitions(DONT_COMPILE_ABSEIL)
else()
  message("Release, RelWithDebInfo, or MinSizeRel mode: using abseil (DD_COMPILE_ABSEIL unset or not 0/false)")
  # NOTE(review): the archive is fetched without a URL_HASH, so its integrity is not verified -- consider pinning
  # the expected SHA256 of this release.
  FetchContent_Declare(absl URL "https://github.com/abseil/abseil-cpp/archive/refs/tags/20250127.1.zip")
  FetchContent_MakeAvailable(absl)
endif()

# Look for Python 3 (interpreter and development files). The lookup is intentionally not REQUIRED so that build
# environments exposing only part of a Python installation can still proceed through the fallback below.
find_package(
  Python3
  COMPONENTS Interpreter Development)

# The Python headers are mandatory. Prefer Python3_INCLUDE_DIRS, fall back to PYTHON_INCLUDE_DIRS when only that
# variable is set, and abort the configure step when neither is available.
if(NOT Python3_INCLUDE_DIRS AND PYTHON_INCLUDE_DIRS)
  set(Python3_INCLUDE_DIRS ${PYTHON_INCLUDE_DIRS})
elseif(NOT Python3_INCLUDE_DIRS)
  message(FATAL_ERROR "Python3_INCLUDE_DIRS not found")
endif()

# The Python3::Python imported target is not guaranteed to exist in every build environment. When it is missing the
# extension is built from the include directories alone and Python symbols are left to be resolved dynamically.
if(NOT TARGET Python3::Python)
  message(STATUS "Python3::Python target not found, using include dirs only")
endif()

# Translation units that make up the extension.
set(SOURCE_FILES
    _memalloc.cpp
    _memalloc_tb.cpp
    _memalloc_heap.cpp
    _memalloc_reentrant.cpp
    _memalloc_heap_map.cpp)

# Name of the produced file. Start from the default and let EXTENSION_NAME override it when the caller supplies one
# (per the original note, the value passed from setup.py already carries the full suffix).
set(FULL_EXTENSION_NAME "_memalloc.so")
if(DEFINED EXTENSION_NAME)
  set(FULL_EXTENSION_NAME "${EXTENSION_NAME}")
endif()

# The target is named after the complete file name, so the automatic prefix and suffix are blanked out to keep CMake
# from decorating it any further.
add_library(${FULL_EXTENSION_NAME} SHARED ${SOURCE_FILES})
set_target_properties(
  ${FULL_EXTENSION_NAME}
  PROPERTIES PREFIX ""
             SUFFIX "")

# Place the built library directly into LIB_INSTALL_DIR when the caller provides it.
if(DEFINED LIB_INSTALL_DIR)
  set_target_properties(
    ${FULL_EXTENSION_NAME}
    PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${LIB_INSTALL_DIR})
endif()

# Header search path: the Python headers, this directory, and the dd_wrapper public headers.
target_include_directories(
  ${FULL_EXTENSION_NAME}
  PRIVATE ${Python3_INCLUDE_DIRS}
          ${CMAKE_CURRENT_SOURCE_DIR}
          ${CMAKE_CURRENT_SOURCE_DIR}/../../internal/datadog/profiling/dd_wrapper/include)

# Link against Python3::Python only when that imported target exists (it may be absent, e.g. on manylinux) and the
# platform is not Apple; on macOS Python symbols are resolved with -undefined dynamic_lookup instead.
# NOTE(review): the original comment also says modules should not link libpython on Linux, yet this condition does
# link it on Linux whenever the target is available -- confirm which behaviour is intended.
if(TARGET Python3::Python AND NOT APPLE)
  target_link_libraries(
    ${FULL_EXTENSION_NAME}
    PRIVATE Python3::Python)
endif()

# Link Abseil's flat_hash_map unless Abseil was disabled: that is, when the build type is not Debug and
# DD_COMPILE_ABSEIL is not set to "0" or "false" in the environment.
if((NOT (CMAKE_BUILD_TYPE STREQUAL "Debug"))
   AND (NOT (DEFINED ENV{DD_COMPILE_ABSEIL} AND ("$ENV{DD_COMPILE_ABSEIL}" STREQUAL "0" OR "$ENV{DD_COMPILE_ABSEIL}"
                                                                                        STREQUAL "false"))))
  target_link_libraries(
    ${FULL_EXTENSION_NAME}
    PRIVATE absl::flat_hash_map)
endif()

# Per-platform link settings. Both branches bake the install rpath into the build output so that the loader looks
# for libdd_wrapper relative to the extension's own location.
if(APPLE)
  # macOS: rpath relative to the loading binary, and Python symbols resolved at load time.
  set_target_properties(
    ${FULL_EXTENSION_NAME}
    PROPERTIES BUILD_WITH_INSTALL_RPATH TRUE
               INSTALL_RPATH "@loader_path/../../internal/datadog/profiling")
  set_property(TARGET ${FULL_EXTENSION_NAME} PROPERTY LINK_FLAGS "-undefined dynamic_lookup")
elseif(UNIX)
  # Other Unix platforms (Linux): link libatomic and use an $ORIGIN-relative rpath.
  set_target_properties(
    ${FULL_EXTENSION_NAME}
    PROPERTIES BUILD_WITH_INSTALL_RPATH TRUE
               INSTALL_RPATH "$ORIGIN/../../internal/datadog/profiling")
  target_link_libraries(
    ${FULL_EXTENSION_NAME}
    PRIVATE atomic)
endif()

# libdd_wrapper is only looked up when the caller passes NATIVE_EXTENSION_LOCATION.
if(DEFINED NATIVE_EXTENSION_LOCATION)
  # Search exactly two candidate directories (no system default paths) for the already-built shared library.
  find_library(
    DD_WRAPPER_LIB
    NAMES libdd_wrapper${EXTENSION_SUFFIX}
    PATHS ${CMAKE_CURRENT_SOURCE_DIR}/../../internal/datadog/profiling
          ${NATIVE_EXTENSION_LOCATION}/../../datadog/profiling
    NO_DEFAULT_PATH)

  if(NOT DD_WRAPPER_LIB)
    # A missing library is not fatal at configure time; the warning flags that linking may fail later.
    message(WARNING "libdd_wrapper not found, extension may not link correctly")
  else()
    message(STATUS "Found libdd_wrapper: ${DD_WRAPPER_LIB}")
    target_link_libraries(
      ${FULL_EXTENSION_NAME}
      PRIVATE ${DD_WRAPPER_LIB})
  endif()
endif()

# Control assert() and other NDEBUG-gated code.
#
# * Release, RelWithDebInfo and MinSizeRel builds define NDEBUG.
# * Every other build type (including Debug and an unset CMAKE_BUILD_TYPE) undefines NDEBUG, so that debug checks
#   stay active even if NDEBUG arrived through externally supplied compiler flags.
if(CMAKE_BUILD_TYPE STREQUAL "Release"
   OR CMAKE_BUILD_TYPE STREQUAL "RelWithDebInfo"
   OR CMAKE_BUILD_TYPE STREQUAL "MinSizeRel")
  target_compile_definitions(${FULL_EXTENSION_NAME} PRIVATE NDEBUG)
else()
  # This must be a compile *option*: passing "UNDEBUG" to target_compile_definitions would emit -DUNDEBUG, i.e.
  # define an unrelated macro named UNDEBUG instead of undefining NDEBUG.
  # NOTE(review): assumes no source file tests a macro literally named UNDEBUG -- confirm.
  target_compile_options(${FULL_EXTENSION_NAME} PRIVATE -UNDEBUG)
endif()

# Install the extension. LIB_INSTALL_DIR is optional elsewhere in this file, so it is treated as optional here too:
# when it is set the library is installed there, otherwise CMake's default library destination is used instead of
# handing install() a DESTINATION keyword with no value.
if(DEFINED LIB_INSTALL_DIR AND NOT "${LIB_INSTALL_DIR}" STREQUAL "")
  install(TARGETS ${FULL_EXTENSION_NAME} LIBRARY DESTINATION "${LIB_INSTALL_DIR}")
else()
  install(TARGETS ${FULL_EXTENSION_NAME})
endif()

# Tests are opt-in: configure with -DBUILD_TESTING=ON to enable CTest and add the test/ subdirectory.
option(BUILD_TESTING "Build tests" OFF)
if(BUILD_TESTING)
  enable_testing()
  add_subdirectory(test)
endif()
163 changes: 36 additions & 127 deletions ddtrace/profiling/collector/_memalloc_heap_map.cpp
Original file line number Diff line number Diff line change
@@ -1,93 +1,71 @@
#include "_memalloc_heap_map.hpp"
#include "_memalloc_debug.h"

/* Note that the HeapSample tables will, in general, never free their backing
* memory unless we completely clear them. The table takes 17 bytes per entry: 8
* for the void* keys, 8 for the traceback* values, and 1 byte per entry for
* control metadata. Assuming a load factor target of ~50%, meaning our table
* has roughly twice as many slots as actual entries, then for our default
* maximum of 2^16 entries the table will be about 2MiB. A table this large
* would correspond to a program with a ~65GiB live heap with a 1MiB default
* sampling interval. Most of the memory usage of the profiler will come from
* the tracebacks themselves, which we _do_ free when we're done with them.
/* Note that the heap tracking tables will, in general, never free their backing
* memory unless we completely clear them. Abseil's flat_hash_map uses approximately
* 8 bytes for the key (void*), 8 bytes for the value (traceback_t*), plus metadata.
* With a load factor of ~87.5% (Abseil's default), the table is quite efficient.
* For our default maximum of 2^16 entries, the table will be roughly 2-3 MiB.
* A table this large would correspond to a program with a ~65GiB live heap with
* a 1MiB default sampling interval. Most of the memory usage of the profiler will
* come from the tracebacks themselves, which we _do_ free when we're done with them.
*/

// memalloc_heap_map implementation
memalloc_heap_map::memalloc_heap_map()
: map(HeapSamples_new(0))
{
}

memalloc_heap_map::~memalloc_heap_map()
{
HeapSamples_CIter it = HeapSamples_citer(&map);
for (const HeapSamples_Entry* e = HeapSamples_CIter_get(&it); e != nullptr; e = HeapSamples_CIter_next(&it)) {
delete e->val;
// Delete all traceback objects before map is destroyed
for (auto& [key, tb] : map) {
delete tb;
}
HeapSamples_destroy(&map);
}

size_t
memalloc_heap_map::size() const
{
return HeapSamples_size(&map);
}

traceback_t*
memalloc_heap_map::insert(void* key, traceback_t* value)
{
HeapSamples_Entry k = { .key = key, .val = value };
HeapSamples_Insert res = HeapSamples_insert(&map, &k);
traceback_t* prev = nullptr;
if (!res.inserted) {
// Try to insert the new value
auto [it, inserted] = map.insert({ key, value });

if (!inserted) {
// Key already existed, replace the value
/* This should not happen. It means we did not properly remove a previously-tracked
* allocation from the map. This should probably be an assertion. Return the previous
* entry as it is for an allocation that has been freed. */
HeapSamples_Entry* e = HeapSamples_Iter_get(&res.iter);
prev = e->val;
e->val = value;
traceback_t* prev = it->second;
it->second = value;
return prev;
}
return prev;
}

bool
memalloc_heap_map::contains(void* key) const
{
return HeapSamples_contains(&map, &key);
return nullptr;
}

traceback_t*
memalloc_heap_map::remove(void* key)
{
traceback_t* res = nullptr;
HeapSamples_Iter it = HeapSamples_find(&map, &key);
HeapSamples_Entry* e = HeapSamples_Iter_get(&it);
if (e != nullptr) {
res = e->val;
/* This erases the entry but won't shrink the table. */
HeapSamples_erase_at(it);
auto it = map.find(key);
if (it == map.end()) {
return nullptr;
}
return res;

traceback_t* result = it->second;
map.erase(it);
return result;
}

PyObject*
memalloc_heap_map::export_to_python() const
{
PyObject* heap_list = PyList_New(HeapSamples_size(&map));
PyObject* heap_list = PyList_New(map.size());
if (heap_list == nullptr) {
return nullptr;
}

int i = 0;
HeapSamples_CIter it = HeapSamples_citer(&map);
for (const HeapSamples_Entry* e = HeapSamples_CIter_get(&it); e != nullptr; e = HeapSamples_CIter_next(&it)) {
traceback_t* tb = e->val;

size_t i = 0;
for (const auto& [key, tb] : map) {
PyObject* tb_and_size = PyTuple_New(2);
PyTuple_SET_ITEM(tb_and_size, 0, tb->to_tuple());
PyTuple_SET_ITEM(tb_and_size, 1, PyLong_FromSize_t(tb->size));
PyList_SET_ITEM(heap_list, i, tb_and_size);
i++;
PyList_SET_ITEM(heap_list, i++, tb_and_size);

memalloc_debug_gil_release();
}
Expand All @@ -97,79 +75,10 @@ memalloc_heap_map::export_to_python() const
void
memalloc_heap_map::destructive_copy_from(memalloc_heap_map& src)
{
HeapSamples_Iter it = HeapSamples_iter(&src.map);
for (const HeapSamples_Entry* e = HeapSamples_Iter_get(&it); e != nullptr; e = HeapSamples_Iter_next(&it)) {
HeapSamples_insert(&map, e);
}
/* Can't erase inside the loop or the iterator is invalidated */
HeapSamples_clear(&src.map);
}

// Iterator implementation
memalloc_heap_map::iterator::iterator()
: iter{}
{
}
// Move all entries from src to this map using merge (C++17)
// This efficiently transfers ownership without copying
map.merge(src.map);

memalloc_heap_map::iterator::iterator(const memalloc_heap_map& map)
: iter(HeapSamples_citer(&map.map))
{
}

memalloc_heap_map::iterator&
memalloc_heap_map::iterator::operator++()
{
const HeapSamples_Entry* e = HeapSamples_CIter_get(&iter);
if (!e) {
return *this;
}
HeapSamples_CIter_next(&iter);
return *this;
}

memalloc_heap_map::iterator
memalloc_heap_map::iterator::operator++(int)
{
iterator tmp = *this;
++(*this);
return tmp;
}

memalloc_heap_map::iterator::value_type
memalloc_heap_map::iterator::operator*() const
{
const HeapSamples_Entry* e = HeapSamples_CIter_get(&iter);
if (!e) {
return { nullptr, nullptr };
}
return { e->key, e->val };
}

bool
memalloc_heap_map::iterator::operator==(const iterator& other) const
{
// Compare underlying iterators by their current entry pointers
// Note: HeapSamples_CIter doesn't have equality comparison, so we compare
// the current entry pointers. Both end iterators will have nullptr entries.
const HeapSamples_Entry* e1 = HeapSamples_CIter_get(&iter);
const HeapSamples_Entry* e2 = HeapSamples_CIter_get(&other.iter);
return e1 == e2;
}

bool
memalloc_heap_map::iterator::operator!=(const iterator& other) const
{
return !(*this == other);
}

memalloc_heap_map::iterator
memalloc_heap_map::begin() const
{
return iterator(*this);
}

memalloc_heap_map::iterator
memalloc_heap_map::end() const
{
return iterator();
// Clear any remaining entries in src (shouldn't be any after merge)
src.map.clear();
}
Loading
Loading