Skip to content

Commit

Permalink
[PROF-8667] Heap Profiling - Part 2 - Heap Recorder
Browse files Browse the repository at this point in the history
[PROF-8667] Some comments and location fixes.

[PROF-8667] Fix typo and enable 'records live heap objects' test.

[PROF-8667] Heap Profiling - Part 2 - Live Tracking

[PROF-8667] Remove one remaining usage of sum.

[PROF-8667] Address comments
  • Loading branch information
AlexJF committed Dec 13, 2023
1 parent 14b40d4 commit 051c803
Show file tree
Hide file tree
Showing 9 changed files with 739 additions and 41 deletions.
632 changes: 610 additions & 22 deletions ext/ddtrace_profiling_native_extension/heap_recorder.c

Large diffs are not rendered by default.

4 changes: 3 additions & 1 deletion ext/ddtrace_profiling_native_extension/heap_recorder.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ heap_recorder* heap_recorder_new(void);
void heap_recorder_free(heap_recorder *heap_recorder);

// Do any cleanup needed after forking.
// WARN: Assumes this gets called before profiler is reinitialized on the fork
void heap_recorder_after_fork(heap_recorder *heap_recorder);

// Start a heap allocation recording on the heap recorder for a new object.
Expand All @@ -66,7 +67,8 @@ void start_heap_allocation_recording(heap_recorder *heap_recorder, VALUE new_obj
// WARN: It is illegal to call this without previously having called ::start_heap_allocation_recording.
void end_heap_allocation_recording(heap_recorder *heap_recorder, ddog_prof_Slice_Location locations);

// Flush any intermediate state that might be queued inside the heap recorder.
// Flush any intermediate state that might be queued inside the heap recorder or updates certain
// state to reflect the latest state of the VM.
//
// NOTE: This should usually be called before iteration to ensure data is as little stale as possible.
void heap_recorder_flush(heap_recorder *heap_recorder);
Expand Down
58 changes: 58 additions & 0 deletions ext/ddtrace_profiling_native_extension/ruby_helpers.c
Original file line number Diff line number Diff line change
Expand Up @@ -108,3 +108,61 @@ void raise_syserr(
grab_gvl_and_raise_syserr(syserr_errno, "Failure returned by '%s' at %s:%d:in `%s'", expression, file, line, function_name);
}
}

char* ruby_strndup(const char *str, size_t size) {
char *dup;

dup = xmalloc(size + 1);
memcpy(dup, str, size);
dup[size] = '\0';

return dup;
}

// Note on sampler global state safety:
//
// `module_object_space` is **GLOBAL** state. Be careful when accessing or modifying it.
// In particular, it's important to only mutate them while holding the global VM lock, to ensure correctness.
//
// This global state is needed to memoize the ObjectSpace module so we can skip definition/lookup on each call
// to _id2ref.
static VALUE module_object_space = Qnil;

VALUE _id2ref(VALUE obj_id) {
if (module_object_space == Qnil) {
module_object_space = rb_define_module("ObjectSpace");
}

// Call ::ObjectSpace._id2ref natively. It will raise if the id is no longer valid
return rb_funcall(module_object_space, rb_intern("_id2ref"), 1, obj_id);
}

VALUE _id2ref_failure(DDTRACE_UNUSED VALUE _unused1, DDTRACE_UNUSED VALUE _unused2) {
return Qfalse;
}

// Native wrapper to get an object ref from an id. Returns true on success and
// writes the ref to the value pointer parameter if !NULL. False if id doesn't
// reference a valid object (in which case value is not changed).
bool ruby_ref_from_id(VALUE obj_id, VALUE *value) {
// Call ::ObjectSpace._id2ref natively. It will raise if the id is no longer valid
// so we need to call it via rb_erscue2
VALUE result = rb_rescue2(
_id2ref,
obj_id,
_id2ref_failure,
Qnil,
rb_eRangeError, // rb_eRangeError is the erorr used to flag invalid ids
0 // Required by API to be the last argument
);

if (result == Qfalse) {
return false;
}

if (value != NULL) {
(*value) = result;
}

return true;
}
15 changes: 15 additions & 0 deletions ext/ddtrace_profiling_native_extension/ruby_helpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -87,3 +87,18 @@ NORETURN(void raise_syserr(
int line,
const char *function_name
));

// Alternative to ruby_strdup that takes a size argument.
// Similar to C's strndup but slightly less smart as size is expected to
// be smaller or equal to the real size of str (minus null termination if it
// exists).
// A new string will be returned with size+1 bytes and last byte set to '\0'.
// The returned string must be freed explicitly.
//
// WARN: Cannot be used during GC or outside the GVL.
char* ruby_strndup(const char *str, size_t size);

// Native wrapper to get an object ref from an id. Returns true on success and
// writes the ref to the value pointer parameter if !NULL. False if id doesn't
// reference a valid object (in which case value is not changed).
bool ruby_ref_from_id(size_t id, VALUE *value);
21 changes: 15 additions & 6 deletions lib/datadog/profiling/component.rb
Original file line number Diff line number Diff line change
Expand Up @@ -193,17 +193,26 @@ def self.build_profiler_component(settings:, agent_settings:, optional_tracer:)
private_class_method def self.enable_heap_profiling?(settings, allocation_profiling_enabled)
heap_profiling_enabled = settings.profiling.advanced.experimental_heap_enabled

if heap_profiling_enabled && !allocation_profiling_enabled
raise ArgumentError, 'Heap profiling requires allocation profiling to be enabled'
end
return false unless heap_profiling_enabled

if heap_profiling_enabled
if RUBY_VERSION.start_with?('2.') && RUBY_VERSION < '2.7'
Datadog.logger.warn(
'Enabled experimental heap profiling. This is experimental, not recommended, and will increase overhead!'
'Heap profiling currently relies on features introduced in Ruby 2.7 and will be forcibly disabled. '\
'Please upgrade to Ruby >= 2.7 in order to use this feature.'
)
return false
end

unless allocation_profiling_enabled
raise ArgumentError,
'Heap profiling requires allocation profiling to be enabled'
end

heap_profiling_enabled
Datadog.logger.warn(
'Enabled experimental heap profiling. This is experimental, not recommended, and will increase overhead!'
)

true
end

private_class_method def self.no_signals_workaround_enabled?(settings) # rubocop:disable Metrics/MethodLength
Expand Down
22 changes: 16 additions & 6 deletions spec/datadog/profiling/collectors/cpu_and_wall_time_worker_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -570,21 +570,31 @@
end

it 'records live heap objects' do
pending "heap profiling isn't actually implemented just yet"

stub_const('CpuAndWallTimeWorkerSpec::TestStruct', Struct.new(:foo))

# Warm this up to remove VM-related allocations
# TODO: Remove this when we can match on allocation class
CpuAndWallTimeWorkerSpec::TestStruct.new

start

test_num_allocated_object.times { CpuAndWallTimeWorkerSpec::TestStruct.new }
live_objects = Array.new(test_num_allocated_object)

test_num_allocated_object.times { |i| live_objects[i] = CpuAndWallTimeWorkerSpec::TestStruct.new }
allocation_line = __LINE__ - 1

cpu_and_wall_time_worker.stop

relevant_sample = samples_for_thread(samples_from_pprof(recorder.serialize!), Thread.current)
.find { |s| s.locations.first.lineno == allocation_line && s.locations.first.path == __FILE__ }
test_struct_heap_sample = lambda { |sample|
first_frame = sample.locations.first
first_frame.lineno == allocation_line && first_frame.path == __FILE__ && first_frame.base_label == 'new' &&
(sample.values[:'heap-live-samples'] || 0) > 0
}

relevant_sample = samples_from_pprof(recorder.serialize!)
.find(&test_struct_heap_sample)

expect(relevant_sample.values[':heap-live-samples']).to eq test_num_allocated_object
expect(relevant_sample.values[:'heap-live-samples']).to eq test_num_allocated_object
end
end

Expand Down
21 changes: 18 additions & 3 deletions spec/datadog/profiling/component_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -227,11 +227,26 @@
end

context 'when heap profiling is enabled' do
# Universally supported ruby version for allocation profiling by default
let(:testing_version) { '2.7.2' }

before do
settings.profiling.advanced.experimental_heap_enabled = true
# Universally supported ruby version for allocation profiling, we don't want to test those
# edge cases here
stub_const('RUBY_VERSION', '2.7.2')
stub_const('RUBY_VERSION', testing_version)
end

context 'on a Ruby older than 2.7' do
let(:testing_version) { '2.6' }

it 'initializes StackRecorder without heap sampling support and warns' do
expect(Datadog::Profiling::StackRecorder).to receive(:new)
.with(hash_including(heap_samples_enabled: false))
.and_call_original

expect(Datadog.logger).to receive(:warn).with(/upgrade to Ruby >= 2.7/)

build_profiler_component
end
end

context 'and allocation profiling disabled' do
Expand Down
5 changes: 4 additions & 1 deletion spec/datadog/profiling/spec_helper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,10 @@ def object_id_from(thread_id)
end

def samples_for_thread(samples, thread)
samples.select { |sample| object_id_from(sample.labels.fetch(:'thread id')) == thread.object_id }
samples.select do |sample|
thread_id = sample.labels.fetch(:'thread id', nil)
thread_id && object_id_from(thread_id) == thread.object_id
end
end

# We disable heap_sample collection by default in tests since it requires some extra mocking/
Expand Down
2 changes: 0 additions & 2 deletions spec/datadog/profiling/stack_recorder_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -391,8 +391,6 @@ def sample_types_from(decoded_profile)
end

it 'include the stack and sample counts for the objects still left alive' do
pending 'heap_recorder implementation is currently missing'

# We sample from 2 distinct locations
expect(heap_samples.size).to eq(2)

Expand Down

0 comments on commit 051c803

Please sign in to comment.