Commit

Merge pull request #3760 from DataDog/ivoanjo/prof-10123-placeholder-missing-allocations

[PROF-10123] Add placeholder for skipped samples in allocation profiler
ivoanjo committed Jul 9, 2024
2 parents ca006e9 + abdb523 commit 7732142
Showing 16 changed files with 216 additions and 15 deletions.
@@ -1140,9 +1140,15 @@ static VALUE rescued_sample_allocation(VALUE tracepoint_data) {
discrete_dynamic_sampler_events_since_last_sample(&state->allocation_sampler) :
// if we aren't, then we're sampling every event
1;
// TODO: Signal in the profile that clamping happened?

// To control bias from sampling, we clamp the maximum weight attributed to a single allocation sample. This avoids
// assigning a very large number to a sample, if for instance the dynamic sampling mechanism chose a really big interval.
unsigned int weight = allocations_since_last_sample > MAX_ALLOC_WEIGHT ? MAX_ALLOC_WEIGHT : (unsigned int) allocations_since_last_sample;
thread_context_collector_sample_allocation(state->thread_context_collector_instance, weight, new_object);
// ...but we still represent the skipped samples in the profile, thus the data will account for all allocations.
if (weight < allocations_since_last_sample) {
thread_context_collector_sample_skipped_allocation_samples(state->thread_context_collector_instance, allocations_since_last_sample - weight);
}

// Return a dummy VALUE because we're called from rb_rescue2 which requires it
return Qnil;
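The hunk above clamps the weight attributed to a single allocation sample and records whatever was clamped off as skipped samples. A minimal Ruby sketch of that accounting (the MAX_ALLOC_WEIGHT value and the sample_allocation method name here are illustrative, not the extension's actual API):

```ruby
# Illustrative only: mirrors the clamping + "skipped samples" accounting above.
MAX_ALLOC_WEIGHT = 65_535 # assumed value for this sketch; the real constant lives in the C extension

def sample_allocation(allocations_since_last_sample)
  # Clamp the weight attributed to this single sample to limit bias...
  weight = [allocations_since_last_sample, MAX_ALLOC_WEIGHT].min
  recorded = { 'alloc-samples' => weight }

  # ...and attribute the remainder to a placeholder, so the profile still
  # accounts for every allocation that was observed.
  skipped = allocations_since_last_sample - weight
  placeholder = skipped > 0 ? { 'thread name' => 'Skipped Samples', 'alloc-samples' => skipped } : nil

  [recorded, placeholder]
end

recorded, placeholder = sample_allocation(100_000)
recorded['alloc-samples'] + placeholder['alloc-samples'] # => 100_000
```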
6 changes: 4 additions & 2 deletions ext/datadog_profiling_native_extension/collectors_stack.c
@@ -267,7 +267,8 @@ void sample_thread(
recorder_instance,
(ddog_prof_Slice_Location) {.ptr = buffer->locations, .len = captured_frames},
values,
labels
labels,
/* placeholder: */ false
);
}

@@ -371,7 +372,8 @@ void record_placeholder_stack(
recorder_instance,
(ddog_prof_Slice_Location) {.ptr = buffer->locations, .len = 1},
values,
labels
labels,
/* placeholder: */ true
);
}

32 changes: 32 additions & 0 deletions ext/datadog_profiling_native_extension/collectors_thread_context.c
@@ -231,6 +231,7 @@ static void ddtrace_otel_trace_identifiers_for(
VALUE active_span,
VALUE otel_values
);
static VALUE _native_sample_skipped_allocation_samples(DDTRACE_UNUSED VALUE self, VALUE collector_instance, VALUE skipped_samples);

void collectors_thread_context_init(VALUE profiling_module) {
VALUE collectors_module = rb_define_module_under(profiling_module, "Collectors");
@@ -261,6 +262,7 @@ void collectors_thread_context_init(VALUE profiling_module) {
rb_define_singleton_method(testing_module, "_native_stats", _native_stats, 1);
rb_define_singleton_method(testing_module, "_native_gc_tracking", _native_gc_tracking, 1);
rb_define_singleton_method(testing_module, "_native_new_empty_thread", _native_new_empty_thread, 0);
rb_define_singleton_method(testing_module, "_native_sample_skipped_allocation_samples", _native_sample_skipped_allocation_samples, 2);

at_active_span_id = rb_intern_const("@active_span");
at_active_trace_id = rb_intern_const("@active_trace");
@@ -1400,3 +1402,33 @@ static void ddtrace_otel_trace_identifiers_for(
*active_trace = current_trace;
*numeric_span_id = resolved_numeric_span_id;
}

void thread_context_collector_sample_skipped_allocation_samples(VALUE self_instance, unsigned int skipped_samples) {
struct thread_context_collector_state *state;
TypedData_Get_Struct(self_instance, struct thread_context_collector_state, &thread_context_collector_typed_data, state);

ddog_prof_Label labels[] = {
// Providing .num = 0 should not be needed but the tracer-2.7 docker image ships a buggy gcc that complains about this
{.key = DDOG_CHARSLICE_C("thread id"), .str = DDOG_CHARSLICE_C("SA"), .num = 0},
{.key = DDOG_CHARSLICE_C("thread name"), .str = DDOG_CHARSLICE_C("Skipped Samples"), .num = 0},
{.key = DDOG_CHARSLICE_C("allocation class"), .str = DDOG_CHARSLICE_C("(Skipped Samples)"), .num = 0},
};
ddog_prof_Slice_Label slice_labels = {.ptr = labels, .len = sizeof(labels) / sizeof(labels[0])};

record_placeholder_stack(
state->sampling_buffer,
state->recorder_instance,
(sample_values) {.alloc_samples = skipped_samples},
(sample_labels) {
.labels = slice_labels,
.state_label = NULL,
.end_timestamp_ns = 0, // For now we're not collecting timestamps for allocation events
},
DDOG_CHARSLICE_C("Skipped Samples")
);
}

static VALUE _native_sample_skipped_allocation_samples(DDTRACE_UNUSED VALUE self, VALUE collector_instance, VALUE skipped_samples) {
thread_context_collector_sample_skipped_allocation_samples(collector_instance, NUM2UINT(skipped_samples));
return Qtrue;
}
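Based on the expectations added in thread_context_spec.rb further down in this diff, the placeholder recorded by thread_context_collector_sample_skipped_allocation_samples decodes to roughly the following sample (a sketch of the decoded pprof data, not an exact serialization):

```ruby
# Approximate shape of the "Skipped Samples" placeholder once decoded from the
# pprof output; field names follow the spec expectations below.
skipped_samples_placeholder = {
  values: { 'alloc-samples': 123 }, # the count of allocations that were skipped
  labels: {
    'thread id': 'SA',
    'thread name': 'Skipped Samples',
    'allocation class': '(Skipped Samples)',
  },
  locations: ['Skipped Samples'], # a single placeholder frame, not a real backtrace
}
```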
@@ -9,6 +9,7 @@ void thread_context_collector_sample(
VALUE profiler_overhead_stack_thread
);
void thread_context_collector_sample_allocation(VALUE self_instance, unsigned int sample_weight, VALUE new_object);
void thread_context_collector_sample_skipped_allocation_samples(VALUE self_instance, unsigned int skipped_samples);
VALUE thread_context_collector_sample_after_gc(VALUE self_instance);
void thread_context_collector_on_gc_start(VALUE self_instance);
bool thread_context_collector_on_gc_finish(VALUE self_instance);
37 changes: 32 additions & 5 deletions ext/datadog_profiling_native_extension/heap_recorder.c
@@ -166,6 +166,12 @@ struct heap_recorder {
size_t objects_frozen;
} stats_last_update;
};

struct end_heap_allocation_args {
struct heap_recorder *heap_recorder;
ddog_prof_Slice_Location locations;
};

static heap_record* get_or_create_heap_record(heap_recorder*, ddog_prof_Slice_Location);
static void cleanup_heap_record_if_unused(heap_recorder*, heap_record*);
static void on_committed_object_record_cleanup(heap_recorder *heap_recorder, object_record *record);
@@ -176,6 +182,7 @@ static int st_object_records_iterate(st_data_t, st_data_t, st_data_t);
static int st_object_records_debug(st_data_t key, st_data_t value, st_data_t extra);
static int update_object_record_entry(st_data_t*, st_data_t*, st_data_t, int);
static void commit_recording(heap_recorder*, heap_record*, recording);
static VALUE end_heap_allocation_recording(VALUE end_heap_allocation_args);

// ==========================
// Heap Recorder External API
@@ -340,9 +347,28 @@ void start_heap_allocation_recording(heap_recorder *heap_recorder, VALUE new_obj
};
}

void end_heap_allocation_recording(struct heap_recorder *heap_recorder, ddog_prof_Slice_Location locations) {
// end_heap_allocation_recording_with_rb_protect gets called while the stack_recorder is holding one of the profile
// locks. To enable us to correctly unlock the profile on exception, we wrap the call to end_heap_allocation_recording
// with an rb_protect.
__attribute__((warn_unused_result))
int end_heap_allocation_recording_with_rb_protect(struct heap_recorder *heap_recorder, ddog_prof_Slice_Location locations) {
int exception_state;
struct end_heap_allocation_args end_heap_allocation_args = {
.heap_recorder = heap_recorder,
.locations = locations,
};
rb_protect(end_heap_allocation_recording, (VALUE) &end_heap_allocation_args, &exception_state);
return exception_state;
}

static VALUE end_heap_allocation_recording(VALUE end_heap_allocation_args) {
struct end_heap_allocation_args *args = (struct end_heap_allocation_args *) end_heap_allocation_args;

struct heap_recorder *heap_recorder = args->heap_recorder;
ddog_prof_Slice_Location locations = args->locations;

if (heap_recorder == NULL) {
return;
return Qnil;
}

recording active_recording = heap_recorder->active_recording;
@@ -356,15 +382,16 @@ void end_heap_allocation_recording(struct heap_recorder *heap_recorder, ddog_pro
// data required for committing though.
heap_recorder->active_recording = (recording) {0};

if (active_recording.object_record == &SKIPPED_RECORD) {
// special marker when we decided to skip due to sampling
return;
if (active_recording.object_record == &SKIPPED_RECORD) { // special marker when we decided to skip due to sampling
return Qnil;
}

heap_record *heap_record = get_or_create_heap_record(heap_recorder, locations);

// And then commit the new allocation.
commit_recording(heap_recorder, heap_record, active_recording);

return Qnil;
}

void heap_recorder_prepare_iteration(heap_recorder *heap_recorder) {
4 changes: 3 additions & 1 deletion ext/datadog_profiling_native_extension/heap_recorder.h
@@ -114,7 +114,9 @@ void start_heap_allocation_recording(heap_recorder *heap_recorder, VALUE new_obj
// @param locations The stacktrace representing the location of the allocation.
//
// WARN: It is illegal to call this without previously having called ::start_heap_allocation_recording.
void end_heap_allocation_recording(heap_recorder *heap_recorder, ddog_prof_Slice_Location locations);
// WARN: This method rescues exceptions with `rb_protect`, returning the exception state integer for the caller to handle.
__attribute__((warn_unused_result))
int end_heap_allocation_recording_with_rb_protect(heap_recorder *heap_recorder, ddog_prof_Slice_Location locations);

// Update the heap recorder to reflect the latest state of the VM and prepare internal structures
// for efficient iteration.
14 changes: 11 additions & 3 deletions ext/datadog_profiling_native_extension/stack_recorder.c
@@ -586,7 +586,7 @@ static VALUE ruby_time_from(ddog_Timespec ddprof_time) {
return rb_time_timespec_new(&time, utc);
}

void record_sample(VALUE recorder_instance, ddog_prof_Slice_Location locations, sample_values values, sample_labels labels) {
void record_sample(VALUE recorder_instance, ddog_prof_Slice_Location locations, sample_values values, sample_labels labels, bool placeholder) {
struct stack_recorder_state *state;
TypedData_Get_Struct(recorder_instance, struct stack_recorder_state, &stack_recorder_typed_data, state);

@@ -605,12 +605,20 @@ void record_sample(VALUE recorder_instance, ddog_prof_Slice_Location locations,
metric_values[position_for[ALLOC_SAMPLES_VALUE_ID]] = values.alloc_samples;
metric_values[position_for[TIMELINE_VALUE_ID]] = values.timeline_wall_time_ns;

if (values.alloc_samples != 0) {
if (!placeholder && values.alloc_samples > 0) {
// If we got an allocation sample, end the heap allocation recording to commit the heap sample.
// FIXME: Heap sampling currently has to be done in 2 parts because the construction of locations is happening
// very late in the allocation-sampling path (which is shared with the cpu sampling path). This can
// be fixed with some refactoring but for now this leads to a less impactful change.
end_heap_allocation_recording(state->heap_recorder, locations);
//
// NOTE: The heap recorder is allowed to raise exceptions if something's wrong. But we also need to handle it
// on this side to make sure we properly unlock the active slot mutex on our way out. Otherwise, this would
// later lead to deadlocks (since the active slot mutex is not expected to be locked forever).
int exception_state = end_heap_allocation_recording_with_rb_protect(state->heap_recorder, locations);
if (exception_state) {
sampler_unlock_active_profile(active_slot);
rb_jump_tag(exception_state);
}
}

ddog_prof_Profile_Result result = ddog_prof_Profile_add(
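The NOTE above explains why the heap recorder call is wrapped in rb_protect: an exception raised while committing the heap sample must not leave the active slot mutex locked. A rough Ruby analogy of that control flow (the real code uses the C rb_protect/rb_jump_tag APIs; the names below are illustrative):

```ruby
# Capture any exception instead of letting it unwind past the locked region,
# mirroring what rb_protect does for end_heap_allocation_recording.
def with_captured_exception
  yield
  nil
rescue StandardError => e
  e # plays the role of rb_protect's non-zero exception_state
end

active_slot_mutex = Mutex.new

begin
  active_slot_mutex.lock
  error = with_captured_exception { raise 'heap recorder failure' } # stand-in for the commit step
  if error
    active_slot_mutex.unlock # release the lock first (sampler_unlock_active_profile)
    raise error              # ...then propagate (rb_jump_tag)
  end
  # on the happy path, the sample would be added to the profile here
  active_slot_mutex.unlock
rescue StandardError => e
  puts "re-raised after unlocking; mutex still locked? #{active_slot_mutex.locked?}" # => false
end
```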
2 changes: 1 addition & 1 deletion ext/datadog_profiling_native_extension/stack_recorder.h
@@ -21,7 +21,7 @@ typedef struct sample_labels {
int64_t end_timestamp_ns;
} sample_labels;

void record_sample(VALUE recorder_instance, ddog_prof_Slice_Location locations, sample_values values, sample_labels labels);
void record_sample(VALUE recorder_instance, ddog_prof_Slice_Location locations, sample_values values, sample_labels labels, bool placeholder);
void record_endpoint(VALUE recorder_instance, uint64_t local_root_span_id, ddog_CharSlice endpoint);
void track_object(VALUE recorder_instance, VALUE new_object, unsigned int sample_weight, ddog_CharSlice *alloc_class);
VALUE enforce_recorder_instance(VALUE object);
1 change: 1 addition & 0 deletions lib/datadog/profiling/profiler.rb
@@ -59,6 +59,7 @@ def component_failed(failed_component)
# we're operating in a degraded state and crash tracking may still be helpful.

if failed_component == :worker
scheduler.mark_profiler_failed
stop_scheduler
elsif failed_component == :scheduler
stop_worker
12 changes: 10 additions & 2 deletions lib/datadog/profiling/scheduler.rb
@@ -22,7 +22,8 @@ class Scheduler < Core::Worker

attr_reader \
:exporter,
:transport
:transport,
:profiler_failed

public

@@ -34,6 +35,7 @@ def initialize(
)
@exporter = exporter
@transport = transport
@profiler_failed = false

# Workers::Async::Thread settings
self.fork_policy = fork_policy
@@ -80,8 +82,14 @@ def loop_wait_before_first_iteration?
true
end

# This is called by the Profiler class whenever an issue happens in the profiler. It makes sure that even
# if there is data to be flushed, we don't try to flush it.
def mark_profiler_failed
@profiler_failed = true
end

def work_pending?
exporter.can_flush?
!profiler_failed && exporter.can_flush?
end

def reset_after_fork
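A small sketch of the behavior the scheduler change above introduces (a simplified, hypothetical class; the real Scheduler is a Core::Worker): once the profiler is marked as failed, work_pending? reports false even if the exporter still has data it could flush.

```ruby
# Simplified model of the new work_pending? guard.
class MiniScheduler
  def initialize(exporter)
    @exporter = exporter
    @profiler_failed = false
  end

  # Called by the Profiler when a component fails, so no further flushes are attempted.
  def mark_profiler_failed
    @profiler_failed = true
  end

  def work_pending?
    !@profiler_failed && @exporter.can_flush?
  end
end

exporter = Object.new
def exporter.can_flush?; true; end

scheduler = MiniScheduler.new(exporter)
scheduler.work_pending?        # => true
scheduler.mark_profiler_failed
scheduler.work_pending?        # => false
```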
1 change: 1 addition & 0 deletions sig/datadog/profiling/scheduler.rbs
@@ -14,6 +14,7 @@ module Datadog
def start: (?on_failure_proc: ::Proc?) -> void

def reset_after_fork: () -> void
def mark_profiler_failed: () -> true
end
end
end
30 changes: 30 additions & 0 deletions spec/datadog/profiling/collectors/cpu_and_wall_time_worker_spec.rb
@@ -507,6 +507,7 @@

context 'with dynamic_sampling_rate_enabled' do
let(:options) { { dynamic_sampling_rate_enabled: true } }

it 'keeps statistics on how allocation sampling is doing' do
stub_const('CpuAndWallTimeWorkerSpec::TestStruct', Struct.new(:foo))

@@ -536,6 +537,34 @@
one_second_in_ns = 1_000_000_000
expect(sampling_time_ns_max).to be < one_second_in_ns, "A single sample should not take longer than 1s, #{stats}"
end

# When large numbers of objects are allocated, the dynamic sampling rate kicks in, and we don't sample every
# object.
# We then assign a weight to every sample to compensate for this; to avoid bias, we have a limit on this weight,
# and we clamp it if it goes over the limit.
# But the total number of allocations recorded should match the number we observed, and thus we record the
# remainder above the clamped value as a separate "Skipped Samples" step.
it 'records skipped allocation samples when weights are clamped' do
start

thread_that_allocates_as_fast_as_possible = Thread.new { loop { BasicObject.new } }

Thread.pass

allocation_samples = try_wait_until do
samples = samples_from_pprof(recorder.serialize!).select { |it| it.values[:'alloc-samples'] > 0 }
samples if samples.any? { |it| it.labels[:'thread name'] == 'Skipped Samples' }
end

thread_that_allocates_as_fast_as_possible.kill
thread_that_allocates_as_fast_as_possible.join

GC.start

cpu_and_wall_time_worker.stop

expect(allocation_samples).to_not be_empty
end
end

context 'when sampling optimized Ruby strings' do
@@ -629,6 +658,7 @@
let(:options) { { dynamic_sampling_rate_enabled: false } }

before do
skip 'Heap profiling is only supported on Ruby >= 2.7' if RUBY_VERSION < '2.7'
allow(Datadog.logger).to receive(:warn)
expect(Datadog.logger).to receive(:warn).with(/dynamic sampling rate disabled/)
end
39 changes: 39 additions & 0 deletions spec/datadog/profiling/collectors/thread_context_spec.rb
@@ -79,6 +79,10 @@ def sample_allocation(weight:, new_object: Object.new)
described_class::Testing._native_sample_allocation(cpu_and_wall_time_collector, weight, new_object)
end

def sample_skipped_allocation_samples(skipped_samples)
described_class::Testing._native_sample_skipped_allocation_samples(cpu_and_wall_time_collector, skipped_samples)
end

def thread_list
described_class::Testing._native_thread_list
end
@@ -1210,6 +1214,41 @@ def self.otel_sdk_available?
end
end

describe '#sample_skipped_allocation_samples' do
let(:single_sample) do
expect(samples.size).to be 1
samples.first
end
before { sample_skipped_allocation_samples(123) }

it 'records the number of skipped allocations' do
expect(single_sample.values).to include('alloc-samples': 123)
end

it 'attributes the skipped samples to a "Skipped Samples" thread' do
expect(single_sample.labels).to include('thread id': 'SA', 'thread name': 'Skipped Samples')
end

it 'attributes the skipped samples to a "(Skipped Samples)" allocation class' do
expect(single_sample.labels).to include('allocation class': '(Skipped Samples)')
end

it 'includes a placeholder stack attributed to "Skipped Samples"' do
expect(single_sample.locations.size).to be 1
expect(single_sample.locations.first.path).to eq 'Skipped Samples'
end

context 'when heap sampling is enabled' do
let(:recorder) { build_stack_recorder(heap_samples_enabled: true) }

it 'records only the number of skipped allocations, and does not record any heap samples' do
GC.start # Force any incorrect heap samples to have age > 1

expect(single_sample.values).to include('alloc-samples': 123, 'heap-live-samples': 0)
end
end
end

describe '#thread_list' do
it "returns the same as Ruby's Thread.list" do
expect(thread_list).to eq Thread.list
7 changes: 7 additions & 0 deletions spec/datadog/profiling/profiler_spec.rb
@@ -123,6 +123,7 @@
before do
allow(scheduler).to receive(:enabled=)
allow(scheduler).to receive(:stop)
allow(scheduler).to receive(:mark_profiler_failed)
end

it 'logs the issue' do
@@ -131,6 +132,12 @@
worker_on_failure
end

it 'marks the profiler as having failed in the scheduler' do
expect(scheduler).to receive(:mark_profiler_failed)

worker_on_failure
end

it 'stops the scheduler' do
expect(scheduler).to receive(:enabled=).with(false)
expect(scheduler).to receive(:stop).with(true)