diff --git a/ext/libdatadog_api/crashtracker.c b/ext/libdatadog_api/crashtracker.c
index ce574bca3a..b3e89f9804 100644
--- a/ext/libdatadog_api/crashtracker.c
+++ b/ext/libdatadog_api/crashtracker.c
@@ -5,20 +5,22 @@
 static VALUE _native_start_or_update_on_fork(int argc, VALUE *argv, DDTRACE_UNUSED VALUE _self);
 static VALUE _native_stop(DDTRACE_UNUSED VALUE _self);
 
-static void crashtracker_init(VALUE profiling_module);
+static void crashtracker_init(VALUE crashtracking_module);
 
 // Used to report Ruby VM crashes.
 // Once initialized, segfaults will be reported automatically using libdatadog.
 
 void DDTRACE_EXPORT Init_libdatadog_api(void) {
   VALUE datadog_module = rb_define_module("Datadog");
-  VALUE profiling_module = rb_define_module_under(datadog_module, "Profiling");
+  VALUE core_module = rb_define_module_under(datadog_module, "Core");
+  // Must nest under Core so the native methods land on Datadog::Core::Crashtracking::Component (the Ruby-side class)
+  VALUE crashtracking_module = rb_define_module_under(core_module, "Crashtracking");
 
-  crashtracker_init(profiling_module);
+  crashtracker_init(crashtracking_module);
 }
 
-void crashtracker_init(VALUE profiling_module) {
-  VALUE crashtracker_class = rb_define_class_under(profiling_module, "Crashtracker", rb_cObject);
+void crashtracker_init(VALUE crashtracking_module) {
+  VALUE crashtracker_class = rb_define_class_under(crashtracking_module, "Component", rb_cObject);
 
   rb_define_singleton_method(crashtracker_class, "_native_start_or_update_on_fork", _native_start_or_update_on_fork, -1);
   rb_define_singleton_method(crashtracker_class, "_native_stop", _native_stop, 0);
diff --git a/lib/datadog/core/configuration/components.rb b/lib/datadog/core/configuration/components.rb
index f14152eecd..572d8c5dc1 100644
--- a/lib/datadog/core/configuration/components.rb
+++ b/lib/datadog/core/configuration/components.rb
@@ -13,6 +13,7 @@
 require_relative '../../tracing/component'
 require_relative '../../profiling/component'
 require_relative '../../appsec/component'
+require_relative '../crashtracking/component'
 
 module Datadog
   module Core
@@ -58,6 +59,17 @@ def build_runtime_metrics_worker(settings)
           def build_telemetry(settings, agent_settings, logger)
             Telemetry::Component.build(settings, agent_settings, logger)
           end
+
+          def build_crashtracker(settings, agent_settings)
+            return unless settings.crash_tracking.enabled
+
+            if (libdatadog_api_failure = Datadog::Core::Crashtracking::Component::LIBDATADOG_API_FAILURE)
+              Datadog.logger.debug("Cannot enable crashtracking: #{libdatadog_api_failure}")
+              return
+            end
+
+            Datadog::Core::Crashtracking::Component.build(settings, agent_settings)
+          end
         end
 
         include Datadog::Tracing::Component::InstanceMethods
@@ -83,11 +95,13 @@ def initialize(settings)
 
           @remote = Remote::Component.build(settings, agent_settings)
           @tracer = self.class.build_tracer(settings, agent_settings, logger: @logger)
+          crashtracker = self.class.build_crashtracker(settings, agent_settings)
 
           @profiler, profiler_logger_extra = Datadog::Profiling::Component.build_profiler_component(
             settings: settings,
             agent_settings: agent_settings,
             optional_tracer: @tracer,
+            optional_crashtracker: crashtracker,
           )
           @environment_logger_extra.merge!(profiler_logger_extra) if profiler_logger_extra
 
diff --git a/lib/datadog/core/configuration/settings.rb b/lib/datadog/core/configuration/settings.rb
index db7e059633..51a0cfab73 100644
--- a/lib/datadog/core/configuration/settings.rb
+++ b/lib/datadog/core/configuration/settings.rb
@@ -450,19 +450,6 @@ def initialize(*_)
               o.env 'DD_PROFILING_UPLOAD_PERIOD'
               o.default 60
             end
-
-            # Enables reporting of information when the Ruby VM crashes.
-            #
-            # This feature is no longer experimental, and we plan to deprecate this setting and replace it with a
-            # properly-named one soon.
-            #
-            # @default `DD_PROFILING_EXPERIMENTAL_CRASH_TRACKING_ENABLED` environment variable as a boolean,
-            # otherwise `true`
-            option :experimental_crash_tracking_enabled do |o|
-              o.type :bool
-              o.env 'DD_PROFILING_EXPERIMENTAL_CRASH_TRACKING_ENABLED'
-              o.default true
-            end
           end
 
           # @public_api
@@ -833,6 +820,18 @@ def initialize(*_)
           option :service
         end
 
+        settings :crash_tracking do
+          # Enables reporting of information when the Ruby VM crashes.
+          #
+          # @default `DD_CRASH_TRACKING_ENABLED` environment variable as a boolean,
+          # otherwise `true`
+          option :enabled do |o|
+            o.type :bool
+            o.env 'DD_CRASH_TRACKING_ENABLED'
+            o.default true
+          end
+        end
+
         # TODO: Tracing should manage its own settings.
         # Keep this extension here for now to keep things working.
         extend Datadog::Tracing::Configuration::Settings
diff --git a/lib/datadog/core/crashtracking/agent_base_url.rb b/lib/datadog/core/crashtracking/agent_base_url.rb
new file mode 100644
index 0000000000..7fbc027813
--- /dev/null
+++ b/lib/datadog/core/crashtracking/agent_base_url.rb
@@ -0,0 +1,26 @@
+# frozen_string_literal: true
+
+require_relative '../configuration/ext'
+
+module Datadog
+  module Core
+    module Crashtracking
+      # Resolves the agent base URL that is handed to the native crash tracker as its exporter endpoint
+      module AgentBaseUrl
+        module_function
+
+        def resolve(agent_settings)
+          case agent_settings.adapter
+          when Datadog::Core::Configuration::Ext::Agent::HTTP::ADAPTER
+            "#{agent_settings.ssl ? 'https' : 'http'}://#{agent_settings.hostname}:#{agent_settings.port}/"
+          when Datadog::Core::Configuration::Ext::Agent::UnixSocket::ADAPTER
+            "unix://#{agent_settings.uds_path}"
+          else
+            Datadog.logger.warn("Unexpected adapter: #{agent_settings.adapter}")
+            nil
+          end
+        end
+      end
+    end
+  end
+end
diff --git a/lib/datadog/core/crashtracking/component.rb b/lib/datadog/core/crashtracking/component.rb
new file mode 100644
index 0000000000..3d2e0194da
--- /dev/null
+++ b/lib/datadog/core/crashtracking/component.rb
@@ -0,0 +1,100 @@
+# frozen_string_literal: true
+
+require 'libdatadog'
+
+require_relative 'tag_builder'
+require_relative 'agent_base_url'
+
+module Datadog
+  module Core
+    module Crashtracking
+      # Used to report Ruby VM crashes.
+      #
+      # NOTE: The crashtracker native state is a singleton; so even if you create multiple instances of `Component`
+      # and start them, it only works as "last writer wins". Same for stop -- there's only one state, so calling stop
+      # on it will stop the crash tracker, regardless of which instance started it.
+      #
+      # Methods prefixed with _native_ are implemented in `crashtracker.c`
+      class Component
+        LIBDATADOG_API_FAILURE =
+          begin
+            require "libdatadog_api.#{RUBY_VERSION[/\d+\.\d+/]}_#{RUBY_PLATFORM}"
+            nil
+          rescue LoadError => e
+            e.message
+          end
+
+        def self.build(settings, agent_settings)
+          tags = TagBuilder.call(settings)
+          agent_base_url = AgentBaseUrl.resolve(agent_settings)
+          unless agent_base_url
+            Datadog.logger.warn('Missing agent base URL; cannot enable crash tracking')
+          end
+
+          ld_library_path = Libdatadog.ld_library_path
+          unless ld_library_path
+            Datadog.logger.warn('Missing ld_library_path; cannot enable crash tracking')
+          end
+
+          path_to_crashtracking_receiver_binary = Libdatadog.path_to_crashtracking_receiver_binary
+          unless path_to_crashtracking_receiver_binary
+            Datadog.logger.warn('Missing path_to_crashtracking_receiver_binary; cannot enable crash tracking')
+          end
+
+          return if [agent_base_url, ld_library_path, path_to_crashtracking_receiver_binary].any?(&:nil?)
+
+          new(
+            tags: tags,
+            agent_base_url: agent_base_url,
+            ld_library_path: ld_library_path,
+            path_to_crashtracking_receiver_binary: path_to_crashtracking_receiver_binary
+          )
+        end
+
+        def initialize(tags:, agent_base_url:, ld_library_path:, path_to_crashtracking_receiver_binary:)
+          @tags = tags
+          @agent_base_url = agent_base_url
+          @ld_library_path = ld_library_path
+          @path_to_crashtracking_receiver_binary = path_to_crashtracking_receiver_binary
+        end
+
+        def start
+          start_or_update_on_fork(action: :start)
+        end
+
+        def reset_after_fork
+          start_or_update_on_fork(action: :update_on_fork)
+        end
+
+        def stop
+          begin
+            self.class._native_stop
+            Datadog.logger.debug('Crash tracking stopped successfully')
+          rescue => e
+            Datadog.logger.error("Failed to stop crash tracking: #{e.message}")
+          end
+        end
+
+        private
+
+        attr_reader :tags, :agent_base_url, :ld_library_path, :path_to_crashtracking_receiver_binary
+
+        def start_or_update_on_fork(action:)
+          begin
+            self.class._native_start_or_update_on_fork(
+              action: action,
+              exporter_configuration: [:agent, agent_base_url],
+              path_to_crashtracking_receiver_binary: path_to_crashtracking_receiver_binary,
+              ld_library_path: ld_library_path,
+              tags_as_array: tags.to_a,
+              upload_timeout_seconds: 1
+            )
+            Datadog.logger.debug("Crash tracking #{action} successful")
+          rescue => e
+            Datadog.logger.error("Failed to #{action} crash tracking: #{e.message}")
+          end
+        end
+      end
+    end
+  end
+end
diff --git a/lib/datadog/core/crashtracking/tag_builder.rb b/lib/datadog/core/crashtracking/tag_builder.rb
new file mode 100644
index 0000000000..b4285a2f3b
--- /dev/null
+++ b/lib/datadog/core/crashtracking/tag_builder.rb
@@ -0,0 +1,44 @@
+# frozen_string_literal: true
+
+require_relative '../utils'
+require_relative '../environment/socket'
+require_relative '../environment/identity'
+require_relative '../environment/git'
+
+module Datadog
+  module Core
+    module Crashtracking
+      # Builds a hash of default plus user tags to be included in a crash report
+      module TagBuilder
+        module_function
+
+        def call(settings)
+          # When changing or adding these, make sure they are kept in sync with
+          # https://docs.google.com/spreadsheets/d/1LOGMf4c4Avbtn36uZ2SWvhIGKRPLM1BoWkUP4JYj7hA/ (Datadog internal link)
+          tags = {
+            'host' => Environment::Socket.hostname,
+            'language' => Environment::Identity.lang,
+            'process_id' => Process.pid.to_s,
+            'profiler_version' => Environment::Identity.gem_datadog_version,
+            'runtime' => Environment::Identity.lang, # This is known to be repeated from language, above
+            'runtime_engine' => Environment::Identity.lang_engine,
+            'runtime-id' => Environment::Identity.id,
+            'runtime_platform' => Environment::Identity.lang_platform,
+            'runtime_version' => Environment::Identity.lang_version,
+          }
+
+          tags['env'] = settings.env if settings.env
+          tags['service'] = settings.service if settings.service
+          tags['version'] = settings.version if settings.version
+          tags['git.repository_url'] = Environment::Git.git_repository_url if Environment::Git.git_repository_url
+          tags['git.commit.sha'] = Environment::Git.git_commit_sha if Environment::Git.git_commit_sha
+
+          # Make sure everything is an utf-8 string, to avoid encoding issues in native code/libdatadog/further downstream
+          settings.tags.merge(tags).map do |key, value|
+            [Utils.utf8_encode(key), Utils.utf8_encode(value)]
+          end.to_h
+        end
+      end
+    end
+  end
+end
diff --git a/lib/datadog/profiling.rb b/lib/datadog/profiling.rb
index aa4d21f361..4364d1f9a1 100644
--- a/lib/datadog/profiling.rb
+++ b/lib/datadog/profiling.rb
@@ -145,7 +145,6 @@ def self.allocation_count # rubocop:disable Lint/NestedMethodDefinition (On purp
     require_relative 'profiling/collectors/idle_sampling_helper'
     require_relative 'profiling/collectors/stack'
     require_relative 'profiling/collectors/thread_context'
-    require_relative 'profiling/crashtracker'
     require_relative 'profiling/stack_recorder'
     require_relative 'profiling/exporter'
     require_relative 'profiling/flush'
diff --git a/lib/datadog/profiling/component.rb b/lib/datadog/profiling/component.rb
index 5d4e68e82d..e383e16142 100644
--- a/lib/datadog/profiling/component.rb
+++ b/lib/datadog/profiling/component.rb
@@ -7,7 +7,7 @@ module Component
       # Passing in a `nil` tracer is supported and will disable the following profiling features:
       # * Code Hotspots panel in the trace viewer, as well as scoping a profile down to a span
       # * Endpoint aggregation in the profiler UX, including normalization (resource per endpoint call)
-      def self.build_profiler_component(settings:, agent_settings:, optional_tracer:) # rubocop:disable Metrics/MethodLength
+      def self.build_profiler_component(settings:, agent_settings:, optional_tracer:, optional_crashtracker:) # rubocop:disable Metrics/MethodLength
         return [nil, { profiling_enabled: false }] unless settings.profiling.enabled
 
         # Workaround for weird dependency direction: the Core::Configuration::Components class currently has a
@@ -73,8 +73,7 @@ def self.build_profiler_component(settings:, agent_settings:, optional_tracer:)
         exporter = build_profiler_exporter(settings, recorder, worker, internal_metadata: internal_metadata)
         transport = build_profiler_transport(settings, agent_settings)
         scheduler = Profiling::Scheduler.new(exporter: exporter, transport: transport, interval: upload_period_seconds)
-        crashtracker = build_crashtracker(settings, transport)
-        profiler = Profiling::Profiler.new(worker: worker, scheduler: scheduler, optional_crashtracker: crashtracker)
+        profiler = Profiling::Profiler.new(worker: worker, scheduler: scheduler, optional_crashtracker: optional_crashtracker)
 
         if dir_interruption_workaround_enabled?(settings, no_signals_workaround_enabled)
           Datadog::Profiling::Ext::DirMonkeyPatches.apply!
@@ -117,35 +116,6 @@ def self.build_profiler_component(settings:, agent_settings:, optional_tracer:)
         )
       end
 
-      private_class_method def self.build_crashtracker(settings, transport)
-        return unless settings.profiling.advanced.experimental_crash_tracking_enabled
-
-        # By default, the transport is an instance of HttpTransport, which validates the configuration and makes
-        # it available for us to use here.
-        # But we support overriding the transport with a user-specific one, which may e.g. write stuff to a file,
-        # and thus can't really provide a valid configuration to talk to a Datadog agent. Thus, in this situation,
-        # we can't use the crashtracker, even if enabled.
-        unless transport.respond_to?(:exporter_configuration)
-          Datadog.logger.debug(
-            'Cannot enable profiling crash tracking as a custom settings.profiling.exporter.transport is configured'
-          )
-          return
-        end
-
-        if Datadog::Profiling::Crashtracker::LIBDATADOG_API_FAILURE
-          Datadog.logger.debug(
-            "Cannot enable crashtracking: #{Datadog::Profiling::Crashtracker::LIBDATADOG_API_FAILURE}"
-          )
-          return
-        end
-
-        Datadog::Profiling::Crashtracker.new(
-          exporter_configuration: transport.exporter_configuration,
-          tags: Datadog::Profiling::TagBuilder.call(settings: settings),
-          upload_timeout_seconds: settings.profiling.upload.timeout_seconds,
-        )
-      end
-
       private_class_method def self.enable_gc_profiling?(settings)
         return false unless settings.profiling.advanced.gc_enabled
 
diff --git a/lib/datadog/profiling/crashtracker.rb b/lib/datadog/profiling/crashtracker.rb
deleted file mode 100644
index 99c55a4d4b..0000000000
--- a/lib/datadog/profiling/crashtracker.rb
+++ /dev/null
@@ -1,99 +0,0 @@
-# frozen_string_literal: true
-
-require 'libdatadog'
-
-module Datadog
-  module Profiling
-    # Used to report Ruby VM crashes.
-    # The interesting bits are implemented as native code and using libdatadog.
-    #
-    # NOTE: The crashtracker native state is a singleton; so even if you create multiple instances of `Crashtracker`
-    # and start them, it only works as "last writer wins". Same for stop -- there's only one state, so calling stop
-    # on it will stop the crash tracker, regardless of which instance started it.
-    #
-    # Methods prefixed with _native_ are implemented in `crashtracker.c`
-    class Crashtracker
-      LIBDATADOG_API_FAILURE =
-        begin
-          require "libdatadog_api.#{RUBY_VERSION[/\d+.\d+/]}_#{RUBY_PLATFORM}"
-          nil
-        rescue LoadError => e
-          e.message
-        end
-
-      private
-
-      attr_reader \
-        :exporter_configuration,
-        :tags_as_array,
-        :path_to_crashtracking_receiver_binary,
-        :ld_library_path,
-        :upload_timeout_seconds
-
-      public
-
-      def initialize(
-        exporter_configuration:,
-        tags:,
-        upload_timeout_seconds:,
-        path_to_crashtracking_receiver_binary: Libdatadog.path_to_crashtracking_receiver_binary,
-        ld_library_path: Libdatadog.ld_library_path
-      )
-        @exporter_configuration = exporter_configuration
-        @tags_as_array = tags.to_a
-        @upload_timeout_seconds = upload_timeout_seconds
-        @path_to_crashtracking_receiver_binary = path_to_crashtracking_receiver_binary
-        @ld_library_path = ld_library_path
-      end
-
-      def start
-        start_or_update_on_fork(action: :start)
-      end
-
-      def reset_after_fork
-        start_or_update_on_fork(action: :update_on_fork)
-      end
-
-      def stop
-        begin
-          self.class._native_stop
-          Datadog.logger.debug('Crash tracking stopped successfully')
-        rescue => e
-          Datadog.logger.error("Failed to stop crash tracking: #{e.message}")
-        end
-      end
-
-      private
-
-      def start_or_update_on_fork(action:)
-        unless path_to_crashtracking_receiver_binary
-          Datadog.logger.warn(
-            "Cannot #{action} profiling crash tracking as no path_to_crashtracking_receiver_binary was found"
-          )
-          return
-        end
-
-        unless ld_library_path
-          Datadog.logger.warn(
-            "Cannot #{action} profiling crash tracking as no ld_library_path was found"
-          )
-          return
-        end
-
-        begin
-          self.class._native_start_or_update_on_fork(
-            action: action,
-            exporter_configuration: exporter_configuration,
-            path_to_crashtracking_receiver_binary: path_to_crashtracking_receiver_binary,
-            ld_library_path: ld_library_path,
-            tags_as_array: tags_as_array,
-            upload_timeout_seconds: Integer(upload_timeout_seconds),
-          )
-          Datadog.logger.debug("Crash tracking #{action} successful")
-        rescue => e
-          Datadog.logger.error("Failed to #{action} crash tracking: #{e.message}")
-        end
-      end
-    end
-  end
-end