Skip to content

Commit

Permalink
WIP
Browse files Browse the repository at this point in the history
  • Loading branch information
TonyCTHsu committed Aug 6, 2024
1 parent 8dfe622 commit 5d676bf
Show file tree
Hide file tree
Showing 9 changed files with 202 additions and 150 deletions.
11 changes: 6 additions & 5 deletions ext/libdatadog_api/crashtracker.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,20 +5,21 @@

static VALUE _native_start_or_update_on_fork(int argc, VALUE *argv, DDTRACE_UNUSED VALUE _self);
static VALUE _native_stop(DDTRACE_UNUSED VALUE _self);
static void crashtracker_init(VALUE profiling_module);
static void crashtracker_init(VALUE crashtracking_module);

// Used to report Ruby VM crashes.
// Once initialized, segfaults will be reported automatically using libdatadog.

// Entry point of the `libdatadog_api` native extension (Ruby invokes `Init_<name>` when the
// extension is loaded). Defines the module hierarchy and hands the innermost module to
// `crashtracker_init`, which attaches the crashtracker native methods to it.
void DDTRACE_EXPORT Init_libdatadog_api(void) {
  VALUE datadog_module = rb_define_module("Datadog");
  VALUE core_module = rb_define_module_under(datadog_module, "Core");
  // Nest under Datadog::Core (not directly under Datadog): the Ruby side declares and calls
  // Datadog::Core::Crashtracking::Component, so the native singleton methods must be defined
  // on that exact class for `_native_start_or_update_on_fork` / `_native_stop` to resolve.
  VALUE crashtracking_module = rb_define_module_under(core_module, "Crashtracking");

  crashtracker_init(crashtracking_module);
}

void crashtracker_init(VALUE profiling_module) {
VALUE crashtracker_class = rb_define_class_under(profiling_module, "Crashtracker", rb_cObject);
void crashtracker_init(VALUE crashtracking_module) {
VALUE crashtracker_class = rb_define_class_under(crashtracking_module, "Component", rb_cObject);

rb_define_singleton_method(crashtracker_class, "_native_start_or_update_on_fork", _native_start_or_update_on_fork, -1);
rb_define_singleton_method(crashtracker_class, "_native_stop", _native_stop, 0);
Expand Down
14 changes: 14 additions & 0 deletions lib/datadog/core/configuration/components.rb
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
require_relative '../../tracing/component'
require_relative '../../profiling/component'
require_relative '../../appsec/component'
require_relative '../crashtracking/component'

module Datadog
module Core
Expand Down Expand Up @@ -58,6 +59,17 @@ def build_runtime_metrics_worker(settings)
def build_telemetry(settings, agent_settings, logger)
Telemetry::Component.build(settings, agent_settings, logger)
end

# Builds the crash tracking component.
#
# Returns `nil` when crash tracking is disabled in `settings`, or when the native
# `libdatadog_api` extension failed to load (the load failure is logged at debug level).
# Otherwise returns whatever `Crashtracking::Component.build` returns.
def build_crashtracker(settings, agent_settings)
  return unless settings.crash_tracking.enabled

  component_class = Datadog::Core::Crashtracking::Component
  load_failure = component_class::LIBDATADOG_API_FAILURE

  # A non-nil value means the native extension could not be required
  return component_class.build(settings, agent_settings) unless load_failure

  Datadog.logger.debug("Cannot enable crashtracking: #{load_failure}")
  nil
end
end

include Datadog::Tracing::Component::InstanceMethods
Expand All @@ -83,11 +95,13 @@ def initialize(settings)

@remote = Remote::Component.build(settings, agent_settings)
@tracer = self.class.build_tracer(settings, agent_settings, logger: @logger)
crashtracker = self.class.build_crashtracker(settings, agent_settings)

@profiler, profiler_logger_extra = Datadog::Profiling::Component.build_profiler_component(
settings: settings,
agent_settings: agent_settings,
optional_tracer: @tracer,
optional_crashtracker: crashtracker
)
@environment_logger_extra.merge!(profiler_logger_extra) if profiler_logger_extra

Expand Down
24 changes: 11 additions & 13 deletions lib/datadog/core/configuration/settings.rb
Original file line number Diff line number Diff line change
Expand Up @@ -450,19 +450,6 @@ def initialize(*_)
o.env 'DD_PROFILING_UPLOAD_PERIOD'
o.default 60
end

# Enables reporting of information when the Ruby VM crashes.
#
# This feature is no longer experimental, and we plan to deprecate this setting and replace it with a
# properly-named one soon.
#
# @default `DD_PROFILING_EXPERIMENTAL_CRASH_TRACKING_ENABLED` environment variable as a boolean,
# otherwise `true`
option :experimental_crash_tracking_enabled do |o|
o.type :bool
o.env 'DD_PROFILING_EXPERIMENTAL_CRASH_TRACKING_ENABLED'
o.default true
end
end

# @public_api
Expand Down Expand Up @@ -833,6 +820,17 @@ def initialize(*_)
option :service
end

settings :crash_tracking do
  # Enables reporting of information when the Ruby VM crashes.
  #
  # @default `DD_CRASH_TRACKING_ENABLED` environment variable as a boolean,
  #   otherwise `true`
  option :enabled do |o|
    o.type :bool
    # Wire up the environment variable the documentation above promises; without this line
    # the option could only be changed from code.
    o.env 'DD_CRASH_TRACKING_ENABLED'
    o.default true
  end
end

# TODO: Tracing should manage its own settings.
# Keep this extension here for now to keep things working.
extend Datadog::Tracing::Configuration::Settings
Expand Down
25 changes: 25 additions & 0 deletions lib/datadog/core/crashtracking/agent_base_url.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# frozen_string_literal: true

require_relative '../configuration/ext'

module Datadog
  module Core
    module Crashtracking
      # Turns resolved agent settings into the base URL string used to reach the agent.
      module AgentBaseUrl
        module_function

        # @param agent_settings object responding to `adapter`, and to `ssl`/`hostname`/`port`
        #   (HTTP adapter) or `uds_path` (unix socket adapter)
        # @return [String, nil] the base URL, or `nil` (after logging a warning) when the adapter
        #   is neither the HTTP nor the unix socket one
        def resolve(agent_settings)
          agent_ext = Datadog::Core::Configuration::Ext::Agent
          adapter = agent_settings.adapter

          case adapter
          when agent_ext::HTTP::ADAPTER
            scheme = agent_settings.ssl ? 'https' : 'http'
            "#{scheme}://#{agent_settings.hostname}:#{agent_settings.port}/"
          when agent_ext::UnixSocket::ADAPTER
            "unix://#{agent_settings.uds_path}"
          else
            Datadog.logger.warn("Unexpected adapter: #{adapter}")
            nil
          end
        end
      end
    end
  end
end
100 changes: 100 additions & 0 deletions lib/datadog/core/crashtracking/component.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
# frozen_string_literal: true

require 'libdatadog'

require_relative 'tag_builder'
require_relative 'agent_base_url'

module Datadog
  module Core
    module Crashtracking
      # Used to report Ruby VM crashes.
      #
      # NOTE: The crashtracker native state is a singleton; so even if you create multiple instances of `Component`
      # and start them, it only works as "last writer wins". Same for stop -- there's only one state, so calling stop
      # on it will stop the crash tracker, regardless of which instance started it.
      #
      # Methods prefixed with _native_ are implemented in `crashtracker.c`
      class Component
        # `nil` when the native extension was required successfully; otherwise the `LoadError`
        # message explaining why it could not be loaded.
        LIBDATADOG_API_FAILURE =
          begin
            # The extension file name embeds "<major>.<minor>" of the running Ruby; the dot is
            # escaped so the pattern only matches a literal `.` between the two numbers.
            require "libdatadog_api.#{RUBY_VERSION[/\d+\.\d+/]}_#{RUBY_PLATFORM}"
            nil
          rescue LoadError => e
            e.message
          end

        # Gathers everything the native crashtracker needs and builds a component from it.
        #
        # Every missing prerequisite is logged as its own warning (so all problems surface in
        # a single run) before giving up.
        #
        # @return [Component, nil] `nil` when the agent base URL, the libdatadog library path or the
        #   receiver binary path is unavailable
        def self.build(settings, agent_settings)
          tags = TagBuilder.call(settings)
          agent_base_url = AgentBaseUrl.resolve(agent_settings)
          unless agent_base_url
            Datadog.logger.warn('Missing agent base URL; cannot enable crash tracking')
          end

          ld_library_path = Libdatadog.ld_library_path
          unless ld_library_path
            Datadog.logger.warn('Missing ld_library_path; cannot enable crash tracking')
          end

          path_to_crashtracking_receiver_binary = Libdatadog.path_to_crashtracking_receiver_binary
          unless path_to_crashtracking_receiver_binary
            Datadog.logger.warn('Missing path_to_crashtracking_receiver_binary; cannot enable crash tracking')
          end

          return if [agent_base_url, ld_library_path, path_to_crashtracking_receiver_binary].any?(&:nil?)

          new(
            tags: tags,
            agent_base_url: agent_base_url,
            ld_library_path: ld_library_path,
            path_to_crashtracking_receiver_binary: path_to_crashtracking_receiver_binary
          )
        end

        def initialize(tags:, agent_base_url:, ld_library_path:, path_to_crashtracking_receiver_binary:)
          @tags = tags
          @agent_base_url = agent_base_url
          @ld_library_path = ld_library_path
          @path_to_crashtracking_receiver_binary = path_to_crashtracking_receiver_binary
        end

        # Starts the native crashtracker. Failures are logged, never raised.
        def start
          start_or_update_on_fork(action: :start)
        end

        # Re-sends the configuration to the native crashtracker with the `:update_on_fork` action.
        # Failures are logged, never raised.
        def reset_after_fork
          start_or_update_on_fork(action: :update_on_fork)
        end

        # Stops the native crashtracker. Failures are logged, never raised.
        def stop
          self.class._native_stop
          Datadog.logger.debug('Crash tracking stopped successfully')
        rescue => e
          Datadog.logger.error("Failed to stop crash tracking: #{e.message}")
        end

        private

        attr_reader :tags, :agent_base_url, :ld_library_path, :path_to_crashtracking_receiver_binary

        # Shared implementation of `start` and `reset_after_fork`: both hand the same configuration
        # to the native layer and differ only in `action`.
        def start_or_update_on_fork(action:)
          self.class._native_start_or_update_on_fork(
            action: action,
            exporter_configuration: [:agent, agent_base_url],
            path_to_crashtracking_receiver_binary: path_to_crashtracking_receiver_binary,
            ld_library_path: ld_library_path,
            tags_as_array: tags.to_a,
            upload_timeout_seconds: 1
          )
          Datadog.logger.debug("Crash tracking #{action} successful")
        rescue => e
          Datadog.logger.error("Failed to #{action} crash tracking: #{e.message}")
        end
      end
    end
  end
end
44 changes: 44 additions & 0 deletions lib/datadog/core/crashtracking/tag_builder.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# frozen_string_literal: true

require_relative '../utils'
require_relative '../environment/socket'
require_relative '../environment/identity'
require_relative '../environment/git'

module Datadog
  module Core
    module Crashtracking
      # Assembles the tags handed to the crashtracker: built-in host/process/runtime tags
      # layered on top of the user-configured `settings.tags`.
      module TagBuilder
        module_function

        # @param settings object responding to `env`, `service`, `version` and `tags`
        # @return [Hash] tag names and values, each passed through `Utils.utf8_encode`
        def call(settings)
          # When changing or adding these, make sure they are kept in sync with
          # https://docs.google.com/spreadsheets/d/1LOGMf4c4Avbtn36uZ2SWvhIGKRPLM1BoWkUP4JYj7hA/ (Datadog internal link)
          always_present = {
            'host' => Environment::Socket.hostname,
            'language' => Environment::Identity.lang,
            'process_id' => Process.pid.to_s,
            'profiler_version' => Environment::Identity.gem_datadog_version,
            'runtime' => Environment::Identity.lang, # Intentionally duplicates 'language'
            'runtime_engine' => Environment::Identity.lang_engine,
            'runtime-id' => Environment::Identity.id,
            'runtime_platform' => Environment::Identity.lang_platform,
            'runtime_version' => Environment::Identity.lang_version,
          }

          # These are only reported when they have a (truthy) value
          when_available = {
            'env' => settings.env,
            'service' => settings.service,
            'version' => settings.version,
            'git.repository_url' => Environment::Git.git_repository_url,
            'git.commit.sha' => Environment::Git.git_commit_sha,
          }.select { |_name, value| value }

          # Built-in tags take precedence over user tags with the same name. Everything is
          # re-encoded as utf-8 to avoid encoding issues in native code and further downstream.
          combined = settings.tags.merge(always_present).merge(when_available)
          combined.each_with_object({}) do |(name, value), encoded|
            encoded[Utils.utf8_encode(name)] = Utils.utf8_encode(value)
          end
        end
      end
    end
  end
end
1 change: 0 additions & 1 deletion lib/datadog/profiling.rb
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,6 @@ def self.allocation_count # rubocop:disable Lint/NestedMethodDefinition (On purp
require_relative 'profiling/collectors/idle_sampling_helper'
require_relative 'profiling/collectors/stack'
require_relative 'profiling/collectors/thread_context'
require_relative 'profiling/crashtracker'
require_relative 'profiling/stack_recorder'
require_relative 'profiling/exporter'
require_relative 'profiling/flush'
Expand Down
34 changes: 2 additions & 32 deletions lib/datadog/profiling/component.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ module Component
# Passing in a `nil` tracer is supported and will disable the following profiling features:
# * Code Hotspots panel in the trace viewer, as well as scoping a profile down to a span
# * Endpoint aggregation in the profiler UX, including normalization (resource per endpoint call)
def self.build_profiler_component(settings:, agent_settings:, optional_tracer:) # rubocop:disable Metrics/MethodLength
def self.build_profiler_component(settings:, agent_settings:, optional_tracer:, optional_crashtracker:) # rubocop:disable Metrics/MethodLength
return [nil, { profiling_enabled: false }] unless settings.profiling.enabled

# Workaround for weird dependency direction: the Core::Configuration::Components class currently has a
Expand Down Expand Up @@ -73,8 +73,7 @@ def self.build_profiler_component(settings:, agent_settings:, optional_tracer:)
exporter = build_profiler_exporter(settings, recorder, worker, internal_metadata: internal_metadata)
transport = build_profiler_transport(settings, agent_settings)
scheduler = Profiling::Scheduler.new(exporter: exporter, transport: transport, interval: upload_period_seconds)
crashtracker = build_crashtracker(settings, transport)
profiler = Profiling::Profiler.new(worker: worker, scheduler: scheduler, optional_crashtracker: crashtracker)
profiler = Profiling::Profiler.new(worker: worker, scheduler: scheduler, optional_crashtracker: optional_crashtracker)

if dir_interruption_workaround_enabled?(settings, no_signals_workaround_enabled)
Datadog::Profiling::Ext::DirMonkeyPatches.apply!
Expand Down Expand Up @@ -117,35 +116,6 @@ def self.build_profiler_component(settings:, agent_settings:, optional_tracer:)
)
end

private_class_method def self.build_crashtracker(settings, transport)
return unless settings.profiling.advanced.experimental_crash_tracking_enabled

# By default, the transport is an instance of HttpTransport, which validates the configuration and makes
# it available for us to use here.
# But we support overriding the transport with a user-specific one, which may e.g. write stuff to a file,
# and thus can't really provide a valid configuration to talk to a Datadog agent. Thus, in this situation,
# we can't use the crashtracker, even if enabled.
unless transport.respond_to?(:exporter_configuration)
Datadog.logger.debug(
'Cannot enable profiling crash tracking as a custom settings.profiling.exporter.transport is configured'
)
return
end

if Datadog::Profiling::Crashtracker::LIBDATADOG_API_FAILURE
Datadog.logger.debug(
"Cannot enable crashtracking: #{Datadog::Profiling::Crashtracker::LIBDATADOG_API_FAILURE}"
)
return
end

Datadog::Profiling::Crashtracker.new(
exporter_configuration: transport.exporter_configuration,
tags: Datadog::Profiling::TagBuilder.call(settings: settings),
upload_timeout_seconds: settings.profiling.upload.timeout_seconds,
)
end

private_class_method def self.enable_gc_profiling?(settings)
return false unless settings.profiling.advanced.gc_enabled

Expand Down
Loading

0 comments on commit 5d676bf

Please sign in to comment.