[core] Collect run-time metrics #819

Merged: 75 commits (Apr 11, 2019)

Commits

c29f31c
[metrics] initial implementation
Kyle-Verhoog Feb 2, 2019
8c2679f
[metrics] add gc generation metrics
Kyle-Verhoog Feb 2, 2019
d9b61b6
[metrics] clean-up
Kyle-Verhoog Feb 8, 2019
5f14909
[metrics] add thread worker, additional metrics
Kyle-Verhoog Feb 9, 2019
f0174de
[metrics] linting
Kyle-Verhoog Feb 9, 2019
4a4c9f7
[metrics] code organization
Kyle-Verhoog Mar 15, 2019
75c05c1
[metrics] add runtime_id to tracer
Kyle-Verhoog Mar 15, 2019
1c4a3b8
[metrics] resolve rebase conflicts
Kyle-Verhoog Mar 15, 2019
066df1a
[metrics] linting
Kyle-Verhoog Mar 15, 2019
b96ba74
[metrics] add runtime-id tag
Kyle-Verhoog Mar 15, 2019
e84e450
[metrics] linting
Kyle-Verhoog Mar 15, 2019
1e10667
[metrics] linting
Kyle-Verhoog Mar 15, 2019
910b83b
Add environment variable for enabling runtime metrics
majorgreys Mar 21, 2019
0abcafb
Environment configuration for dogstatsd
majorgreys Mar 21, 2019
1612214
apply brettlinter
brettlangdon Mar 22, 2019
eed154c
[metrics] remove unnecessary LazyValues
Kyle-Verhoog Mar 22, 2019
009b94f
[metrics] in-line psutil method calls
Kyle-Verhoog Mar 22, 2019
8aea85a
[metrics] use internal logger
Kyle-Verhoog Mar 22, 2019
02f2b00
[metrics] add reset method, gather services
Kyle-Verhoog Mar 22, 2019
5ba8dc1
[metrics] support multiple services properly
Kyle-Verhoog Mar 22, 2019
810ec4e
[metrics] use base test case
Kyle-Verhoog Mar 22, 2019
7f63ec9
[metrics] handle process forking
Kyle-Verhoog Mar 23, 2019
d23995c
[metrics] add runtime metrics tags to spans
Kyle-Verhoog Mar 25, 2019
d886bff
Remove LazyValue
majorgreys Mar 25, 2019
fcda216
Add dependencies for runtime metrics to library
majorgreys Mar 26, 2019
a333f70
Refactor metrics collectors and add tests
majorgreys Mar 28, 2019
321474d
Begin major refactoring of api
majorgreys Mar 28, 2019
1cc7895
Decouple dogstatsd from runtime metrics
majorgreys Mar 29, 2019
f851205
Fix constant
majorgreys Mar 29, 2019
c900dd2
Fix flake8
majorgreys Mar 29, 2019
2e807ec
Separate host/port for trace agent and dogstatsd
majorgreys Mar 29, 2019
a9999c8
Update ddtrace_run tests
majorgreys Mar 29, 2019
0308fd7
Fix integration test
majorgreys Mar 29, 2019
992c9ce
Merge branch '0.24-dev' into kyle-verhoog/metrics
majorgreys Apr 1, 2019
c78c5a0
Merge branch '0.24-dev' into kyle-verhoog/metrics
majorgreys Apr 1, 2019
a198c5f
Vendor datadogpy to fix issues with gevent+requests
majorgreys Apr 1, 2019
4e8e40e
Revert change to on import
majorgreys Apr 1, 2019
868891e
Add license for dogstatsd
majorgreys Apr 1, 2019
df7a07f
Move runtime metrics into internal
majorgreys Apr 1, 2019
c58e796
Fixes for ddtrace.internal.runtime
majorgreys Apr 1, 2019
effd59a
Wrap worker flush in try-except to log errors
majorgreys Apr 1, 2019
1ffdcb9
Flush calls gauge which is a UDP so no need to catch errors
majorgreys Apr 2, 2019
71439ac
Remove unused datadog and metrics tests
majorgreys Apr 2, 2019
86f70c8
Rename class in repr
majorgreys Apr 2, 2019
15953d0
Remove collect_fn argument from ValueCollector
majorgreys Apr 2, 2019
b1ff051
Fix flake8
majorgreys Apr 2, 2019
fbbbddf
Remove tags not called for in RFC
majorgreys Apr 2, 2019
b592566
Merge branch '0.24-dev' into kyle-verhoog/metrics
majorgreys Apr 2, 2019
3940813
Better metric names for cpu
majorgreys Apr 2, 2019
50a6ecf
Merge branch 'kyle-verhoog/metrics' of github.com:DataDog/dd-trace-py…
majorgreys Apr 2, 2019
641f9b6
Use 0-1-2 for gc collections
majorgreys Apr 5, 2019
da771e1
Merge branch '0.24-dev' into kyle-verhoog/metrics
majorgreys Apr 5, 2019
38e7f60
Comments
majorgreys Apr 5, 2019
9a8b6c7
Merge branch 'kyle-verhoog/metrics' of github.com:DataDog/dd-trace-py…
majorgreys Apr 5, 2019
156b6b4
Fix daemon for threading
majorgreys Apr 8, 2019
589a89b
Add test on metrics received by dogstatsd
majorgreys Apr 8, 2019
48d9bf2
Remove datadog dependency since we have it vendored
majorgreys Apr 8, 2019
34d5c0c
Fix cpu metrics
majorgreys Apr 8, 2019
e344085
Fix cumulative metrics
majorgreys Apr 10, 2019
a234743
Fix reset
majorgreys Apr 10, 2019
657061b
Flag check unnecessary
majorgreys Apr 10, 2019
a76e1ee
Fix runtime tag names
brettlangdon Apr 10, 2019
a9fb5c0
Merge branch 'kyle-verhoog/metrics' of github.com:DataDog/dd-trace-py…
majorgreys Apr 10, 2019
52acbb8
Only tag root span with runtime info
majorgreys Apr 10, 2019
610e8ce
Use common namespace for gc metric names
majorgreys Apr 10, 2019
94f58ad
Remove unnecessary set check
majorgreys Apr 10, 2019
5d34662
Wait for tests of metrics received
majorgreys Apr 10, 2019
af39200
Fix for constant tags and services
majorgreys Apr 10, 2019
75fb9de
Fix broken config
majorgreys Apr 10, 2019
bc560ed
Fix flake8
majorgreys Apr 11, 2019
7e26b3f
Merge branch '0.24-dev' into kyle-verhoog/metrics
majorgreys Apr 11, 2019
c467106
Fix ddtrace-run test for runtime metrics enabled
majorgreys Apr 11, 2019
667feea
Merge branch 'kyle-verhoog/metrics' of github.com:DataDog/dd-trace-py…
majorgreys Apr 11, 2019
077cad9
Update ddtrace/bootstrap/sitecustomize.py
brettlangdon Apr 11, 2019
ab0c594
Merge branch '0.24-dev' into kyle-verhoog/metrics
majorgreys Apr 11, 2019

Files changed

3 changes: 3 additions & 0 deletions ddtrace/bootstrap/sitecustomize.py
@@ -85,6 +85,7 @@ def add_global_tags(tracer):
hostname = os.environ.get('DD_AGENT_HOST', os.environ.get('DATADOG_TRACE_AGENT_HOSTNAME'))
port = os.environ.get("DATADOG_TRACE_AGENT_PORT")
priority_sampling = os.environ.get("DATADOG_PRIORITY_SAMPLING")
runtime_metrics_enabled = get_env('runtime_metrics', 'enabled')

opts = {}

@@ -97,6 +98,8 @@ def add_global_tags(tracer):
opts["port"] = int(port)
if priority_sampling:
opts["priority_sampling"] = asbool(priority_sampling)
if runtime_metrics_enabled:
Review comment (Member):

If we set a default, then we'll always have this, unless they do DD_RUNTIME_METRICS= and empty string is falsey.

Also, we don't really use "enabled" as a value for other things, do we? We should just use True as the default.

We should be able to change to:

opts['collect_metrics'] = asbool(get_env('runtime_metrics', True))

Review comment (Member):

We need to verify if we want this to be True or False by default.

Review comment (Member):

nvm, I completely forgot how get_env works, it is get_env(<integration>, <name>) so this is DD_RUNTIME_METRICS_ENABLED and the default is None.

So this is totally fine to keep as-is! sorry about any confusion!

opts["collect_metrics"] = asbool(runtime_metrics_enabled)

if opts:
tracer.configure(**opts)
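For context, a minimal sketch of how this option is expected to be used, assuming (per the review thread above) that get_env('runtime_metrics', 'enabled') reads the DD_RUNTIME_METRICS_ENABLED environment variable; the launch command and the exact set of strings asbool() accepts are illustrative assumptions, not taken from this diff:

# Hypothetical launch of an app instrumented via ddtrace-run:
#   DD_RUNTIME_METRICS_ENABLED=true ddtrace-run python app.py
import os

def runtime_metrics_opts():
    # Rough equivalent of get_env('runtime_metrics', 'enabled') in sitecustomize.py
    runtime_metrics_enabled = os.environ.get('DD_RUNTIME_METRICS_ENABLED')
    opts = {}
    if runtime_metrics_enabled:
        # asbool() is assumed here to treat 'true' / '1' (case-insensitive) as True
        opts['collect_metrics'] = runtime_metrics_enabled.lower() in ('true', '1')
    return opts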
12 changes: 12 additions & 0 deletions ddtrace/internal/runtime/__init__.py
@@ -0,0 +1,12 @@
from .runtime_metrics import (
RuntimeTags,
RuntimeMetrics,
RuntimeWorker,
)


__all__ = [
'RuntimeTags',
'RuntimeMetrics',
'RuntimeWorker',
]
85 changes: 85 additions & 0 deletions ddtrace/internal/runtime/collector.py
@@ -0,0 +1,85 @@
import importlib

from ..logger import get_logger

log = get_logger(__name__)


class ValueCollector(object):
"""A basic state machine useful for collecting, caching and updating data
obtained from different Python modules.

The two primary use-cases are
1) data loaded once (like tagging information)
2) periodically updating data sources (like thread count)

Functionality is provided for requiring and importing modules which may or
may not be installed.
"""
enabled = True
periodic = False
required_modules = []
value = None
value_loaded = False

def __init__(self, enabled=None, periodic=None, required_modules=None):
self.enabled = self.enabled if enabled is None else enabled
self.periodic = self.periodic if periodic is None else periodic
self.required_modules = self.required_modules if required_modules is None else required_modules

self._modules_successfully_loaded = False
self.modules = self._load_modules()
if self._modules_successfully_loaded:
self._on_modules_load()

def _on_modules_load(self):
"""Hook triggered after all required_modules have been successfully loaded.
"""

def _load_modules(self):
modules = {}
try:
for module in self.required_modules:
modules[module] = importlib.import_module(module)
self._modules_successfully_loaded = True
except ImportError:
# DEV: disable collector if we cannot load any of the required modules
self.enabled = False
log.warn('Could not import module "{}" for {}. Disabling collector.'.format(module, self))
return None
return modules

def collect(self, keys=None):
"""Returns metrics as collected by `collect_fn`.

:param keys: The keys of the metrics to collect.
"""
if not self.enabled:
return self.value

keys = keys or set()

if not self.periodic and self.value_loaded:
return self.value

# call underlying collect function and filter out keys not requested
self.value = self.collect_fn(keys)

# filter values for keys
if len(keys) > 0 and isinstance(self.value, list):
self.value = [
(k, v)
for (k, v) in self.value
if k in keys
]

self.value_loaded = True
return self.value

def __repr__(self):
return '<{}(enabled={},periodic={},required_modules={})>'.format(
self.__class__.__name__,
self.enabled,
self.periodic,
self.required_modules,
)
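As a usage illustration only (not part of this diff), here is a hypothetical one-shot subclass of ValueCollector; the class name and metric key are made up for the example:

from ddtrace.internal.runtime.collector import ValueCollector

class ExampleVersionCollector(ValueCollector):
    """Hypothetical collector: imports `platform` once and caches a single value."""
    required_modules = ['platform']
    periodic = False  # collect once, then serve the cached value

    def collect_fn(self, keys):
        platform = self.modules.get('platform')
        return [('example.python.version', platform.python_version())]

collector = ExampleVersionCollector()
collector.collect()  # imports succeeded, so this returns [('example.python.version', ...)]
collector.collect()  # periodic=False: the cached value is returned without re-collecting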
46 changes: 46 additions & 0 deletions ddtrace/internal/runtime/constants.py
@@ -0,0 +1,46 @@
GC_GEN0_COUNT = 'runtime.python.gc.gen0_count'
GC_GEN1_COUNT = 'runtime.python.gc.gen1_count'
GC_GEN2_COUNT = 'runtime.python.gc.gen2_count'

THREAD_COUNT = 'runtime.python.thread_count'
MEM_RSS = 'runtime.python.mem.rss'
CPU_TIME_SYS = 'runtime.python.cpu.time.sys'
CPU_TIME_USER = 'runtime.python.cpu.time.user'
CPU_PERCENT = 'runtime.python.cpu.percent'
CTX_SWITCH_VOLUNTARY = 'runtime.python.cpu.ctx_switch.voluntary'
CTX_SWITCH_INVOLUNTARY = 'runtime.python.cpu.ctx_switch.involuntary'

GC_RUNTIME_METRICS = set([
GC_GEN0_COUNT,
GC_GEN1_COUNT,
GC_GEN2_COUNT,
])

PSUTIL_RUNTIME_METRICS = set([
THREAD_COUNT,
MEM_RSS,
CTX_SWITCH_VOLUNTARY,
CTX_SWITCH_INVOLUNTARY,
CPU_TIME_SYS,
CPU_TIME_USER,
CPU_PERCENT,
])

DEFAULT_RUNTIME_METRICS = GC_RUNTIME_METRICS | PSUTIL_RUNTIME_METRICS

RUNTIME_ID = 'runtime.python.runtime-id'
SERVICE = 'runtime.python.service'
LANG_INTERPRETER = 'runtime.python.lang_interpreter'
LANG_VERSION = 'runtime.python.lang_version'

TRACER_TAGS = set([
RUNTIME_ID,
SERVICE,
])

PLATFORM_TAGS = set([
LANG_INTERPRETER,
LANG_VERSION
])

DEFAULT_RUNTIME_TAGS = TRACER_TAGS
92 changes: 92 additions & 0 deletions ddtrace/internal/runtime/metric_collectors.py
@@ -0,0 +1,92 @@
import os

from .collector import ValueCollector
from .constants import (
GC_GEN0_COUNT,
GC_GEN1_COUNT,
GC_GEN2_COUNT,
THREAD_COUNT,
MEM_RSS,
CTX_SWITCH_VOLUNTARY,
CTX_SWITCH_INVOLUNTARY,
CPU_TIME_SYS,
CPU_TIME_USER,
CPU_PERCENT,
)


class RuntimeMetricCollector(ValueCollector):
value = []
periodic = True


class GCRuntimeMetricCollector(RuntimeMetricCollector):
""" Collector for garbage collection generational counts

More information at https://docs.python.org/3/library/gc.html
"""
required_modules = ['gc']

def collect_fn(self, keys):
gc = self.modules.get('gc')

counts = gc.get_count()
metrics = [
(GC_GEN0_COUNT, counts[0]),
(GC_GEN1_COUNT, counts[1]),
(GC_GEN2_COUNT, counts[2]),
]

return metrics


class PSUtilRuntimeMetricCollector(RuntimeMetricCollector):
"""Collector for psutil metrics.

Performs batched operations via proc.oneshot() to optimize the calls.
See https://psutil.readthedocs.io/en/latest/#psutil.Process.oneshot
for more information.
"""
required_modules = ['psutil']
stored_value = dict(
CPU_TIME_SYS_TOTAL=0,
CPU_TIME_USER_TOTAL=0,
CTX_SWITCH_VOLUNTARY_TOTAL=0,
CTX_SWITCH_INVOLUNTARY_TOTAL=0,
)

def _on_modules_load(self):
self.proc = self.modules['psutil'].Process(os.getpid())

def collect_fn(self, keys):
with self.proc.oneshot():
# only return time deltas
# TODO[tahir]: better abstraction for metrics based on last value
cpu_time_sys_total = self.proc.cpu_times().system
cpu_time_user_total = self.proc.cpu_times().user
cpu_time_sys = cpu_time_sys_total - self.stored_value['CPU_TIME_SYS_TOTAL']
cpu_time_user = cpu_time_user_total - self.stored_value['CPU_TIME_USER_TOTAL']

ctx_switch_voluntary_total = self.proc.num_ctx_switches().voluntary
ctx_switch_involuntary_total = self.proc.num_ctx_switches().involuntary
ctx_switch_voluntary = ctx_switch_voluntary_total - self.stored_value['CTX_SWITCH_VOLUNTARY_TOTAL']
ctx_switch_involuntary = ctx_switch_involuntary_total - self.stored_value['CTX_SWITCH_INVOLUNTARY_TOTAL']

self.stored_value = dict(
CPU_TIME_SYS_TOTAL=cpu_time_sys_total,
CPU_TIME_USER_TOTAL=cpu_time_user_total,
CTX_SWITCH_VOLUNTARY_TOTAL=ctx_switch_voluntary_total,
CTX_SWITCH_INVOLUNTARY_TOTAL=ctx_switch_involuntary_total,
)

metrics = [
(THREAD_COUNT, self.proc.num_threads()),
(MEM_RSS, self.proc.memory_info().rss),
(CTX_SWITCH_VOLUNTARY, ctx_switch_voluntary),
(CTX_SWITCH_INVOLUNTARY, ctx_switch_involuntary),
(CPU_TIME_SYS, cpu_time_sys),
(CPU_TIME_USER, cpu_time_user),
(CPU_PERCENT, self.proc.cpu_percent()),
]

return metrics
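A standalone sketch of the delta bookkeeping used above for cumulative counters (CPU time and context switches), shown outside the collector for clarity; it mirrors the logic in collect_fn and uses only documented psutil calls:

import os
import psutil

proc = psutil.Process(os.getpid())
_last_cpu_sys = 0.0

def cpu_time_sys_delta():
    """Return system CPU time consumed since the previous call (cf. CPU_TIME_SYS)."""
    global _last_cpu_sys
    with proc.oneshot():  # batch the underlying process reads
        total = proc.cpu_times().system
    delta = total - _last_cpu_sys
    _last_cpu_sys = total
    return delta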
107 changes: 107 additions & 0 deletions ddtrace/internal/runtime/runtime_metrics.py
@@ -0,0 +1,107 @@
import threading
import time
import itertools

from ..logger import get_logger
from .constants import (
DEFAULT_RUNTIME_METRICS,
DEFAULT_RUNTIME_TAGS,
)
from .metric_collectors import (
GCRuntimeMetricCollector,
PSUtilRuntimeMetricCollector,
)
from .tag_collectors import (
TracerTagCollector,
)

log = get_logger(__name__)


class RuntimeCollectorsIterable(object):
def __init__(self, enabled=None):
self._enabled = enabled or self.ENABLED
# Initialize the collectors.
self._collectors = [c() for c in self.COLLECTORS]

def __iter__(self):
collected = (
collector.collect(self._enabled)
for collector in self._collectors
)
return itertools.chain.from_iterable(collected)

def __repr__(self):
return '{}(enabled={})'.format(
self.__class__.__name__,
self._enabled,
)


class RuntimeTags(RuntimeCollectorsIterable):
ENABLED = DEFAULT_RUNTIME_TAGS
COLLECTORS = [
TracerTagCollector,
]


class RuntimeMetrics(RuntimeCollectorsIterable):
ENABLED = DEFAULT_RUNTIME_METRICS
COLLECTORS = [
GCRuntimeMetricCollector,
PSUtilRuntimeMetricCollector,
]


class RuntimeWorker(object):
""" Worker thread for collecting and writing runtime metrics to a DogStatsd
client.
"""

FLUSH_INTERVAL = 10

def __init__(self, statsd_client, flush_interval=None):
self._stay_alive = None
self._thread = None
self._flush_interval = flush_interval or self.FLUSH_INTERVAL
self._statsd_client = statsd_client
self._runtime_metrics = RuntimeMetrics()

def _target(self):
while self._stay_alive:
self.flush()
time.sleep(self._flush_interval)

def start(self):
if not self._thread:
log.debug("Starting {}".format(self))
self._stay_alive = True
self._thread = threading.Thread(target=self._target)
self._thread.setDaemon(True)
self._thread.start()

def stop(self):
if self._thread and self._stay_alive:
log.debug("Stopping {}".format(self))
self._stay_alive = False

def _write_metric(self, key, value):
log.debug('Writing metric {}:{}'.format(key, value))
self._statsd_client.gauge(key, value)

def flush(self):
if not self._statsd_client:
log.warn('Attempted flush with uninitialized or failed statsd client')
return

for key, value in self._runtime_metrics:
self._write_metric(key, value)

def reset(self):
self._runtime_metrics = RuntimeMetrics()

def __repr__(self):
return '{}(runtime_metrics={})'.format(
self.__class__.__name__,
self._runtime_metrics,
)
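For illustration, wiring the worker to a client: RuntimeWorker only needs an object exposing gauge(key, value) (a DogStatsd-style client). The stand-in client below is a made-up stub for the example, not the vendored dogstatsd module:

from ddtrace.internal.runtime import RuntimeWorker

class PrintingStatsd(object):
    """Stand-in for a DogStatsd client; implements only the gauge() call the worker uses."""
    def gauge(self, key, value):
        print('gauge {}:{}'.format(key, value))

worker = RuntimeWorker(PrintingStatsd(), flush_interval=5)
worker.start()  # daemon thread flushes RuntimeMetrics every 5 seconds
# ... application runs ...
worker.stop()   # the loop exits after the current sleep interval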