diff --git a/dd-java-agent/agent-bootstrap/src/main/java/datadog/trace/bootstrap/Agent.java b/dd-java-agent/agent-bootstrap/src/main/java/datadog/trace/bootstrap/Agent.java index 2ffc541f6e6..898933771fe 100644 --- a/dd-java-agent/agent-bootstrap/src/main/java/datadog/trace/bootstrap/Agent.java +++ b/dd-java-agent/agent-bootstrap/src/main/java/datadog/trace/bootstrap/Agent.java @@ -72,9 +72,9 @@ public static void start(final Instrumentation inst, final URL bootstrapURL) { */ if (appUsingCustomLogManager) { log.debug("Custom logger detected. Delaying JMXFetch initialization."); - registerLogManagerCallback(new StartJmxFetchCallback(bootstrapURL)); + registerLogManagerCallback(new StartJmxCallback(bootstrapURL)); } else { - startJmxFetch(bootstrapURL); + startJmx(bootstrapURL); } /* @@ -151,8 +151,8 @@ public void run() { public abstract void execute(); } - protected static class StartJmxFetchCallback extends ClassLoadCallBack { - StartJmxFetchCallback(final URL bootstrapURL) { + protected static class StartJmxCallback extends ClassLoadCallBack { + StartJmxCallback(final URL bootstrapURL) { super(bootstrapURL); } @@ -163,7 +163,7 @@ public String getName() { @Override public void execute() { - startJmxFetch(bootstrapURL); + startJmx(bootstrapURL); } } @@ -262,6 +262,27 @@ private static synchronized void installDatadogTracer() { } } + private static synchronized void startJmx(final URL bootstrapURL) { + startJmxFetch(bootstrapURL); + initializeJmxThreadCpuTimeProvider(); + } + + /** Enable JMX based thread CPU time provider once it is safe to touch JMX */ + private static synchronized void initializeJmxThreadCpuTimeProvider() { + log.info("Initializing JMX thread CPU time provider"); + if (AGENT_CLASSLOADER == null) { + throw new IllegalStateException("Datadog agent should have been started already"); + } + try { + final Class tracerInstallerClass = + AGENT_CLASSLOADER.loadClass("datadog.trace.common.util.ThreadCpuTimeAccess"); + final Method enableJmxMethod = tracerInstallerClass.getMethod("enableJmx"); + enableJmxMethod.invoke(null); + } catch (final Throwable ex) { + log.error("Throwable thrown while initializing JMX thread CPU time provider", ex); + } + } + private static synchronized void startJmxFetch(final URL bootstrapURL) { if (JMXFETCH_CLASSLOADER == null) { final ClassLoader contextLoader = Thread.currentThread().getContextClassLoader(); diff --git a/dd-smoke-tests/profiling-integration-tests/src/main/java/datadog/smoketest/profiling/ProfilingTestApplication.java b/dd-smoke-tests/profiling-integration-tests/src/main/java/datadog/smoketest/profiling/ProfilingTestApplication.java index 3d693a40080..c9bf3a40534 100644 --- a/dd-smoke-tests/profiling-integration-tests/src/main/java/datadog/smoketest/profiling/ProfilingTestApplication.java +++ b/dd-smoke-tests/profiling-integration-tests/src/main/java/datadog/smoketest/profiling/ProfilingTestApplication.java @@ -1,9 +1,13 @@ package datadog.smoketest.profiling; import datadog.trace.api.Trace; +import java.lang.management.ManagementFactory; +import java.lang.management.ThreadMXBean; +import java.util.Random; import java.util.concurrent.TimeUnit; public class ProfilingTestApplication { + private static final ThreadMXBean THREAD_MX_BEAN = ManagementFactory.getThreadMXBean(); public static void main(final String[] args) throws InterruptedException { long exitDelay = -1; @@ -23,6 +27,22 @@ public static void main(final String[] args) throws InterruptedException { @Trace private static void tracedMethod() throws InterruptedException { System.out.println("Tracing"); - Thread.sleep(100); + tracedBusyMethod(); + Thread.sleep(50); + } + + @Trace + private static void tracedBusyMethod() { + long startTime = THREAD_MX_BEAN.getCurrentThreadCpuTime(); + Random random = new Random(); + long accumulator = 0L; + while (true) { + accumulator += random.nextInt(113); + if (THREAD_MX_BEAN.getCurrentThreadCpuTime() - startTime > 10_000_000L) { + // looking for at least 10ms CPU time + break; + } + } + System.out.println("accumulated: " + accumulator); } } diff --git a/dd-smoke-tests/profiling-integration-tests/src/test/groovy/datadog/smoketest/ProfilingIntegrationContinuousProfilesTest.groovy b/dd-smoke-tests/profiling-integration-tests/src/test/groovy/datadog/smoketest/ProfilingIntegrationContinuousProfilesTest.groovy index a76b2ee42fb..7643d868859 100644 --- a/dd-smoke-tests/profiling-integration-tests/src/test/groovy/datadog/smoketest/ProfilingIntegrationContinuousProfilesTest.groovy +++ b/dd-smoke-tests/profiling-integration-tests/src/test/groovy/datadog/smoketest/ProfilingIntegrationContinuousProfilesTest.groovy @@ -6,8 +6,11 @@ import net.jpountz.lz4.LZ4FrameInputStream import okhttp3.mockwebserver.MockResponse import okhttp3.mockwebserver.MockWebServer import okhttp3.mockwebserver.RecordedRequest +import org.openjdk.jmc.common.item.Aggregators +import org.openjdk.jmc.common.item.Attribute import org.openjdk.jmc.common.item.IItemCollection import org.openjdk.jmc.common.item.ItemFilters +import org.openjdk.jmc.common.unit.UnitLookup import org.openjdk.jmc.flightrecorder.JfrLoaderToolkit import java.time.Instant @@ -101,6 +104,14 @@ class ProfilingIntegrationContinuousProfilesTest extends AbstractSmokeTest { IItemCollection scopeEvents = events.apply(ItemFilters.type("datadog.Scope")) scopeEvents.size() > 0 - } + def cpuTimeAttr = Attribute.attr("cpuTime", "cpuTime", UnitLookup.TIMESPAN) + + // filter out scope events without CPU time data + def filteredScopeEvents = scopeEvents.apply(ItemFilters.more(cpuTimeAttr, UnitLookup.NANOSECOND.quantity(Long.MIN_VALUE))) + // make sure there is at least one scope event with CPU time data + filteredScopeEvents.size() > 0 + + filteredScopeEvents.getAggregate(Aggregators.min("datadog.Scope", cpuTimeAttr)).longValue() >= 10_000L + } } diff --git a/dd-trace-ot/jfr-openjdk/jfr-openjdk.gradle b/dd-trace-ot/jfr-openjdk/jfr-openjdk.gradle index 3500efa998d..8f37a36d411 100644 --- a/dd-trace-ot/jfr-openjdk/jfr-openjdk.gradle +++ b/dd-trace-ot/jfr-openjdk/jfr-openjdk.gradle @@ -9,6 +9,7 @@ apply plugin: 'idea' dependencies { compile deps.slf4j compile project(':dd-trace-ot') + testCompile project(':dd-java-agent:testing') } /* diff --git a/dd-trace-ot/jfr-openjdk/src/main/java/datadog/opentracing/jfr/openjdk/ScopeEvent.java b/dd-trace-ot/jfr-openjdk/src/main/java/datadog/opentracing/jfr/openjdk/ScopeEvent.java index cdefad81eaf..55b1298599b 100644 --- a/dd-trace-ot/jfr-openjdk/src/main/java/datadog/opentracing/jfr/openjdk/ScopeEvent.java +++ b/dd-trace-ot/jfr-openjdk/src/main/java/datadog/opentracing/jfr/openjdk/ScopeEvent.java @@ -2,12 +2,14 @@ import datadog.opentracing.DDSpanContext; import datadog.opentracing.jfr.DDScopeEvent; +import datadog.trace.common.util.ThreadCpuTimeAccess; import jdk.jfr.Category; import jdk.jfr.Description; import jdk.jfr.Event; import jdk.jfr.Label; import jdk.jfr.Name; import jdk.jfr.StackTrace; +import jdk.jfr.Timespan; @Name("datadog.Scope") @Label("Scope") @@ -38,6 +40,11 @@ public final class ScopeEvent extends Event implements DDScopeEvent { @Label("Operation Name") private String operationName; + @Label("Thread CPU Time") + @Timespan + // does not need to be volatile since the event is created and committed from the same thread + private long cpuTime = 0L; + ScopeEvent(final DDSpanContext spanContext) { this.spanContext = spanContext; } @@ -45,6 +52,7 @@ public final class ScopeEvent extends Event implements DDScopeEvent { @Override public void start() { if (isEnabled()) { + cpuTime = ThreadCpuTimeAccess.getCurrentThreadCpuTime(); begin(); } } @@ -53,6 +61,9 @@ public void start() { public void finish() { end(); if (shouldCommit()) { + if (cpuTime > 0) { + cpuTime = ThreadCpuTimeAccess.getCurrentThreadCpuTime() - cpuTime; + } traceId = spanContext.getTraceId().toString(IDS_RADIX); spanId = spanContext.getSpanId().toString(IDS_RADIX); parentId = spanContext.getParentId().toString(IDS_RADIX); diff --git a/dd-trace-ot/jfr-openjdk/src/test/groovy/ScopeEventFactoryTest.groovy b/dd-trace-ot/jfr-openjdk/src/test/groovy/datadog/opentracing/jfr/openjdk/ScopeEventFactoryTest.groovy similarity index 85% rename from dd-trace-ot/jfr-openjdk/src/test/groovy/ScopeEventFactoryTest.groovy rename to dd-trace-ot/jfr-openjdk/src/test/groovy/datadog/opentracing/jfr/openjdk/ScopeEventFactoryTest.groovy index ea30c156c39..85a52c9f9bf 100644 --- a/dd-trace-ot/jfr-openjdk/src/test/groovy/ScopeEventFactoryTest.groovy +++ b/dd-trace-ot/jfr-openjdk/src/test/groovy/datadog/opentracing/jfr/openjdk/ScopeEventFactoryTest.groovy @@ -1,6 +1,6 @@ +package datadog.opentracing.jfr.openjdk + import datadog.opentracing.jfr.DDNoopScopeEvent -import datadog.opentracing.jfr.openjdk.ScopeEvent -import datadog.opentracing.jfr.openjdk.ScopeEventFactory import spock.lang.Requires import spock.lang.Specification diff --git a/dd-trace-ot/jfr-openjdk/src/test/groovy/ScopeEventTest.groovy b/dd-trace-ot/jfr-openjdk/src/test/groovy/datadog/opentracing/jfr/openjdk/ScopeEventTest.groovy similarity index 51% rename from dd-trace-ot/jfr-openjdk/src/test/groovy/ScopeEventTest.groovy rename to dd-trace-ot/jfr-openjdk/src/test/groovy/datadog/opentracing/jfr/openjdk/ScopeEventTest.groovy index a687ac96558..e7f020137e0 100644 --- a/dd-trace-ot/jfr-openjdk/src/test/groovy/ScopeEventTest.groovy +++ b/dd-trace-ot/jfr-openjdk/src/test/groovy/datadog/opentracing/jfr/openjdk/ScopeEventTest.groovy @@ -1,21 +1,26 @@ +package datadog.opentracing.jfr.openjdk + import datadog.opentracing.DDSpanContext import datadog.opentracing.DDTracer import datadog.opentracing.PendingTrace +import datadog.trace.agent.test.utils.ConfigUtils +import datadog.trace.api.Config import datadog.trace.api.sampling.PrioritySampling import datadog.trace.common.sampling.RateByServiceSampler +import datadog.trace.common.util.ThreadCpuTimeAccess import datadog.trace.common.writer.ListWriter import datadog.trace.context.TraceScope +import datadog.trace.util.test.DDSpecification import io.opentracing.Scope import io.opentracing.Span import spock.lang.Requires -import spock.lang.Specification import java.time.Duration import static datadog.trace.api.Config.DEFAULT_SERVICE_NAME @Requires({ jvm.java11Compatible }) -class ScopeEventTest extends Specification { +class ScopeEventTest extends DDSpecification { private static final int IDS_RADIX = 16 private static final Duration SLEEP_DURATION = Duration.ofSeconds(1) @@ -45,8 +50,45 @@ class ScopeEventTest extends Specification { .withServiceName("test service") .withResourceName("test resource") - def "Scope event is written"() { + def "Scope event is written with thread CPU time"() { + setup: + ConfigUtils.updateConfig { + System.properties.setProperty("dd.${Config.PROFILING_ENABLED}", "true") + } + ThreadCpuTimeAccess.enableJmx() + def recording = JfrHelper.startRecording() + + when: + Scope scope = builder.startActive(false) + Span span = scope.span() + sleep(SLEEP_DURATION.toMillis()) + scope.close() + def events = JfrHelper.stopRecording(recording) + span.finish() + + then: + events.size() == 1 + def event = events[0] + event.eventType.name == "datadog.Scope" + event.duration >= SLEEP_DURATION + event.getString("traceId") == span.context().traceId.toString(IDS_RADIX) + event.getString("spanId") == span.context().spanId.toString(IDS_RADIX) + event.getString("parentId") == span.context().parentId.toString(IDS_RADIX) + event.getString("serviceName") == "test service" + event.getString("resourceName") == "test resource" + event.getString("operationName") == "test operation" + event.getLong("cpuTime") != Long.MIN_VALUE + + cleanup: + ThreadCpuTimeAccess.disableJmx() + } + + def "Scope event is written without thread CPU time - profiling enabled"() { setup: + ConfigUtils.updateConfig { + System.properties.setProperty("dd.${Config.PROFILING_ENABLED}", "true") + } + ThreadCpuTimeAccess.disableJmx() def recording = JfrHelper.startRecording() when: @@ -68,6 +110,43 @@ class ScopeEventTest extends Specification { event.getString("serviceName") == "test service" event.getString("resourceName") == "test resource" event.getString("operationName") == "test operation" + event.getLong("cpuTime") == Long.MIN_VALUE + + cleanup: + ThreadCpuTimeAccess.disableJmx() + } + + def "Scope event is written without thread CPU time - profiling disabled"() { + setup: + ConfigUtils.updateConfig { + System.properties.setProperty("dd.${Config.PROFILING_ENABLED}", "false") + } + ThreadCpuTimeAccess.enableJmx() + def recording = JfrHelper.startRecording() + + when: + Scope scope = builder.startActive(false) + Span span = scope.span() + sleep(SLEEP_DURATION.toMillis()) + scope.close() + def events = JfrHelper.stopRecording(recording) + span.finish() + + then: + events.size() == 1 + def event = events[0] + event.eventType.name == "datadog.Scope" + event.duration >= SLEEP_DURATION + event.getString("traceId") == span.context().traceId.toString(IDS_RADIX) + event.getString("spanId") == span.context().spanId.toString(IDS_RADIX) + event.getString("parentId") == span.context().parentId.toString(IDS_RADIX) + event.getString("serviceName") == "test service" + event.getString("resourceName") == "test resource" + event.getString("operationName") == "test operation" + event.getLong("cpuTime") == Long.MIN_VALUE + + cleanup: + ThreadCpuTimeAccess.disableJmx() } def "Scope event is written after continuation activation"() { diff --git a/dd-trace-ot/jfr-openjdk/src/test/groovy/JfrHelper.java b/dd-trace-ot/jfr-openjdk/src/test/java/datadog/opentracing/jfr/openjdk/JfrHelper.java similarity index 94% rename from dd-trace-ot/jfr-openjdk/src/test/groovy/JfrHelper.java rename to dd-trace-ot/jfr-openjdk/src/test/java/datadog/opentracing/jfr/openjdk/JfrHelper.java index 9bbf5d6678f..d91785fe6d7 100644 --- a/dd-trace-ot/jfr-openjdk/src/test/groovy/JfrHelper.java +++ b/dd-trace-ot/jfr-openjdk/src/test/java/datadog/opentracing/jfr/openjdk/JfrHelper.java @@ -1,3 +1,5 @@ +package datadog.opentracing.jfr.openjdk; + import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; diff --git a/dd-trace-ot/src/main/java/datadog/trace/common/util/JmxThreadCpuTimeProvider.java b/dd-trace-ot/src/main/java/datadog/trace/common/util/JmxThreadCpuTimeProvider.java new file mode 100644 index 00000000000..f1c8ac2a796 --- /dev/null +++ b/dd-trace-ot/src/main/java/datadog/trace/common/util/JmxThreadCpuTimeProvider.java @@ -0,0 +1,22 @@ +package datadog.trace.common.util; + +import java.lang.management.ManagementFactory; +import java.lang.management.ThreadMXBean; + +/** Thread CPU time provider based on {@linkplain ThreadMXBean#getCurrentThreadCpuTime()} */ +final class JmxThreadCpuTimeProvider implements ThreadCpuTimeProvider { + private final ThreadMXBean threadMXBean = ManagementFactory.getThreadMXBean(); + + public static final JmxThreadCpuTimeProvider INSTANCE = new JmxThreadCpuTimeProvider(); + + private JmxThreadCpuTimeProvider() {} + + /** + * @return the actual thread CPU time as reported by {@linkplain + * ThreadMXBean#getCurrentThreadCpuTime()} + */ + @Override + public long getThreadCpuTime() { + return threadMXBean.getCurrentThreadCpuTime(); + } +} diff --git a/dd-trace-ot/src/main/java/datadog/trace/common/util/NoneThreadCpuTimeProvider.java b/dd-trace-ot/src/main/java/datadog/trace/common/util/NoneThreadCpuTimeProvider.java new file mode 100644 index 00000000000..aaef7ddce89 --- /dev/null +++ b/dd-trace-ot/src/main/java/datadog/trace/common/util/NoneThreadCpuTimeProvider.java @@ -0,0 +1,8 @@ +package datadog.trace.common.util; + +final class NoneThreadCpuTimeProvider implements ThreadCpuTimeProvider { + @Override + public long getThreadCpuTime() { + return Long.MIN_VALUE; + } +} diff --git a/dd-trace-ot/src/main/java/datadog/trace/common/util/ThreadCpuTimeAccess.java b/dd-trace-ot/src/main/java/datadog/trace/common/util/ThreadCpuTimeAccess.java new file mode 100644 index 00000000000..dc7480a9adb --- /dev/null +++ b/dd-trace-ot/src/main/java/datadog/trace/common/util/ThreadCpuTimeAccess.java @@ -0,0 +1,59 @@ +package datadog.trace.common.util; + +import datadog.trace.api.Config; +import lombok.extern.slf4j.Slf4j; + +/** + * Thread CPU time accessor.
+ * This class abstracts away the actual method used to get the current thread CPU time. + */ +@Slf4j +public final class ThreadCpuTimeAccess { + private static volatile ThreadCpuTimeProvider cpuTimeProvider = ThreadCpuTimeProvider.NONE; + + /** + * Disable JMX based thread CPU time. Will flip back to the {@linkplain + * ThreadCpuTimeProvider#NONE} implementation. + */ + public static void disableJmx() { + log.debug("Disabling JMX thread CPU time provider"); + cpuTimeProvider = ThreadCpuTimeProvider.NONE; + } + + /** Enable JMX based thread CPU time */ + public static void enableJmx() { + if (!Config.get().isProfilingEnabled()) { + log.debug("Will not enable thread CPU time access. Profiling is disabled."); + return; + } + try { + log.debug("Enabling JMX thread CPU time provider"); + /* + * Can not use direct class reference to JmxThreadCpuTimeProvider since on some rare JVM implementations + * using eager class resolution that class could be resolved at the moment when ThreadCpuTime is being loaded, + * potentially triggering j.u.l initialization which is potentially dangerous and can be done only at certain + * point in time. + * Using reflection should alleviate this problem - no class constant to resolve during class load. The JMX + * thread cpu time provider will be loaded at exact moment when the reflection code is executed. Then it is up + * to the caller to ensure that it is safe to use JMX. + */ + cpuTimeProvider = + (ThreadCpuTimeProvider) + Class.forName("datadog.trace.common.util.JmxThreadCpuTimeProvider") + .getField("INSTANCE") + .get(null); + } catch (ClassNotFoundException | NoSuchFieldException | IllegalAccessException e) { + log.info("Unable to initialize JMX thread CPU time provider", e); + } + } + + /** + * Get the current thread CPU time + * + * @return the actual current thread CPU time or {@linkplain Long#MIN_VALUE} if the JMX provider + * is not available + */ + public static long getCurrentThreadCpuTime() { + return cpuTimeProvider.getThreadCpuTime(); + } +} diff --git a/dd-trace-ot/src/main/java/datadog/trace/common/util/ThreadCpuTimeProvider.java b/dd-trace-ot/src/main/java/datadog/trace/common/util/ThreadCpuTimeProvider.java new file mode 100644 index 00000000000..380b3f8241c --- /dev/null +++ b/dd-trace-ot/src/main/java/datadog/trace/common/util/ThreadCpuTimeProvider.java @@ -0,0 +1,16 @@ +package datadog.trace.common.util; + +/** + * A pluggable thread CPU time provider used by {@linkplain ThreadCpuTimeAccess}. {@linkplain + * ThreadCpuTimeAccess} may not use JMX classes (even via transitive dependencies) due to potential + * race in j.u.l initialization. Therefore it uses an abstract {@linkplain ThreadCpuTimeProvider} + * type to hold the actual implementation which may be switched between the {@linkplain + * ThreadCpuTimeProvider#NONE} and {@linkplain JmxThreadCpuTimeProvider} on-the-fly once JMX is safe + * to use. + */ +interface ThreadCpuTimeProvider { + ThreadCpuTimeProvider NONE = new NoneThreadCpuTimeProvider(); + + /** Get the current thread CPU time */ + long getThreadCpuTime(); +} diff --git a/dd-trace-ot/src/test/groovy/datadog/trace/common/util/ThreadCpuTimeAccessTest.groovy b/dd-trace-ot/src/test/groovy/datadog/trace/common/util/ThreadCpuTimeAccessTest.groovy new file mode 100644 index 00000000000..88f6f1e85b0 --- /dev/null +++ b/dd-trace-ot/src/test/groovy/datadog/trace/common/util/ThreadCpuTimeAccessTest.groovy @@ -0,0 +1,83 @@ +package datadog.trace.common.util + +import datadog.trace.agent.test.utils.ConfigUtils +import datadog.trace.api.Config +import datadog.trace.util.test.DDSpecification + +class ThreadCpuTimeAccessTest extends DDSpecification { + def "No thread CPU time provider - profiling enabled"() { + setup: + ConfigUtils.updateConfig { + System.properties.setProperty("dd.${Config.PROFILING_ENABLED}", "true") + } + ThreadCpuTimeAccess.disableJmx() + + when: + def threadCpuTime1 = ThreadCpuTimeAccess.getCurrentThreadCpuTime() + // burn some cpu + def sum = 0 + for (int i = 0; i < 10_000; i++) { + sum += i + } + def threadCpuTime2 = ThreadCpuTimeAccess.getCurrentThreadCpuTime() + + then: + sum > 0 + threadCpuTime1 == Long.MIN_VALUE + threadCpuTime2 == Long.MIN_VALUE + + cleanup: + ThreadCpuTimeAccess.disableJmx() + } + + def "No thread CPU time provider - profiling disabled"() { + setup: + ConfigUtils.updateConfig { + System.properties.setProperty("dd.${Config.PROFILING_ENABLED}", "false") + } + ThreadCpuTimeAccess.enableJmx() + + when: + def threadCpuTime1 = ThreadCpuTimeAccess.getCurrentThreadCpuTime() + // burn some cpu + def sum = 0 + for (int i = 0; i < 10_000; i++) { + sum += i + } + def threadCpuTime2 = ThreadCpuTimeAccess.getCurrentThreadCpuTime() + + then: + sum > 0 + threadCpuTime1 == Long.MIN_VALUE + threadCpuTime2 == Long.MIN_VALUE + + cleanup: + ThreadCpuTimeAccess.disableJmx() + } + + def "JMX thread CPU time provider"() { + setup: + ConfigUtils.updateConfig { + System.properties.setProperty("dd.${Config.PROFILING_ENABLED}", "true") + } + ThreadCpuTimeAccess.enableJmx() + + when: + def threadCpuTime1 = ThreadCpuTimeAccess.getCurrentThreadCpuTime() + // burn some cpu + def sum = 0 + for (int i = 0; i < 10_000; i++) { + sum += i + } + def threadCpuTime2 = ThreadCpuTimeAccess.getCurrentThreadCpuTime() + + then: + sum > 0 + threadCpuTime1 != Long.MIN_VALUE + threadCpuTime2 != Long.MIN_VALUE + threadCpuTime2 > threadCpuTime1 + + cleanup: + ThreadCpuTimeAccess.disableJmx() + } +}