From e16d0b0c7853cd23c500bfd4988175418c497112 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Bempel Date: Mon, 27 May 2024 18:03:55 +0200 Subject: [PATCH] Add circuit breaker for Exception Debugging (#7074) To avoid having a spike of exceptions to analyze (fingerprint/instrument) we put in front of handling the exception a circuit breaker which open when we reach more than 100 (configurable via DD_EXCEPTION_REPLAY_MAX_EXCEPTION_ANALYSIS_LIMIT) per second --- .../datadog/debugger/agent/DebuggerAgent.java | 5 ++- .../exception/DefaultExceptionDebugger.java | 15 +++++-- .../datadog/debugger/util/CircuitBreaker.java | 41 +++++++++++++++++++ .../DefaultExceptionDebuggerTest.java | 5 ++- .../ExceptionProbeInstrumentationTest.java | 2 +- .../debugger/util/CircuitBreakerTest.java | 37 +++++++++++++++++ .../datadog/trace/api/ConfigDefaults.java | 1 + .../trace/api/config/DebuggerConfig.java | 2 + .../main/java/datadog/trace/api/Config.java | 10 +++++ 9 files changed, 111 insertions(+), 7 deletions(-) create mode 100644 dd-java-agent/agent-debugger/src/main/java/com/datadog/debugger/util/CircuitBreaker.java create mode 100644 dd-java-agent/agent-debugger/src/test/java/com/datadog/debugger/util/CircuitBreakerTest.java diff --git a/dd-java-agent/agent-debugger/src/main/java/com/datadog/debugger/agent/DebuggerAgent.java b/dd-java-agent/agent-debugger/src/main/java/com/datadog/debugger/agent/DebuggerAgent.java index e9cb8d0a2a1..f91b20798eb 100644 --- a/dd-java-agent/agent-debugger/src/main/java/com/datadog/debugger/agent/DebuggerAgent.java +++ b/dd-java-agent/agent-debugger/src/main/java/com/datadog/debugger/agent/DebuggerAgent.java @@ -86,7 +86,10 @@ public static synchronized void run( if (config.isDebuggerExceptionEnabled()) { defaultExceptionDebugger = new DefaultExceptionDebugger( - configurationUpdater, classNameFiltering, EXCEPTION_CAPTURE_INTERVAL); + configurationUpdater, + classNameFiltering, + EXCEPTION_CAPTURE_INTERVAL, + config.getDebuggerMaxExceptionPerSecond()); DebuggerContext.initExceptionDebugger(defaultExceptionDebugger); } if (config.isDebuggerInstrumentTheWorld()) { diff --git a/dd-java-agent/agent-debugger/src/main/java/com/datadog/debugger/exception/DefaultExceptionDebugger.java b/dd-java-agent/agent-debugger/src/main/java/com/datadog/debugger/exception/DefaultExceptionDebugger.java index a3fb68c8663..587151526c4 100644 --- a/dd-java-agent/agent-debugger/src/main/java/com/datadog/debugger/exception/DefaultExceptionDebugger.java +++ b/dd-java-agent/agent-debugger/src/main/java/com/datadog/debugger/exception/DefaultExceptionDebugger.java @@ -7,6 +7,7 @@ import com.datadog.debugger.agent.DebuggerAgent; import com.datadog.debugger.exception.ExceptionProbeManager.ThrowableState; import com.datadog.debugger.sink.Snapshot; +import com.datadog.debugger.util.CircuitBreaker; import com.datadog.debugger.util.ClassNameFiltering; import com.datadog.debugger.util.ExceptionHelper; import datadog.trace.bootstrap.debugger.DebuggerContext; @@ -31,24 +32,29 @@ public class DefaultExceptionDebugger implements DebuggerContext.ExceptionDebugg private final ExceptionProbeManager exceptionProbeManager; private final ConfigurationUpdater configurationUpdater; private final ClassNameFiltering classNameFiltering; + private final CircuitBreaker circuitBreaker; public DefaultExceptionDebugger( ConfigurationUpdater configurationUpdater, ClassNameFiltering classNameFiltering, - Duration captureInterval) { + Duration captureInterval, + int maxExceptionPerSecond) { this( new ExceptionProbeManager(classNameFiltering, captureInterval), configurationUpdater, - classNameFiltering); + classNameFiltering, + maxExceptionPerSecond); } DefaultExceptionDebugger( ExceptionProbeManager exceptionProbeManager, ConfigurationUpdater configurationUpdater, - ClassNameFiltering classNameFiltering) { + ClassNameFiltering classNameFiltering, + int maxExceptionPerSecond) { this.exceptionProbeManager = exceptionProbeManager; this.configurationUpdater = configurationUpdater; this.classNameFiltering = classNameFiltering; + this.circuitBreaker = new CircuitBreaker(maxExceptionPerSecond, Duration.ofSeconds(1)); } @Override @@ -59,6 +65,9 @@ public void handleException(Throwable t, AgentSpan span) { } return; } + if (!circuitBreaker.trip()) { + return; + } String fingerprint = Fingerprinter.fingerprint(t, classNameFiltering); if (fingerprint == null) { LOGGER.debug("Unable to fingerprint exception", t); diff --git a/dd-java-agent/agent-debugger/src/main/java/com/datadog/debugger/util/CircuitBreaker.java b/dd-java-agent/agent-debugger/src/main/java/com/datadog/debugger/util/CircuitBreaker.java new file mode 100644 index 00000000000..f8a756471ae --- /dev/null +++ b/dd-java-agent/agent-debugger/src/main/java/com/datadog/debugger/util/CircuitBreaker.java @@ -0,0 +1,41 @@ +package com.datadog.debugger.util; + +import java.time.Duration; +import java.util.concurrent.atomic.AtomicInteger; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +// CircuitBreaker is a simple circuit breaker implementation that allows a certain number of trips +// within a time window. If the number of trips exceeds the limit, the circuit breaker will trip and +// return false until the time window has passed. +public class CircuitBreaker { + private static final Logger LOGGER = LoggerFactory.getLogger(CircuitBreaker.class); + + private final int maxTrips; + private final Duration timeWindow; + private AtomicInteger count = new AtomicInteger(0); + private volatile long lastResetTime = System.currentTimeMillis(); + private volatile long lastLoggingTime = System.currentTimeMillis(); + + public CircuitBreaker(int maxTrips, Duration timeWindow) { + this.maxTrips = maxTrips; + this.timeWindow = timeWindow; + } + + public boolean trip() { + int localCount = count.incrementAndGet(); + if (localCount > maxTrips) { + long currentTime = System.currentTimeMillis(); + if (currentTime - lastLoggingTime > Duration.ofMinutes(1).toMillis()) { + lastLoggingTime = currentTime; + LOGGER.debug("Circuit breaker opened"); + } + if (currentTime - lastResetTime > timeWindow.toMillis()) { + lastResetTime = currentTime; + localCount = 1; + count.set(localCount); + } + } + return localCount <= maxTrips; + } +} diff --git a/dd-java-agent/agent-debugger/src/test/java/com/datadog/debugger/exception/DefaultExceptionDebuggerTest.java b/dd-java-agent/agent-debugger/src/test/java/com/datadog/debugger/exception/DefaultExceptionDebuggerTest.java index 4474c7c21ac..c1ac05b892f 100644 --- a/dd-java-agent/agent-debugger/src/test/java/com/datadog/debugger/exception/DefaultExceptionDebuggerTest.java +++ b/dd-java-agent/agent-debugger/src/test/java/com/datadog/debugger/exception/DefaultExceptionDebuggerTest.java @@ -60,7 +60,8 @@ public void setUp() { configurationUpdater = mock(ConfigurationUpdater.class); classNameFiltering = new ClassNameFiltering(emptySet()); exceptionDebugger = - new DefaultExceptionDebugger(configurationUpdater, classNameFiltering, Duration.ofHours(1)); + new DefaultExceptionDebugger( + configurationUpdater, classNameFiltering, Duration.ofHours(1), 100); listener = new TestSnapshotListener(createConfig(), mock(ProbeStatusSink.class)); DebuggerAgentHelper.injectSink(listener); } @@ -203,7 +204,7 @@ public void doubleNestedException() { public void filteringOutErrors() { ExceptionProbeManager manager = mock(ExceptionProbeManager.class); exceptionDebugger = - new DefaultExceptionDebugger(manager, configurationUpdater, classNameFiltering); + new DefaultExceptionDebugger(manager, configurationUpdater, classNameFiltering, 100); exceptionDebugger.handleException(new AssertionError("test"), mock(AgentSpan.class)); verify(manager, times(0)).isAlreadyInstrumented(any()); } diff --git a/dd-java-agent/agent-debugger/src/test/java/com/datadog/debugger/exception/ExceptionProbeInstrumentationTest.java b/dd-java-agent/agent-debugger/src/test/java/com/datadog/debugger/exception/ExceptionProbeInstrumentationTest.java index b59045d8d79..f3ad0373fa5 100644 --- a/dd-java-agent/agent-debugger/src/test/java/com/datadog/debugger/exception/ExceptionProbeInstrumentationTest.java +++ b/dd-java-agent/agent-debugger/src/test/java/com/datadog/debugger/exception/ExceptionProbeInstrumentationTest.java @@ -326,7 +326,7 @@ private TestSnapshotListener setupExceptionDebugging( DebuggerContext.initValueSerializer(new JsonSnapshotSerializer()); DefaultExceptionDebugger exceptionDebugger = new DefaultExceptionDebugger( - exceptionProbeManager, configurationUpdater, classNameFiltering); + exceptionProbeManager, configurationUpdater, classNameFiltering, 100); DebuggerContext.initExceptionDebugger(exceptionDebugger); configurationUpdater.accept(REMOTE_CONFIG, null); return listener; diff --git a/dd-java-agent/agent-debugger/src/test/java/com/datadog/debugger/util/CircuitBreakerTest.java b/dd-java-agent/agent-debugger/src/test/java/com/datadog/debugger/util/CircuitBreakerTest.java new file mode 100644 index 00000000000..6c510b5cce1 --- /dev/null +++ b/dd-java-agent/agent-debugger/src/test/java/com/datadog/debugger/util/CircuitBreakerTest.java @@ -0,0 +1,37 @@ +package com.datadog.debugger.util; + +import static org.junit.jupiter.api.Assertions.*; + +import java.time.Duration; +import java.util.concurrent.locks.LockSupport; +import org.junit.jupiter.api.Test; + +class CircuitBreakerTest { + + @Test + void noBreaker() { + CircuitBreaker cb = new CircuitBreaker(3, Duration.ofMillis(10)); + for (int i = 0; i < 10; i++) { + assertTrue(cb.trip()); + LockSupport.parkNanos(Duration.ofMillis(50).toNanos()); + } + } + + @Test + void breaker() { + CircuitBreaker cb = new CircuitBreaker(3, Duration.ofMillis(200)); + for (int i = 0; i < 3; i++) { + assertTrue(cb.trip()); + } + for (int i = 0; i < 100; i++) { + assertFalse(cb.trip()); + } + LockSupport.parkNanos(Duration.ofMillis(250).toNanos()); + for (int i = 0; i < 3; i++) { + assertTrue(cb.trip()); + } + for (int i = 0; i < 100; i++) { + assertFalse(cb.trip()); + } + } +} diff --git a/dd-trace-api/src/main/java/datadog/trace/api/ConfigDefaults.java b/dd-trace-api/src/main/java/datadog/trace/api/ConfigDefaults.java index 1e9b2fec7d2..6ff2a227290 100644 --- a/dd-trace-api/src/main/java/datadog/trace/api/ConfigDefaults.java +++ b/dd-trace-api/src/main/java/datadog/trace/api/ConfigDefaults.java @@ -174,6 +174,7 @@ public final class ConfigDefaults { static final boolean DEFAULT_DEBUGGER_SYMBOL_FORCE_UPLOAD = false; static final int DEFAULT_DEBUGGER_SYMBOL_FLUSH_THRESHOLD = 100; // nb of classes static final boolean DEFAULT_DEBUGGER_EXCEPTION_ENABLED = false; + static final int DEFAULT_DEBUGGER_MAX_EXCEPTION_PER_SECOND = 100; static final boolean DEFAULT_DEBUGGER_EXCEPTION_ONLY_LOCAL_ROOT = true; static final boolean DEFAULT_TRACE_REPORT_HOSTNAME = false; diff --git a/dd-trace-api/src/main/java/datadog/trace/api/config/DebuggerConfig.java b/dd-trace-api/src/main/java/datadog/trace/api/config/DebuggerConfig.java index 487123b65db..ebfd3afb14f 100644 --- a/dd-trace-api/src/main/java/datadog/trace/api/config/DebuggerConfig.java +++ b/dd-trace-api/src/main/java/datadog/trace/api/config/DebuggerConfig.java @@ -31,6 +31,8 @@ public final class DebuggerConfig { public static final String DEBUGGER_SYMBOL_FLUSH_THRESHOLD = "symbol.database.flush.threshold"; public static final String DEBUGGER_EXCEPTION_ENABLED = "exception.debugging.enabled"; public static final String EXCEPTION_REPLAY_ENABLED = "exception.replay.enabled"; + public static final String DEBUGGER_MAX_EXCEPTION_PER_SECOND = + "exception.replay.max.exception.analysis.limit"; public static final String DEBUGGER_EXCEPTION_ONLY_LOCAL_ROOT = "internal.exception.replay.only.local.root"; public static final String THIRD_PARTY_INCLUDES = "third.party.includes"; diff --git a/internal-api/src/main/java/datadog/trace/api/Config.java b/internal-api/src/main/java/datadog/trace/api/Config.java index c08b5bacf75..201dfcdafd6 100644 --- a/internal-api/src/main/java/datadog/trace/api/Config.java +++ b/internal-api/src/main/java/datadog/trace/api/Config.java @@ -47,6 +47,7 @@ import static datadog.trace.api.ConfigDefaults.DEFAULT_DEBUGGER_EXCEPTION_ENABLED; import static datadog.trace.api.ConfigDefaults.DEFAULT_DEBUGGER_EXCEPTION_ONLY_LOCAL_ROOT; import static datadog.trace.api.ConfigDefaults.DEFAULT_DEBUGGER_INSTRUMENT_THE_WORLD; +import static datadog.trace.api.ConfigDefaults.DEFAULT_DEBUGGER_MAX_EXCEPTION_PER_SECOND; import static datadog.trace.api.ConfigDefaults.DEFAULT_DEBUGGER_MAX_PAYLOAD_SIZE; import static datadog.trace.api.ConfigDefaults.DEFAULT_DEBUGGER_METRICS_ENABLED; import static datadog.trace.api.ConfigDefaults.DEFAULT_DEBUGGER_POLL_INTERVAL; @@ -213,6 +214,7 @@ import static datadog.trace.api.config.DebuggerConfig.DEBUGGER_EXCEPTION_ONLY_LOCAL_ROOT; import static datadog.trace.api.config.DebuggerConfig.DEBUGGER_EXCLUDE_FILES; import static datadog.trace.api.config.DebuggerConfig.DEBUGGER_INSTRUMENT_THE_WORLD; +import static datadog.trace.api.config.DebuggerConfig.DEBUGGER_MAX_EXCEPTION_PER_SECOND; import static datadog.trace.api.config.DebuggerConfig.DEBUGGER_MAX_PAYLOAD_SIZE; import static datadog.trace.api.config.DebuggerConfig.DEBUGGER_METRICS_ENABLED; import static datadog.trace.api.config.DebuggerConfig.DEBUGGER_POLL_INTERVAL; @@ -837,6 +839,7 @@ static class HostNameHolder { private final String debuggerSymbolIncludes; private final int debuggerSymbolFlushThreshold; private final boolean debuggerExceptionEnabled; + private final int debuggerMaxExceptionPerSecond; private final boolean debuggerExceptionOnlyLocalRoot; private final Set debuggerThirdPartyIncludes; @@ -1891,6 +1894,9 @@ PROFILING_DATADOG_PROFILER_ENABLED, isDatadogProfilerSafeInCurrentEnvironment()) DEBUGGER_EXCEPTION_ENABLED, DEFAULT_DEBUGGER_EXCEPTION_ENABLED, EXCEPTION_REPLAY_ENABLED); + debuggerMaxExceptionPerSecond = + configProvider.getInteger( + DEBUGGER_MAX_EXCEPTION_PER_SECOND, DEFAULT_DEBUGGER_MAX_EXCEPTION_PER_SECOND); debuggerExceptionOnlyLocalRoot = configProvider.getBoolean( DEBUGGER_EXCEPTION_ONLY_LOCAL_ROOT, DEFAULT_DEBUGGER_EXCEPTION_ONLY_LOCAL_ROOT); @@ -3215,6 +3221,10 @@ public boolean isDebuggerExceptionEnabled() { return debuggerExceptionEnabled; } + public int getDebuggerMaxExceptionPerSecond() { + return debuggerMaxExceptionPerSecond; + } + public boolean isDebuggerExceptionOnlyLocalRoot() { return debuggerExceptionOnlyLocalRoot; }