Skip to content

Commit

Permalink
Add circuit breaker for Exception Debugging (#7074)
Browse files Browse the repository at this point in the history
To avoid having a spike of exceptions to analyze
(fingerprint/instrument) we put in front of handling the exception
a circuit breaker which open when we reach more than 100
(configurable via DD_EXCEPTION_REPLAY_MAX_EXCEPTION_ANALYSIS_LIMIT) per second
  • Loading branch information
jpbempel committed May 27, 2024
1 parent 61ada2c commit e16d0b0
Show file tree
Hide file tree
Showing 9 changed files with 111 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,10 @@ public static synchronized void run(
if (config.isDebuggerExceptionEnabled()) {
defaultExceptionDebugger =
new DefaultExceptionDebugger(
configurationUpdater, classNameFiltering, EXCEPTION_CAPTURE_INTERVAL);
configurationUpdater,
classNameFiltering,
EXCEPTION_CAPTURE_INTERVAL,
config.getDebuggerMaxExceptionPerSecond());
DebuggerContext.initExceptionDebugger(defaultExceptionDebugger);
}
if (config.isDebuggerInstrumentTheWorld()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import com.datadog.debugger.agent.DebuggerAgent;
import com.datadog.debugger.exception.ExceptionProbeManager.ThrowableState;
import com.datadog.debugger.sink.Snapshot;
import com.datadog.debugger.util.CircuitBreaker;
import com.datadog.debugger.util.ClassNameFiltering;
import com.datadog.debugger.util.ExceptionHelper;
import datadog.trace.bootstrap.debugger.DebuggerContext;
Expand All @@ -31,24 +32,29 @@ public class DefaultExceptionDebugger implements DebuggerContext.ExceptionDebugg
private final ExceptionProbeManager exceptionProbeManager;
private final ConfigurationUpdater configurationUpdater;
private final ClassNameFiltering classNameFiltering;
private final CircuitBreaker circuitBreaker;

public DefaultExceptionDebugger(
ConfigurationUpdater configurationUpdater,
ClassNameFiltering classNameFiltering,
Duration captureInterval) {
Duration captureInterval,
int maxExceptionPerSecond) {
this(
new ExceptionProbeManager(classNameFiltering, captureInterval),
configurationUpdater,
classNameFiltering);
classNameFiltering,
maxExceptionPerSecond);
}

DefaultExceptionDebugger(
ExceptionProbeManager exceptionProbeManager,
ConfigurationUpdater configurationUpdater,
ClassNameFiltering classNameFiltering) {
ClassNameFiltering classNameFiltering,
int maxExceptionPerSecond) {
this.exceptionProbeManager = exceptionProbeManager;
this.configurationUpdater = configurationUpdater;
this.classNameFiltering = classNameFiltering;
this.circuitBreaker = new CircuitBreaker(maxExceptionPerSecond, Duration.ofSeconds(1));
}

@Override
Expand All @@ -59,6 +65,9 @@ public void handleException(Throwable t, AgentSpan span) {
}
return;
}
if (!circuitBreaker.trip()) {
return;
}
String fingerprint = Fingerprinter.fingerprint(t, classNameFiltering);
if (fingerprint == null) {
LOGGER.debug("Unable to fingerprint exception", t);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
package com.datadog.debugger.util;

import java.time.Duration;
import java.util.concurrent.atomic.AtomicInteger;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

// CircuitBreaker is a simple circuit breaker implementation that allows a certain number of trips
// within a time window. If the number of trips exceeds the limit, the circuit breaker will trip and
// return false until the time window has passed.
public class CircuitBreaker {
private static final Logger LOGGER = LoggerFactory.getLogger(CircuitBreaker.class);

private final int maxTrips;
private final Duration timeWindow;
private AtomicInteger count = new AtomicInteger(0);
private volatile long lastResetTime = System.currentTimeMillis();
private volatile long lastLoggingTime = System.currentTimeMillis();

public CircuitBreaker(int maxTrips, Duration timeWindow) {
this.maxTrips = maxTrips;
this.timeWindow = timeWindow;
}

public boolean trip() {
int localCount = count.incrementAndGet();
if (localCount > maxTrips) {
long currentTime = System.currentTimeMillis();
if (currentTime - lastLoggingTime > Duration.ofMinutes(1).toMillis()) {
lastLoggingTime = currentTime;
LOGGER.debug("Circuit breaker opened");
}
if (currentTime - lastResetTime > timeWindow.toMillis()) {
lastResetTime = currentTime;
localCount = 1;
count.set(localCount);
}
}
return localCount <= maxTrips;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,8 @@ public void setUp() {
configurationUpdater = mock(ConfigurationUpdater.class);
classNameFiltering = new ClassNameFiltering(emptySet());
exceptionDebugger =
new DefaultExceptionDebugger(configurationUpdater, classNameFiltering, Duration.ofHours(1));
new DefaultExceptionDebugger(
configurationUpdater, classNameFiltering, Duration.ofHours(1), 100);
listener = new TestSnapshotListener(createConfig(), mock(ProbeStatusSink.class));
DebuggerAgentHelper.injectSink(listener);
}
Expand Down Expand Up @@ -203,7 +204,7 @@ public void doubleNestedException() {
public void filteringOutErrors() {
ExceptionProbeManager manager = mock(ExceptionProbeManager.class);
exceptionDebugger =
new DefaultExceptionDebugger(manager, configurationUpdater, classNameFiltering);
new DefaultExceptionDebugger(manager, configurationUpdater, classNameFiltering, 100);
exceptionDebugger.handleException(new AssertionError("test"), mock(AgentSpan.class));
verify(manager, times(0)).isAlreadyInstrumented(any());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -326,7 +326,7 @@ private TestSnapshotListener setupExceptionDebugging(
DebuggerContext.initValueSerializer(new JsonSnapshotSerializer());
DefaultExceptionDebugger exceptionDebugger =
new DefaultExceptionDebugger(
exceptionProbeManager, configurationUpdater, classNameFiltering);
exceptionProbeManager, configurationUpdater, classNameFiltering, 100);
DebuggerContext.initExceptionDebugger(exceptionDebugger);
configurationUpdater.accept(REMOTE_CONFIG, null);
return listener;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
package com.datadog.debugger.util;

import static org.junit.jupiter.api.Assertions.*;

import java.time.Duration;
import java.util.concurrent.locks.LockSupport;
import org.junit.jupiter.api.Test;

class CircuitBreakerTest {

@Test
void noBreaker() {
CircuitBreaker cb = new CircuitBreaker(3, Duration.ofMillis(10));
for (int i = 0; i < 10; i++) {
assertTrue(cb.trip());
LockSupport.parkNanos(Duration.ofMillis(50).toNanos());
}
}

@Test
void breaker() {
CircuitBreaker cb = new CircuitBreaker(3, Duration.ofMillis(200));
for (int i = 0; i < 3; i++) {
assertTrue(cb.trip());
}
for (int i = 0; i < 100; i++) {
assertFalse(cb.trip());
}
LockSupport.parkNanos(Duration.ofMillis(250).toNanos());
for (int i = 0; i < 3; i++) {
assertTrue(cb.trip());
}
for (int i = 0; i < 100; i++) {
assertFalse(cb.trip());
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,7 @@ public final class ConfigDefaults {
static final boolean DEFAULT_DEBUGGER_SYMBOL_FORCE_UPLOAD = false;
static final int DEFAULT_DEBUGGER_SYMBOL_FLUSH_THRESHOLD = 100; // nb of classes
static final boolean DEFAULT_DEBUGGER_EXCEPTION_ENABLED = false;
static final int DEFAULT_DEBUGGER_MAX_EXCEPTION_PER_SECOND = 100;
static final boolean DEFAULT_DEBUGGER_EXCEPTION_ONLY_LOCAL_ROOT = true;

static final boolean DEFAULT_TRACE_REPORT_HOSTNAME = false;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ public final class DebuggerConfig {
public static final String DEBUGGER_SYMBOL_FLUSH_THRESHOLD = "symbol.database.flush.threshold";
public static final String DEBUGGER_EXCEPTION_ENABLED = "exception.debugging.enabled";
public static final String EXCEPTION_REPLAY_ENABLED = "exception.replay.enabled";
public static final String DEBUGGER_MAX_EXCEPTION_PER_SECOND =
"exception.replay.max.exception.analysis.limit";
public static final String DEBUGGER_EXCEPTION_ONLY_LOCAL_ROOT =
"internal.exception.replay.only.local.root";
public static final String THIRD_PARTY_INCLUDES = "third.party.includes";
Expand Down
10 changes: 10 additions & 0 deletions internal-api/src/main/java/datadog/trace/api/Config.java
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
import static datadog.trace.api.ConfigDefaults.DEFAULT_DEBUGGER_EXCEPTION_ENABLED;
import static datadog.trace.api.ConfigDefaults.DEFAULT_DEBUGGER_EXCEPTION_ONLY_LOCAL_ROOT;
import static datadog.trace.api.ConfigDefaults.DEFAULT_DEBUGGER_INSTRUMENT_THE_WORLD;
import static datadog.trace.api.ConfigDefaults.DEFAULT_DEBUGGER_MAX_EXCEPTION_PER_SECOND;
import static datadog.trace.api.ConfigDefaults.DEFAULT_DEBUGGER_MAX_PAYLOAD_SIZE;
import static datadog.trace.api.ConfigDefaults.DEFAULT_DEBUGGER_METRICS_ENABLED;
import static datadog.trace.api.ConfigDefaults.DEFAULT_DEBUGGER_POLL_INTERVAL;
Expand Down Expand Up @@ -213,6 +214,7 @@
import static datadog.trace.api.config.DebuggerConfig.DEBUGGER_EXCEPTION_ONLY_LOCAL_ROOT;
import static datadog.trace.api.config.DebuggerConfig.DEBUGGER_EXCLUDE_FILES;
import static datadog.trace.api.config.DebuggerConfig.DEBUGGER_INSTRUMENT_THE_WORLD;
import static datadog.trace.api.config.DebuggerConfig.DEBUGGER_MAX_EXCEPTION_PER_SECOND;
import static datadog.trace.api.config.DebuggerConfig.DEBUGGER_MAX_PAYLOAD_SIZE;
import static datadog.trace.api.config.DebuggerConfig.DEBUGGER_METRICS_ENABLED;
import static datadog.trace.api.config.DebuggerConfig.DEBUGGER_POLL_INTERVAL;
Expand Down Expand Up @@ -837,6 +839,7 @@ static class HostNameHolder {
private final String debuggerSymbolIncludes;
private final int debuggerSymbolFlushThreshold;
private final boolean debuggerExceptionEnabled;
private final int debuggerMaxExceptionPerSecond;
private final boolean debuggerExceptionOnlyLocalRoot;

private final Set<String> debuggerThirdPartyIncludes;
Expand Down Expand Up @@ -1891,6 +1894,9 @@ PROFILING_DATADOG_PROFILER_ENABLED, isDatadogProfilerSafeInCurrentEnvironment())
DEBUGGER_EXCEPTION_ENABLED,
DEFAULT_DEBUGGER_EXCEPTION_ENABLED,
EXCEPTION_REPLAY_ENABLED);
debuggerMaxExceptionPerSecond =
configProvider.getInteger(
DEBUGGER_MAX_EXCEPTION_PER_SECOND, DEFAULT_DEBUGGER_MAX_EXCEPTION_PER_SECOND);
debuggerExceptionOnlyLocalRoot =
configProvider.getBoolean(
DEBUGGER_EXCEPTION_ONLY_LOCAL_ROOT, DEFAULT_DEBUGGER_EXCEPTION_ONLY_LOCAL_ROOT);
Expand Down Expand Up @@ -3215,6 +3221,10 @@ public boolean isDebuggerExceptionEnabled() {
return debuggerExceptionEnabled;
}

public int getDebuggerMaxExceptionPerSecond() {
return debuggerMaxExceptionPerSecond;
}

public boolean isDebuggerExceptionOnlyLocalRoot() {
return debuggerExceptionOnlyLocalRoot;
}
Expand Down

0 comments on commit e16d0b0

Please sign in to comment.