Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for sending OOME events #7253

Merged
merged 1 commit into from
Jul 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
package datadog.communication.monitor;

import com.timgroup.statsd.Event;
import com.timgroup.statsd.ServiceCheck;
import datadog.trace.api.StatsDClient;
import java.util.function.Function;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

final class DDAgentStatsDClient implements StatsDClient {
public final class DDAgentStatsDClient implements StatsDClient {
private static final Logger log = LoggerFactory.getLogger(DDAgentStatsDClient.class);
private final DDAgentStatsDConnection connection;
private final Function<String, String> nameMapping;
private final Function<String[], String[]> tagMapping;
Expand Down Expand Up @@ -84,6 +88,30 @@ public void serviceCheck(
connection.statsd.recordServiceCheckRun(serviceCheck);
}

/**
* Record a statsd event
*
* @param type the type of event (error, warning, info, success - @see Event.AlertType)
* @param source the source of the event (e.g. java, myapp, CrashTracking, Telemetry, etc)
* @param eventName the name of the event (or title)
* @param message the message of the event
* @param tags the tags to attach to the event
*/
public void recordEvent(
String type, String source, String eventName, String message, String... tags) {
Event.AlertType alertType = Event.AlertType.valueOf(type.toUpperCase());
log.debug(
"Recording event: {} - {} - {} - {} [{}]", alertType, source, eventName, message, tags);
Event.Builder eventBuilder =
Event.builder()
.withTitle(eventName)
.withText(message)
.withSourceTypeName(source)
.withDate(System.currentTimeMillis())
.withAlertType(alertType);
connection.statsd.recordEvent(eventBuilder.build(), tagMapping.apply(tags));
}
Comment on lines +100 to +113
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this new method be part of the StatsD client API?
I would rather not expose the :communication implementation and revert it back to package private.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree, this method should really have been added to StatsDClient interface in the internal-api so you didn't then need to expose and cast to the implementation.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See #7267


static ServiceCheck.Status serviceCheckStatus(final String status) {
switch (status) {
case "OK":
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,47 @@ class DDAgentStatsDClientTest extends DDSpecification {
// spotless:on
}

def "single statsd client with event"() {
setup:
injectSysConfig(DOGSTATSD_START_DELAY, '0')
def server = new StatsDServer()
server.start()

def client = statsDClientManager().statsDClient('127.0.0.1', server.socket.localPort, null, namespace, constantTags as String[], false)

String[] tags = ["type:BufferPool", "jmx_domain:java.nio"]

expect:
client.recordEvent(eventType, "test", "test.event", "test event", tags)
def event = server.waitForMessage()
event.startsWith("_e{10,10}:test.event|test event|d:") && event.contains("|t:$expectedType|s:test|#$expectedTags")

cleanup:
client.close()
server.close()

where:
// spotless:off
namespace | eventType | expectedType | constantTags | expectedTags
null | "INFO" | "info" | null | "jmx_domain:java.nio,type:BufferPool"
null | "INFO" | "info" | ["lang:java", "lang_version:1.8.0"] | "jmx_domain:java.nio,type:BufferPool,lang:java,lang_version:1.8.0"
"example" | "INFO" | "info" | null | "jmx_domain:java.nio,type:BufferPool"
"example" | "INFO" | "info" | ["lang:java", "lang_version:1.8.0"] | "jmx_domain:java.nio,type:BufferPool,lang:java,lang_version:1.8.0"
null | "WARNING" | "warning" | null | "jmx_domain:java.nio,type:BufferPool"
null | "WARNING" | "warning" | ["lang:java", "lang_version:1.8.0"] | "jmx_domain:java.nio,type:BufferPool,lang:java,lang_version:1.8.0"
"example" | "WARNING" | "warning" | null | "jmx_domain:java.nio,type:BufferPool"
"example" | "WARNING" | "warning" | ["lang:java", "lang_version:1.8.0"] | "jmx_domain:java.nio,type:BufferPool,lang:java,lang_version:1.8.0"
null | "ERROR" | "error" | null | "jmx_domain:java.nio,type:BufferPool"
null | "ERROR" | "error" | ["lang:java", "lang_version:1.8.0"] | "jmx_domain:java.nio,type:BufferPool,lang:java,lang_version:1.8.0"
"example" | "ERROR" | "error" | null | "jmx_domain:java.nio,type:BufferPool"
"example" | "ERROR" | "error" | ["lang:java", "lang_version:1.8.0"] | "jmx_domain:java.nio,type:BufferPool,lang:java,lang_version:1.8.0"
null | "SUCCESS" | "success" | null | "jmx_domain:java.nio,type:BufferPool"
null | "SUCCESS" | "success" | ["lang:java", "lang_version:1.8.0"] | "jmx_domain:java.nio,type:BufferPool,lang:java,lang_version:1.8.0"
"example" | "SUCCESS" | "success" | null | "jmx_domain:java.nio,type:BufferPool"
"example" | "SUCCESS" | "success" | ["lang:java", "lang_version:1.8.0"] | "jmx_domain:java.nio,type:BufferPool,lang:java,lang_version:1.8.0"
// spotless:on
}

def "multiple statsd clients"() {
setup:
injectSysConfig(DOGSTATSD_START_DELAY, '0')
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -638,8 +638,8 @@ private static synchronized void startJmx() {
if (jmxStarting.getAndSet(true)) {
return; // another thread is already in startJmx
}
// crash uploader initialization relies on JMX being available
initializeCrashUploader();
// error tracking initialization relies on JMX being available
initializeErrorTracking();
if (jmxFetchEnabled) {
startJmxFetch();
}
Expand Down Expand Up @@ -870,7 +870,7 @@ private static void stopTelemetry() {
}
}

private static void initializeCrashUploader() {
private static void initializeErrorTracking() {
if (Platform.isJ9()) {
// TODO currently crash tracking is supported only for HotSpot based JVMs
return;
Expand Down
11 changes: 5 additions & 6 deletions dd-java-agent/agent-crashtracking/build.gradle
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
apply from: "$rootDir/gradle/java.gradle"

// FIXME: Improve test coverage.
minimumBranchCoverage = 0.6
// runtime dependent parts (eg. looking up values from the JVM args) are not easy to exercise in unit tests
// the minimum coverage is reduced to reflect that
// minimumInstructionCoverage = 0.9
minimumInstructionCoverage = 0.7
// The functionality is tested in dd-smoke-tests/crashtracking

minimumBranchCoverage = 0.0
minimumInstructionCoverage = 0.0
excludedClassesCoverage += ['com.datadog.crashtracking.*']

tasks.withType(Test).configureEach { subTask ->
dependsOn ':dd-java-agent:shadowJar'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ public CrashUploader() {

ConfigProvider configProvider = config.configProvider();

System.out.println("===> telemetryUrl: " + telemetryUrl);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it a leftover?

telemetryClient =
OkHttpUtils.buildHttpClient(
config,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
package com.datadog.crashtracking;

import static datadog.communication.monitor.DDAgentStatsDClientManager.statsDClientManager;

import datadog.communication.monitor.DDAgentStatsDClient;
import de.thetaphi.forbiddenapis.SuppressForbidden;
import java.util.concurrent.locks.LockSupport;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public final class OOMENotifier {
private static final Logger log = LoggerFactory.getLogger(OOMENotifier.class);

// This method is called via CLI so we don't need to be paranoid about the forbiddend APIs
@SuppressForbidden
public static void sendOomeEvent(String taglist) {
try (DDAgentStatsDClient client =
(DDAgentStatsDClient)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should be using the interface rather than the implementation here

statsDClientManager().statsDClient(null, null, null, null, null, false)) {
String[] tags = taglist.split(",");
client.recordEvent(
"error",
"java",
"OutOfMemoryError",
"Java process encountered out of memory error",
tags);
log.info("OOME event sent");
LockSupport.parkNanos(2_000_000_000L); // wait 2s to allow statsd client flushing the event
}
}
}
Loading
Loading