Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import org.jets3t.service.impl.rest.httpclient.RestS3Service;
import org.jets3t.service.security.AWSCredentials;

import com.codahale.metrics.Meter;
import com.codahale.metrics.MetricRegistry;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.base.Optional;
Expand Down Expand Up @@ -102,6 +103,8 @@ public class SingularityMainModule implements Module {

public static final String CURRENT_HTTP_REQUEST = "_singularity_current_http_request";

/** Name used with {@code @Named} to bind and inject the lost-tasks {@link Meter}. */
public static final String LOST_TASKS_METER = "singularity.lost.tasks.meter";

private final SingularityConfiguration configuration;

public SingularityMainModule(final SingularityConfiguration configuration) {
Expand Down Expand Up @@ -347,4 +350,11 @@ public Optional<HttpServletRequest> providesUrl(Provider<HttpServletRequest> req
return Optional.absent();
}
}

/**
 * Supplies the singleton {@link Meter} bound under the {@code LOST_TASKS_METER} name.
 *
 * @param registry metric registry the meter is obtained from
 * @return the meter registered as {@code com.hubspot.singularity.lostTasks}
 */
@Provides
@Singleton
@Named(LOST_TASKS_METER)
public Meter providesLostTasksMeter(MetricRegistry registry) {
  final String meterName = "com.hubspot.singularity.lostTasks";
  final Meter lostTasks = registry.meter(meterName);
  return lostTasks;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.codahale.metrics.Meter;
import com.codahale.metrics.annotation.Timed;
import com.google.common.base.Optional;
import com.google.common.base.Strings;
Expand Down Expand Up @@ -74,6 +75,7 @@ public class SingularityMesosStatusUpdateHandler implements Managed {
private final SingularityAbort singularityAbort;
private final SingularityConfiguration configuration;
private final Multiset<Protos.TaskStatus.Reason> taskLostReasons;
private final Meter lostTasksMeter;

private Future statusUpdateFuture;

Expand All @@ -87,7 +89,8 @@ public SingularityMesosStatusUpdateHandler(TaskManager taskManager, DeployManage
@Named(SingularityMainModule.STATUS_UPDATE_THREADPOOL_NAME) ScheduledExecutorService executorService,
SingularityConfiguration configuration,
SingularityAbort singularityAbort,
@Named(SingularityMesosModule.TASK_LOST_REASONS_COUNTER) Multiset<Protos.TaskStatus.Reason> taskLostReasons) {
@Named(SingularityMesosModule.TASK_LOST_REASONS_COUNTER) Multiset<Protos.TaskStatus.Reason> taskLostReasons,
@Named(SingularityMainModule.LOST_TASKS_METER) Meter lostTasksMeter) {
this.taskManager = taskManager;
this.deployManager = deployManager;
this.requestManager = requestManager;
Expand All @@ -105,6 +108,7 @@ public SingularityMesosStatusUpdateHandler(TaskManager taskManager, DeployManage
this.singularityAbort = singularityAbort;
this.configuration = configuration;
this.taskLostReasons = taskLostReasons;
this.lostTasksMeter = lostTasksMeter;
this.handlerStarted = new AtomicBoolean();

this.statusUpdateQueue = new ArrayBlockingQueue<>(configuration.getStatusUpdateQueueCapacity());
Expand Down Expand Up @@ -167,9 +171,7 @@ private Optional<String> getStatusMessage(Protos.TaskStatus status, Optional<Sin
}

/**
 * Adds the reason of a lost task to {@code taskLostReasons}.
 * Status updates whose state is not {@code TASK_LOST} are ignored.
 *
 * NOTE(review): this span appears to come from a diff view, so the body shown
 * here may interleave removed and added lines — confirm against the actual file.
 *
 * @param status the task status update to inspect
 */
private void updateDisasterStats(Protos.TaskStatus status) {
if (status.getState() == TaskState.TASK_LOST) {
// Only lost tasks contribute a reason to the multiset.
taskLostReasons.add(status.getReason());
}

}

private SchedulerDriver getSchedulerDriver() {
Expand Down Expand Up @@ -214,10 +216,14 @@ private void unsafeProcessStatusUpdate(Protos.TaskStatus status) {
return;
}

if (configuration.getDisasterDetection().isEnabled()) {
updateDisasterStats(status);
if (status.getState() == TaskState.TASK_LOST) {
lostTasksMeter.mark();
if (configuration.getDisasterDetection().isEnabled()) {
taskLostReasons.add(status.getReason());
}
}


final Optional<SingularityTask> task = taskManager.getTask(taskIdObj);

final boolean isActiveTask = taskManager.isActiveTask(taskId);
Expand Down