Skip to content

Commit

Permalink
check more often, use time triggered as a factor in disasters
Browse files Browse the repository at this point in the history
  • Loading branch information
ssalinas committed Aug 29, 2016
1 parent cdb3e68 commit 224dfea
Show file tree
Hide file tree
Showing 9 changed files with 196 additions and 130 deletions.
@@ -1,13 +1,11 @@
package com.hubspot.singularity;

import java.util.Collections;
import java.util.List;

import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.base.Objects;
import com.google.common.primitives.Longs;

public class SingularityDisasterStats {
public class SingularityDisasterDataPoint implements Comparable<SingularityDisasterDataPoint> {
private final long timestamp;
private final int numActiveTasks;
private final int numPendingTasks;
Expand All @@ -18,7 +16,7 @@ public class SingularityDisasterStats {
private final int numLostSlaves;

@JsonCreator
public SingularityDisasterStats(@JsonProperty("timestamp") long timestamp,
public SingularityDisasterDataPoint(@JsonProperty("timestamp") long timestamp,
@JsonProperty("numActiveTasks") int numActiveTasks,
@JsonProperty("numPendingTasks") int numPendingTasks,
@JsonProperty("numLateTasks") int numLateTasks,
Expand Down Expand Up @@ -76,7 +74,7 @@ public boolean equals(Object o) {
if (o == null || getClass() != o.getClass()) {
return false;
}
SingularityDisasterStats that = (SingularityDisasterStats) o;
SingularityDisasterDataPoint that = (SingularityDisasterDataPoint) o;
return timestamp == that.timestamp &&
numActiveTasks == that.numActiveTasks &&
numPendingTasks == that.numPendingTasks &&
Expand Down Expand Up @@ -105,4 +103,9 @@ public String toString() {
.add("numLostSlaves", numLostSlaves)
.toString();
}

/**
 * Orders data points by {@code timestamp} only, ascending.
 *
 * <p>Note that this ordering is not consistent with {@code equals}: {@code equals} also compares
 * the task counters, so two data points can compare as {@code 0} here without being equal.
 *
 * @param o the data point to compare against; must not be {@code null}
 * @return a negative value, zero, or a positive value as this timestamp is less than, equal to,
 *     or greater than the other timestamp
 */
@Override
public int compareTo(SingularityDisasterDataPoint o) {
  final long otherTimestamp = o.getTimestamp();
  return Longs.compare(timestamp, otherTimestamp);
}
}
@@ -0,0 +1,49 @@
package com.hubspot.singularity;

import java.util.ArrayList;
import java.util.List;

import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.base.Objects;

/**
 * JSON-serializable wrapper around a list of {@link SingularityDisasterDataPoint}s.
 *
 * <p>The list passed to the constructor is held and returned by reference (no defensive copy), so
 * in-place operations on {@link #getDataPoints()}, such as sorting, are visible through this
 * instance.
 */
public class SingularityDisasterDataPoints {
  private final List<SingularityDisasterDataPoint> dataPoints;

  /**
   * Creates a wrapper around the given list.
   *
   * @param dataPoints the data points to wrap; {@code null} (e.g. when the {@code dataPoints}
   *     JSON property is missing or explicitly null) is replaced by a new empty, mutable list so
   *     that {@link #getDataPoints()} never returns {@code null}
   */
  @JsonCreator
  public SingularityDisasterDataPoints(@JsonProperty("dataPoints") List<SingularityDisasterDataPoint> dataPoints) {
    // Callers sort and read the list without a null check, so normalise null to an empty list here.
    this.dataPoints = dataPoints != null ? dataPoints : new ArrayList<SingularityDisasterDataPoint>();
  }

  /**
   * @return a new instance backed by a fresh, empty, mutable list
   */
  public static SingularityDisasterDataPoints empty() {
    return new SingularityDisasterDataPoints(new ArrayList<SingularityDisasterDataPoint>());
  }

  /**
   * @return the backing list itself (never {@code null}); modifications write through to this
   *     instance
   */
  public List<SingularityDisasterDataPoint> getDataPoints() {
    return dataPoints;
  }

  /** Two instances are equal when they are of the same class and their lists are equal. */
  @Override
  public boolean equals(Object o) {
    if (this == o) {
      return true;
    }
    if (o == null || getClass() != o.getClass()) {
      return false;
    }
    SingularityDisasterDataPoints that = (SingularityDisasterDataPoints) o;
    return Objects.equal(dataPoints, that.dataPoints);
  }

  @Override
  public int hashCode() {
    return Objects.hashCode(dataPoints);
  }

  @Override
  public String toString() {
    return Objects.toStringHelper(this)
        .add("dataPoints", dataPoints)
        .toString();
  }
}
@@ -1,43 +1,34 @@
package com.hubspot.singularity;

import java.util.List;
import java.util.Map;

import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.base.Objects;
import com.google.common.base.Optional;

public class SingularityDisastersData {
private final Optional<SingularityDisasterStats> currentStats;
private final Optional<SingularityDisasterStats> lastStats;
private final List<SingularityDisasterDataPoint> stats;
private final List<SingularityDisaster> disasters;
private final boolean automatedActionDisabled;

@JsonCreator
public SingularityDisastersData(@JsonProperty("currentStats") Optional<SingularityDisasterStats> currentStats,
@JsonProperty("lastStats") Optional<SingularityDisasterStats> lastStats,
public SingularityDisastersData(@JsonProperty("stats") List<SingularityDisasterDataPoint> stats,
@JsonProperty("disasterStates") List<SingularityDisaster> disasters,
@JsonProperty("automatedActionDisabled") boolean automatedActionDisabled) {
this.currentStats = currentStats;
this.lastStats = lastStats;
this.stats = stats;
this.disasters = disasters;
this.automatedActionDisabled = automatedActionDisabled;
}

public Optional<SingularityDisasterStats> getCurrentStats() {
return currentStats;
}

public Optional<SingularityDisasterStats> getLastStats() {
return lastStats;
public List<SingularityDisasterDataPoint> getStats() {
return stats;
}

public List<SingularityDisaster> getDisasters() {
return disasters;
}

public boolean isautomatedActionsDisabled() {
public boolean isAutomatedActionsDisabled() {
return automatedActionDisabled;
}

Expand All @@ -51,21 +42,19 @@ public boolean equals(Object o) {
}
SingularityDisastersData that = (SingularityDisastersData) o;
return automatedActionDisabled == that.automatedActionDisabled &&
Objects.equal(currentStats, that.currentStats) &&
Objects.equal(lastStats, that.lastStats) &&
Objects.equal(stats, that.stats) &&
Objects.equal(disasters, that.disasters);
}

@Override
public int hashCode() {
return Objects.hashCode(currentStats, lastStats, disasters, automatedActionDisabled);
return Objects.hashCode(stats, disasters, automatedActionDisabled);
}

@Override
public String toString() {
return Objects.toStringHelper(this)
.add("currentStats", currentStats)
.add("lastStats", lastStats)
.add("stats", stats)
.add("disasters", disasters)
.add("automatedActionDisabled", automatedActionDisabled)
.toString();
Expand Down
Expand Up @@ -14,25 +14,29 @@

public class DisasterDetectionConfiguration {

private boolean enabled = false;
private boolean enabled = true;

private long runEveryMillis = TimeUnit.SECONDS.toMillis(30);
private int statsHistorySize = 10;

private long runEveryMillis = TimeUnit.SECONDS.toMillis(10);

@JsonProperty("disableActionsOnDisaster")
@NotNull
private List<SingularityDisabledActionType> disableActionsOnDisaster = Collections.emptyList();

private boolean checkLateTasks = true;

private long criticalAvgTaskLagMillis = 240000L;
private long criticalAvgTaskLagMillis = TimeUnit.MINUTES.toMillis(4);

private double criticalOverdueTaskPortion = 0.1;

private long triggerAfterMillisOverTaskLagThreshold = TimeUnit.SECONDS.toMillis(45);

private boolean checkLostSlaves = true;

private double criticalLostSlavePortion = 0.2;

private boolean includePreviousLostSlavesCount = true;
private long includeLostSlavesInLastMillis = TimeUnit.SECONDS.toMillis(30);

private boolean checkLostTasks = true;

Expand All @@ -43,7 +47,7 @@ public class DisasterDetectionConfiguration {

private double criticalLostTaskPortion = 0.2;

private boolean includePreviousLostTaskCount = true;
private long includeLostTasksInLastMillis = TimeUnit.SECONDS.toMillis(30);

public boolean isEnabled() {
return enabled;
Expand All @@ -61,6 +65,14 @@ public void setRunEveryMillis(long runEveryMillis) {
this.runEveryMillis = runEveryMillis;
}

/**
 * @return the configured stats history size (field default: 10).
 *     NOTE(review): presumably the number of disaster data points retained; confirm against the
 *     code that consumes this setting.
 */
public int getStatsHistorySize() {
return statsHistorySize;
}

/**
 * Overrides the stats history size (field default: 10). The value is stored as given; no range
 * validation is performed here.
 *
 * @param statsHistorySize the new stats history size
 */
public void setStatsHistorySize(int statsHistorySize) {
this.statsHistorySize = statsHistorySize;
}

public List<SingularityDisabledActionType> getDisableActionsOnDisaster() {
return disableActionsOnDisaster;
}
Expand Down Expand Up @@ -93,6 +105,14 @@ public void setCriticalOverdueTaskPortion(double criticalOverdueTaskPortion) {
this.criticalOverdueTaskPortion = criticalOverdueTaskPortion;
}

/**
 * @return the configured duration in milliseconds (field default: 45 seconds).
 *     NOTE(review): presumably how long task lag must stay over the critical threshold before a
 *     disaster is triggered; confirm against the disaster detection code.
 */
public long getTriggerAfterMillisOverTaskLagThreshold() {
return triggerAfterMillisOverTaskLagThreshold;
}

/**
 * Overrides the task-lag trigger duration (field default: 45 seconds). The value is stored as
 * given; no range validation is performed here.
 *
 * @param triggerAfterMillisOverTaskLagThreshold the new duration, in milliseconds
 */
public void setTriggerAfterMillisOverTaskLagThreshold(long triggerAfterMillisOverTaskLagThreshold) {
this.triggerAfterMillisOverTaskLagThreshold = triggerAfterMillisOverTaskLagThreshold;
}

public boolean isCheckLostSlaves() {
return checkLostSlaves;
}
Expand All @@ -109,12 +129,12 @@ public void setCriticalLostSlavePortion(double criticalLostSlavePortion) {
this.criticalLostSlavePortion = criticalLostSlavePortion;
}

public boolean isIncludePreviousLostSlavesCount() {
return includePreviousLostSlavesCount;
public long getIncludeLostSlavesInLastMillis() {
return includeLostSlavesInLastMillis;
}

public void setIncludePreviousLostSlavesCount(boolean includePreviousLostSlavesCount) {
this.includePreviousLostSlavesCount = includePreviousLostSlavesCount;
public void setIncludeLostSlavesInLastMillis(long includeLostSlavesInLastMillis) {
this.includeLostSlavesInLastMillis = includeLostSlavesInLastMillis;
}

public boolean isCheckLostTasks() {
Expand All @@ -141,11 +161,11 @@ public void setCriticalLostTaskPortion(double criticalLostTaskPortion) {
this.criticalLostTaskPortion = criticalLostTaskPortion;
}

public boolean isIncludePreviousLostTaskCount() {
return includePreviousLostTaskCount;
public long getIncludeLostTasksInLastMillis() {
return includeLostTasksInLastMillis;
}

public void setIncludePreviousLostTaskCount(boolean includePreviousLostTaskCount) {
this.includePreviousLostTaskCount = includePreviousLostTaskCount;
public void setIncludeLostTasksInLastMillis(long includeLostTasksInLastMillis) {
this.includeLostTasksInLastMillis = includeLostTasksInLastMillis;
}
}
@@ -1,11 +1,8 @@
package com.hubspot.singularity.data;

import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.curator.framework.CuratorFramework;
import org.apache.curator.utils.ZKPaths;
Expand All @@ -18,7 +15,7 @@
import com.hubspot.singularity.SingularityDisabledAction;
import com.hubspot.singularity.SingularityDisabledActionType;
import com.hubspot.singularity.SingularityDisaster;
import com.hubspot.singularity.SingularityDisasterStats;
import com.hubspot.singularity.SingularityDisasterDataPoints;
import com.hubspot.singularity.SingularityDisasterType;
import com.hubspot.singularity.SingularityDisastersData;
import com.hubspot.singularity.SingularityUser;
Expand All @@ -29,19 +26,18 @@ public class DisasterManager extends CuratorAsyncManager {
private static final String DISASTERS_ROOT = "/disasters";
private static final String DISABLED_ACTIONS_PATH = DISASTERS_ROOT + "/disabled-actions";
private static final String ACTIVE_DISASTERS_PATH = DISASTERS_ROOT + "/active";
private static final String DISASTER_STATS_PATH = DISASTERS_ROOT + "/stats";
private static final String PREVIOUS_DISASTER_STATS_PATH = DISASTERS_ROOT + "/previous-stats";
private static final String DISASTER_STATS_PATH = DISASTERS_ROOT + "/statistics";
private static final String DISABLE_AUTOMATED_PATH = DISASTERS_ROOT + "/disabled";

private static final String MESSAGE_FORMAT = "Cannot perform action %s: %s";
private static final String DEFAULT_MESSAGE = "Action is currently disabled";

private final Transcoder<SingularityDisabledAction> disabledActionTranscoder;
private final Transcoder<SingularityDisasterStats> disasterStatsTranscoder;
private final Transcoder<SingularityDisasterDataPoints> disasterStatsTranscoder;

@Inject
public DisasterManager(CuratorFramework curator, SingularityConfiguration configuration, MetricRegistry metricRegistry,
Transcoder<SingularityDisabledAction> disabledActionTranscoder, Transcoder<SingularityDisasterStats> disasterStatsTranscoder) {
Transcoder<SingularityDisabledAction> disabledActionTranscoder, Transcoder<SingularityDisasterDataPoints> disasterStatsTranscoder) {
super(curator, configuration, metricRegistry);
this.disabledActionTranscoder = disabledActionTranscoder;
this.disasterStatsTranscoder = disasterStatsTranscoder;
Expand Down Expand Up @@ -119,24 +115,18 @@ public List<SingularityDisaster> getAllDisasterStates(List<SingularityDisasterTy
return disasters;
}

public void saveDisasterStats(SingularityDisasterStats stats) {
public void saveDisasterStats(SingularityDisasterDataPoints stats) {
save(DISASTER_STATS_PATH, stats, disasterStatsTranscoder);
}

public Optional<SingularityDisasterStats> getDisasterStats() {
return getData(DISASTER_STATS_PATH, disasterStatsTranscoder);
}

public void savePreviousDisasterStats(SingularityDisasterStats stats) {
save(PREVIOUS_DISASTER_STATS_PATH, stats, disasterStatsTranscoder);
}

public Optional<SingularityDisasterStats> getPreviousDisasterStats() {
return getData(PREVIOUS_DISASTER_STATS_PATH, disasterStatsTranscoder);
/**
 * Loads the disaster data points stored at {@code DISASTER_STATS_PATH}.
 *
 * <p>When nothing is stored, an empty {@link SingularityDisasterDataPoints} is used instead. The
 * contained list is sorted in place by the data points' natural ordering before returning.
 *
 * @return the stored (or empty) data points, sorted
 */
public SingularityDisasterDataPoints getDisasterStats() {
// The fallback argument is evaluated eagerly, so an empty instance is built even when data exists.
SingularityDisasterDataPoints stats = getData(DISASTER_STATS_PATH, disasterStatsTranscoder).or(SingularityDisasterDataPoints.empty());
// Sorts the list held by `stats` itself; this relies on getDataPoints() returning a non-null, mutable list.
Collections.sort(stats.getDataPoints());
return stats;
}

public SingularityDisastersData getDisastersData() {
return new SingularityDisastersData(getDisasterStats(), getPreviousDisasterStats(), getAllDisasterStates(), isAutomatedDisabledActionsDisabled());
return new SingularityDisastersData(getDisasterStats().getDataPoints(), getAllDisasterStates(), isAutomatedDisabledActionsDisabled());
}

public void updateActiveDisasters(List<SingularityDisasterType> previouslyActiveDisasters, List<SingularityDisasterType> newActiveDisasters) {
Expand Down
Expand Up @@ -12,7 +12,7 @@
import com.hubspot.singularity.SingularityDeployStatistics;
import com.hubspot.singularity.SingularityDeployUpdate;
import com.hubspot.singularity.SingularityDisabledAction;
import com.hubspot.singularity.SingularityDisasterStats;
import com.hubspot.singularity.SingularityDisasterDataPoints;
import com.hubspot.singularity.SingularityHostState;
import com.hubspot.singularity.SingularityKilledTaskIdRecord;
import com.hubspot.singularity.SingularityLoadBalancerUpdate;
Expand Down Expand Up @@ -85,7 +85,7 @@ public void configure(final Binder binder) {
bindTranscoder(binder).asJson(SingularityExpiringSkipHealthchecks.class);
bindTranscoder(binder).asJson(SingularityTaskDestroyFrameworkMessage.class);
bindTranscoder(binder).asJson(SingularityDisabledAction.class);
bindTranscoder(binder).asJson(SingularityDisasterStats.class);
bindTranscoder(binder).asJson(SingularityDisasterDataPoints.class);

bindTranscoder(binder).asCompressedJson(SingularityDeployHistory.class);
bindTranscoder(binder).asCompressedJson(SingularityDeploy.class);
Expand Down

0 comments on commit 224dfea

Please sign in to comment.