New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We鈥檒l occasionally send you account related emails.

Already on GitHub? Sign in to your account

Better task balancing #1482

Merged
merged 73 commits into from Jun 8, 2017
Commits
Jump to file or symbol
Failed to load files and symbols.
+35 鈭32
Diff settings

Always

Just for now

Viewing a subset of changes. View all

track tasks that are considered long running

  • Loading branch information...
darcatron committed Apr 27, 2017
commit e5290d6177ccdd073b77cb68b3903ad9113aba74
@@ -1,8 +1,8 @@
package com.hubspot.singularity;
import java.util.Map;
import java.util.Optional;
import com.google.common.base.Optional;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
@@ -20,7 +20,7 @@
private final double cpusUsed;
private final Optional<Long> memoryMbTotal;
private final Optional<Double> cpuTotal;
private final Map<RequestType, Map<ResourceUsageType, Number>> usagePerRequestType;
private final Map<ResourceUsageType, Number> longRunningTasksUsage;
@JsonCreator
public SingularitySlaveUsage(@JsonProperty("memoryBytesUsed") long memoryBytesUsed,
@@ -29,14 +29,14 @@ public SingularitySlaveUsage(@JsonProperty("memoryBytesUsed") long memoryBytesUs
@JsonProperty("numTasks") int numTasks,
@JsonProperty("memoryMbTotal") Optional<Long> memoryMbTotal,
@JsonProperty("cpuTotal") Optional<Double> cpuTotal,
@JsonProperty("usagePerRequestType") Map<RequestType, Map<ResourceUsageType, Number>> usagePerRequestType) {
@JsonProperty("longRunningTasksUsage") Map<ResourceUsageType, Number> longRunningTasksUsage) {
this.memoryBytesUsed = memoryBytesUsed;
this.timestamp = timestamp;
this.cpusUsed = cpusUsed;
this.numTasks = numTasks;
this.memoryMbTotal = memoryMbTotal;
this.cpuTotal = cpuTotal;
this.usagePerRequestType = usagePerRequestType;
this.longRunningTasksUsage = longRunningTasksUsage;
}
public long getMemoryBytesUsed() {
@@ -56,19 +56,19 @@ public int getNumTasks() {
}
public Optional<Long> getMemoryBytesTotal() {
return memoryMbTotal.isPresent() ? Optional.of(memoryMbTotal.get() * BYTES_PER_MEGABYTE) : Optional.empty();
return memoryMbTotal.isPresent() ? Optional.of(memoryMbTotal.get() * BYTES_PER_MEGABYTE) : Optional.absent();
}
public Optional<Long> getMemoryMbTotal() {
return memoryMbTotal.isPresent() ? Optional.of(memoryMbTotal.get()) : Optional.empty();
return memoryMbTotal.isPresent() ? Optional.of(memoryMbTotal.get()) : Optional.absent();
}
public Optional<Double> getCpuTotal() {
return cpuTotal;
}
public Map<RequestType, Map<ResourceUsageType, Number>> getUsagePerRequestType() {
return usagePerRequestType;
public Map<ResourceUsageType, Number> getLongRunningTasksUsage() {
return longRunningTasksUsage;
}
@Override
@@ -3,24 +3,25 @@
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.concurrent.TimeUnit;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.collect.ImmutableMap;
import com.google.inject.Inject;
import com.google.common.base.Optional;
import com.hubspot.mesos.client.MesosClient;
import com.hubspot.mesos.json.MesosTaskMonitorObject;
import com.hubspot.singularity.RequestType;
import com.hubspot.singularity.SingularityDeployStatistics;
import com.hubspot.singularity.SingularitySlave;
import com.hubspot.singularity.SingularitySlaveUsage;
import com.hubspot.singularity.SingularitySlaveUsage.ResourceUsageType;
import com.hubspot.singularity.SingularityTaskCurrentUsage;
import com.hubspot.singularity.SingularityTaskId;
import com.hubspot.singularity.SingularityTaskUsage;
import com.hubspot.singularity.config.SingularityConfiguration;
import com.hubspot.singularity.data.DeployManager;
import com.hubspot.singularity.data.RequestManager;
import com.hubspot.singularity.data.UsageManager;
import com.hubspot.singularity.sentry.SingularityExceptionNotifier;
@@ -35,14 +36,16 @@
private final SingularityUsageHelper usageHelper;
private final SingularityExceptionNotifier exceptionNotifier;
private final RequestManager requestManager;
private final DeployManager deployManager;
@Inject
SingularityUsagePoller(SingularityConfiguration configuration,
SingularityUsageHelper usageHelper,
UsageManager usageManager,
MesosClient mesosClient,
SingularityExceptionNotifier exceptionNotifier,
RequestManager requestManager) {
RequestManager requestManager,
DeployManager deployManager) {
super(configuration.getCheckUsageEveryMillis(), TimeUnit.MILLISECONDS);
this.configuration = configuration;
@@ -51,16 +54,18 @@
this.usageManager = usageManager;
this.exceptionNotifier = exceptionNotifier;
this.requestManager = requestManager;
this.deployManager = deployManager;
}
@Override
public void runActionOnPoll() {
final long now = System.currentTimeMillis();
Map<RequestType, Map<ResourceUsageType, Number>> usagesPerRequestType = new HashMap<>();
Map<ResourceUsageType, Number> longRunningTasksUsage = new HashMap<>();
for (SingularitySlave slave : usageHelper.getSlavesToTrackUsageFor()) {
Optional<Long> memoryMbTotal = Optional.empty();
Optional<Double> cpuTotal = Optional.empty();
Optional<Long> memoryMbTotal = Optional.absent();
Optional<Double> cpuTotal = Optional.absent();
long memoryBytesUsed = 0;
double cpusUsed = 0;
@@ -86,7 +91,9 @@ public void runActionOnPoll() {
double taskCpusUsed = ((usage.getCpuSeconds() - lastUsage.getCpuSeconds()) / (usage.getTimestamp() - lastUsage.getTimestamp()));
updateUsagesPerRequestType(usagesPerRequestType, getRequestType(taskUsage), usage.getMemoryRssBytes(), taskCpusUsed);
if (getRequestType(taskUsage).isLongRunning() || isConsideredLongRunning(taskUsage)) {
updateLongRunningTasksUsage(longRunningTasksUsage, usage.getMemoryRssBytes(), taskCpusUsed);
}

This comment has been minimized.

@darcatron

darcatron Apr 27, 2017

Contributor

Added this to also include the usage for tasks that are non long running, but run for a considerable amount of time to warrant adding their usage

@darcatron

darcatron Apr 27, 2017

Contributor

Added this to also include the usage for tasks that are non long running, but run for a considerable amount of time to warrant adding their usage

This comment has been minimized.

@darcatron

darcatron Apr 27, 2017

Contributor

This is not outdated GitHub.....
bitmoji

@darcatron

darcatron Apr 27, 2017

Contributor

This is not outdated GitHub.....
bitmoji

SingularityTaskCurrentUsage currentUsage = new SingularityTaskCurrentUsage(usage.getMemoryRssBytes(), now, taskCpusUsed);
usageManager.saveCurrentTaskUsage(taskId, currentUsage);
@@ -104,7 +111,7 @@ public void runActionOnPoll() {
cpuTotal = Optional.of(slave.getResources().get().getNumCpus().get().doubleValue());
}
SingularitySlaveUsage slaveUsage = new SingularitySlaveUsage(memoryBytesUsed, now, cpusUsed, allTaskUsage.size(), memoryMbTotal, cpuTotal, usagesPerRequestType);
SingularitySlaveUsage slaveUsage = new SingularitySlaveUsage(memoryBytesUsed, now, cpusUsed, allTaskUsage.size(), memoryMbTotal, cpuTotal, longRunningTasksUsage);
List<Long> slaveTimestamps = usageManager.getSlaveUsageTimestamps(slave.getId());
if (slaveTimestamps.size() + 1 > configuration.getNumUsageToKeep()) {
usageManager.deleteSpecificSlaveUsage(slave.getId(), slaveTimestamps.get(0));
@@ -130,23 +137,19 @@ private RequestType getRequestType(MesosTaskMonitorObject task) {
return requestManager.getRequest(SingularityTaskId.valueOf(task.getSource()).getRequestId()).get().getRequest().getRequestType();
}
private void updateUsagesPerRequestType(Map<RequestType, Map<ResourceUsageType, Number>> usagePerRequestType, RequestType type, long memBytesUsed, double cpuUsed) {
if (usagePerRequestType.containsKey(type)) {
long oldMemUsed = 0L;
double oldCpuUsed = 0;
if (usagePerRequestType.get(type).containsKey(ResourceUsageType.MEMORY_BYTES_USED)) {
oldMemUsed = usagePerRequestType.get(type).get(ResourceUsageType.MEMORY_BYTES_USED).longValue();
}
if (usagePerRequestType.get(type).containsKey(ResourceUsageType.CPU_USED)) {
oldCpuUsed = usagePerRequestType.get(type).get(ResourceUsageType.CPU_USED).doubleValue();
}
private boolean isConsideredLongRunning(MesosTaskMonitorObject task) {
SingularityTaskId taskId = SingularityTaskId.valueOf(task.getSource());
final Optional<SingularityDeployStatistics> deployStatistics = deployManager.getDeployStatistics(taskId.getRequestId(), taskId.getDeployId());
usagePerRequestType.get(type).put(ResourceUsageType.MEMORY_BYTES_USED, oldMemUsed + memBytesUsed);
usagePerRequestType.get(type).put(ResourceUsageType.CPU_USED, oldCpuUsed + cpuUsed);
} else {
usagePerRequestType.put(type, ImmutableMap.of(ResourceUsageType.MEMORY_BYTES_USED, memBytesUsed));
usagePerRequestType.put(type, ImmutableMap.of(ResourceUsageType.CPU_USED, cpuUsed));
if (deployStatistics.isPresent() && deployStatistics.get().getAverageRuntimeMillis().isPresent()) {
return deployStatistics.get().getAverageRuntimeMillis().get() >= configuration.getConsiderNonLongRunningTaskLongRunningAfterRunningForSeconds();
}
return false;
}
private void updateLongRunningTasksUsage(Map<ResourceUsageType, Number> longRunningTasksUsage, long memBytesUsed, double cpuUsed) {
longRunningTasksUsage.compute(ResourceUsageType.MEMORY_BYTES_USED, (k, v) -> (v == null) ? memBytesUsed : v.longValue() + memBytesUsed);
longRunningTasksUsage.compute(ResourceUsageType.CPU_USED, (k, v) -> (v == null) ? cpuUsed : v.doubleValue() + cpuUsed);
}
}
ProTip! Use n and p to navigate between commits in a pull request.