New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We鈥檒l occasionally send you account related emails.

Already on GitHub? Sign in to your account

Better task balancing #1482

Merged
merged 73 commits into from Jun 8, 2017
Commits
Jump to file or symbol
Failed to load files and symbols.
+206 鈭14
Diff settings

Always

Just for now

Viewing a subset of changes. View all

add tests and fix logic errors

  • Loading branch information...
darcatron committed Apr 14, 2017
commit 2a907c751293a9c1522b01327aafe5a40a6166e0
@@ -16,6 +16,7 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.Lists;
import com.google.inject.Inject;
import com.google.inject.Provider;
@@ -58,8 +59,6 @@
private final Provider<SingularitySchedulerStateCache> stateCacheProvider;
private final SchedulerDriverSupplier schedulerDriverSupplier;
private final Map<String, Integer> offerMatchAttemptsPerTask = new HashMap<>();
@Inject
public SingularityMesosOfferScheduler(MesosConfiguration mesosConfiguration,
CustomExecutorConfiguration customExecutorConfiguration,
@@ -118,14 +117,17 @@ public SingularityMesosOfferScheduler(MesosConfiguration mesosConfiguration,
int tasksScheduled = 0;
final List<SingularitySlaveUsageWithId> currentSlaveUsages = usageManager.getCurrentSlaveUsages(offerHolders.stream().map(o -> o.getOffer().getSlaveId().getValue()).collect(Collectors.toList()));
final Map<String, Integer> offerMatchAttemptsPerTask = new HashMap<>();
while (!pendingTaskIdToTaskRequest.isEmpty() && addedTaskInLastLoop && canScheduleAdditionalTasks(taskCredits)) {
addedTaskInLastLoop = false;
for (SingularityTaskRequestHolder taskRequestHolder : pendingTaskIdToTaskRequest.values()) {
Map<SingularityOfferHolder, Double> scorePerOffer = new HashMap<>();
double minScore = minScore(taskRequestHolder.getTaskRequest());
double minScore = minScore(taskRequestHolder.getTaskRequest(), offerMatchAttemptsPerTask, System.currentTimeMillis());
LOG.info("Minimum score for task {} is {}", taskRequestHolder.getTaskRequest().getPendingTask().getPendingTaskId().getId(), minScore);

This comment has been minimized.

@ssalinas

ssalinas Apr 20, 2017

Member

probably can be lower than info level here

@ssalinas

ssalinas Apr 20, 2017

Member

probably can be lower than info level here

for (SingularityOfferHolder offerHolder : offerHolders) {
@@ -255,46 +257,47 @@ private double score(SingularityOfferHolder offerHolder, SingularitySchedulerSta
return 0;
}
private double score(Offer offer, SingularityTaskRequest taskRequest, Optional<SingularitySlaveUsageWithId> maybeSlaveUsage) {
@VisibleForTesting
double score(Offer offer, SingularityTaskRequest taskRequest, Optional<SingularitySlaveUsageWithId> maybeSlaveUsage) {

This comment has been minimized.

@ssalinas

ssalinas Apr 20, 2017

Member

Let's go over this one in-person, think we are getting close, just easier to chat than typing a novel in github ;)

@ssalinas

ssalinas Apr 20, 2017

Member

Let's go over this one in-person, think we are getting close, just easier to chat than typing a novel in github ;)

double requestTypeCpuWeight = 0.20;

This comment has been minimized.

@ssalinas

ssalinas Apr 20, 2017

Member

Let's make these configurable, maybe another object in the configuration yaml?

@ssalinas

ssalinas Apr 20, 2017

Member

Let's make these configurable, maybe another object in the configuration yaml?

This comment has been minimized.

@darcatron

darcatron Apr 20, 2017

Contributor

Yup, I was in progress on this (now committed), but I kept the fields under SingularityConfiguration since I saw a lot of other stuff in there as well (e.g. caching). We could pull it into an OfferConfiguration file if you think that'd be better for organization

@darcatron

darcatron Apr 20, 2017

Contributor

Yup, I was in progress on this (now committed), but I kept the fields under SingularityConfiguration since I saw a lot of other stuff in there as well (e.g. caching). We could pull it into an OfferConfiguration file if you think that'd be better for organization

double requestTypeMemWeight = 0.30;
double freeCpuWeight = 0.20;
double freeMemWeight = 0.30;
double score = 0;
double defaultScoreForMissingUsage = 0.10;
String slaveId = offer.getSlaveId().getValue();
if (!maybeSlaveUsage.isPresent() || !maybeSlaveUsage.get().getCpuTotal().isPresent() || !maybeSlaveUsage.get().getMemoryTotal().isPresent()) {
LOG.info("Slave {} has no total usage data. Will default to {}", slaveId, defaultScoreForMissingUsage);
if (!maybeSlaveUsage.isPresent() || !maybeSlaveUsage.get().getCpuTotal().isPresent() || !maybeSlaveUsage.get().getMemoryMbTotal().isPresent()) {
LOG.info("Slave {} has no total usage data. Will default to {}", offer.getSlaveId().getValue(), defaultScoreForMissingUsage);
return defaultScoreForMissingUsage;
}
SingularitySlaveUsageWithId slaveUsage = maybeSlaveUsage.get();
Map<RequestType, Map<String, Number>> usagesPerRequestType = slaveUsage.getUsagePerRequestType();
Map<String, Number> usagePerResource = usagesPerRequestType.get(taskRequest.getRequest().getRequestType());
score += requestTypeCpuWeight * (1 - (usagePerResource.get(SingularitySlaveUsage.CPU_USED).doubleValue() / slaveUsage.getCpuTotal().get()));
score += requestTypeMemWeight * (1 - (usagePerResource.get(SingularitySlaveUsage.MEMORY_USED).longValue() / slaveUsage.getMemoryTotal().get()));
score += requestTypeCpuWeight * (1 - usagePerResource.get(SingularitySlaveUsage.CPU_USED).doubleValue() / slaveUsage.getCpuTotal().get());
score += requestTypeMemWeight * (1 - ((double) usagePerResource.get(SingularitySlaveUsage.MEMORY_BYTES_USED).longValue() / slaveUsage.getMemoryBytesTotal().get()));
score += freeCpuWeight * (MesosUtils.getNumCpus(offer) / slaveUsage.getCpuTotal().get());
score += freeMemWeight * (MesosUtils.getMemory(offer) / slaveUsage.getMemoryTotal().get());
score += freeMemWeight * (MesosUtils.getMemory(offer) / slaveUsage.getMemoryMbTotal().get());
return score;
}
private double minScore(SingularityTaskRequest taskRequest) {
@VisibleForTesting
double minScore(SingularityTaskRequest taskRequest, Map<String, Integer> offerMatchAttemptsPerTask, long now) {
double minScore = 0.80;

This comment has been minimized.

@darcatron

darcatron Apr 5, 2017

Contributor

this can be adjusted as necessary. I thought an 80% match might be a good starting point, but we could def reduce it

@darcatron

darcatron Apr 5, 2017

Contributor

this can be adjusted as necessary. I thought an 80% match might be a good starting point, but we could def reduce it

int maxOfferAttempts = 20;

This comment has been minimized.

@ssalinas

ssalinas Apr 20, 2017

Member

another that would be nice to have ocnfigurable

@ssalinas

ssalinas Apr 20, 2017

Member

another that would be nice to have ocnfigurable

long maxMillisPastDue = TimeUnit.MILLISECONDS.convert(10, TimeUnit.MINUTES);
minScore -= offerMatchAttemptsPerTask.getOrDefault(taskRequest.getPendingTask().getPendingTaskId().getId(), 0) / maxOfferAttempts;
minScore -= millisPastDue(taskRequest, System.currentTimeMillis()) / maxMillisPastDue;
minScore -= offerMatchAttemptsPerTask.getOrDefault(taskRequest.getPendingTask().getPendingTaskId().getId(), 0) / (double) maxOfferAttempts;
minScore -= millisPastDue(taskRequest, now) / (double) maxMillisPastDue;
return Math.max(minScore, 0);
}
private long millisPastDue(SingularityTaskRequest taskRequest, long now) {
return Math.max(now - taskRequest.getPendingTask().getPendingTaskId().getNextRunAt(), 1);
return Math.max(now - taskRequest.getPendingTask().getPendingTaskId().getNextRunAt(), 0);
}
private SingularityTask acceptTask(SingularityOfferHolder offerHolder,
@@ -0,0 +1,189 @@
package com.hubspot.singularity.mesos;
import java.util.HashMap;
import java.util.Map;
import java.util.Optional;
import java.util.concurrent.TimeUnit;
import org.apache.mesos.Protos.FrameworkID;
import org.apache.mesos.Protos.Offer;
import org.apache.mesos.Protos.OfferID;
import org.apache.mesos.Protos.Resource;
import org.apache.mesos.Protos.SlaveID;
import org.apache.mesos.Protos.Value.Scalar;
import org.apache.mesos.Protos.Value.Type;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import org.mockito.Mockito;
import com.google.common.collect.ImmutableMap;
import com.google.inject.Inject;
import com.hubspot.horizon.shaded.org.jboss.netty.util.internal.ThreadLocalRandom;
import com.hubspot.mesos.MesosUtils;
import com.hubspot.singularity.RequestType;
import com.hubspot.singularity.SingularityCuratorTestBase;
import com.hubspot.singularity.SingularityPendingTask;
import com.hubspot.singularity.SingularityPendingTaskId;
import com.hubspot.singularity.SingularityRequest;
import com.hubspot.singularity.SingularitySlaveUsage;
import com.hubspot.singularity.SingularitySlaveUsageWithId;
import com.hubspot.singularity.SingularityTaskRequest;
public class SingularityMesosOfferSchedulerTest extends SingularityCuratorTestBase {
@Inject
protected SingularityMesosOfferScheduler scheduler;
private SingularityTaskRequest taskRequest = Mockito.mock(SingularityTaskRequest.class);
private SingularityRequest request = Mockito.mock(SingularityRequest.class);
private SingularityPendingTask task = Mockito.mock(SingularityPendingTask.class);
private SingularityPendingTaskId taskId = Mockito.mock(SingularityPendingTaskId.class);
private final Map<String, Integer> offerMatchAttemptsPerTask = new HashMap<>();
public SingularityMesosOfferSchedulerTest() {
super(false);
}
@Before
public void setup() {
Mockito.when(taskRequest.getRequest()).thenReturn(request);
Mockito.when(taskRequest.getPendingTask()).thenReturn(task);
Mockito.when(task.getPendingTaskId()).thenReturn(taskId);
}
@Test
public void itGetsTheCorrectScore() {
String slaveId = "slave";
Map<RequestType, Map<String, Number>> usagePerRequestType = new HashMap<>();
setRequestType(RequestType.SERVICE);
// no usage tracked -> default score
assertScoreIs(0.10, scheduler.score(getOffer(10, 10, slaveId), taskRequest, Optional.empty()));
// new slave (no resources used) -> perfect score
usagePerRequestType.put(RequestType.SERVICE, ImmutableMap.of(SingularitySlaveUsage.CPU_USED, 0, SingularitySlaveUsage.MEMORY_BYTES_USED, 0));
assertScoreIs(1, scheduler.score(getOffer(10, 10, slaveId), taskRequest, Optional.of(getUsage(0, 0, 10, 10, usagePerRequestType, slaveId))));
// cpu used, no mem used --- different request type
usagePerRequestType.put(RequestType.SERVICE, ImmutableMap.of(SingularitySlaveUsage.CPU_USED, 0, SingularitySlaveUsage.MEMORY_BYTES_USED, 0));
assertScoreIs(0.90, scheduler.score(getOffer(5, 10, slaveId), taskRequest, Optional.of(getUsage(0, 5, 10, 10, usagePerRequestType, slaveId))));
usagePerRequestType.put(RequestType.SERVICE, ImmutableMap.of(SingularitySlaveUsage.CPU_USED, 0, SingularitySlaveUsage.MEMORY_BYTES_USED, 0));
assertScoreIs(0.84, scheduler.score(getOffer(2, 10, slaveId), taskRequest, Optional.of(getUsage(0, 8, 10, 10, usagePerRequestType, slaveId))));
// cpu used, no mem used --- same request type
usagePerRequestType.put(RequestType.SERVICE, ImmutableMap.of(SingularitySlaveUsage.CPU_USED, 5, SingularitySlaveUsage.MEMORY_BYTES_USED, 0));
assertScoreIs(0.80, scheduler.score(getOffer(5, 10, slaveId), taskRequest, Optional.of(getUsage(0, 5, 10, 10, usagePerRequestType, slaveId))));
usagePerRequestType.put(RequestType.SERVICE, ImmutableMap.of(SingularitySlaveUsage.CPU_USED, 8, SingularitySlaveUsage.MEMORY_BYTES_USED, 0));
assertScoreIs(0.68, scheduler.score(getOffer(2, 10, slaveId), taskRequest, Optional.of(getUsage(0, 8, 10, 10, usagePerRequestType, slaveId))));
// cpu used, no mem used --- different request type
usagePerRequestType.put(RequestType.SERVICE, ImmutableMap.of(SingularitySlaveUsage.CPU_USED, 0, SingularitySlaveUsage.MEMORY_BYTES_USED, 0));
assertScoreIs(0.85, scheduler.score(getOffer(10, 5, slaveId), taskRequest, Optional.of(getUsage(mbToBytes(5), 0, 10, 10, usagePerRequestType, slaveId))));
usagePerRequestType.put(RequestType.SERVICE, ImmutableMap.of(SingularitySlaveUsage.CPU_USED, 0, SingularitySlaveUsage.MEMORY_BYTES_USED, 0));
assertScoreIs(0.76, scheduler.score(getOffer(10, 2, slaveId), taskRequest, Optional.of(getUsage(mbToBytes(8), 0, 10, 10, usagePerRequestType, slaveId))));
// no cpu used, mem used --- same request type
usagePerRequestType.put(RequestType.SERVICE, ImmutableMap.of(SingularitySlaveUsage.CPU_USED, 0, SingularitySlaveUsage.MEMORY_BYTES_USED, mbToBytes(5)));
assertScoreIs(0.70, scheduler.score(getOffer(10, 5, slaveId), taskRequest, Optional.of(getUsage(mbToBytes(5), 0, 10, 10, usagePerRequestType, slaveId))));
usagePerRequestType.put(RequestType.SERVICE, ImmutableMap.of(SingularitySlaveUsage.CPU_USED, 0, SingularitySlaveUsage.MEMORY_BYTES_USED, mbToBytes(8)));
assertScoreIs(0.52, scheduler.score(getOffer(10, 2, slaveId), taskRequest, Optional.of(getUsage(mbToBytes(8), 0, 10, 10, usagePerRequestType, slaveId))));
// cpu used, mem used --- different request type
usagePerRequestType.put(RequestType.SERVICE, ImmutableMap.of(SingularitySlaveUsage.CPU_USED, 0, SingularitySlaveUsage.MEMORY_BYTES_USED, 0));
assertScoreIs(0.75, scheduler.score(getOffer(5, 5, slaveId), taskRequest, Optional.of(getUsage(mbToBytes(5), 5, 10, 10, usagePerRequestType, slaveId))));
usagePerRequestType.put(RequestType.SERVICE, ImmutableMap.of(SingularitySlaveUsage.CPU_USED, 0, SingularitySlaveUsage.MEMORY_BYTES_USED, 0));
assertScoreIs(0.60, scheduler.score(getOffer(2, 2, slaveId), taskRequest, Optional.of(getUsage(mbToBytes(8), 8, 10, 10, usagePerRequestType, slaveId))));
// cpu used, mem used --- same request type
usagePerRequestType.put(RequestType.SERVICE, ImmutableMap.of(SingularitySlaveUsage.CPU_USED, 5, SingularitySlaveUsage.MEMORY_BYTES_USED, mbToBytes(5)));
assertScoreIs(0.50, scheduler.score(getOffer(5, 5, slaveId), taskRequest, Optional.of(getUsage(mbToBytes(5), 5, 10, 10, usagePerRequestType, slaveId))));
usagePerRequestType.put(RequestType.SERVICE, ImmutableMap.of(SingularitySlaveUsage.CPU_USED, 8, SingularitySlaveUsage.MEMORY_BYTES_USED, mbToBytes(8)));
assertScoreIs(0.20, scheduler.score(getOffer(2, 2, slaveId), taskRequest, Optional.of(getUsage(mbToBytes(8), 8, 10, 10, usagePerRequestType, slaveId))));
}
@Test
public void itGetsTheCorrectMinScore() {
long now = System.currentTimeMillis();
String taskId = "taskId";
setNextRunAt(now);
setTaskId(taskId);
// no attempts, no delay
addOrUpdateOfferMatchAttempt(taskId, 0);
assertScoreIs(0.80, scheduler.minScore(taskRequest, offerMatchAttemptsPerTask, now));
// no attempts, delay
assertScoreIs(0.30, scheduler.minScore(taskRequest, offerMatchAttemptsPerTask, now + TimeUnit.MILLISECONDS.convert(5, TimeUnit.MINUTES)));
// attempts, no delay
addOrUpdateOfferMatchAttempt(taskId, 10);
assertScoreIs(0.30, scheduler.minScore(taskRequest, offerMatchAttemptsPerTask, now));
// attempts, delay
assertScoreIs(0, scheduler.minScore(taskRequest, offerMatchAttemptsPerTask, now + TimeUnit.MILLISECONDS.convert(5, TimeUnit.MINUTES)));
addOrUpdateOfferMatchAttempt(taskId, 1);
assertScoreIs(0.25, scheduler.minScore(taskRequest, offerMatchAttemptsPerTask, now + TimeUnit.MILLISECONDS.convert(5, TimeUnit.MINUTES)));
addOrUpdateOfferMatchAttempt(taskId, 4);
assertScoreIs(0.10, scheduler.minScore(taskRequest, offerMatchAttemptsPerTask, now + TimeUnit.MILLISECONDS.convert(5, TimeUnit.MINUTES)));
}
private void assertScoreIs(double expectedScore, double actualScore) {
Assert.assertTrue(Math.round(actualScore * 100.0) / 100.0 == expectedScore);
}
private Offer getOffer(double cpus, long memMb, String slaveId) {
return Offer.newBuilder()
.setId(OfferID.newBuilder().setValue("offer" + ThreadLocalRandom.current().nextInt(1000)).build())
.setFrameworkId(FrameworkID.newBuilder().setValue("framework1").build())
.setSlaveId(SlaveID.newBuilder().setValue(slaveId).build())
.setHostname("host")
.addResources(getCpuResource(cpus))
.addResources(getMemResource(memMb))
.build();
}
private long mbToBytes(long memMb) {
return memMb * 1024L * 1024L;
}
private SingularitySlaveUsageWithId getUsage(long memBytes, double cpus, long memMbTotal, double cpusTotal, Map<RequestType, Map<String, Number>> usagePerRequestType, String slaveId) {
return new SingularitySlaveUsageWithId(new SingularitySlaveUsage(memBytes, 0L, cpus, 1, Optional.of(memMbTotal), Optional.of(cpusTotal), usagePerRequestType), slaveId);
}
private Resource.Builder getCpuResource(double cpus) {
return Resource.newBuilder().setType(Type.SCALAR).setName(MesosUtils.CPUS).setScalar(Scalar.newBuilder().setValue(cpus));
}
private Resource.Builder getMemResource(double memMb) {
return Resource.newBuilder().setType(Type.SCALAR).setName(MesosUtils.MEMORY).setScalar(Scalar.newBuilder().setValue(memMb));
}
private void setNextRunAt(long time) {
Mockito.when(taskId.getNextRunAt()).thenReturn(time);
}
private void setTaskId(String id) {
Mockito.when(taskId.getId()).thenReturn(id);
}
private void setRequestType(RequestType type) {
Mockito.when(request.getRequestType()).thenReturn(type);
}
private void addOrUpdateOfferMatchAttempt(String id, int attempts) {
offerMatchAttemptsPerTask.put(id, attempts);
}
}
ProTip! Use n and p to navigate between commits in a pull request.