Better task balancing #1482

Merged
merged 73 commits on Jun 8, 2017
+10 −11

Viewing a subset of changes.

refactor

darcatron committed Apr 27, 2017
commit d4d486c24a601e4a3248eec3fa773aead9fde2ce
@@ -8,7 +8,7 @@
private final String slaveId;
public SingularitySlaveUsageWithId(SingularitySlaveUsage usage, String slaveId) {
-super(usage.getMemoryBytesUsed(), usage.getTimestamp(), usage.getCpusUsed(), usage.getNumTasks(), usage.getMemoryMbTotal(), usage.getCpuTotal(), usage.getUsagePerRequestType());
+super(usage.getMemoryBytesUsed(), usage.getTimestamp(), usage.getCpusUsed(), usage.getNumTasks(), usage.getMemoryMbTotal(), usage.getCpuTotal(), usage.getLongRunningTasksUsage());
this.slaveId = slaveId;
}
@@ -24,7 +24,6 @@
import com.google.inject.name.Named;
import com.hubspot.mesos.json.MesosMasterSlaveObject;
import com.hubspot.mesos.json.MesosMasterStateObject;
-import com.hubspot.mesos.json.MesosResourcesObject;
import com.hubspot.singularity.MachineState;
import com.hubspot.singularity.SingularityMachineAbstraction;
import com.hubspot.singularity.SingularityPendingRequest.PendingType;
@@ -102,7 +101,7 @@ public SlaveMatchState doesOfferMatch(SingularityOfferHolder offer, SingularityT
return SlaveMatchState.RACK_DECOMMISSIONING;
}
-if (!taskRequest.getRequest().getRackAffinity().or(Collections.<String> emptyList()).isEmpty()) {
+if (!taskRequest.getRequest().getRackAffinity().or(Collections.emptyList()).isEmpty()) {
if (!taskRequest.getRequest().getRackAffinity().get().contains(rackId)) {
LOG.trace("Task {} requires a rack in {} (current rack {})", taskRequest.getPendingTask().getPendingTaskId(), taskRequest.getRequest().getRackAffinity().get(), rackId);
return SlaveMatchState.RACK_AFFINITY_NOT_MATCHING;
@@ -232,7 +231,7 @@ private boolean isSlaveAttributesMatch(SingularityOfferHolder offer, Singularity
Map<String, String> mergedAttributes = taskRequest.getRequest().getRequiredSlaveAttributes().or(new HashMap<String, String>());
mergedAttributes.putAll(taskRequest.getRequest().getAllowedSlaveAttributes().or(new HashMap<String, String>()));
if (!slaveAndRackHelper.hasRequiredAttributes(mergedAttributes, reservedSlaveAttributes)) {
LOG.trace("Slaves with attributes {} are reserved for matching tasks. Task with attributes {} does not match", reservedSlaveAttributes, taskRequest.getRequest().getRequiredSlaveAttributes().or(Collections.<String, String>emptyMap()));
LOG.trace("Slaves with attributes {} are reserved for matching tasks. Task with attributes {} does not match", reservedSlaveAttributes, taskRequest.getRequest().getRequiredSlaveAttributes().or(Collections.emptyMap()));
return true;
}
} else {
@@ -292,7 +291,7 @@ public void slaveLost(SlaveID slaveIdObj) {
if (slave.isPresent()) {
MachineState previousState = slave.get().getCurrentState().getState();
-slaveManager.changeState(slave.get(), MachineState.DEAD, Optional.<String> absent(), Optional.<String> absent());
+slaveManager.changeState(slave.get(), MachineState.DEAD, Optional.absent(), Optional.absent());
if (configuration.getDisasterDetection().isEnabled()) {
updateDisasterCounter(previousState);
}
@@ -323,7 +322,7 @@ private void checkRackAfterSlaveLoss(SingularitySlave lostSlave) {
LOG.info("Found {} slaves left in rack {}", numInRack, lostSlave.getRackId());
if (numInRack == 0) {
-rackManager.changeState(lostSlave.getRackId(), MachineState.DEAD, Optional.<String> absent(), Optional.<String> absent());
+rackManager.changeState(lostSlave.getRackId(), MachineState.DEAD, Optional.absent(), Optional.absent());
}
}
@@ -369,11 +368,11 @@ public void loadSlavesAndRacksFromMaster(MesosMasterStateObject state, boolean i
}
for (SingularitySlave leftOverSlave : activeSlavesById.values()) {
-slaveManager.changeState(leftOverSlave, isStartup ? MachineState.MISSING_ON_STARTUP : MachineState.DEAD, Optional.<String> absent(), Optional.<String> absent());
+slaveManager.changeState(leftOverSlave, isStartup ? MachineState.MISSING_ON_STARTUP : MachineState.DEAD, Optional.absent(), Optional.absent());
}
for (SingularityRack leftOverRack : remainingActiveRacks.values()) {
-rackManager.changeState(leftOverRack, isStartup ? MachineState.MISSING_ON_STARTUP : MachineState.DEAD, Optional.<String> absent(), Optional.<String> absent());
+rackManager.changeState(leftOverRack, isStartup ? MachineState.MISSING_ON_STARTUP : MachineState.DEAD, Optional.absent(), Optional.absent());
}
LOG.info("Found {} new racks ({} missing) and {} new slaves ({} missing)", racks, remainingActiveRacks.size(), slaves, activeSlavesById.size());
@@ -400,7 +399,7 @@ public void loadSlavesAndRacksFromMaster(MesosMasterStateObject state, boolean i
return CheckResult.ALREADY_ACTIVE;
case DEAD:
case MISSING_ON_STARTUP:
-manager.changeState(object.getId(), MachineState.ACTIVE, Optional.<String> absent(), Optional.<String> absent());
+manager.changeState(object.getId(), MachineState.ACTIVE, Optional.absent(), Optional.absent());
return CheckResult.NEW;
case DECOMMISSIONED:
case DECOMMISSIONING:
@@ -418,14 +417,14 @@ public void checkOffer(Offer offer) {
final String host = slaveAndRackHelper.getMaybeTruncatedHost(offer);
final Map<String, String> textAttributes = slaveAndRackHelper.getTextAttributes(offer);
-final SingularitySlave slave = new SingularitySlave(slaveId, host, rackId, textAttributes, Optional.<MesosResourcesObject>absent());
+final SingularitySlave slave = new SingularitySlave(slaveId, host, rackId, textAttributes, Optional.absent());
if (check(slave, slaveManager) == CheckResult.NEW) {
if (inactiveSlaveManager.isInactive(slave.getHost())) {
LOG.info("Slave {} on inactive host {} attempted to rejoin. Marking as decommissioned.", slave, host);
slaveManager.changeState(slave, MachineState.STARTING_DECOMMISSION,
Optional.of(String.format("Slave %s on inactive host %s attempted to rejoin cluster.", slaveId, host)),
-Optional.<String>absent());
+Optional.absent());
} else {
LOG.info("Offer revealed a new slave {}", slave);
}
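
Beyond the getUsagePerRequestType() to getLongRunningTasksUsage() swap in the first hunk, the rest of this commit is a mechanical cleanup: explicit generic type witnesses (Optional.<String> absent(), Collections.<String> emptyList(), Collections.<String, String>emptyMap()) are dropped in favor of plain calls, which compile once the compiler's target typing (Java 8) can infer the type argument from the enclosing call. The sketch below only illustrates that pattern; it is not part of the Singularity codebase, and changeState / printRacks are hypothetical stand-ins for the real callers shown in the diff.

import com.google.common.base.Optional;

import java.util.Collections;
import java.util.List;

public class TypeWitnessExample {

  // Hypothetical stand-in for calls like slaveManager.changeState(...) in the diff above.
  static void changeState(String newState, Optional<String> user, Optional<String> message) {
    System.out.println(newState + " by " + user.or("nobody") + ": " + message.or("(no message)"));
  }

  // Hypothetical stand-in for callers that accept a possibly-empty collection,
  // like the rack affinity check above.
  static void printRacks(List<String> rackIds) {
    System.out.println(rackIds);
  }

  public static void main(String[] args) {
    // Old style: the explicit type witness was required under Java 7, whose
    // inference could not use the target parameter type of the enclosing call.
    changeState("DEAD", Optional.<String>absent(), Optional.<String>absent());
    printRacks(Collections.<String>emptyList());

    // New style: Java 8 target typing infers <String> from the parameter types,
    // so the witnesses can be dropped with no change in behavior.
    changeState("DEAD", Optional.absent(), Optional.absent());
    printRacks(Collections.emptyList());
  }
}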