New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Better task balancing #1482

Merged
merged 73 commits into from Jun 8, 2017
Commits
Jump to file or symbol
Failed to load files and symbols.
+972 −194
Diff settings

Always

Just for now

@@ -174,13 +174,13 @@ These settings should live under the "mesos" field inside the root configuration
#### Resource Limits ####
| Parameter | Default | Description | Type |
|-----------|---------|-------------|------|
| defaultCpus | 1 | Number of CPUs to request for a task if none are specified | int |
| defaultCpus | 1 | Number of CPUs to request for a task if none are specified | int |
| defaultMemory | 64 | MB of memory to request for a task if none is specified | int |
| maxNumInstancesPerRequest | 25 | Max instances (tasks) to allow for a request (requests using over this will return a 400) | int |
| maxNumCpusPerInstance | 50 | Max number of CPUs allowed on a given task | int |
| maxNumCpusPerRequest | 900 | Max number of CPUs allowed for a given request (cpus per task * task instance) | int |
| maxMemoryMbPerInstance | 24000 | Max MB of memory allowed on a given task | int |
| maxMemoryMbPerRequest | 450000 | Max MB of memory allowed for a given request (memoryMb per task * task instances) | int |
| maxNumCpusPerInstance | 50 | Max number of CPUs allowed on a given task | int |
| maxNumCpusPerRequest | 900 | Max number of CPUs allowed for a given request (cpus per task * task instance) | int |
| maxMemoryMbPerInstance | 24000 | Max MB of memory allowed on a given task | int |
| maxMemoryMbPerRequest | 450000 | Max MB of memory allowed for a given request (memoryMb per task * task instances) | int |
#### Racks ####
| Parameter | Default | Description | Type |
@@ -192,7 +192,18 @@ These settings should live under the "mesos" field inside the root configuration
| Parameter | Default | Description | Type |
|-----------|---------|-------------|------|
| slaveHttpPort | 5051 | The port to talk to slaves on | int |
| slaveHttpsPort | absent | The HTTPS port to talk to slaves on | Integer (Optional) |
| slaveHttpsPort | absent | The HTTPS port to talk to slaves on | Integer (Optional) |
#### Offers ####
| Parameter | Default | Description | Type |
|-----------|---------|-------------|------|
| longRunningUsedCpuWeightForOffer | 0.30 | The weight long running tasks' cpu utilization carries when scoring an offer (should add up to 1 with longRunningUsedMemWeightForOffer) | double |
| longRunningUsedMemWeightForOffer | 0.70 | The weight long running tasks' memory utilization carries when scoring an offer (should add up to 1 with longRunningUsedCpuWeightForOffer) | double |
| freeCpuWeightForOffer | 0.30 | The weight the slave's free cpu carries when scoring an offer (should add up to 1 with freeMemWeightForOffer) | double |
| freeMemWeightForOffer | 0.70 | The weight the slave's free memory carries when scoring an offer (should add up to 1 with freeCpuWeightForOffer) | double |
| defaultOfferScoreForMissingUsage | 0.30 | The default offer score used for offers without utilization metrics | double |
| considerNonLongRunningTaskLongRunningAfterRunningForSeconds | 21600 (6 hours) | If a non long running task runs, on average, this long or more, it's considered a long running task | long |
| maxNonLongRunningUsedResourceWeight | 0.50 | The max weight long running tasks' utilization can carry when scoring a non long running task for an offer | double |
## Database ##
@@ -0,0 +1,57 @@
package com.hubspot.singularity;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
/**
 * Immutable holder for aggregate memory and CPU figures together with the timestamp they
 * were recorded with.
 *
 * <p>Instances are created through the {@code @JsonCreator} constructor, which binds each
 * argument to the JSON property of the same name. The class exposes no setters, so every
 * field is declared {@code final}.
 */
public class SingularityClusterUtilization {
  private final long totalMemBytesUsed;
  private final long totalMemBytesAvailable;
  private final double totalCpuUsed;
  private final double totalCpuAvailable;
  private final long timestamp;

  /**
   * @param totalMemBytesUsed value returned by {@link #getTotalMemBytesUsed()}
   * @param totalMemBytesAvailable value returned by {@link #getTotalMemBytesAvailable()}
   * @param totalCpuUsed value returned by {@link #getTotalCpuUsed()}
   * @param totalCpuAvailable value returned by {@link #getTotalCpuAvailable()}
   * @param timestamp value returned by {@link #getTimestamp()}
   */
  @JsonCreator
  public SingularityClusterUtilization(@JsonProperty("totalMemBytesUsed") long totalMemBytesUsed,
                                       @JsonProperty("totalMemBytesAvailable") long totalMemBytesAvailable,
                                       @JsonProperty("totalCpuUsed") double totalCpuUsed,
                                       @JsonProperty("totalCpuAvailable") double totalCpuAvailable,
                                       @JsonProperty("timestamp") long timestamp) {
    this.totalMemBytesUsed = totalMemBytesUsed;
    this.totalMemBytesAvailable = totalMemBytesAvailable;
    this.totalCpuUsed = totalCpuUsed;
    this.totalCpuAvailable = totalCpuAvailable;
    this.timestamp = timestamp;
  }

  /** Returns the total memory in use (bytes, per the property name). */
  public long getTotalMemBytesUsed() {
    return totalMemBytesUsed;
  }

  /** Returns the total memory available (bytes, per the property name). */
  public long getTotalMemBytesAvailable() {
    return totalMemBytesAvailable;
  }

  /** Returns the total CPU in use. */
  public double getTotalCpuUsed() {
    return totalCpuUsed;
  }

  /** Returns the total CPU available. */
  public double getTotalCpuAvailable() {
    return totalCpuAvailable;
  }

  /** Returns the timestamp supplied at construction; its unit is not established by this class. */
  public long getTimestamp() {
    return timestamp;
  }

  @Override
  public String toString() {
    return "SingularityClusterUtilization [" +
        "totalMemBytesUsed=" + totalMemBytesUsed +
        ", totalMemBytesAvailable=" + totalMemBytesAvailable +
        ", totalCpuUsed=" + totalCpuUsed +
        ", totalCpuAvailable=" + totalCpuAvailable +
        ", timestamp=" + timestamp +
        "]";
  }
}
@@ -1,28 +1,58 @@
package com.hubspot.singularity;
import java.util.Map;
import com.google.common.base.Optional;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
public class SingularitySlaveUsage {
/**
 * Keys naming the individual figures that can be stored in a usage map such as
 * {@code Map<ResourceUsageType, Number>}. Declaration order is preserved.
 */
public enum ResourceUsageType {
  CPU_USED,
  MEMORY_BYTES_USED,
  CPU_FREE,
  MEMORY_BYTES_FREE
}
// Number of bytes in one megabyte (1024 * 1024); used by getMemoryBytesTotal() to convert MB to bytes.
public static final long BYTES_PER_MEGABYTE = 1024L * 1024L;
// Note the mixed units in the names: "used" memory is in bytes, "reserved" and "total" are in MB.
private final long memoryBytesUsed;
private final long memoryMbReserved;
private final int numTasks;
// NOTE(review): unit of the timestamp is not established in this class - confirm with the producer.
private final long timestamp;
private final double cpusUsed;
private final double cpusReserved;
// Totals are optional and are handed in as Optional values by the constructor.
private final Optional<Long> memoryMbTotal;
private final Optional<Double> cpusTotal;
// Figures keyed by ResourceUsageType; presumably limited to long running tasks (per the name).
private final Map<ResourceUsageType, Number> longRunningTasksUsage;
/**
 * Builds a usage record from its individual figures. Each argument is bound to the JSON
 * property of the same name and is stored unchanged in the field of the same name.
 *
 * @param memoryBytesUsed memory in use, in bytes
 * @param memoryMbReserved memory reserved, in MB
 * @param timestamp timestamp associated with this record
 * @param cpusUsed CPUs in use
 * @param cpusReserved CPUs reserved
 * @param numTasks number of tasks
 * @param memoryMbTotal total memory in MB, if known
 * @param cpusTotal total CPUs, if known
 * @param longRunningTasksUsage usage figures keyed by {@link ResourceUsageType}
 */
@JsonCreator
public SingularitySlaveUsage(@JsonProperty("memoryBytesUsed") long memoryBytesUsed,
                             @JsonProperty("memoryMbReserved") long memoryMbReserved,
                             @JsonProperty("timestamp") long timestamp,
                             @JsonProperty("cpusUsed") double cpusUsed,
                             @JsonProperty("cpusReserved") double cpusReserved,
                             @JsonProperty("numTasks") int numTasks,
                             @JsonProperty("memoryMbTotal") Optional<Long> memoryMbTotal,
                             @JsonProperty("cpusTotal") Optional<Double> cpusTotal,
                             @JsonProperty("longRunningTasksUsage") Map<ResourceUsageType, Number> longRunningTasksUsage) {
  this.memoryBytesUsed = memoryBytesUsed;
  this.memoryMbReserved = memoryMbReserved;
  this.timestamp = timestamp;
  this.cpusUsed = cpusUsed;
  this.cpusReserved = cpusReserved;
  this.numTasks = numTasks;
  this.memoryMbTotal = memoryMbTotal;
  this.cpusTotal = cpusTotal;
  this.longRunningTasksUsage = longRunningTasksUsage;
}
/** Returns the memory in use, in bytes. */
public long getMemoryBytesUsed() {
  return this.memoryBytesUsed;
}
/** Returns the reserved memory, in MB. */
public long getMemoryMbReserved() {
  return this.memoryMbReserved;
}
/** Returns the timestamp this record was constructed with. */
public long getTimestamp() {
  return this.timestamp;
}
@@ -31,13 +61,41 @@ public double getCpusUsed() {
return cpusUsed;
}
/** Returns the reserved CPU figure. */
public double getCpusReserved() {
  return this.cpusReserved;
}
/** Returns the task count this record was constructed with. */
public int getNumTasks() {
  return this.numTasks;
}
/**
 * Returns the total memory converted from MB to bytes, or an absent Optional when no
 * total is recorded.
 */
public Optional<Long> getMemoryBytesTotal() {
  if (!memoryMbTotal.isPresent()) {
    return Optional.absent();
  }
  // Stored total is in MB; scale it to bytes.
  return Optional.of(memoryMbTotal.get() * BYTES_PER_MEGABYTE);
}
/**
 * Returns the total memory in MB, or an absent Optional when no total is recorded.
 *
 * <p>The stored Optional is immutable, so it is returned as-is (the same way
 * {@code getCpusTotal()} returns its field) instead of being unwrapped and re-wrapped.
 */
public Optional<Long> getMemoryMbTotal() {
  return memoryMbTotal;
}
/** Returns the total CPU figure, or an absent Optional when none is recorded. */
public Optional<Double> getCpusTotal() {
  return this.cpusTotal;
}
/**
 * Returns the usage map keyed by {@code ResourceUsageType}. The stored map instance is
 * handed back directly, without copying.
 */
public Map<ResourceUsageType, Number> getLongRunningTasksUsage() {
  return this.longRunningTasksUsage;
}
/**
 * Renders all nine fields as {@code name=value} pairs inside
 * {@code SingularitySlaveUsage [...]}.
 */
@Override
public String toString() {
  return "SingularitySlaveUsage [memoryBytesUsed=" + memoryBytesUsed +
      ", memoryMbReserved=" + memoryMbReserved +
      ", memoryMbTotal=" + memoryMbTotal +
      ", cpusUsed=" + cpusUsed +
      ", cpusReserved=" + cpusReserved +
      ", cpusTotal=" + cpusTotal +
      ", numTasks=" + numTasks +
      ", longRunningTasksUsage=" + longRunningTasksUsage +
      ", timestamp=" + timestamp +
      "]";
}
}
@@ -8,7 +8,7 @@
private final String slaveId;
/**
 * Copies every value of {@code usage} into the superclass through its nine-argument
 * constructor and records the slave id alongside it.
 *
 * @param usage usage record whose getters supply the superclass constructor arguments
 * @param slaveId id stored in the {@code slaveId} field
 */
public SingularitySlaveUsageWithId(SingularitySlaveUsage usage, String slaveId) {
  super(usage.getMemoryBytesUsed(),
      usage.getMemoryMbReserved(),
      usage.getTimestamp(),
      usage.getCpusUsed(),
      usage.getCpusReserved(),
      usage.getNumTasks(),
      usage.getMemoryMbTotal(),
      usage.getCpusTotal(),
      usage.getLongRunningTasksUsage());
  this.slaveId = slaveId;
}
@@ -59,16 +59,16 @@
@JsonCreator
public SingularityState(@JsonProperty("activeTasks") int activeTasks, @JsonProperty("launchingTasks") int launchingTasks, @JsonProperty("activeRequests") int activeRequests, @JsonProperty("cooldownRequests") int cooldownRequests,
@JsonProperty("pausedRequests") int pausedRequests, @JsonProperty("scheduledTasks") int scheduledTasks, @JsonProperty("pendingRequests") int pendingRequests, @JsonProperty("lbCleanupTasks") int lbCleanupTasks,
@JsonProperty("lbCleanupRequests") int lbCleanupRequests, @JsonProperty("cleaningRequests") int cleaningRequests, @JsonProperty("activeSlaves") int activeSlaves, @JsonProperty("deadSlaves") int deadSlaves,
@JsonProperty("decommissioningSlaves") int decommissioningSlaves, @JsonProperty("activeRacks") int activeRacks, @JsonProperty("deadRacks") int deadRacks, @JsonProperty("decommissioningRacks") int decommissioningRacks,
@JsonProperty("cleaningTasks") int cleaningTasks, @JsonProperty("hostStates") List<SingularityHostState> hostStates, @JsonProperty("oldestDeploy") long oldestDeploy, @JsonProperty("numDeploys") int numDeploys,
@JsonProperty("oldestDeployStep") long oldestDeployStep, @JsonProperty("activeDeploys") List<SingularityDeployMarker> activeDeploys,
@JsonProperty("lateTasks") int lateTasks, @JsonProperty("futureTasks") int futureTasks, @JsonProperty("maxTaskLag") long maxTaskLag, @JsonProperty("generatedAt") long generatedAt,
@JsonProperty("overProvisionedRequestIds") List<String> overProvisionedRequestIds, @JsonProperty("underProvisionedRequestIds") List<String> underProvisionedRequestIds,
@JsonProperty("overProvisionedRequests") int overProvisionedRequests, @JsonProperty("underProvisionedRequests") int underProvisionedRequests, @JsonProperty("finishedRequests") int finishedRequests,
@JsonProperty("unknownRacks") int unknownRacks, @JsonProperty("unknownSlaves") int unknownSlaves, @JsonProperty("authDatastoreHealthy") Optional<Boolean> authDatastoreHealthy, @JsonProperty("minimumPriorityLevel") Optional<Double> minimumPriorityLevel,
@JsonProperty("avgStatusUpdateDelayMs") long avgStatusUpdateDelayMs) {
@JsonProperty("pausedRequests") int pausedRequests, @JsonProperty("scheduledTasks") int scheduledTasks, @JsonProperty("pendingRequests") int pendingRequests, @JsonProperty("lbCleanupTasks") int lbCleanupTasks,
@JsonProperty("lbCleanupRequests") int lbCleanupRequests, @JsonProperty("cleaningRequests") int cleaningRequests, @JsonProperty("activeSlaves") int activeSlaves, @JsonProperty("deadSlaves") int deadSlaves,
@JsonProperty("decommissioningSlaves") int decommissioningSlaves, @JsonProperty("activeRacks") int activeRacks, @JsonProperty("deadRacks") int deadRacks, @JsonProperty("decommissioningRacks") int decommissioningRacks,
@JsonProperty("cleaningTasks") int cleaningTasks, @JsonProperty("hostStates") List<SingularityHostState> hostStates, @JsonProperty("oldestDeploy") long oldestDeploy, @JsonProperty("numDeploys") int numDeploys,
@JsonProperty("oldestDeployStep") long oldestDeployStep, @JsonProperty("activeDeploys") List<SingularityDeployMarker> activeDeploys,
@JsonProperty("lateTasks") int lateTasks, @JsonProperty("futureTasks") int futureTasks, @JsonProperty("maxTaskLag") long maxTaskLag, @JsonProperty("generatedAt") long generatedAt,
@JsonProperty("overProvisionedRequestIds") List<String> overProvisionedRequestIds, @JsonProperty("underProvisionedRequestIds") List<String> underProvisionedRequestIds,
@JsonProperty("overProvisionedRequests") int overProvisionedRequests, @JsonProperty("underProvisionedRequests") int underProvisionedRequests, @JsonProperty("finishedRequests") int finishedRequests,
@JsonProperty("unknownRacks") int unknownRacks, @JsonProperty("unknownSlaves") int unknownSlaves, @JsonProperty("authDatastoreHealthy") Optional<Boolean> authDatastoreHealthy, @JsonProperty("minimumPriorityLevel") Optional<Double> minimumPriorityLevel,
@JsonProperty("avgStatusUpdateDelayMs") long avgStatusUpdateDelayMs) {
this.activeTasks = activeTasks;
this.launchingTasks = launchingTasks;
this.activeRequests = activeRequests;
@@ -198,6 +198,20 @@
private int maxTasksPerOfferPerRequest = 0;
// Weight of long running tasks' cpu utilization when scoring an offer
// (documented as adding up to 1 with longRunningUsedMemWeightForOffer).
private double longRunningUsedCpuWeightForOffer = 0.30;
// Weight of long running tasks' memory utilization when scoring an offer.
private double longRunningUsedMemWeightForOffer = 0.70;
// Weight of the slave's free cpu when scoring an offer
// (documented as adding up to 1 with freeMemWeightForOffer).
private double freeCpuWeightForOffer = 0.30;
// Weight of the slave's free memory when scoring an offer.
private double freeMemWeightForOffer = 0.70;
// Score used for offers that have no utilization metrics.
private double defaultOfferScoreForMissingUsage = 0.30;
// A non long running task that runs, on average, at least this long is treated as long running.
// Default is 6 hours expressed in seconds (21600).
private long considerNonLongRunningTaskLongRunningAfterRunningForSeconds = TimeUnit.HOURS.toSeconds(6);
// Max weight long running tasks' utilization can carry when scoring a non long running task for an offer.
private double maxNonLongRunningUsedResourceWeight = 0.50;
private int maxRequestIdSize = 100;
private int maxUserIdSize = 100;
@@ -672,6 +686,33 @@ public int getMaxTasksPerOfferPerRequest() {
return maxTasksPerOfferPerRequest;
}
/**
 * Returns the weight that long running tasks' cpu utilization carries when scoring an offer.
 */
public double getLongRunningUsedCpuWeightForOffer() {
  return this.longRunningUsedCpuWeightForOffer;
}
/**
 * Returns the weight that long running tasks' memory utilization carries when scoring an offer.
 */
public double getLongRunningUsedMemWeightForOffer() {
  return this.longRunningUsedMemWeightForOffer;
}
/** Returns the weight that a slave's free cpu carries when scoring an offer. */
public double getFreeCpuWeightForOffer() {
  return this.freeCpuWeightForOffer;
}
/** Returns the weight that a slave's free memory carries when scoring an offer. */
public double getFreeMemWeightForOffer() {
  return this.freeMemWeightForOffer;
}
/** Returns the offer score used for offers that have no utilization metrics. */
public double getDefaultOfferScoreForMissingUsage() {
  return this.defaultOfferScoreForMissingUsage;
}
/**
 * Returns the average run time, in seconds, at or above which a non long running task is
 * considered long running.
 */
public long getConsiderNonLongRunningTaskLongRunningAfterRunningForSeconds() {
  return this.considerNonLongRunningTaskLongRunningAfterRunningForSeconds;
}
/**
 * Returns the max weight long running tasks' utilization can carry when scoring a non
 * long running task for an offer.
 */
public double getMaxNonLongRunningUsedResourceWeight() {
  return this.maxNonLongRunningUsedResourceWeight;
}
/** Returns the {@code MesosConfiguration} currently held by this configuration. */
public MesosConfiguration getMesosConfiguration() {
  return this.mesosConfiguration;
}
@@ -1032,6 +1073,40 @@ public void setMaxTasksPerOfferPerRequest(int maxTasksPerOfferPerRequest) {
this.maxTasksPerOfferPerRequest = maxTasksPerOfferPerRequest;
}
/**
 * Sets the weight that long running tasks' cpu utilization carries when scoring an offer.
 * The value is stored as given; no range or sum check is performed here.
 *
 * @return this configuration, so calls can be chained
 */
public SingularityConfiguration setLongRunningUsedCpuWeightForOffer(double longRunningUsedCpuWeightForOffer) {
this.longRunningUsedCpuWeightForOffer = longRunningUsedCpuWeightForOffer;
return this;
}
/**
 * Sets the weight that long running tasks' memory utilization carries when scoring an offer.
 * The value is stored as given; no range or sum check is performed here.
 *
 * @return this configuration, so calls can be chained
 */
public SingularityConfiguration setLongRunningUsedMemWeightForOffer(double longRunningUsedMemWeightForOffer) {
this.longRunningUsedMemWeightForOffer = longRunningUsedMemWeightForOffer;
return this;
}
/**
 * Sets the weight that a slave's free cpu carries when scoring an offer.
 * The value is stored as given; no range or sum check is performed here.
 *
 * @return this configuration, so calls can be chained
 */
public SingularityConfiguration setFreeCpuWeightForOffer(double freeCpuWeightForOffer) {
this.freeCpuWeightForOffer = freeCpuWeightForOffer;
return this;
}
/**
 * Sets the weight that a slave's free memory carries when scoring an offer.
 * The value is stored as given; no range or sum check is performed here.
 *
 * @return this configuration, so calls can be chained
 */
public SingularityConfiguration setFreeMemWeightForOffer(double freeMemWeightForOffer) {
this.freeMemWeightForOffer = freeMemWeightForOffer;
return this;
}
/**
 * Sets the offer score used for offers that have no utilization metrics.
 *
 * @return this configuration, so calls can be chained
 */
public SingularityConfiguration setDefaultOfferScoreForMissingUsage(double defaultOfferScoreForMissingUsage) {
this.defaultOfferScoreForMissingUsage = defaultOfferScoreForMissingUsage;
return this;
}
/**
 * Sets the average run time, in seconds, at or above which a non long running task is
 * considered long running.
 *
 * @return this configuration, so calls can be chained
 */
public SingularityConfiguration setConsiderNonLongRunningTaskLongRunningAfterRunningForSeconds(long considerNonLongRunningTaskLongRunningAfterRunningForSeconds) {
this.considerNonLongRunningTaskLongRunningAfterRunningForSeconds = considerNonLongRunningTaskLongRunningAfterRunningForSeconds;
return this;
}
/**
 * Sets the max weight long running tasks' utilization can carry when scoring a non long
 * running task for an offer.
 *
 * @return this configuration, so calls can be chained
 */
public SingularityConfiguration setMaxNonLongRunningUsedResourceWeight(double maxNonLongRunningUsedResourceWeight) {
this.maxNonLongRunningUsedResourceWeight = maxNonLongRunningUsedResourceWeight;
return this;
}
/**
 * Replaces the held {@code MesosConfiguration}. Unlike the offer-weight setters, this one
 * returns nothing and cannot be chained.
 */
public void setMesosConfiguration(MesosConfiguration mesosConfiguration) {
this.mesosConfiguration = mesosConfiguration;
}
Oops, something went wrong.
ProTip! Use n and p to navigate between commits in a pull request.