Skip to content
This repository has been archived by the owner on May 14, 2022. It is now read-only.

Commit

Permalink
Provide more information about the relocation trigger (#1194)
Browse files Browse the repository at this point in the history
* Provide more information about the relocation trigger

* Make sure TaskRelocationReason is always set
  • Loading branch information
tbak committed Dec 13, 2021
1 parent a2f2420 commit 7439929
Show file tree
Hide file tree
Showing 7 changed files with 41 additions and 15 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,15 @@

import java.util.Objects;

import com.netflix.titus.common.util.Evaluators;

public class TaskRelocationPlan {

public enum TaskRelocationReason {
TaskMigration
AgentEvacuation,
SelfManagedMigration,
TaskMigration,
Unknown,
}

private final String taskId;
Expand All @@ -32,7 +37,7 @@ public enum TaskRelocationReason {

public TaskRelocationPlan(String taskId, TaskRelocationReason reason, String reasonMessage, long decisionTime, long relocationTime) {
this.taskId = taskId;
this.reason = reason;
this.reason = Evaluators.getOrDefault(reason, TaskRelocationReason.Unknown);
this.reasonMessage = reasonMessage;
this.decisionTime = decisionTime;
this.relocationTime = relocationTime;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ private TaskRelocationPlan newNotDelayedRelocationPlan(Task task, boolean approv
: "Not enough quota to migrate the task (but no migration delay configured)";
return TaskRelocationPlan.newBuilder()
.withTaskId(task.getId())
.withReason(TaskRelocationReason.TaskMigration)
.withReason(TaskRelocationReason.AgentEvacuation)
.withReasonMessage(reasonMessage)
.withDecisionTime(now)
.withRelocationTime(now)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ private TaskRelocationPlan buildSelfManagedRelocationPlan(Job<?> job, Task task,

TaskRelocationPlan relocationPlan = TaskRelocationPlan.newBuilder()
.withTaskId(task.getId())
.withReason(TaskRelocationReason.TaskMigration)
.withReason(TaskRelocationReason.SelfManagedMigration)
.withReasonMessage(reason)
.withDecisionTime(now)
.withRelocationTime(now + selfManaged.getRelocationTimeMs())
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,16 +24,16 @@
import java.util.stream.Collectors;

import com.google.common.base.Stopwatch;
import com.netflix.titus.api.relocation.model.TaskRelocationPlan;
import com.netflix.titus.api.relocation.model.TaskRelocationStatus;
import com.netflix.titus.api.relocation.model.TaskRelocationStatus.TaskRelocationState;
import com.netflix.titus.common.runtime.TitusRuntime;
import com.netflix.titus.common.util.DateTimeExt;
import com.netflix.titus.common.util.ExceptionExt;
import com.netflix.titus.common.util.code.CodeInvariants;
import com.netflix.titus.common.util.rx.ReactorExt;
import com.netflix.titus.common.util.time.Clock;
import com.netflix.titus.runtime.connector.eviction.EvictionServiceClient;
import com.netflix.titus.api.relocation.model.TaskRelocationPlan;
import com.netflix.titus.api.relocation.model.TaskRelocationStatus;
import com.netflix.titus.api.relocation.model.TaskRelocationStatus.TaskRelocationState;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import reactor.core.publisher.Mono;
Expand Down Expand Up @@ -90,9 +90,20 @@ private Map<String, TaskRelocationStatus> execute(Map<String, TaskRelocationPlan
.collect(Collectors.toMap(
TaskRelocationPlan::getTaskId,
p -> {
String message = String.format("%s: reasonCode=%s, plannedRelocationTime=%s",
p.getReasonMessage(), p.getReason(), DateTimeExt.toUtcDateTimeString(p.getRelocationTime())
);
String message;
switch (p.getReason()) {
case AgentEvacuation:
message = String.format("Agent evacuation: %s", p.getReasonMessage());
break;
case SelfManagedMigration:
message = String.format("Self managed migration requested on %s: %s", DateTimeExt.toUtcDateTimeString(p.getDecisionTime()), p.getReasonMessage());
break;
case TaskMigration:
message = p.getReasonMessage();
break;
default:
message = String.format("[unrecognized relocation reason %s]: %s" + p.getReason(), p.getReasonMessage());
}
return evictionServiceClient.terminateTask(p.getTaskId(), message).timeout(EVICTION_TIMEOUT);
}));

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,11 @@ public void testAllExpectedJobMigrationsAreFound() {
assertThat(result.getAgentInstance().getServerGroupId()).isEqualTo("removable1");
}
TaskRelocationPlan plan = result.getTaskRelocationPlan();
assertThat(plan.getReason()).isEqualTo(TaskRelocationReason.TaskMigration);
if (plan.getTaskId().startsWith("jobImmediate")) {
assertThat(plan.getReason()).isEqualTo(TaskRelocationReason.TaskMigration);
} else {
assertThat(plan.getReason()).isEqualTo(TaskRelocationReason.AgentEvacuation);
}
if (isImmediateJobMigration) {
assertThat(plan.getReasonMessage()).containsSequence("Job marked for immediate eviction");
} else {
Expand Down Expand Up @@ -143,9 +147,9 @@ private void verifyRelocationPlan(long relocationDelay, String reasonMessage) {
);

Job<ServiceJobExt> job = JobGenerator.serviceJobs(
oneTaskServiceJobDescriptor()
.but(ofServiceSize(2),
withDisruptionBudget(budget(selfManagedPolicy(relocationDelay), unlimitedRate(), Collections.emptyList()))))
oneTaskServiceJobDescriptor()
.but(ofServiceSize(2),
withDisruptionBudget(budget(selfManagedPolicy(relocationDelay), unlimitedRate(), Collections.emptyList()))))
.getValue();

ServiceJobTask task = JobGenerator.serviceTasks(job).getValue();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
import com.netflix.titus.supplementary.relocation.model.DeschedulingResult;
import com.netflix.titus.testkit.model.job.JobGenerator;
import com.netflix.titus.testkit.model.job.JobTestFunctions;
import org.junit.Before;
import org.junit.Test;

import static com.netflix.titus.api.jobmanager.model.job.JobFunctions.ofServiceSize;
Expand Down Expand Up @@ -82,6 +83,11 @@ public class TaskMigrationDeschedulerTest {

private final NodeDataResolver nodeDataResolver = relocationConnectorStubs.getNodeDataResolver();

@Before
public void setUp() {
// So it does not start at 0.
clock.advanceTime(Duration.ofDays(1));
}

@Test
public void testImmediateMigrations() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ public void testPlannedRelocation() throws Exception {
// Get the plan
TaskRelocationPlan plan = doTry(() -> findRelocationPlan(task.getId()));
assertThat(plan.getTaskId()).isEqualTo(task.getId());
assertThat(plan.getReasonCode()).isEqualTo(TaskRelocationReason.TaskMigration.name());
assertThat(plan.getReasonCode()).isEqualTo(TaskRelocationReason.SelfManagedMigration.name());
assertThat(plan.getRelocationTime()).isLessThanOrEqualTo(clock.wallTime() + RELOCATION_TIME_MS);

// Wait for the relocation
Expand Down

0 comments on commit 7439929

Please sign in to comment.