From 03ca4081ebe8023d90b6e45827372ecf4117ac99 Mon Sep 17 00:00:00 2001
From: Jiajun Yao <jeromeyjj@gmail.com>
Date: Sat, 11 May 2024 07:11:50 -0700
Subject: [PATCH] [Core] Fix worker process leaks after job finishes (#44214)

Signed-off-by: Jiajun Yao <jeromeyjj@gmail.com>
Signed-off-by: gchurch <gabe1church@gmail.com>
---
 python/ray/_private/worker.py                 |  2 +-
 python/ray/_raylet.pyx                        |  5 +
 python/ray/includes/libcoreworker.pxd         |  1 +
 python/ray/tests/test_job.py                  | 94 +++++++++++++++++++
 .../workflow/tests/test_workflow_queuing.py   | 14 +++
 src/mock/ray/raylet/worker.h                  |  1 +
 src/ray/common/ray_config_def.h               |  3 -
 src/ray/common/task/task_spec.cc              | 17 +++-
 src/ray/common/task/task_spec.h               | 14 ++-
 src/ray/common/task/task_util.h               | 12 ++-
 src/ray/common/test/task_spec_test.cc         | 72 ++++++++++++++
 src/ray/core_worker/context.cc                |  8 ++
 src/ray/core_worker/context.h                 |  5 +
 src/ray/core_worker/core_worker.cc            | 35 +++++--
 src/ray/protobuf/common.proto                 |  4 +
 src/ray/raylet/local_task_manager.cc          |  5 +
 src/ray/raylet/node_manager.cc                | 27 +++++-
 .../scheduling/cluster_task_manager_test.cc   | 19 +++-
 src/ray/raylet/test/util.h                    |  6 ++
 src/ray/raylet/worker.h                       |  8 ++
 src/ray/raylet/worker_pool.cc                 | 76 +++++++++------
 src/ray/raylet/worker_pool.h                  | 17 ++--
 src/ray/raylet/worker_pool_test.cc            | 85 ++++++++++++++++-
 23 files changed, 469 insertions(+), 61 deletions(-)

diff --git a/python/ray/_private/worker.py b/python/ray/_private/worker.py
index 1e0ac6b99e811f..26407708475d8f 100644
--- a/python/ray/_private/worker.py
+++ b/python/ray/_private/worker.py
@@ -2718,7 +2718,7 @@ def put(
 @PublicAPI
 @client_mode_hook
 def wait(
-    ray_waitables: Union["ObjectRef[R]", "ObjectRefGenerator[R]"],
+    ray_waitables: List[Union["ObjectRef[R]", "ObjectRefGenerator[R]"]],
     *,
     num_returns: int = 1,
     timeout: Optional[float] = None,
diff --git a/python/ray/_raylet.pyx b/python/ray/_raylet.pyx
index cf595f0dc23296..5a053aa5c9f487 100644
--- a/python/ray/_raylet.pyx
+++ b/python/ray/_raylet.pyx
@@ -4855,6 +4855,11 @@ cdef class CoreWorker:
         return (CCoreWorkerProcess.GetCoreWorker().GetWorkerContext()
                 .CurrentActorMaxConcurrency())
 
+    def get_current_root_detached_actor_id(self) -> ActorID:
+        # This is only used in test
+        return ActorID(CCoreWorkerProcess.GetCoreWorker().GetWorkerContext()
+                       .GetRootDetachedActorID().Binary())
+
     def get_queued_future(self, task_id: Optional[TaskID]) -> ConcurrentFuture:
         """Get a asyncio.Future that's queued in the event loop."""
         with self._task_id_to_future_lock:
diff --git a/python/ray/includes/libcoreworker.pxd b/python/ray/includes/libcoreworker.pxd
index 71bca30f4ff252..cb3ed32b1bd8ec 100644
--- a/python/ray/includes/libcoreworker.pxd
+++ b/python/ray/includes/libcoreworker.pxd
@@ -85,6 +85,7 @@ cdef extern from "ray/core_worker/context.h" nogil:
         c_bool CurrentActorIsAsync()
         const c_string &GetCurrentSerializedRuntimeEnv()
         int CurrentActorMaxConcurrency()
+        const CActorID &GetRootDetachedActorID()
 
 cdef extern from "ray/core_worker/generator_waiter.h" nogil:
     cdef cppclass CGeneratorBackpressureWaiter "ray::core::GeneratorBackpressureWaiter": # noqa
diff --git a/python/ray/tests/test_job.py b/python/ray/tests/test_job.py
index e748036a09c7cc..021d36da2aa672 100644
--- a/python/ray/tests/test_job.py
+++ b/python/ray/tests/test_job.py
@@ -17,6 +17,7 @@
     run_string_as_driver_nonblocking,
     wait_for_condition,
     format_web_url,
+    wait_for_pid_to_exit,
 )
 from ray.job_config import JobConfig
 from ray.job_submission import JobSubmissionClient
@@ -296,6 +297,99 @@ def verify():
         # TODO(sang): Client entrypoint not supported yet.
 
 
+def test_task_spec_root_detached_actor_id(shutdown_only):
+    """Test to make sure root detached actor id is set correctly
+    for task spec of submitted task or actor.
+    """
+
+    ray.init()
+
+    @ray.remote
+    def get_task_root_detached_actor_id():
+        core_worker = ray._private.worker.global_worker.core_worker
+        return core_worker.get_current_root_detached_actor_id().hex()
+
+    @ray.remote
+    class Actor:
+        def get_root_detached_actor_id(self):
+            core_worker = ray._private.worker.global_worker.core_worker
+            return core_worker.get_current_root_detached_actor_id().hex()
+
+    @ray.remote(lifetime="detached")
+    class DetachedActor:
+        def check(self):
+            core_worker = ray._private.worker.global_worker.core_worker
+            assert (
+                ray.get_runtime_context().get_actor_id()
+                == core_worker.get_current_root_detached_actor_id().hex()
+            )
+            assert ray.get_runtime_context().get_actor_id() == ray.get(
+                get_task_root_detached_actor_id.remote()
+            )
+            actor = Actor.remote()
+            assert ray.get_runtime_context().get_actor_id() == ray.get(
+                actor.get_root_detached_actor_id.remote()
+            )
+
+    assert (
+        ray.get(get_task_root_detached_actor_id.remote())
+        == ray._raylet.ActorID.nil().hex()
+    )
+    actor = Actor.remote()
+    assert (
+        ray.get(actor.get_root_detached_actor_id.remote())
+        == ray._raylet.ActorID.nil().hex()
+    )
+    detached_actor = DetachedActor.remote()
+    ray.get(detached_actor.check.remote())
+
+
+def test_no_process_leak_after_job_finishes(ray_start_cluster):
+    """Test to make sure when a job finishes,
+    all the worker processes belonging to it exit.
+    """
+    cluster = ray_start_cluster
+    cluster.add_node(num_cpus=8)
+    ray.init(address=cluster.address)
+
+    @ray.remote(num_cpus=0)
+    class PidActor:
+        def __init__(self):
+            self.pids = set()
+            self.pids.add(os.getpid())
+
+        def add_pid(self, pid):
+            self.pids.add(pid)
+
+        def get_pids(self):
+            return self.pids
+
+    @ray.remote
+    def child(pid_actor):
+        # child worker process should be forcibly killed
+        # when the job finishes.
+        ray.get(pid_actor.add_pid.remote(os.getpid()))
+        time.sleep(1000000)
+
+    @ray.remote
+    def parent(pid_actor):
+        ray.get(pid_actor.add_pid.remote(os.getpid()))
+        child.remote(pid_actor)
+
+    pid_actor = PidActor.remote()
+    ray.get(parent.remote(pid_actor))
+
+    wait_for_condition(lambda: len(ray.get(pid_actor.get_pids.remote())) == 3)
+
+    pids = ray.get(pid_actor.get_pids.remote())
+
+    ray.shutdown()
+    # Job finishes at this point
+
+    for pid in pids:
+        wait_for_pid_to_exit(pid)
+
+
 if __name__ == "__main__":
 
     # Make subprocess happy in bazel.
diff --git a/python/ray/workflow/tests/test_workflow_queuing.py b/python/ray/workflow/tests/test_workflow_queuing.py
index 297dd0ff3aaf69..05c2ec8eb59d13 100644
--- a/python/ray/workflow/tests/test_workflow_queuing.py
+++ b/python/ray/workflow/tests/test_workflow_queuing.py
@@ -1,6 +1,8 @@
+import os
 import pytest
 import ray
 from ray import workflow
+from ray._private.test_utils import wait_for_condition
 from ray.tests.conftest import *  # noqa
 
 
@@ -146,6 +148,8 @@ def test_workflow_queuing_resume_all(shutdown_only, tmp_path):
 
     @ray.remote
     def long_running(x):
+        file_path = str(tmp_path / f".long_running_{x}")
+        open(file_path, "w")
         with filelock.FileLock(lock_path):
             return x
 
@@ -156,6 +160,16 @@ def long_running(x):
             workflow.run_async(wfs[i], workflow_id=f"workflow_{i}") for i in range(4)
         ]
 
+        # Make sure workflow_0 and workflow_1 are running user code
+        # Otherwise it might run workflow code that contains
+        # ray.get() when ray.shutdown()
+        # is called and that can cause ray.get() to throw exception
+        # since raylet is stopped
+        # before worker process (this is a bug we should fix)
+        # and transition the workflow to FAILED status.
+        wait_for_condition(lambda: os.path.isfile(str(tmp_path / ".long_running_0")))
+        wait_for_condition(lambda: os.path.isfile(str(tmp_path / ".long_running_1")))
+
         assert sorted(x[0] for x in workflow.list_all({workflow.RUNNING})) == [
             "workflow_0",
             "workflow_1",
diff --git a/src/mock/ray/raylet/worker.h b/src/mock/ray/raylet/worker.h
index 43878348626e7d..aaf7de98506e57 100644
--- a/src/mock/ray/raylet/worker.h
+++ b/src/mock/ray/raylet/worker.h
@@ -80,6 +80,7 @@ class MockWorkerInterface : public WorkerInterface {
   MOCK_METHOD(bool, IsRegistered, (), (override));
   MOCK_METHOD(rpc::CoreWorkerClientInterface *, rpc_client, (), (override));
   MOCK_METHOD(bool, SetJobId, (const JobID &job_id), (override));
+  MOCK_METHOD(const ActorID &, GetRootDetachedActorId, (), (override));
 };
 
 }  // namespace raylet
diff --git a/src/ray/common/ray_config_def.h b/src/ray/common/ray_config_def.h
index 926a0bfb36bd8a..5f7ea3412eed31 100644
--- a/src/ray/common/ray_config_def.h
+++ b/src/ray/common/ray_config_def.h
@@ -862,9 +862,6 @@ RAY_CONFIG(int64_t,
 RAY_CONFIG(bool, worker_core_dump_exclude_plasma_store, true)
 RAY_CONFIG(bool, raylet_core_dump_exclude_plasma_store, true)
 
-/// Whether to kill idle workers of a terminated job.
-RAY_CONFIG(bool, kill_idle_workers_of_terminated_job, true)
-
 // Instruct the Python default worker to preload the specified imports.
 // This is specified as a comma-separated list.
 // If left empty, no such attempt will be made.
diff --git a/src/ray/common/task/task_spec.cc b/src/ray/common/task/task_spec.cc
index 218e565429b8e8..29cf326649a873 100644
--- a/src/ray/common/task/task_spec.cc
+++ b/src/ray/common/task/task_spec.cc
@@ -159,6 +159,13 @@ TaskID TaskSpecification::ParentTaskId() const {
   return TaskID::FromBinary(message_->parent_task_id());
 }
 
+ActorID TaskSpecification::RootDetachedActorId() const {
+  if (message_->root_detached_actor_id().empty() /* e.g., empty proto default */) {
+    return ActorID::Nil();
+  }
+  return ActorID::FromBinary(message_->root_detached_actor_id());
+}
+
 TaskID TaskSpecification::SubmitterTaskId() const {
   if (message_->submitter_task_id().empty() /* e.g., empty proto default */) {
     return TaskID::Nil();
@@ -198,7 +205,8 @@ int TaskSpecification::GetRuntimeEnvHash() const {
   WorkerCacheKey env = {SerializedRuntimeEnv(),
                         GetRequiredResources().GetResourceMap(),
                         IsActorCreationTask(),
-                        GetRequiredResources().Get(scheduling::ResourceID::GPU()) > 0};
+                        GetRequiredResources().Get(scheduling::ResourceID::GPU()) > 0,
+                        !(RootDetachedActorId().IsNil())};
   return env.IntHash();
 }
 
@@ -594,13 +602,15 @@ WorkerCacheKey::WorkerCacheKey(
     const std::string serialized_runtime_env,
     const absl::flat_hash_map<std::string, double> &required_resources,
     bool is_actor,
-    bool is_gpu)
+    bool is_gpu,
+    bool is_root_detached_actor)
     : serialized_runtime_env(serialized_runtime_env),
       required_resources(RayConfig::instance().worker_resource_limits_enabled()
                              ? required_resources
                              : absl::flat_hash_map<std::string, double>{}),
       is_actor(is_actor && RayConfig::instance().isolate_workers_across_task_types()),
       is_gpu(is_gpu && RayConfig::instance().isolate_workers_across_resource_types()),
+      is_root_detached_actor(is_root_detached_actor),
       hash_(CalculateHash()) {}
 
 std::size_t WorkerCacheKey::CalculateHash() const {
@@ -617,6 +627,7 @@ std::size_t WorkerCacheKey::CalculateHash() const {
     boost::hash_combine(hash, serialized_runtime_env);
     boost::hash_combine(hash, is_actor);
     boost::hash_combine(hash, is_gpu);
+    boost::hash_combine(hash, is_root_detached_actor);
 
     std::vector<std::pair<std::string, double>> resource_vars(required_resources.begin(),
                                                               required_resources.end());
@@ -637,7 +648,7 @@ bool WorkerCacheKey::operator==(const WorkerCacheKey &k) const {
 
 bool WorkerCacheKey::EnvIsEmpty() const {
   return IsRuntimeEnvEmpty(serialized_runtime_env) && required_resources.empty() &&
-         !is_gpu;
+         !is_gpu && !is_root_detached_actor;
 }
 
 std::size_t WorkerCacheKey::Hash() const { return hash_; }
diff --git a/src/ray/common/task/task_spec.h b/src/ray/common/task/task_spec.h
index cf6a0bc44fd60b..b3ae0c11a5c61d 100644
--- a/src/ray/common/task/task_spec.h
+++ b/src/ray/common/task/task_spec.h
@@ -287,6 +287,8 @@ class TaskSpecification : public MessageWrapper<rpc::TaskSpec> {
 
   TaskID ParentTaskId() const;
 
+  ActorID RootDetachedActorId() const;
+
   TaskID SubmitterTaskId() const;
 
   size_t ParentCounter() const;
@@ -527,12 +529,17 @@ class WorkerCacheKey {
   /// worker. \param required_resources The required resouce.
   /// worker. \param is_actor Whether the worker will be an actor. This is set when
   ///         task type isolation between workers is enabled.
-  /// worker. \param iis_gpu Whether the worker will be using GPUs. This is set when
+  /// worker. \param is_gpu Whether the worker will be using GPUs. This is set when
   ///         resource type isolation between workers is enabled.
+  /// worker. \param is_root_detached_actor Whether the worker will be running
+  ///         tasks or actors whose root ancestor is a detached actor. This is set
+  ///         to prevent worker reuse between tasks whose root is the driver process
+  ///         and tasks whose root is a detached actor.
   WorkerCacheKey(const std::string serialized_runtime_env,
                  const absl::flat_hash_map<std::string, double> &required_resources,
                  bool is_actor,
-                 bool is_gpu);
+                 bool is_gpu,
+                 bool is_root_detached_actor);
 
   bool operator==(const WorkerCacheKey &k) const;
 
@@ -564,6 +571,9 @@ class WorkerCacheKey {
   const bool is_actor;
   /// Whether the worker is to use a GPU.
   const bool is_gpu;
+  /// Whether the worker is to run tasks or actors
+  /// whose root is a detached actor.
+  const bool is_root_detached_actor;
   /// The hash of the worker's environment.  This is set to 0
   /// for unspecified or empty environments.
   const std::size_t hash_ = 0;
diff --git a/src/ray/common/task/task_util.h b/src/ray/common/task/task_util.h
index 30460d6300ee6b..488c52069aa49d 100644
--- a/src/ray/common/task/task_util.h
+++ b/src/ray/common/task/task_util.h
@@ -172,12 +172,16 @@ class TaskSpecBuilder {
       int max_retries,
       bool retry_exceptions,
       const std::string &serialized_retry_exception_allowlist,
-      const rpc::SchedulingStrategy &scheduling_strategy) {
+      const rpc::SchedulingStrategy &scheduling_strategy,
+      const ActorID root_detached_actor_id) {
     message_->set_max_retries(max_retries);
     message_->set_retry_exceptions(retry_exceptions);
     message_->set_serialized_retry_exception_allowlist(
         serialized_retry_exception_allowlist);
     message_->mutable_scheduling_strategy()->CopyFrom(scheduling_strategy);
+    if (!root_detached_actor_id.IsNil()) {
+      message_->set_root_detached_actor_id(root_detached_actor_id.Binary());
+    }
     return *this;
   }
 
@@ -230,7 +234,8 @@ class TaskSpecBuilder {
       bool is_asyncio = false,
       const std::vector<ConcurrencyGroup> &concurrency_groups = {},
       const std::string &extension_data = "",
-      bool execute_out_of_order = false) {
+      bool execute_out_of_order = false,
+      ActorID root_detached_actor_id = ActorID::Nil()) {
     message_->set_type(TaskType::ACTOR_CREATION_TASK);
     auto actor_creation_spec = message_->mutable_actor_creation_task_spec();
     actor_creation_spec->set_actor_id(actor_id.Binary());
@@ -258,6 +263,9 @@ class TaskSpecBuilder {
     }
     actor_creation_spec->set_execute_out_of_order(execute_out_of_order);
     message_->mutable_scheduling_strategy()->CopyFrom(scheduling_strategy);
+    if (!root_detached_actor_id.IsNil()) {
+      message_->set_root_detached_actor_id(root_detached_actor_id.Binary());
+    }
     return *this;
   }
 
diff --git a/src/ray/common/test/task_spec_test.cc b/src/ray/common/test/task_spec_test.cc
index 1a0bf3fbcf4737..72cf8a1f6031fa 100644
--- a/src/ray/common/test/task_spec_test.cc
+++ b/src/ray/common/test/task_spec_test.cc
@@ -15,6 +15,7 @@
 #include "ray/common/task/task_spec.h"
 
 #include "gtest/gtest.h"
+#include "ray/common/task/task_util.h"
 
 namespace ray {
 TEST(TaskSpecTest, TestSchedulingClassDescriptor) {
@@ -146,6 +147,77 @@ TEST(TaskSpecTest, TestTaskSpecification) {
   ASSERT_TRUE(task_spec.GetNodeAffinitySchedulingStrategyNodeId() == node_id);
 }
 
+TEST(TaskSpecTest, TestRootDetachedActorId) {
+  ActorID actor_id =
+      ActorID::Of(JobID::FromInt(1), TaskID::FromRandom(JobID::FromInt(1)), 0);
+  TaskSpecification task_spec;
+  ASSERT_TRUE(task_spec.RootDetachedActorId().IsNil());
+  task_spec.GetMutableMessage().set_root_detached_actor_id(actor_id.Binary());
+  ASSERT_EQ(task_spec.RootDetachedActorId(), actor_id);
+}
+
+TEST(TaskSpecTest, TestTaskSpecBuilderRootDetachedActorId) {
+  TaskSpecBuilder task_spec_builder;
+  task_spec_builder.SetNormalTaskSpec(
+      0, false, "", rpc::SchedulingStrategy(), ActorID::Nil());
+  ASSERT_TRUE(task_spec_builder.Build().RootDetachedActorId().IsNil());
+  ActorID actor_id =
+      ActorID::Of(JobID::FromInt(1), TaskID::FromRandom(JobID::FromInt(1)), 0);
+  task_spec_builder.SetNormalTaskSpec(0, false, "", rpc::SchedulingStrategy(), actor_id);
+  ASSERT_EQ(task_spec_builder.Build().RootDetachedActorId(), actor_id);
+
+  TaskSpecBuilder actor_spec_builder;
+  actor_spec_builder.SetActorCreationTaskSpec(actor_id,
+                                              /*serialized_actor_handle=*/"",
+                                              rpc::SchedulingStrategy(),
+                                              /*max_restarts=*/0,
+                                              /*max_task_retries=*/0,
+                                              /*dynamic_worker_options=*/{},
+                                              /*max_concurrency=*/1,
+                                              /*is_detached=*/false,
+                                              /*name=*/"",
+                                              /*ray_namespace=*/"",
+                                              /*is_asyncio=*/false,
+                                              /*concurrency_groups=*/{},
+                                              /*extension_data=*/"",
+                                              /*execute_out_of_order=*/false,
+                                              /*root_detached_actor_id=*/ActorID::Nil());
+  ASSERT_TRUE(actor_spec_builder.Build().RootDetachedActorId().IsNil());
+  actor_spec_builder.SetActorCreationTaskSpec(actor_id,
+                                              /*serialized_actor_handle=*/"",
+                                              rpc::SchedulingStrategy(),
+                                              /*max_restarts=*/0,
+                                              /*max_task_retries=*/0,
+                                              /*dynamic_worker_options=*/{},
+                                              /*max_concurrency=*/1,
+                                              /*is_detached=*/true,
+                                              /*name=*/"",
+                                              /*ray_namespace=*/"",
+                                              /*is_asyncio=*/false,
+                                              /*concurrency_groups=*/{},
+                                              /*extension_data=*/"",
+                                              /*execute_out_of_order=*/false,
+                                              /*root_detached_actor_id=*/actor_id);
+  ASSERT_EQ(actor_spec_builder.Build().RootDetachedActorId(), actor_id);
+}
+
+TEST(TaskSpecTest, TestWorkerCacheKey) {
+  // Test TaskSpec calculates the correct WorkerCacheKey hash.
+  std::string serialized_runtime_env_A = "mock_env_A";
+  rpc::RuntimeEnvInfo runtime_env_info_A;
+  runtime_env_info_A.set_serialized_runtime_env(serialized_runtime_env_A);
+  TaskSpecification task_spec;
+  task_spec.GetMutableMessage().mutable_runtime_env_info()->CopyFrom(runtime_env_info_A);
+  const WorkerCacheKey key_A = {serialized_runtime_env_A, {}, false, false, false};
+  ASSERT_EQ(task_spec.GetRuntimeEnvHash(), key_A.IntHash());
+  ActorID actor_id =
+      ActorID::Of(JobID::FromInt(1), TaskID::FromRandom(JobID::FromInt(1)), 0);
+  task_spec.GetMutableMessage().set_root_detached_actor_id(actor_id.Binary());
+  ASSERT_NE(task_spec.GetRuntimeEnvHash(), key_A.IntHash());
+  const WorkerCacheKey key_B = {serialized_runtime_env_A, {}, false, false, true};
+  ASSERT_EQ(task_spec.GetRuntimeEnvHash(), key_B.IntHash());
+}
+
 TEST(TaskSpecTest, TestNodeLabelSchedulingStrategy) {
   rpc::SchedulingStrategy scheduling_strategy_1;
   auto expr_1 = scheduling_strategy_1.mutable_node_label_scheduling_strategy()
diff --git a/src/ray/core_worker/context.cc b/src/ray/core_worker/context.cc
index 826c94cb2dc8d1..f463abf338a2c5 100644
--- a/src/ray/core_worker/context.cc
+++ b/src/ray/core_worker/context.cc
@@ -163,6 +163,7 @@ WorkerContext::WorkerContext(WorkerType worker_type,
       current_actor_placement_group_id_(PlacementGroupID::Nil()),
       placement_group_capture_child_tasks_(false),
       main_thread_id_(boost::this_thread::get_id()),
+      root_detached_actor_id_(ActorID::Nil()),
       mutex_() {
   // For worker main thread which initializes the WorkerContext,
   // set task_id according to whether current worker is a driver.
@@ -290,6 +291,7 @@ void WorkerContext::SetCurrentTask(const TaskSpecification &task_spec) {
   RAY_CHECK(current_job_id_ == task_spec.JobId());
   if (task_spec.IsNormalTask()) {
     current_task_is_direct_call_ = true;
+    root_detached_actor_id_ = task_spec.RootDetachedActorId();
   } else if (task_spec.IsActorCreationTask()) {
     if (!current_actor_id_.IsNil()) {
       RAY_CHECK(current_actor_id_ == task_spec.ActorCreationId());
@@ -301,6 +303,7 @@ void WorkerContext::SetCurrentTask(const TaskSpecification &task_spec) {
     is_detached_actor_ = task_spec.IsDetachedActor();
     current_actor_placement_group_id_ = task_spec.PlacementGroupBundleId().first;
     placement_group_capture_child_tasks_ = task_spec.PlacementGroupCaptureChildTasks();
+    root_detached_actor_id_ = task_spec.RootDetachedActorId();
   } else if (task_spec.IsActorTask()) {
     RAY_CHECK(current_actor_id_ == task_spec.ActorId());
   } else {
@@ -330,6 +333,11 @@ const ActorID &WorkerContext::GetCurrentActorID() const {
   return current_actor_id_;
 }
 
+const ActorID &WorkerContext::GetRootDetachedActorID() const {
+  absl::ReaderMutexLock lock(&mutex_);
+  return root_detached_actor_id_;
+}
+
 bool WorkerContext::CurrentThreadIsMain() const {
   return boost::this_thread::get_id() == main_thread_id_;
 }
diff --git a/src/ray/core_worker/context.h b/src/ray/core_worker/context.h
index 26a926080c47db..007ee4401d39f4 100644
--- a/src/ray/core_worker/context.h
+++ b/src/ray/core_worker/context.h
@@ -95,6 +95,8 @@ class WorkerContext {
 
   const ActorID &GetCurrentActorID() const ABSL_LOCKS_EXCLUDED(mutex_);
 
+  const ActorID &GetRootDetachedActorID() const ABSL_LOCKS_EXCLUDED(mutex_);
+
   /// Returns whether the current thread is the main worker thread.
   bool CurrentThreadIsMain() const;
 
@@ -156,6 +158,9 @@ class WorkerContext {
   /// for concurrent actor, or the main thread's task id for other cases.
   /// Used merely for observability purposes to track task hierarchy.
   TaskID main_thread_or_actor_creation_task_id_ ABSL_GUARDED_BY(mutex_);
+  /// If the current task or actor is originated from a detached actor,
+  /// this contains that actor's id otherwise it's nil.
+  ActorID root_detached_actor_id_ ABSL_GUARDED_BY(mutex_);
   // To protect access to mutable members;
   mutable absl::Mutex mutex_;
 
diff --git a/src/ray/core_worker/core_worker.cc b/src/ray/core_worker/core_worker.cc
index afb60dd8522f4f..8c74b471fe9793 100644
--- a/src/ray/core_worker/core_worker.cc
+++ b/src/ray/core_worker/core_worker.cc
@@ -2193,10 +2193,15 @@ std::vector<rpc::ObjectReference> CoreWorker::SubmitTask(
                       /*generator_backpressure_num_objects*/
                       task_options.generator_backpressure_num_objects,
                       /*enable_task_event*/ task_options.enable_task_events);
+  ActorID root_detached_actor_id;
+  if (!worker_context_.GetRootDetachedActorID().IsNil()) {
+    root_detached_actor_id = worker_context_.GetRootDetachedActorID();
+  }
   builder.SetNormalTaskSpec(max_retries,
                             retry_exceptions,
                             serialized_retry_exception_allowlist,
-                            scheduling_strategy);
+                            scheduling_strategy,
+                            root_detached_actor_id);
   TaskSpecification task_spec = builder.Build();
   RAY_LOG(DEBUG) << "Submitting normal task " << task_spec.DebugString();
   std::vector<rpc::ObjectReference> returned_refs;
@@ -2301,6 +2306,12 @@ Status CoreWorker::CreateActor(const RayFunction &function,
       actor_creation_options.enable_task_events);
   std::string serialized_actor_handle;
   actor_handle->Serialize(&serialized_actor_handle);
+  ActorID root_detached_actor_id;
+  if (is_detached) {
+    root_detached_actor_id = actor_id;
+  } else if (!worker_context_.GetRootDetachedActorID().IsNil()) {
+    root_detached_actor_id = worker_context_.GetRootDetachedActorID();
+  }
   builder.SetActorCreationTaskSpec(actor_id,
                                    serialized_actor_handle,
                                    actor_creation_options.scheduling_strategy,
@@ -2314,7 +2325,8 @@ Status CoreWorker::CreateActor(const RayFunction &function,
                                    actor_creation_options.is_asyncio,
                                    actor_creation_options.concurrency_groups,
                                    extension_data,
-                                   actor_creation_options.execute_out_of_order);
+                                   actor_creation_options.execute_out_of_order,
+                                   root_detached_actor_id);
   // Add the actor handle before we submit the actor creation task, since the
   // actor handle must be in scope by the time the GCS sends the
   // WaitForActorOutOfScopeRequest.
@@ -4332,26 +4344,31 @@ void CoreWorker::HandleDeleteSpilledObjects(rpc::DeleteSpilledObjectsRequest req
 void CoreWorker::HandleExit(rpc::ExitRequest request,
                             rpc::ExitReply *reply,
                             rpc::SendReplyCallback send_reply_callback) {
-  bool own_objects = reference_counter_->OwnObjects();
-  int64_t pins_in_flight = local_raylet_client_->GetPinsInFlight();
+  const bool own_objects = reference_counter_->OwnObjects();
+  const size_t num_pending_tasks = task_manager_->NumPendingTasks();
+  const int64_t pins_in_flight = local_raylet_client_->GetPinsInFlight();
   // We consider the worker to be idle if it doesn't own any objects and it doesn't have
-  // any object pinning RPCs in flight.
-  bool is_idle = !own_objects && pins_in_flight == 0;
+  // any object pinning RPCs in flight and it doesn't have pending tasks.
+  bool is_idle = !own_objects && (pins_in_flight == 0) && (num_pending_tasks == 0);
   bool force_exit = request.force_exit();
   RAY_LOG(DEBUG) << "Exiting: is_idle: " << is_idle << " force_exit: " << force_exit;
   if (!is_idle && force_exit) {
     RAY_LOG(INFO) << "Force exiting worker that owns object. This may cause other "
                      "workers that depends on the object to lose it. "
                   << "Own objects: " << own_objects
-                  << " # Pins in flight: " << pins_in_flight;
+                  << " # Pins in flight: " << pins_in_flight
+                  << " # pending tasks: " << num_pending_tasks;
   }
   bool will_exit = is_idle || force_exit;
   reply->set_success(will_exit);
   send_reply_callback(
       Status::OK(),
-      [this, will_exit]() {
+      [this, will_exit, force_exit]() {
         // If the worker is idle, we exit.
-        if (will_exit) {
+        if (force_exit) {
+          ForceExit(rpc::WorkerExitType::INTENDED_SYSTEM_EXIT,
+                    "Worker force exits because its job has finished");
+        } else if (will_exit) {
           Exit(rpc::WorkerExitType::INTENDED_SYSTEM_EXIT,
                "Worker exits because it was idle (it doesn't have objects it owns while "
                "no task or actor has been scheduled) for a long time.");
diff --git a/src/ray/protobuf/common.proto b/src/ray/protobuf/common.proto
index 81c7af97dcfc7a..05c148b3fb2b09 100644
--- a/src/ray/protobuf/common.proto
+++ b/src/ray/protobuf/common.proto
@@ -509,6 +509,10 @@ message TaskSpec {
   int64 generator_backpressure_num_objects = 38;
   // Boolean if task events enabled, i.e tasks events would be reported.
   bool enable_task_events = 39;
+  // If this task is originated from a detached actor,
+  // this field contains the detached actor id.
+  // Otherwise it's empty and is originated from a driver.
+  bytes root_detached_actor_id = 40;
 }
 
 message TaskInfoEntry {
diff --git a/src/ray/raylet/local_task_manager.cc b/src/ray/raylet/local_task_manager.cc
index 1a663e655618f6..a3df5a229ccaa1 100644
--- a/src/ray/raylet/local_task_manager.cc
+++ b/src/ray/raylet/local_task_manager.cc
@@ -522,6 +522,11 @@ bool LocalTaskManager::PoppedWorkerHandler(
             task_id,
             rpc::RequestWorkerLeaseReply::SCHEDULING_CANCELLED_RUNTIME_ENV_SETUP_FAILED,
             /*scheduling_failure_message*/ runtime_env_setup_error_message);
+      } else if (status == PopWorkerStatus::JobFinished) {
+        // The task job finished.
+        // Just remove the task from dispatch queue.
+        RAY_LOG(DEBUG) << "Call back to a job finished task, task id = " << task_id;
+        erase_from_dispatch_queue_fn(work, scheduling_class);
       } else {
         // In other cases, set the work status `WAITING` to make this task
         // could be re-dispatched.
diff --git a/src/ray/raylet/node_manager.cc b/src/ray/raylet/node_manager.cc
index e86ea9ea641415..e354d9c828ceeb 100644
--- a/src/ray/raylet/node_manager.cc
+++ b/src/ray/raylet/node_manager.cc
@@ -616,6 +616,31 @@ void NodeManager::HandleJobStarted(const JobID &job_id, const JobTableData &job_
 void NodeManager::HandleJobFinished(const JobID &job_id, const JobTableData &job_data) {
   RAY_LOG(DEBUG) << "HandleJobFinished " << job_id;
   RAY_CHECK(job_data.is_dead());
+  // Force kill all the worker processes belonging to the finished job
+  // so that no worker processes is leaked.
+  for (const auto &pair : leased_workers_) {
+    auto &worker = pair.second;
+    RAY_CHECK(!worker->GetAssignedJobId().IsNil());
+    if (worker->GetRootDetachedActorId().IsNil() &&
+        (worker->GetAssignedJobId() == job_id)) {
+      // Don't kill worker processes belonging to the detached actor
+      // since those are expected to outlive the job.
+      RAY_LOG(INFO) << "The leased worker " << worker->WorkerId()
+                    << " is killed because the job " << job_id << " finished.";
+      rpc::ExitRequest request;
+      request.set_force_exit(true);
+      worker->rpc_client()->Exit(
+          request, [this, worker](const ray::Status &status, const rpc::ExitReply &r) {
+            if (!status.ok()) {
+              RAY_LOG(WARNING) << "Failed to send exit request to worker "
+                               << worker->WorkerId() << ": " << status.ToString()
+                               << ". Killing it using SIGKILL instead.";
+              // Just kill-9 as a last resort.
+              KillWorker(worker, /* force */ true);
+            }
+          });
+    }
+  }
   worker_pool_.HandleJobFinished(job_id);
 }
 
@@ -1053,8 +1078,6 @@ void NodeManager::HandleUnexpectedWorkerFailure(const rpc::WorkerDeltaData &data
     RAY_LOG(DEBUG) << "Lease " << worker->WorkerId() << " owned by " << owner_worker_id;
     RAY_CHECK(!owner_worker_id.IsNil() && !owner_node_id.IsNil());
     if (!worker->IsDetachedActor()) {
-      // TODO (Alex): Cancel all pending child tasks of the tasks whose owners have failed
-      // because the owner could've submitted lease requests before failing.
       if (!worker_id.IsNil()) {
         // If the failed worker was a leased worker's owner, then kill the leased worker.
         if (owner_worker_id == worker_id) {
diff --git a/src/ray/raylet/scheduling/cluster_task_manager_test.cc b/src/ray/raylet/scheduling/cluster_task_manager_test.cc
index 149c95677c37a5..c4bad4e83048bd 100644
--- a/src/ray/raylet/scheduling/cluster_task_manager_test.cc
+++ b/src/ray/raylet/scheduling/cluster_task_manager_test.cc
@@ -185,7 +185,7 @@ RayTask CreateTask(
     }
   }
 
-  spec_builder.SetNormalTaskSpec(0, false, "", scheduling_strategy);
+  spec_builder.SetNormalTaskSpec(0, false, "", scheduling_strategy, ActorID::Nil());
 
   return RayTask(spec_builder.Build());
 }
@@ -544,7 +544,7 @@ TEST_F(ClusterTaskManagerTest, DispatchQueueNonBlockingTest) {
   pool_.TriggerCallbacks();
 
   // Push a worker that can only run task A.
-  const WorkerCacheKey env_A = {serialized_runtime_env_A, {}, false, false};
+  const WorkerCacheKey env_A = {serialized_runtime_env_A, {}, false, false, false};
   const int runtime_env_hash_A = env_A.IntHash();
   std::shared_ptr<MockWorker> worker_A =
       std::make_shared<MockWorker>(WorkerID::FromRandom(), 1234, runtime_env_hash_A);
@@ -1130,6 +1130,21 @@ TEST_F(ClusterTaskManagerTest, NotOKPopWorkerTest) {
   ASSERT_TRUE(reply.canceled());
   ASSERT_EQ(reply.scheduling_failure_message(), runtime_env_error_msg);
 
+  // Test that local task manager handles PopWorkerStatus::JobFinished correctly.
+  callback_called = false;
+  reply.Clear();
+  RayTask task3 = CreateTask({{ray::kCPU_ResourceLabel, 1}});
+  task_manager_.QueueAndScheduleTask(task3, false, false, &reply, callback);
+  ASSERT_EQ(NumTasksToDispatchWithStatus(internal::WorkStatus::WAITING_FOR_WORKER), 1);
+  ASSERT_EQ(NumTasksToDispatchWithStatus(internal::WorkStatus::WAITING), 0);
+  ASSERT_EQ(NumRunningTasks(), 1);
+  pool_.TriggerCallbacksWithNotOKStatus(PopWorkerStatus::JobFinished);
+  // The task should be removed from the dispatch queue.
+  ASSERT_FALSE(callback_called);
+  ASSERT_EQ(NumTasksToDispatchWithStatus(internal::WorkStatus::WAITING_FOR_WORKER), 0);
+  ASSERT_EQ(NumTasksToDispatchWithStatus(internal::WorkStatus::WAITING), 0);
+  ASSERT_EQ(NumRunningTasks(), 0);
+
   AssertNoLeaks();
 }
 
diff --git a/src/ray/raylet/test/util.h b/src/ray/raylet/test/util.h
index d1ad7f5198fa8f..e9b4351af891d1 100644
--- a/src/ray/raylet/test/util.h
+++ b/src/ray/raylet/test/util.h
@@ -40,6 +40,7 @@ class MockWorker : public WorkerInterface {
   void SetAssignedTask(const RayTask &assigned_task) override {
     task_ = assigned_task;
     task_assign_time_ = absl::Now();
+    root_detached_actor_id_ = assigned_task.GetTaskSpecification().RootDetachedActorId();
   };
 
   absl::Time GetAssignedTaskTime() const override { return task_assign_time_; };
@@ -155,6 +156,10 @@ class MockWorker : public WorkerInterface {
 
   void SetJobId(const JobID &job_id) override { job_id_ = job_id; }
 
+  const ActorID &GetRootDetachedActorId() const override {
+    return root_detached_actor_id_;
+  }
+
  protected:
   void SetStartupToken(StartupToken startup_token) override {
     RAY_CHECK(false) << "Method unused";
@@ -175,6 +180,7 @@ class MockWorker : public WorkerInterface {
   int runtime_env_hash_;
   TaskID task_id_;
   JobID job_id_;
+  ActorID root_detached_actor_id_;
 };
 
 }  // namespace raylet
diff --git a/src/ray/raylet/worker.h b/src/ray/raylet/worker.h
index 13ad92cc17d7b9..f45878568c3498 100644
--- a/src/ray/raylet/worker.h
+++ b/src/ray/raylet/worker.h
@@ -112,6 +112,8 @@ class WorkerInterface {
 
   virtual void SetJobId(const JobID &job_id) = 0;
 
+  virtual const ActorID &GetRootDetachedActorId() const = 0;
+
  protected:
   virtual void SetStartupToken(StartupToken startup_token) = 0;
 
@@ -120,6 +122,7 @@ class WorkerInterface {
   FRIEND_TEST(WorkerPoolDriverRegisteredTest,
               TestWorkerCappingLaterNWorkersNotOwningObjects);
   FRIEND_TEST(WorkerPoolDriverRegisteredTest, TestJobFinishedForceKillIdleWorker);
+  FRIEND_TEST(WorkerPoolDriverRegisteredTest, TestJobFinishedForPopWorker);
   FRIEND_TEST(WorkerPoolDriverRegisteredTest,
               WorkerFromAliveJobDoesNotBlockWorkerFromDeadJobFromGettingKilled);
   FRIEND_TEST(WorkerPoolDriverRegisteredTest, TestWorkerCappingWithExitDelay);
@@ -205,6 +208,8 @@ class Worker : public WorkerInterface {
     lifetime_allocated_instances_ = allocated_instances;
   };
 
+  const ActorID &GetRootDetachedActorId() const { return root_detached_actor_id_; }
+
   std::shared_ptr<TaskResourceInstances> GetLifetimeAllocatedInstances() {
     return lifetime_allocated_instances_;
   };
@@ -216,6 +221,7 @@ class Worker : public WorkerInterface {
   void SetAssignedTask(const RayTask &assigned_task) {
     assigned_task_ = assigned_task;
     task_assign_time_ = absl::Now();
+    root_detached_actor_id_ = assigned_task.GetTaskSpecification().RootDetachedActorId();
   }
 
   absl::Time GetAssignedTaskTime() const { return task_assign_time_; };
@@ -271,6 +277,8 @@ class Worker : public WorkerInterface {
   const int runtime_env_hash_;
   /// The worker's actor ID. If this is nil, then the worker is not an actor.
   ActorID actor_id_;
+  /// Root detached actor ID for the worker's last assigned task.
+  ActorID root_detached_actor_id_;
   /// The worker's placement group bundle. It is used to detect if the worker is
   /// associated with a placement group bundle.
   BundleID bundle_id_;
diff --git a/src/ray/raylet/worker_pool.cc b/src/ray/raylet/worker_pool.cc
index d2ab8539a80025..74e682b7a52e25 100644
--- a/src/ray/raylet/worker_pool.cc
+++ b/src/ray/raylet/worker_pool.cc
@@ -183,7 +183,8 @@ void WorkerPool::SetRuntimeEnvAgentClient(
   runtime_env_agent_client_ = runtime_env_agent_client;
 }
 
-void WorkerPool::PopWorkerCallbackAsync(const PopWorkerCallback &callback,
+void WorkerPool::PopWorkerCallbackAsync(const TaskSpecification &task_spec,
+                                        const PopWorkerCallback &callback,
                                         std::shared_ptr<WorkerInterface> worker,
                                         PopWorkerStatus status) {
   // This method shouldn't be invoked when runtime env creation has failed because
@@ -192,17 +193,34 @@ void WorkerPool::PopWorkerCallbackAsync(const PopWorkerCallback &callback,
   RAY_CHECK(status != PopWorkerStatus::RuntimeEnvCreationFailed);
   // Call back this function asynchronously to make sure executed in different stack.
   io_service_->post(
-      [this, callback, worker, status]() {
-        PopWorkerCallbackInternal(callback, worker, status);
+      [this, task_spec, callback, worker, status]() {
+        PopWorkerCallbackInternal(task_spec, callback, worker, status);
       },
       "WorkerPool.PopWorkerCallback");
 }
 
-void WorkerPool::PopWorkerCallbackInternal(const PopWorkerCallback &callback,
+void WorkerPool::PopWorkerCallbackInternal(const TaskSpecification &task_spec,
+                                           const PopWorkerCallback &callback,
                                            std::shared_ptr<WorkerInterface> worker,
                                            PopWorkerStatus status) {
   RAY_CHECK(callback);
-  auto used = callback(worker, status, /*runtime_env_setup_error_message*/ "");
+  auto used = false;
+  if (worker && finished_jobs_.contains(task_spec.JobId()) &&
+      task_spec.RootDetachedActorId().IsNil()) {
+    // When a job finishes, node manager will kill leased workers one time
+    // and worker pool will kill idle workers periodically.
+    // The current worker is already removed from the idle workers
+    // but hasn't been added to the leased workers since the callback is not called yet.
+    // We shouldn't add this worker to the leased workers since killing leased workers
+    // for this finished job may already happen and won't happen again (this is one time)
+    // so it will cause a process leak.
+    // Instead we fail the PopWorker and add the worker back to the idle workers so it can
+    // be killed later.
+    RAY_CHECK(status == PopWorkerStatus::OK);
+    callback(nullptr, PopWorkerStatus::JobFinished, "");
+  } else {
+    used = callback(worker, status, /*runtime_env_setup_error_message*/ "");
+  }
   if (worker && !used) {
     // The invalid worker not used, restore it to worker pool.
     PushWorker(worker);
@@ -338,20 +356,18 @@ WorkerPool::BuildProcessCommandArgs(const Language &language,
     worker_command_args.push_back("--worker-launch-time-ms=" +
                                   std::to_string(current_sys_time_ms()));
     worker_command_args.push_back("--node-id=" + node_id_.Hex());
+    // TODO(jjyao) This should be renamed to worker cache key hash
+    worker_command_args.push_back("--runtime-env-hash=" +
+                                  std::to_string(runtime_env_hash));
   } else if (language == Language::CPP) {
     worker_command_args.push_back("--startup_token=" +
                                   std::to_string(worker_startup_token_counter_));
+    worker_command_args.push_back("--ray_runtime_env_hash=" +
+                                  std::to_string(runtime_env_hash));
   }
 
   if (serialized_runtime_env_context != "{}" && !serialized_runtime_env_context.empty()) {
     worker_command_args.push_back("--language=" + Language_Name(language));
-    if (language == Language::CPP) {
-      worker_command_args.push_back("--ray_runtime_env_hash=" +
-                                    std::to_string(runtime_env_hash));
-    } else {
-      worker_command_args.push_back("--runtime-env-hash=" +
-                                    std::to_string(runtime_env_hash));
-    }
     worker_command_args.push_back("--serialized-runtime-env-context=" +
                                   serialized_runtime_env_context);
   } else if (language == Language::PYTHON && worker_command_args.size() >= 2 &&
@@ -561,14 +577,12 @@ void WorkerPool::MonitorStartingWorkerProcess(const Process &proc,
       process_failed_pending_registration_++;
       bool found;
       bool used;
-      TaskID task_id;
       InvokePopWorkerCallbackForProcess(state.starting_workers_to_tasks,
                                         proc_startup_token,
                                         nullptr,
                                         status,
                                         &found,
-                                        &used,
-                                        &task_id);
+                                        &used);
       DeleteRuntimeEnvIfPossible(it->second.runtime_env_info.serialized_runtime_env());
       RemoveWorkerProcess(state, proc_startup_token);
       if (IsIOWorkerType(worker_type)) {
@@ -972,21 +986,29 @@ void WorkerPool::InvokePopWorkerCallbackForProcess(
     const std::shared_ptr<WorkerInterface> &worker,
     const PopWorkerStatus &status,
     bool *found,
-    bool *worker_used,
-    TaskID *task_id) {
+    bool *worker_used) {
   *found = false;
   *worker_used = false;
   auto it = starting_workers_to_tasks.find(startup_token);
   if (it != starting_workers_to_tasks.end()) {
     *found = true;
-    *task_id = it->second.task_id;
     const auto &callback = it->second.callback;
     RAY_CHECK(callback);
     // This method shouldn't be invoked when runtime env creation has failed because
     // when runtime env is failed to be created, they are all
     // invoking the callback immediately.
     RAY_CHECK(status != PopWorkerStatus::RuntimeEnvCreationFailed);
-    *worker_used = callback(worker, status, /*runtime_env_setup_error_message*/ "");
+    if (worker && finished_jobs_.contains(it->second.task_spec.JobId()) &&
+        it->second.task_spec.RootDetachedActorId().IsNil()) {
+      // If the job has finished, we should fail the PopWorker callback
+      // and add the worker back to the idle workers so it can be killed later.
+      // This doesn't apply to detached actor and its descendants
+      // since they can outlive the job.
+      RAY_CHECK(status == PopWorkerStatus::OK);
+      callback(nullptr, PopWorkerStatus::JobFinished, "");
+    } else {
+      *worker_used = callback(worker, status, /*runtime_env_setup_error_message*/ "");
+    }
     starting_workers_to_tasks.erase(it);
   }
 }
@@ -998,14 +1020,12 @@ void WorkerPool::PushWorker(const std::shared_ptr<WorkerInterface> &worker) {
   auto &state = GetStateForLanguage(worker->GetLanguage());
   bool found;
   bool used;
-  TaskID task_id;
   InvokePopWorkerCallbackForProcess(state.starting_workers_to_tasks,
                                     worker->GetStartupToken(),
                                     worker,
                                     PopWorkerStatus::OK,
                                     &found,
-                                    &used,
-                                    &task_id);
+                                    &used);
   RAY_LOG(DEBUG) << "PushWorker " << worker->WorkerId() << " used: " << used;
   if (!used) {
     // Put the worker to the idle pool.
@@ -1102,8 +1122,7 @@ void WorkerPool::KillIdleWorker(std::shared_ptr<WorkerInterface> idle_worker,
   RAY_CHECK(rpc_client);
   rpc::ExitRequest request;
   const auto &job_id = idle_worker->GetAssignedJobId();
-  if (finished_jobs_.contains(job_id) &&
-      RayConfig::instance().kill_idle_workers_of_terminated_job()) {
+  if (finished_jobs_.contains(job_id) && idle_worker->GetRootDetachedActorId().IsNil()) {
     RAY_LOG(INFO) << "Force exiting worker whose job has exited "
                   << idle_worker->WorkerId();
     request.set_force_exit(true);
@@ -1170,7 +1189,7 @@ void WorkerPool::PopWorker(const TaskSpecification &task_spec,
     if (status == PopWorkerStatus::OK) {
       RAY_CHECK(proc.IsValid());
       WarnAboutSize();
-      auto task_info = TaskWaitingForWorkerInfo{task_spec.TaskId(), callback};
+      auto task_info = TaskWaitingForWorkerInfo{task_spec, callback};
       state.starting_workers_to_tasks[startup_token] = std::move(task_info);
     } else if (status == PopWorkerStatus::TooManyStartingWorkerProcesses) {
       // TODO(jjyao) As an optimization, we don't need to delete the runtime env
@@ -1180,7 +1199,7 @@ void WorkerPool::PopWorker(const TaskSpecification &task_spec,
           PopWorkerRequest{task_spec, callback, allocated_instances_serialized_json});
     } else {
       DeleteRuntimeEnvIfPossible(task_spec.SerializedRuntimeEnv());
-      PopWorkerCallbackAsync(callback, nullptr, status);
+      PopWorkerCallbackAsync(task_spec, callback, nullptr, status);
     }
   };
 
@@ -1297,7 +1316,7 @@ void WorkerPool::PopWorker(const TaskSpecification &task_spec,
     RAY_LOG(DEBUG) << "Re-using worker " << worker->WorkerId() << " for task "
                    << task_spec.DebugString();
     stats::NumWorkersStartedFromCache.Record(1);
-    PopWorkerCallbackAsync(callback, worker);
+    PopWorkerCallbackAsync(task_spec, callback, worker);
   }
 }
 
@@ -1341,7 +1360,8 @@ void WorkerPool::PrestartDefaultCpuWorkers(ray::Language language, int64_t num_n
   static const WorkerCacheKey kDefaultCpuWorkerCacheKey{/*serialized_runtime_env*/ "",
                                                         {{"CPU", 1}},
                                                         /*is_actor*/ false,
-                                                        /*is_gpu*/ false};
+                                                        /*is_gpu*/ false,
+                                                        /*is_root_detached_actor*/ false};
   RAY_LOG(DEBUG) << "PrestartDefaultCpuWorkers " << num_needed;
   for (int i = 0; i < num_needed; i++) {
     PopWorkerStatus status;
diff --git a/src/ray/raylet/worker_pool.h b/src/ray/raylet/worker_pool.h
index b8e03198dfd200..dcd705addcd4e8 100644
--- a/src/ray/raylet/worker_pool.h
+++ b/src/ray/raylet/worker_pool.h
@@ -59,6 +59,9 @@ enum PopWorkerStatus {
   // Any fails of runtime env creation.
   // A nullptr worker will be returned with callback.
   RuntimeEnvCreationFailed = 4,
+  // The task's job has finished.
+  // A nullptr worker will be returned with callback.
+  JobFinished = 5,
 };
 
 /// \param[in] worker The started worker instance. Nullptr if worker is not started.
@@ -442,7 +445,8 @@ class WorkerPool : public WorkerPoolInterface, public IOWorkerPoolInterface {
   virtual void WarnAboutSize();
 
   /// Make this synchronized function for unit test.
-  void PopWorkerCallbackInternal(const PopWorkerCallback &callback,
+  void PopWorkerCallbackInternal(const TaskSpecification &task_spec,
+                                 const PopWorkerCallback &callback,
                                  std::shared_ptr<WorkerInterface> worker,
                                  PopWorkerStatus status);
 
@@ -486,8 +490,8 @@ class WorkerPool : public WorkerPoolInterface, public IOWorkerPoolInterface {
   };
 
   struct TaskWaitingForWorkerInfo {
-    /// The id of task.
-    TaskID task_id;
+    /// The spec of task.
+    TaskSpecification task_spec;
     /// The callback function which should be called when worker registered.
     PopWorkerCallback callback;
   };
@@ -608,7 +612,8 @@ class WorkerPool : public WorkerPoolInterface, public IOWorkerPoolInterface {
 
   /// Call the `PopWorkerCallback` function asynchronously to make sure executed in
   /// different stack.
-  virtual void PopWorkerCallbackAsync(const PopWorkerCallback &callback,
+  virtual void PopWorkerCallbackAsync(const TaskSpecification &task_spec,
+                                      const PopWorkerCallback &callback,
                                       std::shared_ptr<WorkerInterface> worker,
                                       PopWorkerStatus status = PopWorkerStatus::OK);
 
@@ -623,15 +628,13 @@ class WorkerPool : public WorkerPoolInterface, public IOWorkerPoolInterface {
   /// \param found  Whether the related task found or not.
   /// \param worker_used Whether the worker is used by the task, only valid when found is
   /// true.
-  /// \param task_id  The related task id.
   void InvokePopWorkerCallbackForProcess(
       absl::flat_hash_map<StartupToken, TaskWaitingForWorkerInfo> &workers_to_tasks,
       StartupToken startup_token,
       const std::shared_ptr<WorkerInterface> &worker,
       const PopWorkerStatus &status,
       bool *found /* output */,
-      bool *worker_used /* output */,
-      TaskID *task_id /* output */);
+      bool *worker_used /* output */);
 
   /// We manage all runtime env resources locally by the two methods:
   /// `GetOrCreateRuntimeEnv` and `DeleteRuntimeEnvIfPossible`.
diff --git a/src/ray/raylet/worker_pool_test.cc b/src/ray/raylet/worker_pool_test.cc
index fdfeeeff2d9fe3..ab194e595093ad 100644
--- a/src/ray/raylet/worker_pool_test.cc
+++ b/src/ray/raylet/worker_pool_test.cc
@@ -160,10 +160,11 @@ class WorkerPoolMock : public WorkerPool {
   using WorkerPool::PopWorkerCallbackInternal;
 
   // Mock `PopWorkerCallbackAsync` to synchronized function.
-  void PopWorkerCallbackAsync(const PopWorkerCallback &callback,
+  void PopWorkerCallbackAsync(const TaskSpecification &task_spec,
+                              const PopWorkerCallback &callback,
                               std::shared_ptr<WorkerInterface> worker,
                               PopWorkerStatus status = PopWorkerStatus::OK) override {
-    PopWorkerCallbackInternal(callback, worker, status);
+    PopWorkerCallbackInternal(task_spec, callback, worker, status);
   }
 
   Process StartProcess(const std::vector<std::string> &worker_command_args,
@@ -1377,6 +1378,86 @@ TEST_F(WorkerPoolDriverRegisteredTest, TestWorkerCappingWithExitDelay) {
   ASSERT_EQ(worker_pool_->GetIdleWorkerSize(), workers.size());
 }
 
+TEST_F(WorkerPoolDriverRegisteredTest, TestJobFinishedForPopWorker) {
+  // Test to make sure that if job finishes,
+  // PopWorker should fail with PopWorkerStatus::JobFinished
+
+  auto job_id = JOB_ID;
+
+  /// Add worker to the pool.
+  PopWorkerStatus status;
+  auto [proc, token] = worker_pool_->StartWorkerProcess(
+      Language::PYTHON, rpc::WorkerType::WORKER, job_id, &status);
+  auto worker = worker_pool_->CreateWorker(Process(), Language::PYTHON, job_id);
+  worker->SetStartupToken(worker_pool_->GetStartupToken(proc));
+  RAY_CHECK_OK(worker_pool_->RegisterWorker(
+      worker, proc.GetId(), worker_pool_->GetStartupToken(proc), [](Status, int) {}));
+  worker_pool_->OnWorkerStarted(worker);
+  worker_pool_->PushWorker(worker);
+  ASSERT_EQ(worker_pool_->GetIdleWorkerSize(), 1);
+
+  auto mock_rpc_client_it = mock_worker_rpc_clients_.find(worker->WorkerId());
+  auto mock_rpc_client = mock_rpc_client_it->second;
+
+  // Finish the job.
+  worker_pool_->HandleJobFinished(job_id);
+
+  auto task_spec = ExampleTaskSpec(/*actor_id=*/ActorID::Nil(), Language::PYTHON, job_id);
+  PopWorkerStatus pop_worker_status;
+  // This PopWorker should fail since the job finished.
+  worker = worker_pool_->PopWorkerSync(task_spec, false, &pop_worker_status);
+  ASSERT_EQ(pop_worker_status, PopWorkerStatus::JobFinished);
+  ASSERT_FALSE(worker);
+  ASSERT_EQ(worker_pool_->GetIdleWorkerSize(), 1);
+
+  worker_pool_->TryKillingIdleWorkers();
+  ASSERT_EQ(mock_rpc_client->exit_count, 1);
+  ASSERT_EQ(mock_rpc_client->last_exit_forced, true);
+  mock_rpc_client->ExitReplySucceed();
+
+  job_id = JOB_ID2;
+  rpc::JobConfig job_config;
+  RegisterDriver(Language::PYTHON, job_id, job_config);
+  task_spec = ExampleTaskSpec(/*actor_id=*/ActorID::Nil(), Language::PYTHON, job_id);
+  pop_worker_status = PopWorkerStatus::OK;
+  // This will start a new worker.
+  worker_pool_->PopWorker(
+      task_spec,
+      [&](const std::shared_ptr<WorkerInterface> worker,
+          PopWorkerStatus status,
+          const std::string &runtime_env_setup_error_message) -> bool {
+        pop_worker_status = status;
+        return false;
+      });
+  auto process = worker_pool_->LastStartedWorkerProcess();
+  RAY_CHECK(process.IsValid());
+  ASSERT_EQ(1, worker_pool_->NumWorkersStarting());
+
+  worker = worker_pool_->CreateWorker(Process());
+  worker->SetStartupToken(worker_pool_->GetStartupToken(process));
+  RAY_CHECK_OK(worker_pool_->RegisterWorker(
+      worker, process.GetId(), worker_pool_->GetStartupToken(process), [](Status, int) {
+      }));
+  // Call `OnWorkerStarted` to emulate worker port announcement.
+  worker_pool_->OnWorkerStarted(worker);
+
+  mock_rpc_client_it = mock_worker_rpc_clients_.find(worker->WorkerId());
+  mock_rpc_client = mock_rpc_client_it->second;
+
+  // Finish the job.
+  worker_pool_->HandleJobFinished(job_id);
+
+  // This will trigger the PopWorker callback.
+  worker_pool_->PushWorker(worker);
+  ASSERT_EQ(pop_worker_status, PopWorkerStatus::JobFinished);
+  ASSERT_EQ(worker_pool_->GetIdleWorkerSize(), 1);
+
+  worker_pool_->TryKillingIdleWorkers();
+  ASSERT_EQ(mock_rpc_client->exit_count, 1);
+  ASSERT_EQ(mock_rpc_client->last_exit_forced, true);
+  mock_rpc_client->ExitReplySucceed();
+}
+
 TEST_F(WorkerPoolDriverRegisteredTest, TestJobFinishedForceKillIdleWorker) {
   auto job_id = JOB_ID;