diff --git a/cpp/tensorrt_llm/batch_manager/cacheTransceiver.cpp b/cpp/tensorrt_llm/batch_manager/cacheTransceiver.cpp index 5e6d4ff1901..b165dee6b8b 100644 --- a/cpp/tensorrt_llm/batch_manager/cacheTransceiver.cpp +++ b/cpp/tensorrt_llm/batch_manager/cacheTransceiver.cpp @@ -89,7 +89,7 @@ std::unique_ptr CacheTransceiverFactory::createCacheTransc } else { - backendType = executor::CacheTransceiverConfig::BackendType::NIXL; + backendType = executor::CacheTransceiverConfig::BackendType::UCX; } } cacheTransceiverConfig.value().setBackendType(backendType); diff --git a/docs/source/features/disagg-serving.md b/docs/source/features/disagg-serving.md index cbeea3cc503..983f2016499 100644 --- a/docs/source/features/disagg-serving.md +++ b/docs/source/features/disagg-serving.md @@ -118,7 +118,7 @@ cache_transceiver_config: max_tokens_in_buffer: ``` -`backend` specifies the communication backend for transferring the kvCache, valid options include `DEFAULT`,`UCX`, `NIXL`, and `MPI`, the default backend is NIXL. +`backend` specifies the communication backend for transferring the kvCache. Valid options include `DEFAULT`, `UCX`, `NIXL`, and `MPI`; the default backend is UCX. `max_tokens_in_buffer` defines the buffer size for kvCache transfers, it is recommended to set this value greater than or equal to the maximum ISL (Input Sequence Length) of all requests for optimal performance. diff --git a/examples/disaggregated/README.md b/examples/disaggregated/README.md index 511bce36195..fe44e93d9b9 100644 --- a/examples/disaggregated/README.md +++ b/examples/disaggregated/README.md @@ -12,7 +12,7 @@ The `trtllm-serve` command supports the `extra-llm-config.yaml` parameter. In th ```yaml cache_transceiver_config: - # KV cache transmission backend. Valid options include `DEFAULT` (i.e., NIXL), `UCX`, `NIXL`. + # KV cache transmission backend. Valid options include `DEFAULT` (i.e., UCX), `UCX`, `NIXL`. backend: # KV cache buffer size. 
Set it ≥ the maximum ISL (Input Sequence Length) for best performance. max_tokens_in_buffer: diff --git a/tensorrt_llm/_torch/pyexecutor/kv_cache_transceiver.py b/tensorrt_llm/_torch/pyexecutor/kv_cache_transceiver.py index 73ee3f5c7b5..7070fd1f927 100644 --- a/tensorrt_llm/_torch/pyexecutor/kv_cache_transceiver.py +++ b/tensorrt_llm/_torch/pyexecutor/kv_cache_transceiver.py @@ -38,10 +38,10 @@ def create_kv_cache_transceiver( if cache_transceiver_config.backend == "DEFAULT": # When cache_transceiver_config.backend is not set, fallback to env_vars settings - # NIXL is the default backend - cache_transceiver_config.backend = "NIXL" + # UCX is the default backend + cache_transceiver_config.backend = "UCX" # Ordered by priority - env_vars = [("TRTLLM_USE_UCX_KVCACHE", "UCX"), + env_vars = [("TRTLLM_USE_NIXL_KVCACHE", "NIXL"), ("TRTLLM_USE_MPI_KVCACHE", "MPI")] for env_var, be_type in env_vars: if getenv(env_var) == "1":