From 096eae34a9948ca5d56b67e01bdb6bc5ba2ff3a5 Mon Sep 17 00:00:00 2001 From: bukejiyu <395822456@qq.com> Date: Mon, 8 Sep 2025 15:48:03 +0800 Subject: [PATCH] update doc --- docs/usage/environment_variables.md | 4 ++++ docs/zh/usage/environment_variables.md | 3 +++ fastdeploy/envs.py | 2 +- fastdeploy/model_executor/load_weight_utils.py | 8 ++++++-- tests/model_loader/test_model_cache.py | 2 +- 5 files changed, 15 insertions(+), 4 deletions(-) diff --git a/docs/usage/environment_variables.md b/docs/usage/environment_variables.md index 6cae7053507..103ff1e5ef2 100644 --- a/docs/usage/environment_variables.md +++ b/docs/usage/environment_variables.md @@ -72,7 +72,11 @@ environment_variables: dict[str, Callable[[], Any]] = { "FD_USE_DEEP_GEMM": lambda: bool(int(os.getenv("FD_USE_DEEP_GEMM", "0"))), + # Whether to enable model cache feature + "FD_ENABLE_MODEL_LOAD_CACHE": lambda: bool(int(os.getenv("FD_ENABLE_MODEL_LOAD_CACHE", "0"))), + # Whether to use Machete for wint4 dense GEMM. "FD_USE_MACHETE": lambda: os.getenv("FD_USE_MACHETE", "0"), + } ``` diff --git a/docs/zh/usage/environment_variables.md b/docs/zh/usage/environment_variables.md index 74d0fc9dd5f..c1289bf984d 100644 --- a/docs/zh/usage/environment_variables.md +++ b/docs/zh/usage/environment_variables.md @@ -72,6 +72,9 @@ environment_variables: dict[str, Callable[[], Any]] = { "FD_USE_DEEP_GEMM": lambda: bool(int(os.getenv("FD_USE_DEEP_GEMM", "0"))), + # 是否启用模型权重缓存功能 + "FD_ENABLE_MODEL_LOAD_CACHE": lambda: bool(int(os.getenv("FD_ENABLE_MODEL_LOAD_CACHE", "0"))), + # 是否使用 Machete 后端的 wint4 GEMM. "FD_USE_MACHETE": lambda: os.getenv("FD_USE_MACHETE", "0"), } diff --git a/fastdeploy/envs.py b/fastdeploy/envs.py index 2ec78cf3be2..eaac558ee53 100644 --- a/fastdeploy/envs.py +++ b/fastdeploy/envs.py @@ -98,7 +98,7 @@ # Whether to use new get_output and save_output method (0 or 1) "FD_USE_GET_SAVE_OUTPUT_V1": lambda: bool(int(os.getenv("FD_USE_GET_SAVE_OUTPUT_V1", "0"))), # Whether to enable model cache feature - "FD_ENABLE_MODEL_CACHE": lambda: bool(int(os.getenv("FD_ENABLE_MODEL_CACHE", "0"))), + "FD_ENABLE_MODEL_LOAD_CACHE": lambda: bool(int(os.getenv("FD_ENABLE_MODEL_LOAD_CACHE", "0"))), } diff --git a/fastdeploy/model_executor/load_weight_utils.py b/fastdeploy/model_executor/load_weight_utils.py index f1f6ee28900..961043be73a 100644 --- a/fastdeploy/model_executor/load_weight_utils.py +++ b/fastdeploy/model_executor/load_weight_utils.py @@ -79,7 +79,7 @@ def is_weight_cache_enabled(fd_config, weight_cache_path=".cache"): weight_cache_context = contextlib.nullcontext() weight_cache_dir = None enable_cache = False - if envs.FD_ENABLE_MODEL_CACHE: + if envs.FD_ENABLE_MODEL_LOAD_CACHE: model_weight_cache_path = os.path.join(fd_config.model_config.model, weight_cache_path) # model_type + quantization + tp_size + ep_size weight_cache_key = "_".join( @@ -132,7 +132,11 @@ def wrapper(*args, **kwargs): with context: result = func(*args, **kwargs) - if envs.FD_ENABLE_MODEL_CACHE and weight_cache_dir is not None and not os.path.exists(weight_cache_dir): + if ( + envs.FD_ENABLE_MODEL_LOAD_CACHE + and weight_cache_dir is not None + and not os.path.exists(weight_cache_dir) + ): assert fd_config.quant_config is not None and getattr( fd_config.quant_config, "is_checkpoint_bf16", False ), "Save cache only for dynamic quantization" diff --git a/tests/model_loader/test_model_cache.py b/tests/model_loader/test_model_cache.py index 8b1504efa09..342c901af20 100644 --- a/tests/model_loader/test_model_cache.py +++ b/tests/model_loader/test_model_cache.py @@ -41,7 +41,7 @@ "quantizations": [ { "quant_type": "wint4", - "env": {"FD_ENABLE_MODEL_CACHE": "1"}, + "env": {"FD_ENABLE_MODEL_LOAD_CACHE": "1"}, } ], }