From 096eae34a9948ca5d56b67e01bdb6bc5ba2ff3a5 Mon Sep 17 00:00:00 2001
From: bukejiyu <395822456@qq.com>
Date: Mon, 8 Sep 2025 15:48:03 +0800
Subject: [PATCH] Rename FD_ENABLE_MODEL_CACHE to FD_ENABLE_MODEL_LOAD_CACHE and update docs

---
 docs/usage/environment_variables.md            | 4 ++++
 docs/zh/usage/environment_variables.md         | 3 +++
 fastdeploy/envs.py                             | 2 +-
 fastdeploy/model_executor/load_weight_utils.py | 8 ++++++--
 tests/model_loader/test_model_cache.py         | 2 +-
 5 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/docs/usage/environment_variables.md b/docs/usage/environment_variables.md
index 6cae7053507..103ff1e5ef2 100644
--- a/docs/usage/environment_variables.md
+++ b/docs/usage/environment_variables.md
@@ -72,7 +72,11 @@ environment_variables: dict[str, Callable[[], Any]] = {
     "FD_USE_DEEP_GEMM":
     lambda: bool(int(os.getenv("FD_USE_DEEP_GEMM", "0"))),
 
+    # Whether to enable model cache feature
+    "FD_ENABLE_MODEL_LOAD_CACHE": lambda: bool(int(os.getenv("FD_ENABLE_MODEL_LOAD_CACHE", "0"))),
+
     # Whether to use Machete for wint4 dense GEMM.
     "FD_USE_MACHETE": lambda: os.getenv("FD_USE_MACHETE", "0"),
+
 }
 ```
diff --git a/docs/zh/usage/environment_variables.md b/docs/zh/usage/environment_variables.md
index 74d0fc9dd5f..c1289bf984d 100644
--- a/docs/zh/usage/environment_variables.md
+++ b/docs/zh/usage/environment_variables.md
@@ -72,6 +72,9 @@ environment_variables: dict[str, Callable[[], Any]] = {
     "FD_USE_DEEP_GEMM":
     lambda: bool(int(os.getenv("FD_USE_DEEP_GEMM", "0"))),
 
+    # 是否启用模型权重缓存功能
+    "FD_ENABLE_MODEL_LOAD_CACHE": lambda: bool(int(os.getenv("FD_ENABLE_MODEL_LOAD_CACHE", "0"))),
+
     # 是否使用 Machete 后端的 wint4 GEMM.
     "FD_USE_MACHETE": lambda: os.getenv("FD_USE_MACHETE", "0"),
 }
diff --git a/fastdeploy/envs.py b/fastdeploy/envs.py
index 2ec78cf3be2..eaac558ee53 100644
--- a/fastdeploy/envs.py
+++ b/fastdeploy/envs.py
@@ -98,7 +98,7 @@
     # Whether to use new get_output and save_output method (0 or 1)
     "FD_USE_GET_SAVE_OUTPUT_V1": lambda: bool(int(os.getenv("FD_USE_GET_SAVE_OUTPUT_V1", "0"))),
     # Whether to enable model cache feature
-    "FD_ENABLE_MODEL_CACHE": lambda: bool(int(os.getenv("FD_ENABLE_MODEL_CACHE", "0"))),
+    "FD_ENABLE_MODEL_LOAD_CACHE": lambda: bool(int(os.getenv("FD_ENABLE_MODEL_LOAD_CACHE", "0"))),
 }
 
 
diff --git a/fastdeploy/model_executor/load_weight_utils.py b/fastdeploy/model_executor/load_weight_utils.py
index f1f6ee28900..961043be73a 100644
--- a/fastdeploy/model_executor/load_weight_utils.py
+++ b/fastdeploy/model_executor/load_weight_utils.py
@@ -79,7 +79,7 @@ def is_weight_cache_enabled(fd_config, weight_cache_path=".cache"):
     weight_cache_context = contextlib.nullcontext()
     weight_cache_dir = None
     enable_cache = False
-    if envs.FD_ENABLE_MODEL_CACHE:
+    if envs.FD_ENABLE_MODEL_LOAD_CACHE:
         model_weight_cache_path = os.path.join(fd_config.model_config.model, weight_cache_path)
         # model_type + quantization + tp_size + ep_size
         weight_cache_key = "_".join(
@@ -132,7 +132,11 @@ def wrapper(*args, **kwargs):
 
             with context:
                 result = func(*args, **kwargs)
-            if envs.FD_ENABLE_MODEL_CACHE and weight_cache_dir is not None and not os.path.exists(weight_cache_dir):
+            if (
+                envs.FD_ENABLE_MODEL_LOAD_CACHE
+                and weight_cache_dir is not None
+                and not os.path.exists(weight_cache_dir)
+            ):
                 assert fd_config.quant_config is not None and getattr(
                     fd_config.quant_config, "is_checkpoint_bf16", False
                 ), "Save cache only for dynamic quantization"
diff --git a/tests/model_loader/test_model_cache.py b/tests/model_loader/test_model_cache.py
index 8b1504efa09..342c901af20 100644
--- a/tests/model_loader/test_model_cache.py
+++ b/tests/model_loader/test_model_cache.py
@@ -41,7 +41,7 @@
         "quantizations": [
             {
                 "quant_type": "wint4",
-                "env": {"FD_ENABLE_MODEL_CACHE": "1"},
+                "env": {"FD_ENABLE_MODEL_LOAD_CACHE": "1"},
             }
         ],
     }