PaddlePaddle · yuanlehome · Sep 11, 2025 · Sep 8, 2025 · Sep 9, 2025
diff --git a/docs/usage/environment_variables.md b/docs/usage/environment_variables.md
@@ -72,7 +72,11 @@ environment_variables: dict[str, Callable[[], Any]] = {
     "FD_USE_DEEP_GEMM":
     lambda: bool(int(os.getenv("FD_USE_DEEP_GEMM", "0"))),
 
+    # Whether to enable the model weight cache feature
+    "FD_ENABLE_MODEL_LOAD_CACHE": lambda: bool(int(os.getenv("FD_ENABLE_MODEL_LOAD_CACHE", "0"))),
+
     # Whether to use Machete for wint4 dense GEMM.
     "FD_USE_MACHETE": lambda: os.getenv("FD_USE_MACHETE", "0"),
+
 }
 ```
diff --git a/docs/zh/usage/environment_variables.md b/docs/zh/usage/environment_variables.md
@@ -72,6 +72,9 @@ environment_variables: dict[str, Callable[[], Any]] = {
     "FD_USE_DEEP_GEMM":
     lambda: bool(int(os.getenv("FD_USE_DEEP_GEMM", "0"))),
 
+    # 是否启用模型权重缓存功能
+    "FD_ENABLE_MODEL_LOAD_CACHE": lambda: bool(int(os.getenv("FD_ENABLE_MODEL_LOAD_CACHE", "0"))),
+
     # 是否使用 Machete 后端的 wint4 GEMM.
     "FD_USE_MACHETE": lambda: os.getenv("FD_USE_MACHETE", "0"),
 }

diff --git a/fastdeploy/envs.py b/fastdeploy/envs.py
@@ -98,7 +98,7 @@
     # Whether to use new get_output and save_output method (0 or 1)
     "FD_USE_GET_SAVE_OUTPUT_V1": lambda: bool(int(os.getenv("FD_USE_GET_SAVE_OUTPUT_V1", "0"))),
     # Whether to enable model cache feature
-    "FD_ENABLE_MODEL_CACHE": lambda: bool(int(os.getenv("FD_ENABLE_MODEL_CACHE", "0"))),
+    "FD_ENABLE_MODEL_LOAD_CACHE": lambda: bool(int(os.getenv("FD_ENABLE_MODEL_LOAD_CACHE", "0"))),
 }
 
 

diff --git a/fastdeploy/model_executor/load_weight_utils.py b/fastdeploy/model_executor/load_weight_utils.py
@@ -79,7 +79,7 @@ def is_weight_cache_enabled(fd_config, weight_cache_path=".cache"):
     weight_cache_context = contextlib.nullcontext()
     weight_cache_dir = None
     enable_cache = False
-    if envs.FD_ENABLE_MODEL_CACHE:
+    if envs.FD_ENABLE_MODEL_LOAD_CACHE:
         model_weight_cache_path = os.path.join(fd_config.model_config.model, weight_cache_path)
         # model_type + quantization + tp_size + ep_size
         weight_cache_key = "_".join(
@@ -132,7 +132,11 @@ def wrapper(*args, **kwargs):
 
             with context:
                 result = func(*args, **kwargs)
-            if envs.FD_ENABLE_MODEL_CACHE and weight_cache_dir is not None and not os.path.exists(weight_cache_dir):
+            if (
+                envs.FD_ENABLE_MODEL_LOAD_CACHE
+                and weight_cache_dir is not None
+                and not os.path.exists(weight_cache_dir)
+            ):
                 assert fd_config.quant_config is not None and getattr(
                     fd_config.quant_config, "is_checkpoint_bf16", False
                 ), "Save cache only for dynamic quantization"

diff --git a/tests/model_loader/test_model_cache.py b/tests/model_loader/test_model_cache.py
@@ -41,7 +41,7 @@
         "quantizations": [
             {
                 "quant_type": "wint4",
-                "env": {"FD_ENABLE_MODEL_CACHE": "1"},
+                "env": {"FD_ENABLE_MODEL_LOAD_CACHE": "1"},
             }
         ],
     }