1 change: 1 addition & 0 deletions pyproject.toml
@@ -18,6 +18,7 @@ packages = [
 dependencies = [
     "msgpack>=1.0.7",
     "safetensors>=0.5.1",
+    "huggingface-hub",
     "numpy>=1.26",
     "pyzmq>=25.0",
     "psutil>=5.9.5",

21 changes: 18 additions & 3 deletions src/backend/server/static_config.py
@@ -1,4 +1,5 @@
-from mlx_lm.utils import get_model_path, load_config
+import json
+from pathlib import Path
 
 from scheduling.model_info import ModelInfo
 
@@ -20,6 +21,7 @@
     # "Qwen/Qwen3-30B-A3B-Thinking-2507-FP8",
     "Qwen/Qwen3-235B-A22B-Instruct-2507-FP8",
     "Qwen/Qwen3-235B-A22B-Thinking-2507-FP8",
+    "Qwen/Qwen3-235B-A22B-GPTQ-Int4",
     # "Qwen/Qwen2.5-3B-Instruct",
     # "Qwen/Qwen2.5-7B-Instruct",
     # "Qwen/Qwen2.5-14B-Instruct",
@@ -35,8 +37,21 @@
 
 
 def get_model_info(model_name):
-    model_path = get_model_path(model_name)[0]
-    config = load_config(model_path)
+    def _load_config_only(name: str) -> dict:
+        local_path = Path(name)
+        if local_path.exists():
+            config_path = local_path / "config.json"
+            with open(config_path, "r") as f:
+                return json.load(f)
+
+        # Hugging Face only – download just config.json
+        from huggingface_hub import hf_hub_download  # type: ignore
+
+        config_file = hf_hub_download(repo_id=name, filename="config.json")
+        with open(config_file, "r") as f:
+            return json.load(f)
+
+    config = _load_config_only(model_name)
 
     # get quant method
     quant_method = config.get("quant_method", None)

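To see the new config-only path in isolation, here is a standalone sketch mirroring the helper in the diff; the repo id in the usage example is illustrative, and this avoids the old mlx_lm route, which resolves a full model snapshot just to read metadata (the apparent motivation for the pyproject.toml dependency):

import json
from pathlib import Path


def load_config_only(name: str) -> dict:
    """Load a model's config.json from a local dir or the HF Hub (config only)."""
    local_path = Path(name)
    if local_path.exists():
        # Local checkout: read config.json straight from the model directory.
        with open(local_path / "config.json", "r") as f:
            return json.load(f)
    # Hub repo id: fetch only config.json, not the multi-GB weights.
    from huggingface_hub import hf_hub_download

    config_file = hf_hub_download(repo_id=name, filename="config.json")
    with open(config_file, "r") as f:
        return json.load(f)


if __name__ == "__main__":
    # Example repo id; any public model with a config.json works.
    cfg = load_config_only("Qwen/Qwen2.5-3B-Instruct")
    print(cfg.get("quant_method"), cfg.get("num_hidden_layers"))
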
1 change: 1 addition & 0 deletions src/parallax/launch.py
@@ -36,6 +36,7 @@
     "Qwen/Qwen3-Next-80B-A3B-Thinking-FP8": "mlx-community/Qwen3-Next-80B-A3B-Thinking-8bit",
     "Qwen/Qwen3-235B-A22B-Instruct-2507-FP8": "mlx-community/Qwen3-235B-A22B-Instruct-2507-4bit",
     "Qwen/Qwen3-235B-A22B-Thinking-2507-FP8": "mlx-community/Qwen3-235B-A22B-Thinking-2507-4bit",
+    "Qwen/Qwen3-235B-A22B-GPTQ-Int4": "mlx-community/Qwen3-235B-A22B-4bit",
     "moonshotai/Kimi-K2-Instruct": "mlx-community/Kimi-K2-Instruct-4bit",
 }
 

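For context, the table above maps FP8/GPTQ repo ids to MLX-community conversions. A minimal sketch of a lookup over such a table; the names MLX_FALLBACKS and resolve_mlx_model are hypothetical, not taken from launch.py, and the actual lookup logic may differ:

# Hypothetical helper mirroring the mapping added above.
MLX_FALLBACKS = {
    "Qwen/Qwen3-235B-A22B-GPTQ-Int4": "mlx-community/Qwen3-235B-A22B-4bit",
}


def resolve_mlx_model(name: str) -> str:
    # Unknown names pass through unchanged.
    return MLX_FALLBACKS.get(name, name)


assert resolve_mlx_model("Qwen/Qwen3-235B-A22B-GPTQ-Int4") == "mlx-community/Qwen3-235B-A22B-4bit"
assert resolve_mlx_model("some/other-model") == "some/other-model"
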
2 changes: 1 addition & 1 deletion src/scheduling/layer_allocation.py
@@ -815,7 +815,7 @@ def global_allocation(self) -> bool:
         total_cap = sum(node.get_decoder_layer_capacity() for node in self.nodes)
 
         if num_layers <= 0 or num_nodes == 0 or total_cap < num_layers:
-            logger.debug(
+            logger.warning(
                 "[DP] Insufficient resources: nodes=%d, layers=%d, total_cap=%d",
                 num_nodes,
                 num_layers,

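A one-line change with a practical effect: at the stdlib logging default level (WARNING), the insufficient-resources message now surfaces without any logging configuration. A minimal illustration, assuming the project relies on standard logging levels:

import logging

logging.basicConfig()  # root level defaults to WARNING
logger = logging.getLogger("scheduling.layer_allocation")

# Filtered out under the default level:
logger.debug("[DP] Insufficient resources: nodes=%d, layers=%d, total_cap=%d", 0, 64, 0)
# Emitted:
logger.warning("[DP] Insufficient resources: nodes=%d, layers=%d, total_cap=%d", 0, 64, 0)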