From a044eb5e83a564ca412fe430659dded0bcd769ee Mon Sep 17 00:00:00 2001 From: gufengc Date: Mon, 13 Oct 2025 16:12:54 +0800 Subject: [PATCH 1/2] feat(model): Add qwen3-235B-int4 --- pyproject.toml | 1 + src/backend/server/static_config.py | 21 ++++++++++++++++++--- src/parallax/launch.py | 1 + src/scheduling/layer_allocation.py | 2 +- 4 files changed, 21 insertions(+), 4 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index c52667dc..1032b410 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,6 +18,7 @@ packages = [ dependencies = [ "msgpack>=1.0.7", "safetensors>=0.5.1", + "huggingface-hub" "numpy>=1.26", "pyzmq>=25.0", "psutil>=5.9.5", diff --git a/src/backend/server/static_config.py b/src/backend/server/static_config.py index 8324e7ba..d5c82873 100644 --- a/src/backend/server/static_config.py +++ b/src/backend/server/static_config.py @@ -1,4 +1,5 @@ -from mlx_lm.utils import get_model_path, load_config +import json +from pathlib import Path from scheduling.model_info import ModelInfo @@ -20,6 +21,7 @@ # "Qwen/Qwen3-30B-A3B-Thinking-2507-FP8", "Qwen/Qwen3-235B-A22B-Instruct-2507-FP8", "Qwen/Qwen3-235B-A22B-Thinking-2507-FP8", + "Qwen/Qwen3-235B-A22B-GPTQ-Int4", # "Qwen/Qwen2.5-3B-Instruct", # "Qwen/Qwen2.5-7B-Instruct", # "Qwen/Qwen2.5-14B-Instruct", @@ -35,8 +37,21 @@ def get_model_info(model_name): - model_path = get_model_path(model_name)[0] - config = load_config(model_path) + def _load_config_only(name: str) -> dict: + local_path = Path(name) + if local_path.exists(): + config_path = local_path / "config.json" + with open(config_path, "r") as f: + return json.load(f) + + # Hugging Face only – download just config.json + from huggingface_hub import hf_hub_download # type: ignore + + config_file = hf_hub_download(repo_id=name, filename="config.json") + with open(config_file, "r") as f: + return json.load(f) + + config = _load_config_only(model_name) # get quant method quant_method = config.get("quant_method", None) diff --git 
a/src/parallax/launch.py b/src/parallax/launch.py index cb02b4b5..a68e16d7 100644 --- a/src/parallax/launch.py +++ b/src/parallax/launch.py @@ -36,6 +36,7 @@ "Qwen/Qwen3-Next-80B-A3B-Thinking-FP8": "mlx-community/Qwen3-Next-80B-A3B-Thinking-8bit", "Qwen/Qwen3-235B-A22B-Instruct-2507-FP8": "mlx-community/Qwen3-235B-A22B-Instruct-2507-4bit", "Qwen/Qwen3-235B-A22B-Thinking-2507-FP8": "mlx-community/Qwen3-235B-A22B-Thinking-2507-4bit", + "Qwen/Qwen3-235B-A22B-GPTQ-Int4": "mlx-community/Qwen3-235B-A22B-4bit", "moonshotai/Kimi-K2-Instruct": "mlx-community/Kimi-K2-Instruct-4bit", } diff --git a/src/scheduling/layer_allocation.py b/src/scheduling/layer_allocation.py index cd73edd1..91a8e127 100644 --- a/src/scheduling/layer_allocation.py +++ b/src/scheduling/layer_allocation.py @@ -815,7 +815,7 @@ def global_allocation(self) -> bool: total_cap = sum(node.get_decoder_layer_capacity() for node in self.nodes) if num_layers <= 0 or num_nodes == 0 or total_cap < num_layers: - logger.debug( + logger.warning( "[DP] Insufficient resources: nodes=%d, layers=%d, total_cap=%d", num_nodes, num_layers, From f06167825cae548011de4e63296b03d762c8c900 Mon Sep 17 00:00:00 2001 From: gufengc Date: Mon, 13 Oct 2025 16:13:36 +0800 Subject: [PATCH 2/2] fix(deps): add missing trailing comma after huggingface-hub in pyproject.toml --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 1032b410..fdc6d91f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,7 +18,7 @@ packages = [ dependencies = [ "msgpack>=1.0.7", "safetensors>=0.5.1", - "huggingface-hub" + "huggingface-hub", "numpy>=1.26", "pyzmq>=25.0", "psutil>=5.9.5",