From 5aa76e7fa8c7d61ad6ccebcc3c9abb9c8dbfc848 Mon Sep 17 00:00:00 2001 From: yuhao_zhang Date: Wed, 29 Oct 2025 15:18:57 +0800 Subject: [PATCH 01/13] add model --- src/backend/server/static_config.py | 1 + src/parallax/launch.py | 1 + 2 files changed, 2 insertions(+) diff --git a/src/backend/server/static_config.py b/src/backend/server/static_config.py index 39dd184a..d2691c39 100644 --- a/src/backend/server/static_config.py +++ b/src/backend/server/static_config.py @@ -50,6 +50,7 @@ "deepseek-ai/DeepSeek-V3", "deepseek-ai/DeepSeek-V2", "MiniMaxAI/MiniMax-M2", + "zai-org/GLM-4.6", ] NODE_JOIN_COMMAND_LOCAL_NETWORK = """parallax join""" diff --git a/src/parallax/launch.py b/src/parallax/launch.py index 7627388d..e4443c23 100644 --- a/src/parallax/launch.py +++ b/src/parallax/launch.py @@ -41,6 +41,7 @@ "moonshotai/Kimi-K2-Instruct": "mlx-community/Kimi-K2-Instruct-4bit", "deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct": "mlx-community/DeepSeek-Coder-V2-Lite-Instruct-4bit-mlx", "MiniMaxAI/MiniMax-M2": "mlx-community/MiniMax-M2-4bit", + "zai-org/GLM-4.6": "mlx-community/GLM-4.6-4bit", } if __name__ == "__main__": From e5e40a9298c739d691ecd748cff4305b6ba316ba Mon Sep 17 00:00:00 2001 From: Alien mac air <2214632589@qq.com> Date: Wed, 29 Oct 2025 17:09:18 +0800 Subject: [PATCH 02/13] fix params --- src/backend/server/static_config.py | 29 +++++++++++++++++++++++++---- src/parallax/launch.py | 15 +-------------- src/scheduling/model_info.py | 5 +++++ src/scheduling/node.py | 17 ++++++++++++++--- 4 files changed, 45 insertions(+), 21 deletions(-) diff --git a/src/backend/server/static_config.py b/src/backend/server/static_config.py index d2691c39..761100a2 100644 --- a/src/backend/server/static_config.py +++ b/src/backend/server/static_config.py @@ -1,6 +1,6 @@ import json from pathlib import Path - +import logging from scheduling.model_info import ModelInfo # Supported model list @@ -53,6 +53,21 @@ "zai-org/GLM-4.6", ] +MLX_MODEL_NAME_MAP = { + "openai/gpt-oss-20b": "mlx-community/gpt-oss-20b-MXFP4-Q8", + "openai/gpt-oss-120b": "mlx-community/gpt-oss-120b-4bit", + "Qwen/Qwen3-Next-80B-A3B-Instruct-FP8": "mlx-community/Qwen3-Next-80B-A3B-Instruct-8bit", + "Qwen/Qwen3-Next-80B-A3B-Thinking-FP8": "mlx-community/Qwen3-Next-80B-A3B-Thinking-8bit", + "Qwen/Qwen3-235B-A22B-Instruct-2507-FP8": "mlx-community/Qwen3-235B-A22B-Instruct-2507-4bit", + "Qwen/Qwen3-235B-A22B-Thinking-2507-FP8": "mlx-community/Qwen3-235B-A22B-Thinking-2507-4bit", + "Qwen/Qwen3-235B-A22B-GPTQ-Int4": "mlx-community/Qwen3-235B-A22B-4bit", + "moonshotai/Kimi-K2-Instruct": "mlx-community/Kimi-K2-Instruct-4bit", + "deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct": "mlx-community/DeepSeek-Coder-V2-Lite-Instruct-4bit-mlx", + "MiniMaxAI/MiniMax-M2": "mlx-community/MiniMax-M2-4bit", + "zai-org/GLM-4.6": "mlx-community/GLM-4.6-4bit", +} + +logger = logging.getLogger(__name__) NODE_JOIN_COMMAND_LOCAL_NETWORK = """parallax join""" NODE_JOIN_COMMAND_PUBLIC_NETWORK = """parallax join -s {scheduler_addr} """ @@ -76,6 +91,8 @@ def _load_config_only(name: str) -> dict: config = _load_config_only(model_name) # get quant method + # logger.info(f"Loading model config from {model_name}") + quant_method = config.get("quant_method", None) quantization_config = config.get("quantization_config", None) if quant_method is None and quantization_config is not None: @@ -88,9 +105,12 @@ def _load_config_only(name: str) -> dict: elif quant_method in ("mxfp4", "int4", "awq", "gptq"): param_bytes_per_element = 0.5 - # Only for hack, fix it when support different 
quantization bits - # if "minimax-m2" in model_name.lower(): - # param_bytes_per_element = 0.5 + mlx_param_bytes_per_element = param_bytes_per_element + if model_name in MLX_MODEL_NAME_MAP: + mlx_config = _load_config_only(MLX_MODEL_NAME_MAP[model_name]) + mlx_quant_dict = mlx_config.get("quantization_config", None) + if "bits" in mlx_quant_dict: + mlx_param_bytes_per_element = mlx_quant_dict["bits"] / 8 # get local experts num_local_experts = config.get("num_local_experts", None) @@ -112,6 +132,7 @@ def _load_config_only(name: str) -> dict: num_layers=config.get("num_hidden_layers", 0), ffn_num_projections=3, param_bytes_per_element=param_bytes_per_element, + mlx_param_bytes_per_element=mlx_param_bytes_per_element, cache_bytes_per_element=2, embedding_bytes_per_element=2, num_local_experts=num_local_experts, diff --git a/src/parallax/launch.py b/src/parallax/launch.py index e4443c23..3795dccf 100644 --- a/src/parallax/launch.py +++ b/src/parallax/launch.py @@ -26,23 +26,10 @@ from parallax.utils.utils import get_current_device from parallax_utils.ascii_anime import display_parallax_join from parallax_utils.logging_config import get_logger, set_log_level +from backend.server.static_config import MLX_MODEL_NAME_MAP logger = get_logger("parallax.launch") -"""Currently hard code model name for MAC""" -MLX_MODEL_NAME_MAP = { - "openai/gpt-oss-20b": "mlx-community/gpt-oss-20b-MXFP4-Q8", - "openai/gpt-oss-120b": "mlx-community/gpt-oss-120b-4bit", - "Qwen/Qwen3-Next-80B-A3B-Instruct-FP8": "mlx-community/Qwen3-Next-80B-A3B-Instruct-8bit", - "Qwen/Qwen3-Next-80B-A3B-Thinking-FP8": "mlx-community/Qwen3-Next-80B-A3B-Thinking-8bit", - "Qwen/Qwen3-235B-A22B-Instruct-2507-FP8": "mlx-community/Qwen3-235B-A22B-Instruct-2507-4bit", - "Qwen/Qwen3-235B-A22B-Thinking-2507-FP8": "mlx-community/Qwen3-235B-A22B-Thinking-2507-4bit", - "Qwen/Qwen3-235B-A22B-GPTQ-Int4": "mlx-community/Qwen3-235B-A22B-4bit", - "moonshotai/Kimi-K2-Instruct": "mlx-community/Kimi-K2-Instruct-4bit", - "deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct": "mlx-community/DeepSeek-Coder-V2-Lite-Instruct-4bit-mlx", - "MiniMaxAI/MiniMax-M2": "mlx-community/MiniMax-M2-4bit", - "zai-org/GLM-4.6": "mlx-community/GLM-4.6-4bit", -} if __name__ == "__main__": multiprocessing.set_start_method("spawn", force=True) diff --git a/src/scheduling/model_info.py b/src/scheduling/model_info.py index 5d6cd9f2..1736aecf 100644 --- a/src/scheduling/model_info.py +++ b/src/scheduling/model_info.py @@ -37,6 +37,7 @@ class ModelInfo: tie_embedding: bool = False # Default int8 param_bytes_per_element: float = 1 + mlx_param_bytes_per_element: float = 1 cache_bytes_per_element: int = 1 embedding_bytes_per_element: int = 1 @@ -70,6 +71,10 @@ def k_dim(self) -> int: """Return key head dim.""" return self.num_kv_heads * self.head_size_k + @property + def mlx_bit_factor(self) -> float: + return self.mlx_param_bytes_per_element / self.param_bytes_per_element + @property def embedding_io_bytes(self) -> int: """Estimate memory for input_embeddings / or lm_head.""" diff --git a/src/scheduling/node.py b/src/scheduling/node.py index 5824ddae..f0ea4a6c 100644 --- a/src/scheduling/node.py +++ b/src/scheduling/node.py @@ -15,6 +15,7 @@ from typing import Callable, Dict, List, Optional from parallax_utils.logging_config import get_logger +from parallax.utils.utils import get_current_device from parallax_utils.utils import bytes_per_element, compute_max_batch_size from scheduling.model_info import ModelInfo @@ -294,9 +295,19 @@ def get_decoder_layer_capacity( available_memory_bytes, 
self.model_info.decoder_layer_io_bytes(roofline=False), ) - return floor( - available_memory_bytes / self.model_info.decoder_layer_io_bytes(roofline=False) - ) + if get_current_device() == "mlx": + # For mlx, consider mlx bit factor + return floor( + available_memory_bytes + / ( + self.model_info.decoder_layer_io_bytes(roofline=False) + * self.model_info.mlx_bit_factor + ) + ) + else: + return floor( + available_memory_bytes / self.model_info.decoder_layer_io_bytes(roofline=False) + ) @property def per_decoder_layer_kv_cache_memory(self) -> Optional[int]: From 7c47b3b55538db7d92f1481e7e8598a49056de1b Mon Sep 17 00:00:00 2001 From: yuhao_zhang Date: Wed, 29 Oct 2025 18:00:42 +0800 Subject: [PATCH 03/13] update --- src/backend/server/static_config.py | 3 ++- src/parallax/launch.py | 2 +- src/scheduling/node.py | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/backend/server/static_config.py b/src/backend/server/static_config.py index 761100a2..3c70de3f 100644 --- a/src/backend/server/static_config.py +++ b/src/backend/server/static_config.py @@ -1,6 +1,7 @@ import json -from pathlib import Path import logging +from pathlib import Path + from scheduling.model_info import ModelInfo # Supported model list diff --git a/src/parallax/launch.py b/src/parallax/launch.py index 3795dccf..736118fb 100644 --- a/src/parallax/launch.py +++ b/src/parallax/launch.py @@ -18,6 +18,7 @@ import tempfile import threading +from backend.server.static_config import MLX_MODEL_NAME_MAP from common.version_check import check_latest_release from parallax.p2p.server import ServerState, launch_p2p_server from parallax.server.executor import Executor @@ -26,7 +27,6 @@ from parallax.utils.utils import get_current_device from parallax_utils.ascii_anime import display_parallax_join from parallax_utils.logging_config import get_logger, set_log_level -from backend.server.static_config import MLX_MODEL_NAME_MAP logger = get_logger("parallax.launch") diff --git a/src/scheduling/node.py b/src/scheduling/node.py index f0ea4a6c..6f4882ea 100644 --- a/src/scheduling/node.py +++ b/src/scheduling/node.py @@ -14,8 +14,8 @@ from math import floor from typing import Callable, Dict, List, Optional -from parallax_utils.logging_config import get_logger from parallax.utils.utils import get_current_device +from parallax_utils.logging_config import get_logger from parallax_utils.utils import bytes_per_element, compute_max_batch_size from scheduling.model_info import ModelInfo From 65aa8106a8e43a61dd68d96cbc1ecf0d280e461e Mon Sep 17 00:00:00 2001 From: yuhao_zhang Date: Wed, 29 Oct 2025 18:55:40 +0800 Subject: [PATCH 04/13] update device get way --- src/backend/server/rpc_connection_handler.py | 18 ++++++++++++------ src/backend/server/static_config.py | 4 +++- src/parallax/server/server_info.py | 4 ++++ src/scheduling/node.py | 4 ++-- 4 files changed, 21 insertions(+), 9 deletions(-) diff --git a/src/backend/server/rpc_connection_handler.py b/src/backend/server/rpc_connection_handler.py index 0fee9922..6fc4ec4c 100644 --- a/src/backend/server/rpc_connection_handler.py +++ b/src/backend/server/rpc_connection_handler.py @@ -140,12 +140,18 @@ def get_layer_allocation(self, current_node_id): list_node_allocations = self.scheduler.list_node_allocations() for node_id, start_layer, end_layer in list_node_allocations: if current_node_id == node_id: - return { - "node_id": node_id, - "model_name": self.scheduler.model_info.model_name, - "start_layer": start_layer, - "end_layer": end_layer, - } + node = 
self.scheduler.get_node(node_id) + if node: + return { + "node_id": node_id, + "model_name": ( + node.model_info.model_name + if node.hardware.device != "mlx" + else node.model_info.mlx_model_name + ), + "start_layer": start_layer, + "end_layer": end_layer, + } return {} def build_node(self, node_json: dict): diff --git a/src/backend/server/static_config.py b/src/backend/server/static_config.py index 3c70de3f..f2c10e18 100644 --- a/src/backend/server/static_config.py +++ b/src/backend/server/static_config.py @@ -107,8 +107,10 @@ def _load_config_only(name: str) -> dict: param_bytes_per_element = 0.5 mlx_param_bytes_per_element = param_bytes_per_element + mlx_model_name = model_name if model_name in MLX_MODEL_NAME_MAP: - mlx_config = _load_config_only(MLX_MODEL_NAME_MAP[model_name]) + mlx_model_name = MLX_MODEL_NAME_MAP[model_name] + mlx_config = _load_config_only(mlx_model_name) mlx_quant_dict = mlx_config.get("quantization_config", None) if "bits" in mlx_quant_dict: mlx_param_bytes_per_element = mlx_quant_dict["bits"] / 8 diff --git a/src/parallax/server/server_info.py b/src/parallax/server/server_info.py index 4d056c42..e83d675e 100644 --- a/src/parallax/server/server_info.py +++ b/src/parallax/server/server_info.py @@ -182,6 +182,7 @@ def detect_node_hardware(node_id: Optional[str]) -> Dict[str, Any]: "gpu_name": "Unknown", "memory_gb": 16.0, "memory_bandwidth_gbps": 100.0, + "device": "Unknown", } if isinstance(hw, NvidiaHardwareInfo): @@ -191,6 +192,7 @@ def detect_node_hardware(node_id: Optional[str]) -> Dict[str, Any]: "gpu_name": hw.chip, "memory_gb": hw.vram_gb, "memory_bandwidth_gbps": hw.memory_bandwidth_gbps, + "device": "cuda", } if isinstance(hw, AppleSiliconHardwareInfo): # Use unified memory size as memory_gb; bandwidth rough estimate per family @@ -201,6 +203,7 @@ def detect_node_hardware(node_id: Optional[str]) -> Dict[str, Any]: "gpu_name": hw.chip, "memory_gb": hw.total_ram_gb, "memory_bandwidth_gbps": est_bandwidth, + "device": "mlx", } # Generic fallback return { @@ -209,6 +212,7 @@ def detect_node_hardware(node_id: Optional[str]) -> Dict[str, Any]: "gpu_name": "Unknown", "memory_gb": 16.0, "memory_bandwidth_gbps": 100.0, + "device": "Unknown", } diff --git a/src/scheduling/node.py b/src/scheduling/node.py index 6f4882ea..d33276c7 100644 --- a/src/scheduling/node.py +++ b/src/scheduling/node.py @@ -14,7 +14,6 @@ from math import floor from typing import Callable, Dict, List, Optional -from parallax.utils.utils import get_current_device from parallax_utils.logging_config import get_logger from parallax_utils.utils import bytes_per_element, compute_max_batch_size from scheduling.model_info import ModelInfo @@ -36,6 +35,7 @@ class NodeHardwareInfo: gpu_name: str memory_gb: float memory_bandwidth_gbps: float + device: str @dataclass @@ -295,7 +295,7 @@ def get_decoder_layer_capacity( available_memory_bytes, self.model_info.decoder_layer_io_bytes(roofline=False), ) - if get_current_device() == "mlx": + if self.hardware.device == "mlx": # For mlx, consider mlx bit factor return floor( available_memory_bytes From 83c99ac94283ca8f9280e5175eadc41bdcb8e918 Mon Sep 17 00:00:00 2001 From: yuhao_zhang Date: Wed, 29 Oct 2025 18:56:51 +0800 Subject: [PATCH 05/13] update --- src/scheduling/model_info.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/scheduling/model_info.py b/src/scheduling/model_info.py index 1736aecf..bd16da69 100644 --- a/src/scheduling/model_info.py +++ b/src/scheduling/model_info.py @@ -23,6 +23,7 @@ class ModelInfo: """ model_name: str + mlx_model_name: 
str = None head_size: int hidden_dim: int intermediate_dim: int From 5d9ab45f84bb3e167457824ceb658100d9cb7c5e Mon Sep 17 00:00:00 2001 From: yuhao_zhang Date: Wed, 29 Oct 2025 19:04:03 +0800 Subject: [PATCH 06/13] update --- src/scheduling/model_info.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/scheduling/model_info.py b/src/scheduling/model_info.py index bd16da69..b79d9e45 100644 --- a/src/scheduling/model_info.py +++ b/src/scheduling/model_info.py @@ -23,7 +23,7 @@ class ModelInfo: """ model_name: str - mlx_model_name: str = None + mlx_model_name: str head_size: int hidden_dim: int intermediate_dim: int From 8b548c2ddfed8cc30f2f0d68c47dfb49c56c2684 Mon Sep 17 00:00:00 2001 From: yuhao_zhang Date: Wed, 29 Oct 2025 19:18:39 +0800 Subject: [PATCH 07/13] update --- src/backend/server/rpc_connection_handler.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/backend/server/rpc_connection_handler.py b/src/backend/server/rpc_connection_handler.py index 6fc4ec4c..234652c6 100644 --- a/src/backend/server/rpc_connection_handler.py +++ b/src/backend/server/rpc_connection_handler.py @@ -183,10 +183,12 @@ def build_hardware(self, hardware_json): gpu_name = hardware_json.get("gpu_name") memory_gb = hardware_json.get("memory_gb") memory_bandwidth_gbps = hardware_json.get("memory_bandwidth_gbps") + device = hardware_json.get("device") return NodeHardwareInfo( node_id=node_id, tflops_fp16=tflops_fp16, gpu_name=gpu_name, memory_gb=memory_gb, memory_bandwidth_gbps=memory_bandwidth_gbps, + device=device, ) From d7f6c33c2b12ea1b934ef31cf34481712f269229 Mon Sep 17 00:00:00 2001 From: yuhao_zhang Date: Wed, 29 Oct 2025 19:24:11 +0800 Subject: [PATCH 08/13] update --- src/backend/server/rpc_connection_handler.py | 2 +- src/backend/server/static_config.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/backend/server/rpc_connection_handler.py b/src/backend/server/rpc_connection_handler.py index 234652c6..0288f26c 100644 --- a/src/backend/server/rpc_connection_handler.py +++ b/src/backend/server/rpc_connection_handler.py @@ -140,7 +140,7 @@ def get_layer_allocation(self, current_node_id): list_node_allocations = self.scheduler.list_node_allocations() for node_id, start_layer, end_layer in list_node_allocations: if current_node_id == node_id: - node = self.scheduler.get_node(node_id) + node = self.scheduler.node_id_to_node.get(node_id) if node: return { "node_id": node_id, diff --git a/src/backend/server/static_config.py b/src/backend/server/static_config.py index f2c10e18..d8934c92 100644 --- a/src/backend/server/static_config.py +++ b/src/backend/server/static_config.py @@ -124,6 +124,7 @@ def _load_config_only(name: str) -> dict: model_info = ModelInfo( model_name=model_name, + mlx_model_name=mlx_model_name, head_size=config.get("head_dim", 128), qk_nope_head_dim=config.get("qk_nope_head_dim", None), qk_rope_head_dim=config.get("qk_rope_head_dim", None), From 1dcf6025821f8b768fb0c008b4fc84396a69b292 Mon Sep 17 00:00:00 2001 From: yuhao_zhang Date: Wed, 29 Oct 2025 19:28:15 +0800 Subject: [PATCH 09/13] update test --- .../scheduler_tests/test_layer_allocation.py | 8 ++--- tests/scheduler_tests/test_scheduler.py | 1 + tests/scheduler_tests/test_utils.py | 29 ++++++++++++++++--- 3 files changed, 30 insertions(+), 8 deletions(-) diff --git a/tests/scheduler_tests/test_layer_allocation.py b/tests/scheduler_tests/test_layer_allocation.py index 780a0de8..972950f0 100644 --- a/tests/scheduler_tests/test_layer_allocation.py +++ 
b/tests/scheduler_tests/test_layer_allocation.py @@ -26,10 +26,10 @@ def _build_node(gpu_type: str, model: ModelInfo, id_suffix: str = "") -> Node: hw_map = { - "a100-80g": NodeHardwareInfo("a100-80g" + id_suffix, 312.0, "", 80.0, 2039.0), - "a100-40g": NodeHardwareInfo("a100-40g" + id_suffix, 312.0, "", 40.0, 1935.0), - "rtx5090": NodeHardwareInfo("rtx5090" + id_suffix, 165, "", 32.0, 1792.0), - "rtx4090": NodeHardwareInfo("rtx4090" + id_suffix, 82.6, "", 24.0, 1008.0), + "a100-80g": NodeHardwareInfo("a100-80g" + id_suffix, 312.0, "", 80.0, 2039.0, "cuda"), + "a100-40g": NodeHardwareInfo("a100-40g" + id_suffix, 312.0, "", 40.0, 1935.0, "cuda"), + "rtx5090": NodeHardwareInfo("rtx5090" + id_suffix, 165, "", 32.0, 1792.0, "cuda"), + "rtx4090": NodeHardwareInfo("rtx4090" + id_suffix, 82.6, "", 24.0, 1008.0, "cuda"), } hw = hw_map[gpu_type] return Node(node_id=hw.node_id, hardware=hw, model_info=model) diff --git a/tests/scheduler_tests/test_scheduler.py b/tests/scheduler_tests/test_scheduler.py index 2c8f6083..e8e4ff53 100644 --- a/tests/scheduler_tests/test_scheduler.py +++ b/tests/scheduler_tests/test_scheduler.py @@ -18,6 +18,7 @@ def _build_node(node_id: str, model: ModelInfo, *, tflops: float, mem_gb: float) gpu_name="", memory_gb=mem_gb, memory_bandwidth_gbps=1000.0, + device="cuda", ) n = Node(node_id=node_id, hardware=hw, model_info=model) # Ensure latency estimation uses a defined speedup diff --git a/tests/scheduler_tests/test_utils.py b/tests/scheduler_tests/test_utils.py index d888b88c..b663aa88 100644 --- a/tests/scheduler_tests/test_utils.py +++ b/tests/scheduler_tests/test_utils.py @@ -11,16 +11,36 @@ from scheduling.node import Node, NodeHardwareInfo A100_80G = NodeHardwareInfo( - node_id="a100-80g", tflops_fp16=312.0, gpu_name="", memory_gb=80.0, memory_bandwidth_gbps=2039 + node_id="a100-80g", + tflops_fp16=312.0, + gpu_name="", + memory_gb=80.0, + memory_bandwidth_gbps=2039, + device="cuda", ) A100_40G = NodeHardwareInfo( - node_id="a100-40g", tflops_fp16=312.0, gpu_name="", memory_gb=40.0, memory_bandwidth_gbps=1935 + node_id="a100-40g", + tflops_fp16=312.0, + gpu_name="", + memory_gb=40.0, + memory_bandwidth_gbps=1935, + device="cuda", ) RTX5090 = NodeHardwareInfo( - node_id="rtx5090", tflops_fp16=104.8, gpu_name="", memory_gb=32.0, memory_bandwidth_gbps=1792 + node_id="rtx5090", + tflops_fp16=104.8, + gpu_name="", + memory_gb=32.0, + memory_bandwidth_gbps=1792, + device="cuda", ) RTX4090 = NodeHardwareInfo( - node_id="rtx4090", tflops_fp16=82.6, gpu_name="", memory_gb=24.0, memory_bandwidth_gbps=1008 + node_id="rtx4090", + tflops_fp16=82.6, + gpu_name="", + memory_gb=24.0, + memory_bandwidth_gbps=1008, + device="cuda", ) @@ -60,6 +80,7 @@ def build_node( gpu_name="", memory_gb=mem_gb, memory_bandwidth_gbps=mem_bandwidth_gbps, + device="cuda", ) n = Node(node_id=node_id, hardware=hw, model_info=model, _force_max_concurrent_requests=True) # Attach coordinates for RTT synthesis in tests From a3b1538b2b39493ce70c382806a15991f5f42b5f Mon Sep 17 00:00:00 2001 From: yuhao_zhang Date: Thu, 30 Oct 2025 13:34:44 +0800 Subject: [PATCH 10/13] update --- tests/scheduler_tests/test_utils.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/scheduler_tests/test_utils.py b/tests/scheduler_tests/test_utils.py index b663aa88..47ffcfe1 100644 --- a/tests/scheduler_tests/test_utils.py +++ b/tests/scheduler_tests/test_utils.py @@ -48,6 +48,7 @@ def build_model_info(num_layers: int) -> ModelInfo: """Build a model config used across tests (matches allocation tests).""" return 
ModelInfo( model_name=f"GPUOss-{num_layers}L", + mlx_model_name=f"MLXOss-{num_layers}L", head_size=64, hidden_dim=2880, intermediate_dim=2880, @@ -59,6 +60,7 @@ def build_model_info(num_layers: int) -> ModelInfo: num_local_experts=128, num_experts_per_tok=4, param_bytes_per_element=1, + mlx_param_bytes_per_element=1, cache_bytes_per_element=2, embedding_bytes_per_element=2, ) From f405c9b57ebbda9dc0969739d64ca72a52420837 Mon Sep 17 00:00:00 2001 From: yuhao_zhang Date: Thu, 30 Oct 2025 14:28:55 +0800 Subject: [PATCH 11/13] update --- src/backend/server/static_config.py | 103 ++++++++++++---------------- src/parallax/launch.py | 14 +--- 2 files changed, 48 insertions(+), 69 deletions(-) diff --git a/src/backend/server/static_config.py b/src/backend/server/static_config.py index d8934c92..ff01b309 100644 --- a/src/backend/server/static_config.py +++ b/src/backend/server/static_config.py @@ -4,68 +4,55 @@ from scheduling.model_info import ModelInfo -# Supported model list -MODEL_LIST = [ - "Qwen/Qwen3-0.6B", - "openai/gpt-oss-20b", - "openai/gpt-oss-120b", - "moonshotai/Kimi-K2-Instruct", - "moonshotai/Kimi-K2-Instruct-0905", - "Qwen/Qwen3-Next-80B-A3B-Instruct", - "Qwen/Qwen3-Next-80B-A3B-Instruct-FP8", - "Qwen/Qwen3-Next-80B-A3B-Thinking", - "Qwen/Qwen3-Next-80B-A3B-Thinking-FP8", - "Qwen/Qwen3-0.6B-FP8", - "Qwen/Qwen3-1.7B", - "Qwen/Qwen3-1.7B-FP8", - "Qwen/Qwen3-4B", - "Qwen/Qwen3-4B-FP8", - "Qwen/Qwen3-4B-Instruct-2507", - "Qwen/Qwen3-4B-Instruct-2507-FP8", - "Qwen/Qwen3-4B-Thinking-2507", - "Qwen/Qwen3-4B-Thinking-2507-FP8", - "Qwen/Qwen3-8B", - "Qwen/Qwen3-8B-FP8", - "Qwen/Qwen3-14B", - "Qwen/Qwen3-14B-FP8", - "Qwen/Qwen3-32B", - "Qwen/Qwen3-32B-FP8", - "Qwen/Qwen3-30B-A3B", - "Qwen/Qwen3-30B-A3B-Instruct-2507-FP8", - "Qwen/Qwen3-30B-A3B-Thinking-2507-FP8", - "Qwen/Qwen3-235B-A22B-Instruct-2507-FP8", - "Qwen/Qwen3-235B-A22B-Thinking-2507-FP8", - "Qwen/Qwen3-235B-A22B-GPTQ-Int4", - "Qwen/Qwen2.5-0.5B-Instruct", - "Qwen/Qwen2.5-1.5B-Instruct", - "Qwen/Qwen2.5-3B-Instruct", - "Qwen/Qwen2.5-7B-Instruct", - "Qwen/Qwen2.5-14B-Instruct", - "Qwen/Qwen2.5-32B-Instruct", - "Qwen/Qwen2.5-72B-Instruct", - "nvidia/Llama-3.3-70B-Instruct-FP8", - "nvidia/Llama-3.1-70B-Instruct-FP8", - "nvidia/Llama-3.1-8B-Instruct-FP8", - "deepseek-ai/DeepSeek-V3.1", - "deepseek-ai/DeepSeek-R1", - "deepseek-ai/DeepSeek-V3", - "deepseek-ai/DeepSeek-V2", - "MiniMaxAI/MiniMax-M2", - "zai-org/GLM-4.6", -] - -MLX_MODEL_NAME_MAP = { +# Supported model list - key: model name, value: MLX model name (same as key if no MLX variant) +MODELS = { + "Qwen/Qwen3-0.6B": "Qwen/Qwen3-0.6B", "openai/gpt-oss-20b": "mlx-community/gpt-oss-20b-MXFP4-Q8", "openai/gpt-oss-120b": "mlx-community/gpt-oss-120b-4bit", + "moonshotai/Kimi-K2-Instruct": "mlx-community/Kimi-K2-Instruct-4bit", + "moonshotai/Kimi-K2-Instruct-0905": "mlx-community/Kimi-K2-Instruct-0905-mlx-DQ3_K_M", + "Qwen/Qwen3-Next-80B-A3B-Instruct": "mlx-community/Qwen3-Next-80B-A3B-Instruct-8bit", "Qwen/Qwen3-Next-80B-A3B-Instruct-FP8": "mlx-community/Qwen3-Next-80B-A3B-Instruct-8bit", + "Qwen/Qwen3-Next-80B-A3B-Thinking": "mlx-community/Qwen3-Next-80B-A3B-Thinking-8bit", "Qwen/Qwen3-Next-80B-A3B-Thinking-FP8": "mlx-community/Qwen3-Next-80B-A3B-Thinking-8bit", + "Qwen/Qwen3-0.6B-FP8": "Qwen/Qwen3-0.6B", + "Qwen/Qwen3-1.7B": "Qwen/Qwen3-1.7B", + "Qwen/Qwen3-1.7B-FP8": "Qwen/Qwen3-1.7B", + "Qwen/Qwen3-4B": "Qwen/Qwen3-4B", + "Qwen/Qwen3-4B-FP8": "Qwen/Qwen3-4B", + "Qwen/Qwen3-4B-Instruct-2507": "Qwen/Qwen3-4B-Instruct-2507", + "Qwen/Qwen3-4B-Instruct-2507-FP8": 
"Qwen/Qwen3-4B-Instruct-2507-FP8", + "Qwen/Qwen3-4B-Thinking-2507": "Qwen/Qwen3-4B-Thinking-2507", + "Qwen/Qwen3-4B-Thinking-2507-FP8": "Qwen/Qwen3-4B-Thinking-2507-FP8", + "Qwen/Qwen3-8B": "Qwen/Qwen3-8B", + "Qwen/Qwen3-8B-FP8": "Qwen/Qwen3-8B-FP8", + "Qwen/Qwen3-14B": "Qwen/Qwen3-14B", + "Qwen/Qwen3-14B-FP8": "Qwen/Qwen3-14B-FP8", + "Qwen/Qwen3-32B": "Qwen/Qwen3-32B", + "Qwen/Qwen3-32B-FP8": "Qwen/Qwen3-32B-FP8", + "Qwen/Qwen3-30B-A3B": "Qwen/Qwen3-30B-A3B", + "Qwen/Qwen3-30B-A3B-Instruct-2507-FP8": "Qwen/Qwen3-30B-A3B-Instruct-2507-FP8", + "Qwen/Qwen3-30B-A3B-Thinking-2507-FP8": "Qwen/Qwen3-30B-A3B-Thinking-2507-FP8", "Qwen/Qwen3-235B-A22B-Instruct-2507-FP8": "mlx-community/Qwen3-235B-A22B-Instruct-2507-4bit", "Qwen/Qwen3-235B-A22B-Thinking-2507-FP8": "mlx-community/Qwen3-235B-A22B-Thinking-2507-4bit", "Qwen/Qwen3-235B-A22B-GPTQ-Int4": "mlx-community/Qwen3-235B-A22B-4bit", - "moonshotai/Kimi-K2-Instruct": "mlx-community/Kimi-K2-Instruct-4bit", - "deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct": "mlx-community/DeepSeek-Coder-V2-Lite-Instruct-4bit-mlx", + "Qwen/Qwen2.5-0.5B-Instruct": "Qwen/Qwen2.5-0.5B-Instruct", + "Qwen/Qwen2.5-1.5B-Instruct": "Qwen/Qwen2.5-1.5B-Instruct", + "Qwen/Qwen2.5-3B-Instruct": "Qwen/Qwen2.5-3B-Instruct", + "Qwen/Qwen2.5-7B-Instruct": "Qwen/Qwen2.5-7B-Instruct", + "Qwen/Qwen2.5-14B-Instruct": "Qwen/Qwen2.5-14B-Instruct", + "Qwen/Qwen2.5-32B-Instruct": "Qwen/Qwen2.5-32B-Instruct", + "Qwen/Qwen2.5-72B-Instruct": "Qwen/Qwen2.5-72B-Instruct", + "nvidia/Llama-3.3-70B-Instruct-FP8": "nvidia/Llama-3.3-70B-Instruct-FP8", + "nvidia/Llama-3.1-70B-Instruct-FP8": "nvidia/Llama-3.1-70B-Instruct-FP8", + "nvidia/Llama-3.1-8B-Instruct-FP8": "nvidia/Llama-3.1-8B-Instruct-FP8", + "deepseek-ai/DeepSeek-V3.1": "deepseek-ai/DeepSeek-V3.1", + "deepseek-ai/DeepSeek-R1": "deepseek-ai/DeepSeek-R1", + "deepseek-ai/DeepSeek-V3": "deepseek-ai/DeepSeek-V3", + "deepseek-ai/DeepSeek-V2": "deepseek-ai/DeepSeek-V2", "MiniMaxAI/MiniMax-M2": "mlx-community/MiniMax-M2-4bit", "zai-org/GLM-4.6": "mlx-community/GLM-4.6-4bit", + "deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct": "mlx-community/DeepSeek-Coder-V2-Lite-Instruct-4bit-mlx", } logger = logging.getLogger(__name__) @@ -107,12 +94,12 @@ def _load_config_only(name: str) -> dict: param_bytes_per_element = 0.5 mlx_param_bytes_per_element = param_bytes_per_element - mlx_model_name = model_name - if model_name in MLX_MODEL_NAME_MAP: - mlx_model_name = MLX_MODEL_NAME_MAP[model_name] + mlx_model_name = MODELS.get(model_name, model_name) + + if mlx_model_name != model_name: mlx_config = _load_config_only(mlx_model_name) mlx_quant_dict = mlx_config.get("quantization_config", None) - if "bits" in mlx_quant_dict: + if mlx_quant_dict and "bits" in mlx_quant_dict: mlx_param_bytes_per_element = mlx_quant_dict["bits"] / 8 # get local experts @@ -147,7 +134,7 @@ def _load_config_only(name: str) -> dict: def get_model_list(): - return MODEL_LIST + return list(MODELS.keys()) def get_node_join_command(scheduler_addr, is_local_network): diff --git a/src/parallax/launch.py b/src/parallax/launch.py index 736118fb..49dd41ad 100644 --- a/src/parallax/launch.py +++ b/src/parallax/launch.py @@ -18,7 +18,6 @@ import tempfile import threading -from backend.server.static_config import MLX_MODEL_NAME_MAP from common.version_check import check_latest_release from parallax.p2p.server import ServerState, launch_p2p_server from parallax.server.executor import Executor @@ -52,11 +51,7 @@ logger.debug(f"executor_input_addr: {args.executor_input_ipc}") 
logger.debug(f"executor_output_addr: {args.executor_output_ipc}") # Hard code for mlx-community models - if get_current_device() == "mlx": - mlx_model_repo = MLX_MODEL_NAME_MAP.get(args.model_path, None) - if mlx_model_repo is not None: - args.model_path = mlx_model_repo - logger.debug(f"Replace mlx model path: {mlx_model_repo}") + logger.debug(f"self.model_path before mlx check: {args.model_path}") if args.scheduler_addr is None: if args.log_level != "DEBUG": display_parallax_join(args.model_path) @@ -109,11 +104,8 @@ args.end_layer = gradient_server.block_end_index args.model_path = gradient_server.model_name # Hard code for mlx-community models - if get_current_device() == "mlx": - mlx_model_repo = MLX_MODEL_NAME_MAP.get(args.model_path, None) - if mlx_model_repo is not None: - args.model_path = mlx_model_repo - logger.debug(f"Replace mlx model path: {mlx_model_repo}") + + logger.debug(f"self.model_path after mlx check: {args.model_path}") logger.debug( f"Start Executor with start_layer: {args.start_layer}, end_layer: {args.end_layer}" ) From 9f63d43509f5fa8b1785ef86e0fa57ca829b3f26 Mon Sep 17 00:00:00 2001 From: yuhao_zhang Date: Thu, 30 Oct 2025 14:31:06 +0800 Subject: [PATCH 12/13] update --- src/parallax/launch.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/parallax/launch.py b/src/parallax/launch.py index 49dd41ad..94b0a001 100644 --- a/src/parallax/launch.py +++ b/src/parallax/launch.py @@ -23,7 +23,6 @@ from parallax.server.executor import Executor from parallax.server.http_server import launch_http_server from parallax.server.server_args import parse_args -from parallax.utils.utils import get_current_device from parallax_utils.ascii_anime import display_parallax_join from parallax_utils.logging_config import get_logger, set_log_level From 44829d533c77bb080814132bc5138eee0ab639c3 Mon Sep 17 00:00:00 2001 From: yuhao_zhang Date: Fri, 31 Oct 2025 10:58:58 +0800 Subject: [PATCH 13/13] update --- src/backend/server/static_config.py | 1 - src/parallax/launch.py | 4 ---- 2 files changed, 5 deletions(-) diff --git a/src/backend/server/static_config.py b/src/backend/server/static_config.py index ff01b309..5974e221 100644 --- a/src/backend/server/static_config.py +++ b/src/backend/server/static_config.py @@ -52,7 +52,6 @@ "deepseek-ai/DeepSeek-V2": "deepseek-ai/DeepSeek-V2", "MiniMaxAI/MiniMax-M2": "mlx-community/MiniMax-M2-4bit", "zai-org/GLM-4.6": "mlx-community/GLM-4.6-4bit", - "deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct": "mlx-community/DeepSeek-Coder-V2-Lite-Instruct-4bit-mlx", } logger = logging.getLogger(__name__) diff --git a/src/parallax/launch.py b/src/parallax/launch.py index 94b0a001..be706465 100644 --- a/src/parallax/launch.py +++ b/src/parallax/launch.py @@ -49,8 +49,6 @@ logger.debug(f"executor_input_addr: {args.executor_input_ipc}") logger.debug(f"executor_output_addr: {args.executor_output_ipc}") - # Hard code for mlx-community models - logger.debug(f"self.model_path before mlx check: {args.model_path}") if args.scheduler_addr is None: if args.log_level != "DEBUG": display_parallax_join(args.model_path) @@ -103,8 +101,6 @@ args.end_layer = gradient_server.block_end_index args.model_path = gradient_server.model_name # Hard code for mlx-community models - - logger.debug(f"self.model_path after mlx check: {args.model_path}") logger.debug( f"Start Executor with start_layer: {args.start_layer}, end_layer: {args.end_layer}" )