2 changes: 1 addition & 1 deletion pyproject.toml
@@ -5,7 +5,7 @@ build-backend = "poetry.core.masonry.api"

 [project]
 name = "parallax"
-version = "0.0.1"
+version = "0.1.0"
 description = "Decentralised pipeline-parallel LLM serving with Sglang + MLX-LM + Lattica"
 readme = "README.md"
 requires-python = ">=3.11,<3.14"
4 changes: 4 additions & 0 deletions src/backend/server/scheduler_manage.py
@@ -91,6 +91,9 @@ def get_peer_id(self):
             return None
         return self.lattica.peer_id()

+    def need_more_nodes(self):
+        return self.scheduler.need_more_nodes() if self.scheduler else False
+
     def get_cluster_status(self):
         return {
             "type": "cluster_status",
@@ -102,6 +105,7 @@ def get_cluster_status(self):
                 self.get_peer_id(), self.is_local_network
             ),
             "node_list": self.get_node_list(),
+            "need_more_nodes": self.need_more_nodes(),
         },
     }
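Reviewer note: a minimal sketch of how a client might consume the new need_more_nodes flag from the cluster-status payload. Only the "need_more_nodes" key and the scheduler-absent default of False come from this diff; the payload wrapper key ("data") and the function name are assumptions for illustration.

    def should_prompt_for_more_nodes(cluster_status: dict) -> bool:
        # Mirrors the server-side guard: no scheduler (or no key) defaults to False.
        data = cluster_status.get("data", {})
        return bool(data.get("need_more_nodes", False))

    # Example payload shaped like get_cluster_status() above (keys abbreviated):
    status = {"type": "cluster_status", "data": {"node_list": [], "need_more_nodes": True}}
    assert should_prompt_for_more_nodes(status)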
53 changes: 46 additions & 7 deletions src/backend/server/static_config.py
@@ -1,9 +1,13 @@
+import concurrent.futures
 import json
-import logging
+import math
 from pathlib import Path

+from parallax_utils.logging_config import get_logger
 from scheduling.model_info import ModelInfo

+logger = get_logger(__name__)
+
 # Supported model list - key: model name, value: MLX model name (same as key if no MLX variant)
 MODELS = {
     "Qwen/Qwen3-0.6B": "Qwen/Qwen3-0.6B",
@@ -57,7 +61,6 @@
     "zai-org/GLM-4.6": "mlx-community/GLM-4.6-4bit",
 }

-logger = logging.getLogger(__name__)
 NODE_JOIN_COMMAND_LOCAL_NETWORK = """parallax join"""

 NODE_JOIN_COMMAND_PUBLIC_NETWORK = """parallax join -s {scheduler_addr} """
@@ -80,9 +83,6 @@ def _load_config_only(name: str) -> dict:

     config = _load_config_only(model_name)

-    # get quant method
-    # logger.info(f"Loading model config from {model_name}")
-
     quant_method = config.get("quant_method", None)
     quantization_config = config.get("quantization_config", None)
     if quant_method is None and quantization_config is not None:
@@ -92,8 +92,13 @@ def _load_config_only(name: str) -> dict:
         param_bytes_per_element = 2
     elif quant_method == "fp8":
         param_bytes_per_element = 1
-    elif quant_method in ("mxfp4", "int4", "awq", "gptq"):
+    elif quant_method in ("mxfp4", "int4", "awq", "gptq", "compressed-tensors"):
         param_bytes_per_element = 0.5
+    else:
+        param_bytes_per_element = 1
+        logger.warning(
+            f"model_name:{model_name} quant_method {quant_method} not supported in get_model_info method"
+        )

     mlx_param_bytes_per_element = param_bytes_per_element
     mlx_model_name = MODELS.get(model_name, model_name)
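Reviewer note: the widened branch maps each quantization method to an approximate per-parameter byte width (fp8 -> 1 byte, the 4-bit family -> 0.5) and now falls back to 1 byte with a warning instead of failing on unknown methods. A standalone restatement for back-of-envelope sizing; the helper and table names are ours, the widths come from the diff, and the 2-byte unquantized branch's condition is not visible in the hunk, so it is assumed here.

    # Approximate bytes per weight element, keyed by quant_method (from the diff above).
    BYTES_PER_ELEMENT = {
        "fp8": 1.0,
        "mxfp4": 0.5,
        "int4": 0.5,
        "awq": 0.5,
        "gptq": 0.5,
        "compressed-tensors": 0.5,
    }

    def param_bytes(quant_method):
        if quant_method is None:
            return 2.0  # assumed unquantized fp16/bf16 branch (condition not shown in the hunk)
        return BYTES_PER_ELEMENT.get(quant_method, 1.0)  # unknown methods fall back to 1 byte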
@@ -135,8 +140,42 @@ def _load_config_only(name: str) -> dict:
     return model_info


+def get_model_info_list():
+    model_name_list = list(MODELS.keys())
+    with concurrent.futures.ThreadPoolExecutor() as executor:
+        model_info_list = list(executor.map(get_model_info, model_name_list))
+    return model_info_list
+
+
+model_info_list_cache = get_model_info_list()
+
+
 def get_model_list():
-    return list(MODELS.keys())
+    model_info_list = model_info_list_cache
+
+    def build_single_model(model_info):
+        return {
+            "name": model_info.model_name,
+            "vram_gb": math.ceil(estimate_vram_gb_required(model_info)),
+        }
+
+    results = [build_single_model(model_info) for model_info in model_info_list]
+    return results
+
+
+def estimate_vram_gb_required(model_info):
+    if model_info is None:
+        return 0
+    return (
+        (
+            model_info.embedding_io_bytes
+            + model_info.num_layers * model_info.decoder_layer_io_bytes(roofline=False)
+        )
+        * 1.0
+        / 1024
+        / 1024
+        / 1024
+    )
+
+
 def get_node_join_command(scheduler_addr, is_local_network):
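Reviewer note: get_model_info_list() resolves every entry in MODELS concurrently via a thread pool, and the result is cached once at import time in model_info_list_cache, so get_model_list() does no config loading per request. estimate_vram_gb_required() then converts embedding plus per-layer weight bytes into gigabytes via three divisions by 1024 (i.e. GiB). A worked example of the arithmetic with made-up figures, not taken from any real model:

    import math

    embedding_io_bytes = 1.2 * 1024**3      # ~1.2 GiB of embedding / LM-head weights (hypothetical)
    decoder_layer_io_bytes = 0.9 * 1024**3  # ~0.9 GiB per decoder layer (hypothetical)
    num_layers = 32

    vram_gb = (embedding_io_bytes + num_layers * decoder_layer_io_bytes) / 1024**3
    print(math.ceil(vram_gb))  # 30 -- the value get_model_list() would report as "vram_gb"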


1 change: 1 addition & 0 deletions src/frontend/dist/assets/chat-DlBZcbmF.js
@@ -0,0 +1 @@
+import{c as t,j as e,C as o}from"./App-Ba9WPx9O.js";t.createRoot(document.getElementById("root")).render(e.jsx(o,{}));
1 change: 0 additions & 1 deletion src/frontend/dist/assets/chat-fdxrhkT3.js

This file was deleted.

6 changes: 0 additions & 6 deletions src/frontend/dist/assets/join-BWhers2Y.js

This file was deleted.

6 changes: 6 additions & 0 deletions src/frontend/dist/assets/join-BzV0zEA8.js

Some generated files are not rendered by default.

1 change: 0 additions & 1 deletion src/frontend/dist/assets/main-C0U2HpN8.js

This file was deleted.

1 change: 1 addition & 0 deletions src/frontend/dist/assets/main-CrNN7OEz.js
@@ -0,0 +1 @@
+import{c as t,j as e,M as o}from"./App-Ba9WPx9O.js";t.createRoot(document.getElementById("root")).render(e.jsx(o,{}));


6 changes: 0 additions & 6 deletions src/frontend/dist/assets/setup-CYeFrJeL.js

This file was deleted.
