2 changes: 1 addition & 1 deletion pyproject.toml
@@ -5,7 +5,7 @@ build-backend = "poetry.core.masonry.api"

 [project]
 name = "parallax"
-version = "0.0.1"
+version = "0.1.0"
 description = "Decentralised pipeline-parallel LLM serving with Sglang + MLX-LM + Lattica"
 readme = "README.md"
 requires-python = ">=3.11,<3.14"
4 changes: 4 additions & 0 deletions src/backend/server/scheduler_manage.py
@@ -91,6 +91,9 @@ def get_peer_id(self):
             return None
         return self.lattica.peer_id()

+    def need_more_nodes(self):
+        return self.scheduler.need_more_nodes() if self.scheduler else False
+
     def get_cluster_status(self):
         return {
             "type": "cluster_status",
@@ -102,6 +105,7 @@ def get_cluster_status(self):
                 self.get_peer_id(), self.is_local_network
             ),
             "node_list": self.get_node_list(),
+            "need_more_nodes": self.need_more_nodes(),
         },
     }
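Reviewer note: a minimal sketch of how a client might consume the new need_more_nodes flag from the cluster-status payload. Only the "need_more_nodes" key and the scheduler-absent default of False come from this diff; the payload wrapper key ("data") and the function name are assumptions for illustration.

    def should_prompt_for_more_nodes(cluster_status: dict) -> bool:
        # Mirrors the server-side guard: no scheduler (or no key) defaults to False.
        data = cluster_status.get("data", {})
        return bool(data.get("need_more_nodes", False))

    # Example payload shaped like get_cluster_status() above (keys abbreviated):
    status = {"type": "cluster_status", "data": {"node_list": [], "need_more_nodes": True}}
    assert should_prompt_for_more_nodes(status)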
53 changes: 46 additions & 7 deletions src/backend/server/static_config.py
@@ -1,9 +1,13 @@
+import concurrent.futures
 import json
-import logging
+import math
 from pathlib import Path

+from parallax_utils.logging_config import get_logger
 from scheduling.model_info import ModelInfo

+logger = get_logger(__name__)
+
 # Supported model list - key: model name, value: MLX model name (same as key if no MLX variant)
 MODELS = {
     "Qwen/Qwen3-0.6B": "Qwen/Qwen3-0.6B",
@@ -57,7 +61,6 @@
     "zai-org/GLM-4.6": "mlx-community/GLM-4.6-4bit",
 }

-logger = logging.getLogger(__name__)
 NODE_JOIN_COMMAND_LOCAL_NETWORK = """parallax join"""

 NODE_JOIN_COMMAND_PUBLIC_NETWORK = """parallax join -s {scheduler_addr} """
@@ -80,9 +83,6 @@ def _load_config_only(name: str) -> dict:

     config = _load_config_only(model_name)

-    # get quant method
-    # logger.info(f"Loading model config from {model_name}")
-
     quant_method = config.get("quant_method", None)
     quantization_config = config.get("quantization_config", None)
     if quant_method is None and quantization_config is not None:
@@ -92,8 +92,13 @@ def _load_config_only(name: str) -> dict:
         param_bytes_per_element = 2
     elif quant_method == "fp8":
         param_bytes_per_element = 1
-    elif quant_method in ("mxfp4", "int4", "awq", "gptq"):
+    elif quant_method in ("mxfp4", "int4", "awq", "gptq", "compressed-tensors"):
         param_bytes_per_element = 0.5
+    else:
+        param_bytes_per_element = 1
+        logger.warning(
+            f"model_name:{model_name} quant_method {quant_method} not supported in get_model_info method"
+        )

     mlx_param_bytes_per_element = param_bytes_per_element
     mlx_model_name = MODELS.get(model_name, model_name)
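Reviewer note: the widened branch maps each quantization method to an approximate per-parameter byte width (fp8 -> 1 byte, the 4-bit family -> 0.5) and now falls back to 1 byte with a warning instead of failing on unknown methods. A standalone restatement for back-of-envelope sizing; the helper and table names are ours, the widths come from the diff, and the 2-byte unquantized branch's condition is not visible in the hunk, so it is assumed here.

    # Approximate bytes per weight element, keyed by quant_method (from the diff above).
    BYTES_PER_ELEMENT = {
        "fp8": 1.0,
        "mxfp4": 0.5,
        "int4": 0.5,
        "awq": 0.5,
        "gptq": 0.5,
        "compressed-tensors": 0.5,
    }

    def param_bytes(quant_method):
        if quant_method is None:
            return 2.0  # assumed unquantized fp16/bf16 branch (condition not shown in the hunk)
        return BYTES_PER_ELEMENT.get(quant_method, 1.0)  # unknown methods fall back to 1 byte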
@@ -135,8 +140,42 @@ def _load_config_only(name: str) -> dict:
     return model_info


+def get_model_info_list():
+    model_name_list = list(MODELS.keys())
+    with concurrent.futures.ThreadPoolExecutor() as executor:
+        model_info_list = list(executor.map(get_model_info, model_name_list))
+    return model_info_list
+
+
+model_info_list_cache = get_model_info_list()
+
+
 def get_model_list():
-    return list(MODELS.keys())
+    model_info_list = model_info_list_cache
+
+    def build_single_model(model_info):
+        return {
+            "name": model_info.model_name,
+            "vram_gb": math.ceil(estimate_vram_gb_required(model_info)),
+        }
+
+    results = [build_single_model(model_info) for model_info in model_info_list]
+    return results
+
+
+def estimate_vram_gb_required(model_info):
+    if model_info is None:
+        return 0
+    return (
+        (
+            model_info.embedding_io_bytes
+            + model_info.num_layers * model_info.decoder_layer_io_bytes(roofline=False)
+        )
+        * 1.0
+        / 1024
+        / 1024
+        / 1024
+    )
+
+
 def get_node_join_command(scheduler_addr, is_local_network):
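Reviewer note: get_model_info_list() resolves every entry in MODELS concurrently via a thread pool, and the result is cached once at import time in model_info_list_cache, so get_model_list() does no config loading per request. estimate_vram_gb_required() then converts embedding plus per-layer weight bytes into gigabytes via three divisions by 1024 (i.e. GiB). A worked example of the arithmetic with made-up figures, not taken from any real model:

    import math

    embedding_io_bytes = 1.2 * 1024**3      # ~1.2 GiB of embedding / LM-head weights (hypothetical)
    decoder_layer_io_bytes = 0.9 * 1024**3  # ~0.9 GiB per decoder layer (hypothetical)
    num_layers = 32

    vram_gb = (embedding_io_bytes + num_layers * decoder_layer_io_bytes) / 1024**3
    print(math.ceil(vram_gb))  # 30 -- the value get_model_list() would report as "vram_gb"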


1 change: 1 addition & 0 deletions src/frontend/dist/assets/chat-DlBZcbmF.js
@@ -0,0 +1 @@
+import{c as t,j as e,C as o}from"./App-Ba9WPx9O.js";t.createRoot(document.getElementById("root")).render(e.jsx(o,{}));
1 change: 0 additions & 1 deletion src/frontend/dist/assets/chat-fdxrhkT3.js

This file was deleted.

6 changes: 0 additions & 6 deletions src/frontend/dist/assets/join-BWhers2Y.js

This file was deleted.

6 changes: 6 additions & 0 deletions src/frontend/dist/assets/join-BzV0zEA8.js

Some generated files are not rendered by default.

1 change: 0 additions & 1 deletion src/frontend/dist/assets/main-C0U2HpN8.js

This file was deleted.

1 change: 1 addition & 0 deletions src/frontend/dist/assets/main-CrNN7OEz.js
@@ -0,0 +1 @@
+import{c as t,j as e,M as o}from"./App-Ba9WPx9O.js";t.createRoot(document.getElementById("root")).render(e.jsx(o,{}));


6 changes: 0 additions & 6 deletions src/frontend/dist/assets/setup-CYeFrJeL.js

This file was deleted.
