From 0bfba04462069e481adf7f90502d40234f52bd1b Mon Sep 17 00:00:00 2001 From: sibianl Date: Sat, 11 Oct 2025 16:52:44 +0800 Subject: [PATCH 1/2] fix(description): update project description and cli help info and readme --- README.md | 2 +- pyproject.toml | 4 ++-- src/parallax/cli.py | 15 +++++++++++---- 3 files changed, 14 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index ee3c3d03..9ac0e866 100644 --- a/README.md +++ b/README.md @@ -253,8 +253,8 @@ For Windows, simply go to Control Panel → Programs → Uninstall a program, fi | | Provider | HuggingFace Collection | Blog | Description | |:-------------|:-------------|:----------------------------:|:----------------------------:|:----------------------------| |gpt-oss | OpenAI | [gpt-oss](https://huggingface.co/collections/openai/gpt-oss-68911959590a1634ba11c7a4) | [Introducing gpt-oss](https://openai.com/index/introducing-gpt-oss/) | "gpt-oss" refers to OpenAI's open-source GPT models, including gpt-oss-20b and gpt-oss-120b. The number (e.g., 20b, 120b) indicates the parameter count (20 billion, 120 billion). | +|Kimi-K2 | Moonshot AI | [Kimi-K2](https://huggingface.co/collections/moonshotai/kimi-k2-6871243b990f2af5ba60617d) | [Kimi K2: Open Agentic Intelligence](https://moonshotai.github.io/Kimi-K2/) | "Kimi-K2" is Moonshot AI's Kimi-K2 model family, including Kimi-K2-Instruct and Kimi-K2-Instruct-0905. The models are designed for agentic intelligence and available in different versions and parameter sizes. | |Qwen3-Next | Qwen | [Qwen3-Next](https://huggingface.co/collections/Qwen/qwen3-next-68c25fd6838e585db8eeea9d) | [Qwen3-Next: Towards Ultimate Training & Inference Efficiency](https://qwen.ai/blog?id=4074cca80393150c248e508aa62983f9cb7d27cd&from=research.latest-advancements-list) | "Qwen3-Next" is the latest generation of Qwen models by Alibaba/Qwen, with improved efficiency and performance. 
Includes models like Qwen3-Next-80B-A3B-Instruct (80B parameters, instruction-tuned) and Qwen3-Next-80B-A3B-Thinking (80B, reasoning enhanced). Variants include FP8 quantized and instruction-tuned models. | |Qwen3 | Qwen | [Qwen3](https://huggingface.co/collections/Qwen/qwen3-67dd247413f0e2e4f653967f) | [Qwen3: Think Deeper, Act Faster](https://qwen.ai/blog?id=1e3fa5c2d4662af2855586055ad037ed9e555125&from=research.research-list) | "Qwen3" is the third generation of Qwen LLMs, available in multiple sizes (e.g., 0.6B, 1.7B, 4B, 8B, 14B, 30B, 32B, 235B). Variants include FP8 quantized and instruction-tuned models. | |Qwen2.5 | Qwen | [Qwen2.5](https://huggingface.co/collections/Qwen/qwen25-66e81a666513e518adb90d9e) | [Qwen2.5: A Party of Foundation Models!](https://qwen.ai/blog?id=6da44b4d3b48c53f5719bab9cc18b732a7065647&from=research.research-list) | "Qwen2.5" is an earlier generation of Qwen models, with sizes like 0.5B, 1.5B, 3B, 7B, 14B, 32B, 72B. These models are available in base and instruction-tuned versions. | |Meta Llama 3 | Meta | [Meta Llama 3](https://huggingface.co/collections/meta-llama/meta-llama-3-66214712577ca38149ebb2b6)
[Llama 3.1](https://huggingface.co/collections/meta-llama/llama-31-669fc079a0c406a149a5738f)
[Llama 3.2](https://huggingface.co/collections/meta-llama/llama-32-66f448ffc8c32f949b04c8cf)
[Llama 3.3](https://huggingface.co/collections/meta-llama/llama-33-67531d5c405ec5d08a852000) | [Introducing Meta Llama 3: The most capable openly available LLM to date](https://ai.meta.com/blog/meta-llama-3/) | "Meta Llama 3" is Meta's third-generation Llama model, available in sizes such as 8B and 70B parameters. Includes instruction-tuned and quantized (e.g., FP8) variants. | -|Kimi-K2 | Moonshot AI | [Kimi-K2](https://huggingface.co/collections/moonshotai/kimi-k2-6871243b990f2af5ba60617d) | [Kimi K2: Open Agentic Intelligence](https://moonshotai.github.io/Kimi-K2/) | "Kimi-K2" is Moonshot AI's Kimi-K2 model family, including Kimi-K2-Instruct and Kimi-K2-Instruct-0905. The models are designed for agentic intelligence and available in different versions and parameter sizes. | diff --git a/pyproject.toml b/pyproject.toml index 854edbef..f6164942 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,9 +6,9 @@ build-backend = "poetry.core.masonry.api" [project] name = "parallax" version = "0.0.1" -description = "Decentralised pipeline-parallel LLM serving with MLX-LM + Hivemind" +description = "Decentralised pipeline-parallel LLM serving with Sglang + MLX-LM + Lattica" readme = "README.md" -requires-python = ">=3.8" +requires-python = ">=3.11" packages = [ { include = "parallax", from = "src" }, { include = "scheduling", from = "src" }, diff --git a/src/parallax/cli.py b/src/parallax/cli.py index de545cb5..e8b78a76 100644 --- a/src/parallax/cli.py +++ b/src/parallax/cli.py @@ -198,9 +198,12 @@ def main(): formatter_class=argparse.RawDescriptionHelpFormatter, epilog=""" Examples: - parallax run # Start scheduler with frontend - parallax run -m Qwen/Qwen3-0.6B -n 2 # Start scheduler without frontend - parallax join -s /ip4/192.168.1.2/tcp/5001/p2p/xxxxxxxxxxxx # Join cluster + parallax run # Start scheduler with frontend + parallax run -m {model-name} -n {number-of-worker-nodes} # Start scheduler without frontend + parallax run -m Qwen/Qwen3-0.6B -n 2 # example + 
parallax join # Join cluster in local network + parallax join -s {scheduler-address} # Join cluster in public network + parallax join -s 12D3KooWLX7MWuzi1Txa5LyZS4eTQ2tPaJijheH8faHggB9SxnBu # example """, ) @@ -221,7 +224,11 @@ def main(): "join", help="Join a distributed cluster (equivalent to scripts/join.sh)" ) join_parser.add_argument( - "-s", "--scheduler-addr", default="auto", type=str, help="Scheduler address (required)" + "-s", + "--scheduler-addr", + default="auto", + type=str, + help="Scheduler address (required)", ) join_parser.add_argument( "-r", "--use-relay", action="store_true", help="Use public relay servers" From 1b677ead7b5ddc67b6de576cb760b85d4fddc78a Mon Sep 17 00:00:00 2001 From: sibianl Date: Sat, 11 Oct 2025 17:04:12 +0800 Subject: [PATCH 2/2] style(benchmark): use parenthesized context managers in download_and_cache_file --- src/backend/benchmark/benchmark_serving.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/src/backend/benchmark/benchmark_serving.py b/src/backend/benchmark/benchmark_serving.py index e4da1bed..d905bad4 100644 --- a/src/backend/benchmark/benchmark_serving.py +++ b/src/backend/benchmark/benchmark_serving.py @@ -93,13 +93,16 @@ def download_and_cache_file(url: str, filename: Optional[str] = None): chunk_size = 1024 # Download in chunks of 1KB # Use tqdm to display the progress bar - with open(filename, "wb") as f, tqdm( - desc=filename, - total=total_size, - unit="B", - unit_scale=True, - unit_divisor=1024, - ) as bar: + with ( + open(filename, "wb") as f, + tqdm( + desc=filename, + total=total_size, + unit="B", + unit_scale=True, + unit_divisor=1024, + ) as bar, + ): for chunk in response.iter_content(chunk_size=chunk_size): f.write(chunk) bar.update(len(chunk))