From d3b5f418e806a622876d2eefddf32b7288be409e Mon Sep 17 00:00:00 2001 From: sibianl Date: Sat, 11 Oct 2025 17:37:53 +0800 Subject: [PATCH 1/2] fix(relay): scheduler add default initial_peers in remote mode --- src/backend/server/scheduler_manage.py | 4 +++- src/backend/server/static_config.py | 5 +++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/src/backend/server/scheduler_manage.py b/src/backend/server/scheduler_manage.py index a2796f37..b0e76966 100644 --- a/src/backend/server/scheduler_manage.py +++ b/src/backend/server/scheduler_manage.py @@ -7,6 +7,7 @@ from backend.server.constants import NODE_STATUS_AVAILABLE, NODE_STATUS_WAITING from backend.server.rpc_connection_handler import RPCConnectionHandler from backend.server.static_config import ( + PUBLIC_INITIAL_PEERS, PUBLIC_RELAY_SERVERS, get_model_info, get_node_join_command, @@ -58,8 +59,9 @@ def run(self, model_name, init_nodes_num, is_local_network=True): f"SchedulerManage starting: model_name={model_name}, init_nodes_num={init_nodes_num}" ) self.is_local_network = is_local_network - if not is_local_network and not self.relay_servers: + if not is_local_network and not self.initial_peers and not self.relay_servers: logger.debug("Using public relay servers") + self.initial_peers = PUBLIC_INITIAL_PEERS self.relay_servers = PUBLIC_RELAY_SERVERS self._start_scheduler(model_name, init_nodes_num) diff --git a/src/backend/server/static_config.py b/src/backend/server/static_config.py index b03f833b..078cd4b6 100644 --- a/src/backend/server/static_config.py +++ b/src/backend/server/static_config.py @@ -35,6 +35,11 @@ NODE_JOIN_COMMAND_PUBLIC_NETWORK = """parallax join -s {scheduler_addr} """ +PUBLIC_INITIAL_PEERS = [ + "/dns4/bootstrap-lattica.gradient.network/udp/18080/quic-v1/p2p/12D3KooWJHXvu8TWkFn6hmSwaxdCLy4ZzFwr4u5mvF9Fe2rMmFXb", + "/dns4/bootstrap-lattica.gradient.network/tcp/18080/p2p/12D3KooWJHXvu8TWkFn6hmSwaxdCLy4ZzFwr4u5mvF9Fe2rMmFXb", +] + PUBLIC_RELAY_SERVERS = [ 
"/dns4/relay-lattica.gradient.network/udp/18080/quic-v1/p2p/12D3KooWDaqDAsFupYvffBDxjHHuWmEAJE4sMDCXiuZiB8aG8rjf", "/dns4/relay-lattica.gradient.network/tcp/18080/p2p/12D3KooWDaqDAsFupYvffBDxjHHuWmEAJE4sMDCXiuZiB8aG8rjf", From 796d80010501bfc3c8c614e4e3cf8b6f77d9817a Mon Sep 17 00:00:00 2001 From: sibianl Date: Mon, 13 Oct 2025 11:01:18 +0800 Subject: [PATCH 2/2] fix(relay): share public relay/peer defaults via common.static_config and remove legacy scripts --- pyproject.toml | 2 +- scripts/check.sh | 12 ------ scripts/join.sh | 56 -------------------------- scripts/start.sh | 55 ------------------------- src/backend/server/scheduler_manage.py | 8 +--- src/backend/server/static_config.py | 10 ----- src/common/static_config.py | 18 +++++++++ src/parallax/cli.py | 12 +----- 8 files changed, 22 insertions(+), 151 deletions(-) delete mode 100644 scripts/check.sh delete mode 100755 scripts/join.sh delete mode 100644 scripts/start.sh create mode 100644 src/common/static_config.py diff --git a/pyproject.toml b/pyproject.toml index f6164942..922bf36e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,7 +21,7 @@ dependencies = [ "numpy>=1.26", "pyzmq>=25.0", "psutil>=5.9.5", - "httpx", + "httpx[socks]>=0.26.0", "aiohttp", "uvicorn", "uvloop", diff --git a/scripts/check.sh b/scripts/check.sh deleted file mode 100644 index 1229760b..00000000 --- a/scripts/check.sh +++ /dev/null @@ -1,12 +0,0 @@ -PYTHON_VERSION=$(python3 -c 'import sys; print("{}.{}".format(sys.version_info.major, sys.version_info.minor))') -REQUIRED_VERSION="3.11" - -version_ge() { - # returns 0 if $1 >= $2 - [ "$(printf '%s\n' "$2" "$1" | sort -V | head -n1)" = "$2" ] -} - -if ! version_ge "$PYTHON_VERSION" "$REQUIRED_VERSION"; then - echo "Error: Python 3.11 or higher is required. Current version is $PYTHON_VERSION." - exit 1 -fi diff --git a/scripts/join.sh b/scripts/join.sh deleted file mode 100755 index 2e5b4c0d..00000000 --- a/scripts/join.sh +++ /dev/null @@ -1,56 +0,0 @@ -#!/bin/bash -source scripts/check.sh - -if [ $? 
-ne 0 ]; then - exit 1 -fi - -helpFunction() -{ - echo "" - echo "Usage: $0 [-s SCHEDULER_ADDR] [-r]" - echo -e "\t-s SCHEDULER_ADDR (default: auto)" - echo -e "\t-r (Optional) Use public relay servers" - exit 1 # Exit script after printing help -} - -SCHEDULER_ADDR="auto" -USE_RELAY=0 - -while getopts "s:r" opt -do - case "$opt" in - s ) SCHEDULER_ADDR="$OPTARG" ;; - r ) USE_RELAY=1 ;; - ? ) helpFunction ;; # Print helpFunction in case parameter is non-existent - esac -done - -# Print helpFunction in case parameters are empty -if [ -z "$SCHEDULER_ADDR" ] -then - SCHEDULER_ADDR="auto" -fi - -echo "$SCHEDULER_ADDR" -if [ "$USE_RELAY" -eq 1 ]; then - echo "USE_RELAY: enabled" -fi - -export SGL_ENABLE_JIT_DEEPGEMM=0 - -CMD="python3 src/parallax/launch.py \ - --max-num-tokens-per-batch 4096 \ - --max-sequence-length 2048 \ - --max-batch-size 8 \ - --kv-block-size 1024 \ - --host 0.0.0.0 \ - --port 3000 \ - --scheduler-addr $SCHEDULER_ADDR" - -if [ "$USE_RELAY" -eq 1 ] || { [[ "$SCHEDULER_ADDR" == /* ]] && [ "$SCHEDULER_ADDR" != "auto" ]; }; then - CMD="$CMD --relay-servers /dns4/relay-lattica.gradient.network/udp/18080/quic-v1/p2p/12D3KooWDaqDAsFupYvffBDxjHHuWmEAJE4sMDCXiuZiB8aG8rjf /dns4/relay-lattica.gradient.network/tcp/18080/p2p/12D3KooWDaqDAsFupYvffBDxjHHuWmEAJE4sMDCXiuZiB8aG8rjf" - CMD="$CMD --initial-peers /dns4/bootstrap-lattica.gradient.network/udp/18080/quic-v1/p2p/12D3KooWJHXvu8TWkFn6hmSwaxdCLy4ZzFwr4u5mvF9Fe2rMmFXb /dns4/bootstrap-lattica.gradient.network/tcp/18080/p2p/12D3KooWJHXvu8TWkFn6hmSwaxdCLy4ZzFwr4u5mvF9Fe2rMmFXb" -fi - -eval $CMD diff --git a/scripts/start.sh b/scripts/start.sh deleted file mode 100644 index 46789fe2..00000000 --- a/scripts/start.sh +++ /dev/null @@ -1,55 +0,0 @@ -source scripts/check.sh - -if [ $? 
-ne 0 ]; then - exit 1 -fi - - -helpFunction() -{ - echo "" - echo "Usage: $0 [-n INIT_NODES_NUM] [-m MODEL_NAME] [-r]" - echo -e "\t-n (Optional) Number of initial nodes" - echo -e "\t-m (Optional) Model name" - echo -e "\t-r (Optional) Use public relay servers" - exit 1 # Exit script after printing help -} - -USE_RELAY=0 - -# Parse optional arguments -while getopts "n:m:rh" opt -do - case "$opt" in - n ) INIT_NODES_NUM="$OPTARG" ;; - m ) MODEL_NAME="$OPTARG" ;; - r ) USE_RELAY=1 ;; - h ) helpFunction ;; - ? ) helpFunction ;; # Print helpFunction in case parameter is non-existent - esac -done - -# Show what was provided (for debugging) -if [ ! -z "$INIT_NODES_NUM" ]; then - echo "INIT_NODES_NUM: $INIT_NODES_NUM" -fi -if [ ! -z "$MODEL_NAME" ]; then - echo "MODEL_NAME: $MODEL_NAME" -fi -if [ "$USE_RELAY" -eq 1 ]; then - echo "USE_RELAY: enabled" -fi - -# Build the python command with optional arguments -CMD="python3 src/backend/main.py --dht-port 5001 --port 3001" -if [ ! -z "$MODEL_NAME" ]; then - CMD="$CMD --model-name $MODEL_NAME" -fi -if [ ! 
-z "$INIT_NODES_NUM" ]; then - CMD="$CMD --init-nodes-num $INIT_NODES_NUM" -fi -if [ "$USE_RELAY" -eq 1 ]; then - CMD="$CMD --relay-servers /dns4/relay-lattica.gradient.network/udp/18080/quic-v1/p2p/12D3KooWDaqDAsFupYvffBDxjHHuWmEAJE4sMDCXiuZiB8aG8rjf /dns4/relay-lattica.gradient.network/tcp/18080/p2p/12D3KooWDaqDAsFupYvffBDxjHHuWmEAJE4sMDCXiuZiB8aG8rjf" -fi - -eval $CMD diff --git a/src/backend/server/scheduler_manage.py b/src/backend/server/scheduler_manage.py index b0e76966..582a9dc4 100644 --- a/src/backend/server/scheduler_manage.py +++ b/src/backend/server/scheduler_manage.py @@ -6,12 +6,8 @@ from backend.server.constants import NODE_STATUS_AVAILABLE, NODE_STATUS_WAITING from backend.server.rpc_connection_handler import RPCConnectionHandler -from backend.server.static_config import ( - PUBLIC_INITIAL_PEERS, - PUBLIC_RELAY_SERVERS, - get_model_info, - get_node_join_command, -) +from backend.server.static_config import get_model_info, get_node_join_command +from common.static_config import PUBLIC_INITIAL_PEERS, PUBLIC_RELAY_SERVERS from parallax.p2p.server import TransformerConnectionHandler from parallax_utils.logging_config import get_logger from scheduling.node import RequestSignal diff --git a/src/backend/server/static_config.py b/src/backend/server/static_config.py index 078cd4b6..0133b2ef 100644 --- a/src/backend/server/static_config.py +++ b/src/backend/server/static_config.py @@ -35,16 +35,6 @@ NODE_JOIN_COMMAND_PUBLIC_NETWORK = """parallax join -s {scheduler_addr} """ -PUBLIC_INITIAL_PEERS = [ - "/dns4/bootstrap-lattica.gradient.network/udp/18080/quic-v1/p2p/12D3KooWJHXvu8TWkFn6hmSwaxdCLy4ZzFwr4u5mvF9Fe2rMmFXb", - "/dns4/bootstrap-lattica.gradient.network/tcp/18080/p2p/12D3KooWJHXvu8TWkFn6hmSwaxdCLy4ZzFwr4u5mvF9Fe2rMmFXb", -] - -PUBLIC_RELAY_SERVERS = [ - "/dns4/relay-lattica.gradient.network/udp/18080/quic-v1/p2p/12D3KooWDaqDAsFupYvffBDxjHHuWmEAJE4sMDCXiuZiB8aG8rjf", - 
"/dns4/relay-lattica.gradient.network/tcp/18080/p2p/12D3KooWDaqDAsFupYvffBDxjHHuWmEAJE4sMDCXiuZiB8aG8rjf", -] - def get_model_info(model_name): config_path = hf_hub_download(repo_id=model_name, filename="config.json") diff --git a/src/common/static_config.py b/src/common/static_config.py new file mode 100644 index 00000000..c87237d1 --- /dev/null +++ b/src/common/static_config.py @@ -0,0 +1,18 @@ +PUBLIC_INITIAL_PEERS = [ + "/dns4/bootstrap-lattica.gradient.network/udp/18080/quic-v1/p2p/12D3KooWJHXvu8TWkFn6hmSwaxdCLy4ZzFwr4u5mvF9Fe2rMmFXb", + "/dns4/bootstrap-lattica.gradient.network/tcp/18080/p2p/12D3KooWJHXvu8TWkFn6hmSwaxdCLy4ZzFwr4u5mvF9Fe2rMmFXb", +] + +PUBLIC_RELAY_SERVERS = [ + "/dns4/relay-lattica.gradient.network/udp/18080/quic-v1/p2p/12D3KooWDaqDAsFupYvffBDxjHHuWmEAJE4sMDCXiuZiB8aG8rjf", + "/dns4/relay-lattica.gradient.network/tcp/18080/p2p/12D3KooWDaqDAsFupYvffBDxjHHuWmEAJE4sMDCXiuZiB8aG8rjf", +] + + +def get_relay_params(): + return [ + "--relay-servers", + *PUBLIC_RELAY_SERVERS, + "--initial-peers", + *PUBLIC_INITIAL_PEERS, + ] diff --git a/src/parallax/cli.py b/src/parallax/cli.py index e8b78a76..6281a9ef 100644 --- a/src/parallax/cli.py +++ b/src/parallax/cli.py @@ -14,6 +14,7 @@ import sys from pathlib import Path +from common.static_config import get_relay_params from parallax_utils.logging_config import get_logger logger = get_logger("parallax.cli") @@ -41,17 +42,6 @@ def get_project_root(): return Path.cwd() -def get_relay_params(): - return [ - "--relay-servers", - "/dns4/relay-lattica.gradient.network/udp/18080/quic-v1/p2p/12D3KooWDaqDAsFupYvffBDxjHHuWmEAJE4sMDCXiuZiB8aG8rjf", - "/dns4/relay-lattica.gradient.network/tcp/18080/p2p/12D3KooWDaqDAsFupYvffBDxjHHuWmEAJE4sMDCXiuZiB8aG8rjf", - "--initial-peers", - "/dns4/bootstrap-lattica.gradient.network/udp/18080/quic-v1/p2p/12D3KooWJHXvu8TWkFn6hmSwaxdCLy4ZzFwr4u5mvF9Fe2rMmFXb", - "/dns4/bootstrap-lattica.gradient.network/tcp/18080/p2p/12D3KooWJHXvu8TWkFn6hmSwaxdCLy4ZzFwr4u5mvF9Fe2rMmFXb", - ] - - def 
run_command(args): """Run the scheduler (equivalent to scripts/start.sh).""" check_python_version()