diff --git a/README.md b/README.md index e959bf4c..87253de3 100644 --- a/README.md +++ b/README.md @@ -134,7 +134,7 @@ parallax join # public network env parallax join -s {scheduler-address} # example -parallax join -s /ip4/192.168.1.2/tcp/5001/p2p/xxxxxxxxxxxx +parallax join -s 12D3KooWLX7MWuzi1Txa5LyZS4eTQ2tPaJijheH8faHggB9SxnBu ``` ![Node join](docs/images/node-join.png) @@ -170,9 +170,9 @@ parallax join -s {scheduler-address} For example: ```sh # first node -parallax join -s /ip4/192.168.1.2/tcp/5001/p2p/xxxxxxxxxxxx +parallax join -s 12D3KooWLX7MWuzi1Txa5LyZS4eTQ2tPaJijheH8faHggB9SxnBu # second node -parallax join -s /ip4/192.168.1.2/tcp/5001/p2p/xxxxxxxxxxxx +parallax join -s 12D3KooWLX7MWuzi1Txa5LyZS4eTQ2tPaJijheH8faHggB9SxnBu ``` #### Step 3: Call chat api with Scheduler @@ -236,3 +236,4 @@ curl --location 'http://localhost:3000/v1/chat/completions' --header 'Content-Ty |Qwen3 |[Link](https://huggingface.co/collections/Qwen/qwen3-67dd247413f0e2e4f653967f)|[Link](https://qwenlm.github.io/blog/qwen3/)| |Qwen2.5 |[Link](https://huggingface.co/collections/Qwen/qwen25-66e81a666513e518adb90d9e)|[Link](https://qwenlm.github.io/blog/qwen2.5/)| |Llama3 |[Link](https://huggingface.co/meta-llama/collections)|[Link](https://ai.meta.com/blog/meta-llama-3/)| +|Kimi |[Link](https://huggingface.co/collections/moonshotai/kimi-k2-6871243b990f2af5ba60617d)|[Link](https://platform.moonshot.ai/blog)| diff --git a/scripts/join.sh b/scripts/join.sh index b3bb7203..2e5b4c0d 100755 --- a/scripts/join.sh +++ b/scripts/join.sh @@ -8,17 +8,20 @@ fi helpFunction() { echo "" - echo "Usage: $0 [-s SCHEDULER_ADDR]" + echo "Usage: $0 [-s SCHEDULER_ADDR] [-r]" echo -e "\t-s SCHEDULER_ADDR (default: auto)" + echo -e "\t-r (Optional) Use public relay servers" exit 1 # Exit script after printing help } SCHEDULER_ADDR="auto" +USE_RELAY=0 -while getopts "s:" opt +while getopts "s:r" opt do case "$opt" in s ) SCHEDULER_ADDR="$OPTARG" ;; + r ) USE_RELAY=1 ;; ? ) helpFunction ;; # Print helpFunction in case parameter is non-existent esac done @@ -30,14 +33,24 @@ then fi echo "$SCHEDULER_ADDR" +if [ "$USE_RELAY" -eq 1 ]; then + echo "USE_RELAY: enabled" +fi export SGL_ENABLE_JIT_DEEPGEMM=0 -python3 src/parallax/launch.py \ +CMD="python3 src/parallax/launch.py \ --max-num-tokens-per-batch 4096 \ --max-sequence-length 2048 \ --max-batch-size 8 \ --kv-block-size 1024 \ --host 0.0.0.0 \ --port 3000 \ - --scheduler-addr $SCHEDULER_ADDR + --scheduler-addr $SCHEDULER_ADDR" + +if [ "$USE_RELAY" -eq 1 ] || { [[ "$SCHEDULER_ADDR" == /* ]] && [ "$SCHEDULER_ADDR" != "auto" ]; }; then + CMD="$CMD --relay-servers /dns4/relay-lattica.gradient.network/udp/18080/quic-v1/p2p/12D3KooWDaqDAsFupYvffBDxjHHuWmEAJE4sMDCXiuZiB8aG8rjf /dns4/relay-lattica.gradient.network/tcp/18080/p2p/12D3KooWDaqDAsFupYvffBDxjHHuWmEAJE4sMDCXiuZiB8aG8rjf" + CMD="$CMD --initial-peers /dns4/bootstrap-lattica.gradient.network/udp/18080/quic-v1/p2p/12D3KooWJHXvu8TWkFn6hmSwaxdCLy4ZzFwr4u5mvF9Fe2rMmFXb /dns4/bootstrap-lattica.gradient.network/tcp/18080/p2p/12D3KooWJHXvu8TWkFn6hmSwaxdCLy4ZzFwr4u5mvF9Fe2rMmFXb" +fi + +eval $CMD diff --git a/scripts/start.sh b/scripts/start.sh index 776d9da8..46789fe2 100644 --- a/scripts/start.sh +++ b/scripts/start.sh @@ -8,18 +8,22 @@ fi helpFunction() { echo "" - echo "Usage: $0 [-n INIT_NODES_NUM] [-m MODEL_NAME]" + echo "Usage: $0 [-n INIT_NODES_NUM] [-m MODEL_NAME] [-r]" echo -e "\t-n (Optional) Number of initial nodes" echo -e "\t-m (Optional) Model name" + echo -e "\t-r (Optional) Use public relay servers" exit 1 # Exit script after printing help } +USE_RELAY=0 + # Parse optional arguments -while getopts "n:m:h" opt +while getopts "n:m:rh" opt do case "$opt" in n ) INIT_NODES_NUM="$OPTARG" ;; m ) MODEL_NAME="$OPTARG" ;; + r ) USE_RELAY=1 ;; h ) helpFunction ;; ? ) helpFunction ;; # Print helpFunction in case parameter is non-existent esac @@ -32,6 +36,9 @@ fi if [ ! -z "$MODEL_NAME" ]; then echo "MODEL_NAME: $MODEL_NAME" fi +if [ "$USE_RELAY" -eq 1 ]; then + echo "USE_RELAY: enabled" +fi # Build the python command with optional arguments CMD="python3 src/backend/main.py --dht-port 5001 --port 3001" @@ -41,5 +48,8 @@ fi if [ ! -z "$INIT_NODES_NUM" ]; then CMD="$CMD --init-nodes-num $INIT_NODES_NUM" fi +if [ "$USE_RELAY" -eq 1 ]; then + CMD="$CMD --relay-servers /dns4/relay-lattica.gradient.network/udp/18080/quic-v1/p2p/12D3KooWDaqDAsFupYvffBDxjHHuWmEAJE4sMDCXiuZiB8aG8rjf /dns4/relay-lattica.gradient.network/tcp/18080/p2p/12D3KooWDaqDAsFupYvffBDxjHHuWmEAJE4sMDCXiuZiB8aG8rjf" +fi eval $CMD diff --git a/src/backend/main.py b/src/backend/main.py index 49f4b5fe..917f6d52 100644 --- a/src/backend/main.py +++ b/src/backend/main.py @@ -6,7 +6,7 @@ import uvicorn from fastapi import FastAPI, Request from fastapi.middleware.cors import CORSMiddleware -from fastapi.responses import JSONResponse, StreamingResponse +from fastapi.responses import FileResponse, JSONResponse, StreamingResponse from fastapi.staticfiles import StaticFiles from backend.server.request_handler import RequestHandler @@ -65,12 +65,13 @@ async def scheduler_init(raw_request: Request): @app.get("/node/join/command") async def node_join_command(): + peer_id = scheduler_manage.get_peer_id() is_local_network = scheduler_manage.get_is_local_network() return JSONResponse( content={ "type": "node_join_command", - "data": get_node_join_command("${scheduler-addr}", is_local_network), + "data": get_node_join_command(peer_id, is_local_network), }, status_code=200, ) @@ -109,6 +110,17 @@ async def openai_v1_chat_completions(raw_request: Request): return await request_handler.v1_chat_completions(request_data, request_id, received_ts) +# Disable caching for index.html +@app.get("/") +async def serve_index(): + response = FileResponse("src/frontend/dist/index.html") + # Disable cache + response.headers["Cache-Control"] = "no-store, no-cache, must-revalidate, max-age=0" + response.headers["Pragma"] = "no-cache" + response.headers["Expires"] = "0" + return response + + # mount the frontend app.mount("/", StaticFiles(directory="src/frontend/dist", html=True), name="static") diff --git a/src/backend/server/scheduler_manage.py b/src/backend/server/scheduler_manage.py index b98f7700..29360e67 100644 --- a/src/backend/server/scheduler_manage.py +++ b/src/backend/server/scheduler_manage.py @@ -6,7 +6,11 @@ from backend.server.constants import NODE_STATUS_AVAILABLE, NODE_STATUS_WAITING from backend.server.rpc_connection_handler import RPCConnectionHandler -from backend.server.static_config import get_model_info, get_node_join_command +from backend.server.static_config import ( + PUBLIC_RELAY_SERVERS, + get_model_info, + get_node_join_command, +) from parallax_utils.logging_config import get_logger from scheduling.node import RequestSignal from scheduling.scheduler import Scheduler @@ -53,6 +57,10 @@ def run(self, model_name, init_nodes_num, is_local_network=False): f"SchedulerManage starting: model_name={model_name}, init_nodes_num={init_nodes_num}" ) self.is_local_network = is_local_network + if not is_local_network and not self.relay_servers: + logger.debug("Using public relay servers") + self.relay_servers = PUBLIC_RELAY_SERVERS + self._start_scheduler(model_name, init_nodes_num) self._start_lattica() @@ -71,6 +79,11 @@ def get_init_nodes_num(self): def get_is_local_network(self): return self.is_local_network + def get_peer_id(self): + if self.lattica is None: + return None + return self.lattica.peer_id() + def get_cluster_status(self): return { "type": "cluster_status", @@ -79,7 +92,7 @@ def get_cluster_status(self): "model_name": self.model_name, "init_nodes_num": self.init_nodes_num, "node_join_command": get_node_join_command( - "${scheduler-addr}", self.is_local_network + self.get_peer_id(), self.is_local_network ), "node_list": self.get_node_list(), }, diff --git a/src/backend/server/static_config.py b/src/backend/server/static_config.py index 004cf3b9..64230299 100644 --- a/src/backend/server/static_config.py +++ b/src/backend/server/static_config.py @@ -33,7 +33,12 @@ NODE_JOIN_COMMAND_LOCAL_NETWORK = """parallax join""" -NODE_JOIN_COMMAND_PUBLIC_NETWORK = """parallax join -s {scheduler_addr}""" +NODE_JOIN_COMMAND_PUBLIC_NETWORK = """parallax join -s {scheduler_addr} """ + +PUBLIC_RELAY_SERVERS = [ + "/dns4/relay-lattica.gradient.network/udp/18080/quic-v1/p2p/12D3KooWDaqDAsFupYvffBDxjHHuWmEAJE4sMDCXiuZiB8aG8rjf", + "/dns4/relay-lattica.gradient.network/tcp/18080/p2p/12D3KooWDaqDAsFupYvffBDxjHHuWmEAJE4sMDCXiuZiB8aG8rjf", +] def get_model_info(model_name): diff --git a/src/parallax/cli.py b/src/parallax/cli.py index 254dcb66..a59a2cb2 100644 --- a/src/parallax/cli.py +++ b/src/parallax/cli.py @@ -52,13 +52,27 @@ def run_command(args): sys.exit(1) # Build the command to run the backend main.py - cmd = [sys.executable, str(backend_main), "--dht-port", "5001", "--port", "3001"] + cmd = [ + sys.executable, + str(backend_main), + "--dht-port", + "5001", + "--port", + "3001", + ] # Add optional arguments if provided if args.model_name: cmd.extend(["--model-name", args.model_name]) if args.init_nodes_num: cmd.extend(["--init-nodes-num", str(args.init_nodes_num)]) + if args.use_relay: + cmd.extend( + [ + "--relay-servers", + "/dns4/relay-lattica.gradient.network/udp/18080/quic-v1/p2p/12D3KooWDaqDAsFupYvffBDxjHHuWmEAJE4sMDCXiuZiB8aG8rjf /dns4/relay-lattica.gradient.network/tcp/18080/p2p/12D3KooWDaqDAsFupYvffBDxjHHuWmEAJE4sMDCXiuZiB8aG8rjf", + ] + ) logger.info(f"Running command: {' '.join(cmd)}") @@ -128,6 +142,21 @@ def join_command(args): "--scheduler-addr", args.scheduler_addr, ] + if args.use_relay or ( + args.scheduler_addr != "auto" and not str(args.scheduler_addr).startswith("/") + ): + cmd.extend( + [ + "--relay-servers", + "/dns4/relay-lattica.gradient.network/udp/18080/quic-v1/p2p/12D3KooWDaqDAsFupYvffBDxjHHuWmEAJE4sMDCXiuZiB8aG8rjf /dns4/relay-lattica.gradient.network/tcp/18080/p2p/12D3KooWDaqDAsFupYvffBDxjHHuWmEAJE4sMDCXiuZiB8aG8rjf", + ] + ) + cmd.extend( + [ + "--initial-peers", + "/dns4/bootstrap-lattica.gradient.network/udp/18080/quic-v1/p2p/12D3KooWJHXvu8TWkFn6hmSwaxdCLy4ZzFwr4u5mvF9Fe2rMmFXb /dns4/bootstrap-lattica.gradient.network/tcp/18080/p2p/12D3KooWJHXvu8TWkFn6hmSwaxdCLy4ZzFwr4u5mvF9Fe2rMmFXb", + ] + ) logger.info(f"Running command: {' '.join(cmd)}") logger.info(f"Scheduler address: {args.scheduler_addr}") @@ -186,6 +215,9 @@ def main(): ) run_parser.add_argument("-n", "--init-nodes-num", type=int, help="Number of initial nodes") run_parser.add_argument("-m", "--model-name", type=str, help="Model name") + run_parser.add_argument( + "-r", "--use-relay", action="store_true", help="Use public relay servers" + ) # Add 'join' command parser join_parser = subparsers.add_parser( @@ -194,6 +226,9 @@ def main(): join_parser.add_argument( "-s", "--scheduler-addr", default="auto", type=str, help="Scheduler address (required)" ) + join_parser.add_argument( + "-r", "--use-relay", action="store_true", help="Use public relay servers" + ) args = parser.parse_args()