Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ parallax join
# public network env
parallax join -s {scheduler-address}
# example
parallax join -s /ip4/192.168.1.2/tcp/5001/p2p/xxxxxxxxxxxx
parallax join -s 12D3KooWLX7MWuzi1Txa5LyZS4eTQ2tPaJijheH8faHggB9SxnBu
```

![Node join](docs/images/node-join.png)
Expand Down Expand Up @@ -170,9 +170,9 @@ parallax join -s {scheduler-address}
For example:
```sh
# first node
parallax join -s /ip4/192.168.1.2/tcp/5001/p2p/xxxxxxxxxxxx
parallax join -s 12D3KooWLX7MWuzi1Txa5LyZS4eTQ2tPaJijheH8faHggB9SxnBu
# second node
parallax join -s /ip4/192.168.1.2/tcp/5001/p2p/xxxxxxxxxxxx
parallax join -s 12D3KooWLX7MWuzi1Txa5LyZS4eTQ2tPaJijheH8faHggB9SxnBu
```

#### Step 3: Call chat api with Scheduler
Expand Down Expand Up @@ -236,3 +236,4 @@ curl --location 'http://localhost:3000/v1/chat/completions' --header 'Content-Ty
|Qwen3 |[Link](https://huggingface.co/collections/Qwen/qwen3-67dd247413f0e2e4f653967f)|[Link](https://qwenlm.github.io/blog/qwen3/)|
|Qwen2.5 |[Link](https://huggingface.co/collections/Qwen/qwen25-66e81a666513e518adb90d9e)|[Link](https://qwenlm.github.io/blog/qwen2.5/)|
|Llama3 |[Link](https://huggingface.co/meta-llama/collections)|[Link](https://ai.meta.com/blog/meta-llama-3/)|
|Kimi |[Link](https://huggingface.co/collections/moonshotai/kimi-k2-6871243b990f2af5ba60617d)|[Link](https://platform.moonshot.ai/blog)|
21 changes: 17 additions & 4 deletions scripts/join.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,17 +8,20 @@ fi
helpFunction()
{
echo ""
echo "Usage: $0 [-s SCHEDULER_ADDR]"
echo "Usage: $0 [-s SCHEDULER_ADDR] [-r]"
echo -e "\t-s SCHEDULER_ADDR (default: auto)"
echo -e "\t-r (Optional) Use public relay servers"
exit 1 # Exit script after printing help
}

SCHEDULER_ADDR="auto"
USE_RELAY=0

while getopts "s:" opt
while getopts "s:r" opt
do
case "$opt" in
s ) SCHEDULER_ADDR="$OPTARG" ;;
r ) USE_RELAY=1 ;;
? ) helpFunction ;; # Print helpFunction in case parameter is non-existent
esac
done
Expand All @@ -30,14 +33,24 @@ then
fi

echo "$SCHEDULER_ADDR"
if [ "$USE_RELAY" -eq 1 ]; then
echo "USE_RELAY: enabled"
fi

export SGL_ENABLE_JIT_DEEPGEMM=0

python3 src/parallax/launch.py \
CMD="python3 src/parallax/launch.py \
--max-num-tokens-per-batch 4096 \
--max-sequence-length 2048 \
--max-batch-size 8 \
--kv-block-size 1024 \
--host 0.0.0.0 \
--port 3000 \
--scheduler-addr $SCHEDULER_ADDR
--scheduler-addr $SCHEDULER_ADDR"

if [ "$USE_RELAY" -eq 1 ] || { [[ "$SCHEDULER_ADDR" == /* ]] && [ "$SCHEDULER_ADDR" != "auto" ]; }; then
CMD="$CMD --relay-servers /dns4/relay-lattica.gradient.network/udp/18080/quic-v1/p2p/12D3KooWDaqDAsFupYvffBDxjHHuWmEAJE4sMDCXiuZiB8aG8rjf /dns4/relay-lattica.gradient.network/tcp/18080/p2p/12D3KooWDaqDAsFupYvffBDxjHHuWmEAJE4sMDCXiuZiB8aG8rjf"
CMD="$CMD --initial-peers /dns4/bootstrap-lattica.gradient.network/udp/18080/quic-v1/p2p/12D3KooWJHXvu8TWkFn6hmSwaxdCLy4ZzFwr4u5mvF9Fe2rMmFXb /dns4/bootstrap-lattica.gradient.network/tcp/18080/p2p/12D3KooWJHXvu8TWkFn6hmSwaxdCLy4ZzFwr4u5mvF9Fe2rMmFXb"
fi

eval $CMD
14 changes: 12 additions & 2 deletions scripts/start.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,18 +8,22 @@ fi
helpFunction()
{
echo ""
echo "Usage: $0 [-n INIT_NODES_NUM] [-m MODEL_NAME]"
echo "Usage: $0 [-n INIT_NODES_NUM] [-m MODEL_NAME] [-r]"
echo -e "\t-n (Optional) Number of initial nodes"
echo -e "\t-m (Optional) Model name"
echo -e "\t-r (Optional) Use public relay servers"
exit 1 # Exit script after printing help
}

USE_RELAY=0

# Parse optional arguments
while getopts "n:m:h" opt
while getopts "n:m:rh" opt
do
case "$opt" in
n ) INIT_NODES_NUM="$OPTARG" ;;
m ) MODEL_NAME="$OPTARG" ;;
r ) USE_RELAY=1 ;;
h ) helpFunction ;;
? ) helpFunction ;; # Print helpFunction in case parameter is non-existent
esac
Expand All @@ -32,6 +36,9 @@ fi
if [ ! -z "$MODEL_NAME" ]; then
echo "MODEL_NAME: $MODEL_NAME"
fi
if [ "$USE_RELAY" -eq 1 ]; then
echo "USE_RELAY: enabled"
fi

# Build the python command with optional arguments
CMD="python3 src/backend/main.py --dht-port 5001 --port 3001"
Expand All @@ -41,5 +48,8 @@ fi
if [ ! -z "$INIT_NODES_NUM" ]; then
CMD="$CMD --init-nodes-num $INIT_NODES_NUM"
fi
if [ "$USE_RELAY" -eq 1 ]; then
CMD="$CMD --relay-servers /dns4/relay-lattica.gradient.network/udp/18080/quic-v1/p2p/12D3KooWDaqDAsFupYvffBDxjHHuWmEAJE4sMDCXiuZiB8aG8rjf /dns4/relay-lattica.gradient.network/tcp/18080/p2p/12D3KooWDaqDAsFupYvffBDxjHHuWmEAJE4sMDCXiuZiB8aG8rjf"
fi

eval $CMD
16 changes: 14 additions & 2 deletions src/backend/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import uvicorn
from fastapi import FastAPI, Request
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse, StreamingResponse
from fastapi.responses import FileResponse, JSONResponse, StreamingResponse
from fastapi.staticfiles import StaticFiles

from backend.server.request_handler import RequestHandler
Expand Down Expand Up @@ -65,12 +65,13 @@ async def scheduler_init(raw_request: Request):

@app.get("/node/join/command")
async def node_join_command():
peer_id = scheduler_manage.get_peer_id()
is_local_network = scheduler_manage.get_is_local_network()

return JSONResponse(
content={
"type": "node_join_command",
"data": get_node_join_command("${scheduler-addr}", is_local_network),
"data": get_node_join_command(peer_id, is_local_network),
},
status_code=200,
)
Expand Down Expand Up @@ -109,6 +110,17 @@ async def openai_v1_chat_completions(raw_request: Request):
return await request_handler.v1_chat_completions(request_data, request_id, received_ts)


# Disable caching for index.html
@app.get("/")
async def serve_index():
response = FileResponse("src/frontend/dist/index.html")
# Disable cache
response.headers["Cache-Control"] = "no-store, no-cache, must-revalidate, max-age=0"
response.headers["Pragma"] = "no-cache"
response.headers["Expires"] = "0"
return response


# mount the frontend
app.mount("/", StaticFiles(directory="src/frontend/dist", html=True), name="static")

Expand Down
17 changes: 15 additions & 2 deletions src/backend/server/scheduler_manage.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,11 @@

from backend.server.constants import NODE_STATUS_AVAILABLE, NODE_STATUS_WAITING
from backend.server.rpc_connection_handler import RPCConnectionHandler
from backend.server.static_config import get_model_info, get_node_join_command
from backend.server.static_config import (
PUBLIC_RELAY_SERVERS,
get_model_info,
get_node_join_command,
)
from parallax_utils.logging_config import get_logger
from scheduling.node import RequestSignal
from scheduling.scheduler import Scheduler
Expand Down Expand Up @@ -53,6 +57,10 @@ def run(self, model_name, init_nodes_num, is_local_network=False):
f"SchedulerManage starting: model_name={model_name}, init_nodes_num={init_nodes_num}"
)
self.is_local_network = is_local_network
if not is_local_network and not self.relay_servers:
logger.debug("Using public relay servers")
self.relay_servers = PUBLIC_RELAY_SERVERS

self._start_scheduler(model_name, init_nodes_num)
self._start_lattica()

Expand All @@ -71,6 +79,11 @@ def get_init_nodes_num(self):
def get_is_local_network(self):
return self.is_local_network

def get_peer_id(self):
if self.lattica is None:
return None
return self.lattica.peer_id()

def get_cluster_status(self):
return {
"type": "cluster_status",
Expand All @@ -79,7 +92,7 @@ def get_cluster_status(self):
"model_name": self.model_name,
"init_nodes_num": self.init_nodes_num,
"node_join_command": get_node_join_command(
"${scheduler-addr}", self.is_local_network
self.get_peer_id(), self.is_local_network
),
"node_list": self.get_node_list(),
},
Expand Down
7 changes: 6 additions & 1 deletion src/backend/server/static_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,12 @@

NODE_JOIN_COMMAND_LOCAL_NETWORK = """parallax join"""

NODE_JOIN_COMMAND_PUBLIC_NETWORK = """parallax join -s {scheduler_addr}"""
NODE_JOIN_COMMAND_PUBLIC_NETWORK = """parallax join -s {scheduler_addr} """

PUBLIC_RELAY_SERVERS = [
"/dns4/relay-lattica.gradient.network/udp/18080/quic-v1/p2p/12D3KooWDaqDAsFupYvffBDxjHHuWmEAJE4sMDCXiuZiB8aG8rjf",
"/dns4/relay-lattica.gradient.network/tcp/18080/p2p/12D3KooWDaqDAsFupYvffBDxjHHuWmEAJE4sMDCXiuZiB8aG8rjf",
]


def get_model_info(model_name):
Expand Down
37 changes: 36 additions & 1 deletion src/parallax/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,13 +52,27 @@ def run_command(args):
sys.exit(1)

# Build the command to run the backend main.py
cmd = [sys.executable, str(backend_main), "--dht-port", "5001", "--port", "3001"]
cmd = [
sys.executable,
str(backend_main),
"--dht-port",
"5001",
"--port",
"3001",
]

# Add optional arguments if provided
if args.model_name:
cmd.extend(["--model-name", args.model_name])
if args.init_nodes_num:
cmd.extend(["--init-nodes-num", str(args.init_nodes_num)])
if args.use_relay:
cmd.extend(
[
"--relay-servers",
"/dns4/relay-lattica.gradient.network/udp/18080/quic-v1/p2p/12D3KooWDaqDAsFupYvffBDxjHHuWmEAJE4sMDCXiuZiB8aG8rjf /dns4/relay-lattica.gradient.network/tcp/18080/p2p/12D3KooWDaqDAsFupYvffBDxjHHuWmEAJE4sMDCXiuZiB8aG8rjf",
]
)

logger.info(f"Running command: {' '.join(cmd)}")

Expand Down Expand Up @@ -128,6 +142,21 @@ def join_command(args):
"--scheduler-addr",
args.scheduler_addr,
]
if args.use_relay or (
args.scheduler_addr != "auto" and not str(args.scheduler_addr).startswith("/")
):
cmd.extend(
[
"--relay-servers",
"/dns4/relay-lattica.gradient.network/udp/18080/quic-v1/p2p/12D3KooWDaqDAsFupYvffBDxjHHuWmEAJE4sMDCXiuZiB8aG8rjf /dns4/relay-lattica.gradient.network/tcp/18080/p2p/12D3KooWDaqDAsFupYvffBDxjHHuWmEAJE4sMDCXiuZiB8aG8rjf",
]
)
cmd.extend(
[
"--initial-peers",
"/dns4/bootstrap-lattica.gradient.network/udp/18080/quic-v1/p2p/12D3KooWJHXvu8TWkFn6hmSwaxdCLy4ZzFwr4u5mvF9Fe2rMmFXb /dns4/bootstrap-lattica.gradient.network/tcp/18080/p2p/12D3KooWJHXvu8TWkFn6hmSwaxdCLy4ZzFwr4u5mvF9Fe2rMmFXb",
]
)

logger.info(f"Running command: {' '.join(cmd)}")
logger.info(f"Scheduler address: {args.scheduler_addr}")
Expand Down Expand Up @@ -186,6 +215,9 @@ def main():
)
run_parser.add_argument("-n", "--init-nodes-num", type=int, help="Number of initial nodes")
run_parser.add_argument("-m", "--model-name", type=str, help="Model name")
run_parser.add_argument(
"-r", "--use-relay", action="store_true", help="Use public relay servers"
)

# Add 'join' command parser
join_parser = subparsers.add_parser(
Expand All @@ -194,6 +226,9 @@ def main():
join_parser.add_argument(
"-s", "--scheduler-addr", default="auto", type=str, help="Scheduler address (required)"
)
join_parser.add_argument(
"-r", "--use-relay", action="store_true", help="Use public relay servers"
)

args = parser.parse_args()

Expand Down