diff --git a/Dockerfile b/Dockerfile
index 7432c3c40..8fcdf3b7d 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,4 +1,4 @@
-FROM nvidia/cuda:12.4.0-runtime-ubuntu20.04 as base
+FROM nvcr.io/nvidia/tritonserver:24.04-py3-min as base
 ARG PYTORCH_VERSION=2.5.1
 ARG PYTHON_VERSION=3.9
 ARG CUDA_VERSION=12.4
@@ -38,5 +38,7 @@
 WORKDIR /root
 COPY ./requirements.txt /lightllm/requirements.txt
 RUN pip install -r /lightllm/requirements.txt --no-cache-dir --ignore-installed --extra-index-url https://download.pytorch.org/whl/cu124
+RUN pip install --no-cache-dir nvidia-nccl-cu12==2.25.1 # for allreduce hang issues in multinode H100
+
 COPY . /lightllm
 RUN pip install -e /lightllm --no-cache-dir
diff --git a/docs/CN/source/getting_started/quickstart.rst b/docs/CN/source/getting_started/quickstart.rst
index e7d303499..3e3fb7091 100755
--- a/docs/CN/source/getting_started/quickstart.rst
+++ b/docs/CN/source/getting_started/quickstart.rst
@@ -68,6 +68,7 @@
 双机H100部署 DeepSeek-R1 模型，启动命令如下:
 
 .. code-block:: console
+
     $ # Node 0
     $ LOADWORKER=8 python -m lightllm.server.api_server --model_dir ~/models/DeepSeek-R1 --tp 16 --graph_max_batch_size 100 --nccl_host master_addr --nnodes 2 --node_rank 0
     $ # Node 1
diff --git a/docs/EN/source/getting_started/quickstart.rst b/docs/EN/source/getting_started/quickstart.rst
index f9563a8ab..d5e7cd3bb 100755
--- a/docs/EN/source/getting_started/quickstart.rst
+++ b/docs/EN/source/getting_started/quickstart.rst
@@ -65,6 +65,7 @@ For the DeepSeek-R1 model on single H200, it can be launched with the following
 For the DeepSeek-R1 model on two H100, it can be launched with the following command:
 
 .. code-block:: console
+
     $ # Node 0
     $ LOADWORKER=8 python -m lightllm.server.api_server --model_dir ~/models/DeepSeek-R1 --tp 16 --graph_max_batch_size 100 --nccl_host master_addr --nnodes 2 --node_rank 0
     $ # Node 1
diff --git a/lightllm/server/api_http.py b/lightllm/server/api_http.py
index 35ac87579..22ae26a0c 100755
--- a/lightllm/server/api_http.py
+++ b/lightllm/server/api_http.py
@@ -326,7 +326,7 @@ async def tokens(request: Request):
     try:
         request_dict = await request.json()
         prompt = request_dict.pop("text")
-        parameters = request_dict.pop("parameters")
+        parameters = request_dict.pop("parameters", {})
         return JSONResponse({"ntokens": g_objs.httpserver_manager.tokens(prompt, parameters)}, status_code=200)
     except Exception as e:
         return create_error_response(HTTPStatus.EXPECTATION_FAILED, f"error: {str(e)}")
diff --git a/setup.py b/setup.py
index d89a9570e..1d06b3976 100644
--- a/setup.py
+++ b/setup.py
@@ -3,7 +3,7 @@
 package_data = {"lightllm": ["common/all_kernel_configs/*/*.json"]}
 setup(
     name="lightllm",
-    version="1.0.0",
+    version="1.0.1",
     packages=find_packages(exclude=("build", "include", "test", "dist", "docs", "benchmarks", "lightllm.egg-info")),
     author="model toolchain",
     author_email="",
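
Reviewer note: the api_http.py hunk makes the "parameters" field optional for the tokens endpoint; before this patch, a request carrying only "text" raised a KeyError inside the handler and came back as an EXPECTATION_FAILED error. A minimal client-side sketch of the new behavior follows, assuming the server listens on localhost:8000 and the handler is routed at /tokens (the route decorator and port are outside this hunk, so both are assumptions):

    import requests  # assumes the requests package is installed

    BASE_URL = "http://localhost:8000"  # assumption: default api_server host/port

    # "parameters" omitted: previously a KeyError surfaced as an
    # EXPECTATION_FAILED response; with this patch it defaults to {}.
    resp = requests.post(f"{BASE_URL}/tokens", json={"text": "hello world"})
    print(resp.status_code, resp.json())  # expect 200 and {"ntokens": <count>}

    # Passing "parameters" explicitly still behaves as before.
    resp = requests.post(f"{BASE_URL}/tokens", json={"text": "hello world", "parameters": {}})
    print(resp.status_code, resp.json())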