diff --git a/docker/cuda_version_12.6.1/Dockerfile b/docker/cuda_version_12.6.1/Dockerfile
new file mode 100644
index 000000000..6d67fcf4d
--- /dev/null
+++ b/docker/cuda_version_12.6.1/Dockerfile
@@ -0,0 +1,48 @@
+ARG CUDA_VERSION=12.6.1
+FROM nvidia/cuda:${CUDA_VERSION}-cudnn-devel-ubuntu22.04
+ARG PYTHON_VERSION=3.10
+ARG MAMBA_VERSION=24.7.1-0
+ARG TARGETPLATFORM
+ENV PATH=/opt/conda/bin:$PATH \
+    CONDA_PREFIX=/opt/conda
+# Give /tmp mode 1777 (world-writable with the sticky bit) and install the base build tools.
+RUN chmod 1777 /tmp && apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
+    ca-certificates \
+    libssl-dev \
+    curl \
+    g++ \
+    make \
+    git && \
+    rm -rf /var/lib/apt/lists/*
+# Install Mambaforge into /opt/conda, choosing the installer that matches TARGETPLATFORM.
+RUN case ${TARGETPLATFORM} in \
+    "linux/arm64") MAMBA_ARCH=aarch64 ;; \
+    *) MAMBA_ARCH=x86_64 ;; \
+    esac && \
+    curl -fsSL -o ~/mambaforge.sh "https://github.com/conda-forge/miniforge/releases/download/${MAMBA_VERSION}/Mambaforge-${MAMBA_VERSION}-Linux-${MAMBA_ARCH}.sh" && \
+    bash ~/mambaforge.sh -b -p /opt/conda && \
+    rm ~/mambaforge.sh
+# Install the requested Python with conda; linux/arm64 builds are rejected here (exit 1).
+RUN case ${TARGETPLATFORM} in \
+    "linux/arm64") exit 1 ;; \
+    *) /opt/conda/bin/conda update -y conda && \
+    /opt/conda/bin/conda install -y "python=${PYTHON_VERSION}" ;; \
+    esac && \
+    /opt/conda/bin/conda clean -ya
+
+
+WORKDIR /root
+# Copy requirements.txt on its own so the dependency layers do not depend on the rest of the source tree.
+COPY ./requirements.txt /lightllm/requirements.txt
+RUN pip install -U pip
+RUN pip install -r /lightllm/requirements.txt --no-cache-dir
+
+RUN pip install --no-cache-dir vllm --pre --extra-index-url https://wheels.vllm.ai/nightly
+
+# TODO: offline compile
+# RUN git clone https://github.com/ModelTC/LightKernel.git && cd LightKernel && pip install --no-deps -v .
+
+RUN apt-get update && apt-get install -y --no-install-recommends libnuma-dev && rm -rf /var/lib/apt/lists/* # for sgl_kernel
+# Copy the source tree last and install lightllm in editable mode.
+COPY . /lightllm
+RUN pip install -e /lightllm --no-cache-dir
diff --git a/docker/cuda_version_12.6.1/Dockerfile.deepep b/docker/cuda_version_12.6.1/Dockerfile.deepep
new file mode 100644
index 000000000..e765978b9
--- /dev/null
+++ b/docker/cuda_version_12.6.1/Dockerfile.deepep
@@ -0,0 +1,84 @@
+ARG CUDA_VERSION=12.6.1
+FROM nvidia/cuda:${CUDA_VERSION}-cudnn-devel-ubuntu22.04
+ARG PYTHON_VERSION=3.10
+ARG MAMBA_VERSION=24.7.1-0
+ARG TARGETPLATFORM
+ENV PATH=/opt/conda/bin:$PATH \
+    CONDA_PREFIX=/opt/conda
+# Give /tmp mode 1777 (world-writable with the sticky bit) and install the base build tools.
+RUN chmod 1777 /tmp && apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
+    ca-certificates \
+    libssl-dev \
+    curl \
+    g++ \
+    make \
+    git && \
+    rm -rf /var/lib/apt/lists/*
+# Install Mambaforge into /opt/conda, choosing the installer that matches TARGETPLATFORM.
+RUN case ${TARGETPLATFORM} in \
+    "linux/arm64") MAMBA_ARCH=aarch64 ;; \
+    *) MAMBA_ARCH=x86_64 ;; \
+    esac && \
+    curl -fsSL -o ~/mambaforge.sh "https://github.com/conda-forge/miniforge/releases/download/${MAMBA_VERSION}/Mambaforge-${MAMBA_VERSION}-Linux-${MAMBA_ARCH}.sh" && \
+    bash ~/mambaforge.sh -b -p /opt/conda && \
+    rm ~/mambaforge.sh
+# Install the requested Python with conda; linux/arm64 builds are rejected here (exit 1).
+RUN case ${TARGETPLATFORM} in \
+    "linux/arm64") exit 1 ;; \
+    *) /opt/conda/bin/conda update -y conda && \
+    /opt/conda/bin/conda install -y "python=${PYTHON_VERSION}" ;; \
+    esac && \
+    /opt/conda/bin/conda clean -ya
+
+
+WORKDIR /root
+# Copy requirements.txt on its own so the dependency layers do not depend on the rest of the source tree.
+COPY ./requirements.txt /lightllm/requirements.txt
+RUN pip install -U pip
+RUN pip install -r /lightllm/requirements.txt --no-cache-dir
+
+RUN pip install --no-cache-dir vllm --pre --extra-index-url https://wheels.vllm.ai/nightly
+
+# TODO: offline compile
+# RUN git clone https://github.com/ModelTC/LightKernel.git && cd LightKernel && pip install --no-deps -v .
+# Packaging/build tools (presumably for the gdrcopy .deb build below) and RDMA userspace libraries.
+RUN apt-get update && apt-get install -y libnuma-dev wget devscripts debhelper dh-make build-essential dkms
+RUN apt-get update && apt-get install -y ibverbs-providers infiniband-diags perftest rdma-core libibverbs-dev librdmacm-dev
+
+ENV CUDA_HOME=/usr/local/cuda \
+    GDRCOPY_HOME=/usr/src/gdrdrv-2.4.4/
+# Build the gdrcopy v2.4.4 .deb packages from source, install them, and drop the build tree.
+RUN mkdir -p /tmp/gdrcopy && cd /tmp \
+    && git clone https://github.com/NVIDIA/gdrcopy.git -b v2.4.4 \
+    && cd gdrcopy/packages \
+    && CUDA=/usr/local/cuda ./build-deb-packages.sh \
+    && dpkg -i gdrdrv-dkms_*.deb libgdrapi_*.deb gdrcopy-tests_*.deb gdrcopy_*.deb \
+    && cd / && rm -rf /tmp/gdrcopy
+
+# Fix DeepEP IBGDA symlink: expose libmlx5.so.1 under the unversioned name libmlx5.so
+RUN ln -sf /usr/lib/x86_64-linux-gnu/libmlx5.so.1 /usr/lib/x86_64-linux-gnu/libmlx5.so
+# Build NVSHMEM 3.3.9 from source with IBGDA and GDRCopy enabled; installs into /root/nvshmem/install.
+RUN wget https://developer.download.nvidia.com/compute/redist/nvshmem/3.3.9/source/nvshmem_src_cuda12-all-all-3.3.9.tar.gz \
+    && tar -xf nvshmem_src_cuda12-all-all-3.3.9.tar.gz && mv nvshmem_src nvshmem \
+    && cd nvshmem \
+    && rm -f /root/nvshmem_src_cuda12-all-all-3.3.9.tar.gz \
+    && NVSHMEM_SHMEM_SUPPORT=0 \
+    NVSHMEM_UCX_SUPPORT=0 \
+    NVSHMEM_USE_NCCL=0 \
+    NVSHMEM_MPI_SUPPORT=0 \
+    NVSHMEM_IBGDA_SUPPORT=1 \
+    NVSHMEM_PMIX_SUPPORT=0 \
+    NVSHMEM_TIMEOUT_DEVICE_POLLING=0 \
+    NVSHMEM_USE_GDRCOPY=1 \
+    cmake -S . -B build/ -DCMAKE_INSTALL_PREFIX=/root/nvshmem/install -DCMAKE_CUDA_ARCHITECTURES=90 \
+    && cmake --build build --target install -j64
+# Check out DeepEP at a pinned commit (override with --build-arg DEEPEP_COMMIT=...).
+ARG DEEPEP_COMMIT=b6ce310bb0b75079682d09bc2ebc063a074fbd58
+RUN git clone https://github.com/deepseek-ai/DeepEP.git && cd DeepEP && git checkout ${DEEPEP_COMMIT} && cd ..
+# Build and install DeepEP against the NVSHMEM install tree.
+WORKDIR /root/DeepEP
+ENV NVSHMEM_DIR=/root/nvshmem/install
+RUN NVSHMEM_DIR=/root/nvshmem/install python setup.py install
+# Copy the source tree last and install lightllm in editable mode.
+COPY . /lightllm
+RUN pip install -e /lightllm --no-cache-dir
\ No newline at end of file
diff --git a/docker/cuda_version_12.6.1/Dockerfile.nixl b/docker/cuda_version_12.6.1/Dockerfile.nixl
new file mode 100644
index 000000000..b8047bbd0
--- /dev/null
+++ b/docker/cuda_version_12.6.1/Dockerfile.nixl
@@ -0,0 +1,94 @@
+ARG CUDA_VERSION=12.6.1
+FROM nvidia/cuda:${CUDA_VERSION}-cudnn-devel-ubuntu22.04
+ARG PYTHON_VERSION=3.10
+ARG MAMBA_VERSION=24.7.1-0
+ARG TARGETPLATFORM
+ENV PATH=/opt/conda/bin:$PATH \
+    CONDA_PREFIX=/opt/conda
+# Give /tmp mode 1777 (world-writable with the sticky bit) and install the base build tools.
+RUN chmod 1777 /tmp && apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
+    ca-certificates \
+    libssl-dev \
+    curl \
+    g++ \
+    make \
+    git && \
+    rm -rf /var/lib/apt/lists/*
+# Install Mambaforge into /opt/conda, choosing the installer that matches TARGETPLATFORM.
+RUN case ${TARGETPLATFORM} in \
+    "linux/arm64") MAMBA_ARCH=aarch64 ;; \
+    *) MAMBA_ARCH=x86_64 ;; \
+    esac && \
+    curl -fsSL -o ~/mambaforge.sh -v "https://github.com/conda-forge/miniforge/releases/download/${MAMBA_VERSION}/Mambaforge-${MAMBA_VERSION}-Linux-${MAMBA_ARCH}.sh" && \
+    bash ~/mambaforge.sh -b -p /opt/conda && \
+    rm ~/mambaforge.sh
+# Install the requested Python with conda; linux/arm64 builds are rejected here (exit 1).
+RUN case ${TARGETPLATFORM} in \
+    "linux/arm64") exit 1 ;; \
+    *) /opt/conda/bin/conda update -y conda && \
+    /opt/conda/bin/conda install -y "python=${PYTHON_VERSION}" ;; \
+    esac && \
+    /opt/conda/bin/conda clean -ya
+
+
+WORKDIR /root
+# Copy requirements.txt on its own so the dependency layers do not depend on the rest of the source tree.
+COPY ./requirements.txt /lightllm/requirements.txt
+RUN --mount=type=cache,target=/root/.cache/pip pip install -r /lightllm/requirements.txt --ignore-installed --extra-index-url https://download.pytorch.org/whl/cu124
+
+RUN --mount=type=cache,target=/root/.cache/pip pip install vllm --pre --extra-index-url https://wheels.vllm.ai/nightly
+RUN --mount=type=cache,target=/root/.cache/pip git clone https://github.com/ModelTC/LightKernel.git && cd LightKernel && pip install --no-deps -v .
+# Packaging/build tools (presumably for the gdrcopy .deb build below) and RDMA userspace libraries.
+RUN apt-get update && apt-get install -y libnuma-dev wget devscripts debhelper dh-make build-essential dkms
+RUN apt-get update && apt-get install -y ibverbs-providers infiniband-diags perftest rdma-core libibverbs-dev librdmacm-dev
+
+ENV CUDA_HOME=/usr/local/cuda \
+    GDRCOPY_HOME=/usr/src/gdrdrv-2.4.4/
+# Build the gdrcopy v2.4.4 .deb packages from source, install them, and drop the build tree.
+RUN mkdir -p /tmp/gdrcopy && cd /tmp \
+    && git clone https://github.com/NVIDIA/gdrcopy.git -b v2.4.4 \
+    && cd gdrcopy/packages \
+    && CUDA=/usr/local/cuda ./build-deb-packages.sh \
+    && dpkg -i gdrdrv-dkms_*.deb libgdrapi_*.deb gdrcopy-tests_*.deb gdrcopy_*.deb \
+    && cd / && rm -rf /tmp/gdrcopy
+# Build UCX from source (ref v1.19.x) with CUDA, verbs, gdrcopy and EFA support; make is capped at nproc jobs.
+RUN apt-get update && apt-get install -y cmake automake autotools-dev libtool libz-dev && \
+    DEBIAN_FRONTEND=noninteractive apt-get -y install --reinstall libibverbs-dev rdma-core ibverbs-utils libibumad-dev && \
+    rm -rf /usr/lib/ucx && \
+    rm -rf /opt/hpcx/ucx && \
+    cd /usr/local/src && \
+    git clone https://github.com/openucx/ucx.git && \
+    cd ucx && \
+    git checkout v1.19.x && \
+    ./autogen.sh && ./configure \
+    --enable-shared \
+    --disable-static \
+    --disable-doxygen-doc \
+    --enable-optimizations \
+    --enable-cma \
+    --enable-devel-headers \
+    --with-cuda=/usr/local/cuda \
+    --with-verbs=yes \
+    --with-dm \
+    --with-gdrcopy=/usr/local \
+    --with-efa \
+    --enable-mt && \
+    make -j"$(nproc)" && \
+    make -j"$(nproc)" install-strip && \
+    ldconfig
+# Build NIXL (main branch) with meson/ninja into /usr/local/nixl, then pip-install it; steps are &&-chained so any failure aborts the build.
+RUN apt-get update && apt-get install -y pkg-config tmux net-tools && \
+    cd /usr/local/src && \
+    pip install --upgrade meson pybind11 patchelf && \
+    git clone https://github.com/ai-dynamo/nixl.git -b main && \
+    cd nixl && \
+    rm -rf build && \
+    mkdir build && \
+    meson setup build/ --prefix=/usr/local/nixl --buildtype=release && \
+    cd build && \
+    ninja && \
+    ninja install && \
+    cd .. && pip install . --no-deps
+# Copy the source tree last and install lightllm in editable mode.
+COPY . /lightllm
+RUN pip install -e /lightllm --no-cache-dir
diff --git a/docker/cuda_version_12.6.1/Dockerfile.nixl.deepep b/docker/cuda_version_12.6.1/Dockerfile.nixl.deepep
new file mode 100644
index 000000000..8ca06e109
--- /dev/null
+++ b/docker/cuda_version_12.6.1/Dockerfile.nixl.deepep
@@ -0,0 +1,121 @@
+ARG CUDA_VERSION=12.6.1
+FROM nvidia/cuda:${CUDA_VERSION}-cudnn-devel-ubuntu22.04
+
+ARG PYTHON_VERSION=3.10
+ARG MAMBA_VERSION=24.7.1-0
+ARG TARGETPLATFORM
+
+ENV PATH=/opt/conda/bin:$PATH \
+    CONDA_PREFIX=/opt/conda
+# Give /tmp mode 1777 (world-writable with the sticky bit) and install the base build tools.
+RUN chmod 1777 /tmp && apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
+    ca-certificates \
+    libssl-dev \
+    curl \
+    g++ \
+    make \
+    git && \
+    rm -rf /var/lib/apt/lists/*
+# Install Mambaforge into /opt/conda, choosing the installer that matches TARGETPLATFORM.
+RUN case ${TARGETPLATFORM} in \
+    "linux/arm64") MAMBA_ARCH=aarch64 ;; \
+    *) MAMBA_ARCH=x86_64 ;; \
+    esac && \
+    curl -fsSL -o ~/mambaforge.sh -v "https://github.com/conda-forge/miniforge/releases/download/${MAMBA_VERSION}/Mambaforge-${MAMBA_VERSION}-Linux-${MAMBA_ARCH}.sh" && \
+    bash ~/mambaforge.sh -b -p /opt/conda && \
+    rm ~/mambaforge.sh
+# Install the requested Python with conda; linux/arm64 builds are rejected here (exit 1).
+RUN case ${TARGETPLATFORM} in \
+    "linux/arm64") exit 1 ;; \
+    *) /opt/conda/bin/conda update -y conda && \
+    /opt/conda/bin/conda install -y "python=${PYTHON_VERSION}" ;; \
+    esac && \
+    /opt/conda/bin/conda clean -ya
+
+
+WORKDIR /root
+# Copy requirements.txt on its own so the dependency layers do not depend on the rest of the source tree.
+COPY ./requirements.txt /lightllm/requirements.txt
+RUN --mount=type=cache,target=/root/.cache/pip pip install -r /lightllm/requirements.txt --ignore-installed --extra-index-url https://download.pytorch.org/whl/cu124
+
+RUN --mount=type=cache,target=/root/.cache/pip pip install vllm --pre --extra-index-url https://wheels.vllm.ai/nightly
+RUN --mount=type=cache,target=/root/.cache/pip git clone https://github.com/ModelTC/LightKernel.git && cd LightKernel && pip install --no-deps -v .
+# Packaging/build tools (presumably for the gdrcopy .deb build below) and RDMA userspace libraries.
+RUN apt-get update && apt-get install -y libnuma-dev wget devscripts debhelper dh-make build-essential dkms
+RUN apt-get update && apt-get install -y ibverbs-providers infiniband-diags perftest rdma-core libibverbs-dev librdmacm-dev
+
+ENV CUDA_HOME=/usr/local/cuda \
+    GDRCOPY_HOME=/usr/src/gdrdrv-2.4.4/
+# Build the gdrcopy v2.4.4 .deb packages from source, install them, and drop the build tree.
+RUN mkdir -p /tmp/gdrcopy && cd /tmp \
+    && git clone https://github.com/NVIDIA/gdrcopy.git -b v2.4.4 \
+    && cd gdrcopy/packages \
+    && CUDA=/usr/local/cuda ./build-deb-packages.sh \
+    && dpkg -i gdrdrv-dkms_*.deb libgdrapi_*.deb gdrcopy-tests_*.deb gdrcopy_*.deb \
+    && cd / && rm -rf /tmp/gdrcopy
+
+# Fix DeepEP IBGDA symlink: expose libmlx5.so.1 under the unversioned name libmlx5.so
+RUN ln -sf /usr/lib/x86_64-linux-gnu/libmlx5.so.1 /usr/lib/x86_64-linux-gnu/libmlx5.so
+# Build NVSHMEM 3.3.9 from source with IBGDA and GDRCopy enabled; installs into /root/nvshmem/install.
+RUN wget https://developer.download.nvidia.com/compute/redist/nvshmem/3.3.9/source/nvshmem_src_cuda12-all-all-3.3.9.tar.gz \
+    && tar -xf nvshmem_src_cuda12-all-all-3.3.9.tar.gz && mv nvshmem_src nvshmem \
+    && cd nvshmem \
+    && rm -f /root/nvshmem_src_cuda12-all-all-3.3.9.tar.gz \
+    && NVSHMEM_SHMEM_SUPPORT=0 \
+    NVSHMEM_UCX_SUPPORT=0 \
+    NVSHMEM_USE_NCCL=0 \
+    NVSHMEM_MPI_SUPPORT=0 \
+    NVSHMEM_IBGDA_SUPPORT=1 \
+    NVSHMEM_PMIX_SUPPORT=0 \
+    NVSHMEM_TIMEOUT_DEVICE_POLLING=0 \
+    NVSHMEM_USE_GDRCOPY=1 \
+    cmake -S . -B build/ -DCMAKE_INSTALL_PREFIX=/root/nvshmem/install -DCMAKE_CUDA_ARCHITECTURES=90 \
+    && cmake --build build --target install -j64
+# Check out DeepEP at a pinned commit (override with --build-arg DEEPEP_COMMIT=...).
+ARG DEEPEP_COMMIT=b6ce310bb0b75079682d09bc2ebc063a074fbd58
+RUN git clone https://github.com/deepseek-ai/DeepEP.git && cd DeepEP && git checkout ${DEEPEP_COMMIT} && cd ..
+# Build and install DeepEP against the NVSHMEM install tree.
+WORKDIR /root/DeepEP
+ENV NVSHMEM_DIR=/root/nvshmem/install
+RUN NVSHMEM_DIR=/root/nvshmem/install python setup.py install
+# Build UCX from source (ref v1.19.x) with CUDA, verbs, gdrcopy and EFA support; make is capped at nproc jobs.
+RUN apt-get update && apt-get install -y cmake automake autotools-dev libtool libz-dev && \
+    DEBIAN_FRONTEND=noninteractive apt-get -y install --reinstall libibverbs-dev rdma-core ibverbs-utils libibumad-dev && \
+    rm -rf /usr/lib/ucx && \
+    rm -rf /opt/hpcx/ucx && \
+    cd /usr/local/src && \
+    git clone https://github.com/openucx/ucx.git && \
+    cd ucx && \
+    git checkout v1.19.x && \
+    ./autogen.sh && ./configure \
+    --enable-shared \
+    --disable-static \
+    --disable-doxygen-doc \
+    --enable-optimizations \
+    --enable-cma \
+    --enable-devel-headers \
+    --with-cuda=/usr/local/cuda \
+    --with-verbs=yes \
+    --with-dm \
+    --with-gdrcopy=/usr/local \
+    --with-efa \
+    --enable-mt && \
+    make -j"$(nproc)" && \
+    make -j"$(nproc)" install-strip && \
+    ldconfig
+# Build NIXL (main branch) with meson/ninja into /usr/local/nixl, then pip-install it; steps are &&-chained so any failure aborts the build.
+RUN apt-get update && apt-get install -y pkg-config tmux net-tools && \
+    cd /usr/local/src && \
+    pip install --upgrade meson pybind11 patchelf && \
+    git clone https://github.com/ai-dynamo/nixl.git -b main && \
+    cd nixl && \
+    rm -rf build && \
+    mkdir build && \
+    meson setup build/ --prefix=/usr/local/nixl --buildtype=release && \
+    cd build && \
+    ninja && \
+    ninja install && \
+    cd .. && pip install . --no-deps
+# Copy the source tree last and install lightllm in editable mode.
+COPY . /lightllm
+RUN pip install -e /lightllm --no-cache-dir
diff --git a/docker/cuda_version_12.8.0/Dockerfile b/docker/cuda_version_12.8.0/Dockerfile
new file mode 100644
index 000000000..6f9701805
--- /dev/null
+++ b/docker/cuda_version_12.8.0/Dockerfile
@@ -0,0 +1,48 @@
+ARG CUDA_VERSION=12.8.0
+FROM nvidia/cuda:${CUDA_VERSION}-cudnn-devel-ubuntu22.04
+ARG PYTHON_VERSION=3.10
+ARG MAMBA_VERSION=24.7.1-0
+ARG TARGETPLATFORM
+ENV PATH=/opt/conda/bin:$PATH \
+    CONDA_PREFIX=/opt/conda
+# Give /tmp mode 1777 (world-writable with the sticky bit) and install the base build tools.
+RUN chmod 1777 /tmp && apt-get update --allow-insecure-repositories && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
+    ca-certificates \
+    libssl-dev \
+    curl \
+    g++ \
+    make \
+    git && \
+    rm -rf /var/lib/apt/lists/*
+# Install Mambaforge into /opt/conda, choosing the installer that matches TARGETPLATFORM.
+RUN case ${TARGETPLATFORM} in \
+    "linux/arm64") MAMBA_ARCH=aarch64 ;; \
+    *) MAMBA_ARCH=x86_64 ;; \
+    esac && \
+    curl -fsSL -o ~/mambaforge.sh "https://github.com/conda-forge/miniforge/releases/download/${MAMBA_VERSION}/Mambaforge-${MAMBA_VERSION}-Linux-${MAMBA_ARCH}.sh" && \
+    bash ~/mambaforge.sh -b -p /opt/conda && \
+    rm ~/mambaforge.sh
+# Install the requested Python with conda; linux/arm64 builds are rejected here (exit 1).
+RUN case ${TARGETPLATFORM} in \
+    "linux/arm64") exit 1 ;; \
+    *) /opt/conda/bin/conda update -y conda && \
+    /opt/conda/bin/conda install -y "python=${PYTHON_VERSION}" ;; \
+    esac && \
+    /opt/conda/bin/conda clean -ya
+
+
+WORKDIR /root
+# Copy requirements.txt on its own so the dependency layers do not depend on the rest of the source tree.
+COPY ./requirements.txt /lightllm/requirements.txt
+RUN pip install -U pip
+RUN pip install -r /lightllm/requirements.txt --no-cache-dir
+
+RUN pip install --no-cache-dir vllm --pre --extra-index-url https://wheels.vllm.ai/nightly
+
+# TODO: offline compile
+# RUN git clone https://github.com/ModelTC/LightKernel.git && cd LightKernel && pip install --no-deps -v .
+
+RUN apt-get update && apt-get install -y --no-install-recommends libnuma-dev && rm -rf /var/lib/apt/lists/* # for sgl_kernel
+# Copy the source tree last and install lightllm in editable mode.
+COPY . /lightllm
+RUN pip install -e /lightllm --no-cache-dir
diff --git a/docker/cuda_version_12.8.0/Dockerfile.deepep b/docker/cuda_version_12.8.0/Dockerfile.deepep
new file mode 100644
index 000000000..cd9774f64
--- /dev/null
+++ b/docker/cuda_version_12.8.0/Dockerfile.deepep
@@ -0,0 +1,84 @@
+ARG CUDA_VERSION=12.8.0
+FROM nvidia/cuda:${CUDA_VERSION}-cudnn-devel-ubuntu22.04
+ARG PYTHON_VERSION=3.10
+ARG MAMBA_VERSION=24.7.1-0
+ARG TARGETPLATFORM
+ENV PATH=/opt/conda/bin:$PATH \
+    CONDA_PREFIX=/opt/conda
+# Give /tmp mode 1777 (world-writable with the sticky bit) and install the base build tools.
+RUN chmod 1777 /tmp && apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
+    ca-certificates \
+    libssl-dev \
+    curl \
+    g++ \
+    make \
+    git && \
+    rm -rf /var/lib/apt/lists/*
+# Install Mambaforge into /opt/conda, choosing the installer that matches TARGETPLATFORM.
+RUN case ${TARGETPLATFORM} in \
+    "linux/arm64") MAMBA_ARCH=aarch64 ;; \
+    *) MAMBA_ARCH=x86_64 ;; \
+    esac && \
+    curl -fsSL -o ~/mambaforge.sh "https://github.com/conda-forge/miniforge/releases/download/${MAMBA_VERSION}/Mambaforge-${MAMBA_VERSION}-Linux-${MAMBA_ARCH}.sh" && \
+    bash ~/mambaforge.sh -b -p /opt/conda && \
+    rm ~/mambaforge.sh
+# Install the requested Python with conda; linux/arm64 builds are rejected here (exit 1).
+RUN case ${TARGETPLATFORM} in \
+    "linux/arm64") exit 1 ;; \
+    *) /opt/conda/bin/conda update -y conda && \
+    /opt/conda/bin/conda install -y "python=${PYTHON_VERSION}" ;; \
+    esac && \
+    /opt/conda/bin/conda clean -ya
+
+
+WORKDIR /root
+# Copy requirements.txt on its own so the dependency layers do not depend on the rest of the source tree.
+COPY ./requirements.txt /lightllm/requirements.txt
+RUN pip install -U pip
+RUN pip install -r /lightllm/requirements.txt --no-cache-dir
+
+RUN pip install --no-cache-dir vllm --pre --extra-index-url https://wheels.vllm.ai/nightly
+
+# TODO: offline compile
+# RUN git clone https://github.com/ModelTC/LightKernel.git && cd LightKernel && pip install --no-deps -v .
+# Packaging/build tools (presumably for the gdrcopy .deb build below) and RDMA userspace libraries.
+RUN apt-get update --allow-insecure-repositories && apt-get install -y libnuma-dev wget devscripts debhelper dh-make build-essential dkms
+RUN apt-get update && apt-get install -y ibverbs-providers infiniband-diags perftest rdma-core libibverbs-dev librdmacm-dev
+
+ENV CUDA_HOME=/usr/local/cuda \
+    GDRCOPY_HOME=/usr/src/gdrdrv-2.4.4/
+# Build the gdrcopy v2.4.4 .deb packages from source, install them, and drop the build tree.
+RUN mkdir -p /tmp/gdrcopy && cd /tmp \
+    && git clone https://github.com/NVIDIA/gdrcopy.git -b v2.4.4 \
+    && cd gdrcopy/packages \
+    && CUDA=/usr/local/cuda ./build-deb-packages.sh \
+    && dpkg -i gdrdrv-dkms_*.deb libgdrapi_*.deb gdrcopy-tests_*.deb gdrcopy_*.deb \
+    && cd / && rm -rf /tmp/gdrcopy
+
+# Fix DeepEP IBGDA symlink: expose libmlx5.so.1 under the unversioned name libmlx5.so
+RUN ln -sf /usr/lib/x86_64-linux-gnu/libmlx5.so.1 /usr/lib/x86_64-linux-gnu/libmlx5.so
+# Build NVSHMEM 3.3.9 from source with IBGDA and GDRCopy enabled; installs into /root/nvshmem/install.
+RUN wget https://developer.download.nvidia.com/compute/redist/nvshmem/3.3.9/source/nvshmem_src_cuda12-all-all-3.3.9.tar.gz \
+    && tar -xf nvshmem_src_cuda12-all-all-3.3.9.tar.gz && mv nvshmem_src nvshmem \
+    && cd nvshmem \
+    && rm -f /root/nvshmem_src_cuda12-all-all-3.3.9.tar.gz \
+    && NVSHMEM_SHMEM_SUPPORT=0 \
+    NVSHMEM_UCX_SUPPORT=0 \
+    NVSHMEM_USE_NCCL=0 \
+    NVSHMEM_MPI_SUPPORT=0 \
+    NVSHMEM_IBGDA_SUPPORT=1 \
+    NVSHMEM_PMIX_SUPPORT=0 \
+    NVSHMEM_TIMEOUT_DEVICE_POLLING=0 \
+    NVSHMEM_USE_GDRCOPY=1 \
+    cmake -S . -B build/ -DCMAKE_INSTALL_PREFIX=/root/nvshmem/install -DCMAKE_CUDA_ARCHITECTURES=90 \
+    && cmake --build build --target install -j64
+# Check out DeepEP at a pinned commit (override with --build-arg DEEPEP_COMMIT=...).
+ARG DEEPEP_COMMIT=b6ce310bb0b75079682d09bc2ebc063a074fbd58
+RUN git clone https://github.com/deepseek-ai/DeepEP.git && cd DeepEP && git checkout ${DEEPEP_COMMIT} && cd ..
+# Build and install DeepEP against the NVSHMEM install tree.
+WORKDIR /root/DeepEP
+ENV NVSHMEM_DIR=/root/nvshmem/install
+RUN NVSHMEM_DIR=/root/nvshmem/install python setup.py install
+# Copy the source tree last and install lightllm in editable mode.
+COPY . /lightllm
+RUN pip install -e /lightllm --no-cache-dir
\ No newline at end of file
diff --git a/docker/cuda_version_12.8.0/Dockerfile.nixl b/docker/cuda_version_12.8.0/Dockerfile.nixl
new file mode 100644
index 000000000..6db287fdb
--- /dev/null
+++ b/docker/cuda_version_12.8.0/Dockerfile.nixl
@@ -0,0 +1,94 @@
+ARG CUDA_VERSION=12.8.0
+FROM nvidia/cuda:${CUDA_VERSION}-cudnn-devel-ubuntu22.04
+ARG PYTHON_VERSION=3.10
+ARG MAMBA_VERSION=24.7.1-0
+ARG TARGETPLATFORM
+ENV PATH=/opt/conda/bin:$PATH \
+    CONDA_PREFIX=/opt/conda
+# Give /tmp mode 1777 (world-writable with the sticky bit) and install the base build tools.
+RUN chmod 1777 /tmp && apt-get update --allow-insecure-repositories && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
+    ca-certificates \
+    libssl-dev \
+    curl \
+    g++ \
+    make \
+    git && \
+    rm -rf /var/lib/apt/lists/*
+# Install Mambaforge into /opt/conda, choosing the installer that matches TARGETPLATFORM.
+RUN case ${TARGETPLATFORM} in \
+    "linux/arm64") MAMBA_ARCH=aarch64 ;; \
+    *) MAMBA_ARCH=x86_64 ;; \
+    esac && \
+    curl -fsSL -o ~/mambaforge.sh -v "https://github.com/conda-forge/miniforge/releases/download/${MAMBA_VERSION}/Mambaforge-${MAMBA_VERSION}-Linux-${MAMBA_ARCH}.sh" && \
+    bash ~/mambaforge.sh -b -p /opt/conda && \
+    rm ~/mambaforge.sh
+# Install the requested Python with conda; linux/arm64 builds are rejected here (exit 1).
+RUN case ${TARGETPLATFORM} in \
+    "linux/arm64") exit 1 ;; \
+    *) /opt/conda/bin/conda update -y conda && \
+    /opt/conda/bin/conda install -y "python=${PYTHON_VERSION}" ;; \
+    esac && \
+    /opt/conda/bin/conda clean -ya
+
+
+WORKDIR /root
+# Copy requirements.txt on its own so the dependency layers do not depend on the rest of the source tree.
+COPY ./requirements.txt /lightllm/requirements.txt
+RUN --mount=type=cache,target=/root/.cache/pip pip install -r /lightllm/requirements.txt --ignore-installed --extra-index-url https://download.pytorch.org/whl/cu124
+
+RUN --mount=type=cache,target=/root/.cache/pip pip install vllm --pre --extra-index-url https://wheels.vllm.ai/nightly
+RUN --mount=type=cache,target=/root/.cache/pip git clone https://github.com/ModelTC/LightKernel.git && cd LightKernel && pip install --no-deps -v .
+# Packaging/build tools (presumably for the gdrcopy .deb build below) and RDMA userspace libraries.
+RUN apt-get update && apt-get install -y libnuma-dev wget devscripts debhelper dh-make build-essential dkms
+RUN apt-get update && apt-get install -y ibverbs-providers infiniband-diags perftest rdma-core libibverbs-dev librdmacm-dev
+
+ENV CUDA_HOME=/usr/local/cuda \
+    GDRCOPY_HOME=/usr/src/gdrdrv-2.4.4/
+# Build the gdrcopy v2.4.4 .deb packages from source, install them, and drop the build tree.
+RUN mkdir -p /tmp/gdrcopy && cd /tmp \
+    && git clone https://github.com/NVIDIA/gdrcopy.git -b v2.4.4 \
+    && cd gdrcopy/packages \
+    && CUDA=/usr/local/cuda ./build-deb-packages.sh \
+    && dpkg -i gdrdrv-dkms_*.deb libgdrapi_*.deb gdrcopy-tests_*.deb gdrcopy_*.deb \
+    && cd / && rm -rf /tmp/gdrcopy
+# Build UCX from source (ref v1.19.x) with CUDA, verbs, gdrcopy and EFA support; make is capped at nproc jobs.
+RUN apt-get update && apt-get install -y cmake automake autotools-dev libtool libz-dev && \
+    DEBIAN_FRONTEND=noninteractive apt-get -y install --reinstall libibverbs-dev rdma-core ibverbs-utils libibumad-dev && \
+    rm -rf /usr/lib/ucx && \
+    rm -rf /opt/hpcx/ucx && \
+    cd /usr/local/src && \
+    git clone https://github.com/openucx/ucx.git && \
+    cd ucx && \
+    git checkout v1.19.x && \
+    ./autogen.sh && ./configure \
+    --enable-shared \
+    --disable-static \
+    --disable-doxygen-doc \
+    --enable-optimizations \
+    --enable-cma \
+    --enable-devel-headers \
+    --with-cuda=/usr/local/cuda \
+    --with-verbs=yes \
+    --with-dm \
+    --with-gdrcopy=/usr/local \
+    --with-efa \
+    --enable-mt && \
+    make -j"$(nproc)" && \
+    make -j"$(nproc)" install-strip && \
+    ldconfig
+# Build NIXL (main branch) with meson/ninja into /usr/local/nixl, then pip-install it; steps are &&-chained so any failure aborts the build.
+RUN apt-get update && apt-get install -y pkg-config tmux net-tools && \
+    cd /usr/local/src && \
+    pip install --upgrade meson pybind11 patchelf && \
+    git clone https://github.com/ai-dynamo/nixl.git -b main && \
+    cd nixl && \
+    rm -rf build && \
+    mkdir build && \
+    meson setup build/ --prefix=/usr/local/nixl --buildtype=release && \
+    cd build && \
+    ninja && \
+    ninja install && \
+    cd .. && pip install . --no-deps
+# Copy the source tree last and install lightllm in editable mode.
+COPY . /lightllm
+RUN pip install -e /lightllm --no-cache-dir
diff --git a/docker/cuda_version_12.8.0/Dockerfile.nixl.deepep b/docker/cuda_version_12.8.0/Dockerfile.nixl.deepep
new file mode 100644
index 000000000..2dd7a1154
--- /dev/null
+++ b/docker/cuda_version_12.8.0/Dockerfile.nixl.deepep
@@ -0,0 +1,121 @@
+ARG CUDA_VERSION=12.8.0
+FROM nvidia/cuda:${CUDA_VERSION}-cudnn-devel-ubuntu22.04
+
+ARG PYTHON_VERSION=3.10
+ARG MAMBA_VERSION=24.7.1-0
+ARG TARGETPLATFORM
+
+ENV PATH=/opt/conda/bin:$PATH \
+    CONDA_PREFIX=/opt/conda
+# Give /tmp mode 1777 (world-writable with the sticky bit) and install the base build tools.
+RUN chmod 1777 /tmp && apt-get update --allow-insecure-repositories && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
+    ca-certificates \
+    libssl-dev \
+    curl \
+    g++ \
+    make \
+    git && \
+    rm -rf /var/lib/apt/lists/*
+# Install Mambaforge into /opt/conda, choosing the installer that matches TARGETPLATFORM.
+RUN case ${TARGETPLATFORM} in \
+    "linux/arm64") MAMBA_ARCH=aarch64 ;; \
+    *) MAMBA_ARCH=x86_64 ;; \
+    esac && \
+    curl -fsSL -o ~/mambaforge.sh -v "https://github.com/conda-forge/miniforge/releases/download/${MAMBA_VERSION}/Mambaforge-${MAMBA_VERSION}-Linux-${MAMBA_ARCH}.sh" && \
+    bash ~/mambaforge.sh -b -p /opt/conda && \
+    rm ~/mambaforge.sh
+# Install the requested Python with conda; linux/arm64 builds are rejected here (exit 1).
+RUN case ${TARGETPLATFORM} in \
+    "linux/arm64") exit 1 ;; \
+    *) /opt/conda/bin/conda update -y conda && \
+    /opt/conda/bin/conda install -y "python=${PYTHON_VERSION}" ;; \
+    esac && \
+    /opt/conda/bin/conda clean -ya
+
+
+WORKDIR /root
+# Copy requirements.txt on its own so the dependency layers do not depend on the rest of the source tree.
+COPY ./requirements.txt /lightllm/requirements.txt
+RUN --mount=type=cache,target=/root/.cache/pip pip install -r /lightllm/requirements.txt --ignore-installed --extra-index-url https://download.pytorch.org/whl/cu124
+
+RUN --mount=type=cache,target=/root/.cache/pip pip install vllm --pre --extra-index-url https://wheels.vllm.ai/nightly
+RUN --mount=type=cache,target=/root/.cache/pip git clone https://github.com/ModelTC/LightKernel.git && cd LightKernel && pip install --no-deps -v .
+# Packaging/build tools (presumably for the gdrcopy .deb build below) and RDMA userspace libraries.
+RUN apt-get update && apt-get install -y libnuma-dev wget devscripts debhelper dh-make build-essential dkms
+RUN apt-get update && apt-get install -y ibverbs-providers infiniband-diags perftest rdma-core libibverbs-dev librdmacm-dev
+
+ENV CUDA_HOME=/usr/local/cuda \
+    GDRCOPY_HOME=/usr/src/gdrdrv-2.4.4/
+# Build the gdrcopy v2.4.4 .deb packages from source, install them, and drop the build tree.
+RUN mkdir -p /tmp/gdrcopy && cd /tmp \
+    && git clone https://github.com/NVIDIA/gdrcopy.git -b v2.4.4 \
+    && cd gdrcopy/packages \
+    && CUDA=/usr/local/cuda ./build-deb-packages.sh \
+    && dpkg -i gdrdrv-dkms_*.deb libgdrapi_*.deb gdrcopy-tests_*.deb gdrcopy_*.deb \
+    && cd / && rm -rf /tmp/gdrcopy
+
+# Fix DeepEP IBGDA symlink: expose libmlx5.so.1 under the unversioned name libmlx5.so
+RUN ln -sf /usr/lib/x86_64-linux-gnu/libmlx5.so.1 /usr/lib/x86_64-linux-gnu/libmlx5.so
+# Build NVSHMEM 3.3.9 from source with IBGDA and GDRCopy enabled; installs into /root/nvshmem/install.
+RUN wget https://developer.download.nvidia.com/compute/redist/nvshmem/3.3.9/source/nvshmem_src_cuda12-all-all-3.3.9.tar.gz \
+    && tar -xf nvshmem_src_cuda12-all-all-3.3.9.tar.gz && mv nvshmem_src nvshmem \
+    && cd nvshmem \
+    && rm -f /root/nvshmem_src_cuda12-all-all-3.3.9.tar.gz \
+    && NVSHMEM_SHMEM_SUPPORT=0 \
+    NVSHMEM_UCX_SUPPORT=0 \
+    NVSHMEM_USE_NCCL=0 \
+    NVSHMEM_MPI_SUPPORT=0 \
+    NVSHMEM_IBGDA_SUPPORT=1 \
+    NVSHMEM_PMIX_SUPPORT=0 \
+    NVSHMEM_TIMEOUT_DEVICE_POLLING=0 \
+    NVSHMEM_USE_GDRCOPY=1 \
+    cmake -S . -B build/ -DCMAKE_INSTALL_PREFIX=/root/nvshmem/install -DCMAKE_CUDA_ARCHITECTURES=90 \
+    && cmake --build build --target install -j64
+# Check out DeepEP at a pinned commit (override with --build-arg DEEPEP_COMMIT=...).
+ARG DEEPEP_COMMIT=b6ce310bb0b75079682d09bc2ebc063a074fbd58
+RUN git clone https://github.com/deepseek-ai/DeepEP.git && cd DeepEP && git checkout ${DEEPEP_COMMIT} && cd ..
+# Build and install DeepEP against the NVSHMEM install tree.
+WORKDIR /root/DeepEP
+ENV NVSHMEM_DIR=/root/nvshmem/install
+RUN NVSHMEM_DIR=/root/nvshmem/install python setup.py install
+# Build UCX from source (ref v1.19.x) with CUDA, verbs, gdrcopy and EFA support; make is capped at nproc jobs.
+RUN apt-get update && apt-get install -y cmake automake autotools-dev libtool libz-dev && \
+    DEBIAN_FRONTEND=noninteractive apt-get -y install --reinstall libibverbs-dev rdma-core ibverbs-utils libibumad-dev && \
+    rm -rf /usr/lib/ucx && \
+    rm -rf /opt/hpcx/ucx && \
+    cd /usr/local/src && \
+    git clone https://github.com/openucx/ucx.git && \
+    cd ucx && \
+    git checkout v1.19.x && \
+    ./autogen.sh && ./configure \
+    --enable-shared \
+    --disable-static \
+    --disable-doxygen-doc \
+    --enable-optimizations \
+    --enable-cma \
+    --enable-devel-headers \
+    --with-cuda=/usr/local/cuda \
+    --with-verbs=yes \
+    --with-dm \
+    --with-gdrcopy=/usr/local \
+    --with-efa \
+    --enable-mt && \
+    make -j"$(nproc)" && \
+    make -j"$(nproc)" install-strip && \
+    ldconfig
+# Build NIXL (main branch) with meson/ninja into /usr/local/nixl, then pip-install it; steps are &&-chained so any failure aborts the build.
+RUN apt-get update && apt-get install -y pkg-config tmux net-tools && \
+    cd /usr/local/src && \
+    pip install --upgrade meson pybind11 patchelf && \
+    git clone https://github.com/ai-dynamo/nixl.git -b main && \
+    cd nixl && \
+    rm -rf build && \
+    mkdir build && \
+    meson setup build/ --prefix=/usr/local/nixl --buildtype=release && \
+    cd build && \
+    ninja && \
+    ninja install && \
+    cd .. && pip install . --no-deps
+# Copy the source tree last and install lightllm in editable mode.
+COPY . /lightllm
+RUN pip install -e /lightllm --no-cache-dir
diff --git a/docs/CN/source/getting_started/installation.rst b/docs/CN/source/getting_started/installation.rst index 13c83c2d8..fb998b756 100755 --- a/docs/CN/source/getting_started/installation.rst +++ b/docs/CN/source/getting_started/installation.rst @@ -35,12 +35,14 @@ Lightllm 是一个纯python开发的推理框架,其中的算子使用triton $ --shm-size 2g -v your_local_path:/data/ \ $ ghcr.io/modeltc/lightllm:main /bin/bash -你也可以使用源码手动构建镜像并运行: +你也可以使用源码手动构建镜像并运行,建议手动构建镜像,因为更新比较频繁: .. code-block:: console - - $ # 手动构建镜像 - $ docker build -t <image_name> . 
+ + $ # 进入代码仓库的根目录 + $ cd /lightllm + $ # 手动构建镜像, docker 目录下有不同功能场景的镜像构建文件,按需构建。 + $ docker build -t <image_name> -f ./docker/Dockerfile . $ $ # 运行 $ docker run -it --gpus all -p 8080:8080 \ diff --git a/docs/EN/source/getting_started/installation.rst b/docs/EN/source/getting_started/installation.rst index c85a9c953..75fa71476 100755 --- a/docs/EN/source/getting_started/installation.rst +++ b/docs/EN/source/getting_started/installation.rst @@ -42,9 +42,11 @@ The easiest way to install Lightllm is using the official image. You can directl You can also manually build the image from source and run it: .. code-block:: console - + + $ # move into lightllm root dir + $ cd /lightllm $ # Manually build the image - $ docker build -t <image_name> . + $ docker build -t <image_name> -f ./docker/Dockerfile . $ $ # Run, $ docker run -it --gpus all -p 8080:8080 \