-
Notifications
You must be signed in to change notification settings - Fork 282
dockerfile for cuda12.8.0 #1106
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,48 @@ | ||
| ARG CUDA_VERSION=12.6.1 | ||
| FROM nvidia/cuda:${CUDA_VERSION}-cudnn-devel-ubuntu22.04 | ||
| ARG PYTHON_VERSION=3.10 | ||
| ARG MAMBA_VERSION=24.7.1-0 | ||
| ARG TARGETPLATFORM | ||
| ENV PATH=/opt/conda/bin:$PATH \ | ||
| CONDA_PREFIX=/opt/conda | ||
|
|
||
| # Restore the standard /tmp mode (world-writable plus sticky bit) before running apt. | ||
| # A plain `chmod 777 -R /tmp` clears the sticky bit, which lets any user delete other users' temp files. | ||
| RUN chmod 1777 /tmp && apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ | ||
| ca-certificates \ | ||
| libssl-dev \ | ||
| curl \ | ||
| g++ \ | ||
| make \ | ||
| git && \ | ||
| rm -rf /var/lib/apt/lists/* | ||
|
|
||
| RUN case ${TARGETPLATFORM} in \ | ||
| "linux/arm64") MAMBA_ARCH=aarch64 ;; \ | ||
| *) MAMBA_ARCH=x86_64 ;; \ | ||
| esac && \ | ||
| curl -fsSL -o ~/mambaforge.sh "https://github.com/conda-forge/miniforge/releases/download/${MAMBA_VERSION}/Mambaforge-${MAMBA_VERSION}-Linux-${MAMBA_ARCH}.sh" && \ | ||
| bash ~/mambaforge.sh -b -p /opt/conda && \ | ||
| rm ~/mambaforge.sh | ||
|
|
||
| RUN case ${TARGETPLATFORM} in \ | ||
| "linux/arm64") exit 1 ;; \ | ||
| *) /opt/conda/bin/conda update -y conda && \ | ||
| /opt/conda/bin/conda install -y "python=${PYTHON_VERSION}" ;; \ | ||
| esac && \ | ||
| /opt/conda/bin/conda clean -ya | ||
|
|
||
|
|
||
| WORKDIR /root | ||
|
|
||
| COPY ./requirements.txt /lightllm/requirements.txt | ||
| RUN pip install -U pip | ||
| RUN pip install -r /lightllm/requirements.txt --no-cache-dir | ||
|
Comment on lines
+37
to
+38
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. |
||
|
|
||
| RUN pip install --no-cache-dir vllm --pre --extra-index-url https://wheels.vllm.ai/nightly | ||
|
|
||
| # TODO: offline compile | ||
| # RUN git clone https://github.com/ModelTC/LightKernel.git && cd LightKernel && pip install --no-deps -v . | ||
|
|
||
| # libnuma-dev is installed for sgl_kernel; the apt index is removed in the same layer to keep the image small. | ||
| RUN apt-get update && apt-get install -y libnuma-dev && rm -rf /var/lib/apt/lists/* | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This |
||
|
|
||
| COPY . /lightllm | ||
| RUN pip install -e /lightllm --no-cache-dir | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,84 @@ | ||
| ARG CUDA_VERSION=12.6.1 | ||
| FROM nvidia/cuda:${CUDA_VERSION}-cudnn-devel-ubuntu22.04 | ||
| ARG PYTHON_VERSION=3.10 | ||
| ARG MAMBA_VERSION=24.7.1-0 | ||
| ARG TARGETPLATFORM | ||
| ENV PATH=/opt/conda/bin:$PATH \ | ||
| CONDA_PREFIX=/opt/conda | ||
|
|
||
| # Restore the standard /tmp mode (world-writable plus sticky bit) before running apt. | ||
| # A plain `chmod 777 -R /tmp` clears the sticky bit, which lets any user delete other users' temp files. | ||
| RUN chmod 1777 /tmp && apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ | ||
| ca-certificates \ | ||
| libssl-dev \ | ||
| curl \ | ||
| g++ \ | ||
| make \ | ||
| git && \ | ||
| rm -rf /var/lib/apt/lists/* | ||
|
Comment on lines
+9
to
+16
Contributor
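There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Using `chmod 777 -R /tmp` is a security risk as it makes the temporary directory world-writable. This could allow any process or user within the container to tamper with files created by other processes. It's better to use more restrictive permissions or avoid changing them if not strictly necessary. |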
||
|
|
||
| RUN case ${TARGETPLATFORM} in \ | ||
| "linux/arm64") MAMBA_ARCH=aarch64 ;; \ | ||
| *) MAMBA_ARCH=x86_64 ;; \ | ||
| esac && \ | ||
| curl -fsSL -o ~/mambaforge.sh "https://github.com/conda-forge/miniforge/releases/download/${MAMBA_VERSION}/Mambaforge-${MAMBA_VERSION}-Linux-${MAMBA_ARCH}.sh" && \ | ||
| bash ~/mambaforge.sh -b -p /opt/conda && \ | ||
| rm ~/mambaforge.sh | ||
|
|
||
| RUN case ${TARGETPLATFORM} in \ | ||
| "linux/arm64") exit 1 ;; \ | ||
| *) /opt/conda/bin/conda update -y conda && \ | ||
| /opt/conda/bin/conda install -y "python=${PYTHON_VERSION}" ;; \ | ||
| esac && \ | ||
| /opt/conda/bin/conda clean -ya | ||
|
|
||
|
|
||
| WORKDIR /root | ||
|
|
||
| COPY ./requirements.txt /lightllm/requirements.txt | ||
| RUN pip install -U pip | ||
| RUN pip install -r /lightllm/requirements.txt --no-cache-dir | ||
|
|
||
| RUN pip install --no-cache-dir vllm --pre --extra-index-url https://wheels.vllm.ai/nightly | ||
|
|
||
| # TODO: offline compile | ||
| # RUN git clone https://github.com/ModelTC/LightKernel.git && cd LightKernel && pip install --no-deps -v . | ||
|
|
||
| # Install the build tooling and the RDMA/InfiniBand packages in one layer, so every install | ||
| # runs against a package index refreshed in that same layer; the index is removed afterwards. | ||
| RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \ | ||
| libnuma-dev wget devscripts debhelper dh-make build-essential dkms \ | ||
| ibverbs-providers infiniband-diags perftest rdma-core libibverbs-dev librdmacm-dev && \ | ||
| rm -rf /var/lib/apt/lists/* | ||
|
Comment on lines
+45
to
+46
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. These |
||
|
|
||
| ENV CUDA_HOME=/usr/local/cuda \ | ||
| GDRCOPY_HOME=/usr/src/gdrdrv-2.4.4/ | ||
|
|
||
| RUN mkdir -p /tmp/gdrcopy && cd /tmp \ | ||
| && git clone https://github.com/NVIDIA/gdrcopy.git -b v2.4.4 \ | ||
| && cd gdrcopy/packages \ | ||
| && CUDA=/usr/local/cuda ./build-deb-packages.sh \ | ||
| && dpkg -i gdrdrv-dkms_*.deb libgdrapi_*.deb gdrcopy-tests_*.deb gdrcopy_*.deb \ | ||
| && cd / && rm -rf /tmp/gdrcopy | ||
|
|
||
| # Fix DeepEP IBGDA symlink | ||
| RUN ln -sf /usr/lib/x86_64-linux-gnu/libmlx5.so.1 /usr/lib/x86_64-linux-gnu/libmlx5.so | ||
|
|
||
| RUN wget https://developer.download.nvidia.com/compute/redist/nvshmem/3.3.9/source/nvshmem_src_cuda12-all-all-3.3.9.tar.gz \ | ||
| && tar -xf nvshmem_src_cuda12-all-all-3.3.9.tar.gz && mv nvshmem_src nvshmem \ | ||
| && cd nvshmem \ | ||
| && rm -f /root/nvshmem_src_cuda12-all-all-3.3.9.tar.gz \ | ||
| && NVSHMEM_SHMEM_SUPPORT=0 \ | ||
| NVSHMEM_UCX_SUPPORT=0 \ | ||
| NVSHMEM_USE_NCCL=0 \ | ||
| NVSHMEM_MPI_SUPPORT=0 \ | ||
| NVSHMEM_IBGDA_SUPPORT=1 \ | ||
| NVSHMEM_PMIX_SUPPORT=0 \ | ||
| NVSHMEM_TIMEOUT_DEVICE_POLLING=0 \ | ||
| NVSHMEM_USE_GDRCOPY=1 \ | ||
| cmake -S . -B build/ -DCMAKE_INSTALL_PREFIX=/root/nvshmem/install -DCMAKE_CUDA_ARCHITECTURES=90 \ | ||
| && cmake --build build --target install -j64 | ||
|
|
||
| ARG DEEPEP_COMMIT=b6ce310bb0b75079682d09bc2ebc063a074fbd58 | ||
| RUN git clone https://github.com/deepseek-ai/DeepEP.git && cd DeepEP && git checkout ${DEEPEP_COMMIT} && cd .. | ||
|
|
||
| WORKDIR /root/DeepEP | ||
| ENV NVSHMEM_DIR=/root/nvshmem/install | ||
| RUN NVSHMEM_DIR=/root/nvshmem/install python setup.py install | ||
|
|
||
| COPY . /lightllm | ||
| RUN pip install -e /lightllm --no-cache-dir | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,94 @@ | ||
| ARG CUDA_VERSION=12.6.1 | ||
| FROM nvidia/cuda:${CUDA_VERSION}-cudnn-devel-ubuntu22.04 | ||
| ARG PYTHON_VERSION=3.10 | ||
| ARG MAMBA_VERSION=24.7.1-0 | ||
| ARG TARGETPLATFORM | ||
| ENV PATH=/opt/conda/bin:$PATH \ | ||
| CONDA_PREFIX=/opt/conda | ||
|
|
||
| # Restore the standard /tmp mode (world-writable plus sticky bit) before running apt. | ||
| # A plain `chmod 777 -R /tmp` clears the sticky bit, which lets any user delete other users' temp files. | ||
| RUN chmod 1777 /tmp && apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ | ||
| ca-certificates \ | ||
| libssl-dev \ | ||
| curl \ | ||
| g++ \ | ||
| make \ | ||
| git && \ | ||
| rm -rf /var/lib/apt/lists/* | ||
|
Comment on lines
+9
to
+16
Contributor
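There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Using `chmod 777 -R /tmp` is a security risk as it makes the temporary directory world-writable. This could allow any process or user within the container to tamper with files created by other processes. It's better to use more restrictive permissions or avoid changing them if not strictly necessary. |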
||
|
|
||
| RUN case ${TARGETPLATFORM} in \ | ||
| "linux/arm64") MAMBA_ARCH=aarch64 ;; \ | ||
| *) MAMBA_ARCH=x86_64 ;; \ | ||
| esac && \ | ||
| curl -fsSL -o ~/mambaforge.sh -v "https://github.com/conda-forge/miniforge/releases/download/${MAMBA_VERSION}/Mambaforge-${MAMBA_VERSION}-Linux-${MAMBA_ARCH}.sh" && \ | ||
| bash ~/mambaforge.sh -b -p /opt/conda && \ | ||
| rm ~/mambaforge.sh | ||
|
Comment on lines
+22
to
+24
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The |
||
|
|
||
| RUN case ${TARGETPLATFORM} in \ | ||
| "linux/arm64") exit 1 ;; \ | ||
| *) /opt/conda/bin/conda update -y conda && \ | ||
| /opt/conda/bin/conda install -y "python=${PYTHON_VERSION}" ;; \ | ||
| esac && \ | ||
| /opt/conda/bin/conda clean -ya | ||
|
|
||
|
|
||
| WORKDIR /root | ||
|
|
||
| COPY ./requirements.txt /lightllm/requirements.txt | ||
| RUN --mount=type=cache,target=/root/.cache/pip pip install -r /lightllm/requirements.txt --ignore-installed --extra-index-url https://download.pytorch.org/whl/cu124 | ||
|
|
||
| RUN --mount=type=cache,target=/root/.cache/pip pip install vllm --pre --extra-index-url https://wheels.vllm.ai/nightly | ||
| RUN --mount=type=cache,target=/root/.cache/pip git clone https://github.com/ModelTC/LightKernel.git && cd LightKernel && pip install --no-deps -v . | ||
|
|
||
| RUN apt-get update && apt-get install -y libnuma-dev wget devscripts debhelper dh-make build-essential dkms | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This Dockerfile contains multiple |
||
| # Refresh the package index in the same layer as the install (an earlier cached layer may hold a stale index), then remove it. | ||
| RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y ibverbs-providers infiniband-diags perftest rdma-core libibverbs-dev librdmacm-dev && \ | ||
| rm -rf /var/lib/apt/lists/* | ||
|
|
||
| ENV CUDA_HOME=/usr/local/cuda \ | ||
| GDRCOPY_HOME=/usr/src/gdrdrv-2.4.4/ | ||
|
|
||
| RUN mkdir -p /tmp/gdrcopy && cd /tmp \ | ||
| && git clone https://github.com/NVIDIA/gdrcopy.git -b v2.4.4 \ | ||
| && cd gdrcopy/packages \ | ||
| && CUDA=/usr/local/cuda ./build-deb-packages.sh \ | ||
| && dpkg -i gdrdrv-dkms_*.deb libgdrapi_*.deb gdrcopy-tests_*.deb gdrcopy_*.deb \ | ||
| && cd / && rm -rf /tmp/gdrcopy | ||
|
|
||
| # Build UCX from the v1.19.x branch with CUDA, verbs, GDRCopy and EFA support. | ||
| # Every step is chained with && so a failed package install aborts the build instead of being | ||
| # masked by a `;`, and make parallelism is bounded by the CPU count rather than unlimited. | ||
| RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y cmake automake autotools-dev libtool libz-dev && \ | ||
| DEBIAN_FRONTEND=noninteractive apt-get -y install --reinstall libibverbs-dev rdma-core ibverbs-utils libibumad-dev && \ | ||
| rm -rf /var/lib/apt/lists/* && \ | ||
| rm -rf /usr/lib/ucx && \ | ||
| rm -rf /opt/hpcx/ucx && \ | ||
| cd /usr/local/src && \ | ||
| git clone https://github.com/openucx/ucx.git && \ | ||
| cd ucx && \ | ||
| git checkout v1.19.x && \ | ||
| ./autogen.sh && ./configure \ | ||
| --enable-shared \ | ||
| --disable-static \ | ||
| --disable-doxygen-doc \ | ||
| --enable-optimizations \ | ||
| --enable-cma \ | ||
| --enable-devel-headers \ | ||
| --with-cuda=/usr/local/cuda \ | ||
| --with-verbs=yes \ | ||
| --with-dm \ | ||
| --with-gdrcopy=/usr/local \ | ||
| --with-efa \ | ||
| --enable-mt && \ | ||
| make -j"$(nproc)" && \ | ||
| make -j"$(nproc)" install-strip && \ | ||
| ldconfig | ||
|
|
||
| # Build NIXL with meson/ninja into /usr/local/nixl, then pip-install it from the checkout. | ||
| # Steps are chained with && so a failed apt or pip install stops the build instead of being ignored. | ||
| RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y pkg-config tmux net-tools && \ | ||
| rm -rf /var/lib/apt/lists/* && \ | ||
| cd /usr/local/src && \ | ||
| pip install --upgrade meson pybind11 patchelf && \ | ||
| git clone https://github.com/ai-dynamo/nixl.git -b main && \ | ||
| cd nixl && \ | ||
| rm -rf build && \ | ||
| mkdir build && \ | ||
| meson setup build/ --prefix=/usr/local/nixl --buildtype=release && \ | ||
| cd build && \ | ||
| ninja && \ | ||
| ninja install && \ | ||
| cd .. && pip install . --no-deps | ||
|
|
||
| COPY . /lightllm | ||
| RUN pip install -e /lightllm --no-cache-dir | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,121 @@ | ||
| ARG CUDA_VERSION=12.6.1 | ||
| FROM nvidia/cuda:${CUDA_VERSION}-cudnn-devel-ubuntu22.04 | ||
|
|
||
| ARG PYTHON_VERSION=3.10 | ||
| ARG MAMBA_VERSION=24.7.1-0 | ||
| ARG TARGETPLATFORM | ||
|
|
||
| ENV PATH=/opt/conda/bin:$PATH \ | ||
| CONDA_PREFIX=/opt/conda | ||
|
|
||
| # Restore the standard /tmp mode (world-writable plus sticky bit) before running apt. | ||
| # A plain `chmod 777 -R /tmp` clears the sticky bit, which lets any user delete other users' temp files. | ||
| RUN chmod 1777 /tmp && apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ | ||
| ca-certificates \ | ||
| libssl-dev \ | ||
| curl \ | ||
| g++ \ | ||
| make \ | ||
| git && \ | ||
| rm -rf /var/lib/apt/lists/* | ||
|
Comment on lines
+11
to
+18
Contributor
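There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Using `chmod 777 -R /tmp` is a security risk as it makes the temporary directory world-writable. This could allow any process or user within the container to tamper with files created by other processes. It's better to use more restrictive permissions or avoid changing them if not strictly necessary. |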
||
|
|
||
| RUN case ${TARGETPLATFORM} in \ | ||
| "linux/arm64") MAMBA_ARCH=aarch64 ;; \ | ||
| *) MAMBA_ARCH=x86_64 ;; \ | ||
| esac && \ | ||
| curl -fsSL -o ~/mambaforge.sh -v "https://github.com/conda-forge/miniforge/releases/download/${MAMBA_VERSION}/Mambaforge-${MAMBA_VERSION}-Linux-${MAMBA_ARCH}.sh" && \ | ||
| bash ~/mambaforge.sh -b -p /opt/conda && \ | ||
| rm ~/mambaforge.sh | ||
|
Comment on lines
+24
to
+26
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The |
||
|
|
||
| RUN case ${TARGETPLATFORM} in \ | ||
| "linux/arm64") exit 1 ;; \ | ||
| *) /opt/conda/bin/conda update -y conda && \ | ||
| /opt/conda/bin/conda install -y "python=${PYTHON_VERSION}" ;; \ | ||
| esac && \ | ||
| /opt/conda/bin/conda clean -ya | ||
|
|
||
|
|
||
| WORKDIR /root | ||
|
|
||
| COPY ./requirements.txt /lightllm/requirements.txt | ||
| RUN --mount=type=cache,target=/root/.cache/pip pip install -r /lightllm/requirements.txt --ignore-installed --extra-index-url https://download.pytorch.org/whl/cu124 | ||
|
|
||
| RUN --mount=type=cache,target=/root/.cache/pip pip install vllm --pre --extra-index-url https://wheels.vllm.ai/nightly | ||
| RUN --mount=type=cache,target=/root/.cache/pip git clone https://github.com/ModelTC/LightKernel.git && cd LightKernel && pip install --no-deps -v . | ||
|
|
||
| # Install the build tooling and the RDMA/InfiniBand packages in one layer, so every install | ||
| # runs against a package index refreshed in that same layer; the index is removed afterwards. | ||
| RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \ | ||
| libnuma-dev wget devscripts debhelper dh-make build-essential dkms \ | ||
| ibverbs-providers infiniband-diags perftest rdma-core libibverbs-dev librdmacm-dev && \ | ||
| rm -rf /var/lib/apt/lists/* | ||
|
|
||
| ENV CUDA_HOME=/usr/local/cuda \ | ||
| GDRCOPY_HOME=/usr/src/gdrdrv-2.4.4/ | ||
|
|
||
| RUN mkdir -p /tmp/gdrcopy && cd /tmp \ | ||
| && git clone https://github.com/NVIDIA/gdrcopy.git -b v2.4.4 \ | ||
| && cd gdrcopy/packages \ | ||
| && CUDA=/usr/local/cuda ./build-deb-packages.sh \ | ||
| && dpkg -i gdrdrv-dkms_*.deb libgdrapi_*.deb gdrcopy-tests_*.deb gdrcopy_*.deb \ | ||
| && cd / && rm -rf /tmp/gdrcopy | ||
|
|
||
| # Fix DeepEP IBGDA symlink | ||
| RUN ln -sf /usr/lib/x86_64-linux-gnu/libmlx5.so.1 /usr/lib/x86_64-linux-gnu/libmlx5.so | ||
|
|
||
| RUN wget https://developer.download.nvidia.com/compute/redist/nvshmem/3.3.9/source/nvshmem_src_cuda12-all-all-3.3.9.tar.gz \ | ||
| && tar -xf nvshmem_src_cuda12-all-all-3.3.9.tar.gz && mv nvshmem_src nvshmem \ | ||
| && cd nvshmem \ | ||
| && rm -f /root/nvshmem_src_cuda12-all-all-3.3.9.tar.gz \ | ||
| && NVSHMEM_SHMEM_SUPPORT=0 \ | ||
| NVSHMEM_UCX_SUPPORT=0 \ | ||
| NVSHMEM_USE_NCCL=0 \ | ||
| NVSHMEM_MPI_SUPPORT=0 \ | ||
| NVSHMEM_IBGDA_SUPPORT=1 \ | ||
| NVSHMEM_PMIX_SUPPORT=0 \ | ||
| NVSHMEM_TIMEOUT_DEVICE_POLLING=0 \ | ||
| NVSHMEM_USE_GDRCOPY=1 \ | ||
| cmake -S . -B build/ -DCMAKE_INSTALL_PREFIX=/root/nvshmem/install -DCMAKE_CUDA_ARCHITECTURES=90 \ | ||
| && cmake --build build --target install -j64 | ||
|
|
||
| ARG DEEPEP_COMMIT=b6ce310bb0b75079682d09bc2ebc063a074fbd58 | ||
| RUN git clone https://github.com/deepseek-ai/DeepEP.git && cd DeepEP && git checkout ${DEEPEP_COMMIT} && cd .. | ||
|
|
||
| WORKDIR /root/DeepEP | ||
| ENV NVSHMEM_DIR=/root/nvshmem/install | ||
| RUN NVSHMEM_DIR=/root/nvshmem/install python setup.py install | ||
|
|
||
| # Build UCX from the v1.19.x branch with CUDA, verbs, GDRCopy and EFA support. | ||
| # Every step is chained with && so a failed package install aborts the build instead of being | ||
| # masked by a `;`, and make parallelism is bounded by the CPU count rather than unlimited. | ||
| RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y cmake automake autotools-dev libtool libz-dev && \ | ||
| DEBIAN_FRONTEND=noninteractive apt-get -y install --reinstall libibverbs-dev rdma-core ibverbs-utils libibumad-dev && \ | ||
| rm -rf /var/lib/apt/lists/* && \ | ||
| rm -rf /usr/lib/ucx && \ | ||
| rm -rf /opt/hpcx/ucx && \ | ||
| cd /usr/local/src && \ | ||
| git clone https://github.com/openucx/ucx.git && \ | ||
| cd ucx && \ | ||
| git checkout v1.19.x && \ | ||
| ./autogen.sh && ./configure \ | ||
| --enable-shared \ | ||
| --disable-static \ | ||
| --disable-doxygen-doc \ | ||
| --enable-optimizations \ | ||
| --enable-cma \ | ||
| --enable-devel-headers \ | ||
| --with-cuda=/usr/local/cuda \ | ||
| --with-verbs=yes \ | ||
| --with-dm \ | ||
| --with-gdrcopy=/usr/local \ | ||
| --with-efa \ | ||
| --enable-mt && \ | ||
| make -j"$(nproc)" && \ | ||
| make -j"$(nproc)" install-strip && \ | ||
| ldconfig | ||
|
|
||
| # Build NIXL with meson/ninja into /usr/local/nixl, then pip-install it from the checkout. | ||
| # Steps are chained with && so a failed apt or pip install stops the build instead of being ignored. | ||
| RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y pkg-config tmux net-tools && \ | ||
| rm -rf /var/lib/apt/lists/* && \ | ||
| cd /usr/local/src && \ | ||
| pip install --upgrade meson pybind11 patchelf && \ | ||
| git clone https://github.com/ai-dynamo/nixl.git -b main && \ | ||
| cd nixl && \ | ||
| rm -rf build && \ | ||
| mkdir build && \ | ||
| meson setup build/ --prefix=/usr/local/nixl --buildtype=release && \ | ||
| cd build && \ | ||
| ninja && \ | ||
| ninja install && \ | ||
| cd .. && pip install . --no-deps | ||
|
|
||
| COPY . /lightllm | ||
| RUN pip install -e /lightllm --no-cache-dir | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Using `chmod 777 -R /tmp` is a security risk as it makes the temporary directory world-writable. This could allow any process or user within the container to tamper with files created by other processes. It's better to use more restrictive permissions or avoid changing them if not strictly necessary.