diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml
index 3902163ef7..d6abd0948a 100644
--- a/.github/workflows/docker-publish.yml
+++ b/.github/workflows/docker-publish.yml
@@ -20,7 +20,6 @@ env:
 
 jobs:
   build:
-
     runs-on: ubuntu-latest
     permissions:
       contents: read
@@ -65,7 +64,6 @@ jobs:
         if: github.event_name != 'pull_request'
         uses: sigstore/cosign-installer@v3.5.0
 
-
       # Workaround: https://github.com/docker/build-push-action/issues/461
       - name: Setup Docker buildx
         uses: docker/setup-buildx-action@79abd3f86f79a9d68a23c75a09a9a85889262adf
@@ -88,9 +86,8 @@ jobs:
         with:
           images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
 
-      # Build and push Docker image with Buildx (don't push on PR)
-      # https://github.com/docker/build-push-action
-      - name: Build and push Docker image
+      # Build the default image (CUDA 12.8.0 with DeepEP, NIXL and cache enabled); it is pushed except on pull requests
+      - name: Build and push Docker image (default cuda12.8.0)
         id: build-and-push
         uses: docker/build-push-action@ac9327eae2b366085ac7f6a2d02df8aa8ead720a
         with:
@@ -99,19 +96,11 @@ jobs:
           push: ${{ github.event_name != 'pull_request' }}
           tags: ${{ steps.meta.outputs.tags }}
           labels: ${{ steps.meta.outputs.labels }}
-          cache-from: type=gha
-          cache-to: type=gha,mode=max
-
-      # Build and push specific Docker image for deepep
-      # https://github.com/docker/build-push-action
-      - name: Build and push deepep Docker image
-        id: build-and-push-deepep
-        uses: docker/build-push-action@ac9327eae2b366085ac7f6a2d02df8aa8ead720a
-        with:
-          context: .
-          file: ./docker/Dockerfile.deepep
-          push: ${{ github.event_name != 'pull_request' }}
-          tags: ghcr.io/modeltc/lightllm:main-deepep
+          build-args: |
+            CUDA_VERSION=12.8.0
+            ENABLE_DEEPEP=1
+            ENABLE_NIXL=1
+            ENABLE_CACHE=1
           cache-from: type=gha
           cache-to: type=gha,mode=max
 
@@ -128,4 +117,4 @@ jobs:
           DIGEST: ${{ steps.build-and-push.outputs.digest }}
         # This step uses the identity token to provision an ephemeral certificate
         # against the sigstore community Fulcio instance.
-        run: echo "${TAGS}" | xargs -I {} cosign sign --yes {}@${DIGEST}
+        run: echo "${TAGS}" | xargs -I {} cosign sign --yes {}@${DIGEST}
\ No newline at end of file
diff --git a/build_and_upload_docker.sh b/build_and_upload_docker.sh
index fc7fd871f7..9533b10f70 100755
--- a/build_and_upload_docker.sh
+++ b/build_and_upload_docker.sh
@@ -18,8 +18,5 @@ IMAGE_TAG=$2
 ACCOUNT=$1
 aws ecr get-login-password --region us-west-2 | docker login --username AWS --password-stdin $ACCOUNT.dkr.ecr.us-west-2.amazonaws.com
-DOCKER_BUILDKIT=1 docker build -f docker/Dockerfile $ACCOUNT.dkr.ecr.us-west-2.amazonaws.com/lightllm:$IMAGE_TAG .
-docker push $ACCOUNT.dkr.ecr.us-west-2.amazonaws.com/lightllm:$IMAGE_TAG
-
-#deepep
-DOCKER_BUILDKIT=1 docker build -f docker/Dockerfile.deepep -t $ACCOUNT.dkr.ecr.us-west-2.amazonaws.com/lightllm:$IMAGE_TAG-deepep .
-docker push $ACCOUNT.dkr.ecr.us-west-2.amazonaws.com/lightllm:$IMAGE_TAG-deepep
\ No newline at end of file
+# Build the single unified image and tag it (-t) with the ECR reference so the push below can find it.
+DOCKER_BUILDKIT=1 docker build -f docker/Dockerfile -t $ACCOUNT.dkr.ecr.us-west-2.amazonaws.com/lightllm:$IMAGE_TAG .
+docker push $ACCOUNT.dkr.ecr.us-west-2.amazonaws.com/lightllm:$IMAGE_TAG
diff --git a/docker/Dockerfile b/docker/Dockerfile
index 6d67fcf4df..8f73a603cc 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -1,18 +1,26 @@
-ARG CUDA_VERSION=12.6.1
+ARG CUDA_VERSION=12.8.0
 FROM nvidia/cuda:${CUDA_VERSION}-cudnn-devel-ubuntu22.04
+
 ARG PYTHON_VERSION=3.10
 ARG MAMBA_VERSION=24.7.1-0
+ARG VLLM_VERSION=0.11.0
 ARG TARGETPLATFORM
+ARG ENABLE_DEEPEP=1
+ARG ENABLE_NIXL=1
+ARG ENABLE_CACHE=1
+
 ENV PATH=/opt/conda/bin:$PATH \
     CONDA_PREFIX=/opt/conda
 
-RUN chmod 777 -R /tmp && apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
-    ca-certificates \
-    libssl-dev \
-    curl \
-    g++ \
-    make \
-    git && \
+RUN chmod 777 -R /tmp && \
+    apt-get update && \
+    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
+      ca-certificates \
+      libssl-dev \
+      curl \
+      g++ \
+      make \
+      git && \
     rm -rf /var/lib/apt/lists/*
 
 RUN case ${TARGETPLATFORM} in \
@@ -25,24 +33,103 @@ RUN case ${TARGETPLATFORM} in \
 
 RUN case ${TARGETPLATFORM} in \
     "linux/arm64")  exit 1 ;; \
-    *)              /opt/conda/bin/conda update -y conda &&  \
-    /opt/conda/bin/conda install -y "python=${PYTHON_VERSION}" ;; \
+    *)              /opt/conda/bin/conda update -y conda && \
+                    /opt/conda/bin/conda install -y "python=${PYTHON_VERSION}" ;; \
     esac && \
     /opt/conda/bin/conda clean -ya
 
-
 WORKDIR /root
 
 COPY ./requirements.txt /lightllm/requirements.txt
 RUN pip install -U pip
 RUN pip install -r /lightllm/requirements.txt --no-cache-dir
+RUN pip install --no-cache-dir vllm==${VLLM_VERSION}
+RUN pip install https://github.com/ModelTC/LightKernel/releases/download/v1.0.1/lightllm_kernel-0.1.0-cp310-cp310-linux_x86_64.whl
+
+RUN apt-get update && apt-get install -y libnuma-dev && rm -rf /var/lib/apt/lists/*
+
+ENV CUDA_HOME=/usr/local/cuda \
+    GDRCOPY_HOME=/usr/src/gdrdrv-2.4.4/
+
+# Install LightMem (light_mem) at a pinned commit when ENABLE_CACHE=1; one '&&' chain so any failed step fails the build.
+RUN if [ "${ENABLE_CACHE}" = "1" ]; then \
+      apt-get update && apt-get install -y libboost-dev && rm -rf /var/lib/apt/lists/* && \
+      LIGHTMEM_REF=5900baf92d85ef4dbda6124093506b0af906011a && \
+      pip install --no-deps -v "git+https://github.com/ModelTC/LightMem.git@${LIGHTMEM_REF}#egg=light_mem"; fi
 
-RUN pip install --no-cache-dir vllm --pre --extra-index-url https://wheels.vllm.ai/nightly 
+# RDMA userspace packages plus GDRCopy v2.4.4 built as .deb packages; runs when ENABLE_NIXL or ENABLE_DEEPEP is 1.
+RUN if [ "${ENABLE_NIXL}" = "1" ] || [ "${ENABLE_DEEPEP}" = "1" ]; then \
+      apt-get update && \
+      apt-get install -y wget devscripts debhelper dh-make build-essential dkms && \
+      apt-get install -y ibverbs-providers infiniband-diags perftest rdma-core libibverbs-dev librdmacm-dev && \
+      rm -rf /var/lib/apt/lists/* && \
+      mkdir -p /tmp/gdrcopy && cd /tmp && \
+      git clone https://github.com/NVIDIA/gdrcopy.git -b v2.4.4 && cd gdrcopy/packages && \
+      CUDA=/usr/local/cuda ./build-deb-packages.sh && \
+      dpkg -i gdrdrv-dkms_*.deb libgdrapi_*.deb gdrcopy-tests_*.deb gdrcopy_*.deb && \
+      cd / && rm -rf /tmp/gdrcopy; fi
 
-# TODO: offline compile
-# RUN git clone https://github.com/ModelTC/LightKernel.git && cd LightKernel && pip install --no-deps -v .
+# When ENABLE_DEEPEP=1: build NVSHMEM 3.3.9 (IBGDA + GDRCopy, CUDA arch 90), then DeepEP at a pinned commit.
+# Commands are ';'-separated under `set -e`: errexit ignores failures of non-final members of an '&&' chain.
+RUN if [ "${ENABLE_DEEPEP}" = "1" ]; then \
+      set -e; \
+      ln -sf /usr/lib/x86_64-linux-gnu/libmlx5.so.1 /usr/lib/x86_64-linux-gnu/libmlx5.so; \
+      NVSHMEM_VERSION=3.3.9; CUDA_ARCHS=90; \
+      wget https://developer.download.nvidia.com/compute/redist/nvshmem/${NVSHMEM_VERSION}/source/nvshmem_src_cuda12-all-all-${NVSHMEM_VERSION}.tar.gz; \
+      tar -xf nvshmem_src_cuda12-all-all-${NVSHMEM_VERSION}.tar.gz; mv nvshmem_src nvshmem; \
+      rm -f /root/nvshmem_src_cuda12-all-all-${NVSHMEM_VERSION}.tar.gz; cd nvshmem; \
+      NVSHMEM_SHMEM_SUPPORT=0 \
+      NVSHMEM_UCX_SUPPORT=0 \
+      NVSHMEM_USE_NCCL=0 \
+      NVSHMEM_MPI_SUPPORT=0 \
+      NVSHMEM_IBGDA_SUPPORT=1 \
+      NVSHMEM_PMIX_SUPPORT=0 \
+      NVSHMEM_TIMEOUT_DEVICE_POLLING=0 \
+      NVSHMEM_USE_GDRCOPY=1 \
+      cmake -S . -B build/ -DCMAKE_INSTALL_PREFIX=/root/nvshmem/install -DCMAKE_CUDA_ARCHITECTURES=${CUDA_ARCHS}; \
+      cmake --build build --target install -j64; \
+      DEEPEP_COMMIT=b6ce310bb0b75079682d09bc2ebc063a074fbd58; \
+      cd /root; git clone https://github.com/deepseek-ai/DeepEP.git; cd DeepEP; git checkout ${DEEPEP_COMMIT}; \
+      NVSHMEM_DIR=/root/nvshmem/install python setup.py install; \
+    fi
 
-RUN apt-get update && apt-get install -y libnuma-dev # for sgl_kernel
+# When ENABLE_NIXL=1: build UCX (v1.19.x branch) from source, then build and install NIXL; `set -e` aborts on any failure.
+# NOTE(review): nixl is cloned from the moving `main` branch, so rebuilds are not reproducible - consider pinning a tag.
+RUN if [ "${ENABLE_NIXL}" = "1" ]; then \
+      set -e; \
+      apt-get update; \
+      DEBIAN_FRONTEND=noninteractive apt-get install -y cmake automake autotools-dev libtool libz-dev pkg-config tmux net-tools; \
+      DEBIAN_FRONTEND=noninteractive apt-get -y install --reinstall libibverbs-dev rdma-core ibverbs-utils libibumad-dev; \
+      rm -rf /var/lib/apt/lists/*; \
+      rm -rf /usr/lib/ucx /opt/hpcx/ucx; \
+      cd /usr/local/src; \
+      git clone https://github.com/openucx/ucx.git; \
+      cd ucx; git checkout v1.19.x; \
+      ./autogen.sh; \
+      ./configure \
+        --enable-shared \
+        --disable-static \
+        --disable-doxygen-doc \
+        --enable-optimizations \
+        --enable-cma \
+        --enable-devel-headers \
+        --with-cuda=/usr/local/cuda \
+        --with-verbs=yes \
+        --with-dm \
+        --with-gdrcopy=/usr/local \
+        --with-efa \
+        --enable-mt; \
+      make -j"$(nproc)"; \
+      make -j"$(nproc)" install-strip; \
+      ldconfig; \
+      cd /usr/local/src; \
+      pip install --upgrade meson pybind11 patchelf; \
+      git clone https://github.com/ai-dynamo/nixl.git -b main; cd nixl; \
+      rm -rf build; mkdir build; \
+      meson setup build/ --prefix=/usr/local/nixl --buildtype=release; \
+      cd build; ninja; ninja install; cd ..; \
+      pip install . --no-deps; \
+    fi
 
 COPY . /lightllm
 RUN pip install -e /lightllm --no-cache-dir
diff --git a/docker/Dockerfile.deepep b/docker/Dockerfile.deepep
deleted file mode 100644
index e765978b91..0000000000
--- a/docker/Dockerfile.deepep
+++ /dev/null
@@ -1,84 +0,0 @@
-ARG CUDA_VERSION=12.6.1
-FROM nvidia/cuda:${CUDA_VERSION}-cudnn-devel-ubuntu22.04
-ARG PYTHON_VERSION=3.10
-ARG MAMBA_VERSION=24.7.1-0
-ARG TARGETPLATFORM
-ENV PATH=/opt/conda/bin:$PATH \
-    CONDA_PREFIX=/opt/conda
-
-RUN chmod 777 -R /tmp && apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
-    ca-certificates \
-    libssl-dev \
-    curl \
-    g++ \
-    make \
-    git && \
-    rm -rf /var/lib/apt/lists/*
-
-RUN case ${TARGETPLATFORM} in \
-    "linux/arm64")  MAMBA_ARCH=aarch64  ;; \
-    *)              MAMBA_ARCH=x86_64   ;; \
-    esac && \
-    curl -fsSL -o ~/mambaforge.sh "https://github.com/conda-forge/miniforge/releases/download/${MAMBA_VERSION}/Mambaforge-${MAMBA_VERSION}-Linux-${MAMBA_ARCH}.sh" && \
-    bash ~/mambaforge.sh -b -p /opt/conda && \
-    rm ~/mambaforge.sh
-
-RUN case ${TARGETPLATFORM} in \
-    "linux/arm64")  exit 1 ;; \
-    *)              /opt/conda/bin/conda update -y conda &&  \
-    /opt/conda/bin/conda install -y "python=${PYTHON_VERSION}" ;; \
-    esac && \
-    /opt/conda/bin/conda clean -ya
-
-
-WORKDIR /root
-
-COPY ./requirements.txt /lightllm/requirements.txt
-RUN pip install -U pip
-RUN pip install -r /lightllm/requirements.txt --no-cache-dir
-
-RUN pip install --no-cache-dir vllm --pre --extra-index-url https://wheels.vllm.ai/nightly 
-
-# TODO: offline compile
-# RUN git clone https://github.com/ModelTC/LightKernel.git && cd LightKernel && pip install --no-deps -v .
-
-RUN apt-get update && apt-get install -y libnuma-dev wget devscripts debhelper dh-make build-essential dkms
-RUN apt-get install -y ibverbs-providers infiniband-diags perftest rdma-core libibverbs-dev librdmacm-dev
-
-ENV CUDA_HOME=/usr/local/cuda \
-    GDRCOPY_HOME=/usr/src/gdrdrv-2.4.4/
-
-RUN mkdir -p /tmp/gdrcopy && cd /tmp \
- && git clone https://github.com/NVIDIA/gdrcopy.git -b v2.4.4 \
- && cd gdrcopy/packages \
- && CUDA=/usr/local/cuda ./build-deb-packages.sh \
- && dpkg -i gdrdrv-dkms_*.deb libgdrapi_*.deb gdrcopy-tests_*.deb gdrcopy_*.deb \
- && cd / && rm -rf /tmp/gdrcopy
-
- # Fix DeepEP IBGDA symlink
-RUN ln -sf /usr/lib/x86_64-linux-gnu/libmlx5.so.1 /usr/lib/x86_64-linux-gnu/libmlx5.so
-
-RUN wget https://developer.download.nvidia.com/compute/redist/nvshmem/3.3.9/source/nvshmem_src_cuda12-all-all-3.3.9.tar.gz \
- && tar -xf nvshmem_src_cuda12-all-all-3.3.9.tar.gz && mv nvshmem_src nvshmem \
- && cd nvshmem \
- && rm -f /root/nvshmem_src_cuda12-all-all-3.3.9.tar.gz \
- && NVSHMEM_SHMEM_SUPPORT=0 \
-    NVSHMEM_UCX_SUPPORT=0 \
-    NVSHMEM_USE_NCCL=0 \
-    NVSHMEM_MPI_SUPPORT=0 \
-    NVSHMEM_IBGDA_SUPPORT=1 \
-    NVSHMEM_PMIX_SUPPORT=0 \
-    NVSHMEM_TIMEOUT_DEVICE_POLLING=0 \
-    NVSHMEM_USE_GDRCOPY=1 \
-    cmake -S . -B build/ -DCMAKE_INSTALL_PREFIX=/root/nvshmem/install -DCMAKE_CUDA_ARCHITECTURES=90 \
- && cmake --build build --target install -j64
-
-ARG DEEPEP_COMMIT=b6ce310bb0b75079682d09bc2ebc063a074fbd58
-RUN git clone https://github.com/deepseek-ai/DeepEP.git && cd DeepEP && git checkout ${DEEPEP_COMMIT} && cd ..
-
-WORKDIR /root/DeepEP
-ENV NVSHMEM_DIR=/root/nvshmem/install
-RUN NVSHMEM_DIR=/root/nvshmem/install python setup.py install
-
-COPY . /lightllm
-RUN pip install -e /lightllm --no-cache-dir
\ No newline at end of file
diff --git a/docker/Dockerfile.nixl b/docker/Dockerfile.nixl
deleted file mode 100644
index b8047bbd03..0000000000
--- a/docker/Dockerfile.nixl
+++ /dev/null
@@ -1,94 +0,0 @@
-ARG CUDA_VERSION=12.6.1
-FROM nvidia/cuda:${CUDA_VERSION}-cudnn-devel-ubuntu22.04
-ARG PYTHON_VERSION=3.10
-ARG MAMBA_VERSION=24.7.1-0
-ARG TARGETPLATFORM
-ENV PATH=/opt/conda/bin:$PATH \
-    CONDA_PREFIX=/opt/conda
-
-RUN chmod 777 -R /tmp && apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
-    ca-certificates \
-    libssl-dev \
-    curl \
-    g++ \
-    make \
-    git && \
-    rm -rf /var/lib/apt/lists/*
-
-RUN case ${TARGETPLATFORM} in \
-    "linux/arm64")  MAMBA_ARCH=aarch64  ;; \
-    *)              MAMBA_ARCH=x86_64   ;; \
-    esac && \
-    curl -fsSL -o ~/mambaforge.sh -v "https://github.com/conda-forge/miniforge/releases/download/${MAMBA_VERSION}/Mambaforge-${MAMBA_VERSION}-Linux-${MAMBA_ARCH}.sh" && \
-    bash ~/mambaforge.sh -b -p /opt/conda && \
-    rm ~/mambaforge.sh
-
-RUN case ${TARGETPLATFORM} in \
-    "linux/arm64")  exit 1 ;; \
-    *)              /opt/conda/bin/conda update -y conda &&  \
-    /opt/conda/bin/conda install -y "python=${PYTHON_VERSION}" ;; \
-    esac && \
-    /opt/conda/bin/conda clean -ya
-
-
-WORKDIR /root
-
-COPY ./requirements.txt /lightllm/requirements.txt
-RUN --mount=type=cache,target=/root/.cache/pip pip install -r /lightllm/requirements.txt --ignore-installed --extra-index-url https://download.pytorch.org/whl/cu124
-
-RUN --mount=type=cache,target=/root/.cache/pip pip install vllm --pre --extra-index-url https://wheels.vllm.ai/nightly
-RUN --mount=type=cache,target=/root/.cache/pip git clone https://github.com/ModelTC/LightKernel.git && cd LightKernel &&  pip install --no-deps -v .
-
-RUN apt-get update && apt-get install -y libnuma-dev wget devscripts debhelper dh-make build-essential dkms
-RUN apt-get install -y ibverbs-providers infiniband-diags perftest rdma-core libibverbs-dev librdmacm-dev
-
-ENV CUDA_HOME=/usr/local/cuda \
-    GDRCOPY_HOME=/usr/src/gdrdrv-2.4.4/
-
-RUN mkdir -p /tmp/gdrcopy && cd /tmp \
- && git clone https://github.com/NVIDIA/gdrcopy.git -b v2.4.4 \
- && cd gdrcopy/packages \
- && CUDA=/usr/local/cuda ./build-deb-packages.sh \
- && dpkg -i gdrdrv-dkms_*.deb libgdrapi_*.deb gdrcopy-tests_*.deb gdrcopy_*.deb \
- && cd / && rm -rf /tmp/gdrcopy
-
-RUN apt-get update && apt-get install -y cmake automake autotools-dev  libtool libz-dev && \
-    DEBIAN_FRONTEND=noninteractive apt-get -y install --reinstall libibverbs-dev rdma-core ibverbs-utils libibumad-dev; \
-    rm -rf /usr/lib/ucx && \
-    rm -rf /opt/hpcx/ucx && \
-    cd /usr/local/src && \
-    git clone https://github.com/openucx/ucx.git && \
-    cd ucx && 			     \
-    git checkout v1.19.x &&	     \
-    ./autogen.sh && ./configure     \
-    --enable-shared             \
-    --disable-static            \
-    --disable-doxygen-doc       \
-    --enable-optimizations      \
-    --enable-cma                \
-    --enable-devel-headers      \
-    --with-cuda=/usr/local/cuda \
-    --with-verbs=yes                \
-    --with-dm                   \
-    --with-gdrcopy=/usr/local   \
-    --with-efa                  \
-    --enable-mt &&              \
-    make -j &&                      \
-    make -j install-strip &&        \
-    ldconfig;
-
-RUN apt-get update && apt-get install -y  pkg-config tmux net-tools ;  \
-    cd /usr/local/src; \
-    pip install --upgrade meson pybind11 patchelf; \
-    git clone https://github.com/ai-dynamo/nixl.git -b main && \
-    cd nixl && \
-    rm -rf build && \
-    mkdir build && \
-    meson setup build/ --prefix=/usr/local/nixl --buildtype=release && \
-    cd build && \
-    ninja && \
-    ninja install && \
-    cd .. && pip install . --no-deps;
-
-COPY . /lightllm
-RUN pip install -e /lightllm --no-cache-dir
diff --git a/docker/Dockerfile.nixl.deepep b/docker/Dockerfile.nixl.deepep
deleted file mode 100644
index 8ca06e1094..0000000000
--- a/docker/Dockerfile.nixl.deepep
+++ /dev/null
@@ -1,121 +0,0 @@
-ARG CUDA_VERSION=12.6.1
-FROM nvidia/cuda:${CUDA_VERSION}-cudnn-devel-ubuntu22.04
-
-ARG PYTHON_VERSION=3.10
-ARG MAMBA_VERSION=24.7.1-0
-ARG TARGETPLATFORM
-
-ENV PATH=/opt/conda/bin:$PATH \
-    CONDA_PREFIX=/opt/conda
-
-RUN chmod 777 -R /tmp && apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
-    ca-certificates \
-    libssl-dev \
-    curl \
-    g++ \
-    make \
-    git && \
-    rm -rf /var/lib/apt/lists/*
-
-RUN case ${TARGETPLATFORM} in \
-    "linux/arm64")  MAMBA_ARCH=aarch64  ;; \
-    *)              MAMBA_ARCH=x86_64   ;; \
-    esac && \
-    curl -fsSL -o ~/mambaforge.sh -v "https://github.com/conda-forge/miniforge/releases/download/${MAMBA_VERSION}/Mambaforge-${MAMBA_VERSION}-Linux-${MAMBA_ARCH}.sh" && \
-    bash ~/mambaforge.sh -b -p /opt/conda && \
-    rm ~/mambaforge.sh
-
-RUN case ${TARGETPLATFORM} in \
-    "linux/arm64")  exit 1 ;; \
-    *)              /opt/conda/bin/conda update -y conda &&  \
-    /opt/conda/bin/conda install -y "python=${PYTHON_VERSION}" ;; \
-    esac && \
-    /opt/conda/bin/conda clean -ya
-
-
-WORKDIR /root
-
-COPY ./requirements.txt /lightllm/requirements.txt
-RUN --mount=type=cache,target=/root/.cache/pip pip install -r /lightllm/requirements.txt --ignore-installed --extra-index-url https://download.pytorch.org/whl/cu124
-
-RUN --mount=type=cache,target=/root/.cache/pip pip install vllm --pre --extra-index-url https://wheels.vllm.ai/nightly
-RUN --mount=type=cache,target=/root/.cache/pip git clone https://github.com/ModelTC/LightKernel.git && cd LightKernel &&  pip install --no-deps -v .
-
-RUN apt-get update && apt-get install -y libnuma-dev wget devscripts debhelper dh-make build-essential dkms
-RUN apt-get install -y ibverbs-providers infiniband-diags perftest rdma-core libibverbs-dev librdmacm-dev
-
-ENV CUDA_HOME=/usr/local/cuda \
-    GDRCOPY_HOME=/usr/src/gdrdrv-2.4.4/
-
-RUN mkdir -p /tmp/gdrcopy && cd /tmp \
- && git clone https://github.com/NVIDIA/gdrcopy.git -b v2.4.4 \
- && cd gdrcopy/packages \
- && CUDA=/usr/local/cuda ./build-deb-packages.sh \
- && dpkg -i gdrdrv-dkms_*.deb libgdrapi_*.deb gdrcopy-tests_*.deb gdrcopy_*.deb \
- && cd / && rm -rf /tmp/gdrcopy
-
- # Fix DeepEP IBGDA symlink
-RUN ln -sf /usr/lib/x86_64-linux-gnu/libmlx5.so.1 /usr/lib/x86_64-linux-gnu/libmlx5.so
-
-RUN wget https://developer.download.nvidia.com/compute/redist/nvshmem/3.3.9/source/nvshmem_src_cuda12-all-all-3.3.9.tar.gz \
- && tar -xf nvshmem_src_cuda12-all-all-3.3.9.tar.gz && mv nvshmem_src nvshmem \
- && cd nvshmem \
- && rm -f /root/nvshmem_src_cuda12-all-all-3.3.9.tar.gz \
- && NVSHMEM_SHMEM_SUPPORT=0 \
-    NVSHMEM_UCX_SUPPORT=0 \
-    NVSHMEM_USE_NCCL=0 \
-    NVSHMEM_MPI_SUPPORT=0 \
-    NVSHMEM_IBGDA_SUPPORT=1 \
-    NVSHMEM_PMIX_SUPPORT=0 \
-    NVSHMEM_TIMEOUT_DEVICE_POLLING=0 \
-    NVSHMEM_USE_GDRCOPY=1 \
-    cmake -S . -B build/ -DCMAKE_INSTALL_PREFIX=/root/nvshmem/install -DCMAKE_CUDA_ARCHITECTURES=90 \
- && cmake --build build --target install -j64
-
-ARG DEEPEP_COMMIT=b6ce310bb0b75079682d09bc2ebc063a074fbd58
-RUN git clone https://github.com/deepseek-ai/DeepEP.git && cd DeepEP && git checkout ${DEEPEP_COMMIT} && cd ..
-
-WORKDIR /root/DeepEP
-ENV NVSHMEM_DIR=/root/nvshmem/install
-RUN NVSHMEM_DIR=/root/nvshmem/install python setup.py install
-
-RUN apt-get update && apt-get install -y cmake automake autotools-dev  libtool libz-dev && \
-    DEBIAN_FRONTEND=noninteractive apt-get -y install --reinstall libibverbs-dev rdma-core ibverbs-utils libibumad-dev; \
-    rm -rf /usr/lib/ucx && \
-    rm -rf /opt/hpcx/ucx && \
-    cd /usr/local/src && \
-    git clone https://github.com/openucx/ucx.git && \
-    cd ucx && 			     \
-    git checkout v1.19.x &&	     \
-    ./autogen.sh && ./configure     \
-    --enable-shared             \
-    --disable-static            \
-    --disable-doxygen-doc       \
-    --enable-optimizations      \
-    --enable-cma                \
-    --enable-devel-headers      \
-    --with-cuda=/usr/local/cuda \
-    --with-verbs=yes                \
-    --with-dm                   \
-    --with-gdrcopy=/usr/local   \
-    --with-efa                  \
-    --enable-mt &&              \
-    make -j &&                      \
-    make -j install-strip &&        \
-    ldconfig;
-
-RUN apt-get update && apt-get install -y  pkg-config tmux net-tools ;  \
-    cd /usr/local/src; \
-    pip install --upgrade meson pybind11 patchelf; \
-    git clone https://github.com/ai-dynamo/nixl.git -b main && \
-    cd nixl && \
-    rm -rf build && \
-    mkdir build && \
-    meson setup build/ --prefix=/usr/local/nixl --buildtype=release && \
-    cd build && \
-    ninja && \
-    ninja install && \
-    cd .. && pip install . --no-deps;
-
-COPY . /lightllm
-RUN pip install -e /lightllm --no-cache-dir
diff --git a/docker/cuda_version_12.6.1/Dockerfile b/docker/cuda_version_12.6.1/Dockerfile
deleted file mode 100644
index 6d67fcf4df..0000000000
--- a/docker/cuda_version_12.6.1/Dockerfile
+++ /dev/null
@@ -1,48 +0,0 @@
-ARG CUDA_VERSION=12.6.1
-FROM nvidia/cuda:${CUDA_VERSION}-cudnn-devel-ubuntu22.04
-ARG PYTHON_VERSION=3.10
-ARG MAMBA_VERSION=24.7.1-0
-ARG TARGETPLATFORM
-ENV PATH=/opt/conda/bin:$PATH \
-    CONDA_PREFIX=/opt/conda
-
-RUN chmod 777 -R /tmp && apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
-    ca-certificates \
-    libssl-dev \
-    curl \
-    g++ \
-    make \
-    git && \
-    rm -rf /var/lib/apt/lists/*
-
-RUN case ${TARGETPLATFORM} in \
-    "linux/arm64")  MAMBA_ARCH=aarch64  ;; \
-    *)              MAMBA_ARCH=x86_64   ;; \
-    esac && \
-    curl -fsSL -o ~/mambaforge.sh "https://github.com/conda-forge/miniforge/releases/download/${MAMBA_VERSION}/Mambaforge-${MAMBA_VERSION}-Linux-${MAMBA_ARCH}.sh" && \
-    bash ~/mambaforge.sh -b -p /opt/conda && \
-    rm ~/mambaforge.sh
-
-RUN case ${TARGETPLATFORM} in \
-    "linux/arm64")  exit 1 ;; \
-    *)              /opt/conda/bin/conda update -y conda &&  \
-    /opt/conda/bin/conda install -y "python=${PYTHON_VERSION}" ;; \
-    esac && \
-    /opt/conda/bin/conda clean -ya
-
-
-WORKDIR /root
-
-COPY ./requirements.txt /lightllm/requirements.txt
-RUN pip install -U pip
-RUN pip install -r /lightllm/requirements.txt --no-cache-dir
-
-RUN pip install --no-cache-dir vllm --pre --extra-index-url https://wheels.vllm.ai/nightly 
-
-# TODO: offline compile
-# RUN git clone https://github.com/ModelTC/LightKernel.git && cd LightKernel && pip install --no-deps -v .
-
-RUN apt-get update && apt-get install -y libnuma-dev # for sgl_kernel
-
-COPY . /lightllm
-RUN pip install -e /lightllm --no-cache-dir
diff --git a/docker/cuda_version_12.6.1/Dockerfile.deepep b/docker/cuda_version_12.6.1/Dockerfile.deepep
deleted file mode 100644
index e765978b91..0000000000
--- a/docker/cuda_version_12.6.1/Dockerfile.deepep
+++ /dev/null
@@ -1,84 +0,0 @@
-ARG CUDA_VERSION=12.6.1
-FROM nvidia/cuda:${CUDA_VERSION}-cudnn-devel-ubuntu22.04
-ARG PYTHON_VERSION=3.10
-ARG MAMBA_VERSION=24.7.1-0
-ARG TARGETPLATFORM
-ENV PATH=/opt/conda/bin:$PATH \
-    CONDA_PREFIX=/opt/conda
-
-RUN chmod 777 -R /tmp && apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
-    ca-certificates \
-    libssl-dev \
-    curl \
-    g++ \
-    make \
-    git && \
-    rm -rf /var/lib/apt/lists/*
-
-RUN case ${TARGETPLATFORM} in \
-    "linux/arm64")  MAMBA_ARCH=aarch64  ;; \
-    *)              MAMBA_ARCH=x86_64   ;; \
-    esac && \
-    curl -fsSL -o ~/mambaforge.sh "https://github.com/conda-forge/miniforge/releases/download/${MAMBA_VERSION}/Mambaforge-${MAMBA_VERSION}-Linux-${MAMBA_ARCH}.sh" && \
-    bash ~/mambaforge.sh -b -p /opt/conda && \
-    rm ~/mambaforge.sh
-
-RUN case ${TARGETPLATFORM} in \
-    "linux/arm64")  exit 1 ;; \
-    *)              /opt/conda/bin/conda update -y conda &&  \
-    /opt/conda/bin/conda install -y "python=${PYTHON_VERSION}" ;; \
-    esac && \
-    /opt/conda/bin/conda clean -ya
-
-
-WORKDIR /root
-
-COPY ./requirements.txt /lightllm/requirements.txt
-RUN pip install -U pip
-RUN pip install -r /lightllm/requirements.txt --no-cache-dir
-
-RUN pip install --no-cache-dir vllm --pre --extra-index-url https://wheels.vllm.ai/nightly 
-
-# TODO: offline compile
-# RUN git clone https://github.com/ModelTC/LightKernel.git && cd LightKernel && pip install --no-deps -v .
-
-RUN apt-get update && apt-get install -y libnuma-dev wget devscripts debhelper dh-make build-essential dkms
-RUN apt-get install -y ibverbs-providers infiniband-diags perftest rdma-core libibverbs-dev librdmacm-dev
-
-ENV CUDA_HOME=/usr/local/cuda \
-    GDRCOPY_HOME=/usr/src/gdrdrv-2.4.4/
-
-RUN mkdir -p /tmp/gdrcopy && cd /tmp \
- && git clone https://github.com/NVIDIA/gdrcopy.git -b v2.4.4 \
- && cd gdrcopy/packages \
- && CUDA=/usr/local/cuda ./build-deb-packages.sh \
- && dpkg -i gdrdrv-dkms_*.deb libgdrapi_*.deb gdrcopy-tests_*.deb gdrcopy_*.deb \
- && cd / && rm -rf /tmp/gdrcopy
-
- # Fix DeepEP IBGDA symlink
-RUN ln -sf /usr/lib/x86_64-linux-gnu/libmlx5.so.1 /usr/lib/x86_64-linux-gnu/libmlx5.so
-
-RUN wget https://developer.download.nvidia.com/compute/redist/nvshmem/3.3.9/source/nvshmem_src_cuda12-all-all-3.3.9.tar.gz \
- && tar -xf nvshmem_src_cuda12-all-all-3.3.9.tar.gz && mv nvshmem_src nvshmem \
- && cd nvshmem \
- && rm -f /root/nvshmem_src_cuda12-all-all-3.3.9.tar.gz \
- && NVSHMEM_SHMEM_SUPPORT=0 \
-    NVSHMEM_UCX_SUPPORT=0 \
-    NVSHMEM_USE_NCCL=0 \
-    NVSHMEM_MPI_SUPPORT=0 \
-    NVSHMEM_IBGDA_SUPPORT=1 \
-    NVSHMEM_PMIX_SUPPORT=0 \
-    NVSHMEM_TIMEOUT_DEVICE_POLLING=0 \
-    NVSHMEM_USE_GDRCOPY=1 \
-    cmake -S . -B build/ -DCMAKE_INSTALL_PREFIX=/root/nvshmem/install -DCMAKE_CUDA_ARCHITECTURES=90 \
- && cmake --build build --target install -j64
-
-ARG DEEPEP_COMMIT=b6ce310bb0b75079682d09bc2ebc063a074fbd58
-RUN git clone https://github.com/deepseek-ai/DeepEP.git && cd DeepEP && git checkout ${DEEPEP_COMMIT} && cd ..
-
-WORKDIR /root/DeepEP
-ENV NVSHMEM_DIR=/root/nvshmem/install
-RUN NVSHMEM_DIR=/root/nvshmem/install python setup.py install
-
-COPY . /lightllm
-RUN pip install -e /lightllm --no-cache-dir
\ No newline at end of file
diff --git a/docker/cuda_version_12.6.1/Dockerfile.nixl b/docker/cuda_version_12.6.1/Dockerfile.nixl
deleted file mode 100644
index b8047bbd03..0000000000
--- a/docker/cuda_version_12.6.1/Dockerfile.nixl
+++ /dev/null
@@ -1,94 +0,0 @@
-ARG CUDA_VERSION=12.6.1
-FROM nvidia/cuda:${CUDA_VERSION}-cudnn-devel-ubuntu22.04
-ARG PYTHON_VERSION=3.10
-ARG MAMBA_VERSION=24.7.1-0
-ARG TARGETPLATFORM
-ENV PATH=/opt/conda/bin:$PATH \
-    CONDA_PREFIX=/opt/conda
-
-RUN chmod 777 -R /tmp && apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
-    ca-certificates \
-    libssl-dev \
-    curl \
-    g++ \
-    make \
-    git && \
-    rm -rf /var/lib/apt/lists/*
-
-RUN case ${TARGETPLATFORM} in \
-    "linux/arm64")  MAMBA_ARCH=aarch64  ;; \
-    *)              MAMBA_ARCH=x86_64   ;; \
-    esac && \
-    curl -fsSL -o ~/mambaforge.sh -v "https://github.com/conda-forge/miniforge/releases/download/${MAMBA_VERSION}/Mambaforge-${MAMBA_VERSION}-Linux-${MAMBA_ARCH}.sh" && \
-    bash ~/mambaforge.sh -b -p /opt/conda && \
-    rm ~/mambaforge.sh
-
-RUN case ${TARGETPLATFORM} in \
-    "linux/arm64")  exit 1 ;; \
-    *)              /opt/conda/bin/conda update -y conda &&  \
-    /opt/conda/bin/conda install -y "python=${PYTHON_VERSION}" ;; \
-    esac && \
-    /opt/conda/bin/conda clean -ya
-
-
-WORKDIR /root
-
-COPY ./requirements.txt /lightllm/requirements.txt
-RUN --mount=type=cache,target=/root/.cache/pip pip install -r /lightllm/requirements.txt --ignore-installed --extra-index-url https://download.pytorch.org/whl/cu124
-
-RUN --mount=type=cache,target=/root/.cache/pip pip install vllm --pre --extra-index-url https://wheels.vllm.ai/nightly
-RUN --mount=type=cache,target=/root/.cache/pip git clone https://github.com/ModelTC/LightKernel.git && cd LightKernel &&  pip install --no-deps -v .
-
-RUN apt-get update && apt-get install -y libnuma-dev wget devscripts debhelper dh-make build-essential dkms
-RUN apt-get install -y ibverbs-providers infiniband-diags perftest rdma-core libibverbs-dev librdmacm-dev
-
-ENV CUDA_HOME=/usr/local/cuda \
-    GDRCOPY_HOME=/usr/src/gdrdrv-2.4.4/
-
-RUN mkdir -p /tmp/gdrcopy && cd /tmp \
- && git clone https://github.com/NVIDIA/gdrcopy.git -b v2.4.4 \
- && cd gdrcopy/packages \
- && CUDA=/usr/local/cuda ./build-deb-packages.sh \
- && dpkg -i gdrdrv-dkms_*.deb libgdrapi_*.deb gdrcopy-tests_*.deb gdrcopy_*.deb \
- && cd / && rm -rf /tmp/gdrcopy
-
-RUN apt-get update && apt-get install -y cmake automake autotools-dev  libtool libz-dev && \
-    DEBIAN_FRONTEND=noninteractive apt-get -y install --reinstall libibverbs-dev rdma-core ibverbs-utils libibumad-dev; \
-    rm -rf /usr/lib/ucx && \
-    rm -rf /opt/hpcx/ucx && \
-    cd /usr/local/src && \
-    git clone https://github.com/openucx/ucx.git && \
-    cd ucx && 			     \
-    git checkout v1.19.x &&	     \
-    ./autogen.sh && ./configure     \
-    --enable-shared             \
-    --disable-static            \
-    --disable-doxygen-doc       \
-    --enable-optimizations      \
-    --enable-cma                \
-    --enable-devel-headers      \
-    --with-cuda=/usr/local/cuda \
-    --with-verbs=yes                \
-    --with-dm                   \
-    --with-gdrcopy=/usr/local   \
-    --with-efa                  \
-    --enable-mt &&              \
-    make -j &&                      \
-    make -j install-strip &&        \
-    ldconfig;
-
-RUN apt-get update && apt-get install -y  pkg-config tmux net-tools ;  \
-    cd /usr/local/src; \
-    pip install --upgrade meson pybind11 patchelf; \
-    git clone https://github.com/ai-dynamo/nixl.git -b main && \
-    cd nixl && \
-    rm -rf build && \
-    mkdir build && \
-    meson setup build/ --prefix=/usr/local/nixl --buildtype=release && \
-    cd build && \
-    ninja && \
-    ninja install && \
-    cd .. && pip install . --no-deps;
-
-COPY . /lightllm
-RUN pip install -e /lightllm --no-cache-dir
diff --git a/docker/cuda_version_12.6.1/Dockerfile.nixl.deepep b/docker/cuda_version_12.6.1/Dockerfile.nixl.deepep
deleted file mode 100644
index 8ca06e1094..0000000000
--- a/docker/cuda_version_12.6.1/Dockerfile.nixl.deepep
+++ /dev/null
@@ -1,121 +0,0 @@
-ARG CUDA_VERSION=12.6.1
-FROM nvidia/cuda:${CUDA_VERSION}-cudnn-devel-ubuntu22.04
-
-ARG PYTHON_VERSION=3.10
-ARG MAMBA_VERSION=24.7.1-0
-ARG TARGETPLATFORM
-
-ENV PATH=/opt/conda/bin:$PATH \
-    CONDA_PREFIX=/opt/conda
-
-RUN chmod 777 -R /tmp && apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
-    ca-certificates \
-    libssl-dev \
-    curl \
-    g++ \
-    make \
-    git && \
-    rm -rf /var/lib/apt/lists/*
-
-RUN case ${TARGETPLATFORM} in \
-    "linux/arm64")  MAMBA_ARCH=aarch64  ;; \
-    *)              MAMBA_ARCH=x86_64   ;; \
-    esac && \
-    curl -fsSL -o ~/mambaforge.sh -v "https://github.com/conda-forge/miniforge/releases/download/${MAMBA_VERSION}/Mambaforge-${MAMBA_VERSION}-Linux-${MAMBA_ARCH}.sh" && \
-    bash ~/mambaforge.sh -b -p /opt/conda && \
-    rm ~/mambaforge.sh
-
-RUN case ${TARGETPLATFORM} in \
-    "linux/arm64")  exit 1 ;; \
-    *)              /opt/conda/bin/conda update -y conda &&  \
-    /opt/conda/bin/conda install -y "python=${PYTHON_VERSION}" ;; \
-    esac && \
-    /opt/conda/bin/conda clean -ya
-
-
-WORKDIR /root
-
-COPY ./requirements.txt /lightllm/requirements.txt
-RUN --mount=type=cache,target=/root/.cache/pip pip install -r /lightllm/requirements.txt --ignore-installed --extra-index-url https://download.pytorch.org/whl/cu124
-
-RUN --mount=type=cache,target=/root/.cache/pip pip install vllm --pre --extra-index-url https://wheels.vllm.ai/nightly
-RUN --mount=type=cache,target=/root/.cache/pip git clone https://github.com/ModelTC/LightKernel.git && cd LightKernel &&  pip install --no-deps -v .
-
-RUN apt-get update && apt-get install -y libnuma-dev wget devscripts debhelper dh-make build-essential dkms
-RUN apt-get install -y ibverbs-providers infiniband-diags perftest rdma-core libibverbs-dev librdmacm-dev
-
-ENV CUDA_HOME=/usr/local/cuda \
-    GDRCOPY_HOME=/usr/src/gdrdrv-2.4.4/
-
-RUN mkdir -p /tmp/gdrcopy && cd /tmp \
- && git clone https://github.com/NVIDIA/gdrcopy.git -b v2.4.4 \
- && cd gdrcopy/packages \
- && CUDA=/usr/local/cuda ./build-deb-packages.sh \
- && dpkg -i gdrdrv-dkms_*.deb libgdrapi_*.deb gdrcopy-tests_*.deb gdrcopy_*.deb \
- && cd / && rm -rf /tmp/gdrcopy
-
- # Fix DeepEP IBGDA symlink
-RUN ln -sf /usr/lib/x86_64-linux-gnu/libmlx5.so.1 /usr/lib/x86_64-linux-gnu/libmlx5.so
-
-RUN wget https://developer.download.nvidia.com/compute/redist/nvshmem/3.3.9/source/nvshmem_src_cuda12-all-all-3.3.9.tar.gz \
- && tar -xf nvshmem_src_cuda12-all-all-3.3.9.tar.gz && mv nvshmem_src nvshmem \
- && cd nvshmem \
- && rm -f /root/nvshmem_src_cuda12-all-all-3.3.9.tar.gz \
- && NVSHMEM_SHMEM_SUPPORT=0 \
-    NVSHMEM_UCX_SUPPORT=0 \
-    NVSHMEM_USE_NCCL=0 \
-    NVSHMEM_MPI_SUPPORT=0 \
-    NVSHMEM_IBGDA_SUPPORT=1 \
-    NVSHMEM_PMIX_SUPPORT=0 \
-    NVSHMEM_TIMEOUT_DEVICE_POLLING=0 \
-    NVSHMEM_USE_GDRCOPY=1 \
-    cmake -S . -B build/ -DCMAKE_INSTALL_PREFIX=/root/nvshmem/install -DCMAKE_CUDA_ARCHITECTURES=90 \
- && cmake --build build --target install -j64
-
-ARG DEEPEP_COMMIT=b6ce310bb0b75079682d09bc2ebc063a074fbd58
-RUN git clone https://github.com/deepseek-ai/DeepEP.git && cd DeepEP && git checkout ${DEEPEP_COMMIT} && cd ..
-
-WORKDIR /root/DeepEP
-ENV NVSHMEM_DIR=/root/nvshmem/install
-RUN NVSHMEM_DIR=/root/nvshmem/install python setup.py install
-
-RUN apt-get update && apt-get install -y cmake automake autotools-dev  libtool libz-dev && \
-    DEBIAN_FRONTEND=noninteractive apt-get -y install --reinstall libibverbs-dev rdma-core ibverbs-utils libibumad-dev; \
-    rm -rf /usr/lib/ucx && \
-    rm -rf /opt/hpcx/ucx && \
-    cd /usr/local/src && \
-    git clone https://github.com/openucx/ucx.git && \
-    cd ucx && 			     \
-    git checkout v1.19.x &&	     \
-    ./autogen.sh && ./configure     \
-    --enable-shared             \
-    --disable-static            \
-    --disable-doxygen-doc       \
-    --enable-optimizations      \
-    --enable-cma                \
-    --enable-devel-headers      \
-    --with-cuda=/usr/local/cuda \
-    --with-verbs=yes                \
-    --with-dm                   \
-    --with-gdrcopy=/usr/local   \
-    --with-efa                  \
-    --enable-mt &&              \
-    make -j &&                      \
-    make -j install-strip &&        \
-    ldconfig;
-
-RUN apt-get update && apt-get install -y  pkg-config tmux net-tools ;  \
-    cd /usr/local/src; \
-    pip install --upgrade meson pybind11 patchelf; \
-    git clone https://github.com/ai-dynamo/nixl.git -b main && \
-    cd nixl && \
-    rm -rf build && \
-    mkdir build && \
-    meson setup build/ --prefix=/usr/local/nixl --buildtype=release && \
-    cd build && \
-    ninja && \
-    ninja install && \
-    cd .. && pip install . --no-deps;
-
-COPY . /lightllm
-RUN pip install -e /lightllm --no-cache-dir
diff --git a/docker/cuda_version_12.8.0/Dockerfile b/docker/cuda_version_12.8.0/Dockerfile
deleted file mode 100644
index 439031ce48..0000000000
--- a/docker/cuda_version_12.8.0/Dockerfile
+++ /dev/null
@@ -1,48 +0,0 @@
-ARG CUDA_VERSION=12.8.0
-FROM nvidia/cuda:${CUDA_VERSION}-cudnn-devel-ubuntu22.04
-ARG PYTHON_VERSION=3.10
-ARG MAMBA_VERSION=24.7.1-0
-ARG TARGETPLATFORM
-ENV PATH=/opt/conda/bin:$PATH \
-    CONDA_PREFIX=/opt/conda
-
-RUN chmod 777 -R /tmp && apt-get update --allow-insecure-repositories && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
-    ca-certificates \
-    libssl-dev \
-    curl \
-    g++ \
-    make \
-    git && \
-    rm -rf /var/lib/apt/lists/*
-
-RUN case ${TARGETPLATFORM} in \
-    "linux/arm64")  MAMBA_ARCH=aarch64  ;; \
-    *)              MAMBA_ARCH=x86_64   ;; \
-    esac && \
-    curl -fsSL -o ~/mambaforge.sh "https://github.com/conda-forge/miniforge/releases/download/${MAMBA_VERSION}/Mambaforge-${MAMBA_VERSION}-Linux-${MAMBA_ARCH}.sh" && \
-    bash ~/mambaforge.sh -b -p /opt/conda && \
-    rm ~/mambaforge.sh
-
-RUN case ${TARGETPLATFORM} in \
-    "linux/arm64")  exit 1 ;; \
-    *)              /opt/conda/bin/conda update -y conda &&  \
-    /opt/conda/bin/conda install -y "python=${PYTHON_VERSION}" ;; \
-    esac && \
-    /opt/conda/bin/conda clean -ya
-
-
-WORKDIR /root
-
-RUN pip install --no-cache-dir vllm==0.11.0 --pre --extra-index-url https://wheels.vllm.ai/nightly
-
-COPY ./requirements.txt /lightllm/requirements.txt
-RUN pip install -U pip
-RUN --mount=type=cache,target=/root/.cache/pip pip install -r /lightllm/requirements.txt --extra-index-url https://download.pytorch.org/whl/cu128
-
-# TODO: offline compile
-# RUN git clone https://github.com/ModelTC/LightKernel.git && cd LightKernel && pip install --no-deps -v .
-
-RUN apt-get update && apt-get install -y libnuma-dev # for sgl_kernel
-
-COPY . /lightllm
-RUN pip install -e /lightllm --no-cache-dir
diff --git a/docker/cuda_version_12.8.0/Dockerfile.deepep b/docker/cuda_version_12.8.0/Dockerfile.deepep
deleted file mode 100644
index 99997fc5bd..0000000000
--- a/docker/cuda_version_12.8.0/Dockerfile.deepep
+++ /dev/null
@@ -1,83 +0,0 @@
-ARG CUDA_VERSION=12.8.0
-FROM nvidia/cuda:${CUDA_VERSION}-cudnn-devel-ubuntu22.04
-ARG PYTHON_VERSION=3.10
-ARG MAMBA_VERSION=24.7.1-0
-ARG TARGETPLATFORM
-ENV PATH=/opt/conda/bin:$PATH \
-    CONDA_PREFIX=/opt/conda
-
-RUN chmod 777 -R /tmp && apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
-    ca-certificates \
-    libssl-dev \
-    curl \
-    g++ \
-    make \
-    git && \
-    rm -rf /var/lib/apt/lists/*
-
-RUN case ${TARGETPLATFORM} in \
-    "linux/arm64")  MAMBA_ARCH=aarch64  ;; \
-    *)              MAMBA_ARCH=x86_64   ;; \
-    esac && \
-    curl -fsSL -o ~/mambaforge.sh "https://github.com/conda-forge/miniforge/releases/download/${MAMBA_VERSION}/Mambaforge-${MAMBA_VERSION}-Linux-${MAMBA_ARCH}.sh" && \
-    bash ~/mambaforge.sh -b -p /opt/conda && \
-    rm ~/mambaforge.sh
-
-RUN case ${TARGETPLATFORM} in \
-    "linux/arm64")  exit 1 ;; \
-    *)              /opt/conda/bin/conda update -y conda &&  \
-    /opt/conda/bin/conda install -y "python=${PYTHON_VERSION}" ;; \
-    esac && \
-    /opt/conda/bin/conda clean -ya
-
-
-WORKDIR /root
-
-RUN pip install --no-cache-dir vllm==0.11.0 --pre --extra-index-url https://wheels.vllm.ai/nightly
-
-COPY ./requirements.txt /lightllm/requirements.txt
-RUN pip install -U pip
-RUN --mount=type=cache,target=/root/.cache/pip pip install -r /lightllm/requirements.txt --extra-index-url https://download.pytorch.org/whl/cu128
-# TODO: offline compile
-# RUN git clone https://github.com/ModelTC/LightKernel.git && cd LightKernel && pip install --no-deps -v .
-
-RUN apt-get update --allow-insecure-repositories && apt-get install -y libnuma-dev wget devscripts debhelper dh-make build-essential dkms
-RUN apt-get install -y ibverbs-providers infiniband-diags perftest rdma-core libibverbs-dev librdmacm-dev
-
-ENV CUDA_HOME=/usr/local/cuda \
-    GDRCOPY_HOME=/usr/src/gdrdrv-2.4.4/
-
-RUN mkdir -p /tmp/gdrcopy && cd /tmp \
- && git clone https://github.com/NVIDIA/gdrcopy.git -b v2.4.4 \
- && cd gdrcopy/packages \
- && CUDA=/usr/local/cuda ./build-deb-packages.sh \
- && dpkg -i gdrdrv-dkms_*.deb libgdrapi_*.deb gdrcopy-tests_*.deb gdrcopy_*.deb \
- && cd / && rm -rf /tmp/gdrcopy
-
- # Fix DeepEP IBGDA symlink
-RUN ln -sf /usr/lib/x86_64-linux-gnu/libmlx5.so.1 /usr/lib/x86_64-linux-gnu/libmlx5.so
-
-RUN wget https://developer.download.nvidia.com/compute/redist/nvshmem/3.3.9/source/nvshmem_src_cuda12-all-all-3.3.9.tar.gz \
- && tar -xf nvshmem_src_cuda12-all-all-3.3.9.tar.gz && mv nvshmem_src nvshmem \
- && cd nvshmem \
- && rm -f /root/nvshmem_src_cuda12-all-all-3.3.9.tar.gz \
- && NVSHMEM_SHMEM_SUPPORT=0 \
-    NVSHMEM_UCX_SUPPORT=0 \
-    NVSHMEM_USE_NCCL=0 \
-    NVSHMEM_MPI_SUPPORT=0 \
-    NVSHMEM_IBGDA_SUPPORT=1 \
-    NVSHMEM_PMIX_SUPPORT=0 \
-    NVSHMEM_TIMEOUT_DEVICE_POLLING=0 \
-    NVSHMEM_USE_GDRCOPY=1 \
-    cmake -S . -B build/ -DCMAKE_INSTALL_PREFIX=/root/nvshmem/install -DCMAKE_CUDA_ARCHITECTURES=90 \
- && cmake --build build --target install -j64
-
-ARG DEEPEP_COMMIT=b6ce310bb0b75079682d09bc2ebc063a074fbd58
-RUN git clone https://github.com/deepseek-ai/DeepEP.git && cd DeepEP && git checkout ${DEEPEP_COMMIT} && cd ..
-
-WORKDIR /root/DeepEP
-ENV NVSHMEM_DIR=/root/nvshmem/install
-RUN NVSHMEM_DIR=/root/nvshmem/install python setup.py install
-
-COPY . /lightllm
-RUN pip install -e /lightllm --no-cache-dir
\ No newline at end of file
diff --git a/docker/cuda_version_12.8.0/Dockerfile.nixl b/docker/cuda_version_12.8.0/Dockerfile.nixl
deleted file mode 100644
index 4bcb66af56..0000000000
--- a/docker/cuda_version_12.8.0/Dockerfile.nixl
+++ /dev/null
@@ -1,95 +0,0 @@
-ARG CUDA_VERSION=12.8.0
-FROM nvidia/cuda:${CUDA_VERSION}-cudnn-devel-ubuntu22.04
-ARG PYTHON_VERSION=3.10
-ARG MAMBA_VERSION=24.7.1-0
-ARG TARGETPLATFORM
-ENV PATH=/opt/conda/bin:$PATH \
-    CONDA_PREFIX=/opt/conda
-
-RUN chmod 777 -R /tmp && apt-get update --allow-insecure-repositories && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
-    ca-certificates \
-    libssl-dev \
-    curl \
-    g++ \
-    make \
-    git && \
-    rm -rf /var/lib/apt/lists/*
-
-RUN case ${TARGETPLATFORM} in \
-    "linux/arm64")  MAMBA_ARCH=aarch64  ;; \
-    *)              MAMBA_ARCH=x86_64   ;; \
-    esac && \
-    curl -fsSL -o ~/mambaforge.sh -v "https://github.com/conda-forge/miniforge/releases/download/${MAMBA_VERSION}/Mambaforge-${MAMBA_VERSION}-Linux-${MAMBA_ARCH}.sh" && \
-    bash ~/mambaforge.sh -b -p /opt/conda && \
-    rm ~/mambaforge.sh
-
-RUN case ${TARGETPLATFORM} in \
-    "linux/arm64")  exit 1 ;; \
-    *)              /opt/conda/bin/conda update -y conda &&  \
-    /opt/conda/bin/conda install -y "python=${PYTHON_VERSION}" ;; \
-    esac && \
-    /opt/conda/bin/conda clean -ya
-
-
-WORKDIR /root
-
-RUN --mount=type=cache,target=/root/.cache/pip pip install vllm==0.11.0 --pre --extra-index-url https://wheels.vllm.ai/nightly
-
-COPY ./requirements.txt /lightllm/requirements.txt
-RUN --mount=type=cache,target=/root/.cache/pip pip install -r /lightllm/requirements.txt --extra-index-url https://download.pytorch.org/whl/cu128
-
-RUN --mount=type=cache,target=/root/.cache/pip pip install --no-deps -v 'git+https://github.com/ModelTC/LightKernel.git@07f2f62af5deb41f10a22660f9f42dba9273361e#egg=lightllm_kernel'
-
-RUN apt-get update && apt-get install -y libnuma-dev wget devscripts debhelper dh-make build-essential dkms
-RUN apt-get install -y ibverbs-providers infiniband-diags perftest rdma-core libibverbs-dev librdmacm-dev
-
-ENV CUDA_HOME=/usr/local/cuda \
-    GDRCOPY_HOME=/usr/src/gdrdrv-2.4.4/
-
-RUN mkdir -p /tmp/gdrcopy && cd /tmp \
- && git clone https://github.com/NVIDIA/gdrcopy.git -b v2.4.4 \
- && cd gdrcopy/packages \
- && CUDA=/usr/local/cuda ./build-deb-packages.sh \
- && dpkg -i gdrdrv-dkms_*.deb libgdrapi_*.deb gdrcopy-tests_*.deb gdrcopy_*.deb \
- && cd / && rm -rf /tmp/gdrcopy
-
-RUN apt-get update && apt-get install -y cmake automake autotools-dev  libtool libz-dev && \
-    DEBIAN_FRONTEND=noninteractive apt-get -y install --reinstall libibverbs-dev rdma-core ibverbs-utils libibumad-dev; \
-    rm -rf /usr/lib/ucx && \
-    rm -rf /opt/hpcx/ucx && \
-    cd /usr/local/src && \
-    git clone https://github.com/openucx/ucx.git && \
-    cd ucx && 			     \
-    git checkout v1.19.x &&	     \
-    ./autogen.sh && ./configure     \
-    --enable-shared             \
-    --disable-static            \
-    --disable-doxygen-doc       \
-    --enable-optimizations      \
-    --enable-cma                \
-    --enable-devel-headers      \
-    --with-cuda=/usr/local/cuda \
-    --with-verbs=yes                \
-    --with-dm                   \
-    --with-gdrcopy=/usr/local   \
-    --with-efa                  \
-    --enable-mt &&              \
-    make -j &&                      \
-    make -j install-strip &&        \
-    ldconfig;
-
-RUN apt-get update && apt-get install -y  pkg-config tmux net-tools libaio-dev ;  \
-    cd /usr/local/src; \
-    pip install --upgrade meson pybind11 patchelf; \
-    git clone https://github.com/ai-dynamo/nixl.git -b 0.8.0 && \
-    cd nixl && \
-    rm -rf build && \
-    mkdir build && \
-    meson setup build/ --prefix=/usr/local/nixl --buildtype=release && \
-    cd build && \
-    ninja && \
-    ninja install && \
-    cd .. && pip install . --no-deps;
-
-COPY . /lightllm
-RUN pip install -e /lightllm --no-cache-dir
diff --git a/docker/cuda_version_12.8.0/Dockerfile.nixl.deepep b/docker/cuda_version_12.8.0/Dockerfile.nixl.deepep
deleted file mode 100644
index 96461dcc1b..0000000000
--- a/docker/cuda_version_12.8.0/Dockerfile.nixl.deepep
+++ /dev/null
@@ -1,122 +0,0 @@
-ARG CUDA_VERSION=12.8.0
-FROM nvidia/cuda:${CUDA_VERSION}-cudnn-devel-ubuntu22.04
-
-ARG PYTHON_VERSION=3.10
-ARG MAMBA_VERSION=24.7.1-0
-ARG TARGETPLATFORM
-
-ENV PATH=/opt/conda/bin:$PATH \
-    CONDA_PREFIX=/opt/conda
-
-RUN chmod 777 -R /tmp && apt-get update --allow-insecure-repositories && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
-    ca-certificates \
-    libssl-dev \
-    curl \
-    g++ \
-    make \
-    git && \
-    rm -rf /var/lib/apt/lists/*
-
-RUN case ${TARGETPLATFORM} in \
-    "linux/arm64")  MAMBA_ARCH=aarch64  ;; \
-    *)              MAMBA_ARCH=x86_64   ;; \
-    esac && \
-    curl -fsSL -o ~/mambaforge.sh -v "https://github.com/conda-forge/miniforge/releases/download/${MAMBA_VERSION}/Mambaforge-${MAMBA_VERSION}-Linux-${MAMBA_ARCH}.sh" && \
-    bash ~/mambaforge.sh -b -p /opt/conda && \
-    rm ~/mambaforge.sh
-
-RUN case ${TARGETPLATFORM} in \
-    "linux/arm64")  exit 1 ;; \
-    *)              /opt/conda/bin/conda update -y conda &&  \
-    /opt/conda/bin/conda install -y "python=${PYTHON_VERSION}" ;; \
-    esac && \
-    /opt/conda/bin/conda clean -ya
-
-
-WORKDIR /root
-
-RUN --mount=type=cache,target=/root/.cache/pip pip install vllm==0.11.0 --pre --extra-index-url https://wheels.vllm.ai/nightly
-
-COPY ./requirements.txt /lightllm/requirements.txt
-RUN --mount=type=cache,target=/root/.cache/pip pip install -r /lightllm/requirements.txt --extra-index-url https://download.pytorch.org/whl/cu128
-
-RUN --mount=type=cache,target=/root/.cache/pip pip install --no-deps -v 'git+https://github.com/ModelTC/LightKernel.git@07f2f62af5deb41f10a22660f9f42dba9273361e#egg=lightllm_kernel'
-
-RUN apt-get update && apt-get install -y libnuma-dev wget devscripts debhelper dh-make build-essential dkms
-RUN apt-get install -y ibverbs-providers infiniband-diags perftest rdma-core libibverbs-dev librdmacm-dev
-
-ENV CUDA_HOME=/usr/local/cuda \
-    GDRCOPY_HOME=/usr/src/gdrdrv-2.4.4/
-
-RUN mkdir -p /tmp/gdrcopy && cd /tmp \
- && git clone https://github.com/NVIDIA/gdrcopy.git -b v2.4.4 \
- && cd gdrcopy/packages \
- && CUDA=/usr/local/cuda ./build-deb-packages.sh \
- && dpkg -i gdrdrv-dkms_*.deb libgdrapi_*.deb gdrcopy-tests_*.deb gdrcopy_*.deb \
- && cd / && rm -rf /tmp/gdrcopy
-
- # Fix DeepEP IBGDA symlink
-RUN ln -sf /usr/lib/x86_64-linux-gnu/libmlx5.so.1 /usr/lib/x86_64-linux-gnu/libmlx5.so
-
-RUN wget https://developer.download.nvidia.com/compute/redist/nvshmem/3.3.9/source/nvshmem_src_cuda12-all-all-3.3.9.tar.gz \
- && tar -xf nvshmem_src_cuda12-all-all-3.3.9.tar.gz && mv nvshmem_src nvshmem \
- && cd nvshmem \
- && rm -f /root/nvshmem_src_cuda12-all-all-3.3.9.tar.gz \
- && NVSHMEM_SHMEM_SUPPORT=0 \
-    NVSHMEM_UCX_SUPPORT=0 \
-    NVSHMEM_USE_NCCL=0 \
-    NVSHMEM_MPI_SUPPORT=0 \
-    NVSHMEM_IBGDA_SUPPORT=1 \
-    NVSHMEM_PMIX_SUPPORT=0 \
-    NVSHMEM_TIMEOUT_DEVICE_POLLING=0 \
-    NVSHMEM_USE_GDRCOPY=1 \
-    cmake -S . -B build/ -DCMAKE_INSTALL_PREFIX=/root/nvshmem/install -DCMAKE_CUDA_ARCHITECTURES=90 \
- && cmake --build build --target install -j64
-
-ARG DEEPEP_COMMIT=b6ce310bb0b75079682d09bc2ebc063a074fbd58
-RUN git clone https://github.com/deepseek-ai/DeepEP.git && cd DeepEP && git checkout ${DEEPEP_COMMIT} && cd ..
-
-WORKDIR /root/DeepEP
-ENV NVSHMEM_DIR=/root/nvshmem/install
-RUN NVSHMEM_DIR=/root/nvshmem/install python setup.py install
-
-RUN apt-get update && apt-get install -y cmake automake autotools-dev  libtool libz-dev && \
-    DEBIAN_FRONTEND=noninteractive apt-get -y install --reinstall libibverbs-dev rdma-core ibverbs-utils libibumad-dev; \
-    rm -rf /usr/lib/ucx && \
-    rm -rf /opt/hpcx/ucx && \
-    cd /usr/local/src && \
-    git clone https://github.com/openucx/ucx.git && \
-    cd ucx && 			     \
-    git checkout v1.19.x &&	     \
-    ./autogen.sh && ./configure     \
-    --enable-shared             \
-    --disable-static            \
-    --disable-doxygen-doc       \
-    --enable-optimizations      \
-    --enable-cma                \
-    --enable-devel-headers      \
-    --with-cuda=/usr/local/cuda \
-    --with-verbs=yes                \
-    --with-dm                   \
-    --with-gdrcopy=/usr/local   \
-    --with-efa                  \
-    --enable-mt &&              \
-    make -j &&                      \
-    make -j install-strip &&        \
-    ldconfig;
-
-RUN apt-get update && apt-get install -y  pkg-config tmux net-tools libaio-dev ;  \
-    cd /usr/local/src; \
-    pip install --upgrade meson pybind11 patchelf; \
-    git clone https://github.com/ai-dynamo/nixl.git -b 0.8.0 && \
-    cd nixl && \
-    rm -rf build && \
-    mkdir build && \
-    meson setup build/ --prefix=/usr/local/nixl --buildtype=release && \
-    cd build && \
-    ninja && \
-    ninja install && \
-    cd .. && pip install . --no-deps;
-
-COPY . /lightllm
-RUN pip install -e /lightllm --no-cache-dir
diff --git a/docker/cuda_version_12.8.0/Dockerfile.nixl.deepep.cache b/docker/cuda_version_12.8.0/Dockerfile.nixl.deepep.cache
deleted file mode 100644
index 2ff2dc3616..0000000000
--- a/docker/cuda_version_12.8.0/Dockerfile.nixl.deepep.cache
+++ /dev/null
@@ -1,124 +0,0 @@
-ARG CUDA_VERSION=12.8.0
-FROM nvidia/cuda:${CUDA_VERSION}-cudnn-devel-ubuntu22.04
-
-ARG PYTHON_VERSION=3.10
-ARG MAMBA_VERSION=24.7.1-0
-ARG TARGETPLATFORM
-
-ENV PATH=/opt/conda/bin:$PATH \
-    CONDA_PREFIX=/opt/conda
-
-RUN chmod 777 -R /tmp && apt-get update --allow-insecure-repositories && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
-    ca-certificates \
-    libssl-dev \
-    curl \
-    g++ \
-    make \
-    git && \
-    rm -rf /var/lib/apt/lists/*
-
-RUN case ${TARGETPLATFORM} in \
-    "linux/arm64")  MAMBA_ARCH=aarch64  ;; \
-    *)              MAMBA_ARCH=x86_64   ;; \
-    esac && \
-    curl -fsSL -o ~/mambaforge.sh -v "https://github.com/conda-forge/miniforge/releases/download/${MAMBA_VERSION}/Mambaforge-${MAMBA_VERSION}-Linux-${MAMBA_ARCH}.sh" && \
-    bash ~/mambaforge.sh -b -p /opt/conda && \
-    rm ~/mambaforge.sh
-
-RUN case ${TARGETPLATFORM} in \
-    "linux/arm64")  exit 1 ;; \
-    *)              /opt/conda/bin/conda update -y conda &&  \
-    /opt/conda/bin/conda install -y "python=${PYTHON_VERSION}" && \
-    /opt/conda/bin/conda install -y boost ;; \
-    esac && \
-    /opt/conda/bin/conda clean -ya
-
-
-WORKDIR /root
-
-RUN --mount=type=cache,target=/root/.cache/pip pip install vllm==0.11.0 --pre --extra-index-url https://wheels.vllm.ai/nightly
-
-COPY ./requirements.txt /lightllm/requirements.txt
-RUN --mount=type=cache,target=/root/.cache/pip pip install -r /lightllm/requirements.txt --extra-index-url https://download.pytorch.org/whl/cu128
-
-RUN --mount=type=cache,target=/root/.cache/pip pip install --no-deps -v 'git+https://github.com/ModelTC/LightKernel.git@07f2f62af5deb41f10a22660f9f42dba9273361e#egg=lightllm_kernel'
-RUN --mount=type=cache,target=/root/.cache/pip pip install --no-deps -v 'git+https://github.com/ModelTC/LightMem.git@5900baf92d85ef4dbda6124093506b0af906011a#egg=light_mem'
-
-RUN apt-get update && apt-get install -y libnuma-dev wget devscripts debhelper dh-make build-essential dkms
-RUN apt-get install -y ibverbs-providers infiniband-diags perftest rdma-core libibverbs-dev librdmacm-dev
-
-ENV CUDA_HOME=/usr/local/cuda \
-    GDRCOPY_HOME=/usr/src/gdrdrv-2.4.4/
-
-RUN mkdir -p /tmp/gdrcopy && cd /tmp \
- && git clone https://github.com/NVIDIA/gdrcopy.git -b v2.4.4 \
- && cd gdrcopy/packages \
- && CUDA=/usr/local/cuda ./build-deb-packages.sh \
- && dpkg -i gdrdrv-dkms_*.deb libgdrapi_*.deb gdrcopy-tests_*.deb gdrcopy_*.deb \
- && cd / && rm -rf /tmp/gdrcopy
-
- # Fix DeepEP IBGDA symlink
-RUN ln -sf /usr/lib/x86_64-linux-gnu/libmlx5.so.1 /usr/lib/x86_64-linux-gnu/libmlx5.so
-
-RUN wget https://developer.download.nvidia.com/compute/redist/nvshmem/3.3.9/source/nvshmem_src_cuda12-all-all-3.3.9.tar.gz \
- && tar -xf nvshmem_src_cuda12-all-all-3.3.9.tar.gz && mv nvshmem_src nvshmem \
- && cd nvshmem \
- && rm -f /root/nvshmem_src_cuda12-all-all-3.3.9.tar.gz \
- && NVSHMEM_SHMEM_SUPPORT=0 \
-    NVSHMEM_UCX_SUPPORT=0 \
-    NVSHMEM_USE_NCCL=0 \
-    NVSHMEM_MPI_SUPPORT=0 \
-    NVSHMEM_IBGDA_SUPPORT=1 \
-    NVSHMEM_PMIX_SUPPORT=0 \
-    NVSHMEM_TIMEOUT_DEVICE_POLLING=0 \
-    NVSHMEM_USE_GDRCOPY=1 \
-    cmake -S . -B build/ -DCMAKE_INSTALL_PREFIX=/root/nvshmem/install -DCMAKE_CUDA_ARCHITECTURES=90 \
- && cmake --build build --target install -j64
-
-ARG DEEPEP_COMMIT=b6ce310bb0b75079682d09bc2ebc063a074fbd58
-RUN git clone https://github.com/deepseek-ai/DeepEP.git && cd DeepEP && git checkout ${DEEPEP_COMMIT} && cd ..
-
-WORKDIR /root/DeepEP
-ENV NVSHMEM_DIR=/root/nvshmem/install
-RUN NVSHMEM_DIR=/root/nvshmem/install python setup.py install
-
-RUN apt-get update && apt-get install -y cmake automake autotools-dev  libtool libz-dev && \
-    DEBIAN_FRONTEND=noninteractive apt-get -y install --reinstall libibverbs-dev rdma-core ibverbs-utils libibumad-dev; \
-    rm -rf /usr/lib/ucx && \
-    rm -rf /opt/hpcx/ucx && \
-    cd /usr/local/src && \
-    git clone https://github.com/openucx/ucx.git && \
-    cd ucx && 			     \
-    git checkout v1.19.x &&	     \
-    ./autogen.sh && ./configure     \
-    --enable-shared             \
-    --disable-static            \
-    --disable-doxygen-doc       \
-    --enable-optimizations      \
-    --enable-cma                \
-    --enable-devel-headers      \
-    --with-cuda=/usr/local/cuda \
-    --with-verbs=yes                \
-    --with-dm                   \
-    --with-gdrcopy=/usr/local   \
-    --with-efa                  \
-    --enable-mt &&              \
-    make -j &&                      \
-    make -j install-strip &&        \
-    ldconfig;
-
-RUN apt-get update && apt-get install -y  pkg-config tmux net-tools libaio-dev ;  \
-    cd /usr/local/src; \
-    pip install --upgrade meson pybind11 patchelf; \
-    git clone https://github.com/ai-dynamo/nixl.git -b 0.8.0 && \
-    cd nixl && \
-    rm -rf build && \
-    mkdir build && \
-    meson setup build/ --prefix=/usr/local/nixl --buildtype=release && \
-    cd build && \
-    ninja && \
-    ninja install && \
-    cd .. && pip install . --no-deps;
-
-COPY . /lightllm
-RUN pip install -e /lightllm --no-cache-dir
diff --git a/docker/scripts/build.sh b/docker/scripts/build.sh
new file mode 100644
index 0000000000..1699b39dd7
--- /dev/null
+++ b/docker/scripts/build.sh
@@ -0,0 +1,134 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+# Build the LightLLM Docker image from docker/Dockerfile.
+#
+# Notes:
+# - All toggles can be configured via CLI flags or environment variables
+#   (ENABLE_DEEPEP, ENABLE_NIXL, ENABLE_CACHE; each must be 0 or 1).
+# - Default behavior matches the old build_default.sh: enable both DEEPEP and NIXL, and enable cache.
+#
+# Examples:
+#   ./docker/scripts/build.sh
+#   ./docker/scripts/build.sh --lite
+#   ./docker/scripts/build.sh --no-deepep --no-cache
+#   ./docker/scripts/build.sh --no-nixl
+#   ./docker/scripts/build.sh --cuda-version 12.4.1 --image-prefix myrepo/lightllm
+#   IMAGE_TAG=custom-cuda12 ./docker/scripts/build.sh
+#
+# Options:
+#   --no-deepep               Disable DEEPEP (default: enabled)
+#   --no-nixl                 Disable NIXL (default: enabled)
+#   --no-cache                Disable cache (default: enabled)
+#   --lite                    Disable DEEPEP, NIXL and cache in one shot
+#   --cuda-version <ver>      CUDA version (default: 12.8.0)
+#   --image-prefix <name>     Image prefix (default: lightllm)
+#   --image-tag <tag>         Image tag (default: generated from enabled features)
+#   -h / --help               Show help
+#
+# Default image tag (used when no tag is given):
+#   all features enabled      cuda<ver>
+#   no feature enabled        lite-cuda<ver>
+#   any other combination     <enabled features joined by '.'>-cuda<ver>,
+#                             e.g. nixl.deepep-cuda<ver> or deepep.cache-cuda<ver>
+
+# Resolve paths before changing directory so that --help can still read this
+# file when the script was invoked through a relative path.
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+SCRIPT_PATH="${SCRIPT_DIR}/$(basename "${BASH_SOURCE[0]}")"
+ROOT_DIR="$(cd "${SCRIPT_DIR}/../.." && pwd)"
+cd "${ROOT_DIR}"
+
+IMAGE_PREFIX="${IMAGE_PREFIX:-lightllm}"
+CUDA_VERSION="${CUDA_VERSION:-12.8.0}"
+IMAGE_TAG="${IMAGE_TAG:-}"
+
+ENABLE_DEEPEP="${ENABLE_DEEPEP:-1}"
+ENABLE_NIXL="${ENABLE_NIXL:-1}"
+ENABLE_CACHE="${ENABLE_CACHE:-1}"
+
+# Print the header comment block of this file: skip the shebang, start at the
+# first comment line and stop at the first line that is not a comment.
+print_help() {
+  awk 'NR == 1 { next }
+       /^#/    { seen = 1; sub(/^# ?/, ""); print; next }
+       seen    { exit }' "${SCRIPT_PATH}"
+}
+
+# Abort unless the option named in $1 is followed by a non-empty value in $2.
+require_value() {
+  if [[ $# -lt 2 || -z "$2" ]]; then
+    echo "Option $1 requires a value" >&2
+    exit 1
+  fi
+}
+
+while [[ $# -gt 0 ]]; do
+  case "$1" in
+    --no-deepep) ENABLE_DEEPEP=0 ;;
+    --no-nixl) ENABLE_NIXL=0 ;;
+    --no-cache) ENABLE_CACHE=0 ;;
+    --lite)
+      ENABLE_DEEPEP=0
+      ENABLE_NIXL=0
+      ENABLE_CACHE=0
+      ;;
+    --cuda-version)
+      require_value "$@"
+      CUDA_VERSION="$2"
+      shift
+      ;;
+    --image-prefix)
+      require_value "$@"
+      IMAGE_PREFIX="$2"
+      shift
+      ;;
+    --image-tag)
+      require_value "$@"
+      IMAGE_TAG="$2"
+      shift
+      ;;
+    -h|--help)
+      print_help
+      exit 0
+      ;;
+    *)
+      echo "Unknown option: $1" >&2
+      print_help >&2
+      exit 1
+      ;;
+  esac
+  shift
+done
+
+# The toggles are forwarded verbatim as build args and drive the tag below,
+# so reject anything that is not exactly 0 or 1.
+for toggle in ENABLE_DEEPEP ENABLE_NIXL ENABLE_CACHE; do
+  if [[ "${!toggle}" != 0 && "${!toggle}" != 1 ]]; then
+    echo "${toggle} must be 0 or 1 (got '${!toggle}')" >&2
+    exit 1
+  fi
+done
+
+# Generate the default image tag from the enabled features. Every feature
+# combination maps to a distinct tag so that one build never overwrites the
+# image of another.
+if [[ -z "${IMAGE_TAG}" ]]; then
+  tag_parts=()
+  if [[ "${ENABLE_NIXL}" == 1 ]]; then tag_parts+=("nixl"); fi
+  if [[ "${ENABLE_DEEPEP}" == 1 ]]; then tag_parts+=("deepep"); fi
+  if [[ "${ENABLE_CACHE}" == 1 ]]; then tag_parts+=("cache"); fi
+
+  case "${#tag_parts[@]}" in
+    3) IMAGE_TAG="cuda${CUDA_VERSION}" ;;       # all on: same tag as old build_default.sh
+    0) IMAGE_TAG="lite-cuda${CUDA_VERSION}" ;;
+    *) IMAGE_TAG="$(IFS='.'; echo "${tag_parts[*]}")-cuda${CUDA_VERSION}" ;;
+  esac
+fi
+
+DOCKER_BUILDKIT=1 docker build -f docker/Dockerfile \
+  --build-arg CUDA_VERSION="${CUDA_VERSION}" \
+  --build-arg ENABLE_DEEPEP="${ENABLE_DEEPEP}" \
+  --build-arg ENABLE_NIXL="${ENABLE_NIXL}" \
+  --build-arg ENABLE_CACHE="${ENABLE_CACHE}" \
+  -t "${IMAGE_PREFIX}:${IMAGE_TAG}" .
diff --git a/docs/CN/source/getting_started/installation.rst b/docs/CN/source/getting_started/installation.rst
index 5fa0e304d2..4a28cc6d1f 100755
--- a/docs/CN/source/getting_started/installation.rst
+++ b/docs/CN/source/getting_started/installation.rst
@@ -9,7 +9,7 @@ Lightllm 是一个纯python开发的推理框架，其中的算子使用triton
 ------------
 
 * 操作系统: Linux
-* Python: 3.9
+* Python: 3.10
 * GPU: 计算能力 7.0 以上 (e.g., V100, T4, RTX20xx, A100, L4, H100, 等等.)
 
 .. _build_from_docker:
@@ -41,7 +41,7 @@ Lightllm 是一个纯python开发的推理框架，其中的算子使用triton
 
     $ # 进入代码仓库的根目录
     $ cd /lightllm
-    $ # 手动构建镜像, docker 目录下有不同功能场景的镜像构建文件，按需构建。
+    $ # 手动构建镜像。
     $ docker build -t <image_name> -f ./docker/Dockerfile .
     $
     $ # 运行
@@ -57,8 +57,7 @@ Lightllm 是一个纯python开发的推理框架，其中的算子使用triton
     $ python tools/quick_launch_docker.py --help
 
 .. note::
-    如果你使用多卡，你也许需要提高上面的 –shm_size 的参数设置。如果需要跑DeepSeek模型的EP模式，请使用镜像
-    ghcr.io/modeltc/lightllm:main-deepep。
+    如果你使用多卡，你也许需要提高上面的 --shm-size 的参数设置。
 
 .. _build_from_source:
 
@@ -70,14 +69,14 @@ Lightllm 是一个纯python开发的推理框架，其中的算子使用triton
 .. code-block:: console
 
     $ # (推荐) 创建一个新的 conda 环境
-    $ conda create -n lightllm python=3.9 -y
+    $ conda create -n lightllm python=3.10 -y
     $ conda activate lightllm
     $
     $ # 下载lightllm的最新源码
     $ git clone https://github.com/ModelTC/lightllm.git
     $ cd lightllm
     $
-    $ # 安装lightllm的依赖 (cuda 12.4)
+    $ # 安装lightllm的依赖 (cuda 12.8)
-    $ pip install -r requirements.txt --extra-index-url https://download.pytorch.org/whl/cu124
+    $ pip install -r requirements.txt --extra-index-url https://download.pytorch.org/whl/cu128
     $
     $ # 安装lightllm的依赖 (摩尔线程 GPU)
@@ -85,22 +84,4 @@ Lightllm 是一个纯python开发的推理框架，其中的算子使用triton
     $ pip install -r requirements-musa.txt
     $
     $ # 安装lightllm
-    $ python setup.py install
-
-.. note::
-
-    Lightllm 的代码在多种GPU上都进行了测试，包括 V100, A100, A800, 4090, 和 H800。
-    如果你使用 A100 、A800 等显卡，那么推荐你安装 triton==3.0.0 ：
-
-    .. code-block:: console
-
-        $ pip install triton==3.0.0 --no-deps
-
-    如果你使用 H800、V100 等显卡，那么推荐你安装 triton-nightly：
-
-    .. code-block:: console
-
-        $ pip install -U --index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/Triton-Nightly/pypi/simple/ triton-nightly --no-deps
-
-    具体原因可以参考：`issue <https://github.com/triton-lang/triton/issues/3619>`_ 和 `fix PR <https://github.com/triton-lang/triton/pull/3638>`_
-
+    $ python setup.py install
\ No newline at end of file
diff --git a/docs/EN/source/getting_started/installation.rst b/docs/EN/source/getting_started/installation.rst
index 6439c48de3..e008ac14c9 100755
--- a/docs/EN/source/getting_started/installation.rst
+++ b/docs/EN/source/getting_started/installation.rst
@@ -9,7 +9,7 @@ Environment Requirements
 ------------------------
 
 * Operating System: Linux
-* Python: 3.9
+* Python: 3.10
 * GPU: Compute Capability 7.0 or higher (e.g., V100, T4, RTX20xx, A100, L4, H100, etc.)
 
 .. _build_from_docker:
@@ -61,10 +61,7 @@ Or you can directly use the script to launch the image and run it with one click
     $ python tools/quick_launch_docker.py --help
 
 .. note::
-    If you use multiple GPUs, you may need to increase the --shm-size parameter setting above. If you need to run DeepSeek models in EP mode, please use the image
-    ghcr.io/modeltc/lightllm:main-deepep.
-
-.. _build_from_source:
+    If you use multiple GPUs, you may need to increase the --shm-size parameter setting above.
 
 Installation from Source
 ------------------------
@@ -74,14 +71,14 @@ You can also install Lightllm from source:
 .. code-block:: console
 
     $ # (Recommended) Create a new conda environment
-    $ conda create -n lightllm python=3.9 -y
+    $ conda create -n lightllm python=3.10 -y
     $ conda activate lightllm
     $
     $ # Download the latest Lightllm source code
     $ git clone https://github.com/ModelTC/lightllm.git
     $ cd lightllm
     $
-    $ # Install Lightllm dependencies (cuda 12.4)
+    $ # Install Lightllm dependencies (cuda 12.8)
-    $ pip install -r requirements.txt --extra-index-url https://download.pytorch.org/whl/cu124
+    $ pip install -r requirements.txt --extra-index-url https://download.pytorch.org/whl/cu128
     $
     $ # Install Lightllm dependencies (Moore Threads GPU)
@@ -89,21 +86,4 @@ You can also install Lightllm from source:
     $ pip install -r requirements-musa.txt
     $
     $ # Install Lightllm
-    $ python setup.py install
-
-.. note::
-
-    Lightllm code has been tested on various GPUs including V100, A100, A800, 4090, and H800.
-    If you use A100, A800 and other graphics cards, it is recommended to install triton==3.0.0:
-
-    .. code-block:: console
-
-        $ pip install triton==3.0.0 --no-deps
-
-    If you use H800, V100 and other graphics cards, it is recommended to install triton-nightly:
-
-    .. code-block:: console
-
-        $ pip install -U --index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/Triton-Nightly/pypi/simple/ triton-nightly --no-deps
-
-    For specific reasons, please refer to: `issue <https://github.com/triton-lang/triton/issues/3619>`_ and `fix PR <https://github.com/triton-lang/triton/pull/3638>`_
\ No newline at end of file
+    $ python setup.py install
\ No newline at end of file