From ff7c0166faa1645b1e4d7f283f307c421910edea Mon Sep 17 00:00:00 2001 From: haobibo Date: Mon, 22 Sep 2025 20:29:09 +0800 Subject: [PATCH 01/11] try to reduce torch img size --- docker_core/Dockerfile | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/docker_core/Dockerfile b/docker_core/Dockerfile index 3a22eba0..9c8e6386 100644 --- a/docker_core/Dockerfile +++ b/docker_core/Dockerfile @@ -77,6 +77,11 @@ RUN set -eux \ echo "If CUDA version < 11.7, install pytorch 1.x, else install pytorch 2.x; if cuda doesn't exist, install pytorch 2.x" \ && export CUDA_VER_TORCH="117" && V=$([[ "${CUDA_VER:-999}" -lt "${CUDA_VER_TORCH}" ]] && echo "torch<2" || echo "torch") \ && pip install --no-cache-dir --root-user-action=ignore -U --pre "${V}" torchvision torchaudio --index-url "https://download.pytorch.org/whl/${IDX}" \ + && if [ "$(echo "${IDX}" | cut -c1-4)" = "cuda" ]; then \ + echo "Uninstalling nvidia python packages to reduce disk size..." \ + && pip freeze | grep -i '^nvidia-' | cut -d'=' -f1 | xargs -r pip uninstall -y \ + && apt-get -qq install -y --no-install-recommends libcusparseLt0 libnccl2 libnccl-dev \ + fi ) || echo "Skipping pytorch install" ) \ # ----------------------------- && echo "Handle paddle installation, cpu/gpu: https://www.paddlepaddle.org.cn/" \ From f09f5efe316522d7543f509ccf60b7f4bcd2d205 Mon Sep 17 00:00:00 2001 From: haobibo Date: Mon, 22 Sep 2025 20:43:07 +0800 Subject: [PATCH 02/11] typo --- docker_core/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker_core/Dockerfile b/docker_core/Dockerfile index 9c8e6386..047481b7 100644 --- a/docker_core/Dockerfile +++ b/docker_core/Dockerfile @@ -81,7 +81,7 @@ RUN set -eux \ echo "Uninstalling nvidia python packages to reduce disk size..." \ && pip freeze | grep -i '^nvidia-' | cut -d'=' -f1 | xargs -r pip uninstall -y \ && apt-get -qq install -y --no-install-recommends libcusparseLt0 libnccl2 libnccl-dev \ - fi + fi \ ) || echo "Skipping pytorch install" ) \ # ----------------------------- && echo "Handle paddle installation, cpu/gpu: https://www.paddlepaddle.org.cn/" \ From 3f4584094c6cd0116117663a9bc455eb287b61e8 Mon Sep 17 00:00:00 2001 From: haobibo Date: Mon, 22 Sep 2025 20:51:55 +0800 Subject: [PATCH 03/11] debug --- docker_core/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker_core/Dockerfile b/docker_core/Dockerfile index 047481b7..69fec79b 100644 --- a/docker_core/Dockerfile +++ b/docker_core/Dockerfile @@ -80,7 +80,7 @@ RUN set -eux \ && if [ "$(echo "${IDX}" | cut -c1-4)" = "cuda" ]; then \ echo "Uninstalling nvidia python packages to reduce disk size..." \ && pip freeze | grep -i '^nvidia-' | cut -d'=' -f1 | xargs -r pip uninstall -y \ - && apt-get -qq install -y --no-install-recommends libcusparseLt0 libnccl2 libnccl-dev \ + && apt-get -qq install -y --no-install-recommends libcusparseLt0 libnccl2 libnccl-dev ; \ fi \ ) || echo "Skipping pytorch install" ) \ # ----------------------------- From 4e60bc5464e90d891e7f815e2d9e1f5e7a60dcd1 Mon Sep 17 00:00:00 2001 From: haobibo Date: Mon, 22 Sep 2025 21:28:35 +0800 Subject: [PATCH 04/11] debug cuda --- docker_core/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker_core/Dockerfile b/docker_core/Dockerfile index 69fec79b..2133cae5 100644 --- a/docker_core/Dockerfile +++ b/docker_core/Dockerfile @@ -77,7 +77,7 @@ RUN set -eux \ echo "If CUDA version < 11.7, install pytorch 1.x, else install pytorch 2.x; if cuda doesn't exist, install pytorch 2.x" \ && export CUDA_VER_TORCH="117" && V=$([[ "${CUDA_VER:-999}" -lt "${CUDA_VER_TORCH}" ]] && echo "torch<2" || echo "torch") \ && pip install --no-cache-dir --root-user-action=ignore -U --pre "${V}" torchvision torchaudio --index-url "https://download.pytorch.org/whl/${IDX}" \ - && if [ "$(echo "${IDX}" | cut -c1-4)" = "cuda" ]; then \ + && if [ "$(echo "${IDX}" | cut -c1-2)" = "cu" ]; then \ echo "Uninstalling nvidia python packages to reduce disk size..." \ && pip freeze | grep -i '^nvidia-' | cut -d'=' -f1 | xargs -r pip uninstall -y \ && apt-get -qq install -y --no-install-recommends libcusparseLt0 libnccl2 libnccl-dev ; \ From 1153e6aa2da8b6d0c123f5118ab2cfafeaa3a960 Mon Sep 17 00:00:00 2001 From: haobibo Date: Tue, 23 Sep 2025 00:46:02 +0800 Subject: [PATCH 05/11] debug --- docker_core/Dockerfile | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/docker_core/Dockerfile b/docker_core/Dockerfile index 2133cae5..221f025b 100644 --- a/docker_core/Dockerfile +++ b/docker_core/Dockerfile @@ -77,11 +77,6 @@ RUN set -eux \ echo "If CUDA version < 11.7, install pytorch 1.x, else install pytorch 2.x; if cuda doesn't exist, install pytorch 2.x" \ && export CUDA_VER_TORCH="117" && V=$([[ "${CUDA_VER:-999}" -lt "${CUDA_VER_TORCH}" ]] && echo "torch<2" || echo "torch") \ && pip install --no-cache-dir --root-user-action=ignore -U --pre "${V}" torchvision torchaudio --index-url "https://download.pytorch.org/whl/${IDX}" \ - && if [ "$(echo "${IDX}" | cut -c1-2)" = "cu" ]; then \ - echo "Uninstalling nvidia python packages to reduce disk size..." \ - && pip freeze | grep -i '^nvidia-' | cut -d'=' -f1 | xargs -r pip uninstall -y \ - && apt-get -qq install -y --no-install-recommends libcusparseLt0 libnccl2 libnccl-dev ; \ - fi \ ) || echo "Skipping pytorch install" ) \ # ----------------------------- && echo "Handle paddle installation, cpu/gpu: https://www.paddlepaddle.org.cn/" \ @@ -98,6 +93,11 @@ RUN set -eux \ [ -f "/opt/utils/install_list_PY_${profile}.apt" ] && install_apt "/opt/utils/install_list_PY_${profile}.apt" || echo "apt install skipped for ${profile}" ; \ [ -f "/opt/utils/install_list_PY_${profile}.pip" ] && install_pip "/opt/utils/install_list_PY_${profile}.pip" || echo "pip install skipped for ${profile}" ; \ ) ; done \ + && if [ "$(echo "${IDX}" | cut -c1-2)" = "cu" ]; then \ + echo "Try to uninstall nvidia python packages to reduce storage size..." \ + && pip freeze | grep -i '^nvidia-' | cut -d'=' -f1 | xargs -r pip uninstall -y \ + && apt-get -qq install -y --no-install-recommends libcusparseLt0 libnccl2 libnccl-dev ; \ + fi \ # ----------------------------- && if echo "${ARG_PROFILE_GO}" | grep -q "base" ; then \ echo "Installing GO: ${ARG_PROFILE_GO}" && setup_GO ; \ From 362d93ac786b2819048700af63c4430853a4a675 Mon Sep 17 00:00:00 2001 From: haobibo Date: Tue, 23 Sep 2025 00:58:51 +0800 Subject: [PATCH 06/11] debug apt install --- docker_core/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker_core/Dockerfile b/docker_core/Dockerfile index 221f025b..091e7fa7 100644 --- a/docker_core/Dockerfile +++ b/docker_core/Dockerfile @@ -96,7 +96,7 @@ RUN set -eux \ && if [ "$(echo "${IDX}" | cut -c1-2)" = "cu" ]; then \ echo "Try to uninstall nvidia python packages to reduce storage size..." \ && pip freeze | grep -i '^nvidia-' | cut -d'=' -f1 | xargs -r pip uninstall -y \ - && apt-get -qq install -y --no-install-recommends libcusparseLt0 libnccl2 libnccl-dev ; \ + && apt-get -qq install -y --no-install-recommends libcusparse-dev libnccl2 libnccl-dev ; \ fi \ # ----------------------------- && if echo "${ARG_PROFILE_GO}" | grep -q "base" ; then \ From 06373746ce7c1b6ea9ca818814af0d787bbf687b Mon Sep 17 00:00:00 2001 From: haobibo Date: Tue, 23 Sep 2025 01:24:34 +0800 Subject: [PATCH 07/11] debug apt-get install --- docker_core/Dockerfile | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/docker_core/Dockerfile b/docker_core/Dockerfile index 091e7fa7..7376fbc7 100644 --- a/docker_core/Dockerfile +++ b/docker_core/Dockerfile @@ -83,8 +83,6 @@ RUN set -eux \ && ( $(grep -q "paddle" <<< "${ARG_PROFILE_PYTHON}") && ( \ URL_PYPI_PADDLE="https://www.paddlepaddle.org.cn/packages/stable/${IDX}/" \ && PADDLE=$( [ -x "$(command -v nvcc)" ] && echo "paddlepaddle-gpu" || echo "paddlepaddle") \ - # && PADDLE_VER=$(pip index versions ${PADDLE} -f ${URL_PYPI_PADDLE} | grep 'Available' | cut -d ":" -f 2 | tr ', ' '\n' | grep ${CUDA_VER:-'.'} | head -n 1) \ - # && V=$(echo ${PADDLE}==${PADDLE_VER}) && echo "to install paddle: ${V}" \ && pip install --no-cache-dir --root-user-action=ignore -U --pre --index-url ${URL_PYPI_PADDLE} "${PADDLE}" \ ) || echo "Skip paddle install" ) \ # ----------------------------- @@ -96,7 +94,7 @@ RUN set -eux \ && if [ "$(echo "${IDX}" | cut -c1-2)" = "cu" ]; then \ echo "Try to uninstall nvidia python packages to reduce storage size..." \ && pip freeze | grep -i '^nvidia-' | cut -d'=' -f1 | xargs -r pip uninstall -y \ - && apt-get -qq install -y --no-install-recommends libcusparse-dev libnccl2 libnccl-dev ; \ + && apt-get -qq install -y --no-install-recommends libcusparselt0 libnccl2 libnccl-dev ; \ fi \ # ----------------------------- && if echo "${ARG_PROFILE_GO}" | grep -q "base" ; then \ From dcfe00b6066db33692b144ad1eaaa3a7fd7f51b6 Mon Sep 17 00:00:00 2001 From: haobibo Date: Tue, 23 Sep 2025 01:26:01 +0800 Subject: [PATCH 08/11] debug apt install --- docker_core/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker_core/Dockerfile b/docker_core/Dockerfile index 7376fbc7..699a2b63 100644 --- a/docker_core/Dockerfile +++ b/docker_core/Dockerfile @@ -94,7 +94,7 @@ RUN set -eux \ && if [ "$(echo "${IDX}" | cut -c1-2)" = "cu" ]; then \ echo "Try to uninstall nvidia python packages to reduce storage size..." \ && pip freeze | grep -i '^nvidia-' | cut -d'=' -f1 | xargs -r pip uninstall -y \ - && apt-get -qq install -y --no-install-recommends libcusparselt0 libnccl2 libnccl-dev ; \ + && apt-get -qq install -y --no-install-recommends --allow-change-held-packages libcusparselt0 libnccl2 libnccl-dev ; \ fi \ # ----------------------------- && if echo "${ARG_PROFILE_GO}" | grep -q "base" ; then \ From 6b624d65ad6e931e488079acf88b01175b7044e3 Mon Sep 17 00:00:00 2001 From: haobibo Date: Tue, 23 Sep 2025 01:38:20 +0800 Subject: [PATCH 09/11] apt-get update --- docker_core/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker_core/Dockerfile b/docker_core/Dockerfile index 699a2b63..8e375d50 100644 --- a/docker_core/Dockerfile +++ b/docker_core/Dockerfile @@ -94,7 +94,7 @@ RUN set -eux \ && if [ "$(echo "${IDX}" | cut -c1-2)" = "cu" ]; then \ echo "Try to uninstall nvidia python packages to reduce storage size..." \ && pip freeze | grep -i '^nvidia-' | cut -d'=' -f1 | xargs -r pip uninstall -y \ - && apt-get -qq install -y --no-install-recommends --allow-change-held-packages libcusparselt0 libnccl2 libnccl-dev ; \ + && apt-get -qq update --fix-missing && apt-get -qq install -y --no-install-recommends --allow-change-held-packages libcusparselt0 libnccl2 libnccl-dev ; \ fi \ # ----------------------------- && if echo "${ARG_PROFILE_GO}" | grep -q "base" ; then \ From 65adc5cf4c9e0fed04cb258db87d7e8951311664 Mon Sep 17 00:00:00 2001 From: haobibo Date: Tue, 23 Sep 2025 01:56:22 +0800 Subject: [PATCH 10/11] fix for non-torch situation --- docker_core/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker_core/Dockerfile b/docker_core/Dockerfile index 8e375d50..b018e61c 100644 --- a/docker_core/Dockerfile +++ b/docker_core/Dockerfile @@ -91,7 +91,7 @@ RUN set -eux \ [ -f "/opt/utils/install_list_PY_${profile}.apt" ] && install_apt "/opt/utils/install_list_PY_${profile}.apt" || echo "apt install skipped for ${profile}" ; \ [ -f "/opt/utils/install_list_PY_${profile}.pip" ] && install_pip "/opt/utils/install_list_PY_${profile}.pip" || echo "pip install skipped for ${profile}" ; \ ) ; done \ - && if [ "$(echo "${IDX}" | cut -c1-2)" = "cu" ]; then \ + && if [ "$(echo "${IDX}" | cut -c1-2)" = "cu" ] && echo "${ARG_PROFILE_PYTHON}" | grep -q "torch" ; then \ echo "Try to uninstall nvidia python packages to reduce storage size..." \ && pip freeze | grep -i '^nvidia-' | cut -d'=' -f1 | xargs -r pip uninstall -y \ && apt-get -qq update --fix-missing && apt-get -qq install -y --no-install-recommends --allow-change-held-packages libcusparselt0 libnccl2 libnccl-dev ; \ From d49edf3d60803b3bb7e82c6d793244600839bcff Mon Sep 17 00:00:00 2001 From: haobibo Date: Tue, 23 Sep 2025 02:16:38 +0800 Subject: [PATCH 11/11] fix for paddle img size --- docker_core/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker_core/Dockerfile b/docker_core/Dockerfile index b018e61c..31acfb4a 100644 --- a/docker_core/Dockerfile +++ b/docker_core/Dockerfile @@ -91,7 +91,7 @@ RUN set -eux \ [ -f "/opt/utils/install_list_PY_${profile}.apt" ] && install_apt "/opt/utils/install_list_PY_${profile}.apt" || echo "apt install skipped for ${profile}" ; \ [ -f "/opt/utils/install_list_PY_${profile}.pip" ] && install_pip "/opt/utils/install_list_PY_${profile}.pip" || echo "pip install skipped for ${profile}" ; \ ) ; done \ - && if [ "$(echo "${IDX}" | cut -c1-2)" = "cu" ] && echo "${ARG_PROFILE_PYTHON}" | grep -q "torch" ; then \ + && if [ "$(echo "${IDX}" | cut -c1-2)" = "cu" ] && echo "${ARG_PROFILE_PYTHON}" | grep -qE "torch|paddle" ; then \ echo "Try to uninstall nvidia python packages to reduce storage size..." \ && pip freeze | grep -i '^nvidia-' | cut -d'=' -f1 | xargs -r pip uninstall -y \ && apt-get -qq update --fix-missing && apt-get -qq install -y --no-install-recommends --allow-change-held-packages libcusparselt0 libnccl2 libnccl-dev ; \