From f1f0135430bd35a68a6e8dbd6a68836d90dcbce5 Mon Sep 17 00:00:00 2001
From: Dustin Herbison
Date: Thu, 3 Oct 2024 21:32:07 +0000
Subject: [PATCH 1/6] Migrate to micromamba

http://b/358349812
---
 Dockerfile.tmpl | 28 ++++++++++++++++------------
 1 file changed, 16 insertions(+), 12 deletions(-)

diff --git a/Dockerfile.tmpl b/Dockerfile.tmpl
index e4bde450..b5b62ad4 100644
--- a/Dockerfile.tmpl
+++ b/Dockerfile.tmpl
@@ -81,17 +81,21 @@ RUN sed -i "s/httpredir.debian.org/debian.uchicago.edu/" /etc/apt/sources.list &
 # b/128333086: Set PROJ_DATA to points to the proj4 cartographic library.
 ENV PROJ_DATA=/opt/conda/share/proj
 
+# Install micromamba, setup channels, and replace conda with micromamba
+RUN curl -L "https://micro.mamba.pm/install.sh" -o /tmp/micromamba-install.sh \
+    && bash /tmp/micromamba-install.sh \
+    && rm /tmp/micromamba-install.sh \
+    && mv ~/.local/bin/micromamba /usr/bin/micromamba \
+    && (!(which conda) || cp /usr/bin/micromamba $(which conda)) \
+    && micromamba config append channels conda-forge \
+    && micromamba config append channels nvidia \
+    && micromamba config append channels rapidsai \
+    && micromamba config set channel_priority strict
+
 # Install conda packages not available on pip.
 # When using pip in a conda environment, conda commands should be ran first and then
 # the remaining pip commands: https://www.anaconda.com/using-pip-in-a-conda-environment/
-RUN conda config --add channels nvidia && \
-    conda config --add channels rapidsai && \
-    conda config --set solver libmamba && \
-    # b/299991198: remove curl/libcurl install once DLVM base image includes version >= 7.86
-    conda install -c conda-forge mamba curl libcurl && \
-    # Base image channel order: conda-forge (highest priority), defaults.
-    # End state: rapidsai (highest priority), nvidia, conda-forge, defaults.
-    mamba install -y mkl cartopy imagemagick pyproj "shapely<2" && \
+RUN micromamba install -y mkl cartopy imagemagick pyproj "shapely<2" && \
     rm -rf /opt/conda/lib/python3.10/site-packages/pyproj/proj_dir/ && \
     /tmp/clean-layer.sh
 
@@ -100,8 +104,8 @@ RUN conda config --add channels nvidia && \
 # b/341938540: unistall grpc-cpp to allow >=v24.4 cudf and cuml to be installed.
 {{ if eq .Accelerator "gpu" }}
 RUN pip uninstall -y pyarrow && \
-    mamba remove -y --force grpc-cpp && \
-    mamba install -y -c conda-forge spacy cudf>=24.4 cuml>=24.4 cupy cuda-version=$CUDA_MAJOR_VERSION.$CUDA_MINOR_VERSION && \
+    micromamba remove -y --force grpc-cpp && \
+    micromamba install -y -c conda-forge spacy cudf>=24.4 cuml>=24.4 cupy cuda-version=$CUDA_MAJOR_VERSION.$CUDA_MINOR_VERSION && \
     /tmp/clean-layer.sh
 {{ else }}
 RUN pip install spacy && \
@@ -114,7 +118,7 @@ RUN pip install spacy && \
 COPY --from=torch_whl /tmp/whl/*.whl /tmp/torch/
 # b/356397043: We are currently using cuda 12.3,
 # but magma-cuda121 is the latest compatible version
-RUN mamba install -y -c pytorch magma-cuda121 && \
+RUN micromamba install -y -c pytorch magma-cuda121 && \
     pip install /tmp/torch/*.whl && \
     sudo apt -y install libsox-dev && \
     rm -rf /tmp/torch && \
@@ -507,7 +511,7 @@ RUN pip install wandb \
     pip install --no-dependencies fastai fastdownload && \
     # b/343971718: remove duplicate aiohttp installs, and reinstall it
     rm -rf /opt/conda/lib/python3.10/site-packages/aiohttp* && \
-    mamba install --force-reinstall -y aiohttp && \
+    micromamba install --force-reinstall -y aiohttp && \
     /tmp/clean-layer.sh
 
 # Download base easyocr models.
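A note on the install step above: the one-liner (!(which conda) || cp /usr/bin/micromamba $(which conda)) only overwrites the conda entry point when a conda binary is actually on PATH. An equivalent, more explicit sh form of that idiom, written out here purely as an illustration and not part of the patch:

    # If a conda executable exists on PATH, replace it with the micromamba
    # binary; otherwise do nothing. Same behavior as the one-liner above.
    if which conda >/dev/null 2>&1; then
        cp /usr/bin/micromamba "$(which conda)"
    fi

Copying micromamba over conda works because micromamba exposes a largely compatible subset of conda's subcommands (install, remove, config, env), so later layers and scripts that invoke conda keep functioning without a full conda installation.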
From fdbc49b0361d718cfb6a50ac782b21c01460f2da Mon Sep 17 00:00:00 2001
From: Dustin Herbison
Date: Thu, 3 Oct 2024 23:47:34 +0000
Subject: [PATCH 2/6] fix paths and tests

---
 Dockerfile.tmpl | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/Dockerfile.tmpl b/Dockerfile.tmpl
index b5b62ad4..f9b84a68 100644
--- a/Dockerfile.tmpl
+++ b/Dockerfile.tmpl
@@ -82,6 +82,7 @@ RUN sed -i "s/httpredir.debian.org/debian.uchicago.edu/" /etc/apt/sources.list &
 ENV PROJ_DATA=/opt/conda/share/proj
 
 # Install micromamba, setup channels, and replace conda with micromamba
+ENV MAMBA_ROOT_PREFIX=/opt/conda
 RUN curl -L "https://micro.mamba.pm/install.sh" -o /tmp/micromamba-install.sh \
     && bash /tmp/micromamba-install.sh \
     && rm /tmp/micromamba-install.sh \
@@ -90,7 +91,8 @@ RUN curl -L "https://micro.mamba.pm/install.sh" -o /tmp/micromamba-install.sh \
     && micromamba config append channels conda-forge \
     && micromamba config append channels nvidia \
     && micromamba config append channels rapidsai \
-    && micromamba config set channel_priority strict
+    && micromamba config set channel_priority strict \
+    && python -m nb_conda_kernels.install --disable
 
 # Install conda packages not available on pip.
 # When using pip in a conda environment, conda commands should be ran first and then

From fc5c14bb850eed4c64183babd8393b3e8c1b6154 Mon Sep 17 00:00:00 2001
From: Dustin Herbison
Date: Fri, 4 Oct 2024 00:30:02 +0000
Subject: [PATCH 3/6] trying to fix gpu image

---
 Dockerfile.tmpl | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/Dockerfile.tmpl b/Dockerfile.tmpl
index f9b84a68..8f292015 100644
--- a/Dockerfile.tmpl
+++ b/Dockerfile.tmpl
@@ -106,8 +106,7 @@ RUN micromamba install -y mkl cartopy imagemagick pyproj "shapely<2" && \
 # b/341938540: unistall grpc-cpp to allow >=v24.4 cudf and cuml to be installed.
 {{ if eq .Accelerator "gpu" }}
 RUN pip uninstall -y pyarrow && \
-    micromamba remove -y --force grpc-cpp && \
-    micromamba install -y -c conda-forge spacy cudf>=24.4 cuml>=24.4 cupy cuda-version=$CUDA_MAJOR_VERSION.$CUDA_MINOR_VERSION && \
+    micromamba install -y spacy cudf>=24.4 cuml>=24.4 cupy cuda-version=$CUDA_MAJOR_VERSION.$CUDA_MINOR_VERSION && \
     /tmp/clean-layer.sh
 {{ else }}
 RUN pip install spacy && \
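The ENV MAMBA_ROOT_PREFIX=/opt/conda line added in patch 2 is what keeps micromamba pointed at the image's existing environment: micromamba derives its root prefix (and thus its base environment) from that variable, so installs land in the same /opt/conda tree the rest of the image uses. A rough sketch of how this could be sanity-checked inside the built image; the package spec and the check itself are illustrative assumptions, not taken from the patches:

    # With the root prefix set to /opt/conda, an install into the base
    # environment modifies the same prefix the image's Python imports from.
    export MAMBA_ROOT_PREFIX=/opt/conda
    micromamba install -y -n base "shapely<2"
    python -c "import shapely; print(shapely.__file__)"   # expect a path under /opt/conda

Patch 3 then drops the grpc-cpp removal and the explicit -c conda-forge flag from the GPU branch, leaving channel selection to the global configuration set up earlier.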
From c02f4c129a342e08f40198213a2fe5096367bdee Mon Sep 17 00:00:00 2001
From: Dustin Herbison
Date: Fri, 4 Oct 2024 14:11:14 +0000
Subject: [PATCH 4/6] allow flexible channels

---
 Dockerfile.tmpl | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/Dockerfile.tmpl b/Dockerfile.tmpl
index 8f292015..5c715a36 100644
--- a/Dockerfile.tmpl
+++ b/Dockerfile.tmpl
@@ -88,10 +88,10 @@ RUN curl -L "https://micro.mamba.pm/install.sh" -o /tmp/micromamba-install.sh \
     && rm /tmp/micromamba-install.sh \
     && mv ~/.local/bin/micromamba /usr/bin/micromamba \
     && (!(which conda) || cp /usr/bin/micromamba $(which conda)) \
-    && micromamba config append channels conda-forge \
     && micromamba config append channels nvidia \
     && micromamba config append channels rapidsai \
-    && micromamba config set channel_priority strict \
+    && micromamba config append channels conda-forge \
+    && micromamba config set channel_priority flexible \
     && python -m nb_conda_kernels.install --disable
 
 # Install conda packages not available on pip.
@@ -106,7 +106,7 @@ RUN micromamba install -y mkl cartopy imagemagick pyproj "shapely<2" && \
 # b/341938540: unistall grpc-cpp to allow >=v24.4 cudf and cuml to be installed.
 {{ if eq .Accelerator "gpu" }}
 RUN pip uninstall -y pyarrow && \
-    micromamba install -y spacy cudf>=24.4 cuml>=24.4 cupy cuda-version=$CUDA_MAJOR_VERSION.$CUDA_MINOR_VERSION && \
+    micromamba install -vvvy spacy cudf>=24.4 cuml>=24.4 cupy cuda-version=$CUDA_MAJOR_VERSION.$CUDA_MINOR_VERSION && \
     /tmp/clean-layer.sh
 {{ else }}
 RUN pip install spacy && \

From 6b5b81b5fcea40b9c508473d02a28c126adb3f87 Mon Sep 17 00:00:00 2001
From: Dustin Herbison
Date: Fri, 4 Oct 2024 16:17:20 +0000
Subject: [PATCH 5/6] fix cudf/cuml version pins with micromamba

---
 Dockerfile.tmpl | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/Dockerfile.tmpl b/Dockerfile.tmpl
index 5c715a36..5c0ead0e 100644
--- a/Dockerfile.tmpl
+++ b/Dockerfile.tmpl
@@ -106,7 +106,8 @@ RUN micromamba install -y mkl cartopy imagemagick pyproj "shapely<2" && \
 # b/341938540: unistall grpc-cpp to allow >=v24.4 cudf and cuml to be installed.
 {{ if eq .Accelerator "gpu" }}
 RUN pip uninstall -y pyarrow && \
-    micromamba install -vvvy spacy cudf>=24.4 cuml>=24.4 cupy cuda-version=$CUDA_MAJOR_VERSION.$CUDA_MINOR_VERSION && \
+    micromamba remove -y --force grpc-cpp && \
+    micromamba install -vvvy spacy "cudf>=24.4" "cuml>=24.4" cupy cuda-version=$CUDA_MAJOR_VERSION.$CUDA_MINOR_VERSION && \
     /tmp/clean-layer.sh
 {{ else }}
 RUN pip install spacy && \

From ec53f2e45da5f6ba3b72736124f021bfde682eb7 Mon Sep 17 00:00:00 2001
From: Dustin Herbison
Date: Fri, 4 Oct 2024 16:37:15 +0000
Subject: [PATCH 6/6] fix solver for cuml/cudf

---
 Dockerfile.tmpl | 1 -
 1 file changed, 1 deletion(-)

diff --git a/Dockerfile.tmpl b/Dockerfile.tmpl
index 5c0ead0e..11b05ebd 100644
--- a/Dockerfile.tmpl
+++ b/Dockerfile.tmpl
@@ -106,7 +106,6 @@ RUN micromamba install -y mkl cartopy imagemagick pyproj "shapely<2" && \
 # b/341938540: unistall grpc-cpp to allow >=v24.4 cudf and cuml to be installed.
 {{ if eq .Accelerator "gpu" }}
 RUN pip uninstall -y pyarrow && \
-    micromamba remove -y --force grpc-cpp && \
     micromamba install -vvvy spacy "cudf>=24.4" "cuml>=24.4" cupy cuda-version=$CUDA_MAJOR_VERSION.$CUDA_MINOR_VERSION && \
     /tmp/clean-layer.sh
 {{ else }}
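Two shell details are worth calling out in the last three patches, illustrated below with standalone commands that are not taken from the series itself. First, the reason patch 5 quotes the pins: an unquoted > is output redirection in sh, so the version constraints never reach the solver. Second, -vvvy is simply the verbosity flag repeated three times (-vvv) combined with -y (assume yes).

    # Unquoted: sh parses ">=24.4" as a redirection to a file named "=24.4",
    # so the solver only sees the bare package name and the pin is silently lost.
    micromamba install -y cudf>=24.4
    # Quoted: the full match spec is passed through to the solver.
    micromamba install -y "cudf>=24.4"

Patch 6 then drops the grpc-cpp force-removal again; per its subject line, the remaining solver issue for cudf/cuml was resolved without it once the quoted pins and the flexible channel priority from patch 4 were in place.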