From bd070a5f07d58deaeb4af94a0f4fbcaed3ddd1ef Mon Sep 17 00:00:00 2001 From: Bibo Hao Date: Mon, 12 May 2025 06:01:01 +0000 Subject: [PATCH 1/5] add aidoc-miner --- .github/workflows/build-docker.yml | 11 ++++++++++- docker_aidoc/miner.Dockerfile | 25 +++++++++++++++++++++++++ 2 files changed, 35 insertions(+), 1 deletion(-) create mode 100644 docker_aidoc/miner.Dockerfile diff --git a/.github/workflows/build-docker.yml b/.github/workflows/build-docker.yml index 28f208d..2a914b9 100644 --- a/.github/workflows/build-docker.yml +++ b/.github/workflows/build-docker.yml @@ -57,6 +57,15 @@ jobs: build_image vllm-cuda latest docker_vllm/vllm-cuda.Dockerfile push_image + qpod_aidoc-miner: + name: 'aidoc-miner' + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - run: | + source ./tool.sh && free_diskspace + build_image aidoc-miner latest docker_aidoc/miner.Dockerfile --build-arg "BASE_IMG=py-nlp-cuda128" + push_image qpod_PaddleOCR_cuda120: name: 'paddleocr-cuda120,doc-ai-cuda120' @@ -99,7 +108,7 @@ jobs: ## Sync all images in this build (listed by "names") to mirror registry. sync_images: - needs: ["qpod_OpenCV", "qpod_HuggingFaceModels", "qpod_PaddleOCR_cuda112", "qpod_PaddleOCR_cuda120", "qpod_vllm-cuda"] + needs: ["qpod_OpenCV", "qpod_HuggingFaceModels", "qpod_aidoc-miner", "qpod_PaddleOCR_cuda112", "qpod_PaddleOCR_cuda120", "qpod_vllm-cuda"] runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 diff --git a/docker_aidoc/miner.Dockerfile b/docker_aidoc/miner.Dockerfile new file mode 100644 index 0000000..c2c8815 --- /dev/null +++ b/docker_aidoc/miner.Dockerfile @@ -0,0 +1,25 @@ +# Distributed under the terms of the Modified BSD License. + +# reference: https://github.com/opendatalab/MinerU/blob/master/docker/china/Dockerfile + +ARG BASE_NAMESPACE +ARG BASE_IMG="py-nlp-cuda128" +FROM ${BASE_NAMESPACE:+$BASE_NAMESPACE/}${BASE_IMG} + +LABEL maintainer="haobibo@gmail.com" + +RUN set -eux \ + # ---------- + && apt-get update \ + && mkdir -pv /usr/share/man/man1 \ + && apt-get -qq install -yq --no-install-recommends openjdk-21-jre-headless \ + && apt-get -qq install -yq --no-install-recommends \ + libgl1 libglib2.0-0 libxrender1 libsm6 libxext6 \ + fontconfig ttf-mscorefonts-installer \ + fonts-noto-cjk fonts-wqy-zenhei fonts-wqy-microhei \ + libreoffice poppler-utils \ + && pip install -U magic-pdf[full] modelscope \ + # ---------- + && rm -rf /var/lib/apt/lists/* \ + && source /opt/utils/script-setup.sh \ + && install__clean && list_installed_packages From 9ff92b8e65e2fe56192ed93a0858b774652df6d9 Mon Sep 17 00:00:00 2001 From: Bibo Hao Date: Tue, 13 May 2025 02:57:38 +0800 Subject: [PATCH 2/5] debug pipeline --- .github/workflows/build-docker.yml | 53 +++++++++---------- docker_HuggingFace-model/list_hf_models.txt | 1 + .../OpenCV.Dockerfile | 0 .../work/install_list_OpenCV.apt | 0 ...ockerfile => paddle-ocr-models.Dockerfile} | 0 .../{Dockerfile => paddle-ocr.Dockerfile} | 0 docker_vllm/vllm-cuda.Dockerfile | 2 +- 7 files changed, 27 insertions(+), 29 deletions(-) rename docker_OpenCV/Dockerfile => docker_OpenFace/OpenCV.Dockerfile (100%) rename {docker_OpenCV => docker_OpenFace}/work/install_list_OpenCV.apt (100%) rename docker_PaddleOCR/{models.Dockerfile => paddle-ocr-models.Dockerfile} (100%) rename docker_PaddleOCR/{Dockerfile => paddle-ocr.Dockerfile} (100%) diff --git a/.github/workflows/build-docker.yml b/.github/workflows/build-docker.yml index 2a914b9..2f86c2e 100644 --- a/.github/workflows/build-docker.yml +++ b/.github/workflows/build-docker.yml @@ -26,24 +26,29 @@ env: DOCKER_MIRROR_REGISTRY_USERNAME: ${{ vars.DOCKER_MIRROR_REGISTRY_USERNAME }} DOCKER_MIRROR_REGISTRY_PASSWORD: ${{ secrets.DOCKER_MIRROR_REGISTRY_PASSWORD }} + jobs: - qpod_OpenCV: - name: 'opencv' + qpod_PaddleOCR_cuda112: + name: 'paddleocr-cuda112' runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - - run: source ./tool.sh && build_image opencv latest docker_OpenCV/Dockerfile && push_image - - # paddleocr-models: temp disable the build caused by the paddle run on CPU server - qpod_PaddleOCR_cuda112: - name: 'paddleocr-cuda112' + - run: | + source ./tool.sh && free_diskspace + build_image paddleocr-cuda112 latest docker_PaddleOCR/paddle-ocr.Dockerfile --build-arg "BASE_IMG=cuda_11.2" + echo 'paddleocr-models: temp disable the build caused by the paddle run on CPU server' + echo 'build_image paddleocr-models latest docker_PaddleOCR/paddle-ocr-models.Dockerfile --build-arg "BASE_IMG=paddleocr-cuda112" --build-arg "BASE_NAMESPACE_SRC=docker.io/library"' + push_image + + qpod_PaddleOCR_cuda120: + name: 'paddleocr-cuda120,doc-ai-cuda120' runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - run: | source ./tool.sh && free_diskspace - build_image paddleocr-cuda112 latest docker_PaddleOCR/Dockerfile --build-arg "BASE_IMG=cuda_11.2" - echo 'build_image paddleocr-models latest docker_PaddleOCR/models.Dockerfile --build-arg "BASE_IMG=paddleocr-cuda112" --build-arg "BASE_NAMESPACE_SRC=docker.io/library"' + build_image paddleocr-cuda120 latest docker_PaddleOCR/paddle-ocrDockerfile --build-arg "BASE_IMG=cuda_12.0" + build_image doc-ai-cuda120 latest docker_PaddleOCR/paddle-ocrDockerfile --build-arg "BASE_IMG=py-nlp-cuda120" push_image @@ -67,17 +72,6 @@ jobs: build_image aidoc-miner latest docker_aidoc/miner.Dockerfile --build-arg "BASE_IMG=py-nlp-cuda128" push_image - qpod_PaddleOCR_cuda120: - name: 'paddleocr-cuda120,doc-ai-cuda120' - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - run: | - source ./tool.sh && free_diskspace - build_image paddleocr-cuda120 latest docker_PaddleOCR/Dockerfile --build-arg "BASE_IMG=cuda_12.0" - build_image doc-ai-cuda120 latest docker_PaddleOCR/Dockerfile --build-arg "BASE_IMG=py-nlp-cuda120" - push_image - qpod_OpenFace: name: 'openface-src,openface' @@ -86,9 +80,11 @@ jobs: steps: - uses: actions/checkout@v4 - run: | - source ./tool.sh - export IMG_PREFIX_SRC="docker.io/library" && build_image openface-src latest docker_OpenFace/OpenFace-src.Dockerfile && push_image src - export IMG_PREFIX_SRC="${IMG_PREFIX_DST}" && build_image openface latest docker_OpenFace/OpenFace.Dockerfile && push_image openface + source ./tool.sh && export IMG_PREFIX_SRC="docker.io/library" + build_image opencv latest docker_OpenFace/OpenCV.Dockerfile && push_image opencv + build_image openface-src latest docker_OpenFace/OpenFace-src.Dockerfile && push_image openface-src + export IMG_PREFIX_SRC="${IMG_PREFIX_DST}" + build_image openface latest docker_OpenFace/OpenFace.Dockerfile && push_image openface # To build HF model image for a single model, simple run: `build_image_hf_model bert-base-cased` qpod_HuggingFaceModels: @@ -99,16 +95,17 @@ jobs: - env: HF_MODEL_NAME: ${{ vars.HF_MODEL_NAME }} run: | - source tool.sh && export IMG_PREFIX_SRC="docker.io/library" - source docker_HuggingFace-model/script-setup-huggingface.sh - export -f download_hf_model build_image_hf_model build_image_no_tag push_image + source tool.sh && export -f build_image_no_tag push_image free_diskspace && free_diskspace + source docker_HuggingFace-model/script-setup-huggingface.sh && export -f download_hf_model build_image_hf_model HF_MODEL_RANDOM=$(sort --random-sort docker_HuggingFace-model/list_hf_models.txt | head -n1) - download_hf_model ${HF_MODEL_NAME:-$HF_MODEL_RANDOM} && build_image_hf_model ${HF_MODEL_NAME} + download_hf_model ${HF_MODEL_NAME:-$HF_MODEL_RANDOM} + export IMG_PREFIX_SRC="docker.io/library" + build_image_hf_model ${HF_MODEL_NAME} ## Sync all images in this build (listed by "names") to mirror registry. sync_images: - needs: ["qpod_OpenCV", "qpod_HuggingFaceModels", "qpod_aidoc-miner", "qpod_PaddleOCR_cuda112", "qpod_PaddleOCR_cuda120", "qpod_vllm-cuda"] + needs: ["qpod_OpenFace", "qpod_HuggingFaceModels", "qpod_aidoc-miner", "qpod_PaddleOCR_cuda112", "qpod_PaddleOCR_cuda120", "qpod_vllm-cuda"] runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 diff --git a/docker_HuggingFace-model/list_hf_models.txt b/docker_HuggingFace-model/list_hf_models.txt index 657c5ff..62657af 100644 --- a/docker_HuggingFace-model/list_hf_models.txt +++ b/docker_HuggingFace-model/list_hf_models.txt @@ -4,3 +4,4 @@ BAAI/bge-m3 BAAI/bge-reranker-v2-m3 Alibaba-NLP/gte-modernbert-base Alibaba-NLP/gte-reranker-modernbert-base +Alibaba-NLP/gte-Qwen2-1.5B-instruct diff --git a/docker_OpenCV/Dockerfile b/docker_OpenFace/OpenCV.Dockerfile similarity index 100% rename from docker_OpenCV/Dockerfile rename to docker_OpenFace/OpenCV.Dockerfile diff --git a/docker_OpenCV/work/install_list_OpenCV.apt b/docker_OpenFace/work/install_list_OpenCV.apt similarity index 100% rename from docker_OpenCV/work/install_list_OpenCV.apt rename to docker_OpenFace/work/install_list_OpenCV.apt diff --git a/docker_PaddleOCR/models.Dockerfile b/docker_PaddleOCR/paddle-ocr-models.Dockerfile similarity index 100% rename from docker_PaddleOCR/models.Dockerfile rename to docker_PaddleOCR/paddle-ocr-models.Dockerfile diff --git a/docker_PaddleOCR/Dockerfile b/docker_PaddleOCR/paddle-ocr.Dockerfile similarity index 100% rename from docker_PaddleOCR/Dockerfile rename to docker_PaddleOCR/paddle-ocr.Dockerfile diff --git a/docker_vllm/vllm-cuda.Dockerfile b/docker_vllm/vllm-cuda.Dockerfile index 94ed620..01dca10 100644 --- a/docker_vllm/vllm-cuda.Dockerfile +++ b/docker_vllm/vllm-cuda.Dockerfile @@ -1,7 +1,7 @@ # Distributed under the terms of the Modified BSD License. ARG BASE_NAMESPACE -ARG BASE_IMG="cuda_12.4" +ARG BASE_IMG="torch-cuda126" FROM ${BASE_NAMESPACE:+$BASE_NAMESPACE/}${BASE_IMG} LABEL maintainer="haobibo@gmail.com" From 6246a63da5a347b4b3b840bccd2f01b1461c0027 Mon Sep 17 00:00:00 2001 From: Bibo Hao Date: Tue, 13 May 2025 02:59:34 +0800 Subject: [PATCH 3/5] typo --- .github/workflows/build-docker.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build-docker.yml b/.github/workflows/build-docker.yml index 2f86c2e..c59a804 100644 --- a/.github/workflows/build-docker.yml +++ b/.github/workflows/build-docker.yml @@ -47,8 +47,8 @@ jobs: - uses: actions/checkout@v4 - run: | source ./tool.sh && free_diskspace - build_image paddleocr-cuda120 latest docker_PaddleOCR/paddle-ocrDockerfile --build-arg "BASE_IMG=cuda_12.0" - build_image doc-ai-cuda120 latest docker_PaddleOCR/paddle-ocrDockerfile --build-arg "BASE_IMG=py-nlp-cuda120" + build_image paddleocr-cuda120 latest docker_PaddleOCR/paddle-ocr.Dockerfile --build-arg "BASE_IMG=cuda_12.0" + build_image doc-ai-cuda120 latest docker_PaddleOCR/paddle-ocr.Dockerfile --build-arg "BASE_IMG=py-nlp-cuda120" push_image From fb32f069a6c640fb7dff19ba4bc4b66b438ce71f Mon Sep 17 00:00:00 2001 From: Bibo Hao Date: Tue, 13 May 2025 03:08:49 +0800 Subject: [PATCH 4/5] update img sync --- .github/workflows/build-docker.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/build-docker.yml b/.github/workflows/build-docker.yml index c59a804..fcade8a 100644 --- a/.github/workflows/build-docker.yml +++ b/.github/workflows/build-docker.yml @@ -74,8 +74,7 @@ jobs: qpod_OpenFace: - name: 'openface-src,openface' - # needs: [ "qpod_OpenCV" ] + name: 'opencv,openface-src,openface' runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 From 75fd5fb92e7624c32a9c03c273a00270646e4124 Mon Sep 17 00:00:00 2001 From: Bibo Hao Date: Tue, 13 May 2025 03:12:38 +0800 Subject: [PATCH 5/5] debug build --- .github/workflows/build-docker.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build-docker.yml b/.github/workflows/build-docker.yml index fcade8a..edcbaec 100644 --- a/.github/workflows/build-docker.yml +++ b/.github/workflows/build-docker.yml @@ -79,10 +79,11 @@ jobs: steps: - uses: actions/checkout@v4 - run: | - source ./tool.sh && export IMG_PREFIX_SRC="docker.io/library" - build_image opencv latest docker_OpenFace/OpenCV.Dockerfile && push_image opencv + source ./tool.sh && + export IMG_PREFIX_SRC="docker.io/library" build_image openface-src latest docker_OpenFace/OpenFace-src.Dockerfile && push_image openface-src export IMG_PREFIX_SRC="${IMG_PREFIX_DST}" + build_image opencv latest docker_OpenFace/OpenCV.Dockerfile && push_image opencv build_image openface latest docker_OpenFace/OpenFace.Dockerfile && push_image openface # To build HF model image for a single model, simple run: `build_image_hf_model bert-base-cased`