From ba53b664e52a3967a11cdef12dc88aee7c54cfbb Mon Sep 17 00:00:00 2001 From: Qing Lan Date: Wed, 31 May 2023 18:51:38 -0700 Subject: [PATCH] remove inf1 support (#785) --- .github/workflows/docker-nightly-publish.yml | 2 +- .github/workflows/integration.yml | 73 +------------------- README.md | 2 +- serving/docker/README.md | 4 +- serving/docker/docker-compose.yml | 5 -- serving/docker/fastertransformer.Dockerfile | 4 +- serving/docker/pytorch-cu118.Dockerfile | 4 +- serving/docker/pytorch-inf1.Dockerfile | 60 ---------------- serving/docker/scripts/install_inferentia.sh | 31 --------- serving/docker/scripts/pull_and_retag.sh | 2 +- serving/docker/scripts/security_patch.sh | 2 +- tests/integration/download_models.sh | 11 +-- 12 files changed, 13 insertions(+), 187 deletions(-) delete mode 100644 serving/docker/pytorch-inf1.Dockerfile delete mode 100755 serving/docker/scripts/install_inferentia.sh diff --git a/.github/workflows/docker-nightly-publish.yml b/.github/workflows/docker-nightly-publish.yml index 9184947df..fec3b0591 100644 --- a/.github/workflows/docker-nightly-publish.yml +++ b/.github/workflows/docker-nightly-publish.yml @@ -15,7 +15,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - arch: [ cpu, cpu-full, deepspeed, pytorch-inf1, pytorch-inf2, pytorch-cu118, fastertransformer ] + arch: [ cpu, cpu-full, deepspeed, pytorch-inf2, pytorch-cu118, fastertransformer ] steps: - uses: actions/checkout@v3 - name: Login to Docker diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index f3baa0f40..b01372ff5 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -33,19 +33,9 @@ jobs: --fail \ | jq '.token' | tr -d '"' ) ./start_instance.sh action_graviton $token djl-serving - - name: Create new Inferentia instance - id: create_inf - run: | - cd /home/ubuntu/djl_benchmark_script/scripts - token=$( curl -X POST -H "Authorization: token ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}" \ - https://api.github.com/repos/deepjavalibrary/djl-serving/actions/runners/registration-token \ - --fail \ - | jq '.token' | tr -d '"' ) - ./start_instance.sh action_inf $token djl-serving outputs: gpu_instance_id: ${{ steps.create_gpu.outputs.action_gpu_instance_id }} aarch64_instance_id: ${{ steps.create_aarch64.outputs.action_graviton_instance_id }} - inf_instance_id: ${{ steps.create_inf.outputs.action_inf_instance_id }} cpu-test: runs-on: ubuntu-latest @@ -153,65 +143,6 @@ jobs: name: ${{ matrix.arch }}-logs path: tests/integration/logs/ - inferentia-test: - runs-on: [ self-hosted, inf ] - timeout-minutes: 30 - needs: create-runners - steps: - - uses: actions/checkout@v3 - - name: Clean env - run: | - yes | docker system prune -a --volumes - sudo rm -rf /home/ubuntu/actions-runner/_work/_tool/Java_Corretto_jdk/ - echo "wait dpkg lock..." - while sudo fuser /var/{lib/{dpkg,apt/lists},cache/apt/archives}/lock >/dev/null 2>&1; do sleep 5; done - - name: Set up JDK 11 - uses: actions/setup-java@v3 - with: - distribution: 'corretto' - java-version: 11 - - uses: actions/cache@v3 - with: - path: ~/.gradle/caches - key: ${{ runner.os }}-gradle-${{ hashFiles('**/*.gradle*') }} - restore-keys: | - ${{ runner.os }}-gradle- - - name: Install DJL-Bench - working-directory: benchmark - run: ./gradlew installOnLinux - - name: Build container name - run: ./serving/docker/scripts/docker_name_builder.sh pytorch-inf1 ${{ github.event.inputs.djl-version }} - - name: Download models and dockers - working-directory: tests/integration - run: | - docker pull deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG - mkdir logs - ./download_models.sh pytorch-inf1 - - name: Test Pytorch model - working-directory: tests/integration - run: | - ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models pytorch-inf1 \ - serve -m test::PyTorch=file:/opt/ml/model/resnet18_inf1_1_12.tar.gz?model_name=resnet18_inf1_1_12 - ./test_client.sh image/jpg models/kitten.jpg - docker rm -f $(docker ps -aq) - - name: Test Python mode - working-directory: tests/integration - run: | - ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models pytorch-inf1 \ - serve -m test::Python=file:/opt/ml/model/resnet18_inf1_1_12.tar.gz - ./test_client.sh image/jpg models/kitten.jpg - docker rm -f $(docker ps -aq) - - name: On fail step - if: ${{ failure() }} - working-directory: tests/integration - run: | - cat logs/serving.log - - name: Upload test logs - uses: actions/upload-artifact@v3 - with: - name: pytorch-inf1-logs - path: tests/integration/logs/ - gpu-test: runs-on: [ self-hosted, gpu ] timeout-minutes: 30 @@ -336,7 +267,7 @@ jobs: stop-runners: if: always() runs-on: [ self-hosted, scheduler ] - needs: [ create-runners, inferentia-test, aarch64-test, gpu-test ] + needs: [ create-runners, aarch64-test, gpu-test ] steps: - name: Stop all instances run: | @@ -345,5 +276,3 @@ jobs: ./stop_instance.sh $instance_id instance_id=${{ needs.create-runners.outputs.aarch64_instance_id }} ./stop_instance.sh $instance_id - instance_id=${{ needs.create-runners.outputs.inf_instance_id }} - ./stop_instance.sh $instance_id diff --git a/README.md b/README.md index 51ef784ce..36c8195e6 100644 --- a/README.md +++ b/README.md @@ -82,7 +82,7 @@ To see examples, see the [starting page](serving/docs/starting.md). ### More examples - [Serving a Python model](https://github.com/deepjavalibrary/djl-demo/tree/master/huggingface/python) -- [Serving on Inf1 EC2 instance](https://github.com/deepjavalibrary/djl-demo/tree/master/huggingface/inferentia) +- [Serving on Inferentia EC2 instance](https://github.com/deepjavalibrary/djl-demo/tree/master/huggingface/inferentia) - [Serving with docker](https://github.com/deepjavalibrary/djl-serving/tree/master/serving/docker) ### More command line options diff --git a/serving/docker/README.md b/serving/docker/README.md index 624af6d24..0bafd9e27 100644 --- a/serving/docker/README.md +++ b/serving/docker/README.md @@ -51,6 +51,6 @@ docker run -it --runtime=nvidia --shm-size 2g -v $PWD:/opt/ml/model -p 8080:8080 mkdir models cd models -curl -O https://resources.djl.ai/test-models/pytorch/bert_qa_inf1.tar.gz -docker run --device /dev/neuron0 -it --rm -v $PWD:/opt/ml/model -p 8080:8080 deepjavalibrary/djl-serving:0.22.1-inf1 +curl -O https://resources.djl.ai/test-models/pytorch/resnet18_inf2_2_4.tar.gz +docker run --device /dev/neuron0 -it --rm -v $PWD:/opt/ml/model -p 8080:8080 deepjavalibrary/djl-serving:0.22.1-pytorch-inf2 ``` diff --git a/serving/docker/docker-compose.yml b/serving/docker/docker-compose.yml index 7f0465bc4..27d4b361a 100644 --- a/serving/docker/docker-compose.yml +++ b/serving/docker/docker-compose.yml @@ -22,11 +22,6 @@ services: context: . dockerfile: deepspeed.Dockerfile image: "deepjavalibrary/djl-serving:${RELEASE_VERSION}deepspeed${NIGHTLY}" - pytorch-inf1: - build: - context: . - dockerfile: pytorch-inf1.Dockerfile - image: "deepjavalibrary/djl-serving:${RELEASE_VERSION}pytorch-inf1${NIGHTLY}" pytorch-cu118: build: context: . diff --git a/serving/docker/fastertransformer.Dockerfile b/serving/docker/fastertransformer.Dockerfile index 992f225bc..fe8e89c5e 100644 --- a/serving/docker/fastertransformer.Dockerfile +++ b/serving/docker/fastertransformer.Dockerfile @@ -19,8 +19,8 @@ ARG torch_wheel="https://aws-pytorch-unified-cicd-binaries.s3.us-west-2.amazonaw ARG ft_wheel="https://publish.djl.ai/fastertransformer/fastertransformer-0.23.0-py3-none-any.whl" ARG tb_wheel="https://publish.djl.ai/tritonserver/r23.04/tritontoolkit-23.4-py3-none-any.whl" ARG ompi_version=4.1.4 -ARG transformers_version=4.27.3 -ARG accelerate_version=0.17.1 +ARG transformers_version=4.29.2 +ARG accelerate_version=0.19.0 ARG bitsandbytes_version=0.38.1 EXPOSE 8080 diff --git a/serving/docker/pytorch-cu118.Dockerfile b/serving/docker/pytorch-cu118.Dockerfile index 629e9c6ca..637f0890a 100644 --- a/serving/docker/pytorch-cu118.Dockerfile +++ b/serving/docker/pytorch-cu118.Dockerfile @@ -14,8 +14,8 @@ ARG version=11.8.0-cudnn8-devel-ubuntu20.04 FROM nvidia/cuda:$version as base ARG djl_version=0.23.0~SNAPSHOT -ARG torch_version=2.0.0 -ARG torch_vision_version=0.15.1 +ARG torch_version=2.0.1 +ARG torch_vision_version=0.15.2 ARG python_version=3.9 RUN mkdir -p /opt/djl/conf && \ diff --git a/serving/docker/pytorch-inf1.Dockerfile b/serving/docker/pytorch-inf1.Dockerfile deleted file mode 100644 index 9bec140bb..000000000 --- a/serving/docker/pytorch-inf1.Dockerfile +++ /dev/null @@ -1,60 +0,0 @@ -# -*- mode: dockerfile -*- -# Copyright 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file -# except in compliance with the License. A copy of the License is located at -# -# http://aws.amazon.com/apache2.0/ -# -# or in the "LICENSE.txt" file accompanying this file. This file is distributed on an "AS IS" -# BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or implied. See the License for -# the specific language governing permissions and limitations under the License. -FROM ubuntu:20.04 -ARG djl_version=0.23.0~SNAPSHOT -ARG torch_version=1.12.1 -EXPOSE 8080 - -# Sets up Path for Neuron tools -ENV PATH="/opt/bin/:/opt/aws/neuron/bin:${PATH}" - -COPY dockerd-entrypoint.sh /usr/local/bin/dockerd-entrypoint.sh -RUN chmod +x /usr/local/bin/dockerd-entrypoint.sh -WORKDIR /opt/djl -ENV JAVA_HOME=/usr/lib/jvm/java-11-openjdk-amd64 -ENV MODEL_SERVER_HOME=/opt/djl -ENV DJL_CACHE_DIR=/tmp/.djl.ai -ENV HUGGINGFACE_HUB_CACHE=/tmp -ENV TRANSFORMERS_CACHE=/tmp -ENV NEURON_SDK_PATH=/usr/local/lib/python3.7/dist-packages/torch_neuron/lib -ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$NEURON_SDK_PATH -ENV PYTORCH_LIBRARY_PATH=/usr/local/lib/python3.7/dist-packages/torch/lib -ENV PYTORCH_EXTRA_LIBRARY_PATH=$NEURON_SDK_PATH/libtorchneuron.so -ENV PYTORCH_PRECXX11=true -ENV PYTORCH_VERSION=1.12.1 -ENV JAVA_OPTS="-Xmx1g -Xms1g -Xss2m -XX:-UseContainerSupport -XX:+ExitOnOutOfMemoryError -Dai.djl.default_engine=PyTorch" - -ENTRYPOINT ["/usr/local/bin/dockerd-entrypoint.sh"] -CMD ["serve"] - -COPY scripts scripts/ -RUN mkdir -p /opt/djl/conf && \ - mkdir -p /opt/djl/deps && \ - mkdir -p /opt/ml/model -COPY config.properties /opt/djl/conf/ -RUN scripts/install_djl_serving.sh $djl_version && \ - mkdir -p /opt/djl/bin && cp scripts/telemetry.sh /opt/djl/bin && \ - echo "${djl_version} inf" > /opt/djl/bin/telemetry && \ - scripts/install_djl_serving.sh $djl_version ${torch_version} && \ - scripts/install_inferentia.sh && \ - scripts/patch_oss_dlc.sh python && \ - scripts/security_patch.sh pytorch-inf1 && \ - useradd -m -d /home/djl djl && \ - chown -R djl:djl /opt/djl && \ - rm -rf scripts && pip3 cache purge && \ - apt-get clean -y && rm -rf /var/lib/apt/lists/* - -LABEL maintainer="djl-dev@amazon.com" -LABEL dlc_major_version="1" -LABEL com.amazonaws.ml.engines.sagemaker.dlc.framework.djl.inf1="true" -LABEL com.amazonaws.sagemaker.capabilities.multi-models="true" -LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port="true" diff --git a/serving/docker/scripts/install_inferentia.sh b/serving/docker/scripts/install_inferentia.sh deleted file mode 100755 index 6645ab78a..000000000 --- a/serving/docker/scripts/install_inferentia.sh +++ /dev/null @@ -1,31 +0,0 @@ -#!/usr/bin/env bash - -set -ex - -# refer to: https://awsdocs-neuron.readthedocs-hosted.com/en/latest/neuron-deploy/docker-example/Dockerfile-libmode.html#libmode-dockerfile -apt-get update -DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends gnupg2 curl software-properties-common - -# install python 3.7 -- required by inferentia -add-apt-repository -y ppa:deadsnakes/ppa - -# remove python 3.8 -apt-get autoremove -y python3 - -echo "deb https://apt.repos.neuron.amazonaws.com bionic main" > /etc/apt/sources.list.d/neuron.list -curl -L https://apt.repos.neuron.amazonaws.com/GPG-PUB-KEY-AMAZON-AWS-NEURON.PUB | apt-key add - - -# Installing Neuron Tools -apt-get update -apt-get install -y aws-neuronx-tools aws-neuronx-dkms python3.7 python3.7-distutils pciutils -ln -sf /usr/bin/python3.7 /usr/bin/python3 -ln -sf /usr/bin/python3.7 /usr/bin/python - -curl -O https://bootstrap.pypa.io/get-pip.py -python3 get-pip.py -python3 -m pip install -U pip -rm get-pip.py - -# Include torch-neuron -python3 -m pip install numpy awscli -python3 -m pip install torch-neuron==1.12.1.* torchvision --extra-index-url=https://pip.repos.neuron.amazonaws.com diff --git a/serving/docker/scripts/pull_and_retag.sh b/serving/docker/scripts/pull_and_retag.sh index f93f5804d..a0d7fe279 100755 --- a/serving/docker/scripts/pull_and_retag.sh +++ b/serving/docker/scripts/pull_and_retag.sh @@ -2,7 +2,7 @@ version=$1 repo=$2 -images="cpu aarch64 cpu-full pytorch-inf1 pytorch-inf2 pytorch-cu118 deepspeed fastertransformer" +images="cpu aarch64 cpu-full pytorch-inf2 pytorch-cu118 deepspeed fastertransformer" for image in $images; do if [[ ! "$version" == "nightly" ]]; then diff --git a/serving/docker/scripts/security_patch.sh b/serving/docker/scripts/security_patch.sh index a06d9c0dd..5ce140596 100755 --- a/serving/docker/scripts/security_patch.sh +++ b/serving/docker/scripts/security_patch.sh @@ -8,6 +8,6 @@ if [[ "$IMAGE_NAME" == "deepspeed" ]] || \ [[ "$IMAGE_NAME" == "pytorch-cu118" ]] || \ [[ "$IMAGE_NAME" == "fastertransformer" ]]; then apt-get upgrade -y dpkg e2fsprogs libdpkg-perl libpcre2-8-0 libpcre3 openssl libsqlite3-0 libsepol1 libdbus-1-3 curl -elif [[ "$IMAGE_NAME" == "cpu" ]] || [[ "$IMAGE_NAME" == "pytorch-inf1" ]]; then +elif [[ "$IMAGE_NAME" == "cpu" ]]; then apt-get upgrade -y libpcre2-8-0 libdbus-1-3 curl fi diff --git a/tests/integration/download_models.sh b/tests/integration/download_models.sh index 11f91a692..89f98483d 100755 --- a/tests/integration/download_models.sh +++ b/tests/integration/download_models.sh @@ -2,7 +2,7 @@ set -e -platform=$1 # expected values are "cpu" "cpu-full" "pytorch-cu118" "pytorch-inf1" "aarch64" +platform=$1 # expected values are "cpu" "cpu-full" "pytorch-cu118" "pytorch-inf2" "aarch64" rm -rf models mkdir models && cd models @@ -22,10 +22,6 @@ aarch_models_urls=( "https://resources.djl.ai/test-models/onnxruntime/resnet18-v1-7.zip" ) -inf1_models_urls=( - "https://resources.djl.ai/test-models/pytorch/resnet18_inf1_1_12.tar.gz" -) - inf2_models_urls=( "https://resources.djl.ai/test-models/pytorch/resnet18_inf2_2_4.tar.gz" ) @@ -45,9 +41,6 @@ case $platform in cpu | cpu-full | pytorch-cu118) download "${general_platform_models_urls[@]}" ;; -pytorch-inf1) - download "${inf1_models_urls[@]}" - ;; pytorch-inf2) download "${inf2_models_urls[@]}" ;; @@ -55,7 +48,7 @@ aarch64) download "${aarch_models_urls[@]}" ;; *) - echo "Bad argument. Expecting one of the values: cpu, cpu-full, pytorch-cu118, pytorch-inf1, pytorch-inf2, aarch64" + echo "Bad argument. Expecting one of the values: cpu, cpu-full, pytorch-cu118, pytorch-inf2, aarch64" exit 1 ;; esac