From ba53b664e52a3967a11cdef12dc88aee7c54cfbb Mon Sep 17 00:00:00 2001
From: Qing Lan <qingla@amazon.com>
Date: Wed, 31 May 2023 18:51:38 -0700
Subject: [PATCH] remove inf1 support (#785)

---
 .github/workflows/docker-nightly-publish.yml |  2 +-
 .github/workflows/integration.yml            | 73 +-------------------
 README.md                                    |  2 +-
 serving/docker/README.md                     |  4 +-
 serving/docker/docker-compose.yml            |  5 --
 serving/docker/fastertransformer.Dockerfile  |  4 +-
 serving/docker/pytorch-cu118.Dockerfile      |  4 +-
 serving/docker/pytorch-inf1.Dockerfile       | 60 ----------------
 serving/docker/scripts/install_inferentia.sh | 31 ---------
 serving/docker/scripts/pull_and_retag.sh     |  2 +-
 serving/docker/scripts/security_patch.sh     |  2 +-
 tests/integration/download_models.sh         | 11 +--
 12 files changed, 13 insertions(+), 187 deletions(-)
 delete mode 100644 serving/docker/pytorch-inf1.Dockerfile
 delete mode 100755 serving/docker/scripts/install_inferentia.sh

diff --git a/.github/workflows/docker-nightly-publish.yml b/.github/workflows/docker-nightly-publish.yml
index 9184947df..fec3b0591 100644
--- a/.github/workflows/docker-nightly-publish.yml
+++ b/.github/workflows/docker-nightly-publish.yml
@@ -15,7 +15,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        arch: [ cpu, cpu-full, deepspeed, pytorch-inf1, pytorch-inf2, pytorch-cu118, fastertransformer ]
+        arch: [ cpu, cpu-full, deepspeed, pytorch-inf2, pytorch-cu118, fastertransformer ]
     steps:
       - uses: actions/checkout@v3
       - name: Login to Docker
diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml
index f3baa0f40..b01372ff5 100644
--- a/.github/workflows/integration.yml
+++ b/.github/workflows/integration.yml
@@ -33,19 +33,9 @@ jobs:
           --fail \
           | jq '.token' | tr -d '"' )
           ./start_instance.sh action_graviton $token djl-serving
-      - name: Create new Inferentia instance
-        id: create_inf
-        run: |
-          cd /home/ubuntu/djl_benchmark_script/scripts
-          token=$( curl -X POST -H "Authorization: token ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}" \
-          https://api.github.com/repos/deepjavalibrary/djl-serving/actions/runners/registration-token \
-          --fail \
-          | jq '.token' | tr -d '"' )
-          ./start_instance.sh action_inf $token djl-serving
     outputs:
       gpu_instance_id: ${{ steps.create_gpu.outputs.action_gpu_instance_id }}
       aarch64_instance_id: ${{ steps.create_aarch64.outputs.action_graviton_instance_id }}
-      inf_instance_id: ${{ steps.create_inf.outputs.action_inf_instance_id }}
 
   cpu-test:
     runs-on: ubuntu-latest
@@ -153,65 +143,6 @@ jobs:
           name: ${{ matrix.arch }}-logs
           path: tests/integration/logs/
 
-  inferentia-test:
-    runs-on: [ self-hosted, inf ]
-    timeout-minutes: 30
-    needs: create-runners
-    steps:
-      - uses: actions/checkout@v3
-      - name: Clean env
-        run: |
-          yes | docker system prune -a --volumes
-          sudo rm -rf /home/ubuntu/actions-runner/_work/_tool/Java_Corretto_jdk/
-          echo "wait dpkg lock..."
-          while sudo fuser /var/{lib/{dpkg,apt/lists},cache/apt/archives}/lock >/dev/null 2>&1; do sleep 5; done
-      - name: Set up JDK 11
-        uses: actions/setup-java@v3
-        with:
-          distribution: 'corretto'
-          java-version: 11
-      - uses: actions/cache@v3
-        with:
-          path: ~/.gradle/caches
-          key: ${{ runner.os }}-gradle-${{ hashFiles('**/*.gradle*') }}
-          restore-keys: |
-            ${{ runner.os }}-gradle-
-      - name: Install DJL-Bench
-        working-directory: benchmark
-        run: ./gradlew installOnLinux
-      - name: Build container name
-        run: ./serving/docker/scripts/docker_name_builder.sh pytorch-inf1 ${{ github.event.inputs.djl-version }}
-      - name: Download models and dockers
-        working-directory: tests/integration
-        run: |
-          docker pull deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG
-          mkdir logs
-          ./download_models.sh pytorch-inf1
-      - name: Test Pytorch model
-        working-directory: tests/integration
-        run: |
-          ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models pytorch-inf1 \
-          serve -m test::PyTorch=file:/opt/ml/model/resnet18_inf1_1_12.tar.gz?model_name=resnet18_inf1_1_12
-          ./test_client.sh image/jpg models/kitten.jpg
-          docker rm -f $(docker ps -aq)
-      - name: Test Python mode
-        working-directory: tests/integration
-        run: |
-          ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models pytorch-inf1 \
-          serve -m test::Python=file:/opt/ml/model/resnet18_inf1_1_12.tar.gz
-          ./test_client.sh image/jpg models/kitten.jpg
-          docker rm -f $(docker ps -aq)
-      - name: On fail step
-        if: ${{ failure() }}
-        working-directory: tests/integration
-        run: |
-          cat logs/serving.log
-      - name: Upload test logs
-        uses: actions/upload-artifact@v3
-        with:
-          name: pytorch-inf1-logs
-          path: tests/integration/logs/
-
   gpu-test:
     runs-on: [ self-hosted, gpu ]
     timeout-minutes: 30
@@ -336,7 +267,7 @@ jobs:
   stop-runners:
     if: always()
     runs-on: [ self-hosted, scheduler ]
-    needs: [ create-runners, inferentia-test, aarch64-test, gpu-test ]
+    needs: [ create-runners, aarch64-test, gpu-test ]
     steps:
       - name: Stop all instances
         run: |
@@ -345,5 +276,3 @@ jobs:
           ./stop_instance.sh $instance_id
           instance_id=${{ needs.create-runners.outputs.aarch64_instance_id }}
           ./stop_instance.sh $instance_id
-          instance_id=${{ needs.create-runners.outputs.inf_instance_id }}
-          ./stop_instance.sh $instance_id
diff --git a/README.md b/README.md
index 51ef784ce..36c8195e6 100644
--- a/README.md
+++ b/README.md
@@ -82,7 +82,7 @@ To see examples, see the [starting page](serving/docs/starting.md).
 ### More examples
 
 - [Serving a Python model](https://github.com/deepjavalibrary/djl-demo/tree/master/huggingface/python)
-- [Serving on Inf1 EC2 instance](https://github.com/deepjavalibrary/djl-demo/tree/master/huggingface/inferentia)
+- [Serving on Inferentia EC2 instance](https://github.com/deepjavalibrary/djl-demo/tree/master/huggingface/inferentia)
 - [Serving with docker](https://github.com/deepjavalibrary/djl-serving/tree/master/serving/docker)
 
 ### More command line options
diff --git a/serving/docker/README.md b/serving/docker/README.md
index 624af6d24..0bafd9e27 100644
--- a/serving/docker/README.md
+++ b/serving/docker/README.md
@@ -51,6 +51,6 @@ docker run -it --runtime=nvidia --shm-size 2g -v $PWD:/opt/ml/model -p 8080:8080
 mkdir models
 cd models
 
-curl -O https://resources.djl.ai/test-models/pytorch/bert_qa_inf1.tar.gz
-docker run --device /dev/neuron0 -it --rm -v $PWD:/opt/ml/model -p 8080:8080 deepjavalibrary/djl-serving:0.22.1-inf1
+curl -O https://resources.djl.ai/test-models/pytorch/resnet18_inf2_2_4.tar.gz
+docker run --device /dev/neuron0 -it --rm -v $PWD:/opt/ml/model -p 8080:8080 deepjavalibrary/djl-serving:0.22.1-pytorch-inf2
 ```
diff --git a/serving/docker/docker-compose.yml b/serving/docker/docker-compose.yml
index 7f0465bc4..27d4b361a 100644
--- a/serving/docker/docker-compose.yml
+++ b/serving/docker/docker-compose.yml
@@ -22,11 +22,6 @@ services:
       context: .
       dockerfile: deepspeed.Dockerfile
     image: "deepjavalibrary/djl-serving:${RELEASE_VERSION}deepspeed${NIGHTLY}"
-  pytorch-inf1:
-    build:
-      context: .
-      dockerfile: pytorch-inf1.Dockerfile
-    image: "deepjavalibrary/djl-serving:${RELEASE_VERSION}pytorch-inf1${NIGHTLY}"
   pytorch-cu118:
     build:
       context: .
diff --git a/serving/docker/fastertransformer.Dockerfile b/serving/docker/fastertransformer.Dockerfile
index 992f225bc..fe8e89c5e 100644
--- a/serving/docker/fastertransformer.Dockerfile
+++ b/serving/docker/fastertransformer.Dockerfile
@@ -19,8 +19,8 @@ ARG torch_wheel="https://aws-pytorch-unified-cicd-binaries.s3.us-west-2.amazonaw
 ARG ft_wheel="https://publish.djl.ai/fastertransformer/fastertransformer-0.23.0-py3-none-any.whl"
 ARG tb_wheel="https://publish.djl.ai/tritonserver/r23.04/tritontoolkit-23.4-py3-none-any.whl"
 ARG ompi_version=4.1.4
-ARG transformers_version=4.27.3
-ARG accelerate_version=0.17.1
+ARG transformers_version=4.29.2
+ARG accelerate_version=0.19.0
 ARG bitsandbytes_version=0.38.1
 
 EXPOSE 8080
diff --git a/serving/docker/pytorch-cu118.Dockerfile b/serving/docker/pytorch-cu118.Dockerfile
index 629e9c6ca..637f0890a 100644
--- a/serving/docker/pytorch-cu118.Dockerfile
+++ b/serving/docker/pytorch-cu118.Dockerfile
@@ -14,8 +14,8 @@ ARG version=11.8.0-cudnn8-devel-ubuntu20.04
 FROM nvidia/cuda:$version as base
 
 ARG djl_version=0.23.0~SNAPSHOT
-ARG torch_version=2.0.0
-ARG torch_vision_version=0.15.1
+ARG torch_version=2.0.1
+ARG torch_vision_version=0.15.2
 ARG python_version=3.9
 
 RUN mkdir -p /opt/djl/conf && \
diff --git a/serving/docker/pytorch-inf1.Dockerfile b/serving/docker/pytorch-inf1.Dockerfile
deleted file mode 100644
index 9bec140bb..000000000
--- a/serving/docker/pytorch-inf1.Dockerfile
+++ /dev/null
@@ -1,60 +0,0 @@
-# -*- mode: dockerfile -*-
-# Copyright 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file
-# except in compliance with the License. A copy of the License is located at
-#
-# http://aws.amazon.com/apache2.0/
-#
-# or in the "LICENSE.txt" file accompanying this file. This file is distributed on an "AS IS"
-# BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or implied. See the License for
-# the specific language governing permissions and limitations under the License.
-FROM ubuntu:20.04
-ARG djl_version=0.23.0~SNAPSHOT
-ARG torch_version=1.12.1
-EXPOSE 8080
-
-# Sets up Path for Neuron tools
-ENV PATH="/opt/bin/:/opt/aws/neuron/bin:${PATH}"
-
-COPY dockerd-entrypoint.sh /usr/local/bin/dockerd-entrypoint.sh
-RUN chmod +x /usr/local/bin/dockerd-entrypoint.sh
-WORKDIR /opt/djl
-ENV JAVA_HOME=/usr/lib/jvm/java-11-openjdk-amd64
-ENV MODEL_SERVER_HOME=/opt/djl
-ENV DJL_CACHE_DIR=/tmp/.djl.ai
-ENV HUGGINGFACE_HUB_CACHE=/tmp
-ENV TRANSFORMERS_CACHE=/tmp
-ENV NEURON_SDK_PATH=/usr/local/lib/python3.7/dist-packages/torch_neuron/lib
-ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$NEURON_SDK_PATH
-ENV PYTORCH_LIBRARY_PATH=/usr/local/lib/python3.7/dist-packages/torch/lib
-ENV PYTORCH_EXTRA_LIBRARY_PATH=$NEURON_SDK_PATH/libtorchneuron.so
-ENV PYTORCH_PRECXX11=true
-ENV PYTORCH_VERSION=1.12.1
-ENV JAVA_OPTS="-Xmx1g -Xms1g -Xss2m -XX:-UseContainerSupport -XX:+ExitOnOutOfMemoryError -Dai.djl.default_engine=PyTorch"
-
-ENTRYPOINT ["/usr/local/bin/dockerd-entrypoint.sh"]
-CMD ["serve"]
-
-COPY scripts scripts/
-RUN mkdir -p /opt/djl/conf && \
-    mkdir -p /opt/djl/deps && \
-    mkdir -p /opt/ml/model
-COPY config.properties /opt/djl/conf/
-RUN scripts/install_djl_serving.sh $djl_version && \
-    mkdir -p /opt/djl/bin && cp scripts/telemetry.sh /opt/djl/bin && \
-    echo "${djl_version} inf" > /opt/djl/bin/telemetry && \
-    scripts/install_djl_serving.sh $djl_version ${torch_version} && \
-    scripts/install_inferentia.sh && \
-    scripts/patch_oss_dlc.sh python && \
-    scripts/security_patch.sh pytorch-inf1 && \
-    useradd -m -d /home/djl djl && \
-    chown -R djl:djl /opt/djl && \
-    rm -rf scripts && pip3 cache purge && \
-    apt-get clean -y && rm -rf /var/lib/apt/lists/*
-
-LABEL maintainer="djl-dev@amazon.com"
-LABEL dlc_major_version="1"
-LABEL com.amazonaws.ml.engines.sagemaker.dlc.framework.djl.inf1="true"
-LABEL com.amazonaws.sagemaker.capabilities.multi-models="true"
-LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port="true"
diff --git a/serving/docker/scripts/install_inferentia.sh b/serving/docker/scripts/install_inferentia.sh
deleted file mode 100755
index 6645ab78a..000000000
--- a/serving/docker/scripts/install_inferentia.sh
+++ /dev/null
@@ -1,31 +0,0 @@
-#!/usr/bin/env bash
-
-set -ex
-
-# refer to: https://awsdocs-neuron.readthedocs-hosted.com/en/latest/neuron-deploy/docker-example/Dockerfile-libmode.html#libmode-dockerfile
-apt-get update
-DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends gnupg2 curl software-properties-common
-
-# install python 3.7 -- required by inferentia
-add-apt-repository -y ppa:deadsnakes/ppa
-
-# remove python 3.8
-apt-get autoremove -y python3
-
-echo "deb https://apt.repos.neuron.amazonaws.com bionic main" > /etc/apt/sources.list.d/neuron.list
-curl -L https://apt.repos.neuron.amazonaws.com/GPG-PUB-KEY-AMAZON-AWS-NEURON.PUB | apt-key add -
-
-# Installing Neuron Tools
-apt-get update
-apt-get install -y aws-neuronx-tools aws-neuronx-dkms python3.7 python3.7-distutils pciutils
-ln -sf /usr/bin/python3.7 /usr/bin/python3
-ln -sf /usr/bin/python3.7 /usr/bin/python
-
-curl -O https://bootstrap.pypa.io/get-pip.py
-python3 get-pip.py
-python3 -m pip install -U pip
-rm get-pip.py
-
-# Include torch-neuron
-python3 -m pip install numpy awscli
-python3 -m pip install torch-neuron==1.12.1.* torchvision --extra-index-url=https://pip.repos.neuron.amazonaws.com
diff --git a/serving/docker/scripts/pull_and_retag.sh b/serving/docker/scripts/pull_and_retag.sh
index f93f5804d..a0d7fe279 100755
--- a/serving/docker/scripts/pull_and_retag.sh
+++ b/serving/docker/scripts/pull_and_retag.sh
@@ -2,7 +2,7 @@
 
 version=$1
 repo=$2
-images="cpu aarch64 cpu-full pytorch-inf1 pytorch-inf2 pytorch-cu118 deepspeed fastertransformer"
+images="cpu aarch64 cpu-full pytorch-inf2 pytorch-cu118 deepspeed fastertransformer"
 
 for image in $images; do
     if [[ ! "$version" == "nightly" ]]; then
diff --git a/serving/docker/scripts/security_patch.sh b/serving/docker/scripts/security_patch.sh
index a06d9c0dd..5ce140596 100755
--- a/serving/docker/scripts/security_patch.sh
+++ b/serving/docker/scripts/security_patch.sh
@@ -8,6 +8,6 @@ if [[ "$IMAGE_NAME" == "deepspeed" ]] || \
    [[ "$IMAGE_NAME" == "pytorch-cu118" ]] || \
    [[ "$IMAGE_NAME" == "fastertransformer" ]]; then
   apt-get upgrade -y dpkg e2fsprogs libdpkg-perl libpcre2-8-0 libpcre3 openssl libsqlite3-0 libsepol1 libdbus-1-3 curl
-elif [[ "$IMAGE_NAME" == "cpu" ]] ||  [[ "$IMAGE_NAME" == "pytorch-inf1" ]]; then
+elif [[ "$IMAGE_NAME" == "cpu" ]]; then
   apt-get upgrade -y libpcre2-8-0 libdbus-1-3 curl
 fi
diff --git a/tests/integration/download_models.sh b/tests/integration/download_models.sh
index 11f91a692..89f98483d 100755
--- a/tests/integration/download_models.sh
+++ b/tests/integration/download_models.sh
@@ -2,7 +2,7 @@
 
 set -e
 
-platform=$1 # expected values are "cpu" "cpu-full" "pytorch-cu118" "pytorch-inf1" "aarch64"
+platform=$1 # expected values are "cpu" "cpu-full" "pytorch-cu118" "pytorch-inf2" "aarch64"
 
 rm -rf models
 mkdir models && cd models
@@ -22,10 +22,6 @@ aarch_models_urls=(
   "https://resources.djl.ai/test-models/onnxruntime/resnet18-v1-7.zip"
 )
 
-inf1_models_urls=(
-  "https://resources.djl.ai/test-models/pytorch/resnet18_inf1_1_12.tar.gz"
-)
-
 inf2_models_urls=(
   "https://resources.djl.ai/test-models/pytorch/resnet18_inf2_2_4.tar.gz"
 )
@@ -45,9 +41,6 @@ case $platform in
 cpu | cpu-full | pytorch-cu118)
   download "${general_platform_models_urls[@]}"
   ;;
-pytorch-inf1)
-  download "${inf1_models_urls[@]}"
-  ;;
 pytorch-inf2)
   download "${inf2_models_urls[@]}"
   ;;
@@ -55,7 +48,7 @@ aarch64)
   download "${aarch_models_urls[@]}"
   ;;
 *)
-  echo "Bad argument. Expecting one of the values: cpu, cpu-full, pytorch-cu118, pytorch-inf1, pytorch-inf2, aarch64"
+  echo "Bad argument. Expecting one of the values: cpu, cpu-full, pytorch-cu118, pytorch-inf2, aarch64"
   exit 1
   ;;
 esac