Skip to content

Commit

Permalink
Merge pull request #851 from FedML-AI/dev/v0.7.0
Browse files Browse the repository at this point in the history
Sync the model inference.
  • Loading branch information
fedml-alex committed Mar 31, 2023
2 parents 1b18774 + fb0d323 commit a2a7961
Show file tree
Hide file tree
Showing 28 changed files with 323 additions and 323 deletions.
4 changes: 2 additions & 2 deletions devops/Jenkinsfile
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,7 @@ pipeline {
sh 'cp -Rf ./aws/* ./devops/scripts/aws'
sh 'cd ./devops/scripts/;unzip ./docker.zip;unzip ./kubectl.zip;tar -zxvf ./helm-v3.10.3-linux-amd64.tar.gz;mv linux-amd64/helm /usr/local/bin/helm;cd ../../'

//sh './devops/scripts/build-fedml-docker.sh'
sh './devops/scripts/build-fedml-docker.sh'

sh 'docker build --network=host -f ./devops/dockerfile/device-image/Dockerfile-Base -t public.ecr.aws/x6k8q1x9/fedml-device-image:base .'

Expand Down Expand Up @@ -305,7 +305,7 @@ pipeline {
container('base') {
withCredentials([usernamePassword(passwordVariable : 'DOCKERHUB_PASSWORD' ,usernameVariable : 'DOCKERHUB_USERNAME' ,credentialsId : "$DOCKERHUB_CREDENTIAL_ID" ,)]) {
sh 'docker login --username $DOCKERHUB_USERNAME --password $DOCKERHUB_PASSWORD'
//sh './devops/scripts/push-fedml-docker.sh'
sh './devops/scripts/push-fedml-docker.sh'
}

withCredentials([usernamePassword(passwordVariable : 'AWS_IAM_ACCESS_ID' ,usernameVariable : 'AWS_ECR_USERNAME' ,credentialsId : "$AWS_ECR_CREDENTIAL_ID_PRODUCTION" ,)]) {
Expand Down
2 changes: 1 addition & 1 deletion devops/dockerfile/device-image/Dockerfile-Base
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
ARG VERSION=base
ARG IS_BUILDING_GPU_IMAGE=0
#ARG BASE_IMAGE=continuumio/miniconda3:4.7.12
ARG BASE_IMAGE=fedml/fedml:latest-torch1.12.1-cuda11.3-cudnn8-devel
ARG BASE_IMAGE=fedml/fedml:latest-torch1.13.1-cuda11.6-cudnn8-devel
FROM ${BASE_IMAGE}

ADD ./devops/scripts/aws ./fedml/aws
Expand Down
2 changes: 1 addition & 1 deletion devops/dockerfile/model-inference-ingress/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
ARG VERSION=dev
ARG BASE_IMAGE=public.ecr.aws/x6k8q1x9/fedml-device-image:base
ARG FEDML_PIP_HOME=/usr/local/lib/python3.7/dist-packages/fedml
ARG FEDML_PIP_HOME=/usr/local/lib/python3.8/dist-packages/fedml
FROM ${BASE_IMAGE}

ADD ./devops/scripts/runner.sh ./fedml/runner.sh
Expand Down
2 changes: 1 addition & 1 deletion devops/dockerfile/model-premise-master/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
ARG VERSION=dev
ARG BASE_IMAGE=public.ecr.aws/x6k8q1x9/fedml-device-image:base
ARG FEDML_PIP_HOME=/usr/local/lib/python3.7/dist-packages/fedml
ARG FEDML_PIP_HOME=/usr/local/lib/python3.8/dist-packages/fedml
FROM ${BASE_IMAGE}

ADD ./devops/scripts/runner.sh ./fedml/runner.sh
Expand Down
2 changes: 1 addition & 1 deletion devops/dockerfile/model-premise-slave/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
ARG VERSION=dev
ARG IS_BUILDING_GPU_IMAGE=0
ARG BASE_IMAGE=public.ecr.aws/x6k8q1x9/fedml-device-image:base
ARG FEDML_PIP_HOME=/usr/local/lib/python3.7/dist-packages/fedml
ARG FEDML_PIP_HOME=/usr/local/lib/python3.8/dist-packages/fedml
FROM ${BASE_IMAGE}

ADD ./devops/scripts/runner.sh ./fedml/runner.sh
Expand Down
89 changes: 89 additions & 0 deletions devops/dockerfile/multi-stages-build/Dockerfile-Inference-Dev
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
# Multi-stage development build for the FedML model-inference images.
# Pick a stage with `docker build --target <stage-name>`:
#   fedml-inference-env-base  installs requirements.txt and the FedML sources
#   fedml-inference-master    runs `fedml model device login ... -m`
#   fedml-inference-slave     runs `fedml model device login` without -m
#   fedml-inference-ingress   runs device_model_inference_entry.py
#   fedml-inference-backend   runs tritonserver in model-repository poll mode
#
# Global build arguments. An ARG declared before the first FROM is visible to
# FROM instructions only, and FROM can *only* see such global ARGs. Every
# stage that needs a value therefore re-declares the ARG (without a value) to
# inherit the default given here.
ARG VERSION=dev
ARG BASE_IMAGE=public.ecr.aws/x6k8q1x9/fedml-device-image:base
ARG FEDML_PIP_HOME=/usr/local/lib/python3.8/dist-packages/fedml
# Must live here (not inside a stage) so that the backend stage's FROM can
# expand it.
ARG INF_BACKEND_BASE_IMAGE=nvcr.io/nvidia/tritonserver:22.01-py3


FROM ${BASE_IMAGE} AS fedml-inference-env-base
ARG FEDML_PIP_HOME

# Absolute destinations: the later stages copy from /fedml/... with
# `COPY --from`, so the layout must not depend on the base image's WORKDIR.
ADD ./devops/scripts/runner.sh /fedml/runner.sh

ADD ./devops/scripts/requirements.txt /fedml/requirements.txt

RUN chmod a+x /fedml/runner.sh
RUN echo "Updating..."

RUN pip3 install -r /fedml/requirements.txt

# ./python holds the pip project; its `fedml` package is additionally copied
# over the package directory at FEDML_PIP_HOME.
COPY ./python /fedml/fedml-pip
COPY ./python/fedml ${FEDML_PIP_HOME}
WORKDIR /fedml/fedml-pip
RUN pip3 install -e ./

WORKDIR /fedml


FROM ${BASE_IMAGE} AS fedml-inference-master
ARG VERSION
ARG FEDML_PIP_HOME

WORKDIR /fedml

# runner.sh is invoked by CMD below, so bring it along from the env stage.
COPY --from=fedml-inference-env-base /fedml/runner.sh /fedml/runner.sh
COPY --from=fedml-inference-env-base /fedml/fedml-pip /fedml/fedml-pip
# ./python was copied *as* /fedml/fedml-pip, so the package sits directly
# under it (there is no intermediate python/ directory).
COPY --from=fedml-inference-env-base /fedml/fedml-pip/fedml ${FEDML_PIP_HOME}

ENV ACCOUNT_ID=0 FEDML_VERSION=${VERSION} SERVER_DEVICE_ID=0 SERVER_OS_NAME=linux INFER_HOST="127.0.0.1" \
    FEDML_REDIS_ADDR="127.0.0.1" FEDML_REDIS_PORT=6379 FEDML_REDIS_PASSWORD="fedml_default"

CMD fedml model device login ${ACCOUNT_ID} -v ${FEDML_VERSION} -p -m \
    -ih ${INFER_HOST} -id ${SERVER_DEVICE_ID} -os ${SERVER_OS_NAME} \
    -ra ${FEDML_REDIS_ADDR} -rp ${FEDML_REDIS_PORT} -rpw ${FEDML_REDIS_PASSWORD};./runner.sh


FROM ${BASE_IMAGE} AS fedml-inference-slave
ARG VERSION
ARG FEDML_PIP_HOME

WORKDIR /fedml

COPY --from=fedml-inference-env-base /fedml/runner.sh /fedml/runner.sh
COPY --from=fedml-inference-env-base /fedml/fedml-pip /fedml/fedml-pip
COPY --from=fedml-inference-env-base /fedml/fedml-pip/fedml ${FEDML_PIP_HOME}

ENV ACCOUNT_ID=0 FEDML_VERSION=${VERSION} CLIENT_DEVICE_ID=0 CLIENT_OS_NAME=linux INFER_HOST="127.0.0.1"

CMD fedml model device login ${ACCOUNT_ID} -v ${FEDML_VERSION} -p \
    -id ${CLIENT_DEVICE_ID} -os ${CLIENT_OS_NAME} -ih ${INFER_HOST}; ./runner.sh


FROM ${BASE_IMAGE} AS fedml-inference-ingress
ARG FEDML_PIP_HOME

WORKDIR /fedml

COPY --from=fedml-inference-env-base /fedml/runner.sh /fedml/runner.sh
COPY --from=fedml-inference-env-base /fedml/fedml-pip /fedml/fedml-pip
COPY --from=fedml-inference-env-base /fedml/fedml-pip/fedml ${FEDML_PIP_HOME}

# FEDML_PIP_HOME is promoted from ARG to ENV because the shell-form CMD below
# expands it at container start, when build ARGs no longer exist.
ENV FEDML_PIP_HOME=${FEDML_PIP_HOME} \
    FEDML_REDIS_ADDR="local" FEDML_REDIS_PORT=6379 FEDML_REDIS_PASSWORD="fedml_default" \
    FEDML_END_POINT_ID=0 FEDML_MODEL_ID=0 \
    FEDML_MODEL_NAME="model" FEDML_MODEL_VERSION="v1" \
    FEDML_INFER_URL="infer" FEDML_CONFIG_VERSION="release" \
    FEDML_INFER_PORT=5001

# -ra takes the redis address and -rp the redis port, matching the master
# stage above (the two flags used to be swapped here).
# NOTE(review): confirm against the argument parser of
# device_model_inference_entry.py, which is not part of this file.
CMD python3 ${FEDML_PIP_HOME}/cli/model_deployment/device_model_inference_entry.py \
    -ra ${FEDML_REDIS_ADDR} -rp ${FEDML_REDIS_PORT} -rpw ${FEDML_REDIS_PASSWORD} \
    -ep ${FEDML_END_POINT_ID} -mi ${FEDML_MODEL_ID} \
    -mn ${FEDML_MODEL_NAME} -mv ${FEDML_MODEL_VERSION} \
    -iu ${FEDML_INFER_URL} -cv ${FEDML_CONFIG_VERSION} \
    -ip ${FEDML_INFER_PORT};./runner.sh


FROM ${INF_BACKEND_BASE_IMAGE} AS fedml-inference-backend

ADD ./devops/scripts/runner.sh ./fedml/runner.sh

ADD ./devops/scripts/requirements.txt ./fedml/requirements.txt

RUN chmod a+x ./fedml/runner.sh

ENV FEDML_MODEL_SERVING_REPO_SCAN_INTERVAL=3 \
    FEDML_MODEL_SERVING_REPO_PATH=/home/fedml/fedml-client/fedml/models_serving

# Create the model repository directory first, then start tritonserver
# polling it every FEDML_MODEL_SERVING_REPO_SCAN_INTERVAL seconds.
CMD mkdir -p ${FEDML_MODEL_SERVING_REPO_PATH};tritonserver --model-control-mode=poll \
    --strict-model-config=false \
    --backend-config=onnxruntime,default-max-batch-size=1 \
    --repository-poll-secs=${FEDML_MODEL_SERVING_REPO_SCAN_INTERVAL} \
    --model-repository=${FEDML_MODEL_SERVING_REPO_PATH}
117 changes: 117 additions & 0 deletions devops/dockerfile/multi-stages-build/Dockerfile-Traing-Dev
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
# Global build arguments for every stage of this multi-stage Dockerfile.
# An ARG declared before the first FROM is visible to FROM instructions only;
# a stage that needs the value must re-declare the ARG (without a value) to
# inherit the default given here.
ARG VERSION=dev
ARG IS_BUILDING_GPU_IMAGE=0
#ARG BASE_IMAGE=continuumio/miniconda3:4.7.12
ARG BASE_IMAGE=fedml/fedml:latest-torch1.13.1-cuda11.6-cudnn8-devel
ARG FEDML_PIP_HOME=/usr/local/lib/python3.8/dist-packages/fedml

# Stage: base image with the AWS CLI helpers, docker/kubectl binaries, the
# conda environment and the released fedml package installed.
FROM ${BASE_IMAGE} AS fedml-image-base
# Re-declared so that setup-conda-env.sh below receives the real value
# instead of an empty string.
ARG IS_BUILDING_GPU_IMAGE

ADD ./devops/scripts/aws ./fedml/aws
ADD ./devops/scripts/setup-aws-cli.sh ./fedml/setup-aws-cli.sh
ADD ./devops/scripts/set-aws-credentials.sh ./fedml/set-aws-credentials.sh
ADD ./devops/scripts/docker /usr/bin/
ADD ./devops/scripts/kubectl /usr/bin/

ADD ./devops/scripts/requirements.txt ./fedml/requirements.txt
ADD ./devops/scripts/setup-conda-env.sh ./fedml/setup-conda-env.sh

RUN chmod a+x /usr/bin/docker
RUN chmod a+x /usr/bin/kubectl
RUN chmod a+x ./fedml/setup-aws-cli.sh
RUN chmod a+x ./fedml/set-aws-credentials.sh
RUN ./fedml/setup-aws-cli.sh

# `apt-get update` and `apt-get install` share one RUN so that a cached
# `update` layer can never be paired with a newer package list. `sudo` is
# dropped: the surrounding steps already run apt-get and chmod without it.
# Output is discarded to keep the build log quiet.
RUN apt-get update \
 && DEBIAN_FRONTEND=noninteractive apt-get install -qq \
        gettext-base unar wget libquadmath0 < /dev/null > /dev/null

RUN chmod a+x ./fedml/setup-conda-env.sh
RUN bash ./fedml/setup-conda-env.sh ${IS_BUILDING_GPU_IMAGE}

#RUN pip install --upgrade pip
#RUN pip3 uninstall fedml

RUN pip3 install --upgrade fedml
RUN pip3 install -r ./fedml/requirements.txt
RUN pip3 install --upgrade gevent


# Stage: installs requirements.txt and the FedML sources from ./python.
# The agent stages copy /fedml/fedml-pip out of this stage with `COPY --from`.
FROM ${BASE_IMAGE} AS fedml-training-env-base
# Re-declared so the global default is visible inside this stage; without it
# ${FEDML_PIP_HOME} expands to an empty COPY destination.
ARG FEDML_PIP_HOME

# Absolute destinations: other stages reference /fedml/... directly, so the
# layout must not depend on the base image's WORKDIR.
ADD ./devops/scripts/runner.sh /fedml/runner.sh

ADD ./devops/scripts/requirements.txt /fedml/requirements.txt

RUN chmod a+x /fedml/runner.sh
RUN echo "Updating..."

RUN pip3 install -r /fedml/requirements.txt

# ./python holds the pip project; its `fedml` package is additionally copied
# over the package directory at FEDML_PIP_HOME.
COPY ./python /fedml/fedml-pip
COPY ./python/fedml ${FEDML_PIP_HOME}
WORKDIR /fedml/fedml-pip
RUN pip3 install -e ./
#RUN pip3 install -e '.[tensorflow]'
#RUN pip3 install -e '.[jax]'
#RUN pip3 install -e '.[mxnet]'

WORKDIR /fedml


# Stage: cloud server image; logs in with role `cloud_server` and then hands
# over to runner.sh.
FROM ${BASE_IMAGE} AS fedml-image-base-with-version
# Re-declare the global ARGs; otherwise ${VERSION} and ${FEDML_PIP_HOME}
# expand to empty strings inside this stage.
ARG VERSION
ARG FEDML_PIP_HOME

RUN pip3 install MNN==1.1.6

WORKDIR /fedml

# runner.sh is invoked by CMD below, so bring it along from the env stage.
COPY --from=fedml-training-env-base /fedml/runner.sh /fedml/runner.sh
COPY --from=fedml-training-env-base /fedml/fedml-pip /fedml/fedml-pip
COPY --from=fedml-training-env-base /fedml/fedml-pip/fedml ${FEDML_PIP_HOME}

# Defaults below are placeholders that are overridden at `docker run` time.
ENV MODE=normal FEDML_VERSION=${VERSION} ACCOUNT_ID=0 SERVER_DEVICE_ID=0 \
    FEDML_PACKAGE_NAME=package FEDML_PACKAGE_URL=s3_url \
    FEDML_RUNNER_CMD=3dsad

CMD fedml login ${ACCOUNT_ID} -v ${FEDML_VERSION} -s -r cloud_server -rc ${FEDML_RUNNER_CMD} -id ${SERVER_DEVICE_ID}; ./runner.sh


# Stage: client agent image; runs `fedml login ... -c` and then runner.sh.
FROM ${BASE_IMAGE} AS fedml-client-agent
# Re-declare the global ARGs; otherwise ${VERSION} and ${FEDML_PIP_HOME}
# expand to empty strings inside this stage.
ARG VERSION
ARG FEDML_PIP_HOME

WORKDIR /fedml

# runner.sh is invoked by CMD below, so bring it along from the env stage.
COPY --from=fedml-training-env-base /fedml/runner.sh /fedml/runner.sh
COPY --from=fedml-training-env-base /fedml/fedml-pip /fedml/fedml-pip
COPY --from=fedml-training-env-base /fedml/fedml-pip/fedml ${FEDML_PIP_HOME}

ENV ACCOUNT_ID=0 FEDML_VERSION=${VERSION} CLIENT_DEVICE_ID=0 CLIENT_OS_NAME=linux

CMD fedml login ${ACCOUNT_ID} -v ${FEDML_VERSION} -c -id ${CLIENT_DEVICE_ID} -os ${CLIENT_OS_NAME}; ./runner.sh


# Stage: server agent image; configures AWS credentials, logs in with role
# `cloud_agent` and then hands over to runner.sh.
FROM ${BASE_IMAGE} AS fedml-server-agent
# Re-declare the global ARGs; otherwise ${VERSION} and ${FEDML_PIP_HOME}
# expand to empty strings inside this stage.
ARG VERSION
ARG FEDML_PIP_HOME

WORKDIR /fedml

# runner.sh is invoked by CMD below, so bring it along from the env stage.
COPY --from=fedml-training-env-base /fedml/runner.sh /fedml/runner.sh
COPY --from=fedml-training-env-base /fedml/fedml-pip /fedml/fedml-pip
# The FedML sources exist only in fedml-training-env-base, directly under
# /fedml/fedml-pip/fedml. The fedml-image-base stage never creates
# /fedml/fedml-pip, so copying from it cannot succeed.
COPY --from=fedml-training-env-base /fedml/fedml-pip/fedml ${FEDML_PIP_HOME}

ENV MODE=normal FEDML_VERSION=${VERSION} ACCOUNT_ID=0 SERVER_AGENT_ID=0 \
    AWS_IAM_ACCESS_ID=0 \
    AWS_IAM_ACCESS_KEY=0 \
    AWS_REGION=0

# NOTE(review): set-aws-credentials.sh is only added by the fedml-image-base
# stage, while this stage starts from ${BASE_IMAGE}; confirm that image
# already ships /fedml/set-aws-credentials.sh.
CMD ./set-aws-credentials.sh ${AWS_IAM_ACCESS_ID} ${AWS_IAM_ACCESS_KEY} ${AWS_REGION};fedml login ${ACCOUNT_ID} -v ${FEDML_VERSION} -s -r cloud_agent -id ${SERVER_AGENT_ID};./runner.sh


# Stage: edge server image; runs `fedml login ... -s` and then runner.sh.
FROM ${BASE_IMAGE} AS fedml-edge-server
# Re-declare the global ARGs; otherwise ${VERSION} and ${FEDML_PIP_HOME}
# expand to empty strings inside this stage.
ARG VERSION
ARG FEDML_PIP_HOME

WORKDIR /fedml

# runner.sh is invoked by CMD below, so bring it along from the env stage.
COPY --from=fedml-training-env-base /fedml/runner.sh /fedml/runner.sh
COPY --from=fedml-training-env-base /fedml/fedml-pip /fedml/fedml-pip
# The FedML sources exist only in fedml-training-env-base, directly under
# /fedml/fedml-pip/fedml. The fedml-image-base stage never creates
# /fedml/fedml-pip, so copying from it cannot succeed.
COPY --from=fedml-training-env-base /fedml/fedml-pip/fedml ${FEDML_PIP_HOME}

ENV ACCOUNT_ID=0 FEDML_VERSION=${VERSION} SERVER_DEVICE_ID=0 SERVER_OS_NAME=linux

CMD fedml login ${ACCOUNT_ID} -v ${FEDML_VERSION} -s -id ${SERVER_DEVICE_ID} -os ${SERVER_OS_NAME};./runner.sh
4 changes: 2 additions & 2 deletions devops/k8s/README_MODEL_SERVING.md
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ Moreover, on GCP k8s cluster, you should set up your GPU nodes based on the foll
After you have installed FedML model serving packages, you may run the helm upgrade commands to modify parameters.

e.g.
```helm upgrade --set "autoscaling.enabled=true" --set replicaCount=$InstanceNumber fedml-model-premise-master fedml-model-premise-master-0.7.397.tgz -n $YourNameSpace```
```helm upgrade --set "autoscaling.enabled=true" --set replicaCount=$InstanceNumber fedml-model-premise-master fedml-model-premise-master-latest.tgz -n $YourNameSpace```

### 6). Config your CNAME record in your DNS provider (Godaddy, wordpress, AWS Route 53...)
#### (a). Find the Kubernetes nginx ingress named 'fedml-model-inference-gateway' in your Kubernetes cluster.
Expand Down Expand Up @@ -150,7 +150,7 @@ Pull remote model(ModelOps) to local model repository:
1. Q: Is automatic scaling supported?
A: Yes. Use the `helm upgrade` CLI. For example, you can upgrade with the following command:

```helm upgrade --set "autoscaling.enabled=true" --set replicaCount=$InstanceNumber fedml-model-premise-master fedml-model-premise-master-0.7.397.tgz -n $YourNameSpace```
```helm upgrade --set "autoscaling.enabled=true" --set replicaCount=$InstanceNumber fedml-model-premise-master fedml-model-premise-master-latest.tgz -n $YourNameSpace```


2. Q: Does the inference endpoint support private IP? \
Expand Down
Binary file modified devops/k8s/fedml-model-premise-master-latest.tgz
Binary file not shown.
2 changes: 1 addition & 1 deletion devops/k8s/fedml-model-premise-master/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ type: application
# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/)
version: 0.7.700
version: 0.8.2

# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to
Expand Down
Binary file modified devops/k8s/fedml-model-premise-slave-latest.tgz
Binary file not shown.
2 changes: 1 addition & 1 deletion devops/k8s/fedml-model-premise-slave/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ type: application
# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/)
version: 0.7.700
version: 0.8.2

# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,8 @@ spec:
resources:
limits:
memory: 8192Mi
requests:
memory: 4096Mi
volumeMounts:
- name: home-dir
mountPath: {{ .Values.volume.clientHomeDirMountPath }}
Expand Down
Loading

0 comments on commit a2a7961

Please sign in to comment.