From 829558f4f8d2c635d6ca17050d1392282d53f209 Mon Sep 17 00:00:00 2001
From: Yiyao Wei
Date: Wed, 22 Sep 2021 21:06:40 +0200
Subject: [PATCH] model regression test: support various GPU runners (#9686)

* tmp: testing tag
* Select github runner based on tf version
* Remove \
* fix path
* Add a default image tag
* Keep the first three characters
* Syntax error
* Update github gpu runner
* more comments
---
 .github/configs/tf-cuda.json                    | 17 +++++++++++++++++
 .../runner/github-runner-deployment.yaml.tmpl   |  2 +-
 .../ci-model-regression-on-schedule.yml         | 17 +++++++++++++++++
 .github/workflows/ci-model-regression.yml       | 17 +++++++++++++++++
 4 files changed, 52 insertions(+), 1 deletion(-)
 create mode 100644 .github/configs/tf-cuda.json

diff --git a/.github/configs/tf-cuda.json b/.github/configs/tf-cuda.json
new file mode 100644
index 00000000000..bc5ddc487b8
--- /dev/null
+++ b/.github/configs/tf-cuda.json
@@ -0,0 +1,17 @@
+{
+  "default_image_tag": "latest",
+  "config": [
+    {
+      "TF": "2.3",
+      "IMAGE_TAG": "cuda-10.1-cudnn7"
+    },
+    {
+      "TF": "2.5",
+      "IMAGE_TAG": "cuda-11.2.0-cudnn8"
+    },
+    {
+      "TF": "2.6",
+      "IMAGE_TAG": "cuda-11.2.0-cudnn8"
+    }
+  ]
+}
diff --git a/.github/runner/github-runner-deployment.yaml.tmpl b/.github/runner/github-runner-deployment.yaml.tmpl
index 8bd903b3f86..46d704e614f 100644
--- a/.github/runner/github-runner-deployment.yaml.tmpl
+++ b/.github/runner/github-runner-deployment.yaml.tmpl
@@ -25,7 +25,7 @@ spec:
       terminationGracePeriodSeconds: 720
       containers:
       - name: github-runner
-        image: {{getenv "GH_RUNNER_IMAGE"}}:latest
+        image: {{getenv "GH_RUNNER_IMAGE"}}:{{getenv "GH_RUNNER_IMAGE_TAG" "latest"}}
         imagePullPolicy: Always
         livenessProbe:
           initialDelaySeconds: 30
diff --git a/.github/workflows/ci-model-regression-on-schedule.yml b/.github/workflows/ci-model-regression-on-schedule.yml
index a698117e489..5f7f01551e1 100644
--- a/.github/workflows/ci-model-regression-on-schedule.yml
+++ b/.github/workflows/ci-model-regression-on-schedule.yml
@@ -57,8 +57,25 @@ jobs:
           curl -o gomplate -sSL https://github.com/hairyhenderson/gomplate/releases/download/v3.9.0/gomplate_linux-amd64
           chmod 755 gomplate
 
+      - name: Get TensorFlow version
+        run: |-
+          # Read the TF requirement line from pyproject.toml; stay best-effort if it is missing
+          TF_REQUIREMENT=$(grep -m 1 "tensorflow =" pyproject.toml || true)
+          # Keep the first major.minor pair, e.g. 2.3 for ~2.3.4 and 2.10 for >=2.10.1,<2.11
+          TF_VERSION=$(echo "$TF_REQUIREMENT" | grep -oE '[0-9]+\.[0-9]+' | head -n 1)
+          echo "TensorFlow version: $TF_VERSION"
+          echo "TF_VERSION=$TF_VERSION" >> "$GITHUB_ENV"
+
+      # Use the CUDA/cuDNN runner image listed for TF_VERSION (default tag if unlisted)
+      - name: Prepare GitHub runner image tag
+        run: |-
+          GH_RUNNER_IMAGE_TAG=$(jq -r --arg tf "$TF_VERSION" 'first(.config[] | select(.TF == $tf) | .IMAGE_TAG) // .default_image_tag' .github/configs/tf-cuda.json)
+          echo "GitHub runner image tag for TensorFlow $TF_VERSION is ${GH_RUNNER_IMAGE_TAG}"
+          echo "GH_RUNNER_IMAGE_TAG=$GH_RUNNER_IMAGE_TAG" >> "$GITHUB_ENV"
+
       - name: Render deployment template
         run: |-
+          export GH_RUNNER_IMAGE_TAG=${{ env.GH_RUNNER_IMAGE_TAG }}
           export GH_RUNNER_IMAGE=${{ secrets.GH_RUNNER_IMAGE }}
           ./gomplate -f .github/runner/github-runner-deployment.yaml.tmpl -o runner_deployment.yaml
 
diff --git a/.github/workflows/ci-model-regression.yml b/.github/workflows/ci-model-regression.yml
index 8a841e99338..87ba6c5c46b 100644
--- a/.github/workflows/ci-model-regression.yml
+++ b/.github/workflows/ci-model-regression.yml
@@ -153,8 +153,25 @@ jobs:
           sudo curl -o /usr/local/bin/gomplate -sSL https://github.com/hairyhenderson/gomplate/releases/download/v3.9.0/gomplate_linux-amd64
           sudo chmod +x /usr/local/bin/gomplate
 
+      - name: Get TensorFlow version
+        run: |-
+          # Read the TF requirement line from pyproject.toml; stay best-effort if it is missing
+          TF_REQUIREMENT=$(grep -m 1 "tensorflow =" pyproject.toml || true)
+          # Keep the first major.minor pair, e.g. 2.3 for ~2.3.4 and 2.10 for >=2.10.1,<2.11
+          TF_VERSION=$(echo "$TF_REQUIREMENT" | grep -oE '[0-9]+\.[0-9]+' | head -n 1)
+          echo "TensorFlow version: $TF_VERSION"
+          echo "TF_VERSION=$TF_VERSION" >> "$GITHUB_ENV"
+
+      # Use the CUDA/cuDNN runner image listed for TF_VERSION (default tag if unlisted)
+      - name: Prepare GitHub runner image tag
+        run: |-
+          GH_RUNNER_IMAGE_TAG=$(jq -r --arg tf "$TF_VERSION" 'first(.config[] | select(.TF == $tf) | .IMAGE_TAG) // .default_image_tag' .github/configs/tf-cuda.json)
+          echo "GitHub runner image tag for TensorFlow $TF_VERSION is ${GH_RUNNER_IMAGE_TAG}"
+          echo "GH_RUNNER_IMAGE_TAG=$GH_RUNNER_IMAGE_TAG" >> "$GITHUB_ENV"
+
       - name: Render deployment template
         run: |-
+          export GH_RUNNER_IMAGE_TAG=${{ env.GH_RUNNER_IMAGE_TAG }}
           export GH_RUNNER_IMAGE=${{ secrets.GH_RUNNER_IMAGE }}
           gomplate -f .github/runner/github-runner-deployment.yaml.tmpl -o runner_deployment.yaml
 