From a0c89e5a17c0b8f57e23d17eaa1094062b4c9e27 Mon Sep 17 00:00:00 2001 From: Akarsha Rao <94624926+raoakarsha@users.noreply.github.com> Date: Thu, 18 Aug 2022 18:09:21 -0700 Subject: [PATCH 1/5] Update hpu-tests.yml --- .azure/hpu-tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.azure/hpu-tests.yml b/.azure/hpu-tests.yml index bdfada907cac9..d0085cf7a7c77 100644 --- a/.azure/hpu-tests.yml +++ b/.azure/hpu-tests.yml @@ -22,7 +22,7 @@ jobs: cancelTimeoutInMinutes: "2" pool: habana-gaudi-hpus container: - image: "vault.habana.ai/gaudi-docker/1.5.0/ubuntu20.04/habanalabs/pytorch-installer-1.11.0:latest" + image: "vault.habana.ai/gaudi-docker/1.6.0/ubuntu20.04/habanalabs/pytorch-installer-1.12.0:latest" options: "--runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host --shm-size=4g --name cd-container -v /usr/bin/docker:/tmp/docker:ro" workspace: clean: all From fff4c297b2c8ce86ebcd4e24dca9155f07aaf7a1 Mon Sep 17 00:00:00 2001 From: Akarsha Rao <94624926+raoakarsha@users.noreply.github.com> Date: Thu, 18 Aug 2022 18:11:02 -0700 Subject: [PATCH 2/5] Update Dockerfile --- dockers/ci-runner-hpu/Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dockers/ci-runner-hpu/Dockerfile b/dockers/ci-runner-hpu/Dockerfile index 588d23702e9ff..865885de9bc9a 100644 --- a/dockers/ci-runner-hpu/Dockerfile +++ b/dockers/ci-runner-hpu/Dockerfile @@ -16,8 +16,8 @@ # gaudi-docker-agent:latest ARG DIST="latest" -ARG GAUDI_VERSION="1.5.0" -ARG PYTORCH_INSTALLER_VERSION="1.11.0" +ARG GAUDI_VERSION="1.6.0" +ARG PYTORCH_INSTALLER_VERSION="1.12.0" FROM vault.habana.ai/gaudi-docker/${GAUDI_VERSION}/ubuntu20.04/habanalabs/pytorch-installer-${PYTORCH_INSTALLER_VERSION}:${DIST} LABEL maintainer="https://vault.habana.ai/" From c07847cb76fefba84f903cbb11320a8177fa42f2 Mon Sep 17 00:00:00 2001 From: Akarsha Rao <94624926+raoakarsha@users.noreply.github.com> Date: Thu, 18 Aug 2022 19:19:14 -0700 Subject: [PATCH 3/5] Update hpu-tests.yml --- .azure/hpu-tests.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.azure/hpu-tests.yml b/.azure/hpu-tests.yml index d0085cf7a7c77..1a2a97de8c011 100644 --- a/.azure/hpu-tests.yml +++ b/.azure/hpu-tests.yml @@ -62,11 +62,13 @@ jobs: python -m pytest -sv accelerators/test_hpu.py --forked --junitxml=hpu1_test-results.xml workingDirectory: tests/tests_pytorch displayName: 'Single card HPU test' + continueOnError: true - bash: | python -m pytest -sv accelerators/test_hpu.py --forked --hpus 8 --junitxml=hpu8_test-results.xml workingDirectory: tests/tests_pytorch displayName: 'Multi card(8) HPU test' + continueOnError: true - bash: | python -m pytest -sv plugins/precision/hpu/test_hpu.py --hmp-bf16 \ @@ -75,12 +77,14 @@ jobs: --junitxml=hpu1_precision_test-results.xml workingDirectory: tests/tests_pytorch displayName: 'HPU precision test' + continueOnError: true - bash: | export PYTHONPATH="${PYTHONPATH}:$(pwd)" python "pl_hpu/mnist_sample.py" workingDirectory: examples displayName: 'Testing: HPU examples' + continueOnError: true - task: PublishTestResults@2 inputs: From 3df358c42f8800ae50666dedef09522adb63a946 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 19 Aug 2022 02:20:55 +0000 Subject: [PATCH 4/5] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .azure/hpu-tests.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.azure/hpu-tests.yml b/.azure/hpu-tests.yml index 1a2a97de8c011..2c2ae920d8d11 100644 --- a/.azure/hpu-tests.yml +++ b/.azure/hpu-tests.yml @@ -62,13 +62,13 @@ jobs: python -m pytest -sv accelerators/test_hpu.py --forked --junitxml=hpu1_test-results.xml workingDirectory: tests/tests_pytorch displayName: 'Single card HPU test' - continueOnError: true + continueOnError: true - bash: | python -m pytest -sv accelerators/test_hpu.py --forked --hpus 8 --junitxml=hpu8_test-results.xml workingDirectory: tests/tests_pytorch displayName: 'Multi card(8) HPU test' - continueOnError: true + continueOnError: true - bash: | python -m pytest -sv plugins/precision/hpu/test_hpu.py --hmp-bf16 \ @@ -77,14 +77,14 @@ jobs: --junitxml=hpu1_precision_test-results.xml workingDirectory: tests/tests_pytorch displayName: 'HPU precision test' - continueOnError: true + continueOnError: true - bash: | export PYTHONPATH="${PYTHONPATH}:$(pwd)" python "pl_hpu/mnist_sample.py" workingDirectory: examples displayName: 'Testing: HPU examples' - continueOnError: true + continueOnError: true - task: PublishTestResults@2 inputs: From 12f14aa230a3283cd22334722e5728816a1c52e5 Mon Sep 17 00:00:00 2001 From: Akarsha Rao <94624926+raoakarsha@users.noreply.github.com> Date: Fri, 19 Aug 2022 10:27:05 -0700 Subject: [PATCH 5/5] Update hpu-tests.yml --- .azure/hpu-tests.yml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/.azure/hpu-tests.yml b/.azure/hpu-tests.yml index 2c2ae920d8d11..d0085cf7a7c77 100644 --- a/.azure/hpu-tests.yml +++ b/.azure/hpu-tests.yml @@ -62,13 +62,11 @@ jobs: python -m pytest -sv accelerators/test_hpu.py --forked --junitxml=hpu1_test-results.xml workingDirectory: tests/tests_pytorch displayName: 'Single card HPU test' - continueOnError: true - bash: | python -m pytest -sv accelerators/test_hpu.py --forked --hpus 8 --junitxml=hpu8_test-results.xml workingDirectory: tests/tests_pytorch displayName: 'Multi card(8) HPU test' - continueOnError: true - bash: | python -m pytest -sv plugins/precision/hpu/test_hpu.py --hmp-bf16 \ @@ -77,14 +75,12 @@ jobs: --junitxml=hpu1_precision_test-results.xml workingDirectory: tests/tests_pytorch displayName: 'HPU precision test' - continueOnError: true - bash: | export PYTHONPATH="${PYTHONPATH}:$(pwd)" python "pl_hpu/mnist_sample.py" workingDirectory: examples displayName: 'Testing: HPU examples' - continueOnError: true - task: PublishTestResults@2 inputs: