From 5fa30d229536b4f36e77322a82d407f8aac13506 Mon Sep 17 00:00:00 2001 From: daquexian Date: Thu, 9 Sep 2021 18:59:51 +0800 Subject: [PATCH] add emoji in speed test output, make it continue-on-error (#6214) * add emoji in speed test output, make it continue-on-error Signed-off-by: daquexian * update yml Signed-off-by: daquexian Co-authored-by: oneflow-ci-bot <69100618+oneflow-ci-bot@users.noreply.github.com> --- .github/workflows/test.yml | 57 +++++++++++++++--------------- ci/test/test_speed_multi_client.sh | 13 ++++--- 2 files changed, 35 insertions(+), 35 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 40cb5a8f6b5..a51f3b416c7 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -500,6 +500,35 @@ jobs: run: | wget ${{ env.image_url }} docker load -i $(basename "${{ env.image_url }}") + - name: Checkout Oneflow-Inc/models + if: matrix.test_suite == 'cuda_new_interface' + uses: actions/checkout@v2 + with: + repository: Oneflow-Inc/models + ref: 7a6845f7d7133c02a01a75b9ca525e6c40e99639 + path: oneflow-models + - name: Speed test + id: speed + if: matrix.test_suite == 'cuda_new_interface' + continue-on-error: true + run: | + docker run \ + ${{ env.extra_docker_args }} ${{ env.pip_cache_docker_args }} \ + -e ONEFLOW_MODELS_DIR=$PWD/oneflow-models \ + ${{ env.image_tag }} \ + bash -c "python3 -m pip config set global.index-url ${{ env.pip_index_mirror }} && bash ci/test/try_install.sh && bash ci/test/test_speed_multi_client.sh" + - name: Post speed stats + if: matrix.test_suite == 'cuda_new_interface' + continue-on-error: true + uses: actions/github-script@v4 + with: + script: | + github.issues.createComment({ + issue_number: context.issue.number, + owner: context.repo.owner, + repo: context.repo.repo, + body: "
<details>\n Speed stats:\n\n ``` \n${{ steps.speed.outputs.stats }}\n ``` \n\n</details>
".replace(/\\n/g, '\n') + }) - name: Module API test timeout-minutes: 45 if: contains(fromJson('["cuda_new_interface", "cpu_new_interface"]'), matrix.test_suite) @@ -569,34 +598,6 @@ jobs: -e ONEFLOW_TEST_DIR=$PWD/python/oneflow/test/graph \ ${{ env.image_tag }} \ bash -c "python3 -m pip config set global.index-url ${{ env.pip_index_mirror }} && bash ci/test/try_install.sh && bash ci/test/generic_test_multi_client.sh" - - name: Checkout Oneflow-Inc/models - if: matrix.test_suite == 'cuda_new_interface' - uses: actions/checkout@v2 - with: - repository: Oneflow-Inc/models - ref: 7a6845f7d7133c02a01a75b9ca525e6c40e99639 - path: oneflow-models - - name: Speed test - id: speed - if: matrix.test_suite == 'cuda_new_interface' - run: | - docker run \ - ${{ env.extra_docker_args }} ${{ env.pip_cache_docker_args }} \ - -e ONEFLOW_MODELS_DIR=$PWD/oneflow-models \ - ${{ env.image_tag }} \ - bash -c "python3 -m pip config set global.index-url ${{ env.pip_index_mirror }} && bash ci/test/try_install.sh && bash ci/test/test_speed_multi_client.sh" - - name: Post speed stats - if: matrix.test_suite == 'cuda_new_interface' - continue-on-error: true - uses: actions/github-script@v4 - with: - script: | - github.issues.createComment({ - issue_number: context.issue.number, - owner: context.repo.owner, - repo: context.repo.repo, - body: "
<details>\n Speed stats:\n\n ``` \n${{ steps.speed.outputs.stats }}\n ``` \n\n</details>
".replace(/\\n/g, '\n') - }) - name: Single client op test timeout-minutes: 45 if: contains(fromJson('["cuda_op", "cpu"]'), matrix.test_suite) && needs.changed_files.outputs.should_run_single_client_tests == '1' diff --git a/ci/test/test_speed_multi_client.sh b/ci/test/test_speed_multi_client.sh index 87e40443277..547919a1757 100755 --- a/ci/test/test_speed_multi_client.sh +++ b/ci/test/test_speed_multi_client.sh @@ -8,7 +8,7 @@ trap 'rc=$?' ERR cd $ONEFLOW_MODELS_DIR function check_relative_speed { - awk -F'[:(]' -v threshold=$1 'BEGIN { ret=2 } /Relative speed/{ if ($2 > threshold) { ret=0 } else { ret=1 }} {print $0} END { exit ret }' + awk -F'[:(]' -v threshold=$1 'BEGIN { ret=2 } /Relative speed/{ if ($2 > threshold) { printf "✔️ "; ret=0 } else { printf "❌ "; ret=1 }} {print $0} END { exit ret }' } function write_to_file_and_print { @@ -22,12 +22,11 @@ python3 scripts/compare_speed_with_pytorch.py resnet50/models/resnet50.py resnet python3 scripts/compare_speed_with_pytorch.py resnet50/models/resnet50.py resnet50 2x3x224x224 --no-show-memory --times 50 | check_relative_speed 0.9 | write_to_file_and_print python3 scripts/compare_speed_with_pytorch.py resnet50/models/resnet50.py resnet50 1x3x224x224 --no-show-memory --times 50 | check_relative_speed 0.9 | write_to_file_and_print -# TODO: restore ddp speed test after allocator bug is fixed -# python3 -m oneflow.distributed.launch --nproc_per_node 2 scripts/compare_speed_with_pytorch.py resnet50/models/resnet50.py resnet50 16x3x224x224 --no-show-memory --times 50 --ddp | check_relative_speed 0.8 | write_to_file_and_print -# python3 -m oneflow.distributed.launch --nproc_per_node 2 scripts/compare_speed_with_pytorch.py resnet50/models/resnet50.py resnet50 8x3x224x224 --no-show-memory --times 50 --ddp | check_relative_speed 0.8 | write_to_file_and_print -# python3 -m oneflow.distributed.launch --nproc_per_node 2 scripts/compare_speed_with_pytorch.py resnet50/models/resnet50.py resnet50 4x3x224x224 --no-show-memory --times 50 --ddp | check_relative_speed 0.8 | write_to_file_and_print -# python3 -m oneflow.distributed.launch --nproc_per_node 2 scripts/compare_speed_with_pytorch.py resnet50/models/resnet50.py resnet50 2x3x224x224 --no-show-memory --times 50 --ddp | check_relative_speed 0.72 | write_to_file_and_print -# python3 -m oneflow.distributed.launch --nproc_per_node 2 scripts/compare_speed_with_pytorch.py resnet50/models/resnet50.py resnet50 1x3x224x224 --no-show-memory --times 50 --ddp | check_relative_speed 0.72 | write_to_file_and_print +python3 -m oneflow.distributed.launch --nproc_per_node 2 scripts/compare_speed_with_pytorch.py resnet50/models/resnet50.py resnet50 16x3x224x224 --no-show-memory --times 50 --ddp | check_relative_speed 0.8 | write_to_file_and_print +python3 -m oneflow.distributed.launch --nproc_per_node 2 scripts/compare_speed_with_pytorch.py resnet50/models/resnet50.py resnet50 8x3x224x224 --no-show-memory --times 50 --ddp | check_relative_speed 0.8 | write_to_file_and_print +python3 -m oneflow.distributed.launch --nproc_per_node 2 scripts/compare_speed_with_pytorch.py resnet50/models/resnet50.py resnet50 4x3x224x224 --no-show-memory --times 50 --ddp | check_relative_speed 0.8 | write_to_file_and_print +python3 -m oneflow.distributed.launch --nproc_per_node 2 scripts/compare_speed_with_pytorch.py resnet50/models/resnet50.py resnet50 2x3x224x224 --no-show-memory --times 50 --ddp | check_relative_speed 0.72 | write_to_file_and_print +python3 -m oneflow.distributed.launch --nproc_per_node 2 
scripts/compare_speed_with_pytorch.py resnet50/models/resnet50.py resnet50 1x3x224x224 --no-show-memory --times 50 --ddp | check_relative_speed 0.72 | write_to_file_and_print result="GPU Name: `nvidia-smi --query-gpu=name --format=csv,noheader -i 0` \n\n `cat result`" # escape newline for github actions: https://github.community/t/set-output-truncates-multiline-strings/16852/2
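
For reference, here is a minimal standalone sketch of what the reworked check_relative_speed filter does. The sample benchmark lines below are fabricated and only mimic the output of scripts/compare_speed_with_pytorch.py: lines containing "Relative speed" get a ✔️ prefix when the measured ratio beats the threshold and a ❌ prefix otherwise, every other line passes through unchanged, and the exit status reports the result (0 above threshold, 1 below, 2 if no "Relative speed" line was seen).

# Illustration only, not part of the patch; the input lines are made up.
threshold=0.9
printf 'OneFlow resnet50 time: 30.1ms\nRelative speed: 1.05 (higher is better)\n' |
  awk -F'[:(]' -v threshold=$threshold 'BEGIN { ret=2 } /Relative speed/{ if ($2 > threshold) { printf "✔️ "; ret=0 } else { printf "❌ "; ret=1 }} {print $0} END { exit ret }'
# Prints:
#   OneFlow resnet50 time: 30.1ms
#   ✔️ Relative speed: 1.05 (higher is better)
# and exits 0 because 1.05 > 0.9; a ratio below the threshold would be
# prefixed with ❌ and exit 1 instead.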
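
A note on the "escape newline for github actions" comment and the linked thread: ::set-output values are parsed line by line, so a report containing real newlines would be cut off after its first line. The result string written here carries literal \n markers, which the workflow's .replace(/\\n/g, '\n') later expands back into real newlines when posting the PR comment. The sketch below only illustrates that round trip; the GPU name and report text are made up, and the exact way the script emits the stats output is not shown in this excerpt.

# Hedged illustration, not copied from the script: keep the value on one
# physical line with literal "\n" markers so ::set-output does not truncate it;
# the workflow-side .replace(/\\n/g, '\n') restores the line breaks.
report='GPU Name: Tesla V100 \n\n resnet50 16x3x224x224 ... \n resnet50 8x3x224x224 ...'
echo "::set-output name=stats::$report"   # single physical line, nothing is lost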