From bff320841607f57d5884106d6e2b9c5ab59c8365 Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Thu, 13 Aug 2020 20:50:47 -0700 Subject: [PATCH] [SPARK-32357][INFRA] Publish failed and succeeded test reports in GitHub Actions This PR proposes to report the failed and succeeded tests in GitHub Actions in order to improve the development velocity by leveraging [ScaCap/action-surefire-report](https://github.com/ScaCap/action-surefire-report). See the example below: ![Screen Shot 2020-08-13 at 8 17 52 PM](https://user-images.githubusercontent.com/6477701/90128649-28f7f280-dda2-11ea-9211-e98e34332f6b.png) Note that we cannot just use [ScaCap/action-surefire-report](https://github.com/ScaCap/action-surefire-report) in Apache Spark because PRs are from forked repositories, and GitHub secrets are unavailable for security reasons. This plugin and all similar plugins require a GitHub token that has write access in order to post test results, but such a token is unavailable in PRs. To work around this limitation, I took this approach: 1. In workflow A, run the tests and upload the JUnit XML test results. GitHub provides a way to upload and download files as artifacts. 2. GitHub introduced a new event type [`workflow_run`](https://github.blog/2020-08-03-github-actions-improvements-for-fork-and-pull-request-workflows/) 10 days ago. By leveraging this, workflow A triggers another workflow B. 3. Workflow B is in the main repo instead of the forked repo, and has the write access the plugin needs. In workflow B, it downloads the artifact uploaded from workflow A (from the forked repository). 4. Workflow B generates the test reports to post from the JUnit XML files. 5. Workflow B looks up the PR and posts the test reports. The `workflow_run` event is a very new feature, and it looks like not many GitHub Actions plugins support it yet. 
In order to make this work with [ScaCap/action-surefire-report](https://github.com/ScaCap/action-surefire-report), I had to fork two GitHub Actions plugins: - [ScaCap/action-surefire-report](https://github.com/ScaCap/action-surefire-report) to have this custom fix: https://github.com/HyukjinKwon/action-surefire-report/commit/c96094cc35061fcf154a7cb46807f2f3e2339476 It adds a `commit` argument to specify the commit to post the test reports to. With `workflow_run`, workflow B can access the commit from workflow A. - [dawidd6/action-download-artifact](https://github.com/dawidd6/action-download-artifact) to have this custom fix: https://github.com/HyukjinKwon/action-download-artifact/commit/750b71af351aba467757d7be6924199bb08db4ed It adds support for downloading all artifacts from workflow A in workflow B. By default, it only supports specifying the name of an artifact. Note that I was not able to use the official [actions/download-artifact](https://github.com/actions/download-artifact) because: - It does not support downloading artifacts between different workflows, see also https://github.com/actions/download-artifact/issues/3. Once this issue is resolved, we can switch it back to [actions/download-artifact](https://github.com/actions/download-artifact). I plan to make a pull request to each of the two repositories so we don't have to rely on forks. Currently, it's difficult to check the failed tests. You have to scroll through long GitHub Actions logs. No user-facing change, dev-only. Manually tested at: https://github.com/HyukjinKwon/spark/pull/17, https://github.com/HyukjinKwon/spark/pull/18, https://github.com/HyukjinKwon/spark/pull/19, https://github.com/HyukjinKwon/spark/pull/20, and the master branch of my forked repository. Closes #29333 from HyukjinKwon/SPARK-32357-fix. 
Lead-authored-by: Hyukjin Kwon Co-authored-by: HyukjinKwon Signed-off-by: Dongjoon Hyun --- .github/workflows/master.yml | 28 +++++++++++++++---------- .github/workflows/test_report.yml | 35 +++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+), 11 deletions(-) create mode 100644 .github/workflows/test_report.yml diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index 54d1716880e5d..b7f51d4fe6a90 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -1,4 +1,4 @@ -name: master +name: Build and test on: push: @@ -9,7 +9,6 @@ on: - branch-2.4 jobs: - # TODO(SPARK-32248): Recover JDK 11 builds # Build: build Spark and run the tests for specified modules. build: name: "Build modules: ${{ matrix.modules }} ${{ matrix.comment }} (JDK ${{ matrix.java }}, ${{ matrix.hadoop }}, ${{ matrix.hive }})" @@ -25,21 +24,21 @@ jobs: # Kinesis tests depends on external Amazon kinesis service. # Note that the modules below are from sparktestsupport/modules.py. modules: - - |- + - >- core, unsafe, kvstore, avro, network-common, network-shuffle, repl, launcher, examples, sketch, graphx - - |- + - >- catalyst, hive-thriftserver - - |- + - >- streaming, sql-kafka-0-10, streaming-kafka-0-10, mllib-local, mllib, yarn, mesos, kubernetes, hadoop-cloud, spark-ganglia-lgpl - - |- + - >- pyspark-sql, pyspark-mllib - - |- + - >- pyspark-core, pyspark-streaming, pyspark-ml - - |- + - >- sparkr # Here, we split Hive and SQL tests into some of slow ones and the rest of them. included-tags: [""] @@ -137,14 +136,15 @@ jobs: # PyArrow is not supported in PyPy yet, see ARROW-2651. # TODO(SPARK-32247): scipy installation with PyPy fails for an unknown reason. 
run: | - python2.7 -m pip install numpy pyarrow pandas scipy + python2.7 -m pip install numpy pyarrow pandas scipy xmlrunner python2.7 -m pip list + # PyPy does not have xmlrunner pypy3 -m pip install numpy pandas pypy3 -m pip list - name: Install Python packages (Python 3.8) if: contains(matrix.modules, 'pyspark') || (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-')) run: | - python3.8 -m pip install numpy pyarrow pandas scipy + python3.8 -m pip install numpy pyarrow pandas scipy xmlrunner python3.8 -m pip list # SparkR - name: Install R 4.0 @@ -163,13 +163,19 @@ jobs: # Show installed packages in R. sudo Rscript -e 'pkg_list <- as.data.frame(installed.packages()[, c(1,3:4)]); pkg_list[is.na(pkg_list$Priority), 1:2, drop = FALSE]' # Run the tests. - - name: "Run tests: ${{ matrix.modules }}" + - name: Run tests run: | # Hive tests become flaky when running in parallel as it's too intensive. if [[ "$MODULES_TO_TEST" == "hive" ]]; then export SERIAL_SBT_TESTS=1; fi mkdir -p ~/.m2 ./dev/run-tests --parallelism 2 --modules "$MODULES_TO_TEST" --included-tags "$INCLUDED_TAGS" --excluded-tags "$EXCLUDED_TAGS" rm -rf ~/.m2/repository/org/apache/spark + - name: Upload test results to report + if: always() + uses: actions/upload-artifact@v2 + with: + name: test-results-${{ matrix.modules }}-${{ matrix.comment }}-${{ matrix.java }}-${{ matrix.hadoop }}-${{ matrix.hive }} + path: "**/target/test-reports/*.xml" # Static analysis, and documentation build lint: diff --git a/.github/workflows/test_report.yml b/.github/workflows/test_report.yml new file mode 100644 index 0000000000000..fc937a2c369e3 --- /dev/null +++ b/.github/workflows/test_report.yml @@ -0,0 +1,35 @@ +name: Report test results +on: + workflow_run: + workflows: ["Build and test"] + types: + - completed + +jobs: + test_report: + runs-on: ubuntu-latest + steps: + - name: Download test results to report + # TODO(SPARK-32605): It was forked to have a custom fix + # 
https://github.com/HyukjinKwon/action-surefire-report/commit/c96094cc35061fcf154a7cb46807f2f3e2339476 + # in order to add the support of custom target commit SHA. It should be contributed back to the original + # plugin and avoid using the fork. + uses: HyukjinKwon/action-download-artifact@master + with: + github_token: ${{ secrets.GITHUB_TOKEN }} + workflow: ${{ github.event.workflow_run.workflow_id }} + commit: ${{ github.event.workflow_run.head_commit.id }} + - name: Publish test report + # TODO(SPARK-32606): It was forked to have a custom fix + # https://github.com/HyukjinKwon/action-download-artifact/commit/750b71af351aba467757d7be6924199bb08db4ed + # in order to add the support to download all artifacts. It should be contributed back to the original + # plugin and avoid using the fork. + # Alternatively, we can use the official actions/download-artifact once they support to download artifacts + # between different workflows, see also https://github.com/actions/download-artifact/issues/3 + uses: HyukjinKwon/action-surefire-report@master + with: + check_name: Test report + github_token: ${{ secrets.GITHUB_TOKEN }} + report_paths: "**/target/test-reports/*.xml" + commit: ${{ github.event.workflow_run.head_commit.id }} +