Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Migrate PR builders from Jenkins to Github Actions #4

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
272 changes: 177 additions & 95 deletions .github/workflows/master.yml
@@ -1,156 +1,238 @@
name: master

on:
push:
branches:
- master
pull_request:
branches:
- master

jobs:
# TODO(SPARK-32248): Recover JDK 11 builds
# Build: build Spark and run the tests for specified modules.
build:

name: "Build modules: ${{ matrix.modules }} ${{ matrix.comment }} (JDK ${{ matrix.java }}, ${{ matrix.hadoop }}, ${{ matrix.hive }})"
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
java: [ '1.8', '11' ]
hadoop: [ 'hadoop-2.7', 'hadoop-3.2' ]
hive: [ 'hive-1.2', 'hive-2.3' ]
exclude:
- java: '11'
hive: 'hive-1.2'
- hadoop: 'hadoop-3.2'
hive: 'hive-1.2'
name: Build Spark - JDK${{ matrix.java }}/${{ matrix.hadoop }}/${{ matrix.hive }}

java:
- 1.8
HyukjinKwon marked this conversation as resolved.
Show resolved Hide resolved
hadoop:
- hadoop3.2
hive:
- hive2.3
# TODO(SPARK-32246): We don't test 'streaming-kinesis-asl' for now.
# Kinesis tests depends on external Amazon kinesis service.
# Note that the modules below are from sparktestsupport/modules.py.
modules:
- |-
core, unsafe, kvstore, avro,
network_common, network_shuffle, repl, launcher
examples, sketch, graphx
- |-
sql
- |-
catalyst, hive-thriftserver
- |-
streaming, sql-kafka-0-10, streaming-kafka-0-10
- |-
mllib-local, mllib
- |-
pyspark-sql, pyspark-mllib, pyspark-resource
- |-
pyspark-core, pyspark-streaming, pyspark-ml
- |-
sparkr
- |-
yarn, mesos, kubernetes, hadoop-cloud,
spark-ganglia-lgpl
# Here, we split Hive tests into some of slow ones and the rest of them.
included-tags: [""]
excluded-tags: [""]
comment: [""]
include:
- modules: hive
java: 1.8
hadoop: hadoop3.2
hive: hive2.3
included-tags: org.apache.spark.tags.SlowHiveTest
comment: "- slow tests"
- modules: hive
java: 1.8
hadoop: hadoop3.2
hive: hive2.3
excluded-tags: org.apache.spark.tags.SlowHiveTest
comment: "- other tests"
env:
TEST_ONLY_MODULES: ${{ matrix.modules }}
HADOOP_PROFILE: ${{ matrix.hadoop }}
HIVE_PROFILE: ${{ matrix.hive }}
# Github Actions' default miniconda
CONDA_PREFIX: /usr/share/miniconda
# Don't run the tests in parallel due to flakiness. See SparkParallelTestGrouping.
TEST_ONLY_EXCLUDED_TAGS: ${{ matrix.excluded-tags }}
TEST_ONLY_INCLUDED_TAGS: ${{ matrix.included-tags }}
steps:
- uses: actions/checkout@master
# We split caches because GitHub Action Cache has a 400MB-size limit.
- uses: actions/cache@v1
- name: Checkout Spark repository
uses: actions/checkout@v2
# Cache local repositories. Note that Github Actions cache has a 2G limit.
- name: Cache Scala, SBT, Maven and Zinc
uses: actions/cache@v1
with:
path: build
key: build-${{ hashFiles('**/pom.xml') }}
restore-keys: |
build-
- uses: actions/cache@v1
with:
path: ~/.m2/repository/com
key: ${{ matrix.java }}-${{ matrix.hadoop }}-maven-com-${{ hashFiles('**/pom.xml') }}
restore-keys: |
${{ matrix.java }}-${{ matrix.hadoop }}-maven-com-
- uses: actions/cache@v1
with:
path: ~/.m2/repository/org
key: ${{ matrix.java }}-${{ matrix.hadoop }}-maven-org-${{ hashFiles('**/pom.xml') }}
restore-keys: |
${{ matrix.java }}-${{ matrix.hadoop }}-maven-org-
- uses: actions/cache@v1
- name: Cache Maven local repository
uses: actions/cache@v2
with:
path: ~/.m2/repository/net
key: ${{ matrix.java }}-${{ matrix.hadoop }}-maven-net-${{ hashFiles('**/pom.xml') }}
path: ~/.m2/repository
key: ${{ matrix.java }}-${{ matrix.hadoop }}-maven-${{ hashFiles('**/pom.xml') }}
restore-keys: |
${{ matrix.java }}-${{ matrix.hadoop }}-maven-net-
- uses: actions/cache@v1
${{ matrix.java }}-${{ matrix.hadoop }}-maven-
- name: Cache Ivy local repository
uses: actions/cache@v2
with:
path: ~/.m2/repository/io
key: ${{ matrix.java }}-${{ matrix.hadoop }}-maven-io-${{ hashFiles('**/pom.xml') }}
path: ~/.ivy2/cache
key: ${{ matrix.java }}-${{ matrix.hadoop }}-ivy-${{ hashFiles('**/pom.xml') }}-${{ hashFiles('**/plugins.sbt') }}
restore-keys: |
${{ matrix.java }}-${{ matrix.hadoop }}-maven-io-
- name: Set up JDK ${{ matrix.java }}
${{ matrix.java }}-${{ matrix.hadoop }}-ivy-
- name: Install JDK ${{ matrix.java }}
uses: actions/setup-java@v1
with:
java-version: ${{ matrix.java }}
- name: Build with Maven
# PySpark
- name: Install PyPy3
# SQL component also has Python related tests, for example, IntegratedUDFTestUtils.
# Note that order of Python installations here matters because default python3 is
# overridden by pypy3.
uses: actions/setup-python@v2
if: contains(matrix.modules, 'pyspark') || (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-'))
with:
python-version: pypy3
architecture: x64
- name: Install Python 2.7
uses: actions/setup-python@v2
if: contains(matrix.modules, 'pyspark') || (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-'))
with:
python-version: 2.7
architecture: x64
- name: Install Python 3.6
uses: actions/setup-python@v2
if: contains(matrix.modules, 'pyspark') || (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-'))
with:
python-version: 3.6
architecture: x64
- name: Install Python packages
if: contains(matrix.modules, 'pyspark') || (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-'))
# PyArrow is not supported in PyPy yet, see ARROW-2651.
# TODO(SPARK-32247): scipy installation with PyPy fails for an unknown reason.
run: |
export MAVEN_OPTS="-Xmx2g -XX:ReservedCodeCacheSize=1g -Dorg.slf4j.simpleLogger.defaultLogLevel=WARN"
export MAVEN_CLI_OPTS="--no-transfer-progress"
python3 -m pip install numpy pyarrow pandas scipy
python3 -m pip list
python2 -m pip install numpy pyarrow pandas scipy
python2 -m pip list
pypy3 -m pip install numpy pandas
pypy3 -m pip list
# SparkR
- name: Install R 3.6
uses: r-lib/actions/setup-r@v1
if: contains(matrix.modules, 'sparkr')
with:
r-version: 3.6
- name: Install R packages
if: contains(matrix.modules, 'sparkr')
run: |
sudo apt-get install -y libcurl4-openssl-dev
sudo Rscript -e "install.packages(c('knitr', 'rmarkdown', 'testthat', 'devtools', 'e1071', 'survival', 'arrow', 'roxygen2'), repos='https://cloud.r-project.org/')"
# Show installed packages in R.
sudo Rscript -e 'pkg_list <- as.data.frame(installed.packages()[, c(1,3:4)]); pkg_list[is.na(pkg_list$Priority), 1:2, drop = FALSE]'
# Run the tests.
- name: "Run tests: ${{ matrix.modules }}"
run: |
# Hive tests become flaky when running in parallel as it's too intensive.
if [[ "$TEST_ONLY_MODULES" == "hive" ]]; then export SERIAL_SBT_TESTS=1; fi
mkdir -p ~/.m2
./build/mvn $MAVEN_CLI_OPTS -DskipTests -Pyarn -Pmesos -Pkubernetes -Phive -P${{ matrix.hive }} -Phive-thriftserver -P${{ matrix.hadoop }} -Phadoop-cloud -Djava.version=${{ matrix.java }} install
./dev/run-tests --parallelism 2
rm -rf ~/.m2/repository/org/apache/spark


# Linters: run the linters and other static analysis.
lint:
name: Linters, licenses, dependencies
runs-on: ubuntu-latest
name: Linters (Java/Scala/Python), licenses, dependencies
steps:
- uses: actions/checkout@master
- uses: actions/setup-java@v1
- name: Install JDK 11
uses: actions/setup-java@v1
with:
java-version: '11'
- uses: actions/setup-python@v1
java-version: 11
- name: Install Python 3.6
uses: actions/setup-python@v2
with:
python-version: '3.x'
architecture: 'x64'
- name: Scala
run: ./dev/lint-scala
- name: Java
run: ./dev/lint-java
- name: Python
python-version: 3.6
architecture: x64
- name: Install Python linter dependencies
run: |
pip install flake8 sphinx numpy
./dev/lint-python
- name: License
run: ./dev/check-license
- name: Dependencies
run: ./dev/test-dependencies.sh

lintr:
runs-on: ubuntu-latest
name: Linter (R)
steps:
- uses: actions/checkout@master
- uses: actions/setup-java@v1
pip3 install flake8 sphinx numpy
- name: Install R 3.6
uses: r-lib/actions/setup-r@v1
with:
java-version: '11'
- uses: r-lib/actions/setup-r@v1
with:
r-version: '3.6.2'
- name: Install lib
r-version: 3.6
- name: Install R linter dependencies and SparkR
run: |
sudo apt-get install -y libcurl4-openssl-dev
- name: install R packages
run: |
sudo Rscript -e "install.packages(c('curl', 'xml2', 'httr', 'devtools', 'testthat', 'knitr', 'rmarkdown', 'roxygen2', 'e1071', 'survival'), repos='https://cloud.r-project.org/')"
sudo Rscript -e "install.packages(c('devtools'), repos='https://cloud.r-project.org/')"
sudo Rscript -e "devtools::install_github('jimhester/lintr@v2.0.0')"
- name: package and install SparkR
run: ./R/install-dev.sh
- name: lint-r
./R/install-dev.sh
- name: Scala linter
run: ./dev/lint-scala
- name: Java linter
run: ./dev/lint-java
- name: Python linter
run: ./dev/lint-python
- name: R linter
run: ./dev/lint-r
- name: License test
run: ./dev/check-license
- name: Dependencies test
run: ./dev/test-dependencies.sh

# Documentation build.
docs:
name: Build documentation
runs-on: ubuntu-latest
name: Generate documents
steps:
- uses: actions/checkout@master
- uses: actions/cache@v1
- name: Cache Maven local repository
uses: actions/cache@v2
with:
path: ~/.m2/repository
key: docs-maven-repo-${{ hashFiles('**/pom.xml') }}
restore-keys: |
docs-maven-repo-
- uses: actions/setup-java@v1
docs-maven-
- name: Install JDK 1.8
uses: actions/setup-java@v1
with:
java-version: '1.8'
- uses: actions/setup-python@v1
java-version: 1.8
- name: Install Python 3.6
uses: actions/setup-python@v2
with:
python-version: '3.x'
architecture: 'x64'
- uses: actions/setup-ruby@v1
python-version: 3.6
architecture: x64
- name: Install Ruby 2.7
uses: actions/setup-ruby@v1
with:
ruby-version: '2.7'
- uses: r-lib/actions/setup-r@v1
ruby-version: 2.7
- name: Install R 3.6
uses: r-lib/actions/setup-r@v1
with:
r-version: '3.6.2'
- name: Install lib and pandoc
r-version: 3.6
- name: Install dependencies
run: |
sudo apt-get install -y libcurl4-openssl-dev pandoc
- name: Install packages
run: |
pip install sphinx mkdocs numpy
gem install jekyll jekyll-redirect-from rouge
sudo Rscript -e "install.packages(c('curl', 'xml2', 'httr', 'devtools', 'testthat', 'knitr', 'rmarkdown', 'roxygen2', 'e1071', 'survival'), repos='https://cloud.r-project.org/')"
- name: Run jekyll build
sudo Rscript -e "install.packages(c('devtools', 'testthat', 'knitr', 'rmarkdown', 'roxygen2'), repos='https://cloud.r-project.org/')"
- name: Run documentation build
run: |
cd docs
jekyll build
30 changes: 30 additions & 0 deletions common/tags/src/test/java/org/apache/spark/tags/SlowHiveTest.java
@@ -0,0 +1,30 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.spark.tags;

import org.scalatest.TagAnnotation;

import java.lang.annotation.ElementType;
import java.lang.annotation.Retention;
import java.lang.annotation.RetentionPolicy;
import java.lang.annotation.Target;

@TagAnnotation
@Retention(RetentionPolicy.RUNTIME)
@Target({ElementType.METHOD, ElementType.TYPE})
public @interface SlowHiveTest { }
Expand Up @@ -685,7 +685,8 @@ class MasterSuite extends SparkFunSuite
}
}

test("SPARK-27510: Master should avoid dead loop while launching executor failed in Worker") {
// TODO(SPARK-32250): Enable the test back. It is flaky in GitHub Actions.
ignore("SPARK-27510: Master should avoid dead loop while launching executor failed in Worker") {
val master = makeAliveMaster()
var worker: MockExecutorLaunchFailWorker = null
try {
Expand Down
10 changes: 8 additions & 2 deletions dev/run-pip-tests
Expand Up @@ -76,8 +76,14 @@ for python in "${PYTHON_EXECS[@]}"; do
VIRTUALENV_PATH="$VIRTUALENV_BASE"/$python
rm -rf "$VIRTUALENV_PATH"
if [ -n "$USE_CONDA" ]; then
CONDA_ENV_CMD="source"
if [ -f "$CONDA_PREFIX/etc/profile.d/conda.sh" ]; then
# See also https://github.com/conda/conda/issues/7980
source $CONDA_PREFIX/etc/profile.d/conda.sh
CONDA_ENV_CMD="conda"
fi
conda create -y -p "$VIRTUALENV_PATH" python=$python numpy pandas pip setuptools
source activate "$VIRTUALENV_PATH"
$CONDA_ENV_CMD activate "$VIRTUALENV_PATH"
else
mkdir -p "$VIRTUALENV_PATH"
virtualenv --python=$python "$VIRTUALENV_PATH"
Expand Down Expand Up @@ -120,7 +126,7 @@ for python in "${PYTHON_EXECS[@]}"; do

# conda / virtualenv environments need to be deactivated differently
if [ -n "$USE_CONDA" ]; then
source deactivate
$CONDA_ENV_CMD deactivate
else
deactivate
fi
Expand Down