112 changes: 112 additions & 0 deletions .github/actions/build-and-test-pyspark/action.yml
@@ -0,0 +1,112 @@
name: 'Build and Test PySpark'
author: 'Apache Spark'
description: 'A composite GitHub Action that builds and tests a set of PySpark modules'

inputs:
job-type:
description: "The type of the job: regular, scheduled, pyspark-coverage-scheduled"
required: true
branch:
description: "The branch"
required: true
java-version:
description: "The Java version"
required: true
hadoop-version:
description: "The Hadoop version"
required: true
hive-version:
description: "The Hive version"
required: true
modules:
description: "The modules to be build and tested as a comma-separated list"
required: true
ansi_enabled:
description: "Use ANSI mode: 'true' or 'false'"
required: false
default: "false"

runs:
using: 'composite'
steps:
- name: Set env
shell: bash
run: |
echo "MODULES_TO_TEST=${{ inputs.modules }}" >> $GITHUB_ENV
echo "EXCLUDED_TAGS=${{ inputs.excluded-tags }}" >> $GITHUB_ENV
echo "INCLUDED_TAGS=${{ inputs.included-tags }}" >> $GITHUB_ENV
echo "HADOOP_PROFILE=${{ inputs.hadoop-version }}" >> $GITHUB_ENV
echo "HIVE_PROFILE=${{ inputs.hive-version }}" >> $GITHUB_ENV
echo "GITHUB_PREV_SHA=${{ github.event.before }}" >> $GITHUB_ENV
echo "SPARK_LOCAL_IP=localhost" >> $GITHUB_ENV

# Cache local repositories. Note that GitHub Actions cache has a 2G limit.
- name: Cache Scala, SBT and Maven
uses: actions/cache@v2
with:
path: |
build/apache-maven-*
build/scala-*
build/*.jar
~/.sbt
key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }}
restore-keys: |
build-

- name: Cache Coursier local repository
uses: actions/cache@v2
with:
path: ~/.cache/coursier
key: pyspark-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }}
restore-keys: |
pyspark-coursier-

- name: Install Java ${{ inputs.java-version }}
uses: actions/setup-java@v1
with:
java-version: ${{ inputs.java-version }}

- name: List Python packages (Python 3.9, PyPy3)
shell: bash
run: |
python3.9 -m pip list
pypy3 -m pip list

- name: Install Conda for pip packaging test
shell: bash
run: |
curl -s https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh > miniconda.sh
bash miniconda.sh -b -p $HOME/miniconda

# Run the tests.
- name: Run tests
shell: bash
run: |
export PATH=$PATH:$HOME/miniconda/bin
./dev/run-tests --parallelism 1 --modules "$MODULES_TO_TEST"

- name: Upload coverage to Codecov
if: inputs.job-type == 'pyspark-coverage-scheduled'
uses: codecov/codecov-action@v2
with:
files: ./python/coverage.xml
flags: unittests
name: PySpark

- name: Upload test results to report
if: always()
uses: actions/upload-artifact@v2
with:
name: test-results-${{ inputs.modules }}--8-${{ inputs.hadoop-version }}-hive2.3
path: "**/target/test-reports/*.xml"

- name: Upload unit tests log files
if: failure()
uses: actions/upload-artifact@v2
with:
name: unit-tests-log-${{ inputs.modules }}--8-${{ inputs.hadoop-version }}-hive2.3
path: "**/target/unit-tests.log"

branding:
icon: 'check-circle'
color: 'green'
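
For context, here is a minimal sketch of how a workflow job might invoke this composite action, assuming the action file lives at .github/actions/build-and-test-pyspark and the repository has already been checked out. The workflow name, trigger, runner, and input values below are illustrative placeholders, not taken from this PR.

name: Example PySpark CI (illustrative)

on: push

jobs:
  pyspark:
    runs-on: ubuntu-20.04
    steps:
      # The composite action expects the Spark sources to be present in the workspace.
      - uses: actions/checkout@v2
      # Invoke the local composite action by path and pass its declared inputs.
      - name: Build and test PySpark modules
        uses: ./.github/actions/build-and-test-pyspark
        with:
          job-type: regular
          branch: master
          java-version: 8
          hadoop-version: hadoop3.2   # example value; any supported Hadoop profile
          hive-version: hive2.3       # example value; any supported Hive profile
          modules: pyspark-core,pyspark-sql
          ansi_enabled: "false"
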
128 changes: 128 additions & 0 deletions .github/actions/build-and-test-spark/action.yml
@@ -0,0 +1,128 @@
name: 'Build and Test Spark'
author: 'Apache Spark'
description: 'A composite GitHub Action that builds and tests a set of Spark modules'

inputs:
job-type:
description: "The type of the job: regular, scheduled, pyspark-coverage-scheduled"
required: true
branch:
description: "The branch"
required: true
java-version:
description: "The Java version"
required: true
hadoop-version:
description: "The Hadoop version"
required: true
hive-version:
description: "The Hive version"
required: true
envs:
description: "Environment vars as JSON object"
required: false
default: "{}"
modules:
description: "The modules to be build and tested as a comma-separated list"
required: true
included-tags:
description: "Tags to include for testing"
required: false
default: ""
excluded-tags:
description: "Tags to exclude for testing"
required: false
default: ""
label:
description: "Job label"
required: false
default: ""
ansi_enabled:
description: "Use ANSI mode: 'true' or 'false'"
required: false
default: "false"

runs:
using: 'composite'
steps:
- name: Set env
shell: bash
run: |
echo "MODULES_TO_TEST=${{ inputs.modules }}" >> $GITHUB_ENV
echo "EXCLUDED_TAGS=${{ inputs.excluded-tags }}" >> $GITHUB_ENV
echo "INCLUDED_TAGS=${{ inputs.included-tags }}" >> $GITHUB_ENV
echo "HADOOP_PROFILE=${{ inputs.hadoop-version }}" >> $GITHUB_ENV
echo "HIVE_PROFILE=${{ inputs.hive-version }}" >> $GITHUB_ENV
echo "GITHUB_PREV_SHA=${{ github.event.before }}" >> $GITHUB_ENV
echo "SPARK_LOCAL_IP=localhost" >> $GITHUB_ENV

# Cache local repositories. Note that GitHub Actions cache has a 2G limit.
- name: Cache Scala, SBT and Maven
uses: actions/cache@v2
with:
path: |
build/apache-maven-*
build/scala-*
build/*.jar
~/.sbt
key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }}
restore-keys: |
build-

- name: Cache Coursier local repository
uses: actions/cache@v2
with:
path: ~/.cache/coursier
key: ${{ inputs.java-version }}-${{ inputs.hadoop-version }}-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }}
restore-keys: |
${{ inputs.java-version }}-${{ inputs.hadoop-version }}-coursier-

- name: Install Java ${{ inputs.java-version }}
uses: actions/setup-java@v1
with:
java-version: ${{ inputs.java-version }}

- name: Install Python 3.8
uses: actions/setup-python@v2
# We should install a Python 3 interpreter for SQL and Yarn because:
# - SQL component also has Python related tests, for example, IntegratedUDFTestUtils.
# - Yarn has a Python specific test too, for example, YarnClusterSuite.
if: contains(inputs.modules, 'yarn') || (contains(inputs.modules, 'sql') && !contains(inputs.modules, 'sql-'))
with:
python-version: 3.8
architecture: x64

- name: Install Python packages (Python 3.8)
if: (contains(inputs.modules, 'sql') && !contains(inputs.modules, 'sql-'))
shell: bash
run: |
python3.8 -m pip install 'numpy>=1.20.0' pyarrow pandas scipy xmlrunner
python3.8 -m pip list

# Run the tests.
- name: Run tests
env: ${{fromJSON(inputs.envs)}}
shell: bash
run: |
# Hive "other tests" test needs larger metaspace size based on experiment.
if [[ "$MODULES_TO_TEST" == "hive" ]] && [[ "$EXCLUDED_TAGS" == "org.apache.spark.tags.SlowHiveTest" ]]; then export METASPACE_SIZE=2g; fi
export SERIAL_SBT_TESTS=1
./dev/run-tests --parallelism 1 --modules "$MODULES_TO_TEST" --included-tags "$INCLUDED_TAGS" --excluded-tags "$EXCLUDED_TAGS"

- name: Upload test results to report
if: always()
uses: actions/upload-artifact@v2
with:
name: test-results-${{ inputs.modules }}-${{ inputs.label }}-${{ inputs.java-version }}-${{ inputs.hadoop-version }}-${{ inputs.hive-version }}
path: "**/target/test-reports/*.xml"

- name: Upload unit tests log files
if: failure()
uses: actions/upload-artifact@v2
with:
name: unit-tests-log-${{ inputs.modules }}-${{ inputs.label }}-${{ inputs.java-version }}-${{ inputs.hadoop-version }}-${{ inputs.hive-version }}
path: "**/target/unit-tests.log"

branding:
icon: 'check-circle'
color: 'green'
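
Similarly, a hedged sketch of calling the Spark composite action from a workflow job, showing how the optional excluded-tags, label, and envs inputs could be passed. The job skeleton, module list, and environment variable are placeholders for illustration only; the tag class is the SlowHiveTest tag already referenced in the action above.

jobs:
  spark-core:
    runs-on: ubuntu-20.04
    steps:
      # Assumes the repository has been checked out earlier in the job.
      - uses: actions/checkout@v2
      - name: Build and test core modules
        uses: ./.github/actions/build-and-test-spark
        with:
          job-type: regular
          branch: master
          java-version: 8
          hadoop-version: hadoop3.2                            # example value
          hive-version: hive2.3                                # example value
          modules: core,unsafe,kvstore                         # example module list
          excluded-tags: org.apache.spark.tags.SlowHiveTest    # example tag to skip
          label: core                                          # used only in artifact names
          envs: '{"EXAMPLE_ENV_VAR": "1"}'                     # JSON object; key is a placeholder
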