diff --git a/.github/actions/build-and-test-pyspark/action.yml b/.github/actions/build-and-test-pyspark/action.yml new file mode 100644 index 000000000000..9cfef4b3bc4a --- /dev/null +++ b/.github/actions/build-and-test-pyspark/action.yml @@ -0,0 +1,112 @@ +name: 'Build and Test PySpark' +author: 'Apache Spark' +description: 'A composite GitHub Action that builds and tests a set of PySpark modules' + +inputs: + job-type: + description: "The type of the job: regular, scheduled, pyspark-coverage-scheduled" + required: true + branch: + description: "The branch" + required: true + java-version: + description: "The Java version" + required: true + hadoop-version: + description: "The Hadoop version" + required: true + hive-version: + description: "The Hive version" + required: true + modules: + description: "The modules to be build and tested as a comma-separated list" + required: true + ansi_enabled: + description: "Use ANSI mode: 'true' or 'false'" + required: false + default: "false" + +runs: + using: 'composite' + steps: + - name: Set env + shell: bash + run: | + echo "MODULES_TO_TEST=${{ inputs.modules }}" >> $GITHUB_ENV + echo "EXCLUDED_TAGS=${{ inputs.excluded-tags }}" >> $GITHUB_ENV + echo "INCLUDED_TAGS=${{ inputs.included-tags }}" >> $GITHUB_ENV + echo "HADOOP_PROFILE=${{ inputs.hadoop-version }}" >> $GITHUB_ENV + echo "HIVE_PROFILE=${{ inputs.hive-version }}" >> $GITHUB_ENV + echo "GITHUB_PREV_SHA=${{ github.event.before }}" >> $GITHUB_ENV + echo "SPARK_LOCAL_IP=localhost" >> $GITHUB_ENV + + # Cache local repositories. Note that GitHub Actions cache has a 2G limit. 
+ - name: Cache Scala, SBT and Maven + uses: actions/cache@v2 + with: + path: | + build/apache-maven-* + build/scala-* + build/*.jar + ~/.sbt + key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }} + restore-keys: | + build- + + - name: Cache Coursier local repository + uses: actions/cache@v2 + with: + path: ~/.cache/coursier + key: pyspark-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }} + restore-keys: | + pyspark-coursier- + + - name: Install Java ${{ inputs.java-version }} + uses: actions/setup-java@v1 + with: + java-version: ${{ inputs.java-version }} + + - name: List Python packages (Python 3.9, PyPy3) + shell: bash + run: | + python3.9 -m pip list + pypy3 -m pip list + + - name: Install Conda for pip packaging test + shell: bash + run: | + curl -s https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh > miniconda.sh + bash miniconda.sh -b -p $HOME/miniconda + + # Run the tests. 
+ - name: Run tests + shell: bash + run: | + export PATH=$PATH:$HOME/miniconda/bin + ./dev/run-tests --parallelism 1 --modules "$MODULES_TO_TEST" + + - name: Upload coverage to Codecov + if: inputs.job-type == 'pyspark-coverage-scheduled' + uses: codecov/codecov-action@v2 + with: + files: ./python/coverage.xml + flags: unittests + name: PySpark + + - name: Upload test results to report + if: always() + uses: actions/upload-artifact@v2 + with: + name: test-results-${{ inputs.modules }}--8-${{ inputs.hadoop-version }}-hive2.3 + path: "**/target/test-reports/*.xml" + + - name: Upload unit tests log files + if: failure() + uses: actions/upload-artifact@v2 + with: + name: unit-tests-log-${{ inputs.modules }}--8-${{ inputs.hadoop-version }}-hive2.3 + path: "**/target/unit-tests.log" + +branding: + icon: 'check-circle' + color: 'green' diff --git a/.github/actions/build-and-test-spark/action.yml b/.github/actions/build-and-test-spark/action.yml new file mode 100644 index 000000000000..95437737bd7e --- /dev/null +++ b/.github/actions/build-and-test-spark/action.yml @@ -0,0 +1,128 @@ +name: 'Build and Test Spark' +author: 'Apache Spark' +description: 'A composite GitHub Action that builds and tests a set of Spark modules' + +inputs: + job-type: + description: "The type of the job: regular, scheduled, pyspark-coverage-scheduled" + required: true + branch: + description: "The branch" + required: true + java-version: + description: "The Java version" + required: true + hadoop-version: + description: "The Hadoop version" + required: true + hive-version: + description: "The Hive version" + required: true + envs: + description: "Environment vars as JSON object" + required: false + default: "{}" + modules: + description: "The modules to be build and tested as a comma-separated list" + required: true + included-tags: + description: "Tags to include for testing" + required: false + default: "" + excluded-tags: + description: "Tags to exclude for testing" + required: false + default: 
"" + label: + description: "Job label" + required: false + default: "" + ansi_enabled: + description: "Use ANSI mode: 'true' or 'false'" + required: false + default: "false" + +runs: + using: 'composite' + steps: + - name: Set env + shell: bash + run: | + echo "MODULES_TO_TEST=${{ inputs.modules }}" >> $GITHUB_ENV + echo "EXCLUDED_TAGS=${{ inputs.excluded-tags }}" >> $GITHUB_ENV + echo "INCLUDED_TAGS=${{ inputs.included-tags }}" >> $GITHUB_ENV + echo "HADOOP_PROFILE=${{ inputs.hadoop-version }}" >> $GITHUB_ENV + echo "HIVE_PROFILE=${{ inputs.hive-version }}" >> $GITHUB_ENV + echo "GITHUB_PREV_SHA=${{ github.event.before }}" >> $GITHUB_ENV + echo "SPARK_LOCAL_IP=localhost" >> $GITHUB_ENV + + # Cache local repositories. Note that GitHub Actions cache has a 2G limit. + - name: Cache Scala, SBT and Maven + uses: actions/cache@v2 + with: + path: | + build/apache-maven-* + build/scala-* + build/*.jar + ~/.sbt + key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }} + restore-keys: | + build- + + - name: Cache Coursier local repository + uses: actions/cache@v2 + with: + path: ~/.cache/coursier + key: ${{ inputs.java-version }}-${{ inputs.hadoop-version }}-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }} + restore-keys: | + ${{ inputs.java-version }}-${{ inputs.hadoop-version }}-coursier- + + - name: Install Java ${{ inputs.java-version }} + uses: actions/setup-java@v1 + with: + java-version: ${{ inputs.java-version }} + + - name: Install Python 3.8 + uses: actions/setup-python@v2 + # We should install one Python that is higher then 3+ for SQL and Yarn because: + # - SQL component also has Python related tests, for example, IntegratedUDFTestUtils. + # - Yarn has a Python specific test too, for example, YarnClusterSuite. 
+ if: contains(inputs.modules, 'yarn') || (contains(inputs.modules, 'sql') && !contains(inputs.modules, 'sql-')) + with: + python-version: 3.8 + architecture: x64 + + - name: Install Python packages (Python 3.8) + if: (contains(inputs.modules, 'sql') && !contains(inputs.modules, 'sql-')) + shell: bash + run: | + python3.8 -m pip install 'numpy>=1.20.0' pyarrow pandas scipy xmlrunner + python3.8 -m pip list + + # Run the tests. + - name: Run tests + env: ${{fromJSON(inputs.envs)}} + shell: bash + run: | + # Hive "other tests" test needs larger metaspace size based on experiment. + if [[ "$MODULES_TO_TEST" == "hive" ]] && [[ "$EXCLUDED_TAGS" == "org.apache.spark.tags.SlowHiveTest" ]]; then export METASPACE_SIZE=2g; fi + export SERIAL_SBT_TESTS=1 + ./dev/run-tests --parallelism 1 --modules "$MODULES_TO_TEST" --included-tags "$INCLUDED_TAGS" --excluded-tags "$EXCLUDED_TAGS" + + - name: Upload test results to report + if: always() + uses: actions/upload-artifact@v2 + with: + name: test-results-${{ inputs.modules }}-${{ inputs.label }}-${{ inputs.java-version }}-${{ inputs.hadoop-version }}-${{ inputs.hive-version }} + path: "**/target/test-reports/*.xml" + + - name: Upload unit tests log files + if: failure() + uses: actions/upload-artifact@v2 + with: + name: unit-tests-log-${{ inputs.modules }}-${{ inputs.label }}-${{ inputs.java-version }}-${{ inputs.hadoop-version }}-${{ inputs.hive-version }} + path: "**/target/unit-tests.log" + +branding: + icon: 'check-circle' + color: 'green' diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 81381eb16d47..0682bddd4318 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -55,81 +55,111 @@ jobs: # Run all jobs for Apache Spark repository # Run only non-scheduled jobs for forked repositories if: github.repository == 'apache/spark' || github.event_name != 'schedule' + env: + GITHUB_PREV_SHA: ${{ github.event.before }} outputs: - java: ${{ 
steps.set-outputs.outputs.java }} - branch: ${{ steps.set-outputs.outputs.branch }} - hadoop: ${{ steps.set-outputs.outputs.hadoop }} - type: ${{ steps.set-outputs.outputs.type }} - envs: ${{ steps.set-outputs.outputs.envs }} + java: ${{ steps.config.outputs.java }} + branch: ${{ steps.config.outputs.branch }} + hadoop: ${{ steps.config.outputs.hadoop }} + type: ${{ steps.config.outputs.type }} + envs: ${{ steps.config.outputs.envs }} + + spark-matrix: ${{ steps.spark.outputs.matrix }} + pyspark-matrix: ${{ steps.pyspark.outputs.matrix }} + + # Run scheduled jobs for Apache Spark only + # Run regular jobs for commit in both Apache Spark and forked repository, but only if changes exist + spark-required: >- + ${{ steps.config.outputs.type == 'scheduled' + || (steps.config.outputs.type == 'regular' && steps.changes.outputs.build-required == 'true') }} + + # Run PySpark coverage scheduled jobs for Apache Spark only + # Run scheduled jobs with JDK 17 in Apache Spark + # Run regular jobs for commit in both Apache Spark and forked repository, but only if pyspark changes exist + pyspark-required: >- + ${{ steps.config.outputs.type == 'pyspark-coverage-scheduled' + || (steps.config.outputs.type == 'scheduled' && steps.config.outputs.java == '17') + || (steps.config.outputs.type == 'regular' && steps.changes.outputs.pyspark-required == 'true') }} + + # Run scheduled jobs with JDK 17 in Apache Spark + # Run regular jobs for commit in both Apache Spark and forked repository, but only if sparkr changes exist + sparkr-required: >- + ${{ (steps.config.outputs.type == 'scheduled' && steps.config.outputs.java == '17') + || (steps.config.outputs.type == 'regular' && steps.changes.outputs.sparkr-required == 'true') }} + + # Run for regular jobs + lint-required: >- + ${{ steps.config.outputs.type == 'regular' }} + + # Run regular jobs for commit in both Apache Spark and forked repository, but only if changes exist + java-required: >- + ${{ steps.config.outputs.type == 'regular' && 
steps.changes.outputs.build-required == 'true' }} + + # Run regular jobs for commit in both Apache Spark and forked repository, but only if changes exist + scala-required: >- + ${{ steps.config.outputs.type == 'regular' && steps.changes.outputs.build-required == 'true' }} + + # Run regular jobs for commit in both Apache Spark and forked repository, but only if tpcds changes exist + tpcds-required: >- + ${{ steps.config.outputs.type == 'regular' && steps.changes.outputs.tpcds-required == 'true' }} + + # Run regular jobs for commit in both Apache Spark and forked repository, but only if docker changes exist + docker-required: >- + ${{ steps.config.outputs.type == 'regular' && steps.changes.outputs.docker-required == 'true' }} + steps: - name: Configure branch and additional environment variables - id: set-outputs + id: config run: | + # default values + java=8 + branch=master + type=scheduled + hadoop=hadoop3 + + # override default values based on event if [ "${{ github.event.schedule }}" = "0 1 * * *" ]; then - echo '::set-output name=java::8' - echo '::set-output name=branch::master' - echo '::set-output name=type::scheduled' - echo '::set-output name=envs::{}' - echo '::set-output name=hadoop::hadoop2' + envs='{}' + hadoop='hadoop2' elif [ "${{ github.event.schedule }}" = "0 4 * * *" ]; then - echo '::set-output name=java::8' - echo '::set-output name=branch::master' - echo '::set-output name=type::scheduled' - echo '::set-output name=envs::{"SCALA_PROFILE": "scala2.13"}' - echo '::set-output name=hadoop::hadoop3' + envs='{"SCALA_PROFILE": "scala2.13"}' + hadoop='hadoop3' elif [ "${{ github.event.schedule }}" = "0 7 * * *" ]; then - echo '::set-output name=java::8' - echo '::set-output name=branch::branch-3.2' - echo '::set-output name=type::scheduled' - echo '::set-output name=envs::{"SCALA_PROFILE": "scala2.13"}' - echo '::set-output name=hadoop::hadoop3.2' + branch='branch-3.2' + envs='{"SCALA_PROFILE": "scala2.13"}' + hadoop='hadoop3.2' elif [ "${{ 
github.event.schedule }}" = "0 10 * * *" ]; then - echo '::set-output name=java::8' - echo '::set-output name=branch::master' - echo '::set-output name=type::pyspark-coverage-scheduled' - echo '::set-output name=envs::{"PYSPARK_CODECOV": "true"}' - echo '::set-output name=hadoop::hadoop3' + type=pyspark-coverage-scheduled + envs='{"PYSPARK_CODECOV": "true"}' elif [ "${{ github.event.schedule }}" = "0 13 * * *" ]; then - echo '::set-output name=java::11' - echo '::set-output name=branch::master' - echo '::set-output name=type::scheduled' - echo '::set-output name=envs::{"SKIP_MIMA": "true", "SKIP_UNIDOC": "true"}' - echo '::set-output name=hadoop::hadoop3' + java=11 + envs='{"SKIP_MIMA": "true", "SKIP_UNIDOC": "true"}' elif [ "${{ github.event.schedule }}" = "0 16 * * *" ]; then - echo '::set-output name=java::17' - echo '::set-output name=branch::master' - echo '::set-output name=type::scheduled' - echo '::set-output name=envs::{"SKIP_MIMA": "true", "SKIP_UNIDOC": "true"}' - echo '::set-output name=hadoop::hadoop3' + java=17 + envs='{"SKIP_MIMA": "true", "SKIP_UNIDOC": "true"}' elif [ "${{ github.event.schedule }}" = "0 19 * * *" ]; then - echo '::set-output name=java::8' - echo '::set-output name=branch::branch-3.3' - echo '::set-output name=type::scheduled' - echo '::set-output name=envs::{"SCALA_PROFILE": "scala2.13"}' - echo '::set-output name=hadoop::hadoop3' + branch='branch-3.3' + envs='{"SCALA_PROFILE": "scala2.13"}' else - echo '::set-output name=java::8' - echo '::set-output name=branch::master' # NOTE: UPDATE THIS WHEN CUTTING BRANCH - echo '::set-output name=type::regular' - echo '::set-output name=envs::{"SPARK_ANSI_SQL_MODE": "${{ inputs.ansi_enabled }}"}' - echo '::set-output name=hadoop::hadoop3' + # NOTE: UPDATE THIS WHEN CUTTING BRANCH + branch=master + type=regular + envs='{"SPARK_ANSI_SQL_MODE": "${{ inputs.ansi_enabled }}"}' fi - precondition: - name: Check changes - runs-on: ubuntu-20.04 - needs: configure-jobs - env: - GITHUB_PREV_SHA: ${{ 
github.event.before }} - outputs: - required: ${{ steps.set-outputs.outputs.required }} - steps: + # output values + echo "::set-output name=java::$java" + echo "::set-output name=branch::$branch" + echo "::set-output name=type::$type" + echo "::set-output name=envs::$envs" + echo "::set-output name=hadoop::$hadoop" + - name: Checkout Spark repository uses: actions/checkout@v2 with: fetch-depth: 0 repository: apache/spark - ref: ${{ needs.configure-jobs.outputs.branch }} + ref: ${{ steps.config.outputs.branch }} - name: Sync the current branch with the latest in Apache Spark if: github.repository != 'apache/spark' run: | @@ -137,8 +167,9 @@ jobs: git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF#refs/heads/} git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' merge --no-commit --progress --squash FETCH_HEAD git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit" --allow-empty + - name: Check all modules - id: set-outputs + id: changes run: | # is-changed.py is missing in branch-3.2, and it might run in scheduled build, see also SPARK-39517 build=true; pyspark=true; sparkr=true; tpcds=true; docker=true; @@ -149,75 +180,138 @@ jobs: tpcds=`./dev/is-changed.py -m build,catalyst,core,hive,kvstore,launcher,network-common,network-shuffle,repl,sketch,sql,tags,unsafe` docker=`./dev/is-changed.py -m build,catalyst,core,docker-integration-tests,hive,kvstore,launcher,network-common,network-shuffle,repl,sketch,sql,tags,unsafe` fi - echo "{\"build\": \"$build\", \"pyspark\": \"$pyspark\", \"sparkr\": \"$sparkr\", \"tpcds\": \"$tpcds\", \"docker\": \"$docker\"}" > required.json - cat required.json - echo "::set-output name=required::$(cat required.json)" - - # Build: build Spark and run the tests for specified modules. 
-  build:
-    name: "Build modules (${{ format('{0}, {1} job', needs.configure-jobs.outputs.branch, needs.configure-jobs.outputs.type) }}): ${{ matrix.modules }} ${{ matrix.comment }} (JDK ${{ matrix.java }}, ${{ matrix.hadoop }}, ${{ matrix.hive }})"
-    needs: [configure-jobs, precondition]
-    # Run scheduled jobs for Apache Spark only
-    # Run regular jobs for commit in both Apache Spark and forked repository, but only if changes exist
-    if: >-
-      needs.configure-jobs.outputs.type == 'scheduled'
-      || (needs.configure-jobs.outputs.type == 'regular' && fromJson(needs.precondition.outputs.required).build == 'true')
+
+          echo "::set-output name=build-required::$build"
+          echo "::set-output name=pyspark-required::$pyspark"
+          echo "::set-output name=sparkr-required::$sparkr"
+          echo "::set-output name=tpcds-required::$tpcds"
+          echo "::set-output name=docker-required::$docker"
+
+      - name: Configure Spark matrix
+        id: spark
+        # TODO(SPARK-32246): We don't test 'streaming-kinesis-asl' for now.
+        # Kinesis tests depend on external Amazon kinesis service.
+        # Here, we split Hive and SQL tests into some of slow ones and the rest of them.
+        # Note that the modules below are from sparktestsupport/modules.py.
+ run: | + cat > matrix.json << EOF + { + "include": [ + { + "modules": "core, unsafe, kvstore, avro, network-common, network-shuffle, repl, launcher, examples, sketch, graphx", + "java": "${{ steps.config.outputs.java }}", + "hadoop": "${{ steps.config.outputs.hadoop }}", + "hive": "hive2.3" + }, + { + "modules": "catalyst, hive-thriftserver", + "java": "${{ steps.config.outputs.java }}", + "hadoop": "${{ steps.config.outputs.hadoop }}", + "hive": "hive2.3" + }, + { + "modules": "streaming, sql-kafka-0-10, streaming-kafka-0-10, mllib-local, mllib, yarn, mesos, kubernetes, hadoop-cloud, spark-ganglia-lgpl", + "java": "${{ steps.config.outputs.java }}", + "hadoop": "${{ steps.config.outputs.hadoop }}", + "hive": "hive2.3" + }, + { + "modules": "hive", + "java": "${{ steps.config.outputs.java }}", + "hadoop": "${{ steps.config.outputs.hadoop }}", + "hive": "hive2.3", + "included-tags": "org.apache.spark.tags.SlowHiveTest", + "label": "- slow tests" + }, + { + "modules": "hive", + "java": "${{ steps.config.outputs.java }}", + "hadoop": "${{ steps.config.outputs.hadoop }}", + "hive": "hive2.3", + "excluded-tags": "org.apache.spark.tags.SlowHiveTest", + "label": "- other tests" + }, + { + "modules": "sql", + "java": "${{ steps.config.outputs.java }}", + "hadoop": "${{ steps.config.outputs.hadoop }}", + "hive": "hive2.3", + "included-tags": "org.apache.spark.tags.ExtendedSQLTest", + "label": "- slow tests" + }, + { + "modules": "sql", + "java": "${{ steps.config.outputs.java }}", + "hadoop": "${{ steps.config.outputs.hadoop }}", + "hive": "hive2.3", + "excluded-tags": "org.apache.spark.tags.ExtendedSQLTest", + "label": "- other tests" + } + ] + } + EOF + + # detect changes for each of these module sets + jq -c ".include[] | ." < matrix.json | while read line; do + modules=$(jq -r .modules <<<"$line") + changed="$(./dev/is-changed.py -m "$modules")" + jq ". 
+ { changed: \"$changed\" }" <<<"$line" + done | jq -n '{ include: [inputs] }' > matrix-with-changes.json + + # output json + echo "::set-output name=matrix::$(jq -c . < matrix-with-changes.json)" + echo "The strategy matrix:" + jq . < matrix-with-changes.json + + - name: Configure PySpark matrix + id: pyspark + run: | + cat > matrix.json << EOF + { + "include": [ + { + "modules": "pyspark-sql, pyspark-mllib, pyspark-resource", + "java": "${{ steps.config.outputs.java }}" + }, + { + "modules": "pyspark-core, pyspark-streaming, pyspark-ml", + "java": "${{ steps.config.outputs.java }}" + }, + { + "modules": "pyspark-pandas", + "java": "${{ steps.config.outputs.java }}" + }, + { + "modules": "pyspark-pandas-slow", + "java": "${{ steps.config.outputs.java }}" + } + ] + } + EOF + + # detect changes for each of these module sets + jq -c ".include[] | ." < matrix.json | while read line; do + modules=$(jq -r .modules <<<"$line") + changed="$(./dev/is-changed.py -m "$modules")" + jq ". + { changed: \"$changed\" }" <<<"$line" + done | jq -n '{ include: [inputs] }' > matrix-with-changes.json + + # output json + echo "::set-output name=matrix::$(jq -c . < matrix-with-changes.json)" + echo "The strategy matrix:" + jq . < matrix-with-changes.json + + + # Build Spark and run the tests for specified modules. + spark: + name: "Build modules (${{ format('{0}, {1} job', needs.configure-jobs.outputs.branch, needs.configure-jobs.outputs.type) }}): ${{ matrix.modules }} ${{ matrix.label }} (JDK ${{ matrix.java }}, ${{ matrix.hadoop }}, ${{ matrix.hive }})" + needs: [configure-jobs] + if: needs.configure-jobs.outputs.spark-required == 'true' # Ubuntu 20.04 is the latest LTS. The next LTS is 22.04. runs-on: ubuntu-20.04 strategy: fail-fast: false - matrix: - java: - - ${{ needs.configure-jobs.outputs.java }} - hadoop: - - ${{ needs.configure-jobs.outputs.hadoop }} - hive: - - hive2.3 - # TODO(SPARK-32246): We don't test 'streaming-kinesis-asl' for now. 
- # Kinesis tests depends on external Amazon kinesis service. - # Note that the modules below are from sparktestsupport/modules.py. - modules: - - >- - core, unsafe, kvstore, avro, - network-common, network-shuffle, repl, launcher, - examples, sketch, graphx - - >- - catalyst, hive-thriftserver - - >- - streaming, sql-kafka-0-10, streaming-kafka-0-10, - mllib-local, mllib, - yarn, mesos, kubernetes, hadoop-cloud, spark-ganglia-lgpl - # Here, we split Hive and SQL tests into some of slow ones and the rest of them. - included-tags: [""] - excluded-tags: [""] - comment: [""] - include: - # Hive tests - - modules: hive - java: ${{ needs.configure-jobs.outputs.java }} - hadoop: ${{ needs.configure-jobs.outputs.hadoop }} - hive: hive2.3 - included-tags: org.apache.spark.tags.SlowHiveTest - comment: "- slow tests" - - modules: hive - java: ${{ needs.configure-jobs.outputs.java }} - hadoop: ${{ needs.configure-jobs.outputs.hadoop }} - hive: hive2.3 - excluded-tags: org.apache.spark.tags.SlowHiveTest - comment: "- other tests" - # SQL tests - - modules: sql - java: ${{ needs.configure-jobs.outputs.java }} - hadoop: ${{ needs.configure-jobs.outputs.hadoop }} - hive: hive2.3 - included-tags: org.apache.spark.tags.ExtendedSQLTest - comment: "- slow tests" - - modules: sql - java: ${{ needs.configure-jobs.outputs.java }} - hadoop: ${{ needs.configure-jobs.outputs.hadoop }} - hive: hive2.3 - excluded-tags: org.apache.spark.tags.ExtendedSQLTest - comment: "- other tests" + matrix: ${{fromJSON(needs.configure-jobs.outputs.spark-matrix)}} env: MODULES_TO_TEST: ${{ matrix.modules }} EXCLUDED_TAGS: ${{ matrix.excluded-tags }} @@ -234,6 +328,7 @@ jobs: fetch-depth: 0 repository: apache/spark ref: ${{ needs.configure-jobs.outputs.branch }} + - name: Sync the current branch with the latest in Apache Spark if: github.repository != 'apache/spark' run: | @@ -241,91 +336,35 @@ jobs: git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF#refs/heads/} git -c user.name='Apache Spark 
Test Account' -c user.email='sparktestacc@gmail.com' merge --no-commit --progress --squash FETCH_HEAD git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit" --allow-empty - # Cache local repositories. Note that GitHub Actions cache has a 2G limit. - - name: Cache Scala, SBT and Maven - uses: actions/cache@v2 - with: - path: | - build/apache-maven-* - build/scala-* - build/*.jar - ~/.sbt - key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }} - restore-keys: | - build- - - name: Cache Coursier local repository - uses: actions/cache@v2 - with: - path: ~/.cache/coursier - key: ${{ matrix.java }}-${{ matrix.hadoop }}-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }} - restore-keys: | - ${{ matrix.java }}-${{ matrix.hadoop }}-coursier- - - name: Install Java ${{ matrix.java }} - uses: actions/setup-java@v1 - with: - java-version: ${{ matrix.java }} - - name: Install Python 3.8 - uses: actions/setup-python@v2 - # We should install one Python that is higher then 3+ for SQL and Yarn because: - # - SQL component also has Python related tests, for example, IntegratedUDFTestUtils. - # - Yarn has a Python specific test too, for example, YarnClusterSuite. - if: contains(matrix.modules, 'yarn') || (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-')) - with: - python-version: 3.8 - architecture: x64 - - name: Install Python packages (Python 3.8) - if: (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-')) - run: | - python3.8 -m pip install 'numpy>=1.20.0' pyarrow pandas scipy xmlrunner - python3.8 -m pip list - # Run the tests. - - name: Run tests - env: ${{ fromJSON(needs.configure-jobs.outputs.envs) }} - run: | - # Hive "other tests" test needs larger metaspace size based on experiment. 
- if [[ "$MODULES_TO_TEST" == "hive" ]] && [[ "$EXCLUDED_TAGS" == "org.apache.spark.tags.SlowHiveTest" ]]; then export METASPACE_SIZE=2g; fi - export SERIAL_SBT_TESTS=1 - ./dev/run-tests --parallelism 1 --modules "$MODULES_TO_TEST" --included-tags "$INCLUDED_TAGS" --excluded-tags "$EXCLUDED_TAGS" - - name: Upload test results to report - if: always() - uses: actions/upload-artifact@v2 - with: - name: test-results-${{ matrix.modules }}-${{ matrix.comment }}-${{ matrix.java }}-${{ matrix.hadoop }}-${{ matrix.hive }} - path: "**/target/test-reports/*.xml" - - name: Upload unit tests log files - if: failure() - uses: actions/upload-artifact@v2 + + - name: "Build and test" + uses: ./.github/actions/build-and-test-spark + # should be 'true' if not 'false', but we want to fall back to running tests if output is unexpected + if: matrix.changed != 'false' with: - name: unit-tests-log-${{ matrix.modules }}-${{ matrix.comment }}-${{ matrix.java }}-${{ matrix.hadoop }}-${{ matrix.hive }} - path: "**/target/unit-tests.log" + job-type: ${{ needs.configure-jobs.outputs.type }} + branch: ${{ needs.configure-jobs.outputs.branch }} + java-version: ${{ needs.configure-jobs.outputs.java }} + hadoop-version: ${{ needs.configure-jobs.outputs.hadoop }} + hive-version: hive2.3 + envs: ${{ needs.configure-jobs.outputs.envs }} + modules: ${{ matrix.modules }} + included-tags: ${{ matrix.included-tags }} + excluded-tags: ${{ matrix.excluded-tags }} + label: ${{ matrix.label }} + ansi_enabled: ${{ inputs.ansi_enabled }} + # Build PySpark and run the tests for specified modules. 
pyspark: - needs: [configure-jobs, precondition] - # Run PySpark coverage scheduled jobs for Apache Spark only - # Run scheduled jobs with JDK 17 in Apache Spark - # Run regular jobs for commit in both Apache Spark and forked repository, but only if pyspark changes exist - if: >- - needs.configure-jobs.outputs.type == 'pyspark-coverage-scheduled' - || (needs.configure-jobs.outputs.type == 'scheduled' && needs.configure-jobs.outputs.java == '17') - || (needs.configure-jobs.outputs.type == 'regular' && fromJson(needs.precondition.outputs.required).pyspark == 'true') name: "Build modules (${{ format('{0}, {1} job', needs.configure-jobs.outputs.branch, needs.configure-jobs.outputs.type) }}): ${{ matrix.modules }}" + needs: [configure-jobs] + if: needs.configure-jobs.outputs.pyspark-required == 'true' runs-on: ubuntu-20.04 container: image: dongjoon/apache-spark-github-action-image:20220207 strategy: fail-fast: false - matrix: - java: - - ${{ needs.configure-jobs.outputs.java }} - modules: - - >- - pyspark-sql, pyspark-mllib, pyspark-resource - - >- - pyspark-core, pyspark-streaming, pyspark-ml - - >- - pyspark-pandas - - >- - pyspark-pandas-slow + matrix: ${{fromJSON(needs.configure-jobs.outputs.pyspark-matrix)}} env: MODULES_TO_TEST: ${{ matrix.modules }} HADOOP_PROFILE: ${{ needs.configure-jobs.outputs.hadoop }} @@ -344,6 +383,7 @@ jobs: fetch-depth: 0 repository: apache/spark ref: ${{ needs.configure-jobs.outputs.branch }} + - name: Sync the current branch with the latest in Apache Spark if: github.repository != 'apache/spark' run: | @@ -351,71 +391,24 @@ jobs: git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF#refs/heads/} git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' merge --no-commit --progress --squash FETCH_HEAD git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit" --allow-empty - # Cache local repositories. Note that GitHub Actions cache has a 2G limit. 
- - name: Cache Scala, SBT and Maven - uses: actions/cache@v2 - with: - path: | - build/apache-maven-* - build/scala-* - build/*.jar - ~/.sbt - key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }} - restore-keys: | - build- - - name: Cache Coursier local repository - uses: actions/cache@v2 - with: - path: ~/.cache/coursier - key: pyspark-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }} - restore-keys: | - pyspark-coursier- - - name: Install Java ${{ matrix.java }} - uses: actions/setup-java@v1 - with: - java-version: ${{ matrix.java }} - - name: List Python packages (Python 3.9, PyPy3) - run: | - python3.9 -m pip list - pypy3 -m pip list - - name: Install Conda for pip packaging test - run: | - curl -s https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh > miniconda.sh - bash miniconda.sh -b -p $HOME/miniconda - # Run the tests. - - name: Run tests - env: ${{ fromJSON(needs.configure-jobs.outputs.envs) }} - run: | - export PATH=$PATH:$HOME/miniconda/bin - ./dev/run-tests --parallelism 1 --modules "$MODULES_TO_TEST" - - name: Upload coverage to Codecov - if: needs.configure-jobs.outputs.type == 'pyspark-coverage-scheduled' - uses: codecov/codecov-action@v2 - with: - files: ./python/coverage.xml - flags: unittests - name: PySpark - - name: Upload test results to report - if: always() - uses: actions/upload-artifact@v2 - with: - name: test-results-${{ matrix.modules }}--8-${{ needs.configure-jobs.outputs.hadoop }}-hive2.3 - path: "**/target/test-reports/*.xml" - - name: Upload unit tests log files - if: failure() - uses: actions/upload-artifact@v2 + + - name: "Build and test" + uses: ./.github/actions/build-and-test-pyspark + # should be 'true' if not 'false', but we want to fall back to running tests if output is unexpected + if: matrix.changed != 'false' with: - name: unit-tests-log-${{ matrix.modules }}--8-${{ 
needs.configure-jobs.outputs.hadoop }}-hive2.3 - path: "**/target/unit-tests.log" + job-type: ${{ needs.configure-jobs.outputs.type }} + branch: ${{ needs.configure-jobs.outputs.branch }} + java-version: ${{ needs.configure-jobs.outputs.java }} + hadoop-version: ${{ needs.configure-jobs.outputs.hadoop }} + hive-version: hive2.3 + modules: ${{ matrix.modules }} + ansi_enabled: ${{ inputs.ansi_enabled }} sparkr: - needs: [configure-jobs, precondition] - # Run scheduled jobs with JDK 17 in Apache Spark - # Run regular jobs for commit in both Apache Spark and forked repository, but only if sparkr changes exist - if: >- - (needs.configure-jobs.outputs.type == 'scheduled' && needs.configure-jobs.outputs.java == '17') - || (needs.configure-jobs.outputs.type == 'regular' && fromJson(needs.precondition.outputs.required).sparkr == 'true') name: "Build modules: sparkr" + needs: [configure-jobs] + if: needs.configure-jobs.outputs.sparkr-required == 'true' runs-on: ubuntu-20.04 container: image: dongjoon/apache-spark-github-action-image:20220207 @@ -480,9 +473,9 @@ jobs: # Static analysis, and documentation build lint: - needs: configure-jobs - if: needs.configure-jobs.outputs.type == 'regular' - name: Linters, licenses, dependencies and documentation generation + name: "Linters, licenses, dependencies and documentation generation" + needs: [configure-jobs] + if: needs.configure-jobs.outputs.lint-required == 'true' runs-on: ubuntu-20.04 env: LC_ALL: C.UTF-8 @@ -595,17 +588,16 @@ jobs: bundle exec jekyll build java-11-17: - needs: [configure-jobs, precondition] - # Run regular jobs for commit in both Apache Spark and forked repository, but only if changes exist - if: needs.configure-jobs.outputs.type == 'regular' && fromJson(needs.precondition.outputs.required).build == 'true' - name: Java ${{ matrix.java }} build with Maven + name: "Java ${{ matrix.java }} build with Maven" + needs: [configure-jobs] + if: needs.configure-jobs.outputs.java-required == 'true' + runs-on: 
ubuntu-20.04 strategy: fail-fast: false matrix: java: - 11 - 17 - runs-on: ubuntu-20.04 steps: - name: Checkout Spark repository uses: actions/checkout@v2 @@ -651,10 +643,9 @@ jobs: rm -rf ~/.m2/repository/org/apache/spark scala-213: - needs: [configure-jobs, precondition] - # Run regular jobs for commit in both Apache Spark and forked repository, but only if changes exist - if: needs.configure-jobs.outputs.type == 'regular' && fromJson(needs.precondition.outputs.required).build == 'true' - name: Scala 2.13 build with SBT + name: "Scala 2.13 build with SBT" + needs: [configure-jobs] + if: needs.configure-jobs.outputs.scala-required == 'true' runs-on: ubuntu-20.04 steps: - name: Checkout Spark repository @@ -697,10 +688,9 @@ jobs: ./build/sbt -Pyarn -Pmesos -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Phadoop-cloud -Pkinesis-asl -Pdocker-integration-tests -Pkubernetes-integration-tests -Pspark-ganglia-lgpl -Pscala-2.13 compile test:compile tpcds-1g: - needs: [configure-jobs, precondition] - # Run regular jobs for commit in both Apache Spark and forked repository, but only if tpcds changes exist - if: needs.configure-jobs.outputs.type == 'regular' && fromJson(needs.precondition.outputs.required).tpcds == 'true' - name: Run TPC-DS queries with SF=1 + name: "Run TPC-DS queries with SF=1" + needs: [configure-jobs] + if: needs.configure-jobs.outputs.tpcds-required == 'true' runs-on: ubuntu-20.04 env: SPARK_LOCAL_IP: localhost @@ -793,10 +783,9 @@ jobs: path: "**/target/unit-tests.log" docker-integration-tests: - needs: [configure-jobs, precondition] - # Run regular jobs for commit in both Apache Spark and forked repository, but only if docker changes exist - if: needs.configure-jobs.outputs.type == 'regular' && fromJson(needs.precondition.outputs.required).docker == 'true' - name: Run Docker integration tests + name: "Run Docker integration tests" + needs: [configure-jobs] + if: needs.configure-jobs.outputs.docker-required == 'true' runs-on: ubuntu-20.04 env: 
HADOOP_PROFILE: ${{ needs.configure-jobs.outputs.hadoop }} diff --git a/dev/is-changed.py b/dev/is-changed.py index 85f0d3cda6df..7df94627da78 100755 --- a/dev/is-changed.py +++ b/dev/is-changed.py @@ -53,7 +53,7 @@ def parse_opts(): def main(): opts = parse_opts() - test_modules = opts.modules.split(",") + test_modules = [m.strip() for m in opts.modules.split(",")] changed_files = [] if os.environ.get("APACHE_SPARK_REF"): changed_files = identify_changed_files_from_git_commits(