Skip to content

Commit

Permalink
Merge remote-tracking branch 'apache/master' into withOrigin-optimiza…
Browse files Browse the repository at this point in the history
…tions
  • Loading branch information
JoshRosen committed Jun 6, 2024
2 parents d0d8db9 + 0f21df0 commit b463c3f
Show file tree
Hide file tree
Showing 10,947 changed files with 1,121,889 additions and 282,714 deletions.
The diff you're trying to view is too large. We only load the first 3000 changed files.
6 changes: 6 additions & 0 deletions .asf.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,9 @@ github:
merge: false
squash: true
rebase: true

notifications:
pullrequests: reviews@spark.apache.org
issues: reviews@spark.apache.org
commits: commits@spark.apache.org
jira_options: link label
11 changes: 10 additions & 1 deletion .github/PULL_REQUEST_TEMPLATE
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ Thanks for sending a pull request! Here are some tips for you:
7. If you want to add a new configuration, please read the guideline first for naming configurations in
'core/src/main/scala/org/apache/spark/internal/config/ConfigEntry.scala'.
8. If you want to add or modify an error type or message, please read the guideline first in
'core/src/main/resources/error/README.md'.
'common/utils/src/main/resources/error/README.md'.
-->

### What changes were proposed in this pull request?
Expand Down Expand Up @@ -47,3 +47,12 @@ If it was tested in a way different from regular unit tests, please clarify how
If tests were not added, please describe why they were not added and/or why it was difficult to add.
If benchmark tests were added, please run the benchmarks in GitHub Actions for the consistent environment, and the instructions could accord to: https://spark.apache.org/developer-tools.html#github-workflow-benchmarks.
-->


### Was this patch authored or co-authored using generative AI tooling?
<!--
If generative AI tooling has been used in the process of authoring this patch, please include the
phrase: 'Generated-by: ' followed by the name of the tool and its version.
If no, write 'No'.
Please refer to the [ASF Generative Tooling Guidance](https://www.apache.org/legal/generative-tooling.html) for details.
-->
308 changes: 197 additions & 111 deletions .github/labeler.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,136 +17,222 @@
# under the License.
#

#
# Pull Request Labeler Github Action Configuration: https://github.com/marketplace/actions/labeler
#
# Note that we currently cannot use the negation operator (i.e. `!`) for miniglob matches as they
# would match any file that doesn't touch them. What's needed is the concept of `any`, which takes a
# list of constraints / globs and then matches all of the constraints for either `any` of the files or
# `all` of the files in the change set.
#
# However, `any`/`all` are not supported in a released version and testing off of the `main` branch
# resulted in some other errors when testing.
#
# An issue has been opened upstream requesting that a release be cut that has support for all/any:
# - https://github.com/actions/labeler/issues/111
#
# While we wait for this issue to be handled upstream, we can remove
# the negated / `!` matches for now and at least have labels again.
#
INFRA:
- ".github/**/*"
- "appveyor.yml"
- "tools/**/*"
- "dev/create-release/**/*"
- ".asf.yaml"
- ".gitattributes"
- ".gitignore"
- "dev/github_jira_sync.py"
- "dev/merge_spark_pr.py"
- "dev/run-tests-jenkins*"
- changed-files:
- any-glob-to-any-file: [
'.github/**/*',
'tools/**/*',
'dev/create-release/**/*',
'.asf.yaml',
'.gitattributes',
'.gitignore',
'dev/merge_spark_pr.py',
'dev/run-tests-jenkins*'
]

BUILD:
# Can be supported when a stable release with correct all/any is released
#- any: ['dev/**/*', '!dev/github_jira_sync.py', '!dev/merge_spark_pr.py', '!dev/.rat-excludes']
- "dev/**/*"
- "build/**/*"
- "project/**/*"
- "assembly/**/*"
- "**/*pom.xml"
- "bin/docker-image-tool.sh"
- "bin/find-spark-home*"
- "scalastyle-config.xml"
# These can be added in the above `any` clause (and the /dev/**/* glob removed) when
# `any`/`all` support is released
# - "!dev/github_jira_sync.py"
# - "!dev/merge_spark_pr.py"
# - "!dev/run-tests-jenkins*"
# - "!dev/.rat-excludes"
- changed-files:
- all-globs-to-any-file: [
'dev/**/*',
'!dev/merge_spark_pr.py',
'!dev/run-tests-jenkins*'
]
- any-glob-to-any-file: [
'build/**/*',
'project/**/*',
'assembly/**/*',
'**/*pom.xml',
'bin/docker-image-tool.sh',
'bin/find-spark-home*',
'scalastyle-config.xml'
]

DOCS:
- "docs/**/*"
- "**/README.md"
- "**/CONTRIBUTING.md"
- changed-files:
- any-glob-to-any-file: [
'docs/**/*',
'**/README.md',
'**/CONTRIBUTING.md',
'python/docs/**/*'
]

EXAMPLES:
- "examples/**/*"
- "bin/run-example*"
# CORE needs to be updated when all/any are released upstream.
- changed-files:
- any-glob-to-any-file: [
'examples/**/*',
'bin/run-example*'
]

CORE:
# - any: ["core/**/*", "!**/*UI.scala", "!**/ui/**/*"] # If any file matches all of the globs defined in the list started by `any`, label is applied.
- "core/**/*"
- "common/kvstore/**/*"
- "common/network-common/**/*"
- "common/network-shuffle/**/*"
- "python/pyspark/**/*.py"
- "python/pyspark/tests/**/*.py"
- changed-files:
- all-globs-to-any-file: [
'core/**/*',
'!**/*UI.scala',
'!**/ui/**/*'
]
- any-glob-to-any-file: [
'common/kvstore/**/*',
'common/network-common/**/*',
'common/network-shuffle/**/*',
'python/pyspark/*.py',
'python/pyspark/tests/**/*.py'
]

SPARK SUBMIT:
- "bin/spark-submit*"
- changed-files:
- any-glob-to-any-file: [
'bin/spark-submit*'
]

SPARK SHELL:
- "repl/**/*"
- "bin/spark-shell*"
- changed-files:
- any-glob-to-any-file: [
'repl/**/*',
'bin/spark-shell*'
]

SQL:
#- any: ["**/sql/**/*", "!python/pyspark/sql/avro/**/*", "!python/pyspark/sql/streaming.py", "!python/pyspark/sql/tests/test_streaming.py"]
- "**/sql/**/*"
- "common/unsafe/**/*"
#- "!python/pyspark/sql/avro/**/*"
#- "!python/pyspark/sql/streaming.py"
#- "!python/pyspark/sql/tests/test_streaming.py"
- "bin/spark-sql*"
- "bin/beeline*"
- "sbin/*thriftserver*.sh"
- "**/*SQL*.R"
- "**/DataFrame.R"
- "**/*WindowSpec.R"
- "**/*catalog.R"
- "**/*column.R"
- "**/*functions.R"
- "**/*group.R"
- "**/*schema.R"
- "**/*types.R"
- changed-files:
- all-globs-to-any-file: [
'**/sql/**/*',
'!python/pyspark/sql/avro/**/*',
'!python/pyspark/sql/streaming/**/*',
'!python/pyspark/sql/tests/streaming/test_streaming*.py'
]
- any-glob-to-any-file: [
'common/unsafe/**/*',
'common/sketch/**/*',
'common/variant/**/*',
'bin/spark-sql*',
'bin/beeline*',
'sbin/*thriftserver*.sh',
'**/*SQL*.R',
'**/DataFrame.R',
'**/*WindowSpec.R',
'**/*catalog.R',
'**/*column.R',
'**/*functions.R',
'**/*group.R',
'**/*schema.R',
'**/*types.R'
]

AVRO:
- "external/avro/**/*"
- "python/pyspark/sql/avro/**/*"
- changed-files:
- any-glob-to-any-file: [
'connector/avro/**/*',
'python/pyspark/sql/avro/**/*'
]

DSTREAM:
- "streaming/**/*"
- "data/streaming/**/*"
- "external/kinesis*"
- "external/kafka*"
- "python/pyspark/streaming/**/*"
- changed-files:
- any-glob-to-any-file: [
'streaming/**/*',
'data/streaming/**/*',
'connector/kinesis-asl/**/*',
'connector/kinesis-asl-assembly/**/*',
'connector/kafka-0-10/**/*',
'connector/kafka-0-10-assembly/**/*',
'connector/kafka-0-10-token-provider/**/*',
'python/pyspark/streaming/**/*'
]

GRAPHX:
- "graphx/**/*"
- "data/graphx/**/*"
- changed-files:
- any-glob-to-any-file: [
'graphx/**/*',
'data/graphx/**/*'
]

ML:
- "**/ml/**/*"
- "**/*mllib_*.R"
- changed-files:
- any-glob-to-any-file: [
'**/ml/**/*',
'**/*mllib_*.R'
]

MLLIB:
- "**/spark/mllib/**/*"
- "mllib-local/**/*"
- "python/pyspark/mllib/**/*"
- changed-files:
- any-glob-to-any-file: [
'**/spark/mllib/**/*',
'mllib-local/**/*',
'python/pyspark/mllib/**/*'
]

STRUCTURED STREAMING:
- "**/sql/**/streaming/**/*"
- "external/kafka-0-10-sql/**/*"
- "python/pyspark/sql/streaming.py"
- "python/pyspark/sql/tests/test_streaming.py"
- "**/*streaming.R"
- changed-files:
- any-glob-to-any-file: [
'**/sql/**/streaming/**/*',
'connector/kafka-0-10-sql/**/*',
'python/pyspark/sql/streaming/**/*',
'python/pyspark/sql/tests/streaming/test_streaming*.py',
'**/*streaming.R'
]

PYTHON:
- "bin/pyspark*"
- "**/python/**/*"
- changed-files:
- any-glob-to-any-file: [
'bin/pyspark*',
'**/python/**/*'
]

PANDAS API ON SPARK:
- changed-files:
- any-glob-to-any-file: [
'python/pyspark/pandas/**/*'
]

R:
- "**/r/**/*"
- "**/R/**/*"
- "bin/sparkR*"
- changed-files:
- any-glob-to-any-file: [
'**/r/**/*',
'**/R/**/*',
'bin/sparkR*'
]

YARN:
- "resource-managers/yarn/**/*"
MESOS:
- "resource-managers/mesos/**/*"
- "sbin/*mesos*.sh"
- changed-files:
- any-glob-to-any-file: [
'resource-managers/yarn/**/*'
]

KUBERNETES:
- "resource-managers/kubernetes/**/*"
- changed-files:
- any-glob-to-any-file: [
'resource-managers/kubernetes/**/*'
]

WINDOWS:
- "**/*.cmd"
- "R/pkg/tests/fulltests/test_Windows.R"
- changed-files:
- any-glob-to-any-file: [
'**/*.cmd',
'R/pkg/tests/fulltests/test_Windows.R'
]

WEB UI:
- "**/ui/**/*"
- "**/*UI.scala"
- changed-files:
- any-glob-to-any-file: [
'**/ui/**/*',
'**/*UI.scala'
]

DEPLOY:
- "sbin/**/*"
- changed-files:
- any-glob-to-any-file: [
'sbin/**/*'
]

CONNECT:
- changed-files:
- any-glob-to-any-file: [
'connector/connect/**/*',
'python/pyspark/sql/**/connect/**/*',
'python/pyspark/ml/**/connect/**/*'
]

PROTOBUF:
- changed-files:
- any-glob-to-any-file: [
'connector/protobuf/**/*',
'python/pyspark/sql/protobuf/**/*'
]
Loading

0 comments on commit b463c3f

Please sign in to comment.