diff --git a/.asf.yaml b/.asf.yaml index 3c586c1c3214e..44a819080e00e 100644 --- a/.asf.yaml +++ b/.asf.yaml @@ -39,7 +39,7 @@ github: enabled_merge_buttons: squash: true merge: false - rebase: true + rebase: false protected_branches: main: @@ -57,9 +57,15 @@ github: v2-2-stable: required_pull_request_reviews: required_approving_review_count: 1 + v2-3-stable: + required_pull_request_reviews: + required_approving_review_count: 1 collaborators: - auvipy - paolaperaza - petedejoy - gmcdonald + +notifications: + jobs: jobs@airflow.apache.org diff --git a/.codespellignorelines b/.codespellignorelines index 7b8a9bf5e2e19..d641f0aaaa6bb 100644 --- a/.codespellignorelines +++ b/.codespellignorelines @@ -1,2 +1,3 @@ f"DELETE {source_table} FROM { ', '.join(_from_name(tbl) for tbl in stmt.froms) }" for frm in source_query.selectable.froms: + roles = relationship("Role", secondary=assoc_user_role, backref="user", lazy="selectin") diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index cbff30240ec85..d3b2c86215aef 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -1,10 +1,9 @@ { - "name": "Apache Airflow", + "name": "Apache Airflow - sqlite", "dockerComposeFile": [ "../scripts/ci/docker-compose/devcontainer.yml", "../scripts/ci/docker-compose/local.yml", - "../scripts/ci/docker-compose/backend-postgres.yml", - "../scripts/ci/docker-compose/devcontainer-postgres.yml" + "../scripts/ci/docker-compose/backend-sqlite.yml", ], "extensions": [ "ms-python.python", diff --git a/.devcontainer/mysql/devcontainer.json b/.devcontainer/mysql/devcontainer.json new file mode 100644 index 0000000000000..aa9696b12a33a --- /dev/null +++ b/.devcontainer/mysql/devcontainer.json @@ -0,0 +1,24 @@ +{ + "name": "Apache Airflow - mysql", + "dockerComposeFile": [ + "../scripts/ci/docker-compose/devcontainer.yml", + "../scripts/ci/docker-compose/local.yml", + "../scripts/ci/docker-compose/backend-mysql.yml", + "../scripts/ci/docker-compose/devcontainer-mysql.yml" + ], + "extensions": [ + "ms-python.python", + "ms-python.vscode-pylance", + "mtxr.sqltools", + "mtxr.sqltools-driver-pg", + "rogalmic.bash-debug", + "ms-azuretools.vscode-docker", + "dbaeumer.vscode-eslint", + "ecmel.vscode-html-css", + "timonwong.shellcheck", + "redhat.vscode-yaml", + "rogalmic.bash-debug" + ], + "service": "airflow", + "forwardPorts": [8080,5555,5432,6379] +} diff --git a/.devcontainer/postgres/devcontainer.json b/.devcontainer/postgres/devcontainer.json new file mode 100644 index 0000000000000..fec09cb0500d7 --- /dev/null +++ b/.devcontainer/postgres/devcontainer.json @@ -0,0 +1,24 @@ +{ + "name": "Apache Airflow - postgres", + "dockerComposeFile": [ + "../scripts/ci/docker-compose/devcontainer.yml", + "../scripts/ci/docker-compose/local.yml", + "../scripts/ci/docker-compose/backend-postgres.yml", + "../scripts/ci/docker-compose/devcontainer-postgres.yml" + ], + "extensions": [ + "ms-python.python", + "ms-python.vscode-pylance", + "mtxr.sqltools", + "mtxr.sqltools-driver-pg", + "rogalmic.bash-debug", + "ms-azuretools.vscode-docker", + "dbaeumer.vscode-eslint", + "ecmel.vscode-html-css", + "timonwong.shellcheck", + "redhat.vscode-yaml", + "rogalmic.bash-debug" + ], + "service": "airflow", + "forwardPorts": [8080,5555,5432,6379] +} diff --git a/.dockerignore b/.dockerignore index f6113e2bd6d33..69a3bbfca68c0 100644 --- a/.dockerignore +++ b/.dockerignore @@ -49,12 +49,13 @@ !.flake8 !.dockerignore !pytest.ini -!CHANGELOG.txt +!RELEASE_NOTES.rst !LICENSE !MANIFEST.in !NOTICE !.github 
!empty +!Dockerfile # This folder is for you if you want to add any packages to the docker context when you build your own # docker image. most of other files and any new folder you add will be excluded by default diff --git a/.editorconfig b/.editorconfig index 5032d5ba5cfe7..c69a40f427731 100644 --- a/.editorconfig +++ b/.editorconfig @@ -26,6 +26,7 @@ charset = utf-8 [*.py] indent_size = 4 +max_line_length = 110 [*.sh] indent_size = 4 @@ -44,6 +45,7 @@ indent_size = 2 [*.{yml,yaml}] indent_size = 2 +max_line_length = 110 [*.{htm,html}] indent_size = 2 diff --git a/.gitattributes b/.gitattributes index 7858b73d853a4..497db03fbcfc5 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,3 +1,45 @@ -tests/** export-ignore -tests/ export-ignore -chart/tests export-ignore +breeze export-ignore +breeze-legacy export-ignore +breeze-complete export-ignore +clients export-ignore +clients export-ignore +dev export-ignore +docker-context-files export-ignore +docker_tests export-ignore +empty export-ignore +kubernetes_tests export-ignore +manifests export-ignore +newsfragments export-ignore +scripts export-ignore +tests export-ignore + +Dockerfile.ci export-ignore + +ISSUE_TRIAGE_PROCESS.rst export-ignore +PULL_REQUEST_WORKFLOW.rst export-ignore +SELECTIVE_CHECKS.md export-ignore +STATIC_CODE_CHECKS.rst export-ignore +TESTING.rst export-ignore +LOCAL_VIRTUALENV.rst export-ignore +CONTRIBUTING.rst export-ignore +BREEZE.rst export-ignore +CI.rst export-ignore +CI_DIAGRAMS.md export-ignore +CONTRIBUTORS_QUICK_START.rst export-ignore + +.devcontainer export-ignore +.github export-ignore +.coveragerc export-ignore +.readthedocs.yml export-ignore +.hadolint.yaml export-ignore +.pre-commit-config.yaml export-ignore +.mailmap export-ignore +.editorconfig export-ignore +.flake8 export-ignore +.inputrc export-ignore +.codespellignorelines export-ignore +.gitmodules export-ignore +.gitpod.yml export-ignore +.markdownlint.yml export-ignore +.bash_completion export-ignore +.asf.yaml export-ignore diff --git a/.github/ISSUE_TEMPLATE/airflow_bug_report.yml b/.github/ISSUE_TEMPLATE/airflow_bug_report.yml index 611f48e978680..fc8cf8f324bc3 100644 --- a/.github/ISSUE_TEMPLATE/airflow_bug_report.yml +++ b/.github/ISSUE_TEMPLATE/airflow_bug_report.yml @@ -1,6 +1,6 @@ --- name: Airflow Bug report -description: Problems and issues with code of Apache Airflow core +description: Problems and issues with code in Apache Airflow core labels: ["kind:bug", "area:core"] body: - type: markdown @@ -10,11 +10,11 @@ body: Thank you for finding the time to report the problem! - We really appreciate the community efforts to improve Airflow. + We really appreciate the community's efforts to improve Airflow. - Note, that you do not need to create an issue if you have a change ready to submit! + Note, you do not need to create an issue if you have a change ready to submit! - You can open [Pull Request](https://github.com/apache/airflow/pulls) immediately instead. + You can open a [pull request](https://github.com/apache/airflow/pulls) immediately instead.
" # yamllint enable rule:line-length - type: dropdown @@ -22,10 +22,13 @@ body: label: Apache Airflow version description: > What Apache Airflow version are you using? Only Airflow 2 is supported for bugs. If you wish to - discuss Airflow 1.10, open [Discussion](https://github.com/apache/airflow/discussions) instead! + discuss Airflow 1.10, open a [discussion](https://github.com/apache/airflow/discussions) instead! multiple: false options: - - "2.2.4 (latest released)" + - "2.3.1 (latest released)" + - "2.3.0" + - "2.2.5" + - "2.2.4" - "2.2.3" - "2.2.2" - "2.2.1" @@ -63,7 +66,7 @@ body: label: How to reproduce description: > What should we do to reproduce the problem? If you are not able to provide a reproducible case, - please open a [Discussion](https://github.com/apache/airflow/discussions) instead. + please open a [discussion](https://github.com/apache/airflow/discussions) instead. placeholder: > Please make sure you provide a reproducible step-by-step case of how to reproduce the problem as minimally and precisely as possible. Keep in mind we do not have access to your cluster or DAGs. @@ -85,7 +88,7 @@ body: attributes: label: Deployment description: > - What kind of deployment do you have? If you use Managed Service, consider first using regular + What kind of deployment do you have? If you use a Managed Service, consider first using regular channels of reporting issues for the service. multiple: false options: @@ -129,7 +132,8 @@ body: - type: checkboxes attributes: label: Code of Conduct - description: The Code of Conduct helps create a safe space for everyone. We require + description: > + The Code of Conduct helps create a safe space for everyone. We require that everyone agrees to it. options: - label: > diff --git a/.github/ISSUE_TEMPLATE/airflow_doc_issue_report.yml b/.github/ISSUE_TEMPLATE/airflow_doc_issue_report.yml index be84e5111a63d..0977e9822153c 100644 --- a/.github/ISSUE_TEMPLATE/airflow_doc_issue_report.yml +++ b/.github/ISSUE_TEMPLATE/airflow_doc_issue_report.yml @@ -1,6 +1,6 @@ --- name: Airflow Doc issue report -description: Problems and issues with docs of Apache Airflow +description: Problems and issues with Apache Airflow documentation labels: ["kind:bug", "kind:documentation"] body: - type: markdown @@ -10,11 +10,11 @@ body: Thank you for finding the time to report the problem! - We really appreciate the community efforts to improve Airflow. + We really appreciate the community's efforts to improve Airflow. - Note, that you do not need to create an issue if you have a change ready to submit! + Note, you do not need to create an issue if you have a change ready to submit! - You can open [Pull Request](https://github.com/apache/airflow/pulls) immediately instead. + You can open a [pull request](https://github.com/apache/airflow/pulls) immediately instead.
" # yamllint enable rule:line-length - type: textarea @@ -56,7 +56,8 @@ body: - type: checkboxes attributes: label: Code of Conduct - description: The Code of Conduct helps create a safe space for everyone. We require + description: > + The Code of Conduct helps create a safe space for everyone. We require that everyone agrees to it. options: - label: > diff --git a/.github/ISSUE_TEMPLATE/airflow_helmchart_bug_report.yml b/.github/ISSUE_TEMPLATE/airflow_helmchart_bug_report.yml index f8bd383d316d8..dc3d788b848ea 100644 --- a/.github/ISSUE_TEMPLATE/airflow_helmchart_bug_report.yml +++ b/.github/ISSUE_TEMPLATE/airflow_helmchart_bug_report.yml @@ -1,6 +1,6 @@ --- name: Airflow Helm Chart Bug report -description: Problems and issues with Apache Airflow Official Helm Chart +description: Problems and issues with the Apache Airflow Official Helm Chart labels: ["kind:bug", "area:helm-chart"] body: - type: markdown @@ -10,15 +10,15 @@ body: Thank you for finding the time to report the problem! - We really appreciate the community efforts to improve Airflow. + We really appreciate the community's efforts to improve Airflow. Note that this issue is only for the [Official Apache Airflow Helm Chart](https://airflow.apache.org/docs/helm-chart/stable/index.html). If you use another 3rd-party Chart, you should report your issue in the repo of that chart instead. - Note, that you do not need to create an issue if you have a change ready to submit! + Note, you do not need to create an issue if you have a change ready to submit! - You can open [Pull Request](https://github.com/apache/airflow/pulls) immediately instead. + You can open a [pull request](https://github.com/apache/airflow/pulls) immediately instead.
" # yamllint enable rule:line-length - type: dropdown @@ -28,7 +28,9 @@ body: What Apache Airflow Helm Chart version are you using? multiple: false options: - - "1.4.0 (latest released)" + - "1.6.0 (latest released)" + - "1.5.0" + - "1.4.0" - "1.3.0" - "1.2.0" - "1.1.0" @@ -41,10 +43,13 @@ body: label: Apache Airflow version description: > What Apache Airflow version are you using? Only Airflow 2 is supported for bugs. If you wish to - discuss Airflow 1.10, open [Discussion](https://github.com/apache/airflow/discussions) instead! + discuss Airflow 1.10, open a [discussion](https://github.com/apache/airflow/discussions) instead! multiple: false options: - - "2.2.4 (latest released)" + - "2.3.1 (latest released)" + - "2.3.0" + - "2.2.5" + - "2.2.4" - "2.2.3" - "2.2.2" - "2.2.1" @@ -79,9 +84,9 @@ body: label: Docker Image customisations description: What are the specific modification you've made in your image? placeholder: > - Did you extend or customise the official Airflow image? Did you add any packages? Maybe - you can share link to your image, or copy the Dockerfile and `docker build` commands - you used to build the image? Make sure to surround the code you paste with ``` ```. + Did you extend or customise the official Airflow image? Did you add any packages? Maybe + you can share a link to your image, or copy the Dockerfile and `docker build` commands + you used to build the image? Make sure to surround the code you paste with ``` ```. - type: textarea attributes: label: What happened @@ -130,7 +135,8 @@ body: - type: checkboxes attributes: label: Code of Conduct - description: The Code of Conduct helps create a safe space for everyone. We require + description: > + The Code of Conduct helps create a safe space for everyone. We require that everyone agrees to it. options: - label: > diff --git a/.github/ISSUE_TEMPLATE/airflow_providers_bug_report.yml b/.github/ISSUE_TEMPLATE/airflow_providers_bug_report.yml index d1ad2b3ac5de8..10bf533ac98a9 100644 --- a/.github/ISSUE_TEMPLATE/airflow_providers_bug_report.yml +++ b/.github/ISSUE_TEMPLATE/airflow_providers_bug_report.yml @@ -1,6 +1,6 @@ --- name: Airflow Providers Bug report -description: Problems and issues with code of Apache Airflow Provider +description: Problems and issues with code in Apache Airflow Providers labels: ["kind:bug", "area:providers"] body: - type: markdown @@ -10,11 +10,11 @@ body: Thank you for finding the time to report a problem! - We really appreciate the community efforts to improve Airflow. + We really appreciate the community's efforts to improve Airflow. - Note, that you do not need to create an issue if you have a change ready to submit! + Note, you do not need to create an issue if you have a change ready to submit! - You can open [Pull Request](https://github.com/apache/airflow/pulls) immediately instead. + You can open a [pull request](https://github.com/apache/airflow/pulls) immediately instead.
" # yamllint enable rule:line-length - type: dropdown @@ -38,6 +38,7 @@ body: - apache-pinot - apache-spark - apache-sqoop + - arangodb - asana - celery - cloudant @@ -108,10 +109,13 @@ body: label: Apache Airflow version description: > What Apache Airflow version are you using? Only Airflow 2 is supported for bugs. If you wish to - discuss Airflow 1.10, open [Discussion](https://github.com/apache/airflow/discussions) instead! + discuss Airflow 1.10, open a [discussion](https://github.com/apache/airflow/discussions) instead! multiple: false options: - - "2.2.4 (latest released)" + - "2.3.1 (latest released)" + - "2.3.0" + - "2.2.5" + - "2.2.4" - "2.2.3" - "2.2.2" - "2.2.1" @@ -138,7 +142,7 @@ body: attributes: label: Deployment description: > - What kind of deployment do you have? If you use Managed Service, consider first using regular + What kind of deployment do you have? If you use a Managed Service, consider first using regular channels of reporting issues for the service. multiple: false options: @@ -208,7 +212,8 @@ body: - type: checkboxes attributes: label: Code of Conduct - description: The Code of Conduct helps create a safe space for everyone. We require + description: > + The Code of Conduct helps create a safe space for everyone. We require that everyone agrees to it. options: - label: > diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 3846ae698c717..be2a102e92d15 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -20,4 +20,4 @@ http://chris.beams.io/posts/git-commit/ Read the **[Pull Request Guidelines](https://github.com/apache/airflow/blob/main/CONTRIBUTING.rst#pull-request-guidelines)** for more information. In case of fundamental code change, Airflow Improvement Proposal ([AIP](https://cwiki.apache.org/confluence/display/AIRFLOW/Airflow+Improvements+Proposals)) is needed. In case of a new dependency, check compliance with the [ASF 3rd Party License Policy](https://www.apache.org/legal/resolved.html#category-x). -In case of backwards incompatible changes please leave a note in [UPDATING.md](https://github.com/apache/airflow/blob/main/UPDATING.md). +In case of backwards incompatible changes please leave a note in a newsfragement file, named `{pr_number}.significant.rst`, in [newsfragments](https://github.com/apache/airflow/tree/main/newsfragments). 
diff --git a/.github/boring-cyborg.yml b/.github/boring-cyborg.yml index 737e50f665680..607d4fb6cdc55 100644 --- a/.github/boring-cyborg.yml +++ b/.github/boring-cyborg.yml @@ -188,8 +188,6 @@ labelPRBasedOnFilePath: area:production-image: - Dockerfile - docs/docker-stack/**/* - - scripts/in_container/prod/* - - scripts/ci/libraries/_verify_image.sh - docker_tests/**/* # Various Flags to control behaviour of the "Labeler" diff --git a/.github/workflows/build-images.yml b/.github/workflows/build-images.yml index 282f92d4b1ee5..9970a82e6c213 100644 --- a/.github/workflows/build-images.yml +++ b/.github/workflows/build-images.yml @@ -18,17 +18,13 @@ --- name: "Build Images" on: # yamllint disable-line rule:truthy - schedule: - - cron: '28 0 * * *' pull_request_target: - push: - branches: ['main', 'v[0-9]+-[0-9]+-test'] permissions: # all other permissions are set to none contents: read env: MOUNT_SELECTED_LOCAL_SOURCES: "false" - FORCE_ANSWER_TO_QUESTIONS: "yes" + ANSWER: "yes" CHECK_IMAGE_FOR_REBUILD: "true" SKIP_CHECK_REMOTE_IMAGE: "true" DEBIAN_VERSION: "bullseye" @@ -43,17 +39,13 @@ env: secrets.CONSTRAINTS_GITHUB_REPOSITORY || 'apache/airflow' }} # This token is WRITE one - pull_request_target type of events always have the WRITE token GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - GITHUB_REGISTRY_PULL_IMAGE_TAG: "latest" - INSTALL_PROVIDERS_FROM_SOURCES: "true" - AIRFLOW_LOGIN_TO_GITHUB_REGISTRY: "true" - GITHUB_REGISTRY_PUSH_IMAGE_TAG: ${{ github.event.pull_request.head.sha || github.sha }} + IMAGE_TAG_FOR_THE_BUILD: "${{ github.event.pull_request.head.sha || github.sha }}" concurrency: group: build-${{ github.event.pull_request.number || github.ref }} cancel-in-progress: true jobs: - build-info: timeout-minutes: 10 name: "Build Info" @@ -104,6 +96,8 @@ jobs: if: github.event_name == 'pull_request_target' # Retrieve it to be able to determine which files has changed in the incoming commit of the PR # we checkout the target commit and it's parent to be able to compare them + - name: Cleanup repo + run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - uses: actions/checkout@v2 with: ref: ${{ env.TARGET_COMMIT_SHA }} @@ -114,12 +108,6 @@ jobs: with: persist-credentials: false submodules: recursive - - name: env - run: printenv - env: - dynamicOutputs: ${{ toJSON(steps.dynamic-outputs.outputs) }} - PR_LABELS: ${{ steps.get-latest-pr-labels.outputs.pullRequestLabels }} - GITHUB_CONTEXT: ${{ toJson(github) }} - name: Selective checks id: selective-checks env: @@ -146,7 +134,7 @@ jobs: if [[ "${{ github.event_name }}" == 'schedule' ]]; then echo "::set-output name=cacheDirective::disabled" else - echo "::set-output name=cacheDirective::pulled" + echo "::set-output name=cacheDirective:registry" fi if [[ "$SELECTIVE_CHECKS_IMAGE_BUILD" == "true" ]]; then @@ -156,6 +144,12 @@ jobs: fi env: SELECTIVE_CHECKS_IMAGE_BUILD: ${{ steps.selective-checks.outputs.image-build }} + - name: env + run: printenv + env: + dynamicOutputs: ${{ toJSON(steps.dynamic-outputs.outputs) }} + PR_LABELS: ${{ steps.get-latest-pr-labels.outputs.pullRequestLabels }} + GITHUB_CONTEXT: ${{ toJson(github) }} build-ci-images: permissions: @@ -166,19 +160,19 @@ jobs: needs: [build-info] strategy: matrix: - # We need to attempt to build all possible versions here because pull_request_target - # event is run for both main and v1-10-tests python-version: ${{ fromJson(needs.build-info.outputs.allPythonVersions) }} fail-fast: true - if: needs.build-info.outputs.image-build == 'true' + if: | + 
needs.build-info.outputs.image-build == 'true' && + github.event.pull_request.head.repo.full_name != 'apache/airflow' env: RUNS_ON: ${{ fromJson(needs.build-info.outputs.runsOn)[0] }} - BACKEND: sqlite PYTHON_MAJOR_MINOR_VERSION: ${{ matrix.python-version }} - UPGRADE_TO_NEWER_DEPENDENCIES: ${{ needs.build-info.outputs.upgradeToNewerDependencies }} - DOCKER_CACHE: ${{ needs.build-info.outputs.cacheDirective }} + BACKEND: sqlite outputs: ${{toJSON(needs.build-info.outputs) }} steps: + - name: Cleanup repo + run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - uses: actions/checkout@v2 with: ref: ${{ needs.build-info.outputs.targetCommitSha }} @@ -186,7 +180,7 @@ jobs: submodules: recursive - name: "Retrieve DEFAULTS from the _initialization.sh" # We cannot "source" the script here because that would be a security problem (we cannot run - # any code that comes from the sources coming from the PR. Therefore we extract the + # any code that comes from the sources coming from the PR. Therefore, we extract the # DEFAULT_BRANCH and DEFAULT_CONSTRAINTS_BRANCH and DEBIAN_VERSION via custom grep/awk/sed commands id: defaults run: | @@ -220,30 +214,35 @@ jobs: # changed in the image built - we should only override those that are executed to build # the image. run: | - rm -rf "scripts/ci" - rm -rf "dev" - mv "main-airflow/scripts/ci" "scripts" - mv "main-airflow/dev" "." + rm -rfv "scripts/ci" + rm -rfv "dev" + mv -v "main-airflow/scripts/ci" "scripts" + mv -v "main-airflow/dev" "." - uses: actions/setup-python@v2 with: python-version: ${{ needs.build-info.outputs.defaultPythonVersion }} cache: 'pip' cache-dependency-path: ./dev/breeze/setup* - - run: python -m pip install --editable ./dev/breeze/ + - run: ./scripts/ci/install_breeze.sh - name: "Free space" - run: airflow-freespace - - name: "Build CI image ${{ matrix.python-version }}:${{ env.GITHUB_REGISTRY_PUSH_IMAGE_TAG }}" - run: Breeze2 build-ci-image - - name: "Push CI image ${{ matrix.python-version }}:${{ env.GITHUB_REGISTRY_PUSH_IMAGE_TAG }}" - run: ./scripts/ci/images/ci_push_ci_images.sh - - name: > - Push empty CI images to finish waiting jobs: - ${{ matrix.python-version }}:${{ env.GITHUB_REGISTRY_PUSH_IMAGE_TAG }}" + run: breeze free-space + - name: Build & Push CI image ${{ env.PYTHON_MAJOR_MINOR_VERSION }}:${{ env.IMAGE_TAG_FOR_THE_BUILD }} + run: breeze build-image --push-image --tag-as-latest + env: + UPGRADE_TO_NEWER_DEPENDENCIES: ${{ needs.build-info.outputs.upgradeToNewerDependencies }} + DOCKER_CACHE: ${{ needs.build-info.outputs.cacheDirective }} + IMAGE_TAG: ${{ env.IMAGE_TAG_FOR_THE_BUILD }} + - name: Push empty CI image ${{ env.PYTHON_MAJOR_MINOR_VERSION }}:${{ env.IMAGE_TAG_FOR_THE_BUILD }} if: failure() || cancelled() - run: ./scripts/ci/images/ci_push_empty_ci_images.sh + run: breeze build-image --push-image --empty-image + env: + IMAGE_TAG: ${{ env.IMAGE_TAG_FOR_THE_BUILD }} - name: "Candidates for pip resolver backtrack triggers: ${{ matrix.python-version }}" if: failure() || cancelled() - run: airflow-find-newer-dependencies --max-age 1 --python "${{ matrix.python-version }}" + run: breeze find-newer-dependencies --max-age 1 --python "${{ matrix.python-version }}" + - name: "Fix ownership" + run: breeze fix-ownership + if: always() build-prod-images: permissions: @@ -254,21 +253,18 @@ jobs: needs: [build-info, build-ci-images] strategy: matrix: - # We need to attempt to build all possible versions here because pull_request_target - # event is run for both main and v1-10-tests 
python-version: ${{ fromJson(needs.build-info.outputs.allPythonVersions) }} fail-fast: true - if: needs.build-info.outputs.image-build == 'true' + if: | + needs.build-info.outputs.image-build == 'true' && + github.event.pull_request.head.repo.full_name != 'apache/airflow' env: RUNS_ON: ${{ fromJson(needs.build-info.outputs.runsOn)[0] }} - BACKEND: sqlite PYTHON_MAJOR_MINOR_VERSION: ${{ matrix.python-version }} - UPGRADE_TO_NEWER_DEPENDENCIES: ${{ needs.build-info.outputs.upgradeToNewerDependencies }} - DOCKER_CACHE: ${{ needs.build-info.outputs.cacheDirective }} - VERSION_SUFFIX_FOR_PYPI: ".dev0" - INSTALL_PROVIDERS_FROM_SOURCES: > - ${{ needs.build-info.outputs.defaultBranch == 'main' && 'true' || 'false' }} + BACKEND: sqlite steps: + - name: Cleanup repo + run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - uses: actions/checkout@v2 with: ref: ${{ needs.build-info.outputs.targetCommitSha }} @@ -310,28 +306,55 @@ jobs: # changed in the image built - we should only override those that are executed to build # the image. run: | - rm -rf "scripts/ci" - rm -rf "dev" - mv "main-airflow/scripts/ci" "scripts" - mv "main-airflow/dev" "." + rm -rfv "scripts/ci" + rm -rfv "dev" + mv -v "main-airflow/scripts/ci" "scripts" + mv -v "main-airflow/dev" "." - uses: actions/setup-python@v2 with: python-version: ${{ needs.build-info.outputs.defaultPythonVersion }} cache: 'pip' cache-dependency-path: ./dev/breeze/setup* - - run: python -m pip install --editable ./dev/breeze/ + - run: ./scripts/ci/install_breeze.sh - name: "Free space" - run: airflow-freespace - - name: "Pull CI image for PROD ${{ matrix.python-version }}:${{ env.GITHUB_REGISTRY_PUSH_IMAGE_TAG }}" - run: ./scripts/ci/images/ci_pull_ci_image_on_ci.sh - env: - GITHUB_REGISTRY_PULL_IMAGE_TAG: ${{ github.event.pull_request.head.sha || github.sha }} - - name: "Build PROD image ${{ matrix.python-version }}:${{ env.GITHUB_REGISTRY_PUSH_IMAGE_TAG }}" - run: ./scripts/ci/images/ci_build_prod_image_on_ci.sh - - name: "Push PROD image ${{ matrix.python-version }}:${{ env.GITHUB_REGISTRY_PUSH_IMAGE_TAG }}" - run: ./scripts/ci/images/ci_push_production_images.sh + run: breeze free-space - name: > - Push empty PROD images to finish waiting jobs: - ${{ matrix.python-version }}:${{ env.GITHUB_REGISTRY_PUSH_IMAGE_TAG }}" + Pull CI image for PROD build: + ${{ env.PYTHON_MAJOR_MINOR_VERSION }}:${{ env.IMAGE_TAG_FOR_THE_BUILD }} + run: breeze pull-image --tag-as-latest + env: + # Always use default Python version of CI image for preparing packages + PYTHON_MAJOR_MINOR_VERSION: ${{ needs.build-info.outputs.defaultPythonVersion }} + IMAGE_TAG: ${{ env.IMAGE_TAG_FOR_THE_BUILD }} + - name: "Cleanup dist and context file" + run: rm -fv ./dist/* ./docker-context-files/* + - name: "Prepare providers packages" + run: > + breeze prepare-provider-packages + --package-list-file ./scripts/ci/installed_providers.txt + --package-format wheel + --version-suffix-for-pypi dev0 + - name: "Prepare airflow package" + run: breeze prepare-airflow-package --package-format wheel --version-suffix-for-pypi dev0 + - name: "Move dist packages to docker-context files" + run: mv -v ./dist/*.whl ./docker-context-files + - name: Build & Push PROD image ${{ env.PYTHON_MAJOR_MINOR_VERSION }}:${{ env.IMAGE_TAG_FOR_THE_BUILD }} + run: > + breeze build-prod-image + --tag-as-latest + --push-image + --install-packages-from-context + --disable-airflow-repo-cache + --airflow-is-in-context + env: + UPGRADE_TO_NEWER_DEPENDENCIES: ${{ 
needs.build-info.outputs.upgradeToNewerDependencies }} + DOCKER_CACHE: ${{ needs.build-info.outputs.cacheDirective }} + IMAGE_TAG: ${{ env.IMAGE_TAG_FOR_THE_BUILD }} + - name: Push empty PROD image ${{ env.PYTHON_MAJOR_MINOR_VERSION }}:${{ env.IMAGE_TAG_FOR_THE_BUILD }} if: failure() || cancelled() - run: ./scripts/ci/images/ci_push_empty_prod_images.sh + run: breeze build-prod-image --cleanup-context --push-image --empty-image + env: + IMAGE_TAG: ${{ env.IMAGE_TAG_FOR_THE_BUILD }} + - name: "Fix ownership" + run: breeze fix-ownership + if: always() diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index dbe5ae0a95de5..05781d33d7cc2 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -27,15 +27,15 @@ on: # yamllint disable-line rule:truthy permissions: # All other permissions are set to none contents: read + packages: read env: MOUNT_SELECTED_LOCAL_SOURCES: "false" - FORCE_ANSWER_TO_QUESTIONS: "yes" + ANSWER: "yes" CHECK_IMAGE_FOR_REBUILD: "true" SKIP_CHECK_REMOTE_IMAGE: "true" DEBIAN_VERSION: "bullseye" DB_RESET: "true" VERBOSE: "true" - DOCKER_CACHE: "pulled" GITHUB_REPOSITORY: ${{ github.repository }} GITHUB_USERNAME: ${{ github.actor }} # You can override CONSTRAINTS_GITHUB_REPOSITORY by setting secret in your repo but by default the @@ -45,13 +45,8 @@ env: secrets.CONSTRAINTS_GITHUB_REPOSITORY || 'apache/airflow' }} # In builds from forks, this token is read-only. For scheduler/direct push it is WRITE one GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - # In builds from forks, this token is empty, and this is good because such builds do not even try - # to push images to the registry. - GITHUB_REGISTRY_PULL_IMAGE_TAG: "${{ github.event.pull_request.head.sha || github.sha }}" - GITHUB_REGISTRY_PUSH_IMAGE_TAG: "latest" - INSTALL_PROVIDERS_FROM_SOURCES: "true" - AIRFLOW_LOGIN_TO_GITHUB_REGISTRY: "true" ENABLE_TEST_COVERAGE: "${{ github.event_name == 'push' }}" + IMAGE_TAG_FOR_THE_BUILD: "${{ github.event.pull_request.head.sha || github.sha }}" concurrency: group: ci-${{ github.event.pull_request.number || github.ref }} @@ -85,6 +80,7 @@ jobs: "XD-DENG", "aijamalnk", "alexvanboxel", + "aneesh-joseph", "aoen", "artwr", "ashb", @@ -103,8 +99,10 @@ jobs: "jghoman", "jhtimmins", "jmcarp", + "josh-fell", "kaxil", "leahecole", + "malthe", "mik-laj", "milton0825", "mistercrunch", @@ -127,6 +125,10 @@ jobs: env: GITHUB_CONTEXT: ${{ toJson(github) }} outputs: + defaultBranch: ${{ steps.selective-checks.outputs.default-branch }} + cacheDirective: ${{ steps.dynamic-outputs.outputs.cacheDirective }} + waitForImage: ${{ steps.wait-for-image.outputs.wait-for-image }} + allPythonVersions: ${{ steps.selective-checks.outputs.all-python-versions }} upgradeToNewerDependencies: ${{ steps.selective-checks.outputs.upgrade-to-newer-dependencies }} pythonVersions: ${{ steps.selective-checks.outputs.python-versions }} pythonVersionsListAsString: ${{ steps.selective-checks.outputs.python-versions-list-as-string }} @@ -161,11 +163,17 @@ jobs: needs-api-tests: ${{ steps.selective-checks.outputs.needs-api-tests }} needs-api-codegen: ${{ steps.selective-checks.outputs.needs-api-codegen }} default-branch: ${{ steps.selective-checks.outputs.default-branch }} + sourceHeadRepo: ${{ steps.source-run-info.outputs.sourceHeadRepo }} pullRequestNumber: ${{ steps.source-run-info.outputs.pullRequestNumber }} pullRequestLabels: ${{ steps.source-run-info.outputs.pullRequestLabels }} runsOn: ${{ steps.set-runs-on.outputs.runsOn }} runCoverage: ${{ steps.set-run-coverage.outputs.runCoverage }} + 
inWorkflowBuild: ${{ steps.set-in-workflow-build.outputs.inWorkflowBuild }} + buildJobDescription: ${{ steps.set-in-workflow-build.outputs.buildJobDescription }} + mergeRun: ${{ steps.set-merge-run.outputs.merge-run }} steps: + - name: Cleanup repo + run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" uses: actions/checkout@v2 with: @@ -216,19 +224,231 @@ jobs: # Avoid having to specify the coverage logic every time. - name: Set run coverage id: set-run-coverage - run: | - echo "::set-output name=runCoverage::true" + run: echo "::set-output name=runCoverage::true" if: > github.ref == 'refs/heads/main' && github.repository == 'apache/airflow' && github.event_name == 'push' && steps.selective-checks.outputs.default-branch == 'main' + - name: Determine where to run image builds + id: set-in-workflow-build + # Run in-workflow build image when: + # * direct push is run + # * schedule build is run + # * pull request is run not from fork + run: | + set -x + if [[ ${GITHUB_EVENT_NAME} == "push" || ${GITHUB_EVENT_NAME} == "push" || \ + ${{steps.source-run-info.outputs.sourceHeadRepo}} == "apache/airflow" ]]; then + echo "Images will be built in current workflow" + echo "::set-output name=inWorkflowBuild::true" + echo "::set-output name=buildJobDescription::Build" + else + echo "Images will be built in pull_request_target workflow" + echo "::set-output name=inWorkflowBuild::false" + echo "::set-output name=buildJobDescription::Skip Build (pull_request_target)" + fi + - name: Determine if this is merge run + id: set-merge-run + run: echo "::set-output name=merge-run::true" + # Only in Apache Airflow repo, when there is a merge run to main or any of v2*test branches + if: | + github.repository == 'apache/airflow' && github.event_name == 'push' && + ( + github.ref_name == 'main' || + startsWith(github.ref_name, 'v2') && endsWith(github.ref_name, 'test') + ) + - name: Compute dynamic outputs + id: dynamic-outputs + run: | + set -x + if [[ "${{ github.event_name }}" == 'schedule' ]]; then + echo "::set-output name=cacheDirective::disabled" + else + echo "::set-output name=cacheDirective::registry" + fi + + if [[ "$SELECTIVE_CHECKS_IMAGE_BUILD" == "true" ]]; then + echo "::set-output name=image-build::true" + else + echo "::set-output name=image-build::false" + fi + env: + SELECTIVE_CHECKS_IMAGE_BUILD: ${{ steps.selective-checks.outputs.image-build }} + - name: env + run: printenv + env: + dynamicOutputs: ${{ toJSON(steps.dynamic-outputs.outputs) }} + PR_LABELS: ${{ steps.get-latest-pr-labels.outputs.pullRequestLabels }} + GITHUB_CONTEXT: ${{ toJson(github) }} + + build-ci-images: + permissions: + packages: write + timeout-minutes: 80 + name: "${{needs.build-info.outputs.buildJobDescription}} CI image ${{matrix.python-version}}" + runs-on: ${{ fromJson(needs.build-info.outputs.runsOn) }} + needs: [build-info] + strategy: + matrix: + python-version: ${{ fromJson(needs.build-info.outputs.allPythonVersions) }} + fail-fast: true + env: + RUNS_ON: ${{ fromJson(needs.build-info.outputs.runsOn)[0] }} + steps: + - name: Cleanup repo + run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" + if: needs.build-info.outputs.inWorkflowBuild == 'true' + - uses: actions/checkout@v2 + with: + ref: ${{ needs.build-info.outputs.targetCommitSha }} + persist-credentials: false + submodules: recursive + if: needs.build-info.outputs.inWorkflowBuild == 'true' + - name: "Setup python" + uses: 
actions/setup-python@v2 + with: + python-version: ${{ needs.build-info.outputs.defaultPythonVersion }} + if: needs.build-info.outputs.inWorkflowBuild == 'true' + - name: "Retrieve DEFAULTS from the _initialization.sh" + # We cannot "source" the script here because that would be a security problem (we cannot run + # any code that comes from the sources coming from the PR. Therefore we extract the + # DEFAULT_BRANCH and DEFAULT_CONSTRAINTS_BRANCH and DEBIAN_VERSION via custom grep/awk/sed commands + id: defaults + run: | + DEFAULT_BRANCH=$(grep "export DEFAULT_BRANCH" scripts/ci/libraries/_initialization.sh | \ + awk 'BEGIN{FS="="} {print $3}' | sed s'/["}]//g') + echo "DEFAULT_BRANCH=${DEFAULT_BRANCH}" >> $GITHUB_ENV + DEFAULT_CONSTRAINTS_BRANCH=$(grep "export DEFAULT_CONSTRAINTS_BRANCH" \ + scripts/ci/libraries/_initialization.sh | \ + awk 'BEGIN{FS="="} {print $3}' | sed s'/["}]//g') + echo "DEFAULT_CONSTRAINTS_BRANCH=${DEFAULT_CONSTRAINTS_BRANCH}" >> $GITHUB_ENV + DEBIAN_VERSION=$(grep "export DEBIAN_VERSION" scripts/ci/libraries/_initialization.sh | \ + awk 'BEGIN{FS="="} {print $3}' | sed s'/["}]//g') + echo "DEBIAN_VERSION=${DEBIAN_VERSION}" >> $GITHUB_ENV + if: needs.build-info.outputs.inWorkflowBuild == 'true' + - run: ./scripts/ci/install_breeze.sh + if: needs.build-info.outputs.inWorkflowBuild == 'true' + - name: "Free space" + run: breeze free-space + if: needs.build-info.outputs.inWorkflowBuild == 'true' + - name: Build & Push CI image ${{ matrix.python-version }}:${{ env.IMAGE_TAG_FOR_THE_BUILD }} + run: breeze build-image --push-image --tag-as-latest + env: + PYTHON_MAJOR_MINOR_VERSION: ${{ matrix.python-version }} + UPGRADE_TO_NEWER_DEPENDENCIES: ${{ needs.build-info.outputs.upgradeToNewerDependencies }} + DOCKER_CACHE: ${{ needs.build-info.outputs.cacheDirective }} + IMAGE_TAG: ${{ env.IMAGE_TAG_FOR_THE_BUILD }} + if: needs.build-info.outputs.inWorkflowBuild == 'true' + - name: "Candidates for pip resolver backtrack triggers: ${{ matrix.python-version }}" + if: failure() || cancelled() + run: breeze find-newer-dependencies --max-age 1 --python "${{ matrix.python-version }}" + - name: "Fix ownership" + run: breeze fix-ownership + if: always() && needs.build-info.outputs.inWorkflowBuild == 'true' + + build-prod-images: + permissions: + packages: write + timeout-minutes: 80 + name: "${{needs.build-info.outputs.buildJobDescription}} PROD image ${{matrix.python-version}}" + runs-on: ${{ fromJson(needs.build-info.outputs.runsOn) }} + needs: [build-info, build-ci-images] + strategy: + matrix: + python-version: ${{ fromJson(needs.build-info.outputs.allPythonVersions) }} + fail-fast: true + env: + RUNS_ON: ${{ fromJson(needs.build-info.outputs.runsOn)[0] }} + BACKEND: sqlite + PYTHON_MAJOR_MINOR_VERSION: ${{ matrix.python-version }} + DOCKER_CACHE: ${{ needs.build-info.outputs.cacheDirective }} + VERSION_SUFFIX_FOR_PYPI: "dev0" + steps: + - name: Cleanup repo + run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" + if: needs.build-info.outputs.inWorkflowBuild == 'true' + - uses: actions/checkout@v2 + with: + ref: ${{ needs.build-info.outputs.targetCommitSha }} + persist-credentials: false + submodules: recursive + if: needs.build-info.outputs.inWorkflowBuild == 'true' + - name: "Setup python" + uses: actions/setup-python@v2 + with: + python-version: ${{ needs.build-info.outputs.defaultPythonVersion }} + if: needs.build-info.outputs.inWorkflowBuild == 'true' + - name: "Retrieve DEFAULTS from the _initialization.sh" + # We cannot "source" the script 
here because that would be a security problem (we cannot run + # any code that comes from the sources coming from the PR. Therefore we extract the + # DEFAULT_BRANCH and DEFAULT_CONSTRAINTS_BRANCH and DEBIAN_VERSION via custom grep/awk/sed commands + id: defaults + run: | + DEFAULT_BRANCH=$(grep "export DEFAULT_BRANCH" scripts/ci/libraries/_initialization.sh | \ + awk 'BEGIN{FS="="} {print $3}' | sed s'/["}]//g') + echo "DEFAULT_BRANCH=${DEFAULT_BRANCH}" >> $GITHUB_ENV + DEFAULT_CONSTRAINTS_BRANCH=$(grep "export DEFAULT_CONSTRAINTS_BRANCH" \ + scripts/ci/libraries/_initialization.sh | \ + awk 'BEGIN{FS="="} {print $3}' | sed s'/["}]//g') + echo "DEFAULT_CONSTRAINTS_BRANCH=${DEFAULT_CONSTRAINTS_BRANCH}" >> $GITHUB_ENV + DEBIAN_VERSION=$(grep "export DEBIAN_VERSION" scripts/ci/libraries/_initialization.sh | \ + awk 'BEGIN{FS="="} {print $3}' | sed s'/["}]//g') + echo "DEBIAN_VERSION=${DEBIAN_VERSION}" >> $GITHUB_ENV + if: needs.build-info.outputs.inWorkflowBuild == 'true' + - run: ./scripts/ci/install_breeze.sh + if: needs.build-info.outputs.inWorkflowBuild == 'true' + - name: "Free space" + run: breeze free-space + if: needs.build-info.outputs.inWorkflowBuild == 'true' + - name: > + Pull CI image for PROD build: + ${{ needs.build-info.outputs.defaultPythonVersion }}:${{ env.IMAGE_TAG_FOR_THE_BUILD }}" + run: breeze pull-image --tag-as-latest + env: + # Always use default Python version of CI image for preparing packages + PYTHON_MAJOR_MINOR_VERSION: ${{ needs.build-info.outputs.defaultPythonVersion }} + IMAGE_TAG: ${{ env.IMAGE_TAG_FOR_THE_BUILD }} + if: needs.build-info.outputs.inWorkflowBuild == 'true' + - name: "Cleanup dist and context file" + run: rm -fv ./dist/* ./docker-context-files/* + if: needs.build-info.outputs.inWorkflowBuild == 'true' + - name: "Prepare providers packages" + run: > + breeze prepare-provider-packages + --package-list-file ./scripts/ci/installed_providers.txt + --package-format wheel --version-suffix-for-pypi dev0 + if: needs.build-info.outputs.inWorkflowBuild == 'true' + - name: "Prepare airflow package" + run: breeze prepare-airflow-package --package-format wheel --version-suffix-for-pypi dev0 + if: needs.build-info.outputs.inWorkflowBuild == 'true' + - name: "Move dist packages to docker-context files" + run: mv -v ./dist/*.whl ./docker-context-files + if: needs.build-info.outputs.inWorkflowBuild == 'true' + - name: Build & Push PROD image ${{ env.PYTHON_MAJOR_MINOR_VERSION }}:${{ env.IMAGE_TAG_FOR_THE_BUILD }} + run: > + breeze build-prod-image + --tag-as-latest + --push-image + --install-packages-from-context + --disable-airflow-repo-cache + --airflow-is-in-context + env: + UPGRADE_TO_NEWER_DEPENDENCIES: ${{ needs.build-info.outputs.upgradeToNewerDependencies }} + DOCKER_CACHE: ${{ needs.build-info.outputs.cacheDirective }} + IMAGE_TAG: ${{ env.IMAGE_TAG_FOR_THE_BUILD }} + if: needs.build-info.outputs.inWorkflowBuild == 'true' + - name: "Fix ownership" + run: breeze fix-ownership + if: always() && needs.build-info.outputs.inWorkflowBuild == 'true' run-new-breeze-tests: timeout-minutes: 10 - name: Breeze2 tests + name: Breeze unit tests runs-on: ${{ fromJson(needs.build-info.outputs.runsOn) }} needs: [build-info] steps: + - name: Cleanup repo + run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - uses: actions/checkout@v2 with: persist-credentials: false @@ -239,6 +459,7 @@ jobs: cache-dependency-path: ./dev/breeze/setup* - run: python -m pip install --editable ./dev/breeze/ - run: python -m pytest ./dev/breeze/ -n auto 
--color=yes + - run: breeze version tests-ui: timeout-minutes: 10 @@ -247,6 +468,8 @@ jobs: needs: [build-info] if: needs.build-info.outputs.run-ui-tests == 'true' steps: + - name: Cleanup repo + run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" uses: actions/checkout@v2 with: @@ -262,6 +485,8 @@ jobs: key: ${{ runner.os }}-ui-node-modules-${{ hashFiles('airflow/ui/**/yarn.lock') }} - run: yarn --cwd airflow/ui/ install --frozen-lockfile --non-interactive - run: yarn --cwd airflow/ui/ run test + env: + FORCE_COLOR: 2 tests-www: timeout-minutes: 10 @@ -270,6 +495,8 @@ jobs: needs: [build-info] if: needs.build-info.outputs.run-www-tests == 'true' steps: + - name: Cleanup repo + run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" uses: actions/checkout@v2 with: @@ -285,6 +512,8 @@ jobs: key: ${{ runner.os }}-ui-node-modules-${{ hashFiles('airflow/ui/**/yarn.lock') }} - run: yarn --cwd airflow/www/ install --frozen-lockfile --non-interactive - run: yarn --cwd airflow/www/ run test + env: + FORCE_COLOR: 2 test-openapi-client-generation: @@ -294,6 +523,8 @@ jobs: needs: [build-info] if: needs.build-info.outputs.needs-api-codegen == 'true' steps: + - name: Cleanup repo + run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" uses: actions/checkout@v2 with: @@ -309,6 +540,8 @@ jobs: needs: [build-info] if: needs.build-info.outputs.image-build == 'true' steps: + - name: Cleanup repo + run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" uses: actions/checkout@v2 with: @@ -319,61 +552,24 @@ jobs: with: python-version: ${{ needs.build-info.outputs.defaultPythonVersion }} cache: 'pip' - cache-dependency-path: ./dev/breeze/setup* - - run: python -m pip install --editable ./dev/breeze/ - - name: "Free space" - run: airflow-freespace - - name: "Cache virtualenv environment" - uses: actions/cache@v2 - with: - path: '.build/.docker_venv' - key: ${{ runner.os }}-docker-venv-${{ hashFiles('scripts/ci/images/ci_run_docker_tests.py') }} + cache-dependency-path: ./dev/requirements.txt - name: "Test examples of PROD image building" - working-directory: docs/docker-stack/docker-examples run: > - python -m pytest ../../../docker_tests/test_examples_of_prod_image_building.py - -n auto --color=yes - - test-docker-compose-quick-start: - timeout-minutes: 60 - name: "Test docker-compose quick start" - runs-on: ${{ fromJson(needs.build-info.outputs.runsOn) }} - needs: [build-info, prod-images] - if: needs.build-info.outputs.image-build == 'true' - steps: - - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" - uses: actions/checkout@v2 - with: - fetch-depth: 2 - persist-credentials: false - - name: "Setup python" - uses: actions/setup-python@v2 - with: - python-version: ${{ needs.build-info.outputs.defaultPythonVersion }} - cache: 'pip' - cache-dependency-path: ./dev/breeze/setup* - - run: python -m pip install --editable ./dev/breeze/ - - name: "Free space" - run: airflow-freespace - - name: "Cache virtualenv environment" - uses: actions/cache@v2 - with: - path: '.build/.docker_venv' - key: ${{ runner.os }}-docker-venv-${{ hashFiles('scripts/ci/images/ci_run_docker_tests.py') }} - - name: "Test docker-compose quick start" - run: 
./scripts/ci/images/ci_run_docker_compose_quick_start_test.sh + python -m pip install -r ./docker_tests/requirements.txt && + python -m pytest docker_tests/test_examples_of_prod_image_building.py -n auto --color=yes - ci-images: + wait-for-ci-images: timeout-minutes: 120 name: "Wait for CI images" runs-on: ${{ fromJson(needs.build-info.outputs.runsOn) }} - needs: [build-info] + needs: [build-info, build-ci-images] if: needs.build-info.outputs.image-build == 'true' env: RUNS_ON: ${{ fromJson(needs.build-info.outputs.runsOn) }} BACKEND: sqlite - UPGRADE_TO_NEWER_DEPENDENCIES: ${{ needs.build-info.outputs.upgradeToNewerDependencies }} steps: + - name: Cleanup repo + run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" uses: actions/checkout@v2 with: @@ -384,39 +580,31 @@ jobs: python-version: ${{ needs.build-info.outputs.defaultPythonVersion }} cache: 'pip' cache-dependency-path: ./dev/breeze/setup* - - run: python -m pip install --editable ./dev/breeze/ + - run: ./scripts/ci/install_breeze.sh - name: "Free space" - run: airflow-freespace - - name: "Cache virtualenv environment" - uses: actions/cache@v2 - with: - path: '.build/.docker_venv' - key: ${{ runner.os }}-docker-venv-${{ hashFiles('scripts/ci/images/ci_run_docker_tests.py') }} - - name: > - Wait for CI images - ${{ needs.build-info.outputs.pythonVersions }}:${{ env.GITHUB_REGISTRY_PULL_IMAGE_TAG }} + run: breeze free-space + - name: Wait for CI images ${{ env.PYTHON_VERSIONS }}:${{ env.IMAGE_TAG_FOR_THE_BUILD }} id: wait-for-images - # We wait for the images to be available either from the build-images workflow - # We are utilising single job to wait for all images because this job merely waits - # for the images to be available and run tests with the images. 
- run: ./scripts/ci/images/ci_wait_for_and_verify_all_ci_images.sh + run: breeze pull-image --run-in-parallel --verify-image --wait-for-image --tag-as-latest env: - CURRENT_PYTHON_MAJOR_MINOR_VERSIONS_AS_STRING: > - ${{needs.build-info.outputs.pythonVersionsListAsString}} - VERIFY_IMAGE: "true" - PR_LABELS: "${{ needs.build-info.outputs.pullRequestLabels }}" + PYTHON_VERSIONS: ${{ needs.build-info.outputs.pythonVersionsListAsString }} + IMAGE_TAG: ${{ env.IMAGE_TAG_FOR_THE_BUILD }} + - name: "Fix ownership" + run: breeze fix-ownership + if: always() static-checks: timeout-minutes: 30 name: "Static checks" runs-on: ${{ fromJson(needs.build-info.outputs.runsOn) }} - needs: [build-info, ci-images] + needs: [build-info, wait-for-ci-images] env: RUNS_ON: ${{ fromJson(needs.build-info.outputs.runsOn) }} - MOUNT_SELECTED_LOCAL_SOURCES: "true" - PYTHON_MAJOR_MINOR_VERSION: ${{needs.build-info.outputs.defaultPythonVersion}} + PYTHON_MAJOR_MINOR_VERSION: ${{ needs.build-info.outputs.defaultPythonVersion }} if: needs.build-info.outputs.basic-checks-only == 'false' steps: + - name: Cleanup repo + run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" uses: actions/checkout@v2 with: @@ -427,42 +615,34 @@ jobs: python-version: ${{ needs.build-info.outputs.defaultPythonVersion }} cache: 'pip' cache-dependency-path: ./dev/breeze/setup* - - run: python -m pip install --editable ./dev/breeze/ - - name: "Free space" - run: airflow-freespace - - name: "Pull CI image ${{env.PYTHON_MAJOR_MINOR_VERSION}}:${{ env.GITHUB_REGISTRY_PULL_IMAGE_TAG }}" - run: ./scripts/ci/images/ci_pull_ci_image_on_ci.sh - - name: "Get Python version" - run: "echo \"::set-output name=host-python-version::$(python -c - 'import platform; print(platform.python_version())')\"" - id: host-python-version - - name: "Cache pre-commit local-installation" - uses: actions/cache@v2 - with: - path: ~/.local - key: "pre-commit-local-installation-${{steps.host-python-version.outputs.host-python-version}}-\ -${{ hashFiles('setup.py', 'setup.cfg') }}" - restore-keys: "\ -pre-commit-local-installation-${{steps.host-python-version.outputs.host-python-version}}-" - - name: "Cache pre-commit envs" + - name: Cache pre-commit envs uses: actions/cache@v2 with: path: ~/.cache/pre-commit key: "pre-commit-${{steps.host-python-version.outputs.host-python-version}}-\ ${{ hashFiles('.pre-commit-config.yaml') }}" restore-keys: pre-commit-${{steps.host-python-version.outputs.host-python-version}} - - - name: "Cache eslint" - uses: actions/cache@v2 - with: - path: 'airflow/ui/node_modules' - key: ${{ runner.os }}-ui-node-modules-${{ hashFiles('airflow/ui/**/yarn.lock') }} + - run: ./scripts/ci/install_breeze.sh + - name: "Free space" + run: breeze free-space + - name: > + Pull CI image ${{ env.PYTHON_MAJOR_MINOR_VERSION }}:${{ env.IMAGE_TAG_FOR_THE_BUILD }} + run: breeze pull-image --tag-as-latest + env: + IMAGE_TAG: ${{ env.IMAGE_TAG_FOR_THE_BUILD }} + - name: "Get Python version" + run: "echo \"::set-output name=host-python-version::$(python -c + 'import platform; print(platform.python_version())')\"" + id: host-python-version - name: "Static checks" - run: ./scripts/ci/static_checks/run_static_checks.sh + run: breeze static-checks --all-files --show-diff-on-failure --color always env: - VERBOSE: false + VERBOSE: "false" SKIP: "identity" - COLUMNS: 250 + COLUMNS: "250" + - name: "Fix ownership" + run: breeze fix-ownership + if: always() # Those checks are run if no image 
needs to be built for checks. This is for simple changes that # Do not touch any of the python code or any of the important files that might require building @@ -474,11 +654,10 @@ ${{ hashFiles('.pre-commit-config.yaml') }}" needs: [build-info] env: RUNS_ON: ${{ fromJson(needs.build-info.outputs.runsOn) }} - SKIP: "build,mypy,flake8,identity" - MOUNT_SELECTED_LOCAL_SOURCES: "true" - PYTHON_MAJOR_MINOR_VERSION: ${{needs.build-info.outputs.defaultPythonVersion}} if: needs.build-info.outputs.basic-checks-only == 'true' steps: + - name: Cleanup repo + run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" uses: actions/checkout@v2 with: @@ -487,45 +666,53 @@ ${{ hashFiles('.pre-commit-config.yaml') }}" uses: actions/setup-python@v2 with: python-version: ${{ needs.build-info.outputs.defaultPythonVersion }} - - name: > - Fetch incoming commit ${{ github.sha }} with its parent + cache: 'pip' + cache-dependency-path: ./dev/breeze/setup* + - name: Cache pre-commit envs + uses: actions/cache@v2 + with: + path: ~/.cache/pre-commit + key: "pre-commit-basic-${{steps.host-python-version.outputs.host-python-version}}-\ +${{ hashFiles('.pre-commit-config.yaml') }}" + restore-keys: pre-commit-basic-${{steps.host-python-version.outputs.host-python-version}} + - name: Fetch incoming commit ${{ github.sha }} with its parent uses: actions/checkout@v2 with: ref: ${{ github.sha }} fetch-depth: 2 persist-credentials: false + - run: ./scripts/ci/install_breeze.sh + - name: "Free space" + run: breeze free-space - name: "Get Python version" run: "echo \"::set-output name=host-python-version::$(python -c 'import platform; print(platform.python_version())')\"" id: host-python-version - - name: "Cache pre-commit local-installation" - uses: actions/cache@v2 - with: - path: ~/.local - key: "pre-commit-local-installation-${{steps.host-python-version.outputs.host-python-version}}-\ -${{ hashFiles('setup.py', 'setup.cfg') }}" - restore-keys: "\ -pre-commit-local-installation-${{steps.host-python-version.outputs.host-python-version}}-" - - name: "Cache pre-commit envs" - uses: actions/cache@v2 - with: - path: ~/.cache/pre-commit - key: "pre-commit-basic-${{steps.host-python-version.outputs.host-python-version}}-\ -${{ hashFiles('.pre-commit-config.yaml') }}" - restore-keys: pre-commit-basic-${{steps.host-python-version.outputs.host-python-version}} - name: "Static checks: basic checks only" - run: ./scripts/ci/static_checks/run_basic_static_checks.sh "${{ github.sha }}" + run: > + breeze static-checks --all-files --show-diff-on-failure --color always + --commit-ref "${{ github.sha }}" env: - VERBOSE: false + VERBOSE: "false" + SKIP_IMAGE_PRE_COMMITS: "true" + SKIP: "identity" + COLUMNS: "250" + - name: "Fix ownership" + run: breeze fix-ownership + if: always() + docs: timeout-minutes: 45 name: "Build docs" runs-on: ${{ fromJson(needs.build-info.outputs.runsOn) }} - needs: [build-info, ci-images] + needs: [build-info, wait-for-ci-images] if: needs.build-info.outputs.docs-build == 'true' env: RUNS_ON: ${{ fromJson(needs.build-info.outputs.runsOn) }} + PYTHON_MAJOR_MINOR_VERSION: ${{ needs.build-info.outputs.defaultPythonVersion }} steps: + - name: Cleanup repo + run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" uses: actions/checkout@v2 with: @@ -536,23 +723,23 @@ ${{ hashFiles('.pre-commit-config.yaml') }}" python-version: 
${{needs.build-info.outputs.defaultPythonVersion}} cache: 'pip' cache-dependency-path: ./dev/breeze/setup* - - run: python -m pip install --editable ./dev/breeze/ + - run: ./scripts/ci/install_breeze.sh - name: "Free space" - run: airflow-freespace - - name: "Pull CI image ${{env.PYTHON_MAJOR_MINOR_VERSION}}:${{ env.GITHUB_REGISTRY_PULL_IMAGE_TAG }}" - run: ./scripts/ci/images/ci_pull_ci_image_on_ci.sh - - name: "Fetch inventory versions" - run: ./scripts/ci/docs/ci_docs_prepare.sh + run: breeze free-space + - name: Pull CI image ${{ env.PYTHON_MAJOR_MINOR_VERSION }}:${{ env.IMAGE_TAG_FOR_THE_BUILD }} + run: breeze pull-image --tag-as-latest + env: + IMAGE_TAG: ${{ env.IMAGE_TAG_FOR_THE_BUILD }} - uses: actions/cache@v2 id: cache-doc-inventories with: path: ./docs/_inventory_cache/ - key: docs-inventory-v1-${{ hashFiles('constraints.txt') }} + key: docs-inventory-${{ hashFiles('setup.py','setup.cfg','pyproject.toml;') }} restore-keys: | - docs-inventory-${{ hashFiles('constraints.txt') }} + docs-inventory-${{ hashFiles('setup.py','setup.cfg','pyproject.toml;') }} docs-inventory- - name: "Build docs" - run: ./scripts/ci/docs/ci_docs.sh + run: breeze build-docs - name: Configure AWS credentials uses: ./.github/actions/configure-aws-credentials if: > @@ -567,21 +754,22 @@ ${{ hashFiles('.pre-commit-config.yaml') }}" github.ref == 'refs/heads/main' && github.repository == 'apache/airflow' && github.event_name == 'push' run: aws s3 sync --delete ./files/documentation s3://apache-airflow-docs + - name: "Fix ownership" + run: breeze fix-ownership + if: always() prepare-test-provider-packages-wheel: timeout-minutes: 40 name: "Build and test provider packages wheel" runs-on: ${{ fromJson(needs.build-info.outputs.runsOn) }} - needs: [build-info, ci-images] + needs: [build-info, wait-for-ci-images] env: RUNS_ON: ${{ fromJson(needs.build-info.outputs.runsOn) }} - AIRFLOW_EXTRAS: "all" - PYTHON_MAJOR_MINOR_VERSION: ${{needs.build-info.outputs.defaultPythonVersion}} - VERSION_SUFFIX_FOR_PYPI: ".dev0" - NON_INTERACTIVE: "true" - GENERATE_PROVIDERS_ISSUE: "true" + PYTHON_MAJOR_MINOR_VERSION: ${{ needs.build-info.outputs.defaultPythonVersion }} if: needs.build-info.outputs.image-build == 'true' && needs.build-info.outputs.default-branch == 'main' steps: + - name: Cleanup repo + run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" uses: actions/checkout@v2 with: @@ -593,47 +781,68 @@ ${{ hashFiles('.pre-commit-config.yaml') }}" python-version: ${{ needs.build-info.outputs.defaultPythonVersion }} cache: 'pip' cache-dependency-path: ./dev/breeze/setup* - - run: python -m pip install --editable ./dev/breeze/ + - run: ./scripts/ci/install_breeze.sh - name: "Free space" - run: airflow-freespace - - name: "Pull CI image ${{env.PYTHON_MAJOR_MINOR_VERSION}}:${{ env.GITHUB_REGISTRY_PULL_IMAGE_TAG }}" - run: ./scripts/ci/images/ci_pull_ci_image_on_ci.sh + run: breeze free-space + - name: > + Pull CI image ${{ env.PYTHON_MAJOR_MINOR_VERSION }}:${{ env.IMAGE_TAG_FOR_THE_BUILD }} + run: breeze pull-image --tag-as-latest + env: + IMAGE_TAG: ${{ env.IMAGE_TAG_FOR_THE_BUILD }} + - name: "Cleanup dist files" + run: rm -fv ./dist/* - name: "Prepare provider documentation" - run: ./scripts/ci/provider_packages/ci_prepare_provider_documentation.sh + run: breeze prepare-provider-documentation --answer yes - name: "Prepare provider packages: wheel" - run: ./scripts/ci/provider_packages/ci_prepare_provider_packages.sh - env: - 
PACKAGE_FORMAT: "wheel" + run: breeze prepare-provider-packages --package-format wheel --version-suffix-for-pypi dev0 - name: "Prepare airflow package: wheel" - run: ./scripts/ci/build_airflow/ci_build_airflow_packages.sh - env: - PACKAGE_FORMAT: "wheel" + run: breeze prepare-airflow-package --package-format wheel --version-suffix-for-pypi dev0 - name: "Install and test provider packages and airflow via wheel files" - run: ./scripts/ci/provider_packages/ci_install_and_test_provider_packages.sh - env: - USE_AIRFLOW_VERSION: "wheel" - PACKAGE_FORMAT: "wheel" + run: > + breeze verify-provider-packages --use-airflow-version wheel --use-packages-from-dist + --package-format wheel + - name: "Remove airflow package and replace providers with 2.1-compliant versions" + run: | + rm -vf dist/apache_airflow-*.whl \ + dist/apache_airflow_providers_cncf_kubernetes*.whl \ + dist/apache_airflow_providers_celery*.whl + pip download --no-deps --dest dist \ + apache-airflow-providers-cncf-kubernetes==3.0.0 \ + apache-airflow-providers-celery==2.1.3 - name: "Install and test provider packages and airflow on Airflow 2.1 files" - run: ./scripts/ci/provider_packages/ci_install_and_test_provider_packages.sh + run: > + breeze verify-provider-packages --use-airflow-version 2.1.0 + --use-packages-from-dist --package-format wheel --airflow-constraints-reference constraints-2.1.0 env: - USE_AIRFLOW_VERSION: "2.1.0" - SKIP_TWINE_CHECK: "true" - PACKAGE_FORMAT: "wheel" + # The extras below are all extras that should be installed with Airflow 2.1.0 + AIRFLOW_EXTRAS: "airbyte,alibaba,amazon,apache.atlas.apache.beam,apache.cassandra,apache.drill,\ + apache.druid,apache.hdfs,apache.hive,apache.kylin,apache.livy,apache.pig,apache.pinot,\ + apache.spark,apache.sqoop,apache.webhdfs,arangodb,asana,async,\ + celery,cgroups,cloudant,cncf.kubernetes,dask,databricks,datadog,dbt.cloud,\ + deprecated_api,dingding,discord,docker,\ + elasticsearch,exasol,facebook,ftp,github,github_enterprise,google,google_auth,\ + grpc,hashicorp,http,imap,influxdb,jdbc,jenkins,jira,kerberos,ldap,\ + leveldb,microsoft.azure,microsoft.mssql,microsoft.psrp,microsoft.winrm,mongo,mysql,\ + neo4j,odbc,openfaas,opsgenie,oracle,pagerduty,pandas,papermill,password,plexus,\ + postgres,presto,qubole,rabbitmq,redis,salesforce,samba,segment,sendgrid,sentry,\ + sftp,singularity,slack,snowflake,sqlite,ssh,statsd,tableau,telegram,trino,vertica,\ + virtualenv,yandex,zendesk" + - name: "Fix ownership" + run: breeze fix-ownership + if: always() prepare-test-provider-packages-sdist: timeout-minutes: 40 name: "Build and test provider packages sdist" runs-on: ${{ fromJson(needs.build-info.outputs.runsOn) }} - needs: [build-info, ci-images] + needs: [build-info, wait-for-ci-images] env: RUNS_ON: ${{ fromJson(needs.build-info.outputs.runsOn) }} - AIRFLOW_EXTRAS: "all" - PYTHON_MAJOR_MINOR_VERSION: ${{needs.build-info.outputs.defaultPythonVersion}} - VERSION_SUFFIX_FOR_PYPI: ".dev0" - NON_INTERACTIVE: "true" - GENERATE_PROVIDERS_ISSUE: "true" + PYTHON_MAJOR_MINOR_VERSION: ${{ needs.build-info.outputs.defaultPythonVersion }} if: needs.build-info.outputs.image-build == 'true' && needs.build-info.outputs.default-branch == 'main' steps: + - name: Cleanup repo + run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" uses: actions/checkout@v2 with: @@ -645,19 +854,20 @@ ${{ hashFiles('.pre-commit-config.yaml') }}" python-version: ${{ needs.build-info.outputs.defaultPythonVersion }} cache: 
'pip' cache-dependency-path: ./dev/breeze/setup* - - run: python -m pip install --editable ./dev/breeze/ + - run: ./scripts/ci/install_breeze.sh - name: "Free space" - run: airflow-freespace - - name: "Pull CI image ${{env.PYTHON_MAJOR_MINOR_VERSION}}:${{ env.GITHUB_REGISTRY_PULL_IMAGE_TAG }}" - run: ./scripts/ci/images/ci_pull_ci_image_on_ci.sh - - name: "Prepare provider packages: sdist" - run: ./scripts/ci/provider_packages/ci_prepare_provider_packages.sh + run: breeze free-space + - name: > + Pull CI image ${{ env.PYTHON_MAJOR_MINOR_VERSION }}:${{ env.IMAGE_TAG_FOR_THE_BUILD }} + run: breeze pull-image --tag-as-latest env: - PACKAGE_FORMAT: "sdist" + IMAGE_TAG: ${{ env.IMAGE_TAG_FOR_THE_BUILD }} + - name: "Cleanup dist files" + run: rm -fv ./dist/* + - name: "Prepare provider packages: sdist" + run: breeze prepare-provider-packages --package-format sdist --version-suffix-for-pypi dev0 - name: "Prepare airflow package: sdist" - run: ./scripts/ci/build_airflow/ci_build_airflow_packages.sh - env: - PACKAGE_FORMAT: "sdist" + run: breeze prepare-airflow-package --package-format sdist --version-suffix-for-pypi dev0 - name: "Upload provider distribution artifacts" uses: actions/upload-artifact@v2 with: @@ -665,16 +875,18 @@ ${{ hashFiles('.pre-commit-config.yaml') }}" path: "./dist/apache-airflow-providers-*.tar.gz" retention-days: 1 - name: "Install and test provider packages and airflow via sdist files" - run: ./scripts/ci/provider_packages/ci_install_and_test_provider_packages.sh - env: - USE_AIRFLOW_VERSION: "sdist" - PACKAGE_FORMAT: "sdist" + run: > + breeze verify-provider-packages --use-airflow-version sdist --use-packages-from-dist + --package-format sdist + - name: "Fix ownership" + run: breeze fix-ownership + if: always() tests-helm: timeout-minutes: 80 name: "Python unit tests for helm chart" runs-on: ${{ fromJson(needs.build-info.outputs.runsOn) }} - needs: [build-info, ci-images] + needs: [build-info, wait-for-ci-images] env: RUNS_ON: ${{ fromJson(needs.build-info.outputs.runsOn) }} MOUNT_SELECTED_LOCAL_SOURCES: "true" @@ -687,6 +899,8 @@ ${{ hashFiles('.pre-commit-config.yaml') }}" (github.repository == 'apache/airflow' || github.event_name != 'schedule') && needs.build-info.outputs.default-branch == 'main' steps: + - name: Cleanup repo + run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" uses: actions/checkout@v2 with: @@ -697,11 +911,14 @@ ${{ hashFiles('.pre-commit-config.yaml') }}" python-version: ${{ needs.build-info.outputs.defaultPythonVersion }} cache: 'pip' cache-dependency-path: ./dev/breeze/setup* - - run: python -m pip install --editable ./dev/breeze/ + - run: ./scripts/ci/install_breeze.sh - name: "Free space" - run: airflow-freespace - - name: "Pull CI image ${{env.PYTHON_MAJOR_MINOR_VERSION}}:${{ env.GITHUB_REGISTRY_PULL_IMAGE_TAG }}" - run: ./scripts/ci/images/ci_pull_ci_image_on_ci.sh + run: breeze free-space + - name: > + Pull CI image ${{ env.PYTHON_MAJOR_MINOR_VERSION }}:${{ env.IMAGE_TAG_FOR_THE_BUILD }} + run: breeze pull-image --tag-as-latest + env: + IMAGE_TAG: ${{ env.IMAGE_TAG_FOR_THE_BUILD }} - name: "Tests: Helm" run: ./scripts/ci/testing/ci_run_airflow_testing.sh env: @@ -724,10 +941,12 @@ ${{ hashFiles('.pre-commit-config.yaml') }}" uses: actions/upload-artifact@v2 if: needs.build-info.outputs.runCoverage == 'true' with: - name: > - coverage-helm + name: coverage-helm path: "./files/coverage*.xml" retention-days: 7 + - name: "Fix ownership" + run: breeze 
fix-ownership + if: always() tests-postgres: timeout-minutes: 130 @@ -735,7 +954,7 @@ ${{ hashFiles('.pre-commit-config.yaml') }}" Postgres${{matrix.postgres-version}},Py${{matrix.python-version}}: ${{needs.build-info.outputs.testTypes}} runs-on: ${{ fromJson(needs.build-info.outputs.runsOn) }} - needs: [build-info, ci-images] + needs: [build-info, wait-for-ci-images] strategy: matrix: python-version: ${{ fromJson(needs.build-info.outputs.pythonVersions) }} @@ -745,12 +964,13 @@ ${{ hashFiles('.pre-commit-config.yaml') }}" env: RUNS_ON: ${{ fromJson(needs.build-info.outputs.runsOn) }} BACKEND: postgres - UPGRADE_TO_NEWER_DEPENDENCIES: ${{ needs.build-info.outputs.upgradeToNewerDependencies }} - PYTHON_MAJOR_MINOR_VERSION: ${{ matrix.python-version }} POSTGRES_VERSION: ${{ matrix.postgres-version }} TEST_TYPES: "${{needs.build-info.outputs.testTypes}}" + PYTHON_MAJOR_MINOR_VERSION: ${{ matrix.python-version }} if: needs.build-info.outputs.run-tests == 'true' steps: + - name: Cleanup repo + run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" uses: actions/checkout@v2 with: @@ -761,11 +981,13 @@ ${{ hashFiles('.pre-commit-config.yaml') }}" python-version: ${{ needs.build-info.outputs.defaultPythonVersion }} cache: 'pip' cache-dependency-path: ./dev/breeze/setup* - - run: python -m pip install --editable ./dev/breeze/ + - run: ./scripts/ci/install_breeze.sh - name: "Free space" - run: airflow-freespace - - name: "Pull CI image ${{env.PYTHON_MAJOR_MINOR_VERSION}}:${{ env.GITHUB_REGISTRY_PULL_IMAGE_TAG }}" - run: ./scripts/ci/images/ci_pull_ci_image_on_ci.sh + run: breeze free-space + - name: Pull CI image ${{ env.PYTHON_MAJOR_MINOR_VERSION }}:${{ env.IMAGE_TAG_FOR_THE_BUILD }} + run: breeze pull-image --tag-as-latest + env: + IMAGE_TAG: ${{ env.IMAGE_TAG_FOR_THE_BUILD }} - name: "Test downgrade" run: ./scripts/ci/testing/run_downgrade_test.sh - name: "Test Offline SQL generation" @@ -792,17 +1014,19 @@ ${{ hashFiles('.pre-commit-config.yaml') }}" uses: actions/upload-artifact@v2 if: needs.build-info.outputs.runCoverage == 'true' with: - name: > - coverage-postgres-${{matrix.python-version}}-${{matrix.postgres-version}} + name: coverage-postgres-${{matrix.python-version}}-${{matrix.postgres-version}} path: "./files/coverage*.xml" retention-days: 7 + - name: "Fix ownership" + run: breeze fix-ownership + if: always() tests-mysql: timeout-minutes: 130 name: > MySQL${{matrix.mysql-version}}, Py${{matrix.python-version}}: ${{needs.build-info.outputs.testTypes}} runs-on: ${{ fromJson(needs.build-info.outputs.runsOn) }} - needs: [build-info, ci-images] + needs: [build-info, wait-for-ci-images] strategy: matrix: python-version: ${{ fromJson(needs.build-info.outputs.pythonVersions) }} @@ -812,12 +1036,13 @@ ${{ hashFiles('.pre-commit-config.yaml') }}" env: RUNS_ON: ${{ fromJson(needs.build-info.outputs.runsOn) }} BACKEND: mysql - UPGRADE_TO_NEWER_DEPENDENCIES: ${{ needs.build-info.outputs.upgradeToNewerDependencies }} - PYTHON_MAJOR_MINOR_VERSION: ${{ matrix.python-version }} MYSQL_VERSION: ${{ matrix.mysql-version }} TEST_TYPES: "${{needs.build-info.outputs.testTypes}}" + PYTHON_MAJOR_MINOR_VERSION: ${{ matrix.python-version }} if: needs.build-info.outputs.run-tests == 'true' steps: + - name: Cleanup repo + run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" uses: actions/checkout@v2 with: @@ -828,11 +1053,13 @@ 
${{ hashFiles('.pre-commit-config.yaml') }}" python-version: ${{ needs.build-info.outputs.defaultPythonVersion }} cache: 'pip' cache-dependency-path: ./dev/breeze/setup* - - run: python -m pip install --editable ./dev/breeze/ + - run: ./scripts/ci/install_breeze.sh - name: "Free space" - run: airflow-freespace - - name: "Pull CI image ${{env.PYTHON_MAJOR_MINOR_VERSION}}:${{ env.GITHUB_REGISTRY_PULL_IMAGE_TAG }}" - run: ./scripts/ci/images/ci_pull_ci_image_on_ci.sh + run: breeze free-space + - name: Pull CI image ${{ env.PYTHON_MAJOR_MINOR_VERSION }}:${{ env.IMAGE_TAG_FOR_THE_BUILD }} + run: breeze pull-image --tag-as-latest + env: + IMAGE_TAG: ${{ env.IMAGE_TAG_FOR_THE_BUILD }} - name: "Test downgrade" run: ./scripts/ci/testing/run_downgrade_test.sh - name: "Test Offline SQL generation" @@ -862,13 +1089,16 @@ ${{ hashFiles('.pre-commit-config.yaml') }}" name: coverage-mysql-${{matrix.python-version}}-${{matrix.mysql-version}} path: "./files/coverage*.xml" retention-days: 7 + - name: "Fix ownership" + run: breeze fix-ownership + if: always() tests-mssql: timeout-minutes: 130 name: > MSSQL${{matrix.mssql-version}}, Py${{matrix.python-version}}: ${{needs.build-info.outputs.testTypes}} runs-on: ${{ fromJson(needs.build-info.outputs.runsOn) }} - needs: [build-info, ci-images] + needs: [build-info, wait-for-ci-images] strategy: matrix: python-version: ${{ fromJson(needs.build-info.outputs.pythonVersions) }} @@ -878,12 +1108,13 @@ ${{ hashFiles('.pre-commit-config.yaml') }}" env: RUNS_ON: ${{ fromJson(needs.build-info.outputs.runsOn) }} BACKEND: mssql - UPGRADE_TO_NEWER_DEPENDENCIES: ${{ needs.build-info.outputs.upgradeToNewerDependencies }} - PYTHON_MAJOR_MINOR_VERSION: ${{ matrix.python-version }} MSSQL_VERSION: ${{ matrix.mssql-version }} TEST_TYPES: "${{needs.build-info.outputs.testTypes}}" + PYTHON_MAJOR_MINOR_VERSION: ${{ matrix.python-version }} if: needs.build-info.outputs.run-tests == 'true' steps: + - name: Cleanup repo + run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" uses: actions/checkout@v2 with: @@ -894,11 +1125,13 @@ ${{ hashFiles('.pre-commit-config.yaml') }}" python-version: ${{ needs.build-info.outputs.defaultPythonVersion }} cache: 'pip' cache-dependency-path: ./dev/breeze/setup* - - run: python -m pip install --editable ./dev/breeze/ + - run: ./scripts/ci/install_breeze.sh - name: "Free space" - run: airflow-freespace - - name: "Pull CI image ${{env.PYTHON_MAJOR_MINOR_VERSION}}:${{ env.GITHUB_REGISTRY_PULL_IMAGE_TAG }}" - run: ./scripts/ci/images/ci_pull_ci_image_on_ci.sh + run: breeze free-space + - name: Pull CI image ${{ env.PYTHON_MAJOR_MINOR_VERSION }}:${{ env.IMAGE_TAG_FOR_THE_BUILD }} + run: breeze pull-image --tag-as-latest + env: + IMAGE_TAG: ${{ env.IMAGE_TAG_FOR_THE_BUILD }} - name: "Test downgrade" run: ./scripts/ci/testing/run_downgrade_test.sh - name: "Tests: ${{needs.build-info.outputs.testTypes}}" @@ -926,13 +1159,16 @@ ${{ hashFiles('.pre-commit-config.yaml') }}" name: coverage-mssql-${{matrix.python-version}}-${{matrix.mssql-version}} path: "./files/coverage*.xml" retention-days: 7 + - name: "Fix ownership" + run: breeze fix-ownership + if: always() tests-sqlite: timeout-minutes: 130 name: > Sqlite Py${{matrix.python-version}}: ${{needs.build-info.outputs.testTypes}} runs-on: ${{ fromJson(needs.build-info.outputs.runsOn) }} - needs: [build-info, ci-images] + needs: [build-info, wait-for-ci-images] strategy: matrix: python-version: ${{ 
fromJson(needs.build-info.outputs.pythonVersions) }} @@ -941,11 +1177,12 @@ ${{ hashFiles('.pre-commit-config.yaml') }}" env: RUNS_ON: ${{ fromJson(needs.build-info.outputs.runsOn) }} BACKEND: sqlite - UPGRADE_TO_NEWER_DEPENDENCIES: ${{ needs.build-info.outputs.upgradeToNewerDependencies }} - PYTHON_MAJOR_MINOR_VERSION: ${{ matrix.python-version }} TEST_TYPES: "${{needs.build-info.outputs.testTypes}}" + PYTHON_MAJOR_MINOR_VERSION: ${{ matrix.python-version }} if: needs.build-info.outputs.run-tests == 'true' steps: + - name: Cleanup repo + run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" uses: actions/checkout@v2 with: @@ -956,11 +1193,13 @@ ${{ hashFiles('.pre-commit-config.yaml') }}" python-version: ${{ needs.build-info.outputs.defaultPythonVersion }} cache: 'pip' cache-dependency-path: ./dev/breeze/setup* - - run: python -m pip install --editable ./dev/breeze/ + - run: ./scripts/ci/install_breeze.sh - name: "Free space" - run: airflow-freespace - - name: "Pull CI image ${{env.PYTHON_MAJOR_MINOR_VERSION}}:${{ env.GITHUB_REGISTRY_PULL_IMAGE_TAG }}" - run: ./scripts/ci/images/ci_pull_ci_image_on_ci.sh + run: breeze free-space + - name: Pull CI image ${{ env.PYTHON_MAJOR_MINOR_VERSION }}:${{ env.IMAGE_TAG_FOR_THE_BUILD }} + run: breeze pull-image --tag-as-latest + env: + IMAGE_TAG: ${{ env.IMAGE_TAG_FOR_THE_BUILD }} - name: "Test downgrade" run: ./scripts/ci/testing/run_downgrade_test.sh - name: "Tests: ${{needs.build-info.outputs.testTypes}}" @@ -988,23 +1227,26 @@ ${{ hashFiles('.pre-commit-config.yaml') }}" name: coverage-sqlite-${{matrix.python-version}} path: ./files/coverage*.xml retention-days: 7 + - name: "Fix ownership" + run: breeze fix-ownership + if: always() tests-quarantined: timeout-minutes: 60 name: "Quarantined tests" runs-on: ${{ fromJson(needs.build-info.outputs.runsOn) }} continue-on-error: true - needs: [build-info, ci-images] + needs: [build-info, wait-for-ci-images] env: RUNS_ON: ${{ fromJson(needs.build-info.outputs.runsOn) }} - PYTHON_MAJOR_MINOR_VERSION: ${{ needs.build-info.outputs.defaultPythonVersion }} MYSQL_VERSION: ${{needs.build-info.outputs.defaultMySQLVersion}} POSTGRES_VERSION: ${{needs.build-info.outputs.defaultPostgresVersion}} TEST_TYPES: "Quarantined" - NUM_RUNS: 10 - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + PYTHON_MAJOR_MINOR_VERSION: ${{ needs.build-info.outputs.defaultPythonVersion }} if: needs.build-info.outputs.run-tests == 'true' steps: + - name: Cleanup repo + run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" uses: actions/checkout@v2 with: @@ -1027,11 +1269,13 @@ ${{ hashFiles('.pre-commit-config.yaml') }}" if: github.ref == 'refs/heads/v1-10-test' run: | echo "ISSUE_ID=10128" >> $GITHUB_ENV - - run: python -m pip install --editable ./dev/breeze/ + - run: ./scripts/ci/install_breeze.sh - name: "Free space" - run: airflow-freespace - - name: "Pull CI image ${{env.PYTHON_MAJOR_MINOR_VERSION}}:${{ env.GITHUB_REGISTRY_PULL_IMAGE_TAG }}" - run: ./scripts/ci/images/ci_pull_ci_image_on_ci.sh + run: breeze free-space + - name: Pull CI image ${{ env.PYTHON_MAJOR_MINOR_VERSION }}:${{ env.IMAGE_TAG_FOR_THE_BUILD }} + run: breeze pull-image --tag-as-latest + env: + IMAGE_TAG: ${{ env.IMAGE_TAG_FOR_THE_BUILD }} - name: "Tests: Quarantined" run: ./scripts/ci/testing/ci_run_quarantined_tests.sh env: @@ -1064,6 +1308,9 @@ ${{ hashFiles('.pre-commit-config.yaml') 
}}" name: coverage-quarantined-${{ matrix.backend }} path: "./files/coverage*.xml" retention-days: 7 + - name: "Fix ownership" + run: breeze fix-ownership + if: always() upload-coverage: timeout-minutes: 15 @@ -1082,6 +1329,8 @@ ${{ hashFiles('.pre-commit-config.yaml') }}" # Only upload coverage on merges to main if: needs.build-info.outputs.runCoverage == 'true' steps: + - name: Cleanup repo + run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" uses: actions/checkout@v2 with: @@ -1092,24 +1341,25 @@ ${{ hashFiles('.pre-commit-config.yaml') }}" with: path: ./coverage-files - name: "Removes unnecessary artifacts" - run: ls ./coverage-files | grep -v coverage | xargs rm -rf + run: ls ./coverage-files | grep -v coverage | xargs rm -rfv - name: "Upload all coverage reports to codecov" uses: ./.github/actions/codecov-action with: directory: "./coverage-files" - prod-images: + wait-for-prod-images: timeout-minutes: 120 name: "Wait for PROD images" runs-on: ${{ fromJson(needs.build-info.outputs.runsOn) }} - needs: [build-info, ci-images] + needs: [build-info, wait-for-ci-images, build-prod-images] if: needs.build-info.outputs.image-build == 'true' env: RUNS_ON: ${{ fromJson(needs.build-info.outputs.runsOn) }} BACKEND: sqlite PYTHON_MAJOR_MINOR_VERSION: ${{ needs.build-info.outputs.defaultPythonVersion }} - UPGRADE_TO_NEWER_DEPENDENCIES: ${{ needs.build-info.outputs.upgradeToNewerDependencies }} steps: + - name: Cleanup repo + run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" uses: actions/checkout@v2 with: @@ -1120,34 +1370,67 @@ ${{ hashFiles('.pre-commit-config.yaml') }}" python-version: ${{ needs.build-info.outputs.defaultPythonVersion }} cache: 'pip' cache-dependency-path: ./dev/breeze/setup* - - run: python -m pip install --editable ./dev/breeze/ + - run: ./scripts/ci/install_breeze.sh - name: "Free space" - run: airflow-freespace + run: breeze free-space - name: "Cache virtualenv environment" uses: actions/cache@v2 with: path: '.build/.docker_venv' key: ${{ runner.os }}-docker-venv-${{ hashFiles('scripts/ci/images/ci_run_docker_tests.py') }} - - name: > - Wait for PROD images - ${{ needs.build-info.outputs.pythonVersions }}:${{ env.GITHUB_REGISTRY_PULL_IMAGE_TAG }} - # We wait for the images to be available either from "build-images.yml' run as pull_request_target. + - name: Wait for PROD images ${{ env.PYTHON_VERSIONS }}:${{ env.IMAGE_TAG_FOR_THE_BUILD }} + # We wait for the images to be available either from "build-images.yml' run as pull_request_target + # or from build-prod-image above. # We are utilising single job to wait for all images because this job merely waits # For the images to be available and test them. 
- # - id: wait-for-images - run: ./scripts/ci/images/ci_wait_for_and_verify_all_prod_images.sh + run: breeze pull-prod-image --verify-image --wait-for-image --run-in-parallel env: - CURRENT_PYTHON_MAJOR_MINOR_VERSIONS_AS_STRING: > - ${{needs.build-info.outputs.pythonVersionsListAsString}} - VERIFY_IMAGE: "true" - PR_LABELS: "${{ needs.build-info.outputs.pullRequestLabels }}" + PYTHON_VERSIONS: ${{ needs.build-info.outputs.pythonVersionsListAsString }} + IMAGE_TAG: ${{ env.IMAGE_TAG_FOR_THE_BUILD }} + - name: "Fix ownership" + run: breeze fix-ownership + if: always() + + test-docker-compose-quick-start: + timeout-minutes: 60 + name: "Test docker-compose quick start" + runs-on: ${{ fromJson(needs.build-info.outputs.runsOn) }} + needs: [build-info, wait-for-prod-images] + if: needs.build-info.outputs.image-build == 'true' + env: + PYTHON_MAJOR_MINOR_VERSION: ${{ needs.build-info.outputs.defaultPythonVersion }} + steps: + - name: Cleanup repo + run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" + - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" + uses: actions/checkout@v2 + with: + fetch-depth: 2 + persist-credentials: false + - name: "Setup python" + uses: actions/setup-python@v2 + with: + python-version: ${{ needs.build-info.outputs.defaultPythonVersion }} + cache: 'pip' + cache-dependency-path: ./dev/breeze/setup* + - run: ./scripts/ci/install_breeze.sh + - name: "Free space" + run: breeze free-space + - name: Pull PROD image ${{ env.PYTHON_MAJOR_MINOR_VERSION }}:${{ env.IMAGE_TAG_FOR_THE_BUILD }} + run: breeze pull-prod-image --tag-as-latest + env: + IMAGE_TAG: ${{ env.IMAGE_TAG_FOR_THE_BUILD }} + - name: "Test docker-compose quick start" + run: breeze docker-compose-tests + - name: "Fix ownership" + run: breeze fix-ownership + if: always() tests-kubernetes: - timeout-minutes: 50 + timeout-minutes: 70 name: Helm Chart; ${{matrix.executor}} runs-on: ${{ fromJson(needs.build-info.outputs.runsOn) }} - needs: [build-info, prod-images] + needs: [build-info, wait-for-prod-images] strategy: matrix: executor: [KubernetesExecutor, CeleryExecutor, LocalExecutor] @@ -1167,9 +1450,11 @@ ${{ hashFiles('.pre-commit-config.yaml') }}" ${{needs.build-info.outputs.kubernetesVersionsListAsString}} if: > ( needs.build-info.outputs.run-kubernetes-tests == 'true' || - needs.build-info.outputs.needs-helm-tests == 'true' ) && + needs.build-info.outputs.needs-helm-tests == 'true' ) && needs.build-info.outputs.default-branch == 'main' steps: + - name: Cleanup repo + run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" uses: actions/checkout@v2 with: @@ -1180,13 +1465,14 @@ ${{ hashFiles('.pre-commit-config.yaml') }}" python-version: ${{ needs.build-info.outputs.defaultPythonVersion }} cache: 'pip' cache-dependency-path: ./dev/breeze/setup* - - run: python -m pip install --editable ./dev/breeze/ + - run: ./scripts/ci/install_breeze.sh - name: "Free space" - run: airflow-freespace - - name: "Get all PROD images" - run: ./scripts/ci/images/ci_wait_for_and_verify_all_prod_images.sh + run: breeze free-space + - name: Pull PROD images ${{ env.PYTHON_VERSIONS }}:${{ env.IMAGE_TAG_FOR_THE_BUILD }} + run: breeze pull-prod-image --run-in-parallel --tag-as-latest env: - VERIFY_IMAGE: "false" + PYTHON_VERSIONS: ${{ needs.build-info.outputs.pythonVersionsListAsString }} + IMAGE_TAG: ${{ env.IMAGE_TAG_FOR_THE_BUILD }} - name: "Cache bin folder with tools for kubernetes testing" uses: 
actions/cache@v2 with: @@ -1203,16 +1489,18 @@ ${{ hashFiles('.pre-commit-config.yaml') }}" uses: actions/upload-artifact@v2 if: failure() || cancelled() with: - name: > - kind-logs-${{matrix.executor}} + name: kind-logs-${{matrix.executor}} path: /tmp/kind_logs_* retention-days: 7 + - name: "Fix ownership" + run: breeze fix-ownership + if: always() tests-helm-executor-upgrade: - timeout-minutes: 80 + timeout-minutes: 150 name: Helm Chart Executor Upgrade runs-on: ${{ fromJson(needs.build-info.outputs.runsOn) }} - needs: [build-info, prod-images] + needs: [build-info, wait-for-prod-images] env: RUNS_ON: ${{ fromJson(needs.build-info.outputs.runsOn) }} BACKEND: postgres @@ -1230,6 +1518,8 @@ ${{ hashFiles('.pre-commit-config.yaml') }}" needs.build-info.outputs.run-kubernetes-tests == 'true' && needs.build-info.outputs.default-branch == 'main' steps: + - name: Cleanup repo + run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" uses: actions/checkout@v2 with: @@ -1240,13 +1530,14 @@ ${{ hashFiles('.pre-commit-config.yaml') }}" python-version: ${{ needs.build-info.outputs.defaultPythonVersion }} cache: 'pip' cache-dependency-path: ./dev/breeze/setup* - - run: python -m pip install --editable ./dev/breeze/ + - run: ./scripts/ci/install_breeze.sh - name: "Free space" - run: airflow-freespace - - name: "Get all PROD images" - run: ./scripts/ci/images/ci_wait_for_and_verify_all_prod_images.sh + run: breeze free-space + - name: Pull PROD images ${{ env.PYTHON_VERSIONS }}:${{ env.IMAGE_TAG_FOR_THE_BUILD }} + run: breeze pull-prod-image --run-in-parallel --tag-as-latest env: - VERIFY_IMAGE: "false" + PYTHON_VERSIONS: ${{ needs.build-info.outputs.pythonVersionsListAsString }} + IMAGE_TAG: ${{ env.IMAGE_TAG_FOR_THE_BUILD }} - name: "Cache virtualenv for kubernetes testing" uses: actions/cache@v2 with: @@ -1274,10 +1565,12 @@ ${{ hashFiles('.pre-commit-config.yaml') }}" uses: actions/upload-artifact@v2 if: failure() || cancelled() with: - name: > - kind-logs-KubernetesExecutor + name: kind-logs-KubernetesExecutor path: /tmp/kind_logs_* retention-days: 7 + - name: "Fix ownership" + run: breeze fix-ownership + if: always() constraints: permissions: @@ -1287,8 +1580,8 @@ ${{ hashFiles('.pre-commit-config.yaml') }}" runs-on: ${{ fromJson(needs.build-info.outputs.runsOn) }} needs: - build-info - - ci-images - - prod-images + - wait-for-ci-images + - wait-for-prod-images - static-checks - tests-sqlite - tests-mysql @@ -1296,10 +1589,10 @@ ${{ hashFiles('.pre-commit-config.yaml') }}" - tests-postgres env: RUNS_ON: ${{ fromJson(needs.build-info.outputs.runsOn) }} - PYTHON_MAJOR_MINOR_VERSION: ${{ matrix.python-version }} - CURRENT_PYTHON_MAJOR_MINOR_VERSIONS_AS_STRING: ${{needs.build-info.outputs.pythonVersionsListAsString}} if: needs.build-info.outputs.upgradeToNewerDependencies != 'false' steps: + - name: Cleanup repo + run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" uses: actions/checkout@v2 with: @@ -1311,57 +1604,46 @@ ${{ hashFiles('.pre-commit-config.yaml') }}" python-version: ${{ needs.build-info.outputs.defaultPythonVersion }} cache: 'pip' cache-dependency-path: ./dev/breeze/setup* - - run: python -m pip install --editable ./dev/breeze/ + - run: ./scripts/ci/install_breeze.sh - name: "Free space" - run: airflow-freespace - - name: > - Wait for CI images - ${{ needs.build-info.outputs.pythonVersions }}:${{ 
env.GITHUB_REGISTRY_PULL_IMAGE_TAG }} - run: ./scripts/ci/images/ci_wait_for_and_verify_all_ci_images.sh - env: - VERIFY_IMAGE: "false" - - name: "Generate constraints with PyPI providers" - run: ./scripts/ci/constraints/ci_generate_all_constraints.sh + run: breeze free-space + - name: Pull CI images ${{ env.PYTHON_VERSIONS }}:${{ env.IMAGE_TAG_FOR_THE_BUILD }} + run: breeze pull-image --run-in-parallel --tag-as-latest env: - GENERATE_CONSTRAINTS_MODE: "pypi-providers" - PR_LABELS: "${{ needs.build-info.outputs.pullRequestLabels }}" - - name: "Generate constraints with source providers" - run: ./scripts/ci/constraints/ci_generate_all_constraints.sh - env: - GENERATE_CONSTRAINTS_MODE: "source-providers" - PR_LABELS: "${{ needs.build-info.outputs.pullRequestLabels }}" - - name: "Generate constraints without providers" - run: ./scripts/ci/constraints/ci_generate_all_constraints.sh + PYTHON_VERSIONS: ${{ needs.build-info.outputs.pythonVersionsListAsString }} + IMAGE_TAG: ${{ env.IMAGE_TAG_FOR_THE_BUILD }} + - name: "Generate constraints" + run: | + breeze generate-constraints --run-in-parallel \ + --airflow-constraints-mode constraints-source-providers + breeze generate-constraints --run-in-parallel --airflow-constraints-mode constraints-no-providers + breeze generate-constraints --run-in-parallel --airflow-constraints-mode constraints env: - GENERATE_CONSTRAINTS_MODE: "no-providers" - PR_LABELS: "${{ needs.build-info.outputs.pullRequestLabels }}" + PYTHON_VERSIONS: ${{ needs.build-info.outputs.pythonVersionsListAsString }} - name: "Set constraints branch name" id: constraints-branch run: ./scripts/ci/constraints/ci_branch_constraints.sh - # only actually checkout and push it when we are in apache/airflow repository + if: needs.build-info.outputs.mergeRun == 'true' - name: Checkout ${{ steps.constraints-branch.outputs.branch }} uses: actions/checkout@v2 - if: > - github.repository == 'apache/airflow' && - (github.event_name == 'push' || github.event_name == 'schedule') + if: needs.build-info.outputs.mergeRun == 'true' with: path: "repo" ref: ${{ steps.constraints-branch.outputs.branch }} persist-credentials: false - name: "Commit changed constraint files for ${{needs.build-info.outputs.pythonVersions}}" run: ./scripts/ci/constraints/ci_commit_constraints.sh - if: > - github.repository == 'apache/airflow' && - (github.event_name == 'push' || github.event_name == 'schedule') + if: needs.build-info.outputs.mergeRun == 'true' - name: "Push changes" uses: ./.github/actions/github-push-action - if: > - github.repository == 'apache/airflow' && - (github.event_name == 'push' || github.event_name == 'schedule') + if: needs.build-info.outputs.mergeRun == 'true' with: github_token: ${{ secrets.GITHUB_TOKEN }} branch: ${{ steps.constraints-branch.outputs.branch }} directory: "repo" + - name: "Fix ownership" + run: breeze fix-ownership + if: always() # Push BuildX cache to GitHub Registry in Apache repository, if all tests are successful and build # is executed as result of direct push to "main" or one of the "vX-Y-test" branches @@ -1371,29 +1653,24 @@ ${{ hashFiles('.pre-commit-config.yaml') }}" permissions: packages: write timeout-minutes: 120 - name: "Push images as cache to GitHub Registry" + name: "Push Image Cache" runs-on: ${{ fromJson(needs.build-info.outputs.runsOn) }} needs: - build-info - constraints - docs - # Only run it for direct pushes and scheduled builds - if: > - (github.event_name == 'push' || github.event_name == 'schedule') - && github.repository == 'apache/airflow' + if: 
needs.build-info.outputs.mergeRun == 'true' strategy: + fail-fast: false matrix: python-version: ${{ fromJson(needs.build-info.outputs.pythonVersions) }} + platform: ["linux/amd64", "linux/arm64"] env: RUNS_ON: ${{ fromJson(needs.build-info.outputs.runsOn) }} PYTHON_MAJOR_MINOR_VERSION: ${{ matrix.python-version }} - # Build cache for both platforms for development even if we are releasing - # PROD images only for amd64 - PLATFORM: "linux/amd64,linux/arm64" - # Rebuild images before push using the latest constraints (just pushed) without - # eager upgrade. Do not wait for images, but rebuild them - UPGRADE_TO_NEWER_DEPENDENCIES: "false" steps: + - name: Cleanup repo + run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" uses: actions/checkout@v2 with: @@ -1404,19 +1681,56 @@ ${{ hashFiles('.pre-commit-config.yaml') }}" python-version: ${{ needs.build-info.outputs.defaultPythonVersion }} cache: 'pip' cache-dependency-path: ./dev/breeze/setup* - - run: python -m pip install --editable ./dev/breeze/ + - run: ./scripts/ci/install_breeze.sh - name: "Free space" - run: airflow-freespace - - name: "Build CI image cache and push ${{env.PYTHON_MAJOR_MINOR_VERSION}}" - run: ./scripts/ci/images/ci_build_ci_image_on_ci.sh + run: breeze free-space + - name: > + Pull CI image for PROD build + ${{ env.PYTHON_MAJOR_MINOR_VERSION }}:${{ env.IMAGE_TAG_FOR_THE_BUILD }}" + run: breeze pull-image --tag-as-latest + env: + # Always use default Python version of CI image for preparing packages + PYTHON_MAJOR_MINOR_VERSION: ${{ needs.build-info.outputs.defaultPythonVersion }} + IMAGE_TAG: ${{ env.IMAGE_TAG_FOR_THE_BUILD }} + - name: "Cleanup dist and context file" + run: rm -fv ./dist/* ./docker-context-files/* + - name: "Prepare providers packages for PROD build" + run: > + breeze prepare-provider-packages + --package-list-file ./scripts/ci/installed_providers.txt + --package-format wheel + env: + VERSION_SUFFIX_FOR_PYPI: "dev0" + - name: "Prepare airflow package for PROD build" + run: breeze prepare-airflow-package --package-format wheel env: - PREPARE_BUILDX_CACHE: "true" - GITHUB_REGISTRY_PULL_IMAGE_TAG: "latest" - GITHUB_REGISTRY_PUSH_IMAGE_TAG: "latest" - - name: "Build CI image cache and push ${{env.PYTHON_MAJOR_MINOR_VERSION}}" - run: ./scripts/ci/images/ci_build_prod_image_on_ci.sh + VERSION_SUFFIX_FOR_PYPI: "dev0" + - name: "Start ARM instance" + run: ./scripts/ci/images/ci_start_arm_instance_and_connect_to_docker.sh + if: matrix.platform == 'linux/arm64' + - name: "Push CI cache ${{ matrix.python-version }} ${{ matrix.platform }}" + run: > + breeze build-image + --prepare-buildx-cache + --force-build + --platform ${{ matrix.platform }} + env: + PYTHON_MAJOR_MINOR_VERSION: ${{ matrix.python-version }} + - name: "Move dist packages to docker-context files" + run: mv -v ./dist/*.whl ./docker-context-files + - name: "Push PROD cache ${{ matrix.python-version }} ${{ matrix.platform }}" + run: > + breeze build-prod-image + --airflow-is-in-context + --install-packages-from-context + --prepare-buildx-cache + --disable-airflow-repo-cache + --platform ${{ matrix.platform }} env: - VERSION_SUFFIX_FOR_PYPI: ".dev0" - PREPARE_BUILDX_CACHE: "true" - GITHUB_REGISTRY_PULL_IMAGE_TAG: "latest" - GITHUB_REGISTRY_PUSH_IMAGE_TAG: "latest" + PYTHON_MAJOR_MINOR_VERSION: ${{ matrix.python-version }} + - name: "Stop ARM instance" + run: ./scripts/ci/images/ci_stop_arm_instance.sh + if: always() && matrix.platform == 'linux/arm64' + - 
name: "Fix ownership" + run: breeze fix-ownership + if: always() diff --git a/.gitignore b/.gitignore index f22c530e2c16f..9a00d53fa3bda 100644 --- a/.gitignore +++ b/.gitignore @@ -194,7 +194,8 @@ log.txt* /airflow/providers/__init__.py # Docker context files -/docker-context-files +/docker-context-files/* +!/docker-context-files/.README.md # Local .terraform directories **/.terraform/* @@ -222,5 +223,5 @@ pip-wheel-metadata # Generated UI licenses licenses/LICENSES-ui.txt -# Packaged Breeze2 on Windows -/Breeze2.exe +# Packaged breeze on Windows +/breeze.exe diff --git a/.gitpod.yml b/.gitpod.yml index e1afcf55b0550..b4115c3801285 100644 --- a/.gitpod.yml +++ b/.gitpod.yml @@ -20,7 +20,7 @@ # Docs: https://www.gitpod.io/docs/config-gitpod-file/ tasks: - - init: ./breeze -y + - init: ./breeze-legacy -y - name: Install pre-commit openMode: split-right command: | diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 1abe45d8b31d0..7de8e753565ea 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -19,28 +19,31 @@ default_stages: [commit, push] default_language_version: # force all unspecified python hooks to run python3 python: python3 -minimum_pre_commit_version: "1.20.0" +minimum_pre_commit_version: "2.0.0" repos: - repo: meta hooks: - id: identity + name: Print input to the static check hooks for troubleshooting - id: check-hooks-apply + name: Check if all hooks apply to the repository - repo: https://github.com/thlorenz/doctoc.git - rev: v2.1.0 + rev: v2.2.0 hooks: - id: doctoc - name: Add TOC for md files + name: Add TOC for md and rst files files: - ^README\.md$|^CONTRIBUTING\.md$|^UPDATING.*\.md$|^chart/UPDATING.*\.md$|^dev/.*\.md$|^dev/.*\.rst$ + ^CONTRIBUTING\.md$|^README\.md$|^UPDATING.*\.md$|^chart/UPDATING.*\.md$|^dev/.*\.md$|^dev/.*\.rst$ exclude: ^airflow/_vendor/ args: - "--maxlevel" - "2" - repo: https://github.com/Lucas-C/pre-commit-hooks - rev: v1.1.10 + rev: v1.2.0 hooks: - id: forbid-tabs - exclude: ^docs/Makefile$|^clients/gen/go\.sh|\.gitmodules$|^airflow/_vendor/ + name: Fail if tabs are used in the project + exclude: ^airflow/_vendor/|^clients/gen/go\.sh$|^\.gitmodules$ - id: insert-license name: Add license for all SQL files files: \.sql$ @@ -51,20 +54,9 @@ repos: - --license-filepath - license-templates/LICENSE.txt - --fuzzy-match-generates-todo - - id: insert-license - name: Add license for all other files - exclude: ^\.github/.*$|^airflow/_vendor/ - args: - - --comment-style - - "|#|" - - --license-filepath - - license-templates/LICENSE.txt - - --fuzzy-match-generates-todo - files: > - \.properties$|\.cfg$|\.conf$|\.ini$|\.ldif$|\.readthedocs$|\.service$|\.tf$|Dockerfile.*$ - id: insert-license name: Add license for all rst files - exclude: ^\.github/.*$|^airflow/_vendor/ + exclude: ^\.github/.*$|^airflow/_vendor/|newsfragments/.*\.rst$ args: - --comment-style - "||" @@ -73,8 +65,8 @@ repos: - --fuzzy-match-generates-todo files: \.rst$ - id: insert-license - name: Add license for all JS/TS/TSX/CSS/PUML files - files: \.(js|ts|tsx|css|puml)$ + name: Add license for all CSS/JS/PUML/TS/TSX files + files: \.(css|js|puml|ts|tsx|jsx)$ exclude: ^\.github/.*$|^airflow/_vendor/ args: - --comment-style @@ -94,8 +86,8 @@ repos: - --fuzzy-match-generates-todo - id: insert-license name: Add license for all shell files - exclude: ^\.github/.*$|^airflow/_vendor/ - files: ^breeze$|^breeze-complete$|\.sh$|\.bash$ + exclude: ^\.github/.*$|^airflow/_vendor/|^dev/breeze/autocomplete/.*$ + files: ^breeze-legacy$|^breeze-complete$|\.bash$|\.sh$ args: - 
--comment-style - "|#|" @@ -136,48 +128,73 @@ repos: - id: insert-license name: Add license for all md files files: \.md$ - exclude: ^\.github/.*$|PROVIDER_CHANGES.*\.md|^airflow/_vendor/ + exclude: ^\.github/.*$|PROVIDER_CHANGES.*\.md$|^airflow/_vendor/ args: - --comment-style - "" - --license-filepath - license-templates/LICENSE.txt - --fuzzy-match-generates-todo + - id: insert-license + name: Add license for all other files + exclude: ^\.github/.*$|^airflow/_vendor/ + args: + - --comment-style + - "|#|" + - --license-filepath + - license-templates/LICENSE.txt + - --fuzzy-match-generates-todo + files: > + \.cfg$|\.conf$|\.ini$|\.ldif$|\.properties$|\.readthedocs$|\.service$|\.tf$|Dockerfile.*$ + # Keep version of black in sync wit blackend-docs and pre-commit-hook-names - repo: https://github.com/psf/black - rev: 21.12b0 + rev: 22.3.0 hooks: - id: black + name: Run Black (the uncompromising Python code formatter) args: [--config=./pyproject.toml] exclude: ^airflow/_vendor/ - repo: https://github.com/asottile/blacken-docs - rev: v1.12.0 + rev: v1.12.1 hooks: - id: blacken-docs + name: Run black on python code blocks in documentation files alias: black - additional_dependencies: [black==21.12b0] + additional_dependencies: [black==22.3.0] - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.1.0 + rev: v4.2.0 hooks: - id: check-merge-conflict + name: Check that merge conflicts are not being committed - id: debug-statements + name: Detect accidentally committed debug statements - id: check-builtin-literals + name: Require literal syntax when initializing Python builtin types exclude: ^airflow/_vendor/ - id: detect-private-key + name: Detect if private key is added to the repository - id: end-of-file-fixer + name: Make sure that there is an empty line at the end exclude: ^airflow/_vendor/ - id: mixed-line-ending + name: Detect if mixed line ending is used (\r vs. \r\n) exclude: ^airflow/_vendor/ - id: check-executables-have-shebangs + name: Check that executables have shebang exclude: ^airflow/_vendor/ - id: check-xml + name: Check XML files with xmllint exclude: ^airflow/_vendor/ - id: trailing-whitespace - exclude: ^airflow/_vendor/ + name: Remove trailing whitespace at end of line + exclude: ^airflow/_vendor/|^images/breeze/output.*$ - id: fix-encoding-pragma + name: Remove encoding header from python files exclude: ^airflow/_vendor/ args: - --remove - id: pretty-format-json + name: Format json files args: - --autofix - --no-sort-keys @@ -185,19 +202,22 @@ repos: - "4" files: ^chart/values\.schema\.json$|^chart/values_schema\.schema\.json$ pass_filenames: true - # TODO: Bump to Python 3.7 when support for Python 3.6 is dropped in Airflow 2.3. + # TODO: Bump to Python 3.8 when support for Python 3.7 is dropped in Airflow. 
- repo: https://github.com/asottile/pyupgrade - rev: v2.31.0 + rev: v2.32.1 hooks: - id: pyupgrade - args: ["--py36-plus"] + name: Upgrade Python code automatically + args: ["--py37-plus"] exclude: ^airflow/_vendor/ - repo: https://github.com/pre-commit/pygrep-hooks rev: v1.9.0 hooks: - id: rst-backticks + name: Check if RST files use double backticks for code exclude: ^airflow/_vendor/ - id: python-no-log-warn + name: Check if there are no deprecate log warn exclude: ^airflow/_vendor/ - repo: https://github.com/adrienverge/yamllint rev: v1.26.3 @@ -206,7 +226,7 @@ repos: name: Check YAML files with yamllint entry: yamllint -c yamllint-config.yml --strict types: [yaml] - exclude: ^.*init_git_sync\.template\.yaml$|^.*airflow\.template\.yaml$|^chart/(?:templates|files)/.*\.yaml$|openapi/.*\.yaml|pre-commit-config|^airflow/_vendor/ + exclude: ^.*init_git_sync\.template\.yaml$|^.*airflow\.template\.yaml$|^chart/(?:templates|files)/.*\.yaml$|openapi/.*\.yaml$|^\.pre-commit-config\.yaml$|^airflow/_vendor/ - repo: https://github.com/PyCQA/isort rev: 5.10.1 hooks: @@ -214,7 +234,7 @@ repos: name: Run isort to sort imports in Python files files: \.py$|\.pyi$ # To keep consistent with the global isort skip config defined in setup.cfg - exclude: ^build/.*$|^\.tox/.*$|^venv/.*$|^airflow/_vendor/ + exclude: ^airflow/_vendor/|^build/.*$|^venv/.*$|^\.tox/.*$ - repo: https://github.com/pycqa/pydocstyle rev: 6.1.1 hooks: @@ -239,14 +259,16 @@ repos: rev: v1.3.0 hooks: - id: yesqa + name: Remove unnecessary noqa statements exclude: | (?x) ^airflow/_vendor/ additional_dependencies: ['flake8>=4.0.1'] - repo: https://github.com/ikamensh/flynt - rev: '0.69' + rev: '0.76' hooks: - id: flynt + name: Run flynt string format converter for Python exclude: | (?x) ^airflow/_vendor/ @@ -266,14 +288,14 @@ repos: The word(s) should be in lowercase." 
&& exec codespell "$@"' -- language: python types: [text] - exclude: ^airflow/_vendor/|^CHANGELOG\.txt$|^airflow/www/static/css/material-icons\.css$ + exclude: ^airflow/_vendor/|^RELEASE_NOTES\.txt$|^airflow/www/static/css/material-icons\.css$|^images/.*$ args: - --ignore-words=docs/spelling_wordlist.txt - --skip=docs/*/commits.rst,airflow/providers/*/*.rst,*.lock,INTHEWILD.md,*.min.js,docs/apache-airflow/pipeline_example.csv - --exclude-file=.codespellignorelines - repo: local hooks: - - id: autoflake + - id: static-check-autoflake name: Remove all unused code entry: autoflake --remove-all-unused-imports --ignore-init-module-imports --in-place language: python @@ -296,48 +318,51 @@ repos: files: Dockerfile.*$ pass_filenames: true require_serial: true - - id: setup-order + - id: check-setup-order name: Check order of dependencies in setup.cfg and setup.py language: python files: ^setup\.cfg$|^setup\.py$ pass_filenames: false entry: ./scripts/ci/pre_commit/pre_commit_check_order_setup.py - additional_dependencies: ['rich'] - - id: setup-extra-packages + additional_dependencies: ['rich>=12.4.1'] + - id: check-extra-packages-references name: Checks setup extra packages description: Checks if all the libraries in setup.py are listed in extra-packages-ref.rst file language: python files: ^setup\.py$|^docs/apache-airflow/extra-packages-ref\.rst$ pass_filenames: false entry: ./scripts/ci/pre_commit/pre_commit_check_setup_extra_packages_ref.py - additional_dependencies: ['rich==9.2.0'] - - id: forbidden-xcom-get-value - name: Check if XCom.get_value is used in backwards-compatible way + additional_dependencies: ['rich>=12.4.1'] + # This check might be removed when min-airflow-version in providers is 2.2 + - id: check-airflow-2-1-compatibility + name: Check that providers are 2.1 compatible. 
+ entry: ./scripts/ci/pre_commit/pre_commit_check_2_1_compatibility.py language: python - files: ^airflow/providers/.*\.py$ pass_filenames: true - entry: ./scripts/ci/pre_commit/pre_commit_check_xcom_get_value.py - additional_dependencies: ['rich'] + files: ^airflow/providers/.*\.py$ + additional_dependencies: ['rich>=12.4.1'] - id: update-breeze-file - name: Update output of breeze command in BREEZE.rst - entry: ./scripts/ci/pre_commit/pre_commit_breeze_cmd_line.sh - language: system - files: ^BREEZE\.rst$|^breeze$|^breeze-complete$|^Dockerfile$ + name: Update output of breeze commands in BREEZE.rst + entry: ./scripts/ci/pre_commit/pre_commit_breeze_cmd_line.py + language: python + files: ^BREEZE\.rst$|^dev/breeze/.*$ pass_filenames: false + additional_dependencies: ['rich>=12.4.1', 'rich-click'] - id: update-local-yml-file name: Update mounts in the local yml file - entry: ./scripts/ci/pre_commit/pre_commit_local_yml_mounts.sh - language: system - files: ^scripts/ci/libraries/_local_mounts\.sh$|^scripts/ci/docker_compose/local\.yml$ + entry: ./scripts/ci/pre_commit/pre_commit_local_yml_mounts.py + language: python + files: ^dev/breeze/src/airflow_breeze/utils/docker_command_utils\.py$|^scripts/ci/docker_compose/local\.yml$ pass_filenames: false + additional_dependencies: ['rich>=12.4.1'] - id: update-setup-cfg-file name: Update setup.cfg file with all licenses entry: ./scripts/ci/pre_commit/pre_commit_setup_cfg_file.sh language: system files: ^setup\.cfg$ pass_filenames: false - - id: build-providers-dependencies - name: Build cross-dependencies for providers packages + - id: update-providers-dependencies + name: Update cross-dependencies for providers packages entry: ./scripts/ci/pre_commit/pre_commit_build_providers_dependencies.sh language: python files: ^airflow/providers/.*\.py$|^tests/providers/.*\.py$ @@ -355,41 +380,46 @@ repos: language: python files: ^Dockerfile$ pass_filenames: false - additional_dependencies: ['rich'] + additional_dependencies: ['rich>=12.4.1'] - id: update-supported-versions name: Updates supported versions in documentation - entry: ./scripts/ci/pre_commit/supported_versions.py + entry: ./scripts/ci/pre_commit/pre_commit_supported_versions.py language: python - files: ^scripts/ci/pre_commit/supported_versions\.py$|^README\.md$|^docs/apache-airflow/supported-versions\.rst$ + files: ^scripts/ci/pre_commit/pre_commit_supported_versions\.py$|^README\.md$|^docs/apache-airflow/supported-versions\.rst$ pass_filenames: false additional_dependencies: ['tabulate'] + - id: check-revision-heads-map + name: Check that the REVISION_HEADS_MAP is up-to-date + language: python + entry: ./scripts/ci/pre_commit/pre_commit_version_heads_map.py + pass_filenames: false - id: update-version name: Update version to the latest version in the documentation entry: ./scripts/ci/pre_commit/pre_commit_update_versions.py language: python files: ^docs pass_filenames: false - - id: pydevd + - id: check-pydevd-left-in-code language: pygrep name: Check for pydevd debug statements accidentally left entry: "pydevd.*settrace\\(" pass_filenames: true files: \.py$ - - id: dont-use-safe-filter + - id: check-safe-filter-usage-in-html language: pygrep name: Don't use safe in templates description: the Safe filter is error-prone, use Markup() in code instead entry: "\\|\\s*safe" files: \.html$ pass_filenames: true - - id: no-providers-in-core-examples + - id: check-no-providers-in-core-examples language: pygrep name: No providers imports in core example DAGs description: The core example DAGs have no 
dependencies other than core Airflow entry: "^\\s*from airflow\\.providers.*" pass_filenames: true files: ^airflow/example_dags/.*\.py$ - - id: no-relative-imports + - id: check-no-relative-imports language: pygrep name: No relative imports description: Airflow style is to use absolute imports only @@ -397,24 +427,49 @@ repos: pass_filenames: true files: \.py$ exclude: ^tests/|^airflow/_vendor/ - - id: language-matters + - id: check-for-inclusive-language language: pygrep name: Check for language that we do not accept as community - description: Please use "deny_list" or "allow_list" instead. - entry: "(?i)(black|white)[_-]?list" + description: Please use more appropriate words for community documentation. + entry: > + (?i) + (black|white)[_-]?list| + \bshe\b| + \bhe\b| + \bher\b| + \bhis\b| + \bmaster\b| + \bslave\b| + \bsanity\b| + \bdummy\b pass_filenames: true exclude: > (?x) - ^airflow/www/fab_security/manager\.py$| - ^airflow/providers/apache/cassandra/hooks/cassandra\.py$| - ^airflow/providers/apache/hive/operators/hive_stats\.py$| - ^airflow/providers/apache/hive/.*PROVIDER_CHANGES_*| - ^airflow/providers/apache/hive/.*README\.md$| - ^tests/providers/apache/cassandra/hooks/test_cassandra\.py$| - ^docs/apache-airflow-providers-apache-cassandra/connections/cassandra\.rst$| - ^docs/apache-airflow-providers-apache-hive/commits\.rst$| + ^airflow/www/fab_security/manager.py$| + ^airflow/www/static/| + ^airflow/providers/| + ^tests/providers/apache/cassandra/hooks/test_cassandra.py$| + ^docs/apache-airflow-providers-apache-cassandra/connections/cassandra.rst$| + ^docs/apache-airflow-providers-apache-hive/commits.rst$| + ^airflow/api_connexion/openapi/v1.yaml$| + ^tests/cli/commands/test_webserver_command.py$| + ^airflow/cli/commands/webserver_command.py$| + ^airflow/ui/yarn.lock$| + ^airflow/config_templates/default_airflow.cfg$| + ^airflow/config_templates/config.yml$| + ^docs/*.*$| + ^tests/providers/| + ^.pre-commit-config\.yaml$| + ^.*RELEASE_NOTES\.rst$| + ^.*CHANGELOG\.txt$|^.*CHANGELOG\.rst$| git - - id: base-operator + - id: check-base-operator-partial-arguments + name: Check BaseOperator and partial() arguments + language: python + entry: ./scripts/ci/pre_commit/pre_commit_base_operator_partial_arguments.py + pass_filenames: false + files: ^airflow/models/(?:base|mapped)operator.py$ + - id: check-base-operator-usage language: pygrep name: Check BaseOperator[Link] core imports description: Make sure BaseOperator[Link] is imported from airflow.models.baseoperator in core @@ -424,13 +479,12 @@ repos: exclude: > (?x) ^airflow/decorators/.*$| - ^airflow/gcp/.*$| ^airflow/hooks/.*$| ^airflow/operators/.*$| ^airflow/sensors/.*$| ^airflow/providers/.*$| ^dev/provider_packages/.*$ - - id: base-operator + - id: check-base-operator-usage language: pygrep name: Check BaseOperator[Link] other imports description: Make sure BaseOperator[Link] is imported from airflow.models outside of core @@ -438,13 +492,9 @@ repos: pass_filenames: true files: > (?x) - ^airflow/gcp/.*$| - ^airflow/hooks/.*$| - ^airflow/operators/.*$| - ^airflow/sensors/.*$| ^airflow/providers/.*\.py$ exclude: ^airflow/_vendor/ - - id: provide-create-sessions + - id: check-provide-create-sessions-imports language: pygrep name: Check provide_session and create_session imports description: provide_session and create_session should be imported from airflow.utils.session @@ -453,40 +503,40 @@ repos: files: \.py$ exclude: ^airflow/_vendor/ pass_filenames: true - - id: incorrect-use-of-LoggingMixin + - id: 
check-incorrect-use-of-LoggingMixin language: pygrep name: Make sure LoggingMixin is not used alone entry: "LoggingMixin\\(\\)" files: \.py$ exclude: ^airflow/_vendor/ pass_filenames: true - - id: daysago-import-check + - id: check-daysago-import-from-utils language: pygrep name: Make sure days_ago is imported from airflow.utils.dates entry: "(airflow\\.){0,1}utils\\.dates\\.days_ago" files: \.py$ exclude: ^airflow/_vendor/ pass_filenames: true - - id: restrict-start_date + - id: check-start-date-not-used-in-defaults language: pygrep - name: "'start_date' should not be defined in default_args in example_dags" + name: "'start_date' not to be defined in default_args in example_dags" entry: "default_args\\s*=\\s*{\\s*(\"|')start_date(\"|')|(\"|')start_date(\"|'):" files: \.*example_dags.*\.py$ exclude: ^airflow/_vendor/ pass_filenames: true - - id: check-integrations - name: Check if integration list is aligned + - id: check-integrations-are-consistent + name: Check if integration list is consistent in various places entry: ./scripts/ci/pre_commit/pre_commit_check_integrations.sh language: system pass_filenames: false files: ^common/_common_values\.sh$|^breeze-complete$ - - id: check-apache-license + - id: check-apache-license-rat name: Check if licenses are OK for Apache entry: ./scripts/ci/pre_commit/pre_commit_check_license.sh language: system files: ^.*LICENSE.*$|^.*LICENCE.*$ pass_filenames: false - - id: airflow-config-yaml + - id: check-airflow-config-yaml-consistent name: Checks for consistency between config.yml and default_config.cfg language: python entry: ./scripts/ci/pre_commit/pre_commit_yaml_to_cfg.py @@ -494,95 +544,113 @@ repos: pass_filenames: false require_serial: true additional_dependencies: ['pyyaml'] - - id: boring-cyborg + - id: check-boring-cyborg-configuration name: Checks for Boring Cyborg configuration consistency language: python entry: ./scripts/ci/pre_commit/pre_commit_boring_cyborg.py pass_filenames: false require_serial: true additional_dependencies: ['pyyaml', 'termcolor==1.1.0', 'wcmatch==8.2'] - - id: sort-in-the-wild + - id: update-in-the-wild-to-be-sorted name: Sort INTHEWILD.md alphabetically entry: ./scripts/ci/pre_commit/pre_commit_sort_in_the_wild.sh language: system files: ^\.pre-commit-config\.yaml$|^INTHEWILD\.md$ require_serial: true - - id: sort-spelling-wordlist + - id: update-spelling-wordlist-to-be-sorted name: Sort alphabetically and uniquify spelling_wordlist.txt entry: ./scripts/ci/pre_commit/pre_commit_sort_spelling_wordlist.sh language: system files: ^\.pre-commit-config\.yaml$|^docs/spelling_wordlist\.txt$ require_serial: true - - id: helm-lint + - id: lint-helm-chart name: Lint Helm Chart entry: ./scripts/ci/pre_commit/pre_commit_helm_lint.sh language: system pass_filenames: false files: ^chart require_serial: true - - id: shellcheck + - id: run-shellcheck name: Check Shell scripts syntax correctness language: docker_image - entry: koalaman/shellcheck:v0.7.2 -x -a - files: ^breeze$|^breeze-complete$|\.sh$|^hooks/build$|^hooks/push$|\.bash$ - - id: stylelint + entry: koalaman/shellcheck:v0.8.0 -x -a + files: ^breeze-legacy$|^breeze-complete$|\.sh$|^hooks/build$|^hooks/push$|\.bash$ + exclude: ^dev/breeze/autocomplete/.*$ + - id: lint-css name: stylelint entry: "stylelint" language: node files: ^airflow/www/.*\.(css|scss|sass)$ # Keep dependency versions in sync w/ airflow/www/package.json additional_dependencies: ['stylelint@13.3.1', 'stylelint-config-standard@20.0.0'] - - id: providers-init-file + - id: check-providers-init-file-missing 
name: Provider init file is missing pass_filenames: false always_run: true entry: ./scripts/ci/pre_commit/pre_commit_check_providers_init.sh language: system - - id: providers-subpackages-init-file + - id: check-providers-subpackages-init-file-exist name: Provider subpackage init files are there pass_filenames: false always_run: true entry: ./scripts/ci/pre_commit/pre_commit_check_providers_subpackages_all_have_init.py language: python require_serial: true - - id: provider-yamls + - id: check-provider-yaml-valid name: Validate providers.yaml files pass_filenames: false entry: ./scripts/ci/pre_commit/pre_commit_check_provider_yaml_files.py language: python require_serial: true - files: provider\.yaml$|scripts/ci/pre_commit/pre_commit_check_provider_yaml_files\.py$|^docs/ + files: ^docs/|provider\.yaml$|^scripts/ci/pre_commit/pre_commit_check_provider_yaml_files\.py$ additional_dependencies: - 'PyYAML==5.3.1' - 'jsonschema>=3.2.0,<5.0.0' - 'tabulate==0.8.8' - 'jsonpath-ng==1.5.3' - - 'rich==10.9.0' - - id: pre-commit-descriptions - name: Check if pre-commits are described - entry: ./scripts/ci/pre_commit/pre_commit_check_pre_commits.sh - language: system - files: ^\.pre-commit-config\.yaml$|^STATIC_CODE_CHECKS\.rst|^breeze-complete$ - require_serial: true - - id: pre-commit-hook-names - name: Ensure hook ids are not overly long - entry: ./scripts/ci/pre_commit/pre_commit_check_pre_commit_hook_names.py + - 'rich>=12.4.1' + - id: check-pre-commit-information-consistent + name: Update information re pre-commit hooks and verify ids and names + entry: ./scripts/ci/pre_commit/pre_commit_check_pre_commit_hooks.py args: - - --max-length=70 + - --max-length=64 language: python files: ^\.pre-commit-config\.yaml$|^scripts/ci/pre_commit/pre_commit_check_pre_commit_hook_names\.py$ - additional_dependencies: ['pyyaml', 'jinja2', 'black'] + additional_dependencies: ['pyyaml', 'jinja2', 'black==22.3.0', 'tabulate', 'rich>=12.4.1'] require_serial: true pass_filenames: false - - id: airflow-providers-available + - id: check-airflow-providers-have-extras name: Checks providers available when declared by extras in setup.py language: python entry: ./scripts/ci/pre_commit/pre_commit_check_extras_have_providers.py - files: setup\.py$|^airflow/providers/.*\.py$ + files: ^setup\.py$|^airflow/providers/.*\.py$ + pass_filenames: false + require_serial: true + additional_dependencies: ['rich>=12.4.1'] + - id: update-breeze-readme-config-hash + name: Update Breeze README.md with config files hash + language: python + entry: ./scripts/ci/pre_commit/pre_commit_update_breeze_config_hash.py + files: ^dev/breeze/setup.*$|^dev/breeze/pyproject.toml$|^dev/breeze/README.md$ + pass_filenames: false + require_serial: true + - id: check-breeze-top-dependencies-limited + name: Breeze should have small number of top-level dependencies + language: python + entry: ./scripts/tools/check_if_limited_dependencies.py + files: ^dev/breeze/.*$ pass_filenames: false require_serial: true - additional_dependencies: ['rich'] - - id: markdownlint + additional_dependencies: ['click', 'rich>=12.4.1'] + - id: check-system-tests-present + name: Check if system tests have required segments of code + entry: ./scripts/ci/pre_commit/pre_commit_check_system_tests.py + language: python + files: ^tests/system/.*/example_[^/]*.py$ + exclude: ^tests/system/providers/google/bigquery/example_bigquery_queries\.py$ + pass_filenames: true + additional_dependencies: ['rich>=12.4.1'] + - id: lint-markdown name: Run markdownlint description: Checks the style of 
Markdown files. entry: markdownlint @@ -590,7 +658,7 @@ repos: types: [markdown] files: \.(md|mdown|markdown)$ additional_dependencies: ['markdownlint-cli'] - - id: json-schema + - id: lint-json-schema name: Lint JSON Schema files with JSON Schema entry: ./scripts/ci/pre_commit/pre_commit_json_schema.py args: @@ -602,7 +670,7 @@ repos: exclude: ^airflow/_vendor/ require_serial: true additional_dependencies: ['jsonschema>=3.2.0,<5.0', 'PyYAML==5.3.1', 'requests==2.25.0'] - - id: json-schema + - id: lint-json-schema name: Lint NodePort Service with JSON Schema entry: ./scripts/ci/pre_commit/pre_commit_json_schema.py args: @@ -613,7 +681,7 @@ repos: files: ^scripts/ci/kubernetes/nodeport\.yaml$ require_serial: true additional_dependencies: ['jsonschema>=3.2.0,<5.0', 'PyYAML==5.3.1', 'requests==2.25.0'] - - id: json-schema + - id: lint-json-schema name: Lint Docker compose files with JSON Schema entry: ./scripts/ci/pre_commit/pre_commit_json_schema.py args: @@ -624,7 +692,7 @@ repos: files: ^scripts/ci/docker-compose/.+\.ya?ml$|docker-compose\.ya?ml$ require_serial: true additional_dependencies: ['jsonschema>=3.2.0,<5.0', 'PyYAML==5.3.1', 'requests==2.25.0'] - - id: json-schema + - id: lint-json-schema name: Lint chart/values.schema.json file with JSON Schema entry: ./scripts/ci/pre_commit/pre_commit_json_schema.py args: @@ -636,13 +704,13 @@ repos: files: ^chart/values\.schema\.json$|^chart/values_schema\.schema\.json$ require_serial: true additional_dependencies: ['jsonschema>=3.2.0,<5.0', 'PyYAML==5.3.1', 'requests==2.25.0'] - - id: vendor-k8s-json-schema + - id: update-vendored-in-k8s-json-schema name: Vendor k8s definitions into values.schema.json entry: ./scripts/ci/pre_commit/pre_commit_vendor_k8s_json_schema.py language: python files: ^chart/values\.schema\.json$ additional_dependencies: ['requests==2.25.0'] - - id: json-schema + - id: lint-json-schema name: Lint chart/values.yaml file with JSON Schema entry: ./scripts/ci/pre_commit/pre_commit_json_schema.py args: @@ -655,7 +723,7 @@ repos: files: ^chart/values\.yaml$|^chart/values\.schema\.json$ require_serial: true additional_dependencies: ['jsonschema>=3.2.0,<5.0', 'PyYAML==5.3.1', 'requests==2.25.0'] - - id: json-schema + - id: lint-json-schema name: Lint airflow/config_templates/config.yml file with JSON Schema entry: ./scripts/ci/pre_commit/pre_commit_json_schema.py args: @@ -666,75 +734,111 @@ repos: files: ^airflow/config_templates/config\.yml$ require_serial: true additional_dependencies: ['jsonschema>=3.2.0,<5.0', 'PyYAML==5.3.1', 'requests==2.25.0'] - - id: persist-credentials-disabled + - id: check-persist-credentials-disabled-in-github-workflows name: Check that workflow files have persist-credentials disabled entry: ./scripts/ci/pre_commit/pre_commit_checkout_no_credentials.py language: python pass_filenames: true files: ^\.github/workflows/.*\.yml$ - additional_dependencies: ['PyYAML', 'rich'] - - id: docstring-params + additional_dependencies: ['PyYAML', 'rich>=12.4.1'] + - id: check-docstring-param-types name: Check that docstrings do not specify param types entry: ./scripts/ci/pre_commit/pre_commit_docstring_param_type.py language: python pass_filenames: true files: \.py$ exclude: ^airflow/_vendor/ - additional_dependencies: ['rich'] - - id: chart-schema-lint + additional_dependencies: ['rich>=12.4.1'] + - id: lint-chart-schema name: Lint chart/values.schema.json file entry: ./scripts/ci/pre_commit/pre_commit_chart_schema.py language: python pass_filenames: false files: ^chart/values\.schema\.json$ require_serial: 
true - - id: ui-lint - name: ESLint against airflow/ui - language: node - 'types_or': [javascript, tsx, ts] - files: ^airflow/ui/ - entry: ./scripts/ci/static_checks/ui_lint.sh - pass_filenames: false - - id: www-lint - name: ESLint against current UI js files - language: node - 'types_or': [javascript] - files: ^airflow/www/static/js/ - entry: ./scripts/ci/static_checks/www_lint.sh + - id: update-inlined-dockerfile-scripts + name: Inline Dockerfile and Dockerfile.ci scripts + entry: ./scripts/ci/pre_commit/pre_commit_inline_scripts_in_docker.py + language: python pass_filenames: false - - id: changelog-duplicates + files: ^Dockerfile$|^Dockerfile.ci$|^scripts/docker/.*$ + require_serial: true + - id: check-changelog-has-no-duplicates name: Check changelogs for duplicate entries language: python files: CHANGELOG\.txt$|CHANGELOG\.rst$ entry: ./scripts/ci/pre_commit/pre_commit_changelog_duplicates.py pass_filenames: true + - id: check-newsfragments-are-valid + name: Check newsfragments are valid + language: python + files: newsfragments/.*\.rst + entry: ./scripts/ci/pre_commit/pre_commit_newsfragments.py + pass_filenames: true + # We sometimes won't have newsfragments in the repo, so always run it so `check-hooks-apply` passes + # This is fast, so not too much downside + always_run: true ## ADD MOST PRE-COMMITS ABOVE THAT LINE # The below pre-commits are those requiring CI image to be built - - id: mypy - name: Run mypy - language: system - entry: ./scripts/ci/pre_commit/pre_commit_mypy.sh --namespace-packages + - id: run-mypy + name: Run mypy for dev + language: python + entry: ./scripts/ci/pre_commit/pre_commit_mypy.py + files: ^dev/.*\.py$ + require_serial: true + additional_dependencies: ['rich>=12.4.1'] + - id: run-mypy + name: Run mypy for core + language: python + entry: ./scripts/ci/pre_commit/pre_commit_mypy.py --namespace-packages files: \.py$ - exclude: ^provider_packages|^docs|^airflow/_vendor/ + exclude: ^provider_packages|^docs|^airflow/_vendor/|^airflow/providers|^airflow/migrations|^dev + require_serial: true + additional_dependencies: ['rich>=12.4.1'] + - id: run-mypy + name: Run mypy for providers + language: python + entry: ./scripts/ci/pre_commit/pre_commit_mypy.py --namespace-packages + files: ^airflow/providers/.*\.py$ require_serial: true - - id: mypy + additional_dependencies: ['rich>=12.4.1'] + - id: run-mypy name: Run mypy for /docs/ folder - language: system - entry: ./scripts/ci/pre_commit/pre_commit_mypy.sh + language: python + entry: ./scripts/ci/pre_commit/pre_commit_mypy.py files: ^docs/.*\.py$ - exclude: rtd-deprecation + exclude: ^docs/rtd-deprecation require_serial: true - - id: flake8 + additional_dependencies: ['rich>=12.4.1'] + - id: run-flake8 name: Run flake8 - language: system - entry: ./scripts/ci/pre_commit/pre_commit_flake8.sh + language: python + entry: ./scripts/ci/pre_commit/pre_commit_flake8.py files: \.py$ pass_filenames: true exclude: ^airflow/_vendor/ - - id: migration-reference + additional_dependencies: ['rich>=12.4.1'] + - id: lint-javascript + name: ESLint against airflow/ui + language: python + 'types_or': [javascript, tsx, ts] + files: ^airflow/ui/ + entry: ./scripts/ci/pre_commit/pre_commit_ui_lint.py + pass_filenames: false + additional_dependencies: ['rich>=12.4.1'] + - id: lint-javascript + name: ESLint against current UI JavaScript files + language: python + 'types_or': [javascript] + files: ^airflow/www/static/js/ + entry: ./scripts/ci/pre_commit/pre_commit_www_lint.py + pass_filenames: false + additional_dependencies: 
['rich>=12.4.1'] + - id: update-migration-references name: Update migration ref doc - language: system - entry: ./scripts/ci/pre_commit/pre_commit_migration_reference.sh + language: python + entry: ./scripts/ci/pre_commit/pre_commit_migration_reference.py pass_filenames: false - files: ^airflow/migrations/versions/.*\.py$|^docs/apache-airflow/migrations-ref.rst$ + files: ^airflow/migrations/versions/.*\.py$|^docs/apache-airflow/migrations-ref\.rst$ + additional_dependencies: ['rich>=12.4.1'] ## ONLY ADD PRE-COMMITS HERE THAT REQUIRE CI IMAGE diff --git a/.rat-excludes b/.rat-excludes index 94a97319f2a3c..fa4663ce65188 100644 --- a/.rat-excludes +++ b/.rat-excludes @@ -7,6 +7,7 @@ .gitrepo .airflow_db_initialised .airflowignore +.babelrc .coverage .coveragerc .codecov.yml @@ -33,7 +34,6 @@ metastore_db .*svg .*csv .*md5 -CHANGELOG.txt .*zip .*lock unittests.cfg @@ -43,6 +43,7 @@ venv files airflow.iml .gitmodules +scripts/ci/installed_providers.txt # Generated doc files .*html @@ -114,3 +115,9 @@ chart/values_schema.schema.json # A simplistic Robots.txt airflow/www/static/robots.txt + +# Generated autocomplete files +dev/breeze/autocomplete/* + +# Newsfragments are snippets that will be, eventually, consumed into RELEASE_NOTES +newsfragments/* diff --git a/.readthedocs.yml b/.readthedocs.yml index ae08fe7ee059c..592c9da64e1cf 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -20,7 +20,7 @@ formats: [] sphinx: configuration: docs/rtd-deprecation/conf.py python: - version: 3.7 + version: "3.7" install: - method: pip path: . diff --git a/BREEZE.rst b/BREEZE.rst index 8ddba88c6eba7..b80db9c45356d 100644 --- a/BREEZE.rst +++ b/BREEZE.rst @@ -31,15 +31,24 @@ Airflow Breeze is an easy-to-use development and test environment using `Docker Compose `_. The environment is available for local use and is also used in Airflow's CI tests. -We called it *Airflow Breeze* as **It's a Breeze to contribute to Airflow**. +We call it *Airflow Breeze* as **It's a Breeze to contribute to Airflow**. The advantages and disadvantages of using the Breeze environment vs. other ways of testing Airflow are described in `CONTRIBUTING.rst `_. -All the output from the last ./breeze command is automatically logged to the ``logs/breeze.out`` file. +.. note:: + We are currently migrating old Bash-based ./breeze-legacy to the Python-based breeze. Some of the + commands are already converted to breeze, but some old commands should use breeze-legacy. The + documentation mentions when ``./breeze-legacy`` is involved. + + The new ``breeze`` after installing is available on your PATH and you should launch it simply as + ``breeze ``. Previously you had to prepend breeze with ``./`` but this is not needed + any more. For convenience, we will keep ``./breeze`` script for a while to run the new breeze and you + can still use the legacy Breeze with ``./breeze-legacy``. Watch the video below about Airflow Breeze. It explains the motivation for Breeze -and screencasts all its uses. +and screencast all its uses. The video describes old ``./breeze-legacy`` (in video it still +called ``./breeze`` ). .. raw:: html @@ -68,7 +77,7 @@ Docker Desktop See also `Docker for Mac - Space `_ for details on increasing disk space available for Docker on Mac. - **Docker problems**: Sometimes it is not obvious that space is an issue when you run into - a problem with Docker. If you see a weird behaviour, try ``breeze cleanup-image`` command. + a problem with Docker. If you see a weird behaviour, try ``breeze cleanup`` command. 
Also see `pruning `_ instructions from Docker. Here is an example configuration with more than 200GB disk space for Docker: @@ -83,7 +92,7 @@ Here is an example configuration with more than 200GB disk space for Docker: Docker Compose -------------- -- **Version**: Install the latest stable `Docker Compose`_ +- **Version**: Install the latest stable `Docker Compose `_ and add it to the PATH. ``Breeze`` detects if you are using version that is too old and warns you to upgrade. - **Permissions**: Configure permission to be able to run the ``docker-compose`` command by your user. @@ -118,19 +127,27 @@ Docker in WSL 2 - **WSL 2 Docker mount errors**: Another reason to use Linux filesystem, is that sometimes - depending on the length of - your path, you might get strange errors when you try start ``Breeze``, such us + your path, you might get strange errors when you try start ``Breeze``, such as ``caused: mount through procfd: not a directory: unknown:``. Therefore checking out Airflow in Windows-mounted Filesystem is strongly discouraged. +- **WSL 2 Docker volume remount errors**: + If you're experiencing errors such as ``ERROR: for docker-compose_airflow_run + Cannot create container for service airflow: not a directory`` when starting Breeze + after the first time or an error like ``docker: Error response from daemon: not a directory. + See 'docker run --help'.`` when running the pre-commit tests, you may need to consider + `installing Docker directly in WSL 2 `_ + instead of using Docker Desktop for Windows. + - **WSL 2 Memory Usage** : WSL 2 can consume a lot of memory under the process name "Vmmem". To reclaim the memory after development you can: - * On the Linux distro clear cached memory: ``sudo sysctl -w vm.drop_caches=3`` - * If no longer using Docker you can quit Docker Desktop - (right click system try icon and select "Quit Docker Desktop") - * If no longer using WSL you can shut it down on the Windows Host - with the following command: ``wsl --shutdown`` + * On the Linux distro clear cached memory: ``sudo sysctl -w vm.drop_caches=3`` + * If no longer using Docker you can quit Docker Desktop + (right click system try icon and select "Quit Docker Desktop") + * If no longer using WSL you can shut it down on the Windows Host + with the following command: ``wsl --shutdown`` - **Developing in WSL 2**: You can use all the standard Linux command line utilities to develop on WSL 2. @@ -138,50 +155,25 @@ Docker in WSL 2 If VS Code is installed on the Windows host system then in the WSL Linux Distro you can run ``code .`` in the root directory of you Airflow repo to launch VS Code. -Getopt and gstat ----------------- - -* For Linux, run ``apt install util-linux coreutils`` or an equivalent if your system is not Debian-based. -* For macOS, install GNU ``getopt`` and ``gstat`` utilities to get Airflow Breeze running. - - Run ``brew install gnu-getopt coreutils``. - -.. warning:: - Pay attention to the ``brew install`` command and follow instructions to link the gnu-getopt version - to become the first one on the PATH. Make sure to re-login after you make the suggested changes. - -**Examples:** - -If you use bash, run this command and re-login: - -.. code-block:: bash - - echo 'export PATH="$(brew --prefix)/opt/gnu-getopt/bin:$PATH"' >> ~/.bash_profile - . ~/.bash_profile +The pipx tool +-------------- +We are using ``pipx`` tool to install and manage Breeze. 
The ``pipx`` tool is created by the creators +of ``pip`` from `Python Packaging Authority `_ -If you use zsh, run this command and re-login: +Install pipx .. code-block:: bash - echo 'export PATH="$(brew --prefix)/opt/gnu-getopt/bin:$PATH"' >> ~/.zprofile - . ~/.zprofile - + pip install --user pipx -Confirm that ``getopt`` and ``gstat`` utilities are successfully installed +Breeze, is not globally accessible until your PATH is updated. Add \.local\bin as a variable +environments. This can be done automatically by the following command (follow instructions printed). .. code-block:: bash - $ getopt --version - getopt from util-linux * - $ gstat --version - stat (GNU coreutils) * - Copyright (C) 2020 Free Software Foundation, Inc. - License GPLv3+: GNU GPL version 3 or later . - This is free software: you are free to change and redistribute it. - There is NO WARRANTY, to the extent permitted by law. + pipx ensurepath - Written by Michael Meskes. Resources required ================== @@ -202,13 +194,16 @@ Disk Minimum 40GB free disk space is required for your Docker Containers. -On Mac OS This might deteriorate over time so you might need to increase it or run ``docker system --prune`` +On Mac OS This might deteriorate over time so you might need to increase it or run ``breeze cleanup`` periodically. For details see `Docker for Mac - Advanced tab `_. On WSL2 you might want to increase your Virtual Hard Disk by following: `Expanding the size of your WSL 2 Virtual Hard Disk `_ +There is a command ``breeze resource-check`` that you can run to check available resources. See below +for details. + Cleaning the environment ------------------------ @@ -218,9 +213,9 @@ them, you may end up with some unused image data. To clean up the Docker environment: -1. Stop Breeze with ``./breeze stop``. (If Breeze is already running) +1. Stop Breeze with ``breeze stop``. (If Breeze is already running) -2. Run the ``docker system prune`` command. +2. Run the ``breeze cleanup`` command. 3. Run ``docker images --all`` and ``docker ps --all`` to verify that your Docker is clean. @@ -236,14 +231,58 @@ In case of disk space errors on macOS, increase the disk space available for Doc Installation ============ -Installation is as easy as checking out Airflow repository and running Breeze command. -You enter the Breeze test environment by running the ``./breeze`` script. You can run it with -the ``help`` command to see the list of available options. See `Breeze Command-Line Interface Reference`_ -for details. +Run this command to install Breeze (make sure to use ``-e`` flag): + +.. code-block:: bash + + pipx install -e ./dev/breeze + +Once this is complete, you should have ``breeze`` binary on your PATH and available to run by ``breeze`` +command. + +Those are all available commands for Breeze and details about the commands are described below: + +.. image:: ./images/breeze/output-commands.svg + :width: 100% + :alt: Breeze commands + +Breeze installed this way is linked to your checked out sources of Airflow so Breeze will +automatically use latest version of sources from ``./dev/breeze``. Sometimes, when dependencies are +updated ``breeze`` commands with offer you to ``self-upgrade`` (you just need to answer ``y`` when asked). + +You can always run such self-upgrade at any time: .. code-block:: bash - ./breeze + breeze self-upgrade + +Those are all available flags of ``self-upgrade`` command: + +.. 
image:: ./images/breeze/output-self-upgrade.svg + :width: 100% + :alt: Breeze self-upgrade + +If you have several checked out Airflow sources, Breeze will warn you if you are using it from a different +source tree and will offer you to re-install from those sources - to make sure that you are using the right +version. + +You can skip Breeze's upgrade check by setting ``SKIP_BREEZE_UPGRADE_CHECK`` variable to non empty value. + +By default Breeze works on the version of Airflow that you run it in - in case you are outside of the +sources of Airflow and you installed Breeze from a directory - Breeze will be run on Airflow sources from +where it was installed. + +You can run ``breeze version`` command to see where breeze installed from and what are the current sources +that Breeze works on + +Those are all available flags of ``version`` command: + +.. image:: ./images/breeze/output-version.svg + :width: 100% + :alt: Breeze version + +Running Breeze for the first time +================================= The First time you run Breeze, it pulls and builds a local version of Docker images. It pulls the latest Airflow CI images from the @@ -254,32 +293,53 @@ minutes on a fast connection to start. Subsequent runs should be much faster. Once you enter the environment, you are dropped into bash shell of the Airflow container and you can run tests immediately. -To use the full potential of breeze you should set up autocomplete and you can -add the checked-out Airflow repository to your PATH to run Breeze without the ``./`` and from any directory. - -The ``breeze`` command comes with a built-in bash/zsh autocomplete setup command. After installing, when you -start typing the command, you can use to show all the available switches and get +To use the full potential of breeze you should set up autocomplete. The ``breeze`` command comes +with a built-in bash/zsh/fish autocomplete setup command. After installing, +when you start typing the command, you can use to show all the available switches and get auto-completion on typical values of parameters that you can use. You should set up the autocomplete option automatically by running: .. code-block:: bash - ./breeze setup-autocomplete + breeze setup-autocomplete + +You get the auto-completion working when you re-enter the shell (follow the instructions printed). +The command will warn you and not reinstall autocomplete if you already did, but you can +also force reinstalling the autocomplete via: + +.. code-block:: bash + + breeze setup-autocomplete --force + +Those are all available flags of ``setup-autocomplete`` command: + +.. image:: ./images/breeze/output-setup-autocomplete.svg + :width: 100% + :alt: Breeze setup autocomplete -You get the auto-completion working when you re-enter the shell. Customize your environment -------------------------- + When you enter the Breeze environment, automatically an environment file is sourced from -``files/airflow-breeze-config/variables.env``. The ``files`` folder from your local sources is -automatically mounted to the container under ``/files`` path and you can put there any files you want -to make available for the Breeze container. +``files/airflow-breeze-config/variables.env``. + +You can also add ``files/airflow-breeze-config/init.sh`` and the script will be sourced always +when you enter Breeze. For example you can add ``pip install`` commands if you want to install +custom dependencies - but there are no limits to add your own customizations. 
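For illustration, a minimal sketch of what such an ``init.sh`` could contain is shown below - the extra package and the environment variable are only examples, not part of the standard Breeze setup:

.. code-block:: bash

    # files/airflow-breeze-config/init.sh is sourced every time you enter Breeze.
    # Install an extra dependency that you only need for local experiments (example only).
    pip install --quiet yamllint
    # Export any environment variables you want available inside the Breeze shell.
    export AIRFLOW__CORE__LOAD_EXAMPLES=False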
+ +The ``files`` folder from your local sources is automatically mounted to the container under +``/files`` path and you can put there any files you want to make available for the Breeze container. + +You can also copy any .whl or .sdist packages to dist and when you pass ``--use-packages-from-dist`` flag +as ``wheel`` or ``sdist`` line parameter, breeze will automatically install the packages found there +when you enter Breeze. You can also add your local tmux configuration in ``files/airflow-breeze-config/.tmux.conf`` and these configurations will be available for your tmux environment. -there is a symlink between ``files/airflow-breeze-config/.tmux.conf`` and ``~/.tmux.conf`` in the container, +There is a symlink between ``files/airflow-breeze-config/.tmux.conf`` and ``~/.tmux.conf`` in the container, so you can change it at any place, and run .. code-block:: bash @@ -288,7 +348,6 @@ so you can change it at any place, and run inside container, to enable modified tmux configurations. - .. raw:: html
@@ -305,6 +364,9 @@ Breeze helps with running tests in the same environment/way as CI tests are run. types of tests while you enter Breeze CI interactive environment - this is described in detail in ``_ +Here is the part of Breeze video which is relevant (note that it refers to the old ``./breeze-legacy`` +command and it is not yet available in the new ``breeze`` command): + .. raw:: html
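As a quick sketch of how running tests usually looks (the positional test path below is an assumption - the exact selection options may differ between versions, so check ``breeze tests --help``):

.. code-block:: bash

    # Run a single test module inside the Breeze environment
    breeze tests tests/core/test_core.py

    # Alternatively, enter the interactive shell first...
    breeze shell
    # ...and then run pytest directly inside the container:
    pytest tests/core/test_core.py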
@@ -321,19 +383,34 @@ You can use additional ``breeze`` flags to choose your environment. You can spec version to use, and backend (the meta-data database). Thanks to that, with Breeze, you can recreate the same environments as we have in matrix builds in the CI. -For example, you can choose to run Python 3.7 tests with MySQL as backend and in the Docker environment as -follows: +For example, you can choose to run Python 3.7 tests with MySQL as backend and with mysql version 8 +as follows: .. code-block:: bash - ./breeze --python 3.7 --backend mysql + breeze --python 3.7 --backend mysql --mysql-version 8 The choices you make are persisted in the ``./.build/`` cache directory so that next time when you use the ``breeze`` script, it could use the values that were used previously. This way you do not have to specify them when you run the script. You can delete the ``.build/`` directory in case you want to restore the default settings. -The defaults when you run the Breeze environment are Python 3.7 version and SQLite database. +You can see which values of the parameters can be stored persistently in the cache - they are marked with >VALUE< +in the help of the commands. + +Another part of the configuration is enabling/disabling the cheatsheet and asciiart. Both can +be disabled - they are "nice looking" and the cheatsheet +contains useful information for first-time users, but eventually you might want to disable both if you +find them repetitive and annoying. + +The ``config`` command also lets you switch to colour-blind-friendly communication for Breeze messages. By default we communicate +with the users about information/errors/warnings/successes via colour-coded messages, but we can switch +it off by passing ``--no-colour`` to config, in which case the messages printed to the user by Breeze +will use different schemes (italic/bold/underline) to indicate different kinds of messages +rather than colours. + +Here is the part of Breeze video which is relevant (note that it refers to the old ``./breeze-legacy`` +command but it is very similar to current ``breeze`` command): .. raw:: html @@ -344,6 +421,41 @@ The defaults when you run the Breeze environment are Python 3.7 version and SQLi
+Those are all available flags of ``config`` command: + +.. image:: ./images/breeze/output-config.svg + :width: 100% + :alt: Breeze config + + +You can also dump hash of the configuration options used - this is mostly used to generate the dump +of help of the commands only when they change. + +.. image:: ./images/breeze/output-command-hash-export.svg + :width: 100% + :alt: Breeze command-hash-export + + +Starting complete Airflow installation +====================================== + +For testing Airflow you often want to start multiple components (in multiple terminals). Breeze has +built-in ``start-airflow`` command that starts the Breeze container, launches multiple terminals using tmux +and launches all the necessary Airflow components in those terminals. + +You can also use it to start any released version of Airflow from ``PyPI`` with the +``--use-airflow-version`` flag. + +.. code-block:: bash + + breeze --python 3.7 --backend mysql --use-airflow-version 2.2.5 start-airflow + +Those are all available flags of ``start-airflow`` command: + +.. image:: ./images/breeze/output-start-airflow.svg + :width: 100% + :alt: Breeze start-airflow + Troubleshooting =============== @@ -351,13 +463,13 @@ If you are having problems with the Breeze environment, try the steps below. After each step you can check whether your problem is fixed. -1. If you are on macOS, check if you have enough disk space for Docker. -2. Restart Breeze with ``./breeze restart``. -3. Delete the ``.build`` directory and run ``./breeze build-image``. -4. Clean up Docker images via ``breeze cleanup-image`` command. +1. If you are on macOS, check if you have enough disk space for Docker (Breeze will warn you if not). +2. Stop Breeze with ``breeze stop``. +3. Delete the ``.build`` directory and run ``breeze build-image``. +4. Clean up Docker images via ``breeze cleanup`` command. 5. Restart your Docker Engine and try again. 6. Restart your machine and try again. -7. Re-install Docker CE and try again. +7. Re-install Docker Desktop and try again. In case the problems are not solved, you can set the VERBOSE_COMMANDS variable to "true": @@ -377,51 +489,96 @@ Airflow Breeze is a bash script serving as a "swiss-army-knife" of Airflow testi hood it uses other scripts that you can also run manually if you have problem with running the Breeze environment.
Breeze script allows performing the following tasks: -Airflow developers tasks ------------------------- +Development tasks +----------------- -Regular development tasks: +Those are commands mostly used by contributors: -* Setup autocomplete for Breeze with ``breeze setup-autocomplete`` command +* Execute arbitrary command in the test environment with ``breeze shell`` command * Enter interactive shell in CI container when ``shell`` (or no command) is specified * Start containerised, development-friendly airflow installation with ``breeze start-airflow`` command * Build documentation with ``breeze build-docs`` command -* Initialize local virtualenv with ``breeze initialize-local-virtualenv`` command -* Build CI docker image with ``breeze build-image`` command -* Cleanup CI docker image with ``breeze cleanup-image`` command -* Run static checks with autocomplete support ``breeze static-check`` command +* Initialize local virtualenv with ``./scripts/tools/initialize_virtualenv.py`` command +* Run static checks with autocomplete support ``breeze static-checks`` command * Run test specified with ``breeze tests`` command +* Build CI docker image with ``breeze build-image`` command +* Cleanup breeze with ``breeze cleanup`` command Additional management tasks: * Join running interactive shell with ``breeze exec`` command * Stop running interactive environment with ``breeze stop`` command -* Restart running interactive environment with ``breeze restart`` command -* Execute arbitrary command in the test environment with ``breeze shell`` command -* Execute arbitrary docker-compose command with ``breeze docker-compose`` command +* Execute arbitrary docker-compose command with ``./breeze-legacy docker-compose`` command -Kubernetes tests related: +Tests +----- -* Manage KinD Kubernetes cluster and deploy Airflow to KinD cluster ``breeze kind-cluster`` commands -* Run Kubernetes tests specified with ``breeze kind-cluster tests`` command -* Enter the interactive kubernetes test environment with ``breeze kind-cluster shell`` command +* Run docker-compose tests with ``breeze docker-compose-tests`` command. +* Run test specified with ``breeze tests`` command. -Airflow can also be used for managing Production images (with ``--production-image`` flag added for image -related command) - this is a development-only feature, regular users of Airflow should use ``docker build`` -commands to manage the images as described in the user documentation about -`building the image `_ +.. image:: ./images/breeze/output-tests.svg + :width: 100% + :alt: Breeze tests + +Kubernetes tests +---------------- + +* Manage KinD Kubernetes cluster and deploy Airflow to KinD cluster ``./breeze-legacy kind-cluster`` commands +* Run Kubernetes tests specified with ``./breeze-legacy kind-cluster tests`` command +* Enter the interactive kubernetes test environment with ``./breeze-legacy kind-cluster shell`` command + +CI Image tasks +-------------- + +The image building is usually run for users automatically when needed, +but sometimes Breeze users might want to manually build, pull or verify the CI images. + +* Build CI docker image with ``breeze build-image`` command +* Pull CI images in parallel ``breeze pull-image`` command +* Verify CI image ``breeze verify-image`` command -Maintainer tasks +PROD Image tasks ---------------- +Users can also build Production images when they are developing them. However when you want to +use the PROD image, the regular docker build commands are recommended. 
See +`building the image `_ + +* Build PROD image with ``breeze build-prod-image`` command +* Pull PROD image in parallel ``breeze pull-prod-image`` command +* Verify CI image ``breeze verify-prod-image`` command + +Configuration and maintenance +----------------------------- + +* Cleanup breeze with ``breeze cleanup`` command +* Self-upgrade breeze with ``breeze self-upgrade`` command +* Setup autocomplete for Breeze with ``breeze setup-autocomplete`` command +* Checking available resources for docker with ``breeze resource-check`` command +* Freeing space needed to run CI tests with ``breeze free-space`` command +* Fixing ownership of files in your repository with ``breeze fix-ownership`` command +* Print Breeze version with ``breeze version`` command +* Outputs hash of commands defined by ``breeze`` with ``command-hash-export`` (useful to avoid needless + regeneration of Breeze images) + +Release tasks +------------- + Maintainers also can use Breeze for other purposes (those are commands that regular contributors likely -do not need): +do not need or have no access to run). Those are usually connected with releasing Airflow: -* Prepare cache for CI: ``breeze prepare-build-cache`` (needs buildx plugin and write access to cache ghcr.io) +* Prepare cache for CI: ``breeze build-image --prepare-build-cache`` and + ``breeze build-prod image --prepare-build-cache``(needs buildx plugin and write access to registry ghcr.io) * Generate constraints with ``breeze generate-constraints`` (needed when conflicting changes are merged) -* Prepare airflow packages: ``breeze prepare-airflow-packages`` (when releasing Airflow) +* Prepare airflow packages: ``breeze prepare-airflow-package`` (when releasing Airflow) +* Verify providers: ``breeze verify-provider-packages`` (when releasing provider packages) - including importing + the providers in an earlier airflow version. * Prepare provider documentation ``breeze prepare-provider-documentation`` and prepare provider packages ``breeze prepare-provider-packages`` (when releasing provider packages) +* Finding the updated dependencies since the last successful build when we have conflict with + ``breeze find-newer-dependencies`` command +* Release production images to DockerHub with ``breeze release-prod-images`` command + Details of Breeze usage ======================= @@ -430,11 +587,30 @@ Database volumes in Breeze -------------------------- Breeze keeps data for all it's integration in named docker volumes. Each backend and integration -keeps data in their own volume. Those volumes are persisted until ``./breeze stop`` command or -``./breeze restart`` command is run. You can also preserve the volumes by adding flag -``--preserve-volumes`` when you run either of those commands. Then, next time when you start -``Breeze``, it will have the data pre-populated. You can always delete the volumes by -running ``./breeze stop`` without the ``--preserve-volumes`` flag. +keeps data in their own volume. Those volumes are persisted until ``breeze stop`` command. +You can also preserve the volumes by adding flag ``--preserve-volumes`` when you run the command. +Then, next time when you start Breeze, it will have the data pre-populated. + +Those are all available flags of ``stop`` command: + +.. image:: ./images/breeze/output-stop.svg + :width: 100% + :alt: Breeze stop + +Image cleanup +-------------- + +Breeze uses docker images heavily and those images are rebuild periodically. This might cause extra +disk usage by the images. 
If you need to clean up the images periodically you can run +``breeze cleanup`` command (by default it will skip removing your images before cleaning up but you +can also remove the images to clean everything up by adding ``--all``). + +Those are all available flags of ``cleanup`` command: + + +.. image:: ./images/breeze/output-cleanup.svg + :width: 100% + :alt: Breeze cleanup Launching multiple terminals ---------------------------- @@ -448,6 +624,8 @@ capability of creating multiple virtual terminals and multiplex between them. Mo found at `tmux GitHub wiki page `_ . Tmux has several useful shortcuts that allow you to split the terminals, open new tabs etc - it's pretty useful to learn it. +Here is the part of Breeze video which is relevant: + .. raw:: html
@@ -464,6 +642,8 @@ to enter the running container. It's as easy as launching ``breeze exec`` while Breeze environment. You will be dropped into bash and environment variables will be read in the same way as when you enter the environment. You can do it multiple times and open as many terminals as you need. +Here is the part of Breeze video which is relevant: + .. raw:: html
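A small usage sketch, using the commands exactly as described above (run both from the Airflow source tree):

.. code-block:: bash

    # Terminal 1 - start the Breeze environment and keep it running
    breeze

    # Terminal 2 - join the already running container with an additional shell
    breeze exec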
@@ -474,6 +654,12 @@ way as when you enter the environment. You can do it multiple times and open as
+Those are all available flags of ``exec`` command: + +.. image:: ./images/breeze/output-exec.svg + :width: 100% + :alt: Breeze exec + Additional tools ---------------- @@ -500,6 +686,7 @@ Currently available scripts: for iTerm2 (Mac OS only) * ``install_java.sh`` - installs `the OpenJDK 8u41 `__ * ``install_kubectl.sh`` - installs `the Kubernetes command-line tool, kubectl `__ +* ``install_snowsql.sh`` - installs `SnowSQL `__ * ``install_terraform.sh`` - installs `Terraform `__ Launching Breeze integrations @@ -521,6 +708,9 @@ Once integration is started, it will continue to run until the environment is st Note that running integrations uses significant resources - CPU and memory. +Here is the part of Breeze video which is relevant (note that it refers to the old ``./breeze-legacy`` +command but it is very similar to current ``breeze`` command): + .. raw:: html
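The flag used to select integrations is not shown in this chapter, so the invocation below is only an assumption (``--integration``, which can be repeated); adjust it to whatever ``breeze --help`` shows in your checkout:

.. code-block:: bash

    # Illustrative only - start Breeze together with selected integrations,
    # each one running as a separate docker-compose managed container.
    breeze --integration redis --integration rabbitmq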
@@ -530,7 +720,7 @@ Note that running integrations uses significant resources - CPU and memory.
-Building CI images +Managing CI images ------------------ With Breeze you can build images that are used by Airflow CI and production ones. @@ -543,6 +733,9 @@ The CI image is built automatically as needed, however it can be rebuilt manuall image should be built manually - but also a variant of this image is built automatically when kubernetes tests are executed see `Running Kubernetes tests <#running-kubernetes-tests>`_ +Here is the part of Breeze video which is relevant (note that it refers to the old ``./breeze-legacy`` +command but it is very similar to current ``breeze`` command): + .. raw:: html
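For example, a manual rebuild of the CI image could look like the sketch below - ``breeze build-image`` is the command mentioned above, while the ``--python`` selector is an assumption carried over from the other examples in this document:

.. code-block:: bash

    # Rebuild the CI image for a specific Python version
    breeze build-image --python 3.9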
@@ -562,10 +755,50 @@ latest image build on CI. This might happen when for example latest patches have Python images or when significant changes are made in the Dockerfile. In such cases, Breeze will download the latest images before rebuilding because this is usually faster than rebuilding the image. -In most cases, rebuilding an image requires network connectivity (for example, to download new -dependencies). If you work offline and do not want to rebuild the images when needed, you can set the -``FORCE_ANSWER_TO_QUESTIONS`` variable to ``no`` as described in the -`Setting default behaviour for user interaction <#setting-default-behaviour-for-user-interaction>`_ section. +Those are all available flags of ``build-image`` command: + +.. image:: ./images/breeze/output-build-image.svg + :width: 100% + :alt: Breeze build-image + +You can also pull the CI images locally in parallel with optional verification. + +Those are all available flags of ``pull-image`` command: + +.. image:: ./images/breeze/output-pull-image.svg + :width: 100% + :alt: Breeze pull-image + +Finally, you can verify CI image by running tests - either with the pulled/built images or +with an arbitrary image. + +Those are all available flags of ``verify-image`` command: + +.. image:: ./images/breeze/output-verify-image.svg + :width: 100% + :alt: Breeze verify-image + +Verifying providers +------------------- + +Breeze can also be used to verify if provider classes are importable and if they are following the +right naming conventions. This happens automatically on CI but you can also run it manually. + +.. code-block:: bash + + breeze verify-provider-packages + +You can also run the verification with an earlier airflow version to check for compatibility. + +.. code-block:: bash + + breeze verify-provider-packages --use-airflow-version 2.1.0 + +All the command parameters are here: + +.. image:: ./images/breeze/output-verify-provider-packages.svg + :width: 100% + :alt: Breeze verify-provider-packages Preparing packages ------------------ @@ -578,12 +811,31 @@ You can read more about testing provider packages in There are several commands that you can run in Breeze to manage and build packages: -* preparing Provider Readme files +* preparing Provider documentation files * preparing Airflow packages * preparing Provider packages -Preparing provider readme files is part of the release procedure by the release managers -and it is described in detail in `dev `_ . +Preparing provider documentation files is part of the release procedure by the release managers +and it is described in detail in `dev `_ . + +The below example perform documentation preparation for provider packages. + +.. code-block:: bash + + breeze prepare-provider-documentation + +By default, the documentation preparation runs package verification to check if all packages are +importable, but you can add ``--skip-package-verification`` to skip it. + +.. code-block:: bash + + breeze prepare-provider-documentation --skip-package-verification + +You can also add ``--answer yes`` to perform non-interactive build. + +.. image:: ./images/breeze/output-prepare-provider-documentation.svg + :width: 100% + :alt: Breeze prepare-provider-documentation The packages are prepared in ``dist`` folder. Note, that this command cleans up the ``dist`` folder before running, so you should run it before generating airflow package below as it will be removed. @@ -592,7 +844,7 @@ The below example builds provider packages in the wheel format. .. 
code-block:: bash - ./breeze prepare-provider-packages + breeze prepare-provider-packages If you run this command without packages, you will prepare all packages, you can however specify providers that you would like to build. By default ``both`` types of packages are prepared ( @@ -600,20 +852,23 @@ providers that you would like to build. By default ``both`` types of packages ar .. code-block:: bash - ./breeze prepare-provider-packages google amazon + breeze prepare-provider-packages google amazon You can see all providers available by running this command: .. code-block:: bash - ./breeze prepare-provider-packages -- --help + breeze prepare-provider-packages --help +.. image:: ./images/breeze/output-prepare-provider-packages.svg + :width: 100% + :alt: Breeze prepare-provider-packages -You can also prepare airflow packages using breeze: +You can prepare airflow packages using breeze: .. code-block:: bash - ./breeze prepare-airflow-packages + breeze prepare-airflow-package This prepares airflow .whl package in the dist folder. @@ -622,10 +877,13 @@ default is to build ``both`` type of packages ``sdist`` and ``wheel``. .. code-block:: bash - ./breeze prepare-airflow-packages --package-format=wheel + breeze prepare-airflow-package --package-format=wheel +.. image:: ./images/breeze/output-prepare-airflow-package.svg + :width: 100% + :alt: Breeze prepare-airflow-package -Building Production images +Managing Production images -------------------------- The **Production image** is also maintained in GitHub Container Registry for Caching @@ -636,30 +894,30 @@ However in many cases you want to add your own custom version of the image - wit python dependencies, additional Airflow extras. Breeze's ``build-image`` command helps to build your own, customized variant of the image that contains everything you need. -You can switch to building the production image by adding ``--production-image`` flag to the ``build_image`` -command. Note, that the images can also be built using ``docker build`` command by passing appropriate +You can switch to building the production image by using ``build-prod-image`` command. +Note, that the images can also be built using ``docker build`` command by passing appropriate build-args as described in `IMAGES.rst `_ , but Breeze provides several flags that -makes it easier to do it. You can see all the flags by running ``./breeze build-image --help``, +makes it easier to do it. You can see all the flags by running ``breeze build-prod-image --help``, but here typical examples are presented: .. code-block:: bash - ./breeze build-image --production-image --additional-extras "jira" + breeze build-prod-image --additional-extras "jira" This installs additional ``jira`` extra while installing airflow in the image. .. code-block:: bash - ./breeze build-image --production-image --additional-python-deps "torchio==0.17.10" + breeze build-prod-image --additional-python-deps "torchio==0.17.10" This install additional pypi dependency - torchio in specified version. .. code-block:: bash - ./breeze build-image --production-image --additional-dev-apt-deps "libasound2-dev" \ - --additional-runtime-apt-deps "libasound2" + breeze build-prod-image --additional-dev-apt-deps "libasound2-dev" \ + --additional-runtime-apt-deps "libasound2" This installs additional apt dependencies - ``libasound2-dev`` in the build image and ``libasound`` in the final image. 
Those are development dependencies that might be needed to build and use python packages added @@ -671,11 +929,20 @@ suffix and they need to also be paired with corresponding runtime dependency add .. code-block:: bash - ./breeze build-image --production-image --python 3.7 --additional-dev-deps "libasound2-dev" \ + breeze build-prod-image --python 3.7 --additional-dev-deps "libasound2-dev" \ --additional-runtime-apt-deps "libasound2" Same as above but uses python 3.7. +Those are all available flags of ``build-prod-image`` command: + +.. image:: ./images/breeze/output-build-prod-image.svg + :width: 100% + :alt: Breeze commands + +Here is the part of Breeze video which is relevant (note that it refers to the old ``./breeze-legacy`` +command but it is very similar to current ``breeze`` command): + .. raw:: html
@@ -685,6 +952,53 @@ Same as above but uses python 3.7.
+You can also pull PROD images in parallel with optional verification. + +Those are all available flags of ``pull-prod-image`` command: + +.. image:: ./images/breeze/output-pull-prod-image.svg + :width: 100% + :alt: Breeze pull-prod-image + +Finally, you can verify PROD image by running tests - either with the pulled/built images or +with an arbitrary image. + +Those are all available flags of ``verify-prod-image`` command: + +.. image:: ./images/breeze/output-verify-prod-image.svg + :width: 100% + :alt: Breeze verify-prod-image + +Releasing Production images to DockerHub +---------------------------------------- + +The **Production image** can be released by release managers who have permissions to push the image. This +happens only when there is an RC candidate or final version of Airflow released. + +You release "regular" and "slim" images as separate steps. + +Releasing "regular" images: + +.. code-block:: bash + + breeze release-prod-images --airflow-version 2.4.0 + +Or "slim" images: + +.. code-block:: bash + + breeze release-prod-images --airflow-version 2.4.0 --slim-images + +By default when you are releasing the "final" image, we also tag image with "latest" tags but this +step can be skipped if you pass the ``--skip-latest`` flag. + +These are all of the available flags for the ``release-prod-images`` command: + +.. image:: ./images/breeze/output-release-prod-images.svg + :width: 100% + :alt: Release prod images + + Running static checks --------------------- @@ -694,18 +1008,51 @@ you have auto-complete setup you should see auto-completable list of all checks .. code-block:: bash - ./breeze static-check mypy + breeze static-checks -t mypy The above will run mypy check for currently staged files. -You can also add arbitrary pre-commit flag after ``--`` +You can also pass specific pre-commit flags for example ``--all-files`` : .. code-block:: bash - ./breeze static-check mypy -- --all-files + breeze static-checks -t mypy --all-files The above will run mypy check for all files. +There is a convenience ``--last-commit`` flag that you can use to run static check on last commit only: + +.. code-block:: bash + + breeze static-checks -t mypy --last-commit + +The above will run mypy check for all files in the last commit. + +There is another convenience ``--commit-ref`` flag that you can use to run static check on specific commit: + +.. code-block:: bash + + breeze static-checks -t mypy --commit-ref 639483d998ecac64d0fef7c5aa4634414065f690 + +The above will run mypy check for all files in the 639483d998ecac64d0fef7c5aa4634414065f690 commit. +Any ``commit-ish`` reference from Git will work here (branch, tag, short/long hash etc.) + +If you ever need to get a list of the files that will be checked (for troubleshooting) use these commands: + +.. code-block:: bash + + breeze static-checks -t identity --verbose # currently staged files + breeze static-checks -t identity --verbose --from-ref $(git merge-base main HEAD) --to-ref HEAD # branch updates + +Those are all available flags of ``static-checks`` command: + +.. image:: ./images/breeze/output-static-checks.svg + :width: 100% + :alt: Breeze static checks + +Here is the part of Breeze video which is relevant (note that it refers to the old ``./breeze-legacy`` +command but it is very similar to current ``breeze`` command): + .. raw:: html
@@ -715,13 +1062,12 @@ The above will run mypy check for all files.
-If you ever need to get a list of the files that will be checked (for troubleshooting when playing with -``--from-ref`` and ``--to-ref``), use these commands: +.. note:: -.. code-block:: bash + When you run static checks, some of the artifacts (mypy_cache) is stored in docker-compose volume + so that it can speed up static checks execution significantly. However, sometimes, the cache might + get broken, in which case you should run ``breeze stop`` to clean up the cache. - ./breeze static-check identity --verbose # currently staged files - ./breeze static-check identity --verbose -- --from-ref $(git merge-base main HEAD) --to-ref HEAD # branch updates Building the Documentation -------------------------- @@ -730,7 +1076,7 @@ To build documentation in Breeze, use the ``build-docs`` command: .. code-block:: bash - ./breeze build-docs + breeze build-docs Results of the build can be found in the ``docs/_build`` folder. @@ -745,7 +1091,7 @@ extra ``--`` flag. .. code-block:: bash - ./breeze build-docs -- --spellcheck-only + breeze build-docs --spellcheck-only This process can take some time, so in order to make it shorter you can filter by package, using the flag ``--package-filter ``. The package name has to be one of the providers or ``apache-airflow``. For @@ -753,12 +1099,21 @@ instance, for using it with Amazon, the command would be: .. code-block:: bash - ./breeze build-docs -- --package-filter apache-airflow-providers-amazon + breeze build-docs --package-filter apache-airflow-providers-amazon Often errors during documentation generation come from the docstrings of auto-api generated classes. During the docs building auto-api generated files are stored in the ``docs/_api`` folder. This helps you easily identify the location the problems with documentation originated from. +Those are all available flags of ``build-docs`` command: + +.. image:: ./images/breeze/output-build-docs.svg + :width: 100% + :alt: Breeze build documentation + +Here is the part of Breeze video which is relevant (note that it refers to the old ``./breeze-legacy`` +command but it is very similar to current ``breeze`` command): + .. raw:: html
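The flags shown above can be combined; for instance, a sketch of a spell-check limited to a single provider package (both flags are described above, combining them is assumed to work):

.. code-block:: bash

    breeze build-docs --package-filter apache-airflow-providers-amazon --spellcheck-only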
@@ -777,34 +1132,45 @@ files are stored in separated orphan branches: ``constraints-main``, ``constrain Those are constraint files as described in detail in the ``_ contributing documentation. -You can use ``./breeze generate-constraints`` command to manually generate constraints for a single python -version and single constraint mode like this: -.. code-block:: bash +You can use ``breeze generate-constraints`` command to manually generate constraints for +all or selected python version and single constraint mode like this: + +.. warning:: - ./breeze generate-constraints --generate-constraints-mode pypi-providers + In order to generate constraints, you need to build all images with ``--upgrade-to-newer-dependencies`` + flag - for all python versions. +.. code-block:: bash + + breeze generate-constraints --airflow-constraints-mode constraints + Constraints are generated separately for each python version and there are separate constraints modes: * 'constraints' - those are constraints generated by matching the current airflow version from sources and providers that are installed from PyPI. Those are constraints used by the users who want to - install airflow with pip. Use ``pypi-providers`` mode for that. + install airflow with pip. * "constraints-source-providers" - those are constraints generated by using providers installed from current sources. While adding new providers their dependencies might change, so this set of providers is the current set of the constraints for airflow and providers from the current main sources. - Those providers are used by CI system to keep "stable" set of constraints. Use - ``source-providers`` mode for that. + Those providers are used by CI system to keep "stable" set of constraints. * "constraints-no-providers" - those are constraints generated from only Apache Airflow, without any providers. If you want to manage airflow separately and then add providers individually, you can - use those. Use ``no-providers`` mode for that. + use those. + +Those are all available flags of ``generate-constraints`` command: + +.. image:: ./images/breeze/output-generate-constraints.svg + :width: 100% + :alt: Breeze generate-constraints In case someone modifies setup.py, the scheduled CI Tests automatically upgrades and pushes changes to the constraint files, however you can also perform test run of this locally using -the procedure described in ``_ which utilises -multiple processors on your local machine to generate such constraints faster. +the procedure described in `Refreshing CI Cache `_ +which utilises multiple processors on your local machine to generate such constraints faster. This bumps the constraint files to latest versions and stores hash of setup.py. The generated constraint and setup.py hash files are stored in the ``files`` folder and while generating the constraints diff @@ -833,7 +1199,7 @@ To use your host IDE with Breeze: .. code-block:: bash - ./breeze initialize-local-virtualenv --python 3.8 + ./scripts/tools/initialize_virtualenv.py .. warning:: Make sure that you use the right Python version in this command - matching the Python version you have @@ -846,6 +1212,9 @@ This is a lightweight solution that has its own limitations. More details on using the local virtualenv are available in the `LOCAL_VIRTUALENV.rst `_. +Here is the part of Breeze video which is relevant (note that it refers to the old ``./breeze-legacy`` +but it is not available in the ``breeze`` command): + .. raw:: html
@@ -855,6 +1224,17 @@ More details on using the local virtualenv are available in the `LOCAL_VIRTUALEN
+Running docker-compose tests +---------------------------- + +You can use Breeze to run docker-compose tests. Those tests are run using the Production image +and they run tests with the Quick-start docker compose we have. + +.. image:: ./images/breeze/output-docker-compose-tests.svg + :width: 100% + :alt: Breeze docker-compose-tests + + Running Kubernetes tests ------------------------ @@ -864,6 +1244,9 @@ automatically to run the tests. This is described in detail in `Testing Kubernetes `_. +Here is the part of Breeze video which is relevant (note that it refers to the old ``./breeze-legacy`` +command and it is not yet available in the current ``breeze`` command): + .. raw:: html
@@ -881,8 +1264,16 @@ You can always stop it via: .. code-block:: bash - ./breeze stop + breeze stop +Those are all available flags of ``stop`` command: + +.. image:: ./images/breeze/output-stop.svg + :width: 100% + :alt: Breeze stop + +Here is the part of Breeze video which is relevant (note that it refers to the old ``./breeze-legacy`` +command but it is very similar to current ``breeze`` command): .. raw:: html @@ -893,6 +1284,49 @@ You can always stop it via:
+Resource check +============== + +Breeze requires certain resources to be available - disk, memory, CPU. When you enter Breeze's shell, +the resources are checked and information if there is enough resources is displayed. However you can +manually run resource check any time by ``breeze resource-check`` command. + +Those are all available flags of ``resource-check`` command: + +.. image:: ./images/breeze/output-resource-check.svg + :width: 100% + :alt: Breeze resource-check + + +Freeing the space +================= + +When our CI runs a job, it needs all memory and disk it can have. We have a Breeze command that frees +the memory and disk space used. You can also use it clear space locally but it performs a few operations +that might be a bit invasive - such are removing swap file and complete pruning of docker disk space used. + +Those are all available flags of ``free-space`` command: + +.. image:: ./images/breeze/output-free-space.svg + :width: 100% + :alt: Breeze free-space + + +Tracking backtracking issues for CI builds +========================================== + +When our CI runs a job, we automatically upgrade our dependencies in the ``main`` build. However, this might +lead to conflicts and ``pip`` backtracking for a long time (possibly forever) for dependency resolution. +Unfortunately those issues are difficult to diagnose so we had to invent our own tool to help us with +diagnosing them. This tool is ``find-newer-dependencies`` and it works in the way that it helps to guess +which new dependency might have caused the backtracking. The whole process is described in +`tracking backtracking issues `_. + +Those are all available flags of ``find-newer-dependencies`` command: + +.. image:: ./images/breeze/output-find-newer-dependencies.svg + :width: 100% + :alt: Breeze find-newer-dependencies Internal details of Breeze ========================== @@ -938,11 +1372,17 @@ Running Arbitrary commands in the Breeze environment ---------------------------------------------------- To run other commands/executables inside the Breeze Docker-based environment, use the -``./breeze shell`` command. You should add your command as -c "command" after ``--`` as extra arguments. +``breeze shell`` command. .. code-block:: bash - ./breeze shell -- -c "ls -la" + breeze shell "ls -la" + +Those are all available flags of ``shell`` command: + +.. image:: ./images/breeze/output-shell.svg + :width: 100% + :alt: Breeze shell Running "Docker Compose" commands --------------------------------- @@ -953,48 +1393,22 @@ after ``--`` as extra arguments. .. code-block:: bash - ./breeze docker-compose pull -- --ignore-pull-failures - -Restarting Breeze environment ------------------------------ - -You can also restart the environment and enter it via: - -.. code-block:: bash - - ./breeze restart - + ./breeze-legacy docker-compose pull -- --ignore-pull-failures Setting default answers for user interaction -------------------------------------------- Sometimes during the build, you are asked whether to perform an action, skip it, or quit. This happens -when rebuilding or removing an image - actions that take a lot of time and could be potentially destructive. - -For automation scripts, you can export one of the three variables to control the default -interaction behaviour: - -.. code-block:: - - export FORCE_ANSWER_TO_QUESTIONS="yes" - -If ``FORCE_ANSWER_TO_QUESTIONS`` is set to ``yes``, the images are automatically rebuilt when needed. -Images are deleted without asking. - -.. 
Fixing File/Directory Ownership ------------------------------- @@ -1003,11 +1417,18 @@ On Linux, there is a problem with propagating ownership of created files (a know files and directories created in the container are not owned by the host user (but by the root user in our case). This may prevent you from switching branches, for example, if files owned by the root user are created within your sources. In case you are on a Linux host and have some files in your sources created -by the root user, you can fix the ownership of those files by running this script: +by the root user, you can fix the ownership of those files by running: .. code-block:: - ./scripts/ci/tools/fix_ownership.sh + breeze fix-ownership + +These are all the available flags of the ``fix-ownership`` command: + +.. image:: ./images/breeze/output-fix-ownership.svg + :width: 100% + :alt: Breeze fix-ownership + Mounting Local Sources to Breeze -------------------------------- @@ -1048,6 +1469,7 @@ You can connect to these ports/databases using: * Flower: http://127.0.0.1:25555 * Postgres: jdbc:postgresql://127.0.0.1:25433/airflow?user=postgres&password=airflow * Mysql: jdbc:mysql://127.0.0.1:23306/airflow?user=root +* MSSQL: jdbc:sqlserver://127.0.0.1:21433;databaseName=airflow;user=sa;password=Airflow123 * Redis: redis://127.0.0.1:26379/0 If you do not use ``start-airflow`` command, you can start the webserver manually with @@ -1120,1683 +1542,25 @@ of the ``Dockerfile.ci``. This way dependencies will be added incrementally. Before merge, these dependencies should be moved to the appropriate ``apt-get install`` command, which is already in the ``Dockerfile.ci``. +Recording command output +======================== -Breeze Command-Line Interface Reference -======================================= - -Airflow Breeze Syntax ---------------------- - -This is the current syntax for `./breeze <./breeze>`_: - - .. START BREEZE HELP MARKER - -.. code-block:: text - - - #################################################################################################### - - usage: breeze [FLAGS] [COMMAND] -- - - By default the script enters the CI container and drops you to bash shell, but you can choose - one of the commands to run specific actions instead. - - Add --help after each command to see details: +Breeze uses the built-in capability of ``rich`` to record and print the command help as an ``svg`` file. +It's enabled by setting ``RECORD_BREEZE_OUTPUT_FILE`` to the file name where it will be recorded. +By default it records the screenshots with the default character width and with the "Breeze screenshot" title, +but you can override them with the ``RECORD_BREEZE_WIDTH`` and ``RECORD_BREEZE_TITLE`` variables respectively.
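+
+As an illustration, recording the help of the ``stop`` command into an ``svg`` file might look
+roughly like this (the file name, width and title below are arbitrary example values):
+
+.. code-block:: bash
+
+    export RECORD_BREEZE_OUTPUT_FILE="output-stop.svg"
+    export RECORD_BREEZE_WIDTH="100"
+    export RECORD_BREEZE_TITLE="Breeze stop"
+    breeze stop --help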
- Commands without arguments: +Uninstalling Breeze +=================== +Breeze was installed with ``pipx``. With ``pipx list``, you can list the installed packages. +Once you have the name of the ``breeze`` package, you can proceed to uninstall it. - shell [Default] Enters interactive shell in the container - build-docs Builds documentation in the container - build-image Builds CI or Production docker image - prepare-build-cache Prepares CI or Production build cache - cleanup-image Cleans up the container image created - exec Execs into running breeze container in new terminal - generate-constraints Generates pinned constraint files - initialize-local-virtualenv Initializes local virtualenv - prepare-airflow-packages Prepares airflow packages - setup-autocomplete Sets up autocomplete for breeze - start-airflow Starts Scheduler and Webserver and enters the shell - stop Stops the docker-compose environment - restart Stops the docker-compose environment including DB cleanup - toggle-suppress-cheatsheet Toggles on/off cheatsheet - toggle-suppress-asciiart Toggles on/off asciiart - - Commands with arguments: - - docker-compose Executes specified docker-compose command - kind-cluster Manages KinD cluster on the host - prepare-provider-documentation Prepares provider packages documentation - prepare-provider-packages Prepares provider packages - static-check Performs selected static check for changed files - tests Runs selected tests in the container - - Help commands: - - flags Shows all breeze's flags - help Shows this help message - help-all Shows detailed help for all commands and flags - - #################################################################################################### - - Detailed usage - - #################################################################################################### - - - Detailed usage for command: shell - - - breeze shell [FLAGS] [-- ] - - This is default subcommand if no subcommand is used. - - Enters interactive shell where you can run all tests, start Airflow webserver, scheduler, - workers, interact with the database, run DAGs etc. It is the default command if no command - is selected. The shell is executed in the container and in case integrations are chosen, - the integrations will be started as separated docker containers - under the docker-compose - supervision. Local sources are by default mounted to within the container so you can edit - them locally and run tests immediately in the container. Several folders ('files', 'dist') - are also mounted so that you can exchange files between the host and container. - - The 'files/airflow-breeze-config/variables.env' file can contain additional variables - and setup. This file is automatically sourced when you enter the container. Database - and webserver ports are forwarded to appropriate database/webserver so that you can - connect to it from your host environment. - - You can also pass after -- they will be passed as bash parameters, this is - especially useful to pass bash options, for example -c to execute command: +.. code-block:: bash - 'breeze shell -- -c "ls -la"' - 'breeze -- -c "ls -la"' + pipx list - For GitHub repository, the --github-repository flag can be used to specify the repository - to pull and push images.
You can also use --github-image-id in case - you want to pull the image with specific COMMIT_SHA tag. +This will also remove breeze from the folder: ``${HOME}.local/bin/`` - 'breeze shell \ - --github-image-id 9a621eaa394c0a0a336f8e1b31b35eff4e4ee86e' - pull/use image with SHA - 'breeze \ - --github-image-id 9a621eaa394c0a0a336f8e1b31b35eff4e4ee86e' - pull/use image with SHA - - Most flags are applicable to the shell command as it will run build when needed. - - - #################################################################################################### - - - Detailed usage for command: build-docs - - - breeze build-docs [-- ] - - Builds Airflow documentation. The documentation is build inside docker container - to - maintain the same build environment for everyone. Appropriate sources are mapped from - the host to the container so that latest sources are used. The folders where documentation - is generated ('docs/_build') are also mounted to the container - this way results of - the documentation build is available in the host. - - The possible extra args are: --docs-only, --spellcheck-only, --package-filter, --help - - - #################################################################################################### - - - Detailed usage for command: build-image - - - breeze build-image [FLAGS] - - Builds docker image (CI or production) without entering the container. You can pass - additional options to this command, such as: - - Choosing python version: - '--python' - - Choosing cache option: - '--build-cache-local' or '-build-cache-pulled', or '--build-cache-none' - - Choosing whether to force pull images or force build the image: - '--force-build-image' - - You can also pass '--production-image' flag to build production image rather than CI image. - - For GitHub repository, the '--github-repository' can be used to choose repository - to pull/push images. - - Flags: - - -p, --python PYTHON_MAJOR_MINOR_VERSION - Python version used for the image. This is always major/minor version. - - One of: - - 3.7 3.8 3.9 3.10 - - --platform PLATFORM - Builds image for the platform specified. - - One of: - - linux/amd64 linux/arm64 linux/amd64,linux/arm64 - - - -d, --debian DEBIAN_VERSION - Debian version used for the image. This is always name of the debian distribution version. - - One of: - - bullseye buster - - -a, --install-airflow-version INSTALL_AIRFLOW_VERSION - Uses different version of Airflow when building PROD image. - - 2.0.2 2.0.1 2.0.0 wheel sdist - - -t, --install-airflow-reference INSTALL_AIRFLOW_REFERENCE - Installs Airflow directly from reference in GitHub when building PROD image. - This can be a GitHub branch like main or v2-2-test, or a tag like 2.2.0rc1. - - --installation-method INSTALLATION_METHOD - Method of installing Airflow in PROD image - either from the sources ('.') - or from package 'apache-airflow' to install from PyPI. - Default in Breeze is to install from sources. One of: - - . apache-airflow - - --upgrade-to-newer-dependencies - Upgrades PIP packages to latest versions available without looking at the constraints. - - -I, --production-image - Use production image for entering the environment and builds (not for tests). - - -F, --force-build-images - Forces building of the local docker images. The images are rebuilt - automatically for the first time or when changes are detected in - package-related files, but you can force it using this flag. 
- - --cleanup-docker-context-files - Removes whl and tar.gz files created in docker-context-files before running the command. - In case there are some files there it unnecessarily increases the context size and - makes the COPY . always invalidated - if you happen to have those files when you build your - image. - - Customization options: - - -E, --extras EXTRAS - Extras to pass to build images The default are different for CI and production images: - - CI image: - devel_ci - - Production image: - amazon,async,celery,cncf.kubernetes,dask,docker,elasticsearch,ftp,google,google_auth, - grpc,hashicorp,http,ldap,microsoft.azure,mysql,odbc,pandas,postgres,redis,sendgrid, - sftp,slack,ssh,statsd,virtualenv - - --image-tag TAG - Additional tag in the image. - - --skip-installing-airflow-providers-from-sources - By default 'pip install' in Airflow 2.0 installs only the provider packages that - are needed by the extras. When you build image during the development (which is - default in Breeze) all providers are installed by default from sources. - You can disable it by adding this flag but then you have to install providers from - wheel packages via --use-packages-from-dist flag. - - --disable-pypi-when-building - Disable installing Airflow from pypi when building. If you use this flag and want - to install Airflow, you have to install it from packages placed in - 'docker-context-files' and use --install-from-docker-context-files flag. - - --additional-extras ADDITIONAL_EXTRAS - Additional extras to pass to build images The default is no additional extras. - - --additional-python-deps ADDITIONAL_PYTHON_DEPS - Additional python dependencies to use when building the images. - - --dev-apt-command DEV_APT_COMMAND - The basic command executed before dev apt deps are installed. - - --additional-dev-apt-command ADDITIONAL_DEV_APT_COMMAND - Additional command executed before dev apt deps are installed. - - --additional-dev-apt-deps ADDITIONAL_DEV_APT_DEPS - Additional apt dev dependencies to use when building the images. - - --dev-apt-deps DEV_APT_DEPS - The basic apt dev dependencies to use when building the images. - - --additional-dev-apt-deps ADDITIONAL_DEV_DEPS - Additional apt dev dependencies to use when building the images. - - --additional-dev-apt-envs ADDITIONAL_DEV_APT_ENVS - Additional environment variables set when adding dev dependencies. - - --runtime-apt-command RUNTIME_APT_COMMAND - The basic command executed before runtime apt deps are installed. - - --additional-runtime-apt-command ADDITIONAL_RUNTIME_APT_COMMAND - Additional command executed before runtime apt deps are installed. - - --runtime-apt-deps ADDITIONAL_RUNTIME_APT_DEPS - The basic apt runtime dependencies to use when building the images. - - --additional-runtime-apt-deps ADDITIONAL_RUNTIME_DEPS - Additional apt runtime dependencies to use when building the images. - - --additional-runtime-apt-envs ADDITIONAL_RUNTIME_APT_DEPS - Additional environment variables set when adding runtime dependencies. - - Build options: - - --disable-mysql-client-installation - Disables installation of the mysql client which might be problematic if you are building - image in controlled environment. Only valid for production image. - - --disable-mssql-client-installation - Disables installation of the mssql client which might be problematic if you are building - image in controlled environment. Only valid for production image. - - --constraints-location - Url to the constraints file. 
In case of the production image it can also be a path to the - constraint file placed in 'docker-context-files' folder, in which case it has to be - in the form of '/docker-context-files/' - - --disable-pip-cache - Disables GitHub PIP cache during the build. Useful if GitHub is not reachable during build. - - --install-from-docker-context-files - This flag is used during image building. If it is used additionally to installing - Airflow from PyPI, the packages are installed from the .whl and .tar.gz packages placed - in the 'docker-context-files' folder. The same flag can be used during entering the image in - the CI image - in this case also the .whl and .tar.gz files will be installed automatically - - -C, --force-clean-images - Force build images with cache disabled. This will remove the pulled or build images - and start building images from scratch. This might take a long time. - - -r, --skip-rebuild-check - Skips checking image for rebuilds. It will use whatever image is available locally/pulled. - - -L, --build-cache-local - Uses local cache to build images. No pulled images will be used, but results of local - builds in the Docker cache are used instead. This will take longer than when the pulled - cache is used for the first time, but subsequent '--build-cache-local' builds will be - faster as they will use mostly the locally build cache. - - This is default strategy used by the Production image builds. - - -U, --build-cache-pulled - Uses images pulled from GitHub Container Registry to build images. - Those builds are usually faster than when ''--build-cache-local'' with the exception if - the registry images are not yet updated. The images are updated after successful merges - to main. - - This is default strategy used by the CI image builds. - - -X, --build-cache-disabled - Disables cache during docker builds. This is useful if you want to make sure you want to - rebuild everything from scratch. - - This strategy is used by default for both Production and CI images for the scheduled - (nightly) builds in CI. - - -g, --github-repository GITHUB_REPOSITORY - GitHub repository used to pull, push images. - Default: apache/airflow. - - -v, --verbose - Show verbose information about executed docker, kind, kubectl, helm commands. Useful for - debugging - when you run breeze with --verbose flags you will be able to see the commands - executed under the hood and copy&paste them to your terminal to debug them more easily. - - Note that you can further increase verbosity and see all the commands executed by breeze - by running 'export VERBOSE_COMMANDS="true"' before running breeze. - - --dry-run-docker - Only show docker commands to execute instead of actually executing them. The docker - commands are printed in yellow color. - - - #################################################################################################### - - - Detailed usage for command: prepare-build-cache - - - breeze prepare-build-cache [FLAGS] - - Prepares build cache (CI or production) without entering the container. You can pass - additional options to this command, such as: - - Choosing python version: - '--python' - - You can also pass '--production-image' flag to build production image rather than CI image. - - For GitHub repository, the '--github-repository' can be used to choose repository - to pull/push images. Cleanup docker context files and pull cache are forced. This command - requires buildx to be installed. 
- - Flags: - - -p, --python PYTHON_MAJOR_MINOR_VERSION - Python version used for the image. This is always major/minor version. - - One of: - - 3.7 3.8 3.9 3.10 - - --platform PLATFORM - Builds image for the platform specified. - - One of: - - linux/amd64 linux/arm64 linux/amd64,linux/arm64 - - - -d, --debian DEBIAN_VERSION - Debian version used for the image. This is always name of the debian distribution version. - - One of: - - bullseye buster - - -a, --install-airflow-version INSTALL_AIRFLOW_VERSION - Uses different version of Airflow when building PROD image. - - 2.0.2 2.0.1 2.0.0 wheel sdist - - -t, --install-airflow-reference INSTALL_AIRFLOW_REFERENCE - Installs Airflow directly from reference in GitHub when building PROD image. - This can be a GitHub branch like main or v2-2-test, or a tag like 2.2.0rc1. - - --installation-method INSTALLATION_METHOD - Method of installing Airflow in PROD image - either from the sources ('.') - or from package 'apache-airflow' to install from PyPI. - Default in Breeze is to install from sources. One of: - - . apache-airflow - - --upgrade-to-newer-dependencies - Upgrades PIP packages to latest versions available without looking at the constraints. - - -I, --production-image - Use production image for entering the environment and builds (not for tests). - - -g, --github-repository GITHUB_REPOSITORY - GitHub repository used to pull, push images. - Default: apache/airflow. - - -v, --verbose - Show verbose information about executed docker, kind, kubectl, helm commands. Useful for - debugging - when you run breeze with --verbose flags you will be able to see the commands - executed under the hood and copy&paste them to your terminal to debug them more easily. - - Note that you can further increase verbosity and see all the commands executed by breeze - by running 'export VERBOSE_COMMANDS="true"' before running breeze. - - --dry-run-docker - Only show docker commands to execute instead of actually executing them. The docker - commands are printed in yellow color. - - - #################################################################################################### - - - Detailed usage for command: cleanup-image - - - breeze cleanup-image [FLAGS] - - Removes the breeze-related images created in your local docker image cache. This will - not reclaim space in docker cache. You need to 'docker system prune' (optionally - with --all) to reclaim that space. - - Flags: - - -p, --python PYTHON_MAJOR_MINOR_VERSION - Python version used for the image. This is always major/minor version. - - One of: - - 3.7 3.8 3.9 3.10 - - --platform PLATFORM - Builds image for the platform specified. - - One of: - - linux/amd64 linux/arm64 linux/amd64,linux/arm64 - - - -d, --debian DEBIAN_VERSION - Debian version used for the image. This is always name of the debian distribution version. - - One of: - - bullseye buster - - -I, --production-image - Use production image for entering the environment and builds (not for tests). - - -v, --verbose - Show verbose information about executed docker, kind, kubectl, helm commands. Useful for - debugging - when you run breeze with --verbose flags you will be able to see the commands - executed under the hood and copy&paste them to your terminal to debug them more easily. - - Note that you can further increase verbosity and see all the commands executed by breeze - by running 'export VERBOSE_COMMANDS="true"' before running breeze. - - --dry-run-docker - Only show docker commands to execute instead of actually executing them. 
The docker - commands are printed in yellow color. - - - #################################################################################################### - - - Detailed usage for command: exec - - - breeze exec [-- ] - - Execs into interactive shell to an already running container. The container mus be started - already by breeze shell command. If you are not familiar with tmux, this is the best - way to run multiple processes in the same container at the same time for example scheduler, - webserver, workers, database console and interactive terminal. - - - #################################################################################################### - - - Detailed usage for command: generate-constraints - - - breeze generate-constraints [FLAGS] - - Generates pinned constraint files with all extras from setup.py. Those files are generated in - files folder - separate files for different python version. Those constraint files when - pushed to orphan constraints-main, constraints-2-0 branches are used - to generate repeatable CI test runs as well as run repeatable production image builds and - upgrades when you want to include installing or updating some of the released providers - released at the time particular airflow version was released. You can use those - constraints to predictably install released Airflow versions. This is mainly used to test - the constraint generation or manually fix them - constraints are pushed to the orphan - branches by a successful scheduled CRON job in CI automatically, but sometimes manual fix - might be needed. - - Flags: - - --generate-constraints-mode GENERATE_CONSTRAINTS_MODE - Mode of generating constraints - determines whether providers are installed when generating - constraints and which version of them (either the ones from sources are used or the ones - from pypi. - - One of: - - source-providers pypi-providers no-providers - - -p, --python PYTHON_MAJOR_MINOR_VERSION - Python version used for the image. This is always major/minor version. - - One of: - - 3.7 3.8 3.9 3.10 - - --platform PLATFORM - Builds image for the platform specified. - - One of: - - linux/amd64 linux/arm64 linux/amd64,linux/arm64 - - - -d, --debian DEBIAN_VERSION - Debian version used for the image. This is always name of the debian distribution version. - - One of: - - bullseye buster - - -v, --verbose - Show verbose information about executed docker, kind, kubectl, helm commands. Useful for - debugging - when you run breeze with --verbose flags you will be able to see the commands - executed under the hood and copy&paste them to your terminal to debug them more easily. - - Note that you can further increase verbosity and see all the commands executed by breeze - by running 'export VERBOSE_COMMANDS="true"' before running breeze. - - --dry-run-docker - Only show docker commands to execute instead of actually executing them. The docker - commands are printed in yellow color. - - - #################################################################################################### - - - Detailed usage for command: initialize-local-virtualenv - - - breeze initialize-local-virtualenv [FLAGS] - - Initializes locally created virtualenv installing all dependencies of Airflow - taking into account the constraints for the version specified. - This local virtualenv can be used to aid auto-completion and IDE support as - well as run unit tests directly from the IDE. You need to have virtualenv - activated before running this command. 
- - Flags: - - -p, --python PYTHON_MAJOR_MINOR_VERSION - Python version used for the image. This is always major/minor version. - - One of: - - 3.7 3.8 3.9 3.10 - - --platform PLATFORM - Builds image for the platform specified. - - One of: - - linux/amd64 linux/arm64 linux/amd64,linux/arm64 - - - -d, --debian DEBIAN_VERSION - Debian version used for the image. This is always name of the debian distribution version. - - One of: - - bullseye buster - - - #################################################################################################### - - - Detailed usage for command: prepare-airflow-packages - - - breeze prepare-airflow-packages [FLAGS] - - Prepares airflow packages (sdist and wheel) in dist folder. Note that - prepare-provider-packages command cleans up the dist folder, so if you want also - to generate provider packages, make sure you run prepare-provider-packages first, - and prepare-airflow-packages second. You can specify optional - --version-suffix-for-pypi flag to generate rc candidates for PyPI packages. - The packages are prepared in dist folder - - Examples: - - 'breeze prepare-airflow-packages --package-format wheel' or - 'breeze prepare-airflow-packages --version-suffix-for-pypi rc1' - - Flags: - - --package-format PACKAGE_FORMAT - - Chooses format of packages to prepare. - - One of: - - both,sdist,wheel - - Default: both - - -S, --version-suffix-for-pypi SUFFIX - Adds optional suffix to the version in the generated provider package. It can be used - to generate rc1/rc2 ... versions of the packages to be uploaded to PyPI. - - -N, --version-suffix-for-svn SUFFIX - Adds optional suffix to the generated names of package. It can be used to generate - rc1/rc2 ... versions of the packages to be uploaded to SVN. - - -v, --verbose - Show verbose information about executed docker, kind, kubectl, helm commands. Useful for - debugging - when you run breeze with --verbose flags you will be able to see the commands - executed under the hood and copy&paste them to your terminal to debug them more easily. - - Note that you can further increase verbosity and see all the commands executed by breeze - by running 'export VERBOSE_COMMANDS="true"' before running breeze. - - --dry-run-docker - Only show docker commands to execute instead of actually executing them. The docker - commands are printed in yellow color. - - - #################################################################################################### - - - Detailed usage for command: setup-autocomplete - - - breeze setup-autocomplete - - Sets up autocomplete for breeze commands. Once you do it you need to re-enter the bash - shell and when typing breeze command will provide autocomplete for - parameters and values. - - - #################################################################################################### - - - Detailed usage for command: start-airflow - - - breeze start-airflow - - Like the Shell command this will enter the interactive shell, but it will also start - automatically the Scheduler and the Webserver. It will leave you in a tmux session where you - can also observe what is happening in your Airflow. - - This is a convenient way to setup a development environment. Your dags will be loaded from the - folder 'files/dags' on your host machine (it could take some times). - - If you want to load default connections and example dags you can use the dedicated flags. 
- - Flags: - - --use-airflow-version AIRFLOW_SPECIFICATION - In CI image, installs Airflow at runtime from PIP released version or using - the installation method specified (sdist, wheel, none). When 'none' is used, - airflow is just removed. In this case airflow package should be added to dist folder - and --use-packages-from-dist flag should be used. - - 2.0.2 2.0.1 2.0.0 wheel sdist none - - --use-packages-from-dist - In CI image, if specified it will look for packages placed in dist folder and - it will install the packages after entering the image. - This is useful for testing provider packages. - - --load-example-dags - Include Airflow example dags. - - --load-default-connections - Include Airflow Default Connections. - - - #################################################################################################### - - - Detailed usage for command: stop - - - breeze stop - - Brings down running docker compose environment. When you start the environment, the docker - containers will continue running so that startup time is shorter. But they take quite a lot of - memory and CPU. This command stops all running containers from the environment. - - Flags: - - --preserve-volumes - Use this flag if you would like to preserve data volumes from the databases used - by the integrations. By default, those volumes are deleted, so when you run 'stop' - or 'restart' commands you start from scratch, but by using this flag you can - preserve them. If you want to delete those volumes after stopping Breeze, just - run the 'breeze stop' again without this flag. - - - #################################################################################################### - - - Detailed usage for command: restart - - - breeze restart [FLAGS] - - Restarts running docker compose environment. When you restart the environment, the docker - containers will be restarted. That includes cleaning up the databases. This is - especially useful if you switch between different versions of Airflow. - - Flags: - - --preserve-volumes - Use this flag if you would like to preserve data volumes from the databases used - by the integrations. By default, those volumes are deleted, so when you run 'stop' - or 'restart' commands you start from scratch, but by using this flag you can - preserve them. If you want to delete those volumes after stopping Breeze, just - run the 'breeze stop' again without this flag. - - - #################################################################################################### - - - Detailed usage for command: toggle-suppress-cheatsheet - - - breeze toggle-suppress-cheatsheet - - Toggles on/off cheatsheet displayed before starting bash shell. - - - #################################################################################################### - - - Detailed usage for command: toggle-suppress-asciiart - - - breeze toggle-suppress-asciiart - - Toggles on/off asciiart displayed before starting bash shell. - - - #################################################################################################### - - - Detailed usage for command: docker-compose - - - breeze docker-compose [FLAGS] COMMAND [-- ] - - Run docker-compose command instead of entering the environment. Use 'help' as command - to see available commands. The passed after -- are treated - as additional options passed to docker-compose. For example - - 'breeze docker-compose pull -- --ignore-pull-failures' - - Flags: - - -p, --python PYTHON_MAJOR_MINOR_VERSION - Python version used for the image. 
This is always major/minor version. - - One of: - - 3.7 3.8 3.9 3.10 - - --platform PLATFORM - Builds image for the platform specified. - - One of: - - linux/amd64 linux/arm64 linux/amd64,linux/arm64 - - - -d, --debian DEBIAN_VERSION - Debian version used for the image. This is always name of the debian distribution version. - - One of: - - bullseye buster - - -b, --backend BACKEND - Backend to use for tests - it determines which database is used. - One of: - - sqlite mysql postgres mssql - - Default: sqlite - - --postgres-version POSTGRES_VERSION - Postgres version used. One of: - - 10 11 12 13 - - --mysql-version MYSQL_VERSION - MySql version used. One of: - - 5.7 8 - - --mssql-version MSSQL_VERSION - MSSql version used. One of: - - 2017-latest 2019-latest - - -v, --verbose - Show verbose information about executed docker, kind, kubectl, helm commands. Useful for - debugging - when you run breeze with --verbose flags you will be able to see the commands - executed under the hood and copy&paste them to your terminal to debug them more easily. - - Note that you can further increase verbosity and see all the commands executed by breeze - by running 'export VERBOSE_COMMANDS="true"' before running breeze. - - --dry-run-docker - Only show docker commands to execute instead of actually executing them. The docker - commands are printed in yellow color. - - - #################################################################################################### - - - Detailed usage for command: kind-cluster - - - breeze kind-cluster [FLAGS] OPERATION - - Manages host-side Kind Kubernetes cluster that is used to run Kubernetes integration tests. - It allows to start/stop/restart/status the Kind Kubernetes cluster and deploy Airflow to it. - This enables you to run tests inside the breeze environment with latest airflow images. - Note that in case of deploying airflow, the first step is to rebuild the image and loading it - to the cluster so you can also pass appropriate build image flags that will influence - rebuilding the production image. Operation is one of: - - start stop restart status deploy test shell k9s - - The last two operations - shell and k9s allow you to perform interactive testing with - kubernetes tests. You can enter the shell from which you can run kubernetes tests and in - another terminal you can start the k9s CLI to debug kubernetes instance. It is an easy - way to debug the kubernetes deployments. - - You can read more about k9s at https://k9scli.io/ - - Flags: - - -p, --python PYTHON_MAJOR_MINOR_VERSION - Python version used for the image. This is always major/minor version. - - One of: - - 3.7 3.8 3.9 3.10 - - --platform PLATFORM - Builds image for the platform specified. - - One of: - - linux/amd64 linux/arm64 linux/amd64,linux/arm64 - - - -d, --debian DEBIAN_VERSION - Debian version used for the image. This is always name of the debian distribution version. - - One of: - - bullseye buster - - -F, --force-build-images - Forces building of the local docker images. The images are rebuilt - automatically for the first time or when changes are detected in - package-related files, but you can force it using this flag. - - --cleanup-docker-context-files - Removes whl and tar.gz files created in docker-context-files before running the command. - In case there are some files there it unnecessarily increases the context size and - makes the COPY . always invalidated - if you happen to have those files when you build your - image. 
- - Customization options: - - -E, --extras EXTRAS - Extras to pass to build images The default are different for CI and production images: - - CI image: - devel_ci - - Production image: - amazon,async,celery,cncf.kubernetes,dask,docker,elasticsearch,ftp,google,google_auth, - grpc,hashicorp,http,ldap,microsoft.azure,mysql,odbc,pandas,postgres,redis,sendgrid, - sftp,slack,ssh,statsd,virtualenv - - --image-tag TAG - Additional tag in the image. - - --skip-installing-airflow-providers-from-sources - By default 'pip install' in Airflow 2.0 installs only the provider packages that - are needed by the extras. When you build image during the development (which is - default in Breeze) all providers are installed by default from sources. - You can disable it by adding this flag but then you have to install providers from - wheel packages via --use-packages-from-dist flag. - - --disable-pypi-when-building - Disable installing Airflow from pypi when building. If you use this flag and want - to install Airflow, you have to install it from packages placed in - 'docker-context-files' and use --install-from-docker-context-files flag. - - --additional-extras ADDITIONAL_EXTRAS - Additional extras to pass to build images The default is no additional extras. - - --additional-python-deps ADDITIONAL_PYTHON_DEPS - Additional python dependencies to use when building the images. - - --dev-apt-command DEV_APT_COMMAND - The basic command executed before dev apt deps are installed. - - --additional-dev-apt-command ADDITIONAL_DEV_APT_COMMAND - Additional command executed before dev apt deps are installed. - - --additional-dev-apt-deps ADDITIONAL_DEV_APT_DEPS - Additional apt dev dependencies to use when building the images. - - --dev-apt-deps DEV_APT_DEPS - The basic apt dev dependencies to use when building the images. - - --additional-dev-apt-deps ADDITIONAL_DEV_DEPS - Additional apt dev dependencies to use when building the images. - - --additional-dev-apt-envs ADDITIONAL_DEV_APT_ENVS - Additional environment variables set when adding dev dependencies. - - --runtime-apt-command RUNTIME_APT_COMMAND - The basic command executed before runtime apt deps are installed. - - --additional-runtime-apt-command ADDITIONAL_RUNTIME_APT_COMMAND - Additional command executed before runtime apt deps are installed. - - --runtime-apt-deps ADDITIONAL_RUNTIME_APT_DEPS - The basic apt runtime dependencies to use when building the images. - - --additional-runtime-apt-deps ADDITIONAL_RUNTIME_DEPS - Additional apt runtime dependencies to use when building the images. - - --additional-runtime-apt-envs ADDITIONAL_RUNTIME_APT_DEPS - Additional environment variables set when adding runtime dependencies. - - Build options: - - --disable-mysql-client-installation - Disables installation of the mysql client which might be problematic if you are building - image in controlled environment. Only valid for production image. - - --disable-mssql-client-installation - Disables installation of the mssql client which might be problematic if you are building - image in controlled environment. Only valid for production image. - - --constraints-location - Url to the constraints file. In case of the production image it can also be a path to the - constraint file placed in 'docker-context-files' folder, in which case it has to be - in the form of '/docker-context-files/' - - --disable-pip-cache - Disables GitHub PIP cache during the build. Useful if GitHub is not reachable during build. 
- - --install-from-docker-context-files - This flag is used during image building. If it is used additionally to installing - Airflow from PyPI, the packages are installed from the .whl and .tar.gz packages placed - in the 'docker-context-files' folder. The same flag can be used during entering the image in - the CI image - in this case also the .whl and .tar.gz files will be installed automatically - - -C, --force-clean-images - Force build images with cache disabled. This will remove the pulled or build images - and start building images from scratch. This might take a long time. - - -r, --skip-rebuild-check - Skips checking image for rebuilds. It will use whatever image is available locally/pulled. - - -L, --build-cache-local - Uses local cache to build images. No pulled images will be used, but results of local - builds in the Docker cache are used instead. This will take longer than when the pulled - cache is used for the first time, but subsequent '--build-cache-local' builds will be - faster as they will use mostly the locally build cache. - - This is default strategy used by the Production image builds. - - -U, --build-cache-pulled - Uses images pulled from GitHub Container Registry to build images. - Those builds are usually faster than when ''--build-cache-local'' with the exception if - the registry images are not yet updated. The images are updated after successful merges - to main. - - This is default strategy used by the CI image builds. - - -X, --build-cache-disabled - Disables cache during docker builds. This is useful if you want to make sure you want to - rebuild everything from scratch. - - This strategy is used by default for both Production and CI images for the scheduled - (nightly) builds in CI. - - - #################################################################################################### - - - Detailed usage for command: prepare-provider-documentation - - - breeze prepare-provider-documentation [FLAGS] [PACKAGE_ID ...] - - Prepares documentation files for provider packages. - - The command is optionally followed by the list of packages to generate readme for. - If the first parameter is not formatted as a date, then today is regenerated. - If no packages are specified, readme for all packages are generated. - If no date is specified, current date + 3 days is used (allowing for PMC votes to pass). - - Examples: - - 'breeze prepare-provider-documentation' or - 'breeze prepare-provider-documentation --version-suffix-for-pypi rc1' - - General form: - - 'breeze prepare-provider-documentation ...' - - * is usually directory in the airflow/providers folder (for example - 'google' but in several cases, it might be one level deeper separated with - '.' for example 'apache.hive' - - Flags: - - -S, --version-suffix-for-pypi SUFFIX - Adds optional suffix to the version in the generated provider package. It can be used - to generate rc1/rc2 ... versions of the packages to be uploaded to PyPI. - - -N, --version-suffix-for-svn SUFFIX - Adds optional suffix to the generated names of package. It can be used to generate - rc1/rc2 ... versions of the packages to be uploaded to SVN. - - --package-format PACKAGE_FORMAT - - Chooses format of packages to prepare. - - One of: - - both,sdist,wheel - - Default: both - - --non-interactive - - Runs the command in non-interactive mode. - - --generate-providers-issue - - Generate providers issue that should be created. - - -v, --verbose - Show verbose information about executed docker, kind, kubectl, helm commands. 
Useful for - debugging - when you run breeze with --verbose flags you will be able to see the commands - executed under the hood and copy&paste them to your terminal to debug them more easily. - - Note that you can further increase verbosity and see all the commands executed by breeze - by running 'export VERBOSE_COMMANDS="true"' before running breeze. - - --dry-run-docker - Only show docker commands to execute instead of actually executing them. The docker - commands are printed in yellow color. - - - #################################################################################################### - - - Detailed usage for command: prepare-provider-packages - - - breeze prepare-provider-packages [FLAGS] [PACKAGE_ID ...] - - Prepares provider packages. You can provide (after --) optional list of packages to prepare. - If no packages are specified, readme for all packages are generated. You can specify optional - --version-suffix-for-svn flag to generate rc candidate packages to upload to SVN or - --version-suffix-for-pypi flag to generate rc candidates for PyPI packages. You can also - provide both suffixes in case you prepare alpha/beta versions. The packages are prepared in - dist folder. Note that this command also cleans up dist folder before generating the packages - so that you do not have accidental files there. This will delete airflow package if it is - prepared there so make sure you run prepare-provider-packages first, - and prepare-airflow-packages second. - - Examples: - - 'breeze prepare-provider-packages' or - 'breeze prepare-provider-packages google' or - 'breeze prepare-provider-packages --package-format wheel google' or - 'breeze prepare-provider-packages --version-suffix-for-svn rc1 http google amazon' or - 'breeze prepare-provider-packages --version-suffix-for-pypi rc1 http google amazon' - 'breeze prepare-provider-packages --version-suffix-for-pypi a1 - --version-suffix-for-svn a1 http google amazon' - - General form: - - 'breeze prepare-provider-packages [--package-format PACKAGE_FORMAT] \ - [--version-suffix-for-svn|--version-suffix-for-pypi] ...' - - * is usually directory in the airflow/providers folder (for example - 'google'), but in several cases, it might be one level deeper separated with '.' - for example 'apache.hive' - - Flags: - - --package-format PACKAGE_FORMAT - - Chooses format of packages to prepare. - - One of: - - both,sdist,wheel - - Default: both - - -S, --version-suffix-for-pypi SUFFIX - Adds optional suffix to the version in the generated provider package. It can be used - to generate rc1/rc2 ... versions of the packages to be uploaded to PyPI. - - -N, --version-suffix-for-svn SUFFIX - Adds optional suffix to the generated names of package. It can be used to generate - rc1/rc2 ... versions of the packages to be uploaded to SVN. - - -v, --verbose - Show verbose information about executed docker, kind, kubectl, helm commands. Useful for - debugging - when you run breeze with --verbose flags you will be able to see the commands - executed under the hood and copy&paste them to your terminal to debug them more easily. - - Note that you can further increase verbosity and see all the commands executed by breeze - by running 'export VERBOSE_COMMANDS="true"' before running breeze. - - --dry-run-docker - Only show docker commands to execute instead of actually executing them. The docker - commands are printed in yellow color. 
- - - #################################################################################################### - - - Detailed usage for command: static-check - - - breeze static-check [FLAGS] static_check [-- ] - - Run selected static checks for currently changed files. You should specify static check that - you would like to run or 'all' to run all checks. One of: - - all airflow-config-yaml airflow-providers-available airflow-provider-yaml-files-ok - autoflake base-operator black blacken-docs boring-cyborg build - build-providers-dependencies chart-schema-lint capitalized-breeze - changelog-duplicates check-apache-license check-builtin-literals - check-executables-have-shebangs check-extras-order check-hooks-apply - check-integrations check-merge-conflict check-xml daysago-import-check - debug-statements detect-private-key docstring-params doctoc dont-use-safe-filter - end-of-file-fixer fix-encoding-pragma flake8 flynt forbidden-xcom-get-value - codespell forbid-tabs helm-lint identity incorrect-use-of-LoggingMixin - insert-license isort json-schema language-matters lint-dockerfile lint-openapi - markdownlint mermaid migration-reference mixed-line-ending mypy mypy-helm - no-providers-in-core-examples no-relative-imports persist-credentials-disabled - pre-commit-descriptions pre-commit-hook-names pretty-format-json - provide-create-sessions providers-changelogs providers-init-file - providers-subpackages-init-file provider-yamls pydevd pydocstyle python-no-log-warn - pyupgrade restrict-start_date rst-backticks setup-order setup-extra-packages - shellcheck sort-in-the-wild sort-spelling-wordlist stylelint trailing-whitespace - ui-lint update-breeze-file update-extras update-local-yml-file update-setup-cfg-file - update-supported-versions update-versions vendor-k8s-json-schema - verify-db-migrations-documented version-sync www-lint yamllint yesqa - - You can pass extra arguments including options to the pre-commit framework as - passed after --. For example: - - 'breeze static-check mypy' or - 'breeze static-check mypy -- --files tests/core.py' - 'breeze static-check mypy -- --all-files' - - To check all files that differ between you current branch and main run: - - 'breeze static-check all -- --from-ref $(git merge-base main HEAD) --to-ref HEAD' - - To check all files that are in the HEAD commit run: - - 'breeze static-check mypy -- --from-ref HEAD^ --to-ref HEAD' - - - You can see all the options by adding --help EXTRA_ARG: - - 'breeze static-check mypy -- --help' - - - #################################################################################################### - - - Detailed usage for command: tests - - - breeze tests [FLAGS] [TEST_TARGET ..] [-- ] - - Run the specified unit test target. There might be multiple - targets specified separated with comas. The passed after -- are treated - as additional options passed to pytest. You can pass 'tests' as target to - run all tests. For example: - - 'breeze tests tests/core/test_core.py -- --logging-level=DEBUG' - 'breeze tests tests - - Flags: - - --test-type TEST_TYPE - Type of the test to run. One of: - - All,Always,Core,Providers,API,CLI,Integration,Other,WWW,Postgres,MySQL,Helm, - Quarantined - - Default: All - - - #################################################################################################### - - - Detailed usage for command: flags - - - Explains in detail all the flags that can be used with breeze. 
- - - #################################################################################################### - - - Detailed usage for command: help - - - breeze help - - Shows general help message for all commands. - - - #################################################################################################### - - - Detailed usage for command: help-all - - - breeze help-all - - Shows detailed help for all commands and flags. - - - #################################################################################################### - - - #################################################################################################### - - Summary of all flags supported by Breeze: - - **************************************************************************************************** - Choose Airflow variant - - -p, --python PYTHON_MAJOR_MINOR_VERSION - Python version used for the image. This is always major/minor version. - - One of: - - 3.7 3.8 3.9 3.10 - - --platform PLATFORM - Builds image for the platform specified. - - One of: - - linux/amd64 linux/arm64 linux/amd64,linux/arm64 - - - -d, --debian DEBIAN_VERSION - Debian version used for the image. This is always name of the debian distribution version. - - One of: - - bullseye buster - - **************************************************************************************************** - Choose backend to run for Airflow - - -b, --backend BACKEND - Backend to use for tests - it determines which database is used. - One of: - - sqlite mysql postgres mssql - - Default: sqlite - - --postgres-version POSTGRES_VERSION - Postgres version used. One of: - - 10 11 12 13 - - --mysql-version MYSQL_VERSION - MySql version used. One of: - - 5.7 8 - - --mssql-version MSSQL_VERSION - MSSql version used. One of: - - 2017-latest 2019-latest - - **************************************************************************************************** - Enable production image - - -I, --production-image - Use production image for entering the environment and builds (not for tests). - - **************************************************************************************************** - Additional actions executed while entering breeze - - -d, --db-reset - Resets the database at entry to the environment. It will drop all the tables - and data and recreate the DB from scratch even if 'restart' command was not used. - Combined with 'restart' command it enters the environment in the state that is - ready to start Airflow webserver/scheduler/worker. Without the switch, the database - does not have any tables and you need to run reset db manually. - - -i, --integration INTEGRATION - Integration to start during tests - it determines which integrations are started - for integration tests. There can be more than one integration started, or all to - start all integrations. Selected integrations are not saved for future execution. - One of: - - cassandra kerberos mongo openldap pinot rabbitmq redis statsd trino all - - --init-script INIT_SCRIPT_FILE - Initialization script name - Sourced from files/airflow-breeze-config. Default value - init.sh. It will be executed after the environment is configured and started. - - **************************************************************************************************** - Additional actions executed while starting Airflow - - --load-example-dags - Include Airflow example dags. - - --load-default-connections - Include Airflow Default Connections. 
- - **************************************************************************************************** - Cleanup options when stopping Airflow - - --preserve-volumes - Use this flag if you would like to preserve data volumes from the databases used - by the integrations. By default, those volumes are deleted, so when you run 'stop' - or 'restart' commands you start from scratch, but by using this flag you can - preserve them. If you want to delete those volumes after stopping Breeze, just - run the 'breeze stop' again without this flag. - - **************************************************************************************************** - Kind kubernetes and Kubernetes tests configuration(optional) - - Configuration for the KinD Kubernetes cluster and tests: - - -K, --kubernetes-mode KUBERNETES_MODE - Kubernetes mode - only used in case one of kind-cluster commands is used. - One of: - - image - - Default: image - - -V, --kubernetes-version KUBERNETES_VERSION - Kubernetes version - only used in case one of kind-cluster commands is used. - One of: - - v1.21.1 v1.20.2 - - Default: v1.21.1 - - --kind-version KIND_VERSION - Kind version - only used in case one of kind-cluster commands is used. - One of: - - v0.11.1 - - Default: v0.11.1 - - --helm-version HELM_VERSION - Helm version - only used in case one of kind-cluster commands is used. - One of: - - v3.6.3 - - Default: v3.6.3 - - --executor EXECUTOR - Executor to use in a kubernetes cluster. - One of: - - KubernetesExecutor CeleryExecutor LocalExecutor CeleryKubernetesExecutor - LocalKubernetesExecutor - - Default: KubernetesExecutor - - **************************************************************************************************** - Manage mounting local files - - -l, --skip-mounting-local-sources - Skips mounting local volume with sources - you get exactly what is in the - docker image rather than your current local sources of Airflow. - - **************************************************************************************************** - Assume answers to questions - - -y, --assume-yes - Assume 'yes' answer to all questions. - - -n, --assume-no - Assume 'no' answer to all questions. - - -q, --assume-quit - Assume 'quit' answer to all questions. - - **************************************************************************************************** - Install different Airflow version during PROD image build - - -a, --install-airflow-version INSTALL_AIRFLOW_VERSION - Uses different version of Airflow when building PROD image. - - 2.0.2 2.0.1 2.0.0 wheel sdist - - -t, --install-airflow-reference INSTALL_AIRFLOW_REFERENCE - Installs Airflow directly from reference in GitHub when building PROD image. - This can be a GitHub branch like main or v2-2-test, or a tag like 2.2.0rc1. - - --installation-method INSTALLATION_METHOD - Method of installing Airflow in PROD image - either from the sources ('.') - or from package 'apache-airflow' to install from PyPI. - Default in Breeze is to install from sources. One of: - - . apache-airflow - - --upgrade-to-newer-dependencies - Upgrades PIP packages to latest versions available without looking at the constraints. - - **************************************************************************************************** - Use different Airflow version at runtime in CI image - - --use-airflow-version AIRFLOW_SPECIFICATION - In CI image, installs Airflow at runtime from PIP released version or using - the installation method specified (sdist, wheel, none). 
When 'none' is used, - airflow is just removed. In this case airflow package should be added to dist folder - and --use-packages-from-dist flag should be used. - - 2.0.2 2.0.1 2.0.0 wheel sdist none - - --use-packages-from-dist - In CI image, if specified it will look for packages placed in dist folder and - it will install the packages after entering the image. - This is useful for testing provider packages. - - **************************************************************************************************** - Credentials - - -f, --forward-credentials - Forwards host credentials to docker container. Use with care as it will make - your credentials available to everything you install in Docker. - - **************************************************************************************************** - Flags for building Docker images (both CI and production) - - -F, --force-build-images - Forces building of the local docker images. The images are rebuilt - automatically for the first time or when changes are detected in - package-related files, but you can force it using this flag. - - --cleanup-docker-context-files - Removes whl and tar.gz files created in docker-context-files before running the command. - In case there are some files there it unnecessarily increases the context size and - makes the COPY . always invalidated - if you happen to have those files when you build your - image. - - Customization options: - - -E, --extras EXTRAS - Extras to pass to build images The default are different for CI and production images: - - CI image: - devel_ci - - Production image: - amazon,async,celery,cncf.kubernetes,dask,docker,elasticsearch,ftp,google,google_auth, - grpc,hashicorp,http,ldap,microsoft.azure,mysql,odbc,pandas,postgres,redis,sendgrid, - sftp,slack,ssh,statsd,virtualenv - - --image-tag TAG - Additional tag in the image. - - --skip-installing-airflow-providers-from-sources - By default 'pip install' in Airflow 2.0 installs only the provider packages that - are needed by the extras. When you build image during the development (which is - default in Breeze) all providers are installed by default from sources. - You can disable it by adding this flag but then you have to install providers from - wheel packages via --use-packages-from-dist flag. - - --disable-pypi-when-building - Disable installing Airflow from pypi when building. If you use this flag and want - to install Airflow, you have to install it from packages placed in - 'docker-context-files' and use --install-from-docker-context-files flag. - - --additional-extras ADDITIONAL_EXTRAS - Additional extras to pass to build images The default is no additional extras. - - --additional-python-deps ADDITIONAL_PYTHON_DEPS - Additional python dependencies to use when building the images. - - --dev-apt-command DEV_APT_COMMAND - The basic command executed before dev apt deps are installed. - - --additional-dev-apt-command ADDITIONAL_DEV_APT_COMMAND - Additional command executed before dev apt deps are installed. - - --additional-dev-apt-deps ADDITIONAL_DEV_APT_DEPS - Additional apt dev dependencies to use when building the images. - - --dev-apt-deps DEV_APT_DEPS - The basic apt dev dependencies to use when building the images. - - --additional-dev-apt-deps ADDITIONAL_DEV_DEPS - Additional apt dev dependencies to use when building the images. - - --additional-dev-apt-envs ADDITIONAL_DEV_APT_ENVS - Additional environment variables set when adding dev dependencies. 
- - --runtime-apt-command RUNTIME_APT_COMMAND - The basic command executed before runtime apt deps are installed. - - --additional-runtime-apt-command ADDITIONAL_RUNTIME_APT_COMMAND - Additional command executed before runtime apt deps are installed. - - --runtime-apt-deps ADDITIONAL_RUNTIME_APT_DEPS - The basic apt runtime dependencies to use when building the images. - - --additional-runtime-apt-deps ADDITIONAL_RUNTIME_DEPS - Additional apt runtime dependencies to use when building the images. - - --additional-runtime-apt-envs ADDITIONAL_RUNTIME_APT_DEPS - Additional environment variables set when adding runtime dependencies. - - Build options: - - --disable-mysql-client-installation - Disables installation of the mysql client which might be problematic if you are building - image in controlled environment. Only valid for production image. - - --disable-mssql-client-installation - Disables installation of the mssql client which might be problematic if you are building - image in controlled environment. Only valid for production image. - - --constraints-location - Url to the constraints file. In case of the production image it can also be a path to the - constraint file placed in 'docker-context-files' folder, in which case it has to be - in the form of '/docker-context-files/' - - --disable-pip-cache - Disables GitHub PIP cache during the build. Useful if GitHub is not reachable during build. - - --install-from-docker-context-files - This flag is used during image building. If it is used additionally to installing - Airflow from PyPI, the packages are installed from the .whl and .tar.gz packages placed - in the 'docker-context-files' folder. The same flag can be used during entering the image in - the CI image - in this case also the .whl and .tar.gz files will be installed automatically - - -C, --force-clean-images - Force build images with cache disabled. This will remove the pulled or build images - and start building images from scratch. This might take a long time. - - -r, --skip-rebuild-check - Skips checking image for rebuilds. It will use whatever image is available locally/pulled. - - -L, --build-cache-local - Uses local cache to build images. No pulled images will be used, but results of local - builds in the Docker cache are used instead. This will take longer than when the pulled - cache is used for the first time, but subsequent '--build-cache-local' builds will be - faster as they will use mostly the locally build cache. - - This is default strategy used by the Production image builds. - - -U, --build-cache-pulled - Uses images pulled from GitHub Container Registry to build images. - Those builds are usually faster than when ''--build-cache-local'' with the exception if - the registry images are not yet updated. The images are updated after successful merges - to main. - - This is default strategy used by the CI image builds. - - -X, --build-cache-disabled - Disables cache during docker builds. This is useful if you want to make sure you want to - rebuild everything from scratch. - - This strategy is used by default for both Production and CI images for the scheduled - (nightly) builds in CI. - - **************************************************************************************************** - Flags for pulling/pushing Docker images (both CI and production) - - -g, --github-repository GITHUB_REPOSITORY - GitHub repository used to pull, push images. - Default: apache/airflow. - - - - - -s, --github-image-id COMMIT_SHA - of the image. 
Images in GitHub registry are stored with those - to be able to easily find the image for particular CI runs. Once you know the - , you can specify it in github-image-id flag and Breeze will - automatically pull and use that image so that you can easily reproduce a problem - that occurred in CI. - - Default: latest. - - **************************************************************************************************** - Flags for running tests - - --test-type TEST_TYPE - Type of the test to run. One of: - - All,Always,Core,Providers,API,CLI,Integration,Other,WWW,Postgres,MySQL,Helm, - Quarantined - - Default: All - - **************************************************************************************************** - Flags for generation of the provider packages - - -S, --version-suffix-for-pypi SUFFIX - Adds optional suffix to the version in the generated provider package. It can be used - to generate rc1/rc2 ... versions of the packages to be uploaded to PyPI. - - -N, --version-suffix-for-svn SUFFIX - Adds optional suffix to the generated names of package. It can be used to generate - rc1/rc2 ... versions of the packages to be uploaded to SVN. - - **************************************************************************************************** - Increase verbosity of the scripts - - -v, --verbose - Show verbose information about executed docker, kind, kubectl, helm commands. Useful for - debugging - when you run breeze with --verbose flags you will be able to see the commands - executed under the hood and copy&paste them to your terminal to debug them more easily. - - Note that you can further increase verbosity and see all the commands executed by breeze - by running 'export VERBOSE_COMMANDS="true"' before running breeze. - - --dry-run-docker - Only show docker commands to execute instead of actually executing them. The docker - commands are printed in yellow color. - - **************************************************************************************************** - Print detailed help message - - -h, --help - Shows detailed help message for the command specified. +.. code-block:: bash - .. END BREEZE HELP MARKER + pipx uninstall apache-airflow-breeze diff --git a/Breeze2 b/Breeze2 deleted file mode 100755 index 9591f6ba7d748..0000000000000 --- a/Breeze2 +++ /dev/null @@ -1,83 +0,0 @@ -#!/usr/bin/env python3 -# isort: skip -import os -import sys - -# Python <3.4 does not have pathlib -from venv import EnvBuilder - -if sys.version_info.major != 3 or sys.version_info.minor < 7: - print("ERROR! Make sure you use Python 3.7+ !!") - sys.exit(1) - -import subprocess -from os import execv -from pathlib import Path - -if getattr(sys, 'frozen', False): - # If the application is run as a bundle, the PyInstaller bootloader - # extends the sys module by a flag frozen=True and sets the temporary app - # path into variable _MEIPASS' and sys.executable is Breeze's executable path. 
- AIRFLOW_SOURCES_DIR = Path(sys.executable).parent.resolve() -else: - AIRFLOW_SOURCES_DIR = Path(__file__).parent.resolve() -BUILD_DIR = AIRFLOW_SOURCES_DIR / ".build" -BUILD_BREEZE_DIR = BUILD_DIR / "breeze2" -BUILD_BREEZE_CFG_SAVED = BUILD_BREEZE_DIR / "setup.cfg.saved" -BUILD_BREEZE_VENV_DIR = BUILD_BREEZE_DIR / "venv" -BUILD_BREEZE_VENV_BIN_DIR = BUILD_BREEZE_VENV_DIR / ("Scripts" if os.name == 'nt' else "bin") -BUILD_BREEZE_VENV_PYTHON = BUILD_BREEZE_VENV_BIN_DIR / "python" -BUILD_BREEZE_VENV_BREEZE = BUILD_BREEZE_VENV_BIN_DIR / "Breeze2" - -BREEZE_SOURCE_PATH = AIRFLOW_SOURCES_DIR / "dev" / "breeze" -BREEZE_SETUP_CFG_PATH = BREEZE_SOURCE_PATH / "setup.cfg" - -BUILD_BREEZE_DIR.mkdir(parents=True, exist_ok=True) - - -def needs_installation() -> bool: - """Returns true if Breeze's virtualenv needs (re)installation""" - if not BUILD_BREEZE_VENV_DIR.exists() or not BUILD_BREEZE_CFG_SAVED.exists(): - return True - return BREEZE_SETUP_CFG_PATH.read_text() != BUILD_BREEZE_CFG_SAVED.read_text() - - -def save_config(): - """Saves cfg file to virtualenv to check if there is a need for reinstallation of the virtualenv""" - BUILD_BREEZE_CFG_SAVED.write_text(BREEZE_SETUP_CFG_PATH.read_text()) - - -if needs_installation(): - print(f"(Re)Installing Breeze's virtualenv in {BUILD_BREEZE_VENV_DIR}") - try: - EnvBuilder(system_site_packages=False, upgrade=True, with_pip=True, prompt="breeze").create( - str(BUILD_BREEZE_VENV_DIR) - ) - except Exception as e: - # in some cases (mis-configured python) the venv creation might not work via API - # (ensurepip missing). This is the case in case of default MacOS Python and Python executable - # Bundled in Windows executable, In this case we fallback to running venv as a tool using default - # Python3 found on path (in case of Windows Bundled exe, you don't even have a current - # interpreted executable available, because Python interpreter is executed through a library. - # and sys.executable points to the Bundled exe file. - BUILD_BREEZE_VENV_DIR.mkdir(parents=True, exist_ok=True) - subprocess.run(["python3", "-m", "venv", f"{BUILD_BREEZE_VENV_DIR}"], check=True) - if os.name == 'nt': - subprocess.run( - [f"{BUILD_BREEZE_VENV_PYTHON}", "-m", "pip", "install", "--upgrade", "-e", "."], - cwd=BREEZE_SOURCE_PATH, - check=True, - ) - else: - subprocess.run( - [f"{BUILD_BREEZE_VENV_PYTHON}", "-m", "pip", "install", "--upgrade", "-e", "."], - cwd=BREEZE_SOURCE_PATH, - check=True, - ) - save_config() - -if os.name == 'nt': - # This is the best way of running it on Windows, though it leaves the original process hanging around - subprocess.run([f"{BUILD_BREEZE_VENV_BREEZE}"] + sys.argv[1:], check=True) -else: - execv(f"{BUILD_BREEZE_VENV_BREEZE}", [f"{BUILD_BREEZE_VENV_BREEZE}"] + sys.argv[1:]) diff --git a/CI.rst b/CI.rst index 43ea218a69a7e..f24639271e977 100644 --- a/CI.rst +++ b/CI.rst @@ -59,14 +59,14 @@ Container Registry used as cache We are using GitHub Container Registry to store the results of the ``Build Images`` workflow which is used in the ``Tests`` workflow. -Currently in main version of Airflow we run tests in 4 different versions of Python (3.6, 3.7, 3.8, 3.9) +Currently in main version of Airflow we run tests in 4 different versions of Python (3.7, 3.8, 3.9, 3.10) which means that we have to build 8 images (4 CI ones and 4 PROD ones). Yet we run around 12 jobs with each of the CI images. That is a lot of time to just build the environment to run. Therefore we are utilising ``pull_request_target`` feature of GitHub Actions. 
This feature allows to run a separate, independent workflow, when the main workflow is run - this separate workflow is different than the main one, because by default it runs using ``main`` version -of the sources but also - and most of all - that it has WRITE access to the Github Container Image registry. +of the sources but also - and most of all - that it has WRITE access to the GitHub Container Image registry. This is especially important in our case where Pull Requests to Airflow might come from any repository, and it would be a huge security issue if anyone from outside could @@ -158,7 +158,7 @@ You can use those variables when you try to reproduce the build locally. | | | | | builds it forces rebuild, regardless if it | | | | | | is determined to be needed. | +-----------------------------------------+-------------+--------------+------------+-------------------------------------------------+ -| ``FORCE_ANSWER_TO_QUESTIONS`` | | yes | yes | This variable determines if answer to questions | +| ``ANSWER`` | | yes | yes | This variable determines if answer to questions | | | | | | during the build process should be | | | | | | automatically given. For local development, | | | | | | the user is occasionally asked to provide | @@ -186,13 +186,6 @@ You can use those variables when you try to reproduce the build locally. +-----------------------------------------+-------------+--------------+------------+-------------------------------------------------+ | ``HOST_OS`` | | Linux | Linux | OS of the Host (Darwin/Linux). | +-----------------------------------------+-------------+--------------+------------+-------------------------------------------------+ -| ``HOST_HOME`` | | | | Home directory on the host. | -+-----------------------------------------+-------------+--------------+------------+-------------------------------------------------+ -| Version suffix variables | -+-----------------------------------------+-------------+--------------+------------+-------------------------------------------------+ -| ``VERSION_SUFFIX_FOR_PYPI`` | | | | Version suffix used during provider | -| | | | | package preparation for PyPI builds. | -+-----------------------------------------+-------------+--------------+------------+-------------------------------------------------+ | Git variables | +-----------------------------------------+-------------+--------------+------------+-------------------------------------------------+ | ``COMMIT_SHA`` | | GITHUB_SHA | GITHUB_SHA | SHA of the commit of the build is run | @@ -491,7 +484,7 @@ running, GitHub Actions will cancel the old workflow run automatically. Build Images Workflow --------------------- -This workflow builds images for the CI Workflow. +This workflow builds images for the CI Workflow for Pull Requests coming from forks. It's a special type of workflow: ``pull_request_target`` which means that it is triggered when a pull request is opened. This also means that the workflow has Write permission to push to the GitHub registry the images @@ -499,8 +492,14 @@ used by CI jobs which means that the images can be built only once and reused by (including the matrix jobs). We've implemented it so that the ``Tests`` workflow waits until the images are built by the ``Build Images`` workflow before running. -This workflow is also triggered on normal pushes to our "main" branches, i.e. after a -pull request is merged and whenever ``scheduled`` run is triggered. +Those "Build Image" steps are skipped in case Pull Requests do not come from "forks" (i.e. 
those +are internal PRs for Apache Airflow repository. This is because in case of PRs coming from +Apache Airflow (only committers can create those) the "pull_request" workflows have enough +permission to push images to GitHub Registry. + +This workflow is not triggered on normal pushes to our "main" branches, i.e. after a +pull request is merged and whenever ``scheduled`` run is triggered. Again in this case the "CI" workflow +has enough permissions to push the images. In this case we simply do not run this workflow. The workflow has the following jobs: @@ -662,9 +661,9 @@ For example knowing that the CI job was for commit ``cd27124534b46c9688a1d89e75f .. code-block:: bash - docker pull ghcr.io/apache/airflow/main/ci/python3.6:cd27124534b46c9688a1d89e75fcd137ab5137e3 + docker pull ghcr.io/apache/airflow/main/ci/python3.7:cd27124534b46c9688a1d89e75fcd137ab5137e3 - docker run -it ghcr.io/apache/airflow/main/ci/python3.6:cd27124534b46c9688a1d89e75fcd137ab5137e3 + docker run -it ghcr.io/apache/airflow/main/ci/python3.7:cd27124534b46c9688a1d89e75fcd137ab5137e3 But you usually need to pass more variables and complex setup if you want to connect to a database or @@ -674,7 +673,7 @@ cd27124534b46c9688a1d89e75fcd137ab5137e3, in python 3.8 environment you can run: .. code-block:: bash - ./breeze --github-image-id cd27124534b46c9688a1d89e75fcd137ab5137e3 --python 3.8 + ./breeze-legacy --github-image-id cd27124534b46c9688a1d89e75fcd137ab5137e3 --python 3.8 You will be dropped into a shell with the exact version that was used during the CI run and you will be able to run pytest tests manually, easily reproducing the environment that was used in CI. Note that in @@ -701,13 +700,7 @@ In order to add a new version the following operations should be done (example u .. code-block:: bash - ./breeze build-image --python 3.10 - -* push image as cache to GitHub: - -.. code-block:: bash - - ./breeze push-image --python 3.10 + breeze build-image --python 3.10 * Find the 2 new images (prod, ci) created in `GitHub Container registry `_ diff --git a/CI_DIAGRAMS.md b/CI_DIAGRAMS.md index 1bbee13ae2aa2..3b228b13c71b7 100644 --- a/CI_DIAGRAMS.md +++ b/CI_DIAGRAMS.md @@ -33,15 +33,13 @@ sequenceDiagram activate Tests Tests -->> Build Images: Trigger 'pull_request_target' activate Build Images - Note over Build Images: Build info - par 3.6, [3.7, 3.8, 3.9] - activate GitHub Registry - GitHub Registry ->> Build Images: Pull CI Images from Cache - deactivate GitHub Registry - Note over Build Images: Build CI Images
[COMMIT_SHA] - end - par No CI image - Note over Tests: Build info
Which tests?
Which Python? + Note over Build Images: Build info
Decide which Python + Note over Tests: Build info
Decide on tests
Decide on Matrix (selective) + Note over Tests: Skip Build
(Runs in 'Build Images')
CI Images + Note over Tests: Skip Build
(Runs in 'Build Images')
PROD Images + par + GitHub Registry ->> Build Images: Pull CI Images
[latest] + Note over Build Images: Build CI Images
[COMMIT_SHA]
Use latest constraints
or upgrade if setup changed and Note over Tests: OpenAPI client gen and @@ -49,77 +47,135 @@ sequenceDiagram and Note over Tests: Test examples
PROD image building end - par 3.6, [3.7, 3.8, 3.9] - activate GitHub Registry - Build Images ->> GitHub Registry: Push CI Images - Note over GitHub Registry: Tagged CI Images
[COMMIT_SHA] - end - par 3.6, [3.7, 3.8, 3.9] - GitHub Registry ->> Build Images: Pull PROD Images from Cache - Note over Build Images: Build PROD Images
[COMMIT_SHA] - end + Build Images ->> GitHub Registry: Push CI Images
[COMMIT_SHA] loop Wait for CI images - par 3.6, [3.7, 3.8, 3.9] - Tests ->> Tests: Check CI Images - Note over Tests: Wait for
[COMMIT_SHA] - end + GitHub Registry ->> Tests: Pull CI Images
[COMMIT_SHA] end - par 3.6, [3.7, 3.8, 3.9] - GitHub Registry ->> Tests: Pull CI Image - Note over Tests: Verify CI Image - end - deactivate GitHub Registry - par 3.6, [3.7, 3.8, 3.9] - opt Needed? + Note over Tests: Verify CI Images
[COMMIT_SHA] + par + GitHub Registry ->> Build Images: Pull PROD Images
[latest] + Note over Build Images: Build PROD Images
[COMMIT_SHA] + and + opt Note over Tests: Run static checks end and - opt Needed? + opt Note over Tests: Run basic
static checks end and - opt Needed? + opt Note over Tests: Build docs end and - opt Needed? + opt Note over Tests: Tests end and - opt Needed? + opt Note over Tests: Test provider
packages build end and - opt Needed? + opt Note over Tests: Helm tests end end - par 3.6, [3.7, 3.8, 3.9] - Build Images ->> GitHub Registry: Push PROD Images - activate GitHub Registry - end + Build Images ->> GitHub Registry: Push PROD Images
[COMMIT_SHA] deactivate Build Images - Note over GitHub Registry: Tagged PROD Images
[COMMIT_SHA] loop Wait for PROD images - par 3.6, [3.7, 3.8, 3.9] - Tests ->> Tests: Check PROD Images - Note over Tests: Wait for
[COMMIT_SHA] + GitHub Registry ->> Tests: Pull PROD Images
[COMMIT_SHA] + end + Note over Tests: Verify PROD Image
[COMMIT_SHA] + par + opt + Note over Tests: Run Kubernetes
tests + end + and + opt + Note over Tests: Run Kubernetes
upgrade tests end end - par 3.6, [3.7, 3.8, 3.9] - GitHub Registry ->> Tests: Pull PROD Image - Note over Tests: Verify PROD Image + opt + Note over Tests: Generate constraints end - deactivate GitHub Registry - par 3.6, [3.7, 3.8, 3.9] - opt Needed? + Tests -->> Airflow Repo: Status update + deactivate Airflow Repo + deactivate Tests +``` + +## Pull request flow from "apache/airflow" repo + +```mermaid +sequenceDiagram + Note over Airflow Repo: pull request + Note over Tests: pull_request
[Read Token] + Note over Build Images: pull_request_target
[Write Token] + activate Airflow Repo + Airflow Repo -->> Tests: Trigger 'pull_request' + activate Tests + Tests -->> Build Images: Trigger 'pull_request_target' + activate Build Images + Note over Build Images: Build info + Note over Build Images: Skip Build
(Runs in 'Tests')
CI Images + Note over Build Images: Skip Build
(Runs in 'Tests')
PROD Images + deactivate Build Images + Note over Tests: Build info
Decide on tests
Decide on Matrix (selective) + par + GitHub Registry ->> Tests: Pull CI Images
[latest] + Note over Tests: Build CI Images
[COMMIT_SHA]
Use latest constraints
or upgrade if setup changed + and + Note over Tests: OpenAPI client gen + and + Note over Tests: Test UI + and + Note over Tests: Test examples
PROD image building + end + Tests ->> GitHub Registry: Push CI Images
[COMMIT_SHA] + GitHub Registry ->> Tests: Pull CI Images
[COMMIT_SHA] + Note over Tests: Verify CI Image
[COMMIT_SHA] + par + GitHub Registry ->> Tests: Pull PROD Images
[latest] + Note over Tests: Build PROD Images
[COMMIT_SHA] + and + opt + Note over Tests: Run static checks + end + and + opt + Note over Tests: Run basic
static checks + end + and + opt + Note over Tests: Build docs + end + and + opt + Note over Tests: Tests + end + and + opt + Note over Tests: Test provider
packages build + end + and + opt + Note over Tests: Helm tests + end + end + Tests ->> GitHub Registry: Push PROD Images
[COMMIT_SHA] + GitHub Registry ->> Tests: Pull PROD Image
[COMMIT_SHA] + Note over Tests: Verify PROD Image
[COMMIT_SHA] + par + opt Note over Tests: Run Kubernetes
tests end and - opt Needed? + opt Note over Tests: Run Kubernetes
upgrade tests end end + opt + Note over Tests: Generate constraints + end Tests -->> Airflow Repo: Status update deactivate Airflow Repo deactivate Tests @@ -129,23 +185,15 @@ sequenceDiagram ```mermaid sequenceDiagram - Note over Airflow Repo: merge + Note over Airflow Repo: pull request Note over Tests: push
[Write Token] - Note over Build Images: push
[Write Token] activate Airflow Repo Airflow Repo -->> Tests: Trigger 'push' activate Tests - Airflow Repo -->> Build Images: Trigger 'push' - activate Build Images - Note over Build Images: Build info - par 3.6, 3.7, 3.8, 3.9 - activate GitHub Registry - GitHub Registry ->> Build Images: Pull CI Images from Cache - deactivate GitHub Registry - Note over Build Images: Build CI Images
[COMMIT_SHA] - end - par No CI image - Note over Tests: Build info
All tests
All python + Note over Tests: Build info
All tests
Full matrix + par + GitHub Registry ->> Tests: Pull CI Images
[latest] + Note over Tests: Build CI Images
[COMMIT_SHA]
Always upgrade deps and Note over Tests: OpenAPI client gen and @@ -153,72 +201,60 @@ sequenceDiagram and Note over Tests: Test examples
PROD image building end - par 3.6, 3.7, 3.8, 3.9 - Build Images ->> GitHub Registry: Push CI Images - activate GitHub Registry - Note over GitHub Registry: Tagged CI Images
[COMMIT_SHA] - end - par 3.6, 3.7, 3.8, 3.9 - GitHub Registry ->> Build Images: Pull PROD Images from Cache - Note over Build Images: Build PROD Images
[COMMIT_SHA] - end - loop Wait for CI images - par 3.6, 3.7, 3.8, 3.9 - Tests ->> Tests: Check CI Images - Note over Tests: Wait for
[COMMIT_SHA] + Tests ->> GitHub Registry: Push CI Images
[COMMIT_SHA] + GitHub Registry ->> Tests: Pull CI Images
[COMMIT_SHA] + Note over Tests: Verify CI Image
[COMMIT_SHA] + par + GitHub Registry ->> Tests: Pull PROD Images
[latest] + Note over Tests: Build PROD Images
[COMMIT_SHA] + and + opt + Note over Tests: Run static checks end - end - par 3.6, 3.7, 3.8, 3.9 - GitHub Registry ->> Tests: Pull CI Image [COMMIT_SHA] - Note over Tests: Verify CI Image - end - deactivate GitHub Registry - par 3.6, 3.7, 3.8, 3.9 - Note over Tests: Run static checks and - Note over Tests: Build docs + opt + Note over Tests: Run basic
static checks + end and - Note over Tests: Tests + opt + Note over Tests: Build docs + end and - Note over Tests: Test provider
packages build + opt + Note over Tests: Tests + end and - Note over Tests: Helm tests - end - par 3.6, 3.7, 3.8, 3.9 - Build Images ->> GitHub Registry: Push PROD Images - Note over GitHub Registry: Tagged PROD Images
[COMMIT_SHA] - activate GitHub Registry - end - deactivate Build Images - loop Wait for PROD images - par 3.6, 3.7, 3.8, 3.9 - Tests ->> Tests: Check PROD Images - Note over Tests: Wait for
[COMMIT_SHA] + opt + Note over Tests: Test provider
packages build end - end - par 3.6, 3.7, 3.8, 3.9 - GitHub Registry ->> Tests: Pull PROD Image [COMMIT_SHA] - Note over Tests: Verify PROD Image - end - deactivate GitHub Registry - par 3.6, 3.7, 3.8, 3.9 - Note over Tests: Run Kubernetes
tests and - Note over Tests: Run Kubernetes
upgrade tests + opt + Note over Tests: Helm tests + end end - Note over Tests: Merge Coverage - Tests -->> Coverage.io: Upload Coverage - par 3.6, 3.7, 3.8, 3.9 - Tests ->> GitHub Registry: Push CI Images to Cache - activate GitHub Registry + Tests ->> GitHub Registry: Push PROD Images
[COMMIT_SHA] + GitHub Registry ->> Tests: Pull PROD Image
[COMMIT_SHA] + Note over Tests: Verify PROD Image
[COMMIT_SHA] + par + opt + Note over Tests: Run Kubernetes
tests + end and - Tests ->> GitHub Registry: Push PROD Images to Cache + opt + Note over Tests: Run Kubernetes
upgrade tests + end end - Note over GitHub Registry: Tagged Images
[latest] - deactivate GitHub Registry - par 3.6, 3.7, 3.8, 3.9 - Note over Tests: Generate constraints - Tests ->> Airflow Repo: Push constraints + Note over Tests: Generate constraints + opt In merge run? + Tests ->> Airflow Repo: Push constraints if changed + end + opt In merge run? + GitHub Registry ->> Tests: Pull CI Image
[latest] + Note over Tests: Build CI Images
[latest]
Use latest constraints + Tests ->> GitHub Registry: Push CI Image
[latest] + GitHub Registry ->> Tests: Pull PROD Image
[latest] + Note over Tests: Build PROD Images
[latest] + Tests ->> GitHub Registry: Push PROD Image
[latest] end Tests -->> Airflow Repo: Status update deactivate Airflow Repo @@ -230,19 +266,14 @@ sequenceDiagram ```mermaid sequenceDiagram Note over Airflow Repo: scheduled - Note over Tests: schedule
[Write Token] - Note over Build Images: schedule
[Write Token] + Note over Tests: push
[Write Token] activate Airflow Repo Airflow Repo -->> Tests: Trigger 'schedule' activate Tests - Airflow Repo -->> Build Images: Trigger 'schedule' - activate Build Images - Note over Build Images: Build info - par 3.6, 3.7, 3.8, 3.9 - Note over Build Images: Build CI Images
Cache disabled
[COMMIT_SHA] - end - par No CI image - Note over Tests: Build info
All tests
All python + Note over Tests: Build info
All tests
Full matrix + par + GitHub Registry ->> Tests: Pull CI Images
[latest] + Note over Tests: Build CI Images
[COMMIT_SHA]
Always upgrade deps and Note over Tests: OpenAPI client gen and @@ -250,62 +281,57 @@ sequenceDiagram and Note over Tests: Test examples
PROD image building end - par 3.6, 3.7, 3.8, 3.9 - Build Images ->> GitHub Registry: Push CI Images - activate GitHub Registry - Note over GitHub Registry: Tagged CI Images
[COMMIT_SHA] - end - par 3.6, 3.7, 3.8, 3.9 - Note over Build Images: Build PROD Images
Cache disabled
[COMMIT_SHA] - end - loop Wait for CI images - par 3.6, 3.7, 3.8, 3.9 - Tests ->> Tests: Check CI Images - Note over Tests: Wait for
[COMMIT_SHA] + Tests ->> GitHub Registry: Push CI Images
[COMMIT_SHA] + GitHub Registry ->> Tests: Pull CI Images
[COMMIT_SHA] + Note over Tests: Verify CI Image
[COMMIT_SHA] + par + GitHub Registry ->> Tests: Pull PROD Images
[latest] + Note over Tests: Build PROD Images
[COMMIT_SHA] + and + opt + Note over Tests: Run static checks end - end - par 3.6, 3.7, 3.8, 3.9 - GitHub Registry ->> Tests: Pull CI Image [COMMIT_SHA] - Note over Tests: Verify CI Image - end - deactivate GitHub Registry - par 3.6, 3.7, 3.8, 3.9 - Note over Tests: Run static checks and - Note over Tests: Build docs + opt + Note over Tests: Run basic
static checks + end and - Note over Tests: Tests + opt + Note over Tests: Build docs + end and - Note over Tests: Test provider
packages build + opt + Note over Tests: Tests + end and - Note over Tests: Helm tests - end - par 3.6, 3.7, 3.8, 3.9 - Build Images ->> GitHub Registry: Push PROD Images - activate GitHub Registry - Note over GitHub Registry: Tagged PROD Images
[COMMIT_SHA] - end - deactivate Build Images - loop Wait for PROD images - par 3.6, 3.7, 3.8, 3.9 - Tests ->> Tests: Check PROD Images - Note over Tests: Wait for
[COMMIT_SHA] + opt + Note over Tests: Test provider
packages build end - end - par 3.6, 3.7, 3.8, 3.9 - GitHub Registry ->> Tests: Pull PROD Image [COMMIT_SHA] - Note over Tests: Verify PROD Image - end - deactivate GitHub Registry - par 3.6, 3.7, 3.8, 3.9 - Note over Tests: Run Kubernetes
tests and - Note over Tests: Run Kubernetes
upgrade tests + opt + Note over Tests: Helm tests + end end - par 3.6, 3.7, 3.8, 3.9 - Note over Tests: Generate constraints - Tests ->> Airflow Repo: Push constraints + Tests ->> GitHub Registry: Push PROD Images
[COMMIT_SHA] + GitHub Registry ->> Tests: Pull PROD Image
[COMMIT_SHA] + Note over Tests: Verify PROD Image
[COMMIT_SHA] + par + opt + Note over Tests: Run Kubernetes
tests + end + and + opt + Note over Tests: Run Kubernetes
upgrade tests + end end + Note over Tests: Generate constraints + Tests ->> Airflow Repo: Push constraints if changed + GitHub Registry ->> Tests: Pull CI Image
[latest] + Note over Tests: Build CI Images
[latest]
Use latest constraints + Tests ->> GitHub Registry: Push CI Image
[latest] + GitHub Registry ->> Tests: Pull PROD Image
[latest] + Note over Tests: Build PROD Images
[latest] + Tests ->> GitHub Registry: Push PROD Image
[latest] Tests -->> Airflow Repo: Status update deactivate Airflow Repo deactivate Tests diff --git a/COMMITTERS.rst b/COMMITTERS.rst index e9ca3eefa37ad..054988407cb60 100644 --- a/COMMITTERS.rst +++ b/COMMITTERS.rst @@ -176,13 +176,13 @@ become active again you can simply email the PMC and ask to be reinstated. The PMC also can mark committers as inactive after they have not been involved in the community for more than 12 months. -Github configuration for committers +GitHub configuration for committers ----------------------------------- -To be able to merge PRs, committers have to integrate their Github ID with Apache systems. To do that follow steps: +To be able to merge PRs, committers have to integrate their GitHub ID with Apache systems. To do that follow steps: -1. Verify you have a Github ID `enabled with 2FA `__. -2. Enter your Github ID into your `Apache ID profile `__. +1. Verify you have a GitHub ID `enabled with 2FA `__. +2. Enter your GitHub ID into your `Apache ID profile `__. 3. Merge your Apache and GitHub accounts using `GitBox (Apache Account Linking utility) `__. You should see 3 green checks in GitBox. 4. Wait at least 30 minutes for an email inviting you to Apache GitHub Organization and accept invitation. -5. After accepting the Github Invitation verify that you are a member of the `Airflow committers team on Github `__. +5. After accepting the GitHub Invitation verify that you are a member of the `Airflow committers team on GitHub `__. diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index 34b220405e6b8..8bab15352577a 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -66,6 +66,18 @@ implement it. Issue reporting and resolution process -------------------------------------- +An unusual element of the Apache Airflow project is that you can open a PR to +fix an issue or make an enhancement, without needing to open an issue first. +This is intended to make it as easy as possible to contribute to the project. + +If you however feel the need to open an issue (usually a bug or feature request) +consider starting with a `GitHub Discussion `_ instead. +In the vast majority of cases discussions are better than issues - you should only open +issues if you are sure you found a bug and have a reproducible case, +or when you want to raise a feature request that will not require a lot of discussion. +If you have a very important topic to discuss, start a discussion on the +`Devlist `_ instead. + The Apache Airflow project uses a set of labels for tracking and triaging issues, as well as a set of priorities and milestones to track how and when the enhancements and bug fixes make it into an Airflow release. This is documented as part of @@ -203,27 +215,16 @@ also have support for popular remote development environments: GitHub Codespaces You can see the differences between the various environments `here `__. -The local env instructions can be found in full in the `LOCAL_VIRTUALENV.rst`_ file. - -.. _LOCAL_VIRTUALENV.rst: -https://github.com/apache/airflow/blob/main/LOCAL_VIRTUALENV.rst +The local env instructions can be found in full in the `LOCAL_VIRTUALENV.rst `_ file. The Breeze Docker Compose env is to maintain a consistent and common development environment so that you can replicate CI failures locally and work on solving them locally rather by pushing to CI. -The Breeze instructions can be found in full in the `BREEZE.rst`_ file. - -.. 
_BREEZE.rst: -https://github.com/apache/airflow/blob/main/BREEZE.rst +The Breeze instructions can be found in full in the `BREEZE.rst `_ file. You can configure the Docker-based Breeze development environment as follows: -1. Install the latest versions of the `Docker Community Edition`_ and `Docker Compose`_ and add them to the PATH. - -.. _Docker Community Edition: -https://github.com/apache/airflow/blob/main/BREEZE.rst#docker-community-edition - -.. _Docker Compose: https://github.com/apache/airflow/blob/main/BREEZE.rst#docker-compose +1. Install the latest versions of the `Docker Community Edition `_ and `Docker Compose `_ and add them to the PATH. 2. Install `jq`_ on your machine. The exact command depends on the operating system (or Linux distribution) you use. @@ -245,7 +246,7 @@ or on macOS with `Homebrew `_ .. code-block:: bash - ./breeze + breeze Breeze starts with downloading the Airflow CI image from the Docker Hub and installing all required dependencies. @@ -257,13 +258,14 @@ to make them immediately visible in the environment. .. code-block:: bash - mkvirtualenv myenv --python=python3.7 + mkvirtualenv myenv --python=python3.9 5. Initialize the created environment: .. code-block:: bash - ./breeze initialize-local-virtualenv --python 3.7 + ./scripts/tools/initialize_virtualenv.py + 6. Open your IDE (for example, PyCharm) and select the virtualenv you created as the project's default virtualenv in your IDE. @@ -329,6 +331,24 @@ Step 4: Prepare PR this step is automatically run while you are committing your code. If not, you can do it manually via ``git add`` and then ``pre-commit run``. + * Consider adding a newsfragment to your PR so you can add an entry in the release notes. + The following newsfragment types are supported: + + * `significant` + * `feature` + * `improvement` + * `bugfix` + * `doc` + * `misc` + + To add a newsfragment, simply create an rst file named ``{pr_number}.{type}.rst`` (e.g. ``1234.bugfix.rst``) + and place in either `newsfragments `__ for core newsfragments, + or `chart/newsfragments `__ for helm chart newsfragments. + + For significant newsfragments, similar to git commits, the first line is the summary and optionally a + body can be added with an empty line separating it. + For other newsfragment types, only use a single summary line. + 2. Rebase your fork, squash commits, and resolve all conflicts. See `How to rebase PR <#how-to-rebase-pr>`_ if you need help with rebasing your change. Remember to rebase often if your PR takes a lot of time to review/fix. This will make rebase process much easier and less painful and the more often you do it, @@ -472,10 +492,10 @@ Development Environments There are two environments, available on Linux and macOS, that you can use to develop Apache Airflow: -- `Local virtualenv development environment <#local-virtualenv-development-environment>`_ +- `Local virtualenv development environment `_ that supports running unit tests and can be used in your IDE. -- `Breeze Docker-based development environment <#breeze-development-environment>`_ that provides +- `Breeze Docker-based development environment `_ that provides an end-to-end CI solution with all software dependencies covered. The table below summarizes differences between the environments: @@ -533,7 +553,7 @@ Limitations: real unit tests. Technically, to run integration tests, you can configure and install the dependencies on your own, but it is usually complex. 
Instead, you are recommended to use - `Breeze development environment <#breeze-development-environment>`__ with all required packages + `Breeze development environment `__ with all required packages pre-installed. - You need to make sure that your local environment is consistent with other @@ -554,6 +574,13 @@ All details about using and running Airflow Breeze can be found in The Airflow Breeze solution is intended to ease your local development as "*It's a Breeze to develop Airflow*". +.. note:: + + We are in a process of switching to the new Python-based Breeze from a legacy Bash + Breeze. Not all functionality has been ported yet and the old Breeze is still available + until then as ``./breeze-legacy`` script. The documentation mentions when the old ./breeze-legacy + should be still used. + Benefits: - Breeze is a complete environment that includes external components, such as @@ -581,9 +608,14 @@ Limitations: disk space and CPU. You can stop the environment manually after you use it or even use a ``bare`` environment to decrease resource usage. -**NOTE:** Breeze CI images are not supposed to be used in production environments. -They are optimized for repeatability of tests, maintainability and speed of building rather -than production performance. The production images are not yet officially published. + + +.. note:: + + Breeze CI images are not supposed to be used in production environments. + They are optimized for repeatability of tests, maintainability and speed of building rather + than production performance. The production images are not yet officially published. + Airflow dependencies @@ -614,20 +646,18 @@ all dependencies needed in the CI environment. This is the full list of those extras: .. START EXTRAS HERE - airbyte, alibaba, all, all_dbs, amazon, apache.atlas, apache.beam, apache.cassandra, apache.drill, apache.druid, apache.hdfs, apache.hive, apache.kylin, apache.livy, apache.pig, apache.pinot, -apache.spark, apache.sqoop, apache.webhdfs, asana, async, atlas, aws, azure, cassandra, celery, -cgroups, cloudant, cncf.kubernetes, crypto, dask, databricks, datadog, dbt.cloud, deprecated_api, -devel, devel_all, devel_ci, devel_hadoop, dingding, discord, doc, docker, druid, elasticsearch, -exasol, facebook, ftp, gcp, gcp_api, github, github_enterprise, google, google_auth, grpc, -hashicorp, hdfs, hive, http, imap, influxdb, jdbc, jenkins, jira, kerberos, kubernetes, ldap, +apache.spark, apache.sqoop, apache.webhdfs, arangodb, asana, async, atlas, aws, azure, cassandra, +celery, cgroups, cloudant, cncf.kubernetes, crypto, dask, databricks, datadog, dbt.cloud, +deprecated_api, devel, devel_all, devel_ci, devel_hadoop, dingding, discord, doc, docker, druid, +elasticsearch, exasol, facebook, ftp, gcp, gcp_api, github, github_enterprise, google, google_auth, +grpc, hashicorp, hdfs, hive, http, imap, influxdb, jdbc, jenkins, jira, kerberos, kubernetes, ldap, leveldb, microsoft.azure, microsoft.mssql, microsoft.psrp, microsoft.winrm, mongo, mssql, mysql, neo4j, odbc, openfaas, opsgenie, oracle, pagerduty, pandas, papermill, password, pinot, plexus, postgres, presto, qds, qubole, rabbitmq, redis, s3, salesforce, samba, segment, sendgrid, sentry, sftp, singularity, slack, snowflake, spark, sqlite, ssh, statsd, tableau, telegram, trino, vertica, virtualenv, webhdfs, winrm, yandex, zendesk - .. END EXTRAS HERE Provider packages @@ -644,7 +674,7 @@ and not packaged together with the core, unless you set ``INSTALL_PROVIDERS_FROM variable to ``true``. 
In Breeze - which is a development environment, ``INSTALL_PROVIDERS_FROM_SOURCES`` variable is set to true, -but you can add ``--skip-installing-airflow-providers-from-sources`` flag to Breeze to skip installing providers when +but you can add ``--install-providers-from-sources=false`` flag to Breeze to install providers from PyPI instead of source files when building the images. One watch-out - providers are still always installed (or rather available) if you install airflow from @@ -853,47 +883,42 @@ There are several sets of constraints we keep: providers. If you want to manage airflow separately and then add providers individually, you can use those. Those constraints are named ``constraints-no-providers-.txt``. -We also have constraints with "source-providers" but they are used i - -The first ones can be used as constraints file when installing Apache Airflow in a repeatable way. +The first two can be used as constraints file when installing Apache Airflow in a repeatable way. It can be done from the sources: +from the PyPI package: + .. code-block:: bash - pip install -e . \ - --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-main/constraints-3.7.txt" + pip install apache-airflow[google,amazon,async]==2.2.5 \ + --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-2.2.5/constraints-3.7.txt" +The last one can be used to install Airflow in "minimal" mode - i.e when bare Airflow is installed without +extras. -or from the PyPI package: +When you install airflow from sources (in editable mode) you should use "constraints-source-providers" +instead (this accounts for the case when some providers have not yet been released and have conflicting +requirements). .. code-block:: bash - pip install apache-airflow \ - --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-main/constraints-3.7.txt" + pip install -e . \ + --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-main/constraints-source-providers-3.7.txt" This works also with extras - for example: .. code-block:: bash - pip install .[ssh] \ - --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-main/constraints-3.7.txt" + pip install ".[ssh]" \ + --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-main/constraints-source-providers-3.7.txt" -As of apache-airflow 1.10.12 it is also possible to use constraints directly from GitHub using specific -tag/hash name. We tag commits working for particular release with constraints- tag. So for example -fixed valid constraints 1.10.12 can be used by using ``constraints-1.10.12`` tag: - -.. code-block:: bash - - pip install apache-airflow[ssh]==1.10.12 \ - --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-1.10.12/constraints-3.7.txt" - There are different set of fixed constraint files for different python major/minor versions and you should use the right file for the right python version. If you want to update just airflow dependencies, without paying attention to providers, you can do it using --no-providers constraint files as well. +``constraints-no-providers`` constraint files as well. .. code-block:: bash @@ -909,7 +934,9 @@ if the tests are successful. Documentation ============= -Documentation for ``apache-airflow`` package and other packages that are closely related to it ie. providers packages are in ``/docs/`` directory. 
For detailed information on documentation development, see: `docs/README.rst `_ +Documentation for ``apache-airflow`` package and other packages that are closely related to it ie. +providers packages are in ``/docs/`` directory. For detailed information on documentation development, +see: `docs/README.rst `_ Static code checks ================== @@ -1140,8 +1167,20 @@ development machine before continuing with migration. $ cd airflow $ alembic revision -m "add new field to db" Generating - ~/airflow/airflow/migrations/versions/12341123_add_new_field_to_db.py + ~/airflow/airflow/migrations/versions/a1e23c41f123_add_new_field_to_db.py + +Note that migration file names are standardized by pre-commit hook ``update-migration-references``, so that they sort alphabetically and indicate +the Airflow version in which they first appear (the alembic revision ID is removed). As a result you should expect to see a pre-commit failure +on the first attempt. Just stage the modified file and commit again +(or run the hook manually before committing). + +After your new migration file is run through pre-commit it will look like this: + +.. code-block:: + + 1234_A_B_C_add_new_field_to_db.py +This represents that your migration is the 1234th migration and expected for release in Airflow version A.B.C. Node.js Environment Setup ========================= @@ -1214,6 +1253,7 @@ commands: yarn run prod # Starts a web server that manages and updates your assets as you modify them + # You'll need to run the webserver in debug mode too: `airflow webserver -d` yarn run dev diff --git a/CONTRIBUTORS_QUICK_START.rst b/CONTRIBUTORS_QUICK_START.rst index 2b9d6367c2e32..10ff2cc9516d9 100644 --- a/CONTRIBUTORS_QUICK_START.rst +++ b/CONTRIBUTORS_QUICK_START.rst @@ -16,7 +16,7 @@ under the License. ************************* -Contributor's Quick Guide +Contributor's Quick Start ************************* .. contents:: :local: @@ -24,37 +24,41 @@ Contributor's Quick Guide Note to Starters ################ -There are two ways you can run the Airflow dev env on your machine: - 1. With a Docker Container - 2. With a local virtual environment -Before deciding which method to choose, there are a couple factors to consider: -Running Airflow in a container is the most reliable way: it provides a more consistent environment and allows integration tests with a number of integrations (cassandra, mongo, mysql, etc.). However it also requires **4GB RAM, 40GB disk space and at least 2 cores**. -If you are working on a basic feature, installing Airflow on a local environment might be sufficient. +Airflow is quite a complex project, and setting up a working environment, but we made it rather simple if +you follow the guide. + +There are three ways you can run the Airflow dev env: -- |Virtual Env Guide| +1. With a Docker Containers and Docker Compose (on your local machine). This environment is managed + with `Breeze `_ tool written in Python that makes the environment management, yeah you + guessed it - a breeze. +2. With a local virtual environment (on your local machine). +3. With a remote, managed environment (via remote development environment) + +Before deciding which method to choose, there are a couple factors to consider: -.. |Virtual Env Guide| raw:: html +* Running Airflow in a container is the most reliable way: it provides a more consistent environment + and allows integration tests with a number of integrations (cassandra, mongo, mysql, etc.). + However it also requires **4GB RAM, 40GB disk space and at least 2 cores**. 
+* If you are working on a basic feature, installing Airflow on a local environment might be sufficient. + For a comprehensive venv tutorial - visit + `Virtual Env guide `_ +* You need to have usually a paid account to access managed, remote virtual environment. - For a comprehensive venv tutorial - visit Virtual Env Guide +Local machine development +######################### -Prerequisites -############# +If you do not work with remote development environment, you need those prerequisites. 1. Docker Community Edition 2. Docker Compose 3. pyenv (you can also use pyenv-virtualenv or virtualenvwrapper) -4. jq - - -Installing Prerequisites on Ubuntu -################################## +The below setup describe Ubuntu installation. It might be slightly different on different machines. Docker Community Edition ------------------------ - 1. Installing required packages for Docker and setting up docker repo .. code-block:: bash @@ -97,9 +101,6 @@ Note : After adding user to docker group Logout and Login again for group member $ docker run hello-world - - - Docker Compose -------------- @@ -123,10 +124,9 @@ Docker Compose $ docker-compose --version - - Pyenv and setting up virtual-env -------------------------------- + Note: You might have issues with pyenv if you have a Mac with an M1 chip. Consider using virtualenv as an alternative. 1. Install pyenv and configure your shell's environment for Pyenv as suggested in Pyenv `README `_ @@ -165,50 +165,8 @@ Pyenv and setting up virtual-env $ pyenv activate airflow-env - -Installing jq --------------------------------- - -``jq`` is a lightweight and flexible command-line JSON processor. - -Install ``jq`` with the following command: - -.. code-block:: bash - - $ sudo apt install jq - - - -Setup and develop using PyCharm -############################### - -.. raw:: html - -
- Setup and develop using PyCharm - - - -Setup Airflow with Breeze -------------------------- - - - -.. note:: - - Only ``pip`` installation is currently officially supported. - - While they are some successes with using other tools like `poetry `_ or - `pip-tools `_, they do not share the same workflow as - ``pip`` - especially when it comes to constraint vs. requirements management. - Installing via ``Poetry`` or ``pip-tools`` is not currently supported. - - If you wish to install airflow using those tools you should use the constraint files and convert - them to appropriate format and workflow that your tool requires. - - Forking and cloning Project -~~~~~~~~~~~~~~~~~~~~~~~~~~~ +--------------------------- 1. Goto |airflow_github| and fork the project. @@ -223,7 +181,7 @@ Forking and cloning Project alt="Forking Apache Airflow project">
-2. Goto your github account's fork of airflow click on ``Code`` and copy the clone link. +2. Goto your github account's fork of airflow click on ``Code`` you will find the link to your repo. .. raw:: html @@ -232,49 +190,41 @@ Forking and cloning Project alt="Cloning github fork of Apache airflow">
+3. Follow `Cloning a repository `_ + to clone the repo locally (you can also do it in your IDE - see the `Using your IDE `_ + chapter below. +Typical development tasks +######################### -3. Open your IDE or source code editor and select the option to clone the repository - - .. raw:: html - -
- Cloning github fork to Pycharm -
- - -4. Paste the copied clone link in the URL field and submit. - - .. raw:: html - -
- Cloning github fork to Pycharm -
- +For many of the development tasks you will need ``Breeze`` to be configured. ``Breeze`` is a development +environment which uses docker and docker-compose and it's main purpose is to provide a consistent +and repeatable environment for all the contributors and CI. When using ``Breeze`` you avoid the "works for me" +syndrome - because not only others can reproduce easily what you do, but also the CI of Airflow uses +the same environment to run all tests - so you should be able to easily reproduce the same failures you +see in CI in your local environment. Setting up Breeze -~~~~~~~~~~~~~~~~~ -1. Open terminal and enter into virtual environment ``airflow-env`` and goto project directory +----------------- -.. code-block:: bash +1. Install ``pipx`` - follow the instructions in `Install pipx `_ - $ pyenv activate airflow-env - $ cd ~/Projects/airflow/ -2. Initializing breeze autocomplete +2. Run ``pipx install -e ./dev/breeze`` in your checked-out repository. Make sure to follow any instructions + printed by ``pipx`` during the installation - this is needed to make sure that ``breeze`` command is + available in your PATH. + +3. Initialize breeze autocomplete .. code-block:: bash - $ ./breeze setup-autocomplete - $ source ~/.bash_completion.d/breeze-complete + $ breeze setup-autocomplete -3. Initialize breeze environment with required python version and backend. This may take a while for first time. +4. Initialize breeze environment with required python version and backend. This may take a while for first time. .. code-block:: bash - $ ./breeze --python 3.8 --backend mysql + $ breeze --python 3.7 --backend mysql .. note:: If you encounter an error like "docker.credentials.errors.InitializationError: @@ -287,8 +237,12 @@ Setting up Breeze Once the package is installed, execute the breeze command again to resume image building. -4. Once the breeze environment is initialized, create airflow tables and users from the breeze CLI. ``airflow db reset`` - is required to execute at least once for Airflow Breeze to get the database/tables created. +5. When you enter Breeze environment you should see prompt similar to ``root@e4756f6ac886:/opt/airflow#``. This + means that you are inside the Breeze container and ready to run most of the development tasks. You can leave + the environment with ``exit`` and re-enter it with just ``breeze`` command. + Once you enter breeze environment, create airflow tables and users from the breeze CLI. ``airflow db reset`` + is required to execute at least once for Airflow Breeze to get the database/tables created. If you run + tests, however - the test database will be initialized automatically for you. .. code-block:: bash @@ -297,45 +251,28 @@ Setting up Breeze --email admin@example.com --firstname foo --lastname bar -5. Closing Breeze environment. After successfully finishing above command will leave you in container, - type ``exit`` to exit the container +6. Exiting Breeze environment. After successfully finishing above command will leave you in container, + type ``exit`` to exit the container. The database created before will remain and servers will be + running though, until you stop breeze environment completely. .. code-block:: bash root@b76fcb399bb6:/opt/airflow# root@b76fcb399bb6:/opt/airflow# exit -.. code-block:: bash - - $ ./breeze stop - -Installing airflow in the local virtual environment ``airflow-env`` with breeze. -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -1. 
It may require some packages to be installed; watch the output of the command to see which ones are missing. - -.. code-block:: bash - - $ sudo apt-get install sqlite libsqlite3-dev default-libmysqlclient-dev postgresql - -2. Initialize virtual environment with breeze. +6. You can stop the environment (which means deleting the databases and database servers running in the + background) via ``breeze stop`` command. .. code-block:: bash - $ ./breeze initialize-local-virtualenv --python 3.8 - -3. Add following line to ~/.bashrc in order to call breeze command from anywhere. - -.. code-block:: bash + $ breeze stop - export PATH=${PATH}:"/home/${USER}/Projects/airflow" - source ~/.bashrc Using Breeze -~~~~~~~~~~~~ +------------ 1. Starting breeze environment using ``breeze start-airflow`` starts Breeze environment with last configuration run( - In this case python and backend will be picked up from last execution ``./breeze --python 3.8 --backend mysql``) + In this case python and backend will be picked up from last execution ``breeze --python 3.8 --backend mysql``) It also automatically starts webserver, backend and scheduler. It drops you in tmux with scheduler in bottom left and webserver in bottom right. Use ``[Ctrl + B] and Arrow keys`` to navigate. @@ -346,8 +283,8 @@ Using Breeze Use CI image. Branch name: main - Docker image: apache/airflow:main-python3.8-ci - Airflow source version: 2.0.0b2 + Docker image: ghcr.io/apache/airflow/main/ci/python3.8:latest + Airflow source version: 2.4.0.dev0 Python version: 3.8 Backend: mysql 5.7 @@ -355,17 +292,21 @@ Using Breeze Port forwarding: Ports are forwarded to the running docker containers for webserver and database + * 12322 -> forwarded to Airflow ssh server -> airflow:22 * 28080 -> forwarded to Airflow webserver -> airflow:8080 * 25555 -> forwarded to Flower dashboard -> airflow:5555 * 25433 -> forwarded to Postgres database -> postgres:5432 * 23306 -> forwarded to MySQL database -> mysql:3306 + * 21433 -> forwarded to MSSQL database -> mssql:1443 * 26379 -> forwarded to Redis broker -> redis:6379 Here are links to those services that you can use on host: + * ssh connection for remote debugging: ssh -p 12322 airflow@127.0.0.1 pw: airflow * Webserver: http://127.0.0.1:28080 * Flower: http://127.0.0.1:25555 * Postgres: jdbc:postgresql://127.0.0.1:25433/airflow?user=postgres&password=airflow * Mysql: jdbc:mysql://127.0.0.1:23306/airflow?user=root + * MSSQL: jdbc:sqlserver://127.0.0.1:21433;databaseName=airflow;user=sa;password=Airflow123 * Redis: redis://127.0.0.1:26379/0 @@ -405,8 +346,6 @@ Using Breeze $ root@0c6e4ff0ab3d:/opt/airflow# airflow webserver - - 2. Now you can access airflow web interface on your local machine at |http://127.0.0.1:28080| with user name ``admin`` and password ``admin``. @@ -425,9 +364,6 @@ Using Breeze MySQL Workbench with Host ``127.0.0.1``, port ``23306``, user ``root`` and password blank(leave empty), default schema ``airflow``. - If you cannot connect to MySQL, refer to the Prerequisites section in the - |Breeze documentation| and try increasing Docker disk space. - .. raw:: html
@@ -483,19 +419,11 @@ Following are some of important topics of Breeze documentation: Additional tools to the Docker Image -- |Internal details of Breeze| - -.. |Internal details of Breeze| raw:: html - - - Internal details of Breeze - - - |Breeze Command-Line Interface Reference| .. |Breeze Command-Line Interface Reference| raw:: html - Breeze Command-Line Interface Reference @@ -507,113 +435,154 @@ Following are some of important topics of Breeze documentation: Cleaning the environment -- |Other uses of the Airflow Breeze environment| -.. |Other uses of the Airflow Breeze environment| raw:: html +Configuring Pre-commit +---------------------- + +Before committing changes to github or raising a pull request, code needs to be checked for certain quality standards +such as spell check, code syntax, code formatting, compatibility with Apache License requirements etc. This set of +tests are applied when you commit your code. - Other uses of the Airflow Breeze environment +.. raw:: html +
+ CI tests GitHub +
-Setting up Debug -~~~~~~~~~~~~~~~~ +To avoid burden on CI infrastructure and to save time, Pre-commit hooks can be run locally before committing changes. -1. Configuring Airflow database connection +1. Installing required packages -- Airflow is by default configured to use SQLite database. Configuration can be seen on local machine - ``~/airflow/airflow.cfg`` under ``sql_alchemy_conn``. +.. code-block:: bash -- Installing required dependency for MySQL connection in ``airflow-env`` on local machine. + $ sudo apt install libxml2-utils - .. code-block:: bash +2. Installing required Python packages - $ pyenv activate airflow-env - $ pip install PyMySQL +.. code-block:: bash -- Now set ``sql_alchemy_conn = mysql+pymysql://root:@127.0.0.1:23306/airflow?charset=utf8mb4`` in file - ``~/airflow/airflow.cfg`` on local machine. + $ pyenv activate airflow-env + $ pip install pre-commit -1. Debugging an example DAG +3. Go to your project directory -- Add Interpreter to PyCharm pointing interpreter path to ``~/.pyenv/versions/airflow-env/bin/python``, which is virtual - environment ``airflow-env`` created with pyenv earlier. For adding an Interpreter go to ``File -> Setting -> Project: - airflow -> Python Interpreter``. +.. code-block:: bash - .. raw:: html + $ cd ~/Projects/airflow -
- Adding existing interpreter -
-- In PyCharm IDE open airflow project, directory ``/files/dags`` of local machine is by default mounted to docker - machine when breeze airflow is started. So any DAG file present in this directory will be picked automatically by - scheduler running in docker machine and same can be seen on ``http://127.0.0.1:28080``. +1. Running pre-commit hooks + +.. code-block:: bash + + $ pre-commit run --all-files + No-tabs checker......................................................Passed + Add license for all SQL files........................................Passed + Add license for all other files......................................Passed + Add license for all rst files........................................Passed + Add license for all JS/CSS/PUML files................................Passed + Add license for all JINJA template files.............................Passed + Add license for all shell files......................................Passed + Add license for all python files.....................................Passed + Add license for all XML files........................................Passed + Add license for all yaml files.......................................Passed + Add license for all md files.........................................Passed + Add license for all mermaid files....................................Passed + Add TOC for md files.................................................Passed + Add TOC for upgrade documentation....................................Passed + Check hooks apply to the repository..................................Passed + black................................................................Passed + Check for merge conflicts............................................Passed + Debug Statements (Python)............................................Passed + Check builtin type constructor use...................................Passed + Detect Private Key...................................................Passed + Fix End of Files.....................................................Passed + ........................................................................... + +5. Running pre-commit for selected files -- Copy any example DAG present in the ``/airflow/example_dags`` directory to ``/files/dags/``. +.. code-block:: bash -- Add a ``__main__`` block at the end of your DAG file to make it runnable. It will run a ``back_fill`` job: + $ pre-commit run --files airflow/decorators.py tests/utils/test_task_group.py - .. code-block:: python - if __name__ == "__main__": - dag.clear() - dag.run() -- Add ``AIRFLOW__CORE__EXECUTOR=DebugExecutor`` to Environment variable of Run Configuration. +6. Running specific hook for selected files - - Click on Add configuration +.. code-block:: bash - .. raw:: html + $ pre-commit run black --files airflow/decorators.py tests/utils/test_task_group.py + black...............................................................Passed + $ pre-commit run flake8 --files airflow/decorators.py tests/utils/test_task_group.py + Run flake8..........................................................Passed -
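+
+The hooks installed above run at commit time. If you also want them to run on ``git push``, or occasionally
+need to bypass them, the standard ``pre-commit`` and ``git`` options can help; a minimal sketch (the commit
+message and the hook id are just examples):
+
+.. code-block:: bash
+
+   # additionally install the hooks for the pre-push stage
+   $ pre-commit install --hook-type pre-push
+
+   # bypass all hooks for a single commit (use sparingly)
+   $ git commit --no-verify -m "WIP checkpoint"
+
+   # skip one hook by id while still running the rest
+   $ SKIP=flake8 pre-commit run --all-files
+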
- Add Configuration pycharm -
- - Add Script Path and Environment Variable to new Python configuration - .. raw:: html +7. Enabling Pre-commit check before push. It will run pre-commit automatically before committing and stops the commit -
- Add environment variable pycharm -
+.. code-block:: bash -- Now Debug an example dag and view the entries in tables such as ``dag_run, xcom`` etc in MySQL Workbench. + $ cd ~/Projects/airflow + $ pre-commit install + $ git commit -m "Added xyz" +8. To disable Pre-commit +.. code-block:: bash -Starting development --------------------- + $ cd ~/Projects/airflow + $ pre-commit uninstall -Creating a branch -~~~~~~~~~~~~~~~~~ +- For more information on visit |STATIC_CODE_CHECKS.rst| -1. Click on the branch symbol in the status bar +.. |STATIC_CODE_CHECKS.rst| raw:: html - .. raw:: html + + STATIC_CODE_CHECKS.rst -
- Creating a new branch -
+- Following are some of the important links of STATIC_CODE_CHECKS.rst -2. Give a name to a branch and checkout + - |Pre-commit Hooks| - .. raw:: html + .. |Pre-commit Hooks| raw:: html -
- Giving a name to a branch -
+ + Pre-commit Hooks + + - |Running Static Code Checks via Breeze| + + .. |Running Static Code Checks via Breeze| raw:: html + + Running Static Code Checks via Breeze + + +Installing airflow in the local venv +------------------------------------ + +1. It may require some packages to be installed; watch the output of the command to see which ones are missing. + +.. code-block:: bash + + $ sudo apt-get install sqlite libsqlite3-dev default-libmysqlclient-dev postgresql + $ ./scripts/tools/initialize_virtualenv.py + + +2. Add following line to ~/.bashrc in order to call breeze command from anywhere. + +.. code-block:: bash + export PATH=${PATH}:"/home/${USER}/Projects/airflow" + source ~/.bashrc +Running tests with Breeze +------------------------- -Testing -~~~~~~~ +You can usually conveniently run tests in your IDE (see IDE below) using virtualenv but with Breeze you +can be sure that all the tests are run in the same environment as tests in CI. All Tests are inside ./tests directory. @@ -623,18 +592,21 @@ All Tests are inside ./tests directory. .. code-block:: bash - root@51d89409f7a2:/opt/airflow# pytest tests/utils/test_trigger_rule.py - ================================================ test session starts ================================================ - platform linux -- Python 3.8.12, pytest-6.2.5, py-1.11.0, pluggy-1.0.0 -- /usr/local/bin/python - cachedir: .pytest_cache - rootdir: /opt/airflow, configfile: pytest.ini - plugins: forked-1.4.0, rerunfailures-9.1.1, requests-mock-1.9.3, asyncio-0.18.1, cov-3.0.0, httpx-0.20.0, xdist-2.5.0, flaky-3.7.0, timeouts-1.2.1, anyio-3.5.0, instafail-0.4.2 - asyncio: mode=strict - setup timeout: 0.0s, execution timeout: 0.0s, teardown timeout: 0.0s - collected 1 item + root@63528318c8b1:/opt/airflow# pytest tests/utils/test_decorators.py + ======================================= test session starts ======================================= + platform linux -- Python 3.8.6, pytest-6.0.1, py-1.9.0, pluggy-0.13.1 -- /usr/local/bin/python + cachedir: .pytest_cache + rootdir: /opt/airflow, configfile: pytest.ini + plugins: celery-4.4.7, requests-mock-1.8.0, xdist-1.34.0, flaky-3.7.0, rerunfailures-9.0, instafail + -0.4.2, forked-1.3.0, timeouts-1.2.1, cov-2.10.0 + setup timeout: 0.0s, execution timeout: 0.0s, teardown timeout: 0.0s + collected 3 items + + tests/utils/test_decorators.py::TestApplyDefault::test_apply PASSED [ 33%] + tests/utils/test_decorators.py::TestApplyDefault::test_default_args PASSED [ 66%] + tests/utils/test_decorators.py::TestApplyDefault::test_incorrect_default_args PASSED [100%] - tests/utils/test_trigger_rule.py::TestTriggerRule::test_valid_trigger_rules PASSED [100%] - =========================================== 1 passed, 1 warning in 0.66s ============================================ + ======================================== 3 passed in 1.49s ======================================== - Running All the test with Breeze by specifying required python version, backend, backend version @@ -643,26 +615,6 @@ All Tests are inside ./tests directory. $ breeze --backend mysql --mysql-version 5.7 --python 3.8 --db-reset --test-type All tests -- Running specific test in container using shell scripts. Testing in container scripts are located in - ``./scripts/in_container`` directory. - -.. 
code-block:: bash - - root@df8927308887:/opt/airflow# ./scripts/in_container/ - bin/ run_flake8.sh* - check_environment.sh* run_generate_constraints.sh* - entrypoint_ci.sh* run_init_script.sh* - entrypoint_exec.sh* run_install_and_test_provider_packages.sh* - _in_container_script_init.sh* run_mypy.sh* - prod/ run_prepare_provider_packages.sh* - run_ci_tests.sh* run_prepare_provider_documentation.sh* - run_clear_tmp.sh* run_system_tests.sh* - run_docs_build.sh* run_tmux_welcome.sh* - run_extract_tests.sh* stop_tmux_airflow.sh* - run_fix_ownership.sh* update_quarantined_test_status.py* - - root@df8927308887:/opt/airflow# ./scripts/in_container/run_docs_build.sh - - Running specific type of test - Types of tests @@ -746,1003 +698,44 @@ All Tests are inside ./tests directory. Local and Remote Debugging in IDE +Contribution guide +################## -Pre-commit -~~~~~~~~~~ +- To know how to contribute to the project visit |CONTRIBUTING.rst| -Before committing changes to github or raising a pull request, code needs to be checked for certain quality standards -such as spell check, code syntax, code formatting, compatibility with Apache License requirements etc. This set of -tests are applied when you commit your code. +.. |CONTRIBUTING.rst| raw:: html -.. raw:: html + CONTRIBUTING.rst -
- CI tests GitHub -
+- Following are some of important links of CONTRIBUTING.rst + - |Types of contributions| -To avoid burden on CI infrastructure and to save time, Pre-commit hooks can be run locally before committing changes. + .. |Types of contributions| raw:: html -1. Installing required packages + + Types of contributions -.. code-block:: bash - $ sudo apt install libxml2-utils + - |Roles of contributor| -2. Installing required Python packages + .. |Roles of contributor| raw:: html -.. code-block:: bash + Roles of + contributor - $ pyenv activate airflow-env - $ pip install pre-commit -3. Go to your project directory + - |Workflow for a contribution| -.. code-block:: bash + .. |Workflow for a contribution| raw:: html - $ cd ~/Projects/airflow + + Workflow for a contribution -1. Running pre-commit hooks -.. code-block:: bash - - $ pre-commit run --all-files - No-tabs checker......................................................Passed - Add license for all SQL files........................................Passed - Add license for all other files......................................Passed - Add license for all rst files........................................Passed - Add license for all JS/CSS/PUML files................................Passed - Add license for all JINJA template files.............................Passed - Add license for all shell files......................................Passed - Add license for all python files.....................................Passed - Add license for all XML files........................................Passed - Add license for all yaml files.......................................Passed - Add license for all md files.........................................Passed - Add license for all mermaid files....................................Passed - Add TOC for md files.................................................Passed - Add TOC for upgrade documentation....................................Passed - Check hooks apply to the repository..................................Passed - black................................................................Passed - Check for merge conflicts............................................Passed - Debug Statements (Python)............................................Passed - Check builtin type constructor use...................................Passed - Detect Private Key...................................................Passed - Fix End of Files.....................................................Passed - ........................................................................... - -5. Running pre-commit for selected files - -.. code-block:: bash - - $ pre-commit run --files airflow/decorators.py tests/utils/test_task_group.py - - - -6. Running specific hook for selected files - -.. code-block:: bash - - $ pre-commit run black --files airflow/decorators.py tests/utils/test_task_group.py - black...............................................................Passed - $ pre-commit run flake8 --files airflow/decorators.py tests/utils/test_task_group.py - Run flake8..........................................................Passed - - - - -7. Running specific checks in container using shell scripts. Scripts are located in ``./scripts/in_container`` - directory. - -.. 
code-block:: bash - - root@df8927308887:/opt/airflow# ./scripts/in_container/ - bin/ run_flake8.sh* - check_environment.sh* run_generate_constraints.sh* - entrypoint_ci.sh* run_init_script.sh* - entrypoint_exec.sh* run_install_and_test_provider_packages.sh* - _in_container_script_init.sh* run_mypy.sh* - prod/ run_prepare_provider_packages.sh* - run_ci_tests.sh* run_prepare_provider_documentation.sh* - run_clear_tmp.sh* run_system_tests.sh* - run_docs_build.sh* run_tmux_welcome.sh* - run_extract_tests.sh* stop_tmux_airflow.sh* - run_fix_ownership.sh* update_quarantined_test_status.py* - - - root@df8927308887:/opt/airflow# ./scripts/in_container/run_docs_build.sh - - - - -8. Enabling Pre-commit check before push. It will run pre-commit automatically before committing and stops the commit - -.. code-block:: bash - - $ cd ~/Projects/airflow - $ pre-commit install - $ git commit -m "Added xyz" - -9. To disable Pre-commit - -.. code-block:: bash - - $ cd ~/Projects/airflow - $ pre-commit uninstall - - -- For more information on visit |STATIC_CODE_CHECKS.rst| - -.. |STATIC_CODE_CHECKS.rst| raw:: html - - - STATIC_CODE_CHECKS.rst - -- Following are some of the important links of STATIC_CODE_CHECKS.rst - - - |Pre-commit Hooks| - - .. |Pre-commit Hooks| raw:: html - - - Pre-commit Hooks - - - |Running Static Code Checks via Breeze| - - .. |Running Static Code Checks via Breeze| raw:: html - - Running Static Code Checks via Breeze - - - - - -Contribution guide -~~~~~~~~~~~~~~~~~~ - -- To know how to contribute to the project visit |CONTRIBUTING.rst| - -.. |CONTRIBUTING.rst| raw:: html - - CONTRIBUTING.rst - -- Following are some of important links of CONTRIBUTING.rst - - - |Types of contributions| - - .. |Types of contributions| raw:: html - - - Types of contributions - - - - |Roles of contributor| - - .. |Roles of contributor| raw:: html - - Roles of - contributor - - - - |Workflow for a contribution| - - .. |Workflow for a contribution| raw:: html - - - Workflow for a contribution - - - -Raising Pull Request -~~~~~~~~~~~~~~~~~~~~ - -1. Go to your GitHub account and open your fork project and click on Branches - - .. raw:: html - -
- Goto fork and select branches -
- -2. Click on ``New pull request`` button on branch from which you want to raise a pull request. - - .. raw:: html - -
- Accessing local airflow -
- -3. Add title and description as per Contributing guidelines and click on ``Create pull request``. - - .. raw:: html - -
- Accessing local airflow -
- - -Syncing Fork and rebasing Pull request -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Often it takes several days or weeks to discuss and iterate with the PR until it is ready to merge. -In the meantime new commits are merged, and you might run into conflicts, therefore you should periodically -synchronize main in your fork with the ``apache/airflow`` main and rebase your PR on top of it. Following -describes how to do it. - - -- |Syncing fork| - -.. |Syncing fork| raw:: html - - - Update new changes made to apache:airflow project to your fork - - -- |Rebasing pull request| - -.. |Rebasing pull request| raw:: html - - - Rebasing pull request - -.. raw:: html - - - - - -Setup and develop using Visual Studio Code -########################################## - -.. raw:: html - -
- Setup and develop using Visual Studio Code - - - -Setup Airflow with Breeze -------------------------- - - - -.. note:: - - Only ``pip`` installation is currently officially supported. - - While they are some successes with using other tools like `poetry `_ or - `pip-tools `_, they do not share the same workflow as - ``pip`` - especially when it comes to constraint vs. requirements management. - Installing via ``Poetry`` or ``pip-tools`` is not currently supported. - - If you wish to install airflow using those tools you should use the constraint files and convert - them to appropriate format and workflow that your tool requires. - - -Forking and cloning Project -~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -1. Goto |airflow_github| and fork the project. - - .. |airflow_github| raw:: html - - https://github.com/apache/airflow/ - - .. raw:: html - -
- Forking Apache Airflow project -
- -2. Goto your github account's fork of airflow click on ``Code`` and copy the clone link. - - .. raw:: html - -
- Cloning github fork of Apache airflow -
- - - -3. Open your IDE or source code editor and select the option to clone the repository - - .. raw:: html - -
- Cloning github fork to Visual Studio Code -
- - -4. Paste the copied clone link in the URL field and submit. - - .. raw:: html - -
- Cloning github fork to Visual Studio Code -
- - -Setting up Breeze -~~~~~~~~~~~~~~~~~ -1. Open terminal and enter into virtual environment ``airflow-env`` and goto project directory - -.. code-block:: bash - - $ pyenv activate airflow-env - $ cd ~/Projects/airflow/ - -2. Initializing breeze autocomplete - -.. code-block:: bash - - $ ./breeze setup-autocomplete - $ source ~/.bash_completion.d/breeze-complete - -3. Initialize breeze environment with required python version and backend. This may take a while for first time. - -.. code-block:: bash - - $ ./breeze --python 3.8 --backend mysql - -.. note:: - If you encounter an error like "docker.credentials.errors.InitializationError: - docker-credential-secretservice not installed or not available in PATH", you may execute the following command to fix it: - - .. code-block:: bash - - $ sudo apt install golang-docker-credential-helper - - Once the package is installed, execute the breeze command again to resume image building. - -4. Once the breeze environment is initialized, create airflow tables and users from the breeze CLI. ``airflow db reset`` - is required to execute at least once for Airflow Breeze to get the database/tables created. - -.. code-block:: bash - - root@b76fcb399bb6:/opt/airflow# airflow db reset - root@b76fcb399bb6:/opt/airflow# airflow users create --role Admin --username admin --password admin \ - --email admin@example.com --firstname foo --lastname bar - - -5. Closing Breeze environment. After successfully finishing above command will leave you in container, - type ``exit`` to exit the container - -.. code-block:: bash - - root@b76fcb399bb6:/opt/airflow# - root@b76fcb399bb6:/opt/airflow# exit - -.. code-block:: bash - - $ ./breeze stop - -Installing airflow in the local virtual environment ``airflow-env`` with breeze. -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -1. It may require some packages to be installed; watch the output of the command to see which ones are missing. - -.. code-block:: bash - - $ sudo apt-get install sqlite libsqlite3-dev default-libmysqlclient-dev postgresql - $ ./breeze initialize-local-virtualenv --python 3.8 - - -2. Add following line to ~/.bashrc in order to call breeze command from anywhere. - -.. code-block:: bash - - export PATH=${PATH}:"/home/${USER}/Projects/airflow" - source ~/.bashrc - -Using Breeze -~~~~~~~~~~~~ - -1. Starting breeze environment using ``breeze start-airflow`` starts Breeze environment with last configuration run( - In this case python and backend will be picked up from last execution ``./breeze --python 3.8 --backend mysql``) - It also automatically starts webserver, backend and scheduler. It drops you in tmux with scheduler in bottom left - and webserver in bottom right. Use ``[Ctrl + B] and Arrow keys`` to navigate. - -.. code-block:: bash - - $ breeze start-airflow - - Use CI image. 
- - Branch name: main - Docker image: apache/airflow:main-python3.8-ci - Airflow source version: 2.0.0b2 - Python version: 3.8 - Backend: mysql 5.7 - - - Port forwarding: - - Ports are forwarded to the running docker containers for webserver and database - * 28080 -> forwarded to Airflow webserver -> airflow:8080 - * 25555 -> forwarded to Flower dashboard -> airflow:5555 - * 25433 -> forwarded to Postgres database -> postgres:5432 - * 23306 -> forwarded to MySQL database -> mysql:3306 - * 26379 -> forwarded to Redis broker -> redis:6379 - - Here are links to those services that you can use on host: - * Webserver: http://127.0.0.1:28080 - * Flower: http://127.0.0.1:25555 - * Postgres: jdbc:postgresql://127.0.0.1:25433/airflow?user=postgres&password=airflow - * Mysql: jdbc:mysql://127.0.0.1:23306/airflow?user=root - * Redis: redis://127.0.0.1:26379/0 - - -.. raw:: html - -
- Accessing local airflow -
- - -- Alternatively you can start the same using following commands - - 1. Start Breeze - - .. code-block:: bash - - $ breeze --python 3.8 --backend mysql - - 2. Open tmux - - .. code-block:: bash - - $ root@0c6e4ff0ab3d:/opt/airflow# tmux - - 3. Press Ctrl + B and " - - .. code-block:: bash - - $ root@0c6e4ff0ab3d:/opt/airflow# airflow scheduler - - - 4. Press Ctrl + B and % - - .. code-block:: bash - - $ root@0c6e4ff0ab3d:/opt/airflow# airflow webserver - - - - -2. Now you can access airflow web interface on your local machine at |http://127.0.0.1:28080| with user name ``admin`` - and password ``admin``. - - .. |http://127.0.0.1:28080| raw:: html - - http://127.0.0.1:28080 - - .. raw:: html - -
- Accessing local airflow -
- -3. Setup mysql database in - MySQL Workbench with Host ``127.0.0.1``, port ``23306``, user ``root`` and password - blank(leave empty), default schema ``airflow``. - - .. raw:: html - -
- Connecting to mysql -
- -4. Stopping breeze - -.. code-block:: bash - - root@f3619b74c59a:/opt/airflow# stop_airflow - root@f3619b74c59a:/opt/airflow# exit - $ breeze stop - -5. Knowing more about Breeze - -.. code-block:: bash - - $ breeze --help - - -For more information visit : |Breeze documentation| - -.. |Breeze documentation| raw:: html - - Breeze documentation - -Following are some of important topics of Breeze documentation: - - -- |Choosing different Breeze environment configuration| - -.. |Choosing different Breeze environment configuration| raw:: html - - Choosing different Breeze environment configuration - - -- |Troubleshooting Breeze environment| - -.. |Troubleshooting Breeze environment| raw:: html - - Troubleshooting - Breeze environment - - -- |Installing Additional tools to the Docker Image| - -.. |Installing Additional tools to the Docker Image| raw:: html - - Installing - Additional tools to the Docker Image - - -- |Internal details of Breeze| - -.. |Internal details of Breeze| raw:: html - - - Internal details of Breeze - - -- |Breeze Command-Line Interface Reference| - -.. |Breeze Command-Line Interface Reference| raw:: html - - Breeze Command-Line Interface Reference - - -- |Cleaning the environment| - -.. |Cleaning the environment| raw:: html - - - Cleaning the environment - - -- |Other uses of the Airflow Breeze environment| - -.. |Other uses of the Airflow Breeze environment| raw:: html - - Other uses of the Airflow Breeze environment - - - -Setting up Debug -~~~~~~~~~~~~~~~~ - -1. Configuring Airflow database connection - -- Airflow is by default configured to use SQLite database. Configuration can be seen on local machine - ``~/airflow/airflow.cfg`` under ``sql_alchemy_conn``. - -- Installing required dependency for MySQL connection in ``airflow-env`` on local machine. - - .. code-block:: bash - - $ pyenv activate airflow-env - $ pip install PyMySQL - -- Now set ``sql_alchemy_conn = mysql+pymysql://root:@127.0.0.1:23306/airflow?charset=utf8mb4`` in file - ``~/airflow/airflow.cfg`` on local machine. - -1. Debugging an example DAG - -- In Visual Studio Code open airflow project, directory ``/files/dags`` of local machine is by default mounted to docker - machine when breeze airflow is started. So any DAG file present in this directory will be picked automatically by - scheduler running in docker machine and same can be seen on ``http://127.0.0.1:28080``. - -- Copy any example DAG present in the ``/airflow/example_dags`` directory to ``/files/dags/``. - -- Add a ``__main__`` block at the end of your DAG file to make it runnable. It will run a ``back_fill`` job: - - .. code-block:: python - - - if __name__ == "__main__": - dag.clear() - dag.run() - -- Add ``"AIRFLOW__CORE__EXECUTOR": "DebugExecutor"`` to the ``"env"`` field of Debug configuration. - - - Using the ``Run`` view click on ``Create a launch.json file`` - - .. raw:: html - -
- Add Debug Configuration to Visual Studio Code - Add Debug Configuration to Visual Studio Code - Add Debug Configuration to Visual Studio Code -
- - - Change ``"program"`` to point to an example dag and add ``"env"`` and ``"python"`` fields to the new Python configuration - - .. raw:: html - -
- Add environment variable to Visual Studio Code Debug configuration -
- -- Now Debug an example dag and view the entries in tables such as ``dag_run, xcom`` etc in mysql workbench. - - - -Starting development --------------------- - - -Creating a branch -~~~~~~~~~~~~~~~~~ - -1. Click on the branch symbol in the status bar - - .. raw:: html - -
- Creating a new branch -
- -2. Give a name to a branch and checkout - - .. raw:: html - -
- Giving a name to a branch -
- - - -Testing -~~~~~~~ - -All Tests are inside ./tests directory. - -- Running Unit tests inside Breeze environment. - - Just run ``pytest filepath+filename`` to run the tests. - -.. code-block:: bash - - root@63528318c8b1:/opt/airflow# pytest tests/utils/test_decorators.py - ======================================= test session starts ======================================= - platform linux -- Python 3.8.6, pytest-6.0.1, py-1.9.0, pluggy-0.13.1 -- /usr/local/bin/python - cachedir: .pytest_cache - rootdir: /opt/airflow, configfile: pytest.ini - plugins: celery-4.4.7, requests-mock-1.8.0, xdist-1.34.0, flaky-3.7.0, rerunfailures-9.0, instafail - -0.4.2, forked-1.3.0, timeouts-1.2.1, cov-2.10.0 - setup timeout: 0.0s, execution timeout: 0.0s, teardown timeout: 0.0s - collected 3 items - - tests/utils/test_decorators.py::TestApplyDefault::test_apply PASSED [ 33%] - tests/utils/test_decorators.py::TestApplyDefault::test_default_args PASSED [ 66%] - tests/utils/test_decorators.py::TestApplyDefault::test_incorrect_default_args PASSED [100%] - - ======================================== 3 passed in 1.49s ======================================== - -- Running All the test with Breeze by specifying required python version, backend, backend version - -.. code-block:: bash - - $ breeze --backend mysql --mysql-version 5.7 --python 3.8 --db-reset --test-type All tests - - -- Running specific test in container using shell scripts. Testing in container scripts are located in - ``./scripts/in_container`` directory. - -.. code-block:: bash - - root@df8927308887:/opt/airflow# ./scripts/in_container/ - bin/ run_flake8.sh* - check_environment.sh* run_generate_constraints.sh* - entrypoint_ci.sh* run_init_script.sh* - entrypoint_exec.sh* run_install_and_test_provider_packages.sh* - _in_container_script_init.sh* run_mypy.sh* - prod/ run_prepare_provider_packages.sh* - run_ci_tests.sh* run_prepare_provider_documentation.sh* - run_clear_tmp.sh* run_system_tests.sh* - run_docs_build.sh* run_tmux_welcome.sh* - run_extract_tests.sh* stop_tmux_airflow.sh* - run_fix_ownership.sh* update_quarantined_test_status.py* - - root@df8927308887:/opt/airflow# ./scripts/in_container/run_docs_build.sh - -- Running specific type of test - - - Types of tests - - - Running specific type of test - - .. code-block:: bash - - $ breeze --backend mysql --mysql-version 5.7 --python 3.8 --db-reset --test-type Core - - -- Running Integration test for specific test type - - - Running an Integration Test - - .. code-block:: bash - - $ breeze --backend mysql --mysql-version 5.7 --python 3.8 --db-reset --test-type All --integration mongo - - -- For more information on Testing visit : |TESTING.rst| - -.. |TESTING.rst| raw:: html - - TESTING.rst - -- Following are the some of important topics of TESTING.rst - - - |Airflow Test Infrastructure| - - .. |Airflow Test Infrastructure| raw:: html - - - Airflow Test Infrastructure - - - - |Airflow Unit Tests| - - .. |Airflow Unit Tests| raw:: html - - Airflow Unit - Tests - - - - |Helm Unit Tests| - - .. |Helm Unit Tests| raw:: html - - Helm Unit Tests - - - - - |Airflow Integration Tests| - - .. |Airflow Integration Tests| raw:: html - - - Airflow Integration Tests - - - - |Running Tests with Kubernetes| - - .. |Running Tests with Kubernetes| raw:: html - - - Running Tests with Kubernetes - - - - |Airflow System Tests| - - .. |Airflow System Tests| raw:: html - - Airflow - System Tests - - - - |Local and Remote Debugging in IDE| - - .. 
|Local and Remote Debugging in IDE| raw:: html - - Local and Remote Debugging in IDE - - -Pre-commit -~~~~~~~~~~ - -Before committing changes to github or raising a pull request, code needs to be checked for certain quality standards -such as spell check, code syntax, code formatting, compatibility with Apache License requirements etc. This set of -tests are applied when you commit your code. - -.. raw:: html - -
- CI tests GitHub -
- - -To avoid burden on CI infrastructure and to save time, Pre-commit hooks can be run locally before committing changes. - -1. Installing required packages - -.. code-block:: bash - - $ sudo apt install libxml2-utils - -2. Installing required Python packages - -.. code-block:: bash - - $ pyenv activate airflow-env - $ pip install pre-commit - -3. Go to your project directory - -.. code-block:: bash - - $ cd ~/Projects/airflow - - -1. Running pre-commit hooks - -.. code-block:: bash - - $ pre-commit run --all-files - No-tabs checker......................................................Passed - Add license for all SQL files........................................Passed - Add license for all other files......................................Passed - Add license for all rst files........................................Passed - Add license for all JS/CSS/PUML files................................Passed - Add license for all JINJA template files.............................Passed - Add license for all shell files......................................Passed - Add license for all python files.....................................Passed - Add license for all XML files........................................Passed - Add license for all yaml files.......................................Passed - Add license for all md files.........................................Passed - Add license for all mermaid files....................................Passed - Add TOC for md files.................................................Passed - Add TOC for upgrade documentation....................................Passed - Check hooks apply to the repository..................................Passed - black................................................................Passed - Check for merge conflicts............................................Passed - Debug Statements (Python)............................................Passed - Check builtin type constructor use...................................Passed - Detect Private Key...................................................Passed - Fix End of Files.....................................................Passed - ........................................................................... - -5. Running pre-commit for selected files - -.. code-block:: bash - - $ pre-commit run --files airflow/decorators.py tests/utils/test_task_group.py - - - -6. Running specific hook for selected files - -.. code-block:: bash - - $ pre-commit run black --files airflow/decorators.py tests/utils/test_task_group.py - black...............................................................Passed - $ pre-commit run flake8 --files airflow/decorators.py tests/utils/test_task_group.py - Run flake8..........................................................Passed - - - - -7. Running specific checks in container using shell scripts. Scripts are located in ``./scripts/in_container`` - directory. - -.. 
code-block:: bash - - root@df8927308887:/opt/airflow# ./scripts/in_container/ - bin/ run_flake8.sh* - check_environment.sh* run_generate_constraints.sh* - entrypoint_ci.sh* run_init_script.sh* - entrypoint_exec.sh* run_install_and_test_provider_packages.sh* - _in_container_script_init.sh* run_mypy.sh* - prod/ run_prepare_provider_packages.sh* - run_ci_tests.sh* run_prepare_provider_documentation.sh* - run_clear_tmp.sh* run_system_tests.sh* - run_docs_build.sh* run_tmux_welcome.sh* - run_extract_tests.sh* stop_tmux_airflow.sh* - run_fix_ownership.sh* update_quarantined_test_status.py* - - - root@df8927308887:/opt/airflow# ./scripts/in_container/run_docs_build.sh - - - - -8. Enabling Pre-commit check before push. It will run pre-commit automatically before committing and stops the commit - -.. code-block:: bash - - $ cd ~/Projects/airflow - $ pre-commit install - $ git commit -m "Added xyz" - -9. To disable Pre-commit - -.. code-block:: bash - - $ cd ~/Projects/airflow - $ pre-commit uninstall - - -- For more information on visit |STATIC_CODE_CHECKS.rst| - -.. |STATIC_CODE_CHECKS.rst| raw:: html - - - STATIC_CODE_CHECKS.rst - -- Following are some of the important links of STATIC_CODE_CHECKS.rst - - - |Pre-commit Hooks| - - .. |Pre-commit Hooks| raw:: html - - - Pre-commit Hooks - - - |Running Static Code Checks via Breeze| - - .. |Running Static Code Checks via Breeze| raw:: html - - Running Static Code Checks via Breeze - - - - - -Contribution guide -~~~~~~~~~~~~~~~~~~ - -- To know how to contribute to the project visit |CONTRIBUTING.rst| - -.. |CONTRIBUTING.rst| raw:: html - - CONTRIBUTING.rst - -- Following are some of important links of CONTRIBUTING.rst - - - |Types of contributions| - - .. |Types of contributions| raw:: html - - - Types of contributions - - - - |Roles of contributor| - - .. |Roles of contributor| raw:: html - - Roles of - contributor - - - - |Workflow for a contribution| - - .. |Workflow for a contribution| raw:: html - - - Workflow for a contribution - - - -Raising Pull Request -~~~~~~~~~~~~~~~~~~~~ +Raising Pull Request +-------------------- 1. Go to your GitHub account and open your fork project and click on Branches @@ -1773,7 +766,7 @@ Raising Pull Request Syncing Fork and rebasing Pull request -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +-------------------------------------- Often it takes several days or weeks to discuss and iterate with the PR until it is ready to merge. In the meantime new commits are merged, and you might run into conflicts, therefore you should periodically @@ -1796,265 +789,22 @@ describes how to do it. Rebasing pull request -.. raw:: html - -
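+
+The same sync-and-rebase workflow can be done entirely from the command line. A minimal sketch, assuming your
+fork is the ``origin`` remote, ``apache/airflow`` has been added as ``upstream`` and ``my-pr-branch`` is a
+placeholder for your PR branch:
+
+.. code-block:: bash
+
+   # one-time setup: register the apache/airflow repository as "upstream"
+   $ git remote add upstream https://github.com/apache/airflow.git
+
+   # bring your fork's main branch up to date
+   $ git checkout main
+   $ git fetch upstream
+   $ git merge --ff-only upstream/main
+   $ git push origin main
+
+   # rebase the PR branch on top of the refreshed main and update the PR
+   $ git checkout my-pr-branch
+   $ git rebase main
+   $ git push --force-with-lease origin my-pr-branch
+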
- - -Setup and develop using Gitpod online workspaces -################################################ - -.. raw:: html - -
- Setup and develop using Gitpod online workspaces - - - -Setup Airflow with Breeze -------------------------- - - -Forking and cloning Project -~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -1. Goto |airflow_github| and fork the project. - - .. |airflow_github| raw:: html - - https://github.com/apache/airflow/ - - .. raw:: html - -
- Forking Apache Airflow project -
- -2. Goto your github account's fork of airflow click on ``Code`` and copy the clone link. - - .. raw:: html - -
- Cloning github fork of Apache airflow -
- -3. Add goto https://gitpod.io/# as shown. - - .. raw:: html - -
- Open personal airflow clone with Gitpod -
- -Setting up Breeze -~~~~~~~~~~~~~~~~~ - -1. Breeze is already initialized in one of the terminals in Gitpod - -2. Once the breeze environment is initialized, create airflow tables and users from the breeze CLI. ``airflow db reset`` - is required to execute at least once for Airflow Breeze to get the database/tables created. - -.. note:: - - This step is needed when you would like to run/use webserver. - -.. code-block:: bash - - root@b76fcb399bb6:/opt/airflow# airflow db reset - root@b76fcb399bb6:/opt/airflow# airflow users create --role Admin --username admin --password admin \ - --email admin@example.com --firstname foo --lastname bar - - -3. Closing Breeze environment. After successfully finishing above command will leave you in container, - type ``exit`` to exit the container +Using your IDE +############## -.. code-block:: bash - - root@b76fcb399bb6:/opt/airflow# - root@b76fcb399bb6:/opt/airflow# exit - -.. code-block:: bash - - $ ./breeze stop - - -Installing Airflow with Breeze. -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Gitpod default image have all the required packages installed. - -1. Add following line to ~/.bashrc in order to call breeze command from anywhere. - -.. code-block:: bash - - export PATH=${PATH}:"/workspace/airflow" - source ~/.bashrc - - -Using Breeze -~~~~~~~~~~~~ - -1. Starting breeze environment using ``breeze start-airflow`` starts Breeze environment with last configuration run. - It also automatically starts webserver, backend and scheduler. It drops you in tmux with scheduler in bottom left - and webserver in bottom right. Use ``[Ctrl + B] and Arrow keys`` to navigate. - -.. code-block:: bash - - $ breeze start-airflow - - Use CI image. - - Branch name: main - Docker image: ghcr.io/apache/airflow/main/ci/python3.8:latest - Airflow source version: 2.3.0.dev0 - Python version: 3.8 - Backend: mysql 5.7 - - - Port forwarding: - - Ports are forwarded to the running docker containers for webserver and database - * 12322 -> forwarded to Airflow ssh server -> airflow:22 - * 28080 -> forwarded to Airflow webserver -> airflow:8080 - * 25555 -> forwarded to Flower dashboard -> airflow:5555 - * 25433 -> forwarded to Postgres database -> postgres:5432 - * 23306 -> forwarded to MySQL database -> mysql:3306 - * 21433 -> forwarded to MSSQL database -> mssql:1443 - * 26379 -> forwarded to Redis broker -> redis:6379 - - Here are links to those services that you can use on host: - * ssh connection for remote debugging: ssh -p 12322 airflow@127.0.0.1 pw: airflow - * Webserver: http://127.0.0.1:28080 - * Flower: http://127.0.0.1:25555 - * Postgres: jdbc:postgresql://127.0.0.1:25433/airflow?user=postgres&password=airflow - * Mysql: jdbc:mysql://127.0.0.1:23306/airflow?user=root - * Redis: redis://127.0.0.1:26379/0 - -.. raw:: html - -
- Accessing local airflow -
- -2. You can access the ports as shown - -.. raw:: html - -
- Accessing ports via VSCode UI -
+If you are familiar with Python development and use your favourite editors, Airflow can be setup +similarly to other projects of yours. However, if you need specific instructions for your IDE you +will find more detailed instructions here: +* `Pycharm/IntelliJ `_ +* `Visual Studio Code `_ -Starting development --------------------- - - -Creating a branch -~~~~~~~~~~~~~~~~~ - -1. Click on the branch symbol in the status bar - - .. raw:: html - -
- Creating a new branch -
- -2. Give a name to a branch and checkout - - .. raw:: html - -
- Giving a name to a branch -
- - - -Testing -~~~~~~~ - -All Tests are inside ``./tests`` directory. - -- Running Unit tests inside Breeze environment. - - Just run ``pytest filepath+filename`` to run the tests. - -.. code-block:: bash - - root@4a2143c17426:/opt/airflow# pytest tests/utils/test_session.py - ======================================= test session starts ======================================= - platform linux -- Python 3.7.12, pytest-6.2.5, py-1.11.0, pluggy-1.0.0 -- /usr/local/bin/python - cachedir: .pytest_cache - rootdir: /opt/airflow, configfile: pytest.ini - plugins: anyio-3.3.4, flaky-3.7.0, asyncio-0.16.0, cov-3.0.0, forked-1.3.0, httpx-0.15.0, instafail-0.4.2, rerunfailures-9.1.1, timeouts-1.2.1, xdist-2.4.0, requests-mock-1.9.3 - setup timeout: 0.0s, execution timeout: 0.0s, teardown timeout: 0.0s - collected 4 items - - tests/utils/test_session.py::TestSession::test_raised_provide_session PASSED [ 25%] - tests/utils/test_session.py::TestSession::test_provide_session_without_args_and_kwargs PASSED [ 50%] - tests/utils/test_session.py::TestSession::test_provide_session_with_args PASSED [ 75%] - tests/utils/test_session.py::TestSession::test_provide_session_with_kwargs PASSED [100%] - - ====================================== 4 passed, 11 warnings in 33.14s ====================================== +Using Remote development environments +##################################### -- Running All the tests with Breeze by specifying required Python version, backend, backend version +In order to use remote development environment, you usually need a paid account, but you do not have to +setup local machine for development. -.. code-block:: bash - - $ breeze --backend mysql --mysql-version 5.7 --python 3.8 --db-reset --test-type All tests - - -- Running specific test in container using shell scripts. Testing in container scripts are located in - ``./scripts/in_container`` directory. - -.. code-block:: bash - - root@4a2143c17426:/opt/airflow# ls ./scripts/in_container/ - _in_container_script_init.sh quarantine_issue_header.md run_mypy.sh - _in_container_utils.sh run_anything.sh run_prepare_airflow_packages.sh - airflow_ci.cfg run_ci_tests.sh run_prepare_provider_documentation.sh - bin run_docs_build.sh run_prepare_provider_packages.sh - check_environment.sh run_extract_tests.sh run_resource_check.sh - check_junitxml_result.py run_fix_ownership.sh run_system_tests.sh - configure_environment.sh run_flake8.sh run_tmux_welcome.sh - entrypoint_ci.sh run_generate_constraints.sh stop_tmux_airflow.sh - entrypoint_exec.sh run_init_script.sh update_quarantined_test_status.py - prod run_install_and_test_provider_packages.sh - - root@df8927308887:/opt/airflow# ./scripts/in_container/run_docs_build.sh - -- Running specific type of test - - - Types of tests - - - Running specific type of test - - .. note:: - - Before starting a new instance, let's clear the volume and databases "fresh like a daisy". You - can do this by: - - .. code-block::bash - - $ breeze stop - - .. code-block:: bash - - $ breeze --backend mysql --mysql-version 5.7 --python 3.8 --db-reset --test-type Core - - -- Running Integration test for specific test type - - - Running an Integration Test - - .. 
code-block:: bash - - $ breeze --backend mysql --mysql-version 5.7 --python 3.8 --db-reset --test-type All --integration mongo +* `GitPod `_ +* `GitHub Codespaces `_ diff --git a/CONTRIBUTORS_QUICK_START_CODESPACES.rst b/CONTRIBUTORS_QUICK_START_CODESPACES.rst new file mode 100644 index 0000000000000..70e0a8b3f47cd --- /dev/null +++ b/CONTRIBUTORS_QUICK_START_CODESPACES.rst @@ -0,0 +1,45 @@ + .. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + .. http://www.apache.org/licenses/LICENSE-2.0 + + .. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + + +Setup and develop using GitHub Codespaces +######################################### + +1. Goto |airflow_github| and fork the project. + + .. |airflow_github| raw:: html + + https://github.com/apache/airflow/ + + .. raw:: html + +
+ Forking Apache Airflow project +
+ +2. Follow `Codespaces Quickstart `_ to start + a new codespace. + +3. Once the codespace starts your terminal should be already in ``Breeze`` environment and you should + be able to edit and run the tests in VS Code interface. + +4. You can use `Quick start quide for Visual Studio Code `_ for details + as Codespaces use Visual Studio Code as interface. + + +Follow the `Quick start `_ for typical development tasks. diff --git a/CONTRIBUTORS_QUICK_START_GITPOD.rst b/CONTRIBUTORS_QUICK_START_GITPOD.rst new file mode 100644 index 0000000000000..3615d300978b9 --- /dev/null +++ b/CONTRIBUTORS_QUICK_START_GITPOD.rst @@ -0,0 +1,81 @@ + .. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + .. http://www.apache.org/licenses/LICENSE-2.0 + + .. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +.. contents:: :local: + +Connect your project to Gitpod +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +1. Goto |airflow_github| and fork the project. + + .. |airflow_github| raw:: html + + https://github.com/apache/airflow/ + + .. raw:: html + +
+ Forking Apache Airflow project +
+
+2. Go to your GitHub account's fork of Airflow, click on ``Code`` and copy the clone link.
+
+   .. raw:: html
+
+
+ Cloning github fork of Apache airflow +
+
+3. Then go to https://gitpod.io/# as shown.
+
+   .. raw:: html
+
+
+ Open personal airflow clone with Gitpod +
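+
+The Gitpod workspace URL is simply ``https://gitpod.io/#`` followed by the URL of your fork. A hypothetical
+example, opened from a Linux terminal (replace the GitHub handle with your own):
+
+.. code-block:: bash
+
+   $ xdg-open "https://gitpod.io/#https://github.com/<your-github-username>/airflow"
+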
+ +Set up Breeze in Gitpod +~~~~~~~~~~~~~~~~~~~~~~~ + +Gitpod default image have all the required packages installed. + +1. Run ``pipx install -e ./dev/breeze`` to install Breeze + +2. Run ``breeze`` to enter breeze in Gitpod. + +Setting up database in Breeze +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Once you enter breeze environment is initialized, create airflow tables and users from the breeze CLI. +The ``airflow db reset`` command is required to execute at least once for Airflow Breeze to +get the database/tables created. When you run the tests, your database will be initialized automatically +the first time you run tests. + +.. note:: + + This step is needed when you would like to run/use webserver. + +.. code-block:: bash + + root@b76fcb399bb6:/opt/airflow# airflow db reset + root@b76fcb399bb6:/opt/airflow# airflow users create --role Admin --username admin --password admin \ + --email admin@example.com --firstname foo --lastname bar + +Follow the `Quick start `_ for typical development tasks. diff --git a/CONTRIBUTORS_QUICK_START_PYCHARM.rst b/CONTRIBUTORS_QUICK_START_PYCHARM.rst new file mode 100644 index 0000000000000..88c04c5545036 --- /dev/null +++ b/CONTRIBUTORS_QUICK_START_PYCHARM.rst @@ -0,0 +1,132 @@ + .. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + .. http://www.apache.org/licenses/LICENSE-2.0 + + .. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +.. contents:: :local: + +Setup your project +################## + +1. Open your IDE or source code editor and select the option to clone the repository + + .. raw:: html + +
+ Cloning github fork to Pycharm +
+ + +2. Paste the repository link in the URL field and submit. + + .. raw:: html + +
+ Cloning github fork to Pycharm +
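+
+If you prefer the command line, the same setup can be done with plain ``git`` and the project opened in PyCharm
+afterwards. A minimal sketch, assuming a hypothetical GitHub handle and the ``~/Projects/airflow`` location used
+elsewhere in this guide:
+
+.. code-block:: bash
+
+   $ git clone https://github.com/<your-github-username>/airflow.git ~/Projects/airflow
+   $ cd ~/Projects/airflow
+   # register the upstream repository so you can sync your fork later
+   $ git remote add upstream https://github.com/apache/airflow.git
+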
+ +Setting up debugging +#################### + +It requires "airflow-env" virtual environment configured locally. + +1. Configuring Airflow database connection + +- Airflow is by default configured to use SQLite database. Configuration can be seen on local machine + ``~/airflow/airflow.cfg`` under ``sql_alchemy_conn``. + +- Installing required dependency for MySQL connection in ``airflow-env`` on local machine. + + .. code-block:: bash + + $ pyenv activate airflow-env + $ pip install PyMySQL + +- Now set ``sql_alchemy_conn = mysql+pymysql://root:@127.0.0.1:23306/airflow?charset=utf8mb4`` in file + ``~/airflow/airflow.cfg`` on local machine. + +2. Debugging an example DAG + +- Add Interpreter to PyCharm pointing interpreter path to ``~/.pyenv/versions/airflow-env/bin/python``, which is virtual + environment ``airflow-env`` created with pyenv earlier. For adding an Interpreter go to ``File -> Setting -> Project: + airflow -> Python Interpreter``. + + .. raw:: html + +
+ Adding existing interpreter +
+ +- In PyCharm IDE open airflow project, directory ``/files/dags`` of local machine is by default mounted to docker + machine when breeze airflow is started. So any DAG file present in this directory will be picked automatically by + scheduler running in docker machine and same can be seen on ``http://127.0.0.1:28080``. + +- Copy any example DAG present in the ``/airflow/example_dags`` directory to ``/files/dags/``. + +- Add a ``__main__`` block at the end of your DAG file to make it runnable. It will run a ``back_fill`` job: + + .. code-block:: python + + if __name__ == "__main__": + dag.clear() + dag.run() + +- Add ``AIRFLOW__CORE__EXECUTOR=DebugExecutor`` to Environment variable of Run Configuration. + + - Click on Add configuration + + .. raw:: html + +
+ Add Configuration pycharm +
+ + - Add Script Path and Environment Variable to new Python configuration + + .. raw:: html + +
+ Add environment variable pycharm +
+
+- Now debug an example DAG and view the entries in tables such as ``dag_run`` and ``xcom`` in MySQL Workbench.
+
+Creating a branch
+#################
+
+1. Click on the branch symbol in the status bar
+
+   .. raw:: html
+
+
+ Creating a new branch +
+ +2. Give a name to a branch and checkout + + .. raw:: html + +
+ Giving a name to a branch +
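+
+The equivalent of the two IDE steps above on the command line is plain ``git``; the branch name below is just an
+example:
+
+.. code-block:: bash
+
+   $ git checkout -b my-feature-branch
+   # publish the branch to your fork when you are ready
+   $ git push --set-upstream origin my-feature-branch
+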
+ +Follow the `Quick start `_ for typical development tasks. diff --git a/CONTRIBUTORS_QUICK_START_VSCODE.rst b/CONTRIBUTORS_QUICK_START_VSCODE.rst new file mode 100644 index 0000000000000..c1baf3191017c --- /dev/null +++ b/CONTRIBUTORS_QUICK_START_VSCODE.rst @@ -0,0 +1,125 @@ + .. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + .. http://www.apache.org/licenses/LICENSE-2.0 + + .. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +.. contents:: :local: + +Setup your project +################## + +1. Open your IDE or source code editor and select the option to clone the repository + + .. raw:: html + +
+ Cloning github fork to Visual Studio Code +
+ + +2. Paste the copied clone link in the URL field and submit. + + .. raw:: html + +
+ Cloning github fork to Visual Studio Code +
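+
+You can also clone from the terminal and open the folder in Visual Studio Code afterwards. A minimal sketch,
+assuming a hypothetical GitHub handle and that the ``code`` command-line launcher is on your ``PATH``:
+
+.. code-block:: bash
+
+   $ git clone https://github.com/<your-github-username>/airflow.git ~/Projects/airflow
+   $ code ~/Projects/airflow
+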
+ + +Setting up debugging +#################### + +1. Configuring Airflow database connection + +- Airflow is by default configured to use SQLite database. Configuration can be seen on local machine + ``~/airflow/airflow.cfg`` under ``sql_alchemy_conn``. + +- Installing required dependency for MySQL connection in ``airflow-env`` on local machine. + + .. code-block:: bash + + $ pyenv activate airflow-env + $ pip install PyMySQL + +- Now set ``sql_alchemy_conn = mysql+pymysql://root:@127.0.0.1:23306/airflow?charset=utf8mb4`` in file + ``~/airflow/airflow.cfg`` on local machine. + +1. Debugging an example DAG + +- In Visual Studio Code open airflow project, directory ``/files/dags`` of local machine is by default mounted to docker + machine when breeze airflow is started. So any DAG file present in this directory will be picked automatically by + scheduler running in docker machine and same can be seen on ``http://127.0.0.1:28080``. + +- Copy any example DAG present in the ``/airflow/example_dags`` directory to ``/files/dags/``. + +- Add a ``__main__`` block at the end of your DAG file to make it runnable. It will run a ``back_fill`` job: + + .. code-block:: python + + + if __name__ == "__main__": + dag.clear() + dag.run() + +- Add ``"AIRFLOW__CORE__EXECUTOR": "DebugExecutor"`` to the ``"env"`` field of Debug configuration. + + - Using the ``Run`` view click on ``Create a launch.json file`` + + .. raw:: html + +
+ Add Debug Configuration to Visual Studio Code + Add Debug Configuration to Visual Studio Code + Add Debug Configuration to Visual Studio Code +
+ + - Change ``"program"`` to point to an example dag and add ``"env"`` and ``"python"`` fields to the new Python configuration + + .. raw:: html + +
+ Add environment variable to Visual Studio Code Debug configuration +
+
+- Now debug an example DAG and view the entries in tables such as ``dag_run`` and ``xcom`` in MySQL Workbench.
+
+Creating a branch
+#################
+
+1. Click on the branch symbol in the status bar
+
+   .. raw:: html
+
+
+ Creating a new branch +
+ +2. Give a name to a branch and checkout + + .. raw:: html + +
+ Giving a name to a branch +
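+
+As an alternative to the launch configuration described earlier in this document, the same DAG can be exercised
+directly from the terminal, because the ``__main__`` block makes the file runnable. A minimal sketch, assuming
+the ``airflow-env`` virtualenv and an example DAG copied to ``files/dags/`` (the file name is just an example):
+
+.. code-block:: bash
+
+   $ pyenv activate airflow-env
+   $ AIRFLOW__CORE__EXECUTOR=DebugExecutor python files/dags/example_bash_operator.py
+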
+ +Follow the `Quick start `_ for typical development tasks. diff --git a/Dockerfile b/Dockerfile index 0149fa861bcba..cab46244ae954 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,3 +1,4 @@ +# syntax=docker/dockerfile:1.4 # Licensed to the Apache Software Foundation (ASF) under one or more # contributor license agreements. See the NOTICE file distributed with # this work for additional information regarding copyright ownership. @@ -34,7 +35,6 @@ # much smaller. # # Use the same builder frontend version for everyone -# syntax=docker/dockerfile:1.3 ARG AIRFLOW_EXTRAS="amazon,async,celery,cncf.kubernetes,dask,docker,elasticsearch,ftp,google,google_auth,grpc,hashicorp,http,ldap,microsoft.azure,mysql,odbc,pandas,postgres,redis,sendgrid,sftp,slack,ssh,statsd,virtualenv" ARG ADDITIONAL_AIRFLOW_EXTRAS="" ARG ADDITIONAL_PYTHON_DEPS="" @@ -44,11 +44,11 @@ ARG AIRFLOW_UID="50000" ARG AIRFLOW_USER_HOME_DIR=/home/airflow # latest released version here -ARG AIRFLOW_VERSION="2.2.4" +ARG AIRFLOW_VERSION="2.3.1" ARG PYTHON_BASE_IMAGE="python:3.7-slim-bullseye" -ARG AIRFLOW_PIP_VERSION=22.0.4 +ARG AIRFLOW_PIP_VERSION=22.1.2 ARG AIRFLOW_IMAGE_REPOSITORY="https://github.com/apache/airflow" ARG AIRFLOW_IMAGE_README_URL="https://raw.githubusercontent.com/apache/airflow/main/docs/docker-stack/README.md" @@ -58,6 +58,1034 @@ ARG AIRFLOW_VERSION_SPECIFICATION="" # By default PIP has progress bar but you can disable it. ARG PIP_PROGRESS_BAR="on" + +############################################################################################## +# This is the script image where we keep all inlined bash scripts needed in other segments +############################################################################################## +FROM scratch as scripts + +############################################################################################## +# Please DO NOT modify the inlined scripts manually. The content of those files will be +# replaced by pre-commit automatically from the "scripts/docker/" folder. 
+# This is done in order to avoid problems with caching and file permissions and in order to +# make the PROD Dockerfile standalone +############################################################################################## + +# The content below is automatically copied from scripts/docker/determine_debian_version_specific_variables.sh +COPY <<"EOF" /determine_debian_version_specific_variables.sh +function determine_debian_version_specific_variables() { + local color_red + color_red=$'\e[31m' + local color_reset + color_reset=$'\e[0m' + + local debian_version + debian_version=$(lsb_release -cs) + if [[ ${debian_version} == "buster" ]]; then + export DISTRO_LIBENCHANT="libenchant-dev" + export DISTRO_LIBGCC="libgcc-8-dev" + export DISTRO_SELINUX="python-selinux" + export DISTRO_LIBFFI="libffi6" + # Note missing man directories on debian-buster + # https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=863199 + mkdir -pv /usr/share/man/man1 + mkdir -pv /usr/share/man/man7 + elif [[ ${debian_version} == "bullseye" ]]; then + export DISTRO_LIBENCHANT="libenchant-2-2" + export DISTRO_LIBGCC="libgcc-10-dev" + export DISTRO_SELINUX="python3-selinux" + export DISTRO_LIBFFI="libffi7" + else + echo + echo "${color_red}Unknown distro version ${debian_version}${color_reset}" + echo + exit 1 + fi +} + +determine_debian_version_specific_variables +EOF + +# The content below is automatically copied from scripts/docker/install_mysql.sh +COPY <<"EOF" /install_mysql.sh +set -euo pipefail +declare -a packages + +MYSQL_VERSION="8.0" +readonly MYSQL_VERSION + +COLOR_BLUE=$'\e[34m' +readonly COLOR_BLUE +COLOR_RESET=$'\e[0m' +readonly COLOR_RESET + +: "${INSTALL_MYSQL_CLIENT:?Should be true or false}" + +install_mysql_client() { + if [[ "${1}" == "dev" ]]; then + packages=("libmysqlclient-dev" "mysql-client") + elif [[ "${1}" == "prod" ]]; then + packages=("libmysqlclient21" "mysql-client") + else + echo + echo "Specify either prod or dev" + echo + exit 1 + fi + + echo + echo "${COLOR_BLUE}Installing mysql client version ${MYSQL_VERSION}: ${1}${COLOR_RESET}" + echo + + local key="467B942D3A79BD29" + readonly key + + GNUPGHOME="$(mktemp -d)" + export GNUPGHOME + set +e + for keyserver in $(shuf -e ha.pool.sks-keyservers.net hkp://p80.pool.sks-keyservers.net:80 \ + keyserver.ubuntu.com hkp://keyserver.ubuntu.com:80) + do + gpg --keyserver "${keyserver}" --recv-keys "${key}" 2>&1 && break + done + set -e + gpg --export "${key}" > /etc/apt/trusted.gpg.d/mysql.gpg + gpgconf --kill all + rm -rf "${GNUPGHOME}" + unset GNUPGHOME + echo "deb http://repo.mysql.com/apt/debian/ $(lsb_release -cs) mysql-${MYSQL_VERSION}" > /etc/apt/sources.list.d/mysql.list + apt-get update + apt-get install --no-install-recommends -y "${packages[@]}" + apt-get autoremove -yqq --purge + apt-get clean && rm -rf /var/lib/apt/lists/* +} + +if [[ $(uname -m) == "arm64" || $(uname -m) == "aarch64" ]]; then + # disable MYSQL for ARM64 + INSTALL_MYSQL_CLIENT="false" +fi + +if [[ ${INSTALL_MYSQL_CLIENT:="true"} == "true" ]]; then + install_mysql_client "${@}" +fi +EOF + +# The content below is automatically copied from scripts/docker/install_mssql.sh +COPY <<"EOF" /install_mssql.sh +set -euo pipefail + +: "${INSTALL_MSSQL_CLIENT:?Should be true or false}" + +COLOR_BLUE=$'\e[34m' +readonly COLOR_BLUE +COLOR_YELLOW=$'\e[33m' +readonly COLOR_YELLOW +COLOR_RESET=$'\e[0m' +readonly COLOR_RESET + +function install_mssql_client() { + # Install MsSQL client from Microsoft repositories + if [[ ${INSTALL_MSSQL_CLIENT:="true"} != "true" ]]; then + echo + echo 
"${COLOR_BLUE}Skip installing mssql client${COLOR_RESET}" + echo + return + fi + echo + echo "${COLOR_BLUE}Installing mssql client${COLOR_RESET}" + echo + local distro + local version + distro=$(lsb_release -is | tr '[:upper:]' '[:lower:]') + version_name=$(lsb_release -cs | tr '[:upper:]' '[:lower:]') + version=$(lsb_release -rs) + local driver + if [[ ${version_name} == "buster" ]]; then + driver=msodbcsql17 + elif [[ ${version_name} == "bullseye" ]]; then + driver=msodbcsql18 + else + echo + echo "${COLOR_YELLOW}Only Buster or Bullseye are supported. Skipping MSSQL installation${COLOR_RESET}" + echo + return + fi + curl --silent https://packages.microsoft.com/keys/microsoft.asc | apt-key add - >/dev/null 2>&1 + curl --silent "https://packages.microsoft.com/config/${distro}/${version}/prod.list" > \ + /etc/apt/sources.list.d/mssql-release.list + apt-get update -yqq + apt-get upgrade -yqq + ACCEPT_EULA=Y apt-get -yqq install -y --no-install-recommends "${driver}" + rm -rf /var/lib/apt/lists/* + apt-get autoremove -yqq --purge + apt-get clean && rm -rf /var/lib/apt/lists/* +} + +if [[ $(uname -m) == "arm64" || $(uname -m) == "aarch64" ]]; then + # disable MSSQL for ARM64 + INSTALL_MSSQL_CLIENT="false" +fi + +install_mssql_client "${@}" +EOF + +# The content below is automatically copied from scripts/docker/install_postgres.sh +COPY <<"EOF" /install_postgres.sh +set -euo pipefail +declare -a packages + +COLOR_BLUE=$'\e[34m' +readonly COLOR_BLUE +COLOR_RESET=$'\e[0m' +readonly COLOR_RESET + +: "${INSTALL_POSTGRES_CLIENT:?Should be true or false}" + +install_postgres_client() { + echo + echo "${COLOR_BLUE}Installing postgres client${COLOR_RESET}" + echo + + if [[ "${1}" == "dev" ]]; then + packages=("libpq-dev" "postgresql-client") + elif [[ "${1}" == "prod" ]]; then + packages=("postgresql-client") + else + echo + echo "Specify either prod or dev" + echo + exit 1 + fi + + curl https://www.postgresql.org/media/keys/ACCC4CF8.asc | apt-key add - + echo "deb https://apt.postgresql.org/pub/repos/apt/ $(lsb_release -cs)-pgdg main" > /etc/apt/sources.list.d/pgdg.list + apt-get update + apt-get install --no-install-recommends -y "${packages[@]}" + apt-get autoremove -yqq --purge + apt-get clean && rm -rf /var/lib/apt/lists/* +} + +if [[ ${INSTALL_POSTGRES_CLIENT:="true"} == "true" ]]; then + install_postgres_client "${@}" +fi +EOF + +# The content below is automatically copied from scripts/docker/install_pip_version.sh +COPY <<"EOF" /install_pip_version.sh +. "$( dirname "${BASH_SOURCE[0]}" )/common.sh" + +: "${AIRFLOW_PIP_VERSION:?Should be set}" + +function install_pip_version() { + echo + echo "${COLOR_BLUE}Installing pip version ${AIRFLOW_PIP_VERSION}${COLOR_RESET}" + echo + pip install --disable-pip-version-check --no-cache-dir --upgrade "pip==${AIRFLOW_PIP_VERSION}" && + mkdir -p ${HOME}/.local/bin +} + +common::get_colors +common::get_airflow_version_specification +common::override_pip_version_if_needed +common::show_pip_version_and_location + +install_pip_version +EOF + +# The content below is automatically copied from scripts/docker/install_airflow_dependencies_from_branch_tip.sh +COPY <<"EOF" /install_airflow_dependencies_from_branch_tip.sh + +. 
"$( dirname "${BASH_SOURCE[0]}" )/common.sh" + +: "${AIRFLOW_REPO:?Should be set}" +: "${AIRFLOW_BRANCH:?Should be set}" +: "${INSTALL_MYSQL_CLIENT:?Should be true or false}" +: "${INSTALL_POSTGRES_CLIENT:?Should be true or false}" +: "${AIRFLOW_PIP_VERSION:?Should be set}" + +function install_airflow_dependencies_from_branch_tip() { + echo + echo "${COLOR_BLUE}Installing airflow from ${AIRFLOW_BRANCH}. It is used to cache dependencies${COLOR_RESET}" + echo + if [[ ${INSTALL_MYSQL_CLIENT} != "true" ]]; then + AIRFLOW_EXTRAS=${AIRFLOW_EXTRAS/mysql,} + fi + if [[ ${INSTALL_POSTGRES_CLIENT} != "true" ]]; then + AIRFLOW_EXTRAS=${AIRFLOW_EXTRAS/postgres,} + fi + # Install latest set of dependencies using constraints. In case constraints were upgraded and there + # are conflicts, this might fail, but it should be fixed in the following installation steps + set -x + pip install --root-user-action ignore \ + "https://github.com/${AIRFLOW_REPO}/archive/${AIRFLOW_BRANCH}.tar.gz#egg=apache-airflow[${AIRFLOW_EXTRAS}]" \ + --constraint "${AIRFLOW_CONSTRAINTS_LOCATION}" || true + # make sure correct PIP version is used + pip install --disable-pip-version-check "pip==${AIRFLOW_PIP_VERSION}" 2>/dev/null + pip freeze | grep apache-airflow-providers | xargs pip uninstall --yes 2>/dev/null || true + set +x + echo + echo "${COLOR_BLUE}Uninstalling just airflow. Dependencies remain. Now target airflow can be reinstalled using mostly cached dependencies${COLOR_RESET}" + echo + pip uninstall --yes apache-airflow || true +} + +common::get_colors +common::get_airflow_version_specification +common::override_pip_version_if_needed +common::get_constraints_location +common::show_pip_version_and_location + +install_airflow_dependencies_from_branch_tip +EOF + +# The content below is automatically copied from scripts/docker/common.sh +COPY <<"EOF" /common.sh +set -euo pipefail + +function common::get_colors() { + COLOR_BLUE=$'\e[34m' + COLOR_GREEN=$'\e[32m' + COLOR_RED=$'\e[31m' + COLOR_RESET=$'\e[0m' + COLOR_YELLOW=$'\e[33m' + export COLOR_BLUE + export COLOR_GREEN + export COLOR_RED + export COLOR_RESET + export COLOR_YELLOW +} + + +function common::get_airflow_version_specification() { + if [[ -z ${AIRFLOW_VERSION_SPECIFICATION=} + && -n ${AIRFLOW_VERSION} + && ${AIRFLOW_INSTALLATION_METHOD} != "." ]]; then + AIRFLOW_VERSION_SPECIFICATION="==${AIRFLOW_VERSION}" + fi +} + +function common::override_pip_version_if_needed() { + if [[ -n ${AIRFLOW_VERSION} ]]; then + if [[ ${AIRFLOW_VERSION} =~ ^2\.0.* || ${AIRFLOW_VERSION} =~ ^1\.* ]]; then + export AIRFLOW_PIP_VERSION="22.1.2" + fi + fi +} + +function common::get_constraints_location() { + # auto-detect Airflow-constraint reference and location + if [[ -z "${AIRFLOW_CONSTRAINTS_REFERENCE=}" ]]; then + if [[ ${AIRFLOW_VERSION} =~ v?2.* && ! ${AIRFLOW_VERSION} =~ .*dev.* ]]; then + AIRFLOW_CONSTRAINTS_REFERENCE=constraints-${AIRFLOW_VERSION} + else + AIRFLOW_CONSTRAINTS_REFERENCE=${DEFAULT_CONSTRAINTS_BRANCH} + fi + fi + + if [[ -z ${AIRFLOW_CONSTRAINTS_LOCATION=} ]]; then + local constraints_base="https://raw.githubusercontent.com/${CONSTRAINTS_GITHUB_REPOSITORY}/${AIRFLOW_CONSTRAINTS_REFERENCE}" + local python_version + python_version="$(python --version 2>/dev/stdout | cut -d " " -f 2 | cut -d "." 
-f 1-2)" + AIRFLOW_CONSTRAINTS_LOCATION="${constraints_base}/${AIRFLOW_CONSTRAINTS_MODE}-${python_version}.txt" + fi +} + +function common::show_pip_version_and_location() { + echo "PATH=${PATH}" + echo "pip on path: $(which pip)" + echo "Using pip: $(pip --version)" +} +EOF + +# The content below is automatically copied from scripts/docker/prepare_node_modules.sh +COPY <<"EOF" /prepare_node_modules.sh +set -euo pipefail + +COLOR_BLUE=$'\e[34m' +readonly COLOR_BLUE +COLOR_RESET=$'\e[0m' +readonly COLOR_RESET + +function prepare_node_modules() { + echo + echo "${COLOR_BLUE}Preparing node modules${COLOR_RESET}" + echo + local www_dir + if [[ ${AIRFLOW_INSTALLATION_METHOD=} == "." ]]; then + # In case we are building from sources in production image, we should build the assets + www_dir="${AIRFLOW_SOURCES_TO=${AIRFLOW_SOURCES}}/airflow/www" + else + www_dir="$(python -m site --user-site)/airflow/www" + fi + pushd ${www_dir} || exit 1 + set +e + yarn install --frozen-lockfile --no-cache 2>/tmp/out-yarn-install.txt + local res=$? + if [[ ${res} != 0 ]]; then + >&2 echo + >&2 echo "Error when running yarn install:" + >&2 echo + >&2 cat /tmp/out-yarn-install.txt && rm -f /tmp/out-yarn-install.txt + exit 1 + fi + rm -f /tmp/out-yarn-install.txt + popd || exit 1 +} + +prepare_node_modules +EOF + +# The content below is automatically copied from scripts/docker/compile_www_assets.sh +COPY <<"EOF" /compile_www_assets.sh +set -euo pipefail + +BUILD_TYPE=${BUILD_TYPE="prod"} +REMOVE_ARTIFACTS=${REMOVE_ARTIFACTS="true"} + +COLOR_BLUE=$'\e[34m' +readonly COLOR_BLUE +COLOR_RESET=$'\e[0m' +readonly COLOR_RESET + +function compile_www_assets() { + echo + echo "${COLOR_BLUE}Compiling www assets: running yarn ${BUILD_TYPE}${COLOR_RESET}" + echo + local www_dir + if [[ ${AIRFLOW_INSTALLATION_METHOD=} == "." ]]; then + # In case we are building from sources in production image, we should build the assets + www_dir="${AIRFLOW_SOURCES_TO=${AIRFLOW_SOURCES}}/airflow/www" + else + www_dir="$(python -m site --user-site)/airflow/www" + fi + pushd ${www_dir} || exit 1 + set +e + yarn run "${BUILD_TYPE}" 2>/tmp/out-yarn-run.txt + res=$? + if [[ ${res} != 0 ]]; then + >&2 echo + >&2 echo "Error when running yarn run:" + >&2 echo + >&2 cat /tmp/out-yarn-run.txt && rm -rf /tmp/out-yarn-run.txt + exit 1 + fi + rm -f /tmp/out-yarn-run.txt + set -e + local md5sum_file + md5sum_file="static/dist/sum.md5" + readonly md5sum_file + find package.json yarn.lock static/css static/js -type f | sort | xargs md5sum > "${md5sum_file}" + if [[ ${REMOVE_ARTIFACTS} == "true" ]]; then + echo + echo "${COLOR_BLUE}Removing generated node modules${COLOR_RESET}" + echo + rm -rf "${www_dir}/node_modules" + rm -vf "${www_dir}"/{package.json,yarn.lock,.eslintignore,.eslintrc,.stylelintignore,.stylelintrc,compile_assets.sh,webpack.config.js} + else + echo + echo "${COLOR_BLUE}Leaving generated node modules${COLOR_RESET}" + echo + fi + popd || exit 1 +} + +compile_www_assets +EOF + +# The content below is automatically copied from scripts/docker/pip +COPY <<"EOF" /pip +#!/usr/bin/env bash +COLOR_RED=$'\e[31m' +COLOR_RESET=$'\e[0m' +COLOR_YELLOW=$'\e[33m' + +if [[ $(id -u) == "0" ]]; then + echo + echo "${COLOR_RED}You are running pip as root. 
Please use 'airflow' user to run pip!${COLOR_RESET}" + echo + echo "${COLOR_YELLOW}See: https://airflow.apache.org/docs/docker-stack/build.html#adding-a-new-pypi-package${COLOR_RESET}" + echo + exit 1 +fi +exec "${HOME}"/.local/bin/pip "${@}" +EOF + +# The content below is automatically copied from scripts/docker/install_from_docker_context_files.sh +COPY <<"EOF" /install_from_docker_context_files.sh + +. "$( dirname "${BASH_SOURCE[0]}" )/common.sh" + +: "${AIRFLOW_PIP_VERSION:?Should be set}" + +function install_airflow_and_providers_from_docker_context_files(){ + if [[ ${INSTALL_MYSQL_CLIENT} != "true" ]]; then + AIRFLOW_EXTRAS=${AIRFLOW_EXTRAS/mysql,} + fi + if [[ ${INSTALL_POSTGRES_CLIENT} != "true" ]]; then + AIRFLOW_EXTRAS=${AIRFLOW_EXTRAS/postgres,} + fi + + if [[ ! -d /docker-context-files ]]; then + echo + echo "${COLOR_RED}You must provide a folder via --build-arg DOCKER_CONTEXT_FILES= and you missed it!${COLOR_RESET}" + echo + exit 1 + fi + + # shellcheck disable=SC2206 + local pip_flags=( + # Don't quote this -- if it is empty we don't want it to create an + # empty array element + --find-links="file:///docker-context-files" + ) + + # Find Apache Airflow packages in docker-context files + local reinstalling_apache_airflow_package + reinstalling_apache_airflow_package=$(ls \ + /docker-context-files/apache?airflow?[0-9]*.{whl,tar.gz} 2>/dev/null || true) + # Add extras when installing airflow + if [[ -n "${reinstalling_apache_airflow_package}" ]]; then + # When a provider depends on a dev version of Airflow, we need to + # specify `apache-airflow==$VER`, otherwise pip will look for it on + # pip, and fail to find it + + # This will work as long as the wheel file is correctly named, which it + # will be if it was build by wheel tooling + local ver + ver=$(basename "$reinstalling_apache_airflow_package" | cut -d "-" -f 2) + reinstalling_apache_airflow_package="apache-airflow[${AIRFLOW_EXTRAS}]==$ver" + fi + + # Find Apache Airflow packages in docker-context files + local reinstalling_apache_airflow_providers_packages + reinstalling_apache_airflow_providers_packages=$(ls \ + /docker-context-files/apache?airflow?providers*.{whl,tar.gz} 2>/dev/null || true) + if [[ -z "${reinstalling_apache_airflow_package}" && \ + -z "${reinstalling_apache_airflow_providers_packages}" ]]; then + return + fi + + echo + echo "${COLOR_BLUE}Force re-installing airflow and providers from local files with eager upgrade${COLOR_RESET}" + echo + # force reinstall all airflow + provider package local files with eager upgrade + set -x + pip install "${pip_flags[@]}" --root-user-action ignore --upgrade --upgrade-strategy eager \ + ${reinstalling_apache_airflow_package} ${reinstalling_apache_airflow_providers_packages} \ + ${EAGER_UPGRADE_ADDITIONAL_REQUIREMENTS} + set +x + + # make sure correct PIP version is left installed + pip install --disable-pip-version-check "pip==${AIRFLOW_PIP_VERSION}" 2>/dev/null + pip check +} + +function install_all_other_packages_from_docker_context_files() { + + echo + echo "${COLOR_BLUE}Force re-installing all other package from local files without dependencies${COLOR_RESET}" + echo + local reinstalling_other_packages + # shellcheck disable=SC2010 + reinstalling_other_packages=$(ls /docker-context-files/*.{whl,tar.gz} 2>/dev/null | \ + grep -v apache_airflow | grep -v apache-airflow || true) + if [[ -n "${reinstalling_other_packages}" ]]; then + set -x + pip install --root-user-action ignore --force-reinstall --no-deps --no-index ${reinstalling_other_packages} + # make sure 
correct PIP version is used + pip install --disable-pip-version-check "pip==${AIRFLOW_PIP_VERSION}" 2>/dev/null + set -x + fi +} + +common::get_colors +common::get_airflow_version_specification +common::override_pip_version_if_needed +common::get_constraints_location +common::show_pip_version_and_location + +install_airflow_and_providers_from_docker_context_files + +common::show_pip_version_and_location +install_all_other_packages_from_docker_context_files +EOF + +# The content below is automatically copied from scripts/docker/install_airflow.sh +COPY <<"EOF" /install_airflow.sh + +. "$( dirname "${BASH_SOURCE[0]}" )/common.sh" + +: "${AIRFLOW_PIP_VERSION:?Should be set}" + +function install_airflow() { + # Coherence check for editable installation mode. + if [[ ${AIRFLOW_INSTALLATION_METHOD} != "." && \ + ${AIRFLOW_INSTALL_EDITABLE_FLAG} == "--editable" ]]; then + echo + echo "${COLOR_RED}ERROR! You can only use --editable flag when installing airflow from sources!${COLOR_RESET}" + echo "${COLOR_RED} Current installation method is '${AIRFLOW_INSTALLATION_METHOD} and should be '.'${COLOR_RESET}" + exit 1 + fi + # Remove mysql from extras if client is not going to be installed + if [[ ${INSTALL_MYSQL_CLIENT} != "true" ]]; then + AIRFLOW_EXTRAS=${AIRFLOW_EXTRAS/mysql,} + echo "${COLOR_YELLOW}MYSQL client installation is disabled. Extra 'mysql' installations were therefore omitted.${COLOR_RESET}" + fi + # Remove postgres from extras if client is not going to be installed + if [[ ${INSTALL_POSTGRES_CLIENT} != "true" ]]; then + AIRFLOW_EXTRAS=${AIRFLOW_EXTRAS/postgres,} + echo "${COLOR_YELLOW}Postgres client installation is disabled. Extra 'postgres' installations were therefore omitted.${COLOR_RESET}" + fi + if [[ "${UPGRADE_TO_NEWER_DEPENDENCIES}" != "false" ]]; then + echo + echo "${COLOR_BLUE}Installing all packages with eager upgrade${COLOR_RESET}" + echo + # eager upgrade + pip install --root-user-action ignore --upgrade --upgrade-strategy eager \ + "${AIRFLOW_INSTALLATION_METHOD}[${AIRFLOW_EXTRAS}]${AIRFLOW_VERSION_SPECIFICATION}" \ + ${EAGER_UPGRADE_ADDITIONAL_REQUIREMENTS} + if [[ -n "${AIRFLOW_INSTALL_EDITABLE_FLAG}" ]]; then + # Remove airflow and reinstall it using editable flag + # We can only do it when we install airflow from sources + set -x + pip uninstall apache-airflow --yes + pip install --root-user-action ignore ${AIRFLOW_INSTALL_EDITABLE_FLAG} \ + "${AIRFLOW_INSTALLATION_METHOD}[${AIRFLOW_EXTRAS}]${AIRFLOW_VERSION_SPECIFICATION}" + set +x + fi + + # make sure correct PIP version is used + pip install --disable-pip-version-check "pip==${AIRFLOW_PIP_VERSION}" 2>/dev/null + echo + echo "${COLOR_BLUE}Running 'pip check'${COLOR_RESET}" + echo + pip check + else \ + echo + echo "${COLOR_BLUE}Installing all packages with constraints and upgrade if needed${COLOR_RESET}" + echo + set -x + pip install --root-user-action ignore ${AIRFLOW_INSTALL_EDITABLE_FLAG} \ + "${AIRFLOW_INSTALLATION_METHOD}[${AIRFLOW_EXTRAS}]${AIRFLOW_VERSION_SPECIFICATION}" \ + --constraint "${AIRFLOW_CONSTRAINTS_LOCATION}" + # make sure correct PIP version is used + pip install --disable-pip-version-check "pip==${AIRFLOW_PIP_VERSION}" 2>/dev/null + # then upgrade if needed without using constraints to account for new limits in setup.py + pip install --root-user-action ignore --upgrade --upgrade-strategy only-if-needed \ + ${AIRFLOW_INSTALL_EDITABLE_FLAG} \ + "${AIRFLOW_INSTALLATION_METHOD}[${AIRFLOW_EXTRAS}]${AIRFLOW_VERSION_SPECIFICATION}" + # make sure correct PIP version is used + pip install 
--disable-pip-version-check "pip==${AIRFLOW_PIP_VERSION}" 2>/dev/null + set +x + echo + echo "${COLOR_BLUE}Running 'pip check'${COLOR_RESET}" + echo + pip check + fi + +} + +common::get_colors +common::get_airflow_version_specification +common::override_pip_version_if_needed +common::get_constraints_location +common::show_pip_version_and_location + +install_airflow +EOF + +# The content below is automatically copied from scripts/docker/install_additional_dependencies.sh +COPY <<"EOF" /install_additional_dependencies.sh +set -euo pipefail + +: "${UPGRADE_TO_NEWER_DEPENDENCIES:?Should be true or false}" +: "${ADDITIONAL_PYTHON_DEPS:?Should be set}" +: "${EAGER_UPGRADE_ADDITIONAL_REQUIREMENTS:?Should be set}" +: "${AIRFLOW_PIP_VERSION:?Should be set}" + +. "$( dirname "${BASH_SOURCE[0]}" )/common.sh" + +function install_additional_dependencies() { + if [[ "${UPGRADE_TO_NEWER_DEPENDENCIES}" != "false" ]]; then + echo + echo "${COLOR_BLUE}Installing additional dependencies while upgrading to newer dependencies${COLOR_RESET}" + echo + set -x + pip install --root-user-action ignore --upgrade --upgrade-strategy eager \ + ${ADDITIONAL_PYTHON_DEPS} ${EAGER_UPGRADE_ADDITIONAL_REQUIREMENTS} + # make sure correct PIP version is used + pip install --disable-pip-version-check "pip==${AIRFLOW_PIP_VERSION}" 2>/dev/null + set +x + echo + echo "${COLOR_BLUE}Running 'pip check'${COLOR_RESET}" + echo + pip check + else + echo + echo "${COLOR_BLUE}Installing additional dependencies upgrading only if needed${COLOR_RESET}" + echo + set -x + pip install --root-user-action ignore --upgrade --upgrade-strategy only-if-needed \ + ${ADDITIONAL_PYTHON_DEPS} + # make sure correct PIP version is used + pip install --disable-pip-version-check "pip==${AIRFLOW_PIP_VERSION}" 2>/dev/null + set +x + echo + echo "${COLOR_BLUE}Running 'pip check'${COLOR_RESET}" + echo + pip check + fi +} + +common::get_colors +common::get_airflow_version_specification +common::override_pip_version_if_needed +common::get_constraints_location +common::show_pip_version_and_location + +install_additional_dependencies +EOF + + +# The content below is automatically copied from scripts/docker/entrypoint_prod.sh +COPY <<"EOF" /entrypoint_prod.sh +#!/usr/bin/env bash +AIRFLOW_COMMAND="${1:-}" + +set -euo pipefail + +LD_PRELOAD="/usr/lib/$(uname -m)-linux-gnu/libstdc++.so.6" +export LD_PRELOAD + +function run_check_with_retries { + local cmd + cmd="${1}" + local countdown + countdown="${CONNECTION_CHECK_MAX_COUNT}" + + while true + do + set +e + local last_check_result + local res + last_check_result=$(eval "${cmd} 2>&1") + res=$? + set -e + if [[ ${res} == 0 ]]; then + echo + break + else + echo -n "." + countdown=$((countdown-1)) + fi + if [[ ${countdown} == 0 ]]; then + echo + echo "ERROR! Maximum number of retries (${CONNECTION_CHECK_MAX_COUNT}) reached." + echo + echo "Last check result:" + echo "$ ${cmd}" + echo "${last_check_result}" + echo + exit 1 + else + sleep "${CONNECTION_CHECK_SLEEP_TIME}" + fi + done +} + +function run_nc() { + # Checks if it is possible to connect to the host using netcat. + # + # We want to avoid misleading messages and perform only forward lookup of the service IP address. + # Netcat when run without -n performs both forward and reverse lookup and fails if the reverse + # lookup name does not match the original name even if the host is reachable via IP. This happens + # randomly with docker-compose in GitHub Actions. 
+ # Since we are not using reverse lookup elsewhere, we can perform forward lookup in python + # And use the IP in NC and add '-n' switch to disable any DNS use. + # Even if this message might be harmless, it might hide the real reason for the problem + # Which is the long time needed to start some services, seeing this message might be totally misleading + # when you try to analyse the problem, that's why it's best to avoid it, + local host="${1}" + local port="${2}" + local ip + ip=$(python -c "import socket; print(socket.gethostbyname('${host}'))") + nc -zvvn "${ip}" "${port}" +} + + +function wait_for_connection { + # Waits for Connection to the backend specified via URL passed as first parameter + # Detects backend type depending on the URL schema and assigns + # default port numbers if not specified in the URL. + # Then it loops until connection to the host/port specified can be established + # It tries `CONNECTION_CHECK_MAX_COUNT` times and sleeps `CONNECTION_CHECK_SLEEP_TIME` between checks + local connection_url + connection_url="${1}" + local detected_backend + detected_backend=$(python -c "from urllib.parse import urlsplit; import sys; print(urlsplit(sys.argv[1]).scheme)" "${connection_url}") + local detected_host + detected_host=$(python -c "from urllib.parse import urlsplit; import sys; print(urlsplit(sys.argv[1]).hostname or '')" "${connection_url}") + local detected_port + detected_port=$(python -c "from urllib.parse import urlsplit; import sys; print(urlsplit(sys.argv[1]).port or '')" "${connection_url}") + + echo BACKEND="${BACKEND:=${detected_backend}}" + readonly BACKEND + + if [[ -z "${detected_port=}" ]]; then + if [[ ${BACKEND} == "postgres"* ]]; then + detected_port=5432 + elif [[ ${BACKEND} == "mysql"* ]]; then + detected_port=3306 + elif [[ ${BACKEND} == "mssql"* ]]; then + detected_port=1433 + elif [[ ${BACKEND} == "redis"* ]]; then + detected_port=6379 + elif [[ ${BACKEND} == "amqp"* ]]; then + detected_port=5672 + fi + fi + + detected_host=${detected_host:="localhost"} + + # Allow the DB parameters to be overridden by environment variable + echo DB_HOST="${DB_HOST:=${detected_host}}" + readonly DB_HOST + + echo DB_PORT="${DB_PORT:=${detected_port}}" + readonly DB_PORT + if [[ -n "${DB_HOST=}" ]] && [[ -n "${DB_PORT=}" ]]; then + run_check_with_retries "run_nc ${DB_HOST@Q} ${DB_PORT@Q}" + else + >&2 echo "The connection details to the broker could not be determined. Connectivity checks were skipped." + fi +} + +function create_www_user() { + local local_password="" + # Warning: command environment variables (*_CMD) have priority over usual configuration variables + # for configuration parameters that require sensitive information. This is the case for the SQL database + # and the broker backend in this entrypoint script. + if [[ -n "${_AIRFLOW_WWW_USER_PASSWORD_CMD=}" ]]; then + local_password=$(eval "${_AIRFLOW_WWW_USER_PASSWORD_CMD}") + unset _AIRFLOW_WWW_USER_PASSWORD_CMD + elif [[ -n "${_AIRFLOW_WWW_USER_PASSWORD=}" ]]; then + local_password="${_AIRFLOW_WWW_USER_PASSWORD}" + unset _AIRFLOW_WWW_USER_PASSWORD + fi + if [[ -z ${local_password} ]]; then + echo + echo "ERROR! Airflow Admin password not set via _AIRFLOW_WWW_USER_PASSWORD or _AIRFLOW_WWW_USER_PASSWORD_CMD variables!" 
+ echo + exit 1 + fi + + airflow users create \ + --username "${_AIRFLOW_WWW_USER_USERNAME="admin"}" \ + --firstname "${_AIRFLOW_WWW_USER_FIRSTNAME="Airflow"}" \ + --lastname "${_AIRFLOW_WWW_USER_LASTNAME="Admin"}" \ + --email "${_AIRFLOW_WWW_USER_EMAIL="airflowadmin@example.com"}" \ + --role "${_AIRFLOW_WWW_USER_ROLE="Admin"}" \ + --password "${local_password}" || true +} + +function create_system_user_if_missing() { + # This is needed in case of OpenShift-compatible container execution. In case of OpenShift random + # User id is used when starting the image, however group 0 is kept as the user group. Our production + # Image is OpenShift compatible, so all permissions on all folders are set so that 0 group can exercise + # the same privileges as the default "airflow" user, this code checks if the user is already + # present in /etc/passwd and will create the system user dynamically, including setting its + # HOME directory to the /home/airflow so that (for example) the ${HOME}/.local folder where airflow is + # Installed can be automatically added to PYTHONPATH + if ! whoami &> /dev/null; then + if [[ -w /etc/passwd ]]; then + echo "${USER_NAME:-default}:x:$(id -u):0:${USER_NAME:-default} user:${AIRFLOW_USER_HOME_DIR}:/sbin/nologin" \ + >> /etc/passwd + fi + export HOME="${AIRFLOW_USER_HOME_DIR}" + fi +} + +function set_pythonpath_for_root_user() { + # Airflow is installed as a local user application which means that if the container is running as root + # the application is not available. because Python then only load system-wide applications. + # Now also adds applications installed as local user "airflow". + if [[ $UID == "0" ]]; then + local python_major_minor + python_major_minor="$(python --version | cut -d " " -f 2 | cut -d "." -f 1-2)" + export PYTHONPATH="${AIRFLOW_USER_HOME_DIR}/.local/lib/python${python_major_minor}/site-packages:${PYTHONPATH:-}" + >&2 echo "The container is run as root user. For security, consider using a regular user account." + fi +} + +function wait_for_airflow_db() { + # Wait for the command to run successfully to validate the database connection. + run_check_with_retries "airflow db check" +} + +function upgrade_db() { + # Runs airflow db upgrade + airflow db upgrade || true +} + +function wait_for_celery_broker() { + # Verifies connection to Celery Broker + local executor + executor="$(airflow config get-value core executor)" + if [[ "${executor}" == "CeleryExecutor" ]]; then + local connection_url + connection_url="$(airflow config get-value celery broker_url)" + wait_for_connection "${connection_url}" + fi +} + +function exec_to_bash_or_python_command_if_specified() { + # If one of the commands: 'bash', 'python' is used, either run appropriate + # command with exec + if [[ ${AIRFLOW_COMMAND} == "bash" ]]; then + shift + exec "/bin/bash" "${@}" + elif [[ ${AIRFLOW_COMMAND} == "python" ]]; then + shift + exec "python" "${@}" + fi +} + +function check_uid_gid() { + if [[ $(id -g) == "0" ]]; then + return + fi + if [[ $(id -u) == "50000" ]]; then + >&2 echo + >&2 echo "WARNING! You should run the image with GID (Group ID) set to 0" + >&2 echo " even if you use 'airflow' user (UID=50000)" + >&2 echo + >&2 echo " You started the image with UID=$(id -u) and GID=$(id -g)" + >&2 echo + >&2 echo " This is to make sure you can run the image with an arbitrary UID in the future." 
+ >&2 echo + >&2 echo " See more about it in the Airflow's docker image documentation" + >&2 echo " http://airflow.apache.org/docs/docker-stack/entrypoint" + >&2 echo + # We still allow the image to run with `airflow` user. + return + else + >&2 echo + >&2 echo "ERROR! You should run the image with GID=0" + >&2 echo + >&2 echo " You started the image with UID=$(id -u) and GID=$(id -g)" + >&2 echo + >&2 echo "The image should always be run with GID (Group ID) set to 0 regardless of the UID used." + >&2 echo " This is to make sure you can run the image with an arbitrary UID." + >&2 echo + >&2 echo " See more about it in the Airflow's docker image documentation" + >&2 echo " http://airflow.apache.org/docs/docker-stack/entrypoint" + # This will not work so we fail hard + exit 1 + fi +} + +unset PIP_USER + +check_uid_gid + +umask 0002 + +CONNECTION_CHECK_MAX_COUNT=${CONNECTION_CHECK_MAX_COUNT:=20} +readonly CONNECTION_CHECK_MAX_COUNT + +CONNECTION_CHECK_SLEEP_TIME=${CONNECTION_CHECK_SLEEP_TIME:=3} +readonly CONNECTION_CHECK_SLEEP_TIME + +create_system_user_if_missing +set_pythonpath_for_root_user +if [[ "${CONNECTION_CHECK_MAX_COUNT}" -gt "0" ]]; then + wait_for_airflow_db +fi + +if [[ -n "${_AIRFLOW_DB_UPGRADE=}" ]] ; then + upgrade_db +fi + +if [[ -n "${_AIRFLOW_WWW_USER_CREATE=}" ]] ; then + create_www_user +fi + +if [[ -n "${_PIP_ADDITIONAL_REQUIREMENTS=}" ]] ; then + >&2 echo + >&2 echo "!!!!! Installing additional requirements: '${_PIP_ADDITIONAL_REQUIREMENTS}' !!!!!!!!!!!!" + >&2 echo + >&2 echo "WARNING: This is a development/test feature only. NEVER use it in production!" + >&2 echo " Instead, build a custom image as described in" + >&2 echo + >&2 echo " https://airflow.apache.org/docs/docker-stack/build.html" + >&2 echo + >&2 echo " Adding requirements at container startup is fragile and is done every time" + >&2 echo " the container starts, so it is onlny useful for testing and trying out" + >&2 echo " of adding dependencies." + >&2 echo + pip install --root-user-action ignore --no-cache-dir ${_PIP_ADDITIONAL_REQUIREMENTS} +fi + + +exec_to_bash_or_python_command_if_specified "${@}" + +if [[ ${AIRFLOW_COMMAND} == "airflow" ]]; then + AIRFLOW_COMMAND="${2:-}" + shift +fi + +if [[ ${AIRFLOW_COMMAND} =~ ^(scheduler|celery)$ ]] \ + && [[ "${CONNECTION_CHECK_MAX_COUNT}" -gt "0" ]]; then + wait_for_celery_broker +fi + +exec "airflow" "${@}" +EOF + +# The content below is automatically copied from scripts/docker/clean-logs.sh +COPY <<"EOF" /clean-logs.sh +#!/usr/bin/env bash + + +set -euo pipefail + +readonly DIRECTORY="${AIRFLOW_HOME:-/usr/local/airflow}" +readonly RETENTION="${AIRFLOW__LOG_RETENTION_DAYS:-15}" + +trap "exit" INT TERM + +readonly EVERY=$((15*60)) + +echo "Cleaning logs every $EVERY seconds" + +while true; do + echo "Trimming airflow logs to ${RETENTION} days." + find "${DIRECTORY}"/logs \ + -type d -name 'lost+found' -prune -o \ + -type f -mtime +"${RETENTION}" -name '*.log' -print0 | \ + xargs -0 rm -f + + seconds=$(( $(date -u +%s) % EVERY)) + (( seconds < 1 )) || sleep $((EVERY - seconds)) +done +EOF + +# The content below is automatically copied from scripts/docker/airflow-scheduler-autorestart.sh +COPY <<"EOF" /airflow-scheduler-autorestart.sh +#!/usr/bin/env bash + +while echo "Running"; do + airflow scheduler -n 5 + return_code=$? + if (( return_code != 0 )); then + echo "Scheduler crashed with exit code $return_code. Respawning.." 
>&2 + date >> /tmp/airflow_scheduler_errors.txt + fi + + sleep 1 +done +EOF + ############################################################################################## # This is the build image where we build all dependencies ############################################################################################## @@ -117,8 +1145,8 @@ ENV DEV_APT_DEPS=${DEV_APT_DEPS} \ DEV_APT_COMMAND=${DEV_APT_COMMAND} \ ADDITIONAL_DEV_APT_COMMAND=${ADDITIONAL_DEV_APT_COMMAND} \ ADDITIONAL_DEV_APT_ENV=${ADDITIONAL_DEV_APT_ENV} -COPY scripts/docker/determine_debian_version_specific_variables.sh /scripts/docker/ +COPY --from=scripts determine_debian_version_specific_variables.sh /scripts/docker/ # Install basic and additional apt dependencies RUN apt-get update \ && apt-get install --no-install-recommends -yqq apt-utils >/dev/null 2>&1 \ @@ -145,7 +1173,7 @@ ARG AIRFLOW_EXTRAS ARG ADDITIONAL_AIRFLOW_EXTRAS="" # Allows to override constraints source ARG CONSTRAINTS_GITHUB_REPOSITORY="apache/airflow" -ARG AIRFLOW_CONSTRAINTS="constraints" +ARG AIRFLOW_CONSTRAINTS_MODE="constraints" ARG AIRFLOW_CONSTRAINTS_REFERENCE="" ARG AIRFLOW_CONSTRAINTS_LOCATION="" ARG DEFAULT_CONSTRAINTS_BRANCH="constraints-main" @@ -177,14 +1205,18 @@ ARG UPGRADE_TO_NEWER_DEPENDENCIES="false" # www to compile the assets but in case of breeze/CI builds we use latest sources and we override those # those SOURCES_FROM/TO with "airflow/www" and "/opt/airflow/airflow/www" respectively. # This is to rebuild the assets only when any of the www sources change -ARG AIRFLOW_SOURCES_WWW_FROM="empty" -ARG AIRFLOW_SOURCES_WWW_TO="/empty" +ARG AIRFLOW_SOURCES_WWW_FROM="Dockerfile" +ARG AIRFLOW_SOURCES_WWW_TO="/Dockerfile" # By default we install latest airflow from PyPI so we do not need to copy sources of Airflow # but in case of breeze/CI builds we use latest sources and we override those # those SOURCES_FROM/TO with "." 
and "/opt/airflow" respectively -ARG AIRFLOW_SOURCES_FROM="empty" -ARG AIRFLOW_SOURCES_TO="/empty" +ARG AIRFLOW_SOURCES_FROM="Dockerfile" +ARG AIRFLOW_SOURCES_TO="/Dockerfile" + +# By default we do not install from docker context files but if we decide to install from docker context +# files, we should override those variables to "docker-context-files" +ARG DOCKER_CONTEXT_FILES="Dockerfile" ARG AIRFLOW_HOME ARG AIRFLOW_USER_HOME_DIR @@ -196,14 +1228,14 @@ ENV INSTALL_MYSQL_CLIENT=${INSTALL_MYSQL_CLIENT} \ # Only copy mysql/mssql installation scripts for now - so that changing the other # scripts which are needed much later will not invalidate the docker layer here -COPY scripts/docker/install_mysql.sh scripts/docker/install_mssql.sh scripts/docker/install_postgres.sh /scripts/docker/ +COPY --from=scripts install_mysql.sh install_mssql.sh install_postgres.sh /scripts/docker/ RUN bash /scripts/docker/install_mysql.sh dev && \ bash /scripts/docker/install_mssql.sh && \ bash /scripts/docker/install_postgres.sh dev ENV PATH=${PATH}:/opt/mssql-tools/bin -COPY docker-context-files /docker-context-files +COPY ${DOCKER_CONTEXT_FILES} /docker-context-files RUN adduser --gecos "First Last,RoomNumber,WorkPhone,HomePhone" --disabled-password \ --quiet "airflow" --uid "${AIRFLOW_UID}" --gid "0" --home "${AIRFLOW_USER_HOME_DIR}" && \ @@ -231,7 +1263,7 @@ ENV AIRFLOW_PIP_VERSION=${AIRFLOW_PIP_VERSION} \ AIRFLOW_BRANCH=${AIRFLOW_BRANCH} \ AIRFLOW_EXTRAS=${AIRFLOW_EXTRAS}${ADDITIONAL_AIRFLOW_EXTRAS:+,}${ADDITIONAL_AIRFLOW_EXTRAS} \ CONSTRAINTS_GITHUB_REPOSITORY=${CONSTRAINTS_GITHUB_REPOSITORY} \ - AIRFLOW_CONSTRAINTS=${AIRFLOW_CONSTRAINTS} \ + AIRFLOW_CONSTRAINTS_MODE=${AIRFLOW_CONSTRAINTS_MODE} \ AIRFLOW_CONSTRAINTS_REFERENCE=${AIRFLOW_CONSTRAINTS_REFERENCE} \ AIRFLOW_CONSTRAINTS_LOCATION=${AIRFLOW_CONSTRAINTS_LOCATION} \ DEFAULT_CONSTRAINTS_BRANCH=${DEFAULT_CONSTRAINTS_BRANCH} \ @@ -248,9 +1280,8 @@ ENV AIRFLOW_PIP_VERSION=${AIRFLOW_PIP_VERSION} \ # Copy all scripts required for installation - changing any of those should lead to # rebuilding from here -COPY --chown=airflow:0 scripts/docker/common.sh scripts/docker/install_pip_version.sh \ - /scripts/docker/install_airflow_dependencies_from_branch_tip.sh \ - /scripts/docker/ +COPY --from=scripts common.sh install_pip_version.sh \ + install_airflow_dependencies_from_branch_tip.sh /scripts/docker/ # In case of Production build image segment we want to pre-install main version of airflow # dependencies from GitHub so that we do not have to always reinstall it from the scratch. @@ -265,7 +1296,7 @@ RUN bash /scripts/docker/install_pip_version.sh; \ bash /scripts/docker/install_airflow_dependencies_from_branch_tip.sh; \ fi -COPY --chown=airflow:0 scripts/docker/compile_www_assets.sh scripts/docker/prepare_node_modules.sh /scripts/docker/ +COPY --from=scripts compile_www_assets.sh prepare_node_modules.sh /scripts/docker/ COPY --chown=airflow:0 ${AIRFLOW_SOURCES_WWW_FROM} ${AIRFLOW_SOURCES_WWW_TO} # hadolint ignore=SC2086, SC2010 @@ -290,33 +1321,31 @@ RUN if [[ ${AIRFLOW_INSTALLATION_METHOD} == "." ]]; then \ ARG ADDITIONAL_PYTHON_DEPS="" # We can set this value to true in case we want to install .whl .tar.gz packages placed in the # docker-context-files folder. This can be done for both - additional packages you want to install -# and for airflow as well (you have to set INSTALL_FROM_PYPI to false in this case) -ARG INSTALL_FROM_DOCKER_CONTEXT_FILES="" -# By default we install latest airflow from PyPI. 
You can set it to false if you want to install -# Airflow from the .whl or .tar.gz packages placed in `docker-context-files` folder. -ARG INSTALL_FROM_PYPI="true" +# and for airflow as well (you have to set AIRFLOW_IS_IN_CONTEXT to true in this case) +ARG INSTALL_PACKAGES_FROM_CONTEXT="false" +# By default we install latest airflow from PyPI or sources. You can set this parameter to false +# if Airflow is in the .whl or .tar.gz packages placed in `docker-context-files` folder and you want +# to skip installing Airflow/Providers from PyPI or sources. +ARG AIRFLOW_IS_IN_CONTEXT="false" # Those are additional constraints that are needed for some extras but we do not want to # Force them on the main Airflow package. -# * certifi<2021.0.0 required to keep snowflake happy # * dill<0.3.3 required by apache-beam -# * google-ads<14.0.1 required to prevent updating google-python-api>=2.0.0 -ARG EAGER_UPGRADE_ADDITIONAL_REQUIREMENTS="dill<0.3.3 certifi<2021.0.0 google-ads<14.0.1" +ARG EAGER_UPGRADE_ADDITIONAL_REQUIREMENTS="dill<0.3.3" ENV ADDITIONAL_PYTHON_DEPS=${ADDITIONAL_PYTHON_DEPS} \ - INSTALL_FROM_DOCKER_CONTEXT_FILES=${INSTALL_FROM_DOCKER_CONTEXT_FILES} \ - INSTALL_FROM_PYPI=${INSTALL_FROM_PYPI} \ + INSTALL_PACKAGES_FROM_CONTEXT=${INSTALL_PACKAGES_FROM_CONTEXT} \ + AIRFLOW_IS_IN_CONTEXT=${AIRFLOW_IS_IN_CONTEXT} \ EAGER_UPGRADE_ADDITIONAL_REQUIREMENTS=${EAGER_UPGRADE_ADDITIONAL_REQUIREMENTS} WORKDIR /opt/airflow -COPY --chown=airflow:0 scripts/docker/install_from_docker_context_files.sh scripts/docker/install_airflow.sh \ - scripts/docker/install_additional_dependencies.sh \ - /scripts/docker/ +COPY --from=scripts install_from_docker_context_files.sh install_airflow.sh \ + install_additional_dependencies.sh /scripts/docker/ # hadolint ignore=SC2086, SC2010 -RUN if [[ ${INSTALL_FROM_DOCKER_CONTEXT_FILES} == "true" ]]; then \ +RUN if [[ ${INSTALL_PACKAGES_FROM_CONTEXT} == "true" ]]; then \ bash /scripts/docker/install_from_docker_context_files.sh; \ - elif [[ ${INSTALL_FROM_PYPI} == "true" ]]; then \ + elif [[ ${AIRFLOW_IS_IN_CONTEXT} == "false" ]]; then \ bash /scripts/docker/install_airflow.sh; \ fi; \ if [[ -n "${ADDITIONAL_PYTHON_DEPS}" ]]; then \ @@ -403,7 +1432,7 @@ ENV RUNTIME_APT_DEPS=${RUNTIME_APT_DEPS} \ GUNICORN_CMD_ARGS="--worker-tmp-dir /dev/shm" \ AIRFLOW_INSTALLATION_METHOD=${AIRFLOW_INSTALLATION_METHOD} -COPY scripts/docker/determine_debian_version_specific_variables.sh /scripts/docker/ +COPY --from=scripts determine_debian_version_specific_variables.sh /scripts/docker/ # Install basic and additional apt dependencies RUN apt-get update \ @@ -439,7 +1468,7 @@ ENV PATH="${AIRFLOW_USER_HOME_DIR}/.local/bin:${PATH}" \ # Only copy mysql/mssql installation scripts for now - so that changing the other # scripts which are needed much later will not invalidate the docker layer here. -COPY scripts/docker/install_mysql.sh /scripts/docker/install_mssql.sh /scripts/docker/install_postgres.sh /scripts/docker/ +COPY --from=scripts install_mysql.sh install_mssql.sh install_postgres.sh /scripts/docker/ # We run scripts with bash here to make sure we can execute the scripts. Changing to +x might have an # unexpected result - the cache for Dockerfiles might get invalidated in case the host system # had different umask set and group x bit was not set. In Azure the bit might be not set at all. 
@@ -459,10 +1488,11 @@ RUN bash /scripts/docker/install_mysql.sh prod \ && find "${AIRFLOW_HOME}" -executable -print0 | xargs --null chmod g+x \ && find "${AIRFLOW_USER_HOME_DIR}" -executable -print0 | xargs --null chmod g+x -COPY --chown=airflow:0 --from=airflow-build-image \ +COPY --from=airflow-build-image --chown=airflow:0 \ "${AIRFLOW_USER_HOME_DIR}/.local" "${AIRFLOW_USER_HOME_DIR}/.local" -COPY --chown=airflow:0 scripts/in_container/prod/entrypoint_prod.sh /entrypoint -COPY --chown=airflow:0 scripts/in_container/prod/clean-logs.sh /clean-logs +COPY --from=scripts entrypoint_prod.sh /entrypoint +COPY --from=scripts clean-logs.sh /clean-logs +COPY --from=scripts airflow-scheduler-autorestart.sh /airflow-scheduler-autorestart # Make /etc/passwd root-group-writeable so that user can be dynamically added by OpenShift # See https://github.com/apache/airflow/issues/9248 @@ -491,7 +1521,7 @@ ENV DUMB_INIT_SETSID="1" \ # Add protection against running pip as root user RUN mkdir -pv /root/bin -COPY scripts/docker/pip /root/bin/pip +COPY --from=scripts pip /root/bin/pip RUN chmod u+x /root/bin/pip WORKDIR ${AIRFLOW_HOME} @@ -528,9 +1558,6 @@ LABEL org.apache.airflow.distro="debian" \ org.opencontainers.image.licenses="Apache-2.0" \ org.opencontainers.image.ref.name="airflow" \ org.opencontainers.image.title="Production Airflow Image" \ - org.opencontainers.image.description="Reference, production-ready Apache Airflow image" \ - io.artifacthub.package.license='Apache-2.0' \ - io.artifacthub.package.readme-url='${AIRFLOW_IMAGE_README_URL}' - + org.opencontainers.image.description="Reference, production-ready Apache Airflow image" ENTRYPOINT ["/usr/bin/dumb-init", "--", "/entrypoint"] CMD [] diff --git a/Dockerfile.ci b/Dockerfile.ci index 18fb20feefb36..c089544f244b3 100644 --- a/Dockerfile.ci +++ b/Dockerfile.ci @@ -1,3 +1,4 @@ +# syntax=docker/dockerfile:1.4 # Licensed to the Apache Software Foundation (ASF) under one or more # contributor license agreements. See the NOTICE file distributed with # this work for additional information regarding copyright ownership. @@ -16,6 +17,997 @@ # WARNING: THIS DOCKERFILE IS NOT INTENDED FOR PRODUCTION USE OR DEPLOYMENT. # ARG PYTHON_BASE_IMAGE="python:3.7-slim-bullseye" + +############################################################################################## +# This is the script image where we keep all inlined bash scripts needed in other segments +# We use PYTHON_BASE_IMAGE to make sure that the scripts are different for different platforms. +############################################################################################## +FROM ${PYTHON_BASE_IMAGE} as scripts + +############################################################################################## +# Please DO NOT modify the inlined scripts manually. The content of those files will be +# replaced by pre-commit automatically from the "scripts/docker/" folder. 
+# This is done in order to avoid problems with caching and file permissions and in order to +# make the PROD Dockerfile standalone +############################################################################################## + +# The content below is automatically copied from scripts/docker/determine_debian_version_specific_variables.sh +COPY <<"EOF" /determine_debian_version_specific_variables.sh +function determine_debian_version_specific_variables() { + local color_red + color_red=$'\e[31m' + local color_reset + color_reset=$'\e[0m' + + local debian_version + debian_version=$(lsb_release -cs) + if [[ ${debian_version} == "buster" ]]; then + export DISTRO_LIBENCHANT="libenchant-dev" + export DISTRO_LIBGCC="libgcc-8-dev" + export DISTRO_SELINUX="python-selinux" + export DISTRO_LIBFFI="libffi6" + # Note missing man directories on debian-buster + # https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=863199 + mkdir -pv /usr/share/man/man1 + mkdir -pv /usr/share/man/man7 + elif [[ ${debian_version} == "bullseye" ]]; then + export DISTRO_LIBENCHANT="libenchant-2-2" + export DISTRO_LIBGCC="libgcc-10-dev" + export DISTRO_SELINUX="python3-selinux" + export DISTRO_LIBFFI="libffi7" + else + echo + echo "${color_red}Unknown distro version ${debian_version}${color_reset}" + echo + exit 1 + fi +} + +determine_debian_version_specific_variables +EOF + +# The content below is automatically copied from scripts/docker/install_mysql.sh +COPY <<"EOF" /install_mysql.sh +set -euo pipefail +declare -a packages + +MYSQL_VERSION="8.0" +readonly MYSQL_VERSION + +COLOR_BLUE=$'\e[34m' +readonly COLOR_BLUE +COLOR_RESET=$'\e[0m' +readonly COLOR_RESET + +: "${INSTALL_MYSQL_CLIENT:?Should be true or false}" + +install_mysql_client() { + if [[ "${1}" == "dev" ]]; then + packages=("libmysqlclient-dev" "mysql-client") + elif [[ "${1}" == "prod" ]]; then + packages=("libmysqlclient21" "mysql-client") + else + echo + echo "Specify either prod or dev" + echo + exit 1 + fi + + echo + echo "${COLOR_BLUE}Installing mysql client version ${MYSQL_VERSION}: ${1}${COLOR_RESET}" + echo + + local key="467B942D3A79BD29" + readonly key + + GNUPGHOME="$(mktemp -d)" + export GNUPGHOME + set +e + for keyserver in $(shuf -e ha.pool.sks-keyservers.net hkp://p80.pool.sks-keyservers.net:80 \ + keyserver.ubuntu.com hkp://keyserver.ubuntu.com:80) + do + gpg --keyserver "${keyserver}" --recv-keys "${key}" 2>&1 && break + done + set -e + gpg --export "${key}" > /etc/apt/trusted.gpg.d/mysql.gpg + gpgconf --kill all + rm -rf "${GNUPGHOME}" + unset GNUPGHOME + echo "deb http://repo.mysql.com/apt/debian/ $(lsb_release -cs) mysql-${MYSQL_VERSION}" > /etc/apt/sources.list.d/mysql.list + apt-get update + apt-get install --no-install-recommends -y "${packages[@]}" + apt-get autoremove -yqq --purge + apt-get clean && rm -rf /var/lib/apt/lists/* +} + +if [[ $(uname -m) == "arm64" || $(uname -m) == "aarch64" ]]; then + # disable MYSQL for ARM64 + INSTALL_MYSQL_CLIENT="false" +fi + +if [[ ${INSTALL_MYSQL_CLIENT:="true"} == "true" ]]; then + install_mysql_client "${@}" +fi +EOF + +# The content below is automatically copied from scripts/docker/install_mssql.sh +COPY <<"EOF" /install_mssql.sh +set -euo pipefail + +: "${INSTALL_MSSQL_CLIENT:?Should be true or false}" + +COLOR_BLUE=$'\e[34m' +readonly COLOR_BLUE +COLOR_YELLOW=$'\e[33m' +readonly COLOR_YELLOW +COLOR_RESET=$'\e[0m' +readonly COLOR_RESET + +function install_mssql_client() { + # Install MsSQL client from Microsoft repositories + if [[ ${INSTALL_MSSQL_CLIENT:="true"} != "true" ]]; then + echo + echo 
"${COLOR_BLUE}Skip installing mssql client${COLOR_RESET}" + echo + return + fi + echo + echo "${COLOR_BLUE}Installing mssql client${COLOR_RESET}" + echo + local distro + local version + distro=$(lsb_release -is | tr '[:upper:]' '[:lower:]') + version_name=$(lsb_release -cs | tr '[:upper:]' '[:lower:]') + version=$(lsb_release -rs) + local driver + if [[ ${version_name} == "buster" ]]; then + driver=msodbcsql17 + elif [[ ${version_name} == "bullseye" ]]; then + driver=msodbcsql18 + else + echo + echo "${COLOR_YELLOW}Only Buster or Bullseye are supported. Skipping MSSQL installation${COLOR_RESET}" + echo + return + fi + curl --silent https://packages.microsoft.com/keys/microsoft.asc | apt-key add - >/dev/null 2>&1 + curl --silent "https://packages.microsoft.com/config/${distro}/${version}/prod.list" > \ + /etc/apt/sources.list.d/mssql-release.list + apt-get update -yqq + apt-get upgrade -yqq + ACCEPT_EULA=Y apt-get -yqq install -y --no-install-recommends "${driver}" + rm -rf /var/lib/apt/lists/* + apt-get autoremove -yqq --purge + apt-get clean && rm -rf /var/lib/apt/lists/* +} + +if [[ $(uname -m) == "arm64" || $(uname -m) == "aarch64" ]]; then + # disable MSSQL for ARM64 + INSTALL_MSSQL_CLIENT="false" +fi + +install_mssql_client "${@}" +EOF + +# The content below is automatically copied from scripts/docker/install_postgres.sh +COPY <<"EOF" /install_postgres.sh +set -euo pipefail +declare -a packages + +COLOR_BLUE=$'\e[34m' +readonly COLOR_BLUE +COLOR_RESET=$'\e[0m' +readonly COLOR_RESET + +: "${INSTALL_POSTGRES_CLIENT:?Should be true or false}" + +install_postgres_client() { + echo + echo "${COLOR_BLUE}Installing postgres client${COLOR_RESET}" + echo + + if [[ "${1}" == "dev" ]]; then + packages=("libpq-dev" "postgresql-client") + elif [[ "${1}" == "prod" ]]; then + packages=("postgresql-client") + else + echo + echo "Specify either prod or dev" + echo + exit 1 + fi + + curl https://www.postgresql.org/media/keys/ACCC4CF8.asc | apt-key add - + echo "deb https://apt.postgresql.org/pub/repos/apt/ $(lsb_release -cs)-pgdg main" > /etc/apt/sources.list.d/pgdg.list + apt-get update + apt-get install --no-install-recommends -y "${packages[@]}" + apt-get autoremove -yqq --purge + apt-get clean && rm -rf /var/lib/apt/lists/* +} + +if [[ ${INSTALL_POSTGRES_CLIENT:="true"} == "true" ]]; then + install_postgres_client "${@}" +fi +EOF + +# The content below is automatically copied from scripts/docker/install_pip_version.sh +COPY <<"EOF" /install_pip_version.sh +. "$( dirname "${BASH_SOURCE[0]}" )/common.sh" + +: "${AIRFLOW_PIP_VERSION:?Should be set}" + +function install_pip_version() { + echo + echo "${COLOR_BLUE}Installing pip version ${AIRFLOW_PIP_VERSION}${COLOR_RESET}" + echo + pip install --disable-pip-version-check --no-cache-dir --upgrade "pip==${AIRFLOW_PIP_VERSION}" && + mkdir -p ${HOME}/.local/bin +} + +common::get_colors +common::get_airflow_version_specification +common::override_pip_version_if_needed +common::show_pip_version_and_location + +install_pip_version +EOF + +# The content below is automatically copied from scripts/docker/install_airflow_dependencies_from_branch_tip.sh +COPY <<"EOF" /install_airflow_dependencies_from_branch_tip.sh + +. 
"$( dirname "${BASH_SOURCE[0]}" )/common.sh" + +: "${AIRFLOW_REPO:?Should be set}" +: "${AIRFLOW_BRANCH:?Should be set}" +: "${INSTALL_MYSQL_CLIENT:?Should be true or false}" +: "${INSTALL_POSTGRES_CLIENT:?Should be true or false}" +: "${AIRFLOW_PIP_VERSION:?Should be set}" + +function install_airflow_dependencies_from_branch_tip() { + echo + echo "${COLOR_BLUE}Installing airflow from ${AIRFLOW_BRANCH}. It is used to cache dependencies${COLOR_RESET}" + echo + if [[ ${INSTALL_MYSQL_CLIENT} != "true" ]]; then + AIRFLOW_EXTRAS=${AIRFLOW_EXTRAS/mysql,} + fi + if [[ ${INSTALL_POSTGRES_CLIENT} != "true" ]]; then + AIRFLOW_EXTRAS=${AIRFLOW_EXTRAS/postgres,} + fi + # Install latest set of dependencies using constraints. In case constraints were upgraded and there + # are conflicts, this might fail, but it should be fixed in the following installation steps + set -x + pip install --root-user-action ignore \ + "https://github.com/${AIRFLOW_REPO}/archive/${AIRFLOW_BRANCH}.tar.gz#egg=apache-airflow[${AIRFLOW_EXTRAS}]" \ + --constraint "${AIRFLOW_CONSTRAINTS_LOCATION}" || true + # make sure correct PIP version is used + pip install --disable-pip-version-check "pip==${AIRFLOW_PIP_VERSION}" 2>/dev/null + pip freeze | grep apache-airflow-providers | xargs pip uninstall --yes 2>/dev/null || true + set +x + echo + echo "${COLOR_BLUE}Uninstalling just airflow. Dependencies remain. Now target airflow can be reinstalled using mostly cached dependencies${COLOR_RESET}" + echo + pip uninstall --yes apache-airflow || true +} + +common::get_colors +common::get_airflow_version_specification +common::override_pip_version_if_needed +common::get_constraints_location +common::show_pip_version_and_location + +install_airflow_dependencies_from_branch_tip +EOF + +# The content below is automatically copied from scripts/docker/common.sh +COPY <<"EOF" /common.sh +set -euo pipefail + +function common::get_colors() { + COLOR_BLUE=$'\e[34m' + COLOR_GREEN=$'\e[32m' + COLOR_RED=$'\e[31m' + COLOR_RESET=$'\e[0m' + COLOR_YELLOW=$'\e[33m' + export COLOR_BLUE + export COLOR_GREEN + export COLOR_RED + export COLOR_RESET + export COLOR_YELLOW +} + + +function common::get_airflow_version_specification() { + if [[ -z ${AIRFLOW_VERSION_SPECIFICATION=} + && -n ${AIRFLOW_VERSION} + && ${AIRFLOW_INSTALLATION_METHOD} != "." ]]; then + AIRFLOW_VERSION_SPECIFICATION="==${AIRFLOW_VERSION}" + fi +} + +function common::override_pip_version_if_needed() { + if [[ -n ${AIRFLOW_VERSION} ]]; then + if [[ ${AIRFLOW_VERSION} =~ ^2\.0.* || ${AIRFLOW_VERSION} =~ ^1\.* ]]; then + export AIRFLOW_PIP_VERSION="22.1.2" + fi + fi +} + +function common::get_constraints_location() { + # auto-detect Airflow-constraint reference and location + if [[ -z "${AIRFLOW_CONSTRAINTS_REFERENCE=}" ]]; then + if [[ ${AIRFLOW_VERSION} =~ v?2.* && ! ${AIRFLOW_VERSION} =~ .*dev.* ]]; then + AIRFLOW_CONSTRAINTS_REFERENCE=constraints-${AIRFLOW_VERSION} + else + AIRFLOW_CONSTRAINTS_REFERENCE=${DEFAULT_CONSTRAINTS_BRANCH} + fi + fi + + if [[ -z ${AIRFLOW_CONSTRAINTS_LOCATION=} ]]; then + local constraints_base="https://raw.githubusercontent.com/${CONSTRAINTS_GITHUB_REPOSITORY}/${AIRFLOW_CONSTRAINTS_REFERENCE}" + local python_version + python_version="$(python --version 2>/dev/stdout | cut -d " " -f 2 | cut -d "." 
-f 1-2)" + AIRFLOW_CONSTRAINTS_LOCATION="${constraints_base}/${AIRFLOW_CONSTRAINTS_MODE}-${python_version}.txt" + fi +} + +function common::show_pip_version_and_location() { + echo "PATH=${PATH}" + echo "pip on path: $(which pip)" + echo "Using pip: $(pip --version)" +} +EOF + +# The content below is automatically copied from scripts/docker/install_pipx_tools.sh +COPY <<"EOF" /install_pipx_tools.sh +. "$( dirname "${BASH_SOURCE[0]}" )/common.sh" + +function install_pipx_tools() { + echo + echo "${COLOR_BLUE}Installing pipx tools${COLOR_RESET}" + echo + # Make sure PIPX is installed in latest version + pip install --root-user-action ignore --upgrade pipx + if [[ $(uname -m) != "aarch64" ]]; then + # Do not install mssql-cli for ARM + # Install all the tools we need available in command line but without impacting the current environment + pipx install mssql-cli + + # Unfortunately mssql-cli installed by `pipx` does not work out of the box because it uses + # its own execution bash script which is not compliant with the auto-activation of + # pipx venvs - we need to manually patch Python executable in the script to fix it: ¯\_(ツ)_/¯ + sed "s/python /\/root\/\.local\/pipx\/venvs\/mssql-cli\/bin\/python /" -i /root/.local/bin/mssql-cli + fi +} + +common::get_colors + +install_pipx_tools +EOF + +# The content below is automatically copied from scripts/docker/prepare_node_modules.sh +COPY <<"EOF" /prepare_node_modules.sh +set -euo pipefail + +COLOR_BLUE=$'\e[34m' +readonly COLOR_BLUE +COLOR_RESET=$'\e[0m' +readonly COLOR_RESET + +function prepare_node_modules() { + echo + echo "${COLOR_BLUE}Preparing node modules${COLOR_RESET}" + echo + local www_dir + if [[ ${AIRFLOW_INSTALLATION_METHOD=} == "." ]]; then + # In case we are building from sources in production image, we should build the assets + www_dir="${AIRFLOW_SOURCES_TO=${AIRFLOW_SOURCES}}/airflow/www" + else + www_dir="$(python -m site --user-site)/airflow/www" + fi + pushd ${www_dir} || exit 1 + set +e + yarn install --frozen-lockfile --no-cache 2>/tmp/out-yarn-install.txt + local res=$? + if [[ ${res} != 0 ]]; then + >&2 echo + >&2 echo "Error when running yarn install:" + >&2 echo + >&2 cat /tmp/out-yarn-install.txt && rm -f /tmp/out-yarn-install.txt + exit 1 + fi + rm -f /tmp/out-yarn-install.txt + popd || exit 1 +} + +prepare_node_modules +EOF + +# The content below is automatically copied from scripts/docker/compile_www_assets.sh +COPY <<"EOF" /compile_www_assets.sh +set -euo pipefail + +BUILD_TYPE=${BUILD_TYPE="prod"} +REMOVE_ARTIFACTS=${REMOVE_ARTIFACTS="true"} + +COLOR_BLUE=$'\e[34m' +readonly COLOR_BLUE +COLOR_RESET=$'\e[0m' +readonly COLOR_RESET + +function compile_www_assets() { + echo + echo "${COLOR_BLUE}Compiling www assets: running yarn ${BUILD_TYPE}${COLOR_RESET}" + echo + local www_dir + if [[ ${AIRFLOW_INSTALLATION_METHOD=} == "." ]]; then + # In case we are building from sources in production image, we should build the assets + www_dir="${AIRFLOW_SOURCES_TO=${AIRFLOW_SOURCES}}/airflow/www" + else + www_dir="$(python -m site --user-site)/airflow/www" + fi + pushd ${www_dir} || exit 1 + set +e + yarn run "${BUILD_TYPE}" 2>/tmp/out-yarn-run.txt + res=$? 
+ if [[ ${res} != 0 ]]; then + >&2 echo + >&2 echo "Error when running yarn run:" + >&2 echo + >&2 cat /tmp/out-yarn-run.txt && rm -rf /tmp/out-yarn-run.txt + exit 1 + fi + rm -f /tmp/out-yarn-run.txt + set -e + local md5sum_file + md5sum_file="static/dist/sum.md5" + readonly md5sum_file + find package.json yarn.lock static/css static/js -type f | sort | xargs md5sum > "${md5sum_file}" + if [[ ${REMOVE_ARTIFACTS} == "true" ]]; then + echo + echo "${COLOR_BLUE}Removing generated node modules${COLOR_RESET}" + echo + rm -rf "${www_dir}/node_modules" + rm -vf "${www_dir}"/{package.json,yarn.lock,.eslintignore,.eslintrc,.stylelintignore,.stylelintrc,compile_assets.sh,webpack.config.js} + else + echo + echo "${COLOR_BLUE}Leaving generated node modules${COLOR_RESET}" + echo + fi + popd || exit 1 +} + +compile_www_assets +EOF + +# The content below is automatically copied from scripts/docker/install_airflow.sh +COPY <<"EOF" /install_airflow.sh + +. "$( dirname "${BASH_SOURCE[0]}" )/common.sh" + +: "${AIRFLOW_PIP_VERSION:?Should be set}" + +function install_airflow() { + # Coherence check for editable installation mode. + if [[ ${AIRFLOW_INSTALLATION_METHOD} != "." && \ + ${AIRFLOW_INSTALL_EDITABLE_FLAG} == "--editable" ]]; then + echo + echo "${COLOR_RED}ERROR! You can only use --editable flag when installing airflow from sources!${COLOR_RESET}" + echo "${COLOR_RED} Current installation method is '${AIRFLOW_INSTALLATION_METHOD} and should be '.'${COLOR_RESET}" + exit 1 + fi + # Remove mysql from extras if client is not going to be installed + if [[ ${INSTALL_MYSQL_CLIENT} != "true" ]]; then + AIRFLOW_EXTRAS=${AIRFLOW_EXTRAS/mysql,} + echo "${COLOR_YELLOW}MYSQL client installation is disabled. Extra 'mysql' installations were therefore omitted.${COLOR_RESET}" + fi + # Remove postgres from extras if client is not going to be installed + if [[ ${INSTALL_POSTGRES_CLIENT} != "true" ]]; then + AIRFLOW_EXTRAS=${AIRFLOW_EXTRAS/postgres,} + echo "${COLOR_YELLOW}Postgres client installation is disabled. 
Extra 'postgres' installations were therefore omitted.${COLOR_RESET}" + fi + if [[ "${UPGRADE_TO_NEWER_DEPENDENCIES}" != "false" ]]; then + echo + echo "${COLOR_BLUE}Installing all packages with eager upgrade${COLOR_RESET}" + echo + # eager upgrade + pip install --root-user-action ignore --upgrade --upgrade-strategy eager \ + "${AIRFLOW_INSTALLATION_METHOD}[${AIRFLOW_EXTRAS}]${AIRFLOW_VERSION_SPECIFICATION}" \ + ${EAGER_UPGRADE_ADDITIONAL_REQUIREMENTS} + if [[ -n "${AIRFLOW_INSTALL_EDITABLE_FLAG}" ]]; then + # Remove airflow and reinstall it using editable flag + # We can only do it when we install airflow from sources + set -x + pip uninstall apache-airflow --yes + pip install --root-user-action ignore ${AIRFLOW_INSTALL_EDITABLE_FLAG} \ + "${AIRFLOW_INSTALLATION_METHOD}[${AIRFLOW_EXTRAS}]${AIRFLOW_VERSION_SPECIFICATION}" + set +x + fi + + # make sure correct PIP version is used + pip install --disable-pip-version-check "pip==${AIRFLOW_PIP_VERSION}" 2>/dev/null + echo + echo "${COLOR_BLUE}Running 'pip check'${COLOR_RESET}" + echo + pip check + else \ + echo + echo "${COLOR_BLUE}Installing all packages with constraints and upgrade if needed${COLOR_RESET}" + echo + set -x + pip install --root-user-action ignore ${AIRFLOW_INSTALL_EDITABLE_FLAG} \ + "${AIRFLOW_INSTALLATION_METHOD}[${AIRFLOW_EXTRAS}]${AIRFLOW_VERSION_SPECIFICATION}" \ + --constraint "${AIRFLOW_CONSTRAINTS_LOCATION}" + # make sure correct PIP version is used + pip install --disable-pip-version-check "pip==${AIRFLOW_PIP_VERSION}" 2>/dev/null + # then upgrade if needed without using constraints to account for new limits in setup.py + pip install --root-user-action ignore --upgrade --upgrade-strategy only-if-needed \ + ${AIRFLOW_INSTALL_EDITABLE_FLAG} \ + "${AIRFLOW_INSTALLATION_METHOD}[${AIRFLOW_EXTRAS}]${AIRFLOW_VERSION_SPECIFICATION}" + # make sure correct PIP version is used + pip install --disable-pip-version-check "pip==${AIRFLOW_PIP_VERSION}" 2>/dev/null + set +x + echo + echo "${COLOR_BLUE}Running 'pip check'${COLOR_RESET}" + echo + pip check + fi + +} + +common::get_colors +common::get_airflow_version_specification +common::override_pip_version_if_needed +common::get_constraints_location +common::show_pip_version_and_location + +install_airflow +EOF + +# The content below is automatically copied from scripts/docker/install_additional_dependencies.sh +COPY <<"EOF" /install_additional_dependencies.sh +set -euo pipefail + +: "${UPGRADE_TO_NEWER_DEPENDENCIES:?Should be true or false}" +: "${ADDITIONAL_PYTHON_DEPS:?Should be set}" +: "${EAGER_UPGRADE_ADDITIONAL_REQUIREMENTS:?Should be set}" +: "${AIRFLOW_PIP_VERSION:?Should be set}" + +. 
"$( dirname "${BASH_SOURCE[0]}" )/common.sh" + +function install_additional_dependencies() { + if [[ "${UPGRADE_TO_NEWER_DEPENDENCIES}" != "false" ]]; then + echo + echo "${COLOR_BLUE}Installing additional dependencies while upgrading to newer dependencies${COLOR_RESET}" + echo + set -x + pip install --root-user-action ignore --upgrade --upgrade-strategy eager \ + ${ADDITIONAL_PYTHON_DEPS} ${EAGER_UPGRADE_ADDITIONAL_REQUIREMENTS} + # make sure correct PIP version is used + pip install --disable-pip-version-check "pip==${AIRFLOW_PIP_VERSION}" 2>/dev/null + set +x + echo + echo "${COLOR_BLUE}Running 'pip check'${COLOR_RESET}" + echo + pip check + else + echo + echo "${COLOR_BLUE}Installing additional dependencies upgrading only if needed${COLOR_RESET}" + echo + set -x + pip install --root-user-action ignore --upgrade --upgrade-strategy only-if-needed \ + ${ADDITIONAL_PYTHON_DEPS} + # make sure correct PIP version is used + pip install --disable-pip-version-check "pip==${AIRFLOW_PIP_VERSION}" 2>/dev/null + set +x + echo + echo "${COLOR_BLUE}Running 'pip check'${COLOR_RESET}" + echo + pip check + fi +} + +common::get_colors +common::get_airflow_version_specification +common::override_pip_version_if_needed +common::get_constraints_location +common::show_pip_version_and_location + +install_additional_dependencies +EOF + +# The content below is automatically copied from scripts/docker/entrypoint_ci.sh +COPY <<"EOF" /entrypoint_ci.sh +#!/usr/bin/env bash +if [[ ${VERBOSE_COMMANDS:="false"} == "true" ]]; then + set -x +fi + +. /opt/airflow/scripts/in_container/_in_container_script_init.sh + +LD_PRELOAD="/usr/lib/$(uname -m)-linux-gnu/libstdc++.so.6" +export LD_PRELOAD + +chmod 1777 /tmp + +AIRFLOW_SOURCES=$(cd "${IN_CONTAINER_DIR}/../.." || exit 1; pwd) + +PYTHON_MAJOR_MINOR_VERSION=${PYTHON_MAJOR_MINOR_VERSION:=3.7} + +export AIRFLOW_HOME=${AIRFLOW_HOME:=${HOME}} + +: "${AIRFLOW_SOURCES:?"ERROR: AIRFLOW_SOURCES not set !!!!"}" + +if [[ ${SKIP_ENVIRONMENT_INITIALIZATION=} != "true" ]]; then + + if [[ $(uname -m) == "arm64" || $(uname -m) == "aarch64" ]]; then + if [[ ${BACKEND:=} == "mysql" || ${BACKEND} == "mssql" ]]; then + echo "${COLOR_RED}ARM platform is not supported for ${BACKEND} backend. Exiting.${COLOR_RESET}" + exit 1 + fi + fi + + echo + echo "${COLOR_BLUE}Running Initialization. 
Your basic configuration is:${COLOR_RESET}" + echo + echo " * ${COLOR_BLUE}Airflow home:${COLOR_RESET} ${AIRFLOW_HOME}" + echo " * ${COLOR_BLUE}Airflow sources:${COLOR_RESET} ${AIRFLOW_SOURCES}" + echo " * ${COLOR_BLUE}Airflow core SQL connection:${COLOR_RESET} ${AIRFLOW__CORE__SQL_ALCHEMY_CONN:=}" + echo + + RUN_TESTS=${RUN_TESTS:="false"} + CI=${CI:="false"} + USE_AIRFLOW_VERSION="${USE_AIRFLOW_VERSION:=""}" + + if [[ ${USE_AIRFLOW_VERSION} == "" ]]; then + export PYTHONPATH=${AIRFLOW_SOURCES} + echo + echo "${COLOR_BLUE}Using airflow version from current sources${COLOR_RESET}" + echo + if [[ -d "${AIRFLOW_SOURCES}/airflow/www/" ]]; then + pushd "${AIRFLOW_SOURCES}/airflow/www/" >/dev/null + ./ask_for_recompile_assets_if_needed.sh + popd >/dev/null + fi + # Cleanup the logs, tmp when entering the environment + sudo rm -rf "${AIRFLOW_SOURCES}"/logs/* + sudo rm -rf "${AIRFLOW_SOURCES}"/tmp/* + mkdir -p "${AIRFLOW_SOURCES}"/logs/ + mkdir -p "${AIRFLOW_SOURCES}"/tmp/ + elif [[ ${USE_AIRFLOW_VERSION} == "none" ]]; then + echo + echo "${COLOR_BLUE}Skip installing airflow - only install wheel/tar.gz packages that are present locally.${COLOR_RESET}" + echo + echo + echo "${COLOR_BLUE}Uninstalling airflow and providers" + echo + uninstall_airflow_and_providers + elif [[ ${USE_AIRFLOW_VERSION} == "wheel" ]]; then + echo + echo "${COLOR_BLUE}Uninstalling airflow and providers" + echo + uninstall_airflow_and_providers + echo "${COLOR_BLUE}Install airflow from wheel package with extras: '${AIRFLOW_EXTRAS}' and constraints reference ${AIRFLOW_CONSTRAINTS_REFERENCE}.${COLOR_RESET}" + echo + install_airflow_from_wheel "${AIRFLOW_EXTRAS}" "${AIRFLOW_CONSTRAINTS_REFERENCE}" + uninstall_providers + elif [[ ${USE_AIRFLOW_VERSION} == "sdist" ]]; then + echo + echo "${COLOR_BLUE}Uninstalling airflow and providers" + echo + uninstall_airflow_and_providers + echo + echo "${COLOR_BLUE}Install airflow from sdist package with extras: '${AIRFLOW_EXTRAS}' and constraints reference ${AIRFLOW_CONSTRAINTS_REFERENCE}.${COLOR_RESET}" + echo + install_airflow_from_sdist "${AIRFLOW_EXTRAS}" "${AIRFLOW_CONSTRAINTS_REFERENCE}" + uninstall_providers + else + echo + echo "${COLOR_BLUE}Uninstalling airflow and providers" + echo + uninstall_airflow_and_providers + echo + echo "${COLOR_BLUE}Install released airflow from PyPI with extras: '${AIRFLOW_EXTRAS}' and constraints reference ${AIRFLOW_CONSTRAINTS_REFERENCE}.${COLOR_RESET}" + echo + install_released_airflow_version "${USE_AIRFLOW_VERSION}" "${AIRFLOW_CONSTRAINTS_REFERENCE}" + fi + if [[ ${USE_PACKAGES_FROM_DIST=} == "true" ]]; then + echo + echo "${COLOR_BLUE}Install all packages from dist folder${COLOR_RESET}" + if [[ ${USE_AIRFLOW_VERSION} == "wheel" ]]; then + echo "${COLOR_BLUE}(except apache-airflow)${COLOR_RESET}" + fi + if [[ ${PACKAGE_FORMAT} == "both" ]]; then + echo + echo "${COLOR_RED}ERROR:You can only specify 'wheel' or 'sdist' as PACKAGE_FORMAT not 'both'.${COLOR_RESET}" + echo + exit 1 + fi + echo + installable_files=() + for file in /dist/*.{whl,tar.gz} + do + if [[ ${USE_AIRFLOW_VERSION} == "wheel" && ${file} == "/dist/apache?airflow-[0-9]"* ]]; then + # Skip Apache Airflow package - it's just been installed above with extras + echo "Skipping ${file}" + continue + fi + if [[ ${PACKAGE_FORMAT} == "wheel" && ${file} == *".whl" ]]; then + echo "Adding ${file} to install" + installable_files+=( "${file}" ) + fi + if [[ ${PACKAGE_FORMAT} == "sdist" && ${file} == *".tar.gz" ]]; then + echo "Adding ${file} to install" + installable_files+=( "${file}" ) + fi + 
done + if (( ${#installable_files[@]} )); then + pip install --root-user-action ignore "${installable_files[@]}" + fi + fi + + # Added to have run-tests on path + export PATH=${PATH}:${AIRFLOW_SOURCES} + + # This is now set in conftest.py - only for pytest tests + unset AIRFLOW__CORE__UNIT_TEST_MODE + + mkdir -pv "${AIRFLOW_HOME}/logs/" + cp -f "${IN_CONTAINER_DIR}/airflow_ci.cfg" "${AIRFLOW_HOME}/unittests.cfg" + + # Change the default worker_concurrency for tests + export AIRFLOW__CELERY__WORKER_CONCURRENCY=8 + + set +e + + "${IN_CONTAINER_DIR}/check_environment.sh" + ENVIRONMENT_EXIT_CODE=$? + set -e + if [[ ${ENVIRONMENT_EXIT_CODE} != 0 ]]; then + echo + echo "Error: check_environment returned ${ENVIRONMENT_EXIT_CODE}. Exiting." + echo + exit ${ENVIRONMENT_EXIT_CODE} + fi + # Create symbolic link to fix possible issues with kubectl config cmd-path + mkdir -p /usr/lib/google-cloud-sdk/bin + touch /usr/lib/google-cloud-sdk/bin/gcloud + ln -s -f /usr/bin/gcloud /usr/lib/google-cloud-sdk/bin/gcloud + + if [[ ${SKIP_SSH_SETUP="false"} == "false" ]]; then + # Set up ssh keys + echo 'yes' | ssh-keygen -t rsa -C your_email@youremail.com -m PEM -P '' -f ~/.ssh/id_rsa \ + >"${AIRFLOW_HOME}/logs/ssh-keygen.log" 2>&1 + + cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys + ln -s -f ~/.ssh/authorized_keys ~/.ssh/authorized_keys2 + chmod 600 ~/.ssh/* + + # SSH Service + sudo service ssh restart >/dev/null 2>&1 + + # Sometimes the server is not quick enough to load the keys! + while [[ $(ssh-keyscan -H localhost 2>/dev/null | wc -l) != "3" ]] ; do + echo "Not all keys yet loaded by the server" + sleep 0.05 + done + + ssh-keyscan -H localhost >> ~/.ssh/known_hosts 2>/dev/null + fi + + # shellcheck source=scripts/in_container/configure_environment.sh + . "${IN_CONTAINER_DIR}/configure_environment.sh" + + # shellcheck source=scripts/in_container/run_init_script.sh + . 
"${IN_CONTAINER_DIR}/run_init_script.sh" + + cd "${AIRFLOW_SOURCES}" + + if [[ ${START_AIRFLOW:="false"} == "true" || ${START_AIRFLOW} == "True" ]]; then + export AIRFLOW__CORE__LOAD_DEFAULT_CONNECTIONS=${LOAD_DEFAULT_CONNECTIONS} + export AIRFLOW__CORE__LOAD_EXAMPLES=${LOAD_EXAMPLES} + # shellcheck source=scripts/in_container/bin/run_tmux + exec run_tmux + fi +fi + +set +u +if [[ "${RUN_TESTS}" != "true" ]]; then + exec /bin/bash "${@}" +fi +set -u + +export RESULT_LOG_FILE="/files/test_result-${TEST_TYPE}-${BACKEND}.xml" + +EXTRA_PYTEST_ARGS=( + "--verbosity=0" + "--strict-markers" + "--durations=100" + "--maxfail=50" + "--color=yes" + "--junitxml=${RESULT_LOG_FILE}" + # timeouts in seconds for individual tests + "--timeouts-order" + "moi" + "--setup-timeout=60" + "--execution-timeout=60" + "--teardown-timeout=60" + # Only display summary for non-expected case + # f - failed + # E - error + # X - xpassed (passed even if expected to fail) + # The following cases are not displayed: + # s - skipped + # x - xfailed (expected to fail and failed) + # p - passed + # P - passed with output + "-rfEX" +) + +if [[ "${TEST_TYPE}" == "Helm" ]]; then + # Enable parallelism + EXTRA_PYTEST_ARGS+=( + "-n" "auto" + ) +else + EXTRA_PYTEST_ARGS+=( + "--with-db-init" + ) +fi + +if [[ ${ENABLE_TEST_COVERAGE:="false"} == "true" ]]; then + EXTRA_PYTEST_ARGS+=( + "--cov=airflow/" + "--cov-config=.coveragerc" + "--cov-report=xml:/files/coverage-${TEST_TYPE}-${BACKEND}.xml" + ) +fi + +declare -a SELECTED_TESTS CLI_TESTS API_TESTS PROVIDERS_TESTS CORE_TESTS WWW_TESTS \ + ALL_TESTS ALL_PRESELECTED_TESTS ALL_OTHER_TESTS + +function find_all_other_tests() { + local all_tests_dirs + all_tests_dirs=$(find "tests" -type d) + all_tests_dirs=$(echo "${all_tests_dirs}" | sed "/tests$/d" ) + all_tests_dirs=$(echo "${all_tests_dirs}" | sed "/tests\/dags/d" ) + local path + for path in "${ALL_PRESELECTED_TESTS[@]}" + do + escaped_path="${path//\//\\\/}" + all_tests_dirs=$(echo "${all_tests_dirs}" | sed "/${escaped_path}/d" ) + done + for path in ${all_tests_dirs} + do + ALL_OTHER_TESTS+=("${path}") + done +} + +if [[ ${#@} -gt 0 && -n "$1" ]]; then + SELECTED_TESTS=("${@}") +else + CLI_TESTS=("tests/cli") + API_TESTS=("tests/api" "tests/api_connexion") + PROVIDERS_TESTS=("tests/providers") + ALWAYS_TESTS=("tests/always") + CORE_TESTS=( + "tests/core" + "tests/executors" + "tests/jobs" + "tests/models" + "tests/serialization" + "tests/ti_deps" + "tests/utils" + ) + WWW_TESTS=("tests/www") + HELM_CHART_TESTS=("tests/charts") + ALL_TESTS=("tests") + ALL_PRESELECTED_TESTS=( + "${CLI_TESTS[@]}" + "${API_TESTS[@]}" + "${HELM_CHART_TESTS[@]}" + "${PROVIDERS_TESTS[@]}" + "${CORE_TESTS[@]}" + "${ALWAYS_TESTS[@]}" + "${WWW_TESTS[@]}" + ) + + if [[ ${TEST_TYPE:=""} == "CLI" ]]; then + SELECTED_TESTS=("${CLI_TESTS[@]}") + elif [[ ${TEST_TYPE:=""} == "API" ]]; then + SELECTED_TESTS=("${API_TESTS[@]}") + elif [[ ${TEST_TYPE:=""} == "Providers" ]]; then + SELECTED_TESTS=("${PROVIDERS_TESTS[@]}") + elif [[ ${TEST_TYPE:=""} == "Core" ]]; then + SELECTED_TESTS=("${CORE_TESTS[@]}") + elif [[ ${TEST_TYPE:=""} == "Always" ]]; then + SELECTED_TESTS=("${ALWAYS_TESTS[@]}") + elif [[ ${TEST_TYPE:=""} == "WWW" ]]; then + SELECTED_TESTS=("${WWW_TESTS[@]}") + elif [[ ${TEST_TYPE:=""} == "Helm" ]]; then + SELECTED_TESTS=("${HELM_CHART_TESTS[@]}") + elif [[ ${TEST_TYPE:=""} == "Other" ]]; then + find_all_other_tests + SELECTED_TESTS=("${ALL_OTHER_TESTS[@]}") + elif [[ ${TEST_TYPE:=""} == "All" || ${TEST_TYPE} == "Quarantined" || \ + ${TEST_TYPE} == 
"Always" || \ + ${TEST_TYPE} == "Postgres" || ${TEST_TYPE} == "MySQL" || \ + ${TEST_TYPE} == "Long" || \ + ${TEST_TYPE} == "Integration" ]]; then + SELECTED_TESTS=("${ALL_TESTS[@]}") + else + echo + echo "${COLOR_RED}ERROR: Wrong test type ${TEST_TYPE} ${COLOR_RESET}" + echo + exit 1 + fi + +fi +readonly SELECTED_TESTS CLI_TESTS API_TESTS PROVIDERS_TESTS CORE_TESTS WWW_TESTS \ + ALL_TESTS ALL_PRESELECTED_TESTS + +if [[ -n ${LIST_OF_INTEGRATION_TESTS_TO_RUN=} ]]; then + # Integration tests + for INT in ${LIST_OF_INTEGRATION_TESTS_TO_RUN} + do + EXTRA_PYTEST_ARGS+=("--integration" "${INT}") + done +elif [[ ${TEST_TYPE:=""} == "Long" ]]; then + EXTRA_PYTEST_ARGS+=( + "-m" "long_running" + "--include-long-running" + ) +elif [[ ${TEST_TYPE:=""} == "Postgres" ]]; then + EXTRA_PYTEST_ARGS+=( + "--backend" + "postgres" + ) +elif [[ ${TEST_TYPE:=""} == "MySQL" ]]; then + EXTRA_PYTEST_ARGS+=( + "--backend" + "mysql" + ) +elif [[ ${TEST_TYPE:=""} == "Quarantined" ]]; then + EXTRA_PYTEST_ARGS+=( + "-m" "quarantined" + "--include-quarantined" + ) +fi + +echo +echo "Running tests ${SELECTED_TESTS[*]}" +echo + +ARGS=("${EXTRA_PYTEST_ARGS[@]}" "${SELECTED_TESTS[@]}") + +if [[ ${RUN_SYSTEM_TESTS:="false"} == "true" ]]; then + "${IN_CONTAINER_DIR}/run_system_tests.sh" "${ARGS[@]}" +else + "${IN_CONTAINER_DIR}/run_ci_tests.sh" "${ARGS[@]}" +fi +EOF + +# The content below is automatically copied from scripts/docker/entrypoint_exec.sh +COPY <<"EOF" /entrypoint_exec.sh +#!/usr/bin/env bash +. /opt/airflow/scripts/in_container/_in_container_script_init.sh + +. /opt/airflow/scripts/in_container/configure_environment.sh + +. /opt/airflow/scripts/in_container/run_init_script.sh + +exec /bin/bash "${@}" +EOF + +############################################################################################## +# This is the www image where we keep all inlined files needed to build ui +# It is copied separately to volume to speed up building and avoid cache miss on changed +# file permissions. +# We use PYTHON_BASE_IMAGE to make sure that the scripts are different for different platforms. +############################################################################################## +FROM ${PYTHON_BASE_IMAGE} as www +COPY airflow/www/package.json airflow/www/yarn.lock airflow/www/webpack.config.js / +COPY airflow/www/static/ /static + FROM ${PYTHON_BASE_IMAGE} as main # Nolog bash flag is currently ignored - but you can replace it with other flags (for example @@ -51,7 +1043,7 @@ ENV DEV_APT_COMMAND=${DEV_APT_COMMAND} \ ADDITIONAL_DEV_APT_DEPS=${ADDITIONAL_DEV_APT_DEPS} \ ADDITIONAL_DEV_APT_COMMAND=${ADDITIONAL_DEV_APT_COMMAND} -COPY scripts/docker/determine_debian_version_specific_variables.sh /scripts/docker/ +COPY --from=scripts determine_debian_version_specific_variables.sh /scripts/docker/ # Install basic and additional apt dependencies RUN apt-get update \ @@ -99,7 +1091,8 @@ RUN apt-get update \ # Only copy mysql/mssql installation scripts for now - so that changing the other # scripts which are needed much later will not invalidate the docker layer here. -COPY scripts/docker/install_mysql.sh scripts/docker/install_mssql.sh scripts/docker/install_postgres.sh /scripts/docker/ +COPY --from=scripts install_mysql.sh install_mssql.sh install_postgres.sh /scripts/docker/ + # We run scripts with bash here to make sure we can execute the scripts. Changing to +x might have an # unexpected result - the cache for Dockerfiles might get invalidated in case the host system # had different umask set and group x bit was not set. 
In Azure the bit might be not set at all. @@ -192,7 +1185,7 @@ ARG AIRFLOW_EXTRAS="all" ARG ADDITIONAL_AIRFLOW_EXTRAS="" # Allows to override constraints source ARG CONSTRAINTS_GITHUB_REPOSITORY="apache/airflow" -ARG AIRFLOW_CONSTRAINTS="constraints" +ARG AIRFLOW_CONSTRAINTS_MODE="constraints-source-providers" ARG AIRFLOW_CONSTRAINTS_REFERENCE="" ARG AIRFLOW_CONSTRAINTS_LOCATION="" ARG DEFAULT_CONSTRAINTS_BRANCH="constraints-main" @@ -202,8 +1195,7 @@ ARG AIRFLOW_CI_BUILD_EPOCH="3" ARG AIRFLOW_PRE_CACHED_PIP_PACKAGES="true" # By default in the image, we are installing all providers when installing from sources ARG INSTALL_PROVIDERS_FROM_SOURCES="true" -ARG INSTALL_FROM_PYPI="true" -ARG AIRFLOW_PIP_VERSION=22.0.4 +ARG AIRFLOW_PIP_VERSION=22.1.2 # Setup PIP # By default PIP install run without cache to make image smaller ARG PIP_NO_CACHE_DIR="true" @@ -222,14 +1214,13 @@ ENV AIRFLOW_REPO=${AIRFLOW_REPO}\ AIRFLOW_BRANCH=${AIRFLOW_BRANCH} \ AIRFLOW_EXTRAS=${AIRFLOW_EXTRAS}${ADDITIONAL_AIRFLOW_EXTRAS:+,}${ADDITIONAL_AIRFLOW_EXTRAS} \ CONSTRAINTS_GITHUB_REPOSITORY=${CONSTRAINTS_GITHUB_REPOSITORY} \ - AIRFLOW_CONSTRAINTS=${AIRFLOW_CONSTRAINTS} \ + AIRFLOW_CONSTRAINTS_MODE=${AIRFLOW_CONSTRAINTS_MODE} \ AIRFLOW_CONSTRAINTS_REFERENCE=${AIRFLOW_CONSTRAINTS_REFERENCE} \ AIRFLOW_CONSTRAINTS_LOCATION=${AIRFLOW_CONSTRAINTS_LOCATION} \ DEFAULT_CONSTRAINTS_BRANCH=${DEFAULT_CONSTRAINTS_BRANCH} \ AIRFLOW_CI_BUILD_EPOCH=${AIRFLOW_CI_BUILD_EPOCH} \ AIRFLOW_PRE_CACHED_PIP_PACKAGES=${AIRFLOW_PRE_CACHED_PIP_PACKAGES} \ INSTALL_PROVIDERS_FROM_SOURCES=${INSTALL_PROVIDERS_FROM_SOURCES} \ - INSTALL_FROM_PYPI=${INSTALL_FROM_PYPI} \ AIRFLOW_VERSION=${AIRFLOW_VERSION} \ AIRFLOW_PIP_VERSION=${AIRFLOW_PIP_VERSION} \ # In the CI image we always: @@ -253,19 +1244,16 @@ RUN echo "Airflow version: ${AIRFLOW_VERSION}" # Those are additional constraints that are needed for some extras but we do not want to # force them on the main Airflow package. Those limitations are: -# * certifi<2021.0.0: required by snowflake provider # * dill<0.3.3 required by apache-beam -# * google-ads<14.0.1 required to prevent updating google-python-api>=2.0.0 -ARG EAGER_UPGRADE_ADDITIONAL_REQUIREMENTS="dill<0.3.3 certifi<2021.0.0 google-ads<14.0.1" +ARG EAGER_UPGRADE_ADDITIONAL_REQUIREMENTS="dill<0.3.3" ARG UPGRADE_TO_NEWER_DEPENDENCIES="false" ENV EAGER_UPGRADE_ADDITIONAL_REQUIREMENTS=${EAGER_UPGRADE_ADDITIONAL_REQUIREMENTS} \ UPGRADE_TO_NEWER_DEPENDENCIES=${UPGRADE_TO_NEWER_DEPENDENCIES} # Copy all scripts required for installation - changing any of those should lead to # rebuilding from here -COPY scripts/docker/install_pip_version.sh scripts/docker/install_airflow_dependencies_from_branch_tip.sh \ - scripts/docker/common.sh \ - /scripts/docker/ +COPY --from=scripts install_pip_version.sh install_airflow_dependencies_from_branch_tip.sh \ + common.sh /scripts/docker/ # We are first creating a venv where all python packages and .so binaries needed by those are # installed. @@ -276,9 +1264,7 @@ COPY scripts/docker/install_pip_version.sh scripts/docker/install_airflow_depend # are uninstalled, only dependencies remain. 
# the cache is only used when "upgrade to newer dependencies" is not set to automatically # account for removed dependencies (we do not install them in the first place) -RUN echo -e "\n\e[32mThe 'Running pip as the root user' warnings below are not valid but we can't disable them :(\e[0m\n"; \ - echo -e "\n\e[34mSee https://github.com/pypa/pip/issues/10556 for details.\e[0m\n" ; \ - bash /scripts/docker/install_pip_version.sh; \ +RUN bash /scripts/docker/install_pip_version.sh; \ if [[ ${AIRFLOW_PRE_CACHED_PIP_PACKAGES} == "true" && \ ${UPGRADE_TO_NEWER_DEPENDENCIES} == "false" ]]; then \ bash /scripts/docker/install_airflow_dependencies_from_branch_tip.sh; \ @@ -287,7 +1273,7 @@ RUN echo -e "\n\e[32mThe 'Running pip as the root user' warnings below are not v # The PATH is needed for PIPX to find the tools installed ENV PATH="/root/.local/bin:${PATH}" -COPY scripts/docker/install_pipx_tools.sh /scripts/docker/ +COPY --from=scripts install_pipx_tools.sh /scripts/docker/ # Install useful command line tools in their own virtualenv so that they do not clash with # dependencies installed in Airflow @@ -296,17 +1282,17 @@ RUN bash /scripts/docker/install_pipx_tools.sh # Copy package.json and yarn.lock to install node modules # this way even if other static check files change, node modules will not need to be installed # we want to keep node_modules so we can do this step separately from compiling assets -COPY airflow/www/package.json airflow/www/yarn.lock ${AIRFLOW_SOURCES}/airflow/www/ -COPY scripts/docker/prepare_node_modules.sh /scripts/docker/ +COPY --from=www package.json yarn.lock ${AIRFLOW_SOURCES}/airflow/www/ +COPY --from=scripts prepare_node_modules.sh /scripts/docker/ # Package JS/css for production RUN bash /scripts/docker/prepare_node_modules.sh # Copy all the needed www/ for assets compilation. Done as two separate COPY # commands so as otherwise it copies the _contents_ of static/ in to www/ -COPY airflow/www/webpack.config.js ${AIRFLOW_SOURCES}/airflow/www/ -COPY airflow/www/static ${AIRFLOW_SOURCES}/airflow/www/static/ -COPY scripts/docker/compile_www_assets.sh /scripts/docker/ +COPY --from=www webpack.config.js ${AIRFLOW_SOURCES}/airflow/www/ +COPY --from=www static ${AIRFLOW_SOURCES}/airflow/www/static/ +COPY --from=scripts compile_www_assets.sh /scripts/docker/ # Build artifacts without removing temporary artifacts (we will need them for incremental changes) # in build mode @@ -318,9 +1304,9 @@ RUN REMOVE_ARTIFACTS="false" BUILD_TYPE="build" bash /scripts/docker/compile_www COPY setup.py ${AIRFLOW_SOURCES}/setup.py COPY setup.cfg ${AIRFLOW_SOURCES}/setup.cfg -COPY airflow/__init__.py ${AIRFLOW_SOURCES}/airflow/__init__.py +COPY airflow/__init__.py ${AIRFLOW_SOURCES}/airflow/ -COPY scripts/docker/install_airflow.sh /scripts/docker/ +COPY --from=scripts install_airflow.sh /scripts/docker/ # The goal of this line is to install the dependencies from the most current setup.py from sources # This will be usually incremental small set of packages in CI optimized build, so it will be very fast @@ -328,15 +1314,13 @@ COPY scripts/docker/install_airflow.sh /scripts/docker/ # Usually we will install versions based on the dependencies in setup.py and upgraded only if needed. 
# But in cron job we will install latest versions matching setup.py to see if there is no breaking change # and push the constraints if everything is successful -RUN if [[ ${INSTALL_FROM_PYPI} == "true" ]]; then \ - bash /scripts/docker/install_airflow.sh; \ - fi - -COPY scripts/in_container/entrypoint_ci.sh /entrypoint -RUN chmod a+x /entrypoint +RUN bash /scripts/docker/install_airflow.sh -COPY scripts/docker/install_pip_version.sh scripts/docker/install_additional_dependencies.sh /scripts/docker/ +COPY --from=scripts entrypoint_ci.sh /entrypoint +COPY --from=scripts entrypoint_exec.sh /entrypoint-exec +RUN chmod a+x /entrypoint /entrypoint-exec +COPY --from=scripts install_pip_version.sh install_additional_dependencies.sh /scripts/docker/ # Additional python deps to install ARG ADDITIONAL_PYTHON_DEPS="" diff --git a/IMAGES.rst b/IMAGES.rst index 5e31e84191d43..58ef0cca54852 100644 --- a/IMAGES.rst +++ b/IMAGES.rst @@ -85,33 +85,32 @@ You can build the CI image using current sources this command: .. code-block:: bash - ./breeze build-image + breeze build-image You can build the PROD image using current sources with this command: .. code-block:: bash - ./breeze build-image --production-image + breeze build-prod-image By adding ``--python `` parameter you can build the image version for the chosen Python version. The images are build with default extras - different extras for CI and production image and you can change the extras via the ``--extras`` parameters and add new ones with ``--additional-extras``. -You can see default extras used via ``./breeze flags``. For example if you want to build Python 3.7 version of production image with "all" extras installed you should run this command: .. code-block:: bash - ./breeze build-image --python 3.7 --extras "all" --production-image + breeze build-prod-image --python 3.7 --extras "all" If you just want to add new extras you can add them like that: .. code-block:: bash - ./breeze build-image --python 3.7 --additional-extras "all" --production-image + breeze build-prod-image --python 3.7 --additional-extras "all" The command that builds the CI image is optimized to minimize the time needed to rebuild the image when the source code of Airflow evolves. This means that if you already have the image locally downloaded and @@ -129,8 +128,7 @@ parameter to Breeze: .. code-block:: bash - ./breeze build-image --python 3.7 --additional-extras=trino \ - --production-image --install-airflow-version=2.0.0 + breeze build-prod-image --python 3.7 --additional-extras=trino --install-airflow-version=2.0.0 This will build the image using command similar to: @@ -138,7 +136,7 @@ This will build the image using command similar to: pip install \ apache-airflow[async,amazon,celery,cncf.kubernetes,docker,dask,elasticsearch,ftp,grpc,hashicorp,http,ldap,google,microsoft.azure,mysql,postgres,redis,sendgrid,sftp,slack,ssh,statsd,virtualenv]==2.0.0 \ - --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-2.0.0/constraints-3.6.txt" + --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-2.0.0/constraints-3.7.txt" .. note:: @@ -160,15 +158,15 @@ HEAD of development for constraints): .. 
code-block:: bash pip install "https://github.com/apache/airflow/archive/.tar.gz#egg=apache-airflow" \ - --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-main/constraints-3.6.txt" + --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-main/constraints-3.7.txt" You can also skip installing airflow and install it from locally provided files by using -``--install-from-docker-context-files`` parameter and ``--disable-pypi-when-building`` to Breeze: +``--install-packages-from-context`` parameter to Breeze: .. code-block:: bash - ./breeze build-image --python 3.7 --additional-extras=trino \ - --production-image --disable-pypi-when-building --install-from-docker-context-files + breeze build-prod-image --python 3.7 --additional-extras=trino \ + --airflow-is-in-context-pypi --install-packages-from-context In this case airflow and all packages (.whl files) should be placed in the ``docker-context-files`` folder. @@ -190,31 +188,37 @@ Dockerfile image= and scripts further rebuilds with local build cache will be co You can also disable build cache altogether. This is the strategy used by the scheduled builds in CI - they will always rebuild all the images from scratch. -You can change the strategy by providing one of the ``--build-cache-local``, ``--build-cache-pulled`` or -even ``--build-cache-disabled`` flags when you run Breeze commands. For example: +You can change the strategy by providing the ``--docker-cache`` flag with one of ``registry`` (default), ``local``, +or ``disabled`` when you run Breeze commands. For example: .. code-block:: bash - ./breeze build-image --python 3.7 --build-cache-local + breeze build-image --python 3.7 --docker-cache local Will build the CI image using local build cache (note that it will take quite a long time the first time you run it). .. code-block:: bash - ./breeze build-image --python 3.7 --production-image --build-cache-pulled + breeze build-prod-image --python 3.7 --docker-cache registry -Will build the production image with pulled images as cache. +Will build the production image with cache used from the registry. .. code-block:: bash - ./breeze build-image --python 3.7 --production-image --build-cache-disabled + breeze build-prod-image --python 3.7 --docker-cache disabled Will build the production image from scratch. -You can also turn local docker caching by setting ``DOCKER_CACHE`` variable to "local", "pulled", -"disabled" and exporting it. +You can also turn on local docker caching by setting the ``DOCKER_CACHE`` variable to ``local``, ``registry``, +or ``disabled`` and exporting it. + +.. code-block:: bash + + export DOCKER_CACHE="registry" + +or .. code-block:: bash @@ -229,7 +233,7 @@ or Naming conventions ================== -By default images we are using cache for images in Github Container registry. We are using GitHub +By default images we are using cache for images in GitHub Container registry. We are using GitHub Container Registry as development image cache and CI registry for build images. The images are all in organization wide "apache/" namespace. We are adding "airflow-" as prefix for the image names of all Airflow images. The images are linked to the repository @@ -247,13 +251,7 @@ currently run build. They are built once per each build and pulled by each test ghcr.io/apache/airflow//ci/python: - for CI images ghcr.io/apache/airflow//prod/python: - for production images - -The cache images (pushed when main merge succeeds) are kept with ``cache`` tag: - -..
code-block:: bash - - ghcr.io/apache/airflow//ci/python:cache - for CI images - ghcr.io/apache/airflow//prod/python:cache - for production images +Thoe image contain inlined cache. You can see all the current GitHub images at ``_ @@ -291,7 +289,7 @@ For example this command will run the same Python 3.8 image as was used in build .. code-block:: bash - ./breeze --github-image-id 9a621eaa394c0a0a336f8e1b31b35eff4e4ee86e \ + ./breeze-legacy --github-image-id 9a621eaa394c0a0a336f8e1b31b35eff4e4ee86e \ --python 3.8 --integration rabbitmq You can see more details and examples in `Breeze `_ @@ -311,8 +309,9 @@ Here just a few examples are presented which should give you general understandi This builds the production image in version 3.7 with additional airflow extras from 2.0.0 PyPI package and additional apt dev and runtime dependencies. -It is recommended to build images with ``DOCKER_BUILDKIT=1`` variable -(Breeze sets ``DOCKER_BUILDKIT=1`` variable automatically). +As of Airflow 2.3.0, it is required to build images with ``DOCKER_BUILDKIT=1`` variable +(Breeze sets ``DOCKER_BUILDKIT=1`` variable automatically) or via ``docker buildx build`` command if +you have ``buildx`` plugin installed. .. code-block:: bash @@ -330,9 +329,7 @@ the same image can be built using ``breeze`` (it supports auto-completion of the .. code-block:: bash - ./breeze build-image -f Dockerfile.ci \ - --production-image --python 3.7 \ - --additional-extras=jdbc --additional-python-deps="pandas" \ + breeze build-prod-image --python 3.7 --additional-extras=jdbc --additional-python-deps="pandas" \ --additional-dev-apt-deps="gcc g++" --additional-runtime-apt-deps="default-jre-headless" You can customize more aspects of the image - such as additional commands executed before apt dependencies @@ -374,7 +371,7 @@ The following build arguments (``--build-arg`` in docker build command) can be u +==========================================+==========================================+==========================================+ | ``PYTHON_BASE_IMAGE`` | ``python:3.7-slim-bullseye`` | Base Python image | +------------------------------------------+------------------------------------------+------------------------------------------+ -| ``PYTHON_MAJOR_MINOR_VERSION`` | ``3.6`` | major/minor version of Python (should | +| ``PYTHON_MAJOR_MINOR_VERSION`` | ``3.7`` | major/minor version of Python (should | | | | match base image) | +------------------------------------------+------------------------------------------+------------------------------------------+ | ``DEPENDENCIES_EPOCH_NUMBER`` | ``2`` | increasing this number will reinstall | @@ -427,9 +424,11 @@ The following build arguments (``--build-arg`` in docker build command) can be u +------------------------------------------+------------------------------------------+------------------------------------------+ | ``AIRFLOW_EXTRAS`` | ``all`` | extras to install | +------------------------------------------+------------------------------------------+------------------------------------------+ -| ``UPGRADE_TO_NEWER_DEPENDENCIES`` | ``false`` | If set to true, the dependencies are | -| | | upgraded to newer versions matching | -| | | setup.py before installation. | +| ``UPGRADE_TO_NEWER_DEPENDENCIES`` | ``false`` | If set to a value different than "false" | +| | | the dependencies are upgraded to newer | +| | | versions. In CI it is set to build id | +| | | to make sure subsequent builds are not | +| | | reusing cached images with same value. 
| +------------------------------------------+------------------------------------------+------------------------------------------+ | ``AIRFLOW_PRE_CACHED_PIP_PACKAGES`` | ``true`` | Allows to pre-cache airflow PIP packages | | | | from the GitHub of Apache Airflow | @@ -476,7 +475,7 @@ The following build arguments (``--build-arg`` in docker build command) can be u | ``ADDITIONAL_RUNTIME_APT_ENV`` | | Additional env variables defined | | | | when installing runtime deps | +------------------------------------------+------------------------------------------+------------------------------------------+ -| ``AIRFLOW_PIP_VERSION`` | ``22.0.4`` | PIP version used. | +| ``AIRFLOW_PIP_VERSION`` | ``22.1.2`` | PIP version used. | +------------------------------------------+------------------------------------------+------------------------------------------+ | ``PIP_PROGRESS_BAR`` | ``on`` | Progress bar for PIP installation | +------------------------------------------+------------------------------------------+------------------------------------------+ @@ -492,7 +491,7 @@ This builds the CI image in version 3.7 with default extras ("all"). --build-arg PYTHON_BASE_IMAGE="python:3.7-slim-bullseye" --tag my-image:0.0.1 -This builds the CI image in version 3.6 with "gcp" extra only. +This builds the CI image in version 3.7 with "gcp" extra only. .. code-block:: bash @@ -502,7 +501,7 @@ This builds the CI image in version 3.6 with "gcp" extra only. --build-arg AIRFLOW_EXTRAS=gcp --tag my-image:0.0.1 -This builds the CI image in version 3.6 with "apache-beam" extra added. +This builds the CI image in version 3.7 with "apache-beam" extra added. .. code-block:: bash @@ -511,7 +510,7 @@ This builds the CI image in version 3.6 with "apache-beam" extra added. --build-arg PYTHON_BASE_IMAGE="python:3.7-slim-bullseye" \ --build-arg ADDITIONAL_AIRFLOW_EXTRAS="apache-beam" --tag my-image:0.0.1 -This builds the CI image in version 3.6 with "mssql" additional package added. +This builds the CI image in version 3.7 with "mssql" additional package added. .. code-block:: bash @@ -520,7 +519,7 @@ This builds the CI image in version 3.6 with "mssql" additional package added. --build-arg PYTHON_BASE_IMAGE="python:3.7-slim-bullseye" \ --build-arg ADDITIONAL_PYTHON_DEPS="mssql" --tag my-image:0.0.1 -This builds the CI image in version 3.6 with "gcc" and "g++" additional apt dev dependencies added. +This builds the CI image in version 3.7 with "gcc" and "g++" additional apt dev dependencies added. .. code-block:: @@ -529,7 +528,7 @@ This builds the CI image in version 3.6 with "gcc" and "g++" additional apt dev --build-arg PYTHON_BASE_IMAGE="python:3.7-slim-bullseye" \ --build-arg ADDITIONAL_DEV_APT_DEPS="gcc g++" --tag my-image:0.0.1 -This builds the CI image in version 3.6 with "jdbc" extra and "default-jre-headless" additional apt runtime dependencies added. +This builds the CI image in version 3.7 with "jdbc" extra and "default-jre-headless" additional apt runtime dependencies added. .. code-block:: @@ -543,7 +542,7 @@ Running the CI image -------------------- The entrypoint in the CI image contains all the initialisation needed for tests to be immediately executed. -It is copied from ``scripts/in_container/entrypoint_ci.sh``. +It is copied from ``scripts/docker/entrypoint_ci.sh``. The default behaviour is that you are dropped into bash shell. 
However if RUN_TESTS variable is set to "true", then tests passed as arguments are executed diff --git a/INSTALL b/INSTALL index 4354d5e33f558..ba2f5500475ee 100644 --- a/INSTALL +++ b/INSTALL @@ -46,21 +46,21 @@ python setup.py install # There are different constraint files for different python versions. For example" pip install . \ - --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-main/constraints-3.6.txt" + --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-main/constraints-3.7.txt" By default `pip install` in Airflow 2.0 installs only the provider packages that are needed by the extras and install them as packages from PyPI rather than from local sources: pip install .[google,amazon] \ - --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-main/constraints-3.6.txt" + --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-main/constraints-3.7.txt" -You can upgrade just airflow, without paying attention to provider's dependencies by using 'no-providers' +You can upgrade just airflow, without paying attention to provider's dependencies by using 'constraints-no-providers' constraint files. This allows you to keep installed provider packages. pip install . --upgrade \ - --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-main/constraints-no-providers-3.6.txt" + --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-main/constraints-no-providers-3.7.txt" You can also install airflow in "editable mode" (with -e) flag and then provider packages are @@ -75,7 +75,7 @@ and in ``CONTRIBUTING.rst`` for developing community maintained providers. This is useful if you want to develop providers: pip install -e . \ - --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-main/constraints-3.6.txt" + --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-main/constraints-3.7.txt" You can also skip installing provider packages from PyPI by setting INSTALL_PROVIDERS_FROM_SOURCE to "true". In this case Airflow will be installed in non-editable mode with all providers installed from the sources. @@ -83,31 +83,29 @@ Additionally `provider.yaml` files will also be copied to providers folders whic discoverable by Airflow even if they are not installed from packages in this case. INSTALL_PROVIDERS_FROM_SOURCES="true" pip install . 
\ - --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-main/constraints-3.6.txt" + --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-main/constraints-3.7.txt" Airflow can be installed with extras to install some additional features (for example 'async' or 'doc' or to install automatically providers and all dependencies needed by that provider: pip install .[async,google,amazon] \ - --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-main/constraints-3.6.txt" + --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-main/constraints-3.7.txt" The list of available extras: # START EXTRAS HERE - airbyte, alibaba, all, all_dbs, amazon, apache.atlas, apache.beam, apache.cassandra, apache.drill, apache.druid, apache.hdfs, apache.hive, apache.kylin, apache.livy, apache.pig, apache.pinot, -apache.spark, apache.sqoop, apache.webhdfs, asana, async, atlas, aws, azure, cassandra, celery, -cgroups, cloudant, cncf.kubernetes, crypto, dask, databricks, datadog, dbt.cloud, deprecated_api, -devel, devel_all, devel_ci, devel_hadoop, dingding, discord, doc, docker, druid, elasticsearch, -exasol, facebook, ftp, gcp, gcp_api, github, github_enterprise, google, google_auth, grpc, -hashicorp, hdfs, hive, http, imap, influxdb, jdbc, jenkins, jira, kerberos, kubernetes, ldap, +apache.spark, apache.sqoop, apache.webhdfs, arangodb, asana, async, atlas, aws, azure, cassandra, +celery, cgroups, cloudant, cncf.kubernetes, crypto, dask, databricks, datadog, dbt.cloud, +deprecated_api, devel, devel_all, devel_ci, devel_hadoop, dingding, discord, doc, docker, druid, +elasticsearch, exasol, facebook, ftp, gcp, gcp_api, github, github_enterprise, google, google_auth, +grpc, hashicorp, hdfs, hive, http, imap, influxdb, jdbc, jenkins, jira, kerberos, kubernetes, ldap, leveldb, microsoft.azure, microsoft.mssql, microsoft.psrp, microsoft.winrm, mongo, mssql, mysql, neo4j, odbc, openfaas, opsgenie, oracle, pagerduty, pandas, papermill, password, pinot, plexus, postgres, presto, qds, qubole, rabbitmq, redis, s3, salesforce, samba, segment, sendgrid, sentry, sftp, singularity, slack, snowflake, spark, sqlite, ssh, statsd, tableau, telegram, trino, vertica, virtualenv, webhdfs, winrm, yandex, zendesk - # END EXTRAS HERE # For installing Airflow in development environments - see CONTRIBUTING.rst diff --git a/INTHEWILD.md b/INTHEWILD.md index e316a648fa734..acf2a4f719564 100644 --- a/INTHEWILD.md +++ b/INTHEWILD.md @@ -224,7 +224,7 @@ Currently, **officially** using Airflow: 1. [Headout](https://headout.com/) [[@shivanshs9](https://github.com/shivanshs9/)] 1. [Healthjump](http://www.healthjump.com/) [[@miscbits](https://github.com/miscbits)] 1. [HelloFresh](https://www.hellofresh.com) [[@tammymendt](https://github.com/tammymendt) & [@davidsbatista](https://github.com/davidsbatista) & [@iuriinedostup](https://github.com/iuriinedostup)] -1. [Hipages](https://www.hipages.com.au/) [[@arihantsurana](https://github.com/arihantsurana)] +1. [Hipages](https://www.hipages.com.au/) [[@arihantsurana](https://github.com/arihantsurana) & [@koconder](https://github.com/koconder)] 1. [Holimetrix](http://holimetrix.com/) [[@thibault-ketterer](https://github.com/thibault-ketterer)] 1. [HomeToGo](https://www.hometogo.com/) [[@HomeToGo](https://github.com/hometogo), [@AurimasGr](https://github.com/AurimasGr)] 1. [Hootsuite](https://github.com/hootsuite) @@ -349,6 +349,7 @@ Currently, **officially** using Airflow: 1. 
[Pronto Tools](http://www.prontotools.io/) [[@zkan](https://github.com/zkan) & [@mesodiar](https://github.com/mesodiar)] 1. [proton.ai](https://proton.ai/) [[@prmsolutions](https://github.com/prmsolutions)] 1. [PubNub](https://pubnub.com) [[@jzucker2](https://github.com/jzucker2)] +1. [Pura Scents](https://www.trypura.com/) [[@mfjackson](https://github.com/mfjackson)] 1. [PXYData](https://www.pxydata.com) [[@patchus](https://github.com/patchus)] 1. [Qliro](https://www.qliro.com) [[@kvackkvackanka](https://github.com/kvackkvackanka)] 1. [Qoala](https://www.qoala.id) [[@gnomeria](https://github.com/gnomeria), [@qoala-engineering](https://github.com/qoala-engineering)] @@ -411,7 +412,7 @@ Currently, **officially** using Airflow: 1. [Tesla](https://www.tesla.com/) [[@thoralf-gutierrez](https://github.com/thoralf-gutierrez)] 1. [TextNow](https://www.textnow.com/) 1. [The Climate Corporation](https://climate.com/) [[@jmelching](https://github.com/jmelching)] -1. [The Dyrt](https://thedyrt.com/) [[@mfjackson](https://github.com/mfjackson)] +1. [The Dyrt](https://thedyrt.com/) 1. [The Home Depot](https://www.homedepot.com/) [[@apekshithr](https://github.com/apekshithr)] 1. [THE ICONIC](https://www.theiconic.com.au/) [[@revathijay](https://github.com/revathijay), [@ilikedata](https://github.com/ilikedata)] 1. [theScore](https://www.thescore.com/) [[@kristenmalikk](https://github.com/kristenmalikk)] @@ -464,7 +465,6 @@ Currently, **officially** using Airflow: 1. [WiseBanyan](https://wisebanyan.com/) 1. [Wise](https://wise.com) [[@koszti](https://github.com/koszti)] 1. [Wisr](https://wisr.com.au/) [[@fsodano](https://github.com/fsodano) & [@vincyf1](https://github.com/vincyf1)] -1. [WixAnswers](https://www.wixanswers.com/) [[@eladkal](https://github.com/eladkal)] 1. [Wix](https://www.wix.com/) [[@eladkal](https://github.com/eladkal)] 1. [Wooga](https://www.wooga.com/) 1. [WorldRemit](https://www.worldremit.com/) [[@boittega](https://github.com/boittega)] diff --git a/LICENSE b/LICENSE index 34a85897c882f..81899d0fee17d 100644 --- a/LICENSE +++ b/LICENSE @@ -248,6 +248,7 @@ The text of each license is also included at licenses/LICENSE-[project].txt. (BSD 3 License) d3 v5.16.0 (https://d3js.org) (BSD 3 License) d3-shape v2.1.0 (https://github.com/d3/d3-shape) + (BSD 3 License) cgroupspy 0.2.1 (https://github.com/cloudsigma/cgroupspy) ======================================================================== See licenses/LICENSES-ui.txt for packages used in `/airflow/www` diff --git a/LOCAL_VIRTUALENV.rst b/LOCAL_VIRTUALENV.rst index 925651abd64d1..1cbf5b6b12e81 100644 --- a/LOCAL_VIRTUALENV.rst +++ b/LOCAL_VIRTUALENV.rst @@ -51,7 +51,7 @@ Required Software Packages Use system-level package managers like yum, apt-get for Linux, or Homebrew for macOS to install required software packages: -* Python (One of: 3.7, 3.8, 3.9) +* Python (One of: 3.7, 3.8, 3.9, 3.10) * MySQL 5.7+ * libxml @@ -102,7 +102,7 @@ Creating a Local virtualenv To use your IDE for Airflow development and testing, you need to configure a virtual environment. Ideally you should set up virtualenv for all Python versions that Airflow -supports (3.7, 3.8, 3.9). +supports (3.7, 3.8, 3.9, 3.10). To create and initialize the local virtualenv: @@ -122,7 +122,7 @@ To create and initialize the local virtualenv: .. code-block:: bash - conda create -n airflow python=3.7 # or 3.8, or 3.9 + conda create -n airflow python=3.7 # or 3.8, 3.9, 3.10 conda activate airflow 2. 
Install Python PIP requirements: @@ -150,7 +150,7 @@ for different python versions). For development on current main source: .. code-block:: bash - # use the same version of python as you are working with, 3.7, 3.8, or 3.9 + # use the same version of python as you are working with, 3.7, 3.8, 3.9, or 3.10 pip install -e ".[devel,]" \ --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-main/constraints-source-providers-3.7.txt" @@ -163,7 +163,7 @@ You can also install Airflow in non-editable mode: .. code-block:: bash - # use the same version of python as you are working with, 3.7, 3.8, or 3.9 + # use the same version of python as you are working with, 3.7, 3.8, 3.9, or 3.10 pip install ".[devel,]" \ --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-main/constraints-source-providers-3.7.txt" @@ -173,7 +173,7 @@ sources, unless you set ``INSTALL_PROVIDERS_FROM_SOURCES`` environment variable .. code-block:: bash - # use the same version of python as you are working with, 3.7, 3.8, or 3.9 + # use the same version of python as you are working with, 3.7, 3.8, 3.9, or 3.10 INSTALL_PROVIDERS_FROM_SOURCES="true" pip install ".[devel,]" \ --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-main/constraints-source-providers-3.7.txt" @@ -205,14 +205,14 @@ Activate your virtualenv, e.g. by using ``workon``, and once you are in it, run: .. code-block:: bash - ./breeze initialize-local-virtualenv + ./breeze-legacy initialize-local-virtualenv By default Breeze installs the ``devel`` extra only. You can optionally control which extras are installed by exporting ``VIRTUALENV_EXTRAS`` before calling Breeze: .. code-block:: bash export VIRTUALENV_EXTRAS="devel,google,postgres" - ./breeze initialize-local-virtualenv + ./breeze-legacy initialize-local-virtualenv 5. (optionally) run yarn build if you plan to run the webserver diff --git a/MANIFEST.in b/MANIFEST.in index 8291ad54d432e..8f4b22b7ca01c 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -18,7 +18,7 @@ include NOTICE include LICENSE -include CHANGELOG.txt +include RELEASE_NOTES.rst include README.md graft licenses graft airflow/www diff --git a/PULL_REQUEST_WORKFLOW.rst b/PULL_REQUEST_WORKFLOW.rst index e5802b24c41d1..d7ca2f9b93eaa 100644 --- a/PULL_REQUEST_WORKFLOW.rst +++ b/PULL_REQUEST_WORKFLOW.rst @@ -57,12 +57,12 @@ We approached the problem by: 3) Even more optimisation came from limiting the scope of tests to only "default" matrix parameters. So far in Airflow we always run all tests for all matrix combinations. The primary matrix components are: - * Python versions (currently 3.6, 3.7, 3.8, 3.9) + * Python versions (currently 3.7, 3.8, 3.9, 3.10) * Backend types (currently MySQL/Postgres) * Backed version (currently MySQL 5.7, MySQL 8, Postgres 13 We've decided that instead of running all the combinations of parameters for all matrix component we will - only run default values (Python 3.6, Mysql 5.7, Postgres 13) for all PRs which are not approved yet by + only run default values (Python 3.7, Mysql 5.7, Postgres 13) for all PRs which are not approved yet by the committers. This has a nice effect, that full set of tests (though with limited combinations of the matrix) are still run in the CI for every Pull Request that needs tests at all - allowing the contributors to make sure that their PR is "good enough" to be reviewed. 
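The INSTALL and LOCAL_VIRTUALENV.rst changes above describe installing Airflow into a local virtualenv pinned to the constraint files. A minimal sketch of that workflow, under stated assumptions (the virtualenv path and the ``devel,google`` extras are illustrative choices, not values mandated by these files):

.. code-block:: bash

    # Create and activate a virtualenv for one of the supported Python versions (3.7-3.10);
    # the path used here is an arbitrary example.
    python3.7 -m venv ~/venvs/airflow-dev
    source ~/venvs/airflow-dev/bin/activate

    # Editable install from local sources, pinned to the source-providers constraints for Python 3.7,
    # mirroring the command shown in the LOCAL_VIRTUALENV.rst changes above.
    pip install -e ".[devel,google]" \
        --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-main/constraints-source-providers-3.7.txt"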
diff --git a/README.md b/README.md index e30361470ee24..3acb883dc4ba8 100644 --- a/README.md +++ b/README.md @@ -85,15 +85,15 @@ Airflow is not a streaming solution, but it is often used to process real-time d Apache Airflow is tested with: -| | Main version (dev) | Stable version (2.2.4) | -|---------------------|---------------------|--------------------------| -| Python | 3.7, 3.8, 3.9 | 3.6, 3.7, 3.8, 3.9 | -| Platform | AMD64/ARM64(\*) | AMD64 | -| Kubernetes | 1.20, 1.21 | 1.18, 1.19, 1.20 | -| PostgreSQL | 10, 11, 12, 13 | 9.6, 10, 11, 12, 13 | -| MySQL | 5.7, 8 | 5.7, 8 | -| SQLite | 3.15.0+ | 3.15.0+ | -| MSSQL | 2017(\*), 2019 (\*) | | +| | Main version (dev) | Stable version (2.3.1) | +|---------------------|------------------------------|------------------------------| +| Python | 3.7, 3.8, 3.9, 3.10 | 3.7, 3.8, 3.9, 3.10 | +| Platform | AMD64/ARM64(\*) | AMD64/ARM64(\*) | +| Kubernetes | 1.20, 1.21, 1.22, 1.23, 1.24 | 1.20, 1.21, 1.22, 1.23, 1.24 | +| PostgreSQL | 10, 11, 12, 13, 14 | 10, 11, 12, 13, 14 | +| MySQL | 5.7, 8 | 5.7, 8 | +| SQLite | 3.15.0+ | 3.15.0+ | +| MSSQL | 2017(\*), 2019 (\*) | 2017(\*), 2019 (\*) | \* Experimental @@ -104,7 +104,8 @@ MariaDB is not tested/recommended. **Note**: SQLite is used in Airflow tests. Do not use it in production. We recommend using the latest stable version of SQLite for local development. -**Note**: Python v3.10 is not supported yet. For details, see [#19059](https://github.com/apache/airflow/issues/19059). +**Note**: Support for Python v3.10 will be available from Airflow 2.3.0. The `main` (development) branch +already supports Python 3.10. **Note**: Airflow currently can be run on POSIX-compliant Operating Systems. For development it is regularly tested on fairly modern Linux Distros and recent versions of MacOS. @@ -159,15 +160,15 @@ them to the appropriate format and workflow that your tool requires. ```bash -pip install 'apache-airflow==2.2.4' \ - --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-2.2.4/constraints-3.7.txt" +pip install 'apache-airflow==2.3.1' \ + --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-2.3.1/constraints-3.7.txt" ``` 2. Installing with extras (i.e., postgres, google) ```bash -pip install 'apache-airflow[postgres,google]==2.2.4' \ - --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-2.2.4/constraints-3.7.txt" +pip install 'apache-airflow[postgres,google]==2.3.1' \ + --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-2.3.1/constraints-3.7.txt" ``` For information on installing provider packages, check @@ -214,9 +215,9 @@ following the ASF Policy. ![DAGs](https://raw.githubusercontent.com/apache/airflow/main/docs/apache-airflow/img/dags.png) -- **Tree**: Tree representation of a DAG that spans across time. +- **Grid**: Grid representation of a DAG that spans across time. - ![Tree](https://raw.githubusercontent.com/apache/airflow/main/docs/apache-airflow/img/tree.png) + ![Grid](https://raw.githubusercontent.com/apache/airflow/main/docs/apache-airflow/img/grid.png) - **Graph**: Visualization of a DAG's dependencies and their current status for a specific run. 
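The installation snippets above pin ``pip`` to a constraints file whose name depends on both the Airflow version and the local Python major.minor version. A small sketch of how such a URL can be assembled before installing; the Python version detection mirrors the ``common::get_constraints_location`` helper shown earlier in this diff, and the variable names here are illustrative only:

```bash
AIRFLOW_VERSION="2.3.1"
# Derive "major.minor" of the local interpreter, e.g. "3.7"
PYTHON_VERSION="$(python --version 2>/dev/stdout | cut -d " " -f 2 | cut -d "." -f 1-2)"
CONSTRAINT_URL="https://raw.githubusercontent.com/apache/airflow/constraints-${AIRFLOW_VERSION}/constraints-${PYTHON_VERSION}.txt"

pip install "apache-airflow[postgres,google]==${AIRFLOW_VERSION}" --constraint "${CONSTRAINT_URL}"
```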
@@ -267,12 +268,12 @@ packages:

Apache Airflow version life cycle:

-
+

| Version | Current Patch/Minor | State | First Release | Limited Support | EOL/Terminated |
|-----------|-----------------------|-----------|-----------------|-------------------|------------------|
-| 2 | 2.2.4 | Supported | Dec 17, 2020 | TBD | TBD |
+| 2 | 2.3.1 | Supported | Dec 17, 2020 | TBD | TBD |
| 1.10 | 1.10.15 | EOL | Aug 27, 2018 | Dec 17, 2020 | June 17, 2021 |
| 1.9 | 1.9.0 | EOL | Jan 03, 2018 | Aug 27, 2018 | Aug 27, 2018 |
| 1.8 | 1.8.2 | EOL | Mar 19, 2017 | Jan 03, 2018 | Jan 03, 2018 |

@@ -292,20 +293,21 @@
They are based on the official release schedule of Python and Kubernetes, nicely summarized in the
[Python Developer's Guide](https://devguide.python.org/#status-of-python-branches) and
[Kubernetes version skew policy](https://kubernetes.io/docs/setup/release/version-skew-policy/).

-1. We drop support for Python and Kubernetes versions when they reach EOL. We drop support for those
-   EOL versions in main right after EOL date, and it is effectively removed when we release the
-   first new MINOR (Or MAJOR if there is no new MINOR version) of Airflow
-   For example, for Python 3.7 it means that we will drop support in main right after 27.06.2023, and
-   the first MAJOR or MINOR version of Airflow released after will not have it.
+1. We drop support for Python and Kubernetes versions when they reach EOL. Except for Kubernetes, a
+   version stays supported by Airflow if two major cloud providers still provide support for it. We drop
+   support for those EOL versions in main right after the EOL date, and it is effectively removed when we
+   release the first new MINOR (or MAJOR if there is no new MINOR version) of Airflow. For example, for
+   Python 3.7 it means that we will drop support in main right after 27.06.2023, and the first MAJOR or
+   MINOR version of Airflow released after will not have it.

2. The "oldest" supported version of Python/Kubernetes is the default one until we decide to switch to
   later version. "Default" is only meaningful in terms of "smoke tests" in CI PRs, which are run using this
   default version and the default reference image available. Currently `apache/airflow:latest`
-  and `apache/airflow:2.2.4` images are Python 3.7 images. This means that default reference image will
+  and `apache/airflow:2.3.1` images are Python 3.7 images. This means that the default reference image will
   become the default at the time when we start preparing for dropping 3.7 support which is few months
   before the end of life for Python 3.7.

-4. We support a new version of Python/Kubernetes in main after they are officially released, as soon as we
+3. We support a new version of Python/Kubernetes in main after they are officially released, as soon as we
   make them work in our CI pipeline (which might not be immediate due to dependencies catching up with
   new versions of Python mostly) we release new images/support in Airflow based on the working CI setup.

@@ -378,6 +380,14 @@ The important dependencies are:
  are very likely to introduce breaking changes across those so limiting it to MAJOR version makes sense
* `werkzeug`: the library is known to cause problems in new versions. It is tightly coupled with Flask
  libraries, and we should update them together
+* `celery`: Celery is a crucial component of Airflow, as it is used for the CeleryExecutor (and similar).
+  Celery [follows SemVer](https://docs.celeryq.dev/en/stable/contributing.html?highlight=semver#versions),
+  so we should upper-bound it to the next MAJOR version. Also when we bump the upper version of the library,
+  we should make sure the Celery Provider minimum Airflow version is updated.
+* `kubernetes`: Kubernetes is a crucial component of Airflow as it is used for the KubernetesExecutor
+  (and similar). The Kubernetes Python library [follows SemVer](https://github.com/kubernetes-client/python#compatibility),
+  so we should upper-bound it to the next MAJOR version. Also when we bump the upper version of the library,
+  we should make sure the Kubernetes Provider minimum Airflow version is updated.

### Approach for dependencies in Airflow Providers and extras

diff --git a/CHANGELOG.txt b/RELEASE_NOTES.rst
similarity index 56%
rename from CHANGELOG.txt
rename to RELEASE_NOTES.rst
index c890f68e95084..87319b55aaebb 100644
--- a/CHANGELOG.txt
+++ b/RELEASE_NOTES.rst
@@ -1,8 +1,743 @@
-Airflow 2.2.4, 2022-02-22
--------------------------
+ .. Licensed to the Apache Software Foundation (ASF) under one
+    or more contributor license agreements. See the NOTICE file
+    distributed with this work for additional information
+    regarding copyright ownership. The ASF licenses this file
+    to you under the Apache License, Version 2.0 (the
+    "License"); you may not use this file except in compliance
+    with the License. You may obtain a copy of the License at
+
+ .. http://www.apache.org/licenses/LICENSE-2.0
+
+ .. Unless required by applicable law or agreed to in writing,
+    software distributed under the License is distributed on an
+    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+    KIND, either express or implied. See the License for the
+    specific language governing permissions and limitations
+    under the License.
+
+.. contents:: Apache Airflow Releases
+   :local:
+   :depth: 1
+
+.. towncrier release notes start
+
+Airflow 2.3.1 (2022-05-25)
+--------------------------
+
+Significant Changes
+^^^^^^^^^^^^^^^^^^^
+No significant changes.
+
+Bug Fixes
+^^^^^^^^^
+
+- Automatically reschedule stalled queued tasks in ``CeleryExecutor`` (#23690)
+- Fix expand/collapse all buttons (#23590)
+- Grid view status filters (#23392)
+- Expand/collapse all groups (#23487)
+- Fix retrieval of deprecated non-config values (#23723)
+- Fix secrets rendered in UI when task is not executed. (#22754)
+- Fix provider import error matching (#23825)
+- Fix regression in ignoring symlinks (#23535)
+- Fix ``dag-processor`` fetch metadata database config (#23575)
+- Fix auto upstream dep when expanding non-templated field (#23771)
+- Fix task log is not captured (#23684)
+- Add ``reschedule`` to the serialized fields for the ``BaseSensorOperator`` (#23674)
+- Modify db clean to also catch the ProgrammingError exception (#23699)
+- Remove titles from link buttons (#23736)
+- Fix grid details header text overlap (#23728)
+- Ensure ``execution_timeout`` as timedelta (#23655)
+- Don't run pre-migration checks for downgrade (#23634)
+- Add index for event column in log table (#23625)
+- Implement ``send_callback`` method for ``CeleryKubernetesExecutor`` and ``LocalKubernetesExecutor`` (#23617)
+- Fix ``PythonVirtualenvOperator`` templated_fields (#23559)
+- Apply specific ID collation to ``root_dag_id`` too (#23536)
+- Prevent ``KubernetesJobWatcher`` getting stuck on resource too old (#23521)
+- Fix scheduler crash when expanding with mapped task that returned none (#23486)
+- Fix broken dagrun links when many runs start at the same time (#23462)
+- Fix: Exception when parsing log #20966 (#23301)
+- Handle invalid date parsing in webserver views. (#23161)
+- Pools with negative open slots should not block other pools (#23143)
+- Move around overflow, position and padding (#23044)
+- Change approach to finding bad rows to LEFT OUTER JOIN. (#23528)
+- Only count bad refs when ``moved`` table exists (#23491)
+- Visually distinguish task group summary (#23488)
+- Remove color change for highly nested groups (#23482)
+- Optimize 2.3.0 pre-upgrade check queries (#23458)
+- Add backward compatibility for ``core__sql_alchemy_conn__cmd`` (#23441)
+- Fix literal cross product expansion (#23434)
+- Fix broken task instance link in xcom list (#23367)
+- Fix connection test button (#23345)
+- fix cli ``airflow dags show`` for mapped operator (#23339)
+- Hide some task instance attributes (#23338)
+- Don't show grid actions if server would reject with permission denied (#23332)
+- Use run_id for ``ti.mark_success_url`` (#23330)
+- Fix update user auth stats (#23314)
+- Use ``
{% endif %}
[www template hunks around the "Task Actions" modal — @@ -257,13 +304,14 @@, @@ -299,12 +347,13 @@,
 @@ -332,12 +381,13 @@ and @@ -375,70 +425,6 @@ — rearranging the modal's action buttons; the button
 markup itself is not preserved here]
{% endblock %}
{% block tail %}
  {{ super() }}
diff --git a/airflow/www/templates/airflow/dags.html b/airflow/www/templates/airflow/dags.html
index f062a57dedcd5..f3d6e517a1e89 100644
--- a/airflow/www/templates/airflow/dags.html
+++ b/airflow/www/templates/airflow/dags.html
@@ -21,6 +21,34 @@
 {% from 'appbuilder/loading_dots.html' import loading_dots %}
 {% from 'airflow/_messages.html' import show_message %}
+{%- macro sortable_column(display_name, attribute_name) -%}
+  {% set curr_ordering_direction = (request.args.get('sorting_direction', 'desc')) %}
+  {% set new_ordering_direction = ('asc' if (request.args.get('sorting_key') != attribute_name or curr_ordering_direction == 'desc') else 'desc') %}
+  [header link markup rendering {{ display_name }} and the sort-direction indicator — not preserved here]
+{%- endmacro -%}
 {% block page_title %}
   {% if search_query %}"{{ search_query }}" - {% endif %}DAGs - {{ appbuilder.app_name }}
 {% endblock %}
@@ -52,25 +80,37 @@
   {% for m in dashboard_alerts %}
     {{ show_message(m.message, m.category) }}
   {% endfor %}
-  {% for original_table_name, moved_table_name in migration_moved_data_alerts %}
-    {% call show_message(category='error', dismissible=false) %}
-      Airflow found incompatible data in the {{ original_table_name }} table in the
-      metadatabase, and has moved them to {{ moved_table_name }} during the database migration
-      to upgrade. Please inspect the moved data to decide whether you need to keep them, and manually drop
-      the {{ moved_table_name }} table to dismiss this warning. Read more about it
+  {% if migration_moved_data_alerts %}
+    {% call show_message(category='warning', dismissible=false) %}
+      While upgrading the metadatabase, Airflow had to move some bad data in order to apply new constraints.
+      The moved data can be found in the following tables:
+      [table with the headers "Source table" and "Table with moved rows", one row per
+       (original_table_name, moved_table_name) pair in migration_moved_data_alerts]
+      Please inspect the moved data to decide whether you need to keep them, and manually drop
+      the moved tables to dismiss this warning. Read more about it
       in Upgrading.
     {% endcall %}
-  {% endfor %}
+  {% endif %}
  {{ super() }}
  {% if sqlite_warning | default(true) %}
-    {% call show_message(category='warning', dismissible=false) %}
+    {% call show_message(category='warning', dismissible=false) %}
      Do not use SQLite as metadata DB in production – it should only be used for dev/testing.
      We recommend using Postgres or MySQL. Click here for more information.
    {% endcall %}
  {% endif %}
  {% if sequential_executor_warning | default(false) %}
-    {% call show_message(category='warning', dismissible=false) %}
+    {% call show_message(category='warning', dismissible=false) %}
      Do not use SequentialExecutor in production.
      Click here for more information.
    {% endcall %}
@@ -81,14 +121,23 @@

[remaining dags.html hunks — the page header ({{ page_title }}), the search/filter form (tags_filter,
 search_query, {{ loading_dots(id='loading-dots', classes='refresh-loading') }}) and the DAG list table
 are rebuilt: the plain DAG, Owner and Next Run column headers are replaced with
 {{ sortable_column("DAG", "dag_id") }}, {{ sortable_column("Owner", "owners") }} and
 {{ sortable_column("Next Run", "next_dagrun") }}, next to the info, Runs, Schedule, Last Run,
 Recent Tasks, Actions and Links columns and the "No results" row; the per-DAG cells (pause switch with
 switch_tooltip, {{ dag.dag_id }} link, tag and owner loops, js-loading-dag-stats / js-loading-last-run /
 js-loading-task-stats loading dots, {{ dag.schedule_interval }} with the timetable_description tooltip,
 next_dagrun with the data-nextrun tooltip comment, actions and links) appear in both the removed and the
 added version of the table; the footer keeps {{ paging }} and
 "Showing {{ num_dag_from }}-{{ num_dag_to }} of {{ num_of_all_dags }} DAGs";
 the surrounding HTML markup is not preserved here]
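Only the Jinja logic of the new `sortable_column` macro survived in the dags.html hunk above; its link markup did not. A minimal sketch of how that logic could drive a sortable header link — the `url_for('Airflow.index', ...)` target and the arrow indicator are assumptions for illustration, not taken from the diff:

```jinja
{%- macro sortable_column(display_name, attribute_name) -%}
  {# direction currently applied; clicking the active column again flips it #}
  {% set curr_ordering_direction = (request.args.get('sorting_direction', 'desc')) %}
  {% set new_ordering_direction = ('asc' if (request.args.get('sorting_key') != attribute_name or curr_ordering_direction == 'desc') else 'desc') %}
  {# hypothetical link target: the DAGs list view with the new sort applied #}
  <a href="{{ url_for('Airflow.index', sorting_key=attribute_name, sorting_direction=new_ordering_direction) }}">
    {{ display_name }}
    {% if request.args.get('sorting_key') == attribute_name %}
      {{ '▲' if curr_ordering_direction == 'asc' else '▼' }}
    {% endif %}
  </a>
{%- endmacro -%}
```

A header cell can then render `{{ sortable_column("DAG", "dag_id") }}`, and the macro toggles `sorting_direction` between `asc` and `desc` for whichever `sorting_key` is currently active.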