From 0e80ba0e5baf4844f60e0cc082dcc812f9c3562a Mon Sep 17 00:00:00 2001 From: Akihiro Nitta Date: Thu, 1 Sep 2022 00:37:49 +0900 Subject: [PATCH 01/12] CI: Reuse check schema (#14469) * rm _check-shema.yml * Reuse devtools' check schema --- .github/workflows/_check-schema.yml | 37 ----------------------------- .github/workflows/ci-schema.yml | 5 ++-- 2 files changed, 3 insertions(+), 39 deletions(-) delete mode 100644 .github/workflows/_check-schema.yml diff --git a/.github/workflows/_check-schema.yml b/.github/workflows/_check-schema.yml deleted file mode 100644 index 299af83503831..0000000000000 --- a/.github/workflows/_check-schema.yml +++ /dev/null @@ -1,37 +0,0 @@ -name: Reusable Check Schema - -on: - workflow_call: - inputs: - azure-dir: - description: 'Directory containing Azure Pipelines config files. Provide an empty string to skip checking on Azure Pipelines files.' - default: './.azure/' - required: false - type: string - -jobs: - schema: - runs-on: ubuntu-20.04 - steps: - - name: Checkout - uses: actions/checkout@v3 - - - name: Install dependencies - run: pip install check-jsonschema - - - name: GitHub Actions - workflow - run: check-jsonschema $(find .github/workflows -name '*.yml' -a ! -name '_*.yml') --builtin-schema "github-workflows" - - - name: GitHub Actions - action - run: | - if [ -d ".github/actions" ]; then - check-jsonschema .github/actions/*/*.yml --builtin-schema "github-actions" - fi - - - name: Azure Pipelines - env: - SCHEMA_FILE: https://raw.githubusercontent.com/microsoft/azure-pipelines-vscode/v1.204.0/service-schema.json - run: | - if [ -d ${{ inputs.azure-dir }} ]; then - check-jsonschema ${{ inputs.azure-dir }}/*.yml --schemafile "$SCHEMA_FILE" - fi diff --git a/.github/workflows/ci-schema.yml b/.github/workflows/ci-schema.yml index 156334ae96043..364266d340520 100644 --- a/.github/workflows/ci-schema.yml +++ b/.github/workflows/ci-schema.yml @@ -1,10 +1,11 @@ name: Check Schema on: - push: {} + push: + branches: [master, "release/*"] pull_request: branches: [master, "release/*"] jobs: check: - uses: ./.github/workflows/_check-schema.yml + uses: Lightning-AI/devtools/.github/workflows/check-schema.yml@v0.1.0 From 37b5b686d47b341089adcc47299bb0d326eff42c Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 5 Sep 2022 19:13:08 +0200 Subject: [PATCH 02/12] Bump actions/checkout from 2 to 3 (#14540) Bumps [actions/checkout](https://github.com/actions/checkout) from 2 to 3. - [Release notes](https://github.com/actions/checkout/releases) - [Changelog](https://github.com/actions/checkout/blob/main/CHANGELOG.md) - [Commits](https://github.com/actions/checkout/compare/v2...v3) --- updated-dependencies: - dependency-name: actions/checkout dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/ci-app-cloud-e2e-test.yml | 5 +- .github/workflows/ci-app-examples.yml | 2 +- .github/workflows/ci-app-tests.yml | 2 +- .github/workflows/ci-lite-test-full.yml | 120 ++++++++++++++++++++ .github/workflows/ci-pkg-install.yml | 6 +- .github/workflows/ci-pytorch-test-conda.yml | 2 +- .github/workflows/ci-pytorch-test-full.yml | 2 +- .github/workflows/ci-pytorch-test-slow.yml | 2 +- .github/workflows/docs-checks.yml | 4 +- .github/workflows/docs-deploy.yml | 4 +- .github/workflows/events-nightly.yml | 2 +- .github/workflows/legacy-checkpoints.yml | 2 +- .github/workflows/release-docker.yml | 2 +- .github/workflows/release-pypi.yml | 14 +-- 14 files changed, 144 insertions(+), 25 deletions(-) create mode 100644 .github/workflows/ci-lite-test-full.yml diff --git a/.github/workflows/ci-app-cloud-e2e-test.yml b/.github/workflows/ci-app-cloud-e2e-test.yml index 3ad455650a117..c8cef5fbf53f9 100644 --- a/.github/workflows/ci-app-cloud-e2e-test.yml +++ b/.github/workflows/ci-app-cloud-e2e-test.yml @@ -57,9 +57,8 @@ jobs: - commands timeout-minutes: 35 steps: - - uses: actions/checkout@v2 - - name: Set up Python 3.8 - uses: actions/setup-python@v4 + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 with: python-version: "3.8" diff --git a/.github/workflows/ci-app-examples.yml b/.github/workflows/ci-app-examples.yml index 32e1fc54e1814..818777727ca5a 100644 --- a/.github/workflows/ci-app-examples.yml +++ b/.github/workflows/ci-app-examples.yml @@ -32,7 +32,7 @@ jobs: timeout-minutes: 10 steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v4 with: diff --git a/.github/workflows/ci-app-tests.yml b/.github/workflows/ci-app-tests.yml index e4e6574d9aa31..f5725fab0e832 100644 --- a/.github/workflows/ci-app-tests.yml +++ b/.github/workflows/ci-app-tests.yml @@ -30,7 +30,7 @@ jobs: timeout-minutes: 20 steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v4 with: diff --git a/.github/workflows/ci-lite-test-full.yml b/.github/workflows/ci-lite-test-full.yml new file mode 100644 index 0000000000000..896086b697d66 --- /dev/null +++ b/.github/workflows/ci-lite-test-full.yml @@ -0,0 +1,120 @@ +name: Test Lite full + +# see: https://help.github.com/en/actions/reference/events-that-trigger-workflows +on: # Trigger the workflow on push or pull request, but only for the master branch + push: + branches: [master, "release/*"] + pull_request: + branches: [master, "release/*"] + types: [opened, reopened, ready_for_review, synchronize] + paths: + - "requirements/lite/**" + - "src/lightning_lite/**" + - "tests/tests_lite/**" + - "setup.cfg" # includes pytest config + - ".github/workflows/ci-lite-test-full.yml" + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref }} + cancel-in-progress: ${{ ! (github.ref == 'refs/heads/master' || startsWith(github.ref, 'refs/heads/release/')) }} + +jobs: + + lite-cpu: + runs-on: ${{ matrix.os }} + if: github.event.pull_request.draft == false + strategy: + fail-fast: false + matrix: + os: [ubuntu-20.04, windows-2022, macOS-11] + python-version: ["3.7", "3.10"] # minimum, maximum + requires: ["oldest", "latest"] + release: ["stable"] + exclude: + # There's no distribution of the oldest PyTorch 1.9 for Python 3.10. + # TODO: Remove the exclusion when dropping PyTorch 1.9 support. + - {python-version: "3.10", requires: "oldest"} + + timeout-minutes: 40 + + steps: + - uses: actions/checkout@v3 + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + + - name: Reset caching + run: python -c "import time; days = time.time() / 60 / 60 / 24; print(f'TIME_PERIOD=d{int(days / 2) * 2}')" >> $GITHUB_ENV + + - name: basic setup + run: | + pip --version + pip install -q fire + + - name: Setup Windows + if: runner.os == 'windows' + run: | + python .actions/assistant.py requirements_prune_pkgs horovod + + - name: Set min. dependencies + if: matrix.requires == 'oldest' + run: | + python .actions/assistant.py replace_oldest_ver + + # Note: This uses an internal pip API and may not always work + # https://github.com/actions/cache/blob/master/examples.md#multiple-oss-in-a-workflow + - name: Get pip cache dir + id: pip-cache + run: echo "::set-output name=dir::$(pip cache dir)" + + - name: pip cache + uses: actions/cache@v3 + with: + path: ${{ steps.pip-cache.outputs.dir }} + key: ${{ runner.os }}-pip-td${{ env.TIME_PERIOD }}-py${{ matrix.python-version }}-${{ matrix.release }}-${{ matrix.requires }}-${{ hashFiles('requirements/lite/*.txt') }} + restore-keys: | + ${{ runner.os }}-pip-td${{ env.TIME_PERIOD }}-py${{ matrix.python-version }}-${{ matrix.release }}-${{ matrix.requires }}- + + - name: Install dependencies + env: + PACKAGE_NAME: pytorch # TODO(lite) does this need to say lite? + FREEZE_REQUIREMENTS: 1 + run: | + flag=$(python -c "print('--pre' if '${{matrix.release}}' == 'pre' else '')" 2>&1) + url=$(python -c "print('test/cpu/torch_test.html' if '${{matrix.release}}' == 'pre' else 'cpu/torch_stable.html')" 2>&1) + pip install -e .[test] --upgrade $flag --find-links "https://download.pytorch.org/whl/${url}" + pip list + shell: bash + + - name: Testing Lite + working-directory: tests/tests_lite + # NOTE: do not include coverage report here, see: https://github.com/nedbat/coveragepy/issues/1003 + run: coverage run --source lightning_lite -m pytest -v --durations=50 --junitxml=results-${{ runner.os }}-py${{ matrix.python-version }}-${{ matrix.requires }}-${{ matrix.release }}.xml + + - name: Upload pytest results + if: failure() + uses: actions/upload-artifact@v3 + with: + name: unittest-results-${{ runner.os }}-py${{ matrix.python-version }}-${{ matrix.requires }}-${{ matrix.release }} + path: tests/tests_lite/results-${{ runner.os }}-py${{ matrix.python-version }}-${{ matrix.requires }}-${{ matrix.release }}.xml + + - name: Statistics + if: success() + working-directory: tests/tests_lite + run: | + coverage report + coverage xml + + - name: Upload coverage to Codecov + uses: codecov/codecov-action@v3 + if: always() + # see: https://github.com/actions/toolkit/issues/399 + continue-on-error: true + with: + token: ${{ secrets.CODECOV_TOKEN }} + file: tests/tests_lite/coverage.xml + flags: cpu,pytest,python${{ matrix.python-version }} + name: CPU-coverage + fail_ci_if_error: false diff --git a/.github/workflows/ci-pkg-install.yml b/.github/workflows/ci-pkg-install.yml index a9fdd36693a67..b4fae74f991aa 100644 --- a/.github/workflows/ci-pkg-install.yml +++ b/.github/workflows/ci-pkg-install.yml @@ -38,7 +38,7 @@ jobs: python-version: [3.8] # , 3.9 steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} @@ -72,7 +72,7 @@ jobs: python-version: [3.8] # , 3.9 steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} @@ -104,7 +104,7 @@ jobs: python-version: [3.8] # , 3.9 steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} diff --git a/.github/workflows/ci-pytorch-test-conda.yml b/.github/workflows/ci-pytorch-test-conda.yml index 82c463a54169f..ad9c34156896b 100644 --- a/.github/workflows/ci-pytorch-test-conda.yml +++ b/.github/workflows/ci-pytorch-test-conda.yml @@ -33,7 +33,7 @@ jobs: - name: Workaround for https://github.com/actions/checkout/issues/760 run: git config --global --add safe.directory /__w/lightning/lightning - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Get changed files id: changed-files diff --git a/.github/workflows/ci-pytorch-test-full.yml b/.github/workflows/ci-pytorch-test-full.yml index 987373b6ea2bf..f43c182b988fb 100644 --- a/.github/workflows/ci-pytorch-test-full.yml +++ b/.github/workflows/ci-pytorch-test-full.yml @@ -35,7 +35,7 @@ jobs: timeout-minutes: 40 steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Get changed files id: changed-files diff --git a/.github/workflows/ci-pytorch-test-slow.yml b/.github/workflows/ci-pytorch-test-slow.yml index 126eaaf17da1a..23b55f6b638df 100644 --- a/.github/workflows/ci-pytorch-test-slow.yml +++ b/.github/workflows/ci-pytorch-test-slow.yml @@ -26,7 +26,7 @@ jobs: timeout-minutes: 20 steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Get changed files id: changed-files diff --git a/.github/workflows/docs-checks.yml b/.github/workflows/docs-checks.yml index a91f216af963f..0de1d16cfba58 100644 --- a/.github/workflows/docs-checks.yml +++ b/.github/workflows/docs-checks.yml @@ -19,7 +19,7 @@ jobs: matrix: pkg: ["app", "pytorch"] steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 with: submodules: true - uses: actions/setup-python@v4 @@ -70,7 +70,7 @@ jobs: matrix: pkg: ["app", "pytorch"] steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 with: submodules: true # lfs: true diff --git a/.github/workflows/docs-deploy.yml b/.github/workflows/docs-deploy.yml index dd589baf2fa46..97c320ca84298 100644 --- a/.github/workflows/docs-deploy.yml +++ b/.github/workflows/docs-deploy.yml @@ -9,8 +9,8 @@ jobs: runs-on: ubuntu-20.04 steps: - name: Checkout 🛎️ - uses: actions/checkout@v2 - # If you're using actions/checkout@v2 you must set persist-credentials to false in most cases for the deployment to work correctly. + uses: actions/checkout@v3 + # If you're using actions/checkout@v3 you must set persist-credentials to false in most cases for the deployment to work correctly. with: persist-credentials: false - uses: actions/setup-python@v4 diff --git a/.github/workflows/events-nightly.yml b/.github/workflows/events-nightly.yml index 13d3895bf365d..2576b05e33566 100644 --- a/.github/workflows/events-nightly.yml +++ b/.github/workflows/events-nightly.yml @@ -15,7 +15,7 @@ jobs: steps: # does nightly releases from feature branch - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - uses: actions/setup-python@v4 with: python-version: 3.9 diff --git a/.github/workflows/legacy-checkpoints.yml b/.github/workflows/legacy-checkpoints.yml index 0856cfd3229a2..7a59b9446aab0 100644 --- a/.github/workflows/legacy-checkpoints.yml +++ b/.github/workflows/legacy-checkpoints.yml @@ -8,7 +8,7 @@ jobs: create-legacy-ckpts: runs-on: ubuntu-20.04 steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - uses: actions/setup-python@v4 with: diff --git a/.github/workflows/release-docker.yml b/.github/workflows/release-docker.yml index 2de330ea5ca75..67503ba2b2c0d 100644 --- a/.github/workflows/release-docker.yml +++ b/.github/workflows/release-docker.yml @@ -22,7 +22,7 @@ jobs: - {python_version: "3.9", pytorch_version: "1.12", cuda_version: "11.6.1"} steps: - name: Checkout - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: Get release version id: get_version diff --git a/.github/workflows/release-pypi.yml b/.github/workflows/release-pypi.yml index 97c3b8eca77d1..2c6f5da240f63 100644 --- a/.github/workflows/release-pypi.yml +++ b/.github/workflows/release-pypi.yml @@ -21,7 +21,7 @@ jobs: build-pkgs: ${{ steps.candidate.outputs.pkgs }} pull-pkgs: ${{ steps.download.outputs.pkgs }} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - uses: actions/setup-python@v4 with: python-version: 3.9 @@ -60,7 +60,7 @@ jobs: max-parallel: 1 matrix: ${{ fromJSON(needs.releasing.outputs.build-pkgs) }} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - uses: actions/download-artifact@v3 with: name: dist-packages-${{ github.sha }} @@ -94,7 +94,7 @@ jobs: max-parallel: 1 matrix: ${{ fromJSON(needs.releasing.outputs.pull-pkgs) }} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - uses: actions/download-artifact@v3 with: name: pypi-packages-${{ github.sha }} @@ -118,7 +118,7 @@ jobs: needs: [build-package, download-package] runs-on: ubuntu-20.04 steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - uses: actions/download-artifact@v3 with: name: dist-packages-${{ github.sha }} @@ -169,7 +169,7 @@ jobs: needs: build-meta-pkg if: startsWith(github.event.ref, 'refs/tags') || github.event_name == 'release' steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - uses: actions/download-artifact@v3 with: name: dist-packages-${{ github.sha }} @@ -188,7 +188,7 @@ jobs: needs: build-meta-pkg if: startsWith(github.event.ref, 'refs/tags') || github.event_name == 'release' steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - uses: actions/download-artifact@v3 with: name: dist-packages-${{ github.sha }} @@ -220,7 +220,7 @@ jobs: runs-on: ubuntu-20.04 needs: [build-package, publish-package] steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - uses: actions/setup-python@v4 with: From f1f1bdba01f9864832901f84efbb222833db2b68 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 5 Sep 2022 17:11:54 +0000 Subject: [PATCH 03/12] Bump tj-actions/changed-files from 29.0.1 to 29.0.3 (#14541) Bumps [tj-actions/changed-files](https://github.com/tj-actions/changed-files) from 29.0.1 to 29.0.3. - [Release notes](https://github.com/tj-actions/changed-files/releases) - [Changelog](https://github.com/tj-actions/changed-files/blob/main/HISTORY.md) - [Commits](https://github.com/tj-actions/changed-files/compare/v29.0.1...v29.0.3) --- updated-dependencies: - dependency-name: tj-actions/changed-files dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/ci-pr-gatekeeper.yml | 2 +- .github/workflows/ci-pytorch-test-conda.yml | 2 +- .github/workflows/ci-pytorch-test-full.yml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci-pr-gatekeeper.yml b/.github/workflows/ci-pr-gatekeeper.yml index 8714bec926c23..5c235f151b59b 100644 --- a/.github/workflows/ci-pr-gatekeeper.yml +++ b/.github/workflows/ci-pr-gatekeeper.yml @@ -20,7 +20,7 @@ jobs: fetch-depth: "2" # To retrieve the preceding commit. - name: Get changed files using defaults id: changed-files - uses: tj-actions/changed-files@v29.0.1 + uses: tj-actions/changed-files@v29.0.3 - name: Determine changes id: touched run: | diff --git a/.github/workflows/ci-pytorch-test-conda.yml b/.github/workflows/ci-pytorch-test-conda.yml index ad9c34156896b..64d06a22949d8 100644 --- a/.github/workflows/ci-pytorch-test-conda.yml +++ b/.github/workflows/ci-pytorch-test-conda.yml @@ -37,7 +37,7 @@ jobs: - name: Get changed files id: changed-files - uses: tj-actions/changed-files@v29.0.1 + uses: tj-actions/changed-files@v29.0.3 - name: Decide if the test should be skipped id: skip diff --git a/.github/workflows/ci-pytorch-test-full.yml b/.github/workflows/ci-pytorch-test-full.yml index f43c182b988fb..fbdc81b91c0ed 100644 --- a/.github/workflows/ci-pytorch-test-full.yml +++ b/.github/workflows/ci-pytorch-test-full.yml @@ -39,7 +39,7 @@ jobs: - name: Get changed files id: changed-files - uses: tj-actions/changed-files@v29.0.1 + uses: tj-actions/changed-files@v29.0.3 - name: Decide if the test should be skipped id: skip From b396375fd05bc5b637feb2c42d4c7429da268165 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mochol=C3=AD?= Date: Mon, 5 Sep 2022 19:11:12 +0200 Subject: [PATCH 04/12] Add path filters for azure PR jobs (#14544) --- .azure/app-cloud-e2e.yml | 163 ++++++++++++++++++++++++++++++++++++++ .azure/gpu-tests-lite.yml | 112 ++++++++++++++++++++++++++ .azure/hpu-tests.yml | 13 ++- 3 files changed, 286 insertions(+), 2 deletions(-) create mode 100644 .azure/app-cloud-e2e.yml create mode 100644 .azure/gpu-tests-lite.yml diff --git a/.azure/app-cloud-e2e.yml b/.azure/app-cloud-e2e.yml new file mode 100644 index 0000000000000..eef8a8b8bfff8 --- /dev/null +++ b/.azure/app-cloud-e2e.yml @@ -0,0 +1,163 @@ +# Python package +# Create and test a Python package on multiple Python versions. +# Add steps that analyze code, save the dist with the build record, publish to a PyPI-compatible index, and more: +# https://docs.microsoft.com/azure/devops/pipelines/languages/python + +trigger: + tags: + include: + - '*' + branches: + include: + - "master" + - "release/*" + - "refs/tags/*" + paths: + include: + - ".azure/app-cloud-e2e.yml" + - "requirements/app/**" + - "src/lightning_app/**" + - "examples/app_*" + +pr: + branches: + include: + - "master" + - "release/*" + paths: + include: + - ".azure/app-cloud-e2e.yml" + - "requirements/app/**" + - "src/lightning_app/**" + - "examples/app_*" + +# variables are automatically exported as environment variables so this will override pip's default cache dir +variables: + - name: pip_cache_dir + value: $(Pipeline.Workspace)/.pip + - name: local_id + value: $(Build.BuildId) + +jobs: + - job: App_cloud_e2e_testing + pool: azure-cpus + container: + image: mcr.microsoft.com/playwright/python:v1.25.2-focal + options: "--shm-size=2g" + strategy: + matrix: + 'App: v0_app': + name: "v0_app" + 'App: boring_app': + name: "boring_app" + 'App: template_streamlit_ui': + name: "template_streamlit_ui" + 'App: template_react_ui': + name: "template_react_ui" + 'App: template_jupyterlab': # TODO: clarify where these files lives + name: "template_jupyterlab" + 'App: idle_timeout': + name: "idle_timeout" + 'App: collect_failures': + name: "collect_failures" + 'App: custom_work_dependencies': + name: "custom_work_dependencies" + 'App: drive': + name: "drive" + 'App: payload': + name: "payload" + 'App: commands_and_api': + name: "commands_and_api" + timeoutInMinutes: "30" + cancelTimeoutInMinutes: "2" + # values: https://docs.microsoft.com/en-us/azure/devops/pipelines/process/phases?view=azure-devops&tabs=yaml#workspace + workspace: + clean: all + steps: + + - script: echo '##vso[task.setvariable variable=local_id]$(System.PullRequest.PullRequestNumber)' + displayName: "Set id for this PR" + condition: eq(variables['Build.Reason'], 'PullRequest') + + - bash: | + whoami + printf "local id: $(local_id)\n" + python --version + pip --version + displayName: 'Info' + + - task: Cache@2 + inputs: + key: 'pip | "$(name)" | requirements/app/base.txt' + restoreKeys: | + pip | "$(Agent.OS)" + path: $(pip_cache_dir) + displayName: Cache pip + + - bash: python -m pip install -r requirements/app/devel.txt --quiet --find-links ${TORCH_URL} + env: + TORCH_URL: https://download.pytorch.org/whl/cpu/torch_stable.html + displayName: 'Install dependencies' + + - bash: | + python -m pip install playwright + python -m playwright install # --with-deps + displayName: 'Install Playwright system dependencies' + + - bash: pip install -e . --find-links https://download.pytorch.org/whl/cpu/torch_stable.html + displayName: 'Install lightning' + + - bash: | + git clone https://github.com/Lightning-AI/LAI-lightning-template-jupyterlab-App examples/app_template_jupyterlab + cp examples/app_template_jupyterlab/tests/test_template_jupyterlab.py tests/tests_app_examples/test_template_jupyterlab.py + condition: eq(variables['name'], 'template_jupyterlab') + displayName: 'Clone Template Jupyter Lab Repo' + + - bash: git clone https://github.com/Lightning-AI/lightning-template-react examples/app_template_react_ui + condition: eq(variables['name'], 'template_react_ui') + displayName: 'Clone Template React UI Repo' + + - bash: | + mkdir -p ${VIDEO_LOCATION} + ls -l examples/${TEST_APP_NAME} + ls -l tests/tests_app_examples + python -m pytest tests/tests_app_examples/test_${TEST_APP_NAME}.py::test_${TEST_APP_NAME}_example_cloud --timeout=900 --capture=no -v --color=yes + env: + HEADLESS: '1' + PACKAGE_LIGHTNING: '1' + CLOUD: '1' + VIDEO_LOCATION: '$(Build.ArtifactStagingDirectory)/videos' + PR_NUMBER: $(local_id) + TEST_APP_NAME: $(name) + HAR_LOCATION: './artifacts/hars' + SLOW_MO: '50' + # LAI_USER: $(LAI_USER) + # LAI_PASS: $(LAI_PASS) + LIGHTNING_USER_ID: $(LIGHTNING_USER_ID_PROD) + LIGHTNING_API_KEY: $(LIGHTNING_API_KEY_PROD) + LIGHTNING_USERNAME: $(LIGHTNING_USERNAME) + LIGHTNING_CLOUD_URL: $(LIGHTNING_CLOUD_URL_PROD) + displayName: 'Run the tests' + + - publish: '$(Build.ArtifactStagingDirectory)/videos' + condition: failed() + displayName: 'Publish videos' + artifact: $(name) + + - bash: | + time python -c "from lightning.app import testing; testing.delete_cloud_lightning_apps()" + condition: always() + env: + # LAI_USER: $(LAI_USER) + # LAI_PASS: $(LAI_PASS) + LIGHTNING_USER_ID: $(LIGHTNING_USER_ID_PROD) + LIGHTNING_API_KEY: $(LIGHTNING_API_KEY_PROD) + LIGHTNING_USERNAME: $(LIGHTNING_USERNAME) + LIGHTNING_CLOUD_URL: $(LIGHTNING_CLOUD_URL_PROD) + PR_NUMBER: $(local_id) + TEST_APP_NAME: $(name) + # GRID_USER_ID: $(LIGHTNING_USER_ID) # TODO: clarify the meaning + # GRID_USER_KEY: $(LIGHTNING_API_KEY) # TODO: clarify the meaning + # GRID_URL: $(LIGHTNING_CLOUD_URL) + # _GRID_USERNAME: $(LIGHTNING_USERNAME) + displayName: 'Clean Previous Apps' diff --git a/.azure/gpu-tests-lite.yml b/.azure/gpu-tests-lite.yml new file mode 100644 index 0000000000000..66fc3951b9ce1 --- /dev/null +++ b/.azure/gpu-tests-lite.yml @@ -0,0 +1,112 @@ +# Python package +# Create and test a Python package on multiple Python versions. +# Add steps that analyze code, save the dist with the build record, publish to a PyPI-compatible index, and more: +# https://docs.microsoft.com/azure/devops/pipelines/languages/python + +trigger: + tags: + include: + - '*' + branches: + include: + - "master" + - "release/*" + - "refs/tags/*" + paths: + include: + - ".azure/gpu-tests-lite.yml" + - "requirements/lite/**" + - "src/lightning_lite/**" + - "tests/tests_lite/**" + - "tests/tests_pytorch/run_standalone_tests.sh" + - "tests/tests_lite/run_standalone_tests.sh" # a symlink to the one above + +pr: + branches: + include: + - "master" + - "release/*" + paths: + include: + - ".azure/gpu-tests-lite.yml" + - "requirements/lite/**" + - "src/lightning_lite/**" + - "tests/tests_lite/**" + - "tests/tests_pytorch/run_standalone_tests.sh" + - "tests/tests_lite/run_standalone_tests.sh" # a symlink to the one above + +jobs: + - job: testing + # how long to run the job before automatically cancelling + timeoutInMinutes: "20" + # how much time to give 'run always even if cancelled tasks' before stopping them + cancelTimeoutInMinutes: "2" + pool: azure-jirka-spot + container: + image: "pytorchlightning/pytorch_lightning:base-cuda-py3.9-torch1.12-cuda11.6.1" + # default shm size is 64m. Increase it to avoid: + # 'Error while creating shared memory: unhandled system error, NCCL version 2.7.8' + options: "--runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all --shm-size=512m" + workspace: + clean: all + + steps: + - bash: | + lspci | egrep 'VGA|3D' + whereis nvidia + nvidia-smi + which python && which pip + python --version + pip --version + pip list + displayName: 'Image info & NVIDIA' + + - bash: | + set -e + TORCH_VERSION=$(python -c "import torch; print(torch.__version__.split('+')[0])") + CUDA_VERSION_MM=$(python -c "import torch ; print(''.join(map(str, torch.version.cuda.split('.')[:2])))") + python ./requirements/pytorch/adjust-versions.py requirements/lite/base.txt ${PYTORCH_VERSION} + pip install -e .[strategies] --find-links https://download.pytorch.org/whl/cu${CUDA_VERSION_MM}/torch_stable.html + pip install --requirement requirements/pytorch/devel.txt --find-links https://download.pytorch.org/whl/cu${CUDA_VERSION_MM}/torch_stable.html + pip list + env: + PACKAGE_NAME: pytorch + FREEZE_REQUIREMENTS: 1 + displayName: 'Install dependencies' + + - bash: | + set -e + python requirements/collect_env_details.py + python -c "import torch ; mgpu = torch.cuda.device_count() ; assert mgpu >= 2, f'GPU: {mgpu}'" + displayName: 'Env details' + + - bash: python -m coverage run --source lightning_lite -m pytest --ignore benchmarks -v --junitxml=$(Build.StagingDirectory)/test-results.xml --durations=50 + env: + PL_RUN_CUDA_TESTS: "1" + workingDirectory: tests/tests_lite + displayName: 'Testing: Lite standard' + timeoutInMinutes: "10" + + - bash: bash run_standalone_tests.sh + workingDirectory: tests/tests_lite + env: + PL_RUN_CUDA_TESTS: "1" + PL_STANDALONE_TESTS_SOURCE: "lightning_lite" + displayName: 'Testing: Lite standalone tests' + timeoutInMinutes: "10" + + - bash: | + python -m coverage report + python -m coverage xml + python -m coverage html + python -m codecov --token=$(CODECOV_TOKEN) --commit=$(Build.SourceVersion) --flags=gpu,pytest --name="GPU-coverage" --env=linux,azure + ls -l + workingDirectory: tests/tests_lite + displayName: 'Statistics' + + - task: PublishTestResults@2 + displayName: 'Publish test results' + inputs: + testResultsFiles: '$(Build.StagingDirectory)/test-results.xml' + testRunTitle: '$(Agent.OS) - $(Build.DefinitionName) - Python $(python.version)' + condition: succeededOrFailed() diff --git a/.azure/hpu-tests.yml b/.azure/hpu-tests.yml index bdfada907cac9..6f6618a0beb5d 100644 --- a/.azure/hpu-tests.yml +++ b/.azure/hpu-tests.yml @@ -11,8 +11,17 @@ trigger: - "refs/tags/*" pr: - - "master" - - "release/*" + branches: + include: + - "master" + - "release/*" + paths: + include: + - ".azure/hpu-tests.yml" + - "examples/pl_hpu/mnist_sample.py" + - "requirements/pytorch/**" + - "src/pytorch_lightning/**" + - "tests/tests_pytorch/**" jobs: - job: testing From 286d790bdd036c2783d4f425453bdc550e4c5c22 Mon Sep 17 00:00:00 2001 From: Rohit Gupta Date: Mon, 5 Sep 2022 19:31:51 +0530 Subject: [PATCH 05/12] Squeeze tensor while logging (#14489) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Adrian Wälchli --- src/pytorch_lightning/core/module.py | 12 +++++------- .../trainer/logging_/test_train_loop_logging.py | 13 ++++++++++++- 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/src/pytorch_lightning/core/module.py b/src/pytorch_lightning/core/module.py index 1f89609e82e82..39023dd37fdf1 100644 --- a/src/pytorch_lightning/core/module.py +++ b/src/pytorch_lightning/core/module.py @@ -419,8 +419,7 @@ def log( " but it should not contain information about `dataloader_idx`" ) - value = apply_to_collection(value, numbers.Number, self.__to_tensor) - apply_to_collection(value, torch.Tensor, self.__check_numel_1, name) + value = apply_to_collection(value, (torch.Tensor, numbers.Number), self.__to_tensor, name) if self.trainer._logger_connector.should_reset_tensors(self._current_fx_name): # if we started a new epoch (running its first batch) the hook name has changed @@ -552,16 +551,15 @@ def __check_not_nested(value: dict, name: str) -> None: def __check_allowed(v: Any, name: str, value: Any) -> None: raise ValueError(f"`self.log({name}, {value})` was called, but `{type(v).__name__}` values cannot be logged") - def __to_tensor(self, value: numbers.Number) -> Tensor: - return torch.tensor(value, device=self.device) - - @staticmethod - def __check_numel_1(value: Tensor, name: str) -> None: + def __to_tensor(self, value: Union[torch.Tensor, numbers.Number], name: str) -> Tensor: + value = torch.tensor(value, device=self.device) if not torch.numel(value) == 1: raise ValueError( f"`self.log({name}, {value})` was called, but the tensor must have a single element." f" You can try doing `self.log({name}, {value}.mean())`" ) + value = value.squeeze() + return value def log_grad_norm(self, grad_norm_dict: Dict[str, float]) -> None: """Override this method to change the default behaviour of ``log_grad_norm``. diff --git a/tests/tests_pytorch/trainer/logging_/test_train_loop_logging.py b/tests/tests_pytorch/trainer/logging_/test_train_loop_logging.py index d16be306b9365..4045b8d9c4595 100644 --- a/tests/tests_pytorch/trainer/logging_/test_train_loop_logging.py +++ b/tests/tests_pytorch/trainer/logging_/test_train_loop_logging.py @@ -28,7 +28,8 @@ from pytorch_lightning import callbacks, Trainer from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint, TQDMProgressBar from pytorch_lightning.core.module import LightningModule -from pytorch_lightning.demos.boring_classes import BoringModel, RandomDataset +from pytorch_lightning.demos.boring_classes import BoringModel, RandomDataset, RandomDictDataset +from pytorch_lightning.trainer.states import RunningStage from pytorch_lightning.utilities.exceptions import MisconfigurationException from tests_pytorch.helpers.datasets import RandomDictDataset from tests_pytorch.helpers.runif import RunIf @@ -837,3 +838,13 @@ def on_train_start(self): assert mock_log_metrics.mock_calls == [call(metrics={"foo": 123.0, "epoch": 0}, step=0)] assert trainer.max_epochs > 1 + + +def test_unsqueezed_tensor_logging(): + model = BoringModel() + trainer = Trainer() + trainer.state.stage = RunningStage.TRAINING + model._current_fx_name = "training_step" + model.trainer = trainer + model.log("foo", torch.Tensor([1.2])) + assert trainer.callback_metrics["foo"].ndim == 0 From 00544b674f127a9ed082369d92374cd5b4db769f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mochol=C3=AD?= Date: Mon, 5 Sep 2022 14:16:26 +0200 Subject: [PATCH 06/12] Add path filters for some non-required jobs (#14539) --- .azure/hpu-tests.yml | 7 ++++ .github/file-filters.yml | 9 ----- .github/workflows/ci-pytorch-test-slow.yml | 38 +++++----------------- 3 files changed, 16 insertions(+), 38 deletions(-) delete mode 100644 .github/file-filters.yml diff --git a/.azure/hpu-tests.yml b/.azure/hpu-tests.yml index 6f6618a0beb5d..a8ecb3ad5efa8 100644 --- a/.azure/hpu-tests.yml +++ b/.azure/hpu-tests.yml @@ -9,6 +9,13 @@ trigger: - "master" - "release/*" - "refs/tags/*" + paths: + include: + - ".azure/hpu-tests.yml" + - "examples/pl_hpu/mnist_sample.py" + - "requirements/pytorch/**" + - "src/pytorch_lightning/**" + - "tests/tests_pytorch/**" pr: branches: diff --git a/.github/file-filters.yml b/.github/file-filters.yml deleted file mode 100644 index e621cd83881e4..0000000000000 --- a/.github/file-filters.yml +++ /dev/null @@ -1,9 +0,0 @@ -# This file contains filters to be used in the CI to detect file changes and run the required CI jobs. - -app_examples: - - "src/lightning_app/**" - - "tests/tests_app_examples/**" - - "requirements/app/**" - - "examples/app_*" - - "setup.py" - - "src/pytorch_lightning/__version__.py" diff --git a/.github/workflows/ci-pytorch-test-slow.yml b/.github/workflows/ci-pytorch-test-slow.yml index 23b55f6b638df..091c3f606c3ca 100644 --- a/.github/workflows/ci-pytorch-test-slow.yml +++ b/.github/workflows/ci-pytorch-test-slow.yml @@ -7,6 +7,12 @@ on: # Trigger the workflow on push or pull request, but only for the master bra pull_request: branches: [master, "release/*"] types: [opened, reopened, ready_for_review, synchronize] + paths: + - "requirements/pytorch/**" + - "src/pytorch_lightning/**" + - "tests/tests_pytorch/**" + - "setup.cfg" # includes pytest config + - ".github/workflows/ci-pytorch-test-slow.yml" concurrency: group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref }} @@ -28,43 +34,19 @@ jobs: steps: - uses: actions/checkout@v3 - - name: Get changed files - id: changed-files - uses: tj-actions/changed-files@v29.0.1 - - - name: Decide if the test should be skipped - id: skip - shell: bash -l {0} - run: | - FILTER='src/pytorch_lightning|requirements/pytorch|tests/tests_pytorch|examples/pl_*' - echo "${{ steps.changed-files.outputs.all_changed_files }}" | tr " " "\n" > changed_files.txt - MATCHES=$(cat changed_files.txt | grep -E $FILTER) - echo $MATCHES - if [ -z "$MATCHES" ]; then - echo "Skip" - echo "::set-output name=continue::0" - else - echo "Continue" - echo "::set-output name=continue::1" - fi - - uses: actions/setup-python@v4 - if: ${{ (steps.skip.outputs.continue == '1') }} with: python-version: ${{ matrix.python-version }} - name: Reset caching - if: ${{ (steps.skip.outputs.continue == '1') }} run: python -c "import time; days = time.time() / 60 / 60 / 24; print(f'TIME_PERIOD=d{int(days / 2) * 2}')" >> $GITHUB_ENV - name: Get pip cache - if: ${{ (steps.skip.outputs.continue == '1') }} id: pip-cache run: | python -c "from pip._internal.locations import USER_CACHE_DIR; print('::set-output name=dir::' + USER_CACHE_DIR)" - name: Cache pip - if: ${{ (steps.skip.outputs.continue == '1') }} uses: actions/cache@v3 with: path: ${{ steps.pip-cache.outputs.dir }} @@ -73,7 +55,6 @@ jobs: ${{ runner.os }}-pip-td${{ env.TIME_PERIOD }}-py${{ matrix.python-version }}- - name: Install dependencies - if: ${{ (steps.skip.outputs.continue == '1') }} env: PACKAGE_NAME: pytorch FREEZE_REQUIREMENTS: 1 @@ -85,21 +66,20 @@ jobs: shell: bash - name: Testing PyTorch - if: ${{ (steps.skip.outputs.continue == '1') }} working-directory: tests/tests_pytorch run: coverage run --source pytorch_lightning -m pytest -v --junitxml=results-${{ runner.os }}-py${{ matrix.python-version }}.xml env: PL_RUN_SLOW_TESTS: 1 - name: Upload pytest test results - if: ${{ (failure()) && (steps.skip.outputs.continue == '1') }} + if: failure() uses: actions/upload-artifact@v3 with: name: unittest-results-${{ runner.os }}-py${{ matrix.python-version }} path: tests/tests_pytorch/results-${{ runner.os }}-py${{ matrix.python-version }}.xml - name: Statistics - if: ${{ (success()) && (steps.skip.outputs.continue == '1') }} + if: success() working-directory: tests/tests_pytorch run: | coverage report @@ -107,7 +87,7 @@ jobs: - name: Upload coverage to Codecov uses: codecov/codecov-action@v3 - if: ${{ (success()) && (steps.skip.outputs.continue == '1') }} + if: success() # see: https://github.com/actions/toolkit/issues/399 continue-on-error: true with: From cadff2d54f3993d8d19d0afeea31be3cc8d94e6c Mon Sep 17 00:00:00 2001 From: Tianshu Wang Date: Mon, 5 Sep 2022 18:12:43 +0800 Subject: [PATCH 07/12] Fixed `WandbLogger` `save_dir` is not set after creation (#12748) (#14326) Co-authored-by: Jirka Borovec --- src/pytorch_lightning/loggers/wandb.py | 6 +++--- tests/tests_pytorch/loggers/test_wandb.py | 8 +++++++- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/src/pytorch_lightning/loggers/wandb.py b/src/pytorch_lightning/loggers/wandb.py index baf4bc9092774..3198e46b1a586 100644 --- a/src/pytorch_lightning/loggers/wandb.py +++ b/src/pytorch_lightning/loggers/wandb.py @@ -223,7 +223,7 @@ def __init__(self, *args, **kwarg): Args: name: Display name for the run. - save_dir: Path where data is saved (wandb dir by default). + save_dir: Path where data is saved. offline: Run offline (data can be streamed later to wandb servers). id: Sets the version, mainly used to resume a previous run. version: Same as id. @@ -255,7 +255,7 @@ def __init__(self, *args, **kwarg): def __init__( self, name: Optional[str] = None, - save_dir: Optional[str] = None, + save_dir: str = ".", offline: bool = False, id: Optional[str] = None, anonymous: Optional[bool] = None, @@ -300,7 +300,7 @@ def __init__( name=name, project=project, id=version or id, - dir=save_dir, + dir=save_dir or kwargs.pop("dir"), resume="allow", anonymous=("allow" if anonymous else None), ) diff --git a/tests/tests_pytorch/loggers/test_wandb.py b/tests/tests_pytorch/loggers/test_wandb.py index 648e1a8f38ec8..b408046c9e5d2 100644 --- a/tests/tests_pytorch/loggers/test_wandb.py +++ b/tests/tests_pytorch/loggers/test_wandb.py @@ -58,9 +58,15 @@ def test_wandb_logger_init(wandb, monkeypatch): wandb.init.reset_mock() WandbLogger(project="test_project").experiment wandb.init.assert_called_once_with( - name=None, dir=None, id=None, project="test_project", resume="allow", anonymous=None + name=None, dir=".", id=None, project="test_project", resume="allow", anonymous=None ) + # test wandb.init set save_dir correctly after created + wandb.run = None + wandb.init.reset_mock() + logger = WandbLogger(project="test_project") + assert logger.save_dir is not None + # test wandb.init and setting logger experiment externally wandb.run = None run = wandb.init() From e359268186906fcf9a5aec9d311576fde18ae43d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Roberto=20de=20Moura=20Estev=C3=A3o=20Filho?= Date: Mon, 5 Sep 2022 06:05:21 -0300 Subject: [PATCH 08/12] Estimate stepping batches with max_steps if max_epochs is not set (#14317) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Roberto Estevão Co-authored-by: Rohit Gupta Co-authored-by: Jirka Borovec Co-authored-by: Adrian Wälchli --- src/pytorch_lightning/CHANGELOG.md | 3 +++ src/pytorch_lightning/trainer/trainer.py | 4 ++-- .../trainer/properties/test_estimated_stepping_batches.py | 4 ++-- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/pytorch_lightning/CHANGELOG.md b/src/pytorch_lightning/CHANGELOG.md index f9c9ecdc46a98..851364b71426b 100644 --- a/src/pytorch_lightning/CHANGELOG.md +++ b/src/pytorch_lightning/CHANGELOG.md @@ -29,6 +29,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Fixed an issue to avoid the impact of sanity check on `reload_dataloaders_every_n_epochs` for validation ([#13964](https://github.com/Lightning-AI/lightning/pull/13964)) +- Fixed `Trainer.estimated_stepping_batches` when maximum number of epochs is not set ([#14317](https://github.com/Lightning-AI/lightning/pull/14317)) + + ## [1.7.2] - 2022-08-17 ### Added diff --git a/src/pytorch_lightning/trainer/trainer.py b/src/pytorch_lightning/trainer/trainer.py index 7025e49ee5613..378a969830a6f 100644 --- a/src/pytorch_lightning/trainer/trainer.py +++ b/src/pytorch_lightning/trainer/trainer.py @@ -2769,8 +2769,8 @@ def configure_optimizers(self): ) # infinite training - if self.max_epochs == -1 and self.max_steps == -1: - return float("inf") + if self.max_epochs == -1: + return float("inf") if self.max_steps == -1 else self.max_steps if self.train_dataloader is None: rank_zero_info("Loading `train_dataloader` to estimate number of stepping batches.") diff --git a/tests/tests_pytorch/trainer/properties/test_estimated_stepping_batches.py b/tests/tests_pytorch/trainer/properties/test_estimated_stepping_batches.py index 92a1126294dfc..0f694757ca22d 100644 --- a/tests/tests_pytorch/trainer/properties/test_estimated_stepping_batches.py +++ b/tests/tests_pytorch/trainer/properties/test_estimated_stepping_batches.py @@ -95,9 +95,9 @@ def test_num_stepping_batches_infinite_training(): assert trainer.estimated_stepping_batches == float("inf") -def test_num_stepping_batches_with_max_steps(): +@pytest.mark.parametrize("max_steps", [2, 100]) +def test_num_stepping_batches_with_max_steps(max_steps): """Test stepping batches with `max_steps`.""" - max_steps = 2 trainer = Trainer(max_steps=max_steps) model = BoringModel() trainer.fit(model) From d82e30241b0cd3e178ac2b4fe096b0a5ae5a2e5d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mochol=C3=AD?= Date: Sun, 4 Sep 2022 13:29:56 +0200 Subject: [PATCH 09/12] Remove deprecated `test_tube` dependency (#14513) --- requirements/pytorch/loggers.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/requirements/pytorch/loggers.txt b/requirements/pytorch/loggers.txt index 905823451973b..573daaa541ced 100644 --- a/requirements/pytorch/loggers.txt +++ b/requirements/pytorch/loggers.txt @@ -6,5 +6,4 @@ neptune-client>=0.10.0, <0.16.4 comet-ml>=3.1.12, <3.31.8 mlflow>=1.0.0, <1.29.0 -test_tube>=0.7.5, <=0.7.5 wandb>=0.10.22, <0.13.2 From 8106a0169fef624a6d0c16529deb6cafd4948636 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mochol=C3=AD?= Date: Sat, 3 Sep 2022 16:38:30 +0200 Subject: [PATCH 10/12] Pin protobuf (#14512) --- requirements/pytorch/extra.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements/pytorch/extra.txt b/requirements/pytorch/extra.txt index 20b6c1b8dbc12..b331a93c0b0bb 100644 --- a/requirements/pytorch/extra.txt +++ b/requirements/pytorch/extra.txt @@ -9,3 +9,4 @@ hydra-core>=1.0.5, <1.3.0 jsonargparse[signatures]>=4.12.0, <=4.12.0 gcsfs>=2021.5.0, <2022.8.0 rich>=10.14.0, !=10.15.0.a, <13.0.0 +protobuf<=3.20.1 # strict # an extra is updating protobuf, this pin prevents TensorBoard failure From 277217e3a6fe16b528fe227e1b0ac15ed3055b5e Mon Sep 17 00:00:00 2001 From: Kushashwa Ravi Shrimali Date: Tue, 6 Sep 2022 17:58:14 +0530 Subject: [PATCH 11/12] Update changelog and versions for v1.7.5 (remove files not relevant for this release) --- .azure/app-cloud-e2e.yml | 163 ------------------ .azure/gpu-tests-lite.yml | 112 ------------ .github/workflows/ci-lite-test-full.yml | 120 ------------- src/pytorch_lightning/CHANGELOG.md | 12 +- src/pytorch_lightning/__version__.py | 2 +- .../logging_/test_train_loop_logging.py | 1 - 6 files changed, 10 insertions(+), 400 deletions(-) delete mode 100644 .azure/app-cloud-e2e.yml delete mode 100644 .azure/gpu-tests-lite.yml delete mode 100644 .github/workflows/ci-lite-test-full.yml diff --git a/.azure/app-cloud-e2e.yml b/.azure/app-cloud-e2e.yml deleted file mode 100644 index eef8a8b8bfff8..0000000000000 --- a/.azure/app-cloud-e2e.yml +++ /dev/null @@ -1,163 +0,0 @@ -# Python package -# Create and test a Python package on multiple Python versions. -# Add steps that analyze code, save the dist with the build record, publish to a PyPI-compatible index, and more: -# https://docs.microsoft.com/azure/devops/pipelines/languages/python - -trigger: - tags: - include: - - '*' - branches: - include: - - "master" - - "release/*" - - "refs/tags/*" - paths: - include: - - ".azure/app-cloud-e2e.yml" - - "requirements/app/**" - - "src/lightning_app/**" - - "examples/app_*" - -pr: - branches: - include: - - "master" - - "release/*" - paths: - include: - - ".azure/app-cloud-e2e.yml" - - "requirements/app/**" - - "src/lightning_app/**" - - "examples/app_*" - -# variables are automatically exported as environment variables so this will override pip's default cache dir -variables: - - name: pip_cache_dir - value: $(Pipeline.Workspace)/.pip - - name: local_id - value: $(Build.BuildId) - -jobs: - - job: App_cloud_e2e_testing - pool: azure-cpus - container: - image: mcr.microsoft.com/playwright/python:v1.25.2-focal - options: "--shm-size=2g" - strategy: - matrix: - 'App: v0_app': - name: "v0_app" - 'App: boring_app': - name: "boring_app" - 'App: template_streamlit_ui': - name: "template_streamlit_ui" - 'App: template_react_ui': - name: "template_react_ui" - 'App: template_jupyterlab': # TODO: clarify where these files lives - name: "template_jupyterlab" - 'App: idle_timeout': - name: "idle_timeout" - 'App: collect_failures': - name: "collect_failures" - 'App: custom_work_dependencies': - name: "custom_work_dependencies" - 'App: drive': - name: "drive" - 'App: payload': - name: "payload" - 'App: commands_and_api': - name: "commands_and_api" - timeoutInMinutes: "30" - cancelTimeoutInMinutes: "2" - # values: https://docs.microsoft.com/en-us/azure/devops/pipelines/process/phases?view=azure-devops&tabs=yaml#workspace - workspace: - clean: all - steps: - - - script: echo '##vso[task.setvariable variable=local_id]$(System.PullRequest.PullRequestNumber)' - displayName: "Set id for this PR" - condition: eq(variables['Build.Reason'], 'PullRequest') - - - bash: | - whoami - printf "local id: $(local_id)\n" - python --version - pip --version - displayName: 'Info' - - - task: Cache@2 - inputs: - key: 'pip | "$(name)" | requirements/app/base.txt' - restoreKeys: | - pip | "$(Agent.OS)" - path: $(pip_cache_dir) - displayName: Cache pip - - - bash: python -m pip install -r requirements/app/devel.txt --quiet --find-links ${TORCH_URL} - env: - TORCH_URL: https://download.pytorch.org/whl/cpu/torch_stable.html - displayName: 'Install dependencies' - - - bash: | - python -m pip install playwright - python -m playwright install # --with-deps - displayName: 'Install Playwright system dependencies' - - - bash: pip install -e . --find-links https://download.pytorch.org/whl/cpu/torch_stable.html - displayName: 'Install lightning' - - - bash: | - git clone https://github.com/Lightning-AI/LAI-lightning-template-jupyterlab-App examples/app_template_jupyterlab - cp examples/app_template_jupyterlab/tests/test_template_jupyterlab.py tests/tests_app_examples/test_template_jupyterlab.py - condition: eq(variables['name'], 'template_jupyterlab') - displayName: 'Clone Template Jupyter Lab Repo' - - - bash: git clone https://github.com/Lightning-AI/lightning-template-react examples/app_template_react_ui - condition: eq(variables['name'], 'template_react_ui') - displayName: 'Clone Template React UI Repo' - - - bash: | - mkdir -p ${VIDEO_LOCATION} - ls -l examples/${TEST_APP_NAME} - ls -l tests/tests_app_examples - python -m pytest tests/tests_app_examples/test_${TEST_APP_NAME}.py::test_${TEST_APP_NAME}_example_cloud --timeout=900 --capture=no -v --color=yes - env: - HEADLESS: '1' - PACKAGE_LIGHTNING: '1' - CLOUD: '1' - VIDEO_LOCATION: '$(Build.ArtifactStagingDirectory)/videos' - PR_NUMBER: $(local_id) - TEST_APP_NAME: $(name) - HAR_LOCATION: './artifacts/hars' - SLOW_MO: '50' - # LAI_USER: $(LAI_USER) - # LAI_PASS: $(LAI_PASS) - LIGHTNING_USER_ID: $(LIGHTNING_USER_ID_PROD) - LIGHTNING_API_KEY: $(LIGHTNING_API_KEY_PROD) - LIGHTNING_USERNAME: $(LIGHTNING_USERNAME) - LIGHTNING_CLOUD_URL: $(LIGHTNING_CLOUD_URL_PROD) - displayName: 'Run the tests' - - - publish: '$(Build.ArtifactStagingDirectory)/videos' - condition: failed() - displayName: 'Publish videos' - artifact: $(name) - - - bash: | - time python -c "from lightning.app import testing; testing.delete_cloud_lightning_apps()" - condition: always() - env: - # LAI_USER: $(LAI_USER) - # LAI_PASS: $(LAI_PASS) - LIGHTNING_USER_ID: $(LIGHTNING_USER_ID_PROD) - LIGHTNING_API_KEY: $(LIGHTNING_API_KEY_PROD) - LIGHTNING_USERNAME: $(LIGHTNING_USERNAME) - LIGHTNING_CLOUD_URL: $(LIGHTNING_CLOUD_URL_PROD) - PR_NUMBER: $(local_id) - TEST_APP_NAME: $(name) - # GRID_USER_ID: $(LIGHTNING_USER_ID) # TODO: clarify the meaning - # GRID_USER_KEY: $(LIGHTNING_API_KEY) # TODO: clarify the meaning - # GRID_URL: $(LIGHTNING_CLOUD_URL) - # _GRID_USERNAME: $(LIGHTNING_USERNAME) - displayName: 'Clean Previous Apps' diff --git a/.azure/gpu-tests-lite.yml b/.azure/gpu-tests-lite.yml deleted file mode 100644 index 66fc3951b9ce1..0000000000000 --- a/.azure/gpu-tests-lite.yml +++ /dev/null @@ -1,112 +0,0 @@ -# Python package -# Create and test a Python package on multiple Python versions. -# Add steps that analyze code, save the dist with the build record, publish to a PyPI-compatible index, and more: -# https://docs.microsoft.com/azure/devops/pipelines/languages/python - -trigger: - tags: - include: - - '*' - branches: - include: - - "master" - - "release/*" - - "refs/tags/*" - paths: - include: - - ".azure/gpu-tests-lite.yml" - - "requirements/lite/**" - - "src/lightning_lite/**" - - "tests/tests_lite/**" - - "tests/tests_pytorch/run_standalone_tests.sh" - - "tests/tests_lite/run_standalone_tests.sh" # a symlink to the one above - -pr: - branches: - include: - - "master" - - "release/*" - paths: - include: - - ".azure/gpu-tests-lite.yml" - - "requirements/lite/**" - - "src/lightning_lite/**" - - "tests/tests_lite/**" - - "tests/tests_pytorch/run_standalone_tests.sh" - - "tests/tests_lite/run_standalone_tests.sh" # a symlink to the one above - -jobs: - - job: testing - # how long to run the job before automatically cancelling - timeoutInMinutes: "20" - # how much time to give 'run always even if cancelled tasks' before stopping them - cancelTimeoutInMinutes: "2" - pool: azure-jirka-spot - container: - image: "pytorchlightning/pytorch_lightning:base-cuda-py3.9-torch1.12-cuda11.6.1" - # default shm size is 64m. Increase it to avoid: - # 'Error while creating shared memory: unhandled system error, NCCL version 2.7.8' - options: "--runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all --shm-size=512m" - workspace: - clean: all - - steps: - - bash: | - lspci | egrep 'VGA|3D' - whereis nvidia - nvidia-smi - which python && which pip - python --version - pip --version - pip list - displayName: 'Image info & NVIDIA' - - - bash: | - set -e - TORCH_VERSION=$(python -c "import torch; print(torch.__version__.split('+')[0])") - CUDA_VERSION_MM=$(python -c "import torch ; print(''.join(map(str, torch.version.cuda.split('.')[:2])))") - python ./requirements/pytorch/adjust-versions.py requirements/lite/base.txt ${PYTORCH_VERSION} - pip install -e .[strategies] --find-links https://download.pytorch.org/whl/cu${CUDA_VERSION_MM}/torch_stable.html - pip install --requirement requirements/pytorch/devel.txt --find-links https://download.pytorch.org/whl/cu${CUDA_VERSION_MM}/torch_stable.html - pip list - env: - PACKAGE_NAME: pytorch - FREEZE_REQUIREMENTS: 1 - displayName: 'Install dependencies' - - - bash: | - set -e - python requirements/collect_env_details.py - python -c "import torch ; mgpu = torch.cuda.device_count() ; assert mgpu >= 2, f'GPU: {mgpu}'" - displayName: 'Env details' - - - bash: python -m coverage run --source lightning_lite -m pytest --ignore benchmarks -v --junitxml=$(Build.StagingDirectory)/test-results.xml --durations=50 - env: - PL_RUN_CUDA_TESTS: "1" - workingDirectory: tests/tests_lite - displayName: 'Testing: Lite standard' - timeoutInMinutes: "10" - - - bash: bash run_standalone_tests.sh - workingDirectory: tests/tests_lite - env: - PL_RUN_CUDA_TESTS: "1" - PL_STANDALONE_TESTS_SOURCE: "lightning_lite" - displayName: 'Testing: Lite standalone tests' - timeoutInMinutes: "10" - - - bash: | - python -m coverage report - python -m coverage xml - python -m coverage html - python -m codecov --token=$(CODECOV_TOKEN) --commit=$(Build.SourceVersion) --flags=gpu,pytest --name="GPU-coverage" --env=linux,azure - ls -l - workingDirectory: tests/tests_lite - displayName: 'Statistics' - - - task: PublishTestResults@2 - displayName: 'Publish test results' - inputs: - testResultsFiles: '$(Build.StagingDirectory)/test-results.xml' - testRunTitle: '$(Agent.OS) - $(Build.DefinitionName) - Python $(python.version)' - condition: succeededOrFailed() diff --git a/.github/workflows/ci-lite-test-full.yml b/.github/workflows/ci-lite-test-full.yml deleted file mode 100644 index 896086b697d66..0000000000000 --- a/.github/workflows/ci-lite-test-full.yml +++ /dev/null @@ -1,120 +0,0 @@ -name: Test Lite full - -# see: https://help.github.com/en/actions/reference/events-that-trigger-workflows -on: # Trigger the workflow on push or pull request, but only for the master branch - push: - branches: [master, "release/*"] - pull_request: - branches: [master, "release/*"] - types: [opened, reopened, ready_for_review, synchronize] - paths: - - "requirements/lite/**" - - "src/lightning_lite/**" - - "tests/tests_lite/**" - - "setup.cfg" # includes pytest config - - ".github/workflows/ci-lite-test-full.yml" - -concurrency: - group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref }} - cancel-in-progress: ${{ ! (github.ref == 'refs/heads/master' || startsWith(github.ref, 'refs/heads/release/')) }} - -jobs: - - lite-cpu: - runs-on: ${{ matrix.os }} - if: github.event.pull_request.draft == false - strategy: - fail-fast: false - matrix: - os: [ubuntu-20.04, windows-2022, macOS-11] - python-version: ["3.7", "3.10"] # minimum, maximum - requires: ["oldest", "latest"] - release: ["stable"] - exclude: - # There's no distribution of the oldest PyTorch 1.9 for Python 3.10. - # TODO: Remove the exclusion when dropping PyTorch 1.9 support. - - {python-version: "3.10", requires: "oldest"} - - timeout-minutes: 40 - - steps: - - uses: actions/checkout@v3 - - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 - with: - python-version: ${{ matrix.python-version }} - - - name: Reset caching - run: python -c "import time; days = time.time() / 60 / 60 / 24; print(f'TIME_PERIOD=d{int(days / 2) * 2}')" >> $GITHUB_ENV - - - name: basic setup - run: | - pip --version - pip install -q fire - - - name: Setup Windows - if: runner.os == 'windows' - run: | - python .actions/assistant.py requirements_prune_pkgs horovod - - - name: Set min. dependencies - if: matrix.requires == 'oldest' - run: | - python .actions/assistant.py replace_oldest_ver - - # Note: This uses an internal pip API and may not always work - # https://github.com/actions/cache/blob/master/examples.md#multiple-oss-in-a-workflow - - name: Get pip cache dir - id: pip-cache - run: echo "::set-output name=dir::$(pip cache dir)" - - - name: pip cache - uses: actions/cache@v3 - with: - path: ${{ steps.pip-cache.outputs.dir }} - key: ${{ runner.os }}-pip-td${{ env.TIME_PERIOD }}-py${{ matrix.python-version }}-${{ matrix.release }}-${{ matrix.requires }}-${{ hashFiles('requirements/lite/*.txt') }} - restore-keys: | - ${{ runner.os }}-pip-td${{ env.TIME_PERIOD }}-py${{ matrix.python-version }}-${{ matrix.release }}-${{ matrix.requires }}- - - - name: Install dependencies - env: - PACKAGE_NAME: pytorch # TODO(lite) does this need to say lite? - FREEZE_REQUIREMENTS: 1 - run: | - flag=$(python -c "print('--pre' if '${{matrix.release}}' == 'pre' else '')" 2>&1) - url=$(python -c "print('test/cpu/torch_test.html' if '${{matrix.release}}' == 'pre' else 'cpu/torch_stable.html')" 2>&1) - pip install -e .[test] --upgrade $flag --find-links "https://download.pytorch.org/whl/${url}" - pip list - shell: bash - - - name: Testing Lite - working-directory: tests/tests_lite - # NOTE: do not include coverage report here, see: https://github.com/nedbat/coveragepy/issues/1003 - run: coverage run --source lightning_lite -m pytest -v --durations=50 --junitxml=results-${{ runner.os }}-py${{ matrix.python-version }}-${{ matrix.requires }}-${{ matrix.release }}.xml - - - name: Upload pytest results - if: failure() - uses: actions/upload-artifact@v3 - with: - name: unittest-results-${{ runner.os }}-py${{ matrix.python-version }}-${{ matrix.requires }}-${{ matrix.release }} - path: tests/tests_lite/results-${{ runner.os }}-py${{ matrix.python-version }}-${{ matrix.requires }}-${{ matrix.release }}.xml - - - name: Statistics - if: success() - working-directory: tests/tests_lite - run: | - coverage report - coverage xml - - - name: Upload coverage to Codecov - uses: codecov/codecov-action@v3 - if: always() - # see: https://github.com/actions/toolkit/issues/399 - continue-on-error: true - with: - token: ${{ secrets.CODECOV_TOKEN }} - file: tests/tests_lite/coverage.xml - flags: cpu,pytest,python${{ matrix.python-version }} - name: CPU-coverage - fail_ci_if_error: false diff --git a/src/pytorch_lightning/CHANGELOG.md b/src/pytorch_lightning/CHANGELOG.md index 851364b71426b..d1838774b49ac 100644 --- a/src/pytorch_lightning/CHANGELOG.md +++ b/src/pytorch_lightning/CHANGELOG.md @@ -4,6 +4,15 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). +## [1.7.5] - 2022-09-06 + +### Fixed + +- Squeezed tensor values when logging with `LightningModule.log` ([#14489](https://github.com/Lightning-AI/lightning/pull/14489)) +- Fixed `WandbLogger` `save_dir` is not set after creation ([#14326](https://github.com/Lightning-AI/lightning/pull/14326)) +- Fixed `Trainer.estimated_stepping_batches` when maximum number of epochs is not set ([#14317](https://github.com/Lightning-AI/lightning/pull/14317)) + + ## [1.7.4] - 2022-08-31 ### Added @@ -29,9 +38,6 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Fixed an issue to avoid the impact of sanity check on `reload_dataloaders_every_n_epochs` for validation ([#13964](https://github.com/Lightning-AI/lightning/pull/13964)) -- Fixed `Trainer.estimated_stepping_batches` when maximum number of epochs is not set ([#14317](https://github.com/Lightning-AI/lightning/pull/14317)) - - ## [1.7.2] - 2022-08-17 ### Added diff --git a/src/pytorch_lightning/__version__.py b/src/pytorch_lightning/__version__.py index 582554e87c281..57a819f4fc5bb 100644 --- a/src/pytorch_lightning/__version__.py +++ b/src/pytorch_lightning/__version__.py @@ -1 +1 @@ -version = "1.7.4" +version = "1.7.5" diff --git a/tests/tests_pytorch/trainer/logging_/test_train_loop_logging.py b/tests/tests_pytorch/trainer/logging_/test_train_loop_logging.py index 4045b8d9c4595..cd7f83ddc7bfe 100644 --- a/tests/tests_pytorch/trainer/logging_/test_train_loop_logging.py +++ b/tests/tests_pytorch/trainer/logging_/test_train_loop_logging.py @@ -31,7 +31,6 @@ from pytorch_lightning.demos.boring_classes import BoringModel, RandomDataset, RandomDictDataset from pytorch_lightning.trainer.states import RunningStage from pytorch_lightning.utilities.exceptions import MisconfigurationException -from tests_pytorch.helpers.datasets import RandomDictDataset from tests_pytorch.helpers.runif import RunIf From a1e6e38dd3e59ca55bf89bff053752dfb4c5bebb Mon Sep 17 00:00:00 2001 From: Mansy Date: Sat, 27 Aug 2022 01:55:22 +0200 Subject: [PATCH 12/12] [App][CI] Fix psutil requirement CI (#14413) --- requirements/app/test.txt | 1 + tests/tests_app/cli/test_cli.py | 3 +++ 2 files changed, 4 insertions(+) diff --git a/requirements/app/test.txt b/requirements/app/test.txt index 1ab762936db18..466defcae79ce 100644 --- a/requirements/app/test.txt +++ b/requirements/app/test.txt @@ -7,3 +7,4 @@ playwright==1.22.0 # pytest-flake8 httpx trio +psutil diff --git a/tests/tests_app/cli/test_cli.py b/tests/tests_app/cli/test_cli.py index 3e003293692a8..ec942db6f157c 100644 --- a/tests/tests_app/cli/test_cli.py +++ b/tests/tests_app/cli/test_cli.py @@ -140,3 +140,6 @@ def test_cli_logout(exists: mock.MagicMock, unlink: mock.MagicMock, creds: bool) unlink.assert_called_once_with() else: unlink.assert_not_called() + + +# TODO: test for the other commands