From fd7d35a0cb8bd28d65bc6da28952c7749bdb0309 Mon Sep 17 00:00:00 2001 From: awaelchli Date: Wed, 4 Oct 2023 23:11:13 +0200 Subject: [PATCH 01/14] update docker cuda images --- .azure/gpu-tests-fabric.yml | 8 ++------ .azure/gpu-tests-pytorch.yml | 8 ++------ .github/checkgroup.yml | 4 ++-- .github/workflows/docker-build.yml | 3 ++- dockers/base-cuda/Dockerfile | 4 ++-- 5 files changed, 10 insertions(+), 17 deletions(-) diff --git a/.azure/gpu-tests-fabric.yml b/.azure/gpu-tests-fabric.yml index c185060b75198..62e07dd7d482b 100644 --- a/.azure/gpu-tests-fabric.yml +++ b/.azure/gpu-tests-fabric.yml @@ -59,17 +59,13 @@ jobs: strategy: matrix: "Fabric | latest": - image: "pytorchlightning/pytorch_lightning:base-cuda-py3.10-torch2.0-cuda11.8.0" + image: "pytorchlightning/pytorch_lightning:base-cuda-py3.11-torch2.1-cuda12.1.0" IS_NIGHTLY: "false" PACKAGE_NAME: "fabric" "Lightning | latest": - image: "pytorchlightning/pytorch_lightning:base-cuda-py3.10-torch2.0-cuda11.8.0" + image: "pytorchlightning/pytorch_lightning:base-cuda-py3.11-torch2.1-cuda12.1.0" IS_NIGHTLY: "false" PACKAGE_NAME: "lightning" - "Lightning | RC": - image: "pytorchlightning/pytorch_lightning:base-cuda-py3.10-torch2.0-cuda11.8.0" - IS_NIGHTLY: "true" - PACKAGE_NAME: "lightning" workspace: clean: all steps: diff --git a/.azure/gpu-tests-pytorch.yml b/.azure/gpu-tests-pytorch.yml index 1a1725292419d..652e68ebb34cf 100644 --- a/.azure/gpu-tests-pytorch.yml +++ b/.azure/gpu-tests-pytorch.yml @@ -51,17 +51,13 @@ jobs: strategy: matrix: "PyTorch | latest": - image: "pytorchlightning/pytorch_lightning:base-cuda-py3.10-torch2.0-cuda11.8.0" + image: "pytorchlightning/pytorch_lightning:base-cuda-py3.11-torch2.1-cuda12.1.0" IS_NIGHTLY: "false" PACKAGE_NAME: "pytorch" "Lightning | latest": - image: "pytorchlightning/pytorch_lightning:base-cuda-py3.10-torch2.0-cuda11.8.0" + image: "pytorchlightning/pytorch_lightning:base-cuda-py3.11-torch2.1-cuda12.1.0" IS_NIGHTLY: "false" PACKAGE_NAME: "lightning" - "Lightning | 
RC": - image: "pytorchlightning/pytorch_lightning:base-cuda-py3.10-torch2.0-cuda11.8.0" - IS_NIGHTLY: "true" - PACKAGE_NAME: "lightning" pool: lit-rtx-3090 variables: DEVICES: $( python -c 'print("$(Agent.Name)".split("_")[-1])' ) diff --git a/.github/checkgroup.yml b/.github/checkgroup.yml index a0284ac66fcc1..12599e30d5218 100644 --- a/.github/checkgroup.yml +++ b/.github/checkgroup.yml @@ -143,13 +143,13 @@ subprojects: - "build-cuda (3.9, 1.13, 11.8.0)" - "build-cuda (3.9, 1.13, 12.0.1)" - "build-cuda (3.10, 2.0, 11.8.0)" - - "build-cuda (3.10, 2.0, 12.0.1)" + - "build-cuda (3.11, 2.1, 12.1.0)" #- "build-NGC" - "build-pl (3.9, 1.12, 11.7.1)" - "build-pl (3.9, 1.13, 11.8.0)" - "build-pl (3.9, 1.13, 12.0.1)" - "build-pl (3.10, 2.0, 11.8.0)" - - "build-pl (3.10, 2.0, 12.0.1)" + - "build-pl (3.11, 2.1, 12.1.0)" # SECTIONS: lightning_data diff --git a/.github/workflows/docker-build.yml b/.github/workflows/docker-build.yml index 1c1b8e99d5bfd..b677296d6b261 100644 --- a/.github/workflows/docker-build.yml +++ b/.github/workflows/docker-build.yml @@ -48,6 +48,7 @@ jobs: - { python_version: "3.9", pytorch_version: "1.13", cuda_version: "12.0.1" } - { python_version: "3.10", pytorch_version: "2.0", cuda_version: "11.8.0" } - { python_version: "3.10", pytorch_version: "2.0", cuda_version: "12.0.1" } + - { python_version: "3.11", pytorch_version: "2.1", cuda_version: "12.1.0" } steps: - uses: actions/checkout@v4 with: @@ -108,7 +109,7 @@ jobs: - { python_version: "3.9", pytorch_version: "1.13", cuda_version: "11.8.0" } - { python_version: "3.9", pytorch_version: "1.13", cuda_version: "12.0.1" } - { python_version: "3.10", pytorch_version: "2.0", cuda_version: "11.8.0" } - - { python_version: "3.10", pytorch_version: "2.0", cuda_version: "12.0.1" } + - { python_version: "3.11", pytorch_version: "2.1", cuda_version: "12.1.0" } steps: - uses: actions/checkout@v4 - uses: docker/setup-buildx-action@v3 diff --git a/dockers/base-cuda/Dockerfile b/dockers/base-cuda/Dockerfile 
index 5fcaec5ffbfa0..e51e665278e50 100644 --- a/dockers/base-cuda/Dockerfile +++ b/dockers/base-cuda/Dockerfile @@ -18,8 +18,8 @@ ARG CUDA_VERSION=11.7.1 FROM nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION} -ARG PYTHON_VERSION=3.10 -ARG PYTORCH_VERSION=2.0 +ARG PYTHON_VERSION=3.11 +ARG PYTORCH_VERSION=2.1 ARG MAX_ALLOWED_NCCL=2.16.2 SHELL ["/bin/bash", "-c"] From 0ccc140b6dd3ca62013ec0de5c8394ba87147bf2 Mon Sep 17 00:00:00 2001 From: awaelchli Date: Wed, 4 Oct 2023 23:45:18 +0200 Subject: [PATCH 02/14] debug --- dockers/base-cuda/Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dockers/base-cuda/Dockerfile b/dockers/base-cuda/Dockerfile index e51e665278e50..5de9c10451db8 100644 --- a/dockers/base-cuda/Dockerfile +++ b/dockers/base-cuda/Dockerfile @@ -18,9 +18,9 @@ ARG CUDA_VERSION=11.7.1 FROM nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION} -ARG PYTHON_VERSION=3.11 +ARG PYTHON_VERSION=3.10 ARG PYTORCH_VERSION=2.1 -ARG MAX_ALLOWED_NCCL=2.16.2 +ARG MAX_ALLOWED_NCCL=2.18.5 SHELL ["/bin/bash", "-c"] # https://techoverflow.net/2019/05/18/how-to-fix-configuring-tzdata-interactive-input-when-building-docker-images/ From 9b9703feea087f5b46abc389364f61fcd1d1dc52 Mon Sep 17 00:00:00 2001 From: Jirka Date: Wed, 4 Oct 2023 23:45:48 +0200 Subject: [PATCH 03/14] nccl does not have cuda12.1 --- .github/checkgroup.yml | 4 ++-- .github/workflows/docker-build.yml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/checkgroup.yml b/.github/checkgroup.yml index 12599e30d5218..c641ad0df93eb 100644 --- a/.github/checkgroup.yml +++ b/.github/checkgroup.yml @@ -143,13 +143,13 @@ subprojects: - "build-cuda (3.9, 1.13, 11.8.0)" - "build-cuda (3.9, 1.13, 12.0.1)" - "build-cuda (3.10, 2.0, 11.8.0)" - - "build-cuda (3.11, 2.1, 12.1.0)" + - "build-cuda (3.11, 2.1, 12.0.0)" #- "build-NGC" - "build-pl (3.9, 1.12, 11.7.1)" - "build-pl (3.9, 1.13, 11.8.0)" - "build-pl (3.9, 1.13, 12.0.1)" - "build-pl (3.10, 2.0, 
11.8.0)" - - "build-pl (3.11, 2.1, 12.1.0)" + - "build-pl (3.11, 2.1, 12.0.0)" # SECTIONS: lightning_data diff --git a/.github/workflows/docker-build.yml b/.github/workflows/docker-build.yml index b677296d6b261..82528cc3214d4 100644 --- a/.github/workflows/docker-build.yml +++ b/.github/workflows/docker-build.yml @@ -48,7 +48,7 @@ jobs: - { python_version: "3.9", pytorch_version: "1.13", cuda_version: "12.0.1" } - { python_version: "3.10", pytorch_version: "2.0", cuda_version: "11.8.0" } - { python_version: "3.10", pytorch_version: "2.0", cuda_version: "12.0.1" } - - { python_version: "3.11", pytorch_version: "2.1", cuda_version: "12.1.0" } + - { python_version: "3.11", pytorch_version: "2.1", cuda_version: "12.0.0" } steps: - uses: actions/checkout@v4 with: @@ -109,7 +109,7 @@ jobs: - { python_version: "3.9", pytorch_version: "1.13", cuda_version: "11.8.0" } - { python_version: "3.9", pytorch_version: "1.13", cuda_version: "12.0.1" } - { python_version: "3.10", pytorch_version: "2.0", cuda_version: "11.8.0" } - - { python_version: "3.11", pytorch_version: "2.1", cuda_version: "12.1.0" } + - { python_version: "3.11", pytorch_version: "2.1", cuda_version: "12.0.0" } steps: - uses: actions/checkout@v4 - uses: docker/setup-buildx-action@v3 From d66035649a89b47aa35bd9d2af2a04f03ada48db Mon Sep 17 00:00:00 2001 From: awaelchli Date: Wed, 4 Oct 2023 23:47:13 +0200 Subject: [PATCH 04/14] remove is_nightly --- .azure/gpu-tests-fabric.yml | 13 +------------ .azure/gpu-tests-pytorch.yml | 13 +------------ 2 files changed, 2 insertions(+), 24 deletions(-) diff --git a/.azure/gpu-tests-fabric.yml b/.azure/gpu-tests-fabric.yml index 62e07dd7d482b..2aeff87ebd0f5 100644 --- a/.azure/gpu-tests-fabric.yml +++ b/.azure/gpu-tests-fabric.yml @@ -60,11 +60,9 @@ jobs: matrix: "Fabric | latest": image: "pytorchlightning/pytorch_lightning:base-cuda-py3.11-torch2.1-cuda12.1.0" - IS_NIGHTLY: "false" PACKAGE_NAME: "fabric" "Lightning | latest": image: 
"pytorchlightning/pytorch_lightning:base-cuda-py3.11-torch2.1-cuda12.1.0" - IS_NIGHTLY: "false" PACKAGE_NAME: "lightning" workspace: clean: all @@ -83,7 +81,6 @@ jobs: echo $CUDA_VISIBLE_DEVICES echo $CUDA_VERSION_MM echo $TORCH_URL - echo $(IS_NIGHTLY) echo $COVERAGE_SOURCE whereis nvidia nvidia-smi @@ -101,7 +98,7 @@ jobs: python ./adjust-torch-versions.py $fpath ${PYTORCH_VERSION}; \ done # without succeeded this could run even if the job has already failed - condition: and(succeeded(), eq(variables.IS_NIGHTLY, 'false')) + condition: succeeded() displayName: "Adjust dependencies" - bash: | @@ -109,14 +106,6 @@ jobs: pip install -e ".[${extra}dev]" pytest-timeout -U --find-links ${TORCH_URL} displayName: "Install package & dependencies" - - bash: | - pip uninstall -y torch torchvision - pip install torch torchvision -U --pre --no-cache --index-url https://download.pytorch.org/whl/test/cu${CUDA_VERSION_MM%} - python -c "from torch import __version__ as ver; assert ver.startswith('2.1.0'), ver" - # without succeeded this could run even if the job has already failed - condition: and(succeeded(), eq(variables.IS_NIGHTLY, 'true')) - displayName: "Bump to RC" - - bash: | set -e python requirements/collect_env_details.py diff --git a/.azure/gpu-tests-pytorch.yml b/.azure/gpu-tests-pytorch.yml index 652e68ebb34cf..53300b145f486 100644 --- a/.azure/gpu-tests-pytorch.yml +++ b/.azure/gpu-tests-pytorch.yml @@ -52,11 +52,9 @@ jobs: matrix: "PyTorch | latest": image: "pytorchlightning/pytorch_lightning:base-cuda-py3.11-torch2.1-cuda12.1.0" - IS_NIGHTLY: "false" PACKAGE_NAME: "pytorch" "Lightning | latest": image: "pytorchlightning/pytorch_lightning:base-cuda-py3.11-torch2.1-cuda12.1.0" - IS_NIGHTLY: "false" PACKAGE_NAME: "lightning" pool: lit-rtx-3090 variables: @@ -85,7 +83,6 @@ jobs: echo $CUDA_VISIBLE_DEVICES echo $CUDA_VERSION_MM echo $TORCH_URL - echo $(IS_NIGHTLY) echo $COVERAGE_SOURCE whereis nvidia nvidia-smi @@ -103,7 +100,7 @@ jobs: python ./adjust-torch-versions.py 
$fpath ${PYTORCH_VERSION}; \ done # without succeeded this could run even if the job has already failed - condition: and(succeeded(), eq(variables.IS_NIGHTLY, 'false')) + condition: succeeded() displayName: "Adjust dependencies" - bash: | @@ -118,14 +115,6 @@ jobs: pip install -e ".[${extra}dev]" -r requirements/_integrations/strategies.txt pytest-timeout -U --find-links ${TORCH_URL} displayName: "Install package & dependencies" - - bash: | - pip uninstall -y torch torchvision - pip install torch torchvision -U --pre --no-cache --index-url https://download.pytorch.org/whl/test/cu${CUDA_VERSION_MM%} - python -c "from torch import __version__ as ver; assert ver.startswith('2.1.0'), ver" - # without succeeded this could run even if the job has already failed - condition: and(succeeded(), eq(variables.IS_NIGHTLY, 'true')) - displayName: "Bump to RC" - - bash: pip uninstall -y lightning # without succeeded this could run even if the job has already failed condition: and(succeeded(), eq(variables['PACKAGE_NAME'], 'pytorch')) From 564dd770a90ec4dc77119ccb872292fe281f11cd Mon Sep 17 00:00:00 2001 From: Jirka Date: Wed, 4 Oct 2023 23:49:28 +0200 Subject: [PATCH 05/14] Revert "nccl does not have cuda12.1" This reverts commit 9b9703feea087f5b46abc389364f61fcd1d1dc52. 
--- .github/checkgroup.yml | 4 ++-- .github/workflows/docker-build.yml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/checkgroup.yml b/.github/checkgroup.yml index c641ad0df93eb..12599e30d5218 100644 --- a/.github/checkgroup.yml +++ b/.github/checkgroup.yml @@ -143,13 +143,13 @@ subprojects: - "build-cuda (3.9, 1.13, 11.8.0)" - "build-cuda (3.9, 1.13, 12.0.1)" - "build-cuda (3.10, 2.0, 11.8.0)" - - "build-cuda (3.11, 2.1, 12.0.0)" + - "build-cuda (3.11, 2.1, 12.1.0)" #- "build-NGC" - "build-pl (3.9, 1.12, 11.7.1)" - "build-pl (3.9, 1.13, 11.8.0)" - "build-pl (3.9, 1.13, 12.0.1)" - "build-pl (3.10, 2.0, 11.8.0)" - - "build-pl (3.11, 2.1, 12.0.0)" + - "build-pl (3.11, 2.1, 12.1.0)" # SECTIONS: lightning_data diff --git a/.github/workflows/docker-build.yml b/.github/workflows/docker-build.yml index 82528cc3214d4..b677296d6b261 100644 --- a/.github/workflows/docker-build.yml +++ b/.github/workflows/docker-build.yml @@ -48,7 +48,7 @@ jobs: - { python_version: "3.9", pytorch_version: "1.13", cuda_version: "12.0.1" } - { python_version: "3.10", pytorch_version: "2.0", cuda_version: "11.8.0" } - { python_version: "3.10", pytorch_version: "2.0", cuda_version: "12.0.1" } - - { python_version: "3.11", pytorch_version: "2.1", cuda_version: "12.0.0" } + - { python_version: "3.11", pytorch_version: "2.1", cuda_version: "12.1.0" } steps: - uses: actions/checkout@v4 with: @@ -109,7 +109,7 @@ jobs: - { python_version: "3.9", pytorch_version: "1.13", cuda_version: "11.8.0" } - { python_version: "3.9", pytorch_version: "1.13", cuda_version: "12.0.1" } - { python_version: "3.10", pytorch_version: "2.0", cuda_version: "11.8.0" } - - { python_version: "3.11", pytorch_version: "2.1", cuda_version: "12.0.0" } + - { python_version: "3.11", pytorch_version: "2.1", cuda_version: "12.1.0" } steps: - uses: actions/checkout@v4 - uses: docker/setup-buildx-action@v3 From 8ae7bdcbe9d2597a732b83f32942b438377515e3 Mon Sep 17 00:00:00 2001 From: Jirka Date: Wed, 4 Oct 
2023 23:50:10 +0200 Subject: [PATCH 06/14] bump NCCL to 2.17.1, first with 12.1 support --- dockers/base-cuda/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dockers/base-cuda/Dockerfile b/dockers/base-cuda/Dockerfile index e51e665278e50..d635149ca998b 100644 --- a/dockers/base-cuda/Dockerfile +++ b/dockers/base-cuda/Dockerfile @@ -20,7 +20,7 @@ FROM nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION} ARG PYTHON_VERSION=3.11 ARG PYTORCH_VERSION=2.1 -ARG MAX_ALLOWED_NCCL=2.16.2 +ARG MAX_ALLOWED_NCCL=2.17.1 SHELL ["/bin/bash", "-c"] # https://techoverflow.net/2019/05/18/how-to-fix-configuring-tzdata-interactive-input-when-building-docker-images/ From 195a9cb17a382d112a21411a76be897d1207ac7b Mon Sep 17 00:00:00 2001 From: awaelchli Date: Thu, 5 Oct 2023 00:04:20 +0200 Subject: [PATCH 07/14] trigger push --- .github/workflows/docker-build.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/docker-build.yml b/.github/workflows/docker-build.yml index b677296d6b261..c8f3a58a8200b 100644 --- a/.github/workflows/docker-build.yml +++ b/.github/workflows/docker-build.yml @@ -2,9 +2,9 @@ name: Docker builds on: push: - branches: [master, "release/*"] + branches: ["*"] pull_request: - branches: [master, "release/*"] + branches: ["*"] types: [opened, reopened, ready_for_review, synchronize] # added `ready_for_review` since draft is skipped paths: - ".actions/*" @@ -29,7 +29,7 @@ concurrency: env: PUSH_NIGHTLY: ${{ github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' }} - PUSH_RELEASE: ${{ startsWith(github.ref, 'refs/tags/') || github.event_name == 'release' }} + PUSH_RELEASE: 'true' # ${{ startsWith(github.ref, 'refs/tags/') || github.event_name == 'release' }} jobs: build-pl: From 423ec3c83a051fd43de91dbb812a2b3d4dc659fe Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 4 Oct 2023 22:06:01 +0000 Subject: 
[PATCH 08/14] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .github/workflows/docker-build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/docker-build.yml b/.github/workflows/docker-build.yml index c8f3a58a8200b..de34a91378a9a 100644 --- a/.github/workflows/docker-build.yml +++ b/.github/workflows/docker-build.yml @@ -29,7 +29,7 @@ concurrency: env: PUSH_NIGHTLY: ${{ github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' }} - PUSH_RELEASE: 'true' # ${{ startsWith(github.ref, 'refs/tags/') || github.event_name == 'release' }} + PUSH_RELEASE: "true" # ${{ startsWith(github.ref, 'refs/tags/') || github.event_name == 'release' }} jobs: build-pl: From df7b2012edbcf548a9083b3fc4ba258f562acae1 Mon Sep 17 00:00:00 2001 From: Jirka Date: Thu, 5 Oct 2023 00:56:19 +0200 Subject: [PATCH 09/14] Revert "trigger push" This reverts commit 195a9cb1 --- .github/workflows/docker-build.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/docker-build.yml b/.github/workflows/docker-build.yml index de34a91378a9a..b677296d6b261 100644 --- a/.github/workflows/docker-build.yml +++ b/.github/workflows/docker-build.yml @@ -2,9 +2,9 @@ name: Docker builds on: push: - branches: ["*"] + branches: [master, "release/*"] pull_request: - branches: ["*"] + branches: [master, "release/*"] types: [opened, reopened, ready_for_review, synchronize] # added `ready_for_review` since draft is skipped paths: - ".actions/*" @@ -29,7 +29,7 @@ concurrency: env: PUSH_NIGHTLY: ${{ github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' }} - PUSH_RELEASE: "true" # ${{ startsWith(github.ref, 'refs/tags/') || github.event_name == 'release' }} + PUSH_RELEASE: ${{ startsWith(github.ref, 'refs/tags/') || github.event_name == 'release' }} jobs: build-pl: From 4091343141e99131ba42bd4ff24447b81fa6f57e Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Adrian=20W=C3=A4lchli?= Date: Thu, 5 Oct 2023 02:14:05 +0200 Subject: [PATCH 10/14] stay with Python 3.10 for now some optional dependencies don't install in Python 3.11 yet --- .azure/gpu-tests-fabric.yml | 4 ++-- .azure/gpu-tests-pytorch.yml | 4 ++-- .github/workflows/docker-build.yml | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.azure/gpu-tests-fabric.yml b/.azure/gpu-tests-fabric.yml index 2aeff87ebd0f5..b01fe2fe96dfe 100644 --- a/.azure/gpu-tests-fabric.yml +++ b/.azure/gpu-tests-fabric.yml @@ -59,10 +59,10 @@ jobs: strategy: matrix: "Fabric | latest": - image: "pytorchlightning/pytorch_lightning:base-cuda-py3.11-torch2.1-cuda12.1.0" + image: "pytorchlightning/pytorch_lightning:base-cuda-py3.10-torch2.1-cuda12.1.0" PACKAGE_NAME: "fabric" "Lightning | latest": - image: "pytorchlightning/pytorch_lightning:base-cuda-py3.11-torch2.1-cuda12.1.0" + image: "pytorchlightning/pytorch_lightning:base-cuda-py3.10-torch2.1-cuda12.1.0" PACKAGE_NAME: "lightning" workspace: clean: all diff --git a/.azure/gpu-tests-pytorch.yml b/.azure/gpu-tests-pytorch.yml index 53300b145f486..e98a1d4db47ba 100644 --- a/.azure/gpu-tests-pytorch.yml +++ b/.azure/gpu-tests-pytorch.yml @@ -51,10 +51,10 @@ jobs: strategy: matrix: "PyTorch | latest": - image: "pytorchlightning/pytorch_lightning:base-cuda-py3.11-torch2.1-cuda12.1.0" + image: "pytorchlightning/pytorch_lightning:base-cuda-py3.10-torch2.1-cuda12.1.0" PACKAGE_NAME: "pytorch" "Lightning | latest": - image: "pytorchlightning/pytorch_lightning:base-cuda-py3.11-torch2.1-cuda12.1.0" + image: "pytorchlightning/pytorch_lightning:base-cuda-py3.10-torch2.1-cuda12.1.0" PACKAGE_NAME: "lightning" pool: lit-rtx-3090 variables: diff --git a/.github/workflows/docker-build.yml b/.github/workflows/docker-build.yml index b677296d6b261..b074553feb490 100644 --- a/.github/workflows/docker-build.yml +++ b/.github/workflows/docker-build.yml @@ -48,7 +48,7 @@ jobs: - { python_version: "3.9", pytorch_version: "1.13", 
cuda_version: "12.0.1" } - { python_version: "3.10", pytorch_version: "2.0", cuda_version: "11.8.0" } - { python_version: "3.10", pytorch_version: "2.0", cuda_version: "12.0.1" } - - { python_version: "3.11", pytorch_version: "2.1", cuda_version: "12.1.0" } + - { python_version: "3.10", pytorch_version: "2.1", cuda_version: "12.1.0" } steps: - uses: actions/checkout@v4 with: @@ -109,7 +109,7 @@ jobs: - { python_version: "3.9", pytorch_version: "1.13", cuda_version: "11.8.0" } - { python_version: "3.9", pytorch_version: "1.13", cuda_version: "12.0.1" } - { python_version: "3.10", pytorch_version: "2.0", cuda_version: "11.8.0" } - - { python_version: "3.11", pytorch_version: "2.1", cuda_version: "12.1.0" } + - { python_version: "3.10", pytorch_version: "2.1", cuda_version: "12.1.0" } steps: - uses: actions/checkout@v4 - uses: docker/setup-buildx-action@v3 From 054853bdbefcd46750446743cd7eb20a204a9446 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrian=20W=C3=A4lchli?= Date: Thu, 5 Oct 2023 02:15:43 +0200 Subject: [PATCH 11/14] checkgroup --- .github/checkgroup.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/checkgroup.yml b/.github/checkgroup.yml index 12599e30d5218..48ac8b673ece8 100644 --- a/.github/checkgroup.yml +++ b/.github/checkgroup.yml @@ -143,13 +143,13 @@ subprojects: - "build-cuda (3.9, 1.13, 11.8.0)" - "build-cuda (3.9, 1.13, 12.0.1)" - "build-cuda (3.10, 2.0, 11.8.0)" - - "build-cuda (3.11, 2.1, 12.1.0)" + - "build-cuda (3.10, 2.1, 12.1.0)" #- "build-NGC" - "build-pl (3.9, 1.12, 11.7.1)" - "build-pl (3.9, 1.13, 11.8.0)" - "build-pl (3.9, 1.13, 12.0.1)" - "build-pl (3.10, 2.0, 11.8.0)" - - "build-pl (3.11, 2.1, 12.1.0)" + - "build-pl (3.10, 2.1, 12.1.0)" # SECTIONS: lightning_data From b2f36ba86b87de05afcd349109b62e3231b13888 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrian=20W=C3=A4lchli?= Date: Thu, 5 Oct 2023 03:02:20 +0200 Subject: [PATCH 12/14] remove extra docker build entry --- .github/workflows/docker-build.yml | 1 - 
1 file changed, 1 deletion(-) diff --git a/.github/workflows/docker-build.yml b/.github/workflows/docker-build.yml index b074553feb490..4aba32fbdf485 100644 --- a/.github/workflows/docker-build.yml +++ b/.github/workflows/docker-build.yml @@ -47,7 +47,6 @@ jobs: - { python_version: "3.9", pytorch_version: "1.13", cuda_version: "11.8.0" } - { python_version: "3.9", pytorch_version: "1.13", cuda_version: "12.0.1" } - { python_version: "3.10", pytorch_version: "2.0", cuda_version: "11.8.0" } - - { python_version: "3.10", pytorch_version: "2.0", cuda_version: "12.0.1" } - { python_version: "3.10", pytorch_version: "2.1", cuda_version: "12.1.0" } steps: - uses: actions/checkout@v4 From f7a5b4d6ff2ab9294e77ceb4fe57ec86f5b217af Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrian=20W=C3=A4lchli?= Date: Wed, 4 Oct 2023 21:02:48 -0400 Subject: [PATCH 13/14] Apply suggestions from code review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Carlos Mocholí --- .azure/gpu-tests-fabric.yml | 1 - .azure/gpu-tests-pytorch.yml | 2 -- 2 files changed, 3 deletions(-) diff --git a/.azure/gpu-tests-fabric.yml b/.azure/gpu-tests-fabric.yml index b01fe2fe96dfe..4dbae8793354f 100644 --- a/.azure/gpu-tests-fabric.yml +++ b/.azure/gpu-tests-fabric.yml @@ -98,7 +98,6 @@ jobs: python ./adjust-torch-versions.py $fpath ${PYTORCH_VERSION}; \ done # without succeeded this could run even if the job has already failed - condition: succeeded() displayName: "Adjust dependencies" - bash: | diff --git a/.azure/gpu-tests-pytorch.yml b/.azure/gpu-tests-pytorch.yml index e98a1d4db47ba..95c760328e779 100644 --- a/.azure/gpu-tests-pytorch.yml +++ b/.azure/gpu-tests-pytorch.yml @@ -99,8 +99,6 @@ jobs: for fpath in `ls requirements/**/*.txt`; do \ python ./adjust-torch-versions.py $fpath ${PYTORCH_VERSION}; \ done - # without succeeded this could run even if the job has already failed - condition: succeeded() displayName: "Adjust dependencies" - bash: | 
From 980ba13129a484a7b51134c13d62bdc6488a0a44 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrian=20W=C3=A4lchli?= Date: Wed, 4 Oct 2023 21:03:08 -0400 Subject: [PATCH 14/14] Update .azure/gpu-tests-fabric.yml MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Carlos Mocholí --- .azure/gpu-tests-fabric.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.azure/gpu-tests-fabric.yml b/.azure/gpu-tests-fabric.yml index 4dbae8793354f..2554ab3d67cbf 100644 --- a/.azure/gpu-tests-fabric.yml +++ b/.azure/gpu-tests-fabric.yml @@ -97,7 +97,6 @@ jobs: for fpath in `ls requirements/**/*.txt`; do \ python ./adjust-torch-versions.py $fpath ${PYTORCH_VERSION}; \ done - # without succeeded this could run even if the job has already failed displayName: "Adjust dependencies" - bash: |