From 1ae14ca7542e83db47888fd0d68324449014dfc0 Mon Sep 17 00:00:00 2001
From: otaj <6065855+otaj@users.noreply.github.com>
Date: Thu, 25 Aug 2022 19:30:06 +0200
Subject: [PATCH 1/3] [CI] fix horovod tests (#14382)

---
 .azure/gpu-tests.yml          | 11 +++++++----
 dockers/base-conda/Dockerfile | 14 ++++++++------
 dockers/base-cuda/Dockerfile  | 23 +++++++----------------
 3 files changed, 22 insertions(+), 26 deletions(-)

diff --git a/.azure/gpu-tests.yml b/.azure/gpu-tests.yml
index f19c5bafc7814..2da30c0dd66ab 100644
--- a/.azure/gpu-tests.yml
+++ b/.azure/gpu-tests.yml
@@ -44,7 +44,7 @@ jobs:
 
     - bash: |
         CHANGED_FILES=$(git diff --name-status origin/master -- . | awk  '{print $2}')
-        FILTER='src/pytorch_lightning|requirements/pytorch|tests/tests_pytorch|examples/pl_*'
+        FILTER='.azure/gpu_*|src/pytorch_lightning|requirements/pytorch|tests/tests_pytorch|examples/pl_*'
         echo $CHANGED_FILES > changed_files.txt
         MATCHES=$(cat changed_files.txt | grep -E $FILTER)
         echo $MATCHES
@@ -72,12 +72,15 @@ jobs:
         set -e
         python -c "fname = 'requirements/pytorch/strategies.txt' ; lines = [line for line in open(fname).readlines() if 'horovod' not in line] ; open(fname, 'w').writelines(lines)"
         python -c "fname = 'requirements/pytorch/strategies.txt' ; lines = [line for line in open(fname).readlines() if 'bagua' not in line] ; open(fname, 'w').writelines(lines)"
+        PYTORCH_VERSION=$(python -c "import torch; print(torch.__version__.split('+')[0])")  # installed torch version (local build tag stripped), passed to adjust-versions.py
         CUDA_VERSION_MM=$(python -c "import torch ; print(''.join(map(str, torch.version.cuda.split('.')[:2])))")
         CUDA_VERSION_BAGUA=$(python -c "print([ver for ver in [115,113,111,102] if $CUDA_VERSION_MM >= ver][0])")
+        python ./requirements/pytorch/adjust-versions.py requirements/pytorch/base.txt ${PYTORCH_VERSION}
+        python ./requirements/pytorch/adjust-versions.py requirements/pytorch/extra.txt ${PYTORCH_VERSION}
+        python ./requirements/pytorch/adjust-versions.py requirements/pytorch/examples.txt ${PYTORCH_VERSION}
         pip install "bagua-cuda$CUDA_VERSION_BAGUA>=0.9.0"
-        pip install -e .[strategies]
-        pip install -U deepspeed  # TODO: remove when docker images are upgraded
-        pip install --requirement requirements/pytorch/devel.txt
+        pip install -e .[strategies] --find-links https://download.pytorch.org/whl/cu${CUDA_VERSION_MM}/torch_stable.html
+        pip install --requirement requirements/pytorch/devel.txt --find-links https://download.pytorch.org/whl/cu${CUDA_VERSION_MM}/torch_stable.html
         pip list
       env:
         PACKAGE_NAME: pytorch
diff --git a/dockers/base-conda/Dockerfile b/dockers/base-conda/Dockerfile
index d6bfeee90d561..9bb75e34b8ff6 100644
--- a/dockers/base-conda/Dockerfile
+++ b/dockers/base-conda/Dockerfile
@@ -34,6 +34,10 @@ RUN \
     # https://github.com/NVIDIA/nvidia-docker/issues/1631
     apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/3bf863cc.pub && \
     apt-get update -qq --fix-missing && \
+    NCCL_VER=$(dpkg -s libnccl2 | grep '^Version:' | awk -F ' ' '{print $2}' | awk -F '-' '{print $1}' | grep -ve '^\s*$') && \
+    CUDA_VERSION_MM="${CUDA_VERSION%.*}" && \
+    MAX_ALLOWED_NCCL=2.11.4 && \
+    TO_INSTALL_NCCL=$(echo -e "$MAX_ALLOWED_NCCL\n$NCCL_VER" | sort -V  | head -n1)-1+cuda${CUDA_VERSION_MM} && \
     apt-get install -y --no-install-recommends \
         build-essential \
         cmake \
@@ -42,17 +46,15 @@ RUN \
         curl \
         unzip \
         ca-certificates \
-        libopenmpi-dev
-
-RUN \
+        libopenmpi-dev \
+        libnccl2=$TO_INSTALL_NCCL \
+        libnccl-dev=$TO_INSTALL_NCCL && \
 # Install conda and python.
 # NOTE new Conda does not forward the exit status... https://github.com/conda/conda/issues/8385
     curl -o ~/miniconda.sh https://repo.anaconda.com/miniconda/Miniconda3-py38_${CONDA_VERSION}-Linux-x86_64.sh && \
     chmod +x ~/miniconda.sh && \
     ~/miniconda.sh -b && \
-    rm ~/miniconda.sh
-
-RUN \
+    rm ~/miniconda.sh && \
 # Cleaning
     apt-get autoremove -y && \
     apt-get clean && \
diff --git a/dockers/base-cuda/Dockerfile b/dockers/base-cuda/Dockerfile
index be613f3b6415f..08692ff00ab78 100644
--- a/dockers/base-cuda/Dockerfile
+++ b/dockers/base-cuda/Dockerfile
@@ -37,7 +37,11 @@ RUN \
     # https://github.com/NVIDIA/nvidia-docker/issues/1631
     apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/3bf863cc.pub && \
     apt-get update -qq --fix-missing && \
-    apt-get install -y --no-install-recommends \
+    NCCL_VER=$(dpkg -s libnccl2 | grep '^Version:' | awk -F ' ' '{print $2}' | awk -F '-' '{print $1}' | grep -ve '^\s*$') && \
+    CUDA_VERSION_MM="${CUDA_VERSION%.*}" && \
+    MAX_ALLOWED_NCCL=2.11.4 && \
+    TO_INSTALL_NCCL=$(echo -e "$MAX_ALLOWED_NCCL\n$NCCL_VER" | sort -V  | head -n1)-1+cuda${CUDA_VERSION_MM} && \
+    apt-get install -y --no-install-recommends --allow-downgrades --allow-change-held-packages \
         build-essential \
         pkg-config \
         cmake \
@@ -50,8 +54,8 @@ RUN \
         libopenmpi-dev \
         openmpi-bin \
         ssh \
-    && \
-
+        libnccl2=$TO_INSTALL_NCCL \
+        libnccl-dev=$TO_INSTALL_NCCL && \
 # Install python
     add-apt-repository ppa:deadsnakes/ppa && \
     apt-get install -y \
@@ -59,10 +63,8 @@ RUN \
         python${PYTHON_VERSION}-distutils \
         python${PYTHON_VERSION}-dev \
     && \
-
     update-alternatives --install /usr/bin/python${PYTHON_VERSION%%.*} python${PYTHON_VERSION%%.*} /usr/bin/python${PYTHON_VERSION} 1 && \
     update-alternatives --install /usr/bin/python python /usr/bin/python${PYTHON_VERSION} 1 && \
-
 # Cleaning
     apt-get autoremove -y && \
     apt-get clean && \
@@ -78,7 +80,6 @@ RUN \
     wget https://bootstrap.pypa.io/get-pip.py --progress=bar:force:noscroll --no-check-certificate && \
     python${PYTHON_VERSION} get-pip.py && \
     rm get-pip.py && \
-
     pip install -q fire && \
     # Disable cache \
     CUDA_VERSION_MM=$(python -c "print(''.join('$CUDA_VERSION'.split('.')[:2]))") && \
@@ -91,16 +92,6 @@ RUN \
     pip install -r requirements/pytorch/devel.txt --no-cache-dir --find-links https://download.pytorch.org/whl/cu${CUDA_VERSION_MM}/torch_stable.html && \
     rm assistant.py
 
-RUN \
-    apt-get purge -y cmake && \
-    wget -q https://github.com/Kitware/CMake/releases/download/v3.20.2/cmake-3.20.2.tar.gz && \
-    tar -zxvf cmake-3.20.2.tar.gz && \
-    cd cmake-3.20.2 && \
-    ./bootstrap -- -DCMAKE_USE_OPENSSL=OFF && \
-    make && \
-    make install && \
-    cmake  --version
-
 ENV \
     HOROVOD_CUDA_HOME=$CUDA_TOOLKIT_ROOT_DIR \
     HOROVOD_GPU_OPERATIONS=NCCL \

From 807435885ea265580fee9f4e69c063eace46def2 Mon Sep 17 00:00:00 2001
From: Tanmoy <tanmoyf2@gmail.com>
Date: Fri, 26 Aug 2022 00:27:48 +0530
Subject: [PATCH 2/3] Fix `LightningDataModule` hparams parsing (#12806)

Co-authored-by: Akihiro Nitta <nitta@akihironitta.com>
Co-authored-by: Jirka <jirka.borovec@seznam.cz>
Co-authored-by: Rohit Gupta <rohitgr1998@gmail.com>
---
 src/pytorch_lightning/CHANGELOG.md            |  3 +
 src/pytorch_lightning/utilities/parsing.py    | 13 ++--
 .../tuner/test_scale_batch_size.py            | 69 ++++++++++++-------
 tests/tests_pytorch/utilities/test_parsing.py | 22 ++++--
 4 files changed, 71 insertions(+), 36 deletions(-)

diff --git a/src/pytorch_lightning/CHANGELOG.md b/src/pytorch_lightning/CHANGELOG.md
index 07c34bbc0e579..642cb28d4db4c 100644
--- a/src/pytorch_lightning/CHANGELOG.md
+++ b/src/pytorch_lightning/CHANGELOG.md
@@ -82,6 +82,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 - Fixed wrong num padding for `RichProgressBar` ([#14296](https://github.com/Lightning-AI/lightning/pull/14296))
 
 
+- Fixed `LightningDataModule` hparams parsing ([#12806](https://github.com/Lightning-AI/lightning/pull/12806))
+
+
 ## [1.7.2] - 2022-08-17
 
 ### Added
diff --git a/src/pytorch_lightning/utilities/parsing.py b/src/pytorch_lightning/utilities/parsing.py
index 073423ab60773..22dfb538828ab 100644
--- a/src/pytorch_lightning/utilities/parsing.py
+++ b/src/pytorch_lightning/utilities/parsing.py
@@ -321,14 +321,17 @@ def _lightning_get_all_attr_holders(model: "pl.LightningModule", attribute: str)
         holders.append(model)
 
     # Check if attribute in model.hparams, either namespace or dict
-    if hasattr(model, "hparams"):
-        if attribute in model.hparams:
-            holders.append(model.hparams)
+    if hasattr(model, "hparams") and attribute in model.hparams:
+        holders.append(model.hparams)
 
     trainer = model._trainer
     # Check if the attribute in datamodule (datamodule gets registered in Trainer)
-    if trainer is not None and trainer.datamodule is not None and hasattr(trainer.datamodule, attribute):
-        holders.append(trainer.datamodule)
+    if trainer is not None and trainer.datamodule is not None:
+        if hasattr(trainer.datamodule, attribute):
+            holders.append(trainer.datamodule)
+
+        if hasattr(trainer.datamodule, "hparams") and attribute in trainer.datamodule.hparams:
+            holders.append(trainer.datamodule.hparams)
 
     return holders
 
diff --git a/tests/tests_pytorch/tuner/test_scale_batch_size.py b/tests/tests_pytorch/tuner/test_scale_batch_size.py
index d2fc8a61e0107..ce7c3613f5012 100644
--- a/tests/tests_pytorch/tuner/test_scale_batch_size.py
+++ b/tests/tests_pytorch/tuner/test_scale_batch_size.py
@@ -29,8 +29,8 @@
 
 
 class BatchSizeDataModule(BoringDataModule):
-    def __init__(self, batch_size):
-        super().__init__()
+    def __init__(self, data_dir, batch_size):
+        super().__init__(data_dir)
         if batch_size is not None:
             self.batch_size = batch_size
 
@@ -58,7 +58,7 @@ def test_scale_batch_size_method_with_model_or_datamodule(tmpdir, model_bs, dm_b
     tuner = Tuner(trainer)
 
     model = BatchSizeModel(model_bs)
-    datamodule = BatchSizeDataModule(dm_bs) if dm_bs != -1 else None
+    datamodule = BatchSizeDataModule(tmpdir, dm_bs) if dm_bs != -1 else None
 
     new_batch_size = tuner.scale_batch_size(model, mode="binsearch", init_val=4, max_trials=2, datamodule=datamodule)
     assert new_batch_size == 16
@@ -140,47 +140,64 @@ def test_auto_scale_batch_size_trainer_arg(tmpdir, scale_arg):
     assert not os.path.exists(tmpdir / "scale_batch_size_temp_model.ckpt")
 
 
-@RunIf(min_cuda_gpus=1)
 @pytest.mark.parametrize("use_hparams", [True, False])
 def test_auto_scale_batch_size_set_model_attribute(tmpdir, use_hparams):
-    """Test that new batch size gets written to the correct hyperparameter attribute."""
+    """Test that new batch size gets written to the correct hyperparameter attribute for model."""
     tutils.reset_seed()
 
     hparams = {"batch_size": 2}
-    before_batch_size = hparams.get("batch_size")
+    before_batch_size = hparams["batch_size"]
 
-    class HparamsBatchSizeModel(BatchSizeModel):
+    class HparamsBatchSizeModel(BoringModel):
         def __init__(self, *args, **kwargs):
-            super().__init__(*args, **kwargs)
+            super().__init__()
             self.save_hyperparameters()
 
-        def dataloader(self, *args, **kwargs):
-            # artificially set batch_size so we can get a dataloader
-            # remove it immediately after, because we want only self.hparams.batch_size
-            setattr(self, "batch_size", before_batch_size)
-            dataloader = super().dataloader(*args, **kwargs)
-            del self.batch_size
-            return dataloader
+        def train_dataloader(self):
+            return DataLoader(RandomDataset(32, 64), batch_size=self.hparams.batch_size)
+
+        def val_dataloader(self):
+            return DataLoader(RandomDataset(32, 64), batch_size=self.hparams.batch_size)
+
+    model_class = HparamsBatchSizeModel if use_hparams else BatchSizeModel
+    model = model_class(**hparams)
+
+    trainer = Trainer(default_root_dir=tmpdir, max_epochs=1, auto_scale_batch_size=True)
+    trainer.tune(model, scale_batch_size_kwargs={"steps_per_trial": 2, "max_trials": 4})
+    after_batch_size = model.hparams.batch_size if use_hparams else model.batch_size
+    assert before_batch_size != after_batch_size
+    assert after_batch_size <= len(trainer.train_dataloader.dataset)
+
+
+@pytest.mark.parametrize("use_hparams", [True, False])
+def test_auto_scale_batch_size_set_datamodule_attribute(tmpdir, use_hparams):
+    """Test that new batch size gets written to the correct hyperparameter attribute for datamodule."""
+    tutils.reset_seed()
+
+    hparams = {"batch_size": 2}
+    before_batch_size = hparams["batch_size"]
 
     class HparamsBatchSizeDataModule(BoringDataModule):
         def __init__(self, data_dir, batch_size):
             super().__init__(data_dir)
-            self.batch_size = batch_size
+            self.save_hyperparameters()
 
         def train_dataloader(self):
-            return DataLoader(self.random_train, batch_size=self.batch_size)
+            return DataLoader(self.random_train, batch_size=self.hparams.batch_size)
 
-    datamodule_fit = HparamsBatchSizeDataModule(data_dir=tmpdir, batch_size=before_batch_size)
-    model_class = HparamsBatchSizeModel if use_hparams else BatchSizeModel
-    model = model_class(**hparams)
+        def val_dataloader(self):
+            return DataLoader(RandomDataset(32, 64), batch_size=self.hparams.batch_size)
 
-    trainer = Trainer(default_root_dir=tmpdir, max_epochs=1, auto_scale_batch_size=True, accelerator="gpu", devices=1)
-    trainer.tune(model, datamodule_fit)
-    after_batch_size = model.hparams.batch_size if use_hparams else model.batch_size
-    assert trainer.datamodule == datamodule_fit
-    assert before_batch_size != after_batch_size
+    datamodule_class = HparamsBatchSizeDataModule if use_hparams else BatchSizeDataModule
+    datamodule = datamodule_class(data_dir=tmpdir, batch_size=before_batch_size)
+    model = BatchSizeModel(**hparams)
+
+    trainer = Trainer(default_root_dir=tmpdir, max_epochs=1, auto_scale_batch_size=True)
+    trainer.tune(model, datamodule=datamodule, scale_batch_size_kwargs={"steps_per_trial": 2, "max_trials": 4})
+    after_batch_size = datamodule.hparams.batch_size if use_hparams else datamodule.batch_size
+    assert trainer.datamodule == datamodule
+    assert before_batch_size < after_batch_size
     assert after_batch_size <= len(trainer.train_dataloader.dataset)
-    assert datamodule_fit.batch_size == after_batch_size
 
 
 def test_auto_scale_batch_size_duplicate_attribute_warning(tmpdir):
diff --git a/tests/tests_pytorch/utilities/test_parsing.py b/tests/tests_pytorch/utilities/test_parsing.py
index e918c9df2ac32..98b00a374d778 100644
--- a/tests/tests_pytorch/utilities/test_parsing.py
+++ b/tests/tests_pytorch/utilities/test_parsing.py
@@ -64,8 +64,8 @@ class TestModel4(LightningModule):  # fail case
         batch_size = 1
 
     model4 = TestModel4()
-
     trainer = Trainer()
+    model4.trainer = trainer
     datamodule = LightningDataModule()
     datamodule.batch_size = 8
     trainer.datamodule = datamodule
@@ -87,12 +87,21 @@ class TestModel7(LightningModule):  # test for datamodule w/ hparams w/ attribut
     model7 = TestModel7()
     model7.trainer = trainer
 
-    return model1, model2, model3, model4, model5, model6, model7
+    class TestDataModule8(LightningDataModule):  # test for hparams dict
+        hparams = TestHparamsDict2
+
+    model8 = TestModel1()
+    trainer = Trainer()
+    model8.trainer = trainer
+    datamodule = TestDataModule8()
+    trainer.datamodule = datamodule
+
+    return model1, model2, model3, model4, model5, model6, model7, model8
 
 
 def test_lightning_hasattr():
     """Test that the lightning_hasattr works in all cases."""
-    model1, model2, model3, model4, model5, model6, model7 = models = model_cases()
+    model1, model2, model3, model4, model5, model6, model7, model8 = models = model_cases()
     assert lightning_hasattr(model1, "learning_rate"), "lightning_hasattr failed to find namespace variable"
     assert lightning_hasattr(model2, "learning_rate"), "lightning_hasattr failed to find hparams namespace variable"
     assert lightning_hasattr(model3, "learning_rate"), "lightning_hasattr failed to find hparams dict variable"
@@ -104,6 +113,7 @@ def test_lightning_hasattr():
     assert lightning_hasattr(
         model7, "batch_size"
     ), "lightning_hasattr failed to find batch_size in hparams w/ datamodule present"
+    assert lightning_hasattr(model8, "batch_size")
 
     for m in models:
         assert not lightning_hasattr(m, "this_attr_not_exist")
@@ -116,10 +126,11 @@ def test_lightning_getattr():
         value = lightning_getattr(m, "learning_rate")
         assert value == i, "attribute not correctly extracted"
 
-    model5, model6, model7 = models[4:]
+    model5, model6, model7, model8 = models[4:]
     assert lightning_getattr(model5, "batch_size") == 8, "batch_size not correctly extracted"
     assert lightning_getattr(model6, "batch_size") == 8, "batch_size not correctly extracted"
     assert lightning_getattr(model7, "batch_size") == 8, "batch_size not correctly extracted"
+    assert lightning_getattr(model8, "batch_size") == 2, "batch_size not correctly extracted"
 
     for m in models:
         with pytest.raises(
@@ -136,13 +147,14 @@ def test_lightning_setattr(tmpdir):
         lightning_setattr(m, "learning_rate", 10)
         assert lightning_getattr(m, "learning_rate") == 10, "attribute not correctly set"
 
-    model5, model6, model7 = models[4:]
+    model5, model6, model7, model8 = models[4:]
     lightning_setattr(model5, "batch_size", 128)
     lightning_setattr(model6, "batch_size", 128)
     lightning_setattr(model7, "batch_size", 128)
     assert lightning_getattr(model5, "batch_size") == 128, "batch_size not correctly set"
     assert lightning_getattr(model6, "batch_size") == 128, "batch_size not correctly set"
     assert lightning_getattr(model7, "batch_size") == 128, "batch_size not correctly set"
+    assert lightning_getattr(model8, "batch_size") == 128, "batch_size not correctly set"
 
     for m in models:
         with pytest.raises(

From 33a5ed98794943b7eb6c7fcfa078b184c9d4d736 Mon Sep 17 00:00:00 2001
From: Anner <anner.de.jong@outlook.com>
Date: Fri, 26 Aug 2022 06:26:00 +0100
Subject: [PATCH 3/3] Add torch.cuda rng state to seed save/load (#14384)

Co-authored-by: Rohit Gupta <rohitgr1998@gmail.com>
---
 src/pytorch_lightning/CHANGELOG.md         |  4 +++-
 src/pytorch_lightning/utilities/seed.py    | 15 ++++++++++++---
 tests/tests_pytorch/utilities/test_seed.py | 20 +++++++++++++++++++-
 3 files changed, 34 insertions(+), 5 deletions(-)

diff --git a/src/pytorch_lightning/CHANGELOG.md b/src/pytorch_lightning/CHANGELOG.md
index 642cb28d4db4c..ac7e68d177fbe 100644
--- a/src/pytorch_lightning/CHANGELOG.md
+++ b/src/pytorch_lightning/CHANGELOG.md
@@ -27,7 +27,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 - Replaced the unwrapping logic in strategies with direct access to unwrapped `LightningModule` ([#13738](https://github.com/Lightning-AI/lightning/pull/13738))
 
 
-- Enabled `on_before_batch_transfer` for `DPStrategy` and `IPUAccelerator` ([14023](https://github.com/Lightning-AI/lightning/pull/14023))
+- Enabled `on_before_batch_transfer` for `DPStrategy` and `IPUAccelerator` ([#14023](https://github.com/Lightning-AI/lightning/pull/14023))
+
+- Included `torch.cuda` rng state in the aggregate `_collect_rng_states()` and `_set_rng_states()` ([#14384](https://github.com/Lightning-AI/lightning/pull/14384))
 
 
 
diff --git a/src/pytorch_lightning/utilities/seed.py b/src/pytorch_lightning/utilities/seed.py
index 8fce6a1debfcf..925337c7845ae 100644
--- a/src/pytorch_lightning/utilities/seed.py
+++ b/src/pytorch_lightning/utilities/seed.py
@@ -121,13 +121,22 @@ def pl_worker_init_function(worker_id: int, rank: Optional[int] = None) -> None:
 
 
 def _collect_rng_states() -> Dict[str, Any]:
-    """Collect the global random state of :mod:`torch`, :mod:`numpy` and Python."""
-    return {"torch": torch.get_rng_state(), "numpy": np.random.get_state(), "python": python_get_rng_state()}
+    """Collect the global random state of :mod:`torch`, :mod:`torch.cuda`, :mod:`numpy` and Python."""
+    return {
+        "torch": torch.get_rng_state(),
+        "torch.cuda": torch.cuda.get_rng_state_all(),
+        "numpy": np.random.get_state(),
+        "python": python_get_rng_state(),
+    }
 
 
 def _set_rng_states(rng_state_dict: Dict[str, Any]) -> None:
-    """Set the global random state of :mod:`torch`, :mod:`numpy` and Python in the current process."""
+    """Set the global random state of :mod:`torch`, :mod:`torch.cuda`, :mod:`numpy` and Python in the current
+    process."""
     torch.set_rng_state(rng_state_dict["torch"])
+    # torch.cuda rng_state is only included since v1.8.
+    if "torch.cuda" in rng_state_dict:
+        torch.cuda.set_rng_state_all(rng_state_dict["torch.cuda"])
     np.random.set_state(rng_state_dict["numpy"])
     version, state, gauss = rng_state_dict["python"]
     python_set_rng_state((version, tuple(state), gauss))
diff --git a/tests/tests_pytorch/utilities/test_seed.py b/tests/tests_pytorch/utilities/test_seed.py
index 6908badf1a037..c8df824e93b41 100644
--- a/tests/tests_pytorch/utilities/test_seed.py
+++ b/tests/tests_pytorch/utilities/test_seed.py
@@ -9,7 +9,7 @@
 import torch
 
 import pytorch_lightning.utilities.seed as seed_utils
-from pytorch_lightning.utilities.seed import isolate_rng
+from pytorch_lightning.utilities.seed import _collect_rng_states, _set_rng_states, isolate_rng
 
 
 @mock.patch.dict(os.environ, {}, clear=True)
@@ -87,6 +87,13 @@ def test_isolate_rng():
         generated = [torch.rand(2) for _ in range(3)]
     assert torch.equal(torch.rand(2), generated[0])
 
+    # torch.cuda
+    if torch.cuda.is_available():
+        torch.cuda.FloatTensor(1).normal_()
+        with isolate_rng():
+            generated = [torch.cuda.FloatTensor(2).normal_() for _ in range(3)]
+        assert torch.equal(torch.cuda.FloatTensor(2).normal_(), generated[0])
+
     # numpy
     np.random.rand(1)
     with isolate_rng():
@@ -100,6 +107,17 @@ def test_isolate_rng():
     assert random.random() == generated[0]
 
 
+def test_backward_compatibility_rng_states_dict():
+    """Test that an older rng_states_dict without the "torch.cuda" key does not crash.
+
+    This test is only relevant when torch.cuda is available.
+    """
+    states = _collect_rng_states()
+    assert "torch.cuda" in states
+    states.pop("torch.cuda")
+    _set_rng_states(states)
+
+
 @mock.patch("pytorch_lightning.utilities.seed.log.info")
 @pytest.mark.parametrize("env_vars", [{"RANK": "0"}, {"RANK": "1"}, {"RANK": "4"}])
 def test_seed_everything_log_info(log_mock: MagicMock, env_vars: Mapping[str, str]):